From f366a2b2e8499c94521475ffa6bca998a3aaadd0 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Thu, 19 Oct 2017 13:41:51 +0200 Subject: [SERVER] Use monotonic clock for measuring time Introduces new shared source unit timing.[ch] Closes #3214 --- src/server/altservers.c | 30 ++++++----- src/server/globals.h | 14 +++-- src/server/image.c | 45 +++++++++------- src/server/locks.c | 34 +++++++----- src/server/net.c | 5 +- src/server/server.c | 12 +++-- src/server/server.h | 2 +- src/server/uplink.c | 36 +++++++------ src/shared/timing.c | 21 ++++++++ src/shared/timing.h | 139 ++++++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 264 insertions(+), 74 deletions(-) create mode 100644 src/shared/timing.c create mode 100644 src/shared/timing.h diff --git a/src/server/altservers.c b/src/server/altservers.c index 3a9eda5..f9dec4b 100644 --- a/src/server/altservers.c +++ b/src/server/altservers.c @@ -4,6 +4,7 @@ #include "image.h" #include "fileutil.h" #include "../shared/protocol.h" +#include "../shared/timing.h" #include "../serverconfig.h" #include #include @@ -222,7 +223,8 @@ int altservers_get(dnbd3_host_t *output, int size, int emergency) { if ( size <= 0 ) return 0; int count = 0, i; - const time_t now = time( NULL ); + ticks now; + timing_get( &now ); spin_lock( &altServersLock ); // Flip first server in list with a random one every time this is called if ( numAltServers > 1 ) { @@ -238,7 +240,7 @@ int altservers_get(dnbd3_host_t *output, int size, int emergency) if ( _proxyPrivateOnly && !altServers[i].isPrivate ) continue; // Config says to consider private alt-servers only? ignore! if ( altServers[i].isClientOnly ) continue; if ( !emergency && altServers[i].numFails > SERVER_MAX_UPLINK_FAILS // server failed X times in a row - && now - altServers[i].lastFail > SERVER_BAD_UPLINK_IGNORE ) continue; // and last fail was not too long ago? ignore! + && timing_diff( &altServers[i].lastFail, &now ) > SERVER_BAD_UPLINK_IGNORE ) continue; // and last fail was not too long ago? ignore! // server seems ok, include in output and reset its fail counter if ( !emergency ) altServers[i].numFails = 0; output[count++] = altServers[i].host; @@ -304,7 +306,8 @@ int altservers_netCloseness(dnbd3_host_t *host1, dnbd3_host_t *host2) void altservers_serverFailed(const dnbd3_host_t * const host) { int i; - const time_t now = time( NULL ); + ticks now; + timing_get( &now ); spin_lock( &altServersLock ); for (i = 0; i < numAltServers; ++i) { if ( !isSameAddressPort( host, &altServers[i].host ) ) continue; @@ -312,7 +315,7 @@ void altservers_serverFailed(const dnbd3_host_t * const host) // to prevent the counter from increasing rapidly if many images use the // same uplink. If there's a network hickup, all uplinks will call this // function and would increase the counter too quickly, disabling the server. - if ( now - altServers[i].lastFail > SERVER_RTT_DELAY_INIT ) { + if ( timing_diff( &altServers[i].lastFail, &now ) > SERVER_RTT_DELAY_INIT ) { altServers[i].numFails++; altServers[i].lastFail = now; } @@ -338,11 +341,12 @@ static void *altservers_main(void *data UNUSED) dnbd3_host_t servers[ALTS + 1]; serialized_buffer_t serialized; struct timespec start, end; - time_t nextCacheMapSave = time( NULL ) + 90; - time_t nextCloseUnusedFd = time( NULL ) + 900; + ticks nextCacheMapSave, nextCloseUnusedFd; setThreadName( "altserver-check" ); blockNoncriticalSignals(); + timing_gets( &nextCacheMapSave, 90 ); + timing_gets( &nextCloseUnusedFd, 900 ); // Init spinlock // Init waiting links queue spin_lock( &pendingLockWrite ); @@ -413,7 +417,7 @@ static void *altservers_main(void *data UNUSED) for (itAlt = 0; itAlt < numAlts; ++itAlt) { usleep( 1000 ); // Wait a very short moment for the network to recover (we might be doing lots of measurements...) // Connect - clock_gettime( CLOCK_MONOTONIC, &start ); + clock_gettime( BEST_CLOCK_SOURCE, &start ); int sock = sock_connect( &servers[itAlt], 750, _uplinkTimeout ); if ( sock < 0 ) continue; // Select image ++++++++++++++++++++++++++++++ @@ -458,7 +462,7 @@ static void *altservers_main(void *data UNUSED) if ( recv( sock, buffer, DNBD3_BLOCK_SIZE, MSG_WAITALL ) != DNBD3_BLOCK_SIZE ) { ERROR_GOTO( server_failed, "[RTT] Could not read first block payload for %s", image->name ); } - clock_gettime( CLOCK_MONOTONIC, &end ); + clock_gettime( BEST_CLOCK_SOURCE, &end ); // Measurement done - everything fine so far spin_lock( &uplink->rttLock ); const bool isCurrent = isSameAddressPort( &servers[itAlt], &uplink->currentServer ); @@ -533,14 +537,14 @@ static void *altservers_main(void *data UNUSED) } // Save cache maps of all images if applicable // TODO: Has nothing to do with alt servers really, maybe move somewhere else? - const time_t now = time( NULL ); - if ( now > nextCacheMapSave ) { - nextCacheMapSave = now + SERVER_CACHE_MAP_SAVE_INTERVAL; + declare_now; + if ( timing_reached( &nextCacheMapSave, &now ) ) { + timing_gets( &nextCacheMapSave, SERVER_CACHE_MAP_SAVE_INTERVAL ); image_saveAllCacheMaps(); } // TODO: More random crap - if ( _closeUnusedFd && now > nextCloseUnusedFd ) { - nextCloseUnusedFd = now + 900; + if ( _closeUnusedFd && timing_reached( &nextCloseUnusedFd, &now ) ) { + timing_gets( &nextCloseUnusedFd, 900 ); image_closeUnusedFd(); } } diff --git a/src/server/globals.h b/src/server/globals.h index 41d52df..31ac841 100644 --- a/src/server/globals.h +++ b/src/server/globals.h @@ -8,6 +8,8 @@ #include #include +typedef struct timespec ticks; + // ######### All structs/types used by the server ######## typedef struct _dnbd3_connection dnbd3_connection_t; @@ -33,7 +35,9 @@ typedef struct uint64_t to; // Last byte + 1 of requested block (ie. 8192, if request len is 4096, resulting in bytes 4096-8191) dnbd3_client_t * client; // Client to send reply to int status; // status of this entry: ULR_* - time_t entered; // When this request entered the queue (for debugging) +#ifdef _DEBUG + ticks entered; // When this request entered the queue (for debugging) +#endif uint8_t hopCount; // How many hops this request has already taken across proxies } dnbd3_queued_request_t; @@ -75,7 +79,7 @@ typedef struct int rtt[SERVER_RTT_PROBES]; int rttIndex; bool isPrivate, isClientOnly; - time_t lastFail; + ticks lastFail; int numFails; } dnbd3_alt_server_t; @@ -101,9 +105,9 @@ struct _dnbd3_image uint8_t *cache_map; // cache map telling which parts are locally cached, NULL if complete uint64_t virtualFilesize; // virtual size of image (real size rounded up to multiple of 4k) uint64_t realFilesize; // actual file size on disk - time_t atime; // last access time - time_t lastWorkCheck; // last time a non-working image has been checked - time_t nextCompletenessEstimate; // last time the completeness estimate was updated + ticks atime; // last access time + ticks lastWorkCheck; // last time a non-working image has been checked + ticks nextCompletenessEstimate; // next time the completeness estimate should be updated uint32_t *crc32; // list of crc32 checksums for each 16MiB block in image uint32_t masterCrc32; // CRC-32 of the crc-32 list int readFd; // used to read the image. Used from multiple threads, so use atomic operations (pread et al) diff --git a/src/server/image.c b/src/server/image.c index db758f6..47bcb0b 100644 --- a/src/server/image.c +++ b/src/server/image.c @@ -6,6 +6,7 @@ #include "integrity.h" #include "altservers.h" #include "../shared/protocol.h" +#include "../shared/timing.h" #include #include @@ -33,7 +34,7 @@ typedef struct { char name[NAMELEN]; uint16_t rid; - time_t deadline; + ticks deadline; } imagecache; static imagecache remoteCloneCache[CACHELEN]; @@ -313,7 +314,7 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking) if ( newFd == -1 ) { spin_lock( &candidate->lock ); candidate->working = false; - candidate->lastWorkCheck = time( NULL ); + timing_get( &candidate->lastWorkCheck ); spin_unlock( &candidate->lock ); if ( _removeMissingImages ) { candidate = image_remove( candidate ); // No release here, the image is still returned and should be released by caller @@ -339,8 +340,8 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking) // Don't re-check too often spin_lock( &candidate->lock ); bool check; - const time_t now = time( NULL ); - check = ( now - candidate->lastWorkCheck ) > NONWORKING_RECHECK_INTERVAL_SECONDS; + declare_now; + check = timing_diff( &candidate->lastWorkCheck, &now ) > NONWORKING_RECHECK_INTERVAL_SECONDS; if ( check ) { candidate->lastWorkCheck = now; } @@ -907,11 +908,15 @@ static bool image_load(char *base, char *path, int withUplink) image->cacheFd = -1; image->working = (image->cache_map == NULL ); spin_init( &image->lock, PTHREAD_PROCESS_PRIVATE ); + int offset; if ( stat( path, &st ) == 0 ) { - image->atime = st.st_mtime; + // Negatively offset atime by file modification time + offset = st.st_mtime - time( NULL ); + if ( offset > 0 ) offset = 0; } else { - image->atime = time( NULL ); + offset = 0; } + timing_gets( &image->atime, offset ); // Prevent freeing in cleanup cache_map = NULL; @@ -1169,7 +1174,7 @@ static dnbd3_image_t *loadImageProxy(char * const name, const uint16_t revision, } // Doesn't exist or is rid 0, try remote if not already tried it recently - const time_t now = time( NULL ); + declare_now; char *cmpname = name; int useIndex = -1, fallbackIndex = 0; if ( len >= NAMELEN ) cmpname += 1 + len - NAMELEN; @@ -1177,11 +1182,11 @@ static dnbd3_image_t *loadImageProxy(char * const name, const uint16_t revision, for (i = 0; i < CACHELEN; ++i) { if ( remoteCloneCache[i].rid == revision && strcmp( cmpname, remoteCloneCache[i].name ) == 0 ) { useIndex = i; - if ( remoteCloneCache[i].deadline < now ) break; + if ( timing_reached( &remoteCloneCache[i].deadline, &now ) ) break; pthread_mutex_unlock( &remoteCloneLock ); // Was recently checked... return image; } - if ( remoteCloneCache[i].deadline < remoteCloneCache[fallbackIndex].deadline ) { + if ( timing_1le2( &remoteCloneCache[i].deadline, &remoteCloneCache[fallbackIndex].deadline ) ) { fallbackIndex = i; } } @@ -1200,7 +1205,7 @@ static dnbd3_image_t *loadImageProxy(char * const name, const uint16_t revision, if ( useIndex == -1 ) { useIndex = fallbackIndex; } - remoteCloneCache[useIndex].deadline = now + SERVER_REMOTE_IMAGE_CHECK_CACHETIME; + timing_set( &remoteCloneCache[useIndex].deadline, &now, SERVER_REMOTE_IMAGE_CHECK_CACHETIME ); snprintf( remoteCloneCache[useIndex].name, NAMELEN, "%s", cmpname ); remoteCloneCache[useIndex].rid = revision; pthread_mutex_unlock( &remoteCloneLock ); @@ -1540,8 +1545,8 @@ int image_getCompletenessEstimate(dnbd3_image_t * const image) { assert( image != NULL ); if ( image->cache_map == NULL ) return image->working ? 100 : 0; - const time_t now = time( NULL ); - if ( now < image->nextCompletenessEstimate ) { + declare_now; + if ( timing_reached( &image->nextCompletenessEstimate, &now ) ) { // Since this operation is relatively expensive, we cache the result for a while return image->completenessEstimate; } @@ -1557,7 +1562,7 @@ int image_getCompletenessEstimate(dnbd3_image_t * const image) } } image->completenessEstimate = percent / len; - image->nextCompletenessEstimate = now + 10 + rand() % 30; + timing_set( &image->nextCompletenessEstimate, &now, 8 + rand() % 32 ); return image->completenessEstimate; } @@ -1654,16 +1659,17 @@ static bool image_ensureDiskSpace(uint64_t size) if ( _images[i] == NULL ) continue; dnbd3_image_t *current = image_lock( _images[i] ); if ( current == NULL ) continue; - if ( current->atime != 0 && current->users == 1 ) { // Just from the lock above - if ( oldest == NULL || oldest->atime > current->atime ) { + if ( current->users == 1 ) { // Just from the lock above + if ( oldest == NULL || timing_1le2( ¤t->atime, &oldest->atime ) ) { // Oldest access time so far oldest = current; } } current = image_release( current ); } - if ( oldest == NULL || time( NULL ) - oldest->atime < 86400 ) { - logadd( LOG_DEBUG1, "No image is old enough :-(\n" ); + declare_now; + if ( oldest == NULL || timing_diff( &oldest->atime, &now ) < 86400 ) { + logadd( LOG_DEBUG1, "No image is old enough (all have been in use in past 24h) :-(\n" ); return false; } oldest = image_lock( oldest ); @@ -1687,7 +1693,8 @@ static bool image_ensureDiskSpace(uint64_t size) void image_closeUnusedFd() { int fd, i; - time_t deadline = time( NULL ) - UNUSED_FD_TIMEOUT; + ticks deadline; + timing_gets( &deadline, -UNUSED_FD_TIMEOUT ); char imgstr[300]; spin_lock( &imageListLock ); for (i = 0; i < _num_images; ++i) { @@ -1696,7 +1703,7 @@ void image_closeUnusedFd() continue; spin_lock( &image->lock ); spin_unlock( &imageListLock ); - if ( image->users == 0 && image->atime < deadline ) { + if ( image->users == 0 && timing_reached( &image->atime, &deadline ) ) { snprintf( imgstr, sizeof(imgstr), "%s:%d", image->name, (int)image->rid ); fd = image->readFd; image->readFd = -1; diff --git a/src/server/locks.c b/src/server/locks.c index 60acfb9..3bd5103 100644 --- a/src/server/locks.c +++ b/src/server/locks.c @@ -7,6 +7,7 @@ #include "locks.h" #include "helper.h" +#include "../shared/timing.h" #ifdef _DEBUG #define MAXLOCKS (SERVER_MAX_CLIENTS * 2 + SERVER_MAX_ALTS + 200 + SERVER_MAX_IMAGES) @@ -15,7 +16,7 @@ typedef struct { void *lock; - time_t locktime; + ticks locktime; char locked; pthread_t thread; int lockId; @@ -26,7 +27,7 @@ typedef struct typedef struct { pthread_t tid; - time_t time; + ticks time; char name[LOCKLEN]; char where[LOCKLEN]; @@ -96,24 +97,27 @@ int debug_spin_lock(const char *name, const char *file, int line, pthread_spinlo for (int i = 0; i < MAXTHREADS; ++i) { if ( threads[i].tid != 0 ) continue; threads[i].tid = pthread_self(); - threads[i].time = time( NULL ); + timing_get( &threads[i].time ); snprintf( threads[i].name, LOCKLEN, "%s", name ); snprintf( threads[i].where, LOCKLEN, "%s:%d", file, line ); t = &threads[i]; break; } pthread_spin_unlock( &initdestory ); + if ( t == NULL ) { + logadd( LOG_ERROR, "Lock sanity check: Too many waiting threads at %s:%d\n", (void*)lock, name, file, line ); + exit( 4 ); + } const int retval = pthread_spin_lock( lock ); pthread_spin_lock( &initdestory ); t->tid = 0; - t->time = 0; pthread_spin_unlock( &initdestory ); if ( l->locked ) { logadd( LOG_ERROR, "Lock sanity check: lock %p (%s) already locked at %s:%d\n", (void*)lock, name, file, line ); exit( 4 ); } l->locked = 1; - l->locktime = time( NULL ); + timing_get( &l->locktime ); l->thread = pthread_self(); snprintf( l->where, LOCKLEN, "L %s:%d", file, line ); pthread_spin_lock( &initdestory ); @@ -143,17 +147,20 @@ int debug_spin_trylock(const char *name, const char *file, int line, pthread_spi for (int i = 0; i < MAXTHREADS; ++i) { if ( threads[i].tid != 0 ) continue; threads[i].tid = pthread_self(); - threads[i].time = time( NULL ); + timing_get( &threads[i].time ); snprintf( threads[i].name, LOCKLEN, "%s", name ); snprintf( threads[i].where, LOCKLEN, "%s:%d", file, line ); t = &threads[i]; break; } pthread_spin_unlock( &initdestory ); + if ( t == NULL ) { + logadd( LOG_ERROR, "Lock sanity check: Too many waiting threads at %s:%d\n", (void*)lock, name, file, line ); + exit( 4 ); + } const int retval = pthread_spin_trylock( lock ); pthread_spin_lock( &initdestory ); t->tid = 0; - t->time = 0; pthread_spin_unlock( &initdestory ); if ( retval == 0 ) { if ( l->locked ) { @@ -161,7 +168,7 @@ int debug_spin_trylock(const char *name, const char *file, int line, pthread_spi exit( 4 ); } l->locked = 1; - l->locktime = time( NULL ); + timing_get( &l->locktime ); l->thread = pthread_self(); snprintf( l->where, LOCKLEN, "L %s:%d", file, line ); pthread_spin_lock( &initdestory ); @@ -191,7 +198,6 @@ int debug_spin_unlock(const char *name, const char *file, int line, pthread_spin exit( 4 ); } l->locked = 0; - l->locktime = 0; l->thread = 0; snprintf( l->where, LOCKLEN, "U %s:%d", file, line ); int retval = pthread_spin_unlock( lock ); @@ -219,7 +225,7 @@ int debug_spin_destroy(const char *name, const char *file, int line, pthread_spi void debug_dump_lock_stats() { - time_t now = time( NULL ); + declare_now; pthread_spin_lock( &initdestory ); printf( "\n **** LOCKS ****\n\n" ); for (int i = 0; i < MAXLOCKS; ++i) { @@ -230,7 +236,7 @@ void debug_dump_lock_stats() "* When: %d secs ago\n" "* Locked: %d\n" "* Serial: %d\n" - "* Thread: %d\n", locks[i].name, locks[i].where, (int)(now - locks[i].locktime), (int)locks[i].locked, locks[i].lockId, + "* Thread: %d\n", locks[i].name, locks[i].where, (int)timing_diff( &locks[i].locktime, &now ), (int)locks[i].locked, locks[i].lockId, (int)locks[i].thread ); } else { printf( "* *** %s ***\n" @@ -244,7 +250,7 @@ void debug_dump_lock_stats() printf( "* *** Thread %d ***\n" "* Lock: %s\n" "* Where: %s\n" - "* How long: %d secs\n", (int)threads[i].tid, threads[i].name, threads[i].where, (int)(now - threads[i].time) ); + "* How long: %d secs\n", (int)threads[i].tid, threads[i].name, threads[i].where, (int)timing_diff( &threads[i].time, &now ) ); } pthread_spin_unlock( &initdestory ); } @@ -254,11 +260,11 @@ static void *debug_thread_watchdog(void *something UNUSED) setThreadName( "debug-watchdog" ); while ( !_shutdown ) { if ( init_done ) { - time_t now = time( NULL ); + declare_now; pthread_spin_lock( &initdestory ); for (int i = 0; i < MAXTHREADS; ++i) { if ( threads[i].tid == 0 ) continue; - const int diff = now - threads[i].time; + const uint32_t diff = timing_diff( &threads[i].time, &now ); if ( diff > 6 && diff < 100000 ) { printf( "\n\n +++++++++ DEADLOCK ++++++++++++\n\n" ); pthread_spin_unlock( &initdestory ); diff --git a/src/server/net.c b/src/server/net.c index acb3dbe..cd91b22 100644 --- a/src/server/net.c +++ b/src/server/net.c @@ -26,6 +26,7 @@ #include "altservers.h" #include "../shared/sockhelper.h" +#include "../shared/timing.h" #include "../serialize.h" #include @@ -265,7 +266,7 @@ void* net_handleNewConnection(void *clientPtr) if ( bOk ) { spin_lock( &image->lock ); image_file = image->readFd; - image->atime = time( NULL ); + timing_get( &image->atime ); spin_unlock( &image->lock ); serializer_reset_write( &payload ); serializer_put_uint16( &payload, PROTOCOL_VERSION ); @@ -485,7 +486,7 @@ void* net_handleNewConnection(void *clientPtr) send_reply( client->sock, &reply, NULL ); pthread_mutex_unlock( &client->sendMutex ); spin_lock( &image->lock ); - image->atime = time( NULL ); + timing_get( &image->atime ); spin_unlock( &image->lock ); set_name: ; if ( !hasName ) { diff --git a/src/server/server.c b/src/server/server.c index bfff659..6cbea3a 100644 --- a/src/server/server.c +++ b/src/server/server.c @@ -31,6 +31,7 @@ #include "../version.h" #include "../shared/sockhelper.h" +#include "../shared/timing.h" #include #include @@ -47,7 +48,7 @@ poll_list_t *listeners = NULL; /** * Time the server was started */ -static time_t startupTime = 0; +static ticks startupTime; static bool sigReload = false, sigLogCycle = false; static dnbd3_client_t* dnbd3_prepareClient(struct sockaddr_storage *client, int fd); @@ -233,6 +234,7 @@ int main(int argc, char *argv[]) // No one-shot detected, normal server operation if ( demonize ) daemon( 1, 0 ); + timing_setBase(); image_serverStartup(); altservers_init(); integrity_init(); @@ -264,7 +266,7 @@ int main(int argc, char *argv[]) return 0; } - startupTime = time( NULL ); + timing_get( &startupTime ); // Give other threads some time to start up before accepting connections sleep( 1 ); @@ -377,9 +379,11 @@ static void dnbd3_handleSignal(int signum) } } -int dnbd3_serverUptime() +uint32_t dnbd3_serverUptime() { - return (int)(time( NULL ) - startupTime); + ticks now; + timing_get( &now ); + return timing_diff( &startupTime, &now ); } static void* server_asyncImageListLoad(void *data UNUSED) diff --git a/src/server/server.h b/src/server/server.h index 54e1a24..bab8421 100644 --- a/src/server/server.h +++ b/src/server/server.h @@ -25,7 +25,7 @@ #include "../types.h" void dnbd3_cleanup(); -int dnbd3_serverUptime(); +uint32_t dnbd3_serverUptime(); #if !defined(_FILE_OFFSET_BITS) || _FILE_OFFSET_BITS != 64 #error Please set _FILE_OFFSET_BITS to 64 in your makefile/configuration diff --git a/src/server/uplink.c b/src/server/uplink.c index 9fb5c13..cb5f426 100644 --- a/src/server/uplink.c +++ b/src/server/uplink.c @@ -5,6 +5,7 @@ #include "altservers.h" #include "../shared/sockhelper.h" #include "../shared/protocol.h" +#include "../shared/timing.h" #include #include @@ -243,7 +244,7 @@ bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uin uplink->queue[freeSlot].status = (foundExisting == -1 ? ULR_NEW : ULR_PENDING); uplink->queue[freeSlot].hopCount = hops; #ifdef _DEBUG - uplink->queue[freeSlot].entered = time( NULL ); + timing_get( &uplink->queue[freeSlot].entered ); //logadd( LOG_DEBUG2 %p] Inserting request at slot %d, was %d, now %d, handle %" PRIu64 ", Range: %" PRIu64 "-%" PRIu64 "\n", (void*)uplink, freeSlot, old, uplink->queue[freeSlot].status, uplink->queue[freeSlot, ".handle, start, end ); #endif spin_unlock( &uplink->queueLock ); @@ -270,9 +271,11 @@ static void* uplink_mainloop(void *data) int numSocks, i, waitTime; int altCheckInterval = SERVER_RTT_DELAY_INIT; int discoverFailCount = 0; - time_t nextAltCheck = 0, nextKeepalive = 0; + ticks nextAltCheck, nextKeepalive; char buffer[200]; memset( events, 0, sizeof(events) ); + timing_get( &nextAltCheck ); + nextKeepalive = nextAltCheck; // assert( link != NULL ); setThreadName( "idle-uplink" ); @@ -321,12 +324,15 @@ static void* uplink_mainloop(void *data) uplink_sendReplicationRequest( link ); events[EV_SOCKET].events = POLLIN | POLLRDHUP; events[EV_SOCKET].fd = link->fd; - nextAltCheck = time( NULL ) + altCheckInterval; + timing_gets( &nextAltCheck, altCheckInterval ); // The rtt worker already did the handshake for our image, so there's nothing // more to do here } // poll() - waitTime = (time( NULL ) - nextAltCheck) * 1000; + do { + declare_now; + waitTime = timing_diffMs( &now, &nextAltCheck ); + } while(0); if ( waitTime < 1500 ) waitTime = 1500; if ( waitTime > 5000 ) waitTime = 5000; numSocks = poll( events, EV_COUNT, waitTime ); @@ -360,13 +366,13 @@ static void* uplink_mainloop(void *data) logadd( LOG_DEBUG1, "Uplink gone away, panic!\n" ); } else if ( (events[EV_SOCKET].revents & POLLIN) ) { uplink_handleReceive( link ); - if ( link->fd == -1 ) nextAltCheck = 0; + if ( link->fd == -1 ) timing_get( &nextAltCheck ); if ( _shutdown || link->shutdown ) goto cleanup; } // Send keep alive if nothing is happening - const time_t now = time( NULL ); - if ( link->fd != -1 && link->replicationHandle == 0 && now > nextKeepalive ) { - nextKeepalive = now + 20; + declare_now; + if ( link->fd != -1 && link->replicationHandle == 0 && timing_reached( &nextKeepalive, &now ) ) { + timing_set( &nextKeepalive, &now, 20 ); if ( !uplink_sendKeepalive( link->fd ) ) { const int fd = link->fd; link->fd = -1; @@ -378,10 +384,7 @@ static void* uplink_mainloop(void *data) const int rttTestResult = link->rttTestResult; spin_unlock( &link->rttLock ); if ( rttTestResult == RTT_IDLE || rttTestResult == RTT_DONTCHANGE ) { - if ( now + SERVER_RTT_DELAY_FAILED < nextAltCheck ) { - // This probably means the system time was changed - handle this case properly by capping the timeout - nextAltCheck = now + SERVER_RTT_DELAY_FAILED / 2; - } else if ( now >= nextAltCheck || link->fd == -1 || link->cycleDetected ) { + if ( timing_reached( &nextAltCheck, &now ) || link->fd == -1 || link->cycleDetected ) { // It seems it's time for a check if ( image_isComplete( link->image ) ) { // Quit work if image is complete @@ -393,22 +396,23 @@ static void* uplink_mainloop(void *data) altservers_findUplink( link ); // This will set RTT_INPROGRESS (synchronous) } altCheckInterval = MIN(altCheckInterval + 1, SERVER_RTT_DELAY_MAX); - nextAltCheck = now + altCheckInterval; + timing_set( &nextAltCheck, &now, altCheckInterval ); } } else if ( rttTestResult == RTT_NOT_REACHABLE ) { spin_lock( &link->rttLock ); link->rttTestResult = RTT_IDLE; spin_unlock( &link->rttLock ); discoverFailCount++; - nextAltCheck = now + (discoverFailCount < 5 ? altCheckInterval : SERVER_RTT_DELAY_FAILED); + timing_set( &nextAltCheck, &now, (discoverFailCount < 5 ? altCheckInterval : SERVER_RTT_DELAY_FAILED) ); } #ifdef _DEBUG if ( link->fd != -1 && !link->shutdown ) { bool resend = false; - const time_t deadline = now - 10; + ticks deadline; + timing_set( &deadline, &now, -10 ); spin_lock( &link->queueLock ); for (i = 0; i < link->queueLen; ++i) { - if ( link->queue[i].status != ULR_FREE && link->queue[i].entered < deadline ) { + if ( link->queue[i].status != ULR_FREE && timing_reached( &link->queue[i].entered, &deadline ) ) { snprintf( buffer, sizeof(buffer), "[DEBUG %p] Starving request slot %d detected:\n" "%s\n(from %" PRIu64 " to %" PRIu64 ", status: %d)\n", (void*)link, i, link->queue[i].client->image->name, link->queue[i].from, link->queue[i].to, link->queue[i].status ); diff --git a/src/shared/timing.c b/src/shared/timing.c new file mode 100644 index 0000000..4ca1002 --- /dev/null +++ b/src/shared/timing.c @@ -0,0 +1,21 @@ +#include "timing.h" +#include +#include +#include +#include + +struct timespec basetime; + +void timing_abort() +{ + printf( "Cannot get CLOCK_MONOTONIC(_RAW), errno=%d\n", errno ); + exit( 1 ); +} + +void timing_setBase() +{ + if ( clock_gettime( BEST_CLOCK_SOURCE, &basetime ) == -1 ) { + memset( &basetime, 0, sizeof(basetime) ); + } +} + diff --git a/src/shared/timing.h b/src/shared/timing.h new file mode 100644 index 0000000..a8a51a4 --- /dev/null +++ b/src/shared/timing.h @@ -0,0 +1,139 @@ +#ifndef _D_TIMING_H +#define _D_TIMING_H + +#ifndef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 199309L +#endif + +#include +#include +#include + +#ifdef CLOCK_MONOTONIC_RAW +#define BEST_CLOCK_SOURCE CLOCK_MONOTONIC_RAW +#else +#define BEST_CLOCK_SOURCE CLOCK_MONOTONIC +#endif + +typedef struct timespec ticks; + +extern struct timespec basetime; + +/** + * Assign src to dst while adding secs seconds. + */ +#define timing_set(dst,src,secs) do { (dst)->tv_sec = (src)->tv_sec + secs; (dst)->tv_nsec = (src)->tv_nsec; } while (0) + +/** + * Define variable now, initialize to timing_get. + */ +#define declare_now ticks now; timing_get( &now ) + +/** + * Call this once to calibrate on startup. + * Although overflows of CLOCK_MONOTONIC(_RAW) should + * by definition never happen, we still have a fixed size + * int that could at some point. By forcing the counter + * to start at 0 on startup the point of overflow + * will be very far in the future (decades for 32bit time_t, + * end of universe for 64bit). + */ +void timing_setBase(); + +/** + * Internal, do not use. Moved to another function + * to prevent inlining of error handling code, which + * should be very unlikely to ever trigger. + */ +_Noreturn void timing_abort(); + +/** + * Get current time. Shortcut for clock_gettime with error check. + */ +static inline void timing_get(ticks* retval) +{ + if ( clock_gettime( BEST_CLOCK_SOURCE, retval ) == -1 ) timing_abort(); + retval->tv_sec -= basetime.tv_sec; +} + +/** + * Get a ticks instance somewhere in the future. + * Useful for timeouts. + */ +static inline void timing_gets(ticks* retval, uint32_t addSeconds) +{ + timing_get( retval ); + retval->tv_sec += addSeconds; +} + +/** + * Check whether given timeout is reached. + * Might trigger up to one second early. + */ +static inline bool timing_reached(const ticks* timeout, const ticks* now) +{ + return now->tv_sec >= timeout->tv_sec; +} +#define timing_1le2(one,two) timing_reached(one,two) + +/** + * Precise check whether given timeout has been reached. + */ +static inline bool timing_reachedPrecise(const ticks* timeout, const ticks* now) +{ + return now->tv_sec > timeout->tv_sec + || (now->tv_sec == timeout->tv_sec && now->tv_nsec > timeout->tv_nsec); +} + +/** + * Shortcut for above. Useful if not used in loop. + * Might trigger up to one second early. + */ +static inline bool timing_isReached(const ticks* timeout) +{ + ticks now; + timing_get( &now ); + return timing_reached( timeout, &now ); +} +/** + * Shortcut for above. Useful if not used in loop. + */ +static inline bool timing_isReachedPrecise(const ticks* timeout) +{ + ticks now; + timing_get( &now ); + return timing_reachedPrecise( timeout, &now ); +} + + +/** + * Get difference between two ticks, rounded down to seconds. + * Make sure you pass the arguments in the proper order. If + * end is before start, 0 will always be returned. + */ +static inline uint32_t timing_diff(const ticks *start, const ticks *end) +{ + if ( end->tv_sec <= start->tv_sec ) return 0; + return (uint32_t)( ( end->tv_sec - start->tv_sec ) + + ( start->tv_nsec > end->tv_nsec ? -1 : 0 ) ); +} + +/** + * Get difference between two ticks, rounded down to milliseconds. + * Same as above; passing arguments in reverse will always return 0. + */ +static inline uint64_t timing_diffMs(const ticks *start, const ticks *end) +{ + if ( end->tv_sec < start->tv_sec ) return 0; + uint64_t diff = (uint64_t)( end->tv_sec - start->tv_sec ) * 1000; + if ( start->tv_nsec >= end->tv_nsec ) { + if ( diff == 0 ) return 0; + diff -= (start->tv_nsec - end->tv_nsec) / 1000000; + } else { + diff += (end->tv_nsec - start->tv_nsec) / 1000000; + } + return diff; +} + + +#endif -- cgit v1.2.3-55-g7522