From be7d7d95850c30a154aaa56e95d6a7f36793409d Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Wed, 7 Aug 2019 17:11:51 +0200 Subject: [SERVER] Better lock debugging: Always check lock order Lock order is predefined in locks.h. Immediately bail out if a lock with lower priority is obtained while the same thread already holds one with higher priority. --- src/server/server.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'src/server/server.c') diff --git a/src/server/server.c b/src/server/server.c index 10ab208..838aec2 100644 --- a/src/server/server.c +++ b/src/server/server.c @@ -133,9 +133,6 @@ void dnbd3_cleanup() // Wait for clients to disconnect net_waitForAllDisconnected(); - // Watchdog not needed anymore - debug_locks_stop_watchdog(); - // Clean up images retries = 5; while ( !image_tryFreeAll() && --retries > 0 ) { @@ -303,10 +300,6 @@ int main(int argc, char *argv[]) logadd( LOG_WARNING, "Could not load alt-servers. Does the file exist in %s?", _configDir ); } -#ifdef _DEBUG - debug_locks_start_watchdog(); -#endif - // setup signal handler struct sigaction sa; memset( &sa, 0, sizeof(sa) ); -- cgit v1.2.3-55-g7522 From 5fb4ef278be86fb6bda487f65ec4855d830bf4e5 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Thu, 22 Aug 2019 16:14:27 +0200 Subject: [SERVER] Get rid of alt-servers thread, per-uplink rtt history Alt-Server checks are now run using the threadpool, so we don't need a queue and dedicated thread anymore. The rtt history is now kept per uplink, so many uplinks won't overwhelm the history, making its time window very short. Also the fail counter is now split up; a global one for when the server actually isn't reachable, a local (per-uplink) one for when the server is reachable but doesn't serve the requested image. --- src/server/altservers.c | 738 ++++++++++++++++++++++-------------------------- src/server/altservers.h | 16 +- src/server/globals.h | 41 ++- src/server/image.c | 6 +- src/server/net.c | 16 +- src/server/server.c | 8 +- src/server/uplink.c | 117 ++++---- src/server/uplink.h | 2 + src/serverconfig.h | 10 +- 9 files changed, 469 insertions(+), 485 deletions(-) (limited to 'src/server/server.c') diff --git a/src/server/altservers.c b/src/server/altservers.c index fbe10a8..493ed9e 100644 --- a/src/server/altservers.c +++ b/src/server/altservers.c @@ -1,5 +1,6 @@ #include "altservers.h" #include "locks.h" +#include "threadpool.h" #include "helper.h" #include "image.h" #include "fileutil.h" @@ -14,46 +15,22 @@ #define LOG_GOTO(jumplabel, lvl, ...) do { LOG(lvl, __VA_ARGS__); goto jumplabel; } while (0); #define ERROR_GOTO(jumplabel, ...) LOG_GOTO(jumplabel, LOG_ERROR, __VA_ARGS__) -static dnbd3_uplink_t * _Atomic pending[SERVER_MAX_PENDING_ALT_CHECKS]; -static dnbd3_signal_t * _Atomic runSignal = NULL; - static dnbd3_alt_server_t altServers[SERVER_MAX_ALTS]; static atomic_int numAltServers = 0; static pthread_mutex_t altServersLock; +static ticks nextCloseUnusedFd; // TODO: Move away -static pthread_t altThread; - -static void *altservers_main(void *data); -static unsigned int altservers_updateRtt(const dnbd3_host_t * const host, const unsigned int rtt); +static void *altservers_runCheck(void *data); +static int altservers_getListForUplink(dnbd3_uplink_t *uplink, int *servers, int size, int current); +static void altservers_findUplinkInternal(dnbd3_uplink_t *uplink); +static uint32_t altservers_updateRtt(dnbd3_uplink_t *uplink, int index, uint32_t rtt); +static void altservers_imageFailed(dnbd3_uplink_t *uplink, int server); void altservers_init() { srand( (unsigned int)time( NULL ) ); - // Init spinlock + // Init lock mutex_init( &altServersLock, LOCK_ALT_SERVER_LIST ); - // Init signal - runSignal = signal_new(); - if ( runSignal == NULL ) { - logadd( LOG_ERROR, "Error creating signal object. Uplink feature unavailable." ); - exit( EXIT_FAILURE ); - } - memset( altServers, 0, SERVER_MAX_ALTS * sizeof(dnbd3_alt_server_t) ); - if ( 0 != thread_create( &altThread, NULL, &altservers_main, (void *)NULL ) ) { - logadd( LOG_ERROR, "Could not start altservers connector thread" ); - exit( EXIT_FAILURE ); - } - // Init waiting links queue -- this is currently a global static array so - // it will already be zero, but in case we refactor later do it explicitly - for (int i = 0; i < SERVER_MAX_PENDING_ALT_CHECKS; ++i) { - pending[i] = NULL; - } -} - -void altservers_shutdown() -{ - if ( runSignal == NULL ) return; - signal_call( runSignal ); // Wake altservers thread up - thread_join( altThread, NULL ); } static void addalt(int argc, char **argv, void *data) @@ -121,7 +98,7 @@ bool altservers_add(dnbd3_host_t *host, const char *comment, const int isPrivate /** * ONLY called from the passed uplink's main thread */ -void altservers_findUplink(dnbd3_uplink_t *uplink) +void altservers_findUplinkAsync(dnbd3_uplink_t *uplink) { if ( uplink->shutdown ) return; @@ -135,67 +112,11 @@ void altservers_findUplink(dnbd3_uplink_t *uplink) assert( uplink->better.fd == -1 ); // it is however possible that an RTT measurement is currently in progress, // so check for that case and do nothing if one is in progress - // XXX As this function is only ever called by the image's uplink thread, - // it cannot happen that the uplink ends up in this list concurrently mutex_lock( &uplink->rttLock ); - if ( uplink->rttTestResult == RTT_INPROGRESS ) { - for (i = 0; i < SERVER_MAX_PENDING_ALT_CHECKS; ++i) { - if ( pending[i] != uplink ) continue; - // Yep, measuring right now - return; - } - } - // Find free slot for measurement - uplink->rttTestResult = RTT_INPROGRESS; - for (i = 0; i < SERVER_MAX_PENDING_ALT_CHECKS; ++i) { - if ( pending[i] != NULL ) continue; - dnbd3_uplink_t *null = NULL; - if ( atomic_compare_exchange_strong( &pending[i], &null, uplink ) ) { - mutex_unlock( &uplink->rttLock ); - atomic_thread_fence( memory_order_release ); - signal_call( runSignal ); // Wake altservers thread up - return; - } + if ( uplink->rttTestResult != RTT_INPROGRESS ) { + threadpool_run( &altservers_runCheck, uplink ); } - // End of loop - no free slot - uplink->rttTestResult = RTT_NOT_REACHABLE; mutex_unlock( &uplink->rttLock ); - logadd( LOG_WARNING, "No more free RTT measurement slots, ignoring a request..." ); -} - -/** - * The given uplink is about to disappear, - * wait until any pending RTT check is done. - */ -void altservers_removeUplink(dnbd3_uplink_t *uplink) -{ - assert( uplink != NULL ); - assert( uplink->shutdown ); - int i; - for ( i = 1 ;; ++i ) { - atomic_thread_fence( memory_order_acquire ); - if ( runSignal == NULL ) { - // Thread is already done, remove manually - uplink->rttTestResult = RTT_NOT_REACHABLE; - break; - } - // Thread still running, wait until test is done - bool found = false; - for (int i = 0; i < SERVER_MAX_PENDING_ALT_CHECKS; ++i) { - if ( pending[i] == uplink ) { - found = true; - break; - } - } - if ( !found ) // No more test running - break; - usleep( 10000 ); // 10ms - signal_call( runSignal ); // Wake altservers thread up - if ( i % 500 == 0 ) { - logadd( LOG_INFO, "Still waiting for altserver check for uplink %p...", (void*)uplink ); - } - } - logadd( LOG_DEBUG1, "Waited for %d iterations for altservers check when tearing down uplink", i ); } /** @@ -209,90 +130,124 @@ int altservers_getListForClient(dnbd3_host_t *host, dnbd3_server_entry_t *output if ( host == NULL || host->type == 0 || numAltServers == 0 || output == NULL || size <= 0 ) return 0; int i, j; int count = 0; - int scores[size]; - int score; - mutex_lock( &altServersLock ); + uint16_t scores[SERVER_MAX_ALTS] = { 0 }; if ( size > numAltServers ) size = numAltServers; - for (i = 0; i < numAltServers; ++i) { - if ( altServers[i].host.type == 0 ) continue; // Slot is empty - if ( altServers[i].isPrivate ) continue; // Do not tell clients about private servers + mutex_lock( &altServersLock ); + for ( i = 0; i < numAltServers; ++i ) { + if ( altServers[i].host.type == 0 || altServers[i].isPrivate ) + continue; // Slot is empty or uplink is for replication only if ( host->type == altServers[i].host.type ) { - score = altservers_netCloseness( host, &altServers[i].host ) - altServers[i].numFails; + scores[i] = 10 + altservers_netCloseness( host, &altServers[i].host ); } else { - score = -( altServers[i].numFails + 128 ); // Wrong address family + scores[i] = 1; // Wrong address family } - if ( count == 0 ) { - // Trivial - this is the first entry - output[0].host = altServers[i].host; - output[0].failures = 0; - scores[0] = score; - count++; - } else { - // Other entries already exist, insert in proper position - for (j = 0; j < size; ++j) { - if ( j < count && score <= scores[j] ) continue; - if ( j > count ) break; // Should never happen but just in case... - if ( j < count && j + 1 < size ) { - // Check if we're in the middle and need to move other entries... - memmove( &output[j + 1], &output[j], sizeof(dnbd3_server_entry_t) * (size - j - 1) ); - memmove( &scores[j + 1], &scores[j], sizeof(int) * (size - j - 1) ); - } - if ( count < size ) { - count++; - } - output[j].host = altServers[i].host; - output[j].failures = 0; - scores[j] = score; - break; + } + while ( count < size ) { + i = -1; + for ( j = 0; j < numAltServers; ++j ) { + if ( scores[j] == 0 ) + continue; + if ( i == -1 || scores[j] > scores[i] ) { + i = j; } } + if ( i == -1 ) + break; + output[count].host = altServers[i].host; + output[count].failures = 0; + count++; } mutex_unlock( &altServersLock ); return count; } +bool altservers_toString(int server, char *buffer, size_t len) +{ + return host_to_string( &altServers[server].host, buffer, len ); +} + +static bool isUsableForUplink( dnbd3_uplink_t *uplink, int server, ticks *now ) +{ + dnbd3_alt_local_t *local = ( uplink == NULL ? NULL : &uplink->altData[server] ); + dnbd3_alt_server_t *global = &altServers[server]; + if ( global->isClientOnly || ( !global->isPrivate && _proxyPrivateOnly ) ) + return false; + // Blocked locally (image not found on server...) + if ( local != NULL && local->blocked ) { + if ( --local->fails > 0 ) + return false; + local->blocked = false; + } + if ( global->blocked ) { + if ( timing_diff( &global->lastFail, now ) < SERVER_GLOBAL_DUP_TIME ) + return false; + global->lastFail = *now; + if ( --global->fails > 0 ) + return false; + global->blocked = false; + } + // Not blocked, depend on both fail counters + int fails = ( local == NULL ? 0 : local->fails ) + global->fails; + return fails < SERVER_BAD_UPLINK_MIN || ( rand() % fails ) < SERVER_BAD_UPLINK_MIN; +} + +int altservers_getHostListForReplication(dnbd3_host_t *servers, int size) +{ + int idx[size]; + int num = altservers_getListForUplink( NULL, idx, size, -1 ); + for ( int i = 0; i < num; ++i ) { + servers[i] = altServers[i].host; + } + return num; +} + /** * Get alt servers. If there are more alt servers than * requested, random servers will be picked. * This function is suited for finding uplink servers as * it includes private servers and ignores any "client only" servers + * @param current index of server for current connection, or -1 in panic mode */ -int altservers_getListForUplink(dnbd3_host_t *output, int size, int emergency) +static int altservers_getListForUplink(dnbd3_uplink_t *uplink, int *servers, int size, int current) { - if ( size <= 0 ) return 0; - int count = 0, i; - ticks now; - timing_get( &now ); + if ( size <= 0 ) + return 0; + int count = 0; + declare_now; mutex_lock( &altServersLock ); - // Flip first server in list with a random one every time this is called - if ( numAltServers > 1 ) { - const dnbd3_alt_server_t tmp = altServers[0]; - do { - i = rand() % numAltServers; - } while ( i == 0 ); - altServers[0] = altServers[i]; - altServers[i] = tmp; - } - // We iterate over the list twice. First run adds servers with 0 failures only, - // second one also considers those that failed (not too many times) - if ( size > numAltServers ) size = numAltServers; - for (i = 0; i < numAltServers * 2; ++i) { - dnbd3_alt_server_t *srv = &altServers[i % numAltServers]; - if ( srv->host.type == 0 ) continue; // Slot is empty - if ( _proxyPrivateOnly && !srv->isPrivate ) continue; // Config says to consider private alt-servers only? ignore! - if ( srv->isClientOnly ) continue; - bool first = ( i < numAltServers ); - if ( first ) { - if ( srv->numFails > 0 ) continue; - } else { - if ( srv->numFails == 0 ) continue; // Already added in first iteration - if ( !emergency && srv->numFails > SERVER_BAD_UPLINK_THRES // server failed X times in a row - && timing_diff( &srv->lastFail, &now ) < SERVER_BAD_UPLINK_IGNORE ) continue; // and last fail was not too long ago? ignore! - if ( !emergency ) srv->numFails--; + // If we don't have enough servers to randomize, take a shortcut + if ( numAltServers <= size ) { + for ( int i = 0; i < numAltServers; ++i ) { + if ( current == -1 || i == current || isUsableForUplink( uplink, i, &now ) ) { + servers[count++] = i; + } + } + } else { + // Plenty of alt servers; randomize + uint8_t state[SERVER_MAX_ALTS] = { 0 }; + if ( current != -1 ) { // Make sure we also test the current server + servers[count++] = current; + state[current] = 2; + } + for ( int tr = size * 10; tr > 0 && count < size; --tr ) { + int idx = rand() % numAltServers; + if ( state[idx] != 0 ) + continue; + if ( isUsableForUplink( uplink, idx, &now ) ) { + servers[count++] = idx; + state[idx] = 2; // Used + } else { + state[idx] = 1; // Potential + } + } + // If panic mode, consider others too + for ( int tr = size * 10; current == -1 && tr > 0 && count < size; --tr ) { + int idx = rand() % numAltServers; + if ( state[idx] == 2 ) + continue; + servers[count++] = idx; + state[idx] = 2; // Used } - // server seems ok, include in output and decrease its fail counter - output[count++] = srv->host; - if ( count >= size ) break; } mutex_unlock( &altServersLock ); return count; @@ -320,7 +275,7 @@ json_t* altservers_toJson() "rtt", rtts, "isPrivate", (int)src[i].isPrivate, "isClientOnly", (int)src[i].isClientOnly, - "numFails", src[i].numFails + "numFails", src[i].fails ); json_array_append_new( list, server ); } @@ -329,32 +284,27 @@ json_t* altservers_toJson() /** * Update rtt history of given server - returns the new average for that server. - * XXX HOLD altServersLock WHEN CALLING THIS! */ -static unsigned int altservers_updateRtt(const dnbd3_host_t * const host, const unsigned int rtt) +static uint32_t altservers_updateRtt(dnbd3_uplink_t *uplink, int index, uint32_t rtt) { - unsigned int avg = rtt; - int i; - for (i = 0; i < numAltServers; ++i) { - if ( !isSameAddressPort( host, &altServers[i].host ) ) continue; - altServers[i].rtt[++altServers[i].rttIndex % SERVER_RTT_PROBES] = rtt; -#if SERVER_RTT_PROBES == 5 - avg = (altServers[i].rtt[0] + altServers[i].rtt[1] + altServers[i].rtt[2] - + altServers[i].rtt[3] + altServers[i].rtt[4]) / SERVER_RTT_PROBES; -#else -#warning You might want to change the code in altservers_update_rtt if you changed SERVER_RTT_PROBES - avg = 0; - for (int j = 0; j < SERVER_RTT_PROBES; ++j) { - avg += altServers[i].rtt[j]; + uint32_t avg = 0, j; + dnbd3_alt_local_t *local = &uplink->altData[index]; + mutex_lock( &altServersLock ); + if ( likely( local->initDone ) ) { + local->rtt[++local->rttIndex % SERVER_RTT_PROBES] = rtt; + for ( j = 0; j < SERVER_RTT_PROBES; ++j ) { + avg += local->rtt[j]; } avg /= SERVER_RTT_PROBES; -#endif - // If we got a new rtt value, server must be working - if ( altServers[i].numFails > 0 ) { - altServers[i].numFails--; + } else { // First rtt measurement -- copy to every slot + for ( j = 0; j < SERVER_RTT_PROBES; ++j ) { + local->rtt[j] = rtt; } - break; + avg = rtt; + local->initDone = true; } + altServers[index].rtt[++altServers[index].rttIndex % SERVER_RTT_PROBES] = avg; + mutex_unlock( &altServersLock ); return avg; } @@ -383,40 +333,33 @@ int altservers_netCloseness(dnbd3_host_t *host1, dnbd3_host_t *host2) * track of how often servers fail, and consider them disabled for some time if they * fail too many times. */ -void altservers_serverFailed(const dnbd3_host_t * const host) +void altservers_serverFailed(int server) { - int i; - int foundIndex = -1, lastOk = -1; - ticks now; - timing_get( &now ); + declare_now; mutex_lock( &altServersLock ); - for (i = 0; i < numAltServers; ++i) { - if ( foundIndex == -1 ) { - // Looking for the failed server in list - if ( isSameAddressPort( host, &altServers[i].host ) ) { - foundIndex = i; - } - } else if ( altServers[i].host.type != 0 && altServers[i].numFails == 0 ) { - lastOk = i; + if ( timing_diff( &altServers[server].lastFail, &now ) > SERVER_GLOBAL_DUP_TIME ) { + altServers[server].lastFail = now; + if ( altServers[server].fails++ >= SERVER_BAD_UPLINK_MAX ) { + altServers[server].blocked = true; } } - // Do only increase counter if last fail was not too recent. This is - // to prevent the counter from increasing rapidly if many images use the - // same uplink. If there's a network hickup, all uplinks will call this - // function and would increase the counter too quickly, disabling the server. - if ( foundIndex != -1 && timing_diff( &altServers[foundIndex].lastFail, &now ) > SERVER_RTT_INTERVAL_INIT ) { - altServers[foundIndex].numFails += SERVER_UPLINK_FAIL_INCREASE; - altServers[foundIndex].lastFail = now; - if ( lastOk != -1 ) { - // Make sure non-working servers are put at the end of the list, so they're less likely - // to get picked when testing servers for uplink connections. - const dnbd3_alt_server_t tmp = altServers[foundIndex]; - altServers[foundIndex] = altServers[lastOk]; - altServers[lastOk] = tmp; - } + mutex_unlock( &altServersLock ); +} + +/** + * Called from RTT checker if connecting to a server succeeded but + * subsequently selecting the given image failed. Handle this within + * the uplink and don't increase the global fail counter. + */ +static void altservers_imageFailed(dnbd3_uplink_t *uplink, int server) +{ + mutex_lock( &altServersLock ); + if ( uplink->altData[server].fails++ >= SERVER_BAD_UPLINK_MAX ) { + uplink->altData[server].blocked = true; } mutex_unlock( &altServersLock ); } + /** * Mainloop of this module. It will wait for requests by uplinks to find a * suitable uplink server for them. If found, it will tell the uplink about @@ -425,206 +368,213 @@ void altservers_serverFailed(const dnbd3_host_t * const host) * will update quite quickly. Needs to be improved some time, ie. by only * updating the rtt if the last update was at least X seconds ago. */ -static void *altservers_main(void *data UNUSED) +static void *altservers_runCheck(void *data) +{ + dnbd3_uplink_t * const uplink = (dnbd3_uplink_t*)data; + + assert( uplink != NULL ); + setThreadName( "altserver-check" ); + altservers_findUplinkInternal( uplink ); + // Save cache maps of all images if applicable + // TODO: Has nothing to do with alt servers really, maybe move somewhere else? + declare_now; + if ( _closeUnusedFd && timing_reached( &nextCloseUnusedFd, &now ) ) { + timing_gets( &nextCloseUnusedFd, 900 ); + image_closeUnusedFd(); + } + return NULL; +} + +void altservers_findUplink(dnbd3_uplink_t *uplink) +{ + altservers_findUplinkInternal( uplink ); + while ( uplink->rttTestResult == RTT_INPROGRESS ) { + usleep( 5000 ); + } +} + +int altservers_hostToIndex(dnbd3_host_t *host) +{ + for ( int i = 0; i < numAltServers; ++i ) { + if ( isSameAddressPort( host, &altServers[i].host ) ) + return i; + } + return -1; +} + +const dnbd3_host_t* altservers_indexToHost(int server) +{ + return &altServers[server].host; +} + +// XXX Sync call above must block until async worker has finished XXX +static void altservers_findUplinkInternal(dnbd3_uplink_t *uplink) { const int ALTS = 4; - int ret, itLink, itAlt, numAlts; - bool found; - char buffer[DNBD3_BLOCK_SIZE ]; - dnbd3_reply_t reply; - dnbd3_host_t servers[ALTS + 1]; - serialized_buffer_t serialized; + int ret, itAlt, numAlts, current; + bool panic; + int servers[ALTS + 1]; struct timespec start, end; - ticks nextCloseUnusedFd; - setThreadName( "altserver-check" ); - blockNoncriticalSignals(); - timing_gets( &nextCloseUnusedFd, 900 ); - // LOOP - while ( !_shutdown ) { - // Wait 5 seconds max. - ret = signal_wait( runSignal, 5000 ); - if ( _shutdown ) goto cleanup; - if ( ret == SIGNAL_ERROR ) { - if ( errno == EAGAIN || errno == EINTR ) continue; - logadd( LOG_WARNING, "Error %d on signal_clear on alservers_main! Things will break!", errno ); - usleep( 100000 ); + if ( _shutdown ) + return; + mutex_lock( &uplink->rttLock ); + // Maybe we already have a result, or check is currently running + if ( uplink->better.fd != -1 || uplink->rttTestResult == RTT_INPROGRESS ) { + mutex_unlock( &uplink->rttLock ); + return; + } + assert( uplink->rttTestResult != RTT_DOCHANGE ); + uplink->rttTestResult = RTT_INPROGRESS; + panic = ( uplink->current.fd == -1 ); + current = uplink->current.index; // Current server index (or last one in panic mode) + mutex_unlock( &uplink->rttLock ); + // First, get 4 alt servers + numAlts = altservers_getListForUplink( uplink, servers, ALTS, panic ? -1 : current ); + // If we're already connected and only got one server anyways, there isn't much to do + if ( numAlts == 0 || ( numAlts == 1 && !panic ) ) { + uplink->rttTestResult = RTT_DONTCHANGE; + return; + } + dnbd3_image_t * const image = image_lock( uplink->image ); + if ( image == NULL ) { // Check again after locking + uplink->rttTestResult = RTT_NOT_REACHABLE; + logadd( LOG_WARNING, "Image has gone away that was queued for RTT measurement" ); + return; + } + LOG( LOG_DEBUG2, "Running alt check for %s:%d", image->name, (int)image->rid ); + assert( uplink->rttTestResult == RTT_INPROGRESS ); + // Test them all + dnbd3_server_connection_t best = { .fd = -1 }; + unsigned long bestRtt = RTT_UNREACHABLE; + unsigned long currentRtt = RTT_UNREACHABLE; + for (itAlt = 0; itAlt < numAlts; ++itAlt) { + int server = servers[itAlt]; + // Connect + clock_gettime( BEST_CLOCK_SOURCE, &start ); + int sock = sock_connect( &altServers[server].host, 750, 1000 ); + if ( sock == -1 ) { // Connection failed means global error + altservers_serverFailed( server ); + continue; } - // Work your way through the queue - atomic_thread_fence( memory_order_acquire ); - for (itLink = 0; itLink < SERVER_MAX_PENDING_ALT_CHECKS; ++itLink) { - dnbd3_uplink_t * const uplink = pending[itLink]; - if ( uplink == NULL ) - continue; - // First, get 4 alt servers - numAlts = altservers_getListForUplink( servers, ALTS, uplink->current.fd == -1 ); - // If we're already connected and only got one server anyways, there isn't much to do - if ( numAlts <= 1 && uplink->current.fd != -1 ) { - uplink->rttTestResult = RTT_DONTCHANGE; - continue; - } - dnbd3_image_t * const image = image_lock( uplink->image ); - if ( image == NULL ) { // Check again after locking - mutex_lock( &uplink->rttLock ); - uplink->rttTestResult = RTT_NOT_REACHABLE; - assert( pending[itLink] == uplink ); - pending[itLink] = NULL; - mutex_unlock( &uplink->rttLock ); - logadd( LOG_WARNING, "Image has gone away that was queued for RTT measurement" ); - continue; - } - LOG( LOG_DEBUG2, "[%d] Running alt check", itLink ); - assert( uplink->rttTestResult == RTT_INPROGRESS ); - if ( uplink->current.fd != -1 ) { - // Add current server if not already in list - found = false; - for (itAlt = 0; itAlt < numAlts; ++itAlt) { - if ( !isSameAddressPort( &uplink->current.host, &servers[itAlt] ) ) continue; - found = true; - break; - } - if ( !found ) servers[numAlts++] = uplink->current.host; - } - // Test them all - int bestSock = -1; - int bestIndex = -1; - int bestProtocolVersion = -1; - unsigned long bestRtt = RTT_UNREACHABLE; - unsigned long currentRtt = RTT_UNREACHABLE; - for (itAlt = 0; itAlt < numAlts; ++itAlt) { - usleep( 1000 ); // Wait a very short moment for the network to recover (we might be doing lots of measurements...) - // Connect - clock_gettime( BEST_CLOCK_SOURCE, &start ); - int sock = sock_connect( &servers[itAlt], 750, 1000 ); - if ( sock < 0 ) continue; - // Select image ++++++++++++++++++++++++++++++ - if ( !dnbd3_select_image( sock, image->name, image->rid, SI_SERVER_FLAGS ) ) { - goto server_failed; - } - // See if selecting the image succeeded ++++++++++++++++++++++++++++++ - uint16_t protocolVersion, rid; - uint64_t imageSize; - char *name; - if ( !dnbd3_select_image_reply( &serialized, sock, &protocolVersion, &name, &rid, &imageSize ) ) { - goto server_image_not_available; - } - if ( protocolVersion < MIN_SUPPORTED_SERVER ) goto server_failed; - if ( name == NULL || strcmp( name, image->name ) != 0 ) { - ERROR_GOTO( server_failed, "[RTT] Server offers image '%s'", name ); - } - if ( rid != image->rid ) { - ERROR_GOTO( server_failed, "[RTT] Server provides rid %d", (int)rid ); - } - if ( imageSize != image->virtualFilesize ) { - ERROR_GOTO( server_failed, "[RTT] Remote size: %" PRIu64 ", expected: %" PRIu64, imageSize, image->virtualFilesize ); - } - // Request first block (NOT random!) ++++++++++++++++++++++++++++++ - if ( !dnbd3_get_block( sock, 0, DNBD3_BLOCK_SIZE, 0, COND_HOPCOUNT( protocolVersion, 1 ) ) ) { - LOG_GOTO( server_failed, LOG_DEBUG1, "[RTT%d] Could not request first block", itLink ); - } - // See if requesting the block succeeded ++++++++++++++++++++++ - if ( !dnbd3_get_reply( sock, &reply ) ) { - LOG_GOTO( server_failed, LOG_DEBUG1, "[RTT%d] Received corrupted reply header after CMD_GET_BLOCK", itLink ); - } - // check reply header - if ( reply.cmd != CMD_GET_BLOCK || reply.size != DNBD3_BLOCK_SIZE ) { - ERROR_GOTO( server_failed, "[RTT] Reply to first block request is %" PRIu32 " bytes", reply.size ); - } - if ( recv( sock, buffer, DNBD3_BLOCK_SIZE, MSG_WAITALL ) != DNBD3_BLOCK_SIZE ) { - ERROR_GOTO( server_failed, "[RTT%d] Could not read first block payload", itLink ); - } - clock_gettime( BEST_CLOCK_SOURCE, &end ); - // Measurement done - everything fine so far - mutex_lock( &altServersLock ); - mutex_lock( &uplink->rttLock ); - const bool isCurrent = isSameAddressPort( &servers[itAlt], &uplink->current.host ); - // Penaltize rtt if this was a cycle; this will treat this server with lower priority - // in the near future too, so we prevent alternating between two servers that are both - // part of a cycle and have the lowest latency. - const unsigned int rtt = (unsigned int)((end.tv_sec - start.tv_sec) * 1000000 - + (end.tv_nsec - start.tv_nsec) / 1000 - + ( (isCurrent && uplink->cycleDetected) ? 1000000 : 0 )); // µs - unsigned int avg = altservers_updateRtt( &servers[itAlt], rtt ); - mutex_unlock( &altServersLock ); - // If a cycle was detected, or we lost connection to the current (last) server, penaltize it one time - if ( ( uplink->cycleDetected || uplink->current.fd == -1 ) && isCurrent ) avg = (avg * 2) + 50000; - mutex_unlock( &uplink->rttLock ); - if ( uplink->current.fd != -1 && isCurrent ) { - // Was measuring current server - currentRtt = avg; - close( sock ); - } else if ( avg < bestRtt ) { - // Was another server, update "best" - if ( bestSock != -1 ) close( bestSock ); - bestSock = sock; - bestRtt = avg; - bestIndex = itAlt; - bestProtocolVersion = protocolVersion; - } else { - // Was too slow, ignore - close( sock ); - } - // We're done, call continue - continue; - // Jump here if anything went wrong - // This will cleanup and continue - server_failed: ; - altservers_serverFailed( &servers[itAlt] ); - server_image_not_available: ; - close( sock ); - } - // Done testing all servers. See if we should switch - if ( bestSock != -1 && (uplink->current.fd == -1 || (bestRtt < 10000000 && RTT_THRESHOLD_FACTOR(currentRtt) > bestRtt)) ) { - // yep - if ( currentRtt > 10000000 || uplink->current.fd == -1 ) { - LOG( LOG_DEBUG1, "Change - best: %luµs, current: -", bestRtt ); - } else { - LOG( LOG_DEBUG1, "Change - best: %luµs, current: %luµs", bestRtt, currentRtt ); - } - sock_setTimeout( bestSock, _uplinkTimeout ); - mutex_lock( &uplink->rttLock ); - uplink->better.fd = bestSock; - uplink->better.host = servers[bestIndex]; - uplink->better.version = bestProtocolVersion; - uplink->rttTestResult = RTT_DOCHANGE; - mutex_unlock( &uplink->rttLock ); - signal_call( uplink->signal ); - } else if ( bestSock == -1 && currentRtt == RTT_UNREACHABLE ) { - // No server was reachable - mutex_lock( &uplink->rttLock ); - uplink->rttTestResult = RTT_NOT_REACHABLE; - mutex_unlock( &uplink->rttLock ); - } else { - // nope - if ( bestSock != -1 ) close( bestSock ); - mutex_lock( &uplink->rttLock ); - uplink->rttTestResult = RTT_DONTCHANGE; - uplink->cycleDetected = false; // It's a lie, but prevents rtt measurement triggering again right away - mutex_unlock( &uplink->rttLock ); - if ( !image->working ) { - image->working = true; - LOG( LOG_DEBUG1, "[%d] No better alt server found, enabling again", itLink ); - } - } - image_release( image ); - // end of loop over all pending uplinks - assert( pending[itLink] == uplink ); - pending[itLink] = NULL; - atomic_thread_fence( memory_order_release ); + // Select image ++++++++++++++++++++++++++++++ + if ( !dnbd3_select_image( sock, image->name, image->rid, SI_SERVER_FLAGS ) ) { + goto image_failed; } - // Save cache maps of all images if applicable - declare_now; - // TODO: Has nothing to do with alt servers really, maybe move somewhere else? - if ( _closeUnusedFd && timing_reached( &nextCloseUnusedFd, &now ) ) { - timing_gets( &nextCloseUnusedFd, 900 ); - image_closeUnusedFd(); + // See if selecting the image succeeded ++++++++++++++++++++++++++++++ + uint16_t protocolVersion, rid; + uint64_t imageSize; + char *name; + serialized_buffer_t serialized; + if ( !dnbd3_select_image_reply( &serialized, sock, &protocolVersion, &name, &rid, &imageSize ) ) { + goto image_failed; } + if ( protocolVersion < MIN_SUPPORTED_SERVER ) { // Server version unsupported; global fail + goto server_failed; + } + if ( name == NULL || strcmp( name, image->name ) != 0 ) { + ERROR_GOTO( image_failed, "[RTT] Server offers image '%s' instead of '%s'", name, image->name ); + } + if ( rid != image->rid ) { + ERROR_GOTO( image_failed, "[RTT] Server provides rid %d instead of %d", (int)rid, (int)image->rid ); + } + if ( imageSize != image->virtualFilesize ) { + ERROR_GOTO( image_failed, "[RTT] Remote size: %" PRIu64 ", expected: %" PRIu64, imageSize, image->virtualFilesize ); + } + // Request first block (NOT random!) ++++++++++++++++++++++++++++++ + if ( !dnbd3_get_block( sock, 0, DNBD3_BLOCK_SIZE, 0, COND_HOPCOUNT( protocolVersion, 1 ) ) ) { + LOG_GOTO( image_failed, LOG_DEBUG1, "[RTT%d] Could not request first block", server ); + } + // See if requesting the block succeeded ++++++++++++++++++++++ + dnbd3_reply_t reply; + if ( !dnbd3_get_reply( sock, &reply ) ) { + LOG_GOTO( image_failed, LOG_DEBUG1, "[RTT%d] Received corrupted reply header after CMD_GET_BLOCK", server ); + } + // check reply header + if ( reply.cmd != CMD_GET_BLOCK || reply.size != DNBD3_BLOCK_SIZE ) { + // Sanity check failed; count this as global error (malicious/broken server) + ERROR_GOTO( server_failed, "[RTT] Reply to first block request is %" PRIu32 " bytes", reply.size ); + } + // flush payload to include this into measurement + char buffer[DNBD3_BLOCK_SIZE]; + if ( recv( sock, buffer, DNBD3_BLOCK_SIZE, MSG_WAITALL ) != DNBD3_BLOCK_SIZE ) { + ERROR_GOTO( image_failed, "[RTT%d] Could not read first block payload", server ); + } + clock_gettime( BEST_CLOCK_SOURCE, &end ); + // Measurement done - everything fine so far + mutex_lock( &uplink->rttLock ); + const bool isCurrent = ( uplink->current.index == server ); + mutex_unlock( &uplink->rttLock ); + // Penaltize rtt if this was a cycle; this will treat this server with lower priority + // in the near future too, so we prevent alternating between two servers that are both + // part of a cycle and have the lowest latency. + uint32_t rtt = (uint32_t)((end.tv_sec - start.tv_sec) * 1000000 + + (end.tv_nsec - start.tv_nsec) / 1000); // µs + uint32_t avg = altservers_updateRtt( uplink, server, rtt ); + // If a cycle was detected, or we lost connection to the current (last) server, penaltize it one time + if ( ( uplink->cycleDetected || panic ) && isCurrent ) { + avg = (avg * 2) + 50000; + } + if ( !panic && isCurrent ) { + // Was measuring current server + currentRtt = avg; + close( sock ); + } else if ( avg < bestRtt ) { + // Was another server, update "best" + if ( best.fd != -1 ) { + close( best.fd ); + } + best.fd = sock; + bestRtt = avg; + best.index = server; + best.version = protocolVersion; + } else { + // Was too slow, ignore + close( sock ); + } + // We're done, call continue + continue; + // Jump here if anything went wrong + // This will cleanup and continue +image_failed: + altservers_imageFailed( uplink, server ); + goto failed; +server_failed: + altservers_serverFailed( server ); +failed: + close( sock ); } - cleanup: ; - if ( runSignal != NULL ) { - signal_close( runSignal ); + // Done testing all servers. See if we should switch + if ( best.fd != -1 && (panic || (bestRtt < 10000000 && RTT_THRESHOLD_FACTOR(currentRtt) > bestRtt)) ) { + // yep + if ( currentRtt > 10000000 || panic ) { + LOG( LOG_DEBUG1, "Change - best: %luµs, current: -", bestRtt ); + } else { + LOG( LOG_DEBUG1, "Change - best: %luµs, current: %luµs", bestRtt, currentRtt ); + } + sock_setTimeout( best.fd, _uplinkTimeout ); + mutex_lock( &uplink->rttLock ); + uplink->better = best; + uplink->rttTestResult = RTT_DOCHANGE; + mutex_unlock( &uplink->rttLock ); + signal_call( uplink->signal ); + } else if ( best.fd == -1 && currentRtt == RTT_UNREACHABLE ) { + // No server was reachable, including current + uplink->rttTestResult = RTT_NOT_REACHABLE; + } else { + // nope + if ( best.fd != -1 ) { + close( best.fd ); + } + if ( !image->working || uplink->cycleDetected ) { + image->working = true; + LOG( LOG_DEBUG1, "[RTT] No better alt server found, enabling '%s:%d' again... :-(", image->name, (int)image->rid ); + } + uplink->cycleDetected = false; // It's a lie, but prevents rtt measurement triggering again right away + mutex_lock( &uplink->rttLock ); + uplink->rttTestResult = RTT_DONTCHANGE; + mutex_unlock( &uplink->rttLock ); } - runSignal = NULL; - return NULL ; + image_release( image ); } diff --git a/src/server/altservers.h b/src/server/altservers.h index e03b900..8e2b964 100644 --- a/src/server/altservers.h +++ b/src/server/altservers.h @@ -7,23 +7,27 @@ struct json_t; void altservers_init(); -void altservers_shutdown(); - int altservers_load(); bool altservers_add(dnbd3_host_t *host, const char *comment, const int isPrivate, const int isClientOnly); -void altservers_findUplink(dnbd3_uplink_t *uplink); +void altservers_findUplinkAsync(dnbd3_uplink_t *uplink); -void altservers_removeUplink(dnbd3_uplink_t *uplink); +void altservers_findUplink(dnbd3_uplink_t *uplink); int altservers_getListForClient(dnbd3_host_t *host, dnbd3_server_entry_t *output, int size); -int altservers_getListForUplink(dnbd3_host_t *output, int size, int emergency); +int altservers_getHostListForReplication(dnbd3_host_t *servers, int size); + +bool altservers_toString(int server, char *buffer, size_t len); int altservers_netCloseness(dnbd3_host_t *host1, dnbd3_host_t *host2); -void altservers_serverFailed(const dnbd3_host_t * const host); +void altservers_serverFailed(int server); + +int altservers_hostToIndex(dnbd3_host_t *host); + +const dnbd3_host_t* altservers_indexToHost(int server); struct json_t* altservers_toJson(); diff --git a/src/server/globals.h b/src/server/globals.h index 659e5a2..4d97c6b 100644 --- a/src/server/globals.h +++ b/src/server/globals.h @@ -30,10 +30,31 @@ typedef struct uint8_t hopCount; // How many hops this request has already taken across proxies } dnbd3_queued_request_t; +typedef struct +{ + int fails; // Hard fail: Connection failed + int rttIndex; + uint32_t rtt[SERVER_RTT_PROBES]; + bool isPrivate, isClientOnly; + bool blocked; // If true count down fails until 0 to enable again + ticks lastFail; // Last hard fail + dnbd3_host_t host; + char comment[COMMENT_LENGTH]; +} dnbd3_alt_server_t; + +typedef struct +{ + int fails; // Soft fail: Image not found + int rttIndex; + uint32_t rtt[SERVER_RTT_PROBES]; + bool blocked; // True if server is to be ignored and fails should be counted down + bool initDone; +} dnbd3_alt_local_t; + typedef struct { - int fd; // Socket fd for this connection - int version; // Protocol version of remote server - dnbd3_host_t host; // IP/Port of remote server + int fd; // Socket fd for this connection + int version; // Protocol version of remote server + int index; // Entry in uplinks list } dnbd3_server_connection_t; #define RTT_IDLE 0 // Not in progress @@ -51,7 +72,7 @@ struct _dnbd3_uplink pthread_mutex_t queueLock; // lock for synchronization on request queue etc. dnbd3_image_t *image; // image that this uplink is used for; do not call get/release for this pointer pthread_mutex_t rttLock; // When accessing rttTestResult, betterFd or betterServer - int rttTestResult; // RTT_* + atomic_int rttTestResult; // RTT_* int cacheFd; // used to write to the image, in case it is relayed. ONLY USE FROM UPLINK THREAD! uint8_t *recvBuffer; // Buffer for receiving payload uint32_t recvBufferLen; // Len of ^^ @@ -65,19 +86,9 @@ struct _dnbd3_uplink atomic_int queueLen; // length of queue uint32_t idleTime; // How many seconds the uplink was idle (apart from keep-alives) dnbd3_queued_request_t queue[SERVER_MAX_UPLINK_QUEUE]; + dnbd3_alt_local_t altData[SERVER_MAX_ALTS]; }; -typedef struct -{ - char comment[COMMENT_LENGTH]; - dnbd3_host_t host; - unsigned int rtt[SERVER_RTT_PROBES]; - unsigned int rttIndex; - bool isPrivate, isClientOnly; - ticks lastFail; - int numFails; -} dnbd3_alt_server_t; - typedef struct { uint8_t host[16]; diff --git a/src/server/image.c b/src/server/image.c index d250715..1a6e0f8 100644 --- a/src/server/image.c +++ b/src/server/image.c @@ -1178,7 +1178,7 @@ static dnbd3_image_t *loadImageProxy(char * const name, const uint16_t revision, dnbd3_host_t servers[REP_NUM_SRV]; int uplinkSock = -1; dnbd3_host_t uplinkServer; - const int count = altservers_getListForUplink( servers, REP_NUM_SRV, false ); + const int count = altservers_getHostListForReplication( servers, REP_NUM_SRV ); uint16_t remoteProtocolVersion; uint16_t remoteRid = revision; uint64_t remoteImageSize; @@ -1491,7 +1491,7 @@ json_t* image_getListAsJson() json_t *imagesJson = json_array(); json_t *jsonImage; int i; - char uplinkName[100] = { 0 }; + char uplinkName[100]; uint64_t bytesReceived; int completeness, idleTime; declare_now; @@ -1508,7 +1508,7 @@ json_t* image_getListAsJson() uplinkName[0] = '\0'; } else { bytesReceived = image->uplink->bytesReceived; - if ( image->uplink->current.fd == -1 || !host_to_string( &image->uplink->current.host, uplinkName, sizeof(uplinkName) ) ) { + if ( !uplink_getHostString( image->uplink, uplinkName, sizeof(uplinkName) ) ) { uplinkName[0] = '\0'; } } diff --git a/src/server/net.c b/src/server/net.c index 7f3c1ce..4976eea 100644 --- a/src/server/net.c +++ b/src/server/net.c @@ -669,11 +669,19 @@ static void removeFromList(dnbd3_client_t *client) { int i; mutex_lock( &_clients_lock ); - for ( i = _num_clients - 1; i >= 0; --i ) { - if ( _clients[i] == client ) { - _clients[i] = NULL; + if ( _num_clients != 0 ) { + for ( i = _num_clients - 1; i >= 0; --i ) { + if ( _clients[i] == client ) { + _clients[i] = NULL; + break; + } + } + if ( i != 0 && i + 1 == _num_clients ) { + do { + i--; + } while ( _clients[i] == NULL && i > 0 ); + _num_clients = i + 1; } - if ( _clients[i] == NULL && i + 1 == _num_clients ) --_num_clients; } mutex_unlock( &_clients_lock ); } diff --git a/src/server/server.c b/src/server/server.c index 838aec2..640048a 100644 --- a/src/server/server.c +++ b/src/server/server.c @@ -121,9 +121,6 @@ void dnbd3_cleanup() // Disable threadpool threadpool_close(); - // Terminate the altserver checking thread - altservers_shutdown(); - // Terminate all uplinks image_killUplinks(); @@ -198,6 +195,11 @@ int main(int argc, char *argv[]) case LONGOPT_CRC4: return image_generateCrcFile( optarg ) ? 0 : EXIT_FAILURE; case LONGOPT_ASSERT: + printf( "Testing use after free:\n" ); + volatile char * volatile test = malloc( 10 ); + test[0] = 1; + free( test ); + test[1] = 2; printf( "Testing a failing assertion:\n" ); assert( 4 == 5 ); printf( "Assertion 4 == 5 seems to hold. ;-)\n" ); diff --git a/src/server/uplink.c b/src/server/uplink.c index e21e28c..6c85580 100644 --- a/src/server/uplink.c +++ b/src/server/uplink.c @@ -96,17 +96,18 @@ bool uplink_init(dnbd3_image_t *image, int sock, dnbd3_host_t *host, int version uplink->bytesReceived = 0; uplink->idleTime = 0; uplink->queueLen = 0; - mutex_lock( &uplink->sendMutex ); - uplink->current.fd = -1; - mutex_unlock( &uplink->sendMutex ); uplink->cacheFd = -1; uplink->signal = NULL; uplink->replicationHandle = REP_NONE; mutex_lock( &uplink->rttLock ); + mutex_lock( &uplink->sendMutex ); + uplink->current.fd = -1; + mutex_unlock( &uplink->sendMutex ); uplink->cycleDetected = false; - if ( sock >= 0 ) { + if ( sock != -1 ) { uplink->better.fd = sock; - uplink->better.host = *host; + int index = altservers_hostToIndex( host ); + uplink->better.index = index == -1 ? 0 : index; // Prevent invalid array access uplink->rttTestResult = RTT_DOCHANGE; uplink->better.version = version; } else { @@ -116,7 +117,7 @@ bool uplink_init(dnbd3_image_t *image, int sock, dnbd3_host_t *host, int version mutex_unlock( &uplink->rttLock ); uplink->recvBufferLen = 0; uplink->shutdown = false; - if ( 0 != thread_create( &(uplink->thread), NULL, &uplink_mainloop, (void *)link ) ) { + if ( 0 != thread_create( &(uplink->thread), NULL, &uplink_mainloop, (void *)uplink ) ) { logadd( LOG_ERROR, "Could not start thread for new uplink." ); goto failure; } @@ -148,8 +149,8 @@ void uplink_shutdown(dnbd3_image_t *image) } dnbd3_uplink_t * const uplink = image->uplink; mutex_lock( &uplink->queueLock ); - if ( !uplink->shutdown ) { - uplink->shutdown = true; + bool exp = false; + if ( atomic_compare_exchange_strong( &uplink->shutdown, &exp, true ) ) { signal_call( uplink->signal ); thread = uplink->thread; join = true; @@ -211,13 +212,11 @@ bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uin } // Check if the client is the same host as the uplink. If so assume this is a circular proxy chain // This might be a false positive if there are multiple instances running on the same host (IP) - if ( hops != 0 && isSameAddress( &uplink->current.host, &client->host ) ) { - mutex_unlock( &client->image->lock ); - logadd( LOG_WARNING, "Proxy cycle detected (same host)." ); - mutex_lock( &uplink->rttLock ); + if ( hops != 0 && isSameAddress( altservers_indexToHost( uplink->current.index ), &client->host ) ) { uplink->cycleDetected = true; - mutex_unlock( &uplink->rttLock ); signal_call( uplink->signal ); + mutex_unlock( &client->image->lock ); + logadd( LOG_WARNING, "Proxy cycle detected (same host)." ); return false; } @@ -256,12 +255,10 @@ bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uin } } if ( unlikely( requestLoop ) ) { - mutex_unlock( &uplink->queueLock ); - logadd( LOG_WARNING, "Rejecting relay of request to upstream proxy because of possible cyclic proxy chain. Incoming hop-count is %" PRIu8 ".", hops ); - mutex_lock( &uplink->rttLock ); uplink->cycleDetected = true; - mutex_unlock( &uplink->rttLock ); signal_call( uplink->signal ); + mutex_unlock( &uplink->queueLock ); + logadd( LOG_WARNING, "Rejecting relay of request to upstream proxy because of possible cyclic proxy chain. Incoming hop-count is %" PRIu8 ".", hops ); return false; } if ( freeSlot < firstUsedSlot && firstUsedSlot < 10 && existingType != ULR_PROCESSING ) { @@ -311,6 +308,8 @@ bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uin if ( foundExisting != -1 ) return true; // Attached to pending request, do nothing + usleep( 10000 ); + // See if we can fire away the request if ( mutex_trylock( &uplink->sendMutex ) != 0 ) { logadd( LOG_DEBUG2, "Could not trylock send mutex, queueing uplink request" ); @@ -342,7 +341,7 @@ bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uin if ( state == -1 ) { logadd( LOG_DEBUG2, "Direct uplink request queue entry gone after sending and re-locking queue. *shrug*" ); } else if ( state == ULR_NEW ) { - logadd( LOG_DEBUG2, "Succesful direct uplink request" ); + //logadd( LOG_DEBUG2, "Direct uplink request" ); } else { logadd( LOG_DEBUG2, "Direct uplink request queue entry changed to %s afte sending (expected ULR_NEW).", NAMES_ULR[uplink->queue[freeSlot].status] ); } @@ -352,10 +351,8 @@ bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uin } } - if ( foundExisting == -1 ) { // Only wake up uplink thread if the request needs to be relayed - if ( signal_call( uplink->signal ) == SIGNAL_ERROR ) { - logadd( LOG_WARNING, "Cannot wake up uplink thread; errno=%d", (int)errno ); - } + if ( signal_call( uplink->signal ) == SIGNAL_ERROR ) { + logadd( LOG_WARNING, "Cannot wake up uplink thread; errno=%d", (int)errno ); } return true; } @@ -443,7 +440,7 @@ static void* uplink_mainloop(void *data) uplink->image->working = true; uplink->replicatedLastBlock = false; // Reset this to be safe - request could've been sent but reply was never received buffer[0] = '@'; - if ( host_to_string( &uplink->current.host, buffer + 1, sizeof(buffer) - 1 ) ) { + if ( altservers_toString( uplink->current.index, buffer + 1, sizeof(buffer) - 1 ) ) { logadd( LOG_DEBUG1, "(Uplink %s) Now connected to %s\n", uplink->image->name, buffer + 1 ); setThreadName( buffer ); } @@ -525,9 +522,7 @@ static void* uplink_mainloop(void *data) } } // See if we should trigger an RTT measurement - mutex_lock( &uplink->rttLock ); - const int rttTestResult = uplink->rttTestResult; - mutex_unlock( &uplink->rttLock ); + int rttTestResult = uplink->rttTestResult; if ( rttTestResult == RTT_IDLE || rttTestResult == RTT_DONTCHANGE ) { if ( timing_reached( &nextAltCheck, &now ) || ( uplink->current.fd == -1 && !uplink_connectionShouldShutdown( uplink ) ) || uplink->cycleDetected ) { // It seems it's time for a check @@ -538,7 +533,7 @@ static void* uplink_mainloop(void *data) goto cleanup; } else if ( !uplink_connectionShouldShutdown( uplink ) ) { // Not complete - do measurement - altservers_findUplink( uplink ); // This will set RTT_INPROGRESS (synchronous) + altservers_findUplinkAsync( uplink ); // This will set RTT_INPROGRESS (synchronous) if ( _backgroundReplication == BGR_FULL && uplink->nextReplicationIndex == -1 ) { uplink->nextReplicationIndex = 0; } @@ -547,11 +542,9 @@ static void* uplink_mainloop(void *data) timing_set( &nextAltCheck, &now, altCheckInterval ); } } else if ( rttTestResult == RTT_NOT_REACHABLE ) { - mutex_lock( &uplink->rttLock ); - uplink->rttTestResult = RTT_IDLE; - mutex_unlock( &uplink->rttLock ); + atomic_compare_exchange_strong( &uplink->rttTestResult, &rttTestResult, RTT_IDLE ); discoverFailCount++; - timing_set( &nextAltCheck, &now, (discoverFailCount < SERVER_RTT_BACKOFF_COUNT ? altCheckInterval : SERVER_RTT_INTERVAL_FAILED) ); + timing_set( &nextAltCheck, &now, (discoverFailCount < SERVER_RTT_MAX_UNREACH ? altCheckInterval : SERVER_RTT_INTERVAL_FAILED) ); } #ifdef _DEBUG if ( uplink->current.fd != -1 && !uplink->shutdown ) { @@ -581,36 +574,38 @@ static void* uplink_mainloop(void *data) #endif } cleanup: ; - if ( !uplink->shutdown ) { - uplink->shutdown = true; + // Detach depends on whether someone is joining this thread... + bool exp = false; + if ( atomic_compare_exchange_strong( &uplink->shutdown, &exp, true ) ) { thread_detach( uplink->thread ); } - altservers_removeUplink( uplink ); uplink_saveCacheMap( uplink ); - mutex_lock( &uplink->image->lock ); - if ( uplink->image->uplink == uplink ) { - uplink->image->uplink = NULL; + dnbd3_image_t *image = uplink->image; + mutex_lock( &image->lock ); + // in the list anymore, but we want to prevent it from being freed in either case + if ( image->uplink == uplink ) { + image->uplink = NULL; } + mutex_unlock( &image->lock ); // Do NOT use image without locking it mutex_lock( &uplink->queueLock ); - const int fd = uplink->current.fd; - const dnbd3_signal_t* signal = uplink->signal; - mutex_lock( &uplink->sendMutex ); - uplink->current.fd = -1; - mutex_unlock( &uplink->sendMutex ); - uplink->signal = NULL; - // Do not access uplink->image after unlocking, since we set - // image->uplink to NULL. Acquire with image_lock first, - // like done below when checking whether to re-init uplink - mutex_unlock( &uplink->image->lock ); - mutex_unlock( &uplink->queueLock ); - if ( fd != -1 ) close( fd ); - if ( signal != NULL ) signal_close( signal ); - // Wait for the RTT check to finish/fail if it's in progress - while ( uplink->rttTestResult == RTT_INPROGRESS ) + // Wait for active RTT measurement to finish + while ( uplink->rttTestResult == RTT_INPROGRESS ) { usleep( 10000 ); + } + signal_close( uplink->signal ); + mutex_lock( &uplink->rttLock ); + mutex_lock( &uplink->sendMutex ); + if ( uplink->current.fd != -1 ) { + close( uplink->current.fd ); + uplink->current.fd = -1; + } if ( uplink->better.fd != -1 ) { close( uplink->better.fd ); + uplink->better.fd = -1; } + mutex_unlock( &uplink->sendMutex ); + mutex_unlock( &uplink->rttLock ); + mutex_unlock( &uplink->queueLock ); mutex_destroy( &uplink->queueLock ); mutex_destroy( &uplink->rttLock ); mutex_destroy( &uplink->sendMutex ); @@ -619,9 +614,9 @@ static void* uplink_mainloop(void *data) if ( uplink->cacheFd != -1 ) { close( uplink->cacheFd ); } - dnbd3_image_t *image = image_lock( uplink->image ); free( uplink ); // !!! - if ( image != NULL ) { + if ( image_lock( image ) != NULL ) { + // Image is still in list... if ( !_shutdown && image->cache_map != NULL ) { // Ingegrity checker must have found something in the meantime uplink_init( image, -1, NULL, 0 ); @@ -656,7 +651,7 @@ static void uplink_sendRequests(dnbd3_uplink_t *uplink, bool newOnly) // the thread will re-send this request as soon as the connection // is reestablished. logadd( LOG_DEBUG1, "Error forwarding request to uplink server!\n" ); - altservers_serverFailed( &uplink->current.host ); + altservers_serverFailed( uplink->current.index ); return; } mutex_lock( &uplink->queueLock ); @@ -973,7 +968,7 @@ static void uplink_connectionFailed(dnbd3_uplink_t *uplink, bool findNew) { if ( uplink->current.fd == -1 ) return; - altservers_serverFailed( &uplink->current.host ); + altservers_serverFailed( uplink->current.index ); mutex_lock( &uplink->sendMutex ); close( uplink->current.fd ); uplink->current.fd = -1; @@ -1138,3 +1133,13 @@ static bool uplink_connectionShouldShutdown(dnbd3_uplink_t *uplink) && ( _backgroundReplication != BGR_FULL || _bgrMinClients > uplink->image->users ) ); } +bool uplink_getHostString(dnbd3_uplink_t *uplink, char *buffer, size_t len) +{ + int current; + mutex_lock( &uplink->rttLock ); + current = uplink->current.fd == -1 ? -1 : uplink->current.index; + mutex_unlock( &uplink->rttLock ); + if ( current == -1 ) + return false; + return altservers_toString( current, buffer, len ); +} diff --git a/src/server/uplink.h b/src/server/uplink.h index 4fd41b0..acc8e11 100644 --- a/src/server/uplink.h +++ b/src/server/uplink.h @@ -16,4 +16,6 @@ bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uin void uplink_shutdown(dnbd3_image_t *image); +bool uplink_getHostString(dnbd3_uplink_t *uplink, char *buffer, size_t len); + #endif /* UPLINK_H_ */ diff --git a/src/serverconfig.h b/src/serverconfig.h index 0cbb320..239f0a2 100644 --- a/src/serverconfig.h +++ b/src/serverconfig.h @@ -6,10 +6,12 @@ // +++++ Performance/memory related #define SERVER_MAX_CLIENTS 4000 #define SERVER_MAX_IMAGES 5000 -#define SERVER_MAX_ALTS 100 +#define SERVER_MAX_ALTS 50 // +++++ Uplink handling (proxy mode) -#define SERVER_UPLINK_FAIL_INCREASE 5 // On server failure, increase numFails by this value -#define SERVER_BAD_UPLINK_THRES 40 // Thresold for numFails at which we ignore a server for the time span below +#define SERVER_GLOBAL_DUP_TIME 6 // How many seconds to wait before changing global fail counter again +#define SERVER_BAD_UPLINK_MIN 10 // Thresold for fails at which we start ignoring the server occasionally +#define SERVER_BAD_UPLINK_MAX 20 // Hard block server if it failed this many times +#define SERVER_BAD_UPLINK_LOCAL_BLOCK 10 // If a server didn't supply the requested image this many times, block it for some time #define SERVER_BAD_UPLINK_IGNORE 180 // How many seconds is a server ignored #define SERVER_MAX_UPLINK_QUEUE 1500 // Maximum number of queued requests per uplink #define SERVER_UPLINK_QUEUELEN_THRES 900 // Threshold where we start dropping incoming clients @@ -33,7 +35,7 @@ #define SERVER_RTT_PROBES 5 // How many probes to average over #define SERVER_RTT_INTERVAL_INIT 5 // Initial interval between probes #define SERVER_RTT_INTERVAL_MAX 45 // Maximum interval between probes -#define SERVER_RTT_BACKOFF_COUNT 5 // If we can't reach any uplink server this many times, consider the uplink bad +#define SERVER_RTT_MAX_UNREACH 10 // If no server was reachable this many times, stop RTT measurements for a while #define SERVER_RTT_INTERVAL_FAILED 180 // Interval to use if no uplink server is reachable for above many times #define SERVER_REMOTE_IMAGE_CHECK_CACHETIME 120 // 2 minutes -- cgit v1.2.3-55-g7522 From e86ee9ba6a0b5299e835a51f62fe5979fc36788c Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Mon, 26 Aug 2019 12:00:00 +0200 Subject: [SERVER] Fix warnings, simplify locking --- src/server/server.c | 2 +- src/server/uplink.c | 23 ++++++++++++----------- 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'src/server/server.c') diff --git a/src/server/server.c b/src/server/server.c index 640048a..922740a 100644 --- a/src/server/server.c +++ b/src/server/server.c @@ -198,7 +198,7 @@ int main(int argc, char *argv[]) printf( "Testing use after free:\n" ); volatile char * volatile test = malloc( 10 ); test[0] = 1; - free( test ); + free( (void*)test ); test[1] = 2; printf( "Testing a failing assertion:\n" ); assert( 4 == 5 ); diff --git a/src/server/uplink.c b/src/server/uplink.c index 6c85580..abfebf0 100644 --- a/src/server/uplink.c +++ b/src/server/uplink.c @@ -370,6 +370,7 @@ static void* uplink_mainloop(void *data) dnbd3_uplink_t * const uplink = (dnbd3_uplink_t*)data; int numSocks, i, waitTime; int altCheckInterval = SERVER_RTT_INTERVAL_INIT; + int rttTestResult; uint32_t discoverFailCount = 0; uint32_t unsavedSeconds = 0; ticks nextAltCheck, lastKeepalive; @@ -397,11 +398,9 @@ static void* uplink_mainloop(void *data) events[EV_SOCKET].fd = -1; while ( !_shutdown && !uplink->shutdown ) { // poll() - mutex_lock( &uplink->rttLock ); waitTime = uplink->rttTestResult == RTT_DOCHANGE ? 0 : -1; - mutex_unlock( &uplink->rttLock ); if ( waitTime == 0 ) { - // Nothing + // 0 means poll, since we're about to change the server } else if ( uplink->current.fd == -1 && !uplink_connectionShouldShutdown( uplink ) ) { waitTime = 1000; } else { @@ -420,10 +419,9 @@ static void* uplink_mainloop(void *data) continue; } // Check if server switch is in order - mutex_lock( &uplink->rttLock ); - if ( uplink->rttTestResult != RTT_DOCHANGE ) { - mutex_unlock( &uplink->rttLock ); - } else { + if ( unlikely( uplink->rttTestResult == RTT_DOCHANGE ) ) { + mutex_lock( &uplink->rttLock ); + assert( uplink->rttTestResult == RTT_DOCHANGE ); uplink->rttTestResult = RTT_IDLE; // The rttTest worker thread has finished our request. // And says it's better to switch to another server @@ -476,7 +474,7 @@ static void* uplink_mainloop(void *data) // Uplink socket if ( (events[EV_SOCKET].revents & (POLLERR | POLLHUP | POLLRDHUP | POLLNVAL)) ) { uplink_connectionFailed( uplink, true ); - logadd( LOG_DEBUG1, "Uplink gone away, panic!\n" ); + logadd( LOG_DEBUG1, "Uplink gone away, panic! (revents=%d)\n", (int)events[EV_SOCKET].revents ); setThreadName( "panic-uplink" ); } else if ( (events[EV_SOCKET].revents & POLLIN) ) { uplink_handleReceive( uplink ); @@ -509,7 +507,7 @@ static void* uplink_mainloop(void *data) if ( uplink->current.fd != -1 && uplink_connectionShouldShutdown( uplink ) ) { mutex_lock( &uplink->sendMutex ); close( uplink->current.fd ); - uplink->current.fd = events[EV_SOCKET].fd = -1; + uplink->current.fd = -1; mutex_unlock( &uplink->sendMutex ); uplink->cycleDetected = false; if ( uplink->recvBufferLen != 0 ) { @@ -522,7 +520,7 @@ static void* uplink_mainloop(void *data) } } // See if we should trigger an RTT measurement - int rttTestResult = uplink->rttTestResult; + rttTestResult = uplink->rttTestResult; if ( rttTestResult == RTT_IDLE || rttTestResult == RTT_DONTCHANGE ) { if ( timing_reached( &nextAltCheck, &now ) || ( uplink->current.fd == -1 && !uplink_connectionShouldShutdown( uplink ) ) || uplink->cycleDetected ) { // It seems it's time for a check @@ -964,6 +962,9 @@ static void uplink_handleReceive(dnbd3_uplink_t *uplink) uplink_connectionFailed( uplink, true ); } +/** + * Only call from uplink thread + */ static void uplink_connectionFailed(dnbd3_uplink_t *uplink, bool findNew) { if ( uplink->current.fd == -1 ) @@ -984,7 +985,7 @@ static void uplink_connectionFailed(dnbd3_uplink_t *uplink, bool findNew) mutex_unlock( &uplink->rttLock ); if ( bail ) return; - altservers_findUplink( uplink ); + altservers_findUplinkAsync( uplink ); } /** -- cgit v1.2.3-55-g7522 From ff228ee519c27bb80234e6eadbf9cbe1adda4318 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Tue, 27 Aug 2019 23:20:20 +0200 Subject: [SERVER] Add timer infrastructure To be used by "close unused fd" checks, and others... --- src/server/server.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++++---- src/server/server.h | 2 +- 2 files changed, 134 insertions(+), 10 deletions(-) (limited to 'src/server/server.c') diff --git a/src/server/server.c b/src/server/server.c index 922740a..1cdd2ab 100644 --- a/src/server/server.c +++ b/src/server/server.c @@ -45,6 +45,25 @@ #define LONGOPT_SIZE 1004 #define LONGOPT_ERRORMSG 1005 +typedef struct _job job_t; + +struct _job { + job_t *next; + void *(*startRoutine)(void *); + void *arg; + ticks dueDate; + int intervalSecs; +}; + +static job_t *jobHead; +static _Atomic(job_t *) newJob; +static bool hasTimerThread = false; +static pthread_t timerThread; + +static pthread_t mainThread; + +#define DEFAULT_TIMER_TIMEOUT (60) + static poll_list_t *listeners = NULL; /** @@ -71,6 +90,12 @@ static void dnbd3_handleSignal2(int signum, siginfo_t *info, void *data); static void* server_asyncImageListLoad(void *data); +static void* timerMainloop(void*); + +static int handlePendingJobs(void); + +static void queueJobInternal(job_t *job); + /** * Print help text for usage instructions */ @@ -105,14 +130,21 @@ void dnbd3_printVersion() /** * Clean up structs, connections, write out data, then exit */ -void dnbd3_cleanup() +_Noreturn static void dnbd3_cleanup() { int retries; _shutdown = true; logadd( LOG_INFO, "Cleanup..." ); - if ( listeners != NULL ) sock_destroyPollList( listeners ); + if ( hasTimerThread ) { + pthread_kill( timerThread, SIGHUP ); + thread_join( timerThread, NULL ); + } + + if ( listeners != NULL ) { + sock_destroyPollList( listeners ); + } listeners = NULL; // Kill connection to all clients @@ -172,6 +204,7 @@ int main(int argc, char *argv[]) { 0, 0, 0, 0 } }; + mainThread = pthread_self(); opt = getopt_long( argc, argv, optString, longOpts, &longIndex ); while ( opt != -1 ) { @@ -195,8 +228,12 @@ int main(int argc, char *argv[]) case LONGOPT_CRC4: return image_generateCrcFile( optarg ) ? 0 : EXIT_FAILURE; case LONGOPT_ASSERT: + printf( "Now leaking memory:\n" ); + char *bla = malloc( 10 ); + bla[2] = 3; + bla = NULL; printf( "Testing use after free:\n" ); - volatile char * volatile test = malloc( 10 ); + char *test = malloc( 10 ); test[0] = 1; free( (void*)test ); test[1] = 2; @@ -303,11 +340,10 @@ int main(int argc, char *argv[]) } // setup signal handler - struct sigaction sa; - memset( &sa, 0, sizeof(sa) ); - sa.sa_sigaction = dnbd3_handleSignal2; - sa.sa_flags = SA_SIGINFO; - //sa.sa_mask = ; + struct sigaction sa = { + .sa_sigaction = dnbd3_handleSignal2, + .sa_flags = SA_SIGINFO, + }; sigaction( SIGTERM, &sa, NULL ); sigaction( SIGINT, &sa, NULL ); sigaction( SIGUSR1, &sa, NULL ); @@ -342,6 +378,10 @@ int main(int argc, char *argv[]) logadd( LOG_INFO, "Server is ready. (%s)", VERSION_STRING ); + if ( thread_create( &timerThread, NULL, &timerMainloop, NULL ) == 0 ) { + hasTimerThread = true; + } + // +++++++++++++++++++++++++++++++++++++++++++++++++++ main loop struct sockaddr_storage client; socklen_t len; @@ -365,7 +405,7 @@ int main(int argc, char *argv[]) // len = sizeof(client); fd = sock_accept( listeners, &client, &len ); - if ( fd < 0 ) { + if ( fd == -1 ) { const int err = errno; if ( err == EINTR || err == EAGAIN ) continue; logadd( LOG_ERROR, "Client accept failure (err=%d)", err ); @@ -469,6 +509,8 @@ static void dnbd3_handleSignal(int signum) static void dnbd3_handleSignal2(int signum, siginfo_t *info, void *data UNUSED) { + if ( !pthread_equal( pthread_self(), mainThread ) ) + return; memcpy( &lastSignal, info, sizeof(siginfo_t) ); dnbd3_handleSignal( signum ); } @@ -488,3 +530,85 @@ static void* server_asyncImageListLoad(void *data UNUSED) return NULL; } +static void* timerMainloop(void* stuff UNUSED) +{ + setThreadName( "timer" ); + while ( !_shutdown ) { + // Handle jobs/timer events; returns timeout until next event + int to = handlePendingJobs(); + sleep( MIN( MAX( 1, to ), DEFAULT_TIMER_TIMEOUT ) ); + } + logadd( LOG_DEBUG1, "Timer thread done" ); + return NULL; +} + +static int handlePendingJobs(void) +{ + declare_now; + job_t *todo, **temp, *old; + int diff; + todo = jobHead; + for ( temp = &todo; *temp != NULL; temp = &(*temp)->next ) { + diff = (int)timing_diff( &now, &(*temp)->dueDate ); + if ( diff > 0 ) // Found one that's in the future + break; + } + jobHead = *temp; // Make it list head + *temp = NULL; // Split off part before that + while ( todo != NULL ) { + threadpool_run( todo->startRoutine, todo->arg ); + old = todo; + todo = todo->next; + if ( old->intervalSecs == 0 ) { + free( old ); // oneshot + } else { + timing_set( &old->dueDate, &now, old->intervalSecs ); + queueJobInternal( old ); // repeated + } + } + // See if any new jobs have been queued + while ( newJob != NULL ) { + todo = newJob; + // NULL should never happen since we're the only consumer + assert( todo != NULL ); + if ( !atomic_compare_exchange_weak( &newJob, &todo, NULL ) ) + continue; + do { + old = todo; + todo = todo->next; + queueJobInternal( old ); + } while ( todo != NULL ); + } + // Return new timeout + if ( jobHead == NULL ) + return DEFAULT_TIMER_TIMEOUT; + return (int)timing_diff( &now, &jobHead->dueDate ); +} + +static void queueJobInternal(job_t *job) +{ + assert( job != NULL ); + job_t **it; + for ( it = &jobHead; *it != NULL; it = &(*it)->next ) { + if ( timing_1le2( &job->dueDate, &(*it)->dueDate ) ) + break; + } + job->next = *it; + *it = job; +} + +void server_addJob(void *(*startRoutine)(void *), void *arg, int delaySecs, int intervalSecs) +{ + declare_now; + job_t *new = malloc( sizeof(*new) ); + new->startRoutine = startRoutine; + new->arg = arg; + new->intervalSecs = intervalSecs; + timing_set( &new->dueDate, &now, delaySecs ); + for ( ;; ) { + new->next = newJob; + if ( atomic_compare_exchange_weak( &newJob, &new->next, new ) ) + break; + } +} + diff --git a/src/server/server.h b/src/server/server.h index bab8421..a026eb6 100644 --- a/src/server/server.h +++ b/src/server/server.h @@ -24,8 +24,8 @@ #include "globals.h" #include "../types.h" -void dnbd3_cleanup(); uint32_t dnbd3_serverUptime(); +void server_addJob(void *(*startRoutine)(void *), void *arg, int delaySecs, int intervalSecs); #if !defined(_FILE_OFFSET_BITS) || _FILE_OFFSET_BITS != 64 #error Please set _FILE_OFFSET_BITS to 64 in your makefile/configuration -- cgit v1.2.3-55-g7522 From ac1bf45ebdd630fbc9ad2c1fa3c0ea99f5206799 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Wed, 28 Aug 2019 13:07:13 +0200 Subject: [SERVER] Make signal handling more POSIX According to POSIX, a signal sent to a PID can be delivered to an arbitrary thread of that process that hasn't the signal blocked. This seens to never happen on Linux, but would mess things up since the code expected the main signal handler to only be executed by the main thread. This should now be fixed by examining the destination PID of the signal as well as the ID of the thread currently running the signal handler. If we notice the signal wasn't sent by our own PID and the handler is not currently run by the main thread, we re-send the signal to the main thread. Otherwise, if the signal was sent by our own PID but the handler is not run in the main thread, do nothing. This way we can use pthread_kill() to wake up threads that might be stuck in a blocking syscall when it's time to shut down. --- src/server/globals.h | 1 + src/server/image.c | 10 ++-------- src/server/integrity.c | 17 +++++++++++++---- src/server/net.c | 11 ++++++----- src/server/rpc.c | 13 ++++++++----- src/server/server.c | 22 +++++++++++++++++----- src/server/threadpool.c | 28 ++++++++++++++++++++++------ src/server/threadpool.h | 5 +++++ 8 files changed, 74 insertions(+), 33 deletions(-) (limited to 'src/server/server.c') diff --git a/src/server/globals.h b/src/server/globals.h index 5dd205a..f940666 100644 --- a/src/server/globals.h +++ b/src/server/globals.h @@ -138,6 +138,7 @@ struct _dnbd3_client char hostName[HOSTNAMELEN]; // inet_ntop version of host pthread_mutex_t sendMutex; // Held while writing to sock if image is incomplete (since uplink uses socket too) pthread_mutex_t lock; + pthread_t thread; }; // ####################################################### diff --git a/src/server/image.c b/src/server/image.c index de93cd4..248c12c 100644 --- a/src/server/image.c +++ b/src/server/image.c @@ -562,9 +562,7 @@ bool image_tryFreeAll() if ( _images[i] != NULL && _images[i]->users == 0 ) { dnbd3_image_t *image = _images[i]; _images[i] = NULL; - mutex_unlock( &imageListLock ); image = image_free( image ); - mutex_lock( &imageListLock ); } if ( i + 1 == _num_images && _images[i] == NULL ) _num_images--; } @@ -574,15 +572,13 @@ bool image_tryFreeAll() /** * Free image. DOES NOT check if it's in use. - * Indirectly locks on imageListLock, image.lock, uplink.queueLock + * (Indirectly) locks on image.lock, uplink.queueLock */ static dnbd3_image_t* image_free(dnbd3_image_t *image) { assert( image != NULL ); assert( image->users == 0 ); - if ( !_shutdown ) { - logadd( LOG_INFO, "Freeing image %s:%d", image->name, (int)image->rid ); - } + logadd( ( _shutdown ? LOG_DEBUG1 : LOG_INFO ), "Freeing image %s:%d", image->name, (int)image->rid ); // uplink_shutdown might return false to tell us // that the shutdown is in progress. Bail out since // this will get called again when the uplink is done. @@ -600,8 +596,6 @@ static dnbd3_image_t* image_free(dnbd3_image_t *image) mutex_unlock( &image->lock ); if ( image->readFd != -1 ) close( image->readFd ); mutex_destroy( &image->lock ); - // - memset( image, 0, sizeof(*image) ); free( image ); return NULL ; } diff --git a/src/server/integrity.c b/src/server/integrity.c index f358c46..e7ebeb2 100644 --- a/src/server/integrity.c +++ b/src/server/integrity.c @@ -184,13 +184,20 @@ static void* integrity_main(void * data UNUSED) mutex_unlock( &image->lock ); } #if defined(linux) || defined(__linux) - if ( sync_file_range( fd, start, end - start, SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER ) == -1 ) { + while ( sync_file_range( fd, start, end - start, SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER ) == -1 ) #else - if ( fsync( fd ) == -1 ) { + while ( fsync( fd ) == -1 ) #endif - logadd( LOG_ERROR, "Cannot flush %s for integrity check", image->path ); + { + if ( _shutdown ) + break; + if ( errno == EINTR ) + continue; + logadd( LOG_ERROR, "Cannot flush %s for integrity check (errno=%d)", image->path, errno ); exit( 1 ); } + if ( _shutdown ) + break; // Use direct I/O only if read length is multiple of 4096 to be on the safe side int tfd; if ( direct && ( end % DNBD3_BLOCK_SIZE ) == 0 ) { @@ -266,7 +273,9 @@ static void* integrity_main(void * data UNUSED) } } mutex_unlock( &integrityQueueLock ); - if ( buffer != NULL ) free( buffer ); + if ( buffer != NULL ) { + free( buffer ); + } bRunning = false; return NULL; } diff --git a/src/server/net.c b/src/server/net.c index e0b516e..9c855e4 100644 --- a/src/server/net.c +++ b/src/server/net.c @@ -44,6 +44,7 @@ #include #include #include +#include static dnbd3_client_t *_clients[SERVER_MAX_CLIENTS]; static int _num_clients = 0; @@ -153,6 +154,7 @@ void* net_handleNewConnection(void *clientPtr) { dnbd3_client_t * const client = (dnbd3_client_t *)clientPtr; dnbd3_request_t request; + client->thread = pthread_self(); // Await data from client. Since this is a fresh connection, we expect data right away sock_setTimeout( client->sock, _clientTimeout ); @@ -631,11 +633,10 @@ void net_disconnectAll() int i; mutex_lock( &_clients_lock ); for (i = 0; i < _num_clients; ++i) { - if ( _clients[i] == NULL ) continue; - dnbd3_client_t * const client = _clients[i]; - mutex_lock( &client->lock ); - if ( client->sock >= 0 ) shutdown( client->sock, SHUT_RDWR ); - mutex_unlock( &client->lock ); + if ( _clients[i] == NULL ) + continue; + shutdown( _clients[i]->sock, SHUT_RDWR ); + pthread_kill( _clients[i]->thread, SIGINT ); } mutex_unlock( &_clients_lock ); } diff --git a/src/server/rpc.c b/src/server/rpc.c index 261c6c0..662263e 100644 --- a/src/server/rpc.c +++ b/src/server/rpc.c @@ -137,13 +137,13 @@ void rpc_sendStatsJson(int sock, dnbd3_host_t* host, const void* data, const int bool hasName = false; bool ok; int keepAlive = HTTP_KEEPALIVE; - do { + while ( !_shutdown ) { // Read request from client struct phr_header headers[100]; size_t numHeaders, prevLen = 0, consumed; struct string method, path; int minorVersion; - do { + while ( !_shutdown ) { // Parse before calling recv, there might be a complete pipelined request in the buffer already // If the request is incomplete, we allow exactly one additional recv() to complete it. // This should suffice for real world scenarios as I don't know of any @@ -188,7 +188,9 @@ void rpc_sendStatsJson(int sock, dnbd3_host_t* host, const void* data, const int sendReply( sock, "400 Bad Request", "text/plain", "Server cannot understand what you're trying to say", -1, HTTP_CLOSE ); goto func_return; } - } while ( true ); + } // Loop while request header incomplete + if ( _shutdown ) + break; if ( keepAlive == HTTP_KEEPALIVE ) { // Only keep the connection alive (and indicate so) if the client seems to support this if ( minorVersion == 0 || hasHeaderValue( headers, numHeaders, &STR_CONNECTION, &STR_CLOSE ) ) { @@ -213,7 +215,8 @@ void rpc_sendStatsJson(int sock, dnbd3_host_t* host, const void* data, const int } else { ok = sendReply( sock, "404 Not found", "text/plain", "Nothing", -1, keepAlive ); } - if ( !ok ) break; + if ( !ok ) + break; } // hoff might be beyond end if the client sent another request (burst) const ssize_t extra = hoff - consumed; @@ -225,7 +228,7 @@ void rpc_sendStatsJson(int sock, dnbd3_host_t* host, const void* data, const int hasName = true; setThreadName( "HTTP" ); } - } while (true); + } // Loop while more requests func_return:; do { const int curCount = --status.count; diff --git a/src/server/server.c b/src/server/server.c index 1cdd2ab..0dddea7 100644 --- a/src/server/server.c +++ b/src/server/server.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include #define LONGOPT_CRC4 1000 #define LONGOPT_ASSERT 1001 @@ -60,6 +62,7 @@ static _Atomic(job_t *) newJob; static bool hasTimerThread = false; static pthread_t timerThread; +static pid_t mainPid; static pthread_t mainThread; #define DEFAULT_TIMER_TIMEOUT (60) @@ -138,7 +141,7 @@ _Noreturn static void dnbd3_cleanup() logadd( LOG_INFO, "Cleanup..." ); if ( hasTimerThread ) { - pthread_kill( timerThread, SIGHUP ); + pthread_kill( timerThread, SIGINT ); thread_join( timerThread, NULL ); } @@ -162,6 +165,8 @@ _Noreturn static void dnbd3_cleanup() // Wait for clients to disconnect net_waitForAllDisconnected(); + threadpool_waitEmpty(); + // Clean up images retries = 5; while ( !image_tryFreeAll() && --retries > 0 ) { @@ -204,6 +209,7 @@ int main(int argc, char *argv[]) { 0, 0, 0, 0 } }; + mainPid = getpid(); mainThread = pthread_self(); opt = getopt_long( argc, argv, optString, longOpts, &longIndex ); @@ -509,10 +515,16 @@ static void dnbd3_handleSignal(int signum) static void dnbd3_handleSignal2(int signum, siginfo_t *info, void *data UNUSED) { - if ( !pthread_equal( pthread_self(), mainThread ) ) - return; - memcpy( &lastSignal, info, sizeof(siginfo_t) ); - dnbd3_handleSignal( signum ); + if ( info->si_pid != mainPid ) { // Source is not this process + memcpy( &lastSignal, info, sizeof(siginfo_t) ); // Copy signal info + if ( info->si_pid != 0 && !pthread_equal( pthread_self(), mainThread ) ) { + pthread_kill( mainThread, info->si_signo ); // And relay signal if we're not the main thread + } + } + if ( pthread_equal( pthread_self(), mainThread ) ) { + // Signal received by main thread -- handle + dnbd3_handleSignal( signum ); + } } uint32_t dnbd3_serverUptime() diff --git a/src/server/threadpool.c b/src/server/threadpool.c index 3947677..0b46fd6 100644 --- a/src/server/threadpool.c +++ b/src/server/threadpool.c @@ -15,6 +15,7 @@ static void *threadpool_worker(void *entryPtr); static pthread_attr_t threadAttrs; static atomic_int maxIdleThreads = -1; static _Atomic(entry_t *) *pool = NULL; +static atomic_int activeThreads = 0; bool threadpool_init(int maxIdle) { @@ -34,10 +35,9 @@ bool threadpool_init(int maxIdle) void threadpool_close() { - _shutdown = true; - int max = maxIdleThreads; - maxIdleThreads = -1; - if ( max <= 0 ) return; + int max = atomic_exchange( &maxIdleThreads, -1 ); + if ( max <= 0 ) + return; for ( int i = 0; i < max; ++i ) { entry_t *cur = pool[i]; if ( cur != NULL && atomic_compare_exchange_strong( &pool[i], &cur, NULL ) ) { @@ -46,9 +46,23 @@ void threadpool_close() } } +void threadpool_waitEmpty() +{ + if ( activeThreads == 0 ) + return; + do { + sleep( 1 ); + logadd( LOG_INFO, "Threadpool: %d threads still active", (int)activeThreads ); + } while ( activeThreads != 0 ); +} + bool threadpool_run(void *(*startRoutine)(void *), void *arg) { - if ( startRoutine == NULL ) { + if ( unlikely( _shutdown ) ) { + logadd( LOG_MINOR, "Cannot submit work to threadpool while shutting down!" ); + return false; + } + if ( unlikely( startRoutine == NULL ) ) { logadd( LOG_ERROR, "Trying to queue work for thread pool with NULL startRoutine" ); return false; // Or bail out!? } @@ -60,7 +74,7 @@ bool threadpool_run(void *(*startRoutine)(void *), void *arg) break; } } - if ( entry == NULL ) { + if ( unlikely( entry == NULL ) ) { entry = malloc( sizeof(entry_t) ); if ( entry == NULL ) { logadd( LOG_WARNING, "Could not alloc entry_t for new thread\n" ); @@ -78,6 +92,7 @@ bool threadpool_run(void *(*startRoutine)(void *), void *arg) free( entry ); return false; } + activeThreads++; } entry->startRoutine = startRoutine; entry->arg = arg; @@ -130,6 +145,7 @@ keep_going:; } signal_close( entry->signal ); free( entry ); + activeThreads--; return NULL; } diff --git a/src/server/threadpool.h b/src/server/threadpool.h index 15dd151..ee0b3aa 100644 --- a/src/server/threadpool.h +++ b/src/server/threadpool.h @@ -17,6 +17,11 @@ bool threadpool_init(int maxIdleThreadCount); */ void threadpool_close(); +/** + * Block until all threads spawned have exited + */ +void threadpool_waitEmpty(); + /** * Run a thread using the thread pool. * @param startRoutine function to run in new thread -- cgit v1.2.3-55-g7522 From ff4e770e645c05da48baddb30a77b9dc15ca76fd Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Fri, 6 Mar 2020 15:00:46 +0100 Subject: [SERVER] Handle "warn unused result" cases --- src/server/fileutil.c | 2 +- src/server/globals.c | 5 ++++- src/server/image.c | 8 ++++++-- src/server/rpc.c | 2 +- src/server/server.c | 5 ++++- src/server/uplink.c | 14 ++++++++++---- 6 files changed, 26 insertions(+), 10 deletions(-) (limited to 'src/server/server.c') diff --git a/src/server/fileutil.c b/src/server/fileutil.c index 336ab68..9a9f066 100644 --- a/src/server/fileutil.c +++ b/src/server/fileutil.c @@ -68,7 +68,7 @@ bool file_setSize(int fd, uint64_t size) // Try really hard... image loading logic relies on the file // having the proper apparent size uint8_t byte = 0; - pread( fd, &byte, 1, size - 1 ); + (void)!pread( fd, &byte, 1, size - 1 ); if ( pwrite( fd, &byte, 1, size - 1 ) == 1 ) return true; return false; } diff --git a/src/server/globals.c b/src/server/globals.c index 2e87400..ac079b1 100644 --- a/src/server/globals.c +++ b/src/server/globals.c @@ -113,7 +113,10 @@ static int ini_handler(void *custom UNUSED, const char* section, const char* key void globals_loadConfig() { char *name = NULL; - asprintf( &name, "%s/%s", _configDir, CONFIG_FILENAME ); + if ( asprintf( &name, "%s/%s", _configDir, CONFIG_FILENAME ) == -1 ) { + logadd( LOG_ERROR, "Memory allocation error for config filename" ); + exit( 1 ); + } if ( name == NULL ) return; if ( initialLoad ) { mutex_init( &loadLock, LOCK_LOAD_CONFIG ); diff --git a/src/server/image.c b/src/server/image.c index 7ffe041..32c9efe 100644 --- a/src/server/image.c +++ b/src/server/image.c @@ -1446,9 +1446,13 @@ static bool image_clone(int sock, char *name, uint16_t revision, uint64_t imageS logadd( LOG_WARNING, "OTF-Clone: Corrupted CRC-32 list. ignored. (%s)", name ); } else { int fd = open( crcFile, O_WRONLY | O_CREAT, 0644 ); - write( fd, &masterCrc, sizeof(uint32_t) ); - write( fd, crc32list, crc32len ); + ssize_t ret = write( fd, &masterCrc, sizeof(masterCrc) ); + ret += write( fd, crc32list, crc32len ); close( fd ); + if ( (size_t)ret != crc32len + sizeof(masterCrc) ) { + logadd( LOG_WARNING, "Could not save freshly received crc32 list for %s:%d", name, (int)revision ); + unlink( crcFile ); + } } } free( crc32list ); diff --git a/src/server/rpc.c b/src/server/rpc.c index a454d6d..b66b8fe 100644 --- a/src/server/rpc.c +++ b/src/server/rpc.c @@ -101,7 +101,7 @@ void rpc_init() int fd = open( "/dev/urandom", O_RDONLY ); if ( fd != -1 ) { uint32_t bla = 1; - read( fd, &bla, 4 ); + (void)!read( fd, &bla, 4 ); randomRunId = (randomRunId << 32) | bla; } close( fd ); diff --git a/src/server/server.c b/src/server/server.c index 0dddea7..c9edc05 100644 --- a/src/server/server.c +++ b/src/server/server.c @@ -315,7 +315,10 @@ int main(int argc, char *argv[]) // No one-shot detected, normal server operation or errormsg serving if ( demonize ) { logadd( LOG_INFO, "Forking into background, see log file for further information" ); - daemon( 1, 0 ); + if ( daemon( 0, 0 ) == -1 ) { + logadd( LOG_ERROR, "Could not daemon(): errno=%d", errno ); + exit( 1 ); + } } if ( errorMsg != NULL ) { setupNetwork( bindAddress ); diff --git a/src/server/uplink.c b/src/server/uplink.c index e644e56..71d9f94 100644 --- a/src/server/uplink.c +++ b/src/server/uplink.c @@ -1098,7 +1098,8 @@ static void uplink_addCrc32(dnbd3_uplink_t *uplink) lists_crc = crc32( lists_crc, (uint8_t*)buffer, bytes ); lists_crc = net_order_32( lists_crc ); if ( lists_crc != masterCrc ) { - logadd( LOG_WARNING, "Received corrupted crc32 list from uplink server (%s)!", uplink->image->name ); + logadd( LOG_WARNING, "Received corrupted crc32 list from uplink server (%s:%d)!", + uplink->image->name, (int)uplink->image->rid ); free( buffer ); return; } @@ -1108,10 +1109,15 @@ static void uplink_addCrc32(dnbd3_uplink_t *uplink) char path[len]; snprintf( path, len, "%s.crc", uplink->image->path ); const int fd = open( path, O_WRONLY | O_CREAT, 0644 ); - if ( fd >= 0 ) { - write( fd, &masterCrc, sizeof(uint32_t) ); - write( fd, buffer, bytes ); + if ( fd != -1 ) { + ssize_t ret = write( fd, &masterCrc, sizeof(masterCrc) ); + ret += write( fd, buffer, bytes ); close( fd ); + if ( (size_t)ret != sizeof(masterCrc) + bytes ) { + unlink( path ); + logadd( LOG_WARNING, "Could not write crc32 file for %s:%d", + uplink->image->name, (int)uplink->image->rid ); + } } } -- cgit v1.2.3-55-g7522 From 2e70a0836173c9502ff5cddd849165d432a883cb Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Tue, 17 Mar 2020 13:01:37 +0100 Subject: [SERVER] Include build type and version in RPC Added new query type q=version, which uses the STATS access rights. --- CMakeLists.txt | 7 +++---- get-version.sh | 2 +- src/server/rpc.c | 10 ++++++++-- src/server/server.c | 7 +++++-- src/types.h | 3 +++ src/version.h | 4 ---- 6 files changed, 20 insertions(+), 13 deletions(-) (limited to 'src/server/server.c') diff --git a/CMakeLists.txt b/CMakeLists.txt index cc8bfb7..b263f77 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,6 +71,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/") ADD_DEFINITIONS(-D_FILE_OFFSET_BITS=64) ADD_DEFINITIONS(-DWITH_IPV6) +ADD_DEFINITIONS(-DBUILD_TYPE=${CMAKE_BUILD_TYPE}) FIND_PACKAGE(Threads) @@ -133,14 +134,12 @@ ADD_CUSTOM_TARGET( -P ${CMAKE_BINARY_DIR}/version.cmake ) -INCLUDE_DIRECTORIES( ${CMAKE_BINARY_DIR}/generated ) - ################################################################################ # CLIENT # ################################################################################ if(BUILD_KERNEL_MODULE) - FILE(GLOB_RECURSE CLIENT_SRCS src/client/*.c) + FILE(GLOB_RECURSE CLIENT_SRCS ${CMAKE_BINARY_DIR}/generated/version.c src/client/*.c) ADD_EXECUTABLE(dnbd3-client ${CLIENT_SRCS}) TARGET_LINK_LIBRARIES(dnbd3-client) ADD_DEPENDENCIES(dnbd3-client version) @@ -157,7 +156,7 @@ if(BUILD_SERVER) message(" ######################## Building server for AFL mode - will be useless otherwise!") ADD_DEFINITIONS(-DAFL_MODE) ENDIF() - FILE(GLOB SERVER_SRCS src/server/*.c src/shared/*.c src/server/picohttpparser/*.c) + FILE(GLOB SERVER_SRCS ${CMAKE_BINARY_DIR}/generated/version.c src/server/*.c src/shared/*.c src/server/picohttpparser/*.c) ADD_EXECUTABLE(dnbd3-server ${SERVER_SRCS}) TARGET_INCLUDE_DIRECTORIES(dnbd3-server PRIVATE ${JANSSON_INCLUDE_DIR}) TARGET_LINK_LIBRARIES(dnbd3-server ${CMAKE_THREAD_LIBS_INIT} ${JANSSON_LIBRARIES}) diff --git a/get-version.sh b/get-version.sh index 1d4a8cb..5e5b3e1 100755 --- a/get-version.sh +++ b/get-version.sh @@ -8,7 +8,7 @@ ROOT_DIR="$(dirname "${SELF}")" cd "$ROOT_DIR" if [ -d .git ]; then - [ -n "$(git diff)" ] && MODDED='+MOD' + [ -n "$(git diff HEAD)" ] && MODDED='+MOD' echo $(git describe)$MODDED, branch $(git rev-parse --abbrev-ref HEAD), built "$(date +%Y-%m-%d)" exit 0 fi diff --git a/src/server/rpc.c b/src/server/rpc.c index b66b8fe..12ad0dd 100644 --- a/src/server/rpc.c +++ b/src/server/rpc.c @@ -6,6 +6,7 @@ #include "image.h" #include "altservers.h" #include "../shared/sockhelper.h" +#include "../version.h" #include "fileutil.h" #include "picohttpparser/picohttpparser.h" #include "urldecode.h" @@ -259,7 +260,7 @@ static bool handleStatus(int sock, int permissions, struct field *fields, size_t { bool ok; bool stats = false, images = false, clients = false, space = false; - bool logfile = false, config = false, altservers = false; + bool logfile = false, config = false, altservers = false, version = false; #define SETVAR(var) if ( !var && STRCMP(fields[i].value, #var) ) var = true for (size_t i = 0; i < fields_num; ++i) { if ( !equals( &fields[i].name, &STR_Q ) ) continue; @@ -270,9 +271,10 @@ static bool handleStatus(int sock, int permissions, struct field *fields, size_t else SETVAR(logfile); else SETVAR(config); else SETVAR(altservers); + else SETVAR(version); } #undef SETVAR - if ( ( stats || space ) && !(permissions & ACL_STATS) ) { + if ( ( stats || space || version ) && !(permissions & ACL_STATS) ) { return sendReply( sock, "403 Forbidden", "text/plain", "No permission to access statistics", -1, keepAlive ); } if ( images && !(permissions & ACL_IMAGE_LIST) ) { @@ -308,6 +310,10 @@ static bool handleStatus(int sock, int permissions, struct field *fields, size_t statisticsJson = json_pack( "{sI}", "runId", randomRunId ); } + if ( version ) { + json_object_set_new( statisticsJson, "version", json_string( VERSION_STRING ) ); + json_object_set_new( statisticsJson, "build", json_string( TOSTRING( BUILD_TYPE ) ) ); + } if ( space ) { uint64_t spaceTotal = 0, spaceAvail = 0; file_freeDiskSpace( _basePath, &spaceTotal, &spaceAvail ); diff --git a/src/server/server.c b/src/server/server.c index c9edc05..71a49b9 100644 --- a/src/server/server.c +++ b/src/server/server.c @@ -342,7 +342,10 @@ int main(int argc, char *argv[]) net_init(); uplink_globalsInit(); rpc_init(); - logadd( LOG_INFO, "DNBD3 server starting.... Machine type: " ENDIAN_MODE ); + logadd( LOG_INFO, "DNBD3 server starting...." ); + logadd( LOG_INFO, "Machine type: " ENDIAN_MODE ); + logadd( LOG_INFO, "Build Type: " TOSTRING( BUILD_TYPE ) ); + logadd( LOG_INFO, "Version: %s", VERSION_STRING ); if ( altservers_load() < 0 ) { logadd( LOG_WARNING, "Could not load alt-servers. Does the file exist in %s?", _configDir ); @@ -385,7 +388,7 @@ int main(int argc, char *argv[]) exit( EXIT_FAILURE ); } - logadd( LOG_INFO, "Server is ready. (%s)", VERSION_STRING ); + logadd( LOG_INFO, "Server is ready." ); if ( thread_create( &timerThread, NULL, &timerMainloop, NULL ) == 0 ) { hasTimerThread = true; diff --git a/src/types.h b/src/types.h index cb0ccfd..dc8e501 100644 --- a/src/types.h +++ b/src/types.h @@ -34,6 +34,9 @@ #define MAX(a,b) ((a) > (b) ? (a) : (b)) #endif +#define STRINGIFY(x) #x +#define TOSTRING(x) STRINGIFY(x) + #ifdef __GNUC__ #define UNUSED __attribute__ ((unused)) #else diff --git a/src/version.h b/src/version.h index 0c4a66b..1c17442 100644 --- a/src/version.h +++ b/src/version.h @@ -23,8 +23,4 @@ extern const char *VERSION_STRING; -// This is done in a little weird way but otherwise eclipse complains about -// unresolvable symbols etc... -#include "version.c" - #endif /* VERSION_H_ */ -- cgit v1.2.3-55-g7522 From ba617b55eb606ab487f154b124750e121518d5e5 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Thu, 19 Mar 2020 11:26:12 +0100 Subject: [SERVER] Add name param to threadpool_run --- src/server/altservers.c | 2 +- src/server/image.c | 2 ++ src/server/server.c | 6 +++--- src/server/threadpool.c | 8 +++++++- src/server/threadpool.h | 3 ++- src/server/uplink.c | 2 +- 6 files changed, 16 insertions(+), 7 deletions(-) (limited to 'src/server/server.c') diff --git a/src/server/altservers.c b/src/server/altservers.c index 1ba75f4..5076a05 100644 --- a/src/server/altservers.c +++ b/src/server/altservers.c @@ -172,7 +172,7 @@ void altservers_findUplinkAsync(dnbd3_uplink_t *uplink) if ( uplink->rttTestResult != RTT_INPROGRESS ) { dnbd3_uplink_t *current = ref_get_uplink( &uplink->image->uplinkref ); if ( current == uplink ) { - threadpool_run( &altservers_runCheck, uplink ); + threadpool_run( &altservers_runCheck, uplink, "UPLINK" ); } else if ( current != NULL ) { ref_put( ¤t->reference ); } diff --git a/src/server/image.c b/src/server/image.c index 81ec479..0ec1d58 100644 --- a/src/server/image.c +++ b/src/server/image.c @@ -1817,6 +1817,7 @@ static void* closeUnusedFds(void* nix UNUSED) timing_gets( &deadline, -UNUSED_FD_TIMEOUT ); int fds[FDCOUNT]; int fdindex = 0; + setThreadName( "unused-fd-close" ); mutex_lock( &imageListLock ); for ( int i = 0; i < _num_images; ++i ) { dnbd3_image_t * const image = _images[i]; @@ -1857,6 +1858,7 @@ static void* saveLoadAllCacheMaps(void* nix UNUSED) static ticks nextSave; declare_now; bool full = timing_reached( &nextSave, &now ); + setThreadName( "cache-mapper" ); mutex_lock( &imageListLock ); for ( int i = 0; i < _num_images; ++i ) { dnbd3_image_t * const image = _images[i]; diff --git a/src/server/server.c b/src/server/server.c index 71a49b9..fa7bcda 100644 --- a/src/server/server.c +++ b/src/server/server.c @@ -404,7 +404,7 @@ int main(int argc, char *argv[]) if ( sigReload ) { sigReload = false; logadd( LOG_INFO, "SIGHUP received, re-scanning image directory" ); - threadpool_run( &server_asyncImageListLoad, NULL ); + threadpool_run( &server_asyncImageListLoad, NULL, "IMAGE_RELOAD" ); } if ( sigLogCycle ) { sigLogCycle = false; @@ -431,7 +431,7 @@ int main(int argc, char *argv[]) continue; } - if ( !threadpool_run( &net_handleNewConnection, (void *)dnbd3_client ) ) { + if ( !threadpool_run( &net_handleNewConnection, (void *)dnbd3_client, "CLIENT" ) ) { logadd( LOG_ERROR, "Could not start thread for new connection." ); free( dnbd3_client ); continue; @@ -574,7 +574,7 @@ static int handlePendingJobs(void) jobHead = *temp; // Make it list head *temp = NULL; // Split off part before that while ( todo != NULL ) { - threadpool_run( todo->startRoutine, todo->arg ); + threadpool_run( todo->startRoutine, todo->arg, "TIMER_TASK" ); old = todo; todo = todo->next; if ( old->intervalSecs == 0 ) { diff --git a/src/server/threadpool.c b/src/server/threadpool.c index 96162a6..63ae19f 100644 --- a/src/server/threadpool.c +++ b/src/server/threadpool.c @@ -8,6 +8,7 @@ typedef struct _entry_t { dnbd3_signal_t* signal; void *(*startRoutine)(void *); void * arg; + const char *name; } entry_t; static void *threadpool_worker(void *entryPtr); @@ -56,7 +57,7 @@ void threadpool_waitEmpty() } while ( activeThreads != 0 ); } -bool threadpool_run(void *(*startRoutine)(void *), void *arg) +bool threadpool_run(void *(*startRoutine)(void *), void *arg, const char *name) { if ( unlikely( _shutdown ) ) { logadd( LOG_MINOR, "Cannot submit work to threadpool while shutting down!" ); @@ -97,6 +98,7 @@ bool threadpool_run(void *(*startRoutine)(void *), void *arg) } entry->startRoutine = startRoutine; entry->arg = arg; + entry->name = name; atomic_thread_fence( memory_order_release ); signal_call( entry->signal ); return true; @@ -126,6 +128,9 @@ keep_going:; logadd( LOG_ERROR, "Worker woke up but has no work to do!" ); exit( 1 ); } + if ( entry->name != NULL ) { + setThreadName( entry->name ); + } #endif // Start assigned work (*entry->startRoutine)( entry->arg ); @@ -146,6 +151,7 @@ keep_going:; // Reaching here means pool is full; just let the thread exit break; } + setThreadName( "[dead]" ); signal_close( entry->signal ); free( entry ); activeThreads--; diff --git a/src/server/threadpool.h b/src/server/threadpool.h index ee0b3aa..d8a526e 100644 --- a/src/server/threadpool.h +++ b/src/server/threadpool.h @@ -26,9 +26,10 @@ void threadpool_waitEmpty(); * Run a thread using the thread pool. * @param startRoutine function to run in new thread * @param arg argument to pass to thead + * @param name STRING CONSTANT (literal) for debugging purposes * @return true if thread was started */ -bool threadpool_run(void *(*startRoutine)(void *), void *arg); +bool threadpool_run(void *(*startRoutine)(void *), void *arg, const char *name); #endif diff --git a/src/server/uplink.c b/src/server/uplink.c index af854d6..a7f140f 100644 --- a/src/server/uplink.c +++ b/src/server/uplink.c @@ -435,7 +435,7 @@ success_ref: job->length = len; job->uplink = uplink; ref_inc( &uplink->reference ); // Hold one for the thread, thread will return it - threadpool_run( &prefetchForClient, (void*)job ); + threadpool_run( &prefetchForClient, (void*)job, "PREFETCH" ); } } if ( getUplink ) { -- cgit v1.2.3-55-g7522