From 77499f086631d0f6eeb96a3e0391cf72eb40ff5e Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Sat, 3 Aug 2019 16:35:02 +0200 Subject: [SERVER] Atomicize some global flags --- src/server/integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/server/integrity.c') diff --git a/src/server/integrity.c b/src/server/integrity.c index 8f17855..a66a364 100644 --- a/src/server/integrity.c +++ b/src/server/integrity.c @@ -29,7 +29,7 @@ static queue_entry checkQueue[CHECK_QUEUE_SIZE]; static pthread_mutex_t integrityQueueLock; static pthread_cond_t queueSignal; static int queueLen = -1; -static volatile bool bRunning = false; +static atomic_bool bRunning = false; static void* integrity_main(void *data); -- cgit v1.2.3-55-g7522 From be7d7d95850c30a154aaa56e95d6a7f36793409d Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Wed, 7 Aug 2019 17:11:51 +0200 Subject: [SERVER] Better lock debugging: Always check lock order Lock order is predefined in locks.h. Immediately bail out if a lock with lower priority is obtained while the same thread already holds one with higher priority. --- LOCKS | 13 +- src/server/altservers.c | 9 +- src/server/globals.c | 2 +- src/server/image.c | 10 +- src/server/integrity.c | 2 +- src/server/locks.c | 319 ++++++++++++++++++++++-------------------------- src/server/locks.h | 36 ++++-- src/server/net.c | 6 +- src/server/rpc.c | 14 +-- src/server/server.c | 7 -- src/server/uplink.c | 6 +- 11 files changed, 198 insertions(+), 226 deletions(-) (limited to 'src/server/integrity.c') diff --git a/LOCKS b/LOCKS index 4b5b07c..77e44a8 100644 --- a/LOCKS +++ b/LOCKS @@ -16,23 +16,22 @@ requests.lock ===== SERVER ===== This is a list of used locks, in the order they -have to be aquired if you must hold multiple locks: -remoteCloneLock | reloadLock +have to be aquired if you must hold multiple locks. +Note this list might be out of date, take a look at the +defines in lock.h for the effective order. +reloadLock +remoteCloneLock _clients_lock _clients[].lock integrityQueueLock _images_lock _images[].lock -pendingLockConsume -pendingLockProduce uplink.queueLock altServersLock client.sendMutex -client.statsLock -statisticsSentLock -statisticsReceivedLock uplink.rttLock uplink.sendMutex +aclLock If you need to lock multiple clients/images/... at once, lock the client with the lowest array index first. diff --git a/src/server/altservers.c b/src/server/altservers.c index a270bf3..3d5e71e 100644 --- a/src/server/altservers.c +++ b/src/server/altservers.c @@ -30,7 +30,7 @@ void altservers_init() { srand( (unsigned int)time( NULL ) ); // Init spinlock - mutex_init( &altServersLock ); + mutex_init( &altServersLock, LOCK_ALT_SERVER_LIST ); // Init signal runSignal = signal_new(); if ( runSignal == NULL ) { @@ -326,13 +326,13 @@ json_t* altservers_toJson() } /** - * Update rtt history of given server - returns the new average for that server + * Update rtt history of given server - returns the new average for that server. + * XXX HOLD altServersLock WHEN CALLING THIS! */ static unsigned int altservers_updateRtt(const dnbd3_host_t * const host, const unsigned int rtt) { unsigned int avg = rtt; int i; - mutex_lock( &altServersLock ); for (i = 0; i < numAltServers; ++i) { if ( !isSameAddressPort( host, &altServers[i].host ) ) continue; altServers[i].rtt[++altServers[i].rttIndex % SERVER_RTT_PROBES] = rtt; @@ -353,7 +353,6 @@ static unsigned int altservers_updateRtt(const dnbd3_host_t * const host, const } break; } - mutex_unlock( &altServersLock ); return avg; } @@ -529,6 +528,7 @@ static void *altservers_main(void *data UNUSED) } clock_gettime( BEST_CLOCK_SOURCE, &end ); // Measurement done - everything fine so far + mutex_lock( &altServersLock ); mutex_lock( &uplink->rttLock ); const bool isCurrent = isSameAddressPort( &servers[itAlt], &uplink->currentServer ); // Penaltize rtt if this was a cycle; this will treat this server with lower priority @@ -538,6 +538,7 @@ static void *altservers_main(void *data UNUSED) + (end.tv_nsec - start.tv_nsec) / 1000 + ( (isCurrent && uplink->cycleDetected) ? 1000000 : 0 )); // µs unsigned int avg = altservers_updateRtt( &servers[itAlt], rtt ); + mutex_unlock( &altServersLock ); // If a cycle was detected, or we lost connection to the current (last) server, penaltize it one time if ( ( uplink->cycleDetected || uplink->fd == -1 ) && isCurrent ) avg = (avg * 2) + 50000; mutex_unlock( &uplink->rttLock ); diff --git a/src/server/globals.c b/src/server/globals.c index 69e8a6e..46c1030 100644 --- a/src/server/globals.c +++ b/src/server/globals.c @@ -112,7 +112,7 @@ void globals_loadConfig() asprintf( &name, "%s/%s", _configDir, CONFIG_FILENAME ); if ( name == NULL ) return; if ( initialLoad ) { - mutex_init( &loadLock ); + mutex_init( &loadLock, LOCK_LOAD_CONFIG ); } if ( mutex_trylock( &loadLock ) != 0 ) { logadd( LOG_INFO, "Ignoring config reload request due to already running reload" ); diff --git a/src/server/image.c b/src/server/image.c index 1f12eda..4a65ed3 100644 --- a/src/server/image.c +++ b/src/server/image.c @@ -59,9 +59,9 @@ static bool image_checkRandomBlocks(const int count, int fdImage, const int64_t void image_serverStartup() { srand( (unsigned int)time( NULL ) ); - mutex_init( &imageListLock ); - mutex_init( &remoteCloneLock ); - mutex_init( &reloadLock ); + mutex_init( &imageListLock, LOCK_IMAGE_LIST ); + mutex_init( &remoteCloneLock, LOCK_REMOTE_CLONE ); + mutex_init( &reloadLock, LOCK_RELOAD ); } /** @@ -347,7 +347,7 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking) img->rid = candidate->rid; img->users = 1; img->working = false; - mutex_init( &img->lock ); + mutex_init( &img->lock, LOCK_IMAGE ); if ( candidate->crc32 != NULL ) { const size_t mb = IMGSIZE_TO_HASHBLOCKS( candidate->virtualFilesize ) * sizeof(uint32_t); img->crc32 = malloc( mb ); @@ -869,7 +869,7 @@ static bool image_load(char *base, char *path, int withUplink) image->working = (image->cache_map == NULL ); timing_get( &image->nextCompletenessEstimate ); image->completenessEstimate = -1; - mutex_init( &image->lock ); + mutex_init( &image->lock, LOCK_IMAGE ); int32_t offset; if ( stat( path, &st ) == 0 ) { // Negatively offset atime by file modification time diff --git a/src/server/integrity.c b/src/server/integrity.c index a66a364..c52d17b 100644 --- a/src/server/integrity.c +++ b/src/server/integrity.c @@ -39,7 +39,7 @@ static void* integrity_main(void *data); void integrity_init() { assert( queueLen == -1 ); - mutex_init( &integrityQueueLock ); + mutex_init( &integrityQueueLock, LOCK_INTEGRITY_QUEUE ); pthread_cond_init( &queueSignal, NULL ); mutex_lock( &integrityQueueLock ); queueLen = 0; diff --git a/src/server/locks.c b/src/server/locks.c index 2c0cb27..b39576b 100644 --- a/src/server/locks.c +++ b/src/server/locks.c @@ -12,47 +12,45 @@ #ifdef _DEBUG #define MAXLOCKS (SERVER_MAX_CLIENTS * 2 + SERVER_MAX_ALTS + 200 + SERVER_MAX_IMAGES) #define MAXTHREADS (SERVER_MAX_CLIENTS + 100) +#define MAXLPT 20 #define LOCKLEN 60 typedef struct { - void *lock; + void * _Atomic lock; ticks locktime; - char locked; - pthread_t thread; + bool _Atomic locked; + pthread_t _Atomic thread; int lockId; + int prio; char name[LOCKLEN]; char where[LOCKLEN]; } debug_lock_t; typedef struct { - pthread_t tid; + pthread_t _Atomic tid; ticks time; char name[LOCKLEN]; char where[LOCKLEN]; - + debug_lock_t *locks[MAXLPT]; } debug_thread_t; int debugThreadCount = 0; static debug_lock_t locks[MAXLOCKS]; static debug_thread_t threads[MAXTHREADS]; -static int init_done = 0; -static pthread_mutex_t initdestory; +static pthread_mutex_t initdestory = PTHREAD_MUTEX_INITIALIZER; static int lockId = 0; -static pthread_t watchdog = 0; -static dnbd3_signal_t* watchdogSignal = NULL; -static void *debug_thread_watchdog(void *something); +#define ULDE(...) do { \ + pthread_mutex_unlock( &initdestory ); \ + logadd( LOG_ERROR, __VA_ARGS__ ); \ + debug_dump_lock_stats(); \ + exit( 4 ); \ +} while(0) -int debug_mutex_init(const char *name, const char *file, int line, pthread_mutex_t *lock) +int debug_mutex_init(const char *name, const char *file, int line, pthread_mutex_t *lock, int priority) { - if ( !init_done ) { - memset( locks, 0, MAXLOCKS * sizeof(debug_lock_t) ); - memset( threads, 0, MAXTHREADS * sizeof(debug_thread_t) ); - pthread_mutex_init( &initdestory, NULL ); - init_done = 1; - } int first = -1; pthread_mutex_lock( &initdestory ); for (int i = 0; i < MAXLOCKS; ++i) { @@ -63,20 +61,18 @@ int debug_mutex_init(const char *name, const char *file, int line, pthread_mutex if ( first == -1 && locks[i].lock == NULL ) first = i; } if ( first == -1 ) { - logadd( LOG_ERROR, "No more free debug locks (%s:%d)\n", file, line ); - pthread_mutex_unlock( &initdestory ); - debug_dump_lock_stats(); - exit( 4 ); + ULDE( "No more free debug locks (%s:%d)\n", file, line ); } locks[first].lock = (void*)lock; - locks[first].locked = 0; + locks[first].locked = false; + locks[first].prio = priority; snprintf( locks[first].name, LOCKLEN, "%s", name ); snprintf( locks[first].where, LOCKLEN, "I %s:%d", file, line ); pthread_mutex_unlock( &initdestory ); return pthread_mutex_init( lock, NULL ); } -int debug_mutex_lock(const char *name, const char *file, int line, pthread_mutex_t *lock) +int debug_mutex_lock(const char *name, const char *file, int line, pthread_mutex_t *lock, bool try) { debug_lock_t *l = NULL; pthread_mutex_lock( &initdestory ); @@ -86,163 +82,180 @@ int debug_mutex_lock(const char *name, const char *file, int line, pthread_mutex break; } } - pthread_mutex_unlock( &initdestory ); if ( l == NULL ) { - logadd( LOG_ERROR, "Tried to lock uninitialized lock %p (%s) at %s:%d\n", (void*)lock, name, file, line ); - debug_dump_lock_stats(); - exit( 4 ); + ULDE( "Tried to lock uninitialized lock %p (%s) at %s:%d\n", (void*)lock, name, file, line ); } debug_thread_t *t = NULL; - pthread_mutex_lock( &initdestory ); + int first = -1; + const pthread_t self = pthread_self(); for (int i = 0; i < MAXTHREADS; ++i) { - if ( threads[i].tid != 0 ) continue; - threads[i].tid = pthread_self(); - timing_get( &threads[i].time ); - snprintf( threads[i].name, LOCKLEN, "%s", name ); - snprintf( threads[i].where, LOCKLEN, "%s:%d", file, line ); - t = &threads[i]; - break; - } - pthread_mutex_unlock( &initdestory ); - if ( t == NULL ) { - logadd( LOG_ERROR, "Lock sanity check: Too many waiting threads for lock %p (%s) at %s:%d\n", (void*)lock, name, file, line ); - exit( 4 ); - } - const int retval = pthread_mutex_lock( lock ); - pthread_mutex_lock( &initdestory ); - t->tid = 0; - pthread_mutex_unlock( &initdestory ); - if ( l->locked ) { - logadd( LOG_ERROR, "Lock sanity check: lock %p (%s) already locked at %s:%d\n", (void*)lock, name, file, line ); - exit( 4 ); - } - l->locked = 1; - timing_get( &l->locktime ); - l->thread = pthread_self(); - snprintf( l->where, LOCKLEN, "L %s:%d", file, line ); - pthread_mutex_lock( &initdestory ); - l->lockId = ++lockId; - pthread_mutex_unlock( &initdestory ); - return retval; -} - -int debug_mutex_trylock(const char *name, const char *file, int line, pthread_mutex_t *lock) -{ - debug_lock_t *l = NULL; - pthread_mutex_lock( &initdestory ); - for (int i = 0; i < MAXLOCKS; ++i) { - if ( locks[i].lock == lock ) { - l = &locks[i]; + if ( threads[i].tid == self ) { + t = &threads[i]; break; } + if ( first == -1 && threads[i].tid == 0 ) { + first = i; + } } - pthread_mutex_unlock( &initdestory ); - if ( l == NULL ) { - logadd( LOG_ERROR, "Tried to lock uninitialized lock %p (%s) at %s:%d\n", (void*)lock, name, file, line ); - debug_dump_lock_stats(); - exit( 4 ); - } - debug_thread_t *t = NULL; - pthread_mutex_lock( &initdestory ); - for (int i = 0; i < MAXTHREADS; ++i) { - if ( threads[i].tid != 0 ) continue; - threads[i].tid = pthread_self(); - timing_get( &threads[i].time ); - snprintf( threads[i].name, LOCKLEN, "%s", name ); - snprintf( threads[i].where, LOCKLEN, "%s:%d", file, line ); - t = &threads[i]; - break; - } - pthread_mutex_unlock( &initdestory ); + int idx; if ( t == NULL ) { - logadd( LOG_ERROR, "Lock sanity check: Too many waiting threads for %p (%s) at %s:%d\n", (void*)lock, name, file, line ); - exit( 4 ); + if ( first == -1 ) { + ULDE( "Lock sanity check: Too many waiting threads for lock %p (%s) at %s:%d\n", (void*)lock, name, file, line ); + } + t = &threads[first]; + timing_get( &t->time ); + t->tid = self; + snprintf( t->name, LOCKLEN, "%s", name ); + snprintf( t->where, LOCKLEN, "%s:%d", file, line ); + memset( t->locks, 0, sizeof(t->locks) ); + idx = 0; + } else { + // Thread already has locks, check for order violation + idx = -1; + for (int i = 0; i < MAXLPT; ++i) { + if ( t->locks[i] == NULL ) { + if ( idx == -1 ) { + idx = i; + } + continue; + } + if ( t->locks[i]->prio >= l->prio ) { + ULDE( "Lock priority violation: %s at %s:%d (%d) when already holding %s at %s (%d)", + name, file, line, l->prio, + t->locks[i]->name, t->locks[i]->where, t->locks[i]->prio ); + } + if ( t->locks[i] == l ) { + ULDE( "Tried to recusively lock %s in the same thread. Tried at %s:%d, when already locked at %s", + name, file, line, t->locks[i]->name ); + } + } + if ( idx == -1 ) { + ULDE( "Thread %d tried to lock more than %d locks.", (int)self, (int)MAXLPT ); + } } - const int retval = pthread_mutex_trylock( lock ); - pthread_mutex_lock( &initdestory ); - t->tid = 0; pthread_mutex_unlock( &initdestory ); + const int retval = try ? pthread_mutex_trylock( lock ) : pthread_mutex_lock( lock ); if ( retval == 0 ) { + timing_get( &l->locktime ); + l->thread = self; + snprintf( l->where, LOCKLEN, "L %s:%d", file, line ); + pthread_mutex_lock( &initdestory ); if ( l->locked ) { logadd( LOG_ERROR, "Lock sanity check: lock %p (%s) already locked at %s:%d\n", (void*)lock, name, file, line ); exit( 4 ); } - l->locked = 1; - timing_get( &l->locktime ); - l->thread = pthread_self(); - snprintf( l->where, LOCKLEN, "L %s:%d", file, line ); - pthread_mutex_lock( &initdestory ); + l->locked = true; + t->locks[idx] = l; l->lockId = ++lockId; pthread_mutex_unlock( &initdestory ); + } else if ( !try || retval != EBUSY ) { + logadd( LOG_ERROR, "Acquiring lock %s at %s:%d failed with error code %d", name, file, line, retval ); + debug_dump_lock_stats(); + exit( 4 ); } return retval; } int debug_mutex_unlock(const char *name, const char *file, int line, pthread_mutex_t *lock) { - debug_lock_t *l = NULL; + debug_thread_t *t = NULL; + pthread_t self = pthread_self(); pthread_mutex_lock( &initdestory ); - for (int i = 0; i < MAXLOCKS; ++i) { - if ( locks[i].lock == lock ) { - l = &locks[i]; + for (int i = 0; i < MAXTHREADS; ++i) { + if ( threads[i].tid == self ) { + t = &threads[i]; break; } } - pthread_mutex_unlock( &initdestory ); - if ( l == NULL ) { - logadd( LOG_ERROR, "Tried to unlock uninitialized lock %p (%s) at %s:%d\n", (void*)lock, name, file, line ); - exit( 4 ); + if ( t == NULL ) { + ULDE( "Unlock called from unknown thread for %s at %s:%d", name, file, line ); } - if ( !l->locked ) { - logadd( LOG_ERROR, "Unlock sanity check: lock %p (%s) not locked at %s:%d\n", (void*)lock, name, file, line ); - exit( 4 ); + int idx = -1; + int cnt = 0; + for (int i = 0; i < MAXLPT; ++i) { + if ( t->locks[i] == NULL ) + continue; + cnt++; + if ( t->locks[i]->lock == lock ) { + idx = i; + } + } + if ( idx == -1 ) { + ULDE( "Unlock: Calling thread doesn't hold lock %s at %s:%d", name, file, line ); } - l->locked = 0; + debug_lock_t *l = t->locks[idx]; + if ( l->thread != self || !l->locked ) { + ULDE( "Unlock sanity check for lock debugger failed! Lock %s is assigned to calling thread, but lock's meta data doesn't match up at %s:%d", name, file, line ); + } + l->locked = false; l->thread = 0; + t->locks[idx] = NULL; + if ( cnt == 1 ) { + t->tid = 0; // No more locks held, free up slot + } snprintf( l->where, LOCKLEN, "U %s:%d", file, line ); - int retval = pthread_mutex_unlock( lock ); + pthread_mutex_unlock( &initdestory ); + const int retval = pthread_mutex_unlock( lock ); + if ( retval != 0 ) { + logadd( LOG_ERROR, "pthread_mutex_unlock returned %d for %s at %s:%d", retval, name, file, line ); + exit( 4 ); + } return retval; } int debug_mutex_cond_wait(const char *name, const char *file, int line, pthread_cond_t *restrict cond, pthread_mutex_t *restrict lock) { debug_lock_t *l = NULL; + debug_thread_t *t = NULL; + pthread_t self = pthread_self(); pthread_mutex_lock( &initdestory ); - for (int i = 0; i < MAXLOCKS; ++i) { - if ( locks[i].lock == lock ) { - l = &locks[i]; + for (int i = 0; i < MAXTHREADS; ++i) { + if ( threads[i].tid == self ) { + t = &threads[i]; break; } } - pthread_mutex_unlock( &initdestory ); + if ( t == NULL ) { + ULDE( "Unlock called from unknown thread for %s at %s:%d", name, file, line ); + } + int mp = 0, mpi = -1; + for (int i = 0; i < MAXLPT; ++i) { + if ( t->locks[i] == NULL ) + continue; + if ( t->locks[i]->lock == lock ) { + l = t->locks[i]; + } else if ( t->locks[i]->prio > mp ) { + mp = t->locks[i]->prio; + mpi = i; + } + } if ( l == NULL ) { - logadd( LOG_ERROR, "Tried to cond_wait on uninitialized lock %p (%s) at %s:%d\n", (void*)lock, name, file, line ); - exit( 4 ); + ULDE( "cond_wait: Calling thread doesn't hold lock %s at %s:%d", name, file, line ); } - if ( !l->locked ) { - logadd( LOG_ERROR, "Cond_wait sanity check: lock %p (%s) not locked at %s:%d\n", (void*)lock, name, file, line ); - exit( 4 ); + if ( l->thread != self || !l->locked ) { + ULDE( "cond_wait: Sanity check for lock debugger failed! Lock %s is assigned to calling thread, but lock's meta data doesn't match up at %s:%d", name, file, line ); } - pthread_t self = pthread_self(); - if ( l->thread != self ) { - logadd( LOG_ERROR, "Cond_wait called from non-owning thread for %p (%s) at %s:%d\n", (void*)lock, name, file, line ); - exit( 4 ); + if ( mp >= l->prio ) { + ULDE( "cond_wait: Yielding a mutex while holding another one with higher prio: %s at %s:%d (%d) while also holding %s at %s (%d)", + name, file, line, l->prio, + t->locks[mpi]->name, t->locks[mpi]->where, mp ); } - l->locked = 0; + l->locked = false; l->thread = 0; - snprintf( l->where, LOCKLEN, "CW %s:%d", file, line ); + snprintf( l->where, LOCKLEN, "CWU %s:%d", file, line ); + pthread_mutex_unlock( &initdestory ); int retval = pthread_cond_wait( cond, lock ); if ( retval != 0 ) { logadd( LOG_ERROR, "pthread_cond_wait returned %d for lock %p (%s) at %s:%d\n", retval, (void*)lock, name, file, line ); exit( 4 ); } - if ( l->locked != 0 || l->thread != 0 ) { + if ( l->locked || l->thread != 0 ) { logadd( LOG_ERROR, "Lock is not free after returning from pthread_cond_wait for %p (%s) at %s:%d\n", (void*)lock, name, file, line ); exit( 4 ); } - l->locked = 1; l->thread = self; timing_get( &l->locktime ); + l->locked = true; pthread_mutex_lock( &initdestory ); l->lockId = ++lockId; pthread_mutex_unlock( &initdestory ); @@ -290,63 +303,21 @@ void debug_dump_lock_stats() "* Locked: %d\n", locks[i].name, locks[i].where, (int)locks[i].locked ); } } - printf( "\n **** WAITING THREADS ****\n\n" ); + printf( "\n **** ACTIVE THREADS ****\n\n" ); for (int i = 0; i < MAXTHREADS; ++i) { - if ( threads[i].tid == 0 ) continue; + if ( threads[i].tid == 0 ) + continue; printf( "* *** Thread %d ***\n" "* Lock: %s\n" "* Where: %s\n" "* How long: %d secs\n", (int)threads[i].tid, threads[i].name, threads[i].where, (int)timing_diff( &threads[i].time, &now ) ); - } - pthread_mutex_unlock( &initdestory ); -} - -static void *debug_thread_watchdog(void *something UNUSED) -{ - setThreadName( "debug-watchdog" ); - while ( !_shutdown ) { - if ( init_done ) { - declare_now; - pthread_mutex_lock( &initdestory ); - for (int i = 0; i < MAXTHREADS; ++i) { - if ( threads[i].tid == 0 ) continue; - const uint32_t diff = timing_diff( &threads[i].time, &now ); - if ( diff > 6 && diff < 100000 ) { - printf( "\n\n +++++++++ DEADLOCK ++++++++++++\n\n" ); - pthread_mutex_unlock( &initdestory ); - debug_dump_lock_stats(); - exit( 99 ); - } - } - pthread_mutex_unlock( &initdestory ); + for (int j = 0; j < MAXLPT; ++j) { + if ( threads[i].locks[j] == NULL ) + continue; + printf( " * Lock %s @ %s\n", threads[i].locks[j]->name, threads[i].locks[j]->where ); } - if ( watchdogSignal == NULL || signal_wait( watchdogSignal, 5000 ) == SIGNAL_ERROR ) sleep( 5 ); } - return NULL ; -} - -#endif - -void debug_locks_start_watchdog() -{ -#ifdef _DEBUG - watchdogSignal = signal_new(); - if ( 0 != thread_create( &watchdog, NULL, &debug_thread_watchdog, (void *)NULL ) ) { - logadd( LOG_ERROR, "Could not start debug-lock watchdog." ); - return; - } -#endif + pthread_mutex_unlock( &initdestory ); } -void debug_locks_stop_watchdog() -{ -#ifdef _DEBUG - _shutdown = true; - printf( "Killing debug watchdog...\n" ); - pthread_mutex_lock( &initdestory ); - signal_call( watchdogSignal ); - pthread_mutex_unlock( &initdestory ); - thread_join( watchdog, NULL ); - signal_close( watchdogSignal ); #endif -} diff --git a/src/server/locks.h b/src/server/locks.h index 7f72722..e5c9801 100644 --- a/src/server/locks.h +++ b/src/server/locks.h @@ -5,19 +5,38 @@ #include #include #include +#include + +// Lock priority + +#define LOCK_RELOAD 90 +#define LOCK_LOAD_CONFIG 100 +#define LOCK_REMOTE_CLONE 110 +#define LOCK_CLIENT_LIST 120 +#define LOCK_CLIENT 130 +#define LOCK_INTEGRITY_QUEUE 140 +#define LOCK_IMAGE_LIST 150 +#define LOCK_IMAGE 160 +#define LOCK_UPLINK_QUEUE 170 +#define LOCK_ALT_SERVER_LIST 180 +#define LOCK_CLIENT_SEND 190 +#define LOCK_UPLINK_RTT 200 +#define LOCK_UPLINK_SEND 210 +#define LOCK_RPC_ACL 220 + +// #ifdef _DEBUG -#define mutex_init( lock ) debug_mutex_init( #lock, __FILE__, __LINE__, lock) -#define mutex_lock( lock ) debug_mutex_lock( #lock, __FILE__, __LINE__, lock) -#define mutex_trylock( lock ) debug_mutex_trylock( #lock, __FILE__, __LINE__, lock) +#define mutex_init( lock, prio ) debug_mutex_init( #lock, __FILE__, __LINE__, lock, prio) +#define mutex_lock( lock ) debug_mutex_lock( #lock, __FILE__, __LINE__, lock, false) +#define mutex_trylock( lock ) debug_mutex_lock( #lock, __FILE__, __LINE__, lock, true) #define mutex_unlock( lock ) debug_mutex_unlock( #lock, __FILE__, __LINE__, lock) #define mutex_cond_wait( cond, lock ) debug_mutex_cond_wait( #lock, __FILE__, __LINE__, cond, lock) #define mutex_destroy( lock ) debug_mutex_destroy( #lock, __FILE__, __LINE__, lock) -int debug_mutex_init(const char *name, const char *file, int line, pthread_mutex_t *lock); -int debug_mutex_lock(const char *name, const char *file, int line, pthread_mutex_t *lock); -int debug_mutex_trylock(const char *name, const char *file, int line, pthread_mutex_t *lock); +int debug_mutex_init(const char *name, const char *file, int line, pthread_mutex_t *lock, int priority); +int debug_mutex_lock(const char *name, const char *file, int line, pthread_mutex_t *lock, bool try); int debug_mutex_unlock(const char *name, const char *file, int line, pthread_mutex_t *lock); int debug_mutex_cond_wait(const char *name, const char *file, int line, pthread_cond_t *restrict cond, pthread_mutex_t *restrict lock); int debug_mutex_destroy(const char *name, const char *file, int line, pthread_mutex_t *lock); @@ -27,7 +46,7 @@ void debug_dump_lock_stats(); #else -#define mutex_init( lock ) pthread_mutex_init(lock, NULL) +#define mutex_init( lock, prio ) pthread_mutex_init(lock, NULL) #define mutex_lock( lock ) pthread_mutex_lock(lock) #define mutex_trylock( lock ) pthread_mutex_trylock(lock) #define mutex_unlock( lock ) pthread_mutex_unlock(lock) @@ -82,7 +101,4 @@ static inline int debug_thread_join(pthread_t thread, void **value_ptr) #endif -void debug_locks_start_watchdog(); -void debug_locks_stop_watchdog(); - #endif /* LOCKS_H_ */ diff --git a/src/server/net.c b/src/server/net.c index 92728c0..8f97a12 100644 --- a/src/server/net.c +++ b/src/server/net.c @@ -145,7 +145,7 @@ static inline bool sendPadding( const int fd, uint32_t bytes ) void net_init() { - mutex_init( &_clients_lock ); + mutex_init( &_clients_lock, LOCK_CLIENT_LIST ); } void* net_handleNewConnection(void *clientPtr) @@ -186,8 +186,8 @@ void* net_handleNewConnection(void *clientPtr) } } while (0); // Fully init client struct - mutex_init( &client->lock ); - mutex_init( &client->sendMutex ); + mutex_init( &client->lock, LOCK_CLIENT ); + mutex_init( &client->sendMutex, LOCK_CLIENT_SEND ); mutex_lock( &client->lock ); host_to_string( &client->host, client->hostName, HOSTNAMELEN ); diff --git a/src/server/rpc.c b/src/server/rpc.c index 5dbcafe..261c6c0 100644 --- a/src/server/rpc.c +++ b/src/server/rpc.c @@ -75,10 +75,9 @@ static json_int_t randomRunId; static pthread_mutex_t aclLock; #define MAX_CLIENTS 50 #define CUTOFF_START 40 -static pthread_mutex_t statusLock; static struct { - int count; - bool overloaded; + atomic_int count; + atomic_bool overloaded; } status; static bool handleStatus(int sock, int permissions, struct field *fields, size_t fields_num, int keepAlive); @@ -91,8 +90,7 @@ static void loadAcl(); void rpc_init() { - mutex_init( &aclLock ); - mutex_init( &statusLock ); + mutex_init( &aclLock, LOCK_RPC_ACL ); randomRunId = (((json_int_t)getpid()) << 16) | (json_int_t)time(NULL); // if ( sizeof(randomRunId) > 4 ) { @@ -123,10 +121,8 @@ void rpc_sendStatsJson(int sock, dnbd3_host_t* host, const void* data, const int return; } do { - mutex_lock( &statusLock ); const int curCount = ++status.count; UPDATE_LOADSTATE( curCount ); - mutex_unlock( &statusLock ); if ( curCount > MAX_CLIENTS ) { sendReply( sock, "503 Service Temporarily Unavailable", "text/plain", "Too many HTTP clients", -1, HTTP_CLOSE ); goto func_return; @@ -198,9 +194,7 @@ void rpc_sendStatsJson(int sock, dnbd3_host_t* host, const void* data, const int if ( minorVersion == 0 || hasHeaderValue( headers, numHeaders, &STR_CONNECTION, &STR_CLOSE ) ) { keepAlive = HTTP_CLOSE; } else { // And if there aren't too many active HTTP sessions - mutex_lock( &statusLock ); if ( status.overloaded ) keepAlive = HTTP_CLOSE; - mutex_unlock( &statusLock ); } } if ( method.s != NULL && path.s != NULL ) { @@ -234,10 +228,8 @@ void rpc_sendStatsJson(int sock, dnbd3_host_t* host, const void* data, const int } while (true); func_return:; do { - mutex_lock( &statusLock ); const int curCount = --status.count; UPDATE_LOADSTATE( curCount ); - mutex_unlock( &statusLock ); } while (0); } diff --git a/src/server/server.c b/src/server/server.c index 10ab208..838aec2 100644 --- a/src/server/server.c +++ b/src/server/server.c @@ -133,9 +133,6 @@ void dnbd3_cleanup() // Wait for clients to disconnect net_waitForAllDisconnected(); - // Watchdog not needed anymore - debug_locks_stop_watchdog(); - // Clean up images retries = 5; while ( !image_tryFreeAll() && --retries > 0 ) { @@ -303,10 +300,6 @@ int main(int argc, char *argv[]) logadd( LOG_WARNING, "Could not load alt-servers. Does the file exist in %s?", _configDir ); } -#ifdef _DEBUG - debug_locks_start_watchdog(); -#endif - // setup signal handler struct sigaction sa; memset( &sa, 0, sizeof(sa) ); diff --git a/src/server/uplink.c b/src/server/uplink.c index bb1ffdc..9570273 100644 --- a/src/server/uplink.c +++ b/src/server/uplink.c @@ -89,9 +89,9 @@ bool uplink_init(dnbd3_image_t *image, int sock, dnbd3_host_t *host, int version goto failure; } link = image->uplink = calloc( 1, sizeof(dnbd3_connection_t) ); - mutex_init( &link->queueLock ); - mutex_init( &link->rttLock ); - mutex_init( &link->sendMutex ); + mutex_init( &link->queueLock, LOCK_UPLINK_QUEUE ); + mutex_init( &link->rttLock, LOCK_UPLINK_RTT ); + mutex_init( &link->sendMutex, LOCK_UPLINK_SEND ); link->image = image; link->bytesReceived = 0; link->idleTime = 0; -- cgit v1.2.3-55-g7522 From 1d2295131020688b5a688286ce8c53d6bb7abdb8 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Sun, 18 Aug 2019 21:59:26 +0200 Subject: [SERVER] Add struct representing active connection to uplink server --- src/server/altservers.c | 30 +++++++++---------- src/server/globals.h | 14 ++++----- src/server/image.c | 2 +- src/server/integrity.c | 2 +- src/server/uplink.c | 78 ++++++++++++++++++++++++------------------------- 5 files changed, 60 insertions(+), 66 deletions(-) (limited to 'src/server/integrity.c') diff --git a/src/server/altservers.c b/src/server/altservers.c index 1001981..fbe10a8 100644 --- a/src/server/altservers.c +++ b/src/server/altservers.c @@ -125,14 +125,14 @@ void altservers_findUplink(dnbd3_uplink_t *uplink) { if ( uplink->shutdown ) return; - if ( uplink->fd != -1 && numAltServers <= 1 ) + if ( uplink->current.fd != -1 && numAltServers <= 1 ) return; int i; // if betterFd != -1 it means the uplink is supposed to switch to another // server. As this function here is called by the uplink thread, it can // never be that the uplink is supposed to switch, but instead calls // this function. - assert( uplink->betterFd == -1 ); + assert( uplink->better.fd == -1 ); // it is however possible that an RTT measurement is currently in progress, // so check for that case and do nothing if one is in progress // XXX As this function is only ever called by the image's uplink thread, @@ -457,9 +457,9 @@ static void *altservers_main(void *data UNUSED) if ( uplink == NULL ) continue; // First, get 4 alt servers - numAlts = altservers_getListForUplink( servers, ALTS, uplink->fd == -1 ); + numAlts = altservers_getListForUplink( servers, ALTS, uplink->current.fd == -1 ); // If we're already connected and only got one server anyways, there isn't much to do - if ( numAlts <= 1 && uplink->fd != -1 ) { + if ( numAlts <= 1 && uplink->current.fd != -1 ) { uplink->rttTestResult = RTT_DONTCHANGE; continue; } @@ -475,15 +475,15 @@ static void *altservers_main(void *data UNUSED) } LOG( LOG_DEBUG2, "[%d] Running alt check", itLink ); assert( uplink->rttTestResult == RTT_INPROGRESS ); - if ( uplink->fd != -1 ) { + if ( uplink->current.fd != -1 ) { // Add current server if not already in list found = false; for (itAlt = 0; itAlt < numAlts; ++itAlt) { - if ( !isSameAddressPort( &uplink->currentServer, &servers[itAlt] ) ) continue; + if ( !isSameAddressPort( &uplink->current.host, &servers[itAlt] ) ) continue; found = true; break; } - if ( !found ) servers[numAlts++] = uplink->currentServer; + if ( !found ) servers[numAlts++] = uplink->current.host; } // Test them all int bestSock = -1; @@ -537,7 +537,7 @@ static void *altservers_main(void *data UNUSED) // Measurement done - everything fine so far mutex_lock( &altServersLock ); mutex_lock( &uplink->rttLock ); - const bool isCurrent = isSameAddressPort( &servers[itAlt], &uplink->currentServer ); + const bool isCurrent = isSameAddressPort( &servers[itAlt], &uplink->current.host ); // Penaltize rtt if this was a cycle; this will treat this server with lower priority // in the near future too, so we prevent alternating between two servers that are both // part of a cycle and have the lowest latency. @@ -547,9 +547,9 @@ static void *altservers_main(void *data UNUSED) unsigned int avg = altservers_updateRtt( &servers[itAlt], rtt ); mutex_unlock( &altServersLock ); // If a cycle was detected, or we lost connection to the current (last) server, penaltize it one time - if ( ( uplink->cycleDetected || uplink->fd == -1 ) && isCurrent ) avg = (avg * 2) + 50000; + if ( ( uplink->cycleDetected || uplink->current.fd == -1 ) && isCurrent ) avg = (avg * 2) + 50000; mutex_unlock( &uplink->rttLock ); - if ( uplink->fd != -1 && isCurrent ) { + if ( uplink->current.fd != -1 && isCurrent ) { // Was measuring current server currentRtt = avg; close( sock ); @@ -574,18 +574,18 @@ static void *altservers_main(void *data UNUSED) close( sock ); } // Done testing all servers. See if we should switch - if ( bestSock != -1 && (uplink->fd == -1 || (bestRtt < 10000000 && RTT_THRESHOLD_FACTOR(currentRtt) > bestRtt)) ) { + if ( bestSock != -1 && (uplink->current.fd == -1 || (bestRtt < 10000000 && RTT_THRESHOLD_FACTOR(currentRtt) > bestRtt)) ) { // yep - if ( currentRtt > 10000000 || uplink->fd == -1 ) { + if ( currentRtt > 10000000 || uplink->current.fd == -1 ) { LOG( LOG_DEBUG1, "Change - best: %luµs, current: -", bestRtt ); } else { LOG( LOG_DEBUG1, "Change - best: %luµs, current: %luµs", bestRtt, currentRtt ); } sock_setTimeout( bestSock, _uplinkTimeout ); mutex_lock( &uplink->rttLock ); - uplink->betterFd = bestSock; - uplink->betterServer = servers[bestIndex]; - uplink->betterVersion = bestProtocolVersion; + uplink->better.fd = bestSock; + uplink->better.host = servers[bestIndex]; + uplink->better.version = bestProtocolVersion; uplink->rttTestResult = RTT_DOCHANGE; mutex_unlock( &uplink->rttLock ); signal_call( uplink->signal ); diff --git a/src/server/globals.h b/src/server/globals.h index 0371e33..659e5a2 100644 --- a/src/server/globals.h +++ b/src/server/globals.h @@ -31,9 +31,9 @@ typedef struct } dnbd3_queued_request_t; typedef struct { - int fd; - int version; - dnbd3_host_t host; + int fd; // Socket fd for this connection + int version; // Protocol version of remote server + dnbd3_host_t host; // IP/Port of remote server } dnbd3_server_connection_t; #define RTT_IDLE 0 // Not in progress @@ -43,20 +43,16 @@ typedef struct { #define RTT_NOT_REACHABLE 4 // No uplink was reachable struct _dnbd3_uplink { - int fd; // socket fd to remote server - int version; // remote server protocol version + dnbd3_server_connection_t current; // Currently active connection; fd == -1 means disconnected + dnbd3_server_connection_t better; // Better connection as found by altserver worker; fd == -1 means none dnbd3_signal_t* signal; // used to wake up the process pthread_t thread; // thread holding the connection pthread_mutex_t sendMutex; // For locking socket while sending pthread_mutex_t queueLock; // lock for synchronization on request queue etc. dnbd3_image_t *image; // image that this uplink is used for; do not call get/release for this pointer - dnbd3_host_t currentServer; // Current server we're connected to pthread_mutex_t rttLock; // When accessing rttTestResult, betterFd or betterServer int rttTestResult; // RTT_* int cacheFd; // used to write to the image, in case it is relayed. ONLY USE FROM UPLINK THREAD! - int betterVersion; // protocol version of better server - int betterFd; // Active connection to better server, ready to use - dnbd3_host_t betterServer; // The better server uint8_t *recvBuffer; // Buffer for receiving payload uint32_t recvBufferLen; // Len of ^^ atomic_bool shutdown; // signal this thread to stop, must only be set from uplink_shutdown() or cleanup in uplink_mainloop() diff --git a/src/server/image.c b/src/server/image.c index 4a65ed3..d250715 100644 --- a/src/server/image.c +++ b/src/server/image.c @@ -1508,7 +1508,7 @@ json_t* image_getListAsJson() uplinkName[0] = '\0'; } else { bytesReceived = image->uplink->bytesReceived; - if ( image->uplink->fd == -1 || !host_to_string( &image->uplink->currentServer, uplinkName, sizeof(uplinkName) ) ) { + if ( image->uplink->current.fd == -1 || !host_to_string( &image->uplink->current.host, uplinkName, sizeof(uplinkName) ) ) { uplinkName[0] = '\0'; } } diff --git a/src/server/integrity.c b/src/server/integrity.c index c52d17b..3d1ac9b 100644 --- a/src/server/integrity.c +++ b/src/server/integrity.c @@ -240,7 +240,7 @@ static void* integrity_main(void * data UNUSED) if ( !foundCorrupted ) { mutex_lock( &image->lock ); if ( image->uplink != NULL ) { // TODO: image_determineWorkingState() helper? - image->working = image->uplink->fd != -1 && image->readFd != -1; + image->working = image->uplink->current.fd != -1 && image->readFd != -1; } mutex_unlock( &image->lock ); } diff --git a/src/server/uplink.c b/src/server/uplink.c index 7d66b21..e21e28c 100644 --- a/src/server/uplink.c +++ b/src/server/uplink.c @@ -97,7 +97,7 @@ bool uplink_init(dnbd3_image_t *image, int sock, dnbd3_host_t *host, int version uplink->idleTime = 0; uplink->queueLen = 0; mutex_lock( &uplink->sendMutex ); - uplink->fd = -1; + uplink->current.fd = -1; mutex_unlock( &uplink->sendMutex ); uplink->cacheFd = -1; uplink->signal = NULL; @@ -105,12 +105,12 @@ bool uplink_init(dnbd3_image_t *image, int sock, dnbd3_host_t *host, int version mutex_lock( &uplink->rttLock ); uplink->cycleDetected = false; if ( sock >= 0 ) { - uplink->betterFd = sock; - uplink->betterServer = *host; + uplink->better.fd = sock; + uplink->better.host = *host; uplink->rttTestResult = RTT_DOCHANGE; - uplink->betterVersion = version; + uplink->better.version = version; } else { - uplink->betterFd = -1; + uplink->better.fd = -1; uplink->rttTestResult = RTT_IDLE; } mutex_unlock( &uplink->rttLock ); @@ -211,7 +211,7 @@ bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uin } // Check if the client is the same host as the uplink. If so assume this is a circular proxy chain // This might be a false positive if there are multiple instances running on the same host (IP) - if ( hops != 0 && isSameAddress( &uplink->currentServer, &client->host ) ) { + if ( hops != 0 && isSameAddress( &uplink->current.host, &client->host ) ) { mutex_unlock( &client->image->lock ); logadd( LOG_WARNING, "Proxy cycle detected (same host)." ); mutex_lock( &uplink->rttLock ); @@ -315,14 +315,14 @@ bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uin if ( mutex_trylock( &uplink->sendMutex ) != 0 ) { logadd( LOG_DEBUG2, "Could not trylock send mutex, queueing uplink request" ); } else { - if ( uplink->fd == -1 ) { + if ( uplink->current.fd == -1 ) { mutex_unlock( &uplink->sendMutex ); logadd( LOG_DEBUG2, "Cannot do direct uplink request: No socket open" ); } else { const uint64_t reqStart = uplink->queue[freeSlot].from & ~(uint64_t)(DNBD3_BLOCK_SIZE - 1); const uint32_t reqSize = (uint32_t)(((uplink->queue[freeSlot].to + DNBD3_BLOCK_SIZE - 1) & ~(uint64_t)(DNBD3_BLOCK_SIZE - 1)) - reqStart); if ( hops < 200 ) ++hops; - const bool ret = dnbd3_get_block( uplink->fd, reqStart, reqSize, reqStart, COND_HOPCOUNT( uplink->version, hops ) ); + const bool ret = dnbd3_get_block( uplink->current.fd, reqStart, reqSize, reqStart, COND_HOPCOUNT( uplink->current.version, hops ) ); mutex_unlock( &uplink->sendMutex ); if ( !ret ) { logadd( LOG_DEBUG2, "Could not send out direct uplink request, queueing" ); @@ -405,7 +405,7 @@ static void* uplink_mainloop(void *data) mutex_unlock( &uplink->rttLock ); if ( waitTime == 0 ) { // Nothing - } else if ( uplink->fd == -1 && !uplink_connectionShouldShutdown( uplink ) ) { + } else if ( uplink->current.fd == -1 && !uplink_connectionShouldShutdown( uplink ) ) { waitTime = 1000; } else { declare_now; @@ -413,7 +413,7 @@ static void* uplink_mainloop(void *data) if ( waitTime < 100 ) waitTime = 100; if ( waitTime > 5000 ) waitTime = 5000; } - events[EV_SOCKET].fd = uplink->fd; + events[EV_SOCKET].fd = uplink->current.fd; numSocks = poll( events, EV_COUNT, waitTime ); if ( _shutdown || uplink->shutdown ) goto cleanup; if ( numSocks == -1 ) { // Error? @@ -430,13 +430,11 @@ static void* uplink_mainloop(void *data) uplink->rttTestResult = RTT_IDLE; // The rttTest worker thread has finished our request. // And says it's better to switch to another server - const int fd = uplink->fd; + const int fd = uplink->current.fd; mutex_lock( &uplink->sendMutex ); - uplink->fd = uplink->betterFd; + uplink->current = uplink->better; mutex_unlock( &uplink->sendMutex ); - uplink->betterFd = -1; - uplink->currentServer = uplink->betterServer; - uplink->version = uplink->betterVersion; + uplink->better.fd = -1; uplink->cycleDetected = false; mutex_unlock( &uplink->rttLock ); discoverFailCount = 0; @@ -445,7 +443,7 @@ static void* uplink_mainloop(void *data) uplink->image->working = true; uplink->replicatedLastBlock = false; // Reset this to be safe - request could've been sent but reply was never received buffer[0] = '@'; - if ( host_to_string( &uplink->currentServer, buffer + 1, sizeof(buffer) - 1 ) ) { + if ( host_to_string( &uplink->current.host, buffer + 1, sizeof(buffer) - 1 ) ) { logadd( LOG_DEBUG1, "(Uplink %s) Now connected to %s\n", uplink->image->name, buffer + 1 ); setThreadName( buffer ); } @@ -471,7 +469,7 @@ static void* uplink_mainloop(void *data) if ( signal_clear( uplink->signal ) == SIGNAL_ERROR ) { logadd( LOG_WARNING, "Errno on signal on uplink for %s! Things will break!", uplink->image->name ); } - if ( uplink->fd != -1 ) { + if ( uplink->current.fd != -1 ) { // Uplink seems fine, relay requests to it... uplink_sendRequests( uplink, true ); } else { // No uplink; maybe it was shutdown since it was idle for too long @@ -499,9 +497,9 @@ static void* uplink_mainloop(void *data) uplink_saveCacheMap( uplink ); } // Keep-alive - if ( uplink->fd != -1 && uplink->replicationHandle == REP_NONE ) { + if ( uplink->current.fd != -1 && uplink->replicationHandle == REP_NONE ) { // Send keep-alive if nothing is happening - if ( uplink_sendKeepalive( uplink->fd ) ) { + if ( uplink_sendKeepalive( uplink->current.fd ) ) { // Re-trigger periodically, in case it requires a minimum user count uplink_sendReplicationRequest( uplink ); } else { @@ -511,10 +509,10 @@ static void* uplink_mainloop(void *data) } } // Don't keep uplink established if we're idle for too much - if ( uplink->fd != -1 && uplink_connectionShouldShutdown( uplink ) ) { + if ( uplink->current.fd != -1 && uplink_connectionShouldShutdown( uplink ) ) { mutex_lock( &uplink->sendMutex ); - close( uplink->fd ); - uplink->fd = events[EV_SOCKET].fd = -1; + close( uplink->current.fd ); + uplink->current.fd = events[EV_SOCKET].fd = -1; mutex_unlock( &uplink->sendMutex ); uplink->cycleDetected = false; if ( uplink->recvBufferLen != 0 ) { @@ -531,7 +529,7 @@ static void* uplink_mainloop(void *data) const int rttTestResult = uplink->rttTestResult; mutex_unlock( &uplink->rttLock ); if ( rttTestResult == RTT_IDLE || rttTestResult == RTT_DONTCHANGE ) { - if ( timing_reached( &nextAltCheck, &now ) || ( uplink->fd == -1 && !uplink_connectionShouldShutdown( uplink ) ) || uplink->cycleDetected ) { + if ( timing_reached( &nextAltCheck, &now ) || ( uplink->current.fd == -1 && !uplink_connectionShouldShutdown( uplink ) ) || uplink->cycleDetected ) { // It seems it's time for a check if ( image_isComplete( uplink->image ) ) { // Quit work if image is complete @@ -556,7 +554,7 @@ static void* uplink_mainloop(void *data) timing_set( &nextAltCheck, &now, (discoverFailCount < SERVER_RTT_BACKOFF_COUNT ? altCheckInterval : SERVER_RTT_INTERVAL_FAILED) ); } #ifdef _DEBUG - if ( uplink->fd != -1 && !uplink->shutdown ) { + if ( uplink->current.fd != -1 && !uplink->shutdown ) { bool resend = false; ticks deadline; timing_set( &deadline, &now, -10 ); @@ -594,10 +592,10 @@ static void* uplink_mainloop(void *data) uplink->image->uplink = NULL; } mutex_lock( &uplink->queueLock ); - const int fd = uplink->fd; + const int fd = uplink->current.fd; const dnbd3_signal_t* signal = uplink->signal; mutex_lock( &uplink->sendMutex ); - uplink->fd = -1; + uplink->current.fd = -1; mutex_unlock( &uplink->sendMutex ); uplink->signal = NULL; // Do not access uplink->image after unlocking, since we set @@ -610,8 +608,8 @@ static void* uplink_mainloop(void *data) // Wait for the RTT check to finish/fail if it's in progress while ( uplink->rttTestResult == RTT_INPROGRESS ) usleep( 10000 ); - if ( uplink->betterFd != -1 ) { - close( uplink->betterFd ); + if ( uplink->better.fd != -1 ) { + close( uplink->better.fd ); } mutex_destroy( &uplink->queueLock ); mutex_destroy( &uplink->rttLock ); @@ -651,14 +649,14 @@ static void uplink_sendRequests(dnbd3_uplink_t *uplink, bool newOnly) mutex_unlock( &uplink->queueLock ); if ( hops < 200 ) ++hops; mutex_lock( &uplink->sendMutex ); - const bool ret = dnbd3_get_block( uplink->fd, reqStart, reqSize, reqStart, COND_HOPCOUNT( uplink->version, hops ) ); + const bool ret = dnbd3_get_block( uplink->current.fd, reqStart, reqSize, reqStart, COND_HOPCOUNT( uplink->current.version, hops ) ); mutex_unlock( &uplink->sendMutex ); if ( !ret ) { // Non-critical - if the connection dropped or the server was changed // the thread will re-send this request as soon as the connection // is reestablished. logadd( LOG_DEBUG1, "Error forwarding request to uplink server!\n" ); - altservers_serverFailed( &uplink->currentServer ); + altservers_serverFailed( &uplink->current.host ); return; } mutex_lock( &uplink->queueLock ); @@ -678,7 +676,7 @@ static void uplink_sendRequests(dnbd3_uplink_t *uplink, bool newOnly) */ static void uplink_sendReplicationRequest(dnbd3_uplink_t *uplink) { - if ( uplink == NULL || uplink->fd == -1 ) return; + if ( uplink == NULL || uplink->current.fd == -1 ) return; if ( _backgroundReplication == BGR_DISABLED || uplink->cacheFd == -1 ) return; // Don't do background replication if ( uplink->nextReplicationIndex == -1 || uplink->replicationHandle != REP_NONE ) return; @@ -724,7 +722,7 @@ static void uplink_sendReplicationRequest(dnbd3_uplink_t *uplink) uplink->replicationHandle = offset; const uint32_t size = (uint32_t)MIN( image->virtualFilesize - offset, FILE_BYTES_PER_MAP_BYTE ); mutex_lock( &uplink->sendMutex ); - bool sendOk = dnbd3_get_block( uplink->fd, offset, size, uplink->replicationHandle, COND_HOPCOUNT( uplink->version, 1 ) ); + bool sendOk = dnbd3_get_block( uplink->current.fd, offset, size, uplink->replicationHandle, COND_HOPCOUNT( uplink->current.version, 1 ) ); mutex_unlock( &uplink->sendMutex ); if ( !sendOk ) { logadd( LOG_DEBUG1, "Error sending background replication request to uplink server!\n" ); @@ -798,7 +796,7 @@ static void uplink_handleReceive(dnbd3_uplink_t *uplink) dnbd3_reply_t inReply, outReply; int ret, i; for (;;) { - ret = dnbd3_read_reply( uplink->fd, &inReply, false ); + ret = dnbd3_read_reply( uplink->current.fd, &inReply, false ); if ( unlikely( ret == REPLY_INTR ) && likely( !_shutdown && !uplink->shutdown ) ) continue; if ( ret == REPLY_AGAIN ) break; if ( unlikely( ret == REPLY_CLOSED ) ) { @@ -826,7 +824,7 @@ static void uplink_handleReceive(dnbd3_uplink_t *uplink) exit( 1 ); } } - if ( unlikely( (uint32_t)sock_recv( uplink->fd, uplink->recvBuffer, inReply.size ) != inReply.size ) ) { + if ( unlikely( (uint32_t)sock_recv( uplink->current.fd, uplink->recvBuffer, inReply.size ) != inReply.size ) ) { logadd( LOG_INFO, "Lost connection to uplink server of %s (payload)", uplink->image->path ); goto error_cleanup; } @@ -973,12 +971,12 @@ static void uplink_handleReceive(dnbd3_uplink_t *uplink) static void uplink_connectionFailed(dnbd3_uplink_t *uplink, bool findNew) { - if ( uplink->fd == -1 ) + if ( uplink->current.fd == -1 ) return; - altservers_serverFailed( &uplink->currentServer ); + altservers_serverFailed( &uplink->current.host ); mutex_lock( &uplink->sendMutex ); - close( uplink->fd ); - uplink->fd = -1; + close( uplink->current.fd ); + uplink->current.fd = -1; mutex_unlock( &uplink->sendMutex ); uplink->replicationHandle = REP_NONE; if ( _backgroundReplication == BGR_FULL && uplink->nextReplicationIndex == -1 ) { @@ -987,7 +985,7 @@ static void uplink_connectionFailed(dnbd3_uplink_t *uplink, bool findNew) if ( !findNew ) return; mutex_lock( &uplink->rttLock ); - bool bail = uplink->rttTestResult == RTT_INPROGRESS || uplink->betterFd != -1; + bool bail = uplink->rttTestResult == RTT_INPROGRESS || uplink->better.fd != -1; mutex_unlock( &uplink->rttLock ); if ( bail ) return; @@ -1016,7 +1014,7 @@ static void uplink_addCrc32(dnbd3_uplink_t *uplink) uint32_t masterCrc; uint32_t *buffer = malloc( bytes ); mutex_lock( &uplink->sendMutex ); - bool sendOk = dnbd3_get_crc32( uplink->fd, &masterCrc, buffer, &bytes ); + bool sendOk = dnbd3_get_crc32( uplink->current.fd, &masterCrc, buffer, &bytes ); mutex_unlock( &uplink->sendMutex ); if ( !sendOk || bytes == 0 ) { free( buffer ); -- cgit v1.2.3-55-g7522 From 69f5bf408b9587a6e2008fba2224c2d506f1a895 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Tue, 27 Aug 2019 16:13:07 +0200 Subject: [SERVER] Use reference counting for uplink First step towards less locking for proxy mode --- src/server/altservers.c | 13 ++- src/server/globals.h | 4 +- src/server/image.c | 39 ++++----- src/server/integrity.c | 17 ++-- src/server/net.c | 48 +++++++---- src/server/net.h | 2 + src/server/reference.c | 33 ++++++++ src/server/reference.h | 54 ++++++++++++ src/server/reftypes.h | 25 ++++++ src/server/uplink.c | 214 ++++++++++++++++++++++++++++-------------------- src/server/uplink.h | 2 +- 11 files changed, 311 insertions(+), 140 deletions(-) create mode 100644 src/server/reference.c create mode 100644 src/server/reference.h create mode 100644 src/server/reftypes.h (limited to 'src/server/integrity.c') diff --git a/src/server/altservers.c b/src/server/altservers.c index 493ed9e..7d7fdbe 100644 --- a/src/server/altservers.c +++ b/src/server/altservers.c @@ -7,6 +7,8 @@ #include "../shared/protocol.h" #include "../shared/timing.h" #include "../serverconfig.h" +#include "reference.h" + #include #include #include @@ -104,7 +106,6 @@ void altservers_findUplinkAsync(dnbd3_uplink_t *uplink) return; if ( uplink->current.fd != -1 && numAltServers <= 1 ) return; - int i; // if betterFd != -1 it means the uplink is supposed to switch to another // server. As this function here is called by the uplink thread, it can // never be that the uplink is supposed to switch, but instead calls @@ -112,11 +113,14 @@ void altservers_findUplinkAsync(dnbd3_uplink_t *uplink) assert( uplink->better.fd == -1 ); // it is however possible that an RTT measurement is currently in progress, // so check for that case and do nothing if one is in progress - mutex_lock( &uplink->rttLock ); if ( uplink->rttTestResult != RTT_INPROGRESS ) { - threadpool_run( &altservers_runCheck, uplink ); + dnbd3_uplink_t *current = ref_get_uplink( &uplink->image->uplinkref ); + if ( current == uplink ) { + threadpool_run( &altservers_runCheck, uplink ); + } else if ( current != NULL ) { + ref_put( ¤t->reference ); + } } - mutex_unlock( &uplink->rttLock ); } /** @@ -375,6 +379,7 @@ static void *altservers_runCheck(void *data) assert( uplink != NULL ); setThreadName( "altserver-check" ); altservers_findUplinkInternal( uplink ); + ref_put( &uplink->reference ); // Acquired in findUplinkAsync // Save cache maps of all images if applicable // TODO: Has nothing to do with alt servers really, maybe move somewhere else? declare_now; diff --git a/src/server/globals.h b/src/server/globals.h index 4d97c6b..5dd205a 100644 --- a/src/server/globals.h +++ b/src/server/globals.h @@ -8,6 +8,7 @@ #include #include #include +#include "reftypes.h" typedef struct timespec ticks; @@ -64,6 +65,7 @@ typedef struct { #define RTT_NOT_REACHABLE 4 // No uplink was reachable struct _dnbd3_uplink { + ref reference; dnbd3_server_connection_t current; // Currently active connection; fd == -1 means disconnected dnbd3_server_connection_t better; // Better connection as found by altserver worker; fd == -1 means none dnbd3_signal_t* signal; // used to wake up the process @@ -107,7 +109,7 @@ struct _dnbd3_image { char *path; // absolute path of the image char *name; // public name of the image (usually relative path minus revision ID) - dnbd3_uplink_t *uplink; // pointer to a server connection + weakref uplinkref; // pointer to a server connection uint8_t *cache_map; // cache map telling which parts are locally cached, NULL if complete uint64_t virtualFilesize; // virtual size of image (real size rounded up to multiple of 4k) uint64_t realFilesize; // actual file size on disk diff --git a/src/server/image.c b/src/server/image.c index 1a6e0f8..5b58347 100644 --- a/src/server/image.c +++ b/src/server/image.c @@ -8,6 +8,7 @@ #include "../shared/protocol.h" #include "../shared/timing.h" #include "../shared/crc32.h" +#include "reference.h" #include #include @@ -375,9 +376,7 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking) // Check if image is incomplete, handle if ( candidate->cache_map != NULL ) { - if ( candidate->uplink == NULL ) { - uplink_init( candidate, -1, NULL, -1 ); - } + uplink_init( candidate, -1, NULL, -1 ); } return candidate; // We did all we can, hopefully it's working @@ -484,17 +483,7 @@ void image_killUplinks() mutex_lock( &imageListLock ); for (i = 0; i < _num_images; ++i) { if ( _images[i] == NULL ) continue; - mutex_lock( &_images[i]->lock ); - if ( _images[i]->uplink != NULL ) { - mutex_lock( &_images[i]->uplink->queueLock ); - if ( !_images[i]->uplink->shutdown ) { - thread_detach( _images[i]->uplink->thread ); - _images[i]->uplink->shutdown = true; - } - mutex_unlock( &_images[i]->uplink->queueLock ); - signal_call( _images[i]->uplink->signal ); - } - mutex_unlock( &_images[i]->lock ); + uplink_shutdown( _images[i] ); } mutex_unlock( &imageListLock ); } @@ -588,11 +577,15 @@ bool image_tryFreeAll() static dnbd3_image_t* image_free(dnbd3_image_t *image) { assert( image != NULL ); + assert( image->users == 0 ); if ( !_shutdown ) { logadd( LOG_INFO, "Freeing image %s:%d", image->name, (int)image->rid ); } - // - uplink_shutdown( image ); + // uplink_shutdown might return false to tell us + // that the shutdown is in progress. Bail out since + // this will get called again when the uplink is done. + if ( !uplink_shutdown( image ) ) + return NULL; mutex_lock( &image->lock ); free( image->cache_map ); free( image->crc32 ); @@ -860,7 +853,7 @@ static bool image_load(char *base, char *path, int withUplink) image->cache_map = cache_map; image->crc32 = crc32list; image->masterCrc32 = masterCrc; - image->uplink = NULL; + image->uplinkref = NULL; image->realFilesize = realFilesize; image->virtualFilesize = virtualFilesize; image->rid = (uint16_t)revision; @@ -1503,16 +1496,18 @@ json_t* image_getListAsJson() mutex_lock( &image->lock ); idleTime = (int)timing_diff( &image->atime, &now ); completeness = image_getCompletenessEstimate( image ); - if ( image->uplink == NULL ) { + mutex_unlock( &image->lock ); + dnbd3_uplink_t *uplink = ref_get_uplink( &image->uplinkref ); + if ( uplink == NULL ) { bytesReceived = 0; uplinkName[0] = '\0'; } else { - bytesReceived = image->uplink->bytesReceived; - if ( !uplink_getHostString( image->uplink, uplinkName, sizeof(uplinkName) ) ) { + bytesReceived = uplink->bytesReceived; + if ( !uplink_getHostString( uplink, uplinkName, sizeof(uplinkName) ) ) { uplinkName[0] = '\0'; } + ref_put( &uplink->reference ); } - mutex_unlock( &image->lock ); jsonImage = json_pack( "{sisssisisisisI}", "id", image->id, // id, name, rid never change, so access them without locking @@ -1734,7 +1729,7 @@ void image_closeUnusedFd() if ( image == NULL ) continue; mutex_lock( &image->lock ); - if ( image->users == 0 && image->uplink == NULL && timing_reached( &image->atime, &deadline ) ) { + if ( image->users == 0 && image->uplinkref == NULL && timing_reached( &image->atime, &deadline ) ) { snprintf( imgstr, sizeof(imgstr), "%s:%d", image->name, (int)image->rid ); fd = image->readFd; image->readFd = -1; diff --git a/src/server/integrity.c b/src/server/integrity.c index 3d1ac9b..f358c46 100644 --- a/src/server/integrity.c +++ b/src/server/integrity.c @@ -4,6 +4,7 @@ #include "locks.h" #include "image.h" #include "uplink.h" +#include "reference.h" #include #include @@ -238,11 +239,13 @@ static void* integrity_main(void * data UNUSED) if ( i + 1 == queueLen ) queueLen--; // Mark as working again if applicable if ( !foundCorrupted ) { - mutex_lock( &image->lock ); - if ( image->uplink != NULL ) { // TODO: image_determineWorkingState() helper? - image->working = image->uplink->current.fd != -1 && image->readFd != -1; + dnbd3_uplink_t *uplink = ref_get_uplink( &image->uplinkref ); + if ( uplink != NULL ) { // TODO: image_determineWorkingState() helper? + mutex_lock( &image->lock ); + image->working = uplink->current.fd != -1 && image->readFd != -1; + mutex_unlock( &image->lock ); + ref_put( &uplink->reference ); } - mutex_unlock( &image->lock ); } } else { // Still more blocks to go... @@ -255,12 +258,8 @@ static void* integrity_main(void * data UNUSED) // Something was fishy, make sure uplink exists mutex_lock( &image->lock ); image->working = false; - bool restart = image->uplink == NULL || image->uplink->shutdown; mutex_unlock( &image->lock ); - if ( restart ) { - uplink_shutdown( image ); - uplink_init( image, -1, NULL, -1 ); - } + uplink_init( image, -1, NULL, -1 ); } // Release :-) image_release( image ); diff --git a/src/server/net.c b/src/server/net.c index 4976eea..e0b516e 100644 --- a/src/server/net.c +++ b/src/server/net.c @@ -24,6 +24,7 @@ #include "locks.h" #include "rpc.h" #include "altservers.h" +#include "reference.h" #include "../shared/sockhelper.h" #include "../shared/timing.h" @@ -229,7 +230,7 @@ void* net_handleNewConnection(void *clientPtr) rid = serializer_get_uint16( &payload ); const uint8_t flags = serializer_get_uint8( &payload ); client->isServer = ( flags & FLAGS8_SERVER ); - if ( request.size < 3 || !image_name || client_version < MIN_SUPPORTED_CLIENT ) { + if ( unlikely( request.size < 3 || !image_name || client_version < MIN_SUPPORTED_CLIENT ) ) { if ( client_version < MIN_SUPPORTED_CLIENT ) { logadd( LOG_DEBUG1, "Client %s too old", client->hostName ); } else { @@ -257,22 +258,25 @@ void* net_handleNewConnection(void *clientPtr) } client->image = image; atomic_thread_fence( memory_order_release ); - if ( image == NULL ) { + if ( unlikely( image == NULL ) ) { //logadd( LOG_DEBUG1, "Client requested non-existent image '%s' (rid:%d), rejected\n", image_name, (int)rid ); - } else if ( !image->working ) { + } else if ( unlikely( !image->working ) ) { logadd( LOG_DEBUG1, "Client %s requested non-working image '%s' (rid:%d), rejected\n", client->hostName, image_name, (int)rid ); } else { - bool penalty; // Image is fine so far, but occasionally drop a client if the uplink for the image is clogged or unavailable bOk = true; if ( image->cache_map != NULL ) { - mutex_lock( &image->lock ); - if ( image->uplink == NULL || image->uplink->cacheFd == -1 || image->uplink->queueLen > SERVER_UPLINK_QUEUELEN_THRES ) { + dnbd3_uplink_t *uplink = ref_get_uplink( &image->uplinkref ); + if ( uplink == NULL || uplink->cacheFd == -1 || uplink->queueLen > SERVER_UPLINK_QUEUELEN_THRES ) { bOk = ( rand() % 4 ) == 1; } - penalty = bOk && image->uplink != NULL && image->uplink->cacheFd == -1; - mutex_unlock( &image->lock ); + bool penalty = bOk && ( uplink == NULL || uplink->cacheFd == -1 ); + if ( uplink == NULL ) { + uplink_init( image, -1, NULL, 0 ); + } else { + ref_put( &uplink->reference ); + } if ( penalty ) { // Wait 100ms if local caching is not working so this usleep( 100000 ); // server gets a penalty and is less likely to be selected } @@ -300,7 +304,7 @@ void* net_handleNewConnection(void *clientPtr) } } - if ( bOk ) { + if ( likely( bOk ) ) { // add artificial delay if applicable if ( client->isServer && _serverPenalty != 0 ) { usleep( _serverPenalty ); @@ -315,7 +319,7 @@ void* net_handleNewConnection(void *clientPtr) case CMD_GET_BLOCK:; const uint64_t offset = request.offset_small; // Copy to full uint64 to prevent repeated masking reply.handle = request.handle; - if ( offset >= image->virtualFilesize ) { + if ( unlikely( offset >= image->virtualFilesize ) ) { // Sanity check logadd( LOG_WARNING, "Client %s requested non-existent block", client->hostName ); reply.size = 0; @@ -323,7 +327,7 @@ void* net_handleNewConnection(void *clientPtr) send_reply( client->sock, &reply, NULL ); break; } - if ( offset + request.size > image->virtualFilesize ) { + if ( unlikely( offset + request.size > image->virtualFilesize ) ) { // Sanity check logadd( LOG_WARNING, "Client %s requested data block that extends beyond image size", client->hostName ); reply.size = 0; @@ -398,7 +402,7 @@ void* net_handleNewConnection(void *clientPtr) reply.size = request.size; fixup_reply( reply ); - const bool lock = image->uplink != NULL; + const bool lock = image->uplinkref != NULL; if ( lock ) mutex_lock( &client->sendMutex ); // Send reply header if ( send( client->sock, &reply, sizeof(dnbd3_reply_t), (request.size == 0 ? 0 : MSG_MORE) ) != sizeof(dnbd3_reply_t) ) { @@ -696,9 +700,11 @@ static dnbd3_client_t* freeClientStruct(dnbd3_client_t *client) { mutex_lock( &client->lock ); if ( client->image != NULL ) { - mutex_lock( &client->image->lock ); - if ( client->image->uplink != NULL ) uplink_removeClient( client->image->uplink, client ); - mutex_unlock( &client->image->lock ); + dnbd3_uplink_t *uplink = ref_get_uplink( &client->image->uplinkref ); + if ( uplink != NULL ) { + uplink_removeClient( uplink, client ); + ref_put( &uplink->reference ); + } } mutex_lock( &client->sendMutex ); if ( client->sock != -1 ) { @@ -740,3 +746,15 @@ static bool addToList(dnbd3_client_t *client) return true; } +void net_sendReply(dnbd3_client_t *client, uint16_t cmd, uint64_t handle) +{ + dnbd3_reply_t reply; + reply.magic = dnbd3_packet_magic; + reply.cmd = cmd; + reply.handle = handle; + reply.size = 0; + mutex_lock( &client->sendMutex ); + send_reply( client->sock, &reply, NULL ); + mutex_unlock( &client->sendMutex ); +} + diff --git a/src/server/net.h b/src/server/net.h index 6813b49..7719aef 100644 --- a/src/server/net.h +++ b/src/server/net.h @@ -37,4 +37,6 @@ void net_disconnectAll(); void net_waitForAllDisconnected(); +void net_sendReply(dnbd3_client_t *client, uint16_t cmd, uint64_t handle); + #endif /* NET_H_ */ diff --git a/src/server/reference.c b/src/server/reference.c new file mode 100644 index 0000000..468e00b --- /dev/null +++ b/src/server/reference.c @@ -0,0 +1,33 @@ +#ifndef unlikely +#define unlikely(x) (x) +#endif +#include "reference.h" +#include +#include + +void ref_init( ref *reference, void ( *freefun )( ref * ), long count ) +{ + reference->count = count; + reference->free = freefun; +} + +_Noreturn void _ref_error( const char *message ) +{ + fprintf( stderr, "Reference counter overflow\n" ); + abort(); +} + +void ref_setref( weakref *weakref, ref *ref ) +{ + union _aligned_ref_ *new_weakref = 0; + if ( ref ) { + ( new_weakref = aligned_ref( ref->_aligned_ref ) )->ref = ref; + ref->count += sizeof( union _aligned_ref_ ) + 1; + } + char *old_weakref = (char *)atomic_exchange( weakref, new_weakref ); + if ( !old_weakref ) + return; + struct _ref_ *old_ref = aligned_ref( old_weakref )->ref; + old_ref->count += old_weakref - (char *)aligned_ref( old_weakref ) - sizeof( union _aligned_ref_ ); + ref_put( old_ref ); +} diff --git a/src/server/reference.h b/src/server/reference.h new file mode 100644 index 0000000..0bc081a --- /dev/null +++ b/src/server/reference.h @@ -0,0 +1,54 @@ +#ifndef _REFERENCE_H_ +#define _REFERENCE_H_ + +#include "reftypes.h" +#include +#include + +#define container_of(ptr, type, member) \ + ((type *)((char *)(ptr) - (char *)&(((type *)NULL)->member))) + +void ref_init( ref *reference, void ( *freefun )( ref * ), long count ); + +void ref_setref( weakref *weakref, ref *ref ); + +_Noreturn void _ref_error( const char *message ); + +static inline ref *ref_get( weakref *weakref ) +{ + char *old_weakref = (char *)*weakref; + do { + if ( old_weakref == NULL ) + return NULL; + if ( aligned_ref( old_weakref ) != aligned_ref( old_weakref + 1 ) ) { + old_weakref = (char *)*weakref; + continue; + } + } while ( !atomic_compare_exchange_weak( weakref, (void **)&old_weakref, old_weakref + 1 ) ); + struct _ref_ *ref = aligned_ref( old_weakref )->ref; + if ( unlikely( ++ref->count == -1 ) ) { + _ref_error( "Reference counter overflow. Aborting.\n" ); + } + char *cur_weakref = ( char * )*weakref; + do { + if ( aligned_ref( cur_weakref ) != aligned_ref( old_weakref ) ) { + ref->count--; + break; + } + } while ( !atomic_compare_exchange_weak( weakref, (void **)&cur_weakref, cur_weakref - 1 ) ); + return ref; +} + +static inline void ref_put( ref *ref ) +{ + if ( --ref->count == 0 ) { + ref->free( ref ); + } +} + +#define ref_get_uplink(wr) ({ \ + ref* ref = ref_get( wr ); \ + ref == NULL ? NULL : container_of(ref, dnbd3_uplink_t, reference); \ +}) + +#endif diff --git a/src/server/reftypes.h b/src/server/reftypes.h new file mode 100644 index 0000000..45c0c20 --- /dev/null +++ b/src/server/reftypes.h @@ -0,0 +1,25 @@ +#ifndef _REFTYPES_H_ +#define _REFTYPES_H_ + +#include + +_Static_assert( sizeof( void * ) == sizeof( _Atomic( void * ) ), "Atomic pointer bad" ); + +typedef _Atomic( void * ) weakref; + +#define aligned_ref(ptr) \ + ((union _aligned_ref_ *)((ptr) - (uintptr_t)(ptr) % sizeof(union _aligned_ref_))) + +union _aligned_ref_ { + struct _ref_ *ref; + void *_padding[( 32 - 1 ) / sizeof( void * ) + 1]; +}; + +typedef struct _ref_ { + _Atomic long count; + void ( *free )( struct _ref_ * ); + char _padding[sizeof( union _aligned_ref_ )]; + char _aligned_ref[sizeof( union _aligned_ref_ )]; +} ref; + +#endif diff --git a/src/server/uplink.c b/src/server/uplink.c index abfebf0..7a39887 100644 --- a/src/server/uplink.c +++ b/src/server/uplink.c @@ -3,10 +3,12 @@ #include "locks.h" #include "image.h" #include "altservers.h" +#include "net.h" #include "../shared/sockhelper.h" #include "../shared/protocol.h" #include "../shared/timing.h" #include "../shared/crc32.h" +#include "reference.h" #include #include @@ -45,6 +47,8 @@ static const char *const NAMES_ULR[4] = { static atomic_uint_fast64_t totalBytesReceived = 0; +static void cancelAllRequests(dnbd3_uplink_t *uplink); +static void uplink_free(ref *ref); static void* uplink_mainloop(void *data); static void uplink_sendRequests(dnbd3_uplink_t *uplink, bool newOnly); static int uplink_findNextIncompleteHashBlock(dnbd3_uplink_t *uplink, const int lastBlockIndex); @@ -76,19 +80,24 @@ uint64_t uplink_getTotalBytesReceived() bool uplink_init(dnbd3_image_t *image, int sock, dnbd3_host_t *host, int version) { if ( !_isProxy || _shutdown ) return false; - dnbd3_uplink_t *uplink = NULL; assert( image != NULL ); mutex_lock( &image->lock ); - if ( image->uplink != NULL && !image->uplink->shutdown ) { + dnbd3_uplink_t *uplink = ref_get_uplink( &image->uplinkref ); + if ( uplink != NULL ) { mutex_unlock( &image->lock ); - if ( sock >= 0 ) close( sock ); + if ( sock != -1 ) { + close( sock ); + } + ref_put( &uplink->reference ); return true; // There's already an uplink, so should we consider this success or failure? } if ( image->cache_map == NULL ) { logadd( LOG_WARNING, "Uplink was requested for image %s, but it is already complete", image->name ); goto failure; } - uplink = image->uplink = calloc( 1, sizeof(dnbd3_uplink_t) ); + uplink = calloc( 1, sizeof(dnbd3_uplink_t) ); + // Start with one reference for the uplink thread. We'll return it when the thread finishes + ref_init( &uplink->reference, uplink_free, 1 ); mutex_init( &uplink->queueLock, LOCK_UPLINK_QUEUE ); mutex_init( &uplink->rttLock, LOCK_UPLINK_RTT ); mutex_init( &uplink->sendMutex, LOCK_UPLINK_SEND ); @@ -121,12 +130,13 @@ bool uplink_init(dnbd3_image_t *image, int sock, dnbd3_host_t *host, int version logadd( LOG_ERROR, "Could not start thread for new uplink." ); goto failure; } + ref_setref( &image->uplinkref, &uplink->reference ); mutex_unlock( &image->lock ); return true; failure: ; if ( uplink != NULL ) { free( uplink ); - uplink = image->uplink = NULL; + uplink = NULL; } mutex_unlock( &image->lock ); return false; @@ -137,34 +147,83 @@ failure: ; * Calling it multiple times, even concurrently, will * not break anything. */ -void uplink_shutdown(dnbd3_image_t *image) +bool uplink_shutdown(dnbd3_image_t *image) { - bool join = false; - pthread_t thread; assert( image != NULL ); mutex_lock( &image->lock ); - if ( image->uplink == NULL ) { + dnbd3_uplink_t *uplink = ref_get_uplink( &image->uplinkref ); + if ( uplink == NULL ) { mutex_unlock( &image->lock ); - return; + return true; } - dnbd3_uplink_t * const uplink = image->uplink; mutex_lock( &uplink->queueLock ); bool exp = false; if ( atomic_compare_exchange_strong( &uplink->shutdown, &exp, true ) ) { + image->users++; // Prevent free while uplink shuts down signal_call( uplink->signal ); - thread = uplink->thread; - join = true; + } else { + logadd( LOG_ERROR, "This will never happen. '%s:%d'", image->name, (int)image->rid ); } + cancelAllRequests( uplink ); + ref_setref( &image->uplinkref, NULL ); + ref_put( &uplink->reference ); mutex_unlock( &uplink->queueLock ); - bool wait = image->uplink != NULL; + bool retval = ( exp && image->users == 0 ); mutex_unlock( &image->lock ); - if ( join ) thread_join( thread, NULL ); - while ( wait ) { - usleep( 5000 ); - mutex_lock( &image->lock ); - wait = image->uplink != NULL && image->uplink->shutdown; - mutex_unlock( &image->lock ); + return exp; +} + +/** + * Cancel all requests of this uplink. + * HOLD QUEUE LOCK WHILE CALLING + */ +static void cancelAllRequests(dnbd3_uplink_t *uplink) +{ + for ( int i = 0; i < uplink->queueLen; ++i ) { + if ( uplink->queue[i].status != ULR_FREE ) { + net_sendReply( uplink->queue[i].client, CMD_ERROR, uplink->queue[i].handle ); + uplink->queue[i].status = ULR_FREE; + } + } + uplink->queueLen = 0; +} + +static void uplink_free(ref *ref) +{ + dnbd3_uplink_t *uplink = container_of(ref, dnbd3_uplink_t, reference); + logadd( LOG_DEBUG1, "Freeing uplink for '%s:%d'", uplink->image->name, (int)uplink->image->rid ); + assert( uplink->queueLen == 0 ); + signal_close( uplink->signal ); + if ( uplink->current.fd != -1 ) { + close( uplink->current.fd ); + uplink->current.fd = -1; + } + if ( uplink->better.fd != -1 ) { + close( uplink->better.fd ); + uplink->better.fd = -1; + } + mutex_destroy( &uplink->queueLock ); + mutex_destroy( &uplink->rttLock ); + mutex_destroy( &uplink->sendMutex ); + free( uplink->recvBuffer ); + uplink->recvBuffer = NULL; + if ( uplink->cacheFd != -1 ) { + close( uplink->cacheFd ); } + // TODO Requeue any requests + dnbd3_image_t *image = image_lock( uplink->image ); + if ( image != NULL ) { + // != NULL means image is still in list... + if ( !_shutdown && image->cache_map != NULL ) { + // Ingegrity checker must have found something in the meantime + uplink_init( image, -1, NULL, 0 ); + } + image_release( image ); + } + // Finally let go of image. It was acquired either in uplink_shutdown or in the cleanup code + // of the uplink thread, depending on who set the uplink->shutdown flag. + image_release( image ); + free( uplink ); // !!! } /** @@ -193,31 +252,28 @@ void uplink_removeClient(dnbd3_uplink_t *uplink, dnbd3_client_t *client) */ bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uint32_t length, uint8_t hops) { - if ( client == NULL || client->image == NULL ) return false; + if ( client == NULL || client->image == NULL ) + return false; if ( length > (uint32_t)_maxPayload ) { logadd( LOG_WARNING, "Cannot relay request by client; length of %" PRIu32 " exceeds maximum payload", length ); return false; } - mutex_lock( &client->image->lock ); - if ( client->image->uplink == NULL ) { - mutex_unlock( &client->image->lock ); + dnbd3_uplink_t * const uplink = ref_get_uplink( &client->image->uplinkref ); + if ( uplink == NULL ) { logadd( LOG_DEBUG1, "Uplink request for image with no uplink" ); return false; } - dnbd3_uplink_t * const uplink = client->image->uplink; if ( uplink->shutdown ) { - mutex_unlock( &client->image->lock ); logadd( LOG_DEBUG1, "Uplink request for image with uplink shutting down" ); - return false; + goto fail_ref; } // Check if the client is the same host as the uplink. If so assume this is a circular proxy chain // This might be a false positive if there are multiple instances running on the same host (IP) if ( hops != 0 && isSameAddress( altservers_indexToHost( uplink->current.index ), &client->host ) ) { uplink->cycleDetected = true; signal_call( uplink->signal ); - mutex_unlock( &client->image->lock ); logadd( LOG_WARNING, "Proxy cycle detected (same host)." ); - return false; + goto fail_ref; } int foundExisting = -1; // Index of a pending request that is a superset of our range, -1 otherwise @@ -229,7 +285,9 @@ bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uin const uint64_t end = start + length; mutex_lock( &uplink->queueLock ); - mutex_unlock( &client->image->lock ); + if ( uplink->shutdown ) { // Check again after locking to prevent lost requests + goto fail_lock; + } for (i = 0; i < uplink->queueLen; ++i) { // find free slot to place this request into if ( uplink->queue[i].status == ULR_FREE ) { @@ -257,18 +315,16 @@ bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uin if ( unlikely( requestLoop ) ) { uplink->cycleDetected = true; signal_call( uplink->signal ); - mutex_unlock( &uplink->queueLock ); logadd( LOG_WARNING, "Rejecting relay of request to upstream proxy because of possible cyclic proxy chain. Incoming hop-count is %" PRIu8 ".", hops ); - return false; + goto fail_lock; } if ( freeSlot < firstUsedSlot && firstUsedSlot < 10 && existingType != ULR_PROCESSING ) { freeSlot = -1; // Not attaching to existing request, make it use a higher slot } if ( freeSlot == -1 ) { if ( uplink->queueLen >= SERVER_MAX_UPLINK_QUEUE ) { - mutex_unlock( &uplink->queueLock ); logadd( LOG_WARNING, "Uplink queue is full, consider increasing SERVER_MAX_UPLINK_QUEUE. Dropping client..." ); - return false; + goto fail_lock; } freeSlot = uplink->queueLen++; } @@ -305,16 +361,16 @@ bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uin #endif mutex_unlock( &uplink->queueLock ); - if ( foundExisting != -1 ) + if ( foundExisting != -1 ) { + ref_put( &uplink->reference ); return true; // Attached to pending request, do nothing - - usleep( 10000 ); + } // See if we can fire away the request - if ( mutex_trylock( &uplink->sendMutex ) != 0 ) { + if ( unlikely( mutex_trylock( &uplink->sendMutex ) != 0 ) ) { logadd( LOG_DEBUG2, "Could not trylock send mutex, queueing uplink request" ); } else { - if ( uplink->current.fd == -1 ) { + if ( unlikely( uplink->current.fd == -1 ) ) { mutex_unlock( &uplink->sendMutex ); logadd( LOG_DEBUG2, "Cannot do direct uplink request: No socket open" ); } else { @@ -323,13 +379,13 @@ bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uin if ( hops < 200 ) ++hops; const bool ret = dnbd3_get_block( uplink->current.fd, reqStart, reqSize, reqStart, COND_HOPCOUNT( uplink->current.version, hops ) ); mutex_unlock( &uplink->sendMutex ); - if ( !ret ) { + if ( unlikely( !ret ) ) { logadd( LOG_DEBUG2, "Could not send out direct uplink request, queueing" ); } else { // Direct send succeeded, update queue entry from NEW to PENDING, so the request won't be sent again int state; mutex_lock( &uplink->queueLock ); - if ( uplink->queue[freeSlot].handle == handle && uplink->queue[freeSlot].client == client ) { + if ( !uplink->shutdown && uplink->queue[freeSlot].handle == handle && uplink->queue[freeSlot].client == client ) { state = uplink->queue[freeSlot].status; if ( uplink->queue[freeSlot].status == ULR_NEW ) { uplink->queue[freeSlot].status = ULR_PENDING; @@ -345,6 +401,7 @@ bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uin } else { logadd( LOG_DEBUG2, "Direct uplink request queue entry changed to %s afte sending (expected ULR_NEW).", NAMES_ULR[uplink->queue[freeSlot].status] ); } + ref_put( &uplink->reference ); return true; } // Fall through to waking up sender thread @@ -354,7 +411,13 @@ bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uin if ( signal_call( uplink->signal ) == SIGNAL_ERROR ) { logadd( LOG_WARNING, "Cannot wake up uplink thread; errno=%d", (int)errno ); } + ref_put( &uplink->reference ); return true; +fail_lock: + mutex_unlock( &uplink->queueLock ); +fail_ref: + ref_put( &uplink->reference ); + return false; } /** @@ -381,6 +444,7 @@ static void* uplink_mainloop(void *data) // assert( uplink != NULL ); setThreadName( "idle-uplink" ); + thread_detach( uplink->thread ); blockNoncriticalSignals(); // Make sure file is open for writing if ( !uplink_reopenCacheFd( uplink, false ) ) { @@ -553,7 +617,7 @@ static void* uplink_mainloop(void *data) for (i = 0; i < uplink->queueLen; ++i) { if ( uplink->queue[i].status != ULR_FREE && timing_reached( &uplink->queue[i].entered, &deadline ) ) { snprintf( buffer, sizeof(buffer), "[DEBUG %p] Starving request slot %d detected:\n" - "%s\n(from %" PRIu64 " to %" PRIu64 ", status: %d)\n", (void*)link, i, uplink->queue[i].client->image->name, + "%s\n(from %" PRIu64 " to %" PRIu64 ", status: %d)\n", (void*)uplink, i, uplink->queue[i].client->image->name, uplink->queue[i].from, uplink->queue[i].to, uplink->queue[i].status ); uplink->queue[i].entered = now; #ifdef _DEBUG_RESEND_STARVING @@ -572,55 +636,26 @@ static void* uplink_mainloop(void *data) #endif } cleanup: ; - // Detach depends on whether someone is joining this thread... - bool exp = false; - if ( atomic_compare_exchange_strong( &uplink->shutdown, &exp, true ) ) { - thread_detach( uplink->thread ); - } uplink_saveCacheMap( uplink ); dnbd3_image_t *image = uplink->image; mutex_lock( &image->lock ); - // in the list anymore, but we want to prevent it from being freed in either case - if ( image->uplink == uplink ) { - image->uplink = NULL; - } - mutex_unlock( &image->lock ); // Do NOT use image without locking it - mutex_lock( &uplink->queueLock ); - // Wait for active RTT measurement to finish - while ( uplink->rttTestResult == RTT_INPROGRESS ) { - usleep( 10000 ); - } - signal_close( uplink->signal ); - mutex_lock( &uplink->rttLock ); - mutex_lock( &uplink->sendMutex ); - if ( uplink->current.fd != -1 ) { - close( uplink->current.fd ); - uplink->current.fd = -1; - } - if ( uplink->better.fd != -1 ) { - close( uplink->better.fd ); - uplink->better.fd = -1; + bool exp = false; + if ( atomic_compare_exchange_strong( &uplink->shutdown, &exp, true ) ) { + image->users++; // We set the flag - hold onto image } - mutex_unlock( &uplink->sendMutex ); - mutex_unlock( &uplink->rttLock ); - mutex_unlock( &uplink->queueLock ); - mutex_destroy( &uplink->queueLock ); - mutex_destroy( &uplink->rttLock ); - mutex_destroy( &uplink->sendMutex ); - free( uplink->recvBuffer ); - uplink->recvBuffer = NULL; - if ( uplink->cacheFd != -1 ) { - close( uplink->cacheFd ); + dnbd3_uplink_t *current = ref_get_uplink( &image->uplinkref ); + if ( current == uplink ) { // Set NULL if it's still us... + mutex_lock( &uplink->queueLock ); + cancelAllRequests( uplink ); + mutex_unlock( &uplink->queueLock ); + ref_setref( &image->uplinkref, NULL ); } - free( uplink ); // !!! - if ( image_lock( image ) != NULL ) { - // Image is still in list... - if ( !_shutdown && image->cache_map != NULL ) { - // Ingegrity checker must have found something in the meantime - uplink_init( image, -1, NULL, 0 ); - } - image_release( image ); + if ( current != NULL ) { // Decrease ref in any case + ref_put( ¤t->reference ); } + mutex_unlock( &image->lock ); + // Finally as the thread is done, decrease our own ref that we initialized with + ref_put( &uplink->reference ); return NULL ; } @@ -637,7 +672,7 @@ static void uplink_sendRequests(dnbd3_uplink_t *uplink, bool newOnly) const uint32_t reqSize = (uint32_t)(((uplink->queue[j].to + DNBD3_BLOCK_SIZE - 1) & ~(uint64_t)(DNBD3_BLOCK_SIZE - 1)) - reqStart); /* logadd( LOG_DEBUG2, "[%p] Sending slot %d, now %d, handle %" PRIu64 ", Range: %" PRIu64 "-%" PRIu64 " (%" PRIu64 "-%" PRIu64 ")", - (void*)link, j, uplink->queue[j].status, uplink->queue[j].handle, uplink->queue[j].from, uplink->queue[j].to, reqStart, reqStart+reqSize ); + (void*)uplink, j, uplink->queue[j].status, uplink->queue[j].handle, uplink->queue[j].from, uplink->queue[j].to, reqStart, reqStart+reqSize ); */ mutex_unlock( &uplink->queueLock ); if ( hops < 200 ) ++hops; @@ -782,7 +817,7 @@ static int uplink_findNextIncompleteHashBlock(dnbd3_uplink_t *uplink, const int /** * Receive data from uplink server and process/dispatch - * Locks on: link.lock, images[].lock + * Locks on: uplink.lock, images[].lock */ static void uplink_handleReceive(dnbd3_uplink_t *uplink) { @@ -924,13 +959,16 @@ static void uplink_handleReceive(dnbd3_uplink_t *uplink) } mutex_unlock( &client->sendMutex ); mutex_lock( &uplink->queueLock ); + if ( i > uplink->queueLen ) { + uplink->queueLen = i; // Might have been set to 0 by cancelAllRequests + } } if ( req->status == ULR_FREE && i == uplink->queueLen - 1 ) uplink->queueLen--; } mutex_unlock( &uplink->queueLock ); #ifdef _DEBUG if ( !served && start != uplink->replicationHandle ) { - logadd( LOG_DEBUG2, "%p, %s -- Unmatched reply: %" PRIu64 " to %" PRIu64, (void*)link, uplink->image->name, start, end ); + logadd( LOG_DEBUG2, "%p, %s -- Unmatched reply: %" PRIu64 " to %" PRIu64, (void*)uplink, uplink->image->name, start, end ); } #endif if ( start == uplink->replicationHandle ) { diff --git a/src/server/uplink.h b/src/server/uplink.h index acc8e11..49ff0b4 100644 --- a/src/server/uplink.h +++ b/src/server/uplink.h @@ -14,7 +14,7 @@ void uplink_removeClient(dnbd3_uplink_t *uplink, dnbd3_client_t *client); bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uint32_t length, uint8_t hopCount); -void uplink_shutdown(dnbd3_image_t *image); +bool uplink_shutdown(dnbd3_image_t *image); bool uplink_getHostString(dnbd3_uplink_t *uplink, char *buffer, size_t len); -- cgit v1.2.3-55-g7522 From ac1bf45ebdd630fbc9ad2c1fa3c0ea99f5206799 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Wed, 28 Aug 2019 13:07:13 +0200 Subject: [SERVER] Make signal handling more POSIX According to POSIX, a signal sent to a PID can be delivered to an arbitrary thread of that process that hasn't the signal blocked. This seens to never happen on Linux, but would mess things up since the code expected the main signal handler to only be executed by the main thread. This should now be fixed by examining the destination PID of the signal as well as the ID of the thread currently running the signal handler. If we notice the signal wasn't sent by our own PID and the handler is not currently run by the main thread, we re-send the signal to the main thread. Otherwise, if the signal was sent by our own PID but the handler is not run in the main thread, do nothing. This way we can use pthread_kill() to wake up threads that might be stuck in a blocking syscall when it's time to shut down. --- src/server/globals.h | 1 + src/server/image.c | 10 ++-------- src/server/integrity.c | 17 +++++++++++++---- src/server/net.c | 11 ++++++----- src/server/rpc.c | 13 ++++++++----- src/server/server.c | 22 +++++++++++++++++----- src/server/threadpool.c | 28 ++++++++++++++++++++++------ src/server/threadpool.h | 5 +++++ 8 files changed, 74 insertions(+), 33 deletions(-) (limited to 'src/server/integrity.c') diff --git a/src/server/globals.h b/src/server/globals.h index 5dd205a..f940666 100644 --- a/src/server/globals.h +++ b/src/server/globals.h @@ -138,6 +138,7 @@ struct _dnbd3_client char hostName[HOSTNAMELEN]; // inet_ntop version of host pthread_mutex_t sendMutex; // Held while writing to sock if image is incomplete (since uplink uses socket too) pthread_mutex_t lock; + pthread_t thread; }; // ####################################################### diff --git a/src/server/image.c b/src/server/image.c index de93cd4..248c12c 100644 --- a/src/server/image.c +++ b/src/server/image.c @@ -562,9 +562,7 @@ bool image_tryFreeAll() if ( _images[i] != NULL && _images[i]->users == 0 ) { dnbd3_image_t *image = _images[i]; _images[i] = NULL; - mutex_unlock( &imageListLock ); image = image_free( image ); - mutex_lock( &imageListLock ); } if ( i + 1 == _num_images && _images[i] == NULL ) _num_images--; } @@ -574,15 +572,13 @@ bool image_tryFreeAll() /** * Free image. DOES NOT check if it's in use. - * Indirectly locks on imageListLock, image.lock, uplink.queueLock + * (Indirectly) locks on image.lock, uplink.queueLock */ static dnbd3_image_t* image_free(dnbd3_image_t *image) { assert( image != NULL ); assert( image->users == 0 ); - if ( !_shutdown ) { - logadd( LOG_INFO, "Freeing image %s:%d", image->name, (int)image->rid ); - } + logadd( ( _shutdown ? LOG_DEBUG1 : LOG_INFO ), "Freeing image %s:%d", image->name, (int)image->rid ); // uplink_shutdown might return false to tell us // that the shutdown is in progress. Bail out since // this will get called again when the uplink is done. @@ -600,8 +596,6 @@ static dnbd3_image_t* image_free(dnbd3_image_t *image) mutex_unlock( &image->lock ); if ( image->readFd != -1 ) close( image->readFd ); mutex_destroy( &image->lock ); - // - memset( image, 0, sizeof(*image) ); free( image ); return NULL ; } diff --git a/src/server/integrity.c b/src/server/integrity.c index f358c46..e7ebeb2 100644 --- a/src/server/integrity.c +++ b/src/server/integrity.c @@ -184,13 +184,20 @@ static void* integrity_main(void * data UNUSED) mutex_unlock( &image->lock ); } #if defined(linux) || defined(__linux) - if ( sync_file_range( fd, start, end - start, SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER ) == -1 ) { + while ( sync_file_range( fd, start, end - start, SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER ) == -1 ) #else - if ( fsync( fd ) == -1 ) { + while ( fsync( fd ) == -1 ) #endif - logadd( LOG_ERROR, "Cannot flush %s for integrity check", image->path ); + { + if ( _shutdown ) + break; + if ( errno == EINTR ) + continue; + logadd( LOG_ERROR, "Cannot flush %s for integrity check (errno=%d)", image->path, errno ); exit( 1 ); } + if ( _shutdown ) + break; // Use direct I/O only if read length is multiple of 4096 to be on the safe side int tfd; if ( direct && ( end % DNBD3_BLOCK_SIZE ) == 0 ) { @@ -266,7 +273,9 @@ static void* integrity_main(void * data UNUSED) } } mutex_unlock( &integrityQueueLock ); - if ( buffer != NULL ) free( buffer ); + if ( buffer != NULL ) { + free( buffer ); + } bRunning = false; return NULL; } diff --git a/src/server/net.c b/src/server/net.c index e0b516e..9c855e4 100644 --- a/src/server/net.c +++ b/src/server/net.c @@ -44,6 +44,7 @@ #include #include #include +#include static dnbd3_client_t *_clients[SERVER_MAX_CLIENTS]; static int _num_clients = 0; @@ -153,6 +154,7 @@ void* net_handleNewConnection(void *clientPtr) { dnbd3_client_t * const client = (dnbd3_client_t *)clientPtr; dnbd3_request_t request; + client->thread = pthread_self(); // Await data from client. Since this is a fresh connection, we expect data right away sock_setTimeout( client->sock, _clientTimeout ); @@ -631,11 +633,10 @@ void net_disconnectAll() int i; mutex_lock( &_clients_lock ); for (i = 0; i < _num_clients; ++i) { - if ( _clients[i] == NULL ) continue; - dnbd3_client_t * const client = _clients[i]; - mutex_lock( &client->lock ); - if ( client->sock >= 0 ) shutdown( client->sock, SHUT_RDWR ); - mutex_unlock( &client->lock ); + if ( _clients[i] == NULL ) + continue; + shutdown( _clients[i]->sock, SHUT_RDWR ); + pthread_kill( _clients[i]->thread, SIGINT ); } mutex_unlock( &_clients_lock ); } diff --git a/src/server/rpc.c b/src/server/rpc.c index 261c6c0..662263e 100644 --- a/src/server/rpc.c +++ b/src/server/rpc.c @@ -137,13 +137,13 @@ void rpc_sendStatsJson(int sock, dnbd3_host_t* host, const void* data, const int bool hasName = false; bool ok; int keepAlive = HTTP_KEEPALIVE; - do { + while ( !_shutdown ) { // Read request from client struct phr_header headers[100]; size_t numHeaders, prevLen = 0, consumed; struct string method, path; int minorVersion; - do { + while ( !_shutdown ) { // Parse before calling recv, there might be a complete pipelined request in the buffer already // If the request is incomplete, we allow exactly one additional recv() to complete it. // This should suffice for real world scenarios as I don't know of any @@ -188,7 +188,9 @@ void rpc_sendStatsJson(int sock, dnbd3_host_t* host, const void* data, const int sendReply( sock, "400 Bad Request", "text/plain", "Server cannot understand what you're trying to say", -1, HTTP_CLOSE ); goto func_return; } - } while ( true ); + } // Loop while request header incomplete + if ( _shutdown ) + break; if ( keepAlive == HTTP_KEEPALIVE ) { // Only keep the connection alive (and indicate so) if the client seems to support this if ( minorVersion == 0 || hasHeaderValue( headers, numHeaders, &STR_CONNECTION, &STR_CLOSE ) ) { @@ -213,7 +215,8 @@ void rpc_sendStatsJson(int sock, dnbd3_host_t* host, const void* data, const int } else { ok = sendReply( sock, "404 Not found", "text/plain", "Nothing", -1, keepAlive ); } - if ( !ok ) break; + if ( !ok ) + break; } // hoff might be beyond end if the client sent another request (burst) const ssize_t extra = hoff - consumed; @@ -225,7 +228,7 @@ void rpc_sendStatsJson(int sock, dnbd3_host_t* host, const void* data, const int hasName = true; setThreadName( "HTTP" ); } - } while (true); + } // Loop while more requests func_return:; do { const int curCount = --status.count; diff --git a/src/server/server.c b/src/server/server.c index 1cdd2ab..0dddea7 100644 --- a/src/server/server.c +++ b/src/server/server.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include #define LONGOPT_CRC4 1000 #define LONGOPT_ASSERT 1001 @@ -60,6 +62,7 @@ static _Atomic(job_t *) newJob; static bool hasTimerThread = false; static pthread_t timerThread; +static pid_t mainPid; static pthread_t mainThread; #define DEFAULT_TIMER_TIMEOUT (60) @@ -138,7 +141,7 @@ _Noreturn static void dnbd3_cleanup() logadd( LOG_INFO, "Cleanup..." ); if ( hasTimerThread ) { - pthread_kill( timerThread, SIGHUP ); + pthread_kill( timerThread, SIGINT ); thread_join( timerThread, NULL ); } @@ -162,6 +165,8 @@ _Noreturn static void dnbd3_cleanup() // Wait for clients to disconnect net_waitForAllDisconnected(); + threadpool_waitEmpty(); + // Clean up images retries = 5; while ( !image_tryFreeAll() && --retries > 0 ) { @@ -204,6 +209,7 @@ int main(int argc, char *argv[]) { 0, 0, 0, 0 } }; + mainPid = getpid(); mainThread = pthread_self(); opt = getopt_long( argc, argv, optString, longOpts, &longIndex ); @@ -509,10 +515,16 @@ static void dnbd3_handleSignal(int signum) static void dnbd3_handleSignal2(int signum, siginfo_t *info, void *data UNUSED) { - if ( !pthread_equal( pthread_self(), mainThread ) ) - return; - memcpy( &lastSignal, info, sizeof(siginfo_t) ); - dnbd3_handleSignal( signum ); + if ( info->si_pid != mainPid ) { // Source is not this process + memcpy( &lastSignal, info, sizeof(siginfo_t) ); // Copy signal info + if ( info->si_pid != 0 && !pthread_equal( pthread_self(), mainThread ) ) { + pthread_kill( mainThread, info->si_signo ); // And relay signal if we're not the main thread + } + } + if ( pthread_equal( pthread_self(), mainThread ) ) { + // Signal received by main thread -- handle + dnbd3_handleSignal( signum ); + } } uint32_t dnbd3_serverUptime() diff --git a/src/server/threadpool.c b/src/server/threadpool.c index 3947677..0b46fd6 100644 --- a/src/server/threadpool.c +++ b/src/server/threadpool.c @@ -15,6 +15,7 @@ static void *threadpool_worker(void *entryPtr); static pthread_attr_t threadAttrs; static atomic_int maxIdleThreads = -1; static _Atomic(entry_t *) *pool = NULL; +static atomic_int activeThreads = 0; bool threadpool_init(int maxIdle) { @@ -34,10 +35,9 @@ bool threadpool_init(int maxIdle) void threadpool_close() { - _shutdown = true; - int max = maxIdleThreads; - maxIdleThreads = -1; - if ( max <= 0 ) return; + int max = atomic_exchange( &maxIdleThreads, -1 ); + if ( max <= 0 ) + return; for ( int i = 0; i < max; ++i ) { entry_t *cur = pool[i]; if ( cur != NULL && atomic_compare_exchange_strong( &pool[i], &cur, NULL ) ) { @@ -46,9 +46,23 @@ void threadpool_close() } } +void threadpool_waitEmpty() +{ + if ( activeThreads == 0 ) + return; + do { + sleep( 1 ); + logadd( LOG_INFO, "Threadpool: %d threads still active", (int)activeThreads ); + } while ( activeThreads != 0 ); +} + bool threadpool_run(void *(*startRoutine)(void *), void *arg) { - if ( startRoutine == NULL ) { + if ( unlikely( _shutdown ) ) { + logadd( LOG_MINOR, "Cannot submit work to threadpool while shutting down!" ); + return false; + } + if ( unlikely( startRoutine == NULL ) ) { logadd( LOG_ERROR, "Trying to queue work for thread pool with NULL startRoutine" ); return false; // Or bail out!? } @@ -60,7 +74,7 @@ bool threadpool_run(void *(*startRoutine)(void *), void *arg) break; } } - if ( entry == NULL ) { + if ( unlikely( entry == NULL ) ) { entry = malloc( sizeof(entry_t) ); if ( entry == NULL ) { logadd( LOG_WARNING, "Could not alloc entry_t for new thread\n" ); @@ -78,6 +92,7 @@ bool threadpool_run(void *(*startRoutine)(void *), void *arg) free( entry ); return false; } + activeThreads++; } entry->startRoutine = startRoutine; entry->arg = arg; @@ -130,6 +145,7 @@ keep_going:; } signal_close( entry->signal ); free( entry ); + activeThreads--; return NULL; } diff --git a/src/server/threadpool.h b/src/server/threadpool.h index 15dd151..ee0b3aa 100644 --- a/src/server/threadpool.h +++ b/src/server/threadpool.h @@ -17,6 +17,11 @@ bool threadpool_init(int maxIdleThreadCount); */ void threadpool_close(); +/** + * Block until all threads spawned have exited + */ +void threadpool_waitEmpty(); + /** * Run a thread using the thread pool. * @param startRoutine function to run in new thread -- cgit v1.2.3-55-g7522 From 0fb5ec7152c79d10711139158533f96204755788 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Wed, 28 Aug 2019 22:14:34 +0200 Subject: [SERVER] Speed up shutdown of integrity checker --- src/server/integrity.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/server/integrity.c') diff --git a/src/server/integrity.c b/src/server/integrity.c index e7ebeb2..1fcb558 100644 --- a/src/server/integrity.c +++ b/src/server/integrity.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include #define CHECK_QUEUE_SIZE 200 @@ -56,13 +58,14 @@ void integrity_init() void integrity_shutdown() { assert( queueLen != -1 ); + if ( !bRunning ) + return; logadd( LOG_DEBUG1, "Shutting down integrity checker...\n" ); + pthread_kill( thread, SIGINT ); mutex_lock( &integrityQueueLock ); pthread_cond_signal( &queueSignal ); mutex_unlock( &integrityQueueLock ); thread_join( thread, NULL ); - while ( bRunning ) - usleep( 10000 ); mutex_destroy( &integrityQueueLock ); pthread_cond_destroy( &queueSignal ); logadd( LOG_DEBUG1, "Integrity checker exited normally.\n" ); -- cgit v1.2.3-55-g7522 From 88695877f085af475a6ca8a01c2fbb08eb5b15da Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Thu, 29 Aug 2019 14:49:18 +0200 Subject: [SERVER] Use weakref for cache maps Gets rid of a bunch of locking, especially the hot path in net.c where clients are requesting data. Many clients unsing the same incomplete image previously created a bottleneck here. --- src/server/globals.h | 10 ++- src/server/image.c | 208 +++++++++++++++++++++++++++++++------------------ src/server/image.h | 2 +- src/server/integrity.c | 10 ++- src/server/net.c | 81 +++++++++---------- src/server/reference.h | 5 ++ src/server/uplink.c | 64 +++++++-------- 7 files changed, 220 insertions(+), 160 deletions(-) (limited to 'src/server/integrity.c') diff --git a/src/server/globals.h b/src/server/globals.h index f940666..221af78 100644 --- a/src/server/globals.h +++ b/src/server/globals.h @@ -99,6 +99,12 @@ typedef struct int permissions; } dnbd3_access_rule_t; +typedef struct +{ + ref reference; + atomic_uint_least8_t map[]; +} dnbd3_cache_map_t; + /** * Image struct. An image path could be something like * /mnt/images/rz/zfs/Windows7 ZfS.vmdk.r1 @@ -110,7 +116,7 @@ struct _dnbd3_image char *path; // absolute path of the image char *name; // public name of the image (usually relative path minus revision ID) weakref uplinkref; // pointer to a server connection - uint8_t *cache_map; // cache map telling which parts are locally cached, NULL if complete + weakref ref_cacheMap; // cache map telling which parts are locally cached, NULL if complete uint64_t virtualFilesize; // virtual size of image (real size rounded up to multiple of 4k) uint64_t realFilesize; // actual file size on disk ticks atime; // last access time @@ -119,7 +125,7 @@ struct _dnbd3_image uint32_t *crc32; // list of crc32 checksums for each 16MiB block in image uint32_t masterCrc32; // CRC-32 of the crc-32 list int readFd; // used to read the image. Used from multiple threads, so use atomic operations (pread et al) - int completenessEstimate; // Completeness estimate in percent + atomic_int completenessEstimate; // Completeness estimate in percent atomic_int users; // clients currently using this image. XXX Lock on imageListLock when modifying and checking whether the image should be freed. Reading it elsewhere is fine without the lock. int id; // Unique ID of this image. Only unique in the context of this running instance of DNBD3-Server atomic_bool working; // true if image exists and completeness is == 100% or a working upstream proxy is connected diff --git a/src/server/image.c b/src/server/image.c index 4eab1d2..1972f48 100644 --- a/src/server/image.c +++ b/src/server/image.c @@ -51,10 +51,18 @@ static bool image_clone(int sock, char *name, uint16_t revision, uint64_t imageS static bool image_calcBlockCrc32(const int fd, const size_t block, const uint64_t realFilesize, uint32_t *crc); static bool image_ensureDiskSpace(uint64_t size, bool force); -static uint8_t* image_loadCacheMap(const char * const imagePath, const int64_t fileSize); +static dnbd3_cache_map_t* image_loadCacheMap(const char * const imagePath, const int64_t fileSize); static uint32_t* image_loadCrcList(const char * const imagePath, const int64_t fileSize, uint32_t *masterCrc); -static bool image_checkRandomBlocks(const int count, int fdImage, const int64_t fileSize, uint32_t * const crc32list, uint8_t * const cache_map); +static bool image_checkRandomBlocks(const int count, int fdImage, const int64_t fileSize, uint32_t * const crc32list, atomic_uint_least8_t * const cache_map); static void* closeUnusedFds(void*); +static void allocCacheMap(dnbd3_image_t *image, bool complete); + +static void cmfree(ref *ref) +{ + dnbd3_cache_map_t *cache = container_of(ref, dnbd3_cache_map_t, reference); + logadd( LOG_DEBUG2, "Freeing a cache map" ); + free( cache ); +} // ########################################## @@ -70,7 +78,6 @@ void image_serverStartup() /** * Update cache-map of given image for the given byte range * start (inclusive) - end (exclusive) - * Locks on: images[].lock */ void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, const bool set) { @@ -91,33 +98,55 @@ void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, co if ( start >= end ) return; bool setNewBlocks = false; - uint64_t pos = start; - mutex_lock( &image->lock ); - if ( image->cache_map == NULL ) { + dnbd3_cache_map_t *cache = ref_get_cachemap( image ); + if ( cache == NULL ) { // Image seems already complete if ( set ) { // This makes no sense - mutex_unlock( &image->lock ); - logadd( LOG_DEBUG1, "image_updateCachemap(true) with no cache_map: %s", image->path ); + logadd( LOG_DEBUG1, "image_updateCachemap(true) with no cache map: %s", image->path ); return; } // Recreate a cache map, set it to all 1 initially as we assume the image was complete - const int byteSize = IMGSIZE_TO_MAPBYTES( image->virtualFilesize ); - image->cache_map = malloc( byteSize ); - memset( image->cache_map, 0xff, byteSize ); - } - while ( pos < end ) { - const size_t map_y = (int)( pos >> 15 ); - const int map_x = (int)( (pos >> 12) & 7 ); // mod 8 - const int bit_mask = 1 << map_x; - if ( set ) { - if ( (image->cache_map[map_y] & bit_mask) == 0 ) setNewBlocks = true; - image->cache_map[map_y] |= (uint8_t)bit_mask; - } else { - image->cache_map[map_y] &= (uint8_t)~bit_mask; + allocCacheMap( image, true ); + cache = ref_get_cachemap( image ); + if ( cache == NULL ) { + logadd( LOG_WARNING, "WHAT!!!?!?!= No cache map right after alloc?! %s", image->path ); + return; } - pos += DNBD3_BLOCK_SIZE; } + // Set/unset + const uint64_t firstByteInMap = start >> 15; + const uint64_t lastByteInMap = (end - 1) >> 15; + uint64_t pos; + // First byte + uint8_t fb = 0, lb = 0; + for ( pos = start; firstByteInMap == (pos >> 15) && pos < end; pos += DNBD3_BLOCK_SIZE ) { + const int map_x = (pos >> 12) & 7; // mod 8 + const uint8_t bit_mask = (uint8_t)( 1 << map_x ); + fb |= bit_mask; + } + // Last byte + for ( pos = lastByteInMap << 15; pos < end; pos += DNBD3_BLOCK_SIZE ) { + const int map_x = (pos >> 12) & 7; // mod 8 + const uint8_t bit_mask = (uint8_t)( 1 << map_x ); + lb |= bit_mask; + } + if ( set ) { + uint8_t fo = atomic_fetch_or_explicit( &cache->map[firstByteInMap], fb, memory_order_relaxed ); + uint8_t lo = atomic_fetch_or_explicit( &cache->map[lastByteInMap], lb, memory_order_relaxed ); + setNewBlocks = ( fo != cache->map[firstByteInMap] || lo != cache->map[lastByteInMap] ); + } else { + atomic_fetch_and_explicit( &cache->map[firstByteInMap], (uint8_t)~fb, memory_order_relaxed ); + atomic_fetch_and_explicit( &cache->map[lastByteInMap], (uint8_t)~lb, memory_order_relaxed ); + } + const uint8_t nval = set ? 0xff : 0; + // Everything in between + for ( pos = firstByteInMap + 1; pos < lastByteInMap; ++pos ) { + if ( atomic_exchange_explicit( &cache->map[pos], nval, memory_order_relaxed ) != nval && set ) { + setNewBlocks = true; + } + } + atomic_thread_fence( memory_order_release ); if ( setNewBlocks && image->crc32 != NULL ) { // If setNewBlocks is set, at least one of the blocks was not cached before, so queue all hash blocks // for checking, even though this might lead to checking some hash block again, if it was @@ -125,19 +154,14 @@ void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, co // First set start and end to borders of hash blocks start &= ~(uint64_t)(HASH_BLOCK_SIZE - 1); end = (end + HASH_BLOCK_SIZE - 1) & ~(uint64_t)(HASH_BLOCK_SIZE - 1); - pos = start; - while ( pos < end ) { - if ( image->cache_map == NULL ) break; + for ( pos = start; pos < end; pos += HASH_BLOCK_SIZE ) { const int block = (int)( pos / HASH_BLOCK_SIZE ); - if ( image_isHashBlockComplete( image->cache_map, block, image->realFilesize ) ) { - mutex_unlock( &image->lock ); + if ( image_isHashBlockComplete( cache->map, block, image->realFilesize ) ) { integrity_check( image, block ); - mutex_lock( &image->lock ); } - pos += HASH_BLOCK_SIZE; } } - mutex_unlock( &image->lock ); + ref_put( &cache->reference ); } /** @@ -149,20 +173,18 @@ void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, co bool image_isComplete(dnbd3_image_t *image) { assert( image != NULL ); - mutex_lock( &image->lock ); if ( image->virtualFilesize == 0 ) { - mutex_unlock( &image->lock ); return false; } - if ( image->cache_map == NULL ) { - mutex_unlock( &image->lock ); + dnbd3_cache_map_t *cache = ref_get_cachemap( image ); + if ( cache == NULL ) { return true; } bool complete = true; int j; const int map_len_bytes = IMGSIZE_TO_MAPBYTES( image->virtualFilesize ); for (j = 0; j < map_len_bytes - 1; ++j) { - if ( image->cache_map[j] != 0xFF ) { + if ( cache->map[j] != 0xFF ) { complete = false; break; } @@ -177,18 +199,27 @@ bool image_isComplete(dnbd3_image_t *image) for (j = 0; j < blocks_in_last_byte; ++j) last_byte |= (uint8_t)(1 << j); } - complete = ((image->cache_map[map_len_bytes - 1] & last_byte) == last_byte); + complete = ((cache->map[map_len_bytes - 1] & last_byte) == last_byte); } - if ( !complete ) { - mutex_unlock( &image->lock ); + ref_put( &cache->reference ); + if ( !complete ) return false; + mutex_lock( &image->lock ); + // Lock and make sure current cache map is still the one we saw complete + dnbd3_cache_map_t *current = ref_get_cachemap( image ); + if ( current == cache ) { + // Set cache map NULL as it's complete + ref_setref( &image->ref_cacheMap, NULL ); + } + if ( current != NULL ) { + ref_put( ¤t->reference ); } - char mapfile[PATHLEN] = ""; - free( image->cache_map ); - image->cache_map = NULL; - snprintf( mapfile, PATHLEN, "%s.map", image->path ); mutex_unlock( &image->lock ); - unlink( mapfile ); + if ( current == cache ) { // Successfully set cache map to NULL above + char mapfile[PATHLEN] = ""; + snprintf( mapfile, PATHLEN, "%s.map", image->path ); + unlink( mapfile ); + } return true; } @@ -350,19 +381,18 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking) img->rid = candidate->rid; img->users = 1; img->working = false; + img->ref_cacheMap = NULL; mutex_init( &img->lock, LOCK_IMAGE ); if ( candidate->crc32 != NULL ) { const size_t mb = IMGSIZE_TO_HASHBLOCKS( candidate->virtualFilesize ) * sizeof(uint32_t); img->crc32 = malloc( mb ); memcpy( img->crc32, candidate->crc32, mb ); } - mutex_lock( &candidate->lock ); - if ( candidate->cache_map != NULL ) { - const size_t mb = IMGSIZE_TO_MAPBYTES( candidate->virtualFilesize ); - img->cache_map = malloc( mb ); - memcpy( img->cache_map, candidate->cache_map, mb ); + dnbd3_cache_map_t *cache = ref_get_cachemap( candidate ); + if ( cache != NULL ) { + ref_setref( &img->ref_cacheMap, &cache->reference ); + ref_put( &cache->reference ); } - mutex_unlock( &candidate->lock ); if ( image_addToList( img ) ) { image_release( candidate ); candidate = img; @@ -377,7 +407,7 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking) } // Check if image is incomplete, handle - if ( candidate->cache_map != NULL ) { + if ( candidate->ref_cacheMap != NULL ) { uplink_init( candidate, -1, NULL, -1 ); } @@ -585,11 +615,10 @@ static dnbd3_image_t* image_free(dnbd3_image_t *image) if ( !uplink_shutdown( image ) ) return NULL; mutex_lock( &image->lock ); - free( image->cache_map ); + ref_setref( &image->ref_cacheMap, NULL ); free( image->crc32 ); free( image->path ); free( image->name ); - image->cache_map = NULL; image->crc32 = NULL; image->path = NULL; image->name = NULL; @@ -600,7 +629,7 @@ static dnbd3_image_t* image_free(dnbd3_image_t *image) return NULL ; } -bool image_isHashBlockComplete(const uint8_t * const cacheMap, const uint64_t block, const uint64_t realFilesize) +bool image_isHashBlockComplete(atomic_uint_least8_t * const cacheMap, const uint64_t block, const uint64_t realFilesize) { if ( cacheMap == NULL ) return true; const uint64_t end = (block + 1) * HASH_BLOCK_SIZE; @@ -707,7 +736,7 @@ static bool image_load(char *base, char *path, int withUplink) { int revision = -1; struct stat st; - uint8_t *cache_map = NULL; + dnbd3_cache_map_t *cache = NULL; uint32_t *crc32list = NULL; dnbd3_image_t *existing = NULL; int fdImage = -1; @@ -790,7 +819,7 @@ static bool image_load(char *base, char *path, int withUplink) } // 1. Allocate memory for the cache map if the image is incomplete - cache_map = image_loadCacheMap( path, virtualFilesize ); + cache = image_loadCacheMap( path, virtualFilesize ); // XXX: Maybe try sha-256 or 512 first if you're paranoid (to be implemented) @@ -802,7 +831,7 @@ static bool image_load(char *base, char *path, int withUplink) // Check CRC32 if ( crc32list != NULL ) { - if ( !image_checkRandomBlocks( 4, fdImage, realFilesize, crc32list, cache_map ) ) { + if ( !image_checkRandomBlocks( 4, fdImage, realFilesize, crc32list, cache != NULL ? cache->map : NULL ) ) { logadd( LOG_ERROR, "quick crc32 check of %s failed. Data corruption?", path ); doFullCheck = true; } @@ -826,7 +855,7 @@ static bool image_load(char *base, char *path, int withUplink) crc32list = NULL; function_return = true; goto load_error; // Keep existing - } else if ( existing->cache_map != NULL && cache_map == NULL ) { + } else if ( existing->ref_cacheMap != NULL && cache == NULL ) { // Just ignore that fact, if replication is really complete the cache map will be removed anyways logadd( LOG_INFO, "Image '%s:%d' has no cache map on disk!", existing->name, (int)existing->rid ); function_return = true; @@ -846,7 +875,8 @@ static bool image_load(char *base, char *path, int withUplink) dnbd3_image_t *image = calloc( 1, sizeof(dnbd3_image_t) ); image->path = strdup( path ); image->name = strdup( imgName ); - image->cache_map = cache_map; + image->ref_cacheMap = NULL; + ref_setref( &image->ref_cacheMap, &cache->reference ); image->crc32 = crc32list; image->masterCrc32 = masterCrc; image->uplinkref = NULL; @@ -855,7 +885,7 @@ static bool image_load(char *base, char *path, int withUplink) image->rid = (uint16_t)revision; image->users = 0; image->readFd = -1; - image->working = (image->cache_map == NULL ); + image->working = ( cache == NULL ); timing_get( &image->nextCompletenessEstimate ); image->completenessEstimate = -1; mutex_init( &image->lock, LOCK_IMAGE ); @@ -870,16 +900,16 @@ static bool image_load(char *base, char *path, int withUplink) timing_gets( &image->atime, offset ); // Prevent freeing in cleanup - cache_map = NULL; + cache = NULL; crc32list = NULL; // Get rid of cache map if image is complete - if ( image->cache_map != NULL ) { + if ( image->ref_cacheMap != NULL ) { image_isComplete( image ); } // Image is definitely incomplete, initialize uplink worker - if ( image->cache_map != NULL ) { + if ( image->ref_cacheMap != NULL ) { image->working = false; if ( withUplink ) { uplink_init( image, -1, NULL, -1 ); @@ -910,21 +940,22 @@ static bool image_load(char *base, char *path, int withUplink) load_error: ; if ( existing != NULL ) existing = image_release( existing ); if ( crc32list != NULL ) free( crc32list ); - if ( cache_map != NULL ) free( cache_map ); + if ( cache != NULL ) free( cache ); if ( fdImage != -1 ) close( fdImage ); return function_return; } -static uint8_t* image_loadCacheMap(const char * const imagePath, const int64_t fileSize) +static dnbd3_cache_map_t* image_loadCacheMap(const char * const imagePath, const int64_t fileSize) { - uint8_t *retval = NULL; + dnbd3_cache_map_t *retval = NULL; char mapFile[strlen( imagePath ) + 10 + 1]; sprintf( mapFile, "%s.map", imagePath ); int fdMap = open( mapFile, O_RDONLY ); - if ( fdMap >= 0 ) { + if ( fdMap != -1 ) { const int map_size = IMGSIZE_TO_MAPBYTES( fileSize ); - retval = calloc( 1, map_size ); - const ssize_t rd = read( fdMap, retval, map_size ); + retval = calloc( 1, sizeof(*retval) + map_size ); + ref_init( &retval->reference, cmfree, 0 ); + const ssize_t rd = read( fdMap, retval->map, map_size ); if ( map_size != rd ) { logadd( LOG_WARNING, "Could only read %d of expected %d bytes of cache map of '%s'", (int)rd, (int)map_size, imagePath ); // Could not read complete map, that means the rest of the image file will be considered incomplete @@ -985,7 +1016,7 @@ static uint32_t* image_loadCrcList(const char * const imagePath, const int64_t f return retval; } -static bool image_checkRandomBlocks(const int count, int fdImage, const int64_t realFilesize, uint32_t * const crc32list, uint8_t * const cache_map) +static bool image_checkRandomBlocks(const int count, int fdImage, const int64_t realFilesize, uint32_t * const crc32list, atomic_uint_least8_t * const cache_map) { // This checks the first block and (up to) count - 1 random blocks for corruption // via the known crc32 list. This is very sloppy and is merely supposed to detect @@ -1529,30 +1560,37 @@ json_t* image_getListAsJson() /** * Get completeness of an image in percent. Only estimated, not exact. * Returns: 0-100 - * DOES NOT LOCK, so make sure to do so before calling */ int image_getCompletenessEstimate(dnbd3_image_t * const image) { assert( image != NULL ); - if ( image->cache_map == NULL ) return image->working ? 100 : 0; + dnbd3_cache_map_t *cache = ref_get_cachemap( image ); + if ( cache == NULL ) + return image->working ? 100 : 0; + const int len = IMGSIZE_TO_MAPBYTES( image->virtualFilesize ); + if ( unlikely( len == 0 ) ) { + ref_put( &cache->reference ); + return 0; + } declare_now; if ( !timing_reached( &image->nextCompletenessEstimate, &now ) ) { // Since this operation is relatively expensive, we cache the result for a while + ref_put( &cache->reference ); return image->completenessEstimate; } int i; int percent = 0; - const int len = IMGSIZE_TO_MAPBYTES( image->virtualFilesize ); - if ( len == 0 ) return 0; for ( i = 0; i < len; ++i ) { - if ( image->cache_map[i] == 0xff ) { + const uint8_t v = atomic_load_explicit( &cache->map[i], memory_order_relaxed ); + if ( v == 0xff ) { percent += 100; - } else if ( image->cache_map[i] != 0 ) { + } else if ( v != 0 ) { percent += 50; } } + ref_put( &cache->reference ); image->completenessEstimate = percent / len; - timing_set( &image->nextCompletenessEstimate, &now, 8 + rand() % 32 ); + timing_set( &image->nextCompletenessEstimate, &now, 4 + rand() % 16 ); return image->completenessEstimate; } @@ -1744,3 +1782,21 @@ static void* closeUnusedFds(void* nix UNUSED) } return NULL; } + +static void allocCacheMap(dnbd3_image_t *image, bool complete) +{ + const uint8_t val = complete ? 0xff : 0; + const int byteSize = IMGSIZE_TO_MAPBYTES( image->virtualFilesize ); + dnbd3_cache_map_t *cache = malloc( sizeof(*cache) + byteSize ); + ref_init( &cache->reference, cmfree, 0 ); + memset( cache->map, val, byteSize ); + mutex_lock( &image->lock ); + if ( image->ref_cacheMap != NULL ) { + logadd( LOG_WARNING, "BUG: allocCacheMap called but there already is a cache map for %s:%d", image->name, (int)image->rid ); + free( cache ); + } else { + ref_setref( &image->ref_cacheMap, &cache->reference ); + } + mutex_unlock( &image->lock ); +} + diff --git a/src/server/image.h b/src/server/image.h index 4668eff..cd87f03 100644 --- a/src/server/image.h +++ b/src/server/image.h @@ -9,7 +9,7 @@ void image_serverStartup(); bool image_isComplete(dnbd3_image_t *image); -bool image_isHashBlockComplete(const uint8_t * const cacheMap, const uint64_t block, const uint64_t fileSize); +bool image_isHashBlockComplete(atomic_uint_least8_t * const cacheMap, const uint64_t block, const uint64_t fileSize); void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, const bool set); diff --git a/src/server/integrity.c b/src/server/integrity.c index 1fcb558..a9fbae6 100644 --- a/src/server/integrity.c +++ b/src/server/integrity.c @@ -181,10 +181,12 @@ static void* integrity_main(void * data UNUSED) const uint64_t end = MIN( (uint64_t)(blocks[0] + 1) * HASH_BLOCK_SIZE, image->virtualFilesize ); bool complete = true; if ( qCount == CHECK_ALL ) { - // When checking full image, skip incomplete blocks, otherwise assume block is complete - mutex_lock( &image->lock ); - complete = image_isHashBlockComplete( image->cache_map, blocks[0], fileSize ); - mutex_unlock( &image->lock ); + dnbd3_cache_map_t *cache = ref_get_cachemap( image ); + if ( cache != NULL ) { + // When checking full image, skip incomplete blocks, otherwise assume block is complete + complete = image_isHashBlockComplete( cache->map, blocks[0], fileSize ); + ref_put( &cache->reference ); + } } #if defined(linux) || defined(__linux) while ( sync_file_range( fd, start, end - start, SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER ) == -1 ) diff --git a/src/server/net.c b/src/server/net.c index 9c855e4..12bcdad 100644 --- a/src/server/net.c +++ b/src/server/net.c @@ -246,7 +246,7 @@ void* net_handleNewConnection(void *clientPtr) // We're a proxy, client is another proxy, we don't do BGR, but connecting proxy does... // Reject, as this would basically force this proxy to do BGR too. image = image_get( image_name, rid, true ); - if ( image != NULL && image->cache_map != NULL ) { + if ( image != NULL && image->ref_cacheMap != NULL ) { // Only exception is if the image is complete locally image = image_release( image ); } @@ -268,7 +268,7 @@ void* net_handleNewConnection(void *clientPtr) } else { // Image is fine so far, but occasionally drop a client if the uplink for the image is clogged or unavailable bOk = true; - if ( image->cache_map != NULL ) { + if ( image->ref_cacheMap != NULL ) { dnbd3_uplink_t *uplink = ref_get_uplink( &image->uplinkref ); if ( uplink == NULL || uplink->cacheFd == -1 || uplink->queueLen > SERVER_UPLINK_QUEUELEN_THRES ) { bOk = ( rand() % 4 ) == 1; @@ -338,57 +338,52 @@ void* net_handleNewConnection(void *clientPtr) break; } - if ( request.size != 0 && image->cache_map != NULL ) { + dnbd3_cache_map_t *cache; + if ( request.size != 0 && ( cache = ref_get_cachemap( image ) ) != NULL ) { // This is a proxyed image, check if we need to relay the request... start = offset & ~(uint64_t)(DNBD3_BLOCK_SIZE - 1); end = (offset + request.size + DNBD3_BLOCK_SIZE - 1) & ~(uint64_t)(DNBD3_BLOCK_SIZE - 1); bool isCached = true; - mutex_lock( &image->lock ); - // Check again as we only aquired the lock just now - if ( image->cache_map != NULL ) { - const uint64_t firstByteInMap = start >> 15; - const uint64_t lastByteInMap = (end - 1) >> 15; - uint64_t pos; - // Middle - quick checking - if ( isCached ) { - pos = firstByteInMap + 1; - while ( pos < lastByteInMap ) { - if ( image->cache_map[pos] != 0xff ) { - isCached = false; - break; - } - ++pos; + const uint64_t firstByteInMap = start >> 15; + const uint64_t lastByteInMap = (end - 1) >> 15; + uint64_t pos; + uint8_t b; + atomic_thread_fence( memory_order_acquire ); + // Middle - quick checking + if ( isCached ) { + for ( pos = firstByteInMap + 1; pos < lastByteInMap; ++pos ) { + if ( atomic_load_explicit( &cache->map[pos], memory_order_relaxed ) != 0xff ) { + isCached = false; + break; } } - // First byte - if ( isCached ) { - pos = start; - do { - const int map_x = (pos >> 12) & 7; // mod 8 - const uint8_t bit_mask = (uint8_t)( 1 << map_x ); - if ( (image->cache_map[firstByteInMap] & bit_mask) == 0 ) { - isCached = false; - break; - } - pos += DNBD3_BLOCK_SIZE; - } while ( firstByteInMap == (pos >> 15) && pos < end ); + } + // First byte + if ( isCached ) { + b = atomic_load_explicit( &cache->map[firstByteInMap], memory_order_relaxed ); + for ( pos = start; firstByteInMap == (pos >> 15) && pos < end; pos += DNBD3_BLOCK_SIZE ) { + const int map_x = (pos >> 12) & 7; // mod 8 + const uint8_t bit_mask = (uint8_t)( 1 << map_x ); + if ( (b & bit_mask) == 0 ) { + isCached = false; + break; + } } - // Last byte - only check if request spans multiple bytes in cache map - if ( isCached && firstByteInMap != lastByteInMap ) { - pos = lastByteInMap << 15; - while ( pos < end ) { - assert( lastByteInMap == (pos >> 15) ); - const int map_x = (pos >> 12) & 7; // mod 8 - const uint8_t bit_mask = (uint8_t)( 1 << map_x ); - if ( (image->cache_map[lastByteInMap] & bit_mask) == 0 ) { - isCached = false; - break; - } - pos += DNBD3_BLOCK_SIZE; + } + // Last byte - only check if request spans multiple bytes in cache map + if ( isCached && firstByteInMap != lastByteInMap ) { + b = atomic_load_explicit( &cache->map[lastByteInMap], memory_order_relaxed ); + for ( pos = lastByteInMap << 15; pos < end; pos += DNBD3_BLOCK_SIZE ) { + assert( lastByteInMap == (pos >> 15) ); + const int map_x = (pos >> 12) & 7; // mod 8 + const uint8_t bit_mask = (uint8_t)( 1 << map_x ); + if ( (b & bit_mask) == 0 ) { + isCached = false; + break; } } } - mutex_unlock( &image->lock ); + ref_put( &cache->reference ); if ( !isCached ) { if ( !uplink_request( client, request.handle, offset, request.size, request.hops ) ) { logadd( LOG_DEBUG1, "Could not relay uncached request from %s to upstream proxy, disabling image %s:%d", diff --git a/src/server/reference.h b/src/server/reference.h index 8883eb1..2a80955 100644 --- a/src/server/reference.h +++ b/src/server/reference.h @@ -51,4 +51,9 @@ static inline void ref_put( ref *ref ) ref == NULL ? NULL : container_of(ref, dnbd3_uplink_t, reference); \ }) +#define ref_get_cachemap(image) ({ \ + ref* ref = ref_get( &(image)->ref_cacheMap ); \ + ref == NULL ? NULL : container_of(ref, dnbd3_cache_map_t, reference); \ +}) + #endif diff --git a/src/server/uplink.c b/src/server/uplink.c index d77be9c..0a6bd11 100644 --- a/src/server/uplink.c +++ b/src/server/uplink.c @@ -91,7 +91,7 @@ bool uplink_init(dnbd3_image_t *image, int sock, dnbd3_host_t *host, int version ref_put( &uplink->reference ); return true; // There's already an uplink, so should we consider this success or failure? } - if ( image->cache_map == NULL ) { + if ( image->ref_cacheMap == NULL ) { logadd( LOG_WARNING, "Uplink was requested for image %s, but it is already complete", image->name ); goto failure; } @@ -170,7 +170,7 @@ bool uplink_shutdown(dnbd3_image_t *image) mutex_unlock( &uplink->queueLock ); bool retval = ( exp && image->users == 0 ); mutex_unlock( &image->lock ); - return exp; + return retval; } /** @@ -214,7 +214,7 @@ static void uplink_free(ref *ref) dnbd3_image_t *image = image_lock( uplink->image ); if ( image != NULL ) { // != NULL means image is still in list... - if ( !_shutdown && image->cache_map != NULL ) { + if ( !_shutdown && image->ref_cacheMap != NULL ) { // Ingegrity checker must have found something in the meantime uplink_init( image, -1, NULL, 0 ); } @@ -707,13 +707,14 @@ static void uplink_sendReplicationRequest(dnbd3_uplink_t *uplink) if ( uplink == NULL || uplink->current.fd == -1 ) return; if ( _backgroundReplication == BGR_DISABLED || uplink->cacheFd == -1 ) return; // Don't do background replication if ( uplink->nextReplicationIndex == -1 || uplink->replicationHandle != REP_NONE ) - return; + return; // Already a replication request on the wire, or no more blocks to replicate dnbd3_image_t * const image = uplink->image; if ( image->virtualFilesize < DNBD3_BLOCK_SIZE ) return; - mutex_lock( &image->lock ); - if ( image == NULL || image->cache_map == NULL || image->users < _bgrMinClients ) { - // No cache map (=image complete), or replication pending, or not enough users, do nothing - mutex_unlock( &image->lock ); + if ( image->users < _bgrMinClients ) return; // Not enough active users + dnbd3_cache_map_t *cache = ref_get_cachemap( image ); + if ( cache == NULL || image->users < _bgrMinClients ) { + // No cache map (=image complete) + ref_put( &cache->reference ); return; } const int mapBytes = IMGSIZE_TO_MAPBYTES( image->virtualFilesize ); @@ -727,16 +728,18 @@ static void uplink_sendReplicationRequest(dnbd3_uplink_t *uplink) endByte = mapBytes; } } + atomic_thread_fence( memory_order_acquire ); int replicationIndex = -1; for ( int j = uplink->nextReplicationIndex; j < endByte; ++j ) { const int i = j % ( mapBytes ); // Wrap around for BGR_FULL - if ( image->cache_map[i] != 0xff && ( i != lastBlockIndex || !uplink->replicatedLastBlock ) ) { + if ( atomic_load_explicit( &cache->map[i], memory_order_relaxed ) != 0xff + && ( i != lastBlockIndex || !uplink->replicatedLastBlock ) ) { // Found incomplete one replicationIndex = i; break; } } - mutex_unlock( &image->lock ); + ref_put( &cache->reference ); if ( replicationIndex == -1 && _backgroundReplication == BGR_HASHBLOCK ) { // Nothing left in current block, find next one replicationIndex = uplink_findNextIncompleteHashBlock( uplink, endByte ); @@ -768,23 +771,24 @@ static void uplink_sendReplicationRequest(dnbd3_uplink_t *uplink) } /** - * find next index into cache_map that corresponds to the beginning + * find next index into cache map that corresponds to the beginning * of a hash block which is neither completely empty nor completely * replicated yet. Returns -1 if no match. */ static int uplink_findNextIncompleteHashBlock(dnbd3_uplink_t *uplink, const int startMapIndex) { int retval = -1; - mutex_lock( &uplink->image->lock ); - const int mapBytes = IMGSIZE_TO_MAPBYTES( uplink->image->virtualFilesize ); - const uint8_t *cache_map = uplink->image->cache_map; - if ( cache_map != NULL ) { - int j; + dnbd3_cache_map_t *cache = ref_get_cachemap( uplink->image ); + if ( cache != NULL ) { + const int mapBytes = IMGSIZE_TO_MAPBYTES( uplink->image->virtualFilesize ); const int start = ( startMapIndex & MAP_INDEX_HASH_START_MASK ); + atomic_thread_fence( memory_order_acquire ); + int j; for (j = 0; j < mapBytes; ++j) { const int i = ( start + j ) % mapBytes; - const bool isFull = cache_map[i] == 0xff || ( i + 1 == mapBytes && uplink->replicatedLastBlock ); - const bool isEmpty = cache_map[i] == 0; + const uint8_t b = atomic_load_explicit( &cache->map[i], memory_order_relaxed ); + const bool isFull = b == 0xff || ( i + 1 == mapBytes && uplink->replicatedLastBlock ); + const bool isEmpty = b == 0; if ( !isEmpty && !isFull ) { // Neither full nor empty, replicate if ( retval == -1 ) { @@ -811,7 +815,7 @@ static int uplink_findNextIncompleteHashBlock(dnbd3_uplink_t *uplink, const int retval = -1; } } - mutex_unlock( &uplink->image->lock ); + ref_put( &cache->reference ); return retval; } @@ -1107,7 +1111,7 @@ static bool uplink_saveCacheMap(dnbd3_uplink_t *uplink) if ( fsync( uplink->cacheFd ) == -1 ) { // A failing fsync means we have no guarantee that any data // since the last fsync (or open if none) has been saved. Apart - // from keeping the cache_map from the last successful fsync + // from keeping the cache map from the last successful fsync // around and restoring it there isn't much we can do to recover // a consistent state. Bail out. logadd( LOG_ERROR, "fsync() on image file %s failed with errno %d", image->path, errno ); @@ -1116,21 +1120,13 @@ static bool uplink_saveCacheMap(dnbd3_uplink_t *uplink) } } - if ( image->cache_map == NULL ) return true; - logadd( LOG_DEBUG2, "Saving cache map of %s:%d", image->name, (int)image->rid ); - mutex_lock( &image->lock ); - // Lock and get a copy of the cache map, as it could be freed by another thread that is just about to - // figure out that this image's cache copy is complete - if ( image->cache_map == NULL || image->virtualFilesize < DNBD3_BLOCK_SIZE ) { - mutex_unlock( &image->lock ); + dnbd3_cache_map_t *cache = ref_get_cachemap( image ); + if ( cache == NULL ) return true; - } + logadd( LOG_DEBUG2, "Saving cache map of %s:%d", image->name, (int)image->rid ); const size_t size = IMGSIZE_TO_MAPBYTES(image->virtualFilesize); - uint8_t *map = malloc( size ); - memcpy( map, image->cache_map, size ); // Unlock. Use path and cacheFd without locking. path should never change after initialization of the image, // cacheFd is owned by the uplink thread and we don't want to hold a spinlock during I/O - mutex_unlock( &image->lock ); assert( image->path != NULL ); char mapfile[strlen( image->path ) + 4 + 1]; strcpy( mapfile, image->path ); @@ -1139,14 +1135,14 @@ static bool uplink_saveCacheMap(dnbd3_uplink_t *uplink) int fd = open( mapfile, O_WRONLY | O_CREAT, 0644 ); if ( fd == -1 ) { const int err = errno; - free( map ); + ref_put( &cache->reference ); logadd( LOG_WARNING, "Could not open file to write cache map to disk (errno=%d) file %s", err, mapfile ); return false; } size_t done = 0; while ( done < size ) { - const ssize_t ret = write( fd, map, size - done ); + const ssize_t ret = write( fd, cache->map + done, size - done ); if ( ret == -1 ) { if ( errno == EINTR ) continue; logadd( LOG_WARNING, "Could not write cache map (errno=%d) file %s", errno, mapfile ); @@ -1158,11 +1154,11 @@ static bool uplink_saveCacheMap(dnbd3_uplink_t *uplink) } done += (size_t)ret; } + ref_put( &cache->reference ); if ( fsync( fd ) == -1 ) { logadd( LOG_WARNING, "fsync() on image map %s failed with errno %d", mapfile, errno ); } close( fd ); - free( map ); return true; } -- cgit v1.2.3-55-g7522 From 5765ce49f5e1e26505fd6b162db73a732603d1a8 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Thu, 5 Sep 2019 16:52:31 +0200 Subject: [SERVER] integrity checker: Improve flushing logic --- src/server/integrity.c | 199 +++++++++++++++++++++++++++---------------------- src/server/uplink.c | 2 +- 2 files changed, 111 insertions(+), 90 deletions(-) (limited to 'src/server/integrity.c') diff --git a/src/server/integrity.c b/src/server/integrity.c index a9fbae6..fddb755 100644 --- a/src/server/integrity.c +++ b/src/server/integrity.c @@ -35,6 +35,7 @@ static int queueLen = -1; static atomic_bool bRunning = false; static void* integrity_main(void *data); +static void flushFileRange(dnbd3_image_t *image, uint64_t start, uint64_t end); /** * Initialize the integrity check thread @@ -88,14 +89,17 @@ void integrity_check(dnbd3_image_t *image, int block) for (i = 0; i < queueLen; ++i) { if ( freeSlot == -1 && checkQueue[i].image == NULL ) { freeSlot = i; - } else if ( checkQueue[i].image == image - && checkQueue[i].block <= block && checkQueue[i].block + checkQueue[i].count >= block ) { - // Already queued check dominates this one, or at least lies directly before this block - if ( checkQueue[i].block + checkQueue[i].count == block ) { - // It's directly before this one; expand range + } else if ( checkQueue[i].image == image && checkQueue[i].block <= block ) { + if ( checkQueue[i].count == CHECK_ALL ) { + logadd( LOG_DEBUG2, "Dominated by full image scan request (%d/%d) (at %d)", i, queueLen, checkQueue[i].block ); + } else if ( checkQueue[i].block + checkQueue[i].count == block ) { checkQueue[i].count += 1; + logadd( LOG_DEBUG2, "Attaching to existing check request (%d/%d) (at %d, %d to go)", i, queueLen, checkQueue[i].block, checkQueue[i].count ); + } else if ( checkQueue[i].block + checkQueue[i].count > block ) { + logadd( LOG_DEBUG2, "Dominated by existing check request (%d/%d) (at %d, %d to go)", i, queueLen, checkQueue[i].block, checkQueue[i].count ); + } else { + continue; } - logadd( LOG_DEBUG2, "Attaching to existing check request (%d/%d) (%d +%d)", i, queueLen, checkQueue[i].block, checkQueue[i].count ); mutex_unlock( &integrityQueueLock ); return; } @@ -123,8 +127,6 @@ void integrity_check(dnbd3_image_t *image, int block) static void* integrity_main(void * data UNUSED) { int i; - uint8_t *buffer = NULL; - size_t bufferSize = 0; setThreadName( "image-check" ); blockNoncriticalSignals(); #if defined(linux) || defined(__linux) @@ -150,88 +152,70 @@ static void* integrity_main(void * data UNUSED) // We have the image. Call image_release() some time const int qCount = checkQueue[i].count; bool foundCorrupted = false; - mutex_lock( &image->lock ); if ( image->crc32 != NULL && image->realFilesize != 0 ) { int blocks[2] = { checkQueue[i].block, -1 }; mutex_unlock( &integrityQueueLock ); - // Make copy of crc32 list as it might go away const uint64_t fileSize = image->realFilesize; const int numHashBlocks = IMGSIZE_TO_HASHBLOCKS(fileSize); - const size_t required = numHashBlocks * sizeof(uint32_t); - if ( buffer == NULL || required > bufferSize ) { - bufferSize = required; - if ( buffer != NULL ) free( buffer ); - buffer = malloc( bufferSize ); - } - memcpy( buffer, image->crc32, required ); - mutex_unlock( &image->lock ); - // Open for direct I/O if possible; this prevents polluting the fs cache - int fd = open( image->path, O_RDONLY | O_DIRECT ); - bool direct = fd != -1; - if ( unlikely( !direct ) ) { - // Try unbuffered; flush to disk for that - logadd( LOG_DEBUG1, "O_DIRECT failed for %s", image->path ); - image_ensureOpen( image ); - fd = image->readFd; - } int checkCount = MIN( qCount, 5 ); - if ( fd != -1 ) { - while ( blocks[0] < numHashBlocks && !_shutdown ) { - const uint64_t start = blocks[0] * HASH_BLOCK_SIZE; - const uint64_t end = MIN( (uint64_t)(blocks[0] + 1) * HASH_BLOCK_SIZE, image->virtualFilesize ); - bool complete = true; - if ( qCount == CHECK_ALL ) { - dnbd3_cache_map_t *cache = ref_get_cachemap( image ); - if ( cache != NULL ) { - // When checking full image, skip incomplete blocks, otherwise assume block is complete - complete = image_isHashBlockComplete( cache->map, blocks[0], fileSize ); - ref_put( &cache->reference ); - } - } -#if defined(linux) || defined(__linux) - while ( sync_file_range( fd, start, end - start, SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER ) == -1 ) -#else - while ( fsync( fd ) == -1 ) -#endif - { - if ( _shutdown ) - break; - if ( errno == EINTR ) - continue; - logadd( LOG_ERROR, "Cannot flush %s for integrity check (errno=%d)", image->path, errno ); - exit( 1 ); + int readFd = -1, directFd = -1; + while ( blocks[0] < numHashBlocks && !_shutdown ) { + const uint64_t start = blocks[0] * HASH_BLOCK_SIZE; + const uint64_t end = MIN( (uint64_t)(blocks[0] + 1) * HASH_BLOCK_SIZE, image->virtualFilesize ); + bool complete = true; + if ( qCount == CHECK_ALL ) { + dnbd3_cache_map_t *cache = ref_get_cachemap( image ); + if ( cache != NULL ) { + // When checking full image, skip incomplete blocks, otherwise assume block is complete + complete = image_isHashBlockComplete( cache->map, blocks[0], fileSize ); + ref_put( &cache->reference ); } - if ( _shutdown ) - break; + } + // Flush to disk if there's an uplink, as that means the block might have been written recently + if ( image->uplinkref != NULL ) { + flushFileRange( image, start, end ); + } + if ( _shutdown ) + break; + // Open for direct I/O if possible; this prevents polluting the fs cache + if ( directFd == -1 && ( end % DNBD3_BLOCK_SIZE ) == 0 ) { // Use direct I/O only if read length is multiple of 4096 to be on the safe side - int tfd; - if ( direct && ( end % DNBD3_BLOCK_SIZE ) == 0 ) { - // Suitable for direct io - tfd = fd; - } else if ( !image_ensureOpen( image ) ) { - logadd( LOG_WARNING, "Cannot open %s for reading", image->path ); - break; + directFd = open( image->path, O_RDONLY | O_DIRECT ); + if ( directFd == -1 ) { + logadd( LOG_DEBUG2, "O_DIRECT failed for %s (errno=%d)", image->path, errno ); + directFd = -2; } else { - tfd = image->readFd; - // Evict from cache so we have to re-read, making sure data was properly stored - posix_fadvise( fd, start, end - start, POSIX_FADV_DONTNEED ); + readFd = directFd; } - if ( complete && !image_checkBlocksCrc32( tfd, (uint32_t*)buffer, blocks, fileSize ) ) { - logadd( LOG_WARNING, "Hash check for block %d of %s failed!", blocks[0], image->name ); - image_updateCachemap( image, start, end, false ); - // If this is not a full check, queue one - if ( qCount != CHECK_ALL ) { - logadd( LOG_INFO, "Queueing full check for %s", image->name ); - integrity_check( image, -1 ); - } - foundCorrupted = true; - } - blocks[0]++; // Increase before break, so it always points to the next block to check after loop - if ( complete && --checkCount == 0 ) break; } - if ( direct ) { - close( fd ); + if ( readFd == -1 ) { // Try buffered; flush to disk for that + image_ensureOpen( image ); + readFd = image->readFd; + } + if ( readFd == -1 ) { + logadd( LOG_MINOR, "Couldn't get any valid fd for integrity check of %s... ignoring...", image->path ); + } else if ( complete && !image_checkBlocksCrc32( readFd, image->crc32, blocks, fileSize ) ) { + bool iscomplete = true; + dnbd3_cache_map_t *cache = ref_get_cachemap( image ); + if ( cache != NULL ) { + iscomplete = image_isHashBlockComplete( cache->map, blocks[0], fileSize ); + ref_put( &cache->reference ); + } + logadd( LOG_WARNING, "Hash check for block %d of %s failed (complete: was: %d, is: %d)", blocks[0], image->name, (int)complete, (int)iscomplete ); + image_updateCachemap( image, start, end, false ); + // If this is not a full check, queue one + if ( qCount != CHECK_ALL ) { + logadd( LOG_INFO, "Queueing full check for %s", image->name ); + integrity_check( image, -1 ); + } + foundCorrupted = true; } + blocks[0]++; // Increase before break, so it always points to the next block to check after loop + if ( complete && --checkCount == 0 ) + break; + } + if ( directFd != -1 && directFd != -2 ) { + close( directFd ); } mutex_lock( &integrityQueueLock ); assert( checkQueue[i].image == image ); @@ -242,11 +226,8 @@ static void* integrity_main(void * data UNUSED) logadd( LOG_WARNING, "BUG! checkQueue counter ran negative" ); } } - if ( checkCount > 0 || checkQueue[i].count <= 0 || fd == -1 ) { - // Done with this task as nothing left, OR we don't have an fd to read from - if ( fd == -1 ) { - logadd( LOG_WARNING, "Cannot hash check %s: bad fd", image->path ); - } + if ( checkCount > 0 || checkQueue[i].count <= 0 ) { + // Done with this task as nothing left checkQueue[i].image = NULL; if ( i + 1 == queueLen ) queueLen--; // Mark as working again if applicable @@ -263,10 +244,8 @@ static void* integrity_main(void * data UNUSED) // Still more blocks to go... checkQueue[i].block = blocks[0]; } - } else { - mutex_unlock( &image->lock ); } - if ( foundCorrupted ) { + if ( foundCorrupted && !_shutdown ) { // Something was fishy, make sure uplink exists mutex_lock( &image->lock ); image->working = false; @@ -278,10 +257,52 @@ static void* integrity_main(void * data UNUSED) } } mutex_unlock( &integrityQueueLock ); - if ( buffer != NULL ) { - free( buffer ); - } bRunning = false; return NULL; } +static void flushFileRange(dnbd3_image_t *image, uint64_t start, uint64_t end) +{ + int flushFd; + int writableFd = -1; + dnbd3_uplink_t *uplink = ref_get_uplink( &image->uplinkref ); + if ( uplink != NULL ) { // Try to steal uplink's writable fd + if ( uplink->cacheFd != -1 ) { + writableFd = dup( uplink->cacheFd ); + } + ref_put( &uplink->reference ); + } + if ( writableFd == -1 ) { // Open file as writable + writableFd = open( image->path, O_WRONLY ); + } + if ( writableFd == -1 ) { // Fallback to readFd (should work on Linux and BSD...) + logadd( LOG_WARNING, "flushFileRange: Cannot open %s for writing. Trying readFd.", image->path ); + image_ensureOpen( image ); + flushFd = image->readFd; + } else { + flushFd = writableFd; + } + if ( flushFd == -1 ) + return; +#if defined(linux) || defined(__linux) + while ( sync_file_range( flushFd, start, end - start, SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER ) == -1 ) +#else + while ( fsync( flushFd ) == -1 ) // TODO: fdatasync() should be available since FreeBSD 12.0 ... Might be a tad bit faster +#endif + { + if ( _shutdown ) + break; + int e = errno; + if ( e == EINTR ) + continue; + logadd( LOG_ERROR, "Cannot flush %s for integrity check (errno=%d)", image->path, e ); + if ( e == EIO ) { + exit( 1 ); + } + } + // Evict from cache too so we have to re-read, making sure data was properly stored + posix_fadvise( flushFd, start, end - start, POSIX_FADV_DONTNEED ); + if ( writableFd != -1 ) { + close( writableFd ); + } +} diff --git a/src/server/uplink.c b/src/server/uplink.c index 8a0b06b..dab5c27 100644 --- a/src/server/uplink.c +++ b/src/server/uplink.c @@ -876,7 +876,7 @@ static void uplink_handleReceive(dnbd3_uplink_t *uplink) ret = (int)pwrite( uplink->cacheFd, uplink->recvBuffer + done, inReply.size - done, start + done ); if ( unlikely( ret == -1 ) ) { err = errno; - if ( err == EINTR ) continue; + if ( err == EINTR && !_shutdown ) continue; if ( err == ENOSPC || err == EDQUOT ) { // try to free 256MiB if ( !tryAgain || !image_ensureDiskSpaceLocked( 256ull * 1024 * 1024, true ) ) break; -- cgit v1.2.3-55-g7522 From fdcbdcac2e721d72136794bdc45c63d71799dcd5 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Tue, 10 Sep 2019 17:49:05 +0200 Subject: [SERVER] Make integrity checks on startup async --- src/server/image.c | 49 ++++++++++++++++++++++++------------------------- src/server/integrity.c | 17 ++++++++++++----- src/server/integrity.h | 2 +- 3 files changed, 37 insertions(+), 31 deletions(-) (limited to 'src/server/integrity.c') diff --git a/src/server/image.c b/src/server/image.c index 5fa06d8..822a710 100644 --- a/src/server/image.c +++ b/src/server/image.c @@ -53,7 +53,7 @@ static bool image_ensureDiskSpace(uint64_t size, bool force); static dnbd3_cache_map_t* image_loadCacheMap(const char * const imagePath, const int64_t fileSize); static uint32_t* image_loadCrcList(const char * const imagePath, const int64_t fileSize, uint32_t *masterCrc); -static bool image_checkRandomBlocks(const int count, int fdImage, const int64_t fileSize, uint32_t * const crc32list, atomic_uint_least8_t * const cache_map); +static void image_checkRandomBlocks(dnbd3_image_t *image, const int count); static void* closeUnusedFds(void*); static void allocCacheMap(dnbd3_image_t *image, bool complete); @@ -161,7 +161,7 @@ void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, co for ( pos = start; pos < end; pos += HASH_BLOCK_SIZE ) { const int block = (int)( pos / HASH_BLOCK_SIZE ); if ( image_isHashBlockComplete( cache->map, block, image->realFilesize ) ) { - integrity_check( image, block ); + integrity_check( image, block, false ); } } } @@ -846,19 +846,10 @@ static bool image_load(char *base, char *path, int withUplink) // XXX: Maybe try sha-256 or 512 first if you're paranoid (to be implemented) // 2. Load CRC-32 list of image - bool doFullCheck = false; uint32_t masterCrc = 0; const int hashBlockCount = IMGSIZE_TO_HASHBLOCKS( virtualFilesize ); crc32list = image_loadCrcList( path, virtualFilesize, &masterCrc ); - // Check CRC32 - if ( crc32list != NULL ) { - if ( !image_checkRandomBlocks( 4, fdImage, realFilesize, crc32list, cache != NULL ? cache->map : NULL ) ) { - logadd( LOG_ERROR, "quick crc32 check of %s failed. Data corruption?", path ); - doFullCheck = true; - } - } - // Compare data just loaded to identical image we apparently already loaded if ( existing != NULL ) { if ( existing->realFilesize != realFilesize ) { @@ -943,6 +934,8 @@ static bool image_load(char *base, char *path, int withUplink) if ( image_addToList( image ) ) { // Keep fd for reading fdImage = -1; + // Check CRC32 + image_checkRandomBlocks( image, 4 ); } else { logadd( LOG_ERROR, "Image list full: Could not add image %s", path ); image->readFd = -1; // Keep fdImage instead, will be closed below @@ -950,12 +943,6 @@ static bool image_load(char *base, char *path, int withUplink) goto load_error; } logadd( LOG_DEBUG1, "Loaded image '%s:%d'\n", image->name, (int)image->rid ); - // CRC errors found... - if ( doFullCheck ) { - logadd( LOG_INFO, "Queueing full CRC32 check for '%s:%d'\n", image->name, (int)image->rid ); - integrity_check( image, -1 ); - } - function_return = true; // Clean exit: @@ -1038,18 +1025,26 @@ static uint32_t* image_loadCrcList(const char * const imagePath, const int64_t f return retval; } -static bool image_checkRandomBlocks(const int count, int fdImage, const int64_t realFilesize, uint32_t * const crc32list, atomic_uint_least8_t * const cache_map) +static void image_checkRandomBlocks(dnbd3_image_t *image, const int count) { + if ( image->crc32 == NULL ) + return; // This checks the first block and (up to) count - 1 random blocks for corruption // via the known crc32 list. This is very sloppy and is merely supposed to detect // accidental corruption due to broken dnbd3-proxy functionality or file system - // corruption. + // corruption, or people replacing/updating images which is a very stupid thing. assert( count > 0 ); - const int hashBlocks = IMGSIZE_TO_HASHBLOCKS( realFilesize ); - int blocks[count + 1]; + dnbd3_cache_map_t *cache = ref_get_cachemap( image ); + const int hashBlocks = IMGSIZE_TO_HASHBLOCKS( image->virtualFilesize ); + int blocks[count]; int index = 0, j; int block; - if ( image_isHashBlockComplete( cache_map, 0, realFilesize ) ) blocks[index++] = 0; + if ( image_isHashBlockComplete( cache->map, 0, image->virtualFilesize ) ) { + blocks[index++] = 0; + } + if ( hashBlocks > 1 && image_isHashBlockComplete( cache->map, hashBlocks - 1, image->virtualFilesize ) ) { + blocks[index++] = hashBlocks - 1; + } int tries = count * 5; // Try only so many times to find a non-duplicate complete block while ( index + 1 < count && --tries > 0 ) { block = rand() % hashBlocks; // Random block @@ -1057,11 +1052,15 @@ static bool image_checkRandomBlocks(const int count, int fdImage, const int64_t if ( blocks[j] == block ) goto while_end; } // Block complete? If yes, add to list - if ( image_isHashBlockComplete( cache_map, block, realFilesize ) ) blocks[index++] = block; + if ( image_isHashBlockComplete( cache->map, block, image->virtualFilesize ) ) { + blocks[index++] = block; + } while_end: ; } - blocks[MIN(index, count)] = -1; // End of array has to be marked by a -1 - return image_checkBlocksCrc32( fdImage, crc32list, blocks, realFilesize ); // Return result of check + ref_put( &cache->reference ); + for ( int i = 0; i < index; ++i ) { + integrity_check( image, blocks[i], true ); + } } /** diff --git a/src/server/integrity.c b/src/server/integrity.c index fddb755..2058104 100644 --- a/src/server/integrity.c +++ b/src/server/integrity.c @@ -78,15 +78,17 @@ void integrity_shutdown() * make sure it is before calling, otherwise it will result in falsely * detected corruption. */ -void integrity_check(dnbd3_image_t *image, int block) +void integrity_check(dnbd3_image_t *image, int block, bool blocking) { + int freeSlot; if ( !bRunning ) { logadd( LOG_MINOR, "Ignoring check request; thread not running..." ); return; } - int i, freeSlot = -1; +start_over: + freeSlot = -1; mutex_lock( &integrityQueueLock ); - for (i = 0; i < queueLen; ++i) { + for (int i = 0; i < queueLen; ++i) { if ( freeSlot == -1 && checkQueue[i].image == NULL ) { freeSlot = i; } else if ( checkQueue[i].image == image && checkQueue[i].block <= block ) { @@ -105,8 +107,13 @@ void integrity_check(dnbd3_image_t *image, int block) } } if ( freeSlot == -1 ) { - if ( queueLen >= CHECK_QUEUE_SIZE ) { + if ( unlikely( queueLen >= CHECK_QUEUE_SIZE ) ) { mutex_unlock( &integrityQueueLock ); + if ( blocking ) { + logadd( LOG_INFO, "Check queue full, waiting a couple seconds...\n" ); + sleep( 3 ); + goto start_over; + } logadd( LOG_INFO, "Check queue full, discarding check request...\n" ); return; } @@ -206,7 +213,7 @@ static void* integrity_main(void * data UNUSED) // If this is not a full check, queue one if ( qCount != CHECK_ALL ) { logadd( LOG_INFO, "Queueing full check for %s", image->name ); - integrity_check( image, -1 ); + integrity_check( image, -1, false ); } foundCorrupted = true; } diff --git a/src/server/integrity.h b/src/server/integrity.h index c3c2b44..09d3785 100644 --- a/src/server/integrity.h +++ b/src/server/integrity.h @@ -7,6 +7,6 @@ void integrity_init(); void integrity_shutdown(); -void integrity_check(dnbd3_image_t *image, int block); +void integrity_check(dnbd3_image_t *image, int block, bool blocking); #endif /* INTEGRITY_H_ */ -- cgit v1.2.3-55-g7522 From 53fbcc89f027992e29c96086dd32eb624e181eac Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Tue, 17 Sep 2019 14:56:03 +0200 Subject: [*] Fix/simplify checks for linux --- src/server/integrity.c | 4 ++-- src/shared/fdsignal.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/server/integrity.c') diff --git a/src/server/integrity.c b/src/server/integrity.c index 2058104..1fbd9dc 100644 --- a/src/server/integrity.c +++ b/src/server/integrity.c @@ -136,7 +136,7 @@ static void* integrity_main(void * data UNUSED) int i; setThreadName( "image-check" ); blockNoncriticalSignals(); -#if defined(linux) || defined(__linux) +#if defined(__linux__) // Setting nice of this thread - this is not POSIX conforming, so check if other platforms support this. // POSIX says that setpriority() should set the nice value of all threads belonging to the current process, // but on linux you can do this per thread. @@ -291,7 +291,7 @@ static void flushFileRange(dnbd3_image_t *image, uint64_t start, uint64_t end) } if ( flushFd == -1 ) return; -#if defined(linux) || defined(__linux) +#if defined(__linux__) while ( sync_file_range( flushFd, start, end - start, SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER ) == -1 ) #else while ( fsync( flushFd ) == -1 ) // TODO: fdatasync() should be available since FreeBSD 12.0 ... Might be a tad bit faster diff --git a/src/shared/fdsignal.c b/src/shared/fdsignal.c index 5e5cf7f..087b6f1 100644 --- a/src/shared/fdsignal.c +++ b/src/shared/fdsignal.c @@ -1,6 +1,6 @@ #include "fdsignal.h" -#if defined(linux) || defined(__linux) || defined(__linux__) +#if defined(__linux__) //#warning "Using eventfd based signalling" #include "fdsignal.inc/eventfd.c" #elif __SIZEOF_INT__ == 4 && __SIZEOF_POINTER__ == 8 -- cgit v1.2.3-55-g7522 From 3d0c89fccf14599d156696d74224a4fbe0787777 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Tue, 29 Oct 2019 17:55:20 +0100 Subject: [SERVER] Fix checking images without cache map --- src/server/image.c | 18 +++++++++++------- src/server/image.h | 2 +- src/server/integrity.c | 4 ++-- 3 files changed, 14 insertions(+), 10 deletions(-) (limited to 'src/server/integrity.c') diff --git a/src/server/image.c b/src/server/image.c index 6259e38..9581a92 100644 --- a/src/server/image.c +++ b/src/server/image.c @@ -160,7 +160,7 @@ void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, co end = (end + HASH_BLOCK_SIZE - 1) & ~(uint64_t)(HASH_BLOCK_SIZE - 1); for ( pos = start; pos < end; pos += HASH_BLOCK_SIZE ) { const int block = (int)( pos / HASH_BLOCK_SIZE ); - if ( image_isHashBlockComplete( cache->map, block, image->realFilesize ) ) { + if ( image_isHashBlockComplete( cache, block, image->realFilesize ) ) { integrity_check( image, block, false ); } } @@ -651,9 +651,11 @@ static dnbd3_image_t* image_free(dnbd3_image_t *image) return NULL ; } -bool image_isHashBlockComplete(atomic_uint_least8_t * const cacheMap, const uint64_t block, const uint64_t realFilesize) +bool image_isHashBlockComplete(dnbd3_cache_map_t * const cache, const uint64_t block, const uint64_t realFilesize) { - if ( cacheMap == NULL ) return true; + if ( cache == NULL ) + return true; + const atomic_uint_least8_t *cacheMap = cache->map; const uint64_t end = (block + 1) * HASH_BLOCK_SIZE; if ( end <= realFilesize ) { // Trivial case: block in question is not the last block (well, or image size is multiple of HASH_BLOCK_SIZE) @@ -1039,10 +1041,10 @@ static void image_checkRandomBlocks(dnbd3_image_t *image, const int count) int blocks[count]; int index = 0, j; int block; - if ( image_isHashBlockComplete( cache->map, 0, image->virtualFilesize ) ) { + if ( image_isHashBlockComplete( cache, 0, image->virtualFilesize ) ) { blocks[index++] = 0; } - if ( hashBlocks > 1 && image_isHashBlockComplete( cache->map, hashBlocks - 1, image->virtualFilesize ) ) { + if ( hashBlocks > 1 && image_isHashBlockComplete( cache, hashBlocks - 1, image->virtualFilesize ) ) { blocks[index++] = hashBlocks - 1; } int tries = count * 5; // Try only so many times to find a non-duplicate complete block @@ -1052,12 +1054,14 @@ static void image_checkRandomBlocks(dnbd3_image_t *image, const int count) if ( blocks[j] == block ) goto while_end; } // Block complete? If yes, add to list - if ( image_isHashBlockComplete( cache->map, block, image->virtualFilesize ) ) { + if ( image_isHashBlockComplete( cache, block, image->virtualFilesize ) ) { blocks[index++] = block; } while_end: ; } - ref_put( &cache->reference ); + if ( cache != NULL ) { + ref_put( &cache->reference ); + } for ( int i = 0; i < index; ++i ) { integrity_check( image, blocks[i], true ); } diff --git a/src/server/image.h b/src/server/image.h index 449e31f..89791fc 100644 --- a/src/server/image.h +++ b/src/server/image.h @@ -9,7 +9,7 @@ void image_serverStartup(); bool image_isComplete(dnbd3_image_t *image); -bool image_isHashBlockComplete(atomic_uint_least8_t * const cacheMap, const uint64_t block, const uint64_t fileSize); +bool image_isHashBlockComplete(dnbd3_cache_map_t * const cache, const uint64_t block, const uint64_t fileSize); void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, const bool set); diff --git a/src/server/integrity.c b/src/server/integrity.c index 1fbd9dc..4006dfc 100644 --- a/src/server/integrity.c +++ b/src/server/integrity.c @@ -174,7 +174,7 @@ static void* integrity_main(void * data UNUSED) dnbd3_cache_map_t *cache = ref_get_cachemap( image ); if ( cache != NULL ) { // When checking full image, skip incomplete blocks, otherwise assume block is complete - complete = image_isHashBlockComplete( cache->map, blocks[0], fileSize ); + complete = image_isHashBlockComplete( cache, blocks[0], fileSize ); ref_put( &cache->reference ); } } @@ -205,7 +205,7 @@ static void* integrity_main(void * data UNUSED) bool iscomplete = true; dnbd3_cache_map_t *cache = ref_get_cachemap( image ); if ( cache != NULL ) { - iscomplete = image_isHashBlockComplete( cache->map, blocks[0], fileSize ); + iscomplete = image_isHashBlockComplete( cache, blocks[0], fileSize ); ref_put( &cache->reference ); } logadd( LOG_WARNING, "Hash check for block %d of %s failed (complete: was: %d, is: %d)", blocks[0], image->name, (int)complete, (int)iscomplete ); -- cgit v1.2.3-55-g7522 From 26c1ad7af0f5749c5343a5823b9c8cece885ce84 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Tue, 3 Mar 2020 12:21:01 +0100 Subject: [SERVER] Remove "working" flag, introduce fine-grained flags Tracking the "working" state of images using one boolean is insufficient regarding the different ways in which providing an image can fail. Introduce separate flags for different conditions, like "file not readable", "file not writable", "no uplink server available", "file content has changed". --- src/server/altservers.c | 4 - src/server/globals.h | 7 +- src/server/image.c | 193 +++++++++++++++++++++++++----------------------- src/server/integrity.c | 20 +---- src/server/net.c | 17 +++-- src/server/uplink.c | 114 ++++++++++++++++++---------- 6 files changed, 197 insertions(+), 158 deletions(-) (limited to 'src/server/integrity.c') diff --git a/src/server/altservers.c b/src/server/altservers.c index 3fdbe0d..a6ad235 100644 --- a/src/server/altservers.c +++ b/src/server/altservers.c @@ -628,10 +628,6 @@ failed: if ( best.fd != -1 ) { close( best.fd ); } - if ( !image->working || uplink->cycleDetected ) { - image->working = true; - LOG( LOG_DEBUG1, "[RTT] No better alt server found, enabling '%s:%d' again... :-(", image->name, (int)image->rid ); - } uplink->cycleDetected = false; // It's a lie, but prevents rtt measurement triggering again right away mutex_lock( &uplink->rttLock ); uplink->rttTestResult = RTT_DONTCHANGE; diff --git a/src/server/globals.h b/src/server/globals.h index b1336dc..31fbce5 100644 --- a/src/server/globals.h +++ b/src/server/globals.h @@ -136,7 +136,12 @@ struct _dnbd3_image atomic_int completenessEstimate; // Completeness estimate in percent atomic_int users; // clients currently using this image. XXX Lock on imageListLock when modifying and checking whether the image should be freed. Reading it elsewhere is fine without the lock. int id; // Unique ID of this image. Only unique in the context of this running instance of DNBD3-Server - atomic_bool working; // true if image exists and completeness is == 100% or a working upstream proxy is connected + struct { + atomic_bool uplink; // No uplink connected + atomic_bool write; // Error writing to file + atomic_bool read; // Error reading from file + atomic_bool changed; // File disappeared or changed, thorough check required if it seems to be back + } problem; uint16_t rid; // revision of image pthread_mutex_t lock; }; diff --git a/src/server/image.c b/src/server/image.c index 6017e59..1ce1574 100644 --- a/src/server/image.c +++ b/src/server/image.c @@ -53,7 +53,7 @@ static bool image_ensureDiskSpace(uint64_t size, bool force); static dnbd3_cache_map_t* image_loadCacheMap(const char * const imagePath, const int64_t fileSize); static uint32_t* image_loadCrcList(const char * const imagePath, const int64_t fileSize, uint32_t *masterCrc); -static void image_checkRandomBlocks(dnbd3_image_t *image, const int count); +static bool image_checkRandomBlocks(dnbd3_image_t *image, const int count, int fromFd); static void* closeUnusedFds(void*); static void allocCacheMap(dnbd3_image_t *image, bool complete); @@ -239,35 +239,76 @@ bool image_isComplete(dnbd3_image_t *image) */ bool image_ensureOpen(dnbd3_image_t *image) { - if ( image->readFd != -1 ) return image; - int newFd = open( image->path, O_RDONLY ); + bool sizeChanged = false; + if ( image->readFd != -1 && !image->problem.changed ) + return true; + int newFd = image->readFd == -1 ? open( image->path, O_RDONLY ) : dup( image->readFd ); if ( newFd == -1 ) { - logadd( LOG_WARNING, "Cannot open %s for reading", image->path ); + if ( !image->problem.read ) { + logadd( LOG_WARNING, "Cannot open %s for reading", image->path ); + image->problem.read = true; + } } else { - // Check size + // Check size + read access + char buffer[100]; const off_t flen = lseek( newFd, 0, SEEK_END ); if ( flen == -1 ) { - logadd( LOG_WARNING, "Could not seek to end of %s (errno %d)", image->path, errno ); + if ( !image->problem.read ) { + logadd( LOG_WARNING, "Could not seek to end of %s (errno %d)", image->path, errno ); + image->problem.read = true; + } close( newFd ); newFd = -1; } else if ( (uint64_t)flen != image->realFilesize ) { - logadd( LOG_WARNING, "Size of active image with closed fd changed from %" PRIu64 " to %" PRIu64, image->realFilesize, (uint64_t)flen ); + if ( !image->problem.changed ) { + logadd( LOG_WARNING, "Size of active image with closed fd changed from %" PRIu64 " to %" PRIu64, + image->realFilesize, (uint64_t)flen ); + } + sizeChanged = true; + } else if ( pread( newFd, buffer, sizeof(buffer), 0 ) == -1 ) { + if ( !image->problem.read ) { + logadd( LOG_WARNING, "Reading first %d bytes from %s failed (errno=%d)", + (int)sizeof(buffer), image->path, errno ); + image->problem.read = true; + } close( newFd ); newFd = -1; } } if ( newFd == -1 ) { - mutex_lock( &image->lock ); - image->working = false; - mutex_unlock( &image->lock ); + if ( sizeChanged ) { + image->problem.changed = true; + } return false; } + + // Re-opened. Check if the "size/content changed" flag was set before and if so, check crc32, + // but only if the size we just got above is correct. + if ( image->problem.changed && !sizeChanged ) { + if ( image->crc32 == NULL ) { + // Cannot verify further, hope for the best + image->problem.changed = false; + logadd( LOG_DEBUG1, "Size of image %s:%d changed back to expected value", + image->name, (int)image->rid ); + } else if ( image_checkRandomBlocks( image, 1, newFd ) ) { + // This should have checked the first block (if complete) -> All is well again + image->problem.changed = false; + logadd( LOG_DEBUG1, "Size and CRC of image %s:%d changed back to expected value", + image->name, (int)image->rid ); + } + } else { + image->problem.changed = sizeChanged; + } + mutex_lock( &image->lock ); if ( image->readFd == -1 ) { image->readFd = newFd; + image->problem.read = false; mutex_unlock( &image->lock ); } else { - // There was a race while opening the file (happens cause not locked cause blocking), we lost the race so close new fd and proceed + // There was a race while opening the file (happens cause not locked cause blocking), + // we lost the race so close new fd and proceed. + // *OR* we dup()'ed above for cheating when the image changed before. mutex_unlock( &image->lock ); close( newFd ); } @@ -296,7 +337,7 @@ dnbd3_image_t* image_byId(int imgId) * point... * Locks on: imageListLock, _images[].lock */ -dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking) +dnbd3_image_t* image_get(char *name, uint16_t revision, bool ensureFdOpen) { int i; const char *removingText = _removeMissingImages ? ", removing from list" : ""; @@ -326,84 +367,36 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking) candidate->users++; mutex_unlock( &imageListLock ); - // Found, see if it works - // TODO: Also make sure a non-working image still has old fd open but created a new one and removed itself from the list - // TODO: But remember size-changed images forever - if ( candidate->working || checkIfWorking ) { - // Is marked working, but might not have an fd open - if ( !image_ensureOpen( candidate ) ) { - mutex_lock( &candidate->lock ); - timing_get( &candidate->lastWorkCheck ); - mutex_unlock( &candidate->lock ); - if ( _removeMissingImages ) { - candidate = image_remove( candidate ); // No release here, the image is still returned and should be released by caller - } - return candidate; - } - } - - if ( !checkIfWorking ) return candidate; // Not interested in re-cechking working state - - // ...not working... - - // Don't re-check too often - mutex_lock( &candidate->lock ); - bool check; - declare_now; - check = timing_diff( &candidate->lastWorkCheck, &now ) > NONWORKING_RECHECK_INTERVAL_SECONDS; - if ( check ) { - candidate->lastWorkCheck = now; - } - mutex_unlock( &candidate->lock ); - if ( !check ) { + if ( !ensureFdOpen ) // Don't want to re-check return candidate; - } - // reaching this point means: - // 1) We should check if the image is working, it might or might not be in working state right now - // 2) The image is open for reading (or at least was at some point, the fd might be stale if images lie on an NFS share etc.) - // 3) We made sure not to re-check this image too often - - // Common for ro and rw images: Size check, read check - const off_t len = lseek( candidate->readFd, 0, SEEK_END ); - bool reload = false; - if ( len == -1 ) { - logadd( LOG_WARNING, "lseek() on %s failed (errno=%d)%s.", candidate->path, errno, removingText ); - reload = true; - } else if ( (uint64_t)len != candidate->realFilesize ) { - logadd( LOG_WARNING, "Size of %s changed at runtime, keeping disabled! Expected: %" PRIu64 ", found: %" PRIu64 - ". Try sending SIGHUP to server if you know what you're doing.", - candidate->path, candidate->realFilesize, (uint64_t)len ); - } else { - // Seek worked, file size is same, now see if we can read from file - char buffer[100]; - if ( pread( candidate->readFd, buffer, sizeof(buffer), 0 ) == -1 ) { - logadd( LOG_WARNING, "Reading first %d bytes from %s failed (errno=%d)%s.", - (int)sizeof(buffer), candidate->path, errno, removingText ); - reload = true; - } else if ( !candidate->working ) { - // Seems everything is fine again \o/ - candidate->working = true; - logadd( LOG_INFO, "Changed state of %s:%d to 'working'", candidate->name, candidate->rid ); - } - } + if ( image_ensureOpen( candidate ) && !candidate->problem.read ) + return candidate; // We have a read fd and no read or changed problems - if ( reload ) { + // -- image could not be opened again, or is open but has problem -- + + if ( _removeMissingImages && !file_isReadable( candidate->path ) ) { + candidate = image_remove( candidate ); + // No image_release here, the image is still returned and should be released by caller + } else if ( candidate->readFd != -1 ) { + // We cannot just close the fd as it might be in use. Make a copy and remove old entry. + candidate = image_remove( candidate ); // Could not access the image with exising fd - mark for reload which will re-open the file. // make a copy of the image struct but keep the old one around. If/When it's not being used // anymore, it will be freed automatically. - logadd( LOG_DEBUG1, "Reloading image file %s", candidate->path ); + logadd( LOG_DEBUG1, "Reloading image file %s because of read problem/changed", candidate->path ); dnbd3_image_t *img = calloc( sizeof(dnbd3_image_t), 1 ); img->path = strdup( candidate->path ); img->name = strdup( candidate->name ); img->virtualFilesize = candidate->virtualFilesize; img->realFilesize = candidate->realFilesize; - img->atime = now; + timing_get( &img->atime ); img->masterCrc32 = candidate->masterCrc32; img->readFd = -1; img->rid = candidate->rid; img->users = 1; - img->working = false; + img->problem.read = true; + img->problem.changed = candidate->problem.changed; img->ref_cacheMap = NULL; mutex_init( &img->lock, LOCK_IMAGE ); if ( candidate->crc32 != NULL ) { @@ -419,18 +412,17 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking) if ( image_addToList( img ) ) { image_release( candidate ); candidate = img; + // Check if image is incomplete, initialize uplink + if ( candidate->ref_cacheMap != NULL ) { + uplink_init( candidate, -1, NULL, -1 ); + } + // Try again with new instance + image_ensureOpen( candidate ); } else { img->users = 0; image_free( img ); } - // Check if image is incomplete, initialize uplink - if ( candidate->ref_cacheMap != NULL ) { - uplink_init( candidate, -1, NULL, -1 ); - } - // readFd == -1 and working == FALSE at this point, - // this function needs some splitting up for handling as we need to run most - // of the above code again. for now we know that the next call for this - // name:rid will get ne newly inserted "img" and try to re-open the file. + // readFd == -1 and problem.read == true } return candidate; // We did all we can, hopefully it's working @@ -900,7 +892,6 @@ static bool image_load(char *base, char *path, int withUplink) image->rid = (uint16_t)revision; image->users = 0; image->readFd = -1; - image->working = ( cache == NULL ); timing_get( &image->nextCompletenessEstimate ); image->completenessEstimate = -1; mutex_init( &image->lock, LOCK_IMAGE ); @@ -925,7 +916,7 @@ static bool image_load(char *base, char *path, int withUplink) // Image is definitely incomplete, initialize uplink worker if ( image->ref_cacheMap != NULL ) { - image->working = false; + image->problem.uplink = true; if ( withUplink ) { uplink_init( image, -1, NULL, -1 ); } @@ -937,7 +928,7 @@ static bool image_load(char *base, char *path, int withUplink) // Keep fd for reading fdImage = -1; // Check CRC32 - image_checkRandomBlocks( image, 4 ); + image_checkRandomBlocks( image, 4, -1 ); } else { logadd( LOG_ERROR, "Image list full: Could not add image %s", path ); image->readFd = -1; // Keep fdImage instead, will be closed below @@ -1027,10 +1018,19 @@ static uint32_t* image_loadCrcList(const char * const imagePath, const int64_t f return retval; } -static void image_checkRandomBlocks(dnbd3_image_t *image, const int count) +/** + * Check up to count random blocks from given image. If fromFd is -1, the check will + * be run asynchronously using the integrity checker. Otherwise, the check will + * happen in the function and return the result of the check. + * @param image image to check + * @param count number of blocks to check (max) + * @param fromFd, check synchronously and use this fd for reading, -1 = async + * @return true = OK, false = error. Meaningless if fromFd == -1 + */ +static bool image_checkRandomBlocks(dnbd3_image_t *image, const int count, int fromFd) { if ( image->crc32 == NULL ) - return; + return true; // This checks the first block and (up to) count - 1 random blocks for corruption // via the known crc32 list. This is very sloppy and is merely supposed to detect // accidental corruption due to broken dnbd3-proxy functionality or file system @@ -1038,7 +1038,7 @@ static void image_checkRandomBlocks(dnbd3_image_t *image, const int count) assert( count > 0 ); dnbd3_cache_map_t *cache = ref_get_cachemap( image ); const int hashBlocks = IMGSIZE_TO_HASHBLOCKS( image->virtualFilesize ); - int blocks[count]; + int blocks[count+1]; // +1 for "-1" in sync case int index = 0, j; int block; if ( image_isHashBlockComplete( cache, 0, image->virtualFilesize ) ) { @@ -1062,9 +1062,16 @@ while_end: ; if ( cache != NULL ) { ref_put( &cache->reference ); } - for ( int i = 0; i < index; ++i ) { - integrity_check( image, blocks[i], true ); + if ( fromFd == -1 ) { + // Async + for ( int i = 0; i < index; ++i ) { + integrity_check( image, blocks[i], true ); + } + return true; } + // Sync + blocks[index] = -1; + return image_checkBlocksCrc32( fromFd, image->crc32, blocks, image->realFilesize ); } /** @@ -1306,7 +1313,7 @@ server_fail: ; } else { // Clumsy busy wait, but this should only take as long as it takes to start a thread, so is it really worth using a signalling mechanism? int i = 0; - while ( !image->working && ++i < 100 ) + while ( image->problem.uplink && ++i < 100 ) usleep( 2000 ); } } else if ( uplinkSock != -1 ) { @@ -1599,7 +1606,7 @@ int image_getCompletenessEstimate(dnbd3_image_t * const image) assert( image != NULL ); dnbd3_cache_map_t *cache = ref_get_cachemap( image ); if ( cache == NULL ) - return image->working ? 100 : 0; + return 100; const int len = IMGSIZE_TO_MAPBYTES( image->virtualFilesize ); if ( unlikely( len == 0 ) ) { ref_put( &cache->reference ); diff --git a/src/server/integrity.c b/src/server/integrity.c index 4006dfc..91e53b8 100644 --- a/src/server/integrity.c +++ b/src/server/integrity.c @@ -195,9 +195,10 @@ static void* integrity_main(void * data UNUSED) readFd = directFd; } } - if ( readFd == -1 ) { // Try buffered; flush to disk for that - image_ensureOpen( image ); - readFd = image->readFd; + if ( readFd == -1 ) { // Try buffered as fallback + if ( image_ensureOpen( image ) && !image->problem.read ) { + readFd = image->readFd; + } } if ( readFd == -1 ) { logadd( LOG_MINOR, "Couldn't get any valid fd for integrity check of %s... ignoring...", image->path ); @@ -237,16 +238,6 @@ static void* integrity_main(void * data UNUSED) // Done with this task as nothing left checkQueue[i].image = NULL; if ( i + 1 == queueLen ) queueLen--; - // Mark as working again if applicable - if ( !foundCorrupted ) { - dnbd3_uplink_t *uplink = ref_get_uplink( &image->uplinkref ); - if ( uplink != NULL ) { // TODO: image_determineWorkingState() helper? - mutex_lock( &image->lock ); - image->working = uplink->current.fd != -1 && image->readFd != -1; - mutex_unlock( &image->lock ); - ref_put( &uplink->reference ); - } - } } else { // Still more blocks to go... checkQueue[i].block = blocks[0]; @@ -254,9 +245,6 @@ static void* integrity_main(void * data UNUSED) } if ( foundCorrupted && !_shutdown ) { // Something was fishy, make sure uplink exists - mutex_lock( &image->lock ); - image->working = false; - mutex_unlock( &image->lock ); uplink_init( image, -1, NULL, -1 ); } // Release :-) diff --git a/src/server/net.c b/src/server/net.c index aba4e7d..29147be 100644 --- a/src/server/net.c +++ b/src/server/net.c @@ -262,7 +262,7 @@ void* net_handleNewConnection(void *clientPtr) atomic_thread_fence( memory_order_release ); if ( unlikely( image == NULL ) ) { //logadd( LOG_DEBUG1, "Client requested non-existent image '%s' (rid:%d), rejected\n", image_name, (int)rid ); - } else if ( unlikely( !image->working ) ) { + } else if ( unlikely( image->problem.read || image->problem.changed ) ) { logadd( LOG_DEBUG1, "Client %s requested non-working image '%s' (rid:%d), rejected\n", client->hostName, image_name, (int)rid ); } else { @@ -273,8 +273,14 @@ void* net_handleNewConnection(void *clientPtr) if ( uplink != NULL && ( uplink->cacheFd == -1 || uplink->queueLen > SERVER_UPLINK_QUEUELEN_THRES ) ) { bOk = ( rand() % 4 ) == 1; } - if ( bOk && uplink != NULL && uplink->cacheFd == -1 ) { // Wait 100ms if local caching is not working so this - usleep( 100000 ); // server gets a penalty and is less likely to be selected + if ( bOk && uplink != NULL ) { + if ( uplink->cacheFd == -1 ) { // Wait 100ms if local caching is not working so this + usleep( 100000 ); // server gets a penalty and is less likely to be selected + } + if ( image->problem.uplink ) { + // Penaltize depending on completeness, if no uplink is available + usleep( ( 100 - image->completenessEstimate ) * 100 ); + } } if ( uplink != NULL ) { ref_put( &uplink->reference ); @@ -383,9 +389,8 @@ void* net_handleNewConnection(void *clientPtr) ref_put( &cache->reference ); if ( !isCached ) { if ( !uplink_request( client, request.handle, offset, request.size, request.hops ) ) { - logadd( LOG_DEBUG1, "Could not relay uncached request from %s to upstream proxy, disabling image %s:%d", + logadd( LOG_DEBUG1, "Could not relay uncached request from %s to upstream proxy for image %s:%d", client->hostName, image->name, image->rid ); - image->working = false; goto exit_client_cleanup; } break; // DONE, exit request.cmd switch @@ -456,7 +461,7 @@ void* net_handleNewConnection(void *clientPtr) } if ( err == EBADF || err == EFAULT || err == EINVAL || err == EIO ) { logadd( LOG_INFO, "Disabling %s:%d", image->name, image->rid ); - image->working = false; + image->problem.read = true; } } goto exit_client_cleanup; diff --git a/src/server/uplink.c b/src/server/uplink.c index f39e633..aba53ba 100644 --- a/src/server/uplink.c +++ b/src/server/uplink.c @@ -53,9 +53,9 @@ static void* uplink_mainloop(void *data); static void uplink_sendRequests(dnbd3_uplink_t *uplink, bool newOnly); static int uplink_findNextIncompleteHashBlock(dnbd3_uplink_t *uplink, const int lastBlockIndex); static void uplink_handleReceive(dnbd3_uplink_t *uplink); -static int uplink_sendKeepalive(const int fd); +static bool uplink_sendKeepalive(dnbd3_uplink_t *uplink); static void uplink_addCrc32(dnbd3_uplink_t *uplink); -static void uplink_sendReplicationRequest(dnbd3_uplink_t *uplink); +static bool uplink_sendReplicationRequest(dnbd3_uplink_t *uplink); static bool uplink_reopenCacheFd(dnbd3_uplink_t *uplink, const bool force); static bool uplink_saveCacheMap(dnbd3_uplink_t *uplink); static bool uplink_connectionShouldShutdown(dnbd3_uplink_t *uplink); @@ -117,6 +117,7 @@ bool uplink_init(dnbd3_image_t *image, int sock, dnbd3_host_t *host, int version uplink->current.fd = -1; mutex_unlock( &uplink->sendMutex ); uplink->cycleDetected = false; + image->problem.uplink = true; if ( sock != -1 ) { uplink->better.fd = sock; int index = altservers_hostToIndex( host ); @@ -371,6 +372,7 @@ bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uin logadd( LOG_DEBUG2, "Could not trylock send mutex, queueing uplink request" ); } else { if ( unlikely( uplink->current.fd == -1 ) ) { + uplink->image->problem.uplink = true; mutex_unlock( &uplink->sendMutex ); logadd( LOG_DEBUG2, "Cannot do direct uplink request: No socket open" ); } else { @@ -378,12 +380,14 @@ bool uplink_request(dnbd3_client_t *client, uint64_t handle, uint64_t start, uin const uint32_t reqSize = (uint32_t)(((uplink->queue[freeSlot].to + DNBD3_BLOCK_SIZE - 1) & ~(uint64_t)(DNBD3_BLOCK_SIZE - 1)) - reqStart); if ( hops < 200 ) ++hops; const bool ret = dnbd3_get_block( uplink->current.fd, reqStart, reqSize, reqStart, COND_HOPCOUNT( uplink->current.version, hops ) ); - mutex_unlock( &uplink->sendMutex ); if ( unlikely( !ret ) ) { + uplink->image->problem.uplink = true; + mutex_unlock( &uplink->sendMutex ); logadd( LOG_DEBUG2, "Could not send out direct uplink request, queueing" ); } else { // Direct send succeeded, update queue entry from NEW to PENDING, so the request won't be sent again int state; + mutex_unlock( &uplink->sendMutex ); mutex_lock( &uplink->queueLock ); if ( !uplink->shutdown && uplink->queue[freeSlot].handle == handle && uplink->queue[freeSlot].client == client ) { state = uplink->queue[freeSlot].status; @@ -460,9 +464,9 @@ static void* uplink_mainloop(void *data) } while ( !_shutdown && !uplink->shutdown ) { // poll() - waitTime = uplink->rttTestResult == RTT_DOCHANGE ? 0 : -1; - if ( waitTime == 0 ) { + if ( uplink->rttTestResult == RTT_DOCHANGE ) { // 0 means poll, since we're about to change the server + waitTime = 0; } else { declare_now; waitTime = (int)timing_diffMs( &now, &nextAltCheck ); @@ -495,7 +499,7 @@ static void* uplink_mainloop(void *data) discoverFailCount = 0; if ( fd != -1 ) close( fd ); uplink->replicationHandle = REP_NONE; - uplink->image->working = true; + uplink->image->problem.uplink = false; uplink->replicatedLastBlock = false; // Reset this to be safe - request could've been sent but reply was never received buffer[0] = '@'; if ( altservers_toString( uplink->current.index, buffer + 1, sizeof(buffer) - 1 ) ) { @@ -510,6 +514,11 @@ static void* uplink_mainloop(void *data) uplink_sendRequests( uplink, false ); uplink_sendReplicationRequest( uplink ); events[EV_SOCKET].events = POLLIN | POLLRDHUP; + if ( uplink->image->problem.uplink ) { + // Some of the requests above must have failed again already :-( + logadd( LOG_DEBUG1, "Newly established uplink connection failed during getCRC or sendRequests" ); + uplink_connectionFailed( uplink, true ); + } timing_gets( &nextAltCheck, altCheckInterval ); // The rtt worker already did the handshake for our image, so there's nothing // more to do here @@ -517,6 +526,7 @@ static void* uplink_mainloop(void *data) // Check events // Signal if ( (events[EV_SIGNAL].revents & (POLLERR | POLLHUP | POLLRDHUP | POLLNVAL)) ) { + uplink->image->problem.uplink = true; logadd( LOG_WARNING, "poll error on signal in uplink_mainloop!" ); goto cleanup; } else if ( (events[EV_SIGNAL].revents & POLLIN) ) { @@ -553,14 +563,10 @@ static void* uplink_mainloop(void *data) } // Keep-alive if ( uplink->current.fd != -1 && uplink->replicationHandle == REP_NONE ) { - // Send keep-alive if nothing is happening - if ( uplink_sendKeepalive( uplink->current.fd ) ) { - // Re-trigger periodically, in case it requires a minimum user count - uplink_sendReplicationRequest( uplink ); - } else { + // Send keep-alive if nothing is happening, and try to trigger background rep. + if ( !uplink_sendKeepalive( uplink ) || !uplink_sendReplicationRequest( uplink ) ) { uplink_connectionFailed( uplink, true ); - logadd( LOG_DEBUG1, "Error sending keep-alive, panic!\n" ); - setThreadName( "panic-uplink" ); + logadd( LOG_DEBUG1, "Error sending keep-alive/BGR, panic!\n" ); } } // Don't keep uplink established if we're idle for too much @@ -578,6 +584,7 @@ static void* uplink_mainloop(void *data) // Quit work if image is complete logadd( LOG_INFO, "Replication of %s complete.", uplink->image->name ); setThreadName( "finished-uplink" ); + uplink->image->problem.uplink = false; goto cleanup; } else { // Not complete - do measurement @@ -592,10 +599,6 @@ static void* uplink_mainloop(void *data) } else if ( rttTestResult == RTT_NOT_REACHABLE ) { if ( atomic_compare_exchange_strong( &uplink->rttTestResult, &rttTestResult, RTT_IDLE ) ) { discoverFailCount++; - if ( uplink->image->working && uplink->current.fd == -1 && discoverFailCount > (SERVER_RTT_MAX_UNREACH / 2) ) { - logadd( LOG_DEBUG1, "Disabling %s:%d since no uplink is available", uplink->image->name, (int)uplink->image->rid ); - uplink->image->working = false; - } if ( uplink->current.fd == -1 ) { uplink->cycleDetected = false; } @@ -624,8 +627,9 @@ static void* uplink_mainloop(void *data) } } mutex_unlock( &uplink->queueLock ); - if ( resend ) + if ( resend ) { uplink_sendRequests( uplink, true ); + } } #endif } @@ -653,6 +657,9 @@ static void* uplink_mainloop(void *data) return NULL ; } +/** + * Only called from uplink thread. + */ static void uplink_sendRequests(dnbd3_uplink_t *uplink, bool newOnly) { // Scan for new requests @@ -672,13 +679,15 @@ static void uplink_sendRequests(dnbd3_uplink_t *uplink, bool newOnly) if ( hops < 200 ) ++hops; mutex_lock( &uplink->sendMutex ); const bool ret = dnbd3_get_block( uplink->current.fd, reqStart, reqSize, reqStart, COND_HOPCOUNT( uplink->current.version, hops ) ); - mutex_unlock( &uplink->sendMutex ); - if ( !ret ) { + if ( likely( ret ) ) { + mutex_unlock( &uplink->sendMutex ); + } else { // Non-critical - if the connection dropped or the server was changed // the thread will re-send this request as soon as the connection // is reestablished. + uplink->image->problem.uplink = true; + mutex_unlock( &uplink->sendMutex ); logadd( LOG_DEBUG1, "Error forwarding request to uplink server!\n" ); - altservers_serverFailed( uplink->current.index ); return; } mutex_lock( &uplink->queueLock ); @@ -695,21 +704,27 @@ static void uplink_sendRequests(dnbd3_uplink_t *uplink, bool newOnly) * server. This means we might request data we already have, but it makes * the code simpler. Worst case would be only one bit is zero, which means * 4kb are missing, but we will request 32kb. + * + * Only called form uplink thread, so current.fd is assumed to be valid. + * + * @return false if sending request failed, true otherwise (i.e. not necessary/disabled) */ -static void uplink_sendReplicationRequest(dnbd3_uplink_t *uplink) +static bool uplink_sendReplicationRequest(dnbd3_uplink_t *uplink) { - if ( uplink == NULL || uplink->current.fd == -1 ) return; - if ( _backgroundReplication == BGR_DISABLED || uplink->cacheFd == -1 ) return; // Don't do background replication + if ( uplink->current.fd == -1 ) + return false; // Should never be called in this state, consider send error + if ( _backgroundReplication == BGR_DISABLED || uplink->cacheFd == -1 ) + return true; // Don't do background replication if ( uplink->nextReplicationIndex == -1 || uplink->replicationHandle != REP_NONE ) - return; // Already a replication request on the wire, or no more blocks to replicate + return true; // Already a replication request on the wire, or no more blocks to replicate dnbd3_image_t * const image = uplink->image; - if ( image->virtualFilesize < DNBD3_BLOCK_SIZE ) return; - if ( image->users < _bgrMinClients ) return; // Not enough active users + if ( image->users < _bgrMinClients ) + return true; // Not enough active users dnbd3_cache_map_t *cache = ref_get_cachemap( image ); - if ( cache == NULL || image->users < _bgrMinClients ) { + if ( cache == NULL || image->users ) { // No cache map (=image complete) ref_put( &cache->reference ); - return; + return true; } const int mapBytes = IMGSIZE_TO_MAPBYTES( image->virtualFilesize ); const int lastBlockIndex = mapBytes - 1; @@ -741,17 +756,20 @@ static void uplink_sendReplicationRequest(dnbd3_uplink_t *uplink) if ( replicationIndex == -1 ) { // Replication might be complete, uplink_mainloop should take care.... uplink->nextReplicationIndex = -1; - return; + return true; } const uint64_t offset = (uint64_t)replicationIndex * FILE_BYTES_PER_MAP_BYTE; uplink->replicationHandle = offset; const uint32_t size = (uint32_t)MIN( image->virtualFilesize - offset, FILE_BYTES_PER_MAP_BYTE ); mutex_lock( &uplink->sendMutex ); bool sendOk = dnbd3_get_block( uplink->current.fd, offset, size, uplink->replicationHandle, COND_HOPCOUNT( uplink->current.version, 1 ) ); - mutex_unlock( &uplink->sendMutex ); - if ( !sendOk ) { + if ( likely( sendOk ) ) { + mutex_unlock( &uplink->sendMutex ); + } else { + uplink->image->problem.uplink = true; + mutex_unlock( &uplink->sendMutex ); logadd( LOG_DEBUG1, "Error sending background replication request to uplink server!\n" ); - return; + return false; } if ( replicationIndex == lastBlockIndex ) { uplink->replicatedLastBlock = true; // Special treatment, last byte in map could represent less than 8 blocks @@ -762,6 +780,7 @@ static void uplink_sendReplicationRequest(dnbd3_uplink_t *uplink) // Just crossed a hash block boundary, look for new candidate starting at this very index uplink->nextReplicationIndex = uplink_findNextIncompleteHashBlock( uplink, uplink->nextReplicationIndex ); } + return true; } /** @@ -816,6 +835,7 @@ static int uplink_findNextIncompleteHashBlock(dnbd3_uplink_t *uplink, const int /** * Receive data from uplink server and process/dispatch * Locks on: uplink.lock, images[].lock + * Only called from uplink thread, so current.fd is assumed to be valid. */ static void uplink_handleReceive(dnbd3_uplink_t *uplink) { @@ -990,11 +1010,14 @@ static void uplink_handleReceive(dnbd3_uplink_t *uplink) mutex_lock( &uplink->queueLock ); const bool rep = ( uplink->queueLen == 0 ); mutex_unlock( &uplink->queueLock ); - if ( rep ) uplink_sendReplicationRequest( uplink ); + if ( rep ) { + if ( !uplink_sendReplicationRequest( uplink ) ) + goto error_cleanup; + } } return; // Error handling from failed receive or message parsing - error_cleanup: ; +error_cleanup: ; uplink_connectionFailed( uplink, true ); } @@ -1005,8 +1028,10 @@ static void uplink_connectionFailed(dnbd3_uplink_t *uplink, bool findNew) { if ( uplink->current.fd == -1 ) return; + setThreadName( "panic-uplink" ); altservers_serverFailed( uplink->current.index ); mutex_lock( &uplink->sendMutex ); + uplink->image->problem.uplink = true; close( uplink->current.fd ); uplink->current.fd = -1; mutex_unlock( &uplink->sendMutex ); @@ -1025,14 +1050,24 @@ static void uplink_connectionFailed(dnbd3_uplink_t *uplink, bool findNew) } /** - * Send keep alive request to server + * Send keep alive request to server. + * Called from uplink thread, current.fd must be valid. */ -static int uplink_sendKeepalive(const int fd) +static bool uplink_sendKeepalive(dnbd3_uplink_t *uplink) { static const dnbd3_request_t request = { .magic = dnbd3_packet_magic, .cmd = net_order_16( CMD_KEEPALIVE ) }; - return send( fd, &request, sizeof(request), MSG_NOSIGNAL ) == sizeof(request); + mutex_lock( &uplink->sendMutex ); + bool sendOk = send( uplink->current.fd, &request, sizeof(request), MSG_NOSIGNAL ) == sizeof(request); + mutex_unlock( &uplink->sendMutex ); + return sendOk; } +/** + * Request crclist from uplink. + * Called from uplink thread, current.fd must be valid. + * FIXME This is broken as it could happen that another message arrives after sending + * the request. Refactor, split and move receive into general receive handler. + */ static void uplink_addCrc32(dnbd3_uplink_t *uplink) { dnbd3_image_t *image = uplink->image; @@ -1042,6 +1077,9 @@ static void uplink_addCrc32(dnbd3_uplink_t *uplink) uint32_t *buffer = malloc( bytes ); mutex_lock( &uplink->sendMutex ); bool sendOk = dnbd3_get_crc32( uplink->current.fd, &masterCrc, buffer, &bytes ); + if ( !sendOk ) { + uplink->image->problem.uplink = true; + } mutex_unlock( &uplink->sendMutex ); if ( !sendOk || bytes == 0 ) { free( buffer ); -- cgit v1.2.3-55-g7522