From 53dd7bc20968ddfaa601c517e6feb745bb3a21ab Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Sun, 10 Nov 2013 18:19:11 +0100 Subject: [SERVER] Split "pending" lock for alt-server finding into producer and consumer lock to fix a potential NPA when an uplink dies Also some refactoring of variable names and more comments --- src/server/altservers.c | 180 ++++++++++++++++++++++++++++-------------------- 1 file changed, 105 insertions(+), 75 deletions(-) diff --git a/src/server/altservers.c b/src/server/altservers.c index 2cb5061..2eca369 100644 --- a/src/server/altservers.c +++ b/src/server/altservers.c @@ -17,12 +17,13 @@ #include "protocol.h" static dnbd3_connection_t *pending[SERVER_MAX_PENDING_ALT_CHECKS]; -static pthread_spinlock_t pendingLock; +static pthread_spinlock_t pendingLockProduce; // Lock for adding something to pending. (NULL -> nonNULL) +static pthread_mutex_t pendingLockConsume = PTHREAD_MUTEX_INITIALIZER; // Lock for removegin something (nunNULL -> NULL) static int signalPipe = -1; -static dnbd3_alt_server_t _alt_servers[SERVER_MAX_ALTS]; -static int _num_alts = 0; -static pthread_spinlock_t _alts_lock; +static dnbd3_alt_server_t altServers[SERVER_MAX_ALTS]; +static int numAltServers = 0; +static pthread_spinlock_t altServersLock; static int initDone = FALSE; static pthread_t altThread; @@ -32,13 +33,13 @@ static unsigned int altservers_updateRtt(const dnbd3_host_t * const host, const int altservers_getCount() { - return _num_alts; + return numAltServers; } void altservers_init() { - spin_init( &_alts_lock, PTHREAD_PROCESS_PRIVATE ); - memset( _alt_servers, 0, SERVER_MAX_ALTS * sizeof(dnbd3_alt_server_t) ); + spin_init( &altServersLock, PTHREAD_PROCESS_PRIVATE ); + memset( altServers, 0, SERVER_MAX_ALTS * sizeof(dnbd3_alt_server_t) ); if ( 0 != pthread_create( &altThread, NULL, &altservers_main, (void *)NULL ) ) { memlogf( "[ERROR] Could not start altservers connector thread" ); exit( EXIT_FAILURE ); @@ -49,7 +50,6 @@ void altservers_init() void altservers_shutdown() { if ( !initDone ) return; - spin_destroy( &_alts_lock ); pthread_join( altThread, NULL ); } @@ -90,27 +90,27 @@ int altservers_load() int altservers_add(dnbd3_host_t *host, const char *comment, const int isPrivate) { int i, freeSlot = -1; - spin_lock( &_alts_lock ); - for (i = 0; i < _num_alts; ++i) { - if ( isSameAddressPort( &_alt_servers[i].host, host ) ) { - spin_unlock( &_alts_lock ); + spin_lock( &altServersLock ); + for (i = 0; i < numAltServers; ++i) { + if ( isSameAddressPort( &altServers[i].host, host ) ) { + spin_unlock( &altServersLock ); return FALSE; - } else if ( freeSlot == -1 && _alt_servers[i].host.type == 0 ) { + } else if ( freeSlot == -1 && altServers[i].host.type == 0 ) { freeSlot = i; } } if ( freeSlot == -1 ) { - if ( _num_alts >= SERVER_MAX_ALTS ) { + if ( numAltServers >= SERVER_MAX_ALTS ) { memlogf( "[WARNING] Cannot add another alt server, maximum of %d already reached.", (int)SERVER_MAX_ALTS ); - spin_unlock( &_alts_lock ); + spin_unlock( &altServersLock ); return FALSE; } - freeSlot = _num_alts++; + freeSlot = numAltServers++; } - _alt_servers[freeSlot].host = *host; - _alt_servers[freeSlot].isPrivate = isPrivate; - if ( comment != NULL ) snprintf( _alt_servers[freeSlot].comment, COMMENT_LENGTH, "%s", comment ); - spin_unlock( &_alts_lock ); + altServers[freeSlot].host = *host; + altServers[freeSlot].isPrivate = isPrivate; + if ( comment != NULL ) snprintf( altServers[freeSlot].comment, COMMENT_LENGTH, "%s", comment ); + spin_unlock( &altServersLock ); return TRUE; } @@ -120,25 +120,33 @@ int altservers_add(dnbd3_host_t *host, const char *comment, const int isPrivate) void altservers_findUplink(dnbd3_connection_t *uplink) { int i; + // if betterFd != -1 it means the uplink is supposed to switch to another + // server. As this function here is called by the uplink thread, it can + // never be that the uplink is supposed to switch, but instead calls + // this function. assert( uplink->betterFd == -1 ); - spin_lock( &pendingLock ); + spin_lock( &pendingLockProduce ); + // it is however possible that an RTT measurement is currently in progress, + // so check for that case and do nothing if one is in progress if ( uplink->rttTestResult == RTT_INPROGRESS ) { for (i = 0; i < SERVER_MAX_PENDING_ALT_CHECKS; ++i) { if ( pending[i] != uplink ) continue; - spin_unlock( &pendingLock ); + // Yep, measuring right now + spin_unlock( &pendingLockProduce ); return; } } + // Find free slot for measurement for (i = 0; i < SERVER_MAX_PENDING_ALT_CHECKS; ++i) { if ( pending[i] != NULL ) continue; pending[i] = uplink; uplink->rttTestResult = RTT_INPROGRESS; - spin_unlock( &pendingLock ); - write( signalPipe, "", 1 ); + spin_unlock( &pendingLockProduce ); + write( signalPipe, "", 1 ); // Wake altservers thread up return; } // End of loop - no free slot - spin_unlock( &pendingLock ); + spin_unlock( &pendingLockProduce ); memlogf( "[WARNING] No more free RTT measurement slots, ignoring a request..." ); } @@ -147,39 +155,40 @@ void altservers_findUplink(dnbd3_connection_t *uplink) */ void altservers_removeUplink(dnbd3_connection_t *uplink) { - spin_lock( &pendingLock ); + pthread_mutex_lock( &pendingLockConsume ); for (int i = 0; i < SERVER_MAX_PENDING_ALT_CHECKS; ++i) { if ( pending[i] == uplink ) pending[i] = NULL; } - spin_unlock( &pendingLock ); + pthread_mutex_unlock( &pendingLockConsume ); } /** * Get known (working) alt servers, ordered by network closeness * (by finding the smallest possible subnet) - * Private servers are excluded + * Private servers are excluded, so this is what you want to call to + * get a list of servers you can tell a client about */ int altservers_getMatching(dnbd3_host_t *host, dnbd3_server_entry_t *output, int size) { - if ( host == NULL || host->type == 0 || _num_alts == 0 || output == NULL || size <= 0 ) return 0; + if ( host == NULL || host->type == 0 || numAltServers == 0 || output == NULL || size <= 0 ) return 0; int i, j; int count = 0; int distance[size]; - spin_lock( &_alts_lock ); - for (i = 0; i < _num_alts; ++i) { - if ( host->type != _alt_servers[i].host.type ) continue; // Wrong address family - if ( _alt_servers[i].isPrivate ) continue; // Do not tell clients about private servers + spin_lock( &altServersLock ); + for (i = 0; i < numAltServers; ++i) { + if ( host->type != altServers[i].host.type ) continue; // Wrong address family + if ( altServers[i].isPrivate ) continue; // Do not tell clients about private servers // TODO: Prefer same AF here, but if in the end we got less servers than requested, add // servers of other AF too (after this loop) if ( count == 0 ) { // Trivial - this is the first entry - output[0].host = _alt_servers[i].host; + output[0].host = altServers[i].host; output[0].failures = 0; distance[0] = altservers_netCloseness( host, &output[0].host ); count++; } else { // Other entries already exist, insert in proper position - const int dist = altservers_netCloseness( host, &_alt_servers[i].host ); + const int dist = altservers_netCloseness( host, &altServers[i].host ); for (j = 0; j < size; ++j) { if ( j < count && dist <= distance[j] ) continue; if ( j > count ) break; // Should never happen but just in case... @@ -191,7 +200,7 @@ int altservers_getMatching(dnbd3_host_t *host, dnbd3_server_entry_t *output, int if ( count < size ) { count++; } - output[j].host = _alt_servers[i].host; + output[j].host = altServers[i].host; output[j].failures = 0; distance[j] = dist; break; @@ -199,38 +208,42 @@ int altservers_getMatching(dnbd3_host_t *host, dnbd3_server_entry_t *output, int } } // TODO: "if count < size then add servers of other address families" - spin_unlock( &_alts_lock ); + spin_unlock( &altServersLock ); return count; } /** * Get alt servers. If there are more alt servers than - * requested, random servers will be picked + * requested, random servers will be picked. + * This function is suited for finding uplink servers as + * it includes private servers and ignores any "client only" servers */ int altservers_get(dnbd3_host_t *output, int size) { if ( size <= 0 ) return 0; int count = 0, i; const time_t now = time( NULL ); - spin_lock( &_alts_lock ); + spin_lock( &altServersLock ); // Flip first server in list with a random one every time this is called - if ( _num_alts > 1 ) { - const dnbd3_alt_server_t tmp = _alt_servers[0]; + if ( numAltServers > 1 ) { + const dnbd3_alt_server_t tmp = altServers[0]; do { - i = rand() % _num_alts; + i = rand() % numAltServers; } while ( i == 0 ); - _alt_servers[0] = _alt_servers[i]; - _alt_servers[i] = tmp; + altServers[0] = altServers[i]; + altServers[i] = tmp; } - for (i = 0; i < _num_alts; ++i) { - if ( _alt_servers[i].host.type == 0 ) continue; - if ( _proxyPrivateOnly && !_alt_servers[i].isPrivate ) continue; - if ( _alt_servers[i].numFails > SERVER_MAX_UPLINK_FAILS && now - _alt_servers[i].lastFail > SERVER_BAD_UPLINK_IGNORE ) continue; - _alt_servers[i].numFails = 0; - output[count++] = _alt_servers[i].host; + for (i = 0; i < numAltServers; ++i) { + if ( altServers[i].host.type == 0 ) continue; // Slot is empty + if ( _proxyPrivateOnly && !altServers[i].isPrivate ) continue; // Config says to consider private alt-servers only? ignore! + if ( altServers[i].numFails > SERVER_MAX_UPLINK_FAILS // server failed X times in a row + && now - altServers[i].lastFail > SERVER_BAD_UPLINK_IGNORE ) continue; // and last fail was not too long ago? ignore! + // server seems ok, include in output and reset its fail counter + altServers[i].numFails = 0; + output[count++] = altServers[i].host; if ( count >= size ) break; } - spin_unlock( &_alts_lock ); + spin_unlock( &altServersLock ); return count; } @@ -241,24 +254,24 @@ static unsigned int altservers_updateRtt(const dnbd3_host_t * const host, const { unsigned int avg = rtt; int i; - spin_lock( &_alts_lock ); - for (i = 0; i < _num_alts; ++i) { - if ( !isSameAddressPort( host, &_alt_servers[i].host ) ) continue; - _alt_servers[i].rtt[++_alt_servers[i].rttIndex % SERVER_RTT_PROBES] = rtt; + spin_lock( &altServersLock ); + for (i = 0; i < numAltServers; ++i) { + if ( !isSameAddressPort( host, &altServers[i].host ) ) continue; + altServers[i].rtt[++altServers[i].rttIndex % SERVER_RTT_PROBES] = rtt; #if SERVER_RTT_PROBES == 5 - avg = (_alt_servers[i].rtt[0] + _alt_servers[i].rtt[1] + _alt_servers[i].rtt[2] + _alt_servers[i].rtt[3] + _alt_servers[i].rtt[4]) + avg = (altServers[i].rtt[0] + altServers[i].rtt[1] + altServers[i].rtt[2] + altServers[i].rtt[3] + altServers[i].rtt[4]) / SERVER_RTT_PROBES; #else #warning You might want to change the code in altservers_update_rtt if you changed SERVER_RTT_PROBES avg = 0; for (int j = 0; j < SERVER_RTT_PROBES; ++j) { - avg += _alt_servers[i].rtt[j]; + avg += altServers[i].rtt[j]; } avg /= SERVER_RTT_PROBES; #endif break; } - spin_unlock( &_alts_lock ); + spin_unlock( &altServersLock ); return avg; } @@ -291,18 +304,29 @@ void altservers_serverFailed(const dnbd3_host_t * const host) { int i; const time_t now = time( NULL ); - spin_lock( &_alts_lock ); - for (i = 0; i < _num_alts; ++i) { - if ( !isSameAddressPort( host, &_alt_servers[i].host ) ) continue; - if ( now - _alt_servers[i].lastFail > SERVER_RTT_DELAY_INIT ) { - _alt_servers[i].numFails++; - _alt_servers[i].lastFail = now; + spin_lock( &altServersLock ); + for (i = 0; i < numAltServers; ++i) { + if ( !isSameAddressPort( host, &altServers[i].host ) ) continue; + // Do only increase counter if last fail was not too recent. This is + // to prevent the counter from increasing rapidly if many images use the + // same uplink. If there's a network hickup, all uplinks will call this + // function and would increase the counter too quickly, disabling the server. + if ( now - altServers[i].lastFail > SERVER_RTT_DELAY_INIT ) { + altServers[i].numFails++; + altServers[i].lastFail = now; } break; } - spin_unlock( &_alts_lock ); + spin_unlock( &altServersLock ); } - +/** + * Mainloop of this module. It will wait for requests by uplinks to find a + * suitable uplink server for them. If found, it will tell the uplink about + * the best server found. Currently the RTT history is kept per server and + * not per uplink, so if many images use the same uplink server, the history + * will update quite quickly. Needs to be improved some time, ie. by only + * updating the rtt if the last update was at least X seconds ago. + */ static void *altservers_main(void *data) { const int MAXEVENTS = 3; @@ -320,7 +344,7 @@ static void *altservers_main(void *data) setThreadName( "altserver-check" ); blockNoncriticalSignals(); // Init spinlock - spin_init( &pendingLock, PTHREAD_PROCESS_PRIVATE ); + spin_init( &pendingLockProduce, PTHREAD_PROCESS_PRIVATE ); // Init waiting links queue for (int i = 0; i < SERVER_MAX_PENDING_ALT_CHECKS; ++i) pending[i] = NULL; @@ -370,10 +394,13 @@ static void *altservers_main(void *data) } } // Work your way through the queue - spin_lock( &pendingLock ); for (itLink = 0; itLink < SERVER_MAX_PENDING_ALT_CHECKS; ++itLink) { - if ( pending[itLink] == NULL ) continue; - spin_unlock( &pendingLock ); + if ( pending[itLink] == NULL ) continue; // Check once before locking, as a mutex is expensive + pthread_mutex_lock( &pendingLockConsume ); + if ( pending[itLink] == NULL ) { // Check again after locking + continue; + pthread_mutex_unlock( &pendingLockConsume ); + } dnbd3_connection_t * const uplink = pending[itLink]; assert( uplink->rttTestResult == RTT_INPROGRESS ); // Now get 4 alt servers @@ -394,7 +421,7 @@ static void *altservers_main(void *data) unsigned int bestRtt = 0xfffffff; unsigned int currentRtt = 0xfffffff; for (itAlt = 0; itAlt < numAlts; ++itAlt) { - usleep( 1000 ); + usleep( 1000 ); // Wait a very short moment for the network to recover (we might be doing lots of measurements...) // Connect clock_gettime( CLOCK_MONOTONIC_RAW, &start ); int sock = sock_connect( &servers[itAlt], 750, 1250 ); @@ -425,7 +452,7 @@ static void *altservers_main(void *data) // Request random block ++++++++++++++++++++++++++++++ fixup_request( request ); if ( !dnbd3_get_block( sock, - (((uint64_t)start.tv_nsec | (uint64_t)rand()) * DNBD3_BLOCK_SIZE )% uplink->image->filesize, + (((uint64_t)start.tv_nsec ^ (uint64_t)rand()) * DNBD3_BLOCK_SIZE )% uplink->image->filesize, DNBD3_BLOCK_SIZE) ) { ERROR_GOTO_VA( server_failed, "[ERROR] Could not request random block for %s", uplink->image->lower_name ); } @@ -449,18 +476,23 @@ static void *altservers_main(void *data) const unsigned int rtt = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_nsec - start.tv_nsec) / 1000; // µs const unsigned int avg = altservers_updateRtt( &servers[itAlt], rtt ); if ( uplink->fd != -1 && isSameAddressPort( &servers[itAlt], &uplink->currentServer ) ) { + // Was measuring current server currentRtt = avg; close( sock ); } else if ( avg < bestRtt ) { + // Was another server, update "best" if ( bestSock != -1 ) close( bestSock ); bestSock = sock; bestRtt = avg; bestIndex = itAlt; } else { + // Was too slow, ignore close( sock ); } + // We're done, call continue continue; // Jump here if anything went wrong + // This will cleanup and continue server_failed: ; altservers_serverFailed( &servers[itAlt] ); server_image_not_available: ; @@ -480,12 +512,10 @@ static void *altservers_main(void *data) } // end of loop over all pending uplinks pending[itLink] = NULL; - spin_lock( &pendingLock ); + pthread_mutex_unlock( &pendingLockConsume ); } - spin_unlock( &pendingLock ); } cleanup: ; - spin_destroy( &pendingLock ); if ( fdEpoll != -1 ) close( fdEpoll ); if ( readPipe != -1 ) close( readPipe ); if ( signalPipe != -1 ) close( signalPipe ); -- cgit v1.2.3-55-g7522 From f0c812050a286d24d79cf38e77fa4eaefb1cec49 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Sun, 10 Nov 2013 18:34:30 +0100 Subject: Refine CMakeLists: Tell cmake it's a C only project --- CMakeLists.txt | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 596fa43..03964a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,8 +2,8 @@ # GENERAL # ################################################################################ -PROJECT(dnbd3) -CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0) +PROJECT(dnbd3 C) +CMAKE_MINIMUM_REQUIRED(VERSION 2.6.2) IF (CMAKE_BUILD_TYPE STREQUAL "") SET(CMAKE_BUILD_TYPE Debug) ENDIF() @@ -12,10 +12,9 @@ message( "Build Type selected: ${CMAKE_BUILD_TYPE}" ) SET(CMAKE_C_FLAGS_DEBUG "-std=c99 -O0 -g -Wall -Wno-unused-result -D_GNU_SOURCE -D_DEBUG -Wno-multichar") SET(CMAKE_C_FLAGS_RELEASE "-std=c99 -O2 -Wno-unused-result -D_GNU_SOURCE -DNDEBUG -Wno-multichar") -SET(CMAKE_CXX_FLAGS_DEBUG "-std=c99 -O0 -g -Wall -Wno-unused-result -D_GNU_SOURCE -D_DEBUG") -SET(CMAKE_CXX_FLAGS_RELEASE "-std=c99 -O2 -Wno-unused-result -D_GNU_SOURCE -DNDEBUG" ) +#SET(CMAKE_CXX_FLAGS_DEBUG "-std=c99 -O0 -g -Wall -Wno-unused-result -D_GNU_SOURCE -D_DEBUG") +#SET(CMAKE_CXX_FLAGS_RELEASE "-std=c99 -O2 -Wno-unused-result -D_GNU_SOURCE -DNDEBUG" ) -ADD_DEFINITIONS(-DIPC_TCP) ADD_DEFINITIONS(-D_FILE_OFFSET_BITS=64) ADD_DEFINITIONS(-DWITH_IPV6) -- cgit v1.2.3-55-g7522 From f28b29dbd2087dc8d5982e579971095f2cc73c0f Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Sun, 10 Nov 2013 20:55:48 +0100 Subject: [SERVER] Update LOCKS readme --- LOCKS | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/LOCKS b/LOCKS index 5de521a..a108f1d 100644 --- a/LOCKS +++ b/LOCKS @@ -17,10 +17,12 @@ remoteCloneLock _images_lock _images[].lock uplink.queueLock -_alts_lock +pendingLockProduce +pendingLockConsume +altServersLock client.sendMutex -If you need to lock multiple clients at once, +If you need to lock multiple clients/images/... at once, lock the client with the lowest array index first. If the program logic would require to aquire the -- cgit v1.2.3-55-g7522 From 091ee24472b2f95994425afd4ba2a6e45fde32ce Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Sun, 10 Nov 2013 20:56:50 +0100 Subject: Remove outdated and invalid example configs --- client.conf.example | 29 ----------------------------- server.conf.example | 9 --------- 2 files changed, 38 deletions(-) delete mode 100644 client.conf.example delete mode 100644 server.conf.example diff --git a/client.conf.example b/client.conf.example deleted file mode 100644 index 264632f..0000000 --- a/client.conf.example +++ /dev/null @@ -1,29 +0,0 @@ -# This is a sample configuration file for dnbd3-client - -[Ubuntu 10.04] -server=132.230.4.29 -vid=1 -rid=3 -device=/dev/dnbd0 -ahead=256 - -[Ubuntu 10.10] -server=132.230.4.29 -vid=2 -rid=1 -device=/dev/dnbd1 -ahead=256 - -[Ubuntu 11.04] -server=132.230.4.29 -vid=3 -rid=1 -device=/dev/dnbd2 -ahead=256 - -[Ubuntu 11.10] -server=132.230.4.29 -vid=4 -rid=1 -device=/dev/dnbd3 -ahead=256 diff --git a/server.conf.example b/server.conf.example deleted file mode 100644 index 464a870..0000000 --- a/server.conf.example +++ /dev/null @@ -1,9 +0,0 @@ -# This is a sample configuration file for dnbd3-server - -[settings] -default_namespace=uni-freiburg/rz/bunker - -[eclipse-cdt linux (tar.gz)] -file=/home/sr/Downloads/eclipse-cpp-juno-linux-gtk-x86_64.tar.gz -servers=127.0.0.20:1234;132.230.4.58;2000::dead:beef -rid=3 -- cgit v1.2.3-55-g7522 From 6a6a0e03482a7a799d2bc9ac3440b81d0780547a Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Sun, 10 Nov 2013 21:30:24 +0100 Subject: [SERVER] Minor tweaks here and there --- src/config.h | 17 +++++++++-------- src/kernel/blk.c | 13 +------------ src/server/altservers.c | 2 +- src/server/globals.c | 2 ++ src/server/globals.h | 5 +++++ src/server/image.c | 14 +++++++------- src/server/net.c | 2 +- 7 files changed, 26 insertions(+), 29 deletions(-) diff --git a/src/config.h b/src/config.h index 7df49a0..0228ae9 100644 --- a/src/config.h +++ b/src/config.h @@ -25,19 +25,19 @@ // ########### SERVER ########### // +++++ Performance related -#define SERVER_MAX_CLIENTS 5000 +#define SERVER_MAX_CLIENTS 2000 #define SERVER_MAX_IMAGES 5000 -#define SERVER_MAX_ALTS 1000 -#define SERVER_MAX_UPLINK_QUEUE 1500 -#define SERVER_MAX_UPLINK_FAILS 4 // How many times may a server fail until it is considered bad -#define SERVER_BAD_UPLINK_IGNORE 300 // How many seconds is a server considered bad? -#define SERVER_UPLINK_QUEUELEN_THRES 900 +#define SERVER_MAX_ALTS 250 +#define SERVER_MAX_UPLINK_QUEUE 1500 +#define SERVER_MAX_UPLINK_FAILS 8 // How many times may a server fail until it is considered bad +#define SERVER_BAD_UPLINK_IGNORE 120 // How many seconds is a server considered bad? +#define SERVER_UPLINK_QUEUELEN_THRES 900 #define SERVER_MAX_PENDING_ALT_CHECKS 50 // +++++ Other magic constants #define SERVER_RTT_PROBES 5 #define SERVER_RTT_DELAY_INIT 5 -#define SERVER_RTT_DELAY_MAX 15 +#define SERVER_RTT_DELAY_MAX 45 #define SERVER_REMOTE_IMAGE_CHECK_CACHETIME 600 // 10 minutes #define SERVER_MAX_PROXY_IMAGE_SIZE 100000000000LL // 100GB @@ -61,7 +61,8 @@ #define COMMENT_LENGTH 120 // in seconds if not stated otherwise (MS = milliseconds) -#define SOCKET_TIMEOUT_SERVER_MS 30000 +#define SOCKET_TIMEOUT_SERVER_MS 15000 +#define SOCKET_TIMEOUT_SERVER_RETRIES 3 // When waiting for next header, max reties * above timeout is the actual total timeout (ping timeout) #define SOCKET_TIMEOUT_CLIENT_DATA 2 #define SOCKET_TIMEOUT_CLIENT_DISCOVERY 1 diff --git a/src/kernel/blk.c b/src/kernel/blk.c index 9cd4a76..e55de25 100644 --- a/src/kernel/blk.c +++ b/src/kernel/blk.c @@ -200,18 +200,7 @@ int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, u break; case IOCTL_SWITCH: - if (msg == NULL) - { - result = -EINVAL; - } - else if (memcmp(&dev->cur_server.host, &msg->host, sizeof(msg->host))) - { - dnbd3_net_disconnect(dev); - dev->cur_server.host = msg->host; - result = dnbd3_net_connect(dev); - } - else - result = 0; + result = -EINVAL; break; case IOCTL_ADD_SRV: diff --git a/src/server/altservers.c b/src/server/altservers.c index 2eca369..17e8e4b 100644 --- a/src/server/altservers.c +++ b/src/server/altservers.c @@ -424,7 +424,7 @@ static void *altservers_main(void *data) usleep( 1000 ); // Wait a very short moment for the network to recover (we might be doing lots of measurements...) // Connect clock_gettime( CLOCK_MONOTONIC_RAW, &start ); - int sock = sock_connect( &servers[itAlt], 750, 1250 ); + int sock = sock_connect( &servers[itAlt], 750, _uplinkTimeout ); if ( sock < 0 ) continue; // Select image ++++++++++++++++++++++++++++++ if ( !dnbd3_select_image( sock, uplink->image->lower_name, uplink->image->rid, FLAGS8_SERVER ) ) { diff --git a/src/server/globals.c b/src/server/globals.c index 4f081c7..3fcb61d 100644 --- a/src/server/globals.c +++ b/src/server/globals.c @@ -14,6 +14,7 @@ int _serverPenalty = 0; int _clientPenalty = 0; int _isProxy = FALSE; int _proxyPrivateOnly = FALSE; +int _uplinkTimeout = 1250; #define SAVE_TO_VAR_STR(ss, kk) do { if (strcmp(section, #ss) == 0 && strcmp(key, #kk) == 0) { if (_ ## kk != NULL) free(_ ## kk); _ ## kk = strdup(value); } } while (0) #define SAVE_TO_VAR_BOOL(ss, kk) do { if (strcmp(section, #ss) == 0 && strcmp(key, #kk) == 0) _ ## kk = atoi(value) != 0 || strcmp(value, "true") == 0 || strcmp(value, "True") == 0 || strcmp(value, "TRUE") == 0; } while (0) @@ -27,6 +28,7 @@ static int ini_handler(void *custom, const char* section, const char* key, const SAVE_TO_VAR_BOOL( dnbd3, proxyPrivateOnly ); SAVE_TO_VAR_INT( dnbd3, serverPenalty ); SAVE_TO_VAR_INT( dnbd3, clientPenalty ); + SAVE_TO_VAR_INT( dnbd3, uplinkTimeout ); return TRUE; } diff --git a/src/server/globals.h b/src/server/globals.h index a15678b..921398d 100644 --- a/src/server/globals.h +++ b/src/server/globals.h @@ -163,6 +163,11 @@ extern int _isProxy; */ extern int _proxyPrivateOnly; +/** + * Read timeout when waiting for data on an uplink + */ +extern int _uplinkTimeout; + void globals_loadConfig(); #endif /* GLOBALS_H_ */ diff --git a/src/server/image.c b/src/server/image.c index e137a1b..37eb11c 100644 --- a/src/server/image.c +++ b/src/server/image.c @@ -184,7 +184,7 @@ int image_saveCacheMap(dnbd3_image_t *image) strcpy( mapfile, image->path ); strcat( mapfile, ".map" ); - fd = open( mapfile, O_WRONLY | O_CREAT, 0640 ); + fd = open( mapfile, O_WRONLY | O_CREAT, 0644 ); if ( fd < 0 ) { spin_lock( &image->lock ); image->users--; @@ -732,8 +732,8 @@ int image_create(char *image, int revision, uint64_t size) const int mapsize = IMGSIZE_TO_MAPBYTES(size); // Write files int fdImage = -1, fdCache = -1; - fdImage = open( path, O_RDWR | O_TRUNC | O_CREAT, 0640 ); - fdCache = open( cache, O_RDWR | O_TRUNC | O_CREAT, 0640 ); + fdImage = open( path, O_RDWR | O_TRUNC | O_CREAT, 0644 ); + fdCache = open( cache, O_RDWR | O_TRUNC | O_CREAT, 0644 ); if ( fdImage < 0 ) { memlogf( "[ERROR] Could not open %s for writing.", path ); goto failure_cleanup; @@ -798,7 +798,7 @@ dnbd3_image_t* image_getOrClone(char *name, uint16_t revision) || remoteCloneCache[i].deadline < now || strcmp( cmpname, remoteCloneCache[i].name ) != 0 ) continue; pthread_mutex_unlock( &remoteCloneLock ); // Was recently checked... - return NULL ; + return image_get( name, revision ); } // Re-check to prevent two clients at the same time triggering this image = image_get( name, revision ); @@ -821,7 +821,7 @@ dnbd3_image_t* image_getOrClone(char *name, uint16_t revision) uint16_t remoteVersion, remoteRid; uint64_t remoteImageSize; for (i = 0; i < count; ++i) { - int sock = sock_connect( &servers[i], 500, 1500 ); + int sock = sock_connect( &servers[i], 750, _uplinkTimeout ); if ( sock < 0 ) continue; if ( !dnbd3_select_image( sock, name, revision, FLAGS8_SERVER ) ) goto server_fail; char *remoteName; @@ -882,7 +882,7 @@ static int image_clone(int sock, char *name, uint16_t revision, uint64_t imageSi if ( lists_crc != masterCrc ) { memlogf( "[WARNING] OTF-Clone: Corrupted CRC-32 list. ignored. (%s)", name ); } else { - int fd = open( crcFile, O_WRONLY | O_CREAT, 0640 ); + int fd = open( crcFile, O_WRONLY | O_CREAT, 0644 ); write( fd, &lists_crc, sizeof(uint32_t) ); write( fd, crc32list, crc32len ); close( fd ); @@ -938,7 +938,7 @@ int image_generateCrcFile(char *image) close( fdImage ); return FALSE; } - int fdCrc = open( crcFile, O_RDWR | O_CREAT, 0640 ); + int fdCrc = open( crcFile, O_RDWR | O_CREAT, 0644 ); if ( fdCrc < 0 ) { printf( "Could not open CRC File %s for writing..\n", crcFile ); close( fdImage ); diff --git a/src/server/net.c b/src/server/net.c index 3e9383d..1d9d226 100644 --- a/src/server/net.c +++ b/src/server/net.c @@ -50,7 +50,7 @@ static inline char recv_request_header(int sock, dnbd3_request_t *request) int ret, fails = 0; // Read request header from socket while ( (ret = recv( sock, request, sizeof(*request), MSG_WAITALL )) != sizeof(*request) ) { - if ( ret >= 0 || ++fails > 10 ) return FALSE; + if ( ret >= 0 || ++fails > SOCKET_TIMEOUT_SERVER_RETRIES ) return FALSE; const int err = errno; if ( err == EAGAIN || err == EINTR ) continue; printf( "[DEBUG] Error receiving request: Could not read message header (%d/%d, e=%d)\n", ret, (int)sizeof(*request), err ); -- cgit v1.2.3-55-g7522 From cc25ff449361a41429f127d8155131c950d68b6c Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Mon, 11 Nov 2013 19:40:58 +0100 Subject: [CLIENT] Add options to add and remove alt-servers --- src/client/client.c | 125 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 94 insertions(+), 31 deletions(-) diff --git a/src/client/client.c b/src/client/client.c index ae6879e..c0d316b 100644 --- a/src/client/client.c +++ b/src/client/client.c @@ -42,8 +42,9 @@ #define DEV_LEN 15 #define MAX_DEVS 50 + static int openDevices[MAX_DEVS]; -static const char *optString = "f:h:i:r:d:a:c:s:HV?"; +static const char *optString = "f:h:i:r:d:a:cs:HV?k"; static const struct option longOpts[] = { { "file", required_argument, NULL, 'f' }, { "host", required_argument, NULL, 'h' }, @@ -51,8 +52,10 @@ static const struct option longOpts[] = { { "rid", required_argument, NULL, 'r' }, { "device", required_argument, NULL, 'd' }, { "ahead", required_argument, NULL, 'a' }, - { "close", required_argument, NULL, 'c' }, + { "close", no_argument, NULL, 'c' }, { "switch", required_argument, NULL, 's' }, + { "add", required_argument, NULL, 'adds' }, + { "remove", required_argument, NULL, 'rems' }, { "help", no_argument, NULL, 'H' }, { "version", no_argument, NULL, 'V' }, { "daemon", no_argument, NULL, 'D' }, @@ -65,12 +68,42 @@ static const struct option longOpts[] = { static int dnbd3_ioctl(const char *dev, const int command, dnbd3_ioctl_t * const msg); static void dnbd3_client_daemon(); static void dnbd3_daemon_action(int client, int argc, char **argv); -static int dnbd3_daemon_close(int uid, char *device); +static int dnbd3_daemon_ioctl(int uid, char *device, int action, const char *actionName, char *host); static char* dnbd3_daemon_open(int uid, char *host, char *image, int rid, int readAhead); static int dnbd3_daemon_send(int argc, char **argv); static void dnbd3_print_help(char *argv_0); static void dnbd3_print_version(); +/** + * Convert a host and port (network byte order) to printable representation. + * Worst case required buffer len is 48, eg. [1234:1234:1234:1234:1234:1234:1234:1234]:12345 (+ \0) + * Returns TRUE on success, FALSE on error + */ +static char host_to_string(const dnbd3_host_t *host, char *target, size_t targetlen) +{ + // Worst case: Port 5 chars, ':' to separate ip and port 1 char, terminating null 1 char = 7, [] for IPv6 + if ( targetlen < 10 ) return FALSE; + if ( host->type == AF_INET6 ) { + *target++ = '['; + inet_ntop( AF_INET6, host->addr, target, targetlen - 10 ); + target += strlen( target ); + *target++ = ']'; + } else if ( host->type == AF_INET ) { + inet_ntop( AF_INET, host->addr, target, targetlen - 8 ); + target += strlen( target ); + } else { + snprintf( target, targetlen, "", (int)host->type ); + return FALSE; + } + *target = '\0'; + if ( host->port != 0 ) { + // There are still at least 7 bytes left in the buffer, port is at most 5 bytes + ':' + '\0' = 7 + snprintf( target, 7, ":%d", (int)ntohs( host->port ) ); + } + return TRUE; +} + + /** * Parse IPv4 or IPv6 address in string representation to a suitable format usable by the BSD socket library * @string eg. "1.2.3.4" or "2a01::10:5", optially with port appended, eg "1.2.3.4:6666" or "[2a01::10:5]:6666" @@ -162,10 +195,9 @@ static int dnbd3_get_ip(char *hostname, dnbd3_host_t *host) int main(int argc, char *argv[]) { char *dev = NULL; + char host[50]; - int close_dev = 0; - int switch_host = 0; - int killSwitch = FALSE; + int action = -1; dnbd3_ioctl_t msg; memset( &msg, 0, sizeof(dnbd3_ioctl_t) ); @@ -189,6 +221,7 @@ int main(int argc, char *argv[]) if ( !dnbd3_get_ip( optarg, &msg.host ) ) exit( EXIT_FAILURE ); break; case 'i': + action = IOCTL_OPEN; msg.imgname = strdup( optarg ); break; case 'r': @@ -202,12 +235,19 @@ int main(int argc, char *argv[]) msg.read_ahead_kb = atoi( optarg ); break; case 'c': - dev = strdup( optarg ); - close_dev = 1; + action = IOCTL_CLOSE; break; case 's': dnbd3_get_ip( optarg, &msg.host ); - switch_host = 1; + action = IOCTL_SWITCH; + break; + case 'adds': + dnbd3_get_ip( optarg, &msg.host ); + action = IOCTL_ADD_SRV; + break; + case 'rems': + dnbd3_get_ip( optarg, &msg.host ); + action = IOCTL_REM_SRV; break; case 'H': dnbd3_print_help( argv[0] ); @@ -221,9 +261,6 @@ int main(int argc, char *argv[]) case 'D': dnbd3_client_daemon(); break; - case 'k': - killSwitch = TRUE; - break; } opt = getopt_long( argc, argv, optString, longOpts, &longIndex ); } @@ -232,7 +269,7 @@ int main(int argc, char *argv[]) struct stat st; if ( stat( SOCK_PATH, &st ) == 0 ) { if ( dnbd3_daemon_send( argc, argv ) ) exit( 0 ); - printf( "Failed.\n" ); + printf( "\nFailed.\n" ); exit( 1 ); } @@ -245,8 +282,10 @@ int main(int argc, char *argv[]) setuid( getuid() ); } + host_to_string( &msg.host, host, 50 ); + // close device - if ( close_dev && msg.host.type == 0 && dev && (msg.imgname == NULL )) { + if ( action == IOCTL_CLOSE && msg.host.type == 0 && dev && (msg.imgname == NULL )) { printf( "INFO: Closing device %s\n", dev ); if ( dnbd3_ioctl( dev, IOCTL_CLOSE, &msg ) ) exit( EXIT_SUCCESS ); printf( "Couldn't close device.\n" ); @@ -254,16 +293,18 @@ int main(int argc, char *argv[]) } // switch host - if ( switch_host && msg.host.type != 0 && dev && (msg.imgname == NULL )) { - printf( "INFO: Switching device %s to %s\n", dev, "" ); - if ( dnbd3_ioctl( dev, IOCTL_SWITCH, &msg ) ) exit( EXIT_SUCCESS ); - printf( "Switching server failed. Maybe the device is not connected?\n" ); + if ( (action == IOCTL_SWITCH || action == IOCTL_ADD_SRV || action == IOCTL_REM_SRV) && msg.host.type != 0 && dev && (msg.imgname == NULL )) { + if ( action == IOCTL_SWITCH ) printf( "INFO: Switching device %s to %s\n", dev, host ); + if ( action == IOCTL_ADD_SRV ) printf( "INFO: %s: adding %s\n", dev, host ); + if ( action == IOCTL_REM_SRV ) printf( "INFO: %s: removing %s\n", dev, host ); + if ( dnbd3_ioctl( dev, action, &msg ) ) exit( EXIT_SUCCESS ); + printf( "Failed! Maybe the device is not connected?\n" ); exit( EXIT_FAILURE ); } // connect - if ( msg.host.type != 0 && dev && (msg.imgname != NULL )) { - printf( "INFO: Connecting device %s to %s for image %s\n", dev, "", msg.imgname ); + if ( action == IOCTL_OPEN && msg.host.type != 0 && dev && (msg.imgname != NULL )) { + printf( "INFO: Connecting device %s to %s for image %s\n", dev, host, msg.imgname ); if ( dnbd3_ioctl( dev, IOCTL_OPEN, &msg ) ) exit( EXIT_SUCCESS ); printf( "ERROR: connecting device failed. Maybe it's already connected?\n" ); exit( EXIT_FAILURE ); @@ -372,17 +413,24 @@ static void dnbd3_daemon_action(int client, int argc, char **argv) char *host = NULL, *image = NULL, *device = NULL; int rid = 0, uid = 0, killMe = FALSE, ahead = 512; int len; + int action = -1; + const char *actionName = NULL; optind = 1; opt = getopt_long( argc, argv, optString, longOpts, &longIndex ); while ( opt != -1 ) { switch ( opt ) { + case 'd': + device = optarg; + break; case 'h': host = optarg; break; case 'i': image = optarg; + action = IOCTL_OPEN; + actionName = "Open"; break; case 'r': rid = atoi( optarg ); @@ -391,7 +439,16 @@ static void dnbd3_daemon_action(int client, int argc, char **argv) uid = atoi( optarg ); break; case 'c': - device = optarg; + action = IOCTL_CLOSE; + actionName = "Close"; + break; + case 'adds': + action = IOCTL_ADD_SRV; + actionName = "Add Server"; + break; + case 'rems': + action = IOCTL_REM_SRV; + actionName = "Remove Server"; break; case 'a': ahead = atoi( optarg ); @@ -415,8 +472,8 @@ static void dnbd3_daemon_action(int client, int argc, char **argv) exit( 0 ); } - if ( device != NULL ) { - if ( dnbd3_daemon_close( uid, device ) ) { + if ( (action == IOCTL_CLOSE || ((action == IOCTL_ADD_SRV || action == IOCTL_REM_SRV) && host != NULL)) && device != NULL ) { + if ( dnbd3_daemon_ioctl( uid, device, action, actionName, host ) ) { len = 0; } else { len = -1; @@ -424,7 +481,7 @@ static void dnbd3_daemon_action(int client, int argc, char **argv) send( client, &len, sizeof(len), 0 ); return; } - if ( host != NULL && image != NULL && rid >= 0 ) { + if ( action == IOCTL_OPEN && host != NULL && image != NULL && rid >= 0 ) { device = dnbd3_daemon_open( uid, host, image, rid, ahead ); if ( device != NULL ) { len = strlen( device ); @@ -439,7 +496,7 @@ static void dnbd3_daemon_action(int client, int argc, char **argv) printf( "Received a client request I cannot understand.\n" ); } -static int dnbd3_daemon_close(int uid, char *device) +static int dnbd3_daemon_ioctl(int uid, char *device, int action, const char *actionName, char *host) { int index = -1; char dev[DEV_LEN]; @@ -448,25 +505,31 @@ static int dnbd3_daemon_close(int uid, char *device) } else { index = atoi( device ); } + dnbd3_ioctl_t msg; + memset( &msg, 0, sizeof(msg) ); + msg.len = (uint16_t)sizeof(msg); + if ( host != NULL ) { + dnbd3_get_ip( host, &msg.host ); + } if ( index < 0 || index >= MAX_DEVS ) { - printf( "Close request with invalid device id %d\n", index ); + printf( "%s request with invalid device id %d\n", actionName, index ); return FALSE; } snprintf( dev, DEV_LEN, "/dev/dnbd%d", index ); if ( openDevices[index] == -1 ) { - printf( "Close request by %d for closed device %s\n", uid, dev ); + printf( "%s request by %d for closed device %s\n", actionName, uid, dev ); return TRUE; } if ( openDevices[index] != uid ) { - printf( "User %d is not allowed to close %s owned by %d\n", uid, dev, openDevices[index] ); + printf( "%s: User %d cannot access %s owned by %d\n", actionName, uid, dev, openDevices[index] ); return FALSE; } - if ( dnbd3_ioctl( dev, IOCTL_CLOSE, NULL ) ) { - printf( "Closed device %s of user %d\n", dev, uid ); + if ( dnbd3_ioctl( dev, action, &msg ) ) { + printf( "%s request for device %s of user %d successful\n", actionName, dev, uid ); openDevices[index] = -1; return TRUE; } - printf( "Error closing device %s, requested by %d\n", dev, uid ); + printf( "%s: Error on device %s, requested by %d\n", actionName, dev, uid ); return FALSE; } -- cgit v1.2.3-55-g7522 From 79ae416cbf5618328742234c33bb24fb0a61fe87 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Mon, 11 Nov 2013 22:44:00 +0100 Subject: [KERNEL] Fix possible deadlock on server switch [KERNEL] Remove server mode support as it's not needed anymore [KERNEL] Some more sanity checks and debug messages --- src/kernel/dnbd3.h | 4 +- src/kernel/net.c | 151 ++++++++++++++++++++++++++--------------------------- 2 files changed, 75 insertions(+), 80 deletions(-) diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h index aa9ea86..c703019 100644 --- a/src/kernel/dnbd3.h +++ b/src/kernel/dnbd3.h @@ -68,8 +68,8 @@ typedef struct struct socket *better_sock; // process - struct task_struct *thread_send; - struct task_struct *thread_receive; + struct task_struct * volatile thread_send; + struct task_struct * volatile thread_receive; struct task_struct *thread_discover; struct timer_list hb_timer; wait_queue_head_t process_queue_send; diff --git a/src/kernel/net.c b/src/kernel/net.c index 8703d9c..9811a31 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -122,11 +122,19 @@ int dnbd3_net_connect(dnbd3_device_t *dev) struct request *req1 = NULL; struct timeval timeout; - char get_servers = 0, set_client = 0; - - while (dev->disconnecting) - { - if (dev->better_sock) + if (dev->disconnecting) { + debug_dev("CONNECT: Still disconnecting!!!\n"); + while (dev->disconnecting) + schedule(); + } + if (dev->thread_receive != NULL) { + debug_dev("CONNECT: Still receiving!!!\n"); + while (dev->thread_receive != NULL) + schedule(); + } + if (dev->thread_send != NULL) { + debug_dev("CONNECT: Still sending!!!\n"); + while (dev->thread_send != NULL) schedule(); } @@ -135,24 +143,9 @@ int dnbd3_net_connect(dnbd3_device_t *dev) // do some checks before connecting - if (!dev->is_server && is_same_server(&dev->cur_server, &dev->initial_server)) - { - // Forget all known alt servers - memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0]) * NUMBER_SERVERS); - memcpy(dev->alt_servers, &dev->initial_server, sizeof(dev->alt_servers[0])); - get_servers = 1; - } - if (dev->better_sock) - { - set_client = 1; - } - - if (get_servers || set_client) - { - req1 = kmalloc(sizeof(*req1), GFP_ATOMIC ); - if (!req1) - error_dev("FATAL: Kmalloc(1) failed."); - } + req1 = kmalloc(sizeof(*req1), GFP_ATOMIC ); + if (!req1) + error_dev("FATAL: Kmalloc(1) failed."); if (dev->cur_server.host.port == 0 || dev->cur_server.host.type == 0 || dev->imgname == NULL ) error_dev("FATAL: Host, port or image name not set."); @@ -272,19 +265,10 @@ int dnbd3_net_connect(dnbd3_device_t *dev) dev->panic = 0; dev->panic_count = 0; - if (get_servers) // This connection is established to the initial server (from the ioctl call) - { - // Enqueue request to request_queue_send for a fresh list of alt servers - req1->cmd_type = REQ_TYPE_SPECIAL; - req1->cmd_flags = CMD_GET_SERVERS; - list_add(&req1->queuelist, &dev->request_queue_send); - } - else if (set_client) - { - req1->cmd_type = REQ_TYPE_SPECIAL; - req1->cmd_flags = CMD_SET_CLIENT_MODE; - list_add(&req1->queuelist, &dev->request_queue_send); - } + // Enqueue request to request_queue_send for a fresh list of alt servers + req1->cmd_type = REQ_TYPE_SPECIAL; + req1->cmd_flags = CMD_GET_SERVERS; + list_add(&req1->queuelist, &dev->request_queue_send); // create required threads dev->thread_send = kthread_create(dnbd3_net_send, dev, dev->disk->disk_name); @@ -306,7 +290,8 @@ int dnbd3_net_connect(dnbd3_device_t *dev) add_timer(&dev->hb_timer); return 0; - error: if (dev->sock) + error: ; + if (dev->sock) { sock_release(dev->sock); dev->sock = NULL; @@ -340,13 +325,11 @@ int dnbd3_net_disconnect(dnbd3_device_t *dev) if (dev->thread_send) { kthread_stop(dev->thread_send); - dev->thread_send = NULL; } if (dev->thread_receive) { kthread_stop(dev->thread_receive); - dev->thread_receive = NULL; } if (dev->thread_discover) @@ -437,9 +420,10 @@ int dnbd3_net_discover(void *data) struct timeval start, end; unsigned long rtt, best_rtt = 0; unsigned long irqflags; - int i, istart, isize, best_server, current_server; + int i, j, isize, best_server, current_server; int turn = 0; - int ready = 0, do_change, last_alt_count = 0; + int ready = 0, do_change = 0; + char check_order[NUMBER_SERVERS]; int mlen; struct request *last_request = (struct request *)123, *cur_request = (struct request *)456; @@ -463,6 +447,10 @@ int dnbd3_net_discover(void *data) dnbd3_request.magic = dnbd3_packet_magic; + for (i = 0; i < NUMBER_SERVERS; ++i) { + check_order[i] = i; + } + for (;;) { wait_event_interruptible(dev->process_queue_discover, @@ -525,26 +513,34 @@ int dnbd3_net_discover(void *data) current_server = best_server = -1; best_rtt = 0xFFFFFFFul; - if (dev->heartbeat_count < STARTUP_MODE_DURATION || last_alt_count == 0 || dev->panic) + if (dev->heartbeat_count < STARTUP_MODE_DURATION || dev->panic) { - istart = 0; isize = NUMBER_SERVERS; } else { - istart = jiffies % MAX(last_alt_count - 2, 1); isize = 3; } + if (NUMBER_SERVERS > isize) { + for (i = 0; i < isize; ++i) { + j = ((start.tv_sec >> i) ^ (start.tv_usec >> j)) % NUMBER_SERVERS; + if (j != i) { + mlen = check_order[i]; + check_order[i] = check_order[j]; + check_order[j] = mlen; + } + } + } - for (i = istart; i < NUMBER_SERVERS; ++i) + for (j = 0; j < NUMBER_SERVERS; ++j) { + i = check_order[j]; if (dev->alt_servers[i].host.type == 0) // Empty slot continue; - last_alt_count = i; - if (!dev->panic && dev->alt_servers[i].failures > 50 && (jiffies & 7) != 0) // If not in panic mode, skip server if it failed too many times + if (!dev->panic && dev->alt_servers[i].failures > 50 && (start.tv_usec & 7) != 0) // If not in panic mode, skip server if it failed too many times + continue; + if (isize-- <= 0 && !is_same_server(&dev->cur_server, &dev->alt_servers[i])) continue; - if (isize-- <= 0) - break; // Initialize socket and connect if (sock_create_kern(dev->alt_servers[i].host.type, SOCK_STREAM, IPPROTO_TCP, &sock) < 0) @@ -653,13 +649,13 @@ int dnbd3_net_discover(void *data) } else if (sizeof(size_t) >= 8) { - dnbd3_request.offset = ((((start.tv_usec << 12) ^ start.tv_usec) << 4) % dev->reported_size) + dnbd3_request.offset = ((((start.tv_sec << 12) ^ start.tv_usec) << 4) % dev->reported_size) & ~(uint64_t)(RTT_BLOCK_SIZE - 1); //printk("Random offset 64bit: %lluMiB\n", (unsigned long long)(dnbd3_request.offset >> 20)); } else // On 32bit, prevent modulo on a 64bit data type. This limits the random block picking to the first 4GB of the image { - dnbd3_request.offset = ((((start.tv_usec << 12) ^ start.tv_usec) << 4) % (uint32_t)dev->reported_size) + dnbd3_request.offset = ((((start.tv_sec << 12) ^ start.tv_usec) << 4) % (uint32_t)dev->reported_size) & ~(RTT_BLOCK_SIZE - 1); //printk("Random offset 32bit: %lluMiB\n", (unsigned long long)(dnbd3_request.offset >> 20)); } @@ -727,7 +723,8 @@ int dnbd3_net_discover(void *data) continue; - error: ++dev->alt_servers[i].failures; + error: ; + ++dev->alt_servers[i].failures; sock_release(sock); sock = NULL; dev->alt_servers[i].rtts[turn] = RTT_UNREACHABLE; @@ -756,7 +753,7 @@ int dnbd3_net_discover(void *data) continue; } - do_change = !dev->is_server && ready && best_server != current_server && (jiffies & 3) != 0 + do_change = !dev->is_server && ready && best_server != current_server && (start.tv_usec & 3) != 0 && RTT_THRESHOLD_FACTOR(dev->cur_rtt) > best_rtt + 1500; if (ready && !do_change) { @@ -798,7 +795,7 @@ int dnbd3_net_discover(void *data) best_sock = NULL; } - if (!ready || (jiffies & 7) != 0) + if (!ready || (start.tv_usec & 7) != 0) turn = (turn + 1) % 4; if (turn == 3) ready = 1; @@ -811,7 +808,7 @@ int dnbd3_net_discover(void *data) int dnbd3_net_send(void *data) { dnbd3_device_t *dev = data; - struct request *blk_request; + struct request *blk_request, *tmp_request; dnbd3_request_t dnbd3_request; struct msghdr msg; @@ -825,6 +822,19 @@ int dnbd3_net_send(void *data) set_user_nice(current, -20); + // move already sent requests to request_queue_send again + while (!list_empty(&dev->request_queue_receive)) + { + printk("WARN: Request queue was not empty on %s\n", dev->disk->disk_name); + spin_lock_irqsave(&dev->blk_lock, irqflags); + list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_receive, queuelist) + { + list_del_init(&blk_request->queuelist); + list_add(&blk_request->queuelist, &dev->request_queue_send); + } + spin_unlock_irqrestore(&dev->blk_lock, irqflags); + } + for (;;) { wait_event_interruptible(dev->process_queue_send, kthread_should_stop() || !list_empty(&dev->request_queue_send)); @@ -879,29 +889,25 @@ int dnbd3_net_send(void *data) iov.iov_len = sizeof(dnbd3_request); if (kernel_sendmsg(dev->sock, &msg, &iov, 1, sizeof(dnbd3_request)) != sizeof(dnbd3_request)) { - printk("Couldn't properly send a request header.\n"); + debug_dev("ERROR: Connection to server lost (send)"); goto error; } wake_up(&dev->process_queue_receive); } + dev->thread_send = NULL; return 0; - error: - debug_dev("ERROR: Connection to server lost (send)"); + error: ; if (dev->sock) - { kernel_sock_shutdown(dev->sock, SHUT_RDWR); - dev->sock = NULL; - } - dev->thread_send = NULL; if (!dev->disconnecting) { dev->panic = 1; - // start discover dev->discover = 1; wake_up(&dev->process_queue_discover); } + dev->thread_send = NULL; return -1; } @@ -938,7 +944,7 @@ int dnbd3_net_receive(void *data) continue; } if (ret <= 0) - error_dev_va("Connection closed (%d).", ret); + error_dev_va("ERROR: Connection to server lost (receive)", ret); if (ret != sizeof(dnbd3_reply)) error_dev("ERROR: Recv msg header."); fixup_reply(dnbd3_reply); @@ -1065,30 +1071,19 @@ int dnbd3_net_receive(void *data) } printk("dnbd3_net_receive terminated normally.\n"); + dev->thread_receive = NULL; return 0; error: - // move already sent requests to request_queue_send again - while (!list_empty(&dev->request_queue_receive)) - { - printk("WARN: Request queue was not empty on %s\n", dev->disk->disk_name); - spin_lock_irqsave(&dev->blk_lock, irqflags); - list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_receive, queuelist) - { - list_del_init(&blk_request->queuelist); - list_add(&blk_request->queuelist, &dev->request_queue_send); - } - spin_unlock_irqrestore(&dev->blk_lock, irqflags); - } if (dev->sock) kernel_sock_shutdown(dev->sock, SHUT_RDWR); - dev->thread_receive = NULL; if (!dev->disconnecting) { dev->panic = 1; - // start discover dev->discover = 1; wake_up(&dev->process_queue_discover); } + dev->thread_receive = NULL; return -1; } + -- cgit v1.2.3-55-g7522 From ece6049cdf1148b6ce11bda11c024d1cd4db5011 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Mon, 11 Nov 2013 22:50:08 +0100 Subject: tiny fix --- src/kernel/net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kernel/net.c b/src/kernel/net.c index 9811a31..e65801a 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -944,7 +944,7 @@ int dnbd3_net_receive(void *data) continue; } if (ret <= 0) - error_dev_va("ERROR: Connection to server lost (receive)", ret); + error_dev("ERROR: Connection to server lost (receive)"); if (ret != sizeof(dnbd3_reply)) error_dev("ERROR: Recv msg header."); fixup_reply(dnbd3_reply); -- cgit v1.2.3-55-g7522