From 31fbbfd05130caf2a236d117a08e727af2cb5ac4 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Wed, 13 Nov 2013 17:35:01 +0100 Subject: [SERVER] Increase RTT check delay for uplinks that failed too many times to save network bandwidth --- src/server/altservers.c | 5 ++++- src/server/globals.h | 1 + src/server/uplink.c | 20 +++++++++++--------- 3 files changed, 16 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/server/altservers.c b/src/server/altservers.c index 5317936..d89e73a 100644 --- a/src/server/altservers.c +++ b/src/server/altservers.c @@ -346,7 +346,7 @@ static void *altservers_main(void *data) dnbd3_host_t servers[ALTS + 1]; serialized_buffer_t serialized; struct timespec start, end; - time_t nextCacheMapSave = time( NULL ) + 120; + time_t nextCacheMapSave = time( NULL ) + 90; setThreadName( "altserver-check" ); blockNoncriticalSignals(); @@ -513,6 +513,9 @@ static void *altservers_main(void *data) uplink->betterFd = bestSock; uplink->betterServer = servers[bestIndex]; uplink->rttTestResult = RTT_DOCHANGE; + } else if (bestSock == -1) { + // No server was reachable + uplink->rttTestResult = RTT_NOT_REACHABLE; } else { // nope if ( bestSock != -1 ) close( bestSock ); diff --git a/src/server/globals.h b/src/server/globals.h index 7d333e0..1243eb8 100644 --- a/src/server/globals.h +++ b/src/server/globals.h @@ -38,6 +38,7 @@ typedef struct #define RTT_INPROGRESS 1 // In progess, not finished #define RTT_DONTCHANGE 2 // Finished, but no better alternative found #define RTT_DOCHANGE 3 // Finished, better alternative written to .betterServer + .betterFd +#define RTT_NOT_REACHABLE 4 // No uplink was reachable struct _dnbd3_connection { int fd; // socket fd to remote server diff --git a/src/server/uplink.c b/src/server/uplink.c index 0f50fa0..d19c1d9 100644 --- a/src/server/uplink.c +++ b/src/server/uplink.c @@ -197,6 +197,7 @@ static void* uplink_mainloop(void *data) int numSocks, i, waitTime; int altCheckInterval = SERVER_RTT_DELAY_INIT; int bFree = FALSE; + int discoverFailCount = 0; time_t nextAltCheck = 0; char buffer[100]; // @@ -226,6 +227,7 @@ static void* uplink_mainloop(void *data) // Check if server switch is in order if ( link->rttTestResult == RTT_DOCHANGE ) { link->rttTestResult = RTT_IDLE; + discoverFailCount = 0; // The rttTest worker thread has finished our request. // And says it's better to switch to another server const int fd = link->fd; @@ -257,13 +259,9 @@ static void* uplink_mainloop(void *data) // more to do here } // epoll() - if ( link->fd == -1 ) { - waitTime = 2000; - nextAltCheck = 0; - } else { - waitTime = (time( NULL ) - nextAltCheck) * 1000; - if ( waitTime < 1500 ) waitTime = 1500; - } + waitTime = (time( NULL ) - nextAltCheck) * 1000; + if ( waitTime < 1500 ) waitTime = 1500; + if ( waitTime > 5000 ) waitTime = 5000; numSocks = epoll_wait( fdEpoll, events, MAXEVENTS, waitTime ); if ( _shutdown || link->shutdown ) goto cleanup; if ( numSocks < 0 ) { // Error? @@ -320,9 +318,9 @@ static void* uplink_mainloop(void *data) // See if we should trigger an RTT measurement if ( link->rttTestResult == RTT_IDLE || link->rttTestResult == RTT_DONTCHANGE ) { const time_t now = time( NULL ); - if ( nextAltCheck - now > SERVER_RTT_DELAY_MAX ) { + if ( now + SERVER_RTT_DELAY_FAILED < nextAltCheck ) { // This probably means the system time was changed - handle this case properly by capping the timeout - nextAltCheck = now + SERVER_RTT_DELAY_MAX; + nextAltCheck = now + SERVER_RTT_DELAY_FAILED; } else if ( now >= nextAltCheck ) { // It seems it's time for a check if ( image_isComplete( link->image ) ) { @@ -359,6 +357,10 @@ static void* uplink_mainloop(void *data) altCheckInterval = MIN(altCheckInterval + 1, SERVER_RTT_DELAY_MAX); nextAltCheck = now + altCheckInterval; } + } else if ( link->rttTestResult == RTT_NOT_REACHABLE ) { + link->rttTestResult = RTT_IDLE; + discoverFailCount++; + nextAltCheck = time( NULL ) + (discoverFailCount < 5 ? altCheckInterval : SERVER_RTT_DELAY_FAILED); } #ifdef _DEBUG if ( link->fd != -1 ) { -- cgit v1.2.3-55-g7522