From d8b028a18a9581d3fdb07c5c455ba206af50a798 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Thu, 24 Oct 2013 16:53:07 +0200 Subject: [KERNEL] Slow down with RTT measurements after 30 seconds Right after connecting, all servers will be polled every 4 seconds for 30 seconds, so we get 7 data points per alt-server. If no better server is found during this time, further RTT measurements will be done every 22 seconds, to put less load on the network in the long run. --- src/config.h | 6 ++++-- src/kernel/dnbd3.h | 3 ++- src/kernel/net.c | 25 ++++++++++++++++++++----- 3 files changed, 26 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/config.h b/src/config.h index 128edb7..7df49a0 100644 --- a/src/config.h +++ b/src/config.h @@ -71,10 +71,12 @@ // This must be a power of two: #define RTT_BLOCK_SIZE 4096 +#define STARTUP_MODE_DURATION 30 // Interval of several repeating tasks (in seconds) -#define TIMER_INTERVAL_PROBE_NORMAL 10 +#define TIMER_INTERVAL_PROBE_STARTUP 4 +#define TIMER_INTERVAL_PROBE_NORMAL 22 #define TIMER_INTERVAL_PROBE_PANIC 2 -#define TIMER_INTERVAL_KEEPALIVE_PACKET 5 +#define TIMER_INTERVAL_KEEPALIVE_PACKET 6 // Expect a keepalive response every X seconds #define SOCKET_KEEPALIVE_TIMEOUT 7 diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h index 915c8b9..aa9ea86 100644 --- a/src/kernel/dnbd3.h +++ b/src/kernel/dnbd3.h @@ -61,7 +61,8 @@ typedef struct int new_servers_num; // number of new alt servers that are waiting to be copied to above array dnbd3_server_entry_t new_servers[NUMBER_SERVERS]; // pending new alt servers uint8_t discover, panic, disconnecting, is_server, update_available, panic_count; - uint16_t rid, heartbeat_count; + uint16_t rid; + uint32_t heartbeat_count; uint64_t reported_size; // server switch struct socket *better_sock; diff --git a/src/kernel/net.c b/src/kernel/net.c index b3e08df..fc237de 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -394,7 +394,8 @@ void dnbd3_net_heartbeat(unsigned long arg) debug_dev("ERROR: Couldn't create keepalive request."); } } - if (timeout_seconds(TIMER_INTERVAL_PROBE_NORMAL)) + if ((dev->heartbeat_count > STARTUP_MODE_DURATION && timeout_seconds(TIMER_INTERVAL_PROBE_NORMAL)) + || (dev->heartbeat_count <= STARTUP_MODE_DURATION && timeout_seconds(TIMER_INTERVAL_PROBE_STARTUP))) { // Normal discovery dev->discover = 1; @@ -436,9 +437,9 @@ int dnbd3_net_discover(void *data) struct timeval start, end; unsigned long rtt, best_rtt = 0; unsigned long irqflags; - int i, best_server, current_server; + int i, istart, isize, best_server, current_server; int turn = 0; - int ready = 0, do_change; + int ready = 0, do_change, last_alt_count = 0; int mlen; struct request *last_request = (struct request *)123, *cur_request = (struct request *)456; @@ -524,12 +525,26 @@ int dnbd3_net_discover(void *data) current_server = best_server = -1; best_rtt = 0xFFFFFFFul; - for (i = 0; i < NUMBER_SERVERS; ++i) + if (dev->heartbeat_count < STARTUP_MODE_DURATION || last_alt_count == 0 || dev->panic) + { + istart = 0; + isize = NUMBER_SERVERS; + } + else + { + istart = jiffies % MAX(last_alt_count - 2, 1); + isize = 3; + } + + for (i = istart; i < NUMBER_SERVERS; ++i) { if (dev->alt_servers[i].host.type == 0) // Empty slot continue; + last_alt_count = i; if (!dev->panic && dev->alt_servers[i].failures > 50 && (jiffies & 7) != 0) // If not in panic mode, skip server if it failed too many times continue; + if (isize-- <= 0) + break; // Initialize socket and connect if (sock_create_kern(dev->alt_servers[i].host.type, SOCK_STREAM, IPPROTO_TCP, &sock) < 0) @@ -783,7 +798,7 @@ int dnbd3_net_discover(void *data) best_sock = NULL; } - if (!ready || (jiffies & 3) != 0) + if (!ready || (jiffies & 7) != 0) turn = (turn + 1) % 4; if (turn == 3) ready = 1; -- cgit v1.2.3-55-g7522