summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Simon Rettberg 2013-10-24 16:53:07 +0200
committer Simon Rettberg 2013-10-24 16:53:07 +0200
commit d8b028a18a9581d3fdb07c5c455ba206af50a798 (patch)
tree b4cf226cf7d6c9c1fc7224f7ff686c0d051df09d
parent [SERVER] Only mark server as bad on unexpected error (diff)
download dnbd3-d8b028a18a9581d3fdb07c5c455ba206af50a798.tar.gz
dnbd3-d8b028a18a9581d3fdb07c5c455ba206af50a798.tar.xz
dnbd3-d8b028a18a9581d3fdb07c5c455ba206af50a798.zip
[KERNEL] Slow down with RTT measurements after 30 seconds
Right after connecting, all servers will be polled every 4 seconds for 30 seconds, so we get 7 data points per alt-server. If no better server is found during this time, further RTT measurements will be done every 22 seconds, to put less load on the network in the long run.
-rw-r--r-- src/config.h 6
-rw-r--r-- src/kernel/dnbd3.h 3
-rw-r--r-- src/kernel/net.c 25
3 files changed, 26 insertions, 8 deletions
diff --git a/src/config.h b/src/config.h
index 128edb7..7df49a0 100644
--- a/src/config.h
+++ b/src/config.h
@@ -71,10 +71,12 @@
// This must be a power of two:
#define RTT_BLOCK_SIZE 4096
+#define STARTUP_MODE_DURATION 30
// Interval of several repeating tasks (in seconds)
-#define TIMER_INTERVAL_PROBE_NORMAL 10
+#define TIMER_INTERVAL_PROBE_STARTUP 4
+#define TIMER_INTERVAL_PROBE_NORMAL 22
#define TIMER_INTERVAL_PROBE_PANIC 2
-#define TIMER_INTERVAL_KEEPALIVE_PACKET 5
+#define TIMER_INTERVAL_KEEPALIVE_PACKET 6
// Expect a keepalive response every X seconds
#define SOCKET_KEEPALIVE_TIMEOUT 7
diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h
index 915c8b9..aa9ea86 100644
--- a/src/kernel/dnbd3.h
+++ b/src/kernel/dnbd3.h
@@ -61,7 +61,8 @@ typedef struct
int new_servers_num; // number of new alt servers that are waiting to be copied to above array
dnbd3_server_entry_t new_servers[NUMBER_SERVERS]; // pending new alt servers
uint8_t discover, panic, disconnecting, is_server, update_available, panic_count;
- uint16_t rid, heartbeat_count;
+ uint16_t rid;
+ uint32_t heartbeat_count;
uint64_t reported_size;
// server switch
struct socket *better_sock;
diff --git a/src/kernel/net.c b/src/kernel/net.c
index b3e08df..fc237de 100644
--- a/src/kernel/net.c
+++ b/src/kernel/net.c
@@ -394,7 +394,8 @@ void dnbd3_net_heartbeat(unsigned long arg)
debug_dev("ERROR: Couldn't create keepalive request.");
}
}
- if (timeout_seconds(TIMER_INTERVAL_PROBE_NORMAL))
+ if ((dev->heartbeat_count > STARTUP_MODE_DURATION && timeout_seconds(TIMER_INTERVAL_PROBE_NORMAL))
+ || (dev->heartbeat_count <= STARTUP_MODE_DURATION && timeout_seconds(TIMER_INTERVAL_PROBE_STARTUP)))
{
// Normal discovery
dev->discover = 1;
@@ -436,9 +437,9 @@ int dnbd3_net_discover(void *data)
struct timeval start, end;
unsigned long rtt, best_rtt = 0;
unsigned long irqflags;
- int i, best_server, current_server;
+ int i, istart, isize, best_server, current_server;
int turn = 0;
- int ready = 0, do_change;
+ int ready = 0, do_change, last_alt_count = 0;
int mlen;
struct request *last_request = (struct request *)123, *cur_request = (struct request *)456;
@@ -524,12 +525,26 @@ int dnbd3_net_discover(void *data)
current_server = best_server = -1;
best_rtt = 0xFFFFFFFul;
- for (i = 0; i < NUMBER_SERVERS; ++i)
+ if (dev->heartbeat_count < STARTUP_MODE_DURATION || last_alt_count == 0 || dev->panic)
+ {
+ istart = 0;
+ isize = NUMBER_SERVERS;
+ }
+ else
+ {
+ istart = jiffies % MAX(last_alt_count - 2, 1);
+ isize = 3;
+ }
+
+ for (i = istart; i < NUMBER_SERVERS; ++i)
{
if (dev->alt_servers[i].host.type == 0) // Empty slot
continue;
+ last_alt_count = i;
if (!dev->panic && dev->alt_servers[i].failures > 50 && (jiffies & 7) != 0) // If not in panic mode, skip server if it failed too many times
continue;
+ if (isize-- <= 0)
+ break;
// Initialize socket and connect
if (sock_create_kern(dev->alt_servers[i].host.type, SOCK_STREAM, IPPROTO_TCP, &sock) < 0)
@@ -783,7 +798,7 @@ int dnbd3_net_discover(void *data)
best_sock = NULL;
}
- if (!ready || (jiffies & 3) != 0)
+ if (!ready || (jiffies & 7) != 0)
turn = (turn + 1) % 4;
if (turn == 3)
ready = 1;