summary refs log tree commit diff stats
path: root/inc
diff options
context:
space:
mode:
author Simon Rettberg 2022-02-12 23:56:35 +0100
committer Simon Rettberg 2022-02-18 21:34:55 +0100
commit eb2876f6542af2bfa47c7a6905ecc4f81f1d2ad3 (patch)
tree 17ebb5fd2d4770a4dd67f857f2488221cd46874c /inc
parent [KERNEL] Add missing include to fix compile on 4.14.x (diff)
download dnbd3-eb2876f6542af2bfa47c7a6905ecc4f81f1d2ad3.tar.gz
dnbd3-eb2876f6542af2bfa47c7a6905ecc4f81f1d2ad3.tar.xz
dnbd3-eb2876f6542af2bfa47c7a6905ecc4f81f1d2ad3.zip
[KERNEL] Refactor to use workqueues and blk-mq only
Using workqueues frees us from having to manage the lifecycle of three dedicated threads. Discovery (alt server checks) and sending keepalive packets are now done using work on the power-efficient system queue. Sending and receiving happen via dedicated work queues with higher priority. blk-mq has also been around for quite a while in the kernel, so switching to it doesn't hurt backwards compatibility. As the code is now refactored to work more as blk-mq is designed, backwards compatibility has even improved, while at the same time freeing us from an arsenal of macros that were required to make the blk-mq port look and feel like the old implementation. For example, the code now compiles on CentOS 7 with kernel 3.10 without requiring special macros to detect the heavily modified Red Hat kernel with all its backported features. A few other design limitations have been rectified along the way, e.g. switching to another server now doesn't internally disconnect from the current one first, which theoretically could lead to a non-working setup if the new server isn't reachable and then - because of some transient network error - switching back also fails. As the discover-thread was torn down from the disconnect call, the connection would also not repair itself eventually. We now establish the new connection in parallel to the old one, and only if that succeeds do we replace the old one with it, similar to how the automatic alt-server switch already does it.
Diffstat (limited to 'inc')
-rw-r--r-- inc/dnbd3/config/client.h 36
1 files changed, 26 insertions, 10 deletions
diff --git a/inc/dnbd3/config/client.h b/inc/dnbd3/config/client.h
index 49d4676..55cf8b3 100644
--- a/inc/dnbd3/config/client.h
+++ b/inc/dnbd3/config/client.h
@@ -4,9 +4,21 @@
// Which is the minimum protocol version the client expects from the server
#define MIN_SUPPORTED_SERVER 2
-// in seconds if not stated otherwise (MS = milliseconds)
-#define SOCKET_TIMEOUT_CLIENT_DATA 2
-#define SOCKET_TIMEOUT_CLIENT_DISCOVERY 1
+// Send keepalive every X seconds
+#define KEEPALIVE_INTERVAL 10
+
+// in seconds if not stated otherwise
+#define SOCKET_TIMEOUT_SEND 2
+
+// Socket receive timeout. Must be higher than keepalive interval, otherwise
+// the connection might be aborted when idle
+#define SOCKET_TIMEOUT_RECV 13
+
+// During discovery, we use very short minimum timeouts (unless in panic mode)
+#define SOCKET_TIMEOUT_DISCOVERY 1
+
+// IO timeout for block layer
+#define BLOCK_LAYER_TIMEOUT 10
#define RTT_THRESHOLD_FACTOR(us) (((us) * 3) / 4) // 3/4 = current to best must be 25% worse
#define RTT_ABSOLUTE_THRESHOLD (80000) // Or 80ms worse
@@ -14,15 +26,19 @@
// This must be a power of two:
#define RTT_BLOCK_SIZE 4096
-#define STARTUP_MODE_DURATION 30
// Interval of several repeating tasks (in seconds)
-#define TIMER_INTERVAL_PROBE_STARTUP 4
-#define TIMER_INTERVAL_PROBE_NORMAL 22
+#define TIMER_INTERVAL_PROBE_STARTUP 2
+#define TIMER_INTERVAL_PROBE_SWITCH 10
#define TIMER_INTERVAL_PROBE_PANIC 2
-#define TIMER_INTERVAL_KEEPALIVE_PACKET 6
-
-// Expect a keepalive response every X seconds
-#define SOCKET_KEEPALIVE_TIMEOUT 8
+#define TIMER_INTERVAL_PROBE_MAX 45
+// How many discover runs after setting up a device should be considered the startup phase.
+// During that phase, check all servers before we start doing it selectively,
+// and also don't increase the discover interval during this period.
+#define DISCOVER_STARTUP_PHASE_COUNT 6
+// How many servers should be tested at maximum after above
+#define DISCOVER_REDUCED_SERVER_COUNT 3
+// Number of RTT probes to keep in history and average the value over
+#define DISCOVER_HISTORY_SIZE 4
// Number of unsuccessful alt_server probes before read errors are reported to the block layer
// (ALL servers will be probed this many times)