summary | refs | log | tree | commit | diff | stats
path: root/src/kernel/net.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/kernel/net.c')
-rw-r--r-- src/kernel/net.c 70
1 files changed, 49 insertions, 21 deletions
diff --git a/src/kernel/net.c b/src/kernel/net.c
index 5ef4016..fcea31b 100644
--- a/src/kernel/net.c
+++ b/src/kernel/net.c
@@ -86,7 +86,7 @@ static bool dnbd3_execute_handshake(dnbd3_device_t *dev, struct socket *sock,
struct sockaddr_storage *addr, uint16_t *remote_version, bool copy_image_info);
static bool dnbd3_request_test_block(dnbd3_device_t *dev, struct sockaddr_storage *addr,
- struct socket *sock);
+ struct socket *sock, u64 test_start, u32 test_size);
static bool dnbd3_send_empty_request(dnbd3_device_t *dev, u16 cmd);
@@ -143,6 +143,7 @@ static void dnbd3_start_discover(dnbd3_device_t *dev, bool panic)
// Panic freshly turned on
dev->panic = true;
dev->discover_interval = TIMER_INTERVAL_PROBE_PANIC;
+ dev->discover_count = 0;
}
spin_unlock_irqrestore(&dev->blk_lock, irqflags);
dnbd3_flag_reset(dev->connection_lock);
@@ -192,10 +193,14 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev)
{
struct socket *sock, *best_sock = NULL;
dnbd3_alt_server_t *alt;
+ struct request *blk_request;
struct sockaddr_storage host_compare, best_server;
uint16_t remote_version;
ktime_t start, end;
unsigned long rtt = 0, best_rtt = 0;
+ u64 test_start = 0;
+ u32 test_size = RTT_BLOCK_SIZE;
+ unsigned long irqflags;
int i, j, k, isize, fails, rtt_threshold;
int do_change = 0;
u8 check_order[NUMBER_SERVERS];
@@ -219,6 +224,10 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev)
best_server.ss_family = 0;
best_rtt = RTT_UNREACHABLE;
+ if (dev->panic) {
+ dnbd3_dev_dbg_host(dev, &host_compare, "Discover in panic mode\n");
+ }
+
if (!ready || dev->panic)
isize = NUMBER_SERVERS;
else
@@ -249,6 +258,24 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev)
if (!dnbd3_execute_handshake(dev, sock, &host_compare, &remote_version, false))
goto error;
+ if (dev->panic) {
+ // In panic mode, use next pending request for testing, this has a higher chance of
+ // filtering out a server which can't actually handle our requests, instead of just
+ // requesting the very first block which should be cached by every server.
+ spin_lock_irqsave(&dev->send_queue_lock, irqflags);
+ if (!list_empty(&dev->send_queue)) {
+ blk_request = list_entry(dev->send_queue.next, struct request, queuelist);
+ test_start = blk_rq_pos(blk_request) << 9; /* sectors to bytes */
+ test_size = blk_rq_bytes(blk_request);
+ }
+ spin_unlock_irqrestore(&dev->send_queue_lock, irqflags);
+ }
+
+ // actual rtt measurement is just the first block request and reply
+ start = ktime_get_real();
+ if (!dnbd3_request_test_block(dev, &host_compare, sock, test_start, test_size))
+ goto error;
+ end = ktime_get_real();
// panic mode, take first responding server
if (dev->panic) {
@@ -259,7 +286,10 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev)
// Check global flag, a connect might have been in progress
if (best_sock != NULL)
sock_release(best_sock);
- set_socket_timeout(sock, false, SOCKET_TIMEOUT_RECV * 1000 + 1000);
+ set_socket_timeout(sock, false, MAX(
+ SOCKET_TIMEOUT_RECV * 1000,
+ (int)ktime_ms_delta(end, start)
+ ) + 1000);
if (dnbd3_set_primary_connection(dev, sock, &host_compare, remote_version) != 0)
sock_release(sock);
dnbd3_flag_reset(dev->connection_lock);
@@ -267,12 +297,6 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev)
}
}
- // actual rtt measurement is just the first block requests and reply
- start = ktime_get_real();
- if (!dnbd3_request_test_block(dev, &host_compare, sock))
- goto error;
- end = ktime_get_real();
-
mutex_lock(&dev->alt_servers_lock);
if (is_same_server(&dev->alt_servers[i].host, &host_compare)) {
dev->alt_servers[i].protocol_version = remote_version;
@@ -446,7 +470,9 @@ static void dnbd3_recv_workfn(struct work_struct *work)
int remaining;
int ret;
+ dnbd3_dev_dbg_cur(dev, "starting receive worker...\n");
mutex_lock(&dev->recv_mutex);
+ dnbd3_dev_dbg_cur(dev, "receive worker started\n");
while (dev->sock) {
// receive net reply
ret = dnbd3_recv_reply(dev->sock, &reply_hdr);
@@ -594,6 +620,7 @@ static void dnbd3_recv_workfn(struct work_struct *work)
out_unlock:
// This will check if we actually still need a new connection
dnbd3_start_discover(dev, true);
+ dnbd3_dev_dbg_cur(dev, "Receive worker exited\n");
mutex_unlock(&dev->recv_mutex);
}
@@ -623,7 +650,7 @@ static void set_socket_timeout(struct socket *sock, bool set_send, int timeout_m
static int dnbd3_connect(dnbd3_device_t *dev, struct sockaddr_storage *addr, struct socket **sock_out)
{
ktime_t start;
- int ret, connect_time_ms;
+ int ret, connect_time_ms, diff;
struct socket *sock;
int retries = 4;
const int addrlen = addr->ss_family == AF_INET ? sizeof(struct sockaddr_in)
@@ -659,7 +686,7 @@ static int dnbd3_connect(dnbd3_device_t *dev, struct sockaddr_storage *addr, str
if (dev->panic && dev->panic_count > 1) {
/* in panic mode for some time, start increasing timeouts */
- connect_time_ms = dev->panic_count * 1000;
+ connect_time_ms = dev->panic_count * 333;
} else {
/* otherwise, use 2*RTT of current server */
connect_time_ms = dev->cur_server.rtt * 2 / 1000;
@@ -667,21 +694,21 @@ static int dnbd3_connect(dnbd3_device_t *dev, struct sockaddr_storage *addr, str
/* but obey a minimal configurable value, and maximum sanity check */
if (connect_time_ms < SOCKET_TIMEOUT_SEND * 1000)
connect_time_ms = SOCKET_TIMEOUT_SEND * 1000;
- else if (connect_time_ms > 60000)
- connect_time_ms = 60000;
+ else if (connect_time_ms > 15000)
+ connect_time_ms = 15000;
set_socket_timeout(sock, false, connect_time_ms); // recv
set_socket_timeout(sock, true, connect_time_ms); // send
start = ktime_get_real();
while (--retries > 0) {
ret = kernel_connect(sock, (struct sockaddr *)addr, addrlen, 0);
- connect_time_ms = (int)ktime_ms_delta(ktime_get_real(), start);
- if (connect_time_ms > 2 * SOCKET_TIMEOUT_SEND * 1000) {
+ diff = (int)ktime_ms_delta(ktime_get_real(), start);
+ if (diff > 2 * connect_time_ms) {
/* Either I'm losing my mind or there was a specific build of kernel
* 5.x where SO_RCVTIMEO didn't affect the connect call above, so
* this function would hang for over a minute for unreachable hosts.
- * Leave in this debug check for twice the configured timeout
+ * Leave in this debug check for twice the configured timeout.
*/
- dnbd3_dev_dbg_host(dev, addr, "connect: call took %dms\n",
+ dnbd3_dev_err_host(dev, addr, "connect: call took %dms\n",
connect_time_ms);
}
if (ret != 0) {
@@ -916,23 +943,24 @@ static bool dnbd3_drain_socket(dnbd3_device_t *dev, struct socket *sock, int byt
return true;
}
-static bool dnbd3_request_test_block(dnbd3_device_t *dev, struct sockaddr_storage *addr, struct socket *sock)
+static bool dnbd3_request_test_block(dnbd3_device_t *dev, struct sockaddr_storage *addr,
+ struct socket *sock, u64 test_start, u32 test_size)
{
dnbd3_reply_t reply_hdr;
// Request block
- if (!dnbd3_send_request(sock, CMD_GET_BLOCK, 0, 0, RTT_BLOCK_SIZE)) {
+ if (!dnbd3_send_request(sock, CMD_GET_BLOCK, 0, test_start, test_size)) {
dnbd3_err_dbg_host(dev, addr, "requesting test block failed\n");
return false;
}
- // receive net reply
+ // receive reply header
if (dnbd3_recv_reply(sock, &reply_hdr) != sizeof(reply_hdr)) {
dnbd3_err_dbg_host(dev, addr, "receiving test block header packet failed\n");
return false;
}
if (reply_hdr.magic != dnbd3_packet_magic || reply_hdr.cmd != CMD_GET_BLOCK
- || reply_hdr.size != RTT_BLOCK_SIZE || reply_hdr.handle != 0) {
+ || reply_hdr.size != test_size || reply_hdr.handle != 0) {
dnbd3_err_dbg_host(dev, addr,
"unexpected reply to block request: cmd=%d, size=%d, handle=%llu (discover)\n",
(int)reply_hdr.cmd, (int)reply_hdr.size, reply_hdr.handle);
@@ -940,7 +968,7 @@ static bool dnbd3_request_test_block(dnbd3_device_t *dev, struct sockaddr_storag
}
// receive data
- return dnbd3_drain_socket(dev, sock, RTT_BLOCK_SIZE);
+ return dnbd3_drain_socket(dev, sock, test_size);
}
#undef dnbd3_err_dbg_host