diff options
Diffstat (limited to 'src/kernel')
-rw-r--r-- | src/kernel/blk.c | 1 | ||||
-rw-r--r-- | src/kernel/net.c | 70 |
2 files changed, 50 insertions, 21 deletions
diff --git a/src/kernel/blk.c b/src/kernel/blk.c index 2d733b1..69e4583 100644 --- a/src/kernel/blk.c +++ b/src/kernel/blk.c @@ -473,6 +473,7 @@ int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor) #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 17, 0) \ + || (LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 132)) \ || RHEL_CHECK_VERSION(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(9, 0)) dev->disk->flags |= GENHD_FL_NO_PART; #else diff --git a/src/kernel/net.c b/src/kernel/net.c index 5ef4016..fcea31b 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -86,7 +86,7 @@ static bool dnbd3_execute_handshake(dnbd3_device_t *dev, struct socket *sock, struct sockaddr_storage *addr, uint16_t *remote_version, bool copy_image_info); static bool dnbd3_request_test_block(dnbd3_device_t *dev, struct sockaddr_storage *addr, - struct socket *sock); + struct socket *sock, u64 test_start, u32 test_size); static bool dnbd3_send_empty_request(dnbd3_device_t *dev, u16 cmd); @@ -143,6 +143,7 @@ static void dnbd3_start_discover(dnbd3_device_t *dev, bool panic) // Panic freshly turned on dev->panic = true; dev->discover_interval = TIMER_INTERVAL_PROBE_PANIC; + dev->discover_count = 0; } spin_unlock_irqrestore(&dev->blk_lock, irqflags); dnbd3_flag_reset(dev->connection_lock); @@ -192,10 +193,14 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev) { struct socket *sock, *best_sock = NULL; dnbd3_alt_server_t *alt; + struct request *blk_request; struct sockaddr_storage host_compare, best_server; uint16_t remote_version; ktime_t start, end; unsigned long rtt = 0, best_rtt = 0; + u64 test_start = 0; + u32 test_size = RTT_BLOCK_SIZE; + unsigned long irqflags; int i, j, k, isize, fails, rtt_threshold; int do_change = 0; u8 check_order[NUMBER_SERVERS]; @@ -219,6 +224,10 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev) best_server.ss_family = 0; best_rtt = RTT_UNREACHABLE; + if (dev->panic) { + dnbd3_dev_dbg_host(dev, &host_compare, "Discover in panic mode\n"); + } + if (!ready || dev->panic) isize = NUMBER_SERVERS; else @@ -249,6 +258,24 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev) if (!dnbd3_execute_handshake(dev, sock, &host_compare, &remote_version, false)) goto error; + if (dev->panic) { + // In panic mode, use next pending request for testing, this has a higher chance of + // filtering out a server which can't actually handle our requests, instead of just + // requesting the very first block which should be cached by every server. + spin_lock_irqsave(&dev->send_queue_lock, irqflags); + if (!list_empty(&dev->send_queue)) { + blk_request = list_entry(dev->send_queue.next, struct request, queuelist); + test_start = blk_rq_pos(blk_request) << 9; /* sectors to bytes */ + test_size = blk_rq_bytes(blk_request); + } + spin_unlock_irqrestore(&dev->send_queue_lock, irqflags); + } + + // actual rtt measurement is just the first block request and reply + start = ktime_get_real(); + if (!dnbd3_request_test_block(dev, &host_compare, sock, test_start, test_size)) + goto error; + end = ktime_get_real(); // panic mode, take first responding server if (dev->panic) { @@ -259,7 +286,10 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev) // Check global flag, a connect might have been in progress if (best_sock != NULL) sock_release(best_sock); - set_socket_timeout(sock, false, SOCKET_TIMEOUT_RECV * 1000 + 1000); + set_socket_timeout(sock, false, MAX( + SOCKET_TIMEOUT_RECV * 1000, + (int)ktime_ms_delta(end, start) + ) + 1000); if (dnbd3_set_primary_connection(dev, sock, &host_compare, remote_version) != 0) sock_release(sock); dnbd3_flag_reset(dev->connection_lock); @@ -267,12 +297,6 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev) } } - // actual rtt measurement is just the first block requests and reply - start = ktime_get_real(); - if (!dnbd3_request_test_block(dev, &host_compare, sock)) - goto error; - end = ktime_get_real(); - mutex_lock(&dev->alt_servers_lock); if (is_same_server(&dev->alt_servers[i].host, &host_compare)) { dev->alt_servers[i].protocol_version = remote_version; @@ -446,7 +470,9 @@ static void dnbd3_recv_workfn(struct work_struct *work) int remaining; int ret; + dnbd3_dev_dbg_cur(dev, "starting receive worker...\n"); mutex_lock(&dev->recv_mutex); + dnbd3_dev_dbg_cur(dev, "receive worker started\n"); while (dev->sock) { // receive net reply ret = dnbd3_recv_reply(dev->sock, &reply_hdr); @@ -594,6 +620,7 @@ static void dnbd3_recv_workfn(struct work_struct *work) out_unlock: // This will check if we actually still need a new connection dnbd3_start_discover(dev, true); + dnbd3_dev_dbg_cur(dev, "Receive worker exited\n"); mutex_unlock(&dev->recv_mutex); } @@ -623,7 +650,7 @@ static void set_socket_timeout(struct socket *sock, bool set_send, int timeout_m static int dnbd3_connect(dnbd3_device_t *dev, struct sockaddr_storage *addr, struct socket **sock_out) { ktime_t start; - int ret, connect_time_ms; + int ret, connect_time_ms, diff; struct socket *sock; int retries = 4; const int addrlen = addr->ss_family == AF_INET ? sizeof(struct sockaddr_in) @@ -659,7 +686,7 @@ static int dnbd3_connect(dnbd3_device_t *dev, struct sockaddr_storage *addr, str if (dev->panic && dev->panic_count > 1) { /* in panic mode for some time, start increasing timeouts */ - connect_time_ms = dev->panic_count * 1000; + connect_time_ms = dev->panic_count * 333; } else { /* otherwise, use 2*RTT of current server */ connect_time_ms = dev->cur_server.rtt * 2 / 1000; @@ -667,21 +694,21 @@ static int dnbd3_connect(dnbd3_device_t *dev, struct sockaddr_storage *addr, str /* but obey a minimal configurable value, and maximum sanity check */ if (connect_time_ms < SOCKET_TIMEOUT_SEND * 1000) connect_time_ms = SOCKET_TIMEOUT_SEND * 1000; - else if (connect_time_ms > 60000) - connect_time_ms = 60000; + else if (connect_time_ms > 15000) + connect_time_ms = 15000; set_socket_timeout(sock, false, connect_time_ms); // recv set_socket_timeout(sock, true, connect_time_ms); // send start = ktime_get_real(); while (--retries > 0) { ret = kernel_connect(sock, (struct sockaddr *)addr, addrlen, 0); - connect_time_ms = (int)ktime_ms_delta(ktime_get_real(), start); - if (connect_time_ms > 2 * SOCKET_TIMEOUT_SEND * 1000) { + diff = (int)ktime_ms_delta(ktime_get_real(), start); + if (diff > 2 * connect_time_ms) { /* Either I'm losing my mind or there was a specific build of kernel * 5.x where SO_RCVTIMEO didn't affect the connect call above, so * this function would hang for over a minute for unreachable hosts. - * Leave in this debug check for twice the configured timeout + * Leave in this debug check for twice the configured timeout. */ - dnbd3_dev_dbg_host(dev, addr, "connect: call took %dms\n", + dnbd3_dev_err_host(dev, addr, "connect: call took %dms\n", connect_time_ms); } if (ret != 0) { @@ -916,23 +943,24 @@ static bool dnbd3_drain_socket(dnbd3_device_t *dev, struct socket *sock, int byt return true; } -static bool dnbd3_request_test_block(dnbd3_device_t *dev, struct sockaddr_storage *addr, struct socket *sock) +static bool dnbd3_request_test_block(dnbd3_device_t *dev, struct sockaddr_storage *addr, + struct socket *sock, u64 test_start, u32 test_size) { dnbd3_reply_t reply_hdr; // Request block - if (!dnbd3_send_request(sock, CMD_GET_BLOCK, 0, 0, RTT_BLOCK_SIZE)) { + if (!dnbd3_send_request(sock, CMD_GET_BLOCK, 0, test_start, test_size)) { dnbd3_err_dbg_host(dev, addr, "requesting test block failed\n"); return false; } - // receive net reply + // receive reply header if (dnbd3_recv_reply(sock, &reply_hdr) != sizeof(reply_hdr)) { dnbd3_err_dbg_host(dev, addr, "receiving test block header packet failed\n"); return false; } if (reply_hdr.magic != dnbd3_packet_magic || reply_hdr.cmd != CMD_GET_BLOCK - || reply_hdr.size != RTT_BLOCK_SIZE || reply_hdr.handle != 0) { + || reply_hdr.size != test_size || reply_hdr.handle != 0) { dnbd3_err_dbg_host(dev, addr, "unexpected reply to block request: cmd=%d, size=%d, handle=%llu (discover)\n", (int)reply_hdr.cmd, (int)reply_hdr.size, reply_hdr.handle); @@ -940,7 +968,7 @@ static bool dnbd3_request_test_block(dnbd3_device_t *dev, struct sockaddr_storag } // receive data - return dnbd3_drain_socket(dev, sock, RTT_BLOCK_SIZE); + return dnbd3_drain_socket(dev, sock, test_size); } #undef dnbd3_err_dbg_host |