From f0eb94d1ef693651f62256ffb2c0201c6b2977cb Mon Sep 17 00:00:00 2001 From: Frederic Robra Date: Tue, 27 Aug 2019 12:48:58 +0200 Subject: removed bug in timeout where lock was not released --- src/kernel/net.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 89 insertions(+), 9 deletions(-) (limited to 'src/kernel/net.c') diff --git a/src/kernel/net.c b/src/kernel/net.c index 07f350e..79c860f 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -292,6 +292,7 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock, void *kaddr; uint32_t tag, cookie; uint16_t hwq; + uint32_t remaining = reply->size; int result = 0; uint64_t handle; dnbd3_init_msghdr(msg); @@ -306,18 +307,23 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock, blk_mq_unique_tag_to_tag(tag)); } if (!req || !blk_mq_request_started(req)) { - dev_err(disk_to_dev(dev->disk), "unexpected reply (%d) %p\n", - tag, req); - return -EIO; + error_sock(sock, "unexpected reply (%d) %p", tag, req); + if (req) { + debug_sock(sock, "requeue request"); + dnbd3_requeue_cmd(blk_mq_rq_to_pdu(req)); + } +// return -EIO; + goto clear_socket; } cmd = blk_mq_rq_to_pdu(req); mutex_lock(&cmd->lock); if (cmd->cookie != cookie) { - dev_err(disk_to_dev(dev->disk), "double reply on req %p, cookie %u, handle cookie %u\n", + error_sock(sock, "double reply on req %p, cookie %u, handle cookie %u", req, cmd->cookie, cookie); mutex_unlock(&cmd->lock); - return -EIO; +// return -EIO; + goto clear_socket; } rq_for_each_segment(bvec_inst, req, iter) { @@ -329,13 +335,18 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock, iov.iov_len = bvec->bv_len; result = kernel_recvmsg(sock->sock, &msg, &iov, 1, bvec->bv_len, msg.msg_flags); + remaining -= result; if (result != bvec->bv_len) { kunmap(bvec->bv_page); sigprocmask(SIG_SETMASK, &oldset, NULL ); error_sock(sock, "could not receive from net to block layer"); dnbd3_requeue_cmd(cmd); mutex_unlock(&cmd->lock); - 
return -EIO; + if (result >= 0) { + goto clear_socket; + } else { + return result; + } } kunmap(bvec->bv_page); @@ -344,6 +355,32 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock, mutex_unlock(&cmd->lock); dnbd3_end_cmd(cmd, 0); return result; +clear_socket: + warn_sock(sock, "caught an error while receiving block, clearing buffer"); + char *buf = kmalloc(RTT_BLOCK_SIZE, GFP_KERNEL); + if (!buf) { + error_sock(sock, "kmalloc failed"); + return -EIO; + } + + iov.iov_base = buf; + iov.iov_len = RTT_BLOCK_SIZE; + while (remaining > 0) { + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, + msg.msg_flags); + if (result <= 0) { + goto error; + } + remaining -= result; + } + + debug_sock(sock, "cleared buffer %d bytes, reply size is %d", result, + reply->size); +error: + if (buf) { + kfree(buf); + } + return -EIO; } @@ -449,6 +486,7 @@ consume_payload: mutex_unlock(&dev->device_lock); return result; } + remaining -= result; } mutex_unlock(&dev->device_lock); return result; @@ -684,7 +722,7 @@ error: /* discovery takes care of to many failures */ sock->server->failures++; warn_sock(sock, "receive error happened %d, total failures %d", - result, sock->server->failures); + result, sock->server->failures); } debug_sock(sock, "receive completed, waiting for next receive"); } @@ -749,6 +787,34 @@ static struct dnbd3_server **dnbd3_sort_server(struct dnbd3_device *dev) { return sorted_servers; } +static int dnbd3_panic_connect(struct dnbd3_device *dev) +{ + struct dnbd3_server *working = NULL; + int i; + for (i = 0; i < NUMBER_CONNECTIONS; i++) { + if (dnbd3_is_sock_alive(dev->socks[i])) { + working = dev->socks[i].server; + } + } + if (working == NULL) { + for (i = 0; i < NUMBER_SERVERS; i++) { + if (!dnbd3_socket_connect(&dev->socks[0], + &dev->alt_servers[i])) { + working = &dev->alt_servers[i]; + } + } + } + if (working == NULL) { + return -EIO; + } + for (i = 0; i < NUMBER_CONNECTIONS; i++) { + if (dev->socks[i].server != working) { + 
dnbd3_socket_connect(&dev->socks[i], working); + } + } + return 0; +} + /** * dnbd3_adjust_connections - create a connection plan and connect * @dev: the dnbd3 device @@ -842,7 +908,13 @@ static void dnbd3_panic_worker(struct work_struct *work) sock_alive); mutex_lock(&dev->device_lock); - dnbd3_adjust_connections(dev); + if (dnbd3_adjust_connections(dev)) { + if (dnbd3_panic_connect(dev)) { + error_dev(dev, "failed to connect to any server"); + dev->connected = false; + } + + } mutex_unlock(&dev->device_lock); } } @@ -1043,7 +1115,13 @@ static void dnbd3_discovery_worker(struct work_struct *work) mutex_lock(&dev->device_lock); - dnbd3_adjust_connections(dev); + if (dnbd3_adjust_connections(dev)) { + if (dnbd3_panic_connect(dev)) { + error_dev(dev, "failed to connect to any server"); + dev->connected = false; + } + + } mutex_unlock(&dev->device_lock); dev->discovery_count++; @@ -1283,6 +1361,7 @@ int dnbd3_net_connect(struct dnbd3_device *dev) dnbd3_net_disconnect(dev); return -ENOENT; } + dev->connected = true; debug_dev(dev, "connected, starting workers"); INIT_WORK(&dev->discovery_worker, dnbd3_discovery_worker); @@ -1323,6 +1402,7 @@ int dnbd3_net_disconnect(struct dnbd3_device *dev) } } } + dev->connected = false; return result; } -- cgit v1.2.3-55-g7522