From f0eb94d1ef693651f62256ffb2c0201c6b2977cb Mon Sep 17 00:00:00 2001 From: Frederic Robra Date: Tue, 27 Aug 2019 12:48:58 +0200 Subject: removed bug in timeout where lock was not released --- src/kernel/core.c | 4 +-- src/kernel/dnbd3.h | 1 + src/kernel/mq.c | 20 ++++++----- src/kernel/net.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 104 insertions(+), 19 deletions(-) diff --git a/src/kernel/core.c b/src/kernel/core.c index 19f4bac..6e5b82b 100644 --- a/src/kernel/core.c +++ b/src/kernel/core.c @@ -275,8 +275,8 @@ int dnbd3_add_device(struct dnbd3_device *dev, int minor) dev->tag_set.queue_depth = 128; dev->tag_set.numa_node = NUMA_NO_NODE; dev->tag_set.cmd_size = sizeof(struct dnbd3_cmd); - dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | - BLK_MQ_F_SG_MERGE; // | BLK_MQ_F_BLOCKING; + dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE | + BLK_MQ_F_BLOCKING; dev->tag_set.driver_data = dev; err = blk_mq_alloc_tag_set(&dev->tag_set); diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h index 1cf302f..4a2302d 100644 --- a/src/kernel/dnbd3.h +++ b/src/kernel/dnbd3.h @@ -166,6 +166,7 @@ struct dnbd3_device { struct mutex device_lock; + bool connected; struct dnbd3_sock socks[NUMBER_CONNECTIONS]; char *imgname; struct dnbd3_server initial_server; diff --git a/src/kernel/mq.c b/src/kernel/mq.c index 0a99817..946e395 100644 --- a/src/kernel/mq.c +++ b/src/kernel/mq.c @@ -36,6 +36,7 @@ static void dnbd3_busy_iter(struct request *req, void *priv, bool arg2) *is_busy = true; } + /** * dnbd3_is_mq_busy - check if mq is busy * @dev: the device @@ -98,7 +99,8 @@ void dnbd3_end_cmd(struct dnbd3_cmd *cmd, blk_status_t error) static bool dnbd3_is_any_sock_alive(struct dnbd3_cmd *cmd) { int i; for (i = 0; i < NUMBER_CONNECTIONS; i++) { - if (dnbd3_is_sock_alive(cmd->dnbd3->socks[i])) { + if (dnbd3_is_sock_alive(cmd->dnbd3->socks[i]) && + !cmd->dnbd3->socks->panic) { return true; } } @@ -122,16 +124,16 @@ static int dnbd3_handle_cmd(struct
dnbd3_cmd *cmd, int index) if (!(sock->server && sock->sock && !sock->panic)) { - warn_dev(dev, "attempted send on invalid socket %d", index); +// warn_dev(dev, "attempted send on invalid socket %d", index); // msleep(SOCKET_TIMEOUT_CLIENT_DATA * 1000); - if (dnbd3_is_any_sock_alive(cmd)) { - info_dev(dev, "reset request to new socket"); + if (dev->connected) { +// info_dev(dev, "reset request to new socket"); dnbd3_requeue_cmd(cmd); ret = 0; goto out; } else { - error_dev(dev, "ending request, no socket found"); + error_dev(dev, "ending request, device not connected"); dnbd3_end_cmd(cmd, BLK_STS_IOERR); ret = -EIO; goto out; @@ -218,22 +220,24 @@ static enum blk_eh_timer_return dnbd3_xmit_timeout(struct request *req, { struct dnbd3_cmd *cmd = blk_mq_rq_to_pdu(req); struct dnbd3_device *dev = cmd->dnbd3; - warn_dev(dev, "received timeout"); + warn_dev(dev, "request timed out"); if (!mutex_trylock(&cmd->lock)) { return BLK_EH_RESET_TIMER; } - if (dnbd3_is_any_sock_alive(cmd)) { + if (dev->connected) { info_dev(dev, "reset request to new socket"); dnbd3_requeue_cmd(cmd); + mutex_unlock(&cmd->lock); return BLK_EH_DONE; } - dev_err_ratelimited(disk_to_dev(dev->disk), "connection timed out\n"); + error_dev(dev, "connection timed out"); cmd->status = BLK_STS_IOERR; // blk_mq_complete_request(req); dnbd3_end_cmd(cmd, BLK_STS_TIMEOUT); + mutex_unlock(&cmd->lock); return BLK_EH_DONE; } diff --git a/src/kernel/net.c b/src/kernel/net.c index 07f350e..79c860f 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -292,6 +292,7 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock, void *kaddr; uint32_t tag, cookie; uint16_t hwq; + uint32_t remaining = reply->size; int result = 0; uint64_t handle; dnbd3_init_msghdr(msg); @@ -306,18 +307,23 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock, blk_mq_unique_tag_to_tag(tag)); } if (!req || !blk_mq_request_started(req)) { - dev_err(disk_to_dev(dev->disk), "unexpected reply (%d) %p\n", - tag, req); - 
return -EIO; + error_sock(sock, "unexpected reply (%d) %p", tag, req); + if (req) { + debug_sock(sock, "requeue request"); + dnbd3_requeue_cmd(blk_mq_rq_to_pdu(req)); + } +// return -EIO; + goto clear_socket; } cmd = blk_mq_rq_to_pdu(req); mutex_lock(&cmd->lock); if (cmd->cookie != cookie) { - dev_err(disk_to_dev(dev->disk), "double reply on req %p, cookie %u, handle cookie %u\n", + error_sock(sock, "double reply on req %p, cookie %u, handle cookie %u", req, cmd->cookie, cookie); mutex_unlock(&cmd->lock); - return -EIO; +// return -EIO; + goto clear_socket; } rq_for_each_segment(bvec_inst, req, iter) { @@ -329,13 +335,18 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock, iov.iov_len = bvec->bv_len; result = kernel_recvmsg(sock->sock, &msg, &iov, 1, bvec->bv_len, msg.msg_flags); + remaining -= result; if (result != bvec->bv_len) { kunmap(bvec->bv_page); sigprocmask(SIG_SETMASK, &oldset, NULL ); error_sock(sock, "could not receive from net to block layer"); dnbd3_requeue_cmd(cmd); mutex_unlock(&cmd->lock); - return -EIO; + if (result >= 0) { + goto clear_socket; + } else { + return result; + } } kunmap(bvec->bv_page); @@ -344,6 +355,32 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock, mutex_unlock(&cmd->lock); dnbd3_end_cmd(cmd, 0); return result; +clear_socket: + warn_sock(sock, "caught an error while receiving block, clearing buffer"); + char *buf = kmalloc(RTT_BLOCK_SIZE, GFP_KERNEL); + if (!buf) { + error_sock(sock, "kmalloc failed"); + return -EIO; + } + + iov.iov_base = buf; + iov.iov_len = RTT_BLOCK_SIZE; + while (remaining > 0) { + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, + msg.msg_flags); + if (result <= 0) { + goto error; + } + remaining -= result; + } + + debug_sock(sock, "cleared buffer %d bytes, reply size is %d", result, + reply->size); +error: + if (buf) { + kfree(buf); + } + return -EIO; } @@ -449,6 +486,7 @@ consume_payload: mutex_unlock(&dev->device_lock); return result; } + remaining -= 
result; } mutex_unlock(&dev->device_lock); return result; @@ -684,7 +722,7 @@ error: /* discovery takes care of to many failures */ sock->server->failures++; warn_sock(sock, "receive error happened %d, total failures %d", - result, sock->server->failures); + result, sock->server->failures); } debug_sock(sock, "receive completed, waiting for next receive"); } @@ -749,6 +787,34 @@ static struct dnbd3_server **dnbd3_sort_server(struct dnbd3_device *dev) { return sorted_servers; } +static int dnbd3_panic_connect(struct dnbd3_device *dev) +{ + struct dnbd3_server *working = NULL; + int i; + for (i = 0; i < NUMBER_CONNECTIONS; i++) { + if (dnbd3_is_sock_alive(dev->socks[i])) { + working = dev->socks[i].server; + } + } + if (working == NULL) { + for (i = 0; i < NUMBER_SERVERS; i++) { + if (!dnbd3_socket_connect(&dev->socks[0], + &dev->alt_servers[i])) { + working = &dev->alt_servers[i]; + } + } + } + if (working == NULL) { + return -EIO; + } + for (i = 0; i < NUMBER_CONNECTIONS; i++) { + if (dev->socks[i].server != working) { + dnbd3_socket_connect(&dev->socks[i], working); + } + } + return 0; +} + /** * dnbd3_adjust_connections - create a connection plan and connect * @dev: the dnbd3 device @@ -842,7 +908,13 @@ static void dnbd3_panic_worker(struct work_struct *work) sock_alive); mutex_lock(&dev->device_lock); - dnbd3_adjust_connections(dev); + if (dnbd3_adjust_connections(dev)) { + if (dnbd3_panic_connect(dev)) { + error_dev(dev, "failed to connect to any server"); + dev->connected = false; + } + + } mutex_unlock(&dev->device_lock); } } @@ -1043,7 +1115,13 @@ static void dnbd3_discovery_worker(struct work_struct *work) mutex_lock(&dev->device_lock); - dnbd3_adjust_connections(dev); + if (dnbd3_adjust_connections(dev)) { + if (dnbd3_panic_connect(dev)) { + error_dev(dev, "failed to connect to any server"); + dev->connected = false; + } + + } mutex_unlock(&dev->device_lock); dev->discovery_count++; @@ -1283,6 +1361,7 @@ int dnbd3_net_connect(struct dnbd3_device *dev) 
dnbd3_net_disconnect(dev); return -ENOENT; } + dev->connected = true; debug_dev(dev, "connected, starting workers"); INIT_WORK(&dev->discovery_worker, dnbd3_discovery_worker); @@ -1323,6 +1402,7 @@ int dnbd3_net_disconnect(struct dnbd3_device *dev) } } } + dev->connected = false; return result; } -- cgit v1.2.3-55-g7522