summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFrederic Robra2019-08-27 12:48:58 +0200
committerFrederic Robra2019-08-27 12:48:58 +0200
commitf0eb94d1ef693651f62256ffb2c0201c6b2977cb (patch)
treeb1e69eeff33f37a1dc024de5299d212a11ccde95
parentmoved mq part to new file (diff)
downloaddnbd3-ng-f0eb94d1ef693651f62256ffb2c0201c6b2977cb.tar.gz
dnbd3-ng-f0eb94d1ef693651f62256ffb2c0201c6b2977cb.tar.xz
dnbd3-ng-f0eb94d1ef693651f62256ffb2c0201c6b2977cb.zip
removed bug in timeout where lock was not released
-rw-r--r--src/kernel/core.c4
-rw-r--r--src/kernel/dnbd3.h1
-rw-r--r--src/kernel/mq.c20
-rw-r--r--src/kernel/net.c98
4 files changed, 104 insertions, 19 deletions
diff --git a/src/kernel/core.c b/src/kernel/core.c
index 19f4bac..6e5b82b 100644
--- a/src/kernel/core.c
+++ b/src/kernel/core.c
@@ -275,8 +275,8 @@ int dnbd3_add_device(struct dnbd3_device *dev, int minor)
dev->tag_set.queue_depth = 128;
dev->tag_set.numa_node = NUMA_NO_NODE;
dev->tag_set.cmd_size = sizeof(struct dnbd3_cmd);
- dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
- BLK_MQ_F_SG_MERGE; // | BLK_MQ_F_BLOCKING;
+ dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE |
+ BLK_MQ_F_BLOCKING;
dev->tag_set.driver_data = dev;
err = blk_mq_alloc_tag_set(&dev->tag_set);
diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h
index 1cf302f..4a2302d 100644
--- a/src/kernel/dnbd3.h
+++ b/src/kernel/dnbd3.h
@@ -166,6 +166,7 @@ struct dnbd3_device {
struct mutex device_lock;
+ bool connected;
struct dnbd3_sock socks[NUMBER_CONNECTIONS];
char *imgname;
struct dnbd3_server initial_server;
diff --git a/src/kernel/mq.c b/src/kernel/mq.c
index 0a99817..946e395 100644
--- a/src/kernel/mq.c
+++ b/src/kernel/mq.c
@@ -36,6 +36,7 @@ static void dnbd3_busy_iter(struct request *req, void *priv, bool arg2)
*is_busy = true;
}
+
/**
* dnbd3_is_mq_busy - check if mq is busy
* @dev: the device
@@ -98,7 +99,8 @@ void dnbd3_end_cmd(struct dnbd3_cmd *cmd, blk_status_t error)
static bool dnbd3_is_any_sock_alive(struct dnbd3_cmd *cmd) {
int i;
for (i = 0; i < NUMBER_CONNECTIONS; i++) {
- if (dnbd3_is_sock_alive(cmd->dnbd3->socks[i])) {
+ if (dnbd3_is_sock_alive(cmd->dnbd3->socks[i]) &&
+ !cmd->dnbd3->socks->panic) {
return true;
}
}
@@ -122,16 +124,16 @@ static int dnbd3_handle_cmd(struct dnbd3_cmd *cmd, int index)
if (!(sock->server && sock->sock && !sock->panic)) {
- warn_dev(dev, "attempted send on invalid socket %d", index);
+// warn_dev(dev, "attempted send on invalid socket %d", index);
// msleep(SOCKET_TIMEOUT_CLIENT_DATA * 1000);
- if (dnbd3_is_any_sock_alive(cmd)) {
- info_dev(dev, "reset request to new socket");
+ if (dev->connected) {
+// info_dev(dev, "reset request to new socket");
dnbd3_requeue_cmd(cmd);
ret = 0;
goto out;
} else {
- error_dev(dev, "ending request, no socket found");
+ error_dev(dev, "ending request, device not connected");
dnbd3_end_cmd(cmd, BLK_STS_IOERR);
ret = -EIO;
goto out;
@@ -218,22 +220,24 @@ static enum blk_eh_timer_return dnbd3_xmit_timeout(struct request *req,
{
struct dnbd3_cmd *cmd = blk_mq_rq_to_pdu(req);
struct dnbd3_device *dev = cmd->dnbd3;
- warn_dev(dev, "received timeout");
+ warn_dev(dev, "request timed out");
if (!mutex_trylock(&cmd->lock)) {
return BLK_EH_RESET_TIMER;
}
- if (dnbd3_is_any_sock_alive(cmd)) {
+ if (dev->connected) {
info_dev(dev, "reset request to new socket");
dnbd3_requeue_cmd(cmd);
+ mutex_unlock(&cmd->lock);
return BLK_EH_DONE;
}
- dev_err_ratelimited(disk_to_dev(dev->disk), "connection timed out\n");
+ error_dev(dev, "connection timed out");
cmd->status = BLK_STS_IOERR;
// blk_mq_complete_request(req);
dnbd3_end_cmd(cmd, BLK_STS_TIMEOUT);
+ mutex_unlock(&cmd->lock);
return BLK_EH_DONE;
}
diff --git a/src/kernel/net.c b/src/kernel/net.c
index 07f350e..79c860f 100644
--- a/src/kernel/net.c
+++ b/src/kernel/net.c
@@ -292,6 +292,7 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock,
void *kaddr;
uint32_t tag, cookie;
uint16_t hwq;
+ uint32_t remaining = reply->size;
int result = 0;
uint64_t handle;
dnbd3_init_msghdr(msg);
@@ -306,18 +307,23 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock,
blk_mq_unique_tag_to_tag(tag));
}
if (!req || !blk_mq_request_started(req)) {
- dev_err(disk_to_dev(dev->disk), "unexpected reply (%d) %p\n",
- tag, req);
- return -EIO;
+ error_sock(sock, "unexpected reply (%d) %p", tag, req);
+ if (req) {
+ debug_sock(sock, "requeue request");
+ dnbd3_requeue_cmd(blk_mq_rq_to_pdu(req));
+ }
+// return -EIO;
+ goto clear_socket;
}
cmd = blk_mq_rq_to_pdu(req);
mutex_lock(&cmd->lock);
if (cmd->cookie != cookie) {
- dev_err(disk_to_dev(dev->disk), "double reply on req %p, cookie %u, handle cookie %u\n",
+ error_sock(sock, "double reply on req %p, cookie %u, handle cookie %u",
req, cmd->cookie, cookie);
mutex_unlock(&cmd->lock);
- return -EIO;
+// return -EIO;
+ goto clear_socket;
}
rq_for_each_segment(bvec_inst, req, iter) {
@@ -329,13 +335,18 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock,
iov.iov_len = bvec->bv_len;
result = kernel_recvmsg(sock->sock, &msg, &iov, 1, bvec->bv_len,
msg.msg_flags);
+ remaining -= result;
if (result != bvec->bv_len) {
kunmap(bvec->bv_page);
sigprocmask(SIG_SETMASK, &oldset, NULL );
error_sock(sock, "could not receive from net to block layer");
dnbd3_requeue_cmd(cmd);
mutex_unlock(&cmd->lock);
- return -EIO;
+ if (result >= 0) {
+ goto clear_socket;
+ } else {
+ return result;
+ }
}
kunmap(bvec->bv_page);
@@ -344,6 +355,32 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock,
mutex_unlock(&cmd->lock);
dnbd3_end_cmd(cmd, 0);
return result;
+clear_socket:
+ warn_sock(sock, "caught an error while receiving block, clearing buffer");
+ char *buf = kmalloc(RTT_BLOCK_SIZE, GFP_KERNEL);
+ if (!buf) {
+ error_sock(sock, "kmalloc failed");
+ return -EIO;
+ }
+
+ iov.iov_base = buf;
+ iov.iov_len = RTT_BLOCK_SIZE;
+ while (remaining > 0) {
+ result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len,
+ msg.msg_flags);
+ if (result <= 0) {
+ goto error;
+ }
+ remaining -= result;
+ }
+
+ debug_sock(sock, "cleared buffer %d bytes, reply size is %d", result,
+ reply->size);
+error:
+ if (buf) {
+ kfree(buf);
+ }
+ return -EIO;
}
@@ -449,6 +486,7 @@ consume_payload:
mutex_unlock(&dev->device_lock);
return result;
}
+ remaining -= result;
}
mutex_unlock(&dev->device_lock);
return result;
@@ -684,7 +722,7 @@ error:
/* discovery takes care of to many failures */
sock->server->failures++;
warn_sock(sock, "receive error happened %d, total failures %d",
- result, sock->server->failures);
+ result, sock->server->failures);
}
debug_sock(sock, "receive completed, waiting for next receive");
}
@@ -749,6 +787,34 @@ static struct dnbd3_server **dnbd3_sort_server(struct dnbd3_device *dev) {
return sorted_servers;
}
+/**
+ * dnbd3_panic_connect - fall back to a single server for all connections
+ * @dev: the dnbd3 device
+ *
+ * Picks a server from a socket that is still alive. If no socket is alive,
+ * the alternative servers are tried in order on socket 0 until one connect
+ * attempt succeeds (a zero return from dnbd3_socket_connect() is treated as
+ * success). Every socket not already assigned to the chosen server is then
+ * connected to it.
+ *
+ * The callers visible in this file invoke this with dev->device_lock held.
+ *
+ * Return: 0 if a server was chosen, -EIO if no server could be reached.
+ */
+static int dnbd3_panic_connect(struct dnbd3_device *dev)
+{
+	struct dnbd3_server *working = NULL;
+	int i;
+
+	for (i = 0; i < NUMBER_CONNECTIONS; i++) {
+		if (dnbd3_is_sock_alive(dev->socks[i])) {
+			working = dev->socks[i].server;
+		}
+	}
+
+	if (working == NULL) {
+		for (i = 0; i < NUMBER_SERVERS; i++) {
+			if (!dnbd3_socket_connect(&dev->socks[0],
+					&dev->alt_servers[i])) {
+				working = &dev->alt_servers[i];
+				/*
+				 * Stop at the first server that accepts the
+				 * connection; trying the remaining ones would
+				 * reconnect socket 0 over the connection that
+				 * was just established.
+				 */
+				break;
+			}
+		}
+	}
+
+	if (working == NULL) {
+		return -EIO;
+	}
+
+	/*
+	 * Best effort: a failure to move one of the sockets is not reported,
+	 * a working server has already been chosen at this point.
+	 */
+	for (i = 0; i < NUMBER_CONNECTIONS; i++) {
+		if (dev->socks[i].server != working) {
+			dnbd3_socket_connect(&dev->socks[i], working);
+		}
+	}
+	return 0;
+}
+
/**
* dnbd3_adjust_connections - create a connection plan and connect
* @dev: the dnbd3 device
@@ -842,7 +908,13 @@ static void dnbd3_panic_worker(struct work_struct *work)
sock_alive);
mutex_lock(&dev->device_lock);
- dnbd3_adjust_connections(dev);
+ if (dnbd3_adjust_connections(dev)) {
+ if (dnbd3_panic_connect(dev)) {
+ error_dev(dev, "failed to connect to any server");
+ dev->connected = false;
+ }
+
+ }
mutex_unlock(&dev->device_lock);
}
}
@@ -1043,7 +1115,13 @@ static void dnbd3_discovery_worker(struct work_struct *work)
mutex_lock(&dev->device_lock);
- dnbd3_adjust_connections(dev);
+ if (dnbd3_adjust_connections(dev)) {
+ if (dnbd3_panic_connect(dev)) {
+ error_dev(dev, "failed to connect to any server");
+ dev->connected = false;
+ }
+
+ }
mutex_unlock(&dev->device_lock);
dev->discovery_count++;
@@ -1283,6 +1361,7 @@ int dnbd3_net_connect(struct dnbd3_device *dev)
dnbd3_net_disconnect(dev);
return -ENOENT;
}
+ dev->connected = true;
debug_dev(dev, "connected, starting workers");
INIT_WORK(&dev->discovery_worker, dnbd3_discovery_worker);
@@ -1323,6 +1402,7 @@ int dnbd3_net_disconnect(struct dnbd3_device *dev)
}
}
}
+ dev->connected = false;
return result;
}