summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFrederic Robra2019-07-14 20:04:45 +0200
committerFrederic Robra2019-07-14 20:04:45 +0200
commit23e585f4af267808c2ed0c69430207d7d7716a5b (patch)
treee89fb196ce9fd45e256acf5477d436dbc090cbb1
parentsplitted receiver (diff)
downloaddnbd3-ng-23e585f4af267808c2ed0c69430207d7d7716a5b.tar.gz
dnbd3-ng-23e585f4af267808c2ed0c69430207d7d7716a5b.tar.xz
dnbd3-ng-23e585f4af267808c2ed0c69430207d7d7716a5b.zip
device now errors if connection get closed
-rw-r--r--src/kernel/core.c30
-rw-r--r--src/kernel/dnbd3.h10
-rw-r--r--src/kernel/net.c131
3 files changed, 106 insertions, 65 deletions
diff --git a/src/kernel/core.c b/src/kernel/core.c
index 8f02900..4704b0d 100644
--- a/src/kernel/core.c
+++ b/src/kernel/core.c
@@ -77,8 +77,10 @@ static int dnbd3_handle_cmd(struct dnbd3_cmd *cmd, int index)
{
struct request *req = blk_mq_rq_from_pdu(cmd);
struct dnbd3_device *dev = cmd->dnbd3;
- struct dnbd3_sock *sock;
+ struct dnbd3_sock *sock = NULL;
int ret = -1;
+ int i;
+ int sock_alive = 0;
printk(KERN_DEBUG "dnbd3: handle request at position %lu and size %d, device %i\n", blk_rq_pos(req), blk_rq_bytes(req), dev->minor);
@@ -89,11 +91,24 @@ static int dnbd3_handle_cmd(struct dnbd3_cmd *cmd, int index)
// return -EINVAL;
// }
- sock = &dev->socks[index];
- if (!sock->sock) {
+ for (i = 0; i < NUMBER_CONNECTIONS; i++) {
+ if (dev->socks[i].sock && dev->socks[i].server) {
+ if (index == sock_alive) {
+ sock = &dev->socks[index];
+ }
+ sock_alive++;
+ }
+ }
+
+ if (!sock) {
printk(KERN_INFO "dnbd3: index is %d but no socket was found\n", index);
dev_err_ratelimited(disk_to_dev(dev->disk), "attempted send on invalid socket\n");
- blk_mq_start_request(req);
+ if (sock_alive > 0) {
+ blk_mq_update_nr_hw_queues(&dev->tag_set, sock_alive);
+ blk_mq_start_request(req);
+ return -EINVAL;
+ }
+ blk_mq_end_request(req, BLK_STS_IOERR);
return -EINVAL;
}
@@ -188,6 +203,7 @@ static enum blk_eh_timer_return dnbd3_xmit_timeout(struct request *req, bool res
dev_err_ratelimited(disk_to_dev(dev->disk), "connection timed out\n");
cmd->status = BLK_STS_IOERR;
blk_mq_complete_request(req);
+ blk_mq_end_request(req, BLK_STS_TIMEOUT);
return BLK_EH_DONE;
}
@@ -292,6 +308,8 @@ static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd
kfree(dev->imgname);
dev->imgname = NULL;
}
+ dev->rid = 0;
+ dev->reported_size = 0;
break;
case IOCTL_SWITCH:
@@ -506,6 +524,10 @@ static void dnbd3_dev_remove(struct dnbd3_device *dnbd3)
dnbd3_net_disconnect(dnbd3);
disk->private_data = NULL;
put_disk(disk);
+ if (dnbd3->imgname) {
+ kfree(dnbd3->imgname);
+ dnbd3->imgname = NULL;
+ }
}
mutex_destroy(&dnbd3->device_lock);
}
diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h
index ae00cca..feb09ae 100644
--- a/src/kernel/dnbd3.h
+++ b/src/kernel/dnbd3.h
@@ -50,13 +50,12 @@ struct dnbd3_sock {
struct mutex lock;
struct request *pending;
struct dnbd3_server *server;
- uint32_t heartbeat_count;
uint32_t cookie;
uint8_t panic;//, discover, panic_count;
struct dnbd3_device *device;
- struct work_struct keepalive;
+ struct work_struct keepalive_worker;
struct timer_list keepalive_timer;
- struct work_struct receive;
+ struct work_struct receive_worker;
};
struct dnbd3_device {
@@ -85,8 +84,9 @@ struct dnbd3_device {
uint64_t reported_size;
struct work_struct panic_worker;
- struct work_struct discovery; // if in irq and need to send request
- struct timer_list discovery_timer;
+ struct work_struct discovery_worker; // if in irq and need to send request
+ struct timer_list timer;
+ uint32_t timer_count;
};
diff --git a/src/kernel/net.c b/src/kernel/net.c
index 0b83a43..e4ab608 100644
--- a/src/kernel/net.c
+++ b/src/kernel/net.c
@@ -53,7 +53,7 @@ static volatile uint64_t send_wq_signal; //TODO make atomic atomic_64_t
static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *server);
static int __dnbd3_socket_connect(struct dnbd3_server * server, struct dnbd3_sock *sock);
-static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server *server, struct dnbd3_sock *sock, bool wait_for_receiver);
+static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server *server, struct dnbd3_sock *sock);
static void dnbd3_print_host(struct dnbd3_host_t *host, char *msg)
@@ -440,7 +440,7 @@ static int dnbd3_receive_cmd_select_image(struct dnbd3_device *dev, struct dnbd3
}
static void dnbd3_receive_worker(struct work_struct *work)
{
- struct dnbd3_sock *sock = container_of(work, struct dnbd3_sock, receive);
+ struct dnbd3_sock *sock = container_of(work, struct dnbd3_sock, receive_worker);
struct dnbd3_device *dev = sock->device;
dnbd3_reply_t dnbd3_reply;
uint64_t handle;
@@ -504,6 +504,7 @@ error:
wake_up_interruptible(&send_wq);
if (result == 0) {
printk(KERN_INFO "dnbd3: result is 0, socket seems to be down\n");
+ sock->panic = 1;
// dnbd3_socket_disconnect(dev, NULL, sock, false);//TODO use panic or something or start worker to reconnect?
break; //the socket seems to be down
} else if (result < 0) {
@@ -516,28 +517,36 @@ error:
}
-void dnbd3_keepalive_timer(struct timer_list *arg)
+static void dnbd3_timer(struct timer_list *arg)
{
- struct dnbd3_sock *sock = container_of(arg, struct dnbd3_sock, keepalive_timer);
- queue_work(dnbd3_wq, &sock->keepalive);
- sock->keepalive_timer.expires = KEEPALIVE_TIMER;
- add_timer(&sock->keepalive_timer);
+ struct dnbd3_device *dev = container_of(arg, struct dnbd3_device, timer);
+ int i;
+
+ queue_work(dnbd3_wq, &dev->panic_worker);
+
+ if (dev->timer_count % TIMER_INTERVAL_KEEPALIVE_PACKET == 0) {
+ for (i = 0; i < NUMBER_CONNECTIONS; i++) {
+ if (dev->socks[i].sock && dev->socks[i].server) {
+ queue_work(dnbd3_wq, &dev->socks[i].keepalive_worker);
+ }
+ }
+ }
+ if (dev->timer_count % TIMER_INTERVAL_PROBE_NORMAL == 0) {
+ queue_work(dnbd3_wq, &dev->discovery_worker);
+ }
+
+
+ dev->timer_count++;
+ dev->timer.expires = jiffies + HZ;
+ add_timer(&dev->timer);
}
+
static void dnbd3_keepalive_worker(struct work_struct *work)
{
- struct dnbd3_sock *sock = container_of(work, struct dnbd3_sock, keepalive);
+ struct dnbd3_sock *sock = container_of(work, struct dnbd3_sock, keepalive_worker);
printk(KERN_DEBUG "dnbd3: starting keepalive worker\n");
dnbd3_send_request_blocking(sock, CMD_KEEPALIVE);
- sock->heartbeat_count++;
-}
-
-static void dnbd3_discovery_timer(struct timer_list *arg)
-{
- struct dnbd3_device *dev = container_of(arg, struct dnbd3_device, discovery_timer);
- queue_work(dnbd3_wq, &dev->discovery);
- dev->discovery_timer.expires = DISCOVERY_TIMER;
- add_timer(&dev->discovery_timer);
}
static struct dnbd3_server *dnbd3_find_best_alt_server(struct dnbd3_device *dev) {
@@ -578,24 +587,30 @@ static void dnbd3_panic_worker(struct work_struct *work)
if (panicked_sock) {
printk(KERN_WARNING "dnbd3: socket %d panicked, connections still alive %d\n", panicked_sock->sock_nr, sock_alive);
panicked_server = panicked_sock->server;
- dnbd3_socket_disconnect(dev, panicked_server, panicked_sock, true);
+ dnbd3_socket_disconnect(dev, panicked_server, panicked_sock);
new_server = dnbd3_find_best_alt_server(dev);
if (new_server != NULL && new_server != panicked_server) {
- printk(KERN_INFO "dnbd3: found replacement server");
+ printk(KERN_INFO "dnbd3: found replacement server\n");
dnbd3_socket_connect(dev, new_server);
} else if (sock_alive > 0) {
printk(KERN_INFO "dnbd3: found no replacement server but still connected to %d servers\n", sock_alive);
} else {
printk(KERN_ERR "dnbd3: could not reconnect to server\n");
}
+ } else if (sock_alive == 0) {
+ new_server = dnbd3_find_best_alt_server(dev);
+ if (new_server != NULL) {
+ printk(KERN_INFO "dnbd3: reconnect to server\n");
+ dnbd3_socket_connect(dev, new_server);
+ }
}
}
static void dnbd3_discovery_worker(struct work_struct *work)
{
- struct dnbd3_device *dev = container_of(work, struct dnbd3_device, discovery);
+ struct dnbd3_device *dev = container_of(work, struct dnbd3_device, discovery_worker);
struct dnbd3_sock *sock = NULL;
int i, j;
struct dnbd3_server *existing_server, *free_server, *failed_server;
@@ -643,10 +658,10 @@ static void dnbd3_discovery_worker(struct work_struct *work)
if (existing_server) {
if (new_server->failures == 1) { // remove is requested
dnbd3_print_host(&existing_server->host, "remove server");
- dnbd3_socket_disconnect(dev, existing_server, NULL, true); // TODO what to do when only one connection?
+ dnbd3_socket_disconnect(dev, existing_server, NULL); // TODO what to do when only one connection?
existing_server->host.type = 0;
}
- existing_server->failures = 0; // reset failure count
+// existing_server->failures = 0; // reset failure count
continue;
} else if (free_server) {
free_server->host = new_server->host;
@@ -682,6 +697,8 @@ static void dnbd3_discovery_worker(struct work_struct *work)
printk(KERN_ERR "dnbd3: kmalloc failed\n");
goto error;
}
+ mutex_init(&sock->lock);
+ mutex_lock(&sock->lock);
// measure rtt for all alt servers
for (i = 0; i < NUMBER_SERVERS; i++) {
existing_server = &dev->alt_servers[i];
@@ -689,8 +706,6 @@ static void dnbd3_discovery_worker(struct work_struct *work)
sock->sock = NULL;
sock->device = dev;
sock->server = existing_server;
-
- init_msghdr(msg);
if (__dnbd3_socket_connect(existing_server, sock)) {
printk(KERN_ERR "dnbd3: socket connect failed in rtt measurement\n");
goto rtt_error;
@@ -700,13 +715,19 @@ static void dnbd3_discovery_worker(struct work_struct *work)
printk(KERN_ERR "dnbd3: request select image failed in rtt measurement\n");
goto rtt_error;
}
+
if (dnbd3_receive_cmd(sock, &dnbd3_reply) <= 0) {
- printk(KERN_ERR "dnbd3: receive cmd failed in rtt measurement %d\n", j);
+ printk(KERN_ERR "dnbd3: receive select image failed in rtt measurement\n");
+ goto rtt_error;
+
+ }
+ if (dnbd3_reply.magic != dnbd3_packet_magic || dnbd3_reply.cmd != CMD_SELECT_IMAGE || dnbd3_reply.size < 4) {
+ printk(KERN_ERR "dnbd3: receive select image wrong header in rtt measurement\n");
goto rtt_error;
}
if (dnbd3_receive_cmd_select_image(dev, sock, &dnbd3_reply) <= 0) {
- printk(KERN_ERR "dnbd3: receive select image failed in rtt measurement %d\n", j);
+ printk(KERN_ERR "dnbd3: receive data select image failed in rtt measurement\n");
goto rtt_error;
}
@@ -767,6 +788,8 @@ rtt_error:
}
}
}
+ mutex_unlock(&sock->lock);
+ mutex_destroy(&sock->lock);
error:
if (buf) {
kfree(buf);
@@ -841,6 +864,7 @@ error:
static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *server)
{
int i;
+ int sock_alive = 0;
int result = -EIO;
struct dnbd3_sock *sock = NULL;
for (i = 0; i < NUMBER_CONNECTIONS; i++) {
@@ -869,8 +893,8 @@ static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *s
}
// start the receiver
- INIT_WORK(&sock->receive, dnbd3_receive_worker);
- queue_work(dnbd3_wq, &sock->receive);
+ INIT_WORK(&sock->receive_worker, dnbd3_receive_worker);
+ queue_work(dnbd3_wq, &sock->receive_worker);
result = dnbd3_send_request_blocking(sock, CMD_SELECT_IMAGE);
if (result) {
@@ -881,15 +905,14 @@ static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *s
printk(KERN_DEBUG "dnbd3: connected to image %s, filesize %llu\n", dev->imgname, dev->reported_size);
- // add heartbeat timer and scheduler for the command
- INIT_WORK(&sock->keepalive, dnbd3_keepalive_worker);
- sock->heartbeat_count = 0;
- timer_setup(&sock->keepalive_timer, dnbd3_keepalive_timer, 0);
- sock->keepalive_timer.expires = KEEPALIVE_TIMER;
- add_timer(&sock->keepalive_timer);
+ INIT_WORK(&sock->keepalive_worker, dnbd3_keepalive_worker);
-
-// kfree(req);
+ for (i = 0; i < NUMBER_CONNECTIONS; i++) {
+ if (dev->socks[i].sock && dev->socks[i].server) {
+ sock_alive++;
+ }
+ }
+ blk_mq_update_nr_hw_queues(&dev->tag_set, sock_alive);
return 0;
error:
if (sock->sock) {
@@ -900,7 +923,7 @@ error:
}
-static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server *server, struct dnbd3_sock *sock, bool wait_for_receiver)
+static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server *server, struct dnbd3_sock *sock)
{
int i;
int sock_alive = 0;
@@ -908,7 +931,7 @@ static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server
if (sock == NULL && dev->socks[i].server == server) {
sock = &dev->socks[i];
}
- if (dev->socks[i].sock) {
+ if (dev->socks[i].sock && dev->socks[i].server) {
sock_alive++;
}
}
@@ -916,15 +939,16 @@ static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server
printk(KERN_WARNING "dnbd3: could not find socket to disconnect\n");
return -EIO;
}
+ blk_mq_update_nr_hw_queues(&dev->tag_set, sock_alive - 1);
if (sock_alive <= 1) {
printk(KERN_INFO "dnbd3: shutting down last socket and stopping discovery\n");
- del_timer_sync(&dev->discovery_timer);
- cancel_work_sync(&dev->discovery);
+ del_timer_sync(&dev->timer);
+ dev->timer_count = 0;
+ cancel_work_sync(&dev->discovery_worker);
+// cancel_work_sync(&dev->panic_worker); // do not wait for panic_worker, probably we are called from panic_worker
}
- printk(KERN_DEBUG "dnbd3: stopping keepalive\n");
- del_timer_sync(&sock->keepalive_timer);
- cancel_work_sync(&sock->keepalive);
+ cancel_work_sync(&sock->keepalive_worker);
printk(KERN_DEBUG "dnbd3: socket disconnect device %i\n", dev->minor);
mutex_lock(&sock->lock);
@@ -945,17 +969,14 @@ static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server
mutex_unlock(&sock->lock);
mutex_destroy(&sock->lock);
- if (wait_for_receiver) {
- printk(KERN_DEBUG "dnbd3: cancel receiver work device %i\n", dev->minor);
- cancel_work_sync(&sock->receive);
- }
+ printk(KERN_DEBUG "dnbd3: cancel receiver work device %i\n", dev->minor);
+ cancel_work_sync(&sock->receive_worker);
if (sock->sock) {
sock_release(sock->sock);
sock->sock = NULL;
}
sock->panic = 0;
- sock->heartbeat_count = 0;
return 0;
}
@@ -966,7 +987,7 @@ int dnbd3_net_disconnect(struct dnbd3_device *dev)
for (i = 0; i < NUMBER_CONNECTIONS; i++) {
if (dev->socks[i].sock) {
- if (dnbd3_socket_disconnect(dev, NULL, &dev->socks[i], true)) {
+ if (dnbd3_socket_disconnect(dev, NULL, &dev->socks[i])) {
result = -EIO;
}
}
@@ -982,13 +1003,11 @@ int dnbd3_net_connect(struct dnbd3_device *dev)
if (dnbd3_socket_connect(dev, &dev->alt_servers[0]) == 0) {
dnbd3_print_server_list(dev);
- INIT_WORK(&dev->discovery, dnbd3_discovery_worker);
- timer_setup(&dev->discovery_timer, dnbd3_discovery_timer, 0);
- dev->discovery_timer.expires = DISCOVERY_TIMER;
- add_timer(&dev->discovery_timer);
-
- // let it discover alt servers
- queue_work(dnbd3_wq, &dev->discovery);
+ INIT_WORK(&dev->discovery_worker, dnbd3_discovery_worker);
+ INIT_WORK(&dev->panic_worker, dnbd3_panic_worker);
+ timer_setup(&dev->timer, dnbd3_timer, 0);
+ dev->timer.expires = jiffies + HZ;
+ add_timer(&dev->timer);
result = 0;
} else {