summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFrederic Robra2019-07-17 15:24:49 +0200
committerFrederic Robra2019-07-17 15:24:49 +0200
commit278fcb346fa53d94ad018cdd2f80dfd8b99335c2 (patch)
tree1e59a5dc51a5827f624133e56aec28264123bd2f
parentdiscovery now connects to new slots (diff)
downloaddnbd3-ng-278fcb346fa53d94ad018cdd2f80dfd8b99335c2.tar.gz
dnbd3-ng-278fcb346fa53d94ad018cdd2f80dfd8b99335c2.tar.xz
dnbd3-ng-278fcb346fa53d94ad018cdd2f80dfd8b99335c2.zip
fixed various bugs
-rw-r--r--src/kernel/core.c39
-rw-r--r--src/kernel/dnbd3.h67
-rw-r--r--src/kernel/net.c476
3 files changed, 328 insertions, 254 deletions
diff --git a/src/kernel/core.c b/src/kernel/core.c
index 9d930cb..bf316d4 100644
--- a/src/kernel/core.c
+++ b/src/kernel/core.c
@@ -67,8 +67,8 @@ int major;
static void dnbd3_requeue_cmd(struct dnbd3_cmd *cmd)
{
struct request *req = blk_mq_rq_from_pdu(cmd);
-
- if (!test_and_set_bit(DNBD3_CMD_REQUEUED, &cmd->flags)) {
+ if (!cmd->requed) {
+ cmd->requed = true;
blk_mq_requeue_request(req, true);
}
}
@@ -115,9 +115,9 @@ static int dnbd3_handle_cmd(struct dnbd3_cmd *cmd, int index)
cmd->status = BLK_STS_OK;
- mutex_lock(&sock->lock);
+ mutex_lock(&sock->tx_lock);
if (unlikely(!sock->sock)) {
- mutex_unlock(&sock->lock);
+ mutex_unlock(&sock->tx_lock);
warn_sock(sock, "not connected");
return -EIO;
}
@@ -136,7 +136,7 @@ static int dnbd3_handle_cmd(struct dnbd3_cmd *cmd, int index)
ret = 0;
}
out:
- mutex_unlock(&sock->lock);
+ mutex_unlock(&sock->tx_lock);
return ret;
}
@@ -144,18 +144,14 @@ static blk_status_t dnbd3_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_
{
struct dnbd3_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
int ret;
- struct dnbd3_device *dev = cmd->dnbd3;
-
- debug_dev(dev, "queue request");
mutex_lock(&cmd->lock);
- clear_bit(DNBD3_CMD_REQUEUED, &cmd->flags);
-
+ cmd->requed = false;
ret = dnbd3_handle_cmd(cmd, hctx->queue_num);
if (ret < 0) {
ret = BLK_STS_IOERR;
- } else if (!ret) {
+ } else if (ret >= 0) {
ret = BLK_STS_OK;
}
mutex_unlock(&cmd->lock);
@@ -167,7 +163,7 @@ static int dnbd3_init_request(struct blk_mq_tag_set *set, struct request *rq, un
{
struct dnbd3_cmd *cmd = blk_mq_rq_to_pdu(rq);
cmd->dnbd3 = set->driver_data;
- cmd->flags = 0;
+ cmd->requed = false;
mutex_init(&cmd->lock);
return 0;
}
@@ -482,25 +478,26 @@ static int dnbd3_exit_cb(int id, void *ptr, void *data)
return 0;
}
-static void dnbd3_dev_remove(struct dnbd3_device *dnbd3)
+static void dnbd3_dev_remove(struct dnbd3_device *dev)
{
- struct gendisk *disk = dnbd3->disk;
+ struct gendisk *disk = dev->disk;
struct request_queue *q;
+ dnbd3_net_disconnect(dev);
+
if (disk) {
q = disk->queue;
del_gendisk(disk);
blk_cleanup_queue(q);
- blk_mq_free_tag_set(&dnbd3->tag_set);
- dnbd3_net_disconnect(dnbd3);
+ blk_mq_free_tag_set(&dev->tag_set);
disk->private_data = NULL;
put_disk(disk);
- if (dnbd3->imgname) {
- kfree(dnbd3->imgname);
- dnbd3->imgname = NULL;
- }
}
- mutex_destroy(&dnbd3->device_lock);
+ if (dev->imgname) {
+ kfree(dev->imgname);
+ dev->imgname = NULL;
+ }
+ mutex_destroy(&dev->device_lock);
}
static void dnbd3_put(struct dnbd3_device *dnbd3)
diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h
index 43b923f..c9f9fab 100644
--- a/src/kernel/dnbd3.h
+++ b/src/kernel/dnbd3.h
@@ -31,32 +31,49 @@
#include "serialize.h"
-#define NUMBER_CONNECTIONS 4 // power of 2
+#define NUMBER_CONNECTIONS 2 // power of 2
#define DEBUG
extern struct workqueue_struct *dnbd3_wq;
+/**
+ * struct dnbd3_server - defining a server for dnbd3
+ * @host: host of this server
+ * @rtts: last four round trip time measurements in microseconds
+ * @protocol_version: dnbd3 protocol version of this server
+ * @failures: how many times the server was unreachable
+ */
struct dnbd3_server {
dnbd3_host_t host;
- uint64_t rtts[4]; // Last four round trip time measurements in microsecond
- uint16_t protocol_version; // dnbd3 protocol version of this server
- uint8_t failures; // How many times the server was unreachable
+ uint64_t rtts[4];
+ uint16_t protocol_version;
+ uint8_t failures;
};
-
+/**
+ * struct dnbd3_sock - defining a socket for dnbd3
+ * @sock_nr: nr of this socket
+ * @device: the dnbd3_device this socket belongs to
+ * @server: the server this socket is connected to, 'NULL' if not connected
+ *
+ */
struct dnbd3_sock {
- uint16_t sock_nr;
- struct socket *sock;
- struct mutex lock;
- struct request *pending;
+ uint8_t sock_nr;
+ struct dnbd3_device *device;
struct dnbd3_server *server;
+
+ struct mutex tx_lock;
+ struct socket *sock;
+
+ bool panic;//, discover, panic_count;
uint32_t cookie;
- uint8_t panic;//, discover, panic_count;
- struct dnbd3_device *device;
+
struct work_struct keepalive_worker;
- struct timer_list keepalive_timer;
struct work_struct receive_worker;
+
+ struct request *pending;
+ uint16_t receive_command;
};
struct dnbd3_device {
@@ -79,13 +96,13 @@ struct dnbd3_device {
struct dnbd3_server alt_servers[NUMBER_SERVERS]; // array of alt servers
int new_servers_num; // number of new alt servers that are waiting to be copied to above array
dnbd3_server_entry_t new_servers[NUMBER_SERVERS]; // pending new alt servers
- uint8_t update_available;
- uint8_t use_server_provided_alts;
+ bool update_available;
+ bool use_server_provided_alts;
uint16_t rid;
uint64_t reported_size;
struct work_struct panic_worker;
- struct work_struct discovery_worker; // if in irq and need to send request
+ struct work_struct discovery_worker;
uint8_t discovery_count;
struct timer_list timer;
uint8_t timer_count;
@@ -95,11 +112,9 @@ struct dnbd3_device {
struct dnbd3_cmd {
struct dnbd3_device *dnbd3;
struct mutex lock;
-// int index;
uint32_t cookie;
blk_status_t status;
- unsigned long flags;
-// uint32_t cmd_cookie;
+ bool requed;
};
@@ -112,7 +127,7 @@ struct dnbd3_cmd {
} \
} while (0)
-#define print_server(level, dev, server, fmt, ...) \
+#define _print_server(level, dev, server, fmt, ...) \
do { \
if ((server)->host.type == HOST_IP4) { \
printk(level "dnbd%d: " fmt " %pI4:%d\n", (dev)->minor, ## __VA_ARGS__, (server)->host.addr, (server)->host.port); \
@@ -133,6 +148,8 @@ struct dnbd3_cmd {
#define debug_sock(sock, fmt, ...) \
_print_sock(KERN_DEBUG, sock, fmt, ## __VA_ARGS__)
+#define debug_server(dev, server, fmt, ...) \
+ _print_server(KERN_DEBUG, dev, server, fmt, ## __VA_ARGS__)
#else
@@ -142,6 +159,7 @@ struct dnbd3_cmd {
#define debug_sock(sock, fmt, ...)
+#define debug_server(dev, server, fmt, ...)
#endif
@@ -152,7 +170,10 @@ struct dnbd3_cmd {
printk(KERN_INFO "dnbd%d: " fmt "\n", (dev)->minor, ## __VA_ARGS__)
#define info_sock(sock, fmt, ...) \
- _print_sock(KERN_DEBUG, sock, fmt, ## __VA_ARGS__)
+ _print_sock(KERN_INFO, sock, fmt, ## __VA_ARGS__)
+
+#define info_server(dev, server, fmt, ...) \
+ _print_server(KERN_INFO, dev, server, fmt, ## __VA_ARGS__)
#define warn(fmt, ...) \
@@ -164,6 +185,9 @@ struct dnbd3_cmd {
#define warn_sock(sock, fmt, ...) \
_print_sock(KERN_WARNING, sock, fmt, ## __VA_ARGS__)
+#define warn_server(dev, server, fmt, ...) \
+ _print_server(KERN_WARNING, dev, server, fmt, ## __VA_ARGS__)
+
#define error(fmt, ...) \
printk(KERN_ERR "dnbd: " fmt "\n", ## __VA_ARGS__)
@@ -174,4 +198,7 @@ struct dnbd3_cmd {
#define error_sock(sock, fmt, ...) \
_print_sock(KERN_ERR, sock, fmt, ## __VA_ARGS__)
+#define error_server(dev, server, fmt, ...) \
+ _print_server(KERN_ERR, dev, server, fmt, ## __VA_ARGS__)
+
#endif /* DNBD_H_ */
diff --git a/src/kernel/net.c b/src/kernel/net.c
index 6d8538d..d493994 100644
--- a/src/kernel/net.c
+++ b/src/kernel/net.c
@@ -49,7 +49,6 @@
} while (0)
static DECLARE_WAIT_QUEUE_HEAD(send_wq);
-static atomic64_t send_wq_signal;
static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *server);
static int __dnbd3_socket_connect(struct dnbd3_server * server, struct dnbd3_sock *sock);
@@ -59,10 +58,10 @@ static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server
static void dnbd3_print_server_list(struct dnbd3_device *dev)
{
int i;
- print_server(KERN_INFO, dev, &dev->initial_server, "initial server is");
+ info_server(dev, &dev->initial_server, "initial server is");
for (i = 0; i < NUMBER_SERVERS; i++) {
if (dev->alt_servers[i].host.addr[0] != 0) {
- print_server(KERN_INFO, dev, &dev->alt_servers[i], "alternative server is");
+ info_server(dev, &dev->alt_servers[i], "alternative server is");
}
}
}
@@ -73,15 +72,15 @@ static inline uint64_t dnbd3_to_wq_signal(int minor, uint16_t dnbd3_cmd, uint16_
}
-static uint64_t dnbd3_to_handle(uint32_t arg0, uint32_t arg1) {
- return ((uint64_t) arg0 << 32) | arg1;
+static inline uint64_t dnbd3_to_handle(uint32_t tag, uint32_t cookie) {
+ return ((uint64_t) tag << 32) | cookie;
}
-static uint32_t dnbd3_arg0_from_handle(uint64_t handle) {
+static inline uint32_t dnbd3_tag_from_handle(uint64_t handle) {
return (uint32_t)(handle >> 32);
}
-static uint32_t dnbd3_arg1_from_handle(uint64_t handle) {
+static inline uint32_t dnbd3_cookie_from_handle(uint64_t handle) {
return (uint32_t) handle;
}
@@ -142,26 +141,27 @@ int dnbd3_send_request(struct dnbd3_sock *sock, struct request *req, struct dnbd
iov[0].iov_base = &dnbd3_request;
iov[0].iov_len = sizeof(dnbd3_request);
send_len = iov_num == 1 ? sizeof(dnbd3_request) : iov[0].iov_len + iov[1].iov_len;
- if ((result = kernel_sendmsg(sock->sock, &msg, iov, iov_num, send_len)) != send_len) {
+ result = kernel_sendmsg(sock->sock, &msg, iov, iov_num, send_len);
+ if (result != send_len) {
error_sock(sock, "connection to server lost");
+ sock->server->failures++;
goto error;
}
sock->pending = NULL;
- result = 0;
error:
return result;
}
-int dnbd3_send_request_blocking(struct dnbd3_sock *sock, int dnbd3_cmd)
+static int dnbd3_send_request_blocking(struct dnbd3_sock *sock, uint16_t dnbd3_cmd)
{
- int result = 0;
- uint64_t handle;
+ int result;
struct request *req = kmalloc(sizeof(struct request), GFP_KERNEL);
- debug_sock(sock, "request starting blocking request");
+ debug_sock(sock, "starting blocking request %d", dnbd3_cmd);
if (!req) {
error_sock(sock, "kmalloc failed");
+ result = -EIO;
goto error;
}
@@ -179,18 +179,15 @@ int dnbd3_send_request_blocking(struct dnbd3_sock *sock, int dnbd3_cmd)
goto error;
}
- mutex_lock(&sock->lock);
+ mutex_lock(&sock->tx_lock);
result = dnbd3_send_request(sock, req, NULL);
- if (result) {
- mutex_unlock(&sock->lock);
+ if (result <= 0) {
+ mutex_unlock(&sock->tx_lock);
goto error;
}
- atomic64_set(&send_wq_signal, 0);
- handle = dnbd3_to_wq_signal(sock->device->minor, dnbd3_cmd, sock->sock_nr);
+ mutex_unlock(&sock->tx_lock);
- mutex_unlock(&sock->lock);
-
- if (wait_event_interruptible_timeout(send_wq, atomic64_read(&send_wq_signal) == handle, REQUEST_TIMEOUT) <= 0) { // timeout or interrupt
+ if (wait_event_interruptible_timeout(send_wq, sock->receive_command == dnbd3_cmd, REQUEST_TIMEOUT) <= 0) { // timeout or interrupt
warn_sock(sock, "request timed out, cmd %d", dnbd3_cmd);
result = -EIO;
goto error;
@@ -248,8 +245,8 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_device *dev, struct dnbd3
init_msghdr(msg);
memcpy(&handle, &reply->handle, sizeof(handle));
- cookie = dnbd3_arg1_from_handle(handle);
- tag = dnbd3_arg0_from_handle(handle);
+ cookie = dnbd3_cookie_from_handle(handle);
+ tag = dnbd3_tag_from_handle(handle);
hwq = blk_mq_unique_tag_to_hwq(tag);
if (hwq < dev->tag_set.nr_hw_queues) {
@@ -366,7 +363,7 @@ static int dnbd3_receive_cmd_latest_rid(struct dnbd3_device *dev, struct dnbd3_s
}
rid = net_order_16(rid);
debug_sock(sock, "latest rid of %s is %d (currently using %d)", dev->imgname, (int)rid, (int)dev->rid);
- dev->update_available = (rid > dev->rid ? 1 : 0);
+ dev->update_available = (rid > dev->rid ? true : false);
return result;
}
@@ -431,8 +428,8 @@ static void dnbd3_receive_worker(struct work_struct *work)
struct dnbd3_sock *sock = container_of(work, struct dnbd3_sock, receive_worker);
struct dnbd3_device *dev = sock->device;
dnbd3_reply_t dnbd3_reply;
- uint64_t handle;
int result;
+ debug_sock(sock, "receive worker is starting");
while(1) { // loop until socket returns 0
result = dnbd3_receive_cmd(sock, &dnbd3_reply);
@@ -481,16 +478,15 @@ static void dnbd3_receive_worker(struct work_struct *work)
}
break;
default:
- warn_sock(sock, "unknown command eeceived");
+ warn_sock(sock, "unknown command received");
break;
}
error:
- handle = dnbd3_to_wq_signal(dev->minor, dnbd3_reply.cmd, sock->sock_nr);
- atomic64_set(&send_wq_signal, handle);
+ sock->receive_command = dnbd3_reply.cmd;
wake_up_interruptible(&send_wq);
if (result == 0) {
info_sock(sock, "result is 0, socket seems to be down");
- sock->panic = 1;
+ sock->panic = true;
break; //the socket seems to be down
} else if (result < 0) {
sock->server->failures++; // discovery takes care of to many failures
@@ -574,7 +570,7 @@ static struct dnbd3_server *dnbd3_find_best_alt_server(struct dnbd3_device *dev)
}
}
if (best_alt_server) {
- print_server(KERN_INFO, dev, best_alt_server, "found best alt server with rtt %llu", rtt);
+ info_server(dev, best_alt_server, "found best alt server with rtt %llu", best_rtt);
} else {
debug_dev(dev, "did not find any alternative server");
}
@@ -585,12 +581,72 @@ static bool dnbd3_better_rtt(struct dnbd3_server *new_server, struct dnbd3_serve
uint64_t new_rtt = (new_server->rtts[0] + new_server->rtts[1] + new_server->rtts[2] + new_server->rtts[3]) / 4;
uint64_t existing_rtt = (existing_server->rtts[0] + existing_server->rtts[1] + existing_server->rtts[2] + existing_server->rtts[3]) / 4;
- if (((new_rtt * 2)/3) < existing_rtt) {
+ if (new_rtt < ((existing_rtt * 2)/3)) { //TODO add macro to control this
return true;
}
return false;
}
+static void dnbd3_adjust_connections(struct dnbd3_device *dev) {
+ int i;
+ int sock_alive = 0;
+ struct dnbd3_server *server, *existing_server;
+
+ // connect empty sockets
+ sock_alive = 0;
+ for (i = 0; i < NUMBER_CONNECTIONS; i++) {
+ if (!dnbd3_is_sock_alive(dev->socks[i])) {
+ server = dnbd3_find_best_alt_server(dev);
+ if (server) {
+ if (dnbd3_socket_connect(dev, server) == 0) {
+ sock_alive++;
+ } else {
+ warn_server(dev, server, "failed to connect");
+ }
+ }
+ } else {
+ sock_alive++;
+ }
+ }
+
+ // replace socket with better server
+ if (sock_alive == NUMBER_CONNECTIONS) {
+ for (i = 0; i < NUMBER_CONNECTIONS; i++) {
+ if (dnbd3_is_sock_alive(dev->socks[i])) {
+ server = dnbd3_find_best_alt_server(dev);
+ existing_server = dev->socks[i].server;
+ if (server && dnbd3_better_rtt(server, dev->socks[i].server)) {
+ dnbd3_socket_disconnect(dev, NULL, &dev->socks[i]);
+
+ if (dnbd3_socket_connect(dev, server) != 0) {
+ warn_server(dev, server, "failed to connect");
+ dnbd3_socket_connect(dev, existing_server);
+ }
+ }
+ }
+ }
+ }
+
+ debug_dev(dev, "connected to %d/%d sockets", sock_alive, NUMBER_CONNECTIONS);
+}
+
+static int dnbd3_panic_connect(struct dnbd3_device *dev) {
+ int result, i;
+ result = dnbd3_socket_connect(dev, &dev->initial_server);
+ if (result) {
+ for (i = 0; i < NUMBER_SERVERS; i++) {
+ if (dev->alt_servers[i].host.type != 0) {
+ result = dnbd3_socket_connect(dev, &dev->alt_servers[i]);
+ if (!result) {
+ info_server(dev, &dev->alt_servers[i], "found server to connect to");
+ break;
+ }
+ }
+ }
+ }
+ return result;
+}
+
static void dnbd3_panic_worker(struct work_struct *work)
{
struct dnbd3_device *dev = container_of(work, struct dnbd3_device, panic_worker);
@@ -613,24 +669,155 @@ static void dnbd3_panic_worker(struct work_struct *work)
dnbd3_socket_disconnect(dev, panicked_server, panicked_sock);
if (new_server != NULL && new_server != panicked_server) {
- print_server(KERN_INFO, dev, new_server, "found replacement");
- dnbd3_socket_connect(dev, new_server);
+ info_server(dev, new_server, "found replacement");
+ if (!dnbd3_socket_connect(dev, new_server)) {
+ sock_alive++;
+ }
} else if (sock_alive > 0) {
info_dev(dev, "found no replacement server but still connected to %d servers", sock_alive);
- } else {
- error_dev(dev, "could not reconnect to server");
}
} else if (sock_alive == 0) {
new_server = dnbd3_find_best_alt_server(dev);
if (new_server != NULL) {
- print_server(KERN_INFO, dev, new_server, "reconnect to server");
- dnbd3_socket_connect(dev, new_server);
+ info_server(dev, new_server, "reconnect to server");
+ if (!dnbd3_socket_connect(dev, new_server)) {
+ sock_alive++;
+ }
+ }
+ }
+
+ if (sock_alive == 0) {
+ warn_dev(dev, "did not find a good server, trying to connect to any available server");
+
+ if (dnbd3_panic_connect(dev)) {
+ error_dev(dev, "could not connect to any server");
} else {
- error_dev(dev, "could not reconnect to server");
+ info_dev(dev, "found server to connect to");
}
}
}
+static int dnbd3_meassure_rtt(struct dnbd3_device *dev, struct dnbd3_server *server)
+{
+ struct kvec iov;
+ struct timeval start, end;
+ dnbd3_request_t dnbd3_request;
+ dnbd3_reply_t dnbd3_reply;
+ struct dnbd3_sock sock;
+ struct msghdr msg;
+ char *buf;
+ struct request req;
+ uint64_t rtt = RTT_UNREACHABLE;
+ int result;
+
+ buf = kmalloc(RTT_BLOCK_SIZE, GFP_KERNEL);
+ if (!buf) {
+ debug_dev(dev, "kmalloc failed in rtt measurement");
+ result = -EIO;
+ goto error;
+ }
+
+ sock.sock_nr = NUMBER_CONNECTIONS;
+ sock.sock = NULL;
+ sock.device = dev;
+ sock.server = server;
+
+ result = __dnbd3_socket_connect(server, &sock);
+ if (result) {
+ error_sock(&sock, "socket connect failed in rtt measurement");
+ goto error;
+ }
+ dnbd3_connect(&req);
+ result = dnbd3_send_request(&sock, &req, NULL);
+ if (result <= 0) {
+ error_sock(&sock, "request select image failed in rtt measurement");
+ goto error;
+ }
+
+ result = dnbd3_receive_cmd(&sock, &dnbd3_reply);
+ if (result <= 0) {
+ error_sock(&sock, "receive select image failed in rtt measurement");
+ goto error;
+
+ }
+ if (dnbd3_reply.magic != dnbd3_packet_magic || dnbd3_reply.cmd != CMD_SELECT_IMAGE || dnbd3_reply.size < 4) {
+ error_sock(&sock, "receive select image wrong header in rtt measurement");
+ result = -EIO;
+ goto error;
+ }
+ result = dnbd3_receive_cmd_select_image(dev, &sock, &dnbd3_reply);
+ if (result <= 0) {
+ error_sock(&sock, "receive data select image failed in rtt measurement");
+ goto error;
+ }
+
+ // Request block
+ dnbd3_request.magic = dnbd3_packet_magic;
+ dnbd3_request.cmd = CMD_GET_BLOCK;
+ // Do *NOT* pick a random block as it has proven to cause severe
+ // cache thrashing on the server
+ dnbd3_request.offset = 0;
+ dnbd3_request.size = RTT_BLOCK_SIZE;
+ fixup_request(dnbd3_request);
+ iov.iov_base = &dnbd3_request;
+ iov.iov_len = sizeof(dnbd3_request);
+
+ init_msghdr(msg);
+ // start rtt measurement
+ do_gettimeofday(&start);
+
+ result = kernel_sendmsg(sock.sock, &msg, &iov, 1, sizeof(dnbd3_request));
+ if (result <= 0) {
+ error_sock(&sock, "request test block failed in rtt measurement");
+ goto error;
+ }
+ // receive net reply
+ iov.iov_base = &dnbd3_reply;
+ iov.iov_len = sizeof(dnbd3_reply);
+ result = kernel_recvmsg(sock.sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags);
+ if (result != sizeof(dnbd3_reply)) {
+ error_sock(&sock, "receive header test block failed in rtt measurement");
+ goto error;
+ }
+ fixup_reply(dnbd3_reply);
+ if (dnbd3_reply.magic != dnbd3_packet_magic|| dnbd3_reply.cmd != CMD_GET_BLOCK || dnbd3_reply.size != RTT_BLOCK_SIZE) {
+ error_sock(&sock, "receive header cmd test block failed in rtt measurement");
+ result = -EIO;
+ goto error;
+ }
+
+ // receive data
+ iov.iov_base = buf;
+ iov.iov_len = RTT_BLOCK_SIZE;
+ result = kernel_recvmsg(sock.sock, &msg, &iov, 1, dnbd3_reply.size, msg.msg_flags);
+ if (result != RTT_BLOCK_SIZE) {
+ error_sock(&sock, "receive test block failed in rtt measurement");
+ goto error;
+ }
+
+ do_gettimeofday(&end); // end rtt measurement
+
+ rtt = (uint64_t)((end.tv_sec - start.tv_sec) * 1000000ull + (end.tv_usec - start.tv_usec));
+
+ info_sock(&sock, "new rrt is %llu", rtt);
+
+error:
+ sock.server->rtts[dev->discovery_count % 4] = rtt;
+ if (result <= 0) {
+ server->failures++;
+ }
+ if (buf) {
+ kfree(buf);
+ }
+ if (sock.sock) {
+ kernel_sock_shutdown(sock.sock, SHUT_RDWR);
+ sock.server = NULL;
+ sock_release(sock.sock);
+ sock.sock = NULL;
+ }
+ return result;
+}
+
static void dnbd3_discovery_worker(struct work_struct *work)
{
@@ -639,15 +826,6 @@ static void dnbd3_discovery_worker(struct work_struct *work)
int i, j;
struct dnbd3_server *existing_server, *free_server, *failed_server;
dnbd3_server_entry_t *new_server;
- struct kvec iov;
- struct timeval start, end;
- dnbd3_request_t dnbd3_request;
- dnbd3_reply_t dnbd3_reply;
- struct msghdr msg;
- char *buf;
- struct request *req = NULL;
- uint64_t rtt;
- serialized_buffer_t *payload;
if (!dnbd3_is_sock_alive(*sock)) {
sock = NULL;
@@ -664,7 +842,9 @@ static void dnbd3_discovery_worker(struct work_struct *work)
debug_sock(sock, "starting discovery worker");
- dnbd3_send_request_blocking(sock, CMD_GET_SERVERS);
+ if (dnbd3_send_request_blocking(sock, CMD_GET_SERVERS) <= 0) {
+ error_sock(sock, "failed to get servers in discovery");
+ }
debug_sock(sock, "new server num is %d", dev->new_servers_num);
if (dev->new_servers_num) {
@@ -695,7 +875,7 @@ static void dnbd3_discovery_worker(struct work_struct *work)
if (existing_server) {
if (new_server->failures == 1) { // remove is requested
- print_server(KERN_INFO, dev, new_server, "remove server is requested");
+ info_server(dev, new_server, "remove server is requested");
dnbd3_socket_disconnect(dev, existing_server, NULL); // TODO what to do when only one connection?
existing_server->host.type = 0;
}
@@ -710,7 +890,7 @@ static void dnbd3_discovery_worker(struct work_struct *work)
//no server found to replace
continue;
}
- print_server(KERN_INFO, dev, free_server, "got new alt server");
+ info_server(dev, free_server, "got new alt server");
free_server->failures = 0;
free_server->protocol_version = 0;
free_server->rtts[0] = free_server->rtts[1] = free_server->rtts[2] = free_server->rtts[3] = RTT_UNREACHABLE;
@@ -719,166 +899,23 @@ static void dnbd3_discovery_worker(struct work_struct *work)
dev->new_servers_num = 0;
mutex_unlock(&dev->device_lock);
}
- buf = kmalloc(RTT_BLOCK_SIZE, GFP_KERNEL);
- if (!buf) {
- error_dev(dev, "kmalloc failed");
- goto error;
- }
- payload = (serialized_buffer_t *)buf;
- req = kmalloc(sizeof(struct request), GFP_KERNEL);
- if (!req) {
- error_dev(dev, "kmalloc failed");
- goto error;
- }
- sock = kmalloc(sizeof(struct dnbd3_sock), GFP_KERNEL);
- if (!sock) {
- error_dev(dev, "kmalloc failed");
- goto error;
- }
- sock->sock_nr = NUMBER_CONNECTIONS;
// measure rtt for all alt servers
for (i = 0; i < NUMBER_SERVERS; i++) {
existing_server = &dev->alt_servers[i];
if (existing_server->host.type) {
- sock->sock = NULL;
- sock->device = dev;
- sock->server = existing_server;
- if (__dnbd3_socket_connect(existing_server, sock)) {
- error_sock(sock, "socket connect failed in rtt measurement");
- goto rtt_error;
- }
- dnbd3_connect(req);
- if (dnbd3_send_request(sock, req, NULL)) {
- error_sock(sock, "request select image failed in rtt measurement");
- goto rtt_error;
- }
-
- if (dnbd3_receive_cmd(sock, &dnbd3_reply) <= 0) {
- error_sock(sock, "receive select image failed in rtt measurement");
- goto rtt_error;
-
- }
- if (dnbd3_reply.magic != dnbd3_packet_magic || dnbd3_reply.cmd != CMD_SELECT_IMAGE || dnbd3_reply.size < 4) {
- error_sock(sock, "receive select image wrong header in rtt measurement");
- goto rtt_error;
- }
-
- if (dnbd3_receive_cmd_select_image(dev, sock, &dnbd3_reply) <= 0) {
- error_sock(sock, "receive data select image failed in rtt measurement");
- goto rtt_error;
- }
-
- // Request block
- dnbd3_request.magic = dnbd3_packet_magic;
- dnbd3_request.cmd = CMD_GET_BLOCK;
- // Do *NOT* pick a random block as it has proven to cause severe
- // cache thrashing on the server
- dnbd3_request.offset = 0;
- dnbd3_request.size = RTT_BLOCK_SIZE;
- fixup_request(dnbd3_request);
- iov.iov_base = &dnbd3_request;
- iov.iov_len = sizeof(dnbd3_request);
-
- init_msghdr(msg);
- // start rtt measurement
- do_gettimeofday(&start);
-
- if (kernel_sendmsg(sock->sock, &msg, &iov, 1, sizeof(dnbd3_request)) <= 0) {
- error_sock(sock, "request test block failed in rtt measurement");
- goto rtt_error;
- }
- // receive net reply
- iov.iov_base = &dnbd3_reply;
- iov.iov_len = sizeof(dnbd3_reply);
- if ((j = kernel_recvmsg(sock->sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags)) != sizeof(dnbd3_reply)) {
- error_sock(sock, "receive header test block failed in rtt measurement %d %ld", j, sizeof(dnbd3_reply));
- goto rtt_error;
- }
- fixup_reply(dnbd3_reply);
- if (dnbd3_reply.magic != dnbd3_packet_magic|| dnbd3_reply.cmd != CMD_GET_BLOCK || dnbd3_reply.size != RTT_BLOCK_SIZE) {
- error_sock(sock, "receive header cmd test block failed in rtt measurement");
- goto rtt_error;
- }
-
- // receive data
- iov.iov_base = buf;
- iov.iov_len = RTT_BLOCK_SIZE;
- if (kernel_recvmsg(sock->sock, &msg, &iov, 1, dnbd3_reply.size, msg.msg_flags) != RTT_BLOCK_SIZE) {
- error_sock(sock, "receive test block failed in rtt measurement");
- goto rtt_error;
- }
-
- do_gettimeofday(&end); // end rtt measurement
-
- rtt = (uint64_t)((end.tv_sec - start.tv_sec) * 1000000ull + (end.tv_usec - start.tv_usec));
-
- debug_sock(sock, "new rrt is %llu", rtt);
- existing_server->rtts[dev->discovery_count % 4] = rtt;
-
-rtt_error:
- if (sock->sock) {
- kernel_sock_shutdown(sock->sock, SHUT_RDWR);
- sock->server = NULL;
- }
-
- if (sock->sock) {
- sock_release(sock->sock);
- sock->sock = NULL;
+ if (dnbd3_meassure_rtt(dev, existing_server) <= 0) {
+ existing_server->failures++;
+ warn_server(dev, existing_server, "failed to meassure rtt");
}
}
}
-error:
- if (buf) {
- kfree(buf);
- buf = NULL;
- }
- if (req) {
- kfree(req);
- req = NULL;
- }
- if (sock) {
- kfree(sock);
- sock = NULL;
- }
- // connect empty sockets
- j = 0;
- for (i = 0; i < NUMBER_CONNECTIONS; i++) {
- if (!dnbd3_is_sock_alive(dev->socks[i])) {
- free_server = dnbd3_find_best_alt_server(dev);
- if (free_server) {
- if (dnbd3_socket_connect(dev, free_server) == 0) {
- j++;
- } else {
- print_server(KERN_WARNING, dev, free_server, "failed to connect");
- }
- }
- } else {
- j++;
- }
- }
-
- // replace socket with better server
- if (j == NUMBER_CONNECTIONS) {
- for (i = 0; i < NUMBER_CONNECTIONS; i++) {
- if (dnbd3_is_sock_alive(dev->socks[i])) {
- free_server = dnbd3_find_best_alt_server(dev);
- if (free_server && dnbd3_better_rtt(free_server, dev->socks[i].server)) {
- dnbd3_socket_disconnect(dev, NULL, &dev->socks[i]);
-
- if (dnbd3_socket_connect(dev, free_server) != 0) {
- print_server(KERN_WARNING, dev, free_server, "failed to connect");
- }
- }
- }
- }
- }
-
- debug_dev(dev, "connected to %d / %d sockets", j, NUMBER_CONNECTIONS);
+ dnbd3_adjust_connections(dev);
dev->discovery_count++;
}
+
static int __dnbd3_socket_connect(struct dnbd3_server *server, struct dnbd3_sock *sock)
{
int result = 0;
@@ -948,7 +985,7 @@ static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *s
}
}
if (sock == NULL) {
- print_server(KERN_ERR, dev, server, "could not connect to socket, to many connections");
+ warn_server(dev, server, "could not connect to socket, to many connections");
return -EIO;
}
sock->server = server;
@@ -957,12 +994,16 @@ static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *s
debug_sock(sock, "socket connect");
- mutex_init(&sock->lock);
- mutex_lock(&sock->lock);
- __dnbd3_socket_connect(server, sock);
- mutex_unlock(&sock->lock);
+ mutex_init(&sock->tx_lock);
+ mutex_lock(&sock->tx_lock);
+ result = __dnbd3_socket_connect(server, sock);
+ mutex_unlock(&sock->tx_lock);
+
+ sock->panic = false;
+
if (!sock->sock) {
error_sock(sock, "socket is not connected");
+ server->failures++;
result = -EIO;
goto error;
}
@@ -972,8 +1013,10 @@ static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *s
queue_work(dnbd3_wq, &sock->receive_worker);
result = dnbd3_send_request_blocking(sock, CMD_SELECT_IMAGE);
- if (result) {
+ if (result <= 0) {
error_sock(sock, "connection to image %s failed", dev->imgname);
+ server->failures++;
+ result = -EIO;
goto error;
}
@@ -999,9 +1042,12 @@ static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *s
return 0;
error:
if (sock->sock) {
+ kernel_sock_shutdown(sock->sock, SHUT_RDWR);
+ cancel_work_sync(&sock->receive_worker);
sock_release(sock->sock);
sock->sock = NULL;
}
+ mutex_destroy(&sock->tx_lock);
return result;
}
@@ -1024,33 +1070,33 @@ static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server
}
blk_mq_update_nr_hw_queues(&dev->tag_set, sock_alive - 1);
if (sock_alive <= 1) {
- info_sock(sock, "shutting down last socket and stopping discovery");
+ info_sock(sock, "shutting down last socket and stopping timer");
del_timer_sync(&dev->timer);
- dev->timer_count = 0;
- dev->discovery_count = 0;
- cancel_work_sync(&dev->discovery_worker);
+// dev->timer_count = 0;
+// dev->discovery_count = 0;
+// cancel_work_sync(&dev->discovery_worker); // do not wait
// cancel_work_sync(&dev->panic_worker); // do not wait for panic_worker, probably we are called from panic_worker
}
cancel_work_sync(&sock->keepalive_worker);
debug_sock(sock, "socket disconnect");
- mutex_lock(&sock->lock);
+ mutex_lock(&sock->tx_lock);
/*
* Important sequence to shut down socket
* 1. kernel_sock_shutdown
* socket shutdown, receiver which hangs in kernel_recvmsg returns 0
* 2. cancel_work_sync(receiver)
- * wait for the receiver to finish, so the socket is not usesd anymore
+ * wait for the receiver to finish, so the socket is not used anymore
* 3. sock_release
* release the socket and set to NULL
*/
if (sock->sock) {
kernel_sock_shutdown(sock->sock, SHUT_RDWR);
}
- mutex_unlock(&sock->lock);
- mutex_destroy(&sock->lock);
+ mutex_unlock(&sock->tx_lock);
+ mutex_destroy(&sock->tx_lock);
cancel_work_sync(&sock->receive_worker);
@@ -1058,8 +1104,8 @@ static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server
sock_release(sock->sock);
sock->sock = NULL;
}
- sock->panic = 0;
sock->server = NULL;
+ sock->panic = false;
return 0;
}
@@ -1068,6 +1114,10 @@ int dnbd3_net_disconnect(struct dnbd3_device *dev)
int i;
int result = 0;
+ del_timer_sync(&dev->timer);
+ cancel_work_sync(&dev->discovery_worker);
+ cancel_work_sync(&dev->panic_worker); // be sure it does not recover while disconnecting
+
for (i = 0; i < NUMBER_CONNECTIONS; i++) {
if (dev->socks[i].sock) {
if (dnbd3_socket_disconnect(dev, NULL, &dev->socks[i])) {