diff options
author | Frederic Robra | 2019-07-17 15:24:49 +0200 |
---|---|---|
committer | Frederic Robra | 2019-07-17 15:24:49 +0200 |
commit | 278fcb346fa53d94ad018cdd2f80dfd8b99335c2 (patch) | |
tree | 1e59a5dc51a5827f624133e56aec28264123bd2f | |
parent | discovery now connects to new slots (diff) | |
download | dnbd3-ng-278fcb346fa53d94ad018cdd2f80dfd8b99335c2.tar.gz dnbd3-ng-278fcb346fa53d94ad018cdd2f80dfd8b99335c2.tar.xz dnbd3-ng-278fcb346fa53d94ad018cdd2f80dfd8b99335c2.zip |
fixed various bugs
-rw-r--r-- | src/kernel/core.c | 39 | ||||
-rw-r--r-- | src/kernel/dnbd3.h | 67 | ||||
-rw-r--r-- | src/kernel/net.c | 476 |
3 files changed, 328 insertions, 254 deletions
diff --git a/src/kernel/core.c b/src/kernel/core.c index 9d930cb..bf316d4 100644 --- a/src/kernel/core.c +++ b/src/kernel/core.c @@ -67,8 +67,8 @@ int major; static void dnbd3_requeue_cmd(struct dnbd3_cmd *cmd) { struct request *req = blk_mq_rq_from_pdu(cmd); - - if (!test_and_set_bit(DNBD3_CMD_REQUEUED, &cmd->flags)) { + if (!cmd->requed) { + cmd->requed = true; blk_mq_requeue_request(req, true); } } @@ -115,9 +115,9 @@ static int dnbd3_handle_cmd(struct dnbd3_cmd *cmd, int index) cmd->status = BLK_STS_OK; - mutex_lock(&sock->lock); + mutex_lock(&sock->tx_lock); if (unlikely(!sock->sock)) { - mutex_unlock(&sock->lock); + mutex_unlock(&sock->tx_lock); warn_sock(sock, "not connected"); return -EIO; } @@ -136,7 +136,7 @@ static int dnbd3_handle_cmd(struct dnbd3_cmd *cmd, int index) ret = 0; } out: - mutex_unlock(&sock->lock); + mutex_unlock(&sock->tx_lock); return ret; } @@ -144,18 +144,14 @@ static blk_status_t dnbd3_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_ { struct dnbd3_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); int ret; - struct dnbd3_device *dev = cmd->dnbd3; - - debug_dev(dev, "queue request"); mutex_lock(&cmd->lock); - clear_bit(DNBD3_CMD_REQUEUED, &cmd->flags); - + cmd->requed = false; ret = dnbd3_handle_cmd(cmd, hctx->queue_num); if (ret < 0) { ret = BLK_STS_IOERR; - } else if (!ret) { + } else if (ret >= 0) { ret = BLK_STS_OK; } mutex_unlock(&cmd->lock); @@ -167,7 +163,7 @@ static int dnbd3_init_request(struct blk_mq_tag_set *set, struct request *rq, un { struct dnbd3_cmd *cmd = blk_mq_rq_to_pdu(rq); cmd->dnbd3 = set->driver_data; - cmd->flags = 0; + cmd->requed = false; mutex_init(&cmd->lock); return 0; } @@ -482,25 +478,26 @@ static int dnbd3_exit_cb(int id, void *ptr, void *data) return 0; } -static void dnbd3_dev_remove(struct dnbd3_device *dnbd3) +static void dnbd3_dev_remove(struct dnbd3_device *dev) { - struct gendisk *disk = dnbd3->disk; + struct gendisk *disk = dev->disk; struct request_queue *q; + dnbd3_net_disconnect(dev); + if (disk) { q = disk->queue; del_gendisk(disk); blk_cleanup_queue(q); - blk_mq_free_tag_set(&dnbd3->tag_set); - dnbd3_net_disconnect(dnbd3); + blk_mq_free_tag_set(&dev->tag_set); disk->private_data = NULL; put_disk(disk); - if (dnbd3->imgname) { - kfree(dnbd3->imgname); - dnbd3->imgname = NULL; - } } - mutex_destroy(&dnbd3->device_lock); + if (dev->imgname) { + kfree(dev->imgname); + dev->imgname = NULL; + } + mutex_destroy(&dev->device_lock); } static void dnbd3_put(struct dnbd3_device *dnbd3) diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h index 43b923f..c9f9fab 100644 --- a/src/kernel/dnbd3.h +++ b/src/kernel/dnbd3.h @@ -31,32 +31,49 @@ #include "serialize.h" -#define NUMBER_CONNECTIONS 4 // power of 2 +#define NUMBER_CONNECTIONS 2 // power of 2 #define DEBUG extern struct workqueue_struct *dnbd3_wq; +/** + * struct dnbd3_server - defining a server for dnbd3 + * @host: host of this server + * @rtts: last four round trip time measurements in microseconds + * @protocol_version: dnbd3 protocol version of this server + * @failures: how many times the server was unreachable + */ struct dnbd3_server { dnbd3_host_t host; - uint64_t rtts[4]; // Last four round trip time measurements in microsecond - uint16_t protocol_version; // dnbd3 protocol version of this server - uint8_t failures; // How many times the server was unreachable + uint64_t rtts[4]; + uint16_t protocol_version; + uint8_t failures; }; - +/** + * struct dnbd3_sock - defining a socket for dnbd3 + * @sock_nr: nr of this socket + * @device: the dnbd3_device this socket belongs to + * @server: the server this socket is connected to, 'NULL' if not connected + * + */ struct dnbd3_sock { - uint16_t sock_nr; - struct socket *sock; - struct mutex lock; - struct request *pending; + uint8_t sock_nr; + struct dnbd3_device *device; struct dnbd3_server *server; + + struct mutex tx_lock; + struct socket *sock; + + bool panic;//, discover, panic_count; uint32_t cookie; - uint8_t panic;//, discover, panic_count; - struct dnbd3_device *device; + struct work_struct keepalive_worker; - struct timer_list keepalive_timer; struct work_struct receive_worker; + + struct request *pending; + uint16_t receive_command; }; struct dnbd3_device { @@ -79,13 +96,13 @@ struct dnbd3_device { struct dnbd3_server alt_servers[NUMBER_SERVERS]; // array of alt servers int new_servers_num; // number of new alt servers that are waiting to be copied to above array dnbd3_server_entry_t new_servers[NUMBER_SERVERS]; // pending new alt servers - uint8_t update_available; - uint8_t use_server_provided_alts; + bool update_available; + bool use_server_provided_alts; uint16_t rid; uint64_t reported_size; struct work_struct panic_worker; - struct work_struct discovery_worker; // if in irq and need to send request + struct work_struct discovery_worker; uint8_t discovery_count; struct timer_list timer; uint8_t timer_count; @@ -95,11 +112,9 @@ struct dnbd3_device { struct dnbd3_cmd { struct dnbd3_device *dnbd3; struct mutex lock; -// int index; uint32_t cookie; blk_status_t status; - unsigned long flags; -// uint32_t cmd_cookie; + bool requed; }; @@ -112,7 +127,7 @@ struct dnbd3_cmd { } \ } while (0) -#define print_server(level, dev, server, fmt, ...) \ +#define _print_server(level, dev, server, fmt, ...) \ do { \ if ((server)->host.type == HOST_IP4) { \ printk(level "dnbd%d: " fmt " %pI4:%d\n", (dev)->minor, ## __VA_ARGS__, (server)->host.addr, (server)->host.port); \ @@ -133,6 +148,8 @@ struct dnbd3_cmd { #define debug_sock(sock, fmt, ...) \ _print_sock(KERN_DEBUG, sock, fmt, ## __VA_ARGS__) +#define debug_server(dev, server, fmt, ...) \ + _print_server(KERN_DEBUG, dev, server, fmt, ## __VA_ARGS__) #else @@ -142,6 +159,7 @@ struct dnbd3_cmd { #define debug_sock(sock, fmt, ...) +#define debug_server(dev, server, fmt, ...) #endif @@ -152,7 +170,10 @@ struct dnbd3_cmd { printk(KERN_INFO "dnbd%d: " fmt "\n", (dev)->minor, ## __VA_ARGS__) #define info_sock(sock, fmt, ...) \ - _print_sock(KERN_DEBUG, sock, fmt, ## __VA_ARGS__) + _print_sock(KERN_INFO, sock, fmt, ## __VA_ARGS__) + +#define info_server(dev, server, fmt, ...) \ + _print_server(KERN_INFO, dev, server, fmt, ## __VA_ARGS__) #define warn(fmt, ...) \ @@ -164,6 +185,9 @@ struct dnbd3_cmd { #define warn_sock(sock, fmt, ...) \ _print_sock(KERN_WARNING, sock, fmt, ## __VA_ARGS__) +#define warn_server(dev, server, fmt, ...) \ + _print_server(KERN_WARNING, dev, server, fmt, ## __VA_ARGS__) + #define error(fmt, ...) \ printk(KERN_ERR "dnbd: " fmt "\n", ## __VA_ARGS__) @@ -174,4 +198,7 @@ struct dnbd3_cmd { #define error_sock(sock, fmt, ...) \ _print_sock(KERN_ERR, sock, fmt, ## __VA_ARGS__) +#define error_server(dev, server, fmt, ...) \ + _print_server(KERN_ERR, dev, server, fmt, ## __VA_ARGS__) + #endif /* DNBD_H_ */ diff --git a/src/kernel/net.c b/src/kernel/net.c index 6d8538d..d493994 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -49,7 +49,6 @@ } while (0) static DECLARE_WAIT_QUEUE_HEAD(send_wq); -static atomic64_t send_wq_signal; static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *server); static int __dnbd3_socket_connect(struct dnbd3_server * server, struct dnbd3_sock *sock); @@ -59,10 +58,10 @@ static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server static void dnbd3_print_server_list(struct dnbd3_device *dev) { int i; - print_server(KERN_INFO, dev, &dev->initial_server, "initial server is"); + info_server(dev, &dev->initial_server, "initial server is"); for (i = 0; i < NUMBER_SERVERS; i++) { if (dev->alt_servers[i].host.addr[0] != 0) { - print_server(KERN_INFO, dev, &dev->alt_servers[i], "alternative server is"); + info_server(dev, &dev->alt_servers[i], "alternative server is"); } } } @@ -73,15 +72,15 @@ static inline uint64_t dnbd3_to_wq_signal(int minor, uint16_t dnbd3_cmd, uint16_ } -static uint64_t dnbd3_to_handle(uint32_t arg0, uint32_t arg1) { - return ((uint64_t) arg0 << 32) | arg1; +static inline uint64_t dnbd3_to_handle(uint32_t tag, uint32_t cookie) { + return ((uint64_t) tag << 32) | cookie; } -static uint32_t dnbd3_arg0_from_handle(uint64_t handle) { +static inline uint32_t dnbd3_tag_from_handle(uint64_t handle) { return (uint32_t)(handle >> 32); } -static uint32_t dnbd3_arg1_from_handle(uint64_t handle) { +static inline uint32_t dnbd3_cookie_from_handle(uint64_t handle) { return (uint32_t) handle; } @@ -142,26 +141,27 @@ int dnbd3_send_request(struct dnbd3_sock *sock, struct request *req, struct dnbd iov[0].iov_base = &dnbd3_request; iov[0].iov_len = sizeof(dnbd3_request); send_len = iov_num == 1 ? sizeof(dnbd3_request) : iov[0].iov_len + iov[1].iov_len; - if ((result = kernel_sendmsg(sock->sock, &msg, iov, iov_num, send_len)) != send_len) { + result = kernel_sendmsg(sock->sock, &msg, iov, iov_num, send_len); + if (result != send_len) { error_sock(sock, "connection to server lost"); + sock->server->failures++; goto error; } sock->pending = NULL; - result = 0; error: return result; } -int dnbd3_send_request_blocking(struct dnbd3_sock *sock, int dnbd3_cmd) +static int dnbd3_send_request_blocking(struct dnbd3_sock *sock, uint16_t dnbd3_cmd) { - int result = 0; - uint64_t handle; + int result; struct request *req = kmalloc(sizeof(struct request), GFP_KERNEL); - debug_sock(sock, "request starting blocking request"); + debug_sock(sock, "starting blocking request %d", dnbd3_cmd); if (!req) { error_sock(sock, "kmalloc failed"); + result = -EIO; goto error; } @@ -179,18 +179,15 @@ int dnbd3_send_request_blocking(struct dnbd3_sock *sock, int dnbd3_cmd) goto error; } - mutex_lock(&sock->lock); + mutex_lock(&sock->tx_lock); result = dnbd3_send_request(sock, req, NULL); - if (result) { - mutex_unlock(&sock->lock); + if (result <= 0) { + mutex_unlock(&sock->tx_lock); goto error; } - atomic64_set(&send_wq_signal, 0); - handle = dnbd3_to_wq_signal(sock->device->minor, dnbd3_cmd, sock->sock_nr); + mutex_unlock(&sock->tx_lock); - mutex_unlock(&sock->lock); - - if (wait_event_interruptible_timeout(send_wq, atomic64_read(&send_wq_signal) == handle, REQUEST_TIMEOUT) <= 0) { // timeout or interrupt + if (wait_event_interruptible_timeout(send_wq, sock->receive_command == dnbd3_cmd, REQUEST_TIMEOUT) <= 0) { // timeout or interrupt warn_sock(sock, "request timed out, cmd %d", dnbd3_cmd); result = -EIO; goto error; @@ -248,8 +245,8 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_device *dev, struct dnbd3 init_msghdr(msg); memcpy(&handle, &reply->handle, sizeof(handle)); - cookie = dnbd3_arg1_from_handle(handle); - tag = dnbd3_arg0_from_handle(handle); + cookie = dnbd3_cookie_from_handle(handle); + tag = dnbd3_tag_from_handle(handle); hwq = blk_mq_unique_tag_to_hwq(tag); if (hwq < dev->tag_set.nr_hw_queues) { @@ -366,7 +363,7 @@ static int dnbd3_receive_cmd_latest_rid(struct dnbd3_device *dev, struct dnbd3_s } rid = net_order_16(rid); debug_sock(sock, "latest rid of %s is %d (currently using %d)", dev->imgname, (int)rid, (int)dev->rid); - dev->update_available = (rid > dev->rid ? 1 : 0); + dev->update_available = (rid > dev->rid ? true : false); return result; } @@ -431,8 +428,8 @@ static void dnbd3_receive_worker(struct work_struct *work) struct dnbd3_sock *sock = container_of(work, struct dnbd3_sock, receive_worker); struct dnbd3_device *dev = sock->device; dnbd3_reply_t dnbd3_reply; - uint64_t handle; int result; + debug_sock(sock, "receive worker is starting"); while(1) { // loop until socket returns 0 result = dnbd3_receive_cmd(sock, &dnbd3_reply); @@ -481,16 +478,15 @@ static void dnbd3_receive_worker(struct work_struct *work) } break; default: - warn_sock(sock, "unknown command eeceived"); + warn_sock(sock, "unknown command received"); break; } error: - handle = dnbd3_to_wq_signal(dev->minor, dnbd3_reply.cmd, sock->sock_nr); - atomic64_set(&send_wq_signal, handle); + sock->receive_command = dnbd3_reply.cmd; wake_up_interruptible(&send_wq); if (result == 0) { info_sock(sock, "result is 0, socket seems to be down"); - sock->panic = 1; + sock->panic = true; break; //the socket seems to be down } else if (result < 0) { sock->server->failures++; // discovery takes care of to many failures @@ -574,7 +570,7 @@ static struct dnbd3_server *dnbd3_find_best_alt_server(struct dnbd3_device *dev) } } if (best_alt_server) { - print_server(KERN_INFO, dev, best_alt_server, "found best alt server with rtt %llu", rtt); + info_server(dev, best_alt_server, "found best alt server with rtt %llu", best_rtt); } else { debug_dev(dev, "did not find any alternative server"); } @@ -585,12 +581,72 @@ static bool dnbd3_better_rtt(struct dnbd3_server *new_server, struct dnbd3_serve uint64_t new_rtt = (new_server->rtts[0] + new_server->rtts[1] + new_server->rtts[2] + new_server->rtts[3]) / 4; uint64_t existing_rtt = (existing_server->rtts[0] + existing_server->rtts[1] + existing_server->rtts[2] + existing_server->rtts[3]) / 4; - if (((new_rtt * 2)/3) < existing_rtt) { + if (new_rtt < ((existing_rtt * 2)/3)) { //TODO add macro to control this return true; } return false; } +static void dnbd3_adjust_connections(struct dnbd3_device *dev) { + int i; + int sock_alive = 0; + struct dnbd3_server *server, *existing_server; + + // connect empty sockets + sock_alive = 0; + for (i = 0; i < NUMBER_CONNECTIONS; i++) { + if (!dnbd3_is_sock_alive(dev->socks[i])) { + server = dnbd3_find_best_alt_server(dev); + if (server) { + if (dnbd3_socket_connect(dev, server) == 0) { + sock_alive++; + } else { + warn_server(dev, server, "failed to connect"); + } + } + } else { + sock_alive++; + } + } + + // replace socket with better server + if (sock_alive == NUMBER_CONNECTIONS) { + for (i = 0; i < NUMBER_CONNECTIONS; i++) { + if (dnbd3_is_sock_alive(dev->socks[i])) { + server = dnbd3_find_best_alt_server(dev); + existing_server = dev->socks[i].server; + if (server && dnbd3_better_rtt(server, dev->socks[i].server)) { + dnbd3_socket_disconnect(dev, NULL, &dev->socks[i]); + + if (dnbd3_socket_connect(dev, server) != 0) { + warn_server(dev, server, "failed to connect"); + dnbd3_socket_connect(dev, existing_server); + } + } + } + } + } + + debug_dev(dev, "connected to %d/%d sockets", sock_alive, NUMBER_CONNECTIONS); +} + +static int dnbd3_panic_connect(struct dnbd3_device *dev) { + int result, i; + result = dnbd3_socket_connect(dev, &dev->initial_server); + if (result) { + for (i = 0; i < NUMBER_SERVERS; i++) { + if (dev->alt_servers[i].host.type != 0) { + result = dnbd3_socket_connect(dev, &dev->alt_servers[i]); + if (!result) { + info_server(dev, &dev->alt_servers[i], "found server to connect to"); + break; + } + } + } + } + return result; +} + static void dnbd3_panic_worker(struct work_struct *work) { struct dnbd3_device *dev = container_of(work, struct dnbd3_device, panic_worker); @@ -613,24 +669,155 @@ static void dnbd3_panic_worker(struct work_struct *work) dnbd3_socket_disconnect(dev, panicked_server, panicked_sock); if (new_server != NULL && new_server != panicked_server) { - print_server(KERN_INFO, dev, new_server, "found replacement"); - dnbd3_socket_connect(dev, new_server); + info_server(dev, new_server, "found replacement"); + if (!dnbd3_socket_connect(dev, new_server)) { + sock_alive++; + } } else if (sock_alive > 0) { info_dev(dev, "found no replacement server but still connected to %d servers", sock_alive); - } else { - error_dev(dev, "could not reconnect to server"); } } else if (sock_alive == 0) { new_server = dnbd3_find_best_alt_server(dev); if (new_server != NULL) { - print_server(KERN_INFO, dev, new_server, "reconnect to server"); - dnbd3_socket_connect(dev, new_server); + info_server(dev, new_server, "reconnect to server"); + if (!dnbd3_socket_connect(dev, new_server)) { + sock_alive++; + } + } + } + + if (sock_alive == 0) { + warn_dev(dev, "did not find a good server, trying to connect to any available server"); + + if (dnbd3_panic_connect(dev)) { + error_dev(dev, "could not connect to any server"); } else { - error_dev(dev, "could not reconnect to server"); + info_dev(dev, "found server to connect to"); } } } +static int dnbd3_meassure_rtt(struct dnbd3_device *dev, struct dnbd3_server *server) +{ + struct kvec iov; + struct timeval start, end; + dnbd3_request_t dnbd3_request; + dnbd3_reply_t dnbd3_reply; + struct dnbd3_sock sock; + struct msghdr msg; + char *buf; + struct request req; + uint64_t rtt = RTT_UNREACHABLE; + int result; + + buf = kmalloc(RTT_BLOCK_SIZE, GFP_KERNEL); + if (!buf) { + debug_dev(dev, "kmalloc failed in rtt measurement"); + result = -EIO; + goto error; + } + + sock.sock_nr = NUMBER_CONNECTIONS; + sock.sock = NULL; + sock.device = dev; + sock.server = server; + + result = __dnbd3_socket_connect(server, &sock); + if (result) { + error_sock(&sock, "socket connect failed in rtt measurement"); + goto error; + } + dnbd3_connect(&req); + result = dnbd3_send_request(&sock, &req, NULL); + if (result <= 0) { + error_sock(&sock, "request select image failed in rtt measurement"); + goto error; + } + + result = dnbd3_receive_cmd(&sock, &dnbd3_reply); + if (result <= 0) { + error_sock(&sock, "receive select image failed in rtt measurement"); + goto error; + + } + if (dnbd3_reply.magic != dnbd3_packet_magic || dnbd3_reply.cmd != CMD_SELECT_IMAGE || dnbd3_reply.size < 4) { + error_sock(&sock, "receive select image wrong header in rtt measurement"); + result = -EIO; + goto error; + } + result = dnbd3_receive_cmd_select_image(dev, &sock, &dnbd3_reply); + if (result <= 0) { + error_sock(&sock, "receive data select image failed in rtt measurement"); + goto error; + } + + // Request block + dnbd3_request.magic = dnbd3_packet_magic; + dnbd3_request.cmd = CMD_GET_BLOCK; + // Do *NOT* pick a random block as it has proven to cause severe + // cache thrashing on the server + dnbd3_request.offset = 0; + dnbd3_request.size = RTT_BLOCK_SIZE; + fixup_request(dnbd3_request); + iov.iov_base = &dnbd3_request; + iov.iov_len = sizeof(dnbd3_request); + + init_msghdr(msg); + // start rtt measurement + do_gettimeofday(&start); + + result = kernel_sendmsg(sock.sock, &msg, &iov, 1, sizeof(dnbd3_request)); + if (result <= 0) { + error_sock(&sock, "request test block failed in rtt measurement"); + goto error; + } + // receive net reply + iov.iov_base = &dnbd3_reply; + iov.iov_len = sizeof(dnbd3_reply); + result = kernel_recvmsg(sock.sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags); + if (result != sizeof(dnbd3_reply)) { + error_sock(&sock, "receive header test block failed in rtt measurement"); + goto error; + } + fixup_reply(dnbd3_reply); + if (dnbd3_reply.magic != dnbd3_packet_magic|| dnbd3_reply.cmd != CMD_GET_BLOCK || dnbd3_reply.size != RTT_BLOCK_SIZE) { + error_sock(&sock, "receive header cmd test block failed in rtt measurement"); + result = -EIO; + goto error; + } + + // receive data + iov.iov_base = buf; + iov.iov_len = RTT_BLOCK_SIZE; + result = kernel_recvmsg(sock.sock, &msg, &iov, 1, dnbd3_reply.size, msg.msg_flags); + if (result != RTT_BLOCK_SIZE) { + error_sock(&sock, "receive test block failed in rtt measurement"); + goto error; + } + + do_gettimeofday(&end); // end rtt measurement + + rtt = (uint64_t)((end.tv_sec - start.tv_sec) * 1000000ull + (end.tv_usec - start.tv_usec)); + + info_sock(&sock, "new rrt is %llu", rtt); + +error: + sock.server->rtts[dev->discovery_count % 4] = rtt; + if (result <= 0) { + server->failures++; + } + if (buf) { + kfree(buf); + } + if (sock.sock) { + kernel_sock_shutdown(sock.sock, SHUT_RDWR); + sock.server = NULL; + sock_release(sock.sock); + sock.sock = NULL; + } + return result; +} + static void dnbd3_discovery_worker(struct work_struct *work) { @@ -639,15 +826,6 @@ static void dnbd3_discovery_worker(struct work_struct *work) int i, j; struct dnbd3_server *existing_server, *free_server, *failed_server; dnbd3_server_entry_t *new_server; - struct kvec iov; - struct timeval start, end; - dnbd3_request_t dnbd3_request; - dnbd3_reply_t dnbd3_reply; - struct msghdr msg; - char *buf; - struct request *req = NULL; - uint64_t rtt; - serialized_buffer_t *payload; if (!dnbd3_is_sock_alive(*sock)) { sock = NULL; @@ -664,7 +842,9 @@ static void dnbd3_discovery_worker(struct work_struct *work) debug_sock(sock, "starting discovery worker"); - dnbd3_send_request_blocking(sock, CMD_GET_SERVERS); + if (dnbd3_send_request_blocking(sock, CMD_GET_SERVERS) <= 0) { + error_sock(sock, "failed to get servers in discovery"); + } debug_sock(sock, "new server num is %d", dev->new_servers_num); if (dev->new_servers_num) { @@ -695,7 +875,7 @@ static void dnbd3_discovery_worker(struct work_struct *work) if (existing_server) { if (new_server->failures == 1) { // remove is requested - print_server(KERN_INFO, dev, new_server, "remove server is requested"); + info_server(dev, new_server, "remove server is requested"); dnbd3_socket_disconnect(dev, existing_server, NULL); // TODO what to do when only one connection? existing_server->host.type = 0; } @@ -710,7 +890,7 @@ static void dnbd3_discovery_worker(struct work_struct *work) //no server found to replace continue; } - print_server(KERN_INFO, dev, free_server, "got new alt server"); + info_server(dev, free_server, "got new alt server"); free_server->failures = 0; free_server->protocol_version = 0; free_server->rtts[0] = free_server->rtts[1] = free_server->rtts[2] = free_server->rtts[3] = RTT_UNREACHABLE; @@ -719,166 +899,23 @@ static void dnbd3_discovery_worker(struct work_struct *work) dev->new_servers_num = 0; mutex_unlock(&dev->device_lock); } - buf = kmalloc(RTT_BLOCK_SIZE, GFP_KERNEL); - if (!buf) { - error_dev(dev, "kmalloc failed"); - goto error; - } - payload = (serialized_buffer_t *)buf; - req = kmalloc(sizeof(struct request), GFP_KERNEL); - if (!req) { - error_dev(dev, "kmalloc failed"); - goto error; - } - sock = kmalloc(sizeof(struct dnbd3_sock), GFP_KERNEL); - if (!sock) { - error_dev(dev, "kmalloc failed"); - goto error; - } - sock->sock_nr = NUMBER_CONNECTIONS; // measure rtt for all alt servers for (i = 0; i < NUMBER_SERVERS; i++) { existing_server = &dev->alt_servers[i]; if (existing_server->host.type) { - sock->sock = NULL; - sock->device = dev; - sock->server = existing_server; - if (__dnbd3_socket_connect(existing_server, sock)) { - error_sock(sock, "socket connect failed in rtt measurement"); - goto rtt_error; - } - dnbd3_connect(req); - if (dnbd3_send_request(sock, req, NULL)) { - error_sock(sock, "request select image failed in rtt measurement"); - goto rtt_error; - } - - if (dnbd3_receive_cmd(sock, &dnbd3_reply) <= 0) { - error_sock(sock, "receive select image failed in rtt measurement"); - goto rtt_error; - - } - if (dnbd3_reply.magic != dnbd3_packet_magic || dnbd3_reply.cmd != CMD_SELECT_IMAGE || dnbd3_reply.size < 4) { - error_sock(sock, "receive select image wrong header in rtt measurement"); - goto rtt_error; - } - - if (dnbd3_receive_cmd_select_image(dev, sock, &dnbd3_reply) <= 0) { - error_sock(sock, "receive data select image failed in rtt measurement"); - goto rtt_error; - } - - // Request block - dnbd3_request.magic = dnbd3_packet_magic; - dnbd3_request.cmd = CMD_GET_BLOCK; - // Do *NOT* pick a random block as it has proven to cause severe - // cache thrashing on the server - dnbd3_request.offset = 0; - dnbd3_request.size = RTT_BLOCK_SIZE; - fixup_request(dnbd3_request); - iov.iov_base = &dnbd3_request; - iov.iov_len = sizeof(dnbd3_request); - - init_msghdr(msg); - // start rtt measurement - do_gettimeofday(&start); - - if (kernel_sendmsg(sock->sock, &msg, &iov, 1, sizeof(dnbd3_request)) <= 0) { - error_sock(sock, "request test block failed in rtt measurement"); - goto rtt_error; - } - // receive net reply - iov.iov_base = &dnbd3_reply; - iov.iov_len = sizeof(dnbd3_reply); - if ((j = kernel_recvmsg(sock->sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags)) != sizeof(dnbd3_reply)) { - error_sock(sock, "receive header test block failed in rtt measurement %d %ld", j, sizeof(dnbd3_reply)); - goto rtt_error; - } - fixup_reply(dnbd3_reply); - if (dnbd3_reply.magic != dnbd3_packet_magic|| dnbd3_reply.cmd != CMD_GET_BLOCK || dnbd3_reply.size != RTT_BLOCK_SIZE) { - error_sock(sock, "receive header cmd test block failed in rtt measurement"); - goto rtt_error; - } - - // receive data - iov.iov_base = buf; - iov.iov_len = RTT_BLOCK_SIZE; - if (kernel_recvmsg(sock->sock, &msg, &iov, 1, dnbd3_reply.size, msg.msg_flags) != RTT_BLOCK_SIZE) { - error_sock(sock, "receive test block failed in rtt measurement"); - goto rtt_error; - } - - do_gettimeofday(&end); // end rtt measurement - - rtt = (uint64_t)((end.tv_sec - start.tv_sec) * 1000000ull + (end.tv_usec - start.tv_usec)); - - debug_sock(sock, "new rrt is %llu", rtt); - existing_server->rtts[dev->discovery_count % 4] = rtt; - -rtt_error: - if (sock->sock) { - kernel_sock_shutdown(sock->sock, SHUT_RDWR); - sock->server = NULL; - } - - if (sock->sock) { - sock_release(sock->sock); - sock->sock = NULL; + if (dnbd3_meassure_rtt(dev, existing_server) <= 0) { + existing_server->failures++; + warn_server(dev, existing_server, "failed to meassure rtt"); } } } -error: - if (buf) { - kfree(buf); - buf = NULL; - } - if (req) { - kfree(req); - req = NULL; - } - if (sock) { - kfree(sock); - sock = NULL; - } - // connect empty sockets - j = 0; - for (i = 0; i < NUMBER_CONNECTIONS; i++) { - if (!dnbd3_is_sock_alive(dev->socks[i])) { - free_server = dnbd3_find_best_alt_server(dev); - if (free_server) { - if (dnbd3_socket_connect(dev, free_server) == 0) { - j++; - } else { - print_server(KERN_WARNING, dev, free_server, "failed to connect"); - } - } - } else { - j++; - } - } - - // replace socket with better server - if (j == NUMBER_CONNECTIONS) { - for (i = 0; i < NUMBER_CONNECTIONS; i++) { - if (dnbd3_is_sock_alive(dev->socks[i])) { - free_server = dnbd3_find_best_alt_server(dev); - if (free_server && dnbd3_better_rtt(free_server, dev->socks[i].server)) { - dnbd3_socket_disconnect(dev, NULL, &dev->socks[i]); - - if (dnbd3_socket_connect(dev, free_server) != 0) { - print_server(KERN_WARNING, dev, free_server, "failed to connect"); - } - } - } - } - } - - debug_dev(dev, "connected to %d / %d sockets", j, NUMBER_CONNECTIONS); + dnbd3_adjust_connections(dev); dev->discovery_count++; } + static int __dnbd3_socket_connect(struct dnbd3_server *server, struct dnbd3_sock *sock) { int result = 0; @@ -948,7 +985,7 @@ static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *s } } if (sock == NULL) { - print_server(KERN_ERR, dev, server, "could not connect to socket, to many connections"); + warn_server(dev, server, "could not connect to socket, to many connections"); return -EIO; } sock->server = server; @@ -957,12 +994,16 @@ static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *s debug_sock(sock, "socket connect"); - mutex_init(&sock->lock); - mutex_lock(&sock->lock); - __dnbd3_socket_connect(server, sock); - mutex_unlock(&sock->lock); + mutex_init(&sock->tx_lock); + mutex_lock(&sock->tx_lock); + result = __dnbd3_socket_connect(server, sock); + mutex_unlock(&sock->tx_lock); + + sock->panic = false; + if (!sock->sock) { error_sock(sock, "socket is not connected"); + server->failures++; result = -EIO; goto error; } @@ -972,8 +1013,10 @@ static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *s queue_work(dnbd3_wq, &sock->receive_worker); result = dnbd3_send_request_blocking(sock, CMD_SELECT_IMAGE); - if (result) { + if (result <= 0) { error_sock(sock, "connection to image %s failed", dev->imgname); + server->failures++; + result = -EIO; goto error; } @@ -999,9 +1042,12 @@ static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *s return 0; error: if (sock->sock) { + kernel_sock_shutdown(sock->sock, SHUT_RDWR); + cancel_work_sync(&sock->receive_worker); sock_release(sock->sock); sock->sock = NULL; } + mutex_destroy(&sock->tx_lock); return result; } @@ -1024,33 +1070,33 @@ static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server } blk_mq_update_nr_hw_queues(&dev->tag_set, sock_alive - 1); if (sock_alive <= 1) { - info_sock(sock, "shutting down last socket and stopping discovery"); + info_sock(sock, "shutting down last socket and stopping timer"); del_timer_sync(&dev->timer); - dev->timer_count = 0; - dev->discovery_count = 0; - cancel_work_sync(&dev->discovery_worker); +// dev->timer_count = 0; +// dev->discovery_count = 0; +// cancel_work_sync(&dev->discovery_worker); // do not wait // cancel_work_sync(&dev->panic_worker); // do not wait for panic_worker, probably we are called from panic_worker } cancel_work_sync(&sock->keepalive_worker); debug_sock(sock, "socket disconnect"); - mutex_lock(&sock->lock); + mutex_lock(&sock->tx_lock); /* * Important sequence to shut down socket * 1. kernel_sock_shutdown * socket shutdown, receiver which hangs in kernel_recvmsg returns 0 * 2. cancel_work_sync(receiver) - * wait for the receiver to finish, so the socket is not usesd anymore + * wait for the receiver to finish, so the socket is not used anymore * 3. sock_release * release the socket and set to NULL */ if (sock->sock) { kernel_sock_shutdown(sock->sock, SHUT_RDWR); } - mutex_unlock(&sock->lock); - mutex_destroy(&sock->lock); + mutex_unlock(&sock->tx_lock); + mutex_destroy(&sock->tx_lock); cancel_work_sync(&sock->receive_worker); @@ -1058,8 +1104,8 @@ static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server sock_release(sock->sock); sock->sock = NULL; } - sock->panic = 0; sock->server = NULL; + sock->panic = false; return 0; } @@ -1068,6 +1114,10 @@ int dnbd3_net_disconnect(struct dnbd3_device *dev) int i; int result = 0; + del_timer_sync(&dev->timer); + cancel_work_sync(&dev->discovery_worker); + cancel_work_sync(&dev->panic_worker); // be sure it does not recover while disconnecting + for (i = 0; i < NUMBER_CONNECTIONS; i++) { if (dev->socks[i].sock) { if (dnbd3_socket_disconnect(dev, NULL, &dev->socks[i])) { |