From 457721b76423156fcb79e613003ac99e749636ca Mon Sep 17 00:00:00 2001 From: Frederic Robra Date: Tue, 23 Jul 2019 17:13:48 +0200 Subject: added some documentation --- src/kernel/dnbd3.h | 86 ++++-- src/kernel/net.c | 841 +++++++++++++++++++++++++++++++++++------------------ 2 files changed, 621 insertions(+), 306 deletions(-) diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h index ccd3bf1..3ae42fd 100644 --- a/src/kernel/dnbd3.h +++ b/src/kernel/dnbd3.h @@ -33,15 +33,22 @@ /** * the number of parallel connections */ -#define NUMBER_CONNECTIONS 4 // power of 2 +#define NUMBER_CONNECTIONS 4 /** - * limit to which the other connected servers are only allowed to be that worser then the best rtt + * limit to which the other connected servers are only allowed to be that worser + * then the best rtt */ #define RTT_THRESOULD_LIMIT(best_rtt) ((best_rtt) * 10) -#define DEBUG -#define DEBUG_FILE +/** + * turn on/off debug information (1/0) + */ +#define DEBUG 1 +/** + * turn on/off file and line information (1/0) + */ +#define DEBUG_FILE 1 extern struct workqueue_struct *dnbd3_wq; @@ -86,7 +93,8 @@ struct dnbd3_server { * @tx_lock: mutex to lock when sending * @sock: the socket, 'NULL' if not connected * @panic: 'true' if it is not possible to send or receive - * @cookie: is incremented for every send, used to find the mq request in the receiver + * @cookie: is incremented for every send, used to find the mq request in the + * receiver * @keepalive_worker: worker to send a keepalive package * @receive_worker: worker to handle the incoming packages * @pending: the pending request which is going to be send @@ -118,16 +126,21 @@ struct dnbd3_sock { * @device_lock: mutex to lock when device changes * @socks: array of dnbd3_sock to connect to * @imgname: the connected image name - * @initial_server: the server which was configured with ioctl, will not be overriden + * @initial_server: the server which was configured with ioctl, will not be + * overriden * @alt_servers: array 
of alternative servers - * @new_servers_num: number of new alternative servers that are waiting to be copied to above array + * @new_servers_num: number of new alternative servers that are waiting to be + * copied to above array * @new_servers: pending new alternative servers * @update_available: 'true' if the rid has changes - * @use_server_provided_alts: 'true' if the alt_servers array is upated by the alternatives provided by the server + * @use_server_provided_alts: 'true' if the alt_servers array is upated by the + * alternatives provided by the server * @rid: the revision ID? TODO * @reported_size: the size of the image - * @panic_worker: worker to handle panics and to connect if all connections are down - * @discovery_worker: worker to update the alt_servers, make rtt meassurement and reconnect to better servers + * @panic_worker: worker to handle panics and to connect if all connections are + * down + * @discovery_worker: worker to update the alt_servers, make rtt meassurement + * and reconnect to better servers * @discovery_count: counter for the discovery worker * @timer: timer to start the appropriate workers * @timer_count: counter for the timer @@ -137,21 +150,18 @@ struct dnbd3_device { struct blk_mq_tag_set tag_set; struct list_head list; - // block struct gendisk *disk; - // sysfs struct kobject kobj; struct mutex device_lock; - // network struct dnbd3_sock socks[NUMBER_CONNECTIONS]; char *imgname; struct dnbd3_server initial_server; - struct dnbd3_server alt_servers[NUMBER_SERVERS]; // array of alt servers - int new_servers_num; // number of new alt servers that are waiting to be copied to above array - dnbd3_server_entry_t new_servers[NUMBER_SERVERS]; // pending new alt servers + struct dnbd3_server alt_servers[NUMBER_SERVERS]; + int new_servers_num; + dnbd3_server_entry_t new_servers[NUMBER_SERVERS]; bool update_available; bool use_server_provided_alts; uint16_t rid; @@ -198,12 +208,12 @@ struct dnbd3_cmd { * print fmt, adds sock and device 
information to log * _server(dev, server, fmt,...) * print fmt, adds device and appends server information to the log - * DEBUG - if not defined switch of all debug messages - * DEBUG_FILE - if not defined switch of file and line number information + * DEBUG - 1/0 switch all debug messages on + * DEBUG_FILE - 1/0 switch on file and line number information */ -#ifdef DEBUG_FILE +#if IS_ENABLED(DEBUG_FILE) #define __print(level, fmt,...) \ printk(level "%s:%d " fmt "\n", __FILE__, __LINE__, ## __VA_ARGS__) @@ -211,30 +221,56 @@ struct dnbd3_cmd { #else #define __print(level, fmt,...) \ - printk(fmt, ## __VA_ARGS__) + printk(level fmt "\n", ## __VA_ARGS__) #endif #define __print_sock(level, sock, fmt, ...) \ do { \ if ((sock)->server->host.type == HOST_IP4) { \ - __print(level, "dnbd%d/%d %pI4:%d: " fmt, (sock)->device->minor, (sock)->sock_nr, (sock)->server->host.addr, (sock)->server->host.port, ## __VA_ARGS__); \ + __print( \ + level, \ + "dnbd%d/%d %pI4:%d: " fmt, \ + (sock)->device->minor, \ + (sock)->sock_nr, \ + (sock)->server->host.addr, \ + (sock)->server->host.port, \ + ## __VA_ARGS__); \ } else { \ - __print(level, "dnbd%d/%d %pI6:%d: " fmt, (sock)->device->minor, (sock)->sock_nr, (sock)->server->host.addr, (sock)->server->host.port, ## __VA_ARGS__); \ + __print( \ + level, \ + "dnbd%d/%d %pI6:%d: " fmt, \ + (sock)->device->minor, \ + (sock)->sock_nr, \ + (sock)->server->host.addr, \ + (sock)->server->host.port, \ + ## __VA_ARGS__); \ } \ } while (0) #define __print_server(level, dev, server, fmt, ...) 
\ do { \ if ((server)->host.type == HOST_IP4) { \ - __print(level, "dnbd%d: " fmt " %pI4:%d", (dev)->minor, ## __VA_ARGS__, (server)->host.addr, (server)->host.port); \ + __print( \ + level, \ + "dnbd%d: " fmt " %pI4:%d", \ + (dev)->minor, \ + ## __VA_ARGS__, \ + (server)->host.addr, \ + (server)->host.port); \ } else { \ - __print(level, "dnbd%d: " fmt " %pI6:%d", (dev)->minor, ## __VA_ARGS__, (server)->host.addr, (server)->host.port); \ + __print( \ + level, \ + "dnbd%d: " fmt " %pI6:%d", \ + (dev)->minor, \ + ## __VA_ARGS__, \ + (server)->host.addr, \ + (server)->host.port); \ } \ } while (0) -#ifdef DEBUG +#if IS_ENABLED(DEBUG) #define debug(fmt, ...) \ __print(KERN_DEBUG, "dnbd: " fmt, ## __VA_ARGS__) diff --git a/src/kernel/net.c b/src/kernel/net.c index da9b897..dfde203 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -20,7 +20,6 @@ */ - #include #include @@ -32,102 +31,136 @@ #define DNBD3_REQ_OP_SPECIAL REQ_OP_DRV_IN #define DNBD3_REQ_OP_CONNECT REQ_OP_DRV_OUT -#define dnbd3_cmd_to_priv(req, cmd) (req)->cmd_flags = DNBD3_REQ_OP_SPECIAL | ((cmd) << REQ_FLAG_BITS) -#define dnbd3_connect(req) (req)->cmd_flags = DNBD3_REQ_OP_CONNECT | ((CMD_SELECT_IMAGE) << REQ_FLAG_BITS) -#define dnbd3_priv_to_cmd(req) ((req)->cmd_flags >> REQ_FLAG_BITS) +#define dnbd3_cmd_to_op_special(req, cmd) \ + (req)->cmd_flags = DNBD3_REQ_OP_SPECIAL | ((cmd) << REQ_FLAG_BITS) + +#define dnbd3_op_special_to_cmd(req) \ + ((req)->cmd_flags >> REQ_FLAG_BITS) + +#define dnbd3_connect_to_req(req) \ + (req)->cmd_flags = DNBD3_REQ_OP_CONNECT \ + | ((CMD_SELECT_IMAGE) << REQ_FLAG_BITS) + #define dnbd3_test_block_to_req(req) \ do { \ (req)->cmd_flags = REQ_OP_READ; \ (req)->__data_len = RTT_BLOCK_SIZE; \ (req)->__sector = 0; \ } while (0) -#define dnbd3_sock_create(af,type,proto,sock) sock_create_kern(&init_net, (af) == HOST_IP4 ? 
AF_INET : AF_INET6, type, proto, sock) -#define REQUEST_TIMEOUT (HZ * SOCKET_TIMEOUT_CLIENT_DATA) +#define dnbd3_sock_create(af,type,proto,sock) \ + sock_create_kern(&init_net, (af) == HOST_IP4 ? AF_INET : AF_INET6, \ + type, proto, sock) +#define REQUEST_TIMEOUT \ + (HZ * SOCKET_TIMEOUT_CLIENT_DATA) -#define init_msghdr(h) do { \ - h.msg_name = NULL; \ - h.msg_namelen = 0; \ - h.msg_control = NULL; \ - h.msg_controllen = 0; \ - h.msg_flags = MSG_WAITALL | MSG_NOSIGNAL; \ + +#define dnbd3_init_msghdr(h) \ + do { \ + (h).msg_name = NULL; \ + (h).msg_namelen = 0; \ + (h).msg_control = NULL; \ + (h).msg_controllen = 0; \ + (h).msg_flags = MSG_WAITALL | MSG_NOSIGNAL; \ } while (0) -static DECLARE_WAIT_QUEUE_HEAD(send_wq); +#define dnbd3_avg_rtt(server) \ + (( (server)->rtts[0] + (server)->rtts[1] \ + + (server)->rtts[2] + (server)->rtts[3] ) / 4 ) -static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *server); -static int __dnbd3_socket_connect(struct dnbd3_server * server, struct dnbd3_sock *sock); -static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server *server, struct dnbd3_sock *sock); +#define dnbd3_set_rtt_unreachable(server) \ + (server)->rtts[0] = (server)->rtts[1] = (server)->rtts[2] \ + = (server)->rtts[3] = RTT_UNREACHABLE; -static void dnbd3_print_server_list(struct dnbd3_device *dev) -{ - int i; - info_server(dev, &dev->initial_server, "initial server is"); - for (i = 0; i < NUMBER_SERVERS; i++) { - if (dev->alt_servers[i].host.addr[0] != 0) { - info_server(dev, &dev->alt_servers[i], "alternative server is"); - } - } -} +static int dnbd3_server_connect(struct dnbd3_device *dev, + struct dnbd3_server *server); +static int dnbd3_socket_connect(struct dnbd3_sock *sock, + struct dnbd3_server * server); +static int dnbd3_socket_disconnect(struct dnbd3_device *dev, + struct dnbd3_server *server, struct dnbd3_sock *sock); -static inline uint64_t dnbd3_to_wq_signal(int minor, uint16_t dnbd3_cmd, uint16_t sock_nr) { 
- return ((uint64_t) minor << 32) | ((uint32_t) dnbd3_cmd << 16) | sock_nr; -} +/* + * Methods for request and receive commands + */ +/** + * dnbd3_to_handle - convert tag and cookie to handle + * @tag: the tag to convert + * @cookie: the cookie to convert + */ static inline uint64_t dnbd3_to_handle(uint32_t tag, uint32_t cookie) { return ((uint64_t) tag << 32) | cookie; } +/** + * dnbd3_tag_from_handle - get tag from handle + * @handle: the handle + */ static inline uint32_t dnbd3_tag_from_handle(uint64_t handle) { return (uint32_t)(handle >> 32); } +/** + * dnbd3_cookie_from_handle - get cookie from handle + * @handle: the handle + */ static inline uint32_t dnbd3_cookie_from_handle(uint64_t handle) { return (uint32_t) handle; } -int dnbd3_send_request(struct dnbd3_sock *sock, struct request *req, struct dnbd3_cmd *cmd) +/** + * dnbd3_send_request - send a request + * @sock: the socket where the request is send + * @req: the request to send + * @cmd: optional - the dnbd3_cmd from mq + * + * the tx_lock of the socket must be held + */ +int dnbd3_send_request(struct dnbd3_sock *sock, struct request *req, + struct dnbd3_cmd *cmd) { - dnbd3_request_t dnbd3_request; + dnbd3_request_t request; struct msghdr msg; struct kvec iov[2]; size_t iov_num = 1; - size_t send_len; + size_t lng; int result; uint32_t tag; uint64_t handle; serialized_buffer_t payload_buffer; sock->pending = req; - init_msghdr(msg); + dnbd3_init_msghdr(msg); - dnbd3_request.magic = dnbd3_packet_magic; + request.magic = dnbd3_packet_magic; switch (req_op(req)) { case REQ_OP_READ: debug_sock(sock, "request operation read"); - dnbd3_request.cmd = CMD_GET_BLOCK; - dnbd3_request.offset = blk_rq_pos(req) << 9; // *512 - dnbd3_request.size = blk_rq_bytes(req); // bytes left to complete entire request + request.cmd = CMD_GET_BLOCK; + request.offset = blk_rq_pos(req) << 9; // * 512 + request.size = blk_rq_bytes(req); break; case DNBD3_REQ_OP_SPECIAL: debug_sock(sock, "request operation special"); - 
dnbd3_request.cmd = dnbd3_priv_to_cmd(req); - dnbd3_request.size = 0; + request.cmd = dnbd3_op_special_to_cmd(req); + request.size = 0; break; case DNBD3_REQ_OP_CONNECT: - debug_sock(sock, "request operation connect to %s", sock->device->imgname); - dnbd3_request.cmd = CMD_SELECT_IMAGE; + debug_sock(sock, "request operation connect to %s", + sock->device->imgname); + request.cmd = CMD_SELECT_IMAGE; serializer_reset_write(&payload_buffer); serializer_put_uint16(&payload_buffer, PROTOCOL_VERSION); serializer_put_string(&payload_buffer, sock->device->imgname); serializer_put_uint16(&payload_buffer, sock->device->rid); serializer_put_uint8(&payload_buffer, 0); // is_server = false iov[1].iov_base = &payload_buffer; - dnbd3_request.size = iov[1].iov_len = serializer_get_written_length(&payload_buffer); + request.size = serializer_get_written_length(&payload_buffer); + iov[1].iov_len = request.size; iov_num = 2; break; default: @@ -137,18 +170,18 @@ int dnbd3_send_request(struct dnbd3_sock *sock, struct request *req, struct dnbd if (cmd != NULL) { cmd->cookie = sock->cookie; tag = blk_mq_unique_tag(req); - handle = dnbd3_to_handle(tag, sock->cookie);// ((uint64_t) tag << 32) | sock->cookie; + handle = dnbd3_to_handle(tag, sock->cookie); } else { handle = sock->cookie; } - memcpy(&dnbd3_request.handle, &handle, sizeof(handle)); + memcpy(&request.handle, &handle, sizeof(handle)); - fixup_request(dnbd3_request); - iov[0].iov_base = &dnbd3_request; - iov[0].iov_len = sizeof(dnbd3_request); - send_len = iov_num == 1 ? sizeof(dnbd3_request) : iov[0].iov_len + iov[1].iov_len; - result = kernel_sendmsg(sock->sock, &msg, iov, iov_num, send_len); - if (result != send_len) { + fixup_request(request); + iov[0].iov_base = &request; + iov[0].iov_len = sizeof(request); + lng = iov_num == 1 ? 
iov[0].iov_len : iov[0].iov_len + iov[1].iov_len; + result = kernel_sendmsg(sock->sock, &msg, iov, iov_num, lng); + if (result != lng) { error_sock(sock, "connection to server lost"); sock->server->failures++; goto error; @@ -160,6 +193,11 @@ error: } +/** + * dnbd3_send_request_cmd - send a dnbd3 cmd + * @sock: the socket where the request is sent + * @dnbd3_cmd: the dnbd3 cmd to send + */ static int dnbd3_send_request_cmd(struct dnbd3_sock *sock, uint16_t dnbd3_cmd) { int result; @@ -173,21 +211,22 @@ static int dnbd3_send_request_cmd(struct dnbd3_sock *sock, uint16_t dnbd3_cmd) switch (dnbd3_cmd) { case CMD_KEEPALIVE: case CMD_GET_SERVERS: - dnbd3_cmd_to_priv(req, dnbd3_cmd); + dnbd3_cmd_to_op_special(req, dnbd3_cmd); break; case CMD_SELECT_IMAGE: - dnbd3_connect(req); + dnbd3_connect_to_req(req); break; case CMD_GET_BLOCK: dnbd3_test_block_to_req(req); break; default: - warn_sock(sock, "unsupported command for blocking %d", dnbd3_cmd); + warn_sock(sock, "unsupported command %d", dnbd3_cmd); result = -EINVAL; goto error; } mutex_lock(&sock->tx_lock); + sock->pending = req; result = dnbd3_send_request(sock, req, NULL); if (result <= 0) { mutex_unlock(&sock->tx_lock); @@ -202,21 +241,28 @@ error: return result; } +/** + * dnbd3_receive_cmd - receive a command + * @sock: the socket where the request is received + * @reply: an unused reply will be filled with the reply of the server + * + * this method should be called directly after the dnbd3_send_request_ method + */ static int dnbd3_receive_cmd(struct dnbd3_sock *sock, dnbd3_reply_t *reply) { int result; struct msghdr msg; struct kvec iov; - init_msghdr(msg); + dnbd3_init_msghdr(msg); iov.iov_base = reply; iov.iov_len = sizeof(dnbd3_reply_t); - result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags); + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, + msg.msg_flags); if (result <= 0) { return result; } fixup_reply(dnbd3_reply); - // check error if (reply->magic !=
dnbd3_packet_magic) { error_sock(sock, "receive cmd wrong magic packet"); return -EIO; @@ -229,7 +275,18 @@ static int dnbd3_receive_cmd(struct dnbd3_sock *sock, dnbd3_reply_t *reply) return result; } -static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_device *dev, struct dnbd3_sock *sock, dnbd3_reply_t *reply) +/** + * dnbd3_receive_cmd_get_block_mq - receive a block for mq + * @sock: the socket where the request is received + * @reply: the reply initialized by dnbd3_receive_cmd + * + * this method should be called directly after the dnbd3_receive_cmd method + * + * this method copies the data to user space according to the request which is + * encoded in the handle by the send request method and decoded here. + */ +static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock, + dnbd3_reply_t *reply) { struct dnbd3_cmd *cmd; struct msghdr msg; @@ -237,6 +294,7 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_device *dev, struct dnbd3 struct kvec iov; struct req_iterator iter; struct bio_vec bvec_inst; + struct dnbd3_device *dev = sock->device; struct bio_vec *bvec = &bvec_inst; sigset_t blocked, oldset; void *kaddr; @@ -244,7 +302,7 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_device *dev, struct dnbd3 uint16_t hwq; int result = 0; uint64_t handle; - init_msghdr(msg); + dnbd3_init_msghdr(msg); memcpy(&handle, &reply->handle, sizeof(handle)); cookie = dnbd3_cookie_from_handle(handle); @@ -252,10 +310,12 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_device *dev, struct dnbd3 hwq = blk_mq_unique_tag_to_hwq(tag); if (hwq < dev->tag_set.nr_hw_queues) { - req = blk_mq_tag_to_rq(dev->tag_set.tags[hwq], blk_mq_unique_tag_to_tag(tag)); + req = blk_mq_tag_to_rq(dev->tag_set.tags[hwq], + blk_mq_unique_tag_to_tag(tag)); } if (!req || !blk_mq_request_started(req)) { - dev_err(disk_to_dev(dev->disk), "unexpected reply (%d) %p\n", tag, req); + dev_err(disk_to_dev(dev->disk), "unexpected reply (%d) %p\n", + tag, req); return -EIO; } cmd 
= blk_mq_rq_to_pdu(req); @@ -263,12 +323,11 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_device *dev, struct dnbd3 mutex_lock(&cmd->lock); if (cmd->cookie != cookie) { dev_err(disk_to_dev(dev->disk), "double reply on req %p, cookie %u, handle cookie %u\n", - req, cmd->cookie, cookie); + req, cmd->cookie, cookie); mutex_unlock(&cmd->lock); return -EIO; } - rq_for_each_segment(bvec_inst, req, iter) { siginitsetinv(&blocked, sigmask(SIGKILL)); sigprocmask(SIG_SETMASK, &blocked, &oldset); @@ -276,11 +335,12 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_device *dev, struct dnbd3 kaddr = kmap(bvec->bv_page) + bvec->bv_offset; iov.iov_base = kaddr; iov.iov_len = bvec->bv_len; - result = kernel_recvmsg(sock->sock, &msg, &iov, 1, bvec->bv_len, msg.msg_flags); + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, bvec->bv_len, + msg.msg_flags); if (result != bvec->bv_len) { kunmap(bvec->bv_page); sigprocmask(SIG_SETMASK, &oldset, NULL ); - error_sock(sock, "could not receive form net to block layer"); + error_sock(sock, "could not receive from net to block layer"); mutex_unlock(&cmd->lock); return result; } @@ -293,7 +353,18 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_device *dev, struct dnbd3 return result; } -static int dnbd3_receive_cmd_get_block_test(struct dnbd3_sock *sock, dnbd3_reply_t *reply) + +/** + * dnbd3_receive_cmd_get_block_test - receive a test block + * @sock: the socket where the request is received + * @reply: the reply initialized by dnbd3_receive_cmd + * + * this method should be called directly after the dnbd3_receive_cmd method + * + * the received data is just thrown away + */ +static int dnbd3_receive_cmd_get_block_test(struct dnbd3_sock *sock, + dnbd3_reply_t *reply) { struct msghdr msg; struct kvec iov; @@ -304,10 +375,11 @@ static int dnbd3_receive_cmd_get_block_test(struct dnbd3_sock *sock, dnbd3_reply goto error; } - init_msghdr(msg); + dnbd3_init_msghdr(msg); iov.iov_base = buf; iov.iov_len = reply->size; - 
result = kernel_recvmsg(sock->sock, &msg, &iov, 1, reply->size, msg.msg_flags); + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, reply->size, + msg.msg_flags); if (result != RTT_BLOCK_SIZE) { error_sock(sock, "receive test block failed"); goto error; @@ -320,14 +392,25 @@ error: return result; } -static int dnbd3_receive_cmd_get_servers(struct dnbd3_device *dev, struct dnbd3_sock *sock, dnbd3_reply_t *reply) +/** + * dnbd3_receive_cmd_get_servers - receive new servers + * @sock: the socket where the request is received + * @reply: the reply initialized by dnbd3_receive_cmd + * + * this method should be called directly after the dnbd3_receive_cmd method + * + * the new servers are copied to dnbd3_device.new_servers and + * dnbd3_device.new_server_num is set accordingly + */ +static int dnbd3_receive_cmd_get_servers(struct dnbd3_sock *sock, + dnbd3_reply_t *reply) { struct msghdr msg; struct kvec iov; - /* return true if did not receive servers, not an error*/ + struct dnbd3_device *dev = sock->device; int result = 1; int count, remaining; - init_msghdr(msg); + dnbd3_init_msghdr(msg); debug_sock(sock, "get servers received"); mutex_lock(&dev->device_lock); @@ -341,9 +424,11 @@ static int dnbd3_receive_cmd_get_servers(struct dnbd3_device *dev, struct dnbd3_ if (count != 0) { iov.iov_base = dev->new_servers; iov.iov_len = count * sizeof(dnbd3_server_entry_t); - result = kernel_recvmsg(sock->sock, &msg, &iov, 1, (count * sizeof(dnbd3_server_entry_t)), msg.msg_flags); + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, + msg.msg_flags); if (result <= 0) { - error_sock(sock, "failed to receive get servers %d", result); + error_sock(sock, "failed to receive get servers %d", + result); return result; } else if (result != (count * sizeof(dnbd3_server_entry_t))) { error_sock(sock, "failed to get servers"); @@ -352,14 +437,19 @@ static int dnbd3_receive_cmd_get_servers(struct dnbd3_device *dev, struct dnbd3_ } dev->new_servers_num = count; } - // If there 
were more servers than accepted, remove the remaining data from the socket buffer + /* + * if there were more servers than accepted, remove the remaining data + * from the socket buffer + * abuse the reply struct as the receive buffer + */ remaining = reply->size - (count * sizeof(dnbd3_server_entry_t)); consume_payload: while (remaining > 0) { - count = MIN(sizeof(dnbd3_reply_t), remaining); // Abuse the reply struct as the receive buffer + count = MIN(sizeof(dnbd3_reply_t), remaining); iov.iov_base = reply; iov.iov_len = count; - result = kernel_recvmsg(sock->sock, &msg, &iov, 1, count, msg.msg_flags); + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, count, + msg.msg_flags); if (result <= 0) { error_sock(sock, "failed to receive payload from get servers"); mutex_unlock(&dev->device_lock); @@ -370,13 +460,24 @@ consume_payload: return result; } -static int dnbd3_receive_cmd_latest_rid(struct dnbd3_device *dev, struct dnbd3_sock *sock, dnbd3_reply_t *reply) +/** + * dnbd3_receive_cmd_latest_rid - receive latest rid + * @sock: the socket where the request is received + * @reply: the reply initialized by dnbd3_receive_cmd + * + * this method should be called directly after the dnbd3_receive_cmd method + * + * dnbd3_device.update_available is set if a new RID is received + */ +static int dnbd3_receive_cmd_latest_rid(struct dnbd3_sock *sock, + dnbd3_reply_t *reply) { struct kvec iov; uint16_t rid; int result; struct msghdr msg; - init_msghdr(msg); + struct dnbd3_device *dev = sock->device; + dnbd3_init_msghdr(msg); debug_sock(sock, "latest rid received"); if (reply->size != 2) { @@ -385,18 +486,32 @@ static int dnbd3_receive_cmd_latest_rid(struct dnbd3_device *dev, struct dnbd3_s } iov.iov_base = &rid; iov.iov_len = sizeof(rid); - result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags); + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, + msg.msg_flags); if (result <= 0) { error_sock(sock, "failed to receive latest rid"); 
return result; } rid = net_order_16(rid); - debug_sock(sock, "latest rid of %s is %d (currently using %d)", dev->imgname, (int)rid, (int)dev->rid); + debug_sock(sock, "latest rid of %s is %d (currently using %d)", + dev->imgname, (int)rid, (int)dev->rid); dev->update_available = (rid > dev->rid ? true : false); return result; } -static int dnbd3_receive_cmd_select_image(struct dnbd3_device *dev, struct dnbd3_sock *sock, dnbd3_reply_t *reply) + +/** + * dnbd3_receive_cmd_select_image - select the image + * @sock: the socket where the request is received + * @reply: the reply initialized by dnbd3_receive_cmd + * + * this method should be called directly after the dnbd3_receive_cmd method + * + * if this is the first connection the image name, file size and rid will be set + * if this is a further connection image name, file size and rid will be checked + */ +static int dnbd3_receive_cmd_select_image(struct dnbd3_sock *sock, + dnbd3_reply_t *reply) { struct kvec iov; uint16_t rid; @@ -405,12 +520,13 @@ static int dnbd3_receive_cmd_select_image(struct dnbd3_device *dev, struct dnbd3 struct msghdr msg; serialized_buffer_t payload_buffer; uint64_t reported_size; - init_msghdr(msg); + struct dnbd3_device *dev = sock->device; + dnbd3_init_msghdr(msg); debug_sock(sock, "select image received"); - // receive reply payload iov.iov_base = &payload_buffer; iov.iov_len = reply->size; - result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags); + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, + msg.msg_flags); if (result <= 0) { error_sock(sock, "failed to receive select image %d", result); return result; @@ -420,7 +536,7 @@ static int dnbd3_receive_cmd_select_image(struct dnbd3_device *dev, struct dnbd3 return -EIO; } - // handle/check reply payload + /* handle/check reply payload */ serializer_reset_read(&payload_buffer, reply->size); sock->server->protocol_version = serializer_get_uint16(&payload_buffer); if (sock->server->protocol_version <
MIN_SUPPORTED_SERVER) { @@ -433,7 +549,8 @@ static int dnbd3_receive_cmd_select_image(struct dnbd3_device *dev, struct dnbd3 name = serializer_get_string(&payload_buffer); rid = serializer_get_uint16(&payload_buffer); if (dev->rid != rid && strcmp(name, dev->imgname) != 0) { - error_sock(sock, "server offers image '%s', requested '%s'", name, dev->imgname); + error_sock(sock, "server offers image '%s', requested '%s'", + name, dev->imgname); return -EIO; } @@ -444,24 +561,71 @@ static int dnbd3_receive_cmd_select_image(struct dnbd3_device *dev, struct dnbd3 return -EIO; } dev->reported_size = reported_size; - set_capacity(dev->disk, dev->reported_size >> 9); /* 512 Byte blocks */ + set_capacity(dev->disk, dev->reported_size >> 9); /* 512 Byte */ } else if (dev->reported_size != reported_size) { - error_sock(sock, "reported size by server is %llu but should be %llu", reported_size, dev->reported_size); + error_sock(sock, "reported size by server is %llu but should be %llu", + reported_size, dev->reported_size); return -EIO; } return result; } + + +/* + * Timer and workers + */ + +/** + * dnbd3_timer - the timer to start different workers + * @arg: the timer_list used to get the dnbd3_device + * + * workers to start: + * - panic_worker + * - keepalive_worker for each connected socket + * - discovery_worker + */ +static void dnbd3_timer(struct timer_list *arg) +{ + struct dnbd3_device *dev; + int i; + dev = container_of(arg, struct dnbd3_device, timer); + queue_work(dnbd3_wq, &dev->panic_worker); + + if (dev->timer_count % TIMER_INTERVAL_KEEPALIVE_PACKET == 0) { + for (i = 0; i < NUMBER_CONNECTIONS; i++) { + if (dnbd3_is_sock_alive(dev->socks[i])) { + queue_work(dnbd3_wq, &dev->socks[i].keepalive_worker); + } + } + } + /* start after 4 seconds */ + if (dev->timer_count % TIMER_INTERVAL_PROBE_NORMAL == 4) { + queue_work(dnbd3_wq, &dev->discovery_worker); + } + + + dev->timer_count++; + dev->timer.expires = jiffies + HZ; + add_timer(&dev->timer); +} + +/** + * 
dnbd3_receive_worker - receives data from a socket + * @work: the work used to get the dnbd3_sock + * + * receives data until the socket is closed (returns 0) + */ static void dnbd3_receive_worker(struct work_struct *work) { - struct dnbd3_sock *sock = container_of(work, struct dnbd3_sock, receive_worker); - struct dnbd3_device *dev = sock->device; - dnbd3_reply_t dnbd3_reply; + struct dnbd3_sock *sock; + dnbd3_reply_t reply; int result; + sock = container_of(work, struct dnbd3_sock, receive_worker); debug_sock(sock, "receive worker is starting"); while(1) { // loop until socket returns 0 - result = dnbd3_receive_cmd(sock, &dnbd3_reply); + result = dnbd3_receive_cmd(sock, &reply); if (result == -EAGAIN) { continue; } else if (result <= 0) { @@ -470,39 +634,43 @@ static void dnbd3_receive_worker(struct work_struct *work) } - switch (dnbd3_reply.cmd) { + switch (reply.cmd) { case CMD_GET_BLOCK: - result = dnbd3_receive_cmd_get_block_mq(dev, sock, &dnbd3_reply); + result = dnbd3_receive_cmd_get_block_mq(sock, &reply); if (result <= 0) { - error_sock(sock, "receive cmd get block mq failed %d", result); + error_sock(sock, "receive cmd get block mq failed %d", + result); goto error; } - continue; // we do not need to wake up anyone, wait for next cmd (block) + continue; case CMD_GET_SERVERS: - result = dnbd3_receive_cmd_get_servers(dev, sock, &dnbd3_reply); + result = dnbd3_receive_cmd_get_servers(sock, &reply); if (result <= 0) { - error_sock(sock, "receive cmd get servers failed %d", result); + error_sock(sock, "receive cmd get servers failed %d", + result); goto error; } break; case CMD_LATEST_RID: - result = dnbd3_receive_cmd_latest_rid(dev, sock, &dnbd3_reply); + result = dnbd3_receive_cmd_latest_rid(sock, &reply); if (result <= 0) { - error_sock(sock, "receive cmd latest rid failed %d", result); + error_sock(sock, "receive cmd latest rid failed %d", + result); goto error; } break; case CMD_KEEPALIVE: - if (dnbd3_reply.size != 0) { + if (reply.size != 0) {
error_sock(sock, "got keep alive packet with payload"); goto error; } debug_sock(sock, "keep alive received"); break; case CMD_SELECT_IMAGE: - result = dnbd3_receive_cmd_select_image(dev, sock, &dnbd3_reply); + result = dnbd3_receive_cmd_select_image(sock, &reply); if (result <= 0) { - error_sock(sock, "receive cmd select image failed %d", result); + error_sock(sock, "receive cmd select image failed %d", + result); goto error; } break; @@ -514,10 +682,12 @@ error: if (result == 0) { info_sock(sock, "result is 0, socket seems to be down"); sock->panic = true; - break; //the socket seems to be down + break; } else if (result < 0) { - sock->server->failures++; // discovery takes care of to many failures - warn_sock(sock, "receive error happened %d, total failures %d", result, sock->server->failures); + /* discovery takes care of too many failures */ + sock->server->failures++; + warn_sock(sock, "receive error happened %d, total failures %d", + result, sock->server->failures); } debug_sock(sock, "receive completed, waiting for next receive"); } @@ -526,49 +696,37 @@ error: } -static void dnbd3_timer(struct timer_list *arg) -{ - struct dnbd3_device *dev = container_of(arg, struct dnbd3_device, timer); - int i; - - queue_work(dnbd3_wq, &dev->panic_worker); - - if (dev->timer_count % TIMER_INTERVAL_KEEPALIVE_PACKET == 0) { - for (i = 0; i < NUMBER_CONNECTIONS; i++) { - if (dnbd3_is_sock_alive(dev->socks[i])) { - queue_work(dnbd3_wq, &dev->socks[i].keepalive_worker); - } - } - } - if (dev->timer_count % TIMER_INTERVAL_PROBE_NORMAL == 4) { // wait for 4 seconds - queue_work(dnbd3_wq, &dev->discovery_worker); - } - - - dev->timer_count++; - dev->timer.expires = jiffies + HZ; - add_timer(&dev->timer); -} - - +/** + * dnbd3_keepalive_worker - sends a keepalive + * @work: the work used to get the dnbd3_sock + */ static void dnbd3_keepalive_worker(struct work_struct *work) { - struct dnbd3_sock *sock = container_of(work, struct dnbd3_sock, keepalive_worker); + struct dnbd3_sock
*sock; + sock = container_of(work, struct dnbd3_sock, keepalive_worker); debug_sock(sock, "starting keepalive worker"); dnbd3_send_request_cmd(sock, CMD_KEEPALIVE); } -static struct dnbd3_server *dnbd3_find_best_alt_server(struct dnbd3_device *dev) { +/** + * dnbd3_find_best_alt_server - find best alternative server + * @dev: the device where to search for alternative servers + * + * searches for an alternative server which has an rtt better than + * RTT_THRESOULD_LIMIT of the best connected server + */ +static struct dnbd3_server *dnbd3_find_best_alt_server(struct dnbd3_device *dev) +{ int i, j; uint64_t rtt = 0; uint64_t best_rtt = RTT_UNREACHABLE; uint64_t current_best_rtt = RTT_UNREACHABLE; - struct dnbd3_server *best_alt_server = NULL; - struct dnbd3_server *better_alt_server = NULL; + struct dnbd3_server *best_server = NULL; + struct dnbd3_server *server = NULL; for (i = 0; i < NUMBER_CONNECTIONS; i++) { if (dnbd3_is_sock_alive(dev->socks[i])) { - rtt = (dev->socks[i].server->rtts[0] + dev->socks[i].server->rtts[1] + dev->socks[i].server->rtts[2] + dev->socks[i].server->rtts[3]) / 4; + rtt = dnbd3_avg_rtt(dev->socks[i].server); if (rtt <= current_best_rtt) { current_best_rtt = rtt; } @@ -576,37 +734,48 @@ static struct dnbd3_server *dnbd3_find_best_alt_server(struct dnbd3_device *dev) } best_rtt = RTT_THRESOULD_LIMIT(current_best_rtt); - debug_dev(dev, "best connected rtt is %llu, searching for rtt better than %llu", current_best_rtt, best_rtt); + debug_dev(dev, "best connected rtt is %llu, searching for rtt better than %llu", + current_best_rtt, best_rtt); for (i = 0; i < NUMBER_SERVERS; i++) { if (dev->alt_servers[i].host.type != 0) { - rtt = (dev->alt_servers[i].rtts[0] + dev->alt_servers[i].rtts[1] + dev->alt_servers[i].rtts[2] + dev->alt_servers[i].rtts[3]) / 4; + rtt = dnbd3_avg_rtt(&dev->alt_servers[i]); if (rtt <= best_rtt) { - better_alt_server = &dev->alt_servers[i]; - for (j = 0; j < NUMBER_CONNECTIONS; j++) { // check if already connected - if
(better_alt_server == dev->socks[j].server) { - better_alt_server = NULL; // found already connected server + server = &dev->alt_servers[i]; + /* check if already connected */ + for (j = 0; j < NUMBER_CONNECTIONS; j++) { + if (server == dev->socks[j].server) { + server = NULL; break; } } - if (better_alt_server) { - best_alt_server = better_alt_server; + if (server) { + best_server = server; best_rtt = rtt; } } } } - if (best_alt_server) { - info_server(dev, best_alt_server, "found best alt server with rtt %llu", best_rtt); + if (best_server) { + debug_server(dev, best_server, "found best alt server with rtt %llu", + best_rtt); } else { debug_dev(dev, "did not find any alternative server"); } - return best_alt_server; + return best_server; } -static bool dnbd3_better_rtt(struct dnbd3_server *new_server, struct dnbd3_server *existing_server) { - uint64_t new_rtt = (new_server->rtts[0] + new_server->rtts[1] + new_server->rtts[2] + new_server->rtts[3]) / 4; - uint64_t existing_rtt = (existing_server->rtts[0] + existing_server->rtts[1] + existing_server->rtts[2] + existing_server->rtts[3]) / 4; +/** + * dnbd3_better_rtt - checks if the rtt is better + * @new_server: the server to check + * @existing_server: current server + * + * checks if the rtt is better than RTT_THRESHOLD_FACTOR + */ +static bool dnbd3_better_rtt(struct dnbd3_server *new_server, + struct dnbd3_server *existing_server) { + uint64_t new_rtt = dnbd3_avg_rtt(new_server); + uint64_t existing_rtt = dnbd3_avg_rtt(existing_server); if (new_rtt < RTT_THRESHOLD_FACTOR(existing_rtt)) { return true; @@ -614,6 +783,14 @@ static bool dnbd3_better_rtt(struct dnbd3_server *new_server, struct dnbd3_serve return false; } +/** + * dnbd3_adjust_connections - adjust the connections of the device + * @dev: the device + * + * 1. connect empty sockets if best alternative server is found + * 2. replace slow socket with better server if available + * 3. 
remove socket if one is slow + */ static void dnbd3_adjust_connections(struct dnbd3_device *dev) { int i; int sock_alive = 0; @@ -621,13 +798,13 @@ static void dnbd3_adjust_connections(struct dnbd3_device *dev) { uint64_t best_rtt = RTT_UNREACHABLE; struct dnbd3_server *server, *existing_server; - // connect empty sockets + /* connect empty sockets */ sock_alive = 0; for (i = 0; i < NUMBER_CONNECTIONS; i++) { if (!dnbd3_is_sock_alive(dev->socks[i])) { server = dnbd3_find_best_alt_server(dev); if (server) { - if (dnbd3_socket_connect(dev, server) == 0) { + if (dnbd3_server_connect(dev, server) == 0) { sock_alive++; } else { warn_server(dev, server, "failed to connect"); @@ -638,7 +815,7 @@ static void dnbd3_adjust_connections(struct dnbd3_device *dev) { } } - // replace socket with better server + /* replace socket with better server */ if (sock_alive == NUMBER_CONNECTIONS) { for (i = 0; i < NUMBER_CONNECTIONS; i++) { if (dnbd3_is_sock_alive(dev->socks[i])) { @@ -647,20 +824,21 @@ static void dnbd3_adjust_connections(struct dnbd3_device *dev) { if (server && dnbd3_better_rtt(server, dev->socks[i].server)) { dnbd3_socket_disconnect(dev, NULL, &dev->socks[i]); - if (dnbd3_socket_connect(dev, server) != 0) { + /* if it fails reconnect to existing */ + if (dnbd3_server_connect(dev, server) != 0) { warn_server(dev, server, "failed to connect"); - dnbd3_socket_connect(dev, existing_server); + dnbd3_server_connect(dev, existing_server); } } } } } - // remove a socket if it is much slower than the others + /* remove a socket if it is much slower than the others */ if (sock_alive > 1) { for (i = 0; i < NUMBER_CONNECTIONS; i++) { if (dnbd3_is_sock_alive(dev->socks[i])) { - rtt = (dev->socks[i].server->rtts[0] + dev->socks[i].server->rtts[1] + dev->socks[i].server->rtts[2] + dev->socks[i].server->rtts[3]) / 4; + rtt = dnbd3_avg_rtt(dev->socks[i].server); if (rtt <= best_rtt) { best_rtt = rtt; } @@ -668,7 +846,7 @@ static void dnbd3_adjust_connections(struct dnbd3_device *dev) 
{ } for (i = 0; i < NUMBER_CONNECTIONS; i++) { if (dnbd3_is_sock_alive(dev->socks[i])) { - rtt = (dev->socks[i].server->rtts[0] + dev->socks[i].server->rtts[1] + dev->socks[i].server->rtts[2] + dev->socks[i].server->rtts[3]) / 4; + rtt = dnbd3_avg_rtt(dev->socks[i].server); if (rtt > RTT_THRESOULD_LIMIT(best_rtt)) { info_sock(&dev->socks[i], "removing connection with rtt %llu", rtt); dnbd3_socket_disconnect(dev, NULL, &dev->socks[i]); @@ -678,16 +856,21 @@ static void dnbd3_adjust_connections(struct dnbd3_device *dev) { } } - debug_dev(dev, "connected to %d/%d sockets", sock_alive, NUMBER_CONNECTIONS); + info_dev(dev, "connected to %d/%d sockets", sock_alive, NUMBER_CONNECTIONS); } +/** + * dnbd3_panic_connect - connect to the first available server + * @dev: the device + */ static int dnbd3_panic_connect(struct dnbd3_device *dev) { int result, i; - result = dnbd3_socket_connect(dev, &dev->initial_server); + result = dnbd3_server_connect(dev, &dev->initial_server); if (result) { for (i = 0; i < NUMBER_SERVERS; i++) { if (dev->alt_servers[i].host.type != 0) { - result = dnbd3_socket_connect(dev, &dev->alt_servers[i]); + result = dnbd3_server_connect(dev, + &dev->alt_servers[i]); if (!result) { info_server(dev, &dev->alt_servers[i], "found server to connect to"); break; @@ -698,13 +881,22 @@ static int dnbd3_panic_connect(struct dnbd3_device *dev) { return result; } +/** + * dnbd3_panic_worker - handle panicked sockets + * @work: the work used to get the dndb3_device + * + * 1. disconnect panicked socket + * 2. reconnect to good alternative + * 3. 
if no socket is connected do a panic_connect + */ static void dnbd3_panic_worker(struct work_struct *work) { - struct dnbd3_device *dev = container_of(work, struct dnbd3_device, panic_worker); + struct dnbd3_device *dev; struct dnbd3_sock *panicked_sock = NULL; struct dnbd3_server *new_server, *panicked_server; int i; int sock_alive = 0; + dev = container_of(work, struct dnbd3_device, panic_worker); for (i = 0; i < NUMBER_CONNECTIONS; i++) { if (dev->socks[i].panic) { panicked_sock = &dev->socks[i]; @@ -712,8 +904,10 @@ static void dnbd3_panic_worker(struct work_struct *work) sock_alive++; } } + if (panicked_sock) { - warn_sock(panicked_sock, "panicked, connections still alive %d", sock_alive); + warn_sock(panicked_sock, "panicked, connections still alive %d", + sock_alive); panicked_server = panicked_sock->server; new_server = dnbd3_find_best_alt_server(dev); @@ -721,19 +915,12 @@ static void dnbd3_panic_worker(struct work_struct *work) if (new_server != NULL && new_server != panicked_server) { info_server(dev, new_server, "found replacement"); - if (!dnbd3_socket_connect(dev, new_server)) { + if (!dnbd3_server_connect(dev, new_server)) { sock_alive++; } } else if (sock_alive > 0) { - info_dev(dev, "found no replacement server but still connected to %d servers", sock_alive); - } - } else if (sock_alive == 0) { - new_server = dnbd3_find_best_alt_server(dev); - if (new_server != NULL) { - info_server(dev, new_server, "reconnect to server"); - if (!dnbd3_socket_connect(dev, new_server)) { - sock_alive++; - } + info_dev(dev, "found no replacement server but still connected to %d servers", + sock_alive); } } @@ -748,7 +935,13 @@ static void dnbd3_panic_worker(struct work_struct *work) } } -static int dnbd3_meassure_rtt(struct dnbd3_device *dev, struct dnbd3_server *server) +/** + * dnbd3_meassure_rtt - meassure the rtt of a server + * @dev: the device this server belongs to + * @server: the server to meassure + */ +static int dnbd3_meassure_rtt(struct dnbd3_device 
*dev, + struct dnbd3_server *server) { struct timeval start, end; dnbd3_reply_t reply; @@ -762,12 +955,12 @@ static int dnbd3_meassure_rtt(struct dnbd3_device *dev, struct dnbd3_server *ser .server = server }; - result = __dnbd3_socket_connect(server, &sock); + result = dnbd3_socket_connect(&sock, server); if (result) { error_sock(&sock, "socket connect failed in rtt measurement"); goto error; } - dnbd3_connect(&req); + dnbd3_connect_to_req(&req); result = dnbd3_send_request_cmd(&sock, CMD_SELECT_IMAGE); if (result <= 0) { error_sock(&sock, "request select image failed in rtt measurement"); @@ -780,12 +973,14 @@ static int dnbd3_meassure_rtt(struct dnbd3_device *dev, struct dnbd3_server *ser goto error; } - if (reply.magic != dnbd3_packet_magic || reply.cmd != CMD_SELECT_IMAGE || reply.size < 4) { + if (reply.magic != dnbd3_packet_magic || reply.cmd != CMD_SELECT_IMAGE + || reply.size < 4) { error_sock(&sock, "receive select image wrong header in rtt measurement"); result = -EIO; goto error; } - result = dnbd3_receive_cmd_select_image(dev, &sock, &reply); + + result = dnbd3_receive_cmd_select_image(&sock, &reply); if (result <= 0) { error_sock(&sock, "receive data select image failed in rtt measurement"); goto error; @@ -799,7 +994,8 @@ static int dnbd3_meassure_rtt(struct dnbd3_device *dev, struct dnbd3_server *ser goto error; } result = dnbd3_receive_cmd(&sock, &reply); - if (reply.magic != dnbd3_packet_magic|| reply.cmd != CMD_GET_BLOCK || reply.size != RTT_BLOCK_SIZE) { + if (reply.magic != dnbd3_packet_magic|| reply.cmd != CMD_GET_BLOCK + || reply.size != RTT_BLOCK_SIZE) { error_sock(&sock, "receive header cmd test block failed in rtt measurement"); result = -EIO; goto error; @@ -811,9 +1007,10 @@ static int dnbd3_meassure_rtt(struct dnbd3_device *dev, struct dnbd3_server *ser } do_gettimeofday(&end); // end rtt measurement - rtt = (uint64_t)((end.tv_sec - start.tv_sec) * 1000000ull + (end.tv_usec - start.tv_usec)); + rtt = (uint64_t)((end.tv_sec - 
start.tv_sec) * 1000000ull + + (end.tv_usec - start.tv_usec)); - info_sock(&sock, "new rrt is %llu", rtt); + debug_sock(&sock, "new rrt is %llu", rtt); error: sock.server->rtts[dev->discovery_count % 4] = rtt; @@ -830,63 +1027,82 @@ error: } -static void dnbd3_discovery_worker(struct work_struct *work) +static void dnbd3_merge_new_server(struct dnbd3_device *dev, + dnbd3_server_entry_t *new_server) { - struct dnbd3_device *dev = container_of(work, struct dnbd3_device, discovery_worker); - int i, j; + int i; struct dnbd3_server *existing_server, *free_server, *failed_server; - dnbd3_server_entry_t *new_server; + existing_server = NULL; + free_server = NULL; + failed_server = NULL; + + /* find servers in alternative servers */ + for (i = 0; i < NUMBER_SERVERS; i++) { + if ((new_server->host.type == dev->alt_servers[i].host.type) + && (new_server->host.port == dev->alt_servers[i].host.port) + && (0 == memcmp(new_server->host.addr, + dev->alt_servers[i].host.addr, + (new_server->host.type == HOST_IP4 ? 
4 : 16) + ))) { + + existing_server = &dev->alt_servers[i]; + } else if (dev->alt_servers[i].host.type == 0) { + free_server = &dev->alt_servers[i]; + } else if (dev->alt_servers[i].failures > 20) { + failed_server = &dev->alt_servers[i]; + } + } + + if (existing_server) { + if (new_server->failures == 1) { /* remove is requested */ + info_server(dev, new_server, + "remove server is requested"); + dnbd3_socket_disconnect(dev, existing_server, NULL); + existing_server->host.type = 0; + } +// existing_server->failures = 0; // reset failure count + return; + } else if (free_server) { + free_server->host = new_server->host; + } else if (failed_server) { + failed_server->host = new_server->host; + free_server = failed_server; + } else { + /* no server found to replace */ + return; + } + info_server(dev, free_server, "got new alternative server"); + free_server->failures = 0; + free_server->protocol_version = 0; + dnbd3_set_rtt_unreachable(free_server); +} +/** + * dnbd3_discovery_worker - handle discovery + * @work: the work used to get the dndb3_device + * + * 1. check if new servers are available and set them to alternative servers + * 2. meassure the rtt for all available servers + * 3. 
adjust the connections + */ +static void dnbd3_discovery_worker(struct work_struct *work) +{ + struct dnbd3_device *dev; + int i; + struct dnbd3_server *server; + dnbd3_server_entry_t *new_server; + dev = container_of(work, struct dnbd3_device, discovery_worker); - debug_dev(dev, "starting discovery worker new server num is %d", dev->new_servers_num); + debug_dev(dev, "starting discovery worker new server num is %d", + dev->new_servers_num); if (dev->new_servers_num) { mutex_lock(&dev->device_lock); - for (i = 0; i < dev->new_servers_num; i++) { new_server = &dev->new_servers[i]; - if (new_server->host.type == HOST_IP4 || new_server->host.type == HOST_IP6) { - existing_server = NULL; - free_server = NULL; - failed_server = NULL; - - // find servers in alt servers - for (j = 0; j < NUMBER_SERVERS; j++) { - if ((new_server->host.type == dev->alt_servers[j].host.type) - && (new_server->host.port == dev->alt_servers[j].host.port) - && (0 == memcmp(new_server->host.addr, dev->alt_servers[j].host.addr, - (new_server->host.type == HOST_IP4 ? 4 : 16)))) { - - existing_server = &dev->alt_servers[j]; - } else if (dev->alt_servers[j].host.type == 0) { - free_server = &dev->alt_servers[j]; - } else if (dev->alt_servers[j].failures > 20) { - failed_server = &dev->alt_servers[j]; - } - } - - if (existing_server) { - if (new_server->failures == 1) { // remove is requested - info_server(dev, new_server, "remove server is requested"); - dnbd3_socket_disconnect(dev, existing_server, NULL); // TODO what to do when only one connection? 
- existing_server->host.type = 0; - } -// existing_server->failures = 0; // reset failure count - continue; - } else if (free_server) { - free_server->host = new_server->host; - } else if (failed_server) { - failed_server->host = new_server->host; - free_server = failed_server; - } else { - //no server found to replace - continue; - } - info_server(dev, free_server, "got new alt server"); - free_server->failures = 0; - free_server->protocol_version = 0; - free_server->rtts[0] = free_server->rtts[1] = free_server->rtts[2] = free_server->rtts[3] = RTT_UNREACHABLE; + if (new_server->host.type != 0) { + dnbd3_merge_new_server(dev, new_server); } } dev->new_servers_num = 0; @@ -894,11 +1110,12 @@ static void dnbd3_discovery_worker(struct work_struct *work) } // measure rtt for all alt servers for (i = 0; i < NUMBER_SERVERS; i++) { - existing_server = &dev->alt_servers[i]; - if (existing_server->host.type) { - if (dnbd3_meassure_rtt(dev, existing_server) <= 0) { - existing_server->failures++; - warn_server(dev, existing_server, "failed to meassure rtt"); + server = &dev->alt_servers[i]; + if (server->host.type) { + if (dnbd3_meassure_rtt(dev, server) <= 0) { + server->failures++; + warn_server(dev, server, + "failed to meassure rtt"); } } } @@ -909,7 +1126,18 @@ static void dnbd3_discovery_worker(struct work_struct *work) } -static int __dnbd3_socket_connect(struct dnbd3_server *server, struct dnbd3_sock *sock) +/* + * Connect and disconnect + */ + + +/** + * dnbd3_socket_connect - connect a socket to a server + * @sock: the socket to connect + * @server: the server + */ +static int dnbd3_socket_connect(struct dnbd3_sock *sock, + struct dnbd3_server *server) { int result = 0; struct timeval timeout; @@ -925,14 +1153,17 @@ static int __dnbd3_socket_connect(struct dnbd3_server *server, struct dnbd3_sock timeout.tv_sec = SOCKET_TIMEOUT_CLIENT_DATA; timeout.tv_usec = 0; - - if ((result = dnbd3_sock_create(server->host.type, SOCK_STREAM, IPPROTO_TCP, &sock->sock)) < 0) { + 
result = dnbd3_sock_create(server->host.type, SOCK_STREAM, IPPROTO_TCP, + &sock->sock); + if (result < 0) { error_sock(sock, "could not create socket"); goto error; } - kernel_setsockopt(sock->sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout, sizeof(timeout)); - kernel_setsockopt(sock->sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, sizeof(timeout)); + kernel_setsockopt(sock->sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout, + sizeof(timeout)); + kernel_setsockopt(sock->sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, + sizeof(timeout)); sock->sock->sk->sk_allocation = GFP_NOIO; if (server->host.type == HOST_IP4) { struct sockaddr_in sin; @@ -940,7 +1171,9 @@ static int __dnbd3_socket_connect(struct dnbd3_server *server, struct dnbd3_sock sin.sin_family = AF_INET; memcpy(&(sin.sin_addr), server->host.addr, 4); sin.sin_port = server->host.port; - if ((result = kernel_connect(sock->sock, (struct sockaddr *)&sin, sizeof(sin), 0)) != 0) { + result = kernel_connect(sock->sock, (struct sockaddr *)&sin, + sizeof(sin), 0); + if (result != 0) { error_sock(sock, "connection to host failed"); goto error; } @@ -950,7 +1183,9 @@ static int __dnbd3_socket_connect(struct dnbd3_server *server, struct dnbd3_sock sin.sin6_family = AF_INET6; memcpy(&(sin.sin6_addr), server->host.addr, 16); sin.sin6_port = server->host.port; - if ((result = kernel_connect(sock->sock, (struct sockaddr *)&sin, sizeof(sin), 0)) != 0){ + result = kernel_connect(sock->sock, (struct sockaddr *)&sin, + sizeof(sin), 0); + if (result != 0){ error_sock(sock, "connection to host failed"); goto error; } @@ -966,9 +1201,19 @@ error: } /** - * connect a dnbd3 device to a server + * dnbd3_server_connect - connect a server to a device + * @dev: the device + * @server: the server to connect + * + * 1. connects the server to a free socket if available + * 2. select the image + * 3. start receiver_worker and keepalive_worker + * 4. if it is the first connection start timer, panic_worker and + * keepalive_worker + * 5. 
update the mq queues to the number of sockets alive */ -static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *server) +static int dnbd3_server_connect(struct dnbd3_device *dev, + struct dnbd3_server *server) { int i; int sock_alive = 0; @@ -993,7 +1238,12 @@ static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *s mutex_init(&sock->tx_lock); mutex_lock(&sock->tx_lock); - result = __dnbd3_socket_connect(server, sock); + result = dnbd3_socket_connect(sock, server); + if (result) { + error_sock(sock, "connection to socket failed"); + result = -EIO; + goto error; + } mutex_unlock(&sock->tx_lock); sock->panic = false; @@ -1014,22 +1264,27 @@ static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *s } result = dnbd3_receive_cmd(sock, &reply); if (result <= 0) { - error_sock(sock, "receive cmd to image %s failed", dev->imgname); + error_sock(sock, "receive cmd to image %s failed", + dev->imgname); result = -EIO; goto error; } - if (reply.magic != dnbd3_packet_magic || reply.cmd != CMD_SELECT_IMAGE || reply.size < 4) { - error_sock(sock, "receive select image wrong header %s", dev->imgname); + if (reply.magic != dnbd3_packet_magic || reply.cmd != CMD_SELECT_IMAGE + || reply.size < 4) { + error_sock(sock, "receive select image wrong header %s", + dev->imgname); result = -EIO; goto error; } - result = dnbd3_receive_cmd_select_image(dev, sock, &reply); + result = dnbd3_receive_cmd_select_image(sock, &reply); if (result <= 0) { - error_sock(sock, "receive cmd select image %s failed", dev->imgname); + error_sock(sock, "receive cmd select image %s failed", + dev->imgname); result = -EIO; goto error; } - debug_sock(sock, "connected to image %s, filesize %llu", dev->imgname, dev->reported_size); + debug_sock(sock, "connected to image %s, filesize %llu", dev->imgname, + dev->reported_size); // start the receiver INIT_WORK(&sock->receive_worker, dnbd3_receive_worker); @@ -1042,7 +1297,8 @@ static int 
dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *s sock_alive++; } } - if (sock_alive == 1) { // first socket to connect, start timer and workers + /* if first socket to connect, start timer and workers */ + if (sock_alive == 1) { debug_sock(sock, "first connection to server, starting workers"); INIT_WORK(&dev->discovery_worker, dnbd3_discovery_worker); INIT_WORK(&dev->panic_worker, dnbd3_panic_worker); @@ -1052,7 +1308,7 @@ static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *s } blk_mq_update_nr_hw_queues(&dev->tag_set, sock_alive); - // request alternative servers receiver will handle this + /* request alternative servers receiver will handle this */ if (dnbd3_send_request_cmd(sock, CMD_GET_SERVERS) <= 0) { error_sock(sock, "failed to get servers in discovery"); } @@ -1071,7 +1327,18 @@ error: } -static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server *server, struct dnbd3_sock *sock) +/** + * dnbd3_socket_disconnect - disconnect a socket or server + * @dev: the device + * @server: optional the server to disconnect + * @sock: optional the socket to disconnect + * + * 1. update nr of mq queues + * 2. if last socket remove timer + * 3. 
disconnect socket + */ +static int dnbd3_socket_disconnect(struct dnbd3_device *dev, + struct dnbd3_server *server, struct dnbd3_sock *sock) { int i; int sock_alive = 0; @@ -1091,10 +1358,10 @@ static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server if (sock_alive <= 1) { info_sock(sock, "shutting down last socket and stopping timer"); del_timer_sync(&dev->timer); -// dev->timer_count = 0; -// dev->discovery_count = 0; -// cancel_work_sync(&dev->discovery_worker); // do not wait -// cancel_work_sync(&dev->panic_worker); // do not wait for panic_worker, probably we are called from panic_worker + /* + * do not wait for discovery and panic worker as they may have + * called this method + */ } cancel_work_sync(&sock->keepalive_worker); @@ -1105,9 +1372,11 @@ static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server /* * Important sequence to shut down socket * 1. kernel_sock_shutdown - * socket shutdown, receiver which hangs in kernel_recvmsg returns 0 + * socket shutdown, receiver which block ins socket receive + * returns 0 * 2. cancel_work_sync(receiver) - * wait for the receiver to finish, so the socket is not used anymore + * wait for the receiver to finish, so the socket is not used + * anymore * 3. 
sock_release * release the socket and set to NULL */ @@ -1128,18 +1397,50 @@ static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server return 0; } +/** + * dnbd3_net_connect - connect device + * @dev: the device to connect + * + * dnbd3_device.alt_servers[0] must set + */ +int dnbd3_net_connect(struct dnbd3_device *dev) +{ + int result; + debug_dev(dev, "connecting to server"); + + if (dev->alt_servers[0].host.type == 0) { + return -ENONET; + } + + // alt_server[0] is the initial server + result = dnbd3_server_connect(dev, &dev->alt_servers[0]); + if (result) { + error_dev(dev, "failed to connect to initial server"); + result = -ENOENT; + dev->imgname = NULL; + dev->socks[0].server = NULL; + } + return result; +} + +/** + * dnbd3_net_disconnect - disconnect device + * @dev: the device to disconnect + */ int dnbd3_net_disconnect(struct dnbd3_device *dev) { int i; int result = 0; del_timer_sync(&dev->timer); + /* be sure it does not recover while disconnecting */ cancel_work_sync(&dev->discovery_worker); - cancel_work_sync(&dev->panic_worker); // be sure it does not recover while disconnecting + cancel_work_sync(&dev->panic_worker); for (i = 0; i < NUMBER_CONNECTIONS; i++) { if (dev->socks[i].sock) { - if (dnbd3_socket_disconnect(dev, NULL, &dev->socks[i])) { + if (dnbd3_socket_disconnect(dev, NULL, + &dev->socks[i])) { result = -EIO; } } @@ -1148,25 +1449,3 @@ int dnbd3_net_disconnect(struct dnbd3_device *dev) } -int dnbd3_net_connect(struct dnbd3_device *dev) -{ - int result; - debug_dev(dev, "connecting to server"); - - // alt_server[0] is the initial server - if (dnbd3_socket_connect(dev, &dev->alt_servers[0]) == 0) { - dnbd3_print_server_list(dev); - - - result = 0; - } else { - error_dev(dev, "failed to connect to initial server"); - result = -ENOENT; - dev->imgname = NULL; - dev->socks[0].server = NULL; - } - return result; -} - - - -- cgit v1.2.3-55-g7522