diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/kernel/core.c | 181 | ||||
-rw-r--r-- | src/kernel/dnbd3.h | 17 | ||||
-rw-r--r-- | src/kernel/net.c | 98 |
3 files changed, 217 insertions, 79 deletions
diff --git a/src/kernel/core.c b/src/kernel/core.c index b945a55..cc19b58 100644 --- a/src/kernel/core.c +++ b/src/kernel/core.c @@ -52,7 +52,6 @@ #include "clientconfig.h" #include "net.h" -#define DNBD3_CMD_REQUEUED 1 struct workqueue_struct *dnbd3_wq; @@ -63,7 +62,10 @@ static unsigned int max_devs = NUMBER_DEVICES; static struct dnbd3_device *device; int major; - +/** + * dnbd3_requeue_cmd - requeue a command once + * @cmd: the command to requeue + */ static void dnbd3_requeue_cmd(struct dnbd3_cmd *cmd) { struct request *req = blk_mq_rq_from_pdu(cmd); @@ -73,23 +75,25 @@ static void dnbd3_requeue_cmd(struct dnbd3_cmd *cmd) } } +/** + * dnbd3_handle_cmd - handles a mq command + * @cmd: the cmd to send + * @index: the index of the queue + */ static int dnbd3_handle_cmd(struct dnbd3_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); struct dnbd3_device *dev = cmd->dnbd3; struct dnbd3_sock *sock = NULL; + bool first_try = true; int ret = -1; int i; int sock_alive = 0; - debug_dev(dev, "handle request at position %lu, size %d, index %d", blk_rq_pos(req), blk_rq_bytes(req), index); + debug_dev(dev, "handle request at position %lu, size %d, index %d", + blk_rq_pos(req), blk_rq_bytes(req), index); -// if (index >= 1) { // TODO use next server with good rtt for this request -// printk(KERN_INFO "dnbd3: index is %d", index); -// dev_err_ratelimited(disk_to_dev(dev->disk), "attempted send on invalid socket\n"); -// blk_mq_start_request(req); -// return -EINVAL; -// } +again: for (i = 0; i < NUMBER_CONNECTIONS; i++) { if (dnbd3_is_sock_alive(dev->socks[i])) { @@ -109,6 +113,13 @@ static int dnbd3_handle_cmd(struct dnbd3_cmd *cmd, int index) ret = 0; goto out; } + if (first_try) { + debug_dev(dev, "no socket found, going to sleep"); + msleep(SOCKET_TIMEOUT_CLIENT_DATA * 1000); + first_try = false; + goto again; + } + error_dev(dev, "failed to find a socket, end request"); blk_mq_end_request(req, BLK_STS_IOERR); return -EINVAL; } @@ -141,7 +152,13 @@ out: return ret; } -static blk_status_t dnbd3_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) +/** + * dnbd3_queue_rq - queue request + * @hctx: state for a hardware queue facing the hardware block device + * @bd: the queue data including the request + */ +static blk_status_t dnbd3_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { struct dnbd3_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); int ret; @@ -160,7 +177,15 @@ static blk_status_t dnbd3_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_ return ret; } -static int dnbd3_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) +/** + * dnbd3_init_request - init a mq request + * @set: the mq tag set + * @rq: the request + * @hctx_idx: + * @numa_node: + */ +static int dnbd3_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { struct dnbd3_cmd *cmd = blk_mq_rq_to_pdu(rq); cmd->dnbd3 = set->driver_data; @@ -168,7 +193,14 @@ static int dnbd3_init_request(struct blk_mq_tag_set *set, struct request *rq, un mutex_init(&cmd->lock); return 0; } -static enum blk_eh_timer_return dnbd3_xmit_timeout(struct request *req, bool reserved) + +/** + * dnbd3_xmit_timeout - timeout function for mq + * @req: the timedout request + * @reserved: + */ +static enum blk_eh_timer_return dnbd3_xmit_timeout(struct request *req, + bool reserved) { struct dnbd3_cmd *cmd = blk_mq_rq_to_pdu(req); struct dnbd3_device *dev = cmd->dnbd3; @@ -194,7 +226,10 @@ static enum blk_eh_timer_return dnbd3_xmit_timeout(struct request *req, bool res return BLK_EH_DONE; } - +/** + * struct blk_mq_ops - dnbd3_mq_ops + * multiqueue operations + */ static struct blk_mq_ops dnbd3_mq_ops = { .queue_rq = dnbd3_queue_rq, .init_request = dnbd3_init_request, @@ -203,8 +238,15 @@ static struct blk_mq_ops dnbd3_mq_ops = { - -static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) +/** + * dnbd3_ioctl - the ioctl function of the dnbd3 kernel modul + * @bdev: the block device + * @mode: + * @cmd: the ioctl command + * @arg: the user data + */ +static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) { int result = -EIO; struct dnbd3_device *dev = bdev->bd_disk->private_data; @@ -216,28 +258,35 @@ static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd if (arg != 0) { msg = kmalloc(sizeof(*msg), GFP_KERNEL); - if (msg == NULL) return -ENOMEM; - if (copy_from_user((char *)msg, (char *)arg, 2) != 0 || msg->len != sizeof(*msg)) { + if (msg == NULL) { + return -ENOMEM; + } + result = copy_from_user((char *)msg, (char *)arg, 2); + if (result != 0 || msg->len != sizeof(*msg)) { result = -ENOEXEC; - goto cleanup_return; + goto error; } - if (copy_from_user((char *)msg, (char *)arg, sizeof(*msg)) != 0) { + result = copy_from_user((char *)msg, (char *)arg, sizeof(*msg)); + if (result != 0) { result = -ENOENT; - goto cleanup_return; + goto error; } if (msg->imgname != NULL && msg->imgnamelen > 0) { imgname = kmalloc(msg->imgnamelen + 1, GFP_KERNEL); if (imgname == NULL) { result = -ENOMEM; - goto cleanup_return; + goto error; } - if (copy_from_user(imgname, msg->imgname, msg->imgnamelen) != 0) { + result = copy_from_user( + imgname, msg->imgname, msg->imgnamelen); + if (result != 0) { result = -ENOENT; - goto cleanup_return; + goto error; } imgname[msg->imgnamelen] = '\0'; - debug_dev(dev, "ioctl image name of len %i is %s", (int)msg->imgnamelen, imgname); + debug_dev(dev, "ioctl image name of len %i is %s", + (int)msg->imgnamelen, imgname); } } @@ -255,16 +304,22 @@ static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd if (sizeof(msg->host) != sizeof(dev->initial_server.host)) { warn_dev(dev, "odd size bug#1 triggered in ioctl"); } - memcpy(&dev->initial_server.host, &msg->host, sizeof(msg->host)); + memcpy(&dev->initial_server.host, &msg->host, + sizeof(msg->host)); dev->initial_server.failures = 0; - dev->initial_server.rtts[0] = dev->initial_server.rtts[1] = dev->initial_server.rtts[2] = dev->initial_server.rtts[3] = RTT_UNREACHABLE; -// memcpy(&dev->initial_server, &dev->cur_server, sizeof(dev->initial_server)); + dnbd3_set_rtt_unreachable(&dev->initial_server) dev->imgname = imgname; dev->rid = msg->rid; - dev->use_server_provided_alts = msg->use_server_provided_alts; - // Forget all alt servers on explicit connect, set first alt server to initial server - memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])*NUMBER_SERVERS); - memcpy(dev->alt_servers, &dev->initial_server, sizeof(dev->alt_servers[0])); + dev->use_server_provided_alts = + msg->use_server_provided_alts; + /* + * forget all alt servers on explicit connect, set first + * alt server to initial server + */ + memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0]) + * NUMBER_SERVERS); + memcpy(dev->alt_servers, &dev->initial_server, + sizeof(dev->alt_servers[0])); result = dnbd3_net_connect(dev); imgname = NULL; } @@ -297,8 +352,11 @@ static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd } else if (msg == NULL) { result = -EINVAL; } else { - memcpy(&dev->new_servers[dev->new_servers_num].host, &msg->host, sizeof(msg->host)); - dev->new_servers[dev->new_servers_num].failures = (cmd == IOCTL_ADD_SRV ? 0 : 1); // 0 = ADD, 1 = REM + memcpy(&dev->new_servers[dev->new_servers_num].host, + &msg->host, sizeof(msg->host)); + /* 0 = ADD, 1 = REM */ + dev->new_servers[dev->new_servers_num].failures = + (cmd == IOCTL_ADD_SRV ? 0 : 1); ++dev->new_servers_num; result = 0; } @@ -315,7 +373,7 @@ static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd break; } mutex_unlock(&dev->device_lock); -cleanup_return: +error: if (msg) kfree(msg); if (imgname) kfree(imgname); return result; @@ -323,7 +381,10 @@ cleanup_return: } - +/** + * struct block_device_operations - dnbd3_fops + * device operations for ioctl + */ static struct block_device_operations dnbd3_fops = { .owner = THIS_MODULE, @@ -334,7 +395,11 @@ static struct block_device_operations dnbd3_fops = - +/** + * dnbd3_add_device - add a dnbd3 device + * @dev: the device + * @minor: the minor number of the device + */ int dnbd3_add_device(struct dnbd3_device *dev, int minor) { struct gendisk *disk; @@ -369,7 +434,8 @@ int dnbd3_add_device(struct dnbd3_device *dev, int minor) dev->minor = minor; dev->disk = disk; dev->tag_set.ops = &dnbd3_mq_ops; - dev->tag_set.nr_hw_queues = 1; // this can be changed later with blk_mq_update_nr_hw_queues() + /* this can be changed later with blk_mq_update_nr_hw_queues() */ + dev->tag_set.nr_hw_queues = 1; dev->tag_set.queue_depth = 128; dev->tag_set.numa_node = NUMA_NO_NODE; dev->tag_set.cmd_size = sizeof(struct dnbd3_cmd); @@ -428,13 +494,22 @@ out_free_dnbd3: - +/** + * dnbd3_init - init the dnbd3 kernel modul + */ static int __init dnbd3_init(void) { int i; debug("starting kernel module"); - dnbd3_wq = alloc_workqueue("kdnbd3", WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND, 0); + /* + * allocate a workqueue/thread for this modul + * WQ_MEM_RECLAIM - it is allowed to allocate memory + * WQ_FREEZABLE - can go to sleep + * WQ_UNBOUND - not bound to a certain CPU + */ + dnbd3_wq = alloc_workqueue("kdnbd3", + WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND, 0); if (max_devs < 0) { error("max_devs must be >= 0"); @@ -469,7 +544,12 @@ static int __init dnbd3_init(void) return 0; } - +/** + * dnbd3_exit_cb - callback function for idr_for_each + * @id: the id + * @ptr: the entry + * @data: the callback data + */ static int dnbd3_exit_cb(int id, void *ptr, void *data) { struct list_head *list = (struct list_head *)data; @@ -479,6 +559,10 @@ static int dnbd3_exit_cb(int id, void *ptr, void *data) return 0; } +/** + * dnbd3_dev_remove - remove the dnbd3 device + * @dev: the device to remove + */ static void dnbd3_dev_remove(struct dnbd3_device *dev) { struct gendisk *disk = dev->disk; @@ -501,15 +585,9 @@ static void dnbd3_dev_remove(struct dnbd3_device *dev) mutex_destroy(&dev->device_lock); } -static void dnbd3_put(struct dnbd3_device *dnbd3) -{ - mutex_lock(&dnbd3_index_mutex); - idr_remove(&dnbd3_index_idr, dnbd3->minor); - mutex_unlock(&dnbd3_index_mutex); - dnbd3_dev_remove(dnbd3); -} - - +/** + * dnbd3_exit - exit the dnbd3 modul + */ static void __exit dnbd3_exit(void) { struct dnbd3_device *dnbd3; @@ -524,7 +602,10 @@ static void __exit dnbd3_exit(void) dnbd3 = list_first_entry(&del_list, struct dnbd3_device, list); dnbd3_sysfs_exit(dnbd3); list_del_init(&dnbd3->list); - dnbd3_put(dnbd3); + mutex_lock(&dnbd3_index_mutex); + idr_remove(&dnbd3_index_idr, dnbd3->minor); + mutex_unlock(&dnbd3_index_mutex); + dnbd3_dev_remove(dnbd3); } idr_destroy(&dnbd3_index_idr); diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h index 3ae42fd..edd19e1 100644 --- a/src/kernel/dnbd3.h +++ b/src/kernel/dnbd3.h @@ -33,8 +33,12 @@ /** * the number of parallel connections */ + #define NUMBER_CONNECTIONS 4 + + + /** * limit to which the other connected servers are only allowed to be that worser * then the best rtt @@ -76,12 +80,12 @@ struct dnbd3_server { * for (i = 3; i > 0; i--) { * server->failures[i] = server->failures[i - 1] * } - * server-failures[0] = 0; + * server->failures[0] = 0; * failures always go to failures[0] / *failures * * - is this to much effort? * - what are the benefits? - * - could increase the rtt e.g. (rtt = rtt * failures) + * - could increase the rtt e.g. (rtt = rtt * failures) */ }; @@ -192,6 +196,15 @@ struct dnbd3_cmd { }; +#define dnbd3_avg_rtt(server) \ + (( (server)->rtts[0] + (server)->rtts[1] \ + + (server)->rtts[2] + (server)->rtts[3] ) / 4 ) + +#define dnbd3_set_rtt_unreachable(server) \ + (server)->rtts[0] = (server)->rtts[1] = (server)->rtts[2] \ + = (server)->rtts[3] = RTT_UNREACHABLE; + + /** * macros for logging * levels: diff --git a/src/kernel/net.c b/src/kernel/net.c index dfde203..2d0b6ad 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -65,21 +65,46 @@ (h).msg_flags = MSG_WAITALL | MSG_NOSIGNAL; \ } while (0) -#define dnbd3_avg_rtt(server) \ - (( (server)->rtts[0] + (server)->rtts[1] \ - + (server)->rtts[2] + (server)->rtts[3] ) / 4 ) -#define dnbd3_set_rtt_unreachable(server) \ - (server)->rtts[0] = (server)->rtts[1] = (server)->rtts[2] \ - = (server)->rtts[3] = RTT_UNREACHABLE; +#if NUMBER_CONNECTIONS == 1 +#define dnbd3_pl_socket_connect(dev, server) \ + dnbd3_socket_connect(dev, server) +#define dnbd3_pl_socket_disconnect(sock) \ + dnbd3_socket_disconnect(sock) + + +#else +#define dnbd3_pl_socket_connect(sock, server) \ + do {\ + int i; \ + for (i = 0 < NUMBER_PARALLEL_CONNECTIONS; i++) { \ + dnbd3_socket_connect(dev, server); \ + dnbd3_socket_connect((sock) + \ + (i * sizeof(struct dnbd3_sock)), \ + (server)); \ + } \ + } while (0) + + +#define dnbd3_pl_socket_disconnect(sock) \ + do { \ + int i; \ + for (i = 0 < NUMBER_PARALLEL_CONNECTIONS; i++) { \ + dnbd3_socket_disconnect((sock) + \ + (i * sizeof(struct dnbd3_sock))); \ + } \ + } while (0) + +#endif static int dnbd3_server_connect(struct dnbd3_device *dev, struct dnbd3_server *server); static int dnbd3_socket_connect(struct dnbd3_sock *sock, struct dnbd3_server * server); -static int dnbd3_socket_disconnect(struct dnbd3_device *dev, - struct dnbd3_server *server, struct dnbd3_sock *sock); +static int dnbd3_socket_disconnect(struct dnbd3_sock *sock); +static int dnbd3_server_disconnect(struct dnbd3_device *dev, + struct dnbd3_server *server); /* @@ -822,7 +847,7 @@ static void dnbd3_adjust_connections(struct dnbd3_device *dev) { server = dnbd3_find_best_alt_server(dev); existing_server = dev->socks[i].server; if (server && dnbd3_better_rtt(server, dev->socks[i].server)) { - dnbd3_socket_disconnect(dev, NULL, &dev->socks[i]); + dnbd3_socket_disconnect(&dev->socks[i]); /* if it fails reconnect to existing */ if (dnbd3_server_connect(dev, server) != 0) { @@ -849,7 +874,7 @@ static void dnbd3_adjust_connections(struct dnbd3_device *dev) { rtt = dnbd3_avg_rtt(dev->socks[i].server); if (rtt > RTT_THRESOULD_LIMIT(best_rtt)) { info_sock(&dev->socks[i], "removing connection with rtt %llu", rtt); - dnbd3_socket_disconnect(dev, NULL, &dev->socks[i]); + dnbd3_socket_disconnect(&dev->socks[i]); sock_alive--; } } @@ -911,7 +936,8 @@ static void dnbd3_panic_worker(struct work_struct *work) panicked_server = panicked_sock->server; new_server = dnbd3_find_best_alt_server(dev); - dnbd3_socket_disconnect(dev, panicked_server, panicked_sock); + dnbd3_socket_disconnect(panicked_sock); + dnbd3_set_rtt_unreachable(panicked_server); if (new_server != NULL && new_server != panicked_server) { info_server(dev, new_server, "found replacement"); @@ -1027,6 +1053,11 @@ error: } +/** + * dnbd3_merge_new_server - merge the new server into the alt server list + * @dev: the device + * @new_server: the new server list to merge + */ static void dnbd3_merge_new_server(struct dnbd3_device *dev, dnbd3_server_entry_t *new_server) { @@ -1057,7 +1088,7 @@ static void dnbd3_merge_new_server(struct dnbd3_device *dev, if (new_server->failures == 1) { /* remove is requested */ info_server(dev, new_server, "remove server is requested"); - dnbd3_socket_disconnect(dev, existing_server, NULL); + dnbd3_server_disconnect(dev, existing_server); existing_server->host.type = 0; } // existing_server->failures = 0; // reset failure count @@ -1329,32 +1360,22 @@ error: /** * dnbd3_socket_disconnect - disconnect a socket or server - * @dev: the device - * @server: optional the server to disconnect - * @sock: optional the socket to disconnect + * @sock: the socket to disconnect * * 1. update nr of mq queues * 2. if last socket remove timer * 3. disconnect socket */ -static int dnbd3_socket_disconnect(struct dnbd3_device *dev, - struct dnbd3_server *server, struct dnbd3_sock *sock) +static int dnbd3_socket_disconnect(struct dnbd3_sock *sock) { int i; + struct dnbd3_device *dev = sock->device; int sock_alive = 0; for (i = 0; i < NUMBER_CONNECTIONS; i++) { - if (sock == NULL && dev->socks[i].server == server) { - sock = &dev->socks[i]; - } if (dnbd3_is_sock_alive(dev->socks[i])) { sock_alive++; } } - if (!sock || !sock->sock) { - warn_dev(dev, "could not find socket to disconnect"); - return -EIO; - } - blk_mq_update_nr_hw_queues(&dev->tag_set, sock_alive - 1); if (sock_alive <= 1) { info_sock(sock, "shutting down last socket and stopping timer"); del_timer_sync(&dev->timer); @@ -1385,6 +1406,7 @@ static int dnbd3_socket_disconnect(struct dnbd3_device *dev, } mutex_unlock(&sock->tx_lock); mutex_destroy(&sock->tx_lock); + blk_mq_update_nr_hw_queues(&dev->tag_set, sock_alive - 1); cancel_work_sync(&sock->receive_worker); @@ -1398,6 +1420,29 @@ static int dnbd3_socket_disconnect(struct dnbd3_device *dev, } /** + * dnbd3_server_disconnect - disconnect a server from a socket + * @dev: the device + * @server: the server to disconnect + */ +static int dnbd3_server_disconnect(struct dnbd3_device *dev, + struct dnbd3_server *server) +{ + int i; + struct dnbd3_sock *sock = NULL; + for (i = 0; i < NUMBER_CONNECTIONS; i++) { + if (dev->socks[i].server == server) { + sock = &dev->socks[i]; + } + } + if (!sock || !sock->sock) { + warn_dev(dev, "could not find socket to disconnect"); + return -EIO; + } + return dnbd3_socket_disconnect(sock); +} + + +/** * dnbd3_net_connect - connect device * @dev: the device to connect * @@ -1439,8 +1484,7 @@ int dnbd3_net_disconnect(struct dnbd3_device *dev) for (i = 0; i < NUMBER_CONNECTIONS; i++) { if (dev->socks[i].sock) { - if (dnbd3_socket_disconnect(dev, NULL, - &dev->socks[i])) { + if (dnbd3_socket_disconnect(&dev->socks[i])) { result = -EIO; } } |