diff options
Diffstat (limited to 'src/kernel/blk.c')
-rw-r--r-- | src/kernel/blk.c | 740 |
1 files changed, 482 insertions, 258 deletions
diff --git a/src/kernel/blk.c b/src/kernel/blk.c index 889b988..69e4583 100644 --- a/src/kernel/blk.c +++ b/src/kernel/blk.c @@ -1,9 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * This file is part of the Distributed Network Block Device 3 * * Copyright(c) 2011-2012 Johann Latocha <johann@latocha.de> * - * This file may be licensed under the terms of of the + * This file may be licensed under the terms of the * GNU General Public License Version 2 (the ``GPL''). * * Software distributed under the License is distributed @@ -18,248 +19,259 @@ * */ -#include "clientconfig.h" +#include <dnbd3/config/client.h> #include "blk.h" #include "net.h" #include "sysfs.h" +#include "dnbd3_main.h" #include <linux/pagemap.h> -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) -#define dnbd3_req_read(req) \ - req_op(req) == REQ_OP_READ -#define dnbd3_req_fs(req) \ - dnbd3_req_read(req) || req_op(req) == REQ_OP_WRITE -#define dnbd3_req_special(req) \ - blk_rq_is_private(req) -#else -#define dnbd3_req_read(req) \ - rq_data_dir(req) == READ -#define dnbd3_req_fs(req) \ - req->cmd_type == REQ_TYPE_FS -#define dnbd3_req_special(req) \ - req->cmd_type == REQ_TYPE_SPECIAL -#endif - -int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor) +static int dnbd3_close_device(dnbd3_device_t *dev) { - struct gendisk *disk; - struct request_queue *blk_queue; - - init_waitqueue_head(&dev->process_queue_send); - init_waitqueue_head(&dev->process_queue_receive); - init_waitqueue_head(&dev->process_queue_discover); - INIT_LIST_HEAD(&dev->request_queue_send); - INIT_LIST_HEAD(&dev->request_queue_receive); + int result; - memset(&dev->cur_server, 0, sizeof(dev->cur_server)); - memset(&dev->initial_server, 0, sizeof(dev->initial_server)); - dev->better_sock = NULL; + if (dev->imgname) + dev_info(dnbd3_device_to_dev(dev), "closing down device.\n"); + dev->panic = false; + result = dnbd3_net_disconnect(dev); + kfree(dev->imgname); dev->imgname = NULL; - dev->rid = 0; - dev->update_available = 0; - memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])*NUMBER_SERVERS); - dev->thread_send = NULL; - dev->thread_receive = NULL; - dev->thread_discover = NULL; - dev->discover = 0; - dev->disconnecting = 0; - dev->panic = 0; - dev->panic_count = 0; - dev->reported_size = 0; - - if (!(disk = alloc_disk(1))) - { - printk("ERROR: dnbd3 alloc_disk failed.\n"); - return -EIO; - } - - disk->major = major; - disk->first_minor = minor; - sprintf(disk->disk_name, "dnbd%d", minor); - set_capacity(disk, 0); - set_disk_ro(disk, 1); - disk->fops = &dnbd3_blk_ops; - - spin_lock_init(&dev->blk_lock); - if ((blk_queue = blk_init_queue(&dnbd3_blk_request, &dev->blk_lock)) == NULL) - { - printk("ERROR: dnbd3 blk_init_queue failed.\n"); - return -EIO; - } - - blk_queue_logical_block_size(blk_queue, DNBD3_BLOCK_SIZE); - blk_queue_physical_block_size(blk_queue, DNBD3_BLOCK_SIZE); - - disk->queue = blk_queue; - disk->private_data = dev; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) - blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); -#else - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue); -#endif -#define ONE_MEG (1048576) - blk_queue_max_segment_size(disk->queue, ONE_MEG); - blk_queue_max_segments(disk->queue, 0xffff); - blk_queue_max_hw_sectors(disk->queue, ONE_MEG / DNBD3_BLOCK_SIZE); - disk->queue->limits.max_sectors = 256; - dev->disk = disk; -#undef ONE_MEG - add_disk(disk); - dnbd3_sysfs_init(dev); - return 0; + /* new requests might have been queued up, */ + /* but now that imgname is NULL no new ones can show up */ + blk_mq_freeze_queue(dev->queue); + set_capacity(dev->disk, 0); + blk_mq_unfreeze_queue(dev->queue); + return result; } -int dnbd3_blk_del_device(dnbd3_device_t *dev) -{ - dnbd3_sysfs_exit(dev); - dnbd3_net_disconnect(dev); - del_gendisk(dev->disk); - put_disk(dev->disk); - blk_cleanup_queue(dev->disk->queue); - return 0; -} - -struct block_device_operations dnbd3_blk_ops = - { .owner = THIS_MODULE, .ioctl = dnbd3_blk_ioctl, }; - -int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) +static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { int result = -100; dnbd3_device_t *dev = bdev->bd_disk->private_data; +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 14, 0) struct request_queue *blk_queue = dev->disk->queue; +#endif char *imgname = NULL; dnbd3_ioctl_t *msg = NULL; - //unsigned long irqflags; + int i = 0, j; + u8 locked = 0; - while (dev->disconnecting) - { - // do nothing - } - - if (arg != 0) - { + if (arg != 0) { msg = kmalloc(sizeof(*msg), GFP_KERNEL); - if (msg == NULL) return -ENOMEM; - if (copy_from_user((char *)msg, (char *)arg, 2) != 0 || msg->len != sizeof(*msg)) - { + if (msg == NULL) + return -ENOMEM; + if (copy_from_user((char *)msg, (char *)arg, 2) != 0 || msg->len != sizeof(*msg)) { result = -ENOEXEC; goto cleanup_return; } - if (copy_from_user((char *)msg, (char *)arg, sizeof(*msg)) != 0) - { + if (copy_from_user((char *)msg, (char *)arg, sizeof(*msg)) != 0) { result = -ENOENT; goto cleanup_return; } - if (msg->imgname != NULL && msg->imgnamelen > 0) - { + if (msg->imgname != NULL && msg->imgnamelen > 0) { imgname = kmalloc(msg->imgnamelen + 1, GFP_KERNEL); - if (imgname == NULL) - { + if (imgname == NULL) { result = -ENOMEM; goto cleanup_return; } - if (copy_from_user(imgname, msg->imgname, msg->imgnamelen) != 0) - { + if (copy_from_user(imgname, msg->imgname, msg->imgnamelen) != 0) { result = -ENOENT; goto cleanup_return; } imgname[msg->imgnamelen] = '\0'; - //printk("IOCTL Image name of len %d is %s\n", (int)msg->imgnamelen, imgname); } } - - switch (cmd) - { + switch (cmd) { case IOCTL_OPEN: - if (dev->imgname != NULL) - { + if (!dnbd3_flag_get(dev->connection_lock)) { result = -EBUSY; + break; } - else if (imgname == NULL) - { + locked = 1; + if (dev->imgname != NULL) { + result = -EBUSY; + } else if (imgname == NULL) { result = -EINVAL; - } - else if (msg == NULL) - { + } else if (msg == NULL) { result = -EINVAL; - } - else - { - if (sizeof(msg->host) != sizeof(dev->cur_server.host)) - printk("Odd size bug#1 triggered in IOCTL\n"); - memcpy(&dev->cur_server.host, &msg->host, sizeof(msg->host)); - dev->cur_server.failures = 0; - memcpy(&dev->initial_server, &dev->cur_server, sizeof(dev->initial_server)); + } else { + /* assert that at least one and not to many hosts are given */ + if (msg->hosts_num < 1 || msg->hosts_num > NUMBER_SERVERS) { + result = -EINVAL; + break; + } + dev->imgname = imgname; dev->rid = msg->rid; dev->use_server_provided_alts = msg->use_server_provided_alts; - // Forget all alt servers on explicit connect, set first al server to initial server - memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])*NUMBER_SERVERS); - memcpy(dev->alt_servers, &dev->initial_server, sizeof(dev->alt_servers[0])); -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) - if (blk_queue->backing_dev_info != NULL) { + + dev_info(dnbd3_device_to_dev(dev), "opening device.\n"); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 14, 0) + // set optimal request size for the queue to half the read-ahead + blk_queue_io_opt(dev->queue, (msg->read_ahead_kb * 512)); +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 15, 0) \ + && !RHEL_CHECK_VERSION(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(9, 0)) + // set readahead from optimal request size of the queue + // ra_pages are calculated by following formula: queue_io_opt() * 2 / PAGE_SIZE + blk_queue_update_readahead(dev->queue); +#endif +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) + if (blk_queue->backing_dev_info != NULL) blk_queue->backing_dev_info->ra_pages = (msg->read_ahead_kb * 1024) / PAGE_SIZE; - } #else blk_queue->backing_dev_info.ra_pages = (msg->read_ahead_kb * 1024) / PAGE_SIZE; #endif - if (dnbd3_net_connect(dev) == 0) - { - result = 0; - imgname = NULL; // Prevent kfree at the end + + /* add specified servers to alt server list */ + for (i = 0; i < NUMBER_SERVERS; i++) + dev->alt_servers[i].host.ss_family = 0; + for (i = 0; i < msg->hosts_num; i++) { + /* copy provided host into corresponding alt server slot */ + if (dnbd3_add_server(dev, &msg->hosts[i]) == 0) + dev_dbg(dnbd3_device_to_dev(dev), "adding server %pISpc\n", + &dev->alt_servers[i].host); + else + dev_warn(dnbd3_device_to_dev(dev), "could not add server %pISpc\n", + &dev->alt_servers[i].host); } - else - { - result = -ENOENT; + + /* + * probe added alt servers in specified order and + * choose first working server as initial server + */ + result = -EPROTONOSUPPORT; + for (i = 0; i < NUMBER_SERVERS; i++) { + /* probe added alt server */ + if (dev->alt_servers[i].host.ss_family == 0) + continue; // Empty slot + + result = dnbd3_new_connection(dev, &dev->alt_servers[i].host, true); + if (result == 0) { + /* connection established, store index of server and exit loop */ + result = i; + break; + } + } + + if (result >= 0) { + /* connection was successful */ + dev_dbg(dnbd3_device_to_dev(dev), "server %pISpc is initial server\n", + &dev->cur_server.host); + imgname = NULL; // Prevent kfree at the end + } else { + /* probing failed */ dev->imgname = NULL; } } break; case IOCTL_CLOSE: - dnbd3_blk_fail_all_requests(dev); - result = dnbd3_net_disconnect(dev); - dnbd3_blk_fail_all_requests(dev); - set_capacity(dev->disk, 0); - if (dev->imgname) - { - kfree(dev->imgname); - dev->imgname = NULL; + if (!dnbd3_flag_get(dev->connection_lock)) { + result = -EBUSY; + break; } + locked = 1; + result = dnbd3_close_device(dev); break; case IOCTL_SWITCH: - result = -EINVAL; + if (!dnbd3_flag_get(dev->connection_lock)) { + result = -EBUSY; + break; + } + locked = 1; + if (dev->imgname == NULL) { + result = -ENOTCONN; + } else if (msg == NULL) { + result = -EINVAL; + } else { + dnbd3_alt_server_t *alt_server; + struct sockaddr_storage new_addr; + + mutex_lock(&dev->alt_servers_lock); + alt_server = get_existing_alt_from_host(&msg->hosts[0], dev); + if (alt_server == NULL) { + mutex_unlock(&dev->alt_servers_lock); + /* specified server is not known, so do not switch */ + result = -ENOENT; + } else { + /* specified server is known, so try to switch to it */ + new_addr = alt_server->host; + mutex_unlock(&dev->alt_servers_lock); + if (is_same_server(&dev->cur_server.host, &new_addr)) { + /* specified server is current server, so do not switch */ + result = 0; + } else { + dev_info(dnbd3_device_to_dev(dev), "manual server switch to %pISpc\n", + &new_addr); + result = dnbd3_new_connection(dev, &new_addr, false); + if (result != 0) { + /* switching didn't work */ + result = -EAGAIN; + } + } + if (result == 0) { + /* fake RTT so we don't switch away again soon */ + mutex_lock(&dev->alt_servers_lock); + for (i = 0; i < NUMBER_SERVERS; ++i) { + alt_server = &dev->alt_servers[i]; + if (is_same_server(&alt_server->host, &new_addr)) { + for (j = 0; j < DISCOVER_HISTORY_SIZE; ++j) + alt_server->rtts[j] = 1; + alt_server->best_count = 100; + } else { + for (j = 0; j < DISCOVER_HISTORY_SIZE; ++j) + if (alt_server->rtts[j] < 500000) + alt_server->rtts[j] = 500000; + alt_server->best_count = 0; + } + } + mutex_unlock(&dev->alt_servers_lock); + } + } + } break; case IOCTL_ADD_SRV: - case IOCTL_REM_SRV: - if (dev->imgname == NULL) - { - result = -ENOENT; + case IOCTL_REM_SRV: { + struct sockaddr_storage addr; + dnbd3_host_t *host; + + if (dev->imgname == NULL) { + result = -ENOTCONN; + break; } - else if (dev->new_servers_num >= NUMBER_SERVERS) - { - result = -EAGAIN; + if (msg == NULL) { + result = -EINVAL; + break; } - else if (msg == NULL) - { + host = &msg->hosts[0]; + if (!dnbd3_host_to_sockaddr(host, &addr)) { result = -EINVAL; + break; } - else - { - memcpy(&dev->new_servers[dev->new_servers_num].host, &msg->host, sizeof(msg->host)); - dev->new_servers[dev->new_servers_num].failures = (cmd == IOCTL_ADD_SRV ? 0 : 1); // 0 = ADD, 1 = REM - ++dev->new_servers_num; - result = 0; + + if (cmd == IOCTL_ADD_SRV) { + result = dnbd3_add_server(dev, host); + if (result == -EEXIST) + dev_info(dnbd3_device_to_dev(dev), "alt server %pISpc already exists\n", &addr); + else if (result == -ENOSPC) + dev_info(dnbd3_device_to_dev(dev), "cannot add %pISpc; no free slot\n", &addr); + else + dev_info(dnbd3_device_to_dev(dev), "added alt server %pISpc\n", &addr); + } else { // IOCTL_REM_SRV + result = dnbd3_rem_server(dev, host); + if (result == -ENOENT) + dev_info(dnbd3_device_to_dev(dev), "alt server %pISpc not found\n", &addr); + else + dev_info(dnbd3_device_to_dev(dev), "removed alt server %pISpc\n", &addr); } break; - + } case BLKFLSBUF: result = 0; break; @@ -270,113 +282,325 @@ int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, u } cleanup_return: - if (msg) kfree(msg); - if (imgname) kfree(imgname); + kfree(msg); + kfree(imgname); + if (locked) + dnbd3_flag_reset(dev->connection_lock); return result; } -/** - * dev->blk_lock and q->queue_lock are being held - * when this is called! +static const struct block_device_operations dnbd3_blk_ops = { + .owner = THIS_MODULE, + .ioctl = dnbd3_blk_ioctl, +}; + +static void dnbd3_add_queue(dnbd3_device_t *dev, struct request *rq) +{ + unsigned long irqflags; + + spin_lock_irqsave(&dev->send_queue_lock, irqflags); + list_add_tail(&rq->queuelist, &dev->send_queue); + spin_unlock_irqrestore(&dev->send_queue_lock, irqflags); + spin_lock_irqsave(&dev->blk_lock, irqflags); + queue_work(dev->send_wq, &dev->send_work); + spin_unlock_irqrestore(&dev->blk_lock, irqflags); +} + +/* + * Linux kernel blk-mq driver function (entry point) to handle block IO requests */ -void dnbd3_blk_request(struct request_queue *q) +static blk_status_t dnbd3_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { - struct request *req; - dnbd3_device_t *dev; + struct request *rq = bd->rq; + dnbd3_device_t *dev = rq->q->queuedata; + struct dnbd3_cmd *cmd; - while ((req = blk_fetch_request(q)) != NULL) - { - dev = req->rq_disk->private_data; + if (dev->imgname == NULL || !device_active(dev)) + return BLK_STS_IOERR; - if (dev->imgname == NULL) - { - __blk_end_request_all(req, -EIO); - continue; - } + if (req_op(rq) != REQ_OP_READ) + return BLK_STS_IOERR; - if (!(dnbd3_req_fs(req))) - { - __blk_end_request_all(req, 0); - continue; - } + if (PROBE_COUNT_TIMEOUT > 0 && dev->panic_count >= PROBE_COUNT_TIMEOUT) + return BLK_STS_TIMEOUT; - if (PROBE_COUNT_TIMEOUT > 0 && dev->panic_count >= PROBE_COUNT_TIMEOUT) - { - __blk_end_request_all(req, -EIO); - continue; - } + if (rq_data_dir(rq) != READ) + return BLK_STS_NOTSUPP; - if (!(dnbd3_req_read(req))) - { - __blk_end_request_all(req, -EACCES); - continue; + cmd = blk_mq_rq_to_pdu(rq); + cmd->handle = (u64)blk_mq_unique_tag(rq) | (((u64)jiffies) << 32); + blk_mq_start_request(rq); + dnbd3_add_queue(dev, rq); + return BLK_STS_OK; +} + +static enum blk_eh_timer_return dnbd3_rq_timeout(struct request *req +#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 0, 0) \ + && !RHEL_CHECK_VERSION(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(9, 0)) + , bool reserved +#endif + ) +{ + unsigned long irqflags; + struct request *rq_iter; + bool found = false; + dnbd3_device_t *dev = req->q->queuedata; + + spin_lock_irqsave(&dev->send_queue_lock, irqflags); + list_for_each_entry(rq_iter, &dev->send_queue, queuelist) { + if (rq_iter == req) { + found = true; + break; + } + } + spin_unlock_irqrestore(&dev->send_queue_lock, irqflags); + // If still in send queue, do nothing + if (found) + return BLK_EH_RESET_TIMER; + + spin_lock_irqsave(&dev->recv_queue_lock, irqflags); + list_for_each_entry(rq_iter, &dev->recv_queue, queuelist) { + if (rq_iter == req) { + found = true; + list_del_init(&req->queuelist); + break; } + } + spin_unlock_irqrestore(&dev->recv_queue_lock, irqflags); + if (!found) { + dev_err(dnbd3_device_to_dev(dev), "timeout request neither found in send nor recv queue, ignoring\n"); + // Assume it was fnished concurrently + return BLK_EH_DONE; + } + // Add to send queue again and trigger work, reset timeout + dnbd3_add_queue(dev, req); + return BLK_EH_RESET_TIMER; +} + +static +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +const +#endif +struct blk_mq_ops dnbd3_mq_ops = { + .queue_rq = dnbd3_queue_rq, + .timeout = dnbd3_rq_timeout, +}; + +int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor) +{ + int ret; + + memset(dev, 0, sizeof(*dev)); + dev->index = minor; + // lock for imgname, cur_server etc. + spin_lock_init(&dev->blk_lock); + spin_lock_init(&dev->send_queue_lock); + spin_lock_init(&dev->recv_queue_lock); + INIT_LIST_HEAD(&dev->send_queue); + INIT_LIST_HEAD(&dev->recv_queue); + dnbd3_flag_reset(dev->connection_lock); + dnbd3_flag_reset(dev->discover_running); + mutex_init(&dev->alt_servers_lock); + dnbd3_net_work_init(dev); + + // memset has done this already but I like initial values to be explicit + dev->imgname = NULL; + dev->rid = 0; + dev->update_available = false; + dev->panic = false; + dev->panic_count = 0; + dev->reported_size = 0; + + // set up tag_set for blk-mq + dev->tag_set.ops = &dnbd3_mq_ops; + dev->tag_set.nr_hw_queues = 1; + dev->tag_set.queue_depth = 128; + dev->tag_set.numa_node = NUMA_NO_NODE; + dev->tag_set.cmd_size = sizeof(struct dnbd3_cmd); + dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + dev->tag_set.driver_data = dev; + dev->tag_set.timeout = BLOCK_LAYER_TIMEOUT * HZ; + + ret = blk_mq_alloc_tag_set(&dev->tag_set); + if (ret) { + dev_err(dnbd3_device_to_dev(dev), "blk_mq_alloc_tag_set failed\n"); + goto out; + } + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 14, 0) + // set up blk-mq and disk + dev->disk = blk_mq_alloc_disk(&dev->tag_set, dev); + if (IS_ERR(dev->disk)) { + dev_err(dnbd3_device_to_dev(dev), "blk_mq_alloc_disk failed\n"); + ret = PTR_ERR(dev->disk); + goto out_cleanup_tags; + } + dev->queue = dev->disk->queue; +#else + // set up blk-mq + dev->queue = blk_mq_init_queue(&dev->tag_set); + if (IS_ERR(dev->queue)) { + ret = PTR_ERR(dev->queue); + dev_err(dnbd3_device_to_dev(dev), "blk_mq_init_queue failed\n"); + goto out_cleanup_tags; + } + dev->queue->queuedata = dev; +#endif + + blk_queue_logical_block_size(dev->queue, DNBD3_BLOCK_SIZE); + blk_queue_physical_block_size(dev->queue, DNBD3_BLOCK_SIZE); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dev->queue); +#else + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, dev->queue); +#endif +#define ONE_MEG (1048576) + blk_queue_max_segment_size(dev->queue, ONE_MEG); + blk_queue_max_segments(dev->queue, 0xffff); + blk_queue_max_hw_sectors(dev->queue, ONE_MEG / DNBD3_BLOCK_SIZE); + dev->queue->limits.max_sectors = 256; +#undef ONE_MEG + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 14, 0) + // set up disk + dev->disk = alloc_disk(1); + if (!dev->disk) { + dev_err(dnbd3_device_to_dev(dev), "alloc_disk failed\n"); + ret = -ENOMEM; + goto out_cleanup_queue; + } +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 17, 0) \ + || (LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 132)) \ + || RHEL_CHECK_VERSION(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(9, 0)) + dev->disk->flags |= GENHD_FL_NO_PART; +#else + dev->disk->flags |= GENHD_FL_NO_PART_SCAN; +#endif + dev->disk->major = major; + dev->disk->first_minor = minor; + dev->disk->minors = 1; + dev->disk->fops = &dnbd3_blk_ops; + dev->disk->private_data = dev; + dev->disk->queue = dev->queue; + sprintf(dev->disk->disk_name, "dnbd%d", minor); + set_capacity(dev->disk, 0); + set_disk_ro(dev->disk, 1); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 0) \ + || RHEL_CHECK_VERSION(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(9, 0)) + ret = add_disk(dev->disk); + if (ret != 0) + goto out_cleanup_queue; +#else + add_disk(dev->disk); +#endif + + // set up sysfs + dnbd3_sysfs_init(dev); + + return 0; + +out_cleanup_queue: +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 14, 0) + blk_cleanup_queue(dev->queue); +#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 0, 0) \ + && !RHEL_CHECK_VERSION(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(9, 0)) + blk_cleanup_disk(dev->disk); +#else + put_disk(dev->disk); +#endif +out_cleanup_tags: + blk_mq_free_tag_set(&dev->tag_set); +out: + mutex_destroy(&dev->alt_servers_lock); + return ret; +} + +int dnbd3_blk_del_device(dnbd3_device_t *dev) +{ + while (!dnbd3_flag_get(dev->connection_lock)) + schedule(); + dnbd3_close_device(dev); + dnbd3_sysfs_exit(dev); + del_gendisk(dev->disk); +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 14, 0) + blk_cleanup_queue(dev->queue); + put_disk(dev->disk); +#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 0, 0) \ + && !RHEL_CHECK_VERSION(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(9, 0)) + blk_cleanup_disk(dev->disk); +#else + put_disk(dev->disk); +#endif + blk_mq_free_tag_set(&dev->tag_set); + mutex_destroy(&dev->alt_servers_lock); + return 0; +} + +void dnbd3_blk_requeue_all_requests(dnbd3_device_t *dev) +{ + struct request *blk_request; + unsigned long flags; + struct list_head local_copy; + int count = 0; - list_add_tail(&req->queuelist, &dev->request_queue_send); - spin_unlock_irq(q->queue_lock); - wake_up(&dev->process_queue_send); - spin_lock_irq(q->queue_lock); + INIT_LIST_HEAD(&local_copy); + spin_lock_irqsave(&dev->recv_queue_lock, flags); + while (!list_empty(&dev->recv_queue)) { + blk_request = list_entry(dev->recv_queue.next, struct request, queuelist); + list_del_init(&blk_request->queuelist); + list_add(&blk_request->queuelist, &local_copy); + count++; + } + spin_unlock_irqrestore(&dev->recv_queue_lock, flags); + if (count) + dev_info(dnbd3_device_to_dev(dev), "re-queueing %d requests\n", count); + while (!list_empty(&local_copy)) { + blk_request = list_entry(local_copy.next, struct request, queuelist); + list_del_init(&blk_request->queuelist); + spin_lock_irqsave(&dev->send_queue_lock, flags); + list_add_tail(&blk_request->queuelist, &dev->send_queue); + spin_unlock_irqrestore(&dev->send_queue_lock, flags); } + // Do this even if we didn't move anything from the recv list to the send + // list. It might have already contained something, which needs to be + // re-requested anyways if this was called because of a server switch. + spin_lock_irqsave(&dev->blk_lock, flags); + queue_work(dev->send_wq, &dev->send_work); + spin_unlock_irqrestore(&dev->blk_lock, flags); } void dnbd3_blk_fail_all_requests(dnbd3_device_t *dev) { - struct request *blk_request, *tmp_request; - struct request *blk_request2, *tmp_request2; + struct request *blk_request; unsigned long flags; struct list_head local_copy; - int dup; + int count = 0; + INIT_LIST_HEAD(&local_copy); - spin_lock_irqsave(&dev->blk_lock, flags); - while (!list_empty(&dev->request_queue_receive)) - { - list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_receive, queuelist) - { - list_del_init(&blk_request->queuelist); - dup = 0; - list_for_each_entry_safe(blk_request2, tmp_request2, &local_copy, queuelist) - { - if (blk_request == blk_request2) - { - printk("WARNING: Request is in both lists!\n"); - dup = 1; - break; - } - } - if (!dup) list_add(&blk_request->queuelist, &local_copy); - } + spin_lock_irqsave(&dev->recv_queue_lock, flags); + while (!list_empty(&dev->recv_queue)) { + blk_request = list_entry(dev->recv_queue.next, struct request, queuelist); + list_del_init(&blk_request->queuelist); + list_add(&blk_request->queuelist, &local_copy); + count++; } - while (!list_empty(&dev->request_queue_send)) - { - list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_send, queuelist) - { - list_del_init(&blk_request->queuelist); - dup = 0; - list_for_each_entry_safe(blk_request2, tmp_request2, &local_copy, queuelist) - { - if (blk_request == blk_request2) - { - printk("WARNING: Request is in both lists!\n"); - dup = 1; - break; - } - } - if (!dup) list_add(&blk_request->queuelist, &local_copy); - } + spin_unlock_irqrestore(&dev->recv_queue_lock, flags); + spin_lock_irqsave(&dev->send_queue_lock, flags); + while (!list_empty(&dev->send_queue)) { + blk_request = list_entry(dev->send_queue.next, struct request, queuelist); + list_del_init(&blk_request->queuelist); + list_add(&blk_request->queuelist, &local_copy); + count++; } - spin_unlock_irqrestore(&dev->blk_lock, flags); - list_for_each_entry_safe(blk_request, tmp_request, &local_copy, queuelist) - { + spin_unlock_irqrestore(&dev->send_queue_lock, flags); + if (count) + dev_info(dnbd3_device_to_dev(dev), "failing %d requests\n", count); + while (!list_empty(&local_copy)) { + blk_request = list_entry(local_copy.next, struct request, queuelist); list_del_init(&blk_request->queuelist); - if (dnbd3_req_fs(blk_request)) - { - spin_lock_irqsave(&dev->blk_lock, flags); - __blk_end_request_all(blk_request, -EIO); - spin_unlock_irqrestore(&dev->blk_lock, flags); - } - else if (dnbd3_req_special(blk_request)) - { - kfree(blk_request); - } + blk_mq_end_request(blk_request, BLK_STS_IOERR); } } |