From 47c8cb8c78fbace1e5edb2107fc1c4dcbe7058be Mon Sep 17 00:00:00 2001 From: Manuel Bentele Date: Thu, 27 Aug 2020 11:02:30 +0200 Subject: [KERNEL] convert to blk-mq and ktime This converts the dnbd3 kernel module driver to use the blk-mq infrastructure, which makes the driver compatible with Linux kernel versions 5.x and later. The conversion uses a single hardware queue to preserve the existing send/receive and load-balancing logic, but can be scaled up in the future. In addition, time measurements in the implementation are converted to ktime-based accessors, replacing the deprecated time interfaces. --- src/kernel/blk.c | 276 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 156 insertions(+), 120 deletions(-) (limited to 'src/kernel/blk.c') diff --git a/src/kernel/blk.c b/src/kernel/blk.c index 889b988..dde8dea 100644 --- a/src/kernel/blk.c +++ b/src/kernel/blk.c @@ -41,92 +41,7 @@ req->cmd_type == REQ_TYPE_SPECIAL #endif -int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor) -{ - struct gendisk *disk; - struct request_queue *blk_queue; - - init_waitqueue_head(&dev->process_queue_send); - init_waitqueue_head(&dev->process_queue_receive); - init_waitqueue_head(&dev->process_queue_discover); - INIT_LIST_HEAD(&dev->request_queue_send); - INIT_LIST_HEAD(&dev->request_queue_receive); - - memset(&dev->cur_server, 0, sizeof(dev->cur_server)); - memset(&dev->initial_server, 0, sizeof(dev->initial_server)); - dev->better_sock = NULL; - - dev->imgname = NULL; - dev->rid = 0; - dev->update_available = 0; - memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])*NUMBER_SERVERS); - dev->thread_send = NULL; - dev->thread_receive = NULL; - dev->thread_discover = NULL; - dev->discover = 0; - dev->disconnecting = 0; - dev->panic = 0; - dev->panic_count = 0; - dev->reported_size = 0; - - if (!(disk = alloc_disk(1))) - { - printk("ERROR: dnbd3 alloc_disk failed.\n"); - return -EIO; - } - - 
disk->major = major; - disk->first_minor = minor; - sprintf(disk->disk_name, "dnbd%d", minor); - set_capacity(disk, 0); - set_disk_ro(disk, 1); - disk->fops = &dnbd3_blk_ops; - - spin_lock_init(&dev->blk_lock); - if ((blk_queue = blk_init_queue(&dnbd3_blk_request, &dev->blk_lock)) == NULL) - { - printk("ERROR: dnbd3 blk_init_queue failed.\n"); - return -EIO; - } - - blk_queue_logical_block_size(blk_queue, DNBD3_BLOCK_SIZE); - blk_queue_physical_block_size(blk_queue, DNBD3_BLOCK_SIZE); - - disk->queue = blk_queue; - disk->private_data = dev; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) - blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); -#else - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue); -#endif -#define ONE_MEG (1048576) - blk_queue_max_segment_size(disk->queue, ONE_MEG); - blk_queue_max_segments(disk->queue, 0xffff); - blk_queue_max_hw_sectors(disk->queue, ONE_MEG / DNBD3_BLOCK_SIZE); - disk->queue->limits.max_sectors = 256; - dev->disk = disk; -#undef ONE_MEG - - add_disk(disk); - dnbd3_sysfs_init(dev); - return 0; -} - -int dnbd3_blk_del_device(dnbd3_device_t *dev) -{ - dnbd3_sysfs_exit(dev); - dnbd3_net_disconnect(dev); - del_gendisk(dev->disk); - put_disk(dev->disk); - blk_cleanup_queue(dev->disk->queue); - return 0; -} - -struct block_device_operations dnbd3_blk_ops = - { .owner = THIS_MODULE, .ioctl = dnbd3_blk_ioctl, }; - -int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) +static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { int result = -100; dnbd3_device_t *dev = bdev->bd_disk->private_data; @@ -225,7 +140,9 @@ int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, u dnbd3_blk_fail_all_requests(dev); result = dnbd3_net_disconnect(dev); dnbd3_blk_fail_all_requests(dev); + blk_mq_freeze_queue(dev->queue); set_capacity(dev->disk, 0); + 
blk_mq_unfreeze_queue(dev->queue); if (dev->imgname) { kfree(dev->imgname); @@ -275,48 +192,167 @@ cleanup_return: return result; } -/** - * dev->blk_lock and q->queue_lock are being held - * when this is called! - */ -void dnbd3_blk_request(struct request_queue *q) +static const struct block_device_operations dnbd3_blk_ops = { + .owner = THIS_MODULE, + .ioctl = dnbd3_blk_ioctl, +}; + +static blk_status_t dnbd3_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { - struct request *req; - dnbd3_device_t *dev; + struct request *rq = bd->rq; + dnbd3_device_t *dev = rq->q->queuedata; + unsigned long irqflags; + + blk_mq_start_request(rq); - while ((req = blk_fetch_request(q)) != NULL) + if (dev->imgname == NULL) { - dev = req->rq_disk->private_data; + blk_mq_end_request(rq, BLK_STS_IOERR); + goto out; + } - if (dev->imgname == NULL) - { - __blk_end_request_all(req, -EIO); - continue; - } + if (!(dnbd3_req_fs(rq))) + { + blk_mq_end_request(rq, BLK_STS_IOERR); + goto out; + } - if (!(dnbd3_req_fs(req))) - { - __blk_end_request_all(req, 0); - continue; - } + if (PROBE_COUNT_TIMEOUT > 0 && dev->panic_count >= PROBE_COUNT_TIMEOUT) + { + blk_mq_end_request(rq, BLK_STS_TIMEOUT); + goto out; + } - if (PROBE_COUNT_TIMEOUT > 0 && dev->panic_count >= PROBE_COUNT_TIMEOUT) - { - __blk_end_request_all(req, -EIO); - continue; - } + if (!(dnbd3_req_read(rq))) + { + blk_mq_end_request(rq, BLK_STS_NOTSUPP); + goto out; + } - if (!(dnbd3_req_read(req))) - { - __blk_end_request_all(req, -EACCES); - continue; - } + spin_lock_irqsave(&dev->blk_lock, irqflags); + list_add_tail(&rq->queuelist, &dev->request_queue_send); + spin_unlock_irqrestore(&dev->blk_lock, irqflags); + wake_up(&dev->process_queue_send); + +out: + return BLK_STS_OK; +} + +static const struct blk_mq_ops dnbd3_mq_ops = { + .queue_rq = dnbd3_queue_rq, +}; + +int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor) +{ + int ret; + + init_waitqueue_head(&dev->process_queue_send); + 
init_waitqueue_head(&dev->process_queue_receive); + init_waitqueue_head(&dev->process_queue_discover); + INIT_LIST_HEAD(&dev->request_queue_send); + INIT_LIST_HEAD(&dev->request_queue_receive); + + memset(&dev->cur_server, 0, sizeof(dev->cur_server)); + memset(&dev->initial_server, 0, sizeof(dev->initial_server)); + dev->better_sock = NULL; + + dev->imgname = NULL; + dev->rid = 0; + dev->update_available = 0; + memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])*NUMBER_SERVERS); + dev->thread_send = NULL; + dev->thread_receive = NULL; + dev->thread_discover = NULL; + dev->discover = 0; + dev->disconnecting = 0; + dev->panic = 0; + dev->panic_count = 0; + dev->reported_size = 0; + + // set up spin lock for request queues for send and receive + spin_lock_init(&dev->blk_lock); + + // set up tag_set for blk-mq + dev->tag_set.ops = &dnbd3_mq_ops; + dev->tag_set.nr_hw_queues = 1; + dev->tag_set.queue_depth = 128; + dev->tag_set.numa_node = NUMA_NO_NODE; + dev->tag_set.cmd_size = 0; + dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + dev->tag_set.driver_data = dev; + + ret = blk_mq_alloc_tag_set(&dev->tag_set); + if (ret) + { + printk(KERN_ERR "ERROR: dnbd3 blk_mq_alloc_tag_set failed.\n"); + goto out; + } - list_add_tail(&req->queuelist, &dev->request_queue_send); - spin_unlock_irq(q->queue_lock); - wake_up(&dev->process_queue_send); - spin_lock_irq(q->queue_lock); + // set up blk-mq + dev->queue = blk_mq_init_queue(&dev->tag_set); + if (IS_ERR(dev->queue)) { + ret = PTR_ERR(dev->queue); + goto out_cleanup_tags; } + dev->queue->queuedata = dev; + + blk_queue_logical_block_size(dev->queue, DNBD3_BLOCK_SIZE); + blk_queue_physical_block_size(dev->queue, DNBD3_BLOCK_SIZE); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dev->queue); +#else + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, dev->queue); +#endif +#define ONE_MEG (1048576) + blk_queue_max_segment_size(dev->queue, 
ONE_MEG); + blk_queue_max_segments(dev->queue, 0xffff); + blk_queue_max_hw_sectors(dev->queue, ONE_MEG / DNBD3_BLOCK_SIZE); + dev->queue->limits.max_sectors = 256; +#undef ONE_MEG + + // set up disk + if (!(dev->disk = alloc_disk(1))) + { + printk(KERN_ERR "ERROR: dnbd3 alloc_disk failed.\n"); + ret = -ENOMEM; + goto out_cleanup_queue; + } + + dev->disk->flags |= GENHD_FL_NO_PART_SCAN; + dev->disk->major = major; + dev->disk->first_minor = minor; + dev->disk->fops = &dnbd3_blk_ops; + dev->disk->private_data = dev; + dev->disk->queue = dev->queue; + sprintf(dev->disk->disk_name, "dnbd%d", minor); + set_capacity(dev->disk, 0); + set_disk_ro(dev->disk, 1); + add_disk(dev->disk); + + // set up sysfs + dnbd3_sysfs_init(dev); + + return 0; + +out_cleanup_queue: + blk_cleanup_queue(dev->queue); +out_cleanup_tags: + blk_mq_free_tag_set(&dev->tag_set); +out: + return ret; +} + +int dnbd3_blk_del_device(dnbd3_device_t *dev) +{ + dnbd3_sysfs_exit(dev); + dnbd3_net_disconnect(dev); + del_gendisk(dev->disk); + blk_cleanup_queue(dev->queue); + blk_mq_free_tag_set(&dev->tag_set); + put_disk(dev->disk); + return 0; } void dnbd3_blk_fail_all_requests(dnbd3_device_t *dev) @@ -371,7 +407,7 @@ void dnbd3_blk_fail_all_requests(dnbd3_device_t *dev) if (dnbd3_req_fs(blk_request)) { spin_lock_irqsave(&dev->blk_lock, flags); - __blk_end_request_all(blk_request, -EIO); + blk_mq_end_request(blk_request, BLK_STS_IOERR); spin_unlock_irqrestore(&dev->blk_lock, flags); } else if (dnbd3_req_special(blk_request)) -- cgit v1.2.3-55-g7522