diff options
-rw-r--r-- | CMakeLists.txt | 6 | ||||
-rw-r--r-- | src/kernel/blk.c | 276 | ||||
-rw-r--r-- | src/kernel/blk.h | 6 | ||||
-rw-r--r-- | src/kernel/dnbd3.h | 3 | ||||
-rw-r--r-- | src/kernel/net.c | 36 |
5 files changed, 183 insertions, 144 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 18ff147..21865e5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -231,8 +231,14 @@ IF(BUILD_KERNEL_MODULE) SET(KERNEL_DIR "/lib/modules/${CMAKE_SYSTEM_VERSION}/build") ENDIF() + SET(KERNEL_C_FLAGS "") + IF(CMAKE_BUILD_TYPE MATCHES Debug) + SET(KERNEL_C_FLAGS "-g -DDEBUG") + ENDIF() + SET(KBUILD_COMMAND ${CMAKE_MAKE_PROGRAM} -C ${KERNEL_DIR} M=${CMAKE_BINARY_DIR} modules + EXTRA_CFLAGS=${KERNEL_C_FLAGS} ) CONFIGURE_FILE(Kbuild.in ${CMAKE_BINARY_DIR}/Kbuild) diff --git a/src/kernel/blk.c b/src/kernel/blk.c index 889b988..dde8dea 100644 --- a/src/kernel/blk.c +++ b/src/kernel/blk.c @@ -41,92 +41,7 @@ req->cmd_type == REQ_TYPE_SPECIAL #endif -int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor) -{ - struct gendisk *disk; - struct request_queue *blk_queue; - - init_waitqueue_head(&dev->process_queue_send); - init_waitqueue_head(&dev->process_queue_receive); - init_waitqueue_head(&dev->process_queue_discover); - INIT_LIST_HEAD(&dev->request_queue_send); - INIT_LIST_HEAD(&dev->request_queue_receive); - - memset(&dev->cur_server, 0, sizeof(dev->cur_server)); - memset(&dev->initial_server, 0, sizeof(dev->initial_server)); - dev->better_sock = NULL; - - dev->imgname = NULL; - dev->rid = 0; - dev->update_available = 0; - memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])*NUMBER_SERVERS); - dev->thread_send = NULL; - dev->thread_receive = NULL; - dev->thread_discover = NULL; - dev->discover = 0; - dev->disconnecting = 0; - dev->panic = 0; - dev->panic_count = 0; - dev->reported_size = 0; - - if (!(disk = alloc_disk(1))) - { - printk("ERROR: dnbd3 alloc_disk failed.\n"); - return -EIO; - } - - disk->major = major; - disk->first_minor = minor; - sprintf(disk->disk_name, "dnbd%d", minor); - set_capacity(disk, 0); - set_disk_ro(disk, 1); - disk->fops = &dnbd3_blk_ops; - - spin_lock_init(&dev->blk_lock); - if ((blk_queue = blk_init_queue(&dnbd3_blk_request, &dev->blk_lock)) == NULL) - { - printk("ERROR: dnbd3 
blk_init_queue failed.\n"); - return -EIO; - } - - blk_queue_logical_block_size(blk_queue, DNBD3_BLOCK_SIZE); - blk_queue_physical_block_size(blk_queue, DNBD3_BLOCK_SIZE); - - disk->queue = blk_queue; - disk->private_data = dev; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) - blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); -#else - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue); -#endif -#define ONE_MEG (1048576) - blk_queue_max_segment_size(disk->queue, ONE_MEG); - blk_queue_max_segments(disk->queue, 0xffff); - blk_queue_max_hw_sectors(disk->queue, ONE_MEG / DNBD3_BLOCK_SIZE); - disk->queue->limits.max_sectors = 256; - dev->disk = disk; -#undef ONE_MEG - - add_disk(disk); - dnbd3_sysfs_init(dev); - return 0; -} - -int dnbd3_blk_del_device(dnbd3_device_t *dev) -{ - dnbd3_sysfs_exit(dev); - dnbd3_net_disconnect(dev); - del_gendisk(dev->disk); - put_disk(dev->disk); - blk_cleanup_queue(dev->disk->queue); - return 0; -} - -struct block_device_operations dnbd3_blk_ops = - { .owner = THIS_MODULE, .ioctl = dnbd3_blk_ioctl, }; - -int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) +static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { int result = -100; dnbd3_device_t *dev = bdev->bd_disk->private_data; @@ -225,7 +140,9 @@ int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, u dnbd3_blk_fail_all_requests(dev); result = dnbd3_net_disconnect(dev); dnbd3_blk_fail_all_requests(dev); + blk_mq_freeze_queue(dev->queue); set_capacity(dev->disk, 0); + blk_mq_unfreeze_queue(dev->queue); if (dev->imgname) { kfree(dev->imgname); @@ -275,48 +192,167 @@ cleanup_return: return result; } -/** - * dev->blk_lock and q->queue_lock are being held - * when this is called! 
- */ -void dnbd3_blk_request(struct request_queue *q) +static const struct block_device_operations dnbd3_blk_ops = { + .owner = THIS_MODULE, + .ioctl = dnbd3_blk_ioctl, +}; + +static blk_status_t dnbd3_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { - struct request *req; - dnbd3_device_t *dev; + struct request *rq = bd->rq; + dnbd3_device_t *dev = rq->q->queuedata; + unsigned long irqflags; + + blk_mq_start_request(rq); - while ((req = blk_fetch_request(q)) != NULL) + if (dev->imgname == NULL) { - dev = req->rq_disk->private_data; + blk_mq_end_request(rq, BLK_STS_IOERR); + goto out; + } - if (dev->imgname == NULL) - { - __blk_end_request_all(req, -EIO); - continue; - } + if (!(dnbd3_req_fs(rq))) + { + blk_mq_end_request(rq, BLK_STS_IOERR); + goto out; + } - if (!(dnbd3_req_fs(req))) - { - __blk_end_request_all(req, 0); - continue; - } + if (PROBE_COUNT_TIMEOUT > 0 && dev->panic_count >= PROBE_COUNT_TIMEOUT) + { + blk_mq_end_request(rq, BLK_STS_TIMEOUT); + goto out; + } - if (PROBE_COUNT_TIMEOUT > 0 && dev->panic_count >= PROBE_COUNT_TIMEOUT) - { - __blk_end_request_all(req, -EIO); - continue; - } + if (!(dnbd3_req_read(rq))) + { + blk_mq_end_request(rq, BLK_STS_NOTSUPP); + goto out; + } - if (!(dnbd3_req_read(req))) - { - __blk_end_request_all(req, -EACCES); - continue; - } + spin_lock_irqsave(&dev->blk_lock, irqflags); + list_add_tail(&rq->queuelist, &dev->request_queue_send); + spin_unlock_irqrestore(&dev->blk_lock, irqflags); + wake_up(&dev->process_queue_send); + +out: + return BLK_STS_OK; +} + +static const struct blk_mq_ops dnbd3_mq_ops = { + .queue_rq = dnbd3_queue_rq, +}; + +int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor) +{ + int ret; + + init_waitqueue_head(&dev->process_queue_send); + init_waitqueue_head(&dev->process_queue_receive); + init_waitqueue_head(&dev->process_queue_discover); + INIT_LIST_HEAD(&dev->request_queue_send); + INIT_LIST_HEAD(&dev->request_queue_receive); + + memset(&dev->cur_server, 0, 
sizeof(dev->cur_server)); + memset(&dev->initial_server, 0, sizeof(dev->initial_server)); + dev->better_sock = NULL; + + dev->imgname = NULL; + dev->rid = 0; + dev->update_available = 0; + memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])*NUMBER_SERVERS); + dev->thread_send = NULL; + dev->thread_receive = NULL; + dev->thread_discover = NULL; + dev->discover = 0; + dev->disconnecting = 0; + dev->panic = 0; + dev->panic_count = 0; + dev->reported_size = 0; + + // set up spin lock for request queues for send and receive + spin_lock_init(&dev->blk_lock); + + // set up tag_set for blk-mq + dev->tag_set.ops = &dnbd3_mq_ops; + dev->tag_set.nr_hw_queues = 1; + dev->tag_set.queue_depth = 128; + dev->tag_set.numa_node = NUMA_NO_NODE; + dev->tag_set.cmd_size = 0; + dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + dev->tag_set.driver_data = dev; + + ret = blk_mq_alloc_tag_set(&dev->tag_set); + if (ret) + { + printk(KERN_ERR "ERROR: dnbd3 blk_mq_alloc_tag_set failed.\n"); + goto out; + } - list_add_tail(&req->queuelist, &dev->request_queue_send); - spin_unlock_irq(q->queue_lock); - wake_up(&dev->process_queue_send); - spin_lock_irq(q->queue_lock); + // set up blk-mq + dev->queue = blk_mq_init_queue(&dev->tag_set); + if (IS_ERR(dev->queue)) { + ret = PTR_ERR(dev->queue); + goto out_cleanup_tags; } + dev->queue->queuedata = dev; + + blk_queue_logical_block_size(dev->queue, DNBD3_BLOCK_SIZE); + blk_queue_physical_block_size(dev->queue, DNBD3_BLOCK_SIZE); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dev->queue); +#else + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, dev->queue); +#endif +#define ONE_MEG (1048576) + blk_queue_max_segment_size(dev->queue, ONE_MEG); + blk_queue_max_segments(dev->queue, 0xffff); + blk_queue_max_hw_sectors(dev->queue, ONE_MEG / DNBD3_BLOCK_SIZE); + dev->queue->limits.max_sectors = 256; +#undef ONE_MEG + + // set up disk + if (!(dev->disk = 
alloc_disk(1))) + { + printk(KERN_ERR "ERROR: dnbd3 alloc_disk failed.\n"); + ret = -ENOMEM; + goto out_cleanup_queue; + } + + dev->disk->flags |= GENHD_FL_NO_PART_SCAN; + dev->disk->major = major; + dev->disk->first_minor = minor; + dev->disk->fops = &dnbd3_blk_ops; + dev->disk->private_data = dev; + dev->disk->queue = dev->queue; + sprintf(dev->disk->disk_name, "dnbd%d", minor); + set_capacity(dev->disk, 0); + set_disk_ro(dev->disk, 1); + add_disk(dev->disk); + + // set up sysfs + dnbd3_sysfs_init(dev); + + return 0; + +out_cleanup_queue: + blk_cleanup_queue(dev->queue); +out_cleanup_tags: + blk_mq_free_tag_set(&dev->tag_set); +out: + return ret; +} + +int dnbd3_blk_del_device(dnbd3_device_t *dev) +{ + dnbd3_sysfs_exit(dev); + dnbd3_net_disconnect(dev); + del_gendisk(dev->disk); + blk_cleanup_queue(dev->queue); + blk_mq_free_tag_set(&dev->tag_set); + put_disk(dev->disk); + return 0; } void dnbd3_blk_fail_all_requests(dnbd3_device_t *dev) @@ -371,7 +407,7 @@ void dnbd3_blk_fail_all_requests(dnbd3_device_t *dev) if (dnbd3_req_fs(blk_request)) { spin_lock_irqsave(&dev->blk_lock, flags); - __blk_end_request_all(blk_request, -EIO); + blk_mq_end_request(blk_request, BLK_STS_IOERR); spin_unlock_irqrestore(&dev->blk_lock, flags); } else if (dnbd3_req_special(blk_request)) diff --git a/src/kernel/blk.h b/src/kernel/blk.h index 5091d19..0afce2e 100644 --- a/src/kernel/blk.h +++ b/src/kernel/blk.h @@ -27,12 +27,6 @@ #define REQ_TYPE_SPECIAL REQ_TYPE_DRV_PRIV #endif -extern struct block_device_operations dnbd3_blk_ops; - -int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); - -void dnbd3_blk_request(struct request_queue *q); - int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor); int dnbd3_blk_del_device(dnbd3_device_t *dev); diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h index f8af69f..aceb853 100644 --- a/src/kernel/dnbd3.h +++ b/src/kernel/dnbd3.h @@ -25,6 +25,7 @@ #include <linux/kthread.h> #include <linux/module.h> 
#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <net/sock.h> #define KERNEL_MODULE @@ -46,6 +47,8 @@ typedef struct { // block struct gendisk *disk; + struct blk_mq_tag_set tag_set; + struct request_queue *queue; spinlock_t blk_lock; // sysfs diff --git a/src/kernel/net.c b/src/kernel/net.c index 9e48b86..337cfd7 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -25,13 +25,15 @@ #include "serialize.h" -#include <linux/time.h> +#include <linux/ktime.h> #include <linux/signal.h> #ifndef MIN #define MIN(a,b) ((a) < (b) ? (a) : (b)) #endif +#define ktime_to_s(kt) ktime_divns(kt, NSEC_PER_SEC) + #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) #define dnbd3_sock_create(af,type,proto,sock) sock_create_kern(&init_net, (af) == HOST_IP4 ? AF_INET : AF_INET6, type, proto, sock) #else @@ -203,8 +205,8 @@ int dnbd3_net_connect(dnbd3_device_t *dev) if (dnbd3_sock_create(dev->cur_server.host.type, SOCK_STREAM, IPPROTO_TCP, &dev->sock) < 0) error_dev("ERROR: Couldn't create socket (v6)."); - kernel_setsockopt(dev->sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout, sizeof(timeout)); - kernel_setsockopt(dev->sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, sizeof(timeout)); + kernel_setsockopt(dev->sock, SOL_SOCKET, SO_SNDTIMEO_NEW, (char *)&timeout, sizeof(timeout)); + kernel_setsockopt(dev->sock, SOL_SOCKET, SO_RCVTIMEO_NEW, (char *)&timeout, sizeof(timeout)); dev->sock->sk->sk_allocation = GFP_NOIO; if (dev->cur_server.host.type == HOST_IP4) { @@ -289,8 +291,8 @@ int dnbd3_net_connect(dnbd3_device_t *dev) debug_dev("INFO: On-the-fly server change."); dev->sock = dev->better_sock; dev->better_sock = NULL; - kernel_setsockopt(dev->sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout, sizeof(timeout)); - kernel_setsockopt(dev->sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, sizeof(timeout)); + kernel_setsockopt(dev->sock, SOL_SOCKET, SO_SNDTIMEO_NEW, (char *)&timeout, sizeof(timeout)); + kernel_setsockopt(dev->sock, SOL_SOCKET, SO_RCVTIMEO_NEW, (char *)&timeout, 
sizeof(timeout)); } dev->panic = 0; @@ -459,7 +461,7 @@ int dnbd3_net_discover(void *data) uint64_t filesize; uint16_t rid; - struct timeval start, end; + ktime_t start, end; unsigned long rtt, best_rtt = 0; unsigned long irqflags; int i, j, isize, best_server, current_server; @@ -565,7 +567,7 @@ int dnbd3_net_discover(void *data) } if (NUMBER_SERVERS > isize) { for (i = 0; i < isize; ++i) { - j = ((start.tv_sec >> i) ^ (start.tv_usec >> j)) % NUMBER_SERVERS; + j = ((ktime_to_s(start) >> i) ^ (ktime_to_us(start) >> j)) % NUMBER_SERVERS; if (j != i) { mlen = check_order[i]; check_order[i] = check_order[j]; @@ -579,7 +581,7 @@ int dnbd3_net_discover(void *data) i = check_order[j]; if (dev->alt_servers[i].host.type == 0) // Empty slot continue; - if (!dev->panic && dev->alt_servers[i].failures > 50 && (start.tv_usec & 7) != 0) // If not in panic mode, skip server if it failed too many times + if (!dev->panic && dev->alt_servers[i].failures > 50 && (ktime_to_us(start) & 7) != 0) // If not in panic mode, skip server if it failed too many times continue; if (isize-- <= 0 && !is_same_server(&dev->cur_server, &dev->alt_servers[i])) continue; @@ -591,8 +593,8 @@ int dnbd3_net_discover(void *data) sock = NULL; continue; } - kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout, sizeof(timeout)); - kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, sizeof(timeout)); + kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO_NEW, (char *)&timeout, sizeof(timeout)); + kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO_NEW, (char *)&timeout, sizeof(timeout)); sock->sk->sk_allocation = GFP_NOIO; if (dev->alt_servers[i].host.type == HOST_IP4) { @@ -693,7 +695,7 @@ int dnbd3_net_discover(void *data) iov[0].iov_len = sizeof(dnbd3_request); // start rtt measurement - do_gettimeofday(&start); + start = ktime_get_real(); if (kernel_sendmsg(sock, &msg, iov, 1, sizeof(dnbd3_request)) <= 0) error_alt("ERROR: Requesting test block failed (discover)."); @@ -715,10 +717,9 @@ 
int dnbd3_net_discover(void *data) if (kernel_recvmsg(sock, &msg, iov, 1, dnbd3_reply.size, msg.msg_flags) != RTT_BLOCK_SIZE) error_alt("ERROR: Receiving test block payload failed (discover)."); - do_gettimeofday(&end); // end rtt measurement + end = ktime_get_real(); // end rtt measurement - dev->alt_servers[i].rtts[turn] = (unsigned long)((end.tv_sec - start.tv_sec) * 1000000ull - + (end.tv_usec - start.tv_usec)); + dev->alt_servers[i].rtts[turn] = (unsigned long) ktime_us_delta(end, start); rtt = (dev->alt_servers[i].rtts[0] + dev->alt_servers[i].rtts[1] + dev->alt_servers[i].rtts[2] + dev->alt_servers[i].rtts[3]) / 4; @@ -781,7 +782,7 @@ int dnbd3_net_discover(void *data) continue; } - do_change = ready && best_server != current_server && (start.tv_usec & 3) != 0 + do_change = ready && best_server != current_server && (ktime_to_us(start) & 3) != 0 && RTT_THRESHOLD_FACTOR(dev->cur_rtt) > best_rtt + 1500; if (ready && !do_change) { @@ -823,7 +824,7 @@ int dnbd3_net_discover(void *data) best_sock = NULL; } - if (!ready || (start.tv_usec & 15) != 0) + if (!ready || (ktime_to_us(start) & 15) != 0) turn = (turn + 1) % 4; if (turn == 2) // Set ready when we only have 2 of 4 measurements for quicker load balancing ready = 1; @@ -1032,7 +1033,7 @@ int dnbd3_net_receive(void *data) } spin_lock_irqsave(&dev->blk_lock, irqflags); list_del_init(&blk_request->queuelist); - __blk_end_request_all(blk_request, 0); + blk_mq_end_request(blk_request, BLK_STS_OK); spin_unlock_irqrestore(&dev->blk_lock, irqflags); continue; @@ -1120,4 +1121,3 @@ int dnbd3_net_receive(void *data) dev->thread_receive = NULL; return -1; } - |