diff options
Diffstat (limited to 'src/kernel/blk.c')
-rw-r--r-- | src/kernel/blk.c | 332 |
1 files changed, 145 insertions, 187 deletions
diff --git a/src/kernel/blk.c b/src/kernel/blk.c index ccaa6c1..5795c03 100644 --- a/src/kernel/blk.c +++ b/src/kernel/blk.c @@ -34,25 +34,16 @@ static int dnbd3_close_device(dnbd3_device_t *dev) if (dev->imgname) dev_info(dnbd3_device_to_dev(dev), "closing down device.\n"); - /* quickly fail all requests */ - dnbd3_blk_fail_all_requests(dev); - dev->panic = 0; - dev->discover = 0; + dev->panic = false; result = dnbd3_net_disconnect(dev); kfree(dev->imgname); dev->imgname = NULL; /* new requests might have been queued up, */ /* but now that imgname is NULL no new ones can show up */ - dnbd3_blk_fail_all_requests(dev); -#ifdef DNBD3_BLK_MQ blk_mq_freeze_queue(dev->queue); -#endif set_capacity(dev->disk, 0); -#ifdef DNBD3_BLK_MQ blk_mq_unfreeze_queue(dev->queue); -#endif - dev->reported_size = 0; return result; } @@ -65,8 +56,7 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int #endif char *imgname = NULL; dnbd3_ioctl_t *msg = NULL; - unsigned long irqflags; - int i = 0; + int i = 0, j; u8 locked = 0; if (arg != 0) { @@ -97,7 +87,7 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int switch (cmd) { case IOCTL_OPEN: - if (atomic_cmpxchg(&dev->connection_lock, 0, 1) != 0) { + if (!dnbd3_flag_get(dev->connection_lock)) { result = -EBUSY; break; } @@ -121,7 +111,7 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int dev_info(dnbd3_device_to_dev(dev), "opening device.\n"); #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 14, 0) - // set optimal request size for the queue + // set optimal request size for the queue to half the read-ahead blk_queue_io_opt(dev->queue, (msg->read_ahead_kb * 512)); #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 15, 0) // set readahead from optimal request size of the queue @@ -158,8 +148,7 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int if (dev->alt_servers[i].host.ss_family == 0) continue; // Empty slot - 
dev->cur_server.host = dev->alt_servers[i].host; - result = dnbd3_net_connect(dev); + result = dnbd3_new_connection(dev, &dev->alt_servers[i].host, true); if (result == 0) { /* connection established, store index of server and exit loop */ result = i; @@ -168,7 +157,7 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int } if (result >= 0) { - /* probing was successful */ + /* connection was successful */ dev_dbg(dnbd3_device_to_dev(dev), "server %pISpc is initial server\n", &dev->cur_server.host); imgname = NULL; // Prevent kfree at the end @@ -180,7 +169,7 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int break; case IOCTL_CLOSE: - if (atomic_cmpxchg(&dev->connection_lock, 0, 1) != 0) { + if (!dnbd3_flag_get(dev->connection_lock)) { result = -EBUSY; break; } @@ -189,7 +178,7 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int break; case IOCTL_SWITCH: - if (atomic_cmpxchg(&dev->connection_lock, 0, 1) != 0) { + if (!dnbd3_flag_get(dev->connection_lock)) { result = -EBUSY; break; } @@ -216,40 +205,12 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int /* specified server is current server, so do not switch */ result = 0; } else { - struct sockaddr_storage old_server; - dev_info(dnbd3_device_to_dev(dev), "manual server switch to %pISpc\n", &new_addr); - /* save current working server */ - /* lock device to get consistent copy of current working server */ - spin_lock_irqsave(&dev->blk_lock, irqflags); - old_server = dev->cur_server.host; - spin_unlock_irqrestore(&dev->blk_lock, irqflags); - - /* disconnect old server */ - dnbd3_net_disconnect(dev); - - /* connect to new specified server (switching) */ - spin_lock_irqsave(&dev->blk_lock, irqflags); - dev->cur_server.host = new_addr; - spin_unlock_irqrestore(&dev->blk_lock, irqflags); - result = dnbd3_net_connect(dev); + result = dnbd3_new_connection(dev, &new_addr, false); if (result != 0) { 
- /* reconnect with old server if switching has failed */ - spin_lock_irqsave(&dev->blk_lock, irqflags); - dev->cur_server.host = old_server; - spin_unlock_irqrestore(&dev->blk_lock, irqflags); - if (dnbd3_net_connect(dev) != 0) { - /* we couldn't reconnect to the old server */ - /* device is dangling now and needs another SWITCH call */ - dev_warn( - dnbd3_device_to_dev(dev), - "switching failed and could not switch back to old server - dangling device\n"); - result = -ECONNABORTED; - } else { - /* switching didn't work but we are back to the old server */ - result = -EAGAIN; - } + /* switching didn't work */ + result = -EAGAIN; } else { /* switch succeeded */ /* fake RTT so we don't switch away again soon */ @@ -257,12 +218,13 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int for (i = 0; i < NUMBER_SERVERS; ++i) { alt_server = &dev->alt_servers[i]; if (is_same_server(&alt_server->host, &new_addr)) { - alt_server->rtts[0] = alt_server->rtts[1] - = alt_server->rtts[2] = alt_server->rtts[3] = 4; + for (j = 0; j < DISCOVER_HISTORY_SIZE; ++j) + alt_server->rtts[j] = 1; alt_server->best_count = 100; } else { - alt_server->rtts[0] <<= 2; - alt_server->rtts[2] <<= 2; + for (j = 0; j < DISCOVER_HISTORY_SIZE; ++j) + if (alt_server->rtts[j] < 5000) + alt_server->rtts[j] = 5000; alt_server->best_count = 0; } } @@ -318,12 +280,11 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int break; } - if (locked) - atomic_set(&dev->connection_lock, 0); - cleanup_return: kfree(msg); kfree(imgname); + if (locked) + dnbd3_flag_reset(dev->connection_lock); return result; } @@ -332,7 +293,18 @@ static const struct block_device_operations dnbd3_blk_ops = { .ioctl = dnbd3_blk_ioctl, }; -#ifdef DNBD3_BLK_MQ +static void dnbd3_add_queue(dnbd3_device_t *dev, struct request *rq) +{ + unsigned long irqflags; + + spin_lock_irqsave(&dev->send_queue_lock, irqflags); + list_add_tail(&rq->queuelist, &dev->send_queue); + 
spin_unlock_irqrestore(&dev->send_queue_lock, irqflags); + spin_lock_irqsave(&dev->blk_lock, irqflags); + queue_work(dev->send_wq, &dev->send_work); + spin_unlock_irqrestore(&dev->blk_lock, irqflags); +} + /* * Linux kernel blk-mq driver function (entry point) to handle block IO requests */ @@ -340,110 +312,108 @@ static blk_status_t dnbd3_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_ { struct request *rq = bd->rq; dnbd3_device_t *dev = rq->q->queuedata; - unsigned long irqflags; + struct dnbd3_cmd *cmd; - if (dev->imgname == NULL) + if (dev->imgname == NULL || !device_active(dev)) return BLK_STS_IOERR; - if (!(dnbd3_req_fs(rq))) + if (req_op(rq) != REQ_OP_READ) return BLK_STS_IOERR; if (PROBE_COUNT_TIMEOUT > 0 && dev->panic_count >= PROBE_COUNT_TIMEOUT) return BLK_STS_TIMEOUT; - if (!(dnbd3_req_read(rq))) + if (rq_data_dir(rq) != READ) return BLK_STS_NOTSUPP; + cmd = blk_mq_rq_to_pdu(rq); + cmd->handle = (u64)blk_mq_unique_tag(rq) | (((u64)jiffies) << 32); blk_mq_start_request(rq); - spin_lock_irqsave(&dev->blk_lock, irqflags); - list_add_tail(&rq->queuelist, &dev->request_queue_send); - spin_unlock_irqrestore(&dev->blk_lock, irqflags); - wake_up(&dev->process_queue_send); + dnbd3_add_queue(dev, rq); return BLK_STS_OK; } -static const struct blk_mq_ops dnbd3_mq_ops = { - .queue_rq = dnbd3_queue_rq, -}; - -#else /* DNBD3_BLK_MQ */ -/* - * Linux kernel blk driver function (entry point) to handle block IO requests - */ -static void dnbd3_blk_request(struct request_queue *q) +static enum blk_eh_timer_return dnbd3_rq_timeout(struct request *req, bool reserved) { - struct request *rq; - dnbd3_device_t *dev; - - while ((rq = blk_fetch_request(q)) != NULL) { - dev = rq->rq_disk->private_data; - - if (dev->imgname == NULL) { - __blk_end_request_all(rq, -EIO); - continue; - } - - if (!(dnbd3_req_fs(rq))) { - __blk_end_request_all(rq, 0); - continue; - } - - if (PROBE_COUNT_TIMEOUT > 0 && dev->panic_count >= PROBE_COUNT_TIMEOUT) { - __blk_end_request_all(rq, -EIO); 
- continue; + unsigned long irqflags; + struct request *rq_iter; + bool found = false; + dnbd3_device_t *dev = req->q->queuedata; + + spin_lock_irqsave(&dev->send_queue_lock, irqflags); + list_for_each_entry(rq_iter, &dev->send_queue, queuelist) { + if (rq_iter == req) { + found = true; + break; } - - if (!(dnbd3_req_read(rq))) { - __blk_end_request_all(rq, -EACCES); - continue; + } + spin_unlock_irqrestore(&dev->send_queue_lock, irqflags); + // If still in send queue, do nothing + if (found) + return BLK_EH_RESET_TIMER; + + spin_lock_irqsave(&dev->recv_queue_lock, irqflags); + list_for_each_entry(rq_iter, &dev->recv_queue, queuelist) { + if (rq_iter == req) { + found = true; + list_del_init(&req->queuelist); + break; } - - list_add_tail(&rq->queuelist, &dev->request_queue_send); - spin_unlock_irq(q->queue_lock); - wake_up(&dev->process_queue_send); - spin_lock_irq(q->queue_lock); } + spin_unlock_irqrestore(&dev->recv_queue_lock, irqflags); + if (!found) { + dev_err(dnbd3_device_to_dev(dev), "timeout request neither found in send nor recv queue, ignoring\n"); + // Assume it was finished concurrently + return BLK_EH_DONE; + } + // Add to send queue again and trigger work, reset timeout + dnbd3_add_queue(dev, req); + return BLK_EH_RESET_TIMER; } -#endif /* DNBD3_BLK_MQ */ + +static +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +const +#endif +struct blk_mq_ops dnbd3_mq_ops = { + .queue_rq = dnbd3_queue_rq, + .timeout = dnbd3_rq_timeout, +}; int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor) { int ret; - init_waitqueue_head(&dev->process_queue_send); - init_waitqueue_head(&dev->process_queue_discover); - INIT_LIST_HEAD(&dev->request_queue_send); - INIT_LIST_HEAD(&dev->request_queue_receive); - - memset(&dev->cur_server, 0, sizeof(dev->cur_server)); - dev->better_sock = NULL; + memset(dev, 0, sizeof(*dev)); + dev->index = minor; + // lock for imgname, cur_server etc. 
+ spin_lock_init(&dev->blk_lock); + spin_lock_init(&dev->send_queue_lock); + spin_lock_init(&dev->recv_queue_lock); + INIT_LIST_HEAD(&dev->send_queue); + INIT_LIST_HEAD(&dev->recv_queue); + dnbd3_flag_reset(dev->connection_lock); + dnbd3_flag_reset(dev->discover_running); + mutex_init(&dev->alt_servers_lock); + dnbd3_net_work_init(dev); + // memset has done this already but I like initial values to be explicit dev->imgname = NULL; dev->rid = 0; - dev->update_available = 0; - mutex_init(&dev->alt_servers_lock); - memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0]) * NUMBER_SERVERS); - dev->thread_send = NULL; - dev->thread_receive = NULL; - dev->thread_discover = NULL; - dev->discover = 0; - atomic_set(&dev->connection_lock, 0); - dev->panic = 0; + dev->update_available = false; + dev->panic = false; dev->panic_count = 0; dev->reported_size = 0; - // set up spin lock for request queues for send and receive - spin_lock_init(&dev->blk_lock); - -#ifdef DNBD3_BLK_MQ // set up tag_set for blk-mq dev->tag_set.ops = &dnbd3_mq_ops; dev->tag_set.nr_hw_queues = 1; dev->tag_set.queue_depth = 128; dev->tag_set.numa_node = NUMA_NO_NODE; - dev->tag_set.cmd_size = 0; + dev->tag_set.cmd_size = sizeof(struct dnbd3_cmd); dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; dev->tag_set.driver_data = dev; + dev->tag_set.timeout = BLOCK_LAYER_TIMEOUT * HZ; ret = blk_mq_alloc_tag_set(&dev->tag_set); if (ret) { @@ -470,16 +440,6 @@ int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor) } dev->queue->queuedata = dev; #endif -#else - // set up blk - dev->queue = blk_init_queue(&dnbd3_blk_request, &dev->blk_lock); - if (!dev->queue) { - ret = -ENOMEM; - dev_err(dnbd3_device_to_dev(dev), "blk_init_queue failed\n"); - goto out; - } - dev->queue->queuedata = dev; -#endif /* DNBD3_BLK_MQ */ blk_queue_logical_block_size(dev->queue, DNBD3_BLOCK_SIZE); blk_queue_physical_block_size(dev->queue, DNBD3_BLOCK_SIZE); @@ -527,90 +487,88 @@ int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor) 
out_cleanup_queue: blk_cleanup_queue(dev->queue); #endif -#ifdef DNBD3_BLK_MQ out_cleanup_tags: blk_mq_free_tag_set(&dev->tag_set); -#endif out: + mutex_destroy(&dev->alt_servers_lock); return ret; } int dnbd3_blk_del_device(dnbd3_device_t *dev) { - while (atomic_cmpxchg(&dev->connection_lock, 0, 1) != 0) + while (!dnbd3_flag_get(dev->connection_lock)) schedule(); dnbd3_close_device(dev); dnbd3_sysfs_exit(dev); del_gendisk(dev->disk); #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 14, 0) blk_cleanup_queue(dev->queue); -#endif -#ifdef DNBD3_BLK_MQ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 14, 0) +#else blk_cleanup_disk(dev->disk); #endif blk_mq_free_tag_set(&dev->tag_set); -#endif mutex_destroy(&dev->alt_servers_lock); #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 14, 0) put_disk(dev->disk); #endif + mutex_destroy(&dev->alt_servers_lock); return 0; } +void dnbd3_blk_requeue_all_requests(dnbd3_device_t *dev) +{ + struct request *blk_request; + unsigned long flags; + struct list_head local_copy; + int count = 0; + + INIT_LIST_HEAD(&local_copy); + spin_lock_irqsave(&dev->recv_queue_lock, flags); + while (!list_empty(&dev->recv_queue)) { + blk_request = list_entry(dev->recv_queue.next, struct request, queuelist); + list_del_init(&blk_request->queuelist); + list_add(&blk_request->queuelist, &local_copy); + count++; + } + spin_unlock_irqrestore(&dev->recv_queue_lock, flags); + if (count) + dev_info(dnbd3_device_to_dev(dev), "re-queueing %d requests\n", count); + while (!list_empty(&local_copy)) { + blk_request = list_entry(local_copy.next, struct request, queuelist); + list_del_init(&blk_request->queuelist); + dnbd3_add_queue(dev, blk_request); + } +} + void dnbd3_blk_fail_all_requests(dnbd3_device_t *dev) { - struct request *blk_request, *tmp_request; - struct request *blk_request2, *tmp_request2; + struct request *blk_request; unsigned long flags; struct list_head local_copy; - int dup; + int count = 0; INIT_LIST_HEAD(&local_copy); - spin_lock_irqsave(&dev->blk_lock, 
flags); - while (!list_empty(&dev->request_queue_receive)) { - list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_receive, queuelist) { - list_del_init(&blk_request->queuelist); - dup = 0; - list_for_each_entry_safe(blk_request2, tmp_request2, &local_copy, queuelist) { - if (blk_request == blk_request2) { - dev_warn(dnbd3_device_to_dev(dev), - "same request is in request_queue_receive multiple times\n"); - BUG(); - dup = 1; - break; - } - } - if (!dup) - list_add(&blk_request->queuelist, &local_copy); - } + spin_lock_irqsave(&dev->recv_queue_lock, flags); + while (!list_empty(&dev->recv_queue)) { + blk_request = list_entry(dev->recv_queue.next, struct request, queuelist); + list_del_init(&blk_request->queuelist); + list_add(&blk_request->queuelist, &local_copy); + count++; } - while (!list_empty(&dev->request_queue_send)) { - list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_send, queuelist) { - list_del_init(&blk_request->queuelist); - dup = 0; - list_for_each_entry_safe(blk_request2, tmp_request2, &local_copy, queuelist) { - if (blk_request == blk_request2) { - dev_warn(dnbd3_device_to_dev(dev), "request is in both lists\n"); - BUG(); - dup = 1; - break; - } - } - if (!dup) - list_add(&blk_request->queuelist, &local_copy); - } + spin_unlock_irqrestore(&dev->recv_queue_lock, flags); + spin_lock_irqsave(&dev->send_queue_lock, flags); + while (!list_empty(&dev->send_queue)) { + blk_request = list_entry(dev->send_queue.next, struct request, queuelist); + list_del_init(&blk_request->queuelist); + list_add(&blk_request->queuelist, &local_copy); + count++; } - spin_unlock_irqrestore(&dev->blk_lock, flags); - list_for_each_entry_safe(blk_request, tmp_request, &local_copy, queuelist) { + spin_unlock_irqrestore(&dev->send_queue_lock, flags); + if (count) + dev_info(dnbd3_device_to_dev(dev), "failing %d requests\n", count); + while (!list_empty(&local_copy)) { + blk_request = list_entry(local_copy.next, struct request, queuelist); 
list_del_init(&blk_request->queuelist); - if (dnbd3_req_fs(blk_request)) -#ifdef DNBD3_BLK_MQ - blk_mq_end_request(blk_request, BLK_STS_IOERR); -#else - blk_end_request_all(blk_request, -EIO); -#endif - else if (dnbd3_req_special(blk_request)) - kfree(blk_request); + blk_mq_end_request(blk_request, BLK_STS_IOERR); } } |