summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- CMakeLists.txt | 6
-rw-r--r-- src/kernel/blk.c | 276
-rw-r--r-- src/kernel/blk.h | 6
-rw-r--r-- src/kernel/dnbd3.h | 3
-rw-r--r-- src/kernel/net.c | 36
5 files changed, 183 insertions, 144 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 18ff147..21865e5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -231,8 +231,14 @@ IF(BUILD_KERNEL_MODULE)
SET(KERNEL_DIR "/lib/modules/${CMAKE_SYSTEM_VERSION}/build")
ENDIF()
+ # Extra compiler flags handed to Kbuild; debug symbols and -DDEBUG only for Debug builds.
+ # The build type must be tested here: KERNEL_C_FLAGS itself is still empty at this point.
+ SET(KERNEL_C_FLAGS "")
+ IF(CMAKE_BUILD_TYPE MATCHES Debug)
+ SET(KERNEL_C_FLAGS "-g -DDEBUG")
+ ENDIF()
+
SET(KBUILD_COMMAND ${CMAKE_MAKE_PROGRAM} -C ${KERNEL_DIR}
M=${CMAKE_BINARY_DIR} modules
+ EXTRA_CFLAGS=${KERNEL_C_FLAGS}
)
CONFIGURE_FILE(Kbuild.in ${CMAKE_BINARY_DIR}/Kbuild)
diff --git a/src/kernel/blk.c b/src/kernel/blk.c
index 889b988..dde8dea 100644
--- a/src/kernel/blk.c
+++ b/src/kernel/blk.c
@@ -41,92 +41,7 @@
req->cmd_type == REQ_TYPE_SPECIAL
#endif
-int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor)
-{
- struct gendisk *disk;
- struct request_queue *blk_queue;
-
- init_waitqueue_head(&dev->process_queue_send);
- init_waitqueue_head(&dev->process_queue_receive);
- init_waitqueue_head(&dev->process_queue_discover);
- INIT_LIST_HEAD(&dev->request_queue_send);
- INIT_LIST_HEAD(&dev->request_queue_receive);
-
- memset(&dev->cur_server, 0, sizeof(dev->cur_server));
- memset(&dev->initial_server, 0, sizeof(dev->initial_server));
- dev->better_sock = NULL;
-
- dev->imgname = NULL;
- dev->rid = 0;
- dev->update_available = 0;
- memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])*NUMBER_SERVERS);
- dev->thread_send = NULL;
- dev->thread_receive = NULL;
- dev->thread_discover = NULL;
- dev->discover = 0;
- dev->disconnecting = 0;
- dev->panic = 0;
- dev->panic_count = 0;
- dev->reported_size = 0;
-
- if (!(disk = alloc_disk(1)))
- {
- printk("ERROR: dnbd3 alloc_disk failed.\n");
- return -EIO;
- }
-
- disk->major = major;
- disk->first_minor = minor;
- sprintf(disk->disk_name, "dnbd%d", minor);
- set_capacity(disk, 0);
- set_disk_ro(disk, 1);
- disk->fops = &dnbd3_blk_ops;
-
- spin_lock_init(&dev->blk_lock);
- if ((blk_queue = blk_init_queue(&dnbd3_blk_request, &dev->blk_lock)) == NULL)
- {
- printk("ERROR: dnbd3 blk_init_queue failed.\n");
- return -EIO;
- }
-
- blk_queue_logical_block_size(blk_queue, DNBD3_BLOCK_SIZE);
- blk_queue_physical_block_size(blk_queue, DNBD3_BLOCK_SIZE);
-
- disk->queue = blk_queue;
- disk->private_data = dev;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0)
- blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
-#else
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
-#endif
-#define ONE_MEG (1048576)
- blk_queue_max_segment_size(disk->queue, ONE_MEG);
- blk_queue_max_segments(disk->queue, 0xffff);
- blk_queue_max_hw_sectors(disk->queue, ONE_MEG / DNBD3_BLOCK_SIZE);
- disk->queue->limits.max_sectors = 256;
- dev->disk = disk;
-#undef ONE_MEG
-
- add_disk(disk);
- dnbd3_sysfs_init(dev);
- return 0;
-}
-
-int dnbd3_blk_del_device(dnbd3_device_t *dev)
-{
- dnbd3_sysfs_exit(dev);
- dnbd3_net_disconnect(dev);
- del_gendisk(dev->disk);
- put_disk(dev->disk);
- blk_cleanup_queue(dev->disk->queue);
- return 0;
-}
-
-struct block_device_operations dnbd3_blk_ops =
- { .owner = THIS_MODULE, .ioctl = dnbd3_blk_ioctl, };
-
-int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
+static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
{
int result = -100;
dnbd3_device_t *dev = bdev->bd_disk->private_data;
@@ -225,7 +140,9 @@ int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, u
dnbd3_blk_fail_all_requests(dev);
result = dnbd3_net_disconnect(dev);
dnbd3_blk_fail_all_requests(dev);
+ blk_mq_freeze_queue(dev->queue);
set_capacity(dev->disk, 0);
+ blk_mq_unfreeze_queue(dev->queue);
if (dev->imgname)
{
kfree(dev->imgname);
@@ -275,48 +192,167 @@ cleanup_return:
return result;
}
-/**
- * dev->blk_lock and q->queue_lock are being held
- * when this is called!
- */
-void dnbd3_blk_request(struct request_queue *q)
+// Block device operations attached to each dnbd3 gendisk (assigned to disk->fops
+// in dnbd3_blk_add_device). Only an ioctl handler is provided.
+static const struct block_device_operations dnbd3_blk_ops = {
+ .owner = THIS_MODULE,
+ .ioctl = dnbd3_blk_ioctl,
+};
+
+/**
+ * blk-mq queue_rq callback: validate one request and hand it to the send queue.
+ *
+ * The request is taken from bd->rq and the device from rq->q->queuedata.
+ * The request is marked as started, then completed immediately with an error
+ * status if one of the checks below fails. Otherwise it is appended to
+ * dev->request_queue_send under dev->blk_lock and the waiters on
+ * dev->process_queue_send are woken up.
+ *
+ * hctx is not used.
+ *
+ * Always returns BLK_STS_OK, also for requests that were already completed
+ * with an error here via blk_mq_end_request().
+ */
+static blk_status_t dnbd3_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd)
{
- struct request *req;
- dnbd3_device_t *dev;
+ struct request *rq = bd->rq;
+ dnbd3_device_t *dev = rq->q->queuedata;
+ unsigned long irqflags;
+
+ blk_mq_start_request(rq);
- while ((req = blk_fetch_request(q)) != NULL)
+ // no image name set on this device -> I/O error
+ // NOTE(review): presumably this means "not connected to an image" - confirm
+ if (dev->imgname == NULL)
{
- dev = req->rq_disk->private_data;
+ blk_mq_end_request(rq, BLK_STS_IOERR);
+ goto out;
+ }
- if (dev->imgname == NULL)
- {
- __blk_end_request_all(req, -EIO);
- continue;
- }
+ // dnbd3_req_fs() is defined outside this hunk; requests it rejects get an I/O error
+ if (!(dnbd3_req_fs(rq)))
+ {
+ blk_mq_end_request(rq, BLK_STS_IOERR);
+ goto out;
+ }
- if (!(dnbd3_req_fs(req)))
- {
- __blk_end_request_all(req, 0);
- continue;
- }
+ // panic counter reached the configured limit -> complete with a timeout status
+ if (PROBE_COUNT_TIMEOUT > 0 && dev->panic_count >= PROBE_COUNT_TIMEOUT)
+ {
+ blk_mq_end_request(rq, BLK_STS_TIMEOUT);
+ goto out;
+ }
- if (PROBE_COUNT_TIMEOUT > 0 && dev->panic_count >= PROBE_COUNT_TIMEOUT)
- {
- __blk_end_request_all(req, -EIO);
- continue;
- }
+ // dnbd3_req_read() is defined outside this hunk; anything it rejects is not supported
+ if (!(dnbd3_req_read(rq)))
+ {
+ blk_mq_end_request(rq, BLK_STS_NOTSUPP);
+ goto out;
+ }
- if (!(dnbd3_req_read(req)))
- {
- __blk_end_request_all(req, -EACCES);
- continue;
- }
+ // enqueue for the send side; the list is protected by dev->blk_lock
+ spin_lock_irqsave(&dev->blk_lock, irqflags);
+ list_add_tail(&rq->queuelist, &dev->request_queue_send);
+ spin_unlock_irqrestore(&dev->blk_lock, irqflags);
+ wake_up(&dev->process_queue_send);
+
+out:
+ return BLK_STS_OK;
+}
+
+// blk-mq operations installed into dev->tag_set.ops by dnbd3_blk_add_device.
+// Only queue_rq is set.
+static const struct blk_mq_ops dnbd3_mq_ops = {
+ .queue_rq = dnbd3_queue_rq,
+};
+
+/**
+ * Initialise a dnbd3 device and register its block device.
+ *
+ * Resets the per-device state (wait queues, request lists, server info,
+ * thread pointers, counters), allocates the blk-mq tag set and request queue,
+ * applies the queue limits, then allocates a gendisk named "dnbd<minor>" with
+ * capacity 0 and the read-only flag set, adds it and sets up sysfs.
+ *
+ * @dev:   device structure to initialise
+ * @minor: first minor number of the disk, also used for the disk name
+ *
+ * Returns 0 on success. On failure returns the error from
+ * blk_mq_alloc_tag_set(), the PTR_ERR() of blk_mq_init_queue(), or -ENOMEM
+ * if alloc_disk() fails; everything allocated up to that point is released.
+ */
+int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor)
+{
+ int ret;
+
+ init_waitqueue_head(&dev->process_queue_send);
+ init_waitqueue_head(&dev->process_queue_receive);
+ init_waitqueue_head(&dev->process_queue_discover);
+ INIT_LIST_HEAD(&dev->request_queue_send);
+ INIT_LIST_HEAD(&dev->request_queue_receive);
+
+ memset(&dev->cur_server, 0, sizeof(dev->cur_server));
+ memset(&dev->initial_server, 0, sizeof(dev->initial_server));
+ dev->better_sock = NULL;
+
+ dev->imgname = NULL;
+ dev->rid = 0;
+ dev->update_available = 0;
+ memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])*NUMBER_SERVERS);
+ dev->thread_send = NULL;
+ dev->thread_receive = NULL;
+ dev->thread_discover = NULL;
+ dev->discover = 0;
+ dev->disconnecting = 0;
+ dev->panic = 0;
+ dev->panic_count = 0;
+ dev->reported_size = 0;
+
+ // set up spin lock for request queues for send and receive
+ spin_lock_init(&dev->blk_lock);
+
+ // set up tag_set for blk-mq
+ // start from a zeroed tag set so that members not assigned below have a
+ // defined value, like every other member of dev reset in this function
+ memset(&dev->tag_set, 0, sizeof(dev->tag_set));
+ dev->tag_set.ops = &dnbd3_mq_ops;
+ dev->tag_set.nr_hw_queues = 1;
+ dev->tag_set.queue_depth = 128;
+ dev->tag_set.numa_node = NUMA_NO_NODE;
+ dev->tag_set.cmd_size = 0;
+ dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+ dev->tag_set.driver_data = dev;
+
+ ret = blk_mq_alloc_tag_set(&dev->tag_set);
+ if (ret)
+ {
+ printk(KERN_ERR "ERROR: dnbd3 blk_mq_alloc_tag_set failed.\n");
+ goto out;
+ }
- list_add_tail(&req->queuelist, &dev->request_queue_send);
- spin_unlock_irq(q->queue_lock);
- wake_up(&dev->process_queue_send);
- spin_lock_irq(q->queue_lock);
+ // set up blk-mq
+ dev->queue = blk_mq_init_queue(&dev->tag_set);
+ if (IS_ERR(dev->queue)) {
+ // log this failure like the other two failure paths do
+ printk(KERN_ERR "ERROR: dnbd3 blk_mq_init_queue failed.\n");
+ ret = PTR_ERR(dev->queue);
+ goto out_cleanup_tags;
}
+ dev->queue->queuedata = dev;
+
+ blk_queue_logical_block_size(dev->queue, DNBD3_BLOCK_SIZE);
+ blk_queue_physical_block_size(dev->queue, DNBD3_BLOCK_SIZE);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0)
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dev->queue);
+#else
+ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, dev->queue);
+#endif
+#define ONE_MEG (1048576)
+ blk_queue_max_segment_size(dev->queue, ONE_MEG);
+ blk_queue_max_segments(dev->queue, 0xffff);
+ blk_queue_max_hw_sectors(dev->queue, ONE_MEG / DNBD3_BLOCK_SIZE);
+ dev->queue->limits.max_sectors = 256;
+#undef ONE_MEG
+
+ // set up disk
+ if (!(dev->disk = alloc_disk(1)))
+ {
+ printk(KERN_ERR "ERROR: dnbd3 alloc_disk failed.\n");
+ ret = -ENOMEM;
+ goto out_cleanup_queue;
+ }
+
+ dev->disk->flags |= GENHD_FL_NO_PART_SCAN;
+ dev->disk->major = major;
+ dev->disk->first_minor = minor;
+ dev->disk->fops = &dnbd3_blk_ops;
+ dev->disk->private_data = dev;
+ dev->disk->queue = dev->queue;
+ sprintf(dev->disk->disk_name, "dnbd%d", minor);
+ set_capacity(dev->disk, 0);
+ set_disk_ro(dev->disk, 1);
+ add_disk(dev->disk);
+
+ // set up sysfs
+ dnbd3_sysfs_init(dev);
+
+ return 0;
+
+out_cleanup_queue:
+ blk_cleanup_queue(dev->queue);
+out_cleanup_tags:
+ blk_mq_free_tag_set(&dev->tag_set);
+ // do not leave an ERR_PTR or an already cleaned up queue behind in dev
+ dev->queue = NULL;
+out:
+ return ret;
+}
+
+/**
+ * Tear down a device that was set up by dnbd3_blk_add_device().
+ *
+ * Removes the sysfs entries, disconnects the network side, then releases the
+ * block layer objects: the gendisk is deleted first, then the request queue
+ * is cleaned up, the tag set is freed and finally the disk reference is
+ * dropped. The return value of dnbd3_net_disconnect() is ignored.
+ *
+ * Always returns 0.
+ */
+int dnbd3_blk_del_device(dnbd3_device_t *dev)
+{
+ dnbd3_sysfs_exit(dev);
+ dnbd3_net_disconnect(dev);
+ del_gendisk(dev->disk);
+ blk_cleanup_queue(dev->queue);
+ blk_mq_free_tag_set(&dev->tag_set);
+ put_disk(dev->disk);
+ return 0;
}
void dnbd3_blk_fail_all_requests(dnbd3_device_t *dev)
@@ -371,7 +407,7 @@ void dnbd3_blk_fail_all_requests(dnbd3_device_t *dev)
if (dnbd3_req_fs(blk_request))
{
spin_lock_irqsave(&dev->blk_lock, flags);
- __blk_end_request_all(blk_request, -EIO);
+ blk_mq_end_request(blk_request, BLK_STS_IOERR);
spin_unlock_irqrestore(&dev->blk_lock, flags);
}
else if (dnbd3_req_special(blk_request))
diff --git a/src/kernel/blk.h b/src/kernel/blk.h
index 5091d19..0afce2e 100644
--- a/src/kernel/blk.h
+++ b/src/kernel/blk.h
@@ -27,12 +27,6 @@
#define REQ_TYPE_SPECIAL REQ_TYPE_DRV_PRIV
#endif
-extern struct block_device_operations dnbd3_blk_ops;
-
-int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg);
-
-void dnbd3_blk_request(struct request_queue *q);
-
int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor);
int dnbd3_blk_del_device(dnbd3_device_t *dev);
diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h
index f8af69f..aceb853 100644
--- a/src/kernel/dnbd3.h
+++ b/src/kernel/dnbd3.h
@@ -25,6 +25,7 @@
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <net/sock.h>
#define KERNEL_MODULE
@@ -46,6 +47,8 @@ typedef struct
{
// block
struct gendisk *disk;
+ struct blk_mq_tag_set tag_set;
+ struct request_queue *queue;
spinlock_t blk_lock;
// sysfs
diff --git a/src/kernel/net.c b/src/kernel/net.c
index 9e48b86..337cfd7 100644
--- a/src/kernel/net.c
+++ b/src/kernel/net.c
@@ -25,13 +25,15 @@
#include "serialize.h"
-#include <linux/time.h>
+#include <linux/ktime.h>
#include <linux/signal.h>
#ifndef MIN
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif
+#define ktime_to_s(kt) ktime_divns(kt, NSEC_PER_SEC)
+
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)
#define dnbd3_sock_create(af,type,proto,sock) sock_create_kern(&init_net, (af) == HOST_IP4 ? AF_INET : AF_INET6, type, proto, sock)
#else
@@ -203,8 +205,8 @@ int dnbd3_net_connect(dnbd3_device_t *dev)
if (dnbd3_sock_create(dev->cur_server.host.type, SOCK_STREAM, IPPROTO_TCP, &dev->sock) < 0)
error_dev("ERROR: Couldn't create socket (v6).");
- kernel_setsockopt(dev->sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout, sizeof(timeout));
- kernel_setsockopt(dev->sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, sizeof(timeout));
+ kernel_setsockopt(dev->sock, SOL_SOCKET, SO_SNDTIMEO_NEW, (char *)&timeout, sizeof(timeout));
+ kernel_setsockopt(dev->sock, SOL_SOCKET, SO_RCVTIMEO_NEW, (char *)&timeout, sizeof(timeout));
dev->sock->sk->sk_allocation = GFP_NOIO;
if (dev->cur_server.host.type == HOST_IP4)
{
@@ -289,8 +291,8 @@ int dnbd3_net_connect(dnbd3_device_t *dev)
debug_dev("INFO: On-the-fly server change.");
dev->sock = dev->better_sock;
dev->better_sock = NULL;
- kernel_setsockopt(dev->sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout, sizeof(timeout));
- kernel_setsockopt(dev->sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, sizeof(timeout));
+ kernel_setsockopt(dev->sock, SOL_SOCKET, SO_SNDTIMEO_NEW, (char *)&timeout, sizeof(timeout));
+ kernel_setsockopt(dev->sock, SOL_SOCKET, SO_RCVTIMEO_NEW, (char *)&timeout, sizeof(timeout));
}
dev->panic = 0;
@@ -459,7 +461,7 @@ int dnbd3_net_discover(void *data)
uint64_t filesize;
uint16_t rid;
- struct timeval start, end;
+ ktime_t start, end;
unsigned long rtt, best_rtt = 0;
unsigned long irqflags;
int i, j, isize, best_server, current_server;
@@ -565,7 +567,7 @@ int dnbd3_net_discover(void *data)
}
if (NUMBER_SERVERS > isize) {
for (i = 0; i < isize; ++i) {
- j = ((start.tv_sec >> i) ^ (start.tv_usec >> j)) % NUMBER_SERVERS;
+ j = ((ktime_to_s(start) >> i) ^ (ktime_to_us(start) >> j)) % NUMBER_SERVERS;
if (j != i) {
mlen = check_order[i];
check_order[i] = check_order[j];
@@ -579,7 +581,7 @@ int dnbd3_net_discover(void *data)
i = check_order[j];
if (dev->alt_servers[i].host.type == 0) // Empty slot
continue;
- if (!dev->panic && dev->alt_servers[i].failures > 50 && (start.tv_usec & 7) != 0) // If not in panic mode, skip server if it failed too many times
+ if (!dev->panic && dev->alt_servers[i].failures > 50 && (ktime_to_us(start) & 7) != 0) // If not in panic mode, skip server if it failed too many times
continue;
if (isize-- <= 0 && !is_same_server(&dev->cur_server, &dev->alt_servers[i]))
continue;
@@ -591,8 +593,8 @@ int dnbd3_net_discover(void *data)
sock = NULL;
continue;
}
- kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout, sizeof(timeout));
- kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, sizeof(timeout));
+ kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO_NEW, (char *)&timeout, sizeof(timeout));
+ kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO_NEW, (char *)&timeout, sizeof(timeout));
sock->sk->sk_allocation = GFP_NOIO;
if (dev->alt_servers[i].host.type == HOST_IP4)
{
@@ -693,7 +695,7 @@ int dnbd3_net_discover(void *data)
iov[0].iov_len = sizeof(dnbd3_request);
// start rtt measurement
- do_gettimeofday(&start);
+ start = ktime_get_real();
if (kernel_sendmsg(sock, &msg, iov, 1, sizeof(dnbd3_request)) <= 0)
error_alt("ERROR: Requesting test block failed (discover).");
@@ -715,10 +717,9 @@ int dnbd3_net_discover(void *data)
if (kernel_recvmsg(sock, &msg, iov, 1, dnbd3_reply.size, msg.msg_flags) != RTT_BLOCK_SIZE)
error_alt("ERROR: Receiving test block payload failed (discover).");
- do_gettimeofday(&end); // end rtt measurement
+ end = ktime_get_real(); // end rtt measurement
- dev->alt_servers[i].rtts[turn] = (unsigned long)((end.tv_sec - start.tv_sec) * 1000000ull
- + (end.tv_usec - start.tv_usec));
+ dev->alt_servers[i].rtts[turn] = (unsigned long) ktime_us_delta(end, start);
rtt = (dev->alt_servers[i].rtts[0] + dev->alt_servers[i].rtts[1] + dev->alt_servers[i].rtts[2]
+ dev->alt_servers[i].rtts[3]) / 4;
@@ -781,7 +782,7 @@ int dnbd3_net_discover(void *data)
continue;
}
- do_change = ready && best_server != current_server && (start.tv_usec & 3) != 0
+ do_change = ready && best_server != current_server && (ktime_to_us(start) & 3) != 0
&& RTT_THRESHOLD_FACTOR(dev->cur_rtt) > best_rtt + 1500;
if (ready && !do_change) {
@@ -823,7 +824,7 @@ int dnbd3_net_discover(void *data)
best_sock = NULL;
}
- if (!ready || (start.tv_usec & 15) != 0)
+ if (!ready || (ktime_to_us(start) & 15) != 0)
turn = (turn + 1) % 4;
if (turn == 2) // Set ready when we only have 2 of 4 measurements for quicker load balancing
ready = 1;
@@ -1032,7 +1033,7 @@ int dnbd3_net_receive(void *data)
}
spin_lock_irqsave(&dev->blk_lock, irqflags);
list_del_init(&blk_request->queuelist);
- __blk_end_request_all(blk_request, 0);
+ blk_mq_end_request(blk_request, BLK_STS_OK);
spin_unlock_irqrestore(&dev->blk_lock, irqflags);
continue;
@@ -1120,4 +1121,3 @@ int dnbd3_net_receive(void *data)
dev->thread_receive = NULL;
return -1;
}
-