summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFrederic Robra2019-07-07 22:13:01 +0200
committerFrederic Robra2019-07-07 22:13:01 +0200
commitf9ec2db3b4d1e0047087393218618cf8c439c336 (patch)
treef24305f0e8725c09c33ddf9f2cca0fa49264e579
parentadded first support for connection with more than one server (diff)
downloaddnbd3-ng-f9ec2db3b4d1e0047087393218618cf8c439c336.tar.gz
dnbd3-ng-f9ec2db3b4d1e0047087393218618cf8c439c336.tar.xz
dnbd3-ng-f9ec2db3b4d1e0047087393218618cf8c439c336.zip
added first draft for keepalive and discovery
-rw-r--r--Kbuild.in2
-rw-r--r--src/kernel/block.c399
-rw-r--r--src/kernel/block.h26
-rw-r--r--src/kernel/core.c304
-rw-r--r--src/kernel/dnbd3.h27
-rw-r--r--src/kernel/mq.c237
-rw-r--r--src/kernel/mq.h16
-rw-r--r--src/kernel/net.c528
-rw-r--r--src/kernel/net.h33
-rw-r--r--src/kernel/sysfs.c19
-rw-r--r--src/kernel/sysfs.h12
-rw-r--r--src/kernel/utils.c3
12 files changed, 888 insertions, 718 deletions
diff --git a/Kbuild.in b/Kbuild.in
index 566a58b..68c1d02 100644
--- a/Kbuild.in
+++ b/Kbuild.in
@@ -1,2 +1,2 @@
obj-m := ${MODULE_NAME}.o
-${MODULE_NAME}-objs += core.o sysfs.o block.o mq.o utils.o serialize_kmod.o
+${MODULE_NAME}-objs += core.o sysfs.o net.o utils.o serialize_kmod.o
diff --git a/src/kernel/block.c b/src/kernel/block.c
deleted file mode 100644
index 4295418..0000000
--- a/src/kernel/block.c
+++ /dev/null
@@ -1,399 +0,0 @@
-/*
- * block.c
- *
- * Created on: Jun 26, 2019
- * Author: fred
- */
-
-#include <net/sock.h>
-
-#include "dnbd3.h"
-#include "block.h"
-#include "clientconfig.h"
-
-#define dnbd3_sock_create(af,type,proto,sock) sock_create_kern(&init_net, (af) == HOST_IP4 ? AF_INET : AF_INET6, type, proto, sock)
-
-
-
-
-void dnbd3_blk_fail_all_requests(dnbd3_device_t *dev)
-{
- printk(KERN_DEBUG "dnbd3: fail all requests device %i\n", dev->minor);
-}
-
-
-int dnbd3_socket_connect(dnbd3_device_t *dev, dnbd3_sock *sock)
-{
- int result = -EIO;
- struct request *req1 = NULL;
- struct timeval timeout;
- struct dnbd3_server_t *server = sock->server;
- dnbd3_request_t dnbd3_request;
- dnbd3_reply_t dnbd3_reply;
- struct msghdr msg;
- struct kvec iov[2];
- uint16_t rid;
- uint64_t reported_size;
- char *name;
- int mlen;
- serialized_buffer_t payload_buffer;
-
- printk(KERN_DEBUG "dnbd3: socket connect device %i\n", dev->minor);
-
- mutex_init(&sock->lock);
- mutex_lock(&sock->lock);
- if (sock->pending) {
- printk(KERN_DEBUG "dnbd3: socket still in request\n");
- while (sock->pending)
- schedule();
- }
- if (server->host.port == 0 || server->host.type == 0) {
- printk(KERN_ERR "dnbd3: host or port not set\n");
- goto error;
- }
- if (sock->sock) {
- printk(KERN_WARNING "dnbd3: socket already connected\n");
- goto error;
- }
-
- timeout.tv_sec = SOCKET_TIMEOUT_CLIENT_DATA;
- timeout.tv_usec = 0;
-
- req1 = kmalloc(sizeof(*req1), GFP_ATOMIC );
- if (!req1) {
- printk(KERN_ERR "dnbd3: kmalloc failed\n");
- goto error;
- }
-
- init_msghdr(msg);
-
- if (dnbd3_sock_create(server->host.type, SOCK_STREAM, IPPROTO_TCP, &sock->sock) < 0) {
- printk(KERN_ERR "dnbd3: could not create socket\n");
- goto error;
- }
-
- kernel_setsockopt(sock->sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout, sizeof(timeout));
- kernel_setsockopt(sock->sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, sizeof(timeout));
- sock->sock->sk->sk_allocation = GFP_NOIO;
- if (server->host.type == HOST_IP4) {
- struct sockaddr_in sin;
- memset(&sin, 0, sizeof(sin));
- sin.sin_family = AF_INET;
- memcpy(&(sin.sin_addr), server->host.addr, 4);
- sin.sin_port = server->host.port;
- if (kernel_connect(sock->sock, (struct sockaddr *)&sin, sizeof(sin), 0) != 0) {
- printk(KERN_ERR "dnbd3: connection to host failed (ipv4)\n");
- goto error;
- }
- } else {
- struct sockaddr_in6 sin;
- memset(&sin, 0, sizeof(sin));
- sin.sin6_family = AF_INET6;
- memcpy(&(sin.sin6_addr), server->host.addr, 16);
- sin.sin6_port = server->host.port;
- if (kernel_connect(sock->sock, (struct sockaddr *)&sin, sizeof(sin), 0) != 0){
- printk(KERN_ERR "dnbd3: connection to host failed (ipv6)\n");
- goto error;
- }
- }
- // Request filesize
- dnbd3_request.magic = dnbd3_packet_magic;
- dnbd3_request.cmd = CMD_SELECT_IMAGE;
- iov[0].iov_base = &dnbd3_request;
- iov[0].iov_len = sizeof(dnbd3_request);
- serializer_reset_write(&payload_buffer);
- serializer_put_uint16(&payload_buffer, PROTOCOL_VERSION);
- serializer_put_string(&payload_buffer, dev->imgname);
- serializer_put_uint16(&payload_buffer, dev->rid);
- serializer_put_uint8(&payload_buffer, 0); // is_server = false
- iov[1].iov_base = &payload_buffer;
- dnbd3_request.size = iov[1].iov_len = serializer_get_written_length(&payload_buffer);
- fixup_request(dnbd3_request);
- mlen = sizeof(dnbd3_request) + iov[1].iov_len;
- if (kernel_sendmsg(sock->sock, &msg, iov, 2, mlen) != mlen) {
- printk(KERN_ERR "dnbd3: could not send CMD_SIZE_REQUEST\n");
- goto error;
- }
- // receive reply header
- iov[0].iov_base = &dnbd3_reply;
- iov[0].iov_len = sizeof(dnbd3_reply);
- if (kernel_recvmsg(sock->sock, &msg, iov, 1, sizeof(dnbd3_reply), msg.msg_flags) != sizeof(dnbd3_reply)) {
- printk(KERN_ERR "dnbd3: received corrupted reply header after CMD_SIZE_REQUEST\n");
- goto error;
- }
-
- // check reply header
- fixup_reply(dnbd3_reply);
- if (dnbd3_reply.cmd != CMD_SELECT_IMAGE ||
- dnbd3_reply.size < 3 ||
- dnbd3_reply.size > MAX_PAYLOAD ||
- dnbd3_reply.magic != dnbd3_packet_magic) {
- printk(KERN_ERR "dnbd3: received invalid reply to CMD_SIZE_REQUEST image does not exist on server\n");
- goto error;
- }
-
- // receive reply payload
- iov[0].iov_base = &payload_buffer;
- iov[0].iov_len = dnbd3_reply.size;
- if (kernel_recvmsg(sock->sock, &msg, iov, 1, dnbd3_reply.size, msg.msg_flags) != dnbd3_reply.size) {
- printk(KERN_ERR "dnbd3: could not read CMD_SELECT_IMAGE payload on handshake\n");
- goto error;
- }
-
- // handle/check reply payload
- serializer_reset_read(&payload_buffer, dnbd3_reply.size);
- server->protocol_version = serializer_get_uint16(&payload_buffer);
- if (server->protocol_version < MIN_SUPPORTED_SERVER) {
- printk(KERN_ERR "dnbd3: server version is lower than min supported version\n");
- goto error;
- }
-
- name = serializer_get_string(&payload_buffer);
- rid = serializer_get_uint16(&payload_buffer);
- if (dev->rid != rid && strcmp(name, dev->imgname) != 0) {
- printk(KERN_ERR "dnbd3: server offers image '%s', requested '%s'\n", name, dev->imgname);
- goto error;
- }
-
- reported_size = serializer_get_uint64(&payload_buffer);
- if (dev->reported_size == NULL) {
- if (reported_size < 4096) {
- printk(KERN_ERR "dnbd3: reported size by server is < 4096\n");
- goto error;
- }
- dev->reported_size = reported_size;
- set_capacity(dev->disk, dev->reported_size >> 9); /* 512 Byte blocks */
- } else if (dev->reported_size != reported_size) {
- printk(KERN_ERR "dnbd3: reported size by server is %llu but should be %llu\n", reported_size, dev->reported_size);
- }
-
- printk(KERN_DEBUG "dnbd3: connected to image %s, filesize %llu\n", dev->imgname, dev->reported_size);
-
- mutex_unlock(&sock->lock);
-
-// TODO add heartbeat
- // timer_setup(&dev->hb_timer, dnbd3_net_heartbeat, 0);
- // dev->hb_timer.expires = jiffies + HZ;
- // add_timer(&dev->hb_timer);
-
- return 0;
-error:
- if (sock->sock) {
- sock_release(sock->sock);
- sock->sock = NULL;
- }
- if (req1) {
- kfree(req1);
- }
- mutex_unlock(&sock->lock);
- return result;
-}
-
-int dnbd3_socket_disconnect(dnbd3_device_t *dev, dnbd3_sock *sock)
-{
- printk(KERN_DEBUG "dnbd3: socket disconnect device %i\n", dev->minor);
- mutex_lock(&sock->lock);
-
- // clear heartbeat timer
-// del_timer(&dev->hb_timer);
-
-
- if (sock->sock) {
- kernel_sock_shutdown(sock->sock, SHUT_RDWR);
- }
-
- // clear socket
- if (sock->sock) {
- sock_release(sock->sock);
- sock->sock = NULL;
- }
-
- mutex_unlock(&sock->lock);
- mutex_destroy(&sock->lock);
- return 0;
-}
-
-int dnbd3_net_disconnect(struct dnbd3_device_t *dev) {
- int i;
- int result;
- for (i = 0; i < NUMBER_CONNECTIONS; i++) {
- if (dev->socks[i].sock) {
- if (dnbd3_socket_disconnect(dev, &dev->socks[i])) {
- result = -EIO;
- }
- }
- }
- return result;
-}
-
-
-static void printHost(struct dnbd3_host_t *host, char *msg)
-{
- if (host->type == HOST_IP4) {
- printk(KERN_INFO "dnbd3: %s %pI4:%d", msg, host->addr, host->port);
- } else {
- printk(KERN_INFO "dnbd3: %s [%pI6]:%d", msg, host->addr, host->port);
- }
-}
-
-static void printServerList(struct dnbd3_device_t *dev)
-{
- int i;
- printHost(&dev->initial_server.host, "initial server is");
- for (i = 0; i < NUMBER_SERVERS; i++) {
- if (dev->alt_servers[i].host.addr[0] != 0) {
- printHost(&dev->alt_servers[i].host, "alternative server is");
- }
- }
-}
-
-static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
-{
- int result = -100;
- dnbd3_device_t *dev = bdev->bd_disk->private_data;
- char *imgname = NULL;
- dnbd3_ioctl_t *msg = NULL;
- int i;
-
- printk(KERN_DEBUG "dnbd3: ioctl device %i, cmd %i, arg %lu\n", dev->minor, cmd, arg);
- //unsigned long irqflags;
-
-// while (dev->disconnecting) {
-// // do nothing
-// }
-
- if (arg != 0) {
- msg = kmalloc(sizeof(*msg), GFP_KERNEL);
- if (msg == NULL) return -ENOMEM;
- if (copy_from_user((char *)msg, (char *)arg, 2) != 0 || msg->len != sizeof(*msg)) {
- result = -ENOEXEC;
- goto cleanup_return;
- }
- if (copy_from_user((char *)msg, (char *)arg, sizeof(*msg)) != 0) {
- result = -ENOENT;
- goto cleanup_return;
- }
- if (msg->imgname != NULL && msg->imgnamelen > 0) {
- imgname = kmalloc(msg->imgnamelen + 1, GFP_KERNEL);
- if (imgname == NULL) {
- result = -ENOMEM;
- goto cleanup_return;
- }
- if (copy_from_user(imgname, msg->imgname, msg->imgnamelen) != 0) {
- result = -ENOENT;
- goto cleanup_return;
- }
- imgname[msg->imgnamelen] = '\0';
-
- printk(KERN_DEBUG "dnbd3: ioctl image name of len %i is %s\n", (int)msg->imgnamelen, imgname);
- }
- }
-
- switch (cmd) {
- case IOCTL_OPEN:
- printk(KERN_DEBUG "dnbd3: ioctl open\n");
- if (dev->imgname != NULL) {
- result = -EBUSY;
- } else if (imgname == NULL) {
- result = -EINVAL;
- } else if (msg == NULL) {
- result = -EINVAL;
- } else {
- if (sizeof(msg->host) != sizeof(dev->initial_server.host)) {
- printk(KERN_INFO "dnbd3: odd size bug#1 triggered in ioctl\n");
- }
- memcpy(&dev->initial_server.host, &msg->host, sizeof(msg->host));
- dev->initial_server.failures = 0;
-// memcpy(&dev->initial_server, &dev->cur_server, sizeof(dev->initial_server));
- dev->imgname = imgname;
- dev->rid = msg->rid;
- dev->use_server_provided_alts = msg->use_server_provided_alts;
- // Forget all alt servers on explicit connect, set first alt server to initial server
- memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])*NUMBER_SERVERS);
- memcpy(dev->alt_servers, &dev->initial_server, sizeof(dev->alt_servers[0]));
-//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
-// if (blk_queue->backing_dev_info != NULL) {
-// blk_queue->backing_dev_info->ra_pages = (msg->read_ahead_kb * 1024) / PAGE_SIZE;
-// }
-//#else
-// blk_queue->backing_dev_info.ra_pages = (msg->read_ahead_kb * 1024) / PAGE_SIZE;
-//#endif
-
-
- dev->socks[0].server = &dev->initial_server;
- if (dnbd3_socket_connect(dev, &dev->socks[0]) == 0) {
- printServerList(dev);
- result = 0;
- imgname = NULL; // Prevent kfree at the end
- } else {
- printk(KERN_ERR "dnbd3: failed to connect to initial server\n");
- result = -ENOENT;
- dev->imgname = NULL;
- dev->socks[0].server = NULL;
- }
-
- }
- break;
-
- case IOCTL_CLOSE:
- printk(KERN_DEBUG "dnbd3: ioctl close\n");
- dnbd3_blk_fail_all_requests(dev);
- dnbd3_net_disconnect(dev);
- dnbd3_blk_fail_all_requests(dev);
- set_capacity(dev->disk, 0);
- if (dev->imgname) {
- kfree(dev->imgname);
- dev->imgname = NULL;
- }
- break;
-
- case IOCTL_SWITCH:
- printk(KERN_DEBUG "dnbd3: ioctl switch\n");
- result = -EINVAL;
- break;
-
- case IOCTL_ADD_SRV:
- case IOCTL_REM_SRV:
- printk(KERN_DEBUG "dnbd3: ioctl add/rem srv\n");
- if (dev->imgname == NULL) {
- result = -ENOENT;
- } else if (dev->new_servers_num >= NUMBER_SERVERS) {
- result = -EAGAIN;
- } else if (msg == NULL) {
- result = -EINVAL;
- } else {
- memcpy(&dev->new_servers[dev->new_servers_num].host, &msg->host, sizeof(msg->host));
- dev->new_servers[dev->new_servers_num].failures = (cmd == IOCTL_ADD_SRV ? 0 : 1); // 0 = ADD, 1 = REM
- ++dev->new_servers_num;
- result = 0;
- }
- break;
-
- case BLKFLSBUF:
- printk(KERN_DEBUG "dnbd3: ioctl blkflsbuf\n");
- result = 0;
- break;
-
- default:
- printk(KERN_DEBUG "dnbd3: ioctl unhandled cmd %d\n", cmd);
- result = -EIO;
- break;
- }
-
-cleanup_return:
- if (msg) kfree(msg);
- if (imgname) kfree(imgname);
- return result;
-
-}
-
-
-
-struct block_device_operations dnbd3_fops =
-{
- .owner = THIS_MODULE,
- .ioctl = dnbd3_ioctl,
- .compat_ioctl = dnbd3_ioctl,
-};
-
-
diff --git a/src/kernel/block.h b/src/kernel/block.h
deleted file mode 100644
index 49055d5..0000000
--- a/src/kernel/block.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * block.h
- *
- * Created on: Jun 26, 2019
- * Author: fred
- */
-
-#ifndef SRC_KERNEL_BLOCK_H_
-#define SRC_KERNEL_BLOCK_H_
-
-#define init_msghdr(h) do { \
- h.msg_name = NULL; \
- h.msg_namelen = 0; \
- h.msg_control = NULL; \
- h.msg_controllen = 0; \
- h.msg_flags = MSG_WAITALL | MSG_NOSIGNAL; \
- } while (0)
-
-//int dnbd3_net_connect(dnbd3_device_t *dev);
-
-int dnbd3_net_disconnect(dnbd3_device_t *dev);
-
-extern struct block_device_operations dnbd3_fops;
-
-
-#endif /* SRC_KERNEL_BLOCK_H_ */
diff --git a/src/kernel/core.c b/src/kernel/core.c
index db1f6de..48f809b 100644
--- a/src/kernel/core.c
+++ b/src/kernel/core.c
@@ -48,27 +48,298 @@
#include <asm/types.h>
#include "dnbd3.h"
-#include "clientconfig.h"
#include "sysfs.h"
-#include "block.h"
-#include "mq.h"
+#include "clientconfig.h"
+#include "net.h"
+
+#define DNBD3_CMD_REQUEUED 1
DEFINE_IDR(dnbd3_index_idr);
DEFINE_MUTEX(dnbd3_index_mutex);
static unsigned int max_devs = NUMBER_DEVICES;
-static dnbd3_device_t *dnbd3_device;
+static dnbd3_device *device;
int major;
+static void dnbd3_requeue_cmd(struct dnbd3_cmd *cmd)
+{
+ struct request *req = blk_mq_rq_from_pdu(cmd);
+
+ if (!test_and_set_bit(DNBD3_CMD_REQUEUED, &cmd->flags)) {
+ blk_mq_requeue_request(req, true);
+ }
+}
+
+static int dnbd3_handle_cmd(struct dnbd3_cmd *cmd, int index)
+{
+ struct request *req = blk_mq_rq_from_pdu(cmd);
+ struct dnbd3_device *dev = cmd->dnbd3;
+ struct dnbd3_sock *sock;
+ int ret = -1;
+
+ printk(KERN_DEBUG "dnbd3: handle request at position %lu and size %d, device %i\n", blk_rq_pos(req), blk_rq_bytes(req), dev->minor);
+
+// if (index >= 1) { // TODO use next server with good rtt for this request
+// printk(KERN_INFO "dnbd3: index is %d", index);
+// dev_err_ratelimited(disk_to_dev(dev->disk), "attempted send on invalid socket\n");
+// blk_mq_start_request(req);
+// return -EINVAL;
+// }
+
+ sock = &dev->socks[index];
+ if (!sock->sock) {
+ printk(KERN_INFO "dnbd3: index is %d but no socket was found\n", index);
+ dev_err_ratelimited(disk_to_dev(dev->disk), "attempted send on invalid socket\n");
+ blk_mq_start_request(req);
+ return -EINVAL;
+ }
+
+
+ cmd->status = BLK_STS_OK;
+
+again:
+
+
+ mutex_lock(&sock->lock);
+ if (unlikely(!sock->sock)) {
+ mutex_unlock(&sock->lock);
+ printk(KERN_DEBUG "dnbd3: not connected\n");
+ return -EIO;
+ }
+
+ blk_mq_start_request(req);
+ if (unlikely(sock->pending && sock->pending != req)) {
+ dnbd3_requeue_cmd(cmd);
+ ret = 0;
+ goto out;
+ }
+
+ ret = dnbd3_send_request(dev, sock, blk_mq_rq_from_pdu(cmd));
+ if (ret == -EAGAIN) {
+ dev_err_ratelimited(disk_to_dev(dev->disk), "request send failed, requeueing\n");
+ dnbd3_requeue_cmd(cmd);
+ ret = 0;
+ }
+out:
+ mutex_unlock(&sock->lock);
+ return ret;
+}
+
+static blk_status_t dnbd3_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd)
+{
+ struct dnbd3_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+ int ret;
+ struct dnbd3_device *dev = cmd->dnbd3;
+
+ printk(KERN_DEBUG "dnbd3: queue request device %i\n", dev->minor);
+
-static int dnbd3_add_device(dnbd3_device_t *dev, int minor)
+ mutex_lock(&cmd->lock);
+ clear_bit(DNBD3_CMD_REQUEUED, &cmd->flags);
+
+
+ ret = dnbd3_handle_cmd(cmd, hctx->queue_num);
+ if (ret < 0) {
+ ret = BLK_STS_IOERR;
+ } else if (!ret) {
+ ret = BLK_STS_OK;
+ }
+ mutex_unlock(&cmd->lock);
+
+ return ret;
+}
+
+static void dnbd3_complete_rq(struct request *req)
+{
+ printk(KERN_DEBUG "dnbd3: dnbd3_complete_rq\n");
+
+}
+
+static int dnbd3_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node)
+{
+ struct dnbd3_cmd *cmd = blk_mq_rq_to_pdu(rq);
+ cmd->dnbd3 = set->driver_data;
+ cmd->flags = 0;
+ mutex_init(&cmd->lock);
+ return 0;
+}
+static enum blk_eh_timer_return dnbd3_xmit_timeout(struct request *req, bool reserved)
+{
+ printk(KERN_DEBUG "dnbd3: dnbd3_xmit_timeout\n");
+ return BLK_EH_DONE;
+}
+
+
+static struct blk_mq_ops dnbd3_mq_ops = {
+ .queue_rq = dnbd3_queue_rq,
+ .complete = dnbd3_complete_rq,
+ .init_request = dnbd3_init_request,
+ .timeout = dnbd3_xmit_timeout,
+};
+
+
+
+
+static void dnbd3_blk_fail_all_requests(dnbd3_device *dev)
+{
+ printk(KERN_DEBUG "dnbd3: fail all requests device %i\n", dev->minor);
+}
+
+
+
+static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
+{
+ int result = -100;
+ dnbd3_device *dev = bdev->bd_disk->private_data;
+ char *imgname = NULL;
+ dnbd3_ioctl_t *msg = NULL;
+
+ printk(KERN_DEBUG "dnbd3: ioctl device %i, cmd %i, arg %lu\n", dev->minor, cmd, arg);
+ //unsigned long irqflags;
+
+// while (dev->disconnecting) {
+// // do nothing
+// }
+
+ if (arg != 0) {
+ msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+ if (msg == NULL) return -ENOMEM;
+ if (copy_from_user((char *)msg, (char *)arg, 2) != 0 || msg->len != sizeof(*msg)) {
+ result = -ENOEXEC;
+ goto cleanup_return;
+ }
+ if (copy_from_user((char *)msg, (char *)arg, sizeof(*msg)) != 0) {
+ result = -ENOENT;
+ goto cleanup_return;
+ }
+ if (msg->imgname != NULL && msg->imgnamelen > 0) {
+ imgname = kmalloc(msg->imgnamelen + 1, GFP_KERNEL);
+ if (imgname == NULL) {
+ result = -ENOMEM;
+ goto cleanup_return;
+ }
+ if (copy_from_user(imgname, msg->imgname, msg->imgnamelen) != 0) {
+ result = -ENOENT;
+ goto cleanup_return;
+ }
+ imgname[msg->imgnamelen] = '\0';
+
+ printk(KERN_DEBUG "dnbd3: ioctl image name of len %i is %s\n", (int)msg->imgnamelen, imgname);
+ }
+ }
+
+ mutex_lock(&dev->device_lock);
+ switch (cmd) {
+ case IOCTL_OPEN:
+ printk(KERN_DEBUG "dnbd3: ioctl open\n");
+ if (dev->imgname != NULL) {
+ result = -EBUSY;
+ } else if (imgname == NULL) {
+ result = -EINVAL;
+ } else if (msg == NULL) {
+ result = -EINVAL;
+ } else {
+ if (sizeof(msg->host) != sizeof(dev->initial_server.host)) {
+ printk(KERN_INFO "dnbd3: odd size bug#1 triggered in ioctl\n");
+ }
+ memcpy(&dev->initial_server.host, &msg->host, sizeof(msg->host));
+ dev->initial_server.failures = 0;
+// memcpy(&dev->initial_server, &dev->cur_server, sizeof(dev->initial_server));
+ dev->imgname = imgname;
+ dev->rid = msg->rid;
+ dev->use_server_provided_alts = msg->use_server_provided_alts;
+ // Forget all alt servers on explicit connect, set first alt server to initial server
+ memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])*NUMBER_SERVERS);
+ memcpy(dev->alt_servers, &dev->initial_server, sizeof(dev->alt_servers[0]));
+//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+// if (blk_queue->backing_dev_info != NULL) {
+// blk_queue->backing_dev_info->ra_pages = (msg->read_ahead_kb * 1024) / PAGE_SIZE;
+// }
+//#else
+// blk_queue->backing_dev_info.ra_pages = (msg->read_ahead_kb * 1024) / PAGE_SIZE;
+//#endif
+
+
+ result = dnbd3_net_connect(dev);
+ imgname = NULL;
+ }
+ break;
+
+ case IOCTL_CLOSE:
+ printk(KERN_DEBUG "dnbd3: ioctl close\n");
+ dnbd3_blk_fail_all_requests(dev);
+ dnbd3_net_disconnect(dev);
+ dnbd3_blk_fail_all_requests(dev);
+ set_capacity(dev->disk, 0);
+ if (dev->imgname) {
+ kfree(dev->imgname);
+ dev->imgname = NULL;
+ }
+ break;
+
+ case IOCTL_SWITCH:
+ printk(KERN_DEBUG "dnbd3: ioctl switch\n");
+ result = -EINVAL;
+ break;
+
+ case IOCTL_ADD_SRV:
+ case IOCTL_REM_SRV:
+ printk(KERN_DEBUG "dnbd3: ioctl add/rem srv\n");
+ if (dev->imgname == NULL) {
+ result = -ENOENT;
+ } else if (dev->new_servers_num >= NUMBER_SERVERS) {
+ result = -EAGAIN;
+ } else if (msg == NULL) {
+ result = -EINVAL;
+ } else {
+ memcpy(&dev->new_servers[dev->new_servers_num].host, &msg->host, sizeof(msg->host));
+ dev->new_servers[dev->new_servers_num].failures = (cmd == IOCTL_ADD_SRV ? 0 : 1); // 0 = ADD, 1 = REM
+ ++dev->new_servers_num;
+ result = 0;
+ }
+ break;
+
+ case BLKFLSBUF:
+ printk(KERN_DEBUG "dnbd3: ioctl blkflsbuf\n");
+ result = 0;
+ break;
+
+ default:
+ printk(KERN_DEBUG "dnbd3: ioctl unhandled cmd %d\n", cmd);
+ result = -EIO;
+ break;
+ }
+ mutex_unlock(&dev->device_lock);
+cleanup_return:
+ if (msg) kfree(msg);
+ if (imgname) kfree(imgname);
+ return result;
+
+}
+
+
+
+static struct block_device_operations dnbd3_fops =
+{
+ .owner = THIS_MODULE,
+ .ioctl = dnbd3_ioctl,
+ .compat_ioctl = dnbd3_ioctl,
+};
+
+
+
+
+
+int dnbd3_add_device(dnbd3_device *dev, int minor)
{
struct gendisk *disk;
struct request_queue *q;
int err = -ENOMEM;
printk(KERN_DEBUG "dnbd3: adding device %i\n", minor);
+ mutex_init(&dev->device_lock);
+ mutex_lock(&dev->device_lock);
disk = alloc_disk(1);
if (!disk) {
@@ -129,6 +400,9 @@ static int dnbd3_add_device(dnbd3_device_t *dev, int minor)
printk(KERN_DEBUG "dnbd3: add disk device %s\n", disk->disk_name);
add_disk(disk);
dnbd3_sysfs_init(dev);
+
+
+ mutex_unlock(&dev->device_lock);
return minor;
out_free_tags:
@@ -139,12 +413,14 @@ out_free_disk:
put_disk(disk);
out_free_dnbd3:
kfree(dev);
+ mutex_unlock(&dev->device_lock);
printk(KERN_DEBUG "dnbd3: destroy device %i\n", minor);
return err;
}
+
static int __init dnbd3_init(void)
{
int i;
@@ -156,8 +432,8 @@ static int __init dnbd3_init(void)
}
- dnbd3_device = kcalloc(max_devs, sizeof(*dnbd3_device), GFP_KERNEL);
- if (!dnbd3_device) {
+ device = kcalloc(max_devs, sizeof(*device), GFP_KERNEL);
+ if (!device) {
printk(KERN_ERR "dnbd3: failed to create dnbd3 device\n");
return -ENOMEM;
}
@@ -174,7 +450,7 @@ static int __init dnbd3_init(void)
// add MAX_NUMBER_DEVICES devices
mutex_lock(&dnbd3_index_mutex);
for (i = 0; i < max_devs; i++) {
- dnbd3_add_device(&dnbd3_device[i], i);
+ dnbd3_add_device(&device[i], i);
}
mutex_unlock(&dnbd3_index_mutex);
@@ -187,13 +463,13 @@ static int __init dnbd3_init(void)
static int dnbd3_exit_cb(int id, void *ptr, void *data)
{
struct list_head *list = (struct list_head *)data;
- struct dnbd3_device_t *dnbd3 = ptr;
+ struct dnbd3_device *dnbd3 = ptr;
list_add_tail(&dnbd3->list, list);
return 0;
}
-static void dnbd3_dev_remove(struct dnbd3_device_t *dnbd3)
+static void dnbd3_dev_remove(struct dnbd3_device *dnbd3)
{
struct gendisk *disk = dnbd3->disk;
struct request_queue *q;
@@ -209,7 +485,7 @@ static void dnbd3_dev_remove(struct dnbd3_device_t *dnbd3)
}
}
-static void dnbd3_put(struct dnbd3_device_t *dnbd3)
+static void dnbd3_put(struct dnbd3_device *dnbd3)
{
mutex_lock(&dnbd3_index_mutex);
idr_remove(&dnbd3_index_idr, dnbd3->minor);
@@ -220,7 +496,7 @@ static void dnbd3_put(struct dnbd3_device_t *dnbd3)
static void __exit dnbd3_exit(void)
{
- dnbd3_device_t *dnbd3;
+ dnbd3_device *dnbd3;
LIST_HEAD(del_list);
printk(KERN_DEBUG "dnbd3: stopping kernel module\n");
@@ -229,7 +505,7 @@ static void __exit dnbd3_exit(void)
mutex_unlock(&dnbd3_index_mutex);
while (!list_empty(&del_list)) {
- dnbd3 = list_first_entry(&del_list, struct dnbd3_device_t, list);
+ dnbd3 = list_first_entry(&del_list, struct dnbd3_device, list);
dnbd3_sysfs_exit(dnbd3);
list_del_init(&dnbd3->list);
dnbd3_put(dnbd3);
@@ -238,7 +514,7 @@ static void __exit dnbd3_exit(void)
idr_destroy(&dnbd3_index_idr);
unregister_blkdev(major, "dnbd3");
- kfree(dnbd3_device);
+ kfree(device);
printk(KERN_INFO "dnbd3: stopped kernel module\n");
}
diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h
index 7ad6b2f..57f26f6 100644
--- a/src/kernel/dnbd3.h
+++ b/src/kernel/dnbd3.h
@@ -33,22 +33,27 @@
#define NUMBER_CONNECTIONS 4
-typedef struct dnbd3_server_t {
+typedef struct dnbd3_server {
dnbd3_host_t host;
uint64_t rtts[4]; // Last four round trip time measurements in microsecond
uint16_t protocol_version; // dnbd3 protocol version of this server
uint8_t failures; // How many times the server was unreachable
-} dnbd3_server_t;
+} dnbd3_server;
typedef struct dnbd3_sock {
struct socket *sock;
struct mutex lock;
struct request *pending;
- struct dnbd3_server_t *server;
+ struct dnbd3_server *server;
+ uint32_t heartbeat_count;
+ uint8_t panic, discover, panic_count;
+// struct dnbd3_device *container_of;
+ struct work_struct keepalive;
+ struct timer_list keepalive_timer;
} dnbd3_sock;
-typedef struct dnbd3_device_t {
+typedef struct dnbd3_device {
int minor;
struct blk_mq_tag_set tag_set;
struct list_head list;
@@ -59,28 +64,32 @@ typedef struct dnbd3_device_t {
// sysfs
struct kobject kobj;
+ struct mutex device_lock;
+
// network
dnbd3_sock socks[NUMBER_CONNECTIONS];
char *imgname;
// struct socket *sock;
// struct mutex socket_lock;
// struct request *pending;
- dnbd3_server_t initial_server;
+ dnbd3_server initial_server;
// dnbd3_server_t cur_server, initial_server;
// uint64_t cur_rtt;
// serialized_buffer_t payload_buffer;
- dnbd3_server_t alt_servers[NUMBER_SERVERS]; // array of alt servers
+ dnbd3_server alt_servers[NUMBER_SERVERS]; // array of alt servers
int new_servers_num; // number of new alt servers that are waiting to be copied to above array
dnbd3_server_entry_t new_servers[NUMBER_SERVERS]; // pending new alt servers
// uint8_t discover, panic, disconnecting, update_available, panic_count;
uint8_t update_available;
uint8_t use_server_provided_alts;
uint16_t rid;
- uint32_t heartbeat_count;
+// uint32_t heartbeat_count;
uint64_t reported_size;
// server switch
// struct socket *better_sock;
+ struct work_struct discovery; // if in irq and need to send request
+ struct timer_list discovery_timer;
// process
// struct task_struct * thread_send;
@@ -93,11 +102,11 @@ typedef struct dnbd3_device_t {
// struct list_head request_queue_send;
// struct list_head request_queue_receive;
-} dnbd3_device_t;
+} dnbd3_device;
typedef struct dnbd3_cmd {
- struct dnbd3_device_t *dnbd3;
+ struct dnbd3_device *dnbd3;
struct mutex lock;
int index;
int cookie;
diff --git a/src/kernel/mq.c b/src/kernel/mq.c
deleted file mode 100644
index 98e6e14..0000000
--- a/src/kernel/mq.c
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * mq.c
- *
- * Created on: Jun 26, 2019
- * Author: fred
- */
-
-#include <net/sock.h>
-
-#include "mq.h"
-#include "block.h"
-
-#define DNBD3_CMD_REQUEUED 1
-
-
-#define dnbd3_priv_to_cmd(req) ((req)->cmd_flags >> REQ_FLAG_BITS)
-
-static int dnbd3_send_cmd(struct dnbd3_device_t *dev, struct dnbd3_sock *sock, struct dnbd3_cmd *cmd, int index)
-{
- struct request *req = blk_mq_rq_from_pdu(cmd);
- dnbd3_request_t dnbd3_request;
- dnbd3_reply_t dnbd3_reply;
- struct msghdr msg;
- struct kvec iov;
- struct req_iterator iter;
- struct bio_vec bvec_inst;
- struct bio_vec *bvec = &bvec_inst;
- sigset_t blocked, oldset;
- void *kaddr;
- int result;
- sock->pending = req;
- init_msghdr(msg);
-
- dnbd3_request.magic = dnbd3_packet_magic;
-
- switch (req_op(req)) {
-// case REQ_OP_DISCARD:
-// printk(KERN_DEBUG "dnbd3: request operation discard on device %d\n", dev->minor);
-// break;
-// case REQ_OP_FLUSH:
-// printk(KERN_DEBUG "dnbd3: request operation flush on device %d\n", dev->minor);
-// break;
-// case REQ_OP_WRITE:
-// printk(KERN_DEBUG "dnbd3: request operation write on device %d\n", dev->minor);
-// break;
- case REQ_OP_READ:
- printk(KERN_DEBUG "dnbd3: request operation read on device %d\n", dev->minor);
- dnbd3_request.cmd = CMD_GET_BLOCK;
- dnbd3_request.offset = blk_rq_pos(req) << 9; // *512
- dnbd3_request.size = blk_rq_bytes(req); // bytes left to complete entire request
- break;
- case REQ_OP_DRV_IN:
- printk(KERN_DEBUG "dnbd3: request operation driver in on device %d\n", dev->minor);
- dnbd3_request.cmd = dnbd3_priv_to_cmd(req);
- dnbd3_request.size = 0;
- break;
-
- default:
- return -EIO;
- }
-
- dnbd3_request.handle = (uint64_t)(uintptr_t)req; // Double cast to prevent warning on 32bit
- fixup_request(dnbd3_request);
- iov.iov_base = &dnbd3_request;
- iov.iov_len = sizeof(dnbd3_request);
- if (kernel_sendmsg(sock->sock, &msg, &iov, 1, sizeof(dnbd3_request)) != sizeof(dnbd3_request)) {
- printk(KERN_ERR "dnbd3: connection to server lost\n");
- result = -EIO;
- goto error;
- }
-
- // receive net reply
- iov.iov_base = &dnbd3_reply;
- iov.iov_len = sizeof(dnbd3_reply);
- result = kernel_recvmsg(sock->sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags);
- if (!result) {
- printk(KERN_ERR "dnbd3: connection to server lost\n");
- result = -EIO;
- goto error;
-
- }
- fixup_reply(dnbd3_reply);
-
- // check error
- if (dnbd3_reply.magic != dnbd3_packet_magic) {
- printk(KERN_ERR "dnbd3: wrong magic packet\n");
- result = -EIO;
- goto error;
- }
-
- if (dnbd3_reply.cmd != CMD_GET_BLOCK) {
- printk(KERN_ERR "dnbd3: command was %d\n", dnbd3_reply.cmd);
- result = -EIO;
- goto error;
- }
-
-
-
- rq_for_each_segment(bvec_inst, req, iter) {
- siginitsetinv(&blocked, sigmask(SIGKILL));
- sigprocmask(SIG_SETMASK, &blocked, &oldset);
-
- kaddr = kmap(bvec->bv_page) + bvec->bv_offset;
- iov.iov_base = kaddr;
- iov.iov_len = bvec->bv_len;
- if (kernel_recvmsg(sock->sock, &msg, &iov, 1, bvec->bv_len, msg.msg_flags) != bvec->bv_len) {
- kunmap(bvec->bv_page);
- sigprocmask(SIG_SETMASK, &oldset, NULL );
- printk(KERN_ERR "dnbd3: could not receive form net to block layer\n");
- goto error;
- }
- kunmap(bvec->bv_page);
-
- sigprocmask(SIG_SETMASK, &oldset, NULL );
- }
- blk_mq_end_request(req, 0);
- sock->pending = NULL;
-error:
- return result;
-}
-
-static void dnbd3_requeue_cmd(struct dnbd3_cmd *cmd)
-{
- struct request *req = blk_mq_rq_from_pdu(cmd);
-
- if (!test_and_set_bit(DNBD3_CMD_REQUEUED, &cmd->flags)) {
- blk_mq_requeue_request(req, true);
- }
-}
-
-static int dnbd3_handle_cmd(struct dnbd3_cmd *cmd, int index)
-{
- struct request *req = blk_mq_rq_from_pdu(cmd);
- struct dnbd3_device_t *dev = cmd->dnbd3;
- struct dnbd3_sock *sock;
- int ret = -1;
-
- printk(KERN_DEBUG "dnbd3: handle request at position %lu and size %d, device %i\n", blk_rq_pos(req), blk_rq_bytes(req), dev->minor);
-
-// if (index >= 1) { // TODO use next server with good rtt for this request
-// printk(KERN_INFO "dnbd3: index is %d", index);
-// dev_err_ratelimited(disk_to_dev(dev->disk), "attempted send on invalid socket\n");
-// blk_mq_start_request(req);
-// return -EINVAL;
-// }
-
- sock = &dev->socks[index];
- if (!sock->sock) {
- printk(KERN_INFO "dnbd3: index is %d but no socket was found\n", index);
- dev_err_ratelimited(disk_to_dev(dev->disk), "attempted send on invalid socket\n");
- blk_mq_start_request(req);
- return -EINVAL;
- }
-
-
- cmd->status = BLK_STS_OK;
-
-again:
-
-
- mutex_lock(&sock->lock);
- if (unlikely(!sock->sock)) {
- mutex_unlock(&sock->lock);
- printk(KERN_DEBUG "dnbd3: not connected\n");
- return -EIO;
- }
-
- blk_mq_start_request(req);
- if (unlikely(sock->pending && sock->pending != req)) {
- dnbd3_requeue_cmd(cmd);
- ret = 0;
- goto out;
- }
-
- ret = dnbd3_send_cmd(dev, sock, cmd, index);
- if (ret == -EAGAIN) {
- dev_err_ratelimited(disk_to_dev(dev->disk), "request send failed, requeueing\n");
- dnbd3_requeue_cmd(cmd);
- ret = 0;
- }
-out:
- mutex_unlock(&sock->lock);
- return ret;
-}
-
-static blk_status_t dnbd3_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd)
-{
- struct dnbd3_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
- int ret;
- struct dnbd3_device_t *dev = cmd->dnbd3;
-
- printk(KERN_DEBUG "dnbd3: queue request device %i\n", dev->minor);
-
-
- mutex_lock(&cmd->lock);
- clear_bit(DNBD3_CMD_REQUEUED, &cmd->flags);
-
-
- ret = dnbd3_handle_cmd(cmd, hctx->queue_num);
- if (ret < 0) {
- ret = BLK_STS_IOERR;
- } else if (!ret) {
- ret = BLK_STS_OK;
- }
- mutex_unlock(&cmd->lock);
-
- return ret;
-}
-
-static void dnbd3_complete_rq(struct request *req)
-{
- printk(KERN_DEBUG "dnbd3: dnbd3_complete_rq\n");
-
-}
-
-static int dnbd3_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node)
-{
- struct dnbd3_cmd *cmd = blk_mq_rq_to_pdu(rq);
- cmd->dnbd3 = set->driver_data;
- cmd->flags = 0;
- mutex_init(&cmd->lock);
- return 0;
-}
-static enum blk_eh_timer_return dnbd3_xmit_timeout(struct request *req, bool reserved)
-{
- printk(KERN_DEBUG "dnbd3: dnbd3_xmit_timeout\n");
- return BLK_EH_DONE;
-}
-
-
-struct blk_mq_ops dnbd3_mq_ops = {
- .queue_rq = dnbd3_queue_rq,
- .complete = dnbd3_complete_rq,
- .init_request = dnbd3_init_request,
- .timeout = dnbd3_xmit_timeout,
-};
-
diff --git a/src/kernel/mq.h b/src/kernel/mq.h
deleted file mode 100644
index 9f3cde8..0000000
--- a/src/kernel/mq.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * mq.h
- *
- * Created on: Jun 26, 2019
- * Author: fred
- */
-
-#ifndef SRC_KERNEL_MQ_H_
-#define SRC_KERNEL_MQ_H_
-
-#include "dnbd3.h"
-
-extern struct blk_mq_ops dnbd3_mq_ops;
-
-
-#endif /* SRC_KERNEL_MQ_H_ */
diff --git a/src/kernel/net.c b/src/kernel/net.c
new file mode 100644
index 0000000..f44925f
--- /dev/null
+++ b/src/kernel/net.c
@@ -0,0 +1,528 @@
+/*
+ * This file is part of the Distributed Network Block Device 3
+ *
+ * Copyright(c) 2019 Frederic Robra <frederic@robra.org>
+ * Parts copyright 2011-2012 Johann Latocha <johann@latocha.de>
+ *
+ * This file may be licensed under the terms of of the
+ * GNU General Public License Version 2 (the ``GPL'').
+ *
+ * Software distributed under the License is distributed
+ * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
+ * express or implied. See the GPL for the specific language
+ * governing rights and limitations.
+ *
+ * You should have received a copy of the GPL along with this
+ * program. If not, go to http://www.gnu.org/licenses/gpl.html
+ * or write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <net/sock.h>
+
+#include "dnbd3.h"
+#include "clientconfig.h"
+
+#define dnbd3_cmd_to_priv(req, cmd) (req)->cmd_flags = REQ_OP_DRV_IN | ((cmd) << REQ_FLAG_BITS)
+#define dnbd3_priv_to_cmd(req) ((req)->cmd_flags >> REQ_FLAG_BITS)
+#define dnbd3_req_op(req) req_op(req)
+#define dnbd3_sock_create(af,type,proto,sock) sock_create_kern(&init_net, (af) == HOST_IP4 ? AF_INET : AF_INET6, type, proto, sock)
+
+#define KEEPALIVE_TIMER (TIMER_INTERVAL_KEEPALIVE_PACKET * (jiffies + HZ))
+#define DISCOVERY_TIMER (TIMER_INTERVAL_PROBE_NORMAL * (jiffies + HZ))
+
+#define init_msghdr(h) do { \
+ h.msg_name = NULL; \
+ h.msg_namelen = 0; \
+ h.msg_control = NULL; \
+ h.msg_controllen = 0; \
+ h.msg_flags = MSG_WAITALL | MSG_NOSIGNAL; \
+ } while (0)
+
+
+static void printHost(struct dnbd3_host_t *host, char *msg)
+{
+ if (host->type == HOST_IP4) {
+ printk(KERN_INFO "dnbd3: %s %pI4:%d\n", msg, host->addr, host->port);
+ } else {
+ printk(KERN_INFO "dnbd3: %s [%pI6]:%d\n", msg, host->addr, host->port);
+ }
+}
+
+static void printServerList(struct dnbd3_device *dev)
+{
+ int i;
+ printHost(&dev->initial_server.host, "initial server is");
+ for (i = 0; i < NUMBER_SERVERS; i++) {
+ if (dev->alt_servers[i].host.addr[0] != 0) {
+ printHost(&dev->alt_servers[i].host, "alternative server is");
+ }
+ }
+}
+
+int dnbd3_send_request(struct dnbd3_device *dev, struct dnbd3_sock *sock, struct request *req)
+{
+ dnbd3_request_t dnbd3_request;
+ dnbd3_reply_t dnbd3_reply;
+ struct msghdr msg;
+ struct kvec iov;
+ struct req_iterator iter;
+ struct bio_vec bvec_inst;
+ struct bio_vec *bvec = &bvec_inst;
+ sigset_t blocked, oldset;
+ void *kaddr;
+ int result, count, remaining;
+ uint16_t rid;
+ sock->pending = req;
+ init_msghdr(msg);
+
+ dnbd3_request.magic = dnbd3_packet_magic;
+
+ switch (req_op(req)) {
+// case REQ_OP_DISCARD:
+// printk(KERN_DEBUG "dnbd3: request operation discard on device %d\n", dev->minor);
+// break;
+// case REQ_OP_FLUSH:
+// printk(KERN_DEBUG "dnbd3: request operation flush on device %d\n", dev->minor);
+// break;
+// case REQ_OP_WRITE:
+// printk(KERN_DEBUG "dnbd3: request operation write on device %d\n", dev->minor);
+// break;
+ case REQ_OP_READ:
+ printk(KERN_DEBUG "dnbd3: request operation read\n");
+ dnbd3_request.cmd = CMD_GET_BLOCK;
+ dnbd3_request.offset = blk_rq_pos(req) << 9; // *512
+ dnbd3_request.size = blk_rq_bytes(req); // bytes left to complete entire request
+ break;
+ case REQ_OP_DRV_IN:
+ printk(KERN_DEBUG "dnbd3: request operation driver in\n");
+ dnbd3_request.cmd = dnbd3_priv_to_cmd(req);
+ dnbd3_request.size = 0;
+ break;
+
+ default:
+ return -EIO;
+ }
+
+ dnbd3_request.handle = (uint64_t)(uintptr_t)req; // Double cast to prevent warning on 32bit
+ fixup_request(dnbd3_request);
+ iov.iov_base = &dnbd3_request;
+ iov.iov_len = sizeof(dnbd3_request);
+ if (kernel_sendmsg(sock->sock, &msg, &iov, 1, sizeof(dnbd3_request)) != sizeof(dnbd3_request)) {
+ printk(KERN_ERR "dnbd3: connection to server lost\n");
+ result = -EIO;
+ goto error;
+ }
+
+ // receive net reply
+ iov.iov_base = &dnbd3_reply;
+ iov.iov_len = sizeof(dnbd3_reply);
+ result = kernel_recvmsg(sock->sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags);
+ if (!result) {
+ printk(KERN_ERR "dnbd3: connection to server lost\n");
+ result = -EIO;
+ goto error;
+
+ }
+ fixup_reply(dnbd3_reply);
+
+ // check error
+ if (dnbd3_reply.magic != dnbd3_packet_magic) {
+ printk(KERN_ERR "dnbd3: wrong magic packet\n");
+ result = -EIO;
+ goto error;
+ }
+
+ if (dnbd3_reply.cmd == 0) {
+ printk(KERN_ERR "dnbd3: command was 0\n");
+ result = -EIO;
+ goto error;
+ }
+
+
+ switch (dnbd3_reply.cmd) {
+ case CMD_GET_BLOCK:
+ rq_for_each_segment(bvec_inst, req, iter) {
+ siginitsetinv(&blocked, sigmask(SIGKILL));
+ sigprocmask(SIG_SETMASK, &blocked, &oldset);
+
+ kaddr = kmap(bvec->bv_page) + bvec->bv_offset;
+ iov.iov_base = kaddr;
+ iov.iov_len = bvec->bv_len;
+ if (kernel_recvmsg(sock->sock, &msg, &iov, 1, bvec->bv_len, msg.msg_flags) != bvec->bv_len) {
+ kunmap(bvec->bv_page);
+ sigprocmask(SIG_SETMASK, &oldset, NULL );
+ printk(KERN_ERR "dnbd3: could not receive form net to block layer\n");
+ goto error;
+ }
+ kunmap(bvec->bv_page);
+
+ sigprocmask(SIG_SETMASK, &oldset, NULL );
+ }
+ blk_mq_end_request(req, 0);
+ break;
+ case CMD_GET_SERVERS:
+ printk(KERN_DEBUG "dnbd3: get servers received\n");
+ mutex_lock(&dev->device_lock);
+ if (!dev->use_server_provided_alts) {
+ remaining = dnbd3_reply.size;
+ goto consume_payload;
+ }
+ dev->new_servers_num = 0;
+ count = MIN(NUMBER_SERVERS, dnbd3_reply.size / sizeof(dnbd3_server_entry_t));
+
+ if (count != 0) {
+ iov.iov_base = dev->new_servers;
+ iov.iov_len = count * sizeof(dnbd3_server_entry_t);
+ if (kernel_recvmsg(sock->sock, &msg, &iov, 1, (count * sizeof(dnbd3_server_entry_t)), msg.msg_flags) != (count * sizeof(dnbd3_server_entry_t))) {
+ printk(KERN_ERR "dnbd3: failed to get servers\n");
+ mutex_unlock(&dev->device_lock);
+ goto error;
+ }
+ dev->new_servers_num = count;
+ }
+ // If there were more servers than accepted, remove the remaining data from the socket buffer
+ remaining = dnbd3_reply.size - (count * sizeof(dnbd3_server_entry_t));
+consume_payload:
+ while (remaining > 0) {
+ count = MIN(sizeof(dnbd3_reply), remaining); // Abuse the reply struct as the receive buffer
+ iov.iov_base = &dnbd3_reply;
+ iov.iov_len = count;
+ result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags);
+ if (result <= 0) {
+ printk(KERN_ERR "dnbd3: failed to receive payload from get servers\n");
+ mutex_unlock(&dev->device_lock);
+ goto error;
+ }
+ result = 0;
+ }
+ mutex_unlock(&dev->device_lock);
+ break;
+ case CMD_LATEST_RID:
+ if (dnbd3_reply.size != 2) {
+ printk(KERN_ERR "dnbd3: failed to get latest rid, wrong size\n");
+ goto error;
+ }
+ printk(KERN_DEBUG "dnbd3: latest rid received\n");
+ iov.iov_base = &rid;
+ iov.iov_len = sizeof(rid);
+ if (kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags) <= 0) {
+ printk(KERN_ERR "dnbd3: failed to get latest rid\n");
+ goto error;
+ }
+ rid = net_order_16(rid);
+ printk("Latest rid of %s is %d (currently using %d)\n", dev->imgname, (int)rid, (int)dev->rid);
+ dev->update_available = (rid > dev->rid ? 1 : 0);
+ break;
+ case CMD_KEEPALIVE:
+ if (dnbd3_reply.size != 0) {
+ printk(KERN_ERR "dnbd3: got keep alive packet with payload\n");
+ goto error;
+ }
+ printk(KERN_DEBUG "dnbd3: keep alive received\n");
+ break;
+
+ default:
+ printk("ERROR: Unknown command (Receive)\n");
+ break;
+
+ }
+ sock->pending = NULL;
+error:
+ return result;
+}
+
+
+void dnbd3_keepalive(struct timer_list *arg)
+{
+ struct dnbd3_sock *sock = container_of(arg, struct dnbd3_sock, keepalive_timer);
+ printk(KERN_DEBUG "dnbd3: schedule keepalive\n");
+// schedule_work(&sock->keepalive);
+ sock->keepalive_timer.expires = KEEPALIVE_TIMER;
+ add_timer(&sock->keepalive_timer);
+}
+
+static void keepalive(struct work_struct *work)
+{
+ struct dnbd3_sock *sock = container_of(work, struct dnbd3_sock, keepalive);
+ struct request *req;
+ mutex_lock(&sock->lock);
+ req = kmalloc(sizeof(struct request), GFP_ATOMIC );
+ // send keepalive
+ if (req) {
+ dnbd3_cmd_to_priv(req, CMD_KEEPALIVE);
+ dnbd3_send_request(NULL, sock, req); // we do not need the device for keepalive
+ kfree(req);
+ } else {
+ printk(KERN_WARNING "dnbd3: could not create keepalive request\n");
+ }
+ ++sock->heartbeat_count;
+ mutex_unlock(&sock->lock);
+}
+
+void dnbd3_discovery(struct timer_list *arg)
+{
+ struct dnbd3_device *dev = container_of(arg, struct dnbd3_device, discovery_timer);
+ printk(KERN_DEBUG "dnbd3: schedule discovery\n");
+// schedule_work(&dev->discovery);
+ dev->discovery_timer.expires = DISCOVERY_TIMER;
+ add_timer(&dev->discovery_timer);
+}
+
+static void discovery(struct work_struct *work)
+{
+ struct dnbd3_device *dev = container_of(work, struct dnbd3_device, discovery);
+ dnbd3_sock *sock = &dev->socks[0]; // we use the first sock for discovery
+ struct request *req;
+ mutex_lock(&sock->lock);
+ req = kmalloc(sizeof(struct request), GFP_ATOMIC );
+ // send keepalive
+ if (req) {
+ dnbd3_cmd_to_priv(req, CMD_GET_SERVERS);
+ dnbd3_send_request(NULL, sock, req); // we do not need the device for keepalive
+ kfree(req);
+ } else {
+ printk(KERN_WARNING "dnbd3: could not create get servers request\n");
+ }
+ mutex_unlock(&sock->lock);
+}
+
+
+static int dnbd3_socket_connect(dnbd3_device *dev, dnbd3_sock *sock)
+{
+ int result = -EIO;
+ struct request *req1 = NULL;
+ struct timeval timeout;
+ struct dnbd3_server *server = sock->server;
+ dnbd3_request_t dnbd3_request;
+ dnbd3_reply_t dnbd3_reply;
+ struct msghdr msg;
+ struct kvec iov[2];
+ uint16_t rid;
+ uint64_t reported_size;
+ char *name;
+ int mlen;
+ serialized_buffer_t payload_buffer;
+
+ printk(KERN_DEBUG "dnbd3: socket connect device %i\n", dev->minor);
+
+ mutex_init(&sock->lock);
+ mutex_lock(&sock->lock);
+ if (sock->pending) {
+ printk(KERN_DEBUG "dnbd3: socket still in request\n");
+ while (sock->pending)
+ schedule();
+ }
+ if (server->host.port == 0 || server->host.type == 0) {
+ printk(KERN_ERR "dnbd3: host or port not set\n");
+ goto error;
+ }
+ if (sock->sock) {
+ printk(KERN_WARNING "dnbd3: socket already connected\n");
+ goto error;
+ }
+
+ timeout.tv_sec = SOCKET_TIMEOUT_CLIENT_DATA;
+ timeout.tv_usec = 0;
+
+ req1 = kmalloc(sizeof(*req1), GFP_ATOMIC );
+ if (!req1) {
+ printk(KERN_ERR "dnbd3: kmalloc failed\n");
+ goto error;
+ }
+
+ init_msghdr(msg);
+
+ if (dnbd3_sock_create(server->host.type, SOCK_STREAM, IPPROTO_TCP, &sock->sock) < 0) {
+ printk(KERN_ERR "dnbd3: could not create socket\n");
+ goto error;
+ }
+
+ kernel_setsockopt(sock->sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout, sizeof(timeout));
+ kernel_setsockopt(sock->sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, sizeof(timeout));
+ sock->sock->sk->sk_allocation = GFP_NOIO;
+ if (server->host.type == HOST_IP4) {
+ struct sockaddr_in sin;
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_family = AF_INET;
+ memcpy(&(sin.sin_addr), server->host.addr, 4);
+ sin.sin_port = server->host.port;
+ if (kernel_connect(sock->sock, (struct sockaddr *)&sin, sizeof(sin), 0) != 0) {
+ printk(KERN_ERR "dnbd3: connection to host failed (ipv4)\n");
+ goto error;
+ }
+ } else {
+ struct sockaddr_in6 sin;
+ memset(&sin, 0, sizeof(sin));
+ sin.sin6_family = AF_INET6;
+ memcpy(&(sin.sin6_addr), server->host.addr, 16);
+ sin.sin6_port = server->host.port;
+ if (kernel_connect(sock->sock, (struct sockaddr *)&sin, sizeof(sin), 0) != 0){
+ printk(KERN_ERR "dnbd3: connection to host failed (ipv6)\n");
+ goto error;
+ }
+ }
+ // Request filesize
+ dnbd3_request.magic = dnbd3_packet_magic;
+ dnbd3_request.cmd = CMD_SELECT_IMAGE;
+ iov[0].iov_base = &dnbd3_request;
+ iov[0].iov_len = sizeof(dnbd3_request);
+ serializer_reset_write(&payload_buffer);
+ serializer_put_uint16(&payload_buffer, PROTOCOL_VERSION);
+ serializer_put_string(&payload_buffer, dev->imgname);
+ serializer_put_uint16(&payload_buffer, dev->rid);
+ serializer_put_uint8(&payload_buffer, 0); // is_server = false
+ iov[1].iov_base = &payload_buffer;
+ dnbd3_request.size = iov[1].iov_len = serializer_get_written_length(&payload_buffer);
+ fixup_request(dnbd3_request);
+ mlen = sizeof(dnbd3_request) + iov[1].iov_len;
+ if (kernel_sendmsg(sock->sock, &msg, iov, 2, mlen) != mlen) {
+ printk(KERN_ERR "dnbd3: could not send CMD_SIZE_REQUEST\n");
+ goto error;
+ }
+ // receive reply header
+ iov[0].iov_base = &dnbd3_reply;
+ iov[0].iov_len = sizeof(dnbd3_reply);
+ if (kernel_recvmsg(sock->sock, &msg, iov, 1, sizeof(dnbd3_reply), msg.msg_flags) != sizeof(dnbd3_reply)) {
+ printk(KERN_ERR "dnbd3: received corrupted reply header after CMD_SIZE_REQUEST\n");
+ goto error;
+ }
+
+ // check reply header
+ fixup_reply(dnbd3_reply);
+ if (dnbd3_reply.cmd != CMD_SELECT_IMAGE ||
+ dnbd3_reply.size < 3 ||
+ dnbd3_reply.size > MAX_PAYLOAD ||
+ dnbd3_reply.magic != dnbd3_packet_magic) {
+ printk(KERN_ERR "dnbd3: received invalid reply to CMD_SIZE_REQUEST image does not exist on server\n");
+ goto error;
+ }
+
+ // receive reply payload
+ iov[0].iov_base = &payload_buffer;
+ iov[0].iov_len = dnbd3_reply.size;
+ if (kernel_recvmsg(sock->sock, &msg, iov, 1, dnbd3_reply.size, msg.msg_flags) != dnbd3_reply.size) {
+ printk(KERN_ERR "dnbd3: could not read CMD_SELECT_IMAGE payload on handshake\n");
+ goto error;
+ }
+
+ // handle/check reply payload
+ serializer_reset_read(&payload_buffer, dnbd3_reply.size);
+ server->protocol_version = serializer_get_uint16(&payload_buffer);
+ if (server->protocol_version < MIN_SUPPORTED_SERVER) {
+ printk(KERN_ERR "dnbd3: server version is lower than min supported version\n");
+ goto error;
+ }
+
+ name = serializer_get_string(&payload_buffer);
+ rid = serializer_get_uint16(&payload_buffer);
+ if (dev->rid != rid && strcmp(name, dev->imgname) != 0) {
+ printk(KERN_ERR "dnbd3: server offers image '%s', requested '%s'\n", name, dev->imgname);
+ goto error;
+ }
+
+ reported_size = serializer_get_uint64(&payload_buffer);
+ if (dev->reported_size == NULL) {
+ if (reported_size < 4096) {
+ printk(KERN_ERR "dnbd3: reported size by server is < 4096\n");
+ goto error;
+ }
+ dev->reported_size = reported_size;
+ set_capacity(dev->disk, dev->reported_size >> 9); /* 512 Byte blocks */
+ } else if (dev->reported_size != reported_size) {
+ printk(KERN_ERR "dnbd3: reported size by server is %llu but should be %llu\n", reported_size, dev->reported_size);
+ }
+
+ printk(KERN_DEBUG "dnbd3: connected to image %s, filesize %llu\n", dev->imgname, dev->reported_size);
+
+// TODO add heartbeat
+ // add heartbeat timer and scheduler for the command
+ INIT_WORK(&sock->keepalive, keepalive);
+ sock->heartbeat_count = 0;
+ timer_setup(&sock->keepalive_timer, dnbd3_keepalive, 0);
+ sock->keepalive_timer.expires = KEEPALIVE_TIMER;
+ add_timer(&sock->keepalive_timer);
+
+ mutex_unlock(&sock->lock);
+
+ return 0;
+error:
+ if (sock->sock) {
+ sock_release(sock->sock);
+ sock->sock = NULL;
+ }
+ if (req1) {
+ kfree(req1);
+ }
+ mutex_unlock(&sock->lock);
+ return result;
+}
+
+static int dnbd3_socket_disconnect(dnbd3_device *dev, dnbd3_sock *sock)
+{
+ printk(KERN_DEBUG "dnbd3: socket disconnect device %i\n", dev->minor);
+ mutex_lock(&sock->lock);
+
+ // clear heartbeat timer
+ del_timer_sync(&sock->keepalive_timer);
+// destroy_workqueue(&sock->keepalive);
+
+ if (sock->sock) {
+ kernel_sock_shutdown(sock->sock, SHUT_RDWR);
+ }
+
+ // clear socket
+ if (sock->sock) {
+ sock_release(sock->sock);
+ sock->sock = NULL;
+ }
+
+ mutex_unlock(&sock->lock);
+ mutex_destroy(&sock->lock);
+ return 0;
+}
+
+int dnbd3_net_disconnect(struct dnbd3_device *dev)
+{
+ int i;
+ int result;
+ del_timer_sync(&dev->discovery_timer);
+// destroy_workqueue(&dev->discovery);
+ for (i = 0; i < NUMBER_CONNECTIONS; i++) {
+ if (dev->socks[i].sock) {
+ if (dnbd3_socket_disconnect(dev, &dev->socks[i])) {
+ result = -EIO;
+ }
+ }
+ }
+ return result;
+}
+
+
+int dnbd3_net_connect(struct dnbd3_device *dev) {
+ // TODO decide which socket to connect
+ int result;
+ dev->socks[0].server = &dev->initial_server;
+ if (dnbd3_socket_connect(dev, &dev->socks[0]) == 0) {
+ printServerList(dev);
+
+ INIT_WORK(&dev->discovery, discovery);
+ timer_setup(&dev->discovery_timer, dnbd3_discovery, 0);
+ dev->discovery_timer.expires = DISCOVERY_TIMER;
+ add_timer(&dev->discovery_timer);
+
+ result = 0;
+ } else {
+ printk(KERN_ERR "dnbd3: failed to connect to initial server\n");
+ result = -ENOENT;
+ dev->imgname = NULL;
+ dev->socks[0].server = NULL;
+ }
+ return result;
+}
+
+
+
diff --git a/src/kernel/net.h b/src/kernel/net.h
new file mode 100644
index 0000000..6cd0ffa
--- /dev/null
+++ b/src/kernel/net.h
@@ -0,0 +1,33 @@
+/*
+ * This file is part of the Distributed Network Block Device 3
+ *
+ * Copyright(c) 2019 Frederic Robra <frederic@robra.org>
+ * Parts copyright 2011-2012 Johann Latocha <johann@latocha.de>
+ *
+ * This file may be licensed under the terms of of the
+ * GNU General Public License Version 2 (the ``GPL'').
+ *
+ * Software distributed under the License is distributed
+ * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
+ * express or implied. See the GPL for the specific language
+ * governing rights and limitations.
+ *
+ * You should have received a copy of the GPL along with this
+ * program. If not, go to http://www.gnu.org/licenses/gpl.html
+ * or write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+#ifndef NET_H_
+#define NET_H_
+
+
+int dnbd3_send_request(struct dnbd3_device *dev, struct dnbd3_sock *sock, struct request *req);
+
+
+int dnbd3_net_connect(struct dnbd3_device *dev);
+int dnbd3_net_disconnect(struct dnbd3_device *dev);
+
+#endif /* NET_H_ */
diff --git a/src/kernel/sysfs.c b/src/kernel/sysfs.c
index 118cbca..b5e180b 100644
--- a/src/kernel/sysfs.c
+++ b/src/kernel/sysfs.c
@@ -18,9 +18,10 @@
*
*/
+#include "sysfs.h"
+
#include <linux/kobject.h>
-#include "sysfs.h"
#include "utils.h"
#ifndef MIN
@@ -42,7 +43,7 @@
// return MIN(snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)dev->cur_rtt), PAGE_SIZE);
//}
-ssize_t show_alt_server_num(char *buf, dnbd3_device_t *dev)
+ssize_t show_alt_server_num(char *buf, dnbd3_device *dev)
{
int i, num = 0;
for (i = 0; i < NUMBER_SERVERS; ++i)
@@ -52,7 +53,7 @@ ssize_t show_alt_server_num(char *buf, dnbd3_device_t *dev)
return MIN(snprintf(buf, PAGE_SIZE, "%d\n", num), PAGE_SIZE);
}
-ssize_t show_alt_servers(char *buf, dnbd3_device_t *dev)
+ssize_t show_alt_servers(char *buf, dnbd3_device *dev)
{
int i, size = PAGE_SIZE, ret;
for (i = 0; i < NUMBER_SERVERS; ++i)
@@ -84,18 +85,18 @@ ssize_t show_alt_servers(char *buf, dnbd3_device_t *dev)
return PAGE_SIZE - size;
}
-ssize_t show_image_name(char *buf, dnbd3_device_t *dev)
+ssize_t show_image_name(char *buf, dnbd3_device *dev)
{
if (dev->imgname == NULL) return sprintf(buf, "(null)");
return MIN(snprintf(buf, PAGE_SIZE, "%s\n", dev->imgname), PAGE_SIZE);
}
-ssize_t show_rid(char *buf, dnbd3_device_t *dev)
+ssize_t show_rid(char *buf, dnbd3_device *dev)
{
return MIN(snprintf(buf, PAGE_SIZE, "%d\n", dev->rid), PAGE_SIZE);
}
-ssize_t show_update_available(char *buf, dnbd3_device_t *dev)
+ssize_t show_update_available(char *buf, dnbd3_device *dev)
{
return MIN(snprintf(buf, PAGE_SIZE, "%d\n", dev->update_available), PAGE_SIZE);
}
@@ -152,7 +153,7 @@ device_attr_t update_available =
ssize_t device_show(struct kobject *kobj, struct attribute *attr, char *buf)
{
device_attr_t *device_attr = container_of(attr, device_attr_t, attr);
- dnbd3_device_t *dev = container_of(kobj, dnbd3_device_t, kobj);
+ dnbd3_device *dev = container_of(kobj, dnbd3_device, kobj);
return device_attr->show(buf, dev);
}
@@ -187,7 +188,7 @@ struct kobj_type device_ktype =
};
-void dnbd3_sysfs_init(dnbd3_device_t *dev)
+void dnbd3_sysfs_init(dnbd3_device *dev)
{
int error;
struct kobject *kobj = &dev->kobj;
@@ -199,7 +200,7 @@ void dnbd3_sysfs_init(dnbd3_device_t *dev)
printk("Error initializing dnbd3 device!\n");
}
-void dnbd3_sysfs_exit(dnbd3_device_t *dev)
+void dnbd3_sysfs_exit(dnbd3_device *dev)
{
kobject_put(&dev->kobj);
}
diff --git a/src/kernel/sysfs.h b/src/kernel/sysfs.h
index 0a747a5..b48acc3 100644
--- a/src/kernel/sysfs.h
+++ b/src/kernel/sysfs.h
@@ -23,22 +23,22 @@
#include "dnbd3.h"
-void dnbd3_sysfs_init(dnbd3_device_t *dev);
+void dnbd3_sysfs_init(dnbd3_device *dev);
-void dnbd3_sysfs_exit(dnbd3_device_t *dev);
+void dnbd3_sysfs_exit(dnbd3_device *dev);
typedef struct
{
struct attribute attr;
- ssize_t (*show)(char *, dnbd3_device_t *);
- ssize_t (*store)(const char *, size_t, dnbd3_device_t *);
+ ssize_t (*show)(char *, dnbd3_device *);
+ ssize_t (*store)(const char *, size_t, dnbd3_device *);
} device_attr_t;
typedef struct
{
struct attribute attr;
- ssize_t (*show)(char *, dnbd3_server_t *);
- ssize_t (*store)(const char *, size_t, dnbd3_server_t *);
+ ssize_t (*show)(char *, dnbd3_server *);
+ ssize_t (*store)(const char *, size_t, dnbd3_server *);
} server_attr_t;
diff --git a/src/kernel/utils.c b/src/kernel/utils.c
index 902025f..957d69b 100644
--- a/src/kernel/utils.c
+++ b/src/kernel/utils.c
@@ -18,9 +18,10 @@
*
*/
+#include "utils.h"
+
#include <linux/kernel.h>
-#include "utils.h"
unsigned int inet_addr(char *str)
{