From f9ec2db3b4d1e0047087393218618cf8c439c336 Mon Sep 17 00:00:00 2001 From: Frederic Robra Date: Sun, 7 Jul 2019 22:13:01 +0200 Subject: added first draft for keepalive and discovery --- Kbuild.in | 2 +- src/kernel/block.c | 399 ---------------------------------------- src/kernel/block.h | 26 --- src/kernel/core.c | 304 ++++++++++++++++++++++++++++-- src/kernel/dnbd3.h | 27 ++- src/kernel/mq.c | 237 ------------------------ src/kernel/mq.h | 16 -- src/kernel/net.c | 528 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/kernel/net.h | 33 ++++ src/kernel/sysfs.c | 19 +- src/kernel/sysfs.h | 12 +- src/kernel/utils.c | 3 +- 12 files changed, 888 insertions(+), 718 deletions(-) delete mode 100644 src/kernel/block.c delete mode 100644 src/kernel/block.h delete mode 100644 src/kernel/mq.c delete mode 100644 src/kernel/mq.h create mode 100644 src/kernel/net.c create mode 100644 src/kernel/net.h diff --git a/Kbuild.in b/Kbuild.in index 566a58b..68c1d02 100644 --- a/Kbuild.in +++ b/Kbuild.in @@ -1,2 +1,2 @@ obj-m := ${MODULE_NAME}.o -${MODULE_NAME}-objs += core.o sysfs.o block.o mq.o utils.o serialize_kmod.o +${MODULE_NAME}-objs += core.o sysfs.o net.o utils.o serialize_kmod.o diff --git a/src/kernel/block.c b/src/kernel/block.c deleted file mode 100644 index 4295418..0000000 --- a/src/kernel/block.c +++ /dev/null @@ -1,399 +0,0 @@ -/* - * block.c - * - * Created on: Jun 26, 2019 - * Author: fred - */ - -#include - -#include "dnbd3.h" -#include "block.h" -#include "clientconfig.h" - -#define dnbd3_sock_create(af,type,proto,sock) sock_create_kern(&init_net, (af) == HOST_IP4 ? AF_INET : AF_INET6, type, proto, sock) - - - - -void dnbd3_blk_fail_all_requests(dnbd3_device_t *dev) -{ - printk(KERN_DEBUG "dnbd3: fail all requests device %i\n", dev->minor); -} - - -int dnbd3_socket_connect(dnbd3_device_t *dev, dnbd3_sock *sock) -{ - int result = -EIO; - struct request *req1 = NULL; - struct timeval timeout; - struct dnbd3_server_t *server = sock->server; - dnbd3_request_t dnbd3_request; - dnbd3_reply_t dnbd3_reply; - struct msghdr msg; - struct kvec iov[2]; - uint16_t rid; - uint64_t reported_size; - char *name; - int mlen; - serialized_buffer_t payload_buffer; - - printk(KERN_DEBUG "dnbd3: socket connect device %i\n", dev->minor); - - mutex_init(&sock->lock); - mutex_lock(&sock->lock); - if (sock->pending) { - printk(KERN_DEBUG "dnbd3: socket still in request\n"); - while (sock->pending) - schedule(); - } - if (server->host.port == 0 || server->host.type == 0) { - printk(KERN_ERR "dnbd3: host or port not set\n"); - goto error; - } - if (sock->sock) { - printk(KERN_WARNING "dnbd3: socket already connected\n"); - goto error; - } - - timeout.tv_sec = SOCKET_TIMEOUT_CLIENT_DATA; - timeout.tv_usec = 0; - - req1 = kmalloc(sizeof(*req1), GFP_ATOMIC ); - if (!req1) { - printk(KERN_ERR "dnbd3: kmalloc failed\n"); - goto error; - } - - init_msghdr(msg); - - if (dnbd3_sock_create(server->host.type, SOCK_STREAM, IPPROTO_TCP, &sock->sock) < 0) { - printk(KERN_ERR "dnbd3: could not create socket\n"); - goto error; - } - - kernel_setsockopt(sock->sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout, sizeof(timeout)); - kernel_setsockopt(sock->sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, sizeof(timeout)); - sock->sock->sk->sk_allocation = GFP_NOIO; - if (server->host.type == HOST_IP4) { - struct sockaddr_in sin; - memset(&sin, 0, sizeof(sin)); - sin.sin_family = AF_INET; - memcpy(&(sin.sin_addr), server->host.addr, 4); - sin.sin_port = server->host.port; - if (kernel_connect(sock->sock, (struct sockaddr *)&sin, sizeof(sin), 0) != 0) { - printk(KERN_ERR "dnbd3: connection to host failed (ipv4)\n"); - goto error; - } - } else { - struct sockaddr_in6 sin; - memset(&sin, 0, sizeof(sin)); - sin.sin6_family = AF_INET6; - memcpy(&(sin.sin6_addr), server->host.addr, 16); - sin.sin6_port = server->host.port; - if (kernel_connect(sock->sock, (struct sockaddr *)&sin, sizeof(sin), 0) != 0){ - printk(KERN_ERR "dnbd3: connection to host failed (ipv6)\n"); - goto error; - } - } - // Request filesize - dnbd3_request.magic = dnbd3_packet_magic; - dnbd3_request.cmd = CMD_SELECT_IMAGE; - iov[0].iov_base = &dnbd3_request; - iov[0].iov_len = sizeof(dnbd3_request); - serializer_reset_write(&payload_buffer); - serializer_put_uint16(&payload_buffer, PROTOCOL_VERSION); - serializer_put_string(&payload_buffer, dev->imgname); - serializer_put_uint16(&payload_buffer, dev->rid); - serializer_put_uint8(&payload_buffer, 0); // is_server = false - iov[1].iov_base = &payload_buffer; - dnbd3_request.size = iov[1].iov_len = serializer_get_written_length(&payload_buffer); - fixup_request(dnbd3_request); - mlen = sizeof(dnbd3_request) + iov[1].iov_len; - if (kernel_sendmsg(sock->sock, &msg, iov, 2, mlen) != mlen) { - printk(KERN_ERR "dnbd3: could not send CMD_SIZE_REQUEST\n"); - goto error; - } - // receive reply header - iov[0].iov_base = &dnbd3_reply; - iov[0].iov_len = sizeof(dnbd3_reply); - if (kernel_recvmsg(sock->sock, &msg, iov, 1, sizeof(dnbd3_reply), msg.msg_flags) != sizeof(dnbd3_reply)) { - printk(KERN_ERR "dnbd3: received corrupted reply header after CMD_SIZE_REQUEST\n"); - goto error; - } - - // check reply header - fixup_reply(dnbd3_reply); - if (dnbd3_reply.cmd != CMD_SELECT_IMAGE || - dnbd3_reply.size < 3 || - dnbd3_reply.size > MAX_PAYLOAD || - dnbd3_reply.magic != dnbd3_packet_magic) { - printk(KERN_ERR "dnbd3: received invalid reply to CMD_SIZE_REQUEST image does not exist on server\n"); - goto error; - } - - // receive reply payload - iov[0].iov_base = &payload_buffer; - iov[0].iov_len = dnbd3_reply.size; - if (kernel_recvmsg(sock->sock, &msg, iov, 1, dnbd3_reply.size, msg.msg_flags) != dnbd3_reply.size) { - printk(KERN_ERR "dnbd3: could not read CMD_SELECT_IMAGE payload on handshake\n"); - goto error; - } - - // handle/check reply payload - serializer_reset_read(&payload_buffer, dnbd3_reply.size); - server->protocol_version = serializer_get_uint16(&payload_buffer); - if (server->protocol_version < MIN_SUPPORTED_SERVER) { - printk(KERN_ERR "dnbd3: server version is lower than min supported version\n"); - goto error; - } - - name = serializer_get_string(&payload_buffer); - rid = serializer_get_uint16(&payload_buffer); - if (dev->rid != rid && strcmp(name, dev->imgname) != 0) { - printk(KERN_ERR "dnbd3: server offers image '%s', requested '%s'\n", name, dev->imgname); - goto error; - } - - reported_size = serializer_get_uint64(&payload_buffer); - if (dev->reported_size == NULL) { - if (reported_size < 4096) { - printk(KERN_ERR "dnbd3: reported size by server is < 4096\n"); - goto error; - } - dev->reported_size = reported_size; - set_capacity(dev->disk, dev->reported_size >> 9); /* 512 Byte blocks */ - } else if (dev->reported_size != reported_size) { - printk(KERN_ERR "dnbd3: reported size by server is %llu but should be %llu\n", reported_size, dev->reported_size); - } - - printk(KERN_DEBUG "dnbd3: connected to image %s, filesize %llu\n", dev->imgname, dev->reported_size); - - mutex_unlock(&sock->lock); - -// TODO add heartbeat - // timer_setup(&dev->hb_timer, dnbd3_net_heartbeat, 0); - // dev->hb_timer.expires = jiffies + HZ; - // add_timer(&dev->hb_timer); - - return 0; -error: - if (sock->sock) { - sock_release(sock->sock); - sock->sock = NULL; - } - if (req1) { - kfree(req1); - } - mutex_unlock(&sock->lock); - return result; -} - -int dnbd3_socket_disconnect(dnbd3_device_t *dev, dnbd3_sock *sock) -{ - printk(KERN_DEBUG "dnbd3: socket disconnect device %i\n", dev->minor); - mutex_lock(&sock->lock); - - // clear heartbeat timer -// del_timer(&dev->hb_timer); - - - if (sock->sock) { - kernel_sock_shutdown(sock->sock, SHUT_RDWR); - } - - // clear socket - if (sock->sock) { - sock_release(sock->sock); - sock->sock = NULL; - } - - mutex_unlock(&sock->lock); - mutex_destroy(&sock->lock); - return 0; -} - -int dnbd3_net_disconnect(struct dnbd3_device_t *dev) { - int i; - int result; - for (i = 0; i < NUMBER_CONNECTIONS; i++) { - if (dev->socks[i].sock) { - if (dnbd3_socket_disconnect(dev, &dev->socks[i])) { - result = -EIO; - } - } - } - return result; -} - - -static void printHost(struct dnbd3_host_t *host, char *msg) -{ - if (host->type == HOST_IP4) { - printk(KERN_INFO "dnbd3: %s %pI4:%d", msg, host->addr, host->port); - } else { - printk(KERN_INFO "dnbd3: %s [%pI6]:%d", msg, host->addr, host->port); - } -} - -static void printServerList(struct dnbd3_device_t *dev) -{ - int i; - printHost(&dev->initial_server.host, "initial server is"); - for (i = 0; i < NUMBER_SERVERS; i++) { - if (dev->alt_servers[i].host.addr[0] != 0) { - printHost(&dev->alt_servers[i].host, "alternative server is"); - } - } -} - -static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) -{ - int result = -100; - dnbd3_device_t *dev = bdev->bd_disk->private_data; - char *imgname = NULL; - dnbd3_ioctl_t *msg = NULL; - int i; - - printk(KERN_DEBUG "dnbd3: ioctl device %i, cmd %i, arg %lu\n", dev->minor, cmd, arg); - //unsigned long irqflags; - -// while (dev->disconnecting) { -// // do nothing -// } - - if (arg != 0) { - msg = kmalloc(sizeof(*msg), GFP_KERNEL); - if (msg == NULL) return -ENOMEM; - if (copy_from_user((char *)msg, (char *)arg, 2) != 0 || msg->len != sizeof(*msg)) { - result = -ENOEXEC; - goto cleanup_return; - } - if (copy_from_user((char *)msg, (char *)arg, sizeof(*msg)) != 0) { - result = -ENOENT; - goto cleanup_return; - } - if (msg->imgname != NULL && msg->imgnamelen > 0) { - imgname = kmalloc(msg->imgnamelen + 1, GFP_KERNEL); - if (imgname == NULL) { - result = -ENOMEM; - goto cleanup_return; - } - if (copy_from_user(imgname, msg->imgname, msg->imgnamelen) != 0) { - result = -ENOENT; - goto cleanup_return; - } - imgname[msg->imgnamelen] = '\0'; - - printk(KERN_DEBUG "dnbd3: ioctl image name of len %i is %s\n", (int)msg->imgnamelen, imgname); - } - } - - switch (cmd) { - case IOCTL_OPEN: - printk(KERN_DEBUG "dnbd3: ioctl open\n"); - if (dev->imgname != NULL) { - result = -EBUSY; - } else if (imgname == NULL) { - result = -EINVAL; - } else if (msg == NULL) { - result = -EINVAL; - } else { - if (sizeof(msg->host) != sizeof(dev->initial_server.host)) { - printk(KERN_INFO "dnbd3: odd size bug#1 triggered in ioctl\n"); - } - memcpy(&dev->initial_server.host, &msg->host, sizeof(msg->host)); - dev->initial_server.failures = 0; -// memcpy(&dev->initial_server, &dev->cur_server, sizeof(dev->initial_server)); - dev->imgname = imgname; - dev->rid = msg->rid; - dev->use_server_provided_alts = msg->use_server_provided_alts; - // Forget all alt servers on explicit connect, set first alt server to initial server - memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])*NUMBER_SERVERS); - memcpy(dev->alt_servers, &dev->initial_server, sizeof(dev->alt_servers[0])); -//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) -// if (blk_queue->backing_dev_info != NULL) { -// blk_queue->backing_dev_info->ra_pages = (msg->read_ahead_kb * 1024) / PAGE_SIZE; -// } -//#else -// blk_queue->backing_dev_info.ra_pages = (msg->read_ahead_kb * 1024) / PAGE_SIZE; -//#endif - - - dev->socks[0].server = &dev->initial_server; - if (dnbd3_socket_connect(dev, &dev->socks[0]) == 0) { - printServerList(dev); - result = 0; - imgname = NULL; // Prevent kfree at the end - } else { - printk(KERN_ERR "dnbd3: failed to connect to initial server\n"); - result = -ENOENT; - dev->imgname = NULL; - dev->socks[0].server = NULL; - } - - } - break; - - case IOCTL_CLOSE: - printk(KERN_DEBUG "dnbd3: ioctl close\n"); - dnbd3_blk_fail_all_requests(dev); - dnbd3_net_disconnect(dev); - dnbd3_blk_fail_all_requests(dev); - set_capacity(dev->disk, 0); - if (dev->imgname) { - kfree(dev->imgname); - dev->imgname = NULL; - } - break; - - case IOCTL_SWITCH: - printk(KERN_DEBUG "dnbd3: ioctl switch\n"); - result = -EINVAL; - break; - - case IOCTL_ADD_SRV: - case IOCTL_REM_SRV: - printk(KERN_DEBUG "dnbd3: ioctl add/rem srv\n"); - if (dev->imgname == NULL) { - result = -ENOENT; - } else if (dev->new_servers_num >= NUMBER_SERVERS) { - result = -EAGAIN; - } else if (msg == NULL) { - result = -EINVAL; - } else { - memcpy(&dev->new_servers[dev->new_servers_num].host, &msg->host, sizeof(msg->host)); - dev->new_servers[dev->new_servers_num].failures = (cmd == IOCTL_ADD_SRV ? 0 : 1); // 0 = ADD, 1 = REM - ++dev->new_servers_num; - result = 0; - } - break; - - case BLKFLSBUF: - printk(KERN_DEBUG "dnbd3: ioctl blkflsbuf\n"); - result = 0; - break; - - default: - printk(KERN_DEBUG "dnbd3: ioctl unhandled cmd %d\n", cmd); - result = -EIO; - break; - } - -cleanup_return: - if (msg) kfree(msg); - if (imgname) kfree(imgname); - return result; - -} - - - -struct block_device_operations dnbd3_fops = -{ - .owner = THIS_MODULE, - .ioctl = dnbd3_ioctl, - .compat_ioctl = dnbd3_ioctl, -}; - - diff --git a/src/kernel/block.h b/src/kernel/block.h deleted file mode 100644 index 49055d5..0000000 --- a/src/kernel/block.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * block.h - * - * Created on: Jun 26, 2019 - * Author: fred - */ - -#ifndef SRC_KERNEL_BLOCK_H_ -#define SRC_KERNEL_BLOCK_H_ - -#define init_msghdr(h) do { \ - h.msg_name = NULL; \ - h.msg_namelen = 0; \ - h.msg_control = NULL; \ - h.msg_controllen = 0; \ - h.msg_flags = MSG_WAITALL | MSG_NOSIGNAL; \ - } while (0) - -//int dnbd3_net_connect(dnbd3_device_t *dev); - -int dnbd3_net_disconnect(dnbd3_device_t *dev); - -extern struct block_device_operations dnbd3_fops; - - -#endif /* SRC_KERNEL_BLOCK_H_ */ diff --git a/src/kernel/core.c b/src/kernel/core.c index db1f6de..48f809b 100644 --- a/src/kernel/core.c +++ b/src/kernel/core.c @@ -48,27 +48,298 @@ #include #include "dnbd3.h" -#include "clientconfig.h" #include "sysfs.h" -#include "block.h" -#include "mq.h" +#include "clientconfig.h" +#include "net.h" + +#define DNBD3_CMD_REQUEUED 1 DEFINE_IDR(dnbd3_index_idr); DEFINE_MUTEX(dnbd3_index_mutex); static unsigned int max_devs = NUMBER_DEVICES; -static dnbd3_device_t *dnbd3_device; +static dnbd3_device *device; int major; +static void dnbd3_requeue_cmd(struct dnbd3_cmd *cmd) +{ + struct request *req = blk_mq_rq_from_pdu(cmd); + + if (!test_and_set_bit(DNBD3_CMD_REQUEUED, &cmd->flags)) { + blk_mq_requeue_request(req, true); + } +} + +static int dnbd3_handle_cmd(struct dnbd3_cmd *cmd, int index) +{ + struct request *req = blk_mq_rq_from_pdu(cmd); + struct dnbd3_device *dev = cmd->dnbd3; + struct dnbd3_sock *sock; + int ret = -1; + + printk(KERN_DEBUG "dnbd3: handle request at position %lu and size %d, device %i\n", blk_rq_pos(req), blk_rq_bytes(req), dev->minor); + +// if (index >= 1) { // TODO use next server with good rtt for this request +// printk(KERN_INFO "dnbd3: index is %d", index); +// dev_err_ratelimited(disk_to_dev(dev->disk), "attempted send on invalid socket\n"); +// blk_mq_start_request(req); +// return -EINVAL; +// } + + sock = &dev->socks[index]; + if (!sock->sock) { + printk(KERN_INFO "dnbd3: index is %d but no socket was found\n", index); + dev_err_ratelimited(disk_to_dev(dev->disk), "attempted send on invalid socket\n"); + blk_mq_start_request(req); + return -EINVAL; + } + + + cmd->status = BLK_STS_OK; + +again: + + + mutex_lock(&sock->lock); + if (unlikely(!sock->sock)) { + mutex_unlock(&sock->lock); + printk(KERN_DEBUG "dnbd3: not connected\n"); + return -EIO; + } + + blk_mq_start_request(req); + if (unlikely(sock->pending && sock->pending != req)) { + dnbd3_requeue_cmd(cmd); + ret = 0; + goto out; + } + + ret = dnbd3_send_request(dev, sock, blk_mq_rq_from_pdu(cmd)); + if (ret == -EAGAIN) { + dev_err_ratelimited(disk_to_dev(dev->disk), "request send failed, requeueing\n"); + dnbd3_requeue_cmd(cmd); + ret = 0; + } +out: + mutex_unlock(&sock->lock); + return ret; +} + +static blk_status_t dnbd3_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) +{ + struct dnbd3_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); + int ret; + struct dnbd3_device *dev = cmd->dnbd3; + + printk(KERN_DEBUG "dnbd3: queue request device %i\n", dev->minor); + -static int dnbd3_add_device(dnbd3_device_t *dev, int minor) + mutex_lock(&cmd->lock); + clear_bit(DNBD3_CMD_REQUEUED, &cmd->flags); + + + ret = dnbd3_handle_cmd(cmd, hctx->queue_num); + if (ret < 0) { + ret = BLK_STS_IOERR; + } else if (!ret) { + ret = BLK_STS_OK; + } + mutex_unlock(&cmd->lock); + + return ret; +} + +static void dnbd3_complete_rq(struct request *req) +{ + printk(KERN_DEBUG "dnbd3: dnbd3_complete_rq\n"); + +} + +static int dnbd3_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) +{ + struct dnbd3_cmd *cmd = blk_mq_rq_to_pdu(rq); + cmd->dnbd3 = set->driver_data; + cmd->flags = 0; + mutex_init(&cmd->lock); + return 0; +} +static enum blk_eh_timer_return dnbd3_xmit_timeout(struct request *req, bool reserved) +{ + printk(KERN_DEBUG "dnbd3: dnbd3_xmit_timeout\n"); + return BLK_EH_DONE; +} + + +static struct blk_mq_ops dnbd3_mq_ops = { + .queue_rq = dnbd3_queue_rq, + .complete = dnbd3_complete_rq, + .init_request = dnbd3_init_request, + .timeout = dnbd3_xmit_timeout, +}; + + + + +static void dnbd3_blk_fail_all_requests(dnbd3_device *dev) +{ + printk(KERN_DEBUG "dnbd3: fail all requests device %i\n", dev->minor); +} + + + +static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) +{ + int result = -100; + dnbd3_device *dev = bdev->bd_disk->private_data; + char *imgname = NULL; + dnbd3_ioctl_t *msg = NULL; + + printk(KERN_DEBUG "dnbd3: ioctl device %i, cmd %i, arg %lu\n", dev->minor, cmd, arg); + //unsigned long irqflags; + +// while (dev->disconnecting) { +// // do nothing +// } + + if (arg != 0) { + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (msg == NULL) return -ENOMEM; + if (copy_from_user((char *)msg, (char *)arg, 2) != 0 || msg->len != sizeof(*msg)) { + result = -ENOEXEC; + goto cleanup_return; + } + if (copy_from_user((char *)msg, (char *)arg, sizeof(*msg)) != 0) { + result = -ENOENT; + goto cleanup_return; + } + if (msg->imgname != NULL && msg->imgnamelen > 0) { + imgname = kmalloc(msg->imgnamelen + 1, GFP_KERNEL); + if (imgname == NULL) { + result = -ENOMEM; + goto cleanup_return; + } + if (copy_from_user(imgname, msg->imgname, msg->imgnamelen) != 0) { + result = -ENOENT; + goto cleanup_return; + } + imgname[msg->imgnamelen] = '\0'; + + printk(KERN_DEBUG "dnbd3: ioctl image name of len %i is %s\n", (int)msg->imgnamelen, imgname); + } + } + + mutex_lock(&dev->device_lock); + switch (cmd) { + case IOCTL_OPEN: + printk(KERN_DEBUG "dnbd3: ioctl open\n"); + if (dev->imgname != NULL) { + result = -EBUSY; + } else if (imgname == NULL) { + result = -EINVAL; + } else if (msg == NULL) { + result = -EINVAL; + } else { + if (sizeof(msg->host) != sizeof(dev->initial_server.host)) { + printk(KERN_INFO "dnbd3: odd size bug#1 triggered in ioctl\n"); + } + memcpy(&dev->initial_server.host, &msg->host, sizeof(msg->host)); + dev->initial_server.failures = 0; +// memcpy(&dev->initial_server, &dev->cur_server, sizeof(dev->initial_server)); + dev->imgname = imgname; + dev->rid = msg->rid; + dev->use_server_provided_alts = msg->use_server_provided_alts; + // Forget all alt servers on explicit connect, set first alt server to initial server + memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])*NUMBER_SERVERS); + memcpy(dev->alt_servers, &dev->initial_server, sizeof(dev->alt_servers[0])); +//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +// if (blk_queue->backing_dev_info != NULL) { +// blk_queue->backing_dev_info->ra_pages = (msg->read_ahead_kb * 1024) / PAGE_SIZE; +// } +//#else +// blk_queue->backing_dev_info.ra_pages = (msg->read_ahead_kb * 1024) / PAGE_SIZE; +//#endif + + + result = dnbd3_net_connect(dev); + imgname = NULL; + } + break; + + case IOCTL_CLOSE: + printk(KERN_DEBUG "dnbd3: ioctl close\n"); + dnbd3_blk_fail_all_requests(dev); + dnbd3_net_disconnect(dev); + dnbd3_blk_fail_all_requests(dev); + set_capacity(dev->disk, 0); + if (dev->imgname) { + kfree(dev->imgname); + dev->imgname = NULL; + } + break; + + case IOCTL_SWITCH: + printk(KERN_DEBUG "dnbd3: ioctl switch\n"); + result = -EINVAL; + break; + + case IOCTL_ADD_SRV: + case IOCTL_REM_SRV: + printk(KERN_DEBUG "dnbd3: ioctl add/rem srv\n"); + if (dev->imgname == NULL) { + result = -ENOENT; + } else if (dev->new_servers_num >= NUMBER_SERVERS) { + result = -EAGAIN; + } else if (msg == NULL) { + result = -EINVAL; + } else { + memcpy(&dev->new_servers[dev->new_servers_num].host, &msg->host, sizeof(msg->host)); + dev->new_servers[dev->new_servers_num].failures = (cmd == IOCTL_ADD_SRV ? 0 : 1); // 0 = ADD, 1 = REM + ++dev->new_servers_num; + result = 0; + } + break; + + case BLKFLSBUF: + printk(KERN_DEBUG "dnbd3: ioctl blkflsbuf\n"); + result = 0; + break; + + default: + printk(KERN_DEBUG "dnbd3: ioctl unhandled cmd %d\n", cmd); + result = -EIO; + break; + } + mutex_unlock(&dev->device_lock); +cleanup_return: + if (msg) kfree(msg); + if (imgname) kfree(imgname); + return result; + +} + + + +static struct block_device_operations dnbd3_fops = +{ + .owner = THIS_MODULE, + .ioctl = dnbd3_ioctl, + .compat_ioctl = dnbd3_ioctl, +}; + + + + + +int dnbd3_add_device(dnbd3_device *dev, int minor) { struct gendisk *disk; struct request_queue *q; int err = -ENOMEM; printk(KERN_DEBUG "dnbd3: adding device %i\n", minor); + mutex_init(&dev->device_lock); + mutex_lock(&dev->device_lock); disk = alloc_disk(1); if (!disk) { @@ -129,6 +400,9 @@ static int dnbd3_add_device(dnbd3_device_t *dev, int minor) printk(KERN_DEBUG "dnbd3: add disk device %s\n", disk->disk_name); add_disk(disk); dnbd3_sysfs_init(dev); + + + mutex_unlock(&dev->device_lock); return minor; out_free_tags: @@ -139,12 +413,14 @@ out_free_disk: put_disk(disk); out_free_dnbd3: kfree(dev); + mutex_unlock(&dev->device_lock); printk(KERN_DEBUG "dnbd3: destroy device %i\n", minor); return err; } + static int __init dnbd3_init(void) { int i; @@ -156,8 +432,8 @@ static int __init dnbd3_init(void) } - dnbd3_device = kcalloc(max_devs, sizeof(*dnbd3_device), GFP_KERNEL); - if (!dnbd3_device) { + device = kcalloc(max_devs, sizeof(*device), GFP_KERNEL); + if (!device) { printk(KERN_ERR "dnbd3: failed to create dnbd3 device\n"); return -ENOMEM; } @@ -174,7 +450,7 @@ static int __init dnbd3_init(void) // add MAX_NUMBER_DEVICES devices mutex_lock(&dnbd3_index_mutex); for (i = 0; i < max_devs; i++) { - dnbd3_add_device(&dnbd3_device[i], i); + dnbd3_add_device(&device[i], i); } mutex_unlock(&dnbd3_index_mutex); @@ -187,13 +463,13 @@ static int __init dnbd3_init(void) static int dnbd3_exit_cb(int id, void *ptr, void *data) { struct list_head *list = (struct list_head *)data; - struct dnbd3_device_t *dnbd3 = ptr; + struct dnbd3_device *dnbd3 = ptr; list_add_tail(&dnbd3->list, list); return 0; } -static void dnbd3_dev_remove(struct dnbd3_device_t *dnbd3) +static void dnbd3_dev_remove(struct dnbd3_device *dnbd3) { struct gendisk *disk = dnbd3->disk; struct request_queue *q; @@ -209,7 +485,7 @@ static void dnbd3_dev_remove(struct dnbd3_device_t *dnbd3) } } -static void dnbd3_put(struct dnbd3_device_t *dnbd3) +static void dnbd3_put(struct dnbd3_device *dnbd3) { mutex_lock(&dnbd3_index_mutex); idr_remove(&dnbd3_index_idr, dnbd3->minor); @@ -220,7 +496,7 @@ static void dnbd3_put(struct dnbd3_device_t *dnbd3) static void __exit dnbd3_exit(void) { - dnbd3_device_t *dnbd3; + dnbd3_device *dnbd3; LIST_HEAD(del_list); printk(KERN_DEBUG "dnbd3: stopping kernel module\n"); @@ -229,7 +505,7 @@ static void __exit dnbd3_exit(void) mutex_unlock(&dnbd3_index_mutex); while (!list_empty(&del_list)) { - dnbd3 = list_first_entry(&del_list, struct dnbd3_device_t, list); + dnbd3 = list_first_entry(&del_list, struct dnbd3_device, list); dnbd3_sysfs_exit(dnbd3); list_del_init(&dnbd3->list); dnbd3_put(dnbd3); @@ -238,7 +514,7 @@ static void __exit dnbd3_exit(void) idr_destroy(&dnbd3_index_idr); unregister_blkdev(major, "dnbd3"); - kfree(dnbd3_device); + kfree(device); printk(KERN_INFO "dnbd3: stopped kernel module\n"); } diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h index 7ad6b2f..57f26f6 100644 --- a/src/kernel/dnbd3.h +++ b/src/kernel/dnbd3.h @@ -33,22 +33,27 @@ #define NUMBER_CONNECTIONS 4 -typedef struct dnbd3_server_t { +typedef struct dnbd3_server { dnbd3_host_t host; uint64_t rtts[4]; // Last four round trip time measurements in microsecond uint16_t protocol_version; // dnbd3 protocol version of this server uint8_t failures; // How many times the server was unreachable -} dnbd3_server_t; +} dnbd3_server; typedef struct dnbd3_sock { struct socket *sock; struct mutex lock; struct request *pending; - struct dnbd3_server_t *server; + struct dnbd3_server *server; + uint32_t heartbeat_count; + uint8_t panic, discover, panic_count; +// struct dnbd3_device *container_of; + struct work_struct keepalive; + struct timer_list keepalive_timer; } dnbd3_sock; -typedef struct dnbd3_device_t { +typedef struct dnbd3_device { int minor; struct blk_mq_tag_set tag_set; struct list_head list; @@ -59,28 +64,32 @@ typedef struct dnbd3_device_t { // sysfs struct kobject kobj; + struct mutex device_lock; + // network dnbd3_sock socks[NUMBER_CONNECTIONS]; char *imgname; // struct socket *sock; // struct mutex socket_lock; // struct request *pending; - dnbd3_server_t initial_server; + dnbd3_server initial_server; // dnbd3_server_t cur_server, initial_server; // uint64_t cur_rtt; // serialized_buffer_t payload_buffer; - dnbd3_server_t alt_servers[NUMBER_SERVERS]; // array of alt servers + dnbd3_server alt_servers[NUMBER_SERVERS]; // array of alt servers int new_servers_num; // number of new alt servers that are waiting to be copied to above array dnbd3_server_entry_t new_servers[NUMBER_SERVERS]; // pending new alt servers // uint8_t discover, panic, disconnecting, update_available, panic_count; uint8_t update_available; uint8_t use_server_provided_alts; uint16_t rid; - uint32_t heartbeat_count; +// uint32_t heartbeat_count; uint64_t reported_size; // server switch // struct socket *better_sock; + struct work_struct discovery; // if in irq and need to send request + struct timer_list discovery_timer; // process // struct task_struct * thread_send; @@ -93,11 +102,11 @@ typedef struct dnbd3_device_t { // struct list_head request_queue_send; // struct list_head request_queue_receive; -} dnbd3_device_t; +} dnbd3_device; typedef struct dnbd3_cmd { - struct dnbd3_device_t *dnbd3; + struct dnbd3_device *dnbd3; struct mutex lock; int index; int cookie; diff --git a/src/kernel/mq.c b/src/kernel/mq.c deleted file mode 100644 index 98e6e14..0000000 --- a/src/kernel/mq.c +++ /dev/null @@ -1,237 +0,0 @@ -/* - * mq.c - * - * Created on: Jun 26, 2019 - * Author: fred - */ - -#include - -#include "mq.h" -#include "block.h" - -#define DNBD3_CMD_REQUEUED 1 - - -#define dnbd3_priv_to_cmd(req) ((req)->cmd_flags >> REQ_FLAG_BITS) - -static int dnbd3_send_cmd(struct dnbd3_device_t *dev, struct dnbd3_sock *sock, struct dnbd3_cmd *cmd, int index) -{ - struct request *req = blk_mq_rq_from_pdu(cmd); - dnbd3_request_t dnbd3_request; - dnbd3_reply_t dnbd3_reply; - struct msghdr msg; - struct kvec iov; - struct req_iterator iter; - struct bio_vec bvec_inst; - struct bio_vec *bvec = &bvec_inst; - sigset_t blocked, oldset; - void *kaddr; - int result; - sock->pending = req; - init_msghdr(msg); - - dnbd3_request.magic = dnbd3_packet_magic; - - switch (req_op(req)) { -// case REQ_OP_DISCARD: -// printk(KERN_DEBUG "dnbd3: request operation discard on device %d\n", dev->minor); -// break; -// case REQ_OP_FLUSH: -// printk(KERN_DEBUG "dnbd3: request operation flush on device %d\n", dev->minor); -// break; -// case REQ_OP_WRITE: -// printk(KERN_DEBUG "dnbd3: request operation write on device %d\n", dev->minor); -// break; - case REQ_OP_READ: - printk(KERN_DEBUG "dnbd3: request operation read on device %d\n", dev->minor); - dnbd3_request.cmd = CMD_GET_BLOCK; - dnbd3_request.offset = blk_rq_pos(req) << 9; // *512 - dnbd3_request.size = blk_rq_bytes(req); // bytes left to complete entire request - break; - case REQ_OP_DRV_IN: - printk(KERN_DEBUG "dnbd3: request operation driver in on device %d\n", dev->minor); - dnbd3_request.cmd = dnbd3_priv_to_cmd(req); - dnbd3_request.size = 0; - break; - - default: - return -EIO; - } - - dnbd3_request.handle = (uint64_t)(uintptr_t)req; // Double cast to prevent warning on 32bit - fixup_request(dnbd3_request); - iov.iov_base = &dnbd3_request; - iov.iov_len = sizeof(dnbd3_request); - if (kernel_sendmsg(sock->sock, &msg, &iov, 1, sizeof(dnbd3_request)) != sizeof(dnbd3_request)) { - printk(KERN_ERR "dnbd3: connection to server lost\n"); - result = -EIO; - goto error; - } - - // receive net reply - iov.iov_base = &dnbd3_reply; - iov.iov_len = sizeof(dnbd3_reply); - result = kernel_recvmsg(sock->sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags); - if (!result) { - printk(KERN_ERR "dnbd3: connection to server lost\n"); - result = -EIO; - goto error; - - } - fixup_reply(dnbd3_reply); - - // check error - if (dnbd3_reply.magic != dnbd3_packet_magic) { - printk(KERN_ERR "dnbd3: wrong magic packet\n"); - result = -EIO; - goto error; - } - - if (dnbd3_reply.cmd != CMD_GET_BLOCK) { - printk(KERN_ERR "dnbd3: command was %d\n", dnbd3_reply.cmd); - result = -EIO; - goto error; - } - - - - rq_for_each_segment(bvec_inst, req, iter) { - siginitsetinv(&blocked, sigmask(SIGKILL)); - sigprocmask(SIG_SETMASK, &blocked, &oldset); - - kaddr = kmap(bvec->bv_page) + bvec->bv_offset; - iov.iov_base = kaddr; - iov.iov_len = bvec->bv_len; - if (kernel_recvmsg(sock->sock, &msg, &iov, 1, bvec->bv_len, msg.msg_flags) != bvec->bv_len) { - kunmap(bvec->bv_page); - sigprocmask(SIG_SETMASK, &oldset, NULL ); - printk(KERN_ERR "dnbd3: could not receive form net to block layer\n"); - goto error; - } - kunmap(bvec->bv_page); - - sigprocmask(SIG_SETMASK, &oldset, NULL ); - } - blk_mq_end_request(req, 0); - sock->pending = NULL; -error: - return result; -} - -static void dnbd3_requeue_cmd(struct dnbd3_cmd *cmd) -{ - struct request *req = blk_mq_rq_from_pdu(cmd); - - if (!test_and_set_bit(DNBD3_CMD_REQUEUED, &cmd->flags)) { - blk_mq_requeue_request(req, true); - } -} - -static int dnbd3_handle_cmd(struct dnbd3_cmd *cmd, int index) -{ - struct request *req = blk_mq_rq_from_pdu(cmd); - struct dnbd3_device_t *dev = cmd->dnbd3; - struct dnbd3_sock *sock; - int ret = -1; - - printk(KERN_DEBUG "dnbd3: handle request at position %lu and size %d, device %i\n", blk_rq_pos(req), blk_rq_bytes(req), dev->minor); - -// if (index >= 1) { // TODO use next server with good rtt for this request -// printk(KERN_INFO "dnbd3: index is %d", index); -// dev_err_ratelimited(disk_to_dev(dev->disk), "attempted send on invalid socket\n"); -// blk_mq_start_request(req); -// return -EINVAL; -// } - - sock = &dev->socks[index]; - if (!sock->sock) { - printk(KERN_INFO "dnbd3: index is %d but no socket was found\n", index); - dev_err_ratelimited(disk_to_dev(dev->disk), "attempted send on invalid socket\n"); - blk_mq_start_request(req); - return -EINVAL; - } - - - cmd->status = BLK_STS_OK; - -again: - - - mutex_lock(&sock->lock); - if (unlikely(!sock->sock)) { - mutex_unlock(&sock->lock); - printk(KERN_DEBUG "dnbd3: not connected\n"); - return -EIO; - } - - blk_mq_start_request(req); - if (unlikely(sock->pending && sock->pending != req)) { - dnbd3_requeue_cmd(cmd); - ret = 0; - goto out; - } - - ret = dnbd3_send_cmd(dev, sock, cmd, index); - if (ret == -EAGAIN) { - dev_err_ratelimited(disk_to_dev(dev->disk), "request send failed, requeueing\n"); - dnbd3_requeue_cmd(cmd); - ret = 0; - } -out: - mutex_unlock(&sock->lock); - return ret; -} - -static blk_status_t dnbd3_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) -{ - struct dnbd3_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); - int ret; - struct dnbd3_device_t *dev = cmd->dnbd3; - - printk(KERN_DEBUG "dnbd3: queue request device %i\n", dev->minor); - - - mutex_lock(&cmd->lock); - clear_bit(DNBD3_CMD_REQUEUED, &cmd->flags); - - - ret = dnbd3_handle_cmd(cmd, hctx->queue_num); - if (ret < 0) { - ret = BLK_STS_IOERR; - } else if (!ret) { - ret = BLK_STS_OK; - } - mutex_unlock(&cmd->lock); - - return ret; -} - -static void dnbd3_complete_rq(struct request *req) -{ - printk(KERN_DEBUG "dnbd3: dnbd3_complete_rq\n"); - -} - -static int dnbd3_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) -{ - struct dnbd3_cmd *cmd = blk_mq_rq_to_pdu(rq); - cmd->dnbd3 = set->driver_data; - cmd->flags = 0; - mutex_init(&cmd->lock); - return 0; -} -static enum blk_eh_timer_return dnbd3_xmit_timeout(struct request *req, bool reserved) -{ - printk(KERN_DEBUG "dnbd3: dnbd3_xmit_timeout\n"); - return BLK_EH_DONE; -} - - -struct blk_mq_ops dnbd3_mq_ops = { - .queue_rq = dnbd3_queue_rq, - .complete = dnbd3_complete_rq, - .init_request = dnbd3_init_request, - .timeout = dnbd3_xmit_timeout, -}; - diff --git a/src/kernel/mq.h b/src/kernel/mq.h deleted file mode 100644 index 9f3cde8..0000000 --- a/src/kernel/mq.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * mq.h - * - * Created on: Jun 26, 2019 - * Author: fred - */ - -#ifndef SRC_KERNEL_MQ_H_ -#define SRC_KERNEL_MQ_H_ - -#include "dnbd3.h" - -extern struct blk_mq_ops dnbd3_mq_ops; - - -#endif /* SRC_KERNEL_MQ_H_ */ diff --git a/src/kernel/net.c b/src/kernel/net.c new file mode 100644 index 0000000..f44925f --- /dev/null +++ b/src/kernel/net.c @@ -0,0 +1,528 @@ +/* + * This file is part of the Distributed Network Block Device 3 + * + * Copyright(c) 2019 Frederic Robra + * Parts copyright 2011-2012 Johann Latocha + * + * This file may be licensed under the terms of of the + * GNU General Public License Version 2 (the ``GPL''). + * + * Software distributed under the License is distributed + * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either + * express or implied. See the GPL for the specific language + * governing rights and limitations. + * + * You should have received a copy of the GPL along with this + * program. If not, go to http://www.gnu.org/licenses/gpl.html + * or write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + + + +#include + +#include "dnbd3.h" +#include "clientconfig.h" + +#define dnbd3_cmd_to_priv(req, cmd) (req)->cmd_flags = REQ_OP_DRV_IN | ((cmd) << REQ_FLAG_BITS) +#define dnbd3_priv_to_cmd(req) ((req)->cmd_flags >> REQ_FLAG_BITS) +#define dnbd3_req_op(req) req_op(req) +#define dnbd3_sock_create(af,type,proto,sock) sock_create_kern(&init_net, (af) == HOST_IP4 ? AF_INET : AF_INET6, type, proto, sock) + +#define KEEPALIVE_TIMER (TIMER_INTERVAL_KEEPALIVE_PACKET * (jiffies + HZ)) +#define DISCOVERY_TIMER (TIMER_INTERVAL_PROBE_NORMAL * (jiffies + HZ)) + +#define init_msghdr(h) do { \ + h.msg_name = NULL; \ + h.msg_namelen = 0; \ + h.msg_control = NULL; \ + h.msg_controllen = 0; \ + h.msg_flags = MSG_WAITALL | MSG_NOSIGNAL; \ + } while (0) + + +static void printHost(struct dnbd3_host_t *host, char *msg) +{ + if (host->type == HOST_IP4) { + printk(KERN_INFO "dnbd3: %s %pI4:%d\n", msg, host->addr, host->port); + } else { + printk(KERN_INFO "dnbd3: %s [%pI6]:%d\n", msg, host->addr, host->port); + } +} + +static void printServerList(struct dnbd3_device *dev) +{ + int i; + printHost(&dev->initial_server.host, "initial server is"); + for (i = 0; i < NUMBER_SERVERS; i++) { + if (dev->alt_servers[i].host.addr[0] != 0) { + printHost(&dev->alt_servers[i].host, "alternative server is"); + } + } +} + +int dnbd3_send_request(struct dnbd3_device *dev, struct dnbd3_sock *sock, struct request *req) +{ + dnbd3_request_t dnbd3_request; + dnbd3_reply_t dnbd3_reply; + struct msghdr msg; + struct kvec iov; + struct req_iterator iter; + struct bio_vec bvec_inst; + struct bio_vec *bvec = &bvec_inst; + sigset_t blocked, oldset; + void *kaddr; + int result, count, remaining; + uint16_t rid; + sock->pending = req; + init_msghdr(msg); + + dnbd3_request.magic = dnbd3_packet_magic; + + switch (req_op(req)) { +// case REQ_OP_DISCARD: +// printk(KERN_DEBUG "dnbd3: request operation discard on device %d\n", dev->minor); +// break; +// case REQ_OP_FLUSH: +// printk(KERN_DEBUG "dnbd3: request operation flush on device %d\n", dev->minor); +// break; +// case REQ_OP_WRITE: +// printk(KERN_DEBUG "dnbd3: request operation write on device %d\n", dev->minor); +// break; + case REQ_OP_READ: + printk(KERN_DEBUG "dnbd3: request operation read\n"); + dnbd3_request.cmd = CMD_GET_BLOCK; + dnbd3_request.offset = blk_rq_pos(req) << 9; // *512 + dnbd3_request.size = blk_rq_bytes(req); // bytes left to complete entire request + break; + case REQ_OP_DRV_IN: + printk(KERN_DEBUG "dnbd3: request operation driver in\n"); + dnbd3_request.cmd = dnbd3_priv_to_cmd(req); + dnbd3_request.size = 0; + break; + + default: + return -EIO; + } + + dnbd3_request.handle = (uint64_t)(uintptr_t)req; // Double cast to prevent warning on 32bit + fixup_request(dnbd3_request); + iov.iov_base = &dnbd3_request; + iov.iov_len = sizeof(dnbd3_request); + if (kernel_sendmsg(sock->sock, &msg, &iov, 1, sizeof(dnbd3_request)) != sizeof(dnbd3_request)) { + printk(KERN_ERR "dnbd3: connection to server lost\n"); + result = -EIO; + goto error; + } + + // receive net reply + iov.iov_base = &dnbd3_reply; + iov.iov_len = sizeof(dnbd3_reply); + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags); + if (!result) { + printk(KERN_ERR "dnbd3: connection to server lost\n"); + result = -EIO; + goto error; + + } + fixup_reply(dnbd3_reply); + + // check error + if (dnbd3_reply.magic != dnbd3_packet_magic) { + printk(KERN_ERR "dnbd3: wrong magic packet\n"); + result = -EIO; + goto error; + } + + if (dnbd3_reply.cmd == 0) { + printk(KERN_ERR "dnbd3: command was 0\n"); + result = -EIO; + goto error; + } + + + switch (dnbd3_reply.cmd) { + case CMD_GET_BLOCK: + rq_for_each_segment(bvec_inst, req, iter) { + siginitsetinv(&blocked, sigmask(SIGKILL)); + sigprocmask(SIG_SETMASK, &blocked, &oldset); + + kaddr = kmap(bvec->bv_page) + bvec->bv_offset; + iov.iov_base = kaddr; + iov.iov_len = bvec->bv_len; + if (kernel_recvmsg(sock->sock, &msg, &iov, 1, bvec->bv_len, msg.msg_flags) != bvec->bv_len) { + kunmap(bvec->bv_page); + sigprocmask(SIG_SETMASK, &oldset, NULL ); + printk(KERN_ERR "dnbd3: could not receive form net to block layer\n"); + goto error; + } + kunmap(bvec->bv_page); + + sigprocmask(SIG_SETMASK, &oldset, NULL ); + } + blk_mq_end_request(req, 0); + break; + case CMD_GET_SERVERS: + printk(KERN_DEBUG "dnbd3: get servers received\n"); + mutex_lock(&dev->device_lock); + if (!dev->use_server_provided_alts) { + remaining = dnbd3_reply.size; + goto consume_payload; + } + dev->new_servers_num = 0; + count = MIN(NUMBER_SERVERS, dnbd3_reply.size / sizeof(dnbd3_server_entry_t)); + + if (count != 0) { + iov.iov_base = dev->new_servers; + iov.iov_len = count * sizeof(dnbd3_server_entry_t); + if (kernel_recvmsg(sock->sock, &msg, &iov, 1, (count * sizeof(dnbd3_server_entry_t)), msg.msg_flags) != (count * sizeof(dnbd3_server_entry_t))) { + printk(KERN_ERR "dnbd3: failed to get servers\n"); + mutex_unlock(&dev->device_lock); + goto error; + } + dev->new_servers_num = count; + } + // If there were more servers than accepted, remove the remaining data from the socket buffer + remaining = dnbd3_reply.size - (count * sizeof(dnbd3_server_entry_t)); +consume_payload: + while (remaining > 0) { + count = MIN(sizeof(dnbd3_reply), remaining); // Abuse the reply struct as the receive buffer + iov.iov_base = &dnbd3_reply; + iov.iov_len = count; + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags); + if (result <= 0) { + printk(KERN_ERR "dnbd3: failed to receive payload from get servers\n"); + mutex_unlock(&dev->device_lock); + goto error; + } + result = 0; + } + mutex_unlock(&dev->device_lock); + break; + case CMD_LATEST_RID: + if (dnbd3_reply.size != 2) { + printk(KERN_ERR "dnbd3: failed to get latest rid, wrong size\n"); + goto error; + } + printk(KERN_DEBUG "dnbd3: latest rid received\n"); + iov.iov_base = &rid; + iov.iov_len = sizeof(rid); + if (kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags) <= 0) { + printk(KERN_ERR "dnbd3: failed to get latest rid\n"); + goto error; + } + rid = net_order_16(rid); + printk("Latest rid of %s is %d (currently using %d)\n", dev->imgname, (int)rid, (int)dev->rid); + dev->update_available = (rid > dev->rid ? 1 : 0); + break; + case CMD_KEEPALIVE: + if (dnbd3_reply.size != 0) { + printk(KERN_ERR "dnbd3: got keep alive packet with payload\n"); + goto error; + } + printk(KERN_DEBUG "dnbd3: keep alive received\n"); + break; + + default: + printk("ERROR: Unknown command (Receive)\n"); + break; + + } + sock->pending = NULL; +error: + return result; +} + + +void dnbd3_keepalive(struct timer_list *arg) +{ + struct dnbd3_sock *sock = container_of(arg, struct dnbd3_sock, keepalive_timer); + printk(KERN_DEBUG "dnbd3: schedule keepalive\n"); +// schedule_work(&sock->keepalive); + sock->keepalive_timer.expires = KEEPALIVE_TIMER; + add_timer(&sock->keepalive_timer); +} + +static void keepalive(struct work_struct *work) +{ + struct dnbd3_sock *sock = container_of(work, struct dnbd3_sock, keepalive); + struct request *req; + mutex_lock(&sock->lock); + req = kmalloc(sizeof(struct request), GFP_ATOMIC ); + // send keepalive + if (req) { + dnbd3_cmd_to_priv(req, CMD_KEEPALIVE); + dnbd3_send_request(NULL, sock, req); // we do not need the device for keepalive + kfree(req); + } else { + printk(KERN_WARNING "dnbd3: could not create keepalive request\n"); + } + ++sock->heartbeat_count; + mutex_unlock(&sock->lock); +} + +void dnbd3_discovery(struct timer_list *arg) +{ + struct dnbd3_device *dev = container_of(arg, struct dnbd3_device, discovery_timer); + printk(KERN_DEBUG "dnbd3: schedule discovery\n"); +// schedule_work(&dev->discovery); + dev->discovery_timer.expires = DISCOVERY_TIMER; + add_timer(&dev->discovery_timer); +} + +static void discovery(struct work_struct *work) +{ + struct dnbd3_device *dev = container_of(work, struct dnbd3_device, discovery); + dnbd3_sock *sock = &dev->socks[0]; // we use the first sock for discovery + struct request *req; + mutex_lock(&sock->lock); + req = kmalloc(sizeof(struct request), GFP_ATOMIC ); + // send keepalive + if (req) { + dnbd3_cmd_to_priv(req, CMD_GET_SERVERS); + dnbd3_send_request(NULL, sock, req); // we do not need the device for keepalive + kfree(req); + } else { + printk(KERN_WARNING "dnbd3: could not create get servers request\n"); + } + mutex_unlock(&sock->lock); +} + + +static int dnbd3_socket_connect(dnbd3_device *dev, dnbd3_sock *sock) +{ + int result = -EIO; + struct request *req1 = NULL; + struct timeval timeout; + struct dnbd3_server *server = sock->server; + dnbd3_request_t dnbd3_request; + dnbd3_reply_t dnbd3_reply; + struct msghdr msg; + struct kvec iov[2]; + uint16_t rid; + uint64_t reported_size; + char *name; + int mlen; + serialized_buffer_t payload_buffer; + + printk(KERN_DEBUG "dnbd3: socket connect device %i\n", dev->minor); + + mutex_init(&sock->lock); + mutex_lock(&sock->lock); + if (sock->pending) { + printk(KERN_DEBUG "dnbd3: socket still in request\n"); + while (sock->pending) + schedule(); + } + if (server->host.port == 0 || server->host.type == 0) { + printk(KERN_ERR "dnbd3: host or port not set\n"); + goto error; + } + if (sock->sock) { + printk(KERN_WARNING "dnbd3: socket already connected\n"); + goto error; + } + + timeout.tv_sec = SOCKET_TIMEOUT_CLIENT_DATA; + timeout.tv_usec = 0; + + req1 = kmalloc(sizeof(*req1), GFP_ATOMIC ); + if (!req1) { + printk(KERN_ERR "dnbd3: kmalloc failed\n"); + goto error; + } + + init_msghdr(msg); + + if (dnbd3_sock_create(server->host.type, SOCK_STREAM, IPPROTO_TCP, &sock->sock) < 0) { + printk(KERN_ERR "dnbd3: could not create socket\n"); + goto error; + } + + kernel_setsockopt(sock->sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout, sizeof(timeout)); + kernel_setsockopt(sock->sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, sizeof(timeout)); + sock->sock->sk->sk_allocation = GFP_NOIO; + if (server->host.type == HOST_IP4) { + struct sockaddr_in sin; + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + memcpy(&(sin.sin_addr), server->host.addr, 4); + sin.sin_port = server->host.port; + if (kernel_connect(sock->sock, (struct sockaddr *)&sin, sizeof(sin), 0) != 0) { + printk(KERN_ERR "dnbd3: connection to host failed (ipv4)\n"); + goto error; + } + } else { + struct sockaddr_in6 sin; + memset(&sin, 0, sizeof(sin)); + sin.sin6_family = AF_INET6; + memcpy(&(sin.sin6_addr), server->host.addr, 16); + sin.sin6_port = server->host.port; + if (kernel_connect(sock->sock, (struct sockaddr *)&sin, sizeof(sin), 0) != 0){ + printk(KERN_ERR "dnbd3: connection to host failed (ipv6)\n"); + goto error; + } + } + // Request filesize + dnbd3_request.magic = dnbd3_packet_magic; + dnbd3_request.cmd = CMD_SELECT_IMAGE; + iov[0].iov_base = &dnbd3_request; + iov[0].iov_len = sizeof(dnbd3_request); + serializer_reset_write(&payload_buffer); + serializer_put_uint16(&payload_buffer, PROTOCOL_VERSION); + serializer_put_string(&payload_buffer, dev->imgname); + serializer_put_uint16(&payload_buffer, dev->rid); + serializer_put_uint8(&payload_buffer, 0); // is_server = false + iov[1].iov_base = &payload_buffer; + dnbd3_request.size = iov[1].iov_len = serializer_get_written_length(&payload_buffer); + fixup_request(dnbd3_request); + mlen = sizeof(dnbd3_request) + iov[1].iov_len; + if (kernel_sendmsg(sock->sock, &msg, iov, 2, mlen) != mlen) { + printk(KERN_ERR "dnbd3: could not send CMD_SIZE_REQUEST\n"); + goto error; + } + // receive reply header + iov[0].iov_base = &dnbd3_reply; + iov[0].iov_len = sizeof(dnbd3_reply); + if (kernel_recvmsg(sock->sock, &msg, iov, 1, sizeof(dnbd3_reply), msg.msg_flags) != sizeof(dnbd3_reply)) { + printk(KERN_ERR "dnbd3: received corrupted reply header after CMD_SIZE_REQUEST\n"); + goto error; + } + + // check reply header + fixup_reply(dnbd3_reply); + if (dnbd3_reply.cmd != CMD_SELECT_IMAGE || + dnbd3_reply.size < 3 || + dnbd3_reply.size > MAX_PAYLOAD || + dnbd3_reply.magic != dnbd3_packet_magic) { + printk(KERN_ERR "dnbd3: received invalid reply to CMD_SIZE_REQUEST image does not exist on server\n"); + goto error; + } + + // receive reply payload + iov[0].iov_base = &payload_buffer; + iov[0].iov_len = dnbd3_reply.size; + if (kernel_recvmsg(sock->sock, &msg, iov, 1, dnbd3_reply.size, msg.msg_flags) != dnbd3_reply.size) { + printk(KERN_ERR "dnbd3: could not read CMD_SELECT_IMAGE payload on handshake\n"); + goto error; + } + + // handle/check reply payload + serializer_reset_read(&payload_buffer, dnbd3_reply.size); + server->protocol_version = serializer_get_uint16(&payload_buffer); + if (server->protocol_version < MIN_SUPPORTED_SERVER) { + printk(KERN_ERR "dnbd3: server version is lower than min supported version\n"); + goto error; + } + + name = serializer_get_string(&payload_buffer); + rid = serializer_get_uint16(&payload_buffer); + if (dev->rid != rid && strcmp(name, dev->imgname) != 0) { + printk(KERN_ERR "dnbd3: server offers image '%s', requested '%s'\n", name, dev->imgname); + goto error; + } + + reported_size = serializer_get_uint64(&payload_buffer); + if (dev->reported_size == NULL) { + if (reported_size < 4096) { + printk(KERN_ERR "dnbd3: reported size by server is < 4096\n"); + goto error; + } + dev->reported_size = reported_size; + set_capacity(dev->disk, dev->reported_size >> 9); /* 512 Byte blocks */ + } else if (dev->reported_size != reported_size) { + printk(KERN_ERR "dnbd3: reported size by server is %llu but should be %llu\n", reported_size, dev->reported_size); + } + + printk(KERN_DEBUG "dnbd3: connected to image %s, filesize %llu\n", dev->imgname, dev->reported_size); + +// TODO add heartbeat + // add heartbeat timer and scheduler for the command + INIT_WORK(&sock->keepalive, keepalive); + sock->heartbeat_count = 0; + timer_setup(&sock->keepalive_timer, dnbd3_keepalive, 0); + sock->keepalive_timer.expires = KEEPALIVE_TIMER; + add_timer(&sock->keepalive_timer); + + mutex_unlock(&sock->lock); + + return 0; +error: + if (sock->sock) { + sock_release(sock->sock); + sock->sock = NULL; + } + if (req1) { + kfree(req1); + } + mutex_unlock(&sock->lock); + return result; +} + +static int dnbd3_socket_disconnect(dnbd3_device *dev, dnbd3_sock *sock) +{ + printk(KERN_DEBUG "dnbd3: socket disconnect device %i\n", dev->minor); + mutex_lock(&sock->lock); + + // clear heartbeat timer + del_timer_sync(&sock->keepalive_timer); +// destroy_workqueue(&sock->keepalive); + + if (sock->sock) { + kernel_sock_shutdown(sock->sock, SHUT_RDWR); + } + + // clear socket + if (sock->sock) { + sock_release(sock->sock); + sock->sock = NULL; + } + + mutex_unlock(&sock->lock); + mutex_destroy(&sock->lock); + return 0; +} + +int dnbd3_net_disconnect(struct dnbd3_device *dev) +{ + int i; + int result; + del_timer_sync(&dev->discovery_timer); +// destroy_workqueue(&dev->discovery); + for (i = 0; i < NUMBER_CONNECTIONS; i++) { + if (dev->socks[i].sock) { + if (dnbd3_socket_disconnect(dev, &dev->socks[i])) { + result = -EIO; + } + } + } + return result; +} + + +int dnbd3_net_connect(struct dnbd3_device *dev) { + // TODO decide which socket to connect + int result; + dev->socks[0].server = &dev->initial_server; + if (dnbd3_socket_connect(dev, &dev->socks[0]) == 0) { + printServerList(dev); + + INIT_WORK(&dev->discovery, discovery); + timer_setup(&dev->discovery_timer, dnbd3_discovery, 0); + dev->discovery_timer.expires = DISCOVERY_TIMER; + add_timer(&dev->discovery_timer); + + result = 0; + } else { + printk(KERN_ERR "dnbd3: failed to connect to initial server\n"); + result = -ENOENT; + dev->imgname = NULL; + dev->socks[0].server = NULL; + } + return result; +} + + + diff --git a/src/kernel/net.h b/src/kernel/net.h new file mode 100644 index 0000000..6cd0ffa --- /dev/null +++ b/src/kernel/net.h @@ -0,0 +1,33 @@ +/* + * This file is part of the Distributed Network Block Device 3 + * + * Copyright(c) 2019 Frederic Robra + * Parts copyright 2011-2012 Johann Latocha + * + * This file may be licensed under the terms of of the + * GNU General Public License Version 2 (the ``GPL''). + * + * Software distributed under the License is distributed + * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either + * express or implied. See the GPL for the specific language + * governing rights and limitations. + * + * You should have received a copy of the GPL along with this + * program. If not, go to http://www.gnu.org/licenses/gpl.html + * or write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + + +#ifndef NET_H_ +#define NET_H_ + + +int dnbd3_send_request(struct dnbd3_device *dev, struct dnbd3_sock *sock, struct request *req); + + +int dnbd3_net_connect(struct dnbd3_device *dev); +int dnbd3_net_disconnect(struct dnbd3_device *dev); + +#endif /* NET_H_ */ diff --git a/src/kernel/sysfs.c b/src/kernel/sysfs.c index 118cbca..b5e180b 100644 --- a/src/kernel/sysfs.c +++ b/src/kernel/sysfs.c @@ -18,9 +18,10 @@ * */ +#include "sysfs.h" + #include -#include "sysfs.h" #include "utils.h" #ifndef MIN @@ -42,7 +43,7 @@ // return MIN(snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)dev->cur_rtt), PAGE_SIZE); //} -ssize_t show_alt_server_num(char *buf, dnbd3_device_t *dev) +ssize_t show_alt_server_num(char *buf, dnbd3_device *dev) { int i, num = 0; for (i = 0; i < NUMBER_SERVERS; ++i) @@ -52,7 +53,7 @@ ssize_t show_alt_server_num(char *buf, dnbd3_device_t *dev) return MIN(snprintf(buf, PAGE_SIZE, "%d\n", num), PAGE_SIZE); } -ssize_t show_alt_servers(char *buf, dnbd3_device_t *dev) +ssize_t show_alt_servers(char *buf, dnbd3_device *dev) { int i, size = PAGE_SIZE, ret; for (i = 0; i < NUMBER_SERVERS; ++i) @@ -84,18 +85,18 @@ ssize_t show_alt_servers(char *buf, dnbd3_device_t *dev) return PAGE_SIZE - size; } -ssize_t show_image_name(char *buf, dnbd3_device_t *dev) +ssize_t show_image_name(char *buf, dnbd3_device *dev) { if (dev->imgname == NULL) return sprintf(buf, "(null)"); return MIN(snprintf(buf, PAGE_SIZE, "%s\n", dev->imgname), PAGE_SIZE); } -ssize_t show_rid(char *buf, dnbd3_device_t *dev) +ssize_t show_rid(char *buf, dnbd3_device *dev) { return MIN(snprintf(buf, PAGE_SIZE, "%d\n", dev->rid), PAGE_SIZE); } -ssize_t show_update_available(char *buf, dnbd3_device_t *dev) +ssize_t show_update_available(char *buf, dnbd3_device *dev) { return MIN(snprintf(buf, PAGE_SIZE, "%d\n", dev->update_available), PAGE_SIZE); } @@ -152,7 +153,7 @@ device_attr_t update_available = ssize_t device_show(struct kobject *kobj, struct attribute *attr, char *buf) { device_attr_t *device_attr = container_of(attr, device_attr_t, attr); - dnbd3_device_t *dev = container_of(kobj, dnbd3_device_t, kobj); + dnbd3_device *dev = container_of(kobj, dnbd3_device, kobj); return device_attr->show(buf, dev); } @@ -187,7 +188,7 @@ struct kobj_type device_ktype = }; -void dnbd3_sysfs_init(dnbd3_device_t *dev) +void dnbd3_sysfs_init(dnbd3_device *dev) { int error; struct kobject *kobj = &dev->kobj; @@ -199,7 +200,7 @@ void dnbd3_sysfs_init(dnbd3_device_t *dev) printk("Error initializing dnbd3 device!\n"); } -void dnbd3_sysfs_exit(dnbd3_device_t *dev) +void dnbd3_sysfs_exit(dnbd3_device *dev) { kobject_put(&dev->kobj); } diff --git a/src/kernel/sysfs.h b/src/kernel/sysfs.h index 0a747a5..b48acc3 100644 --- a/src/kernel/sysfs.h +++ b/src/kernel/sysfs.h @@ -23,22 +23,22 @@ #include "dnbd3.h" -void dnbd3_sysfs_init(dnbd3_device_t *dev); +void dnbd3_sysfs_init(dnbd3_device *dev); -void dnbd3_sysfs_exit(dnbd3_device_t *dev); +void dnbd3_sysfs_exit(dnbd3_device *dev); typedef struct { struct attribute attr; - ssize_t (*show)(char *, dnbd3_device_t *); - ssize_t (*store)(const char *, size_t, dnbd3_device_t *); + ssize_t (*show)(char *, dnbd3_device *); + ssize_t (*store)(const char *, size_t, dnbd3_device *); } device_attr_t; typedef struct { struct attribute attr; - ssize_t (*show)(char *, dnbd3_server_t *); - ssize_t (*store)(const char *, size_t, dnbd3_server_t *); + ssize_t (*show)(char *, dnbd3_server *); + ssize_t (*store)(const char *, size_t, dnbd3_server *); } server_attr_t; diff --git a/src/kernel/utils.c b/src/kernel/utils.c index 902025f..957d69b 100644 --- a/src/kernel/utils.c +++ b/src/kernel/utils.c @@ -18,9 +18,10 @@ * */ +#include "utils.h" + #include -#include "utils.h" unsigned int inet_addr(char *str) { -- cgit v1.2.3-55-g7522