From de01183aa40dbbd274e18f681d8a255a886f493e Mon Sep 17 00:00:00 2001 From: sr Date: Mon, 27 Aug 2012 21:02:49 +0200 Subject: [KERNEL] Refactor and extend sysfs (add data the server will need in proxy mode) [SERVER] Use MSG_MORE instead of cork/uncork to save two syscalls [KERNEL] Fail-Counter for alt servers, ignore servers that fail too often [KERNEL] Add new alt servers to list, instead of replacing the old list [*] Add CMD_LATEST_RID to tell client about new revisions --- src/client/client.c | 8 +-- src/kernel/blk.c | 13 ++-- src/kernel/dnbd3.h | 4 +- src/kernel/net.c | 181 +++++++++++++++++++++++++++++++++++----------------- src/kernel/sysfs.c | 137 ++++++++++++++++++--------------------- src/server/ipc.c | 8 ++- src/server/net.c | 38 +++++------ src/server/server.c | 19 +++++- src/server/server.h | 5 ++ src/server/utils.c | 4 +- src/types.h | 9 +-- 11 files changed, 250 insertions(+), 176 deletions(-) (limited to 'src') diff --git a/src/client/client.c b/src/client/client.c index 57762cc..93aaeb7 100644 --- a/src/client/client.c +++ b/src/client/client.c @@ -170,7 +170,7 @@ int main(int argc, char *argv[]) const int ret = ioctl(fd, IOCTL_CLOSE, &msg); if (ret < 0) { - printf("ERROR: ioctl not successful (close, %s (%d))\n", strerror(ret), ret); + printf("ERROR: ioctl not successful (close, %s (%d))\n", strerror(-ret), ret); exit(EXIT_FAILURE); } @@ -187,7 +187,7 @@ int main(int argc, char *argv[]) const int ret = ioctl(fd, IOCTL_SWITCH, &msg); if (ret < 0) { - printf("ERROR: ioctl not successful (switch, %s (%d))\n", strerror(ret), ret); + printf("ERROR: ioctl not successful (switch, %s (%d))\n", strerror(-ret), ret); exit(EXIT_FAILURE); } @@ -205,7 +205,7 @@ int main(int argc, char *argv[]) const int ret = ioctl(fd, IOCTL_OPEN, &msg); if (ret < 0) { - printf("ERROR: ioctl not successful (connect, %s (%d))\n", strerror(ret), ret); + printf("ERROR: ioctl not successful (connect, %s (%d))\n", strerror(-ret), ret); exit(EXIT_FAILURE); } @@ -242,7 +242,7 @@ int main(int argc, char *argv[]) const int ret = ioctl(fd, IOCTL_OPEN, &msg); if (ret < 0) { - printf("ERROR: ioctl not successful (config file, %s (%d))\n", strerror(ret), ret); + printf("ERROR: ioctl not successful (config file, %s (%d))\n", strerror(-ret), ret); exit(EXIT_FAILURE); } diff --git a/src/kernel/blk.c b/src/kernel/blk.c index 70e8419..21a14f0 100644 --- a/src/kernel/blk.c +++ b/src/kernel/blk.c @@ -35,14 +35,14 @@ int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor) INIT_LIST_HEAD(&dev->request_queue_send); INIT_LIST_HEAD(&dev->request_queue_receive); - memset(&dev->cur_server, 0, sizeof(dnbd3_server_t)); + memset(&dev->cur_server, 0, sizeof(dev->cur_server)); + memset(&dev->initial_server, 0, sizeof(dev->initial_server)); dev->better_sock = NULL; dev->imgname = NULL; dev->rid = 0; dev->update_available = 0; - dev->alt_servers_num = 0; - memset(dev->alt_servers, 0, sizeof(dnbd3_server_t)*NUMBER_SERVERS); + memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])*NUMBER_SERVERS); dev->thread_send = NULL; dev->thread_receive = NULL; dev->thread_discover = NULL; @@ -103,16 +103,16 @@ int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, u dnbd3_device_t *dev = bdev->bd_disk->private_data; struct request_queue *blk_queue = dev->disk->queue; char *imgname = NULL; - dnbd3_ioctl_t *msg = kmalloc(sizeof(dnbd3_ioctl_t), GFP_KERNEL); + dnbd3_ioctl_t *msg = kmalloc(sizeof(*msg), GFP_KERNEL); if (msg == NULL) return -ENOMEM; copy_from_user((char *)msg, (char *)arg, 2); - if (msg->len != sizeof(dnbd3_ioctl_t)) + if (msg->len != sizeof(*msg)) { result = -ENOEXEC; goto cleanup_return; } - copy_from_user((char *)msg, (char *)arg, sizeof(dnbd3_ioctl_t)); + copy_from_user((char *)msg, (char *)arg, sizeof(*msg)); if (msg->imgname != NULL && msg->imgnamelen > 0) { imgname = kmalloc(msg->imgnamelen + 1, GFP_KERNEL); @@ -142,6 +142,7 @@ int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, u memcpy(dev->cur_server.hostaddr, msg->addr, 16); dev->cur_server.port = msg->port; dev->cur_server.hostaddrtype = msg->addrtype; + dev->cur_server.failures = 0; memcpy(&dev->initial_server, &dev->cur_server, sizeof(dev->initial_server)); dev->imgname = imgname; dev->rid = msg->rid; diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h index 51253a1..7c550e3 100644 --- a/src/kernel/dnbd3.h +++ b/src/kernel/dnbd3.h @@ -40,8 +40,7 @@ typedef struct uint16_t protocol_version; // dnbd3 protocol version of this server uint8_t hostaddr[16]; // Address in network representation (IPv4 or IPv6) uint8_t hostaddrtype; // Address type (AF_INET or AF_INET6) - uint8_t skip_count; // Do not check this server the next skip_count times - struct kobject kobj; // SysFS + uint8_t failures; // How many times the server was unreachable } dnbd3_server_t; typedef struct @@ -60,7 +59,6 @@ typedef struct char *imgname; serialized_buffer_t payload_buffer; int rid, update_available; - int alt_servers_num; // number of currently known alt servers dnbd3_server_t alt_servers[NUMBER_SERVERS]; // array of alt servers int new_servers_num; // number of new alt servers that are waiting to be copied to above array dnbd3_server_entry_t new_servers[NUMBER_SERVERS]; // pending new alt servers diff --git a/src/kernel/net.c b/src/kernel/net.c index 6a3b02e..91ea7cd 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -36,6 +36,39 @@ static inline int is_same_server(const dnbd3_server_t * const a, const dnbd3_ser && (0 == memcmp(a->hostaddr, b->hostaddr, (a->hostaddrtype == AF_INET ? 4 : 16))); } +static inline dnbd3_server_t* get_existing_server(const dnbd3_server_entry_t * const newserver, dnbd3_device_t * const dev) +{ + int i; + for (i = 0; i < NUMBER_SERVERS; ++i) + { + if ((newserver->hostaddrtype == dev->alt_servers[i].hostaddrtype) + && (newserver->port == dev->alt_servers[i].port) + && (0 == memcmp(newserver->hostaddr, dev->alt_servers[i].hostaddr, (newserver->hostaddrtype == AF_INET ? 4 : 16)))) + { + return &dev->alt_servers[i]; + break; + } + } + return NULL; +} + +static inline dnbd3_server_t* get_free_alt_server(dnbd3_device_t * const dev) +{ + int i; + for (i = 0; i < NUMBER_SERVERS; ++i) + { + if (dev->alt_servers[i].hostaddrtype == 0) + return &dev->alt_servers[i]; + } + for (i = 0; i < NUMBER_SERVERS; ++i) + { + if (dev->alt_servers[i].failures > 10) + return &dev->alt_servers[i]; + } + return NULL; +} + + int dnbd3_net_connect(dnbd3_device_t *dev) { struct sockaddr_in sin; @@ -195,8 +228,9 @@ int dnbd3_net_connect(dnbd3_device_t *dev) if (req1) // This connection is established to the initial server (from the ioctl call) { - // Set number of known alt servers to 0 - dev->alt_servers_num = 0; + // Forget all known alt servers + memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])*NUMBER_SERVERS); + memcpy(dev->alt_servers, &dev->initial_server, sizeof(dev->alt_servers[0])); // And then enqueue request to request_queue_send for a fresh list of alt servers req1->cmd_type = REQ_TYPE_SPECIAL; req1->cmd_flags = CMD_GET_SERVERS; @@ -324,6 +358,7 @@ int dnbd3_net_discover(void *data) dnbd3_request_t dnbd3_request; dnbd3_reply_t dnbd3_reply; + dnbd3_server_t *alt_server; struct msghdr msg; struct kvec iov[2]; @@ -334,6 +369,7 @@ int dnbd3_net_discover(void *data) struct timeval start, end; unsigned long rtt, best_rtt = 0; + unsigned long irqflags; int i, best_server, current_server; int turn = 0; int ready = 0; @@ -367,39 +403,46 @@ int dnbd3_net_discover(void *data) dev->discover = 0; // Check if the list of alt servers needs to be updated and do so if neccessary - spin_lock_irq(&dev->blk_lock); if (dev->new_servers_num) { + spin_lock_irqsave(&dev->blk_lock, irqflags); for (i = 0; i < dev->new_servers_num; ++i) { - memcpy(dev->alt_servers[i].hostaddr, dev->new_servers[i].ipaddr, 16); - dev->alt_servers[i].hostaddrtype = dev->new_servers[i].addrtype; - dev->alt_servers[i].port = dev->new_servers[i].port; - dev->alt_servers[i].rtts[0] = dev->alt_servers[i].rtts[1] - = dev->alt_servers[i].rtts[2] = dev->alt_servers[i].rtts[3] + if (dev->new_servers[i].hostaddrtype != AF_INET) // Invalid entry.. (Add IPv6) + continue; + alt_server = get_existing_server(&dev->new_servers[i], dev); + if (alt_server != NULL) // Server already known, reset fail counter + { + alt_server->failures = 0; + continue; + } + alt_server = get_free_alt_server(dev); + if (alt_server == NULL) // All NUMBER_SERVERS slots are taken, ignore entry + continue; + // Add new server entry + memcpy(alt_server->hostaddr, dev->new_servers[i].hostaddr, 16); + alt_server->hostaddrtype = dev->new_servers[i].hostaddrtype; + alt_server->port = dev->new_servers[i].port; + alt_server->rtts[0] = alt_server->rtts[1] + = alt_server->rtts[2] = alt_server->rtts[3] = RTT_UNREACHABLE; - dev->alt_servers[i].protocol_version = 0; - dev->alt_servers[i].skip_count = 0; + alt_server->protocol_version = 0; + alt_server->failures = 0; } - dev->alt_servers_num = dev->new_servers_num; dev->new_servers_num = 0; + spin_unlock_irqrestore(&dev->blk_lock, irqflags); } - spin_unlock_irq(&dev->blk_lock); current_server = best_server = -1; best_rtt = 0xFFFFFFFul; - for (i=0; i < dev->alt_servers_num; ++i) + for (i=0; i < NUMBER_SERVERS; ++i) { - if (dev->alt_servers[i].hostaddrtype != AF_INET) // add IPv6.... + if (dev->alt_servers[i].hostaddrtype == 0) // Empty slot + continue; + if (!dev->panic && dev->alt_servers[i].failures > 50) // If not in panic mode, skip server if it failed too many times continue; - if (!dev->panic && dev->alt_servers[i].skip_count) // If not in panic mode, skip server if indicated - { - --dev->alt_servers[i].skip_count; - continue; - } - // Initialize socket and connect if (sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock) < 0) { @@ -569,9 +612,7 @@ int dnbd3_net_discover(void *data) rtt = ( dev->alt_servers[i].rtts[0] + dev->alt_servers[i].rtts[1] + dev->alt_servers[i].rtts[2] - + dev->alt_servers[i].rtts[3] ) >> 2; // ">> 2" == "/ 4", needed to prevent 64bit division on 32bit - printk("RTT: %luµs\n", rtt); - + + dev->alt_servers[i].rtts[3] ) / 4; if (best_rtt > rtt) { // This one is better, keep socket open in case we switch @@ -596,16 +637,17 @@ int dnbd3_net_discover(void *data) continue; - error: - sock_release(sock); - sock = NULL; - dev->alt_servers[i].rtts[turn] = RTT_UNREACHABLE; - if (is_same_server(&dev->cur_server, &dev->alt_servers[i])) - { - dev->cur_rtt = RTT_UNREACHABLE; - current_server = i; - } - continue; + error: + ++dev->alt_servers[i].failures; + sock_release(sock); + sock = NULL; + dev->alt_servers[i].rtts[turn] = RTT_UNREACHABLE; + if (is_same_server(&dev->cur_server, &dev->alt_servers[i])) + { + dev->cur_rtt = RTT_UNREACHABLE; + current_server = i; + } + continue; } if (dev->panic && ++dev->panic_count == 21) @@ -663,6 +705,8 @@ int dnbd3_net_send(void *data) struct msghdr msg; struct kvec iov; + unsigned long irqflags; + init_msghdr(msg); dnbd3_request.magic = dnbd3_packet_magic; @@ -678,14 +722,14 @@ int dnbd3_net_send(void *data) break; // extract block request - spin_lock_irq(&dev->blk_lock); // TODO: http://www.linuxjournal.com/article/5833 says spin_lock_irq should not be used in general, but article is 10 years old + spin_lock_irqsave(&dev->blk_lock, irqflags); if (list_empty(&dev->request_queue_send)) { - spin_unlock_irq(&dev->blk_lock); + spin_unlock_irqrestore(&dev->blk_lock, irqflags); continue; } blk_request = list_entry(dev->request_queue_send.next, struct request, queuelist); - spin_unlock_irq(&dev->blk_lock); + spin_unlock_irqrestore(&dev->blk_lock, irqflags); // what to do? switch (blk_request->cmd_type) @@ -695,25 +739,25 @@ int dnbd3_net_send(void *data) dnbd3_request.offset = blk_rq_pos(blk_request) << 9; // *512 dnbd3_request.size = blk_rq_bytes(blk_request); // bytes left to complete entire request // enqueue request to request_queue_receive - spin_lock_irq(&dev->blk_lock); + spin_lock_irqsave(&dev->blk_lock, irqflags); list_del_init(&blk_request->queuelist); list_add_tail(&blk_request->queuelist, &dev->request_queue_receive); - spin_unlock_irq(&dev->blk_lock); + spin_unlock_irqrestore(&dev->blk_lock, irqflags); break; case REQ_TYPE_SPECIAL: dnbd3_request.cmd = blk_request->cmd_flags; dnbd3_request.size = 0; - spin_lock_irq(&dev->blk_lock); + spin_lock_irqsave(&dev->blk_lock, irqflags); list_del_init(&blk_request->queuelist); - spin_unlock_irq(&dev->blk_lock); + spin_unlock_irqrestore(&dev->blk_lock, irqflags); break; default: printk("ERROR: Unknown command (send)\n"); - spin_lock_irq(&dev->blk_lock); + spin_lock_irqsave(&dev->blk_lock, irqflags); list_del_init(&blk_request->queuelist); - spin_unlock_irq(&dev->blk_lock); + spin_unlock_irqrestore(&dev->blk_lock, irqflags); continue; } @@ -758,8 +802,9 @@ int dnbd3_net_receive(void *data) struct req_iterator iter; struct bio_vec *bvec; void *kaddr; - unsigned long flags; + unsigned long irqflags; sigset_t blocked, oldset; + uint16_t rid; int count, remaining, ret; @@ -811,7 +856,7 @@ int dnbd3_net_receive(void *data) case CMD_GET_BLOCK: // search for replied request in queue blk_request = NULL; - spin_lock_irq(&dev->blk_lock); + spin_lock_irqsave(&dev->blk_lock, irqflags); list_for_each_entry_safe(received_request, tmp_request, &dev->request_queue_receive, queuelist) { if ((uint64_t)(uintptr_t)received_request == dnbd3_reply.handle) // Double cast to prevent warning on 32bit @@ -820,7 +865,7 @@ int dnbd3_net_receive(void *data) break; } } - spin_unlock_irq(&dev->blk_lock); + spin_unlock_irqrestore(&dev->blk_lock, irqflags); if (blk_request == NULL) { printk("ERROR: Received block data for unrequested handle (%llu: %llu).\n", @@ -846,10 +891,10 @@ int dnbd3_net_receive(void *data) sigprocmask(SIG_SETMASK, &oldset, NULL); } - spin_lock_irqsave(&dev->blk_lock, flags); + spin_lock_irqsave(&dev->blk_lock, irqflags); list_del_init(&blk_request->queuelist); __blk_end_request_all(blk_request, 0); - spin_unlock_irqrestore(&dev->blk_lock, flags); + spin_unlock_irqrestore(&dev->blk_lock, irqflags); continue; case CMD_GET_SERVERS: @@ -858,9 +903,9 @@ int dnbd3_net_receive(void *data) remaining = dnbd3_reply.size; goto clear_remaining_payload; } - spin_lock_irq(&dev->blk_lock); + spin_lock_irqsave(&dev->blk_lock, irqflags); dev->new_servers_num = 0; - spin_unlock_irq(&dev->blk_lock); + spin_unlock_irqrestore(&dev->blk_lock, irqflags); count = MIN(NUMBER_SERVERS, dnbd3_reply.size / sizeof(dnbd3_server_entry_t)); if (count != 0) @@ -874,16 +919,16 @@ int dnbd3_net_receive(void *data) } for (remaining = 0; remaining < count; ++remaining) { - if (dev->new_servers[remaining].addrtype == AF_INET) - printk("New Server: %pI4 : %d\n", dev->new_servers[remaining].ipaddr, (int)ntohs(dev->new_servers[remaining].port)); - else if (dev->new_servers[remaining].addrtype == AF_INET6) - printk("New Server: %pI6 : %d\n", dev->new_servers[remaining].ipaddr, (int)ntohs(dev->new_servers[remaining].port)); + if (dev->new_servers[remaining].hostaddrtype == AF_INET) + printk("New Server: %pI4 : %d\n", dev->new_servers[remaining].hostaddr, (int)ntohs(dev->new_servers[remaining].port)); + else if (dev->new_servers[remaining].hostaddrtype == AF_INET6) + printk("New Server: %pI6 : %d\n", dev->new_servers[remaining].hostaddr, (int)ntohs(dev->new_servers[remaining].port)); else - printk("New Server of unknown address type (%d)\n", (int)dev->new_servers[remaining].addrtype); + printk("New Server of unknown address type (%d)\n", (int)dev->new_servers[remaining].hostaddrtype); } - spin_lock_irq(&dev->blk_lock); + spin_lock_irqsave(&dev->blk_lock, irqflags); dev->new_servers_num = count; - spin_unlock_irq(&dev->blk_lock); + spin_unlock_irqrestore(&dev->blk_lock, irqflags); // TODO: Re-Add update check } // If there were more servers than accepted, remove the remaining data from the socket buffer @@ -904,6 +949,26 @@ clear_remaining_payload: } continue; + case CMD_LATEST_RID: + if (dnbd3_reply.size != 2) + { + printk("Error: CMD_LATEST_RID.size != 2.\n"); + continue; + } + iov.iov_base = &rid; + iov.iov_len = sizeof(rid); + if (kernel_recvmsg(dev->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags) <= 0) + { + printk("Error: Could not receive CMD_LATEST_RID payload.\n"); + } + else + { + rid = net_order_16(rid); + printk("Latest rid of %s is %d (currently using %d)\n", dev->imgname, (int)rid, (int)dev->rid); + dev->update_available = (rid > dev->rid ? 1 : 0); + } + continue; + case CMD_KEEPALIVE: if (dnbd3_reply.size != 0) printk("Error: keep alive packet with payload.\n"); @@ -925,13 +990,13 @@ error: while (!list_empty(&dev->request_queue_receive)) { printk("WARN: Request queue was not empty on %s\n", dev->disk->disk_name); - spin_lock_irq(&dev->blk_lock); + spin_lock_irqsave(&dev->blk_lock, irqflags); list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_receive, queuelist) { list_del_init(&blk_request->queuelist); list_add(&blk_request->queuelist, &dev->request_queue_send); } - spin_unlock_irq(&dev->blk_lock); + spin_unlock_irqrestore(&dev->blk_lock, irqflags); } if (dev->sock) kernel_sock_shutdown(dev->sock, SHUT_RDWR); diff --git a/src/kernel/sysfs.c b/src/kernel/sysfs.c index df94d20..ff3b5f9 100644 --- a/src/kernel/sysfs.c +++ b/src/kernel/sysfs.c @@ -23,51 +23,84 @@ #include "sysfs.h" #include "utils.h" -ssize_t show_cur_server_ip(char *buf, dnbd3_device_t *dev) +#ifndef MIN +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#endif + +ssize_t show_cur_server_addr(char *buf, dnbd3_device_t *dev) { - return sprintf(buf, "%pI4\n", dev->cur_server.hostaddr); + if (dev->cur_server.hostaddrtype == AF_INET) + return MIN(snprintf(buf, PAGE_SIZE, "%pI4,%d\n", dev->cur_server.hostaddr, (int)ntohs(dev->cur_server.port)), PAGE_SIZE); + return MIN(snprintf(buf, PAGE_SIZE, "%pI6,%d\n", dev->cur_server.hostaddr, (int)ntohs(dev->cur_server.port)), PAGE_SIZE); } ssize_t show_cur_server_rtt(char *buf, dnbd3_device_t *dev) { - return sprintf(buf, "%llu\n", (unsigned long long)dev->cur_rtt); + return MIN(snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)dev->cur_rtt), PAGE_SIZE); } ssize_t show_alt_server_num(char *buf, dnbd3_device_t *dev) { - return sprintf(buf, "%d\n", dev->alt_servers_num); + int i, num = 0; + for (i = 0; i < NUMBER_SERVERS; ++i) + { + if (dev->alt_servers[i].hostaddrtype) ++num; + } + return MIN(snprintf(buf, PAGE_SIZE, "%d\n", num), PAGE_SIZE); +} + +ssize_t show_alt_servers(char *buf, dnbd3_device_t *dev) +{ + int i, size = PAGE_SIZE, ret; + for (i = 0; i < NUMBER_SERVERS; ++i) + { + if (dev->alt_servers[i].hostaddrtype == AF_INET) + ret = MIN(snprintf(buf, size, "%pI4,%d,%llu,%d\n", + dev->alt_servers[i].hostaddr, + (int)ntohs(dev->alt_servers[i].port), + (unsigned long long)((dev->alt_servers[i].rtts[0] + dev->alt_servers[i].rtts[1] + dev->alt_servers[i].rtts[2] + dev->alt_servers[i].rtts[3]) / 4), + (int)dev->alt_servers[i].failures) + , size); + else if (dev->alt_servers[i].hostaddrtype == AF_INET6) + ret = MIN(snprintf(buf, size, "%pI6,%d,%llu,%d\n", + dev->alt_servers[i].hostaddr, + (int)ntohs(dev->alt_servers[i].port), + (unsigned long long)((dev->alt_servers[i].rtts[0] + dev->alt_servers[i].rtts[1] + dev->alt_servers[i].rtts[2] + dev->alt_servers[i].rtts[3]) / 4), + (int)dev->alt_servers[i].failures) + , size); + else + continue; + size -= ret; + buf += ret; + if (size <= 0) + { + size = 0; + break; + } + } + return PAGE_SIZE - size; } ssize_t show_image_name(char *buf, dnbd3_device_t *dev) { if (dev->imgname == NULL) return sprintf(buf, "(null)"); - return sprintf(buf, "%s\n", dev->imgname); + return MIN(snprintf(buf, PAGE_SIZE, "%s\n", dev->imgname), PAGE_SIZE); } ssize_t show_rid(char *buf, dnbd3_device_t *dev) { - return sprintf(buf, "%d\n", dev->rid); + return MIN(snprintf(buf, PAGE_SIZE, "%d\n", dev->rid), PAGE_SIZE); } ssize_t show_update_available(char *buf, dnbd3_device_t *dev) { - return sprintf(buf, "%d\n", dev->update_available); -} - -ssize_t show_alt_server_ip(char *buf, dnbd3_server_t *srv) -{ - return sprintf(buf, "%pI4\n", srv->hostaddr); -} - -ssize_t show_alt_server_rtt(char *buf, dnbd3_server_t *srv) -{ - return sprintf(buf, "%llu\n", (uint64_t)((srv->rtts[0]+srv->rtts[1]+srv->rtts[2]+srv->rtts[3]) / 4)); + return MIN(snprintf(buf, PAGE_SIZE, "%d\n", dev->update_available), PAGE_SIZE); } -device_attr_t cur_server_ip = +device_attr_t cur_server_addr = { - .attr = {.name = "cur_server_ip", .mode = 0444 }, - .show = show_cur_server_ip, + .attr = {.name = "cur_server_addr", .mode = 0444 }, + .show = show_cur_server_addr, .store = NULL, }; @@ -85,6 +118,13 @@ device_attr_t alt_server_num = .store = NULL, }; +device_attr_t alt_servers = +{ + .attr = {.name = "alt_servers", .mode = 0444 }, + .show = show_alt_servers, + .store = NULL, +}; + device_attr_t image_name = { .attr = {.name = "image_name", .mode = 0444 }, @@ -106,20 +146,6 @@ device_attr_t update_available = .store = NULL, }; -server_attr_t alt_server_ip = -{ - .attr = {.name = "alt_server_ip", .mode = 0444 }, - .show = show_alt_server_ip, - .store = NULL, -}; - -server_attr_t alt_server_rtt = -{ - .attr = {.name = "alt_server_rtt", .mode = 0444 }, - .show = show_alt_server_rtt, - .store = NULL, -}; - ssize_t device_show(struct kobject *kobj, struct attribute *attr, char *buf) { device_attr_t *device_attr = container_of(attr, device_attr_t, attr); @@ -127,41 +153,24 @@ ssize_t device_show(struct kobject *kobj, struct attribute *attr, char *buf) return device_attr->show(buf, dev); } -ssize_t server_show(struct kobject *kobj, struct attribute *attr, char *buf) -{ - server_attr_t *server_attr = container_of(attr, server_attr_t, attr); - dnbd3_server_t *srv = container_of(kobj, dnbd3_server_t, kobj); - return server_attr->show(buf, srv); -} - struct attribute *device_attrs[] = { - &cur_server_ip.attr, + &cur_server_addr.attr, &cur_server_rtt.attr, &alt_server_num.attr, + &alt_servers.attr, &image_name.attr, &rid.attr, &update_available.attr, NULL, }; -struct attribute *server_attrs[] = -{ - &alt_server_ip.attr, - &alt_server_rtt.attr, - NULL, -}; struct sysfs_ops device_ops = { .show = device_show, }; -struct sysfs_ops server_ops = -{ - .show = server_show, -}; - void release(struct kobject *kobj) { kobj->state_initialized = 0; @@ -174,39 +183,17 @@ struct kobj_type device_ktype = .release = release, }; -struct kobj_type server_ktype = -{ - .default_attrs = server_attrs, - .sysfs_ops = &server_ops, - .release = release, -}; void dnbd3_sysfs_init(dnbd3_device_t *dev) { - int i; - char name[15] = "alt_server99"; struct kobject *kobj = &dev->kobj; struct kobj_type *ktype = &device_ktype; struct kobject *parent = &disk_to_dev(dev->disk)->kobj; kobject_init_and_add(kobj, ktype, parent, "net"); - - for (i = 0; i < NUMBER_SERVERS; i++) - { - sprintf(name, "alt_server%d", i); - kobj = &dev->alt_servers[i].kobj; - ktype = &server_ktype; - parent = &dev->kobj; - kobject_init_and_add(kobj, ktype, parent, name); - } } void dnbd3_sysfs_exit(dnbd3_device_t *dev) { - int i; - for (i = 0; i < NUMBER_SERVERS; i++) - { - kobject_put(&dev->alt_servers[i].kobj); - } kobject_put(&dev->kobj); } diff --git a/src/server/ipc.c b/src/server/ipc.c index 5da811c..ece6f07 100644 --- a/src/server/ipc.c +++ b/src/server/ipc.c @@ -403,9 +403,10 @@ void dnbd3_ipc_send(int cmd) if (cmd == IPC_INFO && header.size > 0) { - char* buf = malloc(header.size); + char* buf = malloc(header.size+1); size = recv(client_sock, buf, header.size, MSG_WAITALL); xmlDocPtr doc = xmlReadMemory(buf, size, "noname.xml", NULL, 0); + buf[header.size] = 0; if (doc) { @@ -479,9 +480,10 @@ void dnbd3_ipc_send(int cmd) // xmlDocDump(stdout, doc); - } else + } + else { - printf("ERROR: Failed to parse reply\n"); + printf("ERROR: Failed to parse reply\n-----------\n%s\n-------------\n", buf); } } diff --git a/src/server/net.c b/src/server/net.c index c145beb..49cfb24 100644 --- a/src/server/net.c +++ b/src/server/net.c @@ -39,7 +39,7 @@ #include "../config.h" -static char recv_request_header(int sock, dnbd3_request_t *request) +static inline char recv_request_header(int sock, dnbd3_request_t *request) { int ret; // Read request header from socket @@ -62,10 +62,13 @@ static char recv_request_header(int sock, dnbd3_request_t *request) memlogf("[WARNING] Client tries to send a packet of type %d with %d bytes payload. Dropping client.", (int)request->cmd, (int)request->size); return 0; } +#ifdef _DEBUG + if (_fake_delay) usleep(_fake_delay); +#endif return 1; } -static char recv_request_payload(int sock, uint32_t size, serialized_buffer_t *payload) +static inline char recv_request_payload(int sock, uint32_t size, serialized_buffer_t *payload) { if (size == 0) { @@ -87,10 +90,11 @@ static char recv_request_payload(int sock, uint32_t size, serialized_buffer_t *p return 1; } -static char send_reply(int sock, dnbd3_reply_t *reply, void *payload) +static inline char send_reply(int sock, dnbd3_reply_t *reply, void *payload) { + const unsigned int size = reply->size; fixup_reply(*reply); - if (!payload || reply->size == 0) + if (!payload || size == 0) { if (send(sock, reply, sizeof(dnbd3_reply_t), MSG_WAITALL) != sizeof(dnbd3_reply_t)) { @@ -104,10 +108,10 @@ static char send_reply(int sock, dnbd3_reply_t *reply, void *payload) iov[0].iov_base = reply; iov[0].iov_len = sizeof(dnbd3_reply_t); iov[1].iov_base = payload; - iov[1].iov_len = reply->size; - if (writev(sock, iov, 2) != sizeof(dnbd3_reply_t) + reply->size) + iov[1].iov_len = size; + if (writev(sock, iov, 2) != sizeof(dnbd3_reply_t) + size) { - printf("[DEBUG] Send failed (reply with payload of %d bytes)\n", (int)reply->size); + printf("[DEBUG] Send failed (reply with payload of %u bytes)\n", size); return 0; } } @@ -120,9 +124,6 @@ void *dnbd3_handle_query(void *dnbd3_client) dnbd3_request_t request; dnbd3_reply_t reply; - const int cork = 1; - const int uncork = 0; - dnbd3_image_t *image = NULL; int image_file = -1, image_cache = -1; @@ -216,7 +217,6 @@ void *dnbd3_handle_query(void *dnbd3_client) if (image) while (recv_request_header(client->sock, &request)) { - switch (request.cmd) { @@ -246,12 +246,16 @@ void *dnbd3_handle_query(void *dnbd3_client) break; } - // TODO: Try MSG_MORE instead of cork+uncork if performance ever becomes an issue.. - setsockopt(client->sock, SOL_TCP, TCP_CORK, &cork, sizeof(cork)); reply.cmd = CMD_GET_BLOCK; reply.size = request.size; reply.handle = request.handle; - send_reply(client->sock, &reply, NULL); + + fixup_reply(reply); + if (send(client->sock, &reply, sizeof(dnbd3_reply_t), MSG_MORE) != sizeof(dnbd3_reply_t)) + { + printf("[DEBUG] Sending CMD_GET_BLOCK header failed\n"); + return 0; + } if (request.size == 0) // Request for 0 bytes, done after sending header break; @@ -264,8 +268,6 @@ void *dnbd3_handle_query(void *dnbd3_client) printf("[ERROR] sendfile failed (image to net)\n"); close(client->sock); } - - setsockopt(client->sock, SOL_TCP, TCP_CORK, &uncork, sizeof(uncork)); break; } @@ -343,8 +345,6 @@ void *dnbd3_handle_query(void *dnbd3_client) memlogf("[ERROR] sendfile failed (cache to net)\n"); close(client->sock); } - - setsockopt(client->sock, SOL_TCP, TCP_CORK, &uncork, sizeof(uncork)); break; @@ -353,7 +353,7 @@ void *dnbd3_handle_query(void *dnbd3_client) num = 0; for (i = 0; i < NUMBER_SERVERS; i++) { - if (image->servers[i].addrtype == 0 || image->servers[i].failures > 200) continue; + if (image->servers[i].hostaddrtype == 0 || image->servers[i].failures > 200) continue; memcpy(server_list + num++, image->servers + i, sizeof(dnbd3_server_entry_t)); } reply.cmd = CMD_GET_SERVERS; diff --git a/src/server/server.c b/src/server/server.c index b2a36a5..965c368 100644 --- a/src/server/server.c +++ b/src/server/server.c @@ -37,6 +37,9 @@ #include "memlog.h" int _sock; +#ifdef _DEBUG +int _fake_delay = 0; +#endif pthread_spinlock_t _spinlock; GSList *_dnbd3_clients = NULL; @@ -48,6 +51,9 @@ void dnbd3_print_help(char* argv_0) printf("Usage: %s [OPTIONS]...\n", argv_0); printf("Start the DNBD3 server\n"); printf("-f or --file \t\t Configuration file (default /etc/dnbd3-server.conf)\n"); +#ifdef _DEBUG + printf("-d or --delay \t\t Add a fake network delay of X µs\n"); +#endif printf("-n or --nodaemon \t Start server in foreground\n"); printf("-r or --reload \t\t Reload configuration file\n"); printf("-s or --stop \t\t Stop running dnbd3-server\n"); @@ -119,10 +125,11 @@ int main(int argc, char* argv[]) int demonize = 1; int opt = 0; int longIndex = 0; - static const char *optString = "f:nrsiHV?"; + static const char *optString = "f:d:nrsiHV?"; static const struct option longOpts[] = { { "file", required_argument, NULL, 'f' }, + { "delay", required_argument, NULL, 'd' }, { "nodaemon", no_argument, NULL, 'n' }, { "reload", no_argument, NULL, 'r' }, { "stop", no_argument, NULL, 's' }, @@ -137,8 +144,16 @@ int main(int argc, char* argv[]) switch (opt) { case 'f': - _config_file_name = optarg; + _config_file_name = strdup(optarg); break; + case 'd': +#ifdef _DEBUG + _fake_delay = atoi(optarg); + break; +#else + printf("This option is only available in debug builds.\n\n"); + return EXIT_FAILURE; +#endif case 'n': demonize = 0; break; diff --git a/src/server/server.h b/src/server/server.h index 69d597b..7e19f27 100644 --- a/src/server/server.h +++ b/src/server/server.h @@ -56,6 +56,11 @@ extern pthread_spinlock_t _spinlock; extern char *_config_file_name; extern GSList *_dnbd3_images; // of dnbd3_image_t + +#ifdef _DEBUG +extern int _fake_delay; +#endif + void dnbd3_cleanup(); #endif /* SERVER_H_ */ diff --git a/src/server/utils.c b/src/server/utils.c index 15e51f8..3fe1900 100644 --- a/src/server/utils.c +++ b/src/server/utils.c @@ -260,11 +260,11 @@ void dnbd3_load_config(char *file) gchar **servers = g_key_file_get_string_list(gkf, groups[i], "servers", &num_servers, NULL); if (servers) for (k = 0, j = 0; j < MIN(num_servers, NUMBER_SERVERS); ++j) { - if (parse_address(servers[j], &(image->servers[k].addrtype), image->servers[k].ipaddr, &(image->servers[k].port))) + if (parse_address(servers[j], &(image->servers[k].hostaddrtype), image->servers[k].hostaddr, &(image->servers[k].port))) { ++k; continue; } - image->servers[k].addrtype = 0; + image->servers[k].hostaddrtype = 0; } g_strfreev(servers); diff --git a/src/types.h b/src/types.h index 0227433..4f44f32 100644 --- a/src/types.h +++ b/src/types.h @@ -76,6 +76,7 @@ typedef struct #define CMD_GET_SERVERS 3 #define CMD_ERROR 4 #define CMD_KEEPALIVE 5 +#define CMD_LATEST_RID 6 #pragma pack(1) typedef struct @@ -101,10 +102,10 @@ typedef struct #pragma pack(1) typedef struct { - uint8_t ipaddr[16]; // 16byte (network representation, so it can be directly passed to socket functions) - uint16_t port; // 2byte (network representation, so it can be directly passed to socket functions) - uint8_t addrtype; // 1byte (ip version. AF_INET or AF_INET6. 0 means this struct is empty and should be ignored) - uint8_t failures; // 1byte (number of times server has been consecutively unreachable) + uint8_t hostaddr[16]; // 16byte (network representation, so it can be directly passed to socket functions) + uint16_t port; // 2byte (network representation, so it can be directly passed to socket functions) + uint8_t hostaddrtype; // 1byte (ip version. AF_INET or AF_INET6. 0 means this struct is empty and should be ignored) + uint8_t failures; // 1byte (number of times server has been consecutively unreachable) } dnbd3_server_entry_t; #pragma pack(0) -- cgit v1.2.3-55-g7522