diff options
-rw-r--r-- | CMakeLists.txt | 3 | ||||
-rw-r--r-- | Kbuild.in | 2 | ||||
-rw-r--r-- | server.conf.example | 28 | ||||
-rw-r--r-- | src/client/client.c | 64 | ||||
-rw-r--r-- | src/config.h | 27 | ||||
-rw-r--r-- | src/kernel/blk.c | 134 | ||||
-rw-r--r-- | src/kernel/blk.h | 8 | ||||
-rw-r--r-- | src/kernel/dnbd3.h | 31 | ||||
-rw-r--r-- | src/kernel/net.c | 740 | ||||
-rw-r--r-- | src/kernel/net.h | 13 | ||||
-rw-r--r-- | src/kernel/serialize_kmod.c | 4 | ||||
-rw-r--r-- | src/kernel/sysfs.c | 23 | ||||
-rw-r--r-- | src/serialize.c | 74 | ||||
-rw-r--r-- | src/serialize.h | 31 | ||||
-rw-r--r-- | src/server/ipc.c | 91 | ||||
-rw-r--r-- | src/server/net.c | 314 | ||||
-rw-r--r-- | src/server/serialize.c | 5 | ||||
-rw-r--r-- | src/server/server.c | 83 | ||||
-rw-r--r-- | src/server/server.h | 32 | ||||
-rw-r--r-- | src/server/utils.c | 420 | ||||
-rw-r--r-- | src/server/utils.h | 3 | ||||
-rw-r--r-- | src/types.h | 64 |
22 files changed, 1561 insertions, 633 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index fa2fd4b..1c44769 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,9 +47,10 @@ TARGET_LINK_LIBRARIES(dnbd3-server ${CMAKE_THREAD_LIBS_INIT} ${GLIB_LDFLAGS} ${L # MODULE # ################################################################################ + SET(MODULE_NAME dnbd3) SET(MODULE_FILE ${MODULE_NAME}.ko) -FILE(GLOB MODULE_SOURCE_FILES src/kernel/*.c) +FILE(GLOB MODULE_SOURCE_FILES src/kernel/*.c src/serialize.c) FILE(GLOB MODULE_HEADER_FILES src/kernel/*.h src/*.h) SET(KERNEL_DIR "/lib/modules/${CMAKE_SYSTEM_VERSION}/build") @@ -1,2 +1,2 @@ obj-m := ${MODULE_NAME}.o -${MODULE_NAME}-objs += core.o blk.o net.o sysfs.o utils.o
\ No newline at end of file +${MODULE_NAME}-objs += core.o blk.o net.o sysfs.o utils.o serialize_kmod.o
\ No newline at end of file diff --git a/server.conf.example b/server.conf.example index d04aea0..464a870 100644 --- a/server.conf.example +++ b/server.conf.example @@ -1,25 +1,9 @@ # This is a sample configuration file for dnbd3-server -[Ubuntu 10.04] -file=/home/jjl/Data/ISOs/Ubuntu/ubuntu-10.04.3-desktop-i386.iso -servers=132.230.4.29;132.230.4.220;132.230.8.96 -vid=1 -rid=3 - -[Ubuntu 10.10] -file=/home/jjl/Data/ISOs/Ubuntu/ubuntu-10.10-desktop-i386.iso -servers=132.230.4.29;132.230.4.220;132.230.8.96 -vid=2 -rid=1 +[settings] +default_namespace=uni-freiburg/rz/bunker -[Ubuntu 11.04] -file=/home/jjl/Data/ISOs/Ubuntu/ubuntu-11.04-desktop-i386.iso -servers=132.230.4.29;132.230.4.220;132.230.8.96 -vid=3 -rid=1 - -[Ubuntu 11.10] -file=/home/jjl/Data/ISOs/Ubuntu/ubuntu-11.10-desktop-i386.iso -servers=132.230.4.29;132.230.4.220;132.230.8.96 -vid=4 -rid=1 +[eclipse-cdt linux (tar.gz)] +file=/home/sr/Downloads/eclipse-cpp-juno-linux-gtk-x86_64.tar.gz +servers=127.0.0.20:1234;132.230.4.58;2000::dead:beef +rid=3 diff --git a/src/client/client.c b/src/client/client.c index 68ed70e..a48cab9 100644 --- a/src/client/client.c +++ b/src/client/client.c @@ -28,6 +28,7 @@ #include <glib.h> #include <netdb.h> #include <arpa/inet.h> +#include <string.h> #include "../types.h" #include "../version.h" @@ -37,11 +38,11 @@ char *_config_file_name = DEFAULT_CLIENT_CONFIG_FILE; void dnbd3_print_help(char* argv_0) { printf("\nUsage: %s\n" - "\t-h <host> -v <vid> [-r <rid>] -d <device> [-a <KB>] || -f <file> || -c <device>\n\n", argv_0); + "\t-h <host> -i <image name> [-r <rid>] -d <device> [-a <KB>] || -f <file> || -c <device>\n\n", argv_0); printf("Start the DNBD3 client.\n"); printf("-f or --file \t\t Configuration file (default /etc/dnbd3-client.conf)\n"); printf("-h or --host \t\t Host running dnbd3-server.\n"); - printf("-v or --vid \t\t Volume-ID of exported image.\n"); + printf("-i or --image \t\t Image name of exported image.\n"); printf("-r or --rid \t\t Release-ID of exported image (default 0, latest).\n"); printf("-d or --device \t\t DNBD3 device name.\n"); printf("-a or --ahead \t\t Read ahead in KByte (default %i).\n", DEFAULT_READ_AHEAD_KB); @@ -58,7 +59,7 @@ void dnbd3_print_version() exit(EXIT_SUCCESS); } -char* dnbd3_get_ip(char* hostname) +static void dnbd3_get_ip(char* hostname, uint8_t *target, uint8_t *addrtype) { struct hostent *host; @@ -68,7 +69,16 @@ char* dnbd3_get_ip(char* hostname) exit(EXIT_FAILURE); } - return inet_ntoa(*((struct in_addr *) host->h_addr)); + *addrtype = (uint8_t)host->h_addrtype; + if (host->h_addrtype == AF_INET) + memcpy(target, host->h_addr, 4); + else if (host->h_addrtype == AF_INET6) + memcpy(target, host->h_addr, 16); + else + { + printf("FATAL: Unknown address type: %d\n", host->h_addrtype); + exit(EXIT_FAILURE); + } } int main(int argc, char *argv[]) @@ -80,19 +90,20 @@ int main(int argc, char *argv[]) int switch_host = 0; dnbd3_ioctl_t msg; - msg.host = NULL; - msg.vid = 0; - msg.rid = 0; + memset(&msg, 0, sizeof(dnbd3_ioctl_t)); + msg.len = (uint16_t)sizeof(dnbd3_ioctl_t); msg.read_ahead_kb = DEFAULT_READ_AHEAD_KB; + msg.port = htons(PORT); + msg.addrtype = 0; int opt = 0; int longIndex = 0; - static const char *optString = "f:h:v:r:d:a:c:s:HV?"; + static const char *optString = "f:h:i:r:d:a:c:s:HV?"; static const struct option longOpts[] = { { "file", required_argument, NULL, 'f' }, { "host", required_argument, NULL, 'h' }, - { "vid", required_argument, NULL, 'v' }, + { "image", required_argument, NULL, 'i' }, { "rid", required_argument, NULL, 'r' }, { "device", required_argument, NULL, 'd' }, { "ahead", required_argument, NULL, 'a' }, @@ -108,29 +119,32 @@ int main(int argc, char *argv[]) switch (opt) { case 'f': - _config_file_name = optarg; + _config_file_name = strdup(optarg); break; case 'h': - msg.host = dnbd3_get_ip(optarg); + dnbd3_get_ip(optarg, msg.addr, &msg.addrtype); + printf("Host set to %s (type %d)\n", optarg, (int)msg.addrtype); break; - case 'v': - msg.vid = atoi(optarg); + case 'i': + msg.imgname = strdup(optarg); + printf("Image: %s\n", msg.imgname); break; case 'r': msg.rid = atoi(optarg); break; case 'd': - dev = optarg; + dev = strdup(optarg); + printf("Device is %s\n", dev); break; case 'a': msg.read_ahead_kb = atoi(optarg); break; case 'c': - dev = optarg; + dev = strdup(optarg); close_dev = 1; break; case 's': - msg.host = dnbd3_get_ip(optarg); + dnbd3_get_ip(optarg, msg.addr, &msg.addrtype); switch_host = 1; break; case 'H': @@ -141,12 +155,13 @@ int main(int argc, char *argv[]) break; case '?': dnbd3_print_help(argv[0]); + break; } opt = getopt_long(argc, argv, optString, longOpts, &longIndex); } // close device - if (close_dev && !msg.host && dev && (msg.vid == 0)) + if (close_dev && msg.addrtype == 0 && dev && (msg.imgname == NULL)) { fd = open(dev, O_WRONLY); printf("INFO: Closing device %s\n", dev); @@ -162,10 +177,10 @@ int main(int argc, char *argv[]) } // switch host - if (switch_host && msg.host && dev && (msg.vid == 0)) + if (switch_host && msg.addrtype != 0 && dev && (msg.imgname == NULL)) { fd = open(dev, O_WRONLY); - printf("INFO: Switching device %s to %s\n", dev, msg.host); + printf("INFO: Switching device %s to %s\n", dev, "<fixme>"); if (ioctl(fd, IOCTL_SWITCH, &msg) < 0) { @@ -178,10 +193,11 @@ int main(int argc, char *argv[]) } // connect - if (msg.host && dev && (msg.vid != 0)) + if (msg.addrtype != 0 && dev && (msg.imgname != NULL)) { + msg.imgnamelen = (uint16_t)strlen(msg.imgname); fd = open(dev, O_WRONLY); - printf("INFO: Connecting %s to %s vid:%i rid:%i\n", dev, msg.host, msg.vid, msg.rid); + printf("INFO: Connecting %s to %s (%s rid:%i)\n", dev, "<fixme>", msg.imgname, msg.rid); if (ioctl(fd, IOCTL_OPEN, &msg) < 0) { @@ -207,8 +223,8 @@ int main(int argc, char *argv[]) for (i = 0; i < j; i++) { - msg.host = g_key_file_get_string(gkf, groups[i], "server", NULL); - msg.vid = g_key_file_get_integer(gkf, groups[i], "vid", NULL); + dnbd3_get_ip(g_key_file_get_string(gkf, groups[i], "server", NULL), msg.addr, &msg.addrtype); + msg.imgname = g_key_file_get_string(gkf, groups[i], "name", NULL); msg.rid = g_key_file_get_integer(gkf, groups[i], "rid", NULL); dev = g_key_file_get_string(gkf, groups[i], "device", NULL); @@ -217,7 +233,7 @@ int main(int argc, char *argv[]) msg.read_ahead_kb = DEFAULT_READ_AHEAD_KB; fd = open(dev, O_WRONLY); - printf("INFO: Connecting %s to %s vid:%i rid:%i\n", dev, msg.host, msg.vid, msg.rid); + printf("INFO: Connecting %s to %s (%s rid:%i)\n", dev, "<fixme>", msg.imgname, msg.rid); if (ioctl(fd, IOCTL_OPEN, &msg) < 0) { diff --git a/src/config.h b/src/config.h index be3df5c..78c19c9 100644 --- a/src/config.h +++ b/src/config.h @@ -21,25 +21,42 @@ #ifndef CONFIG_H_ #define CONFIG_H_ -// network +// +++++ Network +++++ +// Default port #define PORT 5003 #define PORTSTR "5003" +// Protocol version should be increased whenever new features/messages are added, +// so either the client or server can run in compatibility mode, or they can +// cancel the connection right away if the protocol has changed too much +#define PROTOCOL_VERSION 1 +// Which is the minimum protocol version the server expects from the client +#define MIN_SUPPORTED_CLIENT 1 +// Which is the minimum protocol version the client expects from the server +#define MIN_SUPPORTED_SERVER 1 + +// No payload allowed exceeding this many bytes: +#define MAX_PAYLOAD 1000 + #define SOCKET_TIMEOUT_SERVER 30 #define SOCKET_TIMEOUT_CLIENT_DATA 2 #define SOCKET_TIMEOUT_CLIENT_DISCOVERY 1 -#define TIMER_INTERVAL_HEARTBEAT 10*HZ -#define TIMER_INTERVAL_PANIC 1*HZ + #define NUMBER_SERVERS 8 #define RTT_THRESHOLD 1000 +// This must be a power of two: +#define RTT_BLOCK_SIZE 4096 + +#define TIMER_INTERVAL_HEARTBEAT 10*HZ +#define TIMER_INTERVAL_PANIC 2*HZ -// block device +// +++++ Block Device +++++ #define KERNEL_SECTOR_SIZE 512 #define DNBD3_BLOCK_SIZE 4096 #define NUMBER_DEVICES 8 #define DEFAULT_READ_AHEAD_KB 256 -// misc +// +++++ Misc +++++ #define DEFAULT_SERVER_CONFIG_FILE "/etc/dnbd3/server.conf" #define DEFAULT_CLIENT_CONFIG_FILE "/etc/dnbd3/client.conf" #define UNIX_SOCKET "/run/dnbd3-server.sock" diff --git a/src/kernel/blk.c b/src/kernel/blk.c index 2f52cc3..1177f3a 100644 --- a/src/kernel/blk.c +++ b/src/kernel/blk.c @@ -35,12 +35,10 @@ int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor) INIT_LIST_HEAD(&dev->request_queue_send); INIT_LIST_HEAD(&dev->request_queue_receive); - memset(dev->cur_server.host, 0, 16); - memset(dev->cur_server.port, 0, 6); - dev->cur_server.rtt = 0; - dev->cur_server.sock = NULL; + memset(&dev->cur_server, 0, sizeof(dnbd3_server_t)); + dev->better_sock = NULL; - dev->vid = 0; + dev->imgname = NULL; dev->rid = 0; dev->update_available = 0; dev->alt_servers_num = 0; @@ -50,6 +48,8 @@ int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor) dev->thread_discover = NULL; dev->discover = 0; dev->panic = 0; + dev->panic_count = 0; + dev->reported_size = 0; if (!(disk = alloc_disk(1))) { @@ -102,28 +102,60 @@ int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, u int result = 0; dnbd3_device_t *dev = bdev->bd_disk->private_data; struct request_queue *blk_queue = dev->disk->queue; + char *imgname = NULL; dnbd3_ioctl_t *msg = kmalloc(sizeof(dnbd3_ioctl_t), GFP_KERNEL); - copy_from_user((char *)msg, (char *)arg, sizeof(*msg)); + + if (msg == NULL) return -ENOMEM; + copy_from_user((char *)msg, (char *)arg, 2); + if (msg->len != sizeof(dnbd3_ioctl_t)) + { + result = -ENOEXEC; + goto cleanup_return; + } + copy_from_user((char *)msg, (char *)arg, sizeof(dnbd3_ioctl_t)); + if (msg->imgname != NULL && msg->imgnamelen > 0) + { + imgname = kmalloc(msg->imgnamelen + 1, GFP_KERNEL); + if (imgname == NULL) + { + result = -ENOMEM; + goto cleanup_return; + } + copy_from_user(imgname, msg->imgname, msg->imgnamelen); + imgname[msg->imgnamelen] = '\0'; + } switch (cmd) { case IOCTL_OPEN: - strcpy(dev->cur_server.host, msg->host); - strcpy(dev->cur_server.port, PORTSTR); - dev->vid = msg->vid; - dev->rid = msg->rid; - blk_queue->backing_dev_info.ra_pages = (msg->read_ahead_kb * 1024)/ PAGE_CACHE_SIZE; - result = dnbd3_net_connect(dev); + if (imgname == NULL) + { + result = -EINVAL; + } + else + { + memcpy(dev->cur_server.hostaddr, msg->addr, 16); + dev->cur_server.port = msg->port; + dev->cur_server.hostaddrtype = msg->addrtype; + dev->imgname = imgname; + imgname = NULL; + dev->rid = msg->rid; + blk_queue->backing_dev_info.ra_pages = (msg->read_ahead_kb * 1024) / PAGE_CACHE_SIZE; + result = dnbd3_net_connect(dev); + } break; case IOCTL_CLOSE: set_capacity(dev->disk, 0); result = dnbd3_net_disconnect(dev); + dnbd3_blk_fail_all_requests(dev); break; case IOCTL_SWITCH: dnbd3_net_disconnect(dev); - strcpy(dev->cur_server.host, msg->host); + memcpy(dev->cur_server.hostaddr, msg->addr, 16); + dev->cur_server.port = msg->port; + dev->cur_server.hostaddrtype = msg->addrtype; result = dnbd3_net_connect(dev); break; @@ -132,10 +164,12 @@ int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, u default: result = -EIO; - + break; } - kfree(msg); +cleanup_return: + if (msg) kfree(msg); + if (imgname) kfree(imgname); return result; } @@ -148,18 +182,78 @@ void dnbd3_blk_request(struct request_queue *q) { dev = req->rq_disk->private_data; + if (dev->cur_server.hostaddrtype == 0) + { + __blk_end_request_all(req, -EIO); + continue; + } + if (req->cmd_type != REQ_TYPE_FS) { __blk_end_request_all(req, 0); continue; } - if (rq_data_dir(req) == READ) + if (dev->panic_count >= 20) + { + __blk_end_request_all(req, -EIO); + continue; + } + + if (rq_data_dir(req) != READ) { - list_add_tail(&req->queuelist, &dev->request_queue_send); - spin_unlock_irq(q->queue_lock); - wake_up(&dev->process_queue_send); - spin_lock_irq(q->queue_lock); + __blk_end_request_all(req, -EACCES); + continue; } + + list_add_tail(&req->queuelist, &dev->request_queue_send); + spin_unlock_irq(q->queue_lock); + wake_up(&dev->process_queue_send); + spin_lock_irq(q->queue_lock); } } + +void dnbd3_blk_fail_all_requests(dnbd3_device_t *dev) +{ + struct request *blk_request, *tmp_request; + struct request *blk_request2, *tmp_request2; + unsigned long flags; + struct list_head local_copy; + int dup; + INIT_LIST_HEAD(&local_copy); + spin_lock_irq(&dev->blk_lock); + list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_receive, queuelist) + { + list_del_init(&blk_request->queuelist); + list_add(&blk_request->queuelist, &local_copy); + } + list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_send, queuelist) + { + list_del_init(&blk_request->queuelist); + dup = 0; + list_for_each_entry_safe(blk_request2, tmp_request2, &local_copy, queuelist) + { + if (blk_request == blk_request2) + { + printk("WARNING: Request is in both lists!\n"); + dup = 1; + } + } + if (!dup) list_add(&blk_request->queuelist, &local_copy); + } + spin_unlock_irq(&dev->blk_lock); + list_for_each_entry_safe(blk_request, tmp_request, &local_copy, queuelist) + { + list_del_init(&blk_request->queuelist); + if (blk_request->cmd_type == REQ_TYPE_FS) + { + spin_lock_irqsave(&dev->blk_lock, flags); + __blk_end_request_all(blk_request, -EIO); + spin_unlock_irqrestore(&dev->blk_lock, flags); + } + else if (blk_request->cmd_type == REQ_TYPE_SPECIAL) + { + kfree(blk_request); + } + } +} diff --git a/src/kernel/blk.h b/src/kernel/blk.h index 57d9bfa..28f6f8c 100644 --- a/src/kernel/blk.h +++ b/src/kernel/blk.h @@ -23,12 +23,6 @@ #include "dnbd3.h" -enum -{ - REQ_GET_SERVERS = 1, - REQ_GET_FILESIZE = 2, -}; - extern struct block_device_operations dnbd3_blk_ops; int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); @@ -39,4 +33,6 @@ int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor); int dnbd3_blk_del_device(dnbd3_device_t *dev); +void dnbd3_blk_fail_all_requests(dnbd3_device_t *dev); + #endif /* BLK_H_ */ diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h index 0e408dc..dfed0f2 100644 --- a/src/kernel/dnbd3.h +++ b/src/kernel/dnbd3.h @@ -29,17 +29,19 @@ #include "config.h" #include "types.h" +#include "serialize.h" extern int major; typedef struct { - char host[16]; - char port[6]; - uint64_t rtt; - uint64_t rtts[4]; - struct socket *sock; - struct kobject kobj; + uint64_t rtts[4]; // Last four round trip time measurements in µs + uint16_t port; // Port in network representation + uint16_t protocol_version; // dnbd3 protocol version of this server + uint8_t hostaddr[16]; // Address in network representation (IPv4 or IPv6) + uint8_t hostaddrtype; // Address type (AF_INET or AF_INET6) + uint8_t skip_count; // Do not check this server the next skip_count times + struct kobject kobj; // SysFS } dnbd3_server_t; typedef struct @@ -52,11 +54,20 @@ typedef struct struct kobject kobj; // network + struct socket *sock; dnbd3_server_t cur_server; - int vid, rid, update_available; - int alt_servers_num; - dnbd3_server_t alt_servers[NUMBER_SERVERS]; - int discover, panic; + uint64_t cur_rtt; + char *imgname; + serialized_buffer_t payload_buffer; + int rid, update_available; + int alt_servers_num; // number of currently known alt servers + dnbd3_server_t alt_servers[NUMBER_SERVERS]; // array of alt servers + int new_servers_num; // number of new alt servers that are waiting to be copied to above array + dnbd3_server_entry_t new_servers[NUMBER_SERVERS]; // pending new alt servers + int discover, panic, panic_count, disconnecting; + uint64_t reported_size; + // server switch + struct socket *better_sock; // process struct timer_list hb_timer; diff --git a/src/kernel/net.c b/src/kernel/net.c index 840282f..4d33842 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -21,87 +21,169 @@ #include "net.h" #include "blk.h" #include "utils.h" +#include "serialize.h" #include <linux/time.h> +#ifndef MIN +#define MIN(a,b) (a < b ? a : b) +#endif + int dnbd3_net_connect(dnbd3_device_t *dev) { struct sockaddr_in sin; - struct request *req0 = kmalloc(sizeof(struct request), GFP_ATOMIC); - struct request *req1 = kmalloc(sizeof(struct request), GFP_ATOMIC); + struct request *req1 = kmalloc(sizeof(*req1), GFP_ATOMIC); struct timeval timeout; timeout.tv_sec = SOCKET_TIMEOUT_CLIENT_DATA; timeout.tv_usec = 0; // do some checks before connecting - if (!req0 || !req1) + if (!req1) { - printk("FATAL: Kmalloc failed.\n"); - memset(dev->cur_server.host, '\0', sizeof(dev->cur_server.host)); - return -1; + printk("FATAL: Kmalloc(1) failed.\n"); + goto error; } - if (!dev->cur_server.host || !dev->cur_server.port || (dev->vid == 0)) + if (dev->cur_server.port == 0 || dev->cur_server.hostaddrtype == 0 || dev->imgname == NULL) { - printk("FATAL: Host, port or vid not set.\n"); - memset(dev->cur_server.host, '\0', sizeof(dev->cur_server.host)); - return -1; + printk("FATAL: Host, port or image name not set.\n"); + goto error; } - if (dev->cur_server.sock) + if (dev->sock) { - printk("ERROR: Device %s is already connected to %s.\n", dev->disk->disk_name, dev->cur_server.host); - return -1; + if (dev->cur_server.hostaddrtype == AF_INET) + printk("ERROR: Device %s is already connected to %pI4 : %d.\n", dev->disk->disk_name, dev->cur_server.hostaddr, (int)ntohs(dev->cur_server.port)); + else + printk("ERROR: Device %s is already connected to %pI6 : %d.\n", dev->disk->disk_name, dev->cur_server.hostaddr, (int)ntohs(dev->cur_server.port)); + goto error; } - printk("INFO: Connecting device %s to %s\n", dev->disk->disk_name, dev->cur_server.host); - - // initialize socket - if (sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &dev->cur_server.sock) < 0) + if (dev->cur_server.hostaddrtype == AF_INET) + printk("INFO: Connecting device %s to %pI4 : %d\n", dev->disk->disk_name, dev->cur_server.hostaddr, (int)ntohs(dev->cur_server.port)); + else { - printk("ERROR: Couldn't create socket.\n"); - dev->cur_server.sock = NULL; - memset(dev->cur_server.host, '\0', sizeof(dev->cur_server.host)); - return -1; + printk("ERROR: Cannot connect to %pI6 - IPv6 not yet implemented.\n", dev->cur_server.hostaddr); + //printk("INFO: Connecting device %s to %pI6 : %d\n", dev->disk->disk_name, dev->cur_server.hostaddr, (int)ntohs(dev->cur_server.port)); + goto error; + } + + if (dev->better_sock == NULL) + { // no established connection yet from discovery thread, start new one + dnbd3_request_t dnbd3_request; + dnbd3_reply_t dnbd3_reply; + struct msghdr msg; + struct kvec iov[2]; + uint16_t rid; + char *name; + init_msghdr(msg); + if (sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &dev->sock) < 0) + { + printk("ERROR: Couldn't create socket.\n"); + goto error; + } + kernel_setsockopt(dev->sock, SOL_SOCKET, SO_SNDTIMEO, (char *) &timeout, sizeof(timeout)); + kernel_setsockopt(dev->sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &timeout, sizeof(timeout)); + dev->sock->sk->sk_allocation = GFP_NOIO; + sin.sin_family = AF_INET; + memcpy(&(sin.sin_addr.s_addr), dev->cur_server.hostaddr, 4); + sin.sin_port = dev->cur_server.port; + if (kernel_connect(dev->sock, (struct sockaddr *) &sin, sizeof(sin), 0) < 0) + { + printk("ERROR: Couldn't connect to host %pI4 : %d\n", dev->cur_server.hostaddr, (int)ntohs(dev->cur_server.port)); + goto error; + } + // Request filesize + dnbd3_request.magic = dnbd3_packet_magic; + dnbd3_request.cmd = CMD_GET_SIZE; + dnbd3_request.size = strlen(dev->imgname) + 1 + 2 + 2; // str+\0, version, rid + fixup_request(dnbd3_request); + iov[0].iov_base = &dnbd3_request; + iov[0].iov_len = sizeof(dnbd3_request); + serializer_reset_write(&dev->payload_buffer); + serializer_put_uint16(&dev->payload_buffer, PROTOCOL_VERSION); + serializer_put_string(&dev->payload_buffer, dev->imgname); + serializer_put_uint16(&dev->payload_buffer, dev->rid); + iov[1].iov_base = &dev->payload_buffer; + iov[1].iov_len = serializer_get_written_length(&dev->payload_buffer); + if (kernel_sendmsg(dev->sock, &msg, iov, 2, sizeof(dnbd3_request) + iov[1].iov_len) <= 0) + goto error; + // receive reply header + iov[0].iov_base = &dnbd3_reply; + iov[0].iov_len = sizeof(dnbd3_reply); + if (kernel_recvmsg(dev->sock, &msg, iov, 1, sizeof(dnbd3_reply), msg.msg_flags) != sizeof(dnbd3_reply) || dnbd3_reply.cmd != CMD_GET_SIZE || dnbd3_reply.size < 3 || dnbd3_reply.size > MAX_PAYLOAD || dnbd3_reply.magic != dnbd3_packet_magic) + { + printk("FATAL: Requested image does not exist on server.\n"); + goto error; + } + // receive reply payload + iov[0].iov_base = &dev->payload_buffer; + iov[0].iov_len = dnbd3_reply.size; + if (kernel_recvmsg(dev->sock, &msg, iov, 1, dnbd3_reply.size, msg.msg_flags) != dnbd3_reply.size) + { + printk("FATAL: Cold not read CMD_GET_SIZE payload on handshake.\n"); + goto error; + } + // read reply payload + dev->cur_server.protocol_version = serializer_get_uint16(&dev->payload_buffer); + if (dev->cur_server.protocol_version < MIN_SUPPORTED_SERVER) + { + printk("FATAL: Server version is lower than min supported version.\n"); + goto error; + } + name = serializer_get_string(&dev->payload_buffer); + if (dev->rid != 0 && strcmp(name, dev->imgname) != 0) + { + printk("FATAL: Server provides different image than asked for.\n"); + goto error; + } + if (strlen(dev->imgname) < strlen(name)) + { + dev->imgname = krealloc(dev->imgname, strlen(name) + 1, GFP_ATOMIC); + if (dev->imgname == NULL) + { + printk("FATAL: Reallocating buffer for new image name failed"); + goto error; + } + } + strcpy(dev->imgname, name); + rid = serializer_get_uint16(&dev->payload_buffer); + if (dev->rid != 0 && dev->rid != rid) + { + printk("FATAL: Server provides different rid of image than asked for.\n"); + goto error; + } + dev->rid = rid; + dev->reported_size = serializer_get_uint64(&dev->payload_buffer); + // store image information + set_capacity(dev->disk, dev->reported_size >> 9); /* 512 Byte blocks */ + printk("INFO: Filesize of %s: %llu\n", dev->disk->disk_name, dev->reported_size); } - kernel_setsockopt(dev->cur_server.sock, SOL_SOCKET, SO_SNDTIMEO, (char *) &timeout, sizeof(timeout)); - kernel_setsockopt(dev->cur_server.sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &timeout, sizeof(timeout)); - dev->cur_server.sock->sk->sk_allocation = GFP_NOIO; - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = inet_addr(dev->cur_server.host); - sin.sin_port = htons(simple_strtol(dev->cur_server.port, NULL, 10)); - if (kernel_connect(dev->cur_server.sock, (struct sockaddr *) &sin, sizeof(sin), 0) < 0) + else // Switching server, connection is already established and size request was executed { - printk("ERROR: Couldn't connect to host %s:%s\n", dev->cur_server.host, dev->cur_server.port); - dev->cur_server.sock = NULL; - memset(dev->cur_server.host, '\0', sizeof(dev->cur_server.host)); - return -1; + printk("INFO: On-the-fly server change\n"); + dev->sock = dev->better_sock; + dev->better_sock = NULL; + kernel_setsockopt(dev->sock, SOL_SOCKET, SO_SNDTIMEO, (char *) &timeout, sizeof(timeout)); + kernel_setsockopt(dev->sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &timeout, sizeof(timeout)); } dev->panic = 0; + dev->panic_count = 0; dev->alt_servers_num = 0; dev->update_available = 0; - // enqueue request to request_queue_send (ask alt servers) req1->cmd_type = REQ_TYPE_SPECIAL; - req1->cmd_flags = REQ_GET_SERVERS; + req1->cmd_flags = CMD_GET_SERVERS; list_add(&req1->queuelist, &dev->request_queue_send); - // enqueue request to request_queue_send (ask file size) - req0->cmd_type = REQ_TYPE_SPECIAL; - req0->cmd_flags = REQ_GET_FILESIZE; - list_add(&req0->queuelist, &dev->request_queue_send); - - // start sending thread + // create required threads dev->thread_send = kthread_create(dnbd3_net_send, dev, dev->disk->disk_name); - wake_up_process(dev->thread_send); - - // start receiving thread dev->thread_receive = kthread_create(dnbd3_net_receive, dev, dev->disk->disk_name); - wake_up_process(dev->thread_receive); - - // start discover thread dev->thread_discover = kthread_create(dnbd3_net_discover, dev, dev->disk->disk_name); + // start them up + wake_up_process(dev->thread_send); + wake_up_process(dev->thread_receive); wake_up_process(dev->thread_discover); wake_up(&dev->process_queue_send); @@ -114,15 +196,32 @@ int dnbd3_net_connect(dnbd3_device_t *dev) add_timer(&dev->hb_timer); return 0; +error: + if (dev->sock) + { + sock_release(dev->sock); + dev->sock = NULL; + } + dev->cur_server.hostaddrtype = 0; + dev->cur_server.port = 0; + if (req1) kfree(req1); + return -1; } int dnbd3_net_disconnect(dnbd3_device_t *dev) { printk("INFO: Disconnecting device %s\n", dev->disk->disk_name); + dev->disconnecting = 1; + // clear heartbeat timer if (&dev->hb_timer) - del_timer(&dev->hb_timer); + del_timer(&dev->hb_timer); + + dev->discover = 0; + + if (dev->sock) + kernel_sock_shutdown(dev->sock, SHUT_RDWR); // kill sending and receiving threads if (dev->thread_send) @@ -144,12 +243,15 @@ int dnbd3_net_disconnect(dnbd3_device_t *dev) } // clear socket - if (dev->cur_server.sock) + if (dev->sock) { - sock_release(dev->cur_server.sock); - dev->cur_server.sock = NULL; - memset(dev->cur_server.host, '\0', sizeof(dev->cur_server.host)); + sock_release(dev->sock); + dev->sock = NULL; } + dev->cur_server.hostaddrtype = 0; + dev->cur_server.port = 0; + + dev->disconnecting = 0; return 0; } @@ -157,19 +259,23 @@ int dnbd3_net_disconnect(dnbd3_device_t *dev) void dnbd3_net_heartbeat(unsigned long arg) { dnbd3_device_t *dev = (dnbd3_device_t *) arg; - struct request *req = kmalloc(sizeof(struct request), GFP_ATOMIC); - // send keepalive - if (req) - { - req->cmd_type = REQ_TYPE_SPECIAL; - req->cmd_flags = REQ_GET_SERVERS; - list_add_tail(&req->queuelist, &dev->request_queue_send); - wake_up(&dev->process_queue_send); - } - else + + if (!dev->panic) { - printk("ERROR: Couldn't create keepalive request\n"); + struct request *req = kmalloc(sizeof(struct request), GFP_ATOMIC); + // send keepalive + if (req) + { + req->cmd_type = REQ_TYPE_SPECIAL; + req->cmd_flags = CMD_GET_SERVERS; + list_add_tail(&req->queuelist, &dev->request_queue_send); + wake_up(&dev->process_queue_send); + } + else + { + printk("ERROR: Couldn't create keepalive request\n"); + } } // start discover @@ -188,20 +294,21 @@ int dnbd3_net_discover(void *data) { dnbd3_device_t *dev = data; struct sockaddr_in sin; - struct socket *sock; + struct socket *sock, *best_sock = NULL; dnbd3_request_t dnbd3_request; dnbd3_reply_t dnbd3_reply; struct msghdr msg; - struct kvec iov; + struct kvec iov[2]; - char *buf; + char *buf, *name; + serialized_buffer_t *payload; uint64_t filesize; - char current_server[16], best_server[16]; + uint16_t rid; struct timeval start, end; - uint64_t t1, t2, best_rtt = 0; - int i, num = 0; + uint64_t rtt, best_rtt = 0; + int i, best_server, current_server; int turn = 0; int ready = 0; @@ -217,22 +324,55 @@ int dnbd3_net_discover(void *data) printk("FATAL: Kmalloc failed (discover)\n"); return -1; } + payload = (serialized_buffer_t*)buf; - while (!kthread_should_stop()) + dnbd3_request.magic = dnbd3_packet_magic; + + for (;;) { - wait_event_interruptible(dev->process_queue_discover, kthread_should_stop() || dev->discover); + wait_event_interruptible(dev->process_queue_discover, + kthread_should_stop() || dev->discover); - if (!&dev->discover) - continue; + if (kthread_should_stop() || dev->imgname == NULL) + break; - num = dev->alt_servers_num; + if (!dev->discover) + continue; dev->discover = 0; - strcpy(best_server, "0.0.0.0"); - best_rtt = -1; - for (i=0; i < num && i < NUMBER_SERVERS; i++) + // Check if the list of alt servers needs to be updated and do so if neccessary + spin_lock_irq(&dev->blk_lock); + if (dev->new_servers_num) { - // initialize socket and connect + for (i = 0; i < dev->new_servers_num; ++i) + { + memcpy(dev->alt_servers[i].hostaddr, dev->new_servers[i].ipaddr, 16); + dev->alt_servers[i].hostaddrtype = dev->new_servers[i].addrtype; + dev->alt_servers[i].port = dev->new_servers[i].port; + memset(dev->alt_servers[i].rtts, 0xFF, sizeof(dev->alt_servers[i].rtts[0]) * 4); + dev->alt_servers[i].protocol_version = 0; + dev->alt_servers[i].skip_count = 0; + } + dev->alt_servers_num = dev->new_servers_num; + dev->new_servers_num = 0; + } + spin_unlock_irq(&dev->blk_lock); + + current_server = best_server = -1; + best_rtt = 0xFFFFFFFFFFFFull; + + for (i=0; i < dev->alt_servers_num; ++i) + { + if (dev->alt_servers[i].hostaddrtype != AF_INET) // add IPv6.... + continue; + + if (!dev->panic && dev->alt_servers[i].skip_count) // If not in panic mode, skip server if indicated + { + --dev->alt_servers[i].skip_count; + continue; + } + + // Initialize socket and connect if (sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock) < 0) { printk("ERROR: Couldn't create socket (discover)\n"); @@ -241,49 +381,102 @@ int dnbd3_net_discover(void *data) } kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *) &timeout, sizeof(timeout)); kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &timeout, sizeof(timeout)); - strcpy(current_server, dev->alt_servers[i].host); sock->sk->sk_allocation = GFP_NOIO; - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = inet_addr(current_server); - sin.sin_port = htons(simple_strtol(dev->cur_server.port, NULL, 10)); + sin.sin_family = AF_INET; // add IPv6..... + memcpy(&sin.sin_addr.s_addr, dev->alt_servers[i].hostaddr, 4); + sin.sin_port = dev->alt_servers[i].port; if (kernel_connect(sock, (struct sockaddr *) &sin, sizeof(sin), 0) < 0) { - printk("ERROR: Couldn't connect to host %s:%s (discover)\n", current_server, dev->cur_server.port); - dev->alt_servers[i].rtt = -1; - sock = NULL; - continue; + //printk("ERROR: Couldn't connect to host %s:%s (discover)\n", current_server, dev->cur_server.port); + goto error; } // Request filesize dnbd3_request.cmd = CMD_GET_SIZE; - dnbd3_request.vid = dev->vid; - dnbd3_request.rid = dev->rid; - iov.iov_base = &dnbd3_request; - iov.iov_len = sizeof(dnbd3_request); - if (kernel_sendmsg(sock, &msg, &iov, 1, sizeof(dnbd3_request)) <= 0) + dnbd3_request.size = strlen(dev->imgname) + 1 + 2 + 2; // str+\0, version, rid + fixup_request(dnbd3_request); + iov[0].iov_base = &dnbd3_request; + iov[0].iov_len = sizeof(dnbd3_request); + serializer_reset_write(payload); + serializer_put_uint16(payload, PROTOCOL_VERSION); + serializer_put_string(payload, dev->imgname); + serializer_put_uint16(payload, dev->rid); + iov[1].iov_base = payload; + iov[1].iov_len = serializer_get_written_length(payload); + if (kernel_sendmsg(sock, &msg, iov, 2, sizeof(dnbd3_request) + iov[1].iov_len) != sizeof(dnbd3_request) + iov[1].iov_len) + { + printk("ERROR: Requesting image size failed (%pI4 : %d, discover)\n", dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port)); goto error; + } // receive net reply - iov.iov_base = &dnbd3_reply; - iov.iov_len = sizeof(dnbd3_reply); - if (kernel_recvmsg(sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags) <= 0) + iov[0].iov_base = &dnbd3_reply; + iov[0].iov_len = sizeof(dnbd3_reply); + if (kernel_recvmsg(sock, &msg, iov, 1, sizeof(dnbd3_reply), msg.msg_flags) != sizeof(dnbd3_reply)) + { + printk("ERROR: Receiving image size packet (header) failed (%pI4 :%d, discover)\n", dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port)); goto error; + } + fixup_reply(dnbd3_reply); + if (dnbd3_reply.magic != dnbd3_packet_magic || dnbd3_reply.cmd != CMD_GET_SIZE || dnbd3_reply.size < 4) + { + printk("ERROR: Content of image size packet (header) mismatched (%pI4 :%d, discover)\n", dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port)); + goto error; + } // receive data - iov.iov_base = &filesize; - iov.iov_len = sizeof(uint64_t); - if (kernel_recvmsg(sock, &msg, &iov, 1, dnbd3_reply.size, msg.msg_flags) <= 0) + iov[0].iov_base = payload; + iov[0].iov_len = dnbd3_reply.size; + if (kernel_recvmsg(sock, &msg, iov, 1, dnbd3_reply.size, msg.msg_flags) != dnbd3_reply.size) + { + printk("ERROR: Receiving image size packet (payload) failed (%pI4 : %d, discover)\n", dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port)); goto error; + } + serializer_reset_read(payload, dnbd3_reply.size); + + dev->alt_servers[i].protocol_version = serializer_get_uint16(payload); + if (dev->alt_servers[i].protocol_version < MIN_SUPPORTED_SERVER) + { + printk("ERROR: Server version too old (client: %d, server: %d, min supported: %d) (%pI4 : %d, discover)\n", (int)PROTOCOL_VERSION, (int)dev->alt_servers[i].protocol_version, (int)MIN_SUPPORTED_SERVER, dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port)); + goto error; + } + + name = serializer_get_string(payload); + if (name == NULL) + { + printk("ERROR: Server did not supply an image name (%pI4 : %d, discover)\n", dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port)); + goto error; + } + if (strcmp(name, dev->imgname) != 0) + { + printk("ERROR: Image name does not match requested one (client: '%s', server: '%s') (%pI4 : %d, discover)\n", dev->imgname, name, dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port)); + goto error; + } + + rid = serializer_get_uint16(payload); + if (rid != dev->rid) + { + printk("ERROR: Server supplied wrong rid (client: '%d', server: '%d') (%pI4 : %d, discover)\n", (int)dev->rid, (int)rid, dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port)); + goto error; + } + + filesize = serializer_get_uint64(payload); + if (filesize != dev->reported_size) + { + printk("ERROR: Reported image size of %llu does not match expected value %llu. (%pI4 :%d, discover)\n", (unsigned long long)filesize, (unsigned long long)dev->reported_size, dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port)); + goto error; + } // panic mode, take first responding server if (dev->panic) { - printk("WARN: Panic mode (%s), taking server %s\n", dev->disk->disk_name, current_server); - sock_release(sock); + printk("WARN: Panic mode (%s), taking server %pI4 : %d\n", dev->disk->disk_name, dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port)); + if (best_sock != NULL) sock_release(best_sock); + dev->better_sock = sock; // Pass over socket to take a shortcut in *_connect(); kfree(buf); dev->thread_discover = NULL; dnbd3_net_disconnect(dev); - strcpy(dev->cur_server.host, current_server); + memcpy(&dev->cur_server, &dev->alt_servers[i], sizeof(dev->cur_server)); dnbd3_net_connect(dev); return 0; } @@ -292,81 +485,129 @@ int dnbd3_net_discover(void *data) // Request block dnbd3_request.cmd = CMD_GET_BLOCK; - dnbd3_request.offset = 0; // TODO: take random block - dnbd3_request.size = 4096; - iov.iov_base = &dnbd3_request; - iov.iov_len = sizeof(dnbd3_request); - if (kernel_sendmsg(sock, &msg, &iov, 1, sizeof(dnbd3_request)) <= 0) + dnbd3_request.offset = ((start.tv_usec ^ start.tv_sec) % dev->reported_size) & ~(uint64_t)(RTT_BLOCK_SIZE-1); // Pick random block + dnbd3_request.size = RTT_BLOCK_SIZE; + fixup_request(dnbd3_request); + iov[0].iov_base = &dnbd3_request; + iov[0].iov_len = sizeof(dnbd3_request); + if (kernel_sendmsg(sock, &msg, iov, 1, sizeof(dnbd3_request)) <= 0) + { + printk("ERROR: Requesting test block failed (%pI4 : %d, discover)\n", dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port)); goto error; + } - // receive net replay - iov.iov_base = &dnbd3_reply; - iov.iov_len = sizeof(dnbd3_reply); - if (kernel_recvmsg(sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags) <= 0) + // receive net reply + iov[0].iov_base = &dnbd3_reply; + iov[0].iov_len = sizeof(dnbd3_reply); + if (kernel_recvmsg(sock, &msg, iov, 1, sizeof(dnbd3_reply), msg.msg_flags) != sizeof(dnbd3_reply)) + { + printk("ERROR: Receiving test block header packet failed (%pI4 : %d, discover)\n", dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port)); + goto error; + } + fixup_reply(dnbd3_reply); + if (dnbd3_reply.cmd != CMD_GET_BLOCK || dnbd3_reply.size != RTT_BLOCK_SIZE) + { + printk("ERROR: Unexpected reply to block request: cmd=%d, size=%d (%pI4 : %d, discover)\n", (int)dnbd3_reply.cmd, (int)dnbd3_reply.size, dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port)); goto error; + } // receive data - iov.iov_base = buf; - iov.iov_len = 4096; - if (kernel_recvmsg(sock, &msg, &iov, 1, dnbd3_reply.size, msg.msg_flags) <= 0) + iov[0].iov_base = buf; + iov[0].iov_len = RTT_BLOCK_SIZE; + if (kernel_recvmsg(sock, &msg, iov, 1, dnbd3_reply.size, msg.msg_flags) != RTT_BLOCK_SIZE) + { + printk("ERROR: Receiving test block payload failed (%pI4 : %d, discover)\n", dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port)); goto error; + } do_gettimeofday(&end); // end rtt measurement - // clear socket - sock_release(sock); - sock = NULL; + dev->alt_servers[i].rtts[turn] = + (end.tv_sec - start.tv_sec) * 1000000ull + + (end.tv_usec - start.tv_usec); - t1 = (start.tv_sec*1000000ull) + start.tv_usec; - t2 = (end.tv_sec*1000000ull) + end.tv_usec; - dev->alt_servers[i].rtts[turn] = t2 -t1; + rtt = ( dev->alt_servers[i].rtts[0] + + dev->alt_servers[i].rtts[1] + + dev->alt_servers[i].rtts[2] + + dev->alt_servers[i].rtts[3] ) / 4; - dev->alt_servers[i].rtt = ( dev->alt_servers[i].rtts[0] - +dev->alt_servers[i].rtts[1] - +dev->alt_servers[i].rtts[2] - +dev->alt_servers[i].rtts[3] ) / 4; - - if (best_rtt > dev->alt_servers[i].rtt) - { - best_rtt = dev->alt_servers[i].rtt; - strcpy(best_server, current_server); + if (best_rtt > rtt) + { // This one is better, keep socket open in case we switch + best_rtt = rtt; + best_server = i; + if (best_sock != NULL) sock_release(best_sock); + best_sock = sock; + sock = NULL; + } + else + { // Not better, discard connection + sock_release(sock); + sock = NULL; } // update cur servers rtt - if (strcmp(dev->cur_server.host, dev->alt_servers[i].host) == 0) + if (dev->cur_server.port == dev->alt_servers[i].port && dev->cur_server.hostaddrtype == dev->alt_servers[i].hostaddrtype + && ( + (dev->cur_server.hostaddrtype == AF_INET + && memcmp(dev->cur_server.hostaddr, dev->alt_servers[i].hostaddr, 4) == 0) + || + (dev->cur_server.hostaddrtype == AF_INET6 + && memcmp(dev->cur_server.hostaddr, dev->alt_servers[i].hostaddr, 16) == 0) + ) + ) { - dev->cur_server.rtt = dev->alt_servers[i].rtt; + dev->cur_rtt = rtt; + current_server = i; } continue; error: - printk("ERROR: Send/Receive failed, host %s:%s (discover)\n", current_server, dev->cur_server.port); sock_release(sock); sock = NULL; + dev->alt_servers[i].rtts[turn] = 0xFFFFFFFF; continue; } + if (dev->panic && ++dev->panic_count == 21) + { // After 21 retries, bail out by reporting errors to block layer + dnbd3_blk_fail_all_requests(dev); + } - if (!strcmp(best_server, "0.0.0.0") || best_rtt == (uint64_t)-1) + if (best_server == -1 || kthread_should_stop()) // No alt server could be reached at all or thread should stop + { + if (best_sock != NULL) // Should never happen actually + { + sock_release(best_sock); + best_sock = NULL; + } continue; + } // take server with lowest rtt - if (ready && num > 1 && strcmp(dev->cur_server.host, best_server) && !kthread_should_stop() - && dev->cur_server.rtt > best_rtt + RTT_THRESHOLD) + if (ready && best_server != current_server + && dev->cur_rtt > best_rtt + RTT_THRESHOLD) { - printk("INFO: Server %s on %s is faster (%lluus)\n", best_server, dev->disk->disk_name, best_rtt); + printk("INFO: Server %d on %s is faster (%lluµs)\n", best_server, dev->disk->disk_name, best_rtt); kfree(buf); + dev->better_sock = best_sock; // Take shortcut by continuing to use open connection dev->thread_discover = NULL; dnbd3_net_disconnect(dev); - strcpy(dev->cur_server.host, best_server); - dev->cur_server.rtt = best_rtt; + memcpy(&dev->cur_server, &dev->alt_servers[best_server], sizeof(dev->cur_server)); + dev->cur_rtt = best_rtt; dnbd3_net_connect(dev); return 0; } - turn = (turn +1) % 4; + // Clean up connection that was held open for quicker server switch + if (best_sock != NULL) + { + sock_release(best_sock); + best_sock = NULL; + } + + turn = (turn + 1) % 4; if (turn == 3) ready = 1; @@ -386,21 +627,25 @@ int dnbd3_net_send(void *data) init_msghdr(msg); - dnbd3_request.vid = dev->vid; - dnbd3_request.rid = dev->rid; + dnbd3_request.magic = dnbd3_packet_magic; set_user_nice(current, -20); - while (!kthread_should_stop() || !list_empty(&dev->request_queue_send)) + for (;;) { wait_event_interruptible(dev->process_queue_send, kthread_should_stop() || !list_empty(&dev->request_queue_send)); - if (list_empty(&dev->request_queue_send)) - continue; + if (kthread_should_stop()) + break; // extract block request - spin_lock_irq(&dev->blk_lock); + spin_lock_irq(&dev->blk_lock); // TODO: http://www.linuxjournal.com/article/5833 says spin_lock_irq should not be used in general, but article is 10 years old + if (list_empty(&dev->request_queue_send)) + { + spin_unlock_irq(&dev->blk_lock); + continue; + } blk_request = list_entry(dev->request_queue_send.next, struct request, queuelist); spin_unlock_irq(&dev->blk_lock); @@ -411,18 +656,19 @@ int dnbd3_net_send(void *data) dnbd3_request.cmd = CMD_GET_BLOCK; dnbd3_request.offset = blk_rq_pos(blk_request) << 9; // *512 dnbd3_request.size = blk_rq_bytes(blk_request); // bytes left to complete entire request + // enqueue request to request_queue_receive + spin_lock_irq(&dev->blk_lock); + list_del_init(&blk_request->queuelist); + list_add_tail(&blk_request->queuelist, &dev->request_queue_receive); + spin_unlock_irq(&dev->blk_lock); break; case REQ_TYPE_SPECIAL: - switch (blk_request->cmd_flags) - { - case REQ_GET_FILESIZE: - dnbd3_request.cmd = CMD_GET_SIZE; - break; - case REQ_GET_SERVERS: - dnbd3_request.cmd = CMD_GET_SERVERS; - break; - } + dnbd3_request.cmd = blk_request->cmd_flags; + dnbd3_request.size = 0; + spin_lock_irq(&dev->blk_lock); + list_del_init(&blk_request->queuelist); + spin_unlock_irq(&dev->blk_lock); break; default: @@ -434,29 +680,30 @@ int dnbd3_net_send(void *data) } // send net request - memcpy(dnbd3_request.handle, &blk_request, sizeof(blk_request)); + dnbd3_request.handle = (uint64_t)blk_request; + fixup_request(dnbd3_request); iov.iov_base = &dnbd3_request; iov.iov_len = sizeof(dnbd3_request); - if (kernel_sendmsg(dev->cur_server.sock, &msg, &iov, 1, sizeof(dnbd3_request)) <= 0) + if (kernel_sendmsg(dev->sock, &msg, &iov, 1, sizeof(dnbd3_request)) != sizeof(dnbd3_request)) goto error; - - // enqueue request to request_queue_receive - spin_lock_irq(&dev->blk_lock); - list_del_init(&blk_request->queuelist); - list_add_tail(&blk_request->queuelist, &dev->request_queue_receive); - spin_unlock_irq(&dev->blk_lock); wake_up(&dev->process_queue_receive); } return 0; - error: - printk("ERROR: Connection to server %s lost (send)\n", dev->cur_server.host); - if (dev->cur_server.sock) - kernel_sock_shutdown(dev->cur_server.sock, SHUT_RDWR); - dev->thread_send = NULL; - dev->panic = 1; - return -1; +error: + printk("ERROR: Connection to server %pI4 : %d lost (send)\n", dev->cur_server.hostaddr, (int)ntohs(dev->cur_server.port)); + if (dev->sock) + kernel_sock_shutdown(dev->sock, SHUT_RDWR); + dev->thread_send = NULL; + if (!dev->disconnecting) + { + dev->panic = 1; + // start discover + dev->discover = 1; + wake_up(&dev->process_queue_discover); + } + return -1; } int dnbd3_net_receive(void *data) @@ -473,59 +720,64 @@ int dnbd3_net_receive(void *data) unsigned long flags; sigset_t blocked, oldset; - unsigned int size, i; - uint64_t filesize; - struct in_addr tmp_addr; + int count, remaining; init_msghdr(msg); set_user_nice(current, -20); - while (!kthread_should_stop() || !list_empty(&dev->request_queue_receive)) + for (;;) { wait_event_interruptible(dev->process_queue_receive, kthread_should_stop() || !list_empty(&dev->request_queue_receive)); + if (kthread_should_stop()) + break; + if (list_empty(&dev->request_queue_receive)) continue; // receive net reply iov.iov_base = &dnbd3_reply; iov.iov_len = sizeof(dnbd3_reply); - if (kernel_recvmsg(dev->cur_server.sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags) <= 0) + if (kernel_recvmsg(dev->sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags) != sizeof(dnbd3_reply)) goto error; + fixup_reply(dnbd3_reply); - // search for replied request in queue - received_request = *(struct request **) dnbd3_reply.handle; - spin_lock_irq(&dev->blk_lock); - list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_receive, queuelist) + // check error + if (dnbd3_reply.magic != dnbd3_packet_magic) { - if (blk_request == received_request) - break; + printk("ERROR: Wrong packet magic (Receive)\n"); + goto error; } - spin_unlock_irq(&dev->blk_lock); - - // check error if (dnbd3_reply.cmd == 0) { printk("ERROR: Command was 0 (Receive)\n"); goto error; } - if (dnbd3_reply.size == 0) - { - printk("FATAL: Requested image does't exist cmd: %i\n", dnbd3_reply.cmd); - spin_lock_irq(&dev->blk_lock); - list_del_init(&blk_request->queuelist); - spin_unlock_irq(&dev->blk_lock); - if ( (dnbd3_reply.cmd == CMD_GET_SIZE) || (dnbd3_reply.cmd == CMD_GET_SERVERS) ) - kfree(blk_request); - continue; - } // what to do? switch (dnbd3_reply.cmd) { case CMD_GET_BLOCK: + // search for replied request in queue + blk_request = NULL; + spin_lock_irq(&dev->blk_lock); + list_for_each_entry_safe(received_request, tmp_request, &dev->request_queue_receive, queuelist) + { + if ((uint64_t)received_request == dnbd3_reply.handle) + { + blk_request = received_request; + break; + } + } + spin_unlock_irq(&dev->blk_lock); + if (blk_request == NULL) + { + printk("ERROR: Received block data for unrequested handle (%llu: %llu).\n", + (unsigned long long)dnbd3_reply.handle, (unsigned long long)dnbd3_reply.size); + goto error; + } // receive data and answer to block layer rq_for_each_segment(bvec, blk_request, iter) { @@ -533,10 +785,9 @@ int dnbd3_net_receive(void *data) sigprocmask(SIG_SETMASK, &blocked, &oldset); kaddr = kmap(bvec->bv_page) + bvec->bv_offset; - size = bvec->bv_len; iov.iov_base = kaddr; - iov.iov_len = size; - if (kernel_recvmsg(dev->cur_server.sock, &msg, &iov, 1, size, msg.msg_flags) <= 0) + iov.iov_len = bvec->bv_len; + if (kernel_recvmsg(dev->sock, &msg, &iov, 1, bvec->bv_len, msg.msg_flags) <= 0) { kunmap(bvec->bv_page); goto error; @@ -551,47 +802,38 @@ int dnbd3_net_receive(void *data) spin_unlock_irqrestore(&dev->blk_lock, flags); continue; - case CMD_GET_SIZE: - dev->vid = dnbd3_reply.vid; - dev->rid = dnbd3_reply.rid; - iov.iov_base = &filesize; - iov.iov_len = sizeof(uint64_t); - if (kernel_recvmsg(dev->cur_server.sock, &msg, &iov, 1, dnbd3_reply.size, msg.msg_flags) <= 0) - goto error; - set_capacity(dev->disk, filesize >> 9); /* 512 Byte blocks */ - printk("INFO: Filesize %s: %llu\n", dev->disk->disk_name, filesize); - spin_lock_irq(&dev->blk_lock); - list_del_init(&blk_request->queuelist); - spin_unlock_irq(&dev->blk_lock); - kfree(blk_request); - continue; - case CMD_GET_SERVERS: - dev->alt_servers_num = dnbd3_reply.size / sizeof(struct in_addr); - size = sizeof(struct in_addr); - for (i = 0; i < dev->alt_servers_num && i < NUMBER_SERVERS; i++) + spin_lock_irq(&dev->blk_lock); + dev->new_servers_num = 0; + spin_unlock_irq(&dev->blk_lock); + count = MIN(NUMBER_SERVERS, dnbd3_reply.size / sizeof(dnbd3_server_entry_t)); + + if (count != 0) { - iov.iov_base = &tmp_addr; - iov.iov_len = size; - if (kernel_recvmsg(dev->cur_server.sock, &msg, &iov, 1, size, msg.msg_flags) <= 0) - goto error; - inet_ntoa(tmp_addr, dev->alt_servers[i].host); + iov.iov_base = dev->new_servers; + iov.iov_len = count * sizeof(dnbd3_server_entry_t); + if (kernel_recvmsg(dev->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags) != iov.iov_len) + goto error; + spin_lock_irq(&dev->blk_lock); + dev->new_servers_num = count; + spin_unlock_irq(&dev->blk_lock); + // TODO: Re-Add update check + } + // If there were more servers than accepted, remove the remaining data from the socket buffer + remaining = dnbd3_reply.size - count * sizeof(dnbd3_server_entry_t); + while (remaining > 0) + { + count = MIN(sizeof(dnbd3_reply), remaining); + iov.iov_base = &dnbd3_reply; + iov.iov_len = count; + if (kernel_recvmsg(dev->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags) <= 0) + goto error; + remaining -= count; } - spin_lock_irq(&dev->blk_lock); - list_del_init(&blk_request->queuelist); - spin_unlock_irq(&dev->blk_lock); - kfree(blk_request); - - if (dev->rid < dnbd3_reply.rid) - dev->update_available = 1; - continue; default: printk("ERROR: Unknown command (Receive)\n"); - spin_lock_irq(&dev->blk_lock); - list_del_init(&blk_request->queuelist); - spin_unlock_irq(&dev->blk_lock); continue; } @@ -599,23 +841,29 @@ int dnbd3_net_receive(void *data) return 0; - error: - printk("ERROR: Connection to server %s lost (receive)\n", dev->cur_server.host); - // move already send requests to request_queue_send again - if (!list_empty(&dev->request_queue_receive)) - { - printk("WARN: Request queue was not empty on %s\n", dev->disk->disk_name); - spin_lock_irq(&dev->blk_lock); - list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_receive, queuelist) - { - list_del_init(&blk_request->queuelist); - list_add(&blk_request->queuelist, &dev->request_queue_send); - } - spin_unlock_irq(&dev->blk_lock); - } - if (dev->cur_server.sock) - kernel_sock_shutdown(dev->cur_server.sock, SHUT_RDWR); - dev->thread_receive = NULL; - dev->panic = 1; - return -1; +error: + printk("ERROR: Connection to server %pI4 : %d lost (receive)\n", dev->cur_server.hostaddr, (int)ntohs(dev->cur_server.port)); + // move already send requests to request_queue_send again + if (!list_empty(&dev->request_queue_receive)) + { + printk("WARN: Request queue was not empty on %s\n", dev->disk->disk_name); + spin_lock_irq(&dev->blk_lock); + list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_receive, queuelist) + { + list_del_init(&blk_request->queuelist); + list_add(&blk_request->queuelist, &dev->request_queue_send); + } + spin_unlock_irq(&dev->blk_lock); + } + if (dev->sock) + kernel_sock_shutdown(dev->sock, SHUT_RDWR); + dev->thread_receive = NULL; + if (!dev->disconnecting) + { + dev->panic = 1; + // start discover + dev->discover = 1; + wake_up(&dev->process_queue_discover); + } + return -1; } diff --git a/src/kernel/net.h b/src/kernel/net.h index 64816bc..18aa227 100644 --- a/src/kernel/net.h +++ b/src/kernel/net.h @@ -23,12 +23,13 @@ #include "dnbd3.h" -#define init_msghdr(h)\ - h.msg_name = NULL;\ - h.msg_namelen = 0;\ - h.msg_control = NULL;\ - h.msg_controllen = 0;\ - h.msg_flags = MSG_WAITALL | MSG_NOSIGNAL; +#define init_msghdr(h) do { \ + h.msg_name = NULL; \ + h.msg_namelen = 0; \ + h.msg_control = NULL; \ + h.msg_controllen = 0; \ + h.msg_flags = MSG_WAITALL | MSG_NOSIGNAL; \ + } while (0) int dnbd3_net_connect(dnbd3_device_t *lo); diff --git a/src/kernel/serialize_kmod.c b/src/kernel/serialize_kmod.c new file mode 100644 index 0000000..a6a9b03 --- /dev/null +++ b/src/kernel/serialize_kmod.c @@ -0,0 +1,4 @@ +#include <linux/kernel.h> +#include <linux/string.h> + +#include "serialize.c" diff --git a/src/kernel/sysfs.c b/src/kernel/sysfs.c index a455bd2..df94d20 100644 --- a/src/kernel/sysfs.c +++ b/src/kernel/sysfs.c @@ -25,12 +25,12 @@ ssize_t show_cur_server_ip(char *buf, dnbd3_device_t *dev) { - return sprintf(buf, "%s\n", dev->cur_server.host); + return sprintf(buf, "%pI4\n", dev->cur_server.hostaddr); } ssize_t show_cur_server_rtt(char *buf, dnbd3_device_t *dev) { - return sprintf(buf, "%llu\n", dev->cur_server.rtt); + return sprintf(buf, "%llu\n", (unsigned long long)dev->cur_rtt); } ssize_t show_alt_server_num(char *buf, dnbd3_device_t *dev) @@ -38,9 +38,10 @@ ssize_t show_alt_server_num(char *buf, dnbd3_device_t *dev) return sprintf(buf, "%d\n", dev->alt_servers_num); } -ssize_t show_vid(char *buf, dnbd3_device_t *dev) +ssize_t show_image_name(char *buf, dnbd3_device_t *dev) { - return sprintf(buf, "%d\n", dev->vid); + if (dev->imgname == NULL) return sprintf(buf, "(null)"); + return sprintf(buf, "%s\n", dev->imgname); } ssize_t show_rid(char *buf, dnbd3_device_t *dev) @@ -55,12 +56,12 @@ ssize_t show_update_available(char *buf, dnbd3_device_t *dev) ssize_t show_alt_server_ip(char *buf, dnbd3_server_t *srv) { - return sprintf(buf, "%s\n", srv->host); + return sprintf(buf, "%pI4\n", srv->hostaddr); } ssize_t show_alt_server_rtt(char *buf, dnbd3_server_t *srv) { - return sprintf(buf, "%llu\n", srv->rtt); + return sprintf(buf, "%llu\n", (uint64_t)((srv->rtts[0]+srv->rtts[1]+srv->rtts[2]+srv->rtts[3]) / 4)); } device_attr_t cur_server_ip = @@ -84,10 +85,10 @@ device_attr_t alt_server_num = .store = NULL, }; -device_attr_t vid = +device_attr_t image_name = { - .attr = {.name = "vid", .mode = 0444 }, - .show = show_vid, + .attr = {.name = "image_name", .mode = 0444 }, + .show = show_image_name, .store = NULL, }; @@ -138,7 +139,7 @@ struct attribute *device_attrs[] = &cur_server_ip.attr, &cur_server_rtt.attr, &alt_server_num.attr, - &vid.attr, + &image_name.attr, &rid.attr, &update_available.attr, NULL, @@ -183,7 +184,7 @@ struct kobj_type server_ktype = void dnbd3_sysfs_init(dnbd3_device_t *dev) { int i; - char name[] = "alt_server99"; + char name[15] = "alt_server99"; struct kobject *kobj = &dev->kobj; struct kobj_type *ktype = &device_ktype; struct kobject *parent = &disk_to_dev(dev->disk)->kobj; diff --git a/src/serialize.c b/src/serialize.c new file mode 100644 index 0000000..7961cfe --- /dev/null +++ b/src/serialize.c @@ -0,0 +1,74 @@ +#include "serialize.h" +#include "types.h" + +#ifndef MIN +#define MIN(a,b) (a < b ? a : b) +#endif + +void serializer_reset_read(serialized_buffer_t *buffer, size_t data_len) +{ + buffer->buffer_end = buffer->buffer + MIN(MAX_PAYLOAD, data_len); + buffer->buffer_pointer = buffer->buffer; +} + +void serializer_reset_write(serialized_buffer_t *buffer) +{ + buffer->buffer_end = buffer->buffer + MAX_PAYLOAD; + buffer->buffer_pointer = buffer->buffer; +} + +uint16_t serializer_get_uint16(serialized_buffer_t *buffer) +{ + uint16_t ret; + if (buffer->buffer_pointer + 2 > buffer->buffer_end) return 0; + memcpy(&ret, buffer->buffer_pointer, 2); + *buffer->buffer_pointer += 2; + return net_order_16(ret); +} + +uint64_t serializer_get_uint64(serialized_buffer_t *buffer) +{ + uint64_t ret; + if (buffer->buffer_pointer + 8 > buffer->buffer_end) return 0; + memcpy(&ret, buffer->buffer_pointer, 8); + *buffer->buffer_pointer += 8; + return net_order_64(ret); +} + +char* serializer_get_string(serialized_buffer_t *buffer) +{ + char *ptr = buffer->buffer_pointer, *start = buffer->buffer_pointer; + while (ptr < buffer->buffer_end && *ptr) ++ptr; + if (*ptr) return NULL; // String did not terminate within buffer (possibly corrupted/malicious packet) + buffer->buffer_pointer = ptr + 1; + return start; +} + +void serializer_put_uint16(serialized_buffer_t *buffer, uint16_t value) +{ + if (buffer->buffer_pointer + 2 > buffer->buffer_end) return; + value = net_order_16(value); + memcpy(buffer->buffer_pointer, &value, 2); + buffer->buffer_pointer += 2; +} + +void serializer_put_uint64(serialized_buffer_t *buffer, uint64_t value) +{ + if (buffer->buffer_pointer + 8 > buffer->buffer_end) return; + value = net_order_64(value); + memcpy(buffer->buffer_pointer, &value, 8); + buffer->buffer_pointer += 8; +} + +void serializer_put_string(serialized_buffer_t *buffer, char *value) +{ + size_t len = strlen(value) + 1; + if (buffer->buffer_pointer + len > buffer->buffer_end) return; + memcpy(buffer->buffer_pointer, value, len); + buffer->buffer_pointer += len; +} + +ssize_t serializer_get_written_length(serialized_buffer_t *buffer) +{ + return buffer->buffer_pointer - buffer->buffer; +} diff --git a/src/serialize.h b/src/serialize.h new file mode 100644 index 0000000..eeb3b26 --- /dev/null +++ b/src/serialize.h @@ -0,0 +1,31 @@ +#ifndef SERIALIZER_H_ +#define SERIALIZER_H_ + +#include "config.h" + +typedef struct +{ + char buffer[MAX_PAYLOAD]; // This MUST be the first member or send_reply() will blow up + char *buffer_end; + char *buffer_pointer; +} serialized_buffer_t; + +void serializer_reset_read(serialized_buffer_t *buffer, size_t data_len); + +void serializer_reset_write(serialized_buffer_t *buffer); + +uint16_t serializer_get_uint16(serialized_buffer_t *buffer); + +uint64_t serializer_get_uint64(serialized_buffer_t *buffer); + +char* serializer_get_string(serialized_buffer_t *buffer); + +void serializer_put_uint16(serialized_buffer_t *buffer, uint16_t value); + +void serializer_put_uint64(serialized_buffer_t *buffer, uint64_t value); + +void serializer_put_string(serialized_buffer_t *buffer, char *value); + +ssize_t serializer_get_written_length(serialized_buffer_t *buffer); + +#endif diff --git a/src/server/ipc.c b/src/server/ipc.c index 917d6e5..5da811c 100644 --- a/src/server/ipc.c +++ b/src/server/ipc.c @@ -34,16 +34,17 @@ #include <libxml/xpath.h> #include "ipc.h" -#include "config.h" +#include "../config.h" #include "server.h" #include "utils.h" +#include "memlog.h" void* dnbd3_ipc_receive() { GSList *iterator = NULL; struct tm * timeinfo; - char time_buff[64]; + char time_buff[64], rid[20], ipaddr[100]; dnbd3_ipc_t header; int server_sock, client_sock; @@ -118,7 +119,7 @@ void* dnbd3_ipc_receive() grp = getgrnam(UNIX_SOCKET_GROUP); if (grp == NULL) { - printf("WARN: Group '%s' not found.\n", UNIX_SOCKET_GROUP); + memlogf("WARN: Group '%s' not found.\n", UNIX_SOCKET_GROUP); } else { @@ -129,9 +130,12 @@ void* dnbd3_ipc_receive() while (1) { - int i = 0, size = 0; + int size; char* buf; xmlDocPtr doc; + xmlNodePtr root_node, images_node, clients_node, tmp_node, log_parent_node, log_node; + xmlChar *xmlbuff; + int buffersize; // Accept connection if ((client_sock = accept(server_sock, &client, &len)) < 0) @@ -151,7 +155,7 @@ void* dnbd3_ipc_receive() switch (header.cmd) { case IPC_EXIT: - printf("INFO: Server shutdown...\n"); + memlogf("INFO: Server shutdown...\n"); header.size = ntohl(0); header.error = ntohl(0); send(client_sock, (char *) &header, sizeof(header), MSG_WAITALL); @@ -161,21 +165,13 @@ void* dnbd3_ipc_receive() break; case IPC_RELOAD: - printf("INFO: Reloading configuration...\n"); - dnbd3_reload_config(_config_file_name); - header.size = ntohl(0); - header.error = ntohl(0); + header.size = ntohl(0); + header.error = ntohl(ERROR_UNKNOWN); send(client_sock, (char *) &header, sizeof(header), MSG_WAITALL); close(client_sock); break; case IPC_INFO: - pthread_spin_lock(&_spinlock); - - xmlNodePtr root_node, images_node, clients_node, tmp_node; - xmlChar *xmlbuff; - int buffersize; - doc = xmlNewDoc(BAD_CAST "1.0"); root_node = xmlNewNode(NULL, BAD_CAST "info"); xmlDocSetRootElement(doc, root_node); @@ -183,38 +179,46 @@ void* dnbd3_ipc_receive() // Images images_node = xmlNewNode(NULL, BAD_CAST "images"); xmlAddChild(root_node, images_node); - for (i = 0; i < _num_images; i++) - { - char vid[20], rid[20]; - sprintf(vid,"%d",_images[i].vid); - sprintf(rid,"%d",_images[i].rid); - timeinfo = localtime(&_images[i].atime); + pthread_spin_lock(&_spinlock); + for (iterator = _dnbd3_images; iterator; iterator = iterator->next) + { + const dnbd3_image_t *image = iterator->data; + sprintf(rid,"%d",image->rid); + timeinfo = localtime(&image->atime); strftime(time_buff,64,"%d.%m.%y %H:%M:%S",timeinfo); tmp_node = xmlNewNode(NULL, BAD_CAST "image"); - xmlNewProp(tmp_node, BAD_CAST "group", BAD_CAST _images[i].group); + xmlNewProp(tmp_node, BAD_CAST "name", BAD_CAST image->name); xmlNewProp(tmp_node, BAD_CAST "atime", BAD_CAST time_buff); - xmlNewProp(tmp_node, BAD_CAST "vid", BAD_CAST vid); xmlNewProp(tmp_node, BAD_CAST "rid", BAD_CAST rid); - xmlNewProp(tmp_node, BAD_CAST "file", BAD_CAST _images[i].file); - xmlNewProp(tmp_node, BAD_CAST "servers", BAD_CAST _images[i].serverss); - xmlNewProp(tmp_node, BAD_CAST "cache", BAD_CAST _images[i].cache_file); + xmlNewProp(tmp_node, BAD_CAST "file", BAD_CAST image->file); + xmlNewProp(tmp_node, BAD_CAST "servers", BAD_CAST "???"); + xmlNewProp(tmp_node, BAD_CAST "cache", BAD_CAST image->cache_file); xmlAddChild(images_node, tmp_node); } - // Clients clients_node = xmlNewNode(NULL, BAD_CAST "clients"); - xmlAddChild(root_node, clients_node); + log_node = xmlAddChild(root_node, clients_node); for (iterator = _dnbd3_clients; iterator; iterator = iterator->next) { dnbd3_client_t *client = iterator->data; if (client->image) { tmp_node = xmlNewNode(NULL, BAD_CAST "client"); - xmlNewProp(tmp_node, BAD_CAST "ip", BAD_CAST client->ip); + *ipaddr = '\0'; + inet_ntop(client->addrtype, client->ipaddr, ipaddr, 100); + xmlNewProp(tmp_node, BAD_CAST "ip", BAD_CAST ipaddr); xmlNewProp(tmp_node, BAD_CAST "file", BAD_CAST client->image->file); xmlAddChild(clients_node, tmp_node); } } + pthread_spin_unlock(&_spinlock); + + // Log + log_parent_node = xmlNewChild(root_node, NULL, BAD_CAST "log", NULL); + char *log = fetchlog(0); + if (log == NULL) log = "LOG IS NULL"; + log_node = xmlNewCDataBlock(doc, BAD_CAST log, strlen(log)); + xmlAddChild(log_parent_node, log_node); // Dump and send xmlDocDumpFormatMemory(doc, &xmlbuff, &buffersize, 1); @@ -224,10 +228,10 @@ void* dnbd3_ipc_receive() send(client_sock, (char *) xmlbuff, buffersize, MSG_WAITALL); // Cleanup - pthread_spin_unlock(&_spinlock); close(client_sock); xmlFree(xmlbuff); xmlFreeDoc(doc); + free(log); break; case IPC_ADDIMG: @@ -257,11 +261,10 @@ void* dnbd3_ipc_receive() if(cur->type == XML_ELEMENT_NODE) { dnbd3_image_t image; - image.group = (char *) xmlGetNoNsProp(cur, BAD_CAST "group"); - image.vid = atoi((char *) xmlGetNoNsProp(cur, BAD_CAST "vid")); + memset(&image, 0, sizeof(dnbd3_image_t)); + image.name = (char *) xmlGetNoNsProp(cur, BAD_CAST "name"); image.rid = atoi((char *) xmlGetNoNsProp(cur, BAD_CAST "rid")); image.file = (char *) xmlGetNoNsProp(cur, BAD_CAST "file"); - image.serverss = (char *) xmlGetNoNsProp(cur, BAD_CAST "servers"); image.cache_file = (char *) xmlGetNoNsProp(cur, BAD_CAST "cache"); header.error = htonl(dnbd3_add_image(&image, _config_file_name)); } @@ -308,11 +311,10 @@ void* dnbd3_ipc_receive() if(cur->type == XML_ELEMENT_NODE) { dnbd3_image_t image; - image.group = (char *) xmlGetNoNsProp(cur, BAD_CAST "group"); - image.vid = atoi((char *) xmlGetNoNsProp(cur, BAD_CAST "vid")); + memset(&image, 0, sizeof(dnbd3_image_t)); + image.name = (char *) xmlGetNoNsProp(cur, BAD_CAST "name"); image.rid = atoi((char *) xmlGetNoNsProp(cur, BAD_CAST "rid")); image.file = (char *) xmlGetNoNsProp(cur, BAD_CAST "file"); - image.serverss = (char *) xmlGetNoNsProp(cur, BAD_CAST "servers"); image.cache_file = (char *) xmlGetNoNsProp(cur, BAD_CAST "cache"); header.error = htonl(dnbd3_del_image(&image, _config_file_name)); } @@ -333,7 +335,7 @@ void* dnbd3_ipc_receive() break; default: - printf("ERROR: Unknown command: %i\n", header.cmd); + memlogf("ERROR: Unknown command: %i\n", header.cmd); header.size = htonl(0); header.error = htonl(ERROR_UNKNOWN); send(client_sock, (char *) &header, sizeof(header), MSG_WAITALL); @@ -343,6 +345,7 @@ void* dnbd3_ipc_receive() } } close(server_sock); + pthread_exit((void *) 0); } void dnbd3_ipc_send(int cmd) @@ -414,11 +417,21 @@ void dnbd3_ipc_send(int cmd) xmlNodeSetPtr nodes; xmlNodePtr cur; + // Print log + xpathExpr = BAD_CAST "/info/log"; + xpathCtx = xmlXPathNewContext(doc); + xpathObj = xmlXPathEvalExpression(xpathExpr, xpathCtx); + if (xpathObj->nodesetval && xpathObj->nodesetval->nodeTab && xpathObj->nodesetval->nodeTab[0]) { + printf("--- Last log lines ----\n%s\n\n", xmlNodeGetContent(xpathObj->nodesetval->nodeTab[0])); + } + xmlXPathFreeObject(xpathObj); + xmlXPathFreeContext(xpathCtx); + // Print images xpathExpr = BAD_CAST "/info/images/image"; xpathCtx = xmlXPathNewContext(doc); xpathObj = xmlXPathEvalExpression(xpathExpr, xpathCtx); - printf("Exported images (atime, vid, rid, file):\n"); + printf("Exported images (atime, name, rid, file):\n"); printf("========================================\n"); nodes = xpathObj->nodesetval; n = (nodes) ? nodes->nodeNr : 0; @@ -428,7 +441,7 @@ void dnbd3_ipc_send(int cmd) { cur = nodes->nodeTab[i]; xmlChar *atime = xmlGetNoNsProp(cur, BAD_CAST "atime"); - xmlChar *vid = xmlGetNoNsProp(cur, BAD_CAST "vid"); + xmlChar *vid = xmlGetNoNsProp(cur, BAD_CAST "name"); xmlChar *rid = xmlGetNoNsProp(cur, BAD_CAST "rid"); xmlChar *file = xmlGetNoNsProp(cur, BAD_CAST "file"); printf("%s\t%s\t%s\t%s\n", atime, vid, rid, file); diff --git a/src/server/net.c b/src/server/net.c index cd93c0d..9ae168a 100644 --- a/src/server/net.c +++ b/src/server/net.c @@ -34,6 +34,83 @@ #include "server.h" #include "utils.h" +#include "memlog.h" +#include "../serialize.h" +#include "../config.h" + + +static char recv_request_header(int sock, dnbd3_request_t *request) +{ + // Read request heade from socket + if (recv(sock, request, sizeof(dnbd3_request_t), MSG_WAITALL) != sizeof(dnbd3_request_t)) + { + printf("[DEBUG] Error receiving request: Could not read message header\n"); + return 0; + } + // Make sure all bytes are in the right order (endianness) + fixup_request(*request); + if (request->magic != dnbd3_packet_magic) + { + printf("[DEBUG] Magic in client request incorrect\n"); + return 0; + } + // Payload sanity check + if (request->size > MAX_PAYLOAD) + { + memlogf("[WARNING] Client tries to send a packet of type %d with %d bytes payload. Dropping client.", (int)request->cmd, (int)request->size); + return 0; + } + return 1; +} + +static char recv_request_payload(int sock, uint32_t size, serialized_buffer_t *payload) +{ + if (size == 0) + { + memlogf("[BUG] Called recv_request_payload() to receive 0 bytes"); + return 0; + } + if (size > MAX_PAYLOAD) + { + memlogf("[BUG] Called recv_request_payload() for more bytes than the passed buffer could hold!"); + return 0; + } + if (recv(sock, payload->buffer, size, MSG_WAITALL) != size) + { + printf("[ERROR] Could not receive request payload of length %d\n", (int)size); + return 0; + } + // Prepare payload buffer for reading + serializer_reset_read(payload, size); + return 1; +} + +static char send_reply(int sock, dnbd3_reply_t *reply, void *payload) +{ + fixup_reply(*reply); + if (!payload || reply->size == 0) + { + if (send(sock, reply, sizeof(dnbd3_reply_t), MSG_WAITALL) != sizeof(dnbd3_reply_t)) + { + printf("[DEBUG] Send failed (header-only)\n"); + return 0; + } + } + else + { + struct iovec iov[2]; + iov[0].iov_base = reply; + iov[0].iov_len = sizeof(dnbd3_reply_t); + iov[1].iov_base = payload; + iov[1].iov_len = reply->size; + if (writev(sock, iov, 2) != sizeof(dnbd3_reply_t) + reply->size) + { + printf("[DEBUG] Send failed (reply with payload of %d bytes)\n", (int)reply->size); + return 0; + } + } + return 1; +} void *dnbd3_handle_query(void *dnbd3_client) { @@ -41,87 +118,146 @@ void *dnbd3_handle_query(void *dnbd3_client) dnbd3_request_t request; dnbd3_reply_t reply; - int cork = 1; - int uncork = 0; + const int cork = 1; + const int uncork = 0; dnbd3_image_t *image = NULL; - int image_file, image_cache = -1; + int image_file = -1, image_cache = -1; - struct in_addr alt_server; - int i = 0; + int i, num; uint64_t map_y; char map_x, bit_mask; + serialized_buffer_t payload; + char *image_name; + uint16_t rid, client_version; uint64_t todo_size = 0; uint64_t todo_offset = 0; uint64_t cur_offset = 0; uint64_t last_offset = 0; + dnbd3_server_entry_t server_list[NUMBER_SERVERS]; + int dirty = 0; - while (recv(client->sock, &request, sizeof(dnbd3_request_t), MSG_WAITALL) > 0) + reply.magic = dnbd3_packet_magic; + + // Receive first packet. This must be CMD_GET_SIZE by protocol specification + if (recv_request_header(client->sock, &request)) + { + if (request.cmd != CMD_GET_SIZE) + { + printf("[DEBUG] Client sent invalid handshake (%d). Dropping Client\n", (int)request.cmd); + } + else + { + if (recv_request_payload(client->sock, request.size, &payload)) + { + client_version = serializer_get_uint16(&payload); + image_name = serializer_get_string(&payload); + rid = serializer_get_uint16(&payload); + if (request.size < 3 || !image_name || client_version < MIN_SUPPORTED_CLIENT) + { + if (client_version < MIN_SUPPORTED_CLIENT) + { + printf("[DEBUG] Client too old\n"); + } + else + { + printf("[DEBUG] Incomplete handshake received\n"); + } + } + else + { + pthread_spin_lock(&_spinlock); + image = dnbd3_get_image(image_name, rid, 0); + if (!image) + { + printf("[DEBUG] Client requested non-existent image '%s'\n", image_name); + } + else + { + serializer_put_uint16(&payload, PROTOCOL_VERSION); + serializer_put_string(&payload, image->low_name); + serializer_put_uint16(&payload, image->rid); + serializer_put_uint64(&payload, image->filesize); + reply.cmd = CMD_GET_SIZE; + reply.size = serializer_get_written_length(&payload); + if (!send_reply(client->sock, &reply, &payload)) + { + image = NULL; + } + else + { + image_file = open(image->file, O_RDONLY); + if (image_file == -1) + { + image = NULL; + } + else + { + client->image = image; + image->atime = time(NULL); // TODO: check if mutex is needed + + if (image->cache_map && image->cache_file) + image_cache = open(image->cache_file, O_RDWR); + } + } + } + pthread_spin_unlock(&_spinlock); + } + } + } + } + + if (image) while (recv_request_header(client->sock, &request)) { - reply.cmd = request.cmd; - reply.size = 0; - memcpy(reply.handle, request.handle, sizeof(request.handle)); - pthread_spin_lock(&client->spinlock); switch (request.cmd) { - case CMD_GET_SERVERS: - image = dnbd3_get_image(request.vid, request.rid); - if(!image) - goto error; - - int num = (image->num_servers < NUMBER_SERVERS) ? image->num_servers : NUMBER_SERVERS; - reply.vid = image->vid; - reply.rid = dnbd3_get_image(request.vid, 0)->rid; - reply.size = num * sizeof(struct in_addr); - send(client->sock, (char *) &reply, sizeof(dnbd3_reply_t), 0); - - for (i = 0; i < num; i++) - { - inet_aton(image->servers[i], &alt_server); - send(client->sock, (char *) &alt_server, sizeof(struct in_addr), 0); - } - client->image = image; - image->atime = time(NULL); // TODO: check if mutex is needed - break; - - case CMD_GET_SIZE: - image = dnbd3_get_image(request.vid, request.rid); - if(!image) - goto error; - - reply.vid = image->vid; - reply.rid = image->rid; - reply.size = sizeof(uint64_t); - send(client->sock, (char *) &reply, sizeof(dnbd3_reply_t), 0); - - send(client->sock, &image->filesize, sizeof(uint64_t), 0); - image_file = open(image->file, O_RDONLY); - client->image = image; - image->atime = time(NULL); // TODO: check if mutex is needed - - if (image->cache_file) - image_cache = open(image->cache_file, O_RDWR); - - break; case CMD_GET_BLOCK: - if (image_file < 0) - goto error; + if (request.offset >= image->filesize) + { // Sanity check + memlogf("[WARNING] Client requested non-existent block"); + reply.size = 0; + reply.cmd = CMD_ERROR; + send_reply(client->sock, &reply, NULL); + break; + } + if (request.offset + request.size > image->filesize) + { // Sanity check + memlogf("[WARNING] Client requested data block that extends beyond image size"); + reply.size = 0; + reply.cmd = CMD_ERROR; + send_reply(client->sock, &reply, NULL); + break; + } + if (request.size > image->filesize) + { // Sanity check + memlogf("[WARNING] Client requested data block that is bigger than the image size"); + reply.size = 0; + reply.cmd = CMD_ERROR; + send_reply(client->sock, &reply, NULL); + break; + } + // TODO: Try MSG_MORE instead of cork+uncork if performance ever becomes an issue.. setsockopt(client->sock, SOL_TCP, TCP_CORK, &cork, sizeof(cork)); + reply.cmd = CMD_GET_BLOCK; reply.size = request.size; - send(client->sock, (char *) &reply, sizeof(dnbd3_reply_t), 0); + reply.handle = request.handle; + send_reply(client->sock, &reply, NULL); // caching is off - if (!image->cache_file) + if (image_cache == -1) { - if (sendfile(client->sock, image_file, (off_t *) &request.offset, request.size) < 0) - printf("ERROR: Sendfile failed (sock)\n"); + if (sendfile(client->sock, image_file, (off_t *)&request.offset, request.size) != request.size) + { + printf("[ERROR] sendfile failed (image to net)\n"); + close(client->sock); + } setsockopt(client->sock, SOL_TCP, TCP_CORK, &uncork, sizeof(uncork)); break; @@ -134,10 +270,11 @@ void *dnbd3_handle_query(void *dnbd3_client) cur_offset = request.offset; last_offset = request.offset + request.size; + // first make sure the whole requested part is in the local cache file while(cur_offset < last_offset) { map_y = cur_offset >> 15; - map_x = (cur_offset >> 12) & 7; // mod 8 + map_x = (cur_offset >> 12) & 7; // mod 256 bit_mask = 0b00000001 << (map_x); cur_offset += 4096; @@ -147,8 +284,15 @@ void *dnbd3_handle_query(void *dnbd3_client) if (todo_size != 0) // fetch missing chunks { lseek(image_cache, todo_offset, SEEK_SET); - if (sendfile(image_cache, image_file, (off_t *) &todo_offset, todo_size) < 0) - printf("ERROR: Sendfile failed (cache)\n"); + if (sendfile(image_cache, image_file, (off_t *) &todo_offset, todo_size) != todo_size) + { + printf("[ERROR] sendfile failed (copy to cache 1)\n"); + close(client->sock); + // Reset these so we don't update the cache map with false information + dirty = 0; + todo_size = 0; + break; + } todo_size = 0; dirty = 1; } @@ -164,13 +308,16 @@ void *dnbd3_handle_query(void *dnbd3_client) if (todo_size != 0) { lseek(image_cache, todo_offset, SEEK_SET); - if (sendfile(image_cache, image_file, (off_t *) &todo_offset, todo_size) < 0) - printf("ERROR: Sendfile failed (cache)\n"); - + if (sendfile(image_cache, image_file, (off_t *) &todo_offset, todo_size) != todo_size) + { + printf("[ERROR] sendfile failed (copy to cache 2)\n"); + close(client->sock); + break; + } dirty = 1; } - if (dirty) + if (dirty) // cache map needs to be updated as something was missing locally { // set 1 in cache map for whole request cur_offset = request.offset; @@ -185,35 +332,42 @@ void *dnbd3_handle_query(void *dnbd3_client) } // send data to client - if (sendfile(client->sock, image_cache, (off_t *) &request.offset, request.size) < 0) - printf("ERROR: Sendfile failed (net)\n"); + if (sendfile(client->sock, image_cache, (off_t *) &request.offset, request.size) != request.size) + { + memlogf("[ERROR] sendfile failed (cache to net)\n"); + close(client->sock); + } setsockopt(client->sock, SOL_TCP, TCP_CORK, &uncork, sizeof(uncork)); break; + + case CMD_GET_SERVERS: + // Build list of known working alt servers + num = 0; + for (i = 0; i < NUMBER_SERVERS; i++) + { + if (image->servers[i].addrtype == 0 || image->servers[i].failures > 200) continue; + memcpy(server_list + num++, image->servers + i, sizeof(dnbd3_server_entry_t)); + } + reply.cmd = CMD_GET_SERVERS; + reply.size = num * sizeof(dnbd3_server_entry_t); + send_reply(client->sock, &reply, server_list); + break; + default: - printf("ERROR: Unknown command\n"); + memlogf("ERROR: Unknown command\n"); break; } - pthread_spin_unlock(&client->spinlock); - continue; - - error: - printf("ERROR: Client requested an unknown image id.\n"); - send(client->sock, (char *) &reply, sizeof(dnbd3_reply_t), 0); - pthread_spin_unlock(&client->spinlock); - continue; - } close(client->sock); - close(image_file); - close(image_cache); + if (image_file != -1) close(image_file); + if (image_cache != -1) close(image_cache); pthread_spin_lock(&_spinlock); _dnbd3_clients = g_slist_remove(_dnbd3_clients, client); pthread_spin_unlock(&_spinlock); - printf("INFO: Client %s exit\n", client->ip); free(client); pthread_exit((void *) 0); } @@ -227,7 +381,7 @@ int dnbd3_setup_socket() sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); if (sock < 0) { - printf("ERROR: Socket failure\n"); + memlogf("ERROR: Socket setup failure\n"); return -1; } @@ -239,14 +393,14 @@ int dnbd3_setup_socket() // Bind to socket if (bind(sock, (struct sockaddr*) &server, sizeof(server)) < 0) { - printf("ERROR: Bind failure\n"); + memlogf("ERROR: Bind failure\n"); return -1; } // Listen on socket if (listen(sock, 100) == -1) { - printf("ERROR: Listen failure\n"); + memlogf("ERROR: Listen failure\n"); return -1; } diff --git a/src/server/serialize.c b/src/server/serialize.c new file mode 100644 index 0000000..4934132 --- /dev/null +++ b/src/server/serialize.c @@ -0,0 +1,5 @@ +#include <stdio.h> +#include <string.h> +#include <stdint.h> + +#include "../serialize.c" diff --git a/src/server/server.c b/src/server/server.c index 371d27a..0a206d1 100644 --- a/src/server/server.c +++ b/src/server/server.c @@ -34,14 +34,14 @@ #include "utils.h" #include "net.h" #include "ipc.h" +#include "memlog.h" int _sock; pthread_spinlock_t _spinlock; GSList *_dnbd3_clients = NULL; char *_config_file_name = DEFAULT_SERVER_CONFIG_FILE; -dnbd3_image_t *_images; -size_t _num_images = 0; +GSList *_dnbd3_images; // of dnbd3_image_t void dnbd3_print_help(char* argv_0) { @@ -65,8 +65,10 @@ void dnbd3_print_version() void dnbd3_cleanup() { - int i, fd; - printf("INFO: Cleanup...\n"); + int fd; + memlogf("INFO: Cleanup...\n"); + + close(_sock); pthread_spin_lock(&_spinlock); GSList *iterator = NULL; @@ -74,39 +76,38 @@ void dnbd3_cleanup() { dnbd3_client_t *client = iterator->data; shutdown(client->sock, SHUT_RDWR); - pthread_join(*client->thread, NULL); + pthread_join(client->thread, NULL); + g_free(client); } g_slist_free(_dnbd3_clients); - for (i = 0; i < _num_images; i++) + for (iterator = _dnbd3_images; iterator; iterator = iterator->next) { // save cache maps to files - if (_images[i].cache_file) + dnbd3_image_t *image = iterator->data; + if (image->cache_file) { - char tmp[strlen(_images[i].cache_file)+4]; - strcpy(tmp, _images[i].cache_file); + char tmp[strlen(image->cache_file)+4]; + strcpy(tmp, image->cache_file); strcat(tmp, ".map"); fd = open(tmp, O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR); if (fd > 0) - write(fd, _images[i].cache_map, (_images[i].filesize >> 15) * sizeof(char)); + write(fd, image->cache_map, ((image->filesize + (1 << 15) - 1) >> 15) * sizeof(char)); close(fd); } - free(_images[i].group); - free(_images[i].file); - free(_images[i].servers); - free(_images[i].serverss); - free(_images[i].cache_file); - free(_images[i].cache_map); + free(image->name); + g_free(image->file); + g_free(image->cache_file); + free(image->cache_map); + g_free(image); } + g_slist_free(_dnbd3_images); pthread_spin_unlock(&_spinlock); - - close(_sock); - free(_images); #ifndef IPC_TCP unlink(UNIX_SOCKET); #endif @@ -161,6 +162,7 @@ int main(int argc, char* argv[]) break; case '?': dnbd3_print_help(argv[0]); + break; } opt = getopt_long(argc, argv, optString, longOpts, &longIndex); } @@ -168,6 +170,10 @@ int main(int argc, char* argv[]) if (demonize) daemon(1, 0); + pthread_spin_init(&_spinlock, PTHREAD_PROCESS_PRIVATE); + + initmemlog(); + // load config file dnbd3_load_config(_config_file_name); @@ -191,9 +197,7 @@ int main(int argc, char* argv[]) pthread_t thread_ipc; pthread_create(&(thread_ipc), NULL, dnbd3_ipc_receive, NULL); - pthread_spin_init(&_spinlock, PTHREAD_PROCESS_PRIVATE); - - printf("INFO: Server is ready...\n"); + memlogf("[INFO] Server is ready..."); // main loop while (1) @@ -201,28 +205,45 @@ int main(int argc, char* argv[]) fd = accept(_sock, (struct sockaddr*) &client, &len); if (fd < 0) { - printf("ERROR: Accept failure\n"); + memlogf("[ERROR] Accept failure"); continue; } - printf("INFO: Client %s connected\n", inet_ntoa(client.sin_addr)); + //memlogf("INFO: Client %s connected\n", inet_ntoa(client.sin_addr)); setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *) &timeout, sizeof(timeout)); setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, (char *) &timeout, sizeof(timeout)); - pthread_t thread; - dnbd3_client_t *dnbd3_client = (dnbd3_client_t *) malloc(sizeof(dnbd3_client_t)); - pthread_spin_init(&dnbd3_client->spinlock, PTHREAD_PROCESS_PRIVATE); - strcpy(dnbd3_client->ip, inet_ntoa(client.sin_addr)); + dnbd3_client_t *dnbd3_client = g_new0(dnbd3_client_t, 1); + if (dnbd3_client == NULL) + { + memlogf("[ERROR] Could not alloc dnbd3_client_t for new client."); + close(fd); + continue; + } + // TODO: Extend this if you ever want to add IPv6 (something like:) + // dnbd3_client->addrtype = AF_INET6; + // memcpy(dnbd3_client->ipaddr, &(client.sin6_addr), 16); + dnbd3_client->addrtype = AF_INET; + memcpy(dnbd3_client->ipaddr, &(client.sin_addr), 4); dnbd3_client->sock = fd; - dnbd3_client->thread = &thread; dnbd3_client->image = NULL; + // This has to be done before creating the thread, otherwise a race condition might occur when the new thread dies faster than this thread adds the client to the list after creating the thread pthread_spin_lock(&_spinlock); _dnbd3_clients = g_slist_append(_dnbd3_clients, dnbd3_client); pthread_spin_unlock(&_spinlock); - pthread_create(&(thread), NULL, dnbd3_handle_query, (void *) (uintptr_t) dnbd3_client); - pthread_detach(thread); + if (0 != pthread_create(&(dnbd3_client->thread), NULL, dnbd3_handle_query, (void *) (uintptr_t) dnbd3_client)) + { + memlogf("[ERROR] Could not start thread for new client."); + pthread_spin_lock(&_spinlock); + _dnbd3_clients = g_slist_remove(_dnbd3_clients, dnbd3_client); + pthread_spin_unlock(&_spinlock); + g_free(dnbd3_client); + close(fd); + continue; + } + pthread_detach(dnbd3_client->thread); } dnbd3_cleanup(); diff --git a/src/server/server.h b/src/server/server.h index f499acc..69d597b 100644 --- a/src/server/server.h +++ b/src/server/server.h @@ -30,33 +30,31 @@ typedef struct { - char *group; - char *file; - uint64_t filesize; - size_t num_servers; - char **servers; - char *serverss; - int vid; - int rid; - time_t atime; - char *cache_map; - char *cache_file; + char *name; // full name of image, eg. "uni-freiburg.ubuntu-12.04" + char *low_name; // full name of image, lowercased for comparison + int rid; // revision of provided image + char *file; // path to image file or device + uint64_t filesize; // size of image + dnbd3_server_entry_t servers[NUMBER_SERVERS]; // known alt servers that also offer that image + time_t atime; // last access time + uint8_t *cache_map; // cache map telling which parts are locally cached + char *cache_file; // path to local cache of image (in case the image is read from a dnbd3 device) + char working; // whether this image is considered working. local images are "working" if the local file exists, proxied images have to have at least one working upstream server or a complete local cache file } dnbd3_image_t; typedef struct { int sock; - char ip[16]; - pthread_t *thread; + uint8_t ipaddr[16]; + uint8_t addrtype; // ip version (AF_INET or AF_INET6) + pthread_t thread; dnbd3_image_t *image; - pthread_spinlock_t spinlock; } dnbd3_client_t; -extern GSList *_dnbd3_clients; +extern GSList *_dnbd3_clients; // of dnbd3_client_t extern pthread_spinlock_t _spinlock; extern char *_config_file_name; -extern dnbd3_image_t *_images; -extern size_t _num_images; +extern GSList *_dnbd3_images; // of dnbd3_image_t void dnbd3_cleanup(); diff --git a/src/server/utils.c b/src/server/utils.c index 0be1569..15e51f8 100644 --- a/src/server/utils.c +++ b/src/server/utils.c @@ -23,14 +23,124 @@ #include <sys/stat.h> #include <pthread.h> #include <string.h> +#include <glib.h> +#include <netinet/in.h> +#include <arpa/inet.h> #include "server.h" #include "utils.h" +#include "memlog.h" + +/** + * Parse IPv4 or IPv6 address in string representation to a suitable format usable by the BSD socket library + * @string eg. "1.2.3.4" or "2a01::10:5", optially with port appended, eg "1.2.3.4:6666" or "2a01::10:5:6666" + * @af will contain either AF_INET or AF_INET6 + * @addr will contain the address in network representation + * @port will contain the port in network representation, defaulting to #define PORT if none was given + * returns 1 on success, 0 in failure. contents of af, addr and port are undefined in the latter case + */ +static char parse_address(char *string, uint8_t *af, uint8_t *addr, uint16_t *port) +{ + struct in_addr v4; + struct in6_addr v6; + + // Try IPv4 without port + if (1 == inet_pton(AF_INET, string, &v4)) + { + *af = AF_INET; + memcpy(addr, &v4, 4); + *port = htons(PORT); + return 1; + } + // Try IPv6 without port + if (1 == inet_pton(AF_INET6, string, &v6)) + { + *af = AF_INET6; + memcpy(addr, &v6, 16); + *port = htons(PORT); + return 1; + } + + // Scan for port + char *portpos = NULL, *ptr = string; + while (*ptr) + { + if (*ptr == ':') portpos = ptr; + ++ptr; + } + if (portpos == NULL) return 0; // No port in string + // Consider IP being surrounded by [ ] + if (*string == '[' && *(portpos-1) == ']') + { + ++string; + *(portpos-1) = '\0'; + } + *portpos++ = '\0'; + int p = atoi(portpos); + if (p < 1 || p > 65535) return 0; // Invalid port + *port = htons((uint16_t)p); + + // Try IPv4 with port + if (1 == inet_pton(AF_INET, string, &v4)) + { + *af = AF_INET; + memcpy(addr, &v4, 4); + return 1; + } + // Try IPv6 with port + if (1 == inet_pton(AF_INET6, string, &v6)) + { + *af = AF_INET6; + memcpy(addr, &v6, 16); + return 1; + } + + // FAIL + return 0; +} + +static char is_valid_namespace(char *namespace) +{ + if (*namespace == '\0' || *namespace == '/') return 0; // Invalid: Length = 0 or starting with a slash + while (*namespace) + { + if (*namespace != '/' && *namespace != '-' + && (*namespace < 'a' || *namespace > 'z') + && (*namespace < 'A' || *namespace > 'Z')) return 0; + ++namespace; + } + if (*(namespace - 1) == '/') return 0; // Invalid: Ends in a slash + return 1; +} + +static char is_valid_imagename(char *namespace) +{ + if (*namespace == '\0' || *namespace == ' ') return 0; // Invalid: Length = 0 or starting with a space + while (*namespace) + { // Check for invalid chars + if (*namespace != '.' && *namespace != '-' && *namespace != ' ' + && *namespace != '(' && *namespace != ')' + && (*namespace < 'a' || *namespace > 'z') + && (*namespace < 'A' || *namespace > 'Z')) return 0; + ++namespace; + } + if (*(namespace - 1) == ' ') return 0; // Invalid: Ends in a space + return 1; +} + +static void strtolower(char *string) +{ + while (*string) + { + if (*string >= 'A' && *string <= 'Z') *string += 32; + ++string; + } +} void dnbd3_load_config(char *file) { int fd; - gint i; + gint i, j, k; GKeyFile* gkf; gkf = g_key_file_new(); @@ -40,111 +150,209 @@ void dnbd3_load_config(char *file) exit(EXIT_FAILURE); } + char *namespace = g_key_file_get_string(gkf, "settings", "default_namespace", NULL); + if (namespace && !is_valid_namespace(namespace)) + { + memlogf("[ERROR] Ignoring default namespace: '%s' is not a valid namespace", namespace); + g_free(namespace); + namespace = NULL; + } + gchar **groups = NULL; - groups = g_key_file_get_groups(gkf, &_num_images); - _images = calloc(_num_images, sizeof(dnbd3_image_t)); + gsize section_count; + groups = g_key_file_get_groups(gkf, §ion_count); - for (i = 0; i < _num_images; i++) + for (i = 0; i < section_count; i++) { - _images[i].group = malloc(strlen(groups[i])); - strcpy(_images[i].group, groups[i]); - _images[i].file = g_key_file_get_string(gkf, groups[i], "file", NULL); - _images[i].servers = g_key_file_get_string_list(gkf, groups[i], "servers", &_images[i].num_servers, NULL); - _images[i].serverss = g_key_file_get_string(gkf, groups[i], "servers", NULL); - _images[i].vid = g_key_file_get_integer(gkf, groups[i], "vid", NULL); - _images[i].rid = g_key_file_get_integer(gkf, groups[i], "rid", NULL); - _images[i].cache_file = g_key_file_get_string(gkf, groups[i], "cache", NULL); - _images[i].atime = 0; - - if (_images[i].num_servers > NUMBER_SERVERS) - printf("WARN: Max allowed servers %i\n", NUMBER_SERVERS); - - fd = open(_images[i].file, O_RDONLY); - if (fd > 0) - _images[i].filesize = lseek(fd, 0, SEEK_END); + // Special group + if (strcmp(groups[i], "settings") == 0 || strcmp(groups[i], "trusted") == 0) + { + continue; + } + + // An actual image definition + + if (!is_valid_imagename(groups[i])) + { + memlogf("[ERROR] Invalid image name: '%s'", groups[i]); + continue; + } + + int rid = g_key_file_get_integer(gkf, groups[i], "rid", NULL); + if (rid <= 0) + { + memlogf("[ERROR] Invalid rid '%d' for image '%s'", rid, groups[i]); + continue; + } + + if (strchr(groups[i], '.') == NULL && namespace == NULL) + { + memlogf("[ERROR] Image '%s' has local name and no default namespace is defined; entry ignored.", groups[i]); + continue; + } + + dnbd3_image_t *image = g_new0(dnbd3_image_t, 1); + if (image == NULL) + { + memlogf("[ERROR] Could not allocate dnbd3_image_t while reading config"); + continue; + } + + if (strchr(groups[i], '/') == NULL) + { // Local image, build global name + image->name = calloc(strlen(namespace) + strlen(groups[i]) + 2, sizeof(char)); + sprintf(image->name, "%s/%s", namespace, groups[i]); + } else - printf("ERROR: Image file not found: %s\n", _images[i].file); - - close(fd); - - if (_images[i].cache_file) { - // read cache map from file - _images[i].cache_map = calloc(_images[i].filesize >> 15, sizeof(char)); - memset(_images[i].cache_map, 0, (_images[i].filesize >> 15) * sizeof(char)); - char tmp[strlen(_images[i].cache_file)+4]; - strcpy(tmp, _images[i].cache_file); - strcat(tmp, ".map"); - fd = open(tmp, O_RDONLY); - if (fd > 0) - read(fd, _images[i].cache_map, (_images[i].filesize >> 15) * sizeof(char)); - close(fd); - - // open cache file - fd = open(_images[i].cache_file, O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR); - if (fd < 1) - printf("ERROR: Could't create cache file\n"); - - if (_images[i].filesize != lseek(fd, 0, SEEK_END)) - fallocate(fd, 0, 0, _images[i].filesize); - - close(fd); + image->name = strdup(groups[i]); } - } - g_strfreev(groups); - g_key_file_free(gkf); -} - -void dnbd3_reload_config(char* config_file_name) -{ - int i, fd; - pthread_spin_lock(&_spinlock); - GSList *iterator = NULL; - for (iterator = _dnbd3_clients; iterator; iterator = iterator->next) - { - dnbd3_client_t *client = iterator->data; - pthread_spin_lock(&client->spinlock); - client->image = NULL; - } - - for (i = 0; i < _num_images; i++) - { - // save cache maps - if (_images[i].cache_file) + if (dnbd3_get_image(image->name, rid, 0)) + { + memlogf("[ERROR] Duplicate image in config: '%s' rid:%d", image->name, rid); + free(image->name); + g_free(image); + continue; + } + + image->low_name = strdup(image->name); + strtolower(image->low_name); + + image->rid = rid; + image->file = g_key_file_get_string(gkf, groups[i], "file", NULL); + char relayed = image->file == NULL || *image->file == '\0'; + if (relayed && image->file) + { + g_free(image->file); + image->file = NULL; + } + + if (relayed) // Image is relayed (this server acts as proxy) + { + if (strchr(groups[i], '.') == NULL) + { + memlogf("[ERROR] Relayed image without global name in config: '%s'", groups[i]); + g_free(image); + continue; + } + image->cache_file = g_key_file_get_string(gkf, groups[i], "cache", NULL); + if (image->cache_file && *image->cache_file == '\0') g_free(image->cache_file); + } + else // Image is a local one, open file to get size { - char tmp[strlen(_images[i].cache_file)+4]; - strcpy(tmp, _images[i].cache_file); - strcat(tmp, ".map"); - fd = open(tmp, O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR); - - if (fd > 0) - write(fd, _images[i].cache_map, (_images[i].filesize >> 15) * sizeof(char)); + fd = open(image->file, O_RDONLY); + if (fd > 0) { + image->filesize = lseek(fd, 0, SEEK_END); + if (image->filesize & 4095) { + memlogf("[WARNING] Size of image '%s' is not a multiple of 4096. Last incomplete block will be ignored!", image->file); + image->filesize &= ~(uint64_t)4095; + } + close(fd); + image->working = 1; + } else { + memlogf("[ERROR] Image file not found: '%s'", image->file); + } + } - close(fd); + // A list of servers that are known to also host or relay this image + gsize num_servers; + gchar **servers = g_key_file_get_string_list(gkf, groups[i], "servers", &num_servers, NULL); + if (servers) for (k = 0, j = 0; j < MIN(num_servers, NUMBER_SERVERS); ++j) + { + if (parse_address(servers[j], &(image->servers[k].addrtype), image->servers[k].ipaddr, &(image->servers[k].port))) + { + ++k; continue; + } + image->servers[k].addrtype = 0; } + g_strfreev(servers); - free(_images[i].group); - free(_images[i].file); - free(_images[i].servers); - free(_images[i].serverss); - free(_images[i].cache_file); - free(_images[i].cache_map); + if (image->cache_file) + { + // Determine size of cached image + fd = open(image->cache_file, O_RDONLY); + if (fd > 0) + { + image->filesize = lseek(fd, 0, SEEK_END); + close(fd); + } + if (image->filesize & 4095) + { // Cache files should always be trincated to 4kib boundaries already + memlogf("[WARNING] Size of cache file '%s' is not a multiple of 4096. Something's fishy!", image->cache_file); + image->filesize = 0; + } + else if (image->filesize > 0) + { + const size_t map_len_bytes = (image->filesize + (1 << 15) - 1) >> 15; + image->cache_map = calloc(map_len_bytes, sizeof(uint8_t)); + // read cache map from file + // one byte in the map covers 8 4kib blocks, so 32kib per byte + // "+ (1 << 15) - 1" is required to account for the last bit of + // the image that is smaller than 32kib + // this would be the case whenever the image file size is not a + // multiple of 32kib (= the number of blocks is not dividable by 8) + // ie: if the image is 49152 bytes and you do 49152 >> 15 you get 1, + // but you actually need 2 bytes to have a complete cache map + char tmp[strlen(image->cache_file)+4]; + strcpy(tmp, image->cache_file); + strcat(tmp, ".map"); + fd = open(tmp, O_RDONLY); + if (fd > 0) + { + read(fd, image->cache_map, map_len_bytes * sizeof(uint8_t)); + close(fd); + // If the whole image is cached, mark it as working right away without waiting for an upstream server + image->working = 1; + for (j = 0; j < map_len_bytes - 1; ++j) + { + if (image->cache_map[j] != 0xFF) + { + image->working = 0; + break; + } + } + const int blocks_in_last_byte = (image->filesize >> 12) & 7; + uint8_t last_byte = 0; + if (blocks_in_last_byte == 0) + last_byte = 0xFF; + else + for (j = 0; j < k; ++j) last_byte = (last_byte << 1) | 1; + if ((image->cache_map[map_len_bytes-1] & last_byte) != last_byte) + image->working = 0; + else + memlogf("[INFO] Publishing relayed image '%s' because the local cache copy is complete", image->name); + } + + /* + // TODO: Do this as soon as a connection to a upstream server is established + // open cache file + fd = open(_images[i].cache_file, O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR); + if (fd < 1) + memlogf("ERROR: Could't create cache file '%s'", _images[i].cache_file); + + if (_images[i].filesize != lseek(fd, 0, SEEK_END)) + fallocate(fd, 0, 0, _images[i].filesize); + + close(fd); + */ + } + } // end cache_file handling + pthread_spin_lock(&_spinlock); + _dnbd3_images = g_slist_append(_dnbd3_images, image); + pthread_spin_unlock(&_spinlock); + // DONE IMAGE } - _num_images = 0; - free(_images); - dnbd3_load_config(config_file_name); - for (iterator = _dnbd3_clients; iterator; iterator = iterator->next) - { - dnbd3_client_t *client = iterator->data; - pthread_spin_unlock(&client->spinlock); - } - pthread_spin_unlock(&_spinlock); + g_free(namespace); + g_strfreev(groups); + g_key_file_free(gkf); } int dnbd3_add_image(dnbd3_image_t *image, char *file) { + return ERROR_IMAGE_ALREADY_EXISTS; // TODO: Make it work with image names + /* FILE* f = fopen(image->file,"r"); if (f == NULL) { @@ -198,10 +406,13 @@ int dnbd3_add_image(dnbd3_image_t *image, char *file) printf("ERROR: Config file is not writable: %s\n", file); return ERROR_CONFIG_FILE_PERMISSIONS; } + */ } int dnbd3_del_image(dnbd3_image_t *image, char *file) { + return ERROR_IMAGE_NOT_FOUND; // TODO: Make it work with image names + /* if (image->rid == 0) { printf("ERROR: Delete with rid=0 is not allowed\n"); @@ -254,38 +465,47 @@ int dnbd3_del_image(dnbd3_image_t *image, char *file) printf("ERROR: Config file is not writable: %s\n", file); return ERROR_CONFIG_FILE_PERMISSIONS; } + */ } -dnbd3_image_t* dnbd3_get_image(int vid, int rid) +dnbd3_image_t* dnbd3_get_image(char *name_orig, int rid, const char do_lock) { - int i, max = 0; - dnbd3_image_t *result = NULL; - for (i = 0; i < _num_images; ++i) + dnbd3_image_t *result = NULL, *image; + GSList *iterator; + char name[strlen(name_orig) + 1]; + strcpy(name, name_orig); + strtolower(name); + if (do_lock) pthread_spin_lock(&_spinlock); + for (iterator = _dnbd3_images; iterator; iterator = iterator->next) { + image = iterator->data; if (rid != 0) // rid was specified { - if (_images[i].vid == vid && _images[i].rid == rid) - result = &_images[i]; + if (image->rid == rid && strcmp(name, image->low_name) == 0) + { + result = image; + break; + } } else // search max. rid available { - if (_images[i].vid == vid && _images[i].rid > max) + if (strcmp(name, image->low_name) == 0 && (result == NULL || result->rid < image->rid)) { - result = &_images[i]; - max = _images[i].rid; + result = image; } } } + if (do_lock) pthread_spin_unlock(&_spinlock); return result; } void dnbd3_handle_sigpipe(int signum) { - printf("ERROR: SIGPIPE received!\n"); + memlogf("ERROR: SIGPIPE received!\n"); } void dnbd3_handle_sigterm(int signum) { - printf("INFO: SIGTERM or SIGINT received!\n"); + memlogf("INFO: SIGTERM or SIGINT received!\n"); dnbd3_cleanup(); } diff --git a/src/server/utils.h b/src/server/utils.h index d9d3ebc..ec83e64 100644 --- a/src/server/utils.h +++ b/src/server/utils.h @@ -35,11 +35,10 @@ #define ERROR_UNKNOWN 10 void dnbd3_load_config(char *file); -void dnbd3_reload_config(char* config_file_name); int dnbd3_add_image(dnbd3_image_t *image, char *file); int dnbd3_del_image(dnbd3_image_t *image, char *file); -dnbd3_image_t* dnbd3_get_image(int vid, int rid); +dnbd3_image_t* dnbd3_get_image(char *name, int rid, const char do_lock); void dnbd3_handle_sigpipe(int signum); void dnbd3_handle_sigterm(int signum); diff --git a/src/types.h b/src/types.h index 04dcf85..5332dc4 100644 --- a/src/types.h +++ b/src/types.h @@ -29,10 +29,41 @@ #define IOCTL_CLOSE _IO(0xab, 2) #define IOCTL_SWITCH _IO(0xab, 3) +#if defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && __BYTE_ORDER == __BIG_ENDIAN) || (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +const uint16_t dnbd3_packet_magic = (0x73 << 8) | (0x72); +// Flip bytes around on big endian when putting stuff on the net +#define net_order_64(a) ((uint64_t)((((a) & 0xFFull) << 56) | (((a) & 0xFF00ull) << 40) | (((a) & 0xFF0000ull) << 24) | (((a) & 0xFF000000ull) << 8) | (((a) & 0xFF00000000ull) >> 8) | (((a) & 0xFF0000000000ull) >> 24) | (((a) & 0xFF000000000000ull) >> 40) | (((a) & 0xFF00000000000000ull) >> 56))) +#define net_order_32(a) ((uint32_t)((((a) & (uint32_t)0xFF) << 24) | (((a) & (uint32_t)0xFF00) << 8) | (((a) & (uint32_t)0xFF0000) >> 8) | (((a) & (uint32_t)0xFF000000) >> 24))) +#define net_order_16(a) ((uint16_t)((((a) & (uint16_t)0xFF) << 8) | (((a) & (uint16_t)0xFF00) >> 8))) +#define fixup_request(a) do { \ + (a).cmd = net_order_16((a).cmd); \ + (a).size = net_order_32((a).size); \ + (a).offset = net_order_64((a).offset); \ +} while (0) +#define fixup_reply(a) do { \ + (a).cmd = net_order_16((a).cmd); \ + (a).size = net_order_32((a).size); \ +} while (0) +#elif defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN) || (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +static const uint16_t dnbd3_packet_magic = (0x73) | (0x72 << 8); +// Make little endian our network byte order as probably 99.999% of machines this will be used on are LE +#define net_order_64(a) (a) +#define net_order_32(a) (a) +#define net_order_16(a) (a) +#define fixup_request(a) while(0) +#define fixup_reply(a) while(0) +#else +#error "Unknown Endianness" +#endif + typedef struct { - char *host; - int vid; + uint16_t len; + uint8_t addrtype; + uint8_t addr[16]; // network representation + uint16_t port; // network representation + uint16_t imgnamelen; + char *imgname; int rid; int read_ahead_kb; } dnbd3_ioctl_t; @@ -41,28 +72,37 @@ typedef struct #define CMD_GET_BLOCK 1 #define CMD_GET_SIZE 2 #define CMD_GET_SERVERS 3 +#define CMD_ERROR 4 #pragma pack(1) typedef struct { + uint16_t magic; // 2byte uint16_t cmd; // 2byte - uint16_t vid; // 2byte - uint16_t rid; // 2byte - uint64_t offset; // 8byte - uint64_t size; // 8byte - char handle[8]; // 8byte + uint32_t size; // 4byte + uint64_t offset; // 8byte + uint64_t handle; // 8byte } dnbd3_request_t; #pragma pack(0) #pragma pack(1) typedef struct { - uint16_t cmd; // 2byte - uint16_t vid; // 2byte - uint16_t rid; // 2byte - uint64_t size; // 8byte - char handle[8]; // 8byte + uint16_t magic; // 2byte + uint16_t cmd; // 2byte + uint32_t size; // 4byte + uint64_t handle; // 8byte } dnbd3_reply_t; #pragma pack(0) +#pragma pack(1) +typedef struct +{ + uint8_t ipaddr[16]; // 16byte (network representation, so it can be directly passed to socket functions) + uint16_t port; // 2byte (network representation, so it can be directly passed to socket functions) + uint8_t addrtype; // 1byte (ip version. AF_INET or AF_INET6. 0 means this struct is empty and should be ignored) + uint8_t failures; // 1byte (number of times server has been consecutively unreachable) +} dnbd3_server_entry_t; +#pragma pack(0) + #endif /* TYPES_H_ */ |