summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt3
-rw-r--r--Kbuild.in2
-rw-r--r--server.conf.example28
-rw-r--r--src/client/client.c64
-rw-r--r--src/config.h27
-rw-r--r--src/kernel/blk.c134
-rw-r--r--src/kernel/blk.h8
-rw-r--r--src/kernel/dnbd3.h31
-rw-r--r--src/kernel/net.c740
-rw-r--r--src/kernel/net.h13
-rw-r--r--src/kernel/serialize_kmod.c4
-rw-r--r--src/kernel/sysfs.c23
-rw-r--r--src/serialize.c74
-rw-r--r--src/serialize.h31
-rw-r--r--src/server/ipc.c91
-rw-r--r--src/server/net.c314
-rw-r--r--src/server/serialize.c5
-rw-r--r--src/server/server.c83
-rw-r--r--src/server/server.h32
-rw-r--r--src/server/utils.c420
-rw-r--r--src/server/utils.h3
-rw-r--r--src/types.h64
22 files changed, 1561 insertions, 633 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index fa2fd4b..1c44769 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -47,9 +47,10 @@ TARGET_LINK_LIBRARIES(dnbd3-server ${CMAKE_THREAD_LIBS_INIT} ${GLIB_LDFLAGS} ${L
# MODULE #
################################################################################
+
SET(MODULE_NAME dnbd3)
SET(MODULE_FILE ${MODULE_NAME}.ko)
-FILE(GLOB MODULE_SOURCE_FILES src/kernel/*.c)
+FILE(GLOB MODULE_SOURCE_FILES src/kernel/*.c src/serialize.c)
FILE(GLOB MODULE_HEADER_FILES src/kernel/*.h src/*.h)
SET(KERNEL_DIR "/lib/modules/${CMAKE_SYSTEM_VERSION}/build")
diff --git a/Kbuild.in b/Kbuild.in
index 6c60037..667cee0 100644
--- a/Kbuild.in
+++ b/Kbuild.in
@@ -1,2 +1,2 @@
obj-m := ${MODULE_NAME}.o
-${MODULE_NAME}-objs += core.o blk.o net.o sysfs.o utils.o \ No newline at end of file
+${MODULE_NAME}-objs += core.o blk.o net.o sysfs.o utils.o serialize_kmod.o \ No newline at end of file
diff --git a/server.conf.example b/server.conf.example
index d04aea0..464a870 100644
--- a/server.conf.example
+++ b/server.conf.example
@@ -1,25 +1,9 @@
# This is a sample configuration file for dnbd3-server
-[Ubuntu 10.04]
-file=/home/jjl/Data/ISOs/Ubuntu/ubuntu-10.04.3-desktop-i386.iso
-servers=132.230.4.29;132.230.4.220;132.230.8.96
-vid=1
-rid=3
-
-[Ubuntu 10.10]
-file=/home/jjl/Data/ISOs/Ubuntu/ubuntu-10.10-desktop-i386.iso
-servers=132.230.4.29;132.230.4.220;132.230.8.96
-vid=2
-rid=1
+[settings]
+default_namespace=uni-freiburg/rz/bunker
-[Ubuntu 11.04]
-file=/home/jjl/Data/ISOs/Ubuntu/ubuntu-11.04-desktop-i386.iso
-servers=132.230.4.29;132.230.4.220;132.230.8.96
-vid=3
-rid=1
-
-[Ubuntu 11.10]
-file=/home/jjl/Data/ISOs/Ubuntu/ubuntu-11.10-desktop-i386.iso
-servers=132.230.4.29;132.230.4.220;132.230.8.96
-vid=4
-rid=1
+[eclipse-cdt linux (tar.gz)]
+file=/home/sr/Downloads/eclipse-cpp-juno-linux-gtk-x86_64.tar.gz
+servers=127.0.0.20:1234;132.230.4.58;2000::dead:beef
+rid=3
diff --git a/src/client/client.c b/src/client/client.c
index 68ed70e..a48cab9 100644
--- a/src/client/client.c
+++ b/src/client/client.c
@@ -28,6 +28,7 @@
#include <glib.h>
#include <netdb.h>
#include <arpa/inet.h>
+#include <string.h>
#include "../types.h"
#include "../version.h"
@@ -37,11 +38,11 @@ char *_config_file_name = DEFAULT_CLIENT_CONFIG_FILE;
void dnbd3_print_help(char* argv_0)
{
printf("\nUsage: %s\n"
- "\t-h <host> -v <vid> [-r <rid>] -d <device> [-a <KB>] || -f <file> || -c <device>\n\n", argv_0);
+ "\t-h <host> -i <image name> [-r <rid>] -d <device> [-a <KB>] || -f <file> || -c <device>\n\n", argv_0);
printf("Start the DNBD3 client.\n");
printf("-f or --file \t\t Configuration file (default /etc/dnbd3-client.conf)\n");
printf("-h or --host \t\t Host running dnbd3-server.\n");
- printf("-v or --vid \t\t Volume-ID of exported image.\n");
+ printf("-i or --image \t\t Image name of exported image.\n");
printf("-r or --rid \t\t Release-ID of exported image (default 0, latest).\n");
printf("-d or --device \t\t DNBD3 device name.\n");
printf("-a or --ahead \t\t Read ahead in KByte (default %i).\n", DEFAULT_READ_AHEAD_KB);
@@ -58,7 +59,7 @@ void dnbd3_print_version()
exit(EXIT_SUCCESS);
}
-char* dnbd3_get_ip(char* hostname)
+static void dnbd3_get_ip(char* hostname, uint8_t *target, uint8_t *addrtype)
{
struct hostent *host;
@@ -68,7 +69,16 @@ char* dnbd3_get_ip(char* hostname)
exit(EXIT_FAILURE);
}
- return inet_ntoa(*((struct in_addr *) host->h_addr));
+ *addrtype = (uint8_t)host->h_addrtype;
+ if (host->h_addrtype == AF_INET)
+ memcpy(target, host->h_addr, 4);
+ else if (host->h_addrtype == AF_INET6)
+ memcpy(target, host->h_addr, 16);
+ else
+ {
+ printf("FATAL: Unknown address type: %d\n", host->h_addrtype);
+ exit(EXIT_FAILURE);
+ }
}
int main(int argc, char *argv[])
@@ -80,19 +90,20 @@ int main(int argc, char *argv[])
int switch_host = 0;
dnbd3_ioctl_t msg;
- msg.host = NULL;
- msg.vid = 0;
- msg.rid = 0;
+ memset(&msg, 0, sizeof(dnbd3_ioctl_t));
+ msg.len = (uint16_t)sizeof(dnbd3_ioctl_t);
msg.read_ahead_kb = DEFAULT_READ_AHEAD_KB;
+ msg.port = htons(PORT);
+ msg.addrtype = 0;
int opt = 0;
int longIndex = 0;
- static const char *optString = "f:h:v:r:d:a:c:s:HV?";
+ static const char *optString = "f:h:i:r:d:a:c:s:HV?";
static const struct option longOpts[] =
{
{ "file", required_argument, NULL, 'f' },
{ "host", required_argument, NULL, 'h' },
- { "vid", required_argument, NULL, 'v' },
+ { "image", required_argument, NULL, 'i' },
{ "rid", required_argument, NULL, 'r' },
{ "device", required_argument, NULL, 'd' },
{ "ahead", required_argument, NULL, 'a' },
@@ -108,29 +119,32 @@ int main(int argc, char *argv[])
switch (opt)
{
case 'f':
- _config_file_name = optarg;
+ _config_file_name = strdup(optarg);
break;
case 'h':
- msg.host = dnbd3_get_ip(optarg);
+ dnbd3_get_ip(optarg, msg.addr, &msg.addrtype);
+ printf("Host set to %s (type %d)\n", optarg, (int)msg.addrtype);
break;
- case 'v':
- msg.vid = atoi(optarg);
+ case 'i':
+ msg.imgname = strdup(optarg);
+ printf("Image: %s\n", msg.imgname);
break;
case 'r':
msg.rid = atoi(optarg);
break;
case 'd':
- dev = optarg;
+ dev = strdup(optarg);
+ printf("Device is %s\n", dev);
break;
case 'a':
msg.read_ahead_kb = atoi(optarg);
break;
case 'c':
- dev = optarg;
+ dev = strdup(optarg);
close_dev = 1;
break;
case 's':
- msg.host = dnbd3_get_ip(optarg);
+ dnbd3_get_ip(optarg, msg.addr, &msg.addrtype);
switch_host = 1;
break;
case 'H':
@@ -141,12 +155,13 @@ int main(int argc, char *argv[])
break;
case '?':
dnbd3_print_help(argv[0]);
+ break;
}
opt = getopt_long(argc, argv, optString, longOpts, &longIndex);
}
// close device
- if (close_dev && !msg.host && dev && (msg.vid == 0))
+ if (close_dev && msg.addrtype == 0 && dev && (msg.imgname == NULL))
{
fd = open(dev, O_WRONLY);
printf("INFO: Closing device %s\n", dev);
@@ -162,10 +177,10 @@ int main(int argc, char *argv[])
}
// switch host
- if (switch_host && msg.host && dev && (msg.vid == 0))
+ if (switch_host && msg.addrtype != 0 && dev && (msg.imgname == NULL))
{
fd = open(dev, O_WRONLY);
- printf("INFO: Switching device %s to %s\n", dev, msg.host);
+ printf("INFO: Switching device %s to %s\n", dev, "<fixme>");
if (ioctl(fd, IOCTL_SWITCH, &msg) < 0)
{
@@ -178,10 +193,11 @@ int main(int argc, char *argv[])
}
// connect
- if (msg.host && dev && (msg.vid != 0))
+ if (msg.addrtype != 0 && dev && (msg.imgname != NULL))
{
+ msg.imgnamelen = (uint16_t)strlen(msg.imgname);
fd = open(dev, O_WRONLY);
- printf("INFO: Connecting %s to %s vid:%i rid:%i\n", dev, msg.host, msg.vid, msg.rid);
+ printf("INFO: Connecting %s to %s (%s rid:%i)\n", dev, "<fixme>", msg.imgname, msg.rid);
if (ioctl(fd, IOCTL_OPEN, &msg) < 0)
{
@@ -207,8 +223,8 @@ int main(int argc, char *argv[])
for (i = 0; i < j; i++)
{
- msg.host = g_key_file_get_string(gkf, groups[i], "server", NULL);
- msg.vid = g_key_file_get_integer(gkf, groups[i], "vid", NULL);
+ dnbd3_get_ip(g_key_file_get_string(gkf, groups[i], "server", NULL), msg.addr, &msg.addrtype);
+ msg.imgname = g_key_file_get_string(gkf, groups[i], "name", NULL);
msg.rid = g_key_file_get_integer(gkf, groups[i], "rid", NULL);
dev = g_key_file_get_string(gkf, groups[i], "device", NULL);
@@ -217,7 +233,7 @@ int main(int argc, char *argv[])
msg.read_ahead_kb = DEFAULT_READ_AHEAD_KB;
fd = open(dev, O_WRONLY);
- printf("INFO: Connecting %s to %s vid:%i rid:%i\n", dev, msg.host, msg.vid, msg.rid);
+ printf("INFO: Connecting %s to %s (%s rid:%i)\n", dev, "<fixme>", msg.imgname, msg.rid);
if (ioctl(fd, IOCTL_OPEN, &msg) < 0)
{
diff --git a/src/config.h b/src/config.h
index be3df5c..78c19c9 100644
--- a/src/config.h
+++ b/src/config.h
@@ -21,25 +21,42 @@
#ifndef CONFIG_H_
#define CONFIG_H_
-// network
+// +++++ Network +++++
+// Default port
#define PORT 5003
#define PORTSTR "5003"
+// Protocol version should be increased whenever new features/messages are added,
+// so either the client or server can run in compatibility mode, or they can
+// cancel the connection right away if the protocol has changed too much
+#define PROTOCOL_VERSION 1
+// Which is the minimum protocol version the server expects from the client
+#define MIN_SUPPORTED_CLIENT 1
+// Which is the minimum protocol version the client expects from the server
+#define MIN_SUPPORTED_SERVER 1
+
+// No payload allowed exceeding this many bytes:
+#define MAX_PAYLOAD 1000
+
#define SOCKET_TIMEOUT_SERVER 30
#define SOCKET_TIMEOUT_CLIENT_DATA 2
#define SOCKET_TIMEOUT_CLIENT_DISCOVERY 1
-#define TIMER_INTERVAL_HEARTBEAT 10*HZ
-#define TIMER_INTERVAL_PANIC 1*HZ
+
#define NUMBER_SERVERS 8
#define RTT_THRESHOLD 1000
+// This must be a power of two:
+#define RTT_BLOCK_SIZE 4096
+
+#define TIMER_INTERVAL_HEARTBEAT 10*HZ
+#define TIMER_INTERVAL_PANIC 2*HZ
-// block device
+// +++++ Block Device +++++
#define KERNEL_SECTOR_SIZE 512
#define DNBD3_BLOCK_SIZE 4096
#define NUMBER_DEVICES 8
#define DEFAULT_READ_AHEAD_KB 256
-// misc
+// +++++ Misc +++++
#define DEFAULT_SERVER_CONFIG_FILE "/etc/dnbd3/server.conf"
#define DEFAULT_CLIENT_CONFIG_FILE "/etc/dnbd3/client.conf"
#define UNIX_SOCKET "/run/dnbd3-server.sock"
diff --git a/src/kernel/blk.c b/src/kernel/blk.c
index 2f52cc3..1177f3a 100644
--- a/src/kernel/blk.c
+++ b/src/kernel/blk.c
@@ -35,12 +35,10 @@ int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor)
INIT_LIST_HEAD(&dev->request_queue_send);
INIT_LIST_HEAD(&dev->request_queue_receive);
- memset(dev->cur_server.host, 0, 16);
- memset(dev->cur_server.port, 0, 6);
- dev->cur_server.rtt = 0;
- dev->cur_server.sock = NULL;
+ memset(&dev->cur_server, 0, sizeof(dnbd3_server_t));
+ dev->better_sock = NULL;
- dev->vid = 0;
+ dev->imgname = NULL;
dev->rid = 0;
dev->update_available = 0;
dev->alt_servers_num = 0;
@@ -50,6 +48,8 @@ int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor)
dev->thread_discover = NULL;
dev->discover = 0;
dev->panic = 0;
+ dev->panic_count = 0;
+ dev->reported_size = 0;
if (!(disk = alloc_disk(1)))
{
@@ -102,28 +102,60 @@ int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, u
int result = 0;
dnbd3_device_t *dev = bdev->bd_disk->private_data;
struct request_queue *blk_queue = dev->disk->queue;
+ char *imgname = NULL;
dnbd3_ioctl_t *msg = kmalloc(sizeof(dnbd3_ioctl_t), GFP_KERNEL);
- copy_from_user((char *)msg, (char *)arg, sizeof(*msg));
+
+ if (msg == NULL) return -ENOMEM;
+ copy_from_user((char *)msg, (char *)arg, 2);
+ if (msg->len != sizeof(dnbd3_ioctl_t))
+ {
+ result = -ENOEXEC;
+ goto cleanup_return;
+ }
+ copy_from_user((char *)msg, (char *)arg, sizeof(dnbd3_ioctl_t));
+ if (msg->imgname != NULL && msg->imgnamelen > 0)
+ {
+ imgname = kmalloc(msg->imgnamelen + 1, GFP_KERNEL);
+ if (imgname == NULL)
+ {
+ result = -ENOMEM;
+ goto cleanup_return;
+ }
+ copy_from_user(imgname, msg->imgname, msg->imgnamelen);
+ imgname[msg->imgnamelen] = '\0';
+ }
switch (cmd)
{
case IOCTL_OPEN:
- strcpy(dev->cur_server.host, msg->host);
- strcpy(dev->cur_server.port, PORTSTR);
- dev->vid = msg->vid;
- dev->rid = msg->rid;
- blk_queue->backing_dev_info.ra_pages = (msg->read_ahead_kb * 1024)/ PAGE_CACHE_SIZE;
- result = dnbd3_net_connect(dev);
+ if (imgname == NULL)
+ {
+ result = -EINVAL;
+ }
+ else
+ {
+ memcpy(dev->cur_server.hostaddr, msg->addr, 16);
+ dev->cur_server.port = msg->port;
+ dev->cur_server.hostaddrtype = msg->addrtype;
+ dev->imgname = imgname;
+ imgname = NULL;
+ dev->rid = msg->rid;
+ blk_queue->backing_dev_info.ra_pages = (msg->read_ahead_kb * 1024) / PAGE_CACHE_SIZE;
+ result = dnbd3_net_connect(dev);
+ }
break;
case IOCTL_CLOSE:
set_capacity(dev->disk, 0);
result = dnbd3_net_disconnect(dev);
+ dnbd3_blk_fail_all_requests(dev);
break;
case IOCTL_SWITCH:
dnbd3_net_disconnect(dev);
- strcpy(dev->cur_server.host, msg->host);
+ memcpy(dev->cur_server.hostaddr, msg->addr, 16);
+ dev->cur_server.port = msg->port;
+ dev->cur_server.hostaddrtype = msg->addrtype;
result = dnbd3_net_connect(dev);
break;
@@ -132,10 +164,12 @@ int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, u
default:
result = -EIO;
-
+ break;
}
- kfree(msg);
+cleanup_return:
+ if (msg) kfree(msg);
+ if (imgname) kfree(imgname);
return result;
}
@@ -148,18 +182,78 @@ void dnbd3_blk_request(struct request_queue *q)
{
dev = req->rq_disk->private_data;
+ if (dev->cur_server.hostaddrtype == 0)
+ {
+ __blk_end_request_all(req, -EIO);
+ continue;
+ }
+
if (req->cmd_type != REQ_TYPE_FS)
{
__blk_end_request_all(req, 0);
continue;
}
- if (rq_data_dir(req) == READ)
+ if (dev->panic_count >= 20)
+ {
+ __blk_end_request_all(req, -EIO);
+ continue;
+ }
+
+ if (rq_data_dir(req) != READ)
{
- list_add_tail(&req->queuelist, &dev->request_queue_send);
- spin_unlock_irq(q->queue_lock);
- wake_up(&dev->process_queue_send);
- spin_lock_irq(q->queue_lock);
+ __blk_end_request_all(req, -EACCES);
+ continue;
}
+
+ list_add_tail(&req->queuelist, &dev->request_queue_send);
+ spin_unlock_irq(q->queue_lock);
+ wake_up(&dev->process_queue_send);
+ spin_lock_irq(q->queue_lock);
}
}
+
+void dnbd3_blk_fail_all_requests(dnbd3_device_t *dev)
+{
+ struct request *blk_request, *tmp_request;
+ struct request *blk_request2, *tmp_request2;
+ unsigned long flags;
+ struct list_head local_copy;
+ int dup;
+ INIT_LIST_HEAD(&local_copy);
+ spin_lock_irq(&dev->blk_lock);
+ list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_receive, queuelist)
+ {
+ list_del_init(&blk_request->queuelist);
+ list_add(&blk_request->queuelist, &local_copy);
+ }
+ list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_send, queuelist)
+ {
+ list_del_init(&blk_request->queuelist);
+ dup = 0;
+ list_for_each_entry_safe(blk_request2, tmp_request2, &local_copy, queuelist)
+ {
+ if (blk_request == blk_request2)
+ {
+ printk("WARNING: Request is in both lists!\n");
+ dup = 1;
+ }
+ }
+ if (!dup) list_add(&blk_request->queuelist, &local_copy);
+ }
+ spin_unlock_irq(&dev->blk_lock);
+ list_for_each_entry_safe(blk_request, tmp_request, &local_copy, queuelist)
+ {
+ list_del_init(&blk_request->queuelist);
+ if (blk_request->cmd_type == REQ_TYPE_FS)
+ {
+ spin_lock_irqsave(&dev->blk_lock, flags);
+ __blk_end_request_all(blk_request, -EIO);
+ spin_unlock_irqrestore(&dev->blk_lock, flags);
+ }
+ else if (blk_request->cmd_type == REQ_TYPE_SPECIAL)
+ {
+ kfree(blk_request);
+ }
+ }
+}
diff --git a/src/kernel/blk.h b/src/kernel/blk.h
index 57d9bfa..28f6f8c 100644
--- a/src/kernel/blk.h
+++ b/src/kernel/blk.h
@@ -23,12 +23,6 @@
#include "dnbd3.h"
-enum
-{
- REQ_GET_SERVERS = 1,
- REQ_GET_FILESIZE = 2,
-};
-
extern struct block_device_operations dnbd3_blk_ops;
int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg);
@@ -39,4 +33,6 @@ int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor);
int dnbd3_blk_del_device(dnbd3_device_t *dev);
+void dnbd3_blk_fail_all_requests(dnbd3_device_t *dev);
+
#endif /* BLK_H_ */
diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h
index 0e408dc..dfed0f2 100644
--- a/src/kernel/dnbd3.h
+++ b/src/kernel/dnbd3.h
@@ -29,17 +29,19 @@
#include "config.h"
#include "types.h"
+#include "serialize.h"
extern int major;
typedef struct
{
- char host[16];
- char port[6];
- uint64_t rtt;
- uint64_t rtts[4];
- struct socket *sock;
- struct kobject kobj;
+ uint64_t rtts[4]; // Last four round trip time measurements in µs
+ uint16_t port; // Port in network representation
+ uint16_t protocol_version; // dnbd3 protocol version of this server
+ uint8_t hostaddr[16]; // Address in network representation (IPv4 or IPv6)
+ uint8_t hostaddrtype; // Address type (AF_INET or AF_INET6)
+ uint8_t skip_count; // Do not check this server the next skip_count times
+ struct kobject kobj; // SysFS
} dnbd3_server_t;
typedef struct
@@ -52,11 +54,20 @@ typedef struct
struct kobject kobj;
// network
+ struct socket *sock;
dnbd3_server_t cur_server;
- int vid, rid, update_available;
- int alt_servers_num;
- dnbd3_server_t alt_servers[NUMBER_SERVERS];
- int discover, panic;
+ uint64_t cur_rtt;
+ char *imgname;
+ serialized_buffer_t payload_buffer;
+ int rid, update_available;
+ int alt_servers_num; // number of currently known alt servers
+ dnbd3_server_t alt_servers[NUMBER_SERVERS]; // array of alt servers
+ int new_servers_num; // number of new alt servers that are waiting to be copied to above array
+ dnbd3_server_entry_t new_servers[NUMBER_SERVERS]; // pending new alt servers
+ int discover, panic, panic_count, disconnecting;
+ uint64_t reported_size;
+ // server switch
+ struct socket *better_sock;
// process
struct timer_list hb_timer;
diff --git a/src/kernel/net.c b/src/kernel/net.c
index 840282f..4d33842 100644
--- a/src/kernel/net.c
+++ b/src/kernel/net.c
@@ -21,87 +21,169 @@
#include "net.h"
#include "blk.h"
#include "utils.h"
+#include "serialize.h"
#include <linux/time.h>
+#ifndef MIN
+#define MIN(a,b) (a < b ? a : b)
+#endif
+
int dnbd3_net_connect(dnbd3_device_t *dev)
{
struct sockaddr_in sin;
- struct request *req0 = kmalloc(sizeof(struct request), GFP_ATOMIC);
- struct request *req1 = kmalloc(sizeof(struct request), GFP_ATOMIC);
+ struct request *req1 = kmalloc(sizeof(*req1), GFP_ATOMIC);
struct timeval timeout;
timeout.tv_sec = SOCKET_TIMEOUT_CLIENT_DATA;
timeout.tv_usec = 0;
// do some checks before connecting
- if (!req0 || !req1)
+ if (!req1)
{
- printk("FATAL: Kmalloc failed.\n");
- memset(dev->cur_server.host, '\0', sizeof(dev->cur_server.host));
- return -1;
+ printk("FATAL: Kmalloc(1) failed.\n");
+ goto error;
}
- if (!dev->cur_server.host || !dev->cur_server.port || (dev->vid == 0))
+ if (dev->cur_server.port == 0 || dev->cur_server.hostaddrtype == 0 || dev->imgname == NULL)
{
- printk("FATAL: Host, port or vid not set.\n");
- memset(dev->cur_server.host, '\0', sizeof(dev->cur_server.host));
- return -1;
+ printk("FATAL: Host, port or image name not set.\n");
+ goto error;
}
- if (dev->cur_server.sock)
+ if (dev->sock)
{
- printk("ERROR: Device %s is already connected to %s.\n", dev->disk->disk_name, dev->cur_server.host);
- return -1;
+ if (dev->cur_server.hostaddrtype == AF_INET)
+ printk("ERROR: Device %s is already connected to %pI4 : %d.\n", dev->disk->disk_name, dev->cur_server.hostaddr, (int)ntohs(dev->cur_server.port));
+ else
+ printk("ERROR: Device %s is already connected to %pI6 : %d.\n", dev->disk->disk_name, dev->cur_server.hostaddr, (int)ntohs(dev->cur_server.port));
+ goto error;
}
- printk("INFO: Connecting device %s to %s\n", dev->disk->disk_name, dev->cur_server.host);
-
- // initialize socket
- if (sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &dev->cur_server.sock) < 0)
+ if (dev->cur_server.hostaddrtype == AF_INET)
+ printk("INFO: Connecting device %s to %pI4 : %d\n", dev->disk->disk_name, dev->cur_server.hostaddr, (int)ntohs(dev->cur_server.port));
+ else
{
- printk("ERROR: Couldn't create socket.\n");
- dev->cur_server.sock = NULL;
- memset(dev->cur_server.host, '\0', sizeof(dev->cur_server.host));
- return -1;
+ printk("ERROR: Cannot connect to %pI6 - IPv6 not yet implemented.\n", dev->cur_server.hostaddr);
+ //printk("INFO: Connecting device %s to %pI6 : %d\n", dev->disk->disk_name, dev->cur_server.hostaddr, (int)ntohs(dev->cur_server.port));
+ goto error;
+ }
+
+ if (dev->better_sock == NULL)
+ { // no established connection yet from discovery thread, start new one
+ dnbd3_request_t dnbd3_request;
+ dnbd3_reply_t dnbd3_reply;
+ struct msghdr msg;
+ struct kvec iov[2];
+ uint16_t rid;
+ char *name;
+ init_msghdr(msg);
+ if (sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &dev->sock) < 0)
+ {
+ printk("ERROR: Couldn't create socket.\n");
+ goto error;
+ }
+ kernel_setsockopt(dev->sock, SOL_SOCKET, SO_SNDTIMEO, (char *) &timeout, sizeof(timeout));
+ kernel_setsockopt(dev->sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &timeout, sizeof(timeout));
+ dev->sock->sk->sk_allocation = GFP_NOIO;
+ sin.sin_family = AF_INET;
+ memcpy(&(sin.sin_addr.s_addr), dev->cur_server.hostaddr, 4);
+ sin.sin_port = dev->cur_server.port;
+ if (kernel_connect(dev->sock, (struct sockaddr *) &sin, sizeof(sin), 0) < 0)
+ {
+ printk("ERROR: Couldn't connect to host %pI4 : %d\n", dev->cur_server.hostaddr, (int)ntohs(dev->cur_server.port));
+ goto error;
+ }
+ // Request filesize
+ dnbd3_request.magic = dnbd3_packet_magic;
+ dnbd3_request.cmd = CMD_GET_SIZE;
+ dnbd3_request.size = strlen(dev->imgname) + 1 + 2 + 2; // str+\0, version, rid
+ fixup_request(dnbd3_request);
+ iov[0].iov_base = &dnbd3_request;
+ iov[0].iov_len = sizeof(dnbd3_request);
+ serializer_reset_write(&dev->payload_buffer);
+ serializer_put_uint16(&dev->payload_buffer, PROTOCOL_VERSION);
+ serializer_put_string(&dev->payload_buffer, dev->imgname);
+ serializer_put_uint16(&dev->payload_buffer, dev->rid);
+ iov[1].iov_base = &dev->payload_buffer;
+ iov[1].iov_len = serializer_get_written_length(&dev->payload_buffer);
+ if (kernel_sendmsg(dev->sock, &msg, iov, 2, sizeof(dnbd3_request) + iov[1].iov_len) <= 0)
+ goto error;
+ // receive reply header
+ iov[0].iov_base = &dnbd3_reply;
+ iov[0].iov_len = sizeof(dnbd3_reply);
+ if (kernel_recvmsg(dev->sock, &msg, iov, 1, sizeof(dnbd3_reply), msg.msg_flags) != sizeof(dnbd3_reply) || dnbd3_reply.cmd != CMD_GET_SIZE || dnbd3_reply.size < 3 || dnbd3_reply.size > MAX_PAYLOAD || dnbd3_reply.magic != dnbd3_packet_magic)
+ {
+ printk("FATAL: Requested image does not exist on server.\n");
+ goto error;
+ }
+ // receive reply payload
+ iov[0].iov_base = &dev->payload_buffer;
+ iov[0].iov_len = dnbd3_reply.size;
+ if (kernel_recvmsg(dev->sock, &msg, iov, 1, dnbd3_reply.size, msg.msg_flags) != dnbd3_reply.size)
+ {
+ printk("FATAL: Cold not read CMD_GET_SIZE payload on handshake.\n");
+ goto error;
+ }
+ // read reply payload
+ dev->cur_server.protocol_version = serializer_get_uint16(&dev->payload_buffer);
+ if (dev->cur_server.protocol_version < MIN_SUPPORTED_SERVER)
+ {
+ printk("FATAL: Server version is lower than min supported version.\n");
+ goto error;
+ }
+ name = serializer_get_string(&dev->payload_buffer);
+ if (dev->rid != 0 && strcmp(name, dev->imgname) != 0)
+ {
+ printk("FATAL: Server provides different image than asked for.\n");
+ goto error;
+ }
+ if (strlen(dev->imgname) < strlen(name))
+ {
+ dev->imgname = krealloc(dev->imgname, strlen(name) + 1, GFP_ATOMIC);
+ if (dev->imgname == NULL)
+ {
+ printk("FATAL: Reallocating buffer for new image name failed");
+ goto error;
+ }
+ }
+ strcpy(dev->imgname, name);
+ rid = serializer_get_uint16(&dev->payload_buffer);
+ if (dev->rid != 0 && dev->rid != rid)
+ {
+ printk("FATAL: Server provides different rid of image than asked for.\n");
+ goto error;
+ }
+ dev->rid = rid;
+ dev->reported_size = serializer_get_uint64(&dev->payload_buffer);
+ // store image information
+ set_capacity(dev->disk, dev->reported_size >> 9); /* 512 Byte blocks */
+ printk("INFO: Filesize of %s: %llu\n", dev->disk->disk_name, dev->reported_size);
}
- kernel_setsockopt(dev->cur_server.sock, SOL_SOCKET, SO_SNDTIMEO, (char *) &timeout, sizeof(timeout));
- kernel_setsockopt(dev->cur_server.sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &timeout, sizeof(timeout));
- dev->cur_server.sock->sk->sk_allocation = GFP_NOIO;
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = inet_addr(dev->cur_server.host);
- sin.sin_port = htons(simple_strtol(dev->cur_server.port, NULL, 10));
- if (kernel_connect(dev->cur_server.sock, (struct sockaddr *) &sin, sizeof(sin), 0) < 0)
+ else // Switching server, connection is already established and size request was executed
{
- printk("ERROR: Couldn't connect to host %s:%s\n", dev->cur_server.host, dev->cur_server.port);
- dev->cur_server.sock = NULL;
- memset(dev->cur_server.host, '\0', sizeof(dev->cur_server.host));
- return -1;
+ printk("INFO: On-the-fly server change\n");
+ dev->sock = dev->better_sock;
+ dev->better_sock = NULL;
+ kernel_setsockopt(dev->sock, SOL_SOCKET, SO_SNDTIMEO, (char *) &timeout, sizeof(timeout));
+ kernel_setsockopt(dev->sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &timeout, sizeof(timeout));
}
dev->panic = 0;
+ dev->panic_count = 0;
dev->alt_servers_num = 0;
dev->update_available = 0;
-
// enqueue request to request_queue_send (ask alt servers)
req1->cmd_type = REQ_TYPE_SPECIAL;
- req1->cmd_flags = REQ_GET_SERVERS;
+ req1->cmd_flags = CMD_GET_SERVERS;
list_add(&req1->queuelist, &dev->request_queue_send);
- // enqueue request to request_queue_send (ask file size)
- req0->cmd_type = REQ_TYPE_SPECIAL;
- req0->cmd_flags = REQ_GET_FILESIZE;
- list_add(&req0->queuelist, &dev->request_queue_send);
-
- // start sending thread
+ // create required threads
dev->thread_send = kthread_create(dnbd3_net_send, dev, dev->disk->disk_name);
- wake_up_process(dev->thread_send);
-
- // start receiving thread
dev->thread_receive = kthread_create(dnbd3_net_receive, dev, dev->disk->disk_name);
- wake_up_process(dev->thread_receive);
-
- // start discover thread
dev->thread_discover = kthread_create(dnbd3_net_discover, dev, dev->disk->disk_name);
+ // start them up
+ wake_up_process(dev->thread_send);
+ wake_up_process(dev->thread_receive);
wake_up_process(dev->thread_discover);
wake_up(&dev->process_queue_send);
@@ -114,15 +196,32 @@ int dnbd3_net_connect(dnbd3_device_t *dev)
add_timer(&dev->hb_timer);
return 0;
+error:
+ if (dev->sock)
+ {
+ sock_release(dev->sock);
+ dev->sock = NULL;
+ }
+ dev->cur_server.hostaddrtype = 0;
+ dev->cur_server.port = 0;
+ if (req1) kfree(req1);
+ return -1;
}
int dnbd3_net_disconnect(dnbd3_device_t *dev)
{
printk("INFO: Disconnecting device %s\n", dev->disk->disk_name);
+ dev->disconnecting = 1;
+
// clear heartbeat timer
if (&dev->hb_timer)
- del_timer(&dev->hb_timer);
+ del_timer(&dev->hb_timer);
+
+ dev->discover = 0;
+
+ if (dev->sock)
+ kernel_sock_shutdown(dev->sock, SHUT_RDWR);
// kill sending and receiving threads
if (dev->thread_send)
@@ -144,12 +243,15 @@ int dnbd3_net_disconnect(dnbd3_device_t *dev)
}
// clear socket
- if (dev->cur_server.sock)
+ if (dev->sock)
{
- sock_release(dev->cur_server.sock);
- dev->cur_server.sock = NULL;
- memset(dev->cur_server.host, '\0', sizeof(dev->cur_server.host));
+ sock_release(dev->sock);
+ dev->sock = NULL;
}
+ dev->cur_server.hostaddrtype = 0;
+ dev->cur_server.port = 0;
+
+ dev->disconnecting = 0;
return 0;
}
@@ -157,19 +259,23 @@ int dnbd3_net_disconnect(dnbd3_device_t *dev)
void dnbd3_net_heartbeat(unsigned long arg)
{
dnbd3_device_t *dev = (dnbd3_device_t *) arg;
- struct request *req = kmalloc(sizeof(struct request), GFP_ATOMIC);
- // send keepalive
- if (req)
- {
- req->cmd_type = REQ_TYPE_SPECIAL;
- req->cmd_flags = REQ_GET_SERVERS;
- list_add_tail(&req->queuelist, &dev->request_queue_send);
- wake_up(&dev->process_queue_send);
- }
- else
+
+ if (!dev->panic)
{
- printk("ERROR: Couldn't create keepalive request\n");
+ struct request *req = kmalloc(sizeof(struct request), GFP_ATOMIC);
+ // send keepalive
+ if (req)
+ {
+ req->cmd_type = REQ_TYPE_SPECIAL;
+ req->cmd_flags = CMD_GET_SERVERS;
+ list_add_tail(&req->queuelist, &dev->request_queue_send);
+ wake_up(&dev->process_queue_send);
+ }
+ else
+ {
+ printk("ERROR: Couldn't create keepalive request\n");
+ }
}
// start discover
@@ -188,20 +294,21 @@ int dnbd3_net_discover(void *data)
{
dnbd3_device_t *dev = data;
struct sockaddr_in sin;
- struct socket *sock;
+ struct socket *sock, *best_sock = NULL;
dnbd3_request_t dnbd3_request;
dnbd3_reply_t dnbd3_reply;
struct msghdr msg;
- struct kvec iov;
+ struct kvec iov[2];
- char *buf;
+ char *buf, *name;
+ serialized_buffer_t *payload;
uint64_t filesize;
- char current_server[16], best_server[16];
+ uint16_t rid;
struct timeval start, end;
- uint64_t t1, t2, best_rtt = 0;
- int i, num = 0;
+ uint64_t rtt, best_rtt = 0;
+ int i, best_server, current_server;
int turn = 0;
int ready = 0;
@@ -217,22 +324,55 @@ int dnbd3_net_discover(void *data)
printk("FATAL: Kmalloc failed (discover)\n");
return -1;
}
+ payload = (serialized_buffer_t*)buf;
- while (!kthread_should_stop())
+ dnbd3_request.magic = dnbd3_packet_magic;
+
+ for (;;)
{
- wait_event_interruptible(dev->process_queue_discover, kthread_should_stop() || dev->discover);
+ wait_event_interruptible(dev->process_queue_discover,
+ kthread_should_stop() || dev->discover);
- if (!&dev->discover)
- continue;
+ if (kthread_should_stop() || dev->imgname == NULL)
+ break;
- num = dev->alt_servers_num;
+ if (!dev->discover)
+ continue;
dev->discover = 0;
- strcpy(best_server, "0.0.0.0");
- best_rtt = -1;
- for (i=0; i < num && i < NUMBER_SERVERS; i++)
+ // Check if the list of alt servers needs to be updated and do so if neccessary
+ spin_lock_irq(&dev->blk_lock);
+ if (dev->new_servers_num)
{
- // initialize socket and connect
+ for (i = 0; i < dev->new_servers_num; ++i)
+ {
+ memcpy(dev->alt_servers[i].hostaddr, dev->new_servers[i].ipaddr, 16);
+ dev->alt_servers[i].hostaddrtype = dev->new_servers[i].addrtype;
+ dev->alt_servers[i].port = dev->new_servers[i].port;
+ memset(dev->alt_servers[i].rtts, 0xFF, sizeof(dev->alt_servers[i].rtts[0]) * 4);
+ dev->alt_servers[i].protocol_version = 0;
+ dev->alt_servers[i].skip_count = 0;
+ }
+ dev->alt_servers_num = dev->new_servers_num;
+ dev->new_servers_num = 0;
+ }
+ spin_unlock_irq(&dev->blk_lock);
+
+ current_server = best_server = -1;
+ best_rtt = 0xFFFFFFFFFFFFull;
+
+ for (i=0; i < dev->alt_servers_num; ++i)
+ {
+ if (dev->alt_servers[i].hostaddrtype != AF_INET) // add IPv6....
+ continue;
+
+ if (!dev->panic && dev->alt_servers[i].skip_count) // If not in panic mode, skip server if indicated
+ {
+ --dev->alt_servers[i].skip_count;
+ continue;
+ }
+
+ // Initialize socket and connect
if (sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock) < 0)
{
printk("ERROR: Couldn't create socket (discover)\n");
@@ -241,49 +381,102 @@ int dnbd3_net_discover(void *data)
}
kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *) &timeout, sizeof(timeout));
kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &timeout, sizeof(timeout));
- strcpy(current_server, dev->alt_servers[i].host);
sock->sk->sk_allocation = GFP_NOIO;
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = inet_addr(current_server);
- sin.sin_port = htons(simple_strtol(dev->cur_server.port, NULL, 10));
+ sin.sin_family = AF_INET; // add IPv6.....
+ memcpy(&sin.sin_addr.s_addr, dev->alt_servers[i].hostaddr, 4);
+ sin.sin_port = dev->alt_servers[i].port;
if (kernel_connect(sock, (struct sockaddr *) &sin, sizeof(sin), 0) < 0)
{
- printk("ERROR: Couldn't connect to host %s:%s (discover)\n", current_server, dev->cur_server.port);
- dev->alt_servers[i].rtt = -1;
- sock = NULL;
- continue;
+ //printk("ERROR: Couldn't connect to host %s:%s (discover)\n", current_server, dev->cur_server.port);
+ goto error;
}
// Request filesize
dnbd3_request.cmd = CMD_GET_SIZE;
- dnbd3_request.vid = dev->vid;
- dnbd3_request.rid = dev->rid;
- iov.iov_base = &dnbd3_request;
- iov.iov_len = sizeof(dnbd3_request);
- if (kernel_sendmsg(sock, &msg, &iov, 1, sizeof(dnbd3_request)) <= 0)
+ dnbd3_request.size = strlen(dev->imgname) + 1 + 2 + 2; // str+\0, version, rid
+ fixup_request(dnbd3_request);
+ iov[0].iov_base = &dnbd3_request;
+ iov[0].iov_len = sizeof(dnbd3_request);
+ serializer_reset_write(payload);
+ serializer_put_uint16(payload, PROTOCOL_VERSION);
+ serializer_put_string(payload, dev->imgname);
+ serializer_put_uint16(payload, dev->rid);
+ iov[1].iov_base = payload;
+ iov[1].iov_len = serializer_get_written_length(payload);
+ if (kernel_sendmsg(sock, &msg, iov, 2, sizeof(dnbd3_request) + iov[1].iov_len) != sizeof(dnbd3_request) + iov[1].iov_len)
+ {
+ printk("ERROR: Requesting image size failed (%pI4 : %d, discover)\n", dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port));
goto error;
+ }
// receive net reply
- iov.iov_base = &dnbd3_reply;
- iov.iov_len = sizeof(dnbd3_reply);
- if (kernel_recvmsg(sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags) <= 0)
+ iov[0].iov_base = &dnbd3_reply;
+ iov[0].iov_len = sizeof(dnbd3_reply);
+ if (kernel_recvmsg(sock, &msg, iov, 1, sizeof(dnbd3_reply), msg.msg_flags) != sizeof(dnbd3_reply))
+ {
+ printk("ERROR: Receiving image size packet (header) failed (%pI4 :%d, discover)\n", dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port));
goto error;
+ }
+ fixup_reply(dnbd3_reply);
+ if (dnbd3_reply.magic != dnbd3_packet_magic || dnbd3_reply.cmd != CMD_GET_SIZE || dnbd3_reply.size < 4)
+ {
+ printk("ERROR: Content of image size packet (header) mismatched (%pI4 :%d, discover)\n", dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port));
+ goto error;
+ }
// receive data
- iov.iov_base = &filesize;
- iov.iov_len = sizeof(uint64_t);
- if (kernel_recvmsg(sock, &msg, &iov, 1, dnbd3_reply.size, msg.msg_flags) <= 0)
+ iov[0].iov_base = payload;
+ iov[0].iov_len = dnbd3_reply.size;
+ if (kernel_recvmsg(sock, &msg, iov, 1, dnbd3_reply.size, msg.msg_flags) != dnbd3_reply.size)
+ {
+ printk("ERROR: Receiving image size packet (payload) failed (%pI4 : %d, discover)\n", dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port));
goto error;
+ }
+ serializer_reset_read(payload, dnbd3_reply.size);
+
+ dev->alt_servers[i].protocol_version = serializer_get_uint16(payload);
+ if (dev->alt_servers[i].protocol_version < MIN_SUPPORTED_SERVER)
+ {
+ printk("ERROR: Server version too old (client: %d, server: %d, min supported: %d) (%pI4 : %d, discover)\n", (int)PROTOCOL_VERSION, (int)dev->alt_servers[i].protocol_version, (int)MIN_SUPPORTED_SERVER, dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port));
+ goto error;
+ }
+
+ name = serializer_get_string(payload);
+ if (name == NULL)
+ {
+ printk("ERROR: Server did not supply an image name (%pI4 : %d, discover)\n", dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port));
+ goto error;
+ }
+ if (strcmp(name, dev->imgname) != 0)
+ {
+ printk("ERROR: Image name does not match requested one (client: '%s', server: '%s') (%pI4 : %d, discover)\n", dev->imgname, name, dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port));
+ goto error;
+ }
+
+ rid = serializer_get_uint16(payload);
+ if (rid != dev->rid)
+ {
+ printk("ERROR: Server supplied wrong rid (client: '%d', server: '%d') (%pI4 : %d, discover)\n", (int)dev->rid, (int)rid, dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port));
+ goto error;
+ }
+
+ filesize = serializer_get_uint64(payload);
+ if (filesize != dev->reported_size)
+ {
+ printk("ERROR: Reported image size of %llu does not match expected value %llu. (%pI4 :%d, discover)\n", (unsigned long long)filesize, (unsigned long long)dev->reported_size, dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port));
+ goto error;
+ }
// panic mode, take first responding server
if (dev->panic)
{
- printk("WARN: Panic mode (%s), taking server %s\n", dev->disk->disk_name, current_server);
- sock_release(sock);
+ printk("WARN: Panic mode (%s), taking server %pI4 : %d\n", dev->disk->disk_name, dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port));
+ if (best_sock != NULL) sock_release(best_sock);
+ dev->better_sock = sock; // Pass over socket to take a shortcut in *_connect();
kfree(buf);
dev->thread_discover = NULL;
dnbd3_net_disconnect(dev);
- strcpy(dev->cur_server.host, current_server);
+ memcpy(&dev->cur_server, &dev->alt_servers[i], sizeof(dev->cur_server));
dnbd3_net_connect(dev);
return 0;
}
@@ -292,81 +485,129 @@ int dnbd3_net_discover(void *data)
// Request block
dnbd3_request.cmd = CMD_GET_BLOCK;
- dnbd3_request.offset = 0; // TODO: take random block
- dnbd3_request.size = 4096;
- iov.iov_base = &dnbd3_request;
- iov.iov_len = sizeof(dnbd3_request);
- if (kernel_sendmsg(sock, &msg, &iov, 1, sizeof(dnbd3_request)) <= 0)
+ dnbd3_request.offset = ((start.tv_usec ^ start.tv_sec) % dev->reported_size) & ~(uint64_t)(RTT_BLOCK_SIZE-1); // Pick random block
+ dnbd3_request.size = RTT_BLOCK_SIZE;
+ fixup_request(dnbd3_request);
+ iov[0].iov_base = &dnbd3_request;
+ iov[0].iov_len = sizeof(dnbd3_request);
+ if (kernel_sendmsg(sock, &msg, iov, 1, sizeof(dnbd3_request)) <= 0)
+ {
+ printk("ERROR: Requesting test block failed (%pI4 : %d, discover)\n", dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port));
goto error;
+ }
- // receive net replay
- iov.iov_base = &dnbd3_reply;
- iov.iov_len = sizeof(dnbd3_reply);
- if (kernel_recvmsg(sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags) <= 0)
+ // receive net reply
+ iov[0].iov_base = &dnbd3_reply;
+ iov[0].iov_len = sizeof(dnbd3_reply);
+ if (kernel_recvmsg(sock, &msg, iov, 1, sizeof(dnbd3_reply), msg.msg_flags) != sizeof(dnbd3_reply))
+ {
+ printk("ERROR: Receiving test block header packet failed (%pI4 : %d, discover)\n", dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port));
+ goto error;
+ }
+ fixup_reply(dnbd3_reply);
+ if (dnbd3_reply.cmd != CMD_GET_BLOCK || dnbd3_reply.size != RTT_BLOCK_SIZE)
+ {
+ printk("ERROR: Unexpected reply to block request: cmd=%d, size=%d (%pI4 : %d, discover)\n", (int)dnbd3_reply.cmd, (int)dnbd3_reply.size, dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port));
goto error;
+ }
// receive data
- iov.iov_base = buf;
- iov.iov_len = 4096;
- if (kernel_recvmsg(sock, &msg, &iov, 1, dnbd3_reply.size, msg.msg_flags) <= 0)
+ iov[0].iov_base = buf;
+ iov[0].iov_len = RTT_BLOCK_SIZE;
+ if (kernel_recvmsg(sock, &msg, iov, 1, dnbd3_reply.size, msg.msg_flags) != RTT_BLOCK_SIZE)
+ {
+ printk("ERROR: Receiving test block payload failed (%pI4 : %d, discover)\n", dev->alt_servers[i].hostaddr, (int)ntohs(dev->alt_servers[i].port));
goto error;
+ }
do_gettimeofday(&end); // end rtt measurement
- // clear socket
- sock_release(sock);
- sock = NULL;
+ dev->alt_servers[i].rtts[turn] =
+ (end.tv_sec - start.tv_sec) * 1000000ull
+ + (end.tv_usec - start.tv_usec);
- t1 = (start.tv_sec*1000000ull) + start.tv_usec;
- t2 = (end.tv_sec*1000000ull) + end.tv_usec;
- dev->alt_servers[i].rtts[turn] = t2 -t1;
+ rtt = ( dev->alt_servers[i].rtts[0]
+ + dev->alt_servers[i].rtts[1]
+ + dev->alt_servers[i].rtts[2]
+ + dev->alt_servers[i].rtts[3] ) / 4;
- dev->alt_servers[i].rtt = ( dev->alt_servers[i].rtts[0]
- +dev->alt_servers[i].rtts[1]
- +dev->alt_servers[i].rtts[2]
- +dev->alt_servers[i].rtts[3] ) / 4;
-
- if (best_rtt > dev->alt_servers[i].rtt)
- {
- best_rtt = dev->alt_servers[i].rtt;
- strcpy(best_server, current_server);
+ if (best_rtt > rtt)
+ { // This one is better, keep socket open in case we switch
+ best_rtt = rtt;
+ best_server = i;
+ if (best_sock != NULL) sock_release(best_sock);
+ best_sock = sock;
+ sock = NULL;
+ }
+ else
+ { // Not better, discard connection
+ sock_release(sock);
+ sock = NULL;
}
// update cur servers rtt
- if (strcmp(dev->cur_server.host, dev->alt_servers[i].host) == 0)
+ if (dev->cur_server.port == dev->alt_servers[i].port && dev->cur_server.hostaddrtype == dev->alt_servers[i].hostaddrtype
+ && (
+ (dev->cur_server.hostaddrtype == AF_INET
+ && memcmp(dev->cur_server.hostaddr, dev->alt_servers[i].hostaddr, 4) == 0)
+ ||
+ (dev->cur_server.hostaddrtype == AF_INET6
+ && memcmp(dev->cur_server.hostaddr, dev->alt_servers[i].hostaddr, 16) == 0)
+ )
+ )
{
- dev->cur_server.rtt = dev->alt_servers[i].rtt;
+ dev->cur_rtt = rtt;
+ current_server = i;
}
continue;
error:
- printk("ERROR: Send/Receive failed, host %s:%s (discover)\n", current_server, dev->cur_server.port);
sock_release(sock);
sock = NULL;
+ dev->alt_servers[i].rtts[turn] = 0xFFFFFFFF;
continue;
}
+ if (dev->panic && ++dev->panic_count == 21)
+ { // After 21 retries, bail out by reporting errors to block layer
+ dnbd3_blk_fail_all_requests(dev);
+ }
- if (!strcmp(best_server, "0.0.0.0") || best_rtt == (uint64_t)-1)
+ if (best_server == -1 || kthread_should_stop()) // No alt server could be reached at all or thread should stop
+ {
+ if (best_sock != NULL) // Should never happen actually
+ {
+ sock_release(best_sock);
+ best_sock = NULL;
+ }
continue;
+ }
// take server with lowest rtt
- if (ready && num > 1 && strcmp(dev->cur_server.host, best_server) && !kthread_should_stop()
- && dev->cur_server.rtt > best_rtt + RTT_THRESHOLD)
+ if (ready && best_server != current_server
+ && dev->cur_rtt > best_rtt + RTT_THRESHOLD)
{
- printk("INFO: Server %s on %s is faster (%lluus)\n", best_server, dev->disk->disk_name, best_rtt);
+ printk("INFO: Server %d on %s is faster (%lluµs)\n", best_server, dev->disk->disk_name, best_rtt);
kfree(buf);
+ dev->better_sock = best_sock; // Take shortcut by continuing to use open connection
dev->thread_discover = NULL;
dnbd3_net_disconnect(dev);
- strcpy(dev->cur_server.host, best_server);
- dev->cur_server.rtt = best_rtt;
+ memcpy(&dev->cur_server, &dev->alt_servers[best_server], sizeof(dev->cur_server));
+ dev->cur_rtt = best_rtt;
dnbd3_net_connect(dev);
return 0;
}
- turn = (turn +1) % 4;
+ // Clean up connection that was held open for quicker server switch
+ if (best_sock != NULL)
+ {
+ sock_release(best_sock);
+ best_sock = NULL;
+ }
+
+ turn = (turn + 1) % 4;
if (turn == 3)
ready = 1;
@@ -386,21 +627,25 @@ int dnbd3_net_send(void *data)
init_msghdr(msg);
- dnbd3_request.vid = dev->vid;
- dnbd3_request.rid = dev->rid;
+ dnbd3_request.magic = dnbd3_packet_magic;
set_user_nice(current, -20);
- while (!kthread_should_stop() || !list_empty(&dev->request_queue_send))
+ for (;;)
{
wait_event_interruptible(dev->process_queue_send,
kthread_should_stop() || !list_empty(&dev->request_queue_send));
- if (list_empty(&dev->request_queue_send))
- continue;
+ if (kthread_should_stop())
+ break;
// extract block request
- spin_lock_irq(&dev->blk_lock);
+ spin_lock_irq(&dev->blk_lock); // TODO: http://www.linuxjournal.com/article/5833 says spin_lock_irq should not be used in general, but article is 10 years old
+ if (list_empty(&dev->request_queue_send))
+ {
+ spin_unlock_irq(&dev->blk_lock);
+ continue;
+ }
blk_request = list_entry(dev->request_queue_send.next, struct request, queuelist);
spin_unlock_irq(&dev->blk_lock);
@@ -411,18 +656,19 @@ int dnbd3_net_send(void *data)
dnbd3_request.cmd = CMD_GET_BLOCK;
dnbd3_request.offset = blk_rq_pos(blk_request) << 9; // *512
dnbd3_request.size = blk_rq_bytes(blk_request); // bytes left to complete entire request
+ // enqueue request to request_queue_receive
+ spin_lock_irq(&dev->blk_lock);
+ list_del_init(&blk_request->queuelist);
+ list_add_tail(&blk_request->queuelist, &dev->request_queue_receive);
+ spin_unlock_irq(&dev->blk_lock);
break;
case REQ_TYPE_SPECIAL:
- switch (blk_request->cmd_flags)
- {
- case REQ_GET_FILESIZE:
- dnbd3_request.cmd = CMD_GET_SIZE;
- break;
- case REQ_GET_SERVERS:
- dnbd3_request.cmd = CMD_GET_SERVERS;
- break;
- }
+ dnbd3_request.cmd = blk_request->cmd_flags;
+ dnbd3_request.size = 0;
+ spin_lock_irq(&dev->blk_lock);
+ list_del_init(&blk_request->queuelist);
+ spin_unlock_irq(&dev->blk_lock);
break;
default:
@@ -434,29 +680,30 @@ int dnbd3_net_send(void *data)
}
// send net request
- memcpy(dnbd3_request.handle, &blk_request, sizeof(blk_request));
+ dnbd3_request.handle = (uint64_t)blk_request;
+ fixup_request(dnbd3_request);
iov.iov_base = &dnbd3_request;
iov.iov_len = sizeof(dnbd3_request);
- if (kernel_sendmsg(dev->cur_server.sock, &msg, &iov, 1, sizeof(dnbd3_request)) <= 0)
+ if (kernel_sendmsg(dev->sock, &msg, &iov, 1, sizeof(dnbd3_request)) != sizeof(dnbd3_request))
goto error;
-
- // enqueue request to request_queue_receive
- spin_lock_irq(&dev->blk_lock);
- list_del_init(&blk_request->queuelist);
- list_add_tail(&blk_request->queuelist, &dev->request_queue_receive);
- spin_unlock_irq(&dev->blk_lock);
wake_up(&dev->process_queue_receive);
}
return 0;
- error:
- printk("ERROR: Connection to server %s lost (send)\n", dev->cur_server.host);
- if (dev->cur_server.sock)
- kernel_sock_shutdown(dev->cur_server.sock, SHUT_RDWR);
- dev->thread_send = NULL;
- dev->panic = 1;
- return -1;
+error:
+ printk("ERROR: Connection to server %pI4 : %d lost (send)\n", dev->cur_server.hostaddr, (int)ntohs(dev->cur_server.port));
+ if (dev->sock)
+ kernel_sock_shutdown(dev->sock, SHUT_RDWR);
+ dev->thread_send = NULL;
+ if (!dev->disconnecting)
+ {
+ dev->panic = 1;
+ // start discover
+ dev->discover = 1;
+ wake_up(&dev->process_queue_discover);
+ }
+ return -1;
}
int dnbd3_net_receive(void *data)
@@ -473,59 +720,64 @@ int dnbd3_net_receive(void *data)
unsigned long flags;
sigset_t blocked, oldset;
- unsigned int size, i;
- uint64_t filesize;
- struct in_addr tmp_addr;
+ int count, remaining;
init_msghdr(msg);
set_user_nice(current, -20);
- while (!kthread_should_stop() || !list_empty(&dev->request_queue_receive))
+ for (;;)
{
wait_event_interruptible(dev->process_queue_receive,
kthread_should_stop() || !list_empty(&dev->request_queue_receive));
+ if (kthread_should_stop())
+ break;
+
if (list_empty(&dev->request_queue_receive))
continue;
// receive net reply
iov.iov_base = &dnbd3_reply;
iov.iov_len = sizeof(dnbd3_reply);
- if (kernel_recvmsg(dev->cur_server.sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags) <= 0)
+ if (kernel_recvmsg(dev->sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags) != sizeof(dnbd3_reply))
goto error;
+ fixup_reply(dnbd3_reply);
- // search for replied request in queue
- received_request = *(struct request **) dnbd3_reply.handle;
- spin_lock_irq(&dev->blk_lock);
- list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_receive, queuelist)
+ // check error
+ if (dnbd3_reply.magic != dnbd3_packet_magic)
{
- if (blk_request == received_request)
- break;
+ printk("ERROR: Wrong packet magic (Receive)\n");
+ goto error;
}
- spin_unlock_irq(&dev->blk_lock);
-
- // check error
if (dnbd3_reply.cmd == 0)
{
printk("ERROR: Command was 0 (Receive)\n");
goto error;
}
- if (dnbd3_reply.size == 0)
- {
- printk("FATAL: Requested image does't exist cmd: %i\n", dnbd3_reply.cmd);
- spin_lock_irq(&dev->blk_lock);
- list_del_init(&blk_request->queuelist);
- spin_unlock_irq(&dev->blk_lock);
- if ( (dnbd3_reply.cmd == CMD_GET_SIZE) || (dnbd3_reply.cmd == CMD_GET_SERVERS) )
- kfree(blk_request);
- continue;
- }
// what to do?
switch (dnbd3_reply.cmd)
{
case CMD_GET_BLOCK:
+ // search for replied request in queue
+ blk_request = NULL;
+ spin_lock_irq(&dev->blk_lock);
+ list_for_each_entry_safe(received_request, tmp_request, &dev->request_queue_receive, queuelist)
+ {
+ if ((uint64_t)received_request == dnbd3_reply.handle)
+ {
+ blk_request = received_request;
+ break;
+ }
+ }
+ spin_unlock_irq(&dev->blk_lock);
+ if (blk_request == NULL)
+ {
+ printk("ERROR: Received block data for unrequested handle (%llu: %llu).\n",
+ (unsigned long long)dnbd3_reply.handle, (unsigned long long)dnbd3_reply.size);
+ goto error;
+ }
// receive data and answer to block layer
rq_for_each_segment(bvec, blk_request, iter)
{
@@ -533,10 +785,9 @@ int dnbd3_net_receive(void *data)
sigprocmask(SIG_SETMASK, &blocked, &oldset);
kaddr = kmap(bvec->bv_page) + bvec->bv_offset;
- size = bvec->bv_len;
iov.iov_base = kaddr;
- iov.iov_len = size;
- if (kernel_recvmsg(dev->cur_server.sock, &msg, &iov, 1, size, msg.msg_flags) <= 0)
+ iov.iov_len = bvec->bv_len;
+ if (kernel_recvmsg(dev->sock, &msg, &iov, 1, bvec->bv_len, msg.msg_flags) <= 0)
{
kunmap(bvec->bv_page);
goto error;
@@ -551,47 +802,38 @@ int dnbd3_net_receive(void *data)
spin_unlock_irqrestore(&dev->blk_lock, flags);
continue;
- case CMD_GET_SIZE:
- dev->vid = dnbd3_reply.vid;
- dev->rid = dnbd3_reply.rid;
- iov.iov_base = &filesize;
- iov.iov_len = sizeof(uint64_t);
- if (kernel_recvmsg(dev->cur_server.sock, &msg, &iov, 1, dnbd3_reply.size, msg.msg_flags) <= 0)
- goto error;
- set_capacity(dev->disk, filesize >> 9); /* 512 Byte blocks */
- printk("INFO: Filesize %s: %llu\n", dev->disk->disk_name, filesize);
- spin_lock_irq(&dev->blk_lock);
- list_del_init(&blk_request->queuelist);
- spin_unlock_irq(&dev->blk_lock);
- kfree(blk_request);
- continue;
-
case CMD_GET_SERVERS:
- dev->alt_servers_num = dnbd3_reply.size / sizeof(struct in_addr);
- size = sizeof(struct in_addr);
- for (i = 0; i < dev->alt_servers_num && i < NUMBER_SERVERS; i++)
+ spin_lock_irq(&dev->blk_lock);
+ dev->new_servers_num = 0;
+ spin_unlock_irq(&dev->blk_lock);
+ count = MIN(NUMBER_SERVERS, dnbd3_reply.size / sizeof(dnbd3_server_entry_t));
+
+ if (count != 0)
{
- iov.iov_base = &tmp_addr;
- iov.iov_len = size;
- if (kernel_recvmsg(dev->cur_server.sock, &msg, &iov, 1, size, msg.msg_flags) <= 0)
- goto error;
- inet_ntoa(tmp_addr, dev->alt_servers[i].host);
+ iov.iov_base = dev->new_servers;
+ iov.iov_len = count * sizeof(dnbd3_server_entry_t);
+ if (kernel_recvmsg(dev->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags) != iov.iov_len)
+ goto error;
+ spin_lock_irq(&dev->blk_lock);
+ dev->new_servers_num = count;
+ spin_unlock_irq(&dev->blk_lock);
+ // TODO: Re-Add update check
+ }
+ // If there were more servers than accepted, remove the remaining data from the socket buffer
+ remaining = dnbd3_reply.size - count * sizeof(dnbd3_server_entry_t);
+ while (remaining > 0)
+ {
+ count = MIN(sizeof(dnbd3_reply), remaining);
+ iov.iov_base = &dnbd3_reply;
+ iov.iov_len = count;
+ if (kernel_recvmsg(dev->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags) <= 0)
+ goto error;
+ remaining -= count;
}
- spin_lock_irq(&dev->blk_lock);
- list_del_init(&blk_request->queuelist);
- spin_unlock_irq(&dev->blk_lock);
- kfree(blk_request);
-
- if (dev->rid < dnbd3_reply.rid)
- dev->update_available = 1;
-
continue;
default:
printk("ERROR: Unknown command (Receive)\n");
- spin_lock_irq(&dev->blk_lock);
- list_del_init(&blk_request->queuelist);
- spin_unlock_irq(&dev->blk_lock);
continue;
}
@@ -599,23 +841,29 @@ int dnbd3_net_receive(void *data)
return 0;
- error:
- printk("ERROR: Connection to server %s lost (receive)\n", dev->cur_server.host);
- // move already send requests to request_queue_send again
- if (!list_empty(&dev->request_queue_receive))
- {
- printk("WARN: Request queue was not empty on %s\n", dev->disk->disk_name);
- spin_lock_irq(&dev->blk_lock);
- list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_receive, queuelist)
- {
- list_del_init(&blk_request->queuelist);
- list_add(&blk_request->queuelist, &dev->request_queue_send);
- }
- spin_unlock_irq(&dev->blk_lock);
- }
- if (dev->cur_server.sock)
- kernel_sock_shutdown(dev->cur_server.sock, SHUT_RDWR);
- dev->thread_receive = NULL;
- dev->panic = 1;
- return -1;
+error:
+ printk("ERROR: Connection to server %pI4 : %d lost (receive)\n", dev->cur_server.hostaddr, (int)ntohs(dev->cur_server.port));
+ // move already send requests to request_queue_send again
+ if (!list_empty(&dev->request_queue_receive))
+ {
+ printk("WARN: Request queue was not empty on %s\n", dev->disk->disk_name);
+ spin_lock_irq(&dev->blk_lock);
+ list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_receive, queuelist)
+ {
+ list_del_init(&blk_request->queuelist);
+ list_add(&blk_request->queuelist, &dev->request_queue_send);
+ }
+ spin_unlock_irq(&dev->blk_lock);
+ }
+ if (dev->sock)
+ kernel_sock_shutdown(dev->sock, SHUT_RDWR);
+ dev->thread_receive = NULL;
+ if (!dev->disconnecting)
+ {
+ dev->panic = 1;
+ // start discover
+ dev->discover = 1;
+ wake_up(&dev->process_queue_discover);
+ }
+ return -1;
}
diff --git a/src/kernel/net.h b/src/kernel/net.h
index 64816bc..18aa227 100644
--- a/src/kernel/net.h
+++ b/src/kernel/net.h
@@ -23,12 +23,13 @@
#include "dnbd3.h"
-#define init_msghdr(h)\
- h.msg_name = NULL;\
- h.msg_namelen = 0;\
- h.msg_control = NULL;\
- h.msg_controllen = 0;\
- h.msg_flags = MSG_WAITALL | MSG_NOSIGNAL;
+#define init_msghdr(h) do { \
+ h.msg_name = NULL; \
+ h.msg_namelen = 0; \
+ h.msg_control = NULL; \
+ h.msg_controllen = 0; \
+ h.msg_flags = MSG_WAITALL | MSG_NOSIGNAL; \
+ } while (0)
int dnbd3_net_connect(dnbd3_device_t *lo);
diff --git a/src/kernel/serialize_kmod.c b/src/kernel/serialize_kmod.c
new file mode 100644
index 0000000..a6a9b03
--- /dev/null
+++ b/src/kernel/serialize_kmod.c
@@ -0,0 +1,4 @@
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+#include "serialize.c"
diff --git a/src/kernel/sysfs.c b/src/kernel/sysfs.c
index a455bd2..df94d20 100644
--- a/src/kernel/sysfs.c
+++ b/src/kernel/sysfs.c
@@ -25,12 +25,12 @@
ssize_t show_cur_server_ip(char *buf, dnbd3_device_t *dev)
{
- return sprintf(buf, "%s\n", dev->cur_server.host);
+ return sprintf(buf, "%pI4\n", dev->cur_server.hostaddr);
}
ssize_t show_cur_server_rtt(char *buf, dnbd3_device_t *dev)
{
- return sprintf(buf, "%llu\n", dev->cur_server.rtt);
+ return sprintf(buf, "%llu\n", (unsigned long long)dev->cur_rtt);
}
ssize_t show_alt_server_num(char *buf, dnbd3_device_t *dev)
@@ -38,9 +38,10 @@ ssize_t show_alt_server_num(char *buf, dnbd3_device_t *dev)
return sprintf(buf, "%d\n", dev->alt_servers_num);
}
-ssize_t show_vid(char *buf, dnbd3_device_t *dev)
+ssize_t show_image_name(char *buf, dnbd3_device_t *dev)
{
- return sprintf(buf, "%d\n", dev->vid);
+ if (dev->imgname == NULL) return sprintf(buf, "(null)");
+ return sprintf(buf, "%s\n", dev->imgname);
}
ssize_t show_rid(char *buf, dnbd3_device_t *dev)
@@ -55,12 +56,12 @@ ssize_t show_update_available(char *buf, dnbd3_device_t *dev)
ssize_t show_alt_server_ip(char *buf, dnbd3_server_t *srv)
{
- return sprintf(buf, "%s\n", srv->host);
+ return sprintf(buf, "%pI4\n", srv->hostaddr);
}
ssize_t show_alt_server_rtt(char *buf, dnbd3_server_t *srv)
{
- return sprintf(buf, "%llu\n", srv->rtt);
+ return sprintf(buf, "%llu\n", (uint64_t)((srv->rtts[0]+srv->rtts[1]+srv->rtts[2]+srv->rtts[3]) / 4));
}
device_attr_t cur_server_ip =
@@ -84,10 +85,10 @@ device_attr_t alt_server_num =
.store = NULL,
};
-device_attr_t vid =
+device_attr_t image_name =
{
- .attr = {.name = "vid", .mode = 0444 },
- .show = show_vid,
+ .attr = {.name = "image_name", .mode = 0444 },
+ .show = show_image_name,
.store = NULL,
};
@@ -138,7 +139,7 @@ struct attribute *device_attrs[] =
&cur_server_ip.attr,
&cur_server_rtt.attr,
&alt_server_num.attr,
- &vid.attr,
+ &image_name.attr,
&rid.attr,
&update_available.attr,
NULL,
@@ -183,7 +184,7 @@ struct kobj_type server_ktype =
void dnbd3_sysfs_init(dnbd3_device_t *dev)
{
int i;
- char name[] = "alt_server99";
+ char name[15] = "alt_server99";
struct kobject *kobj = &dev->kobj;
struct kobj_type *ktype = &device_ktype;
struct kobject *parent = &disk_to_dev(dev->disk)->kobj;
diff --git a/src/serialize.c b/src/serialize.c
new file mode 100644
index 0000000..7961cfe
--- /dev/null
+++ b/src/serialize.c
@@ -0,0 +1,74 @@
+#include "serialize.h"
+#include "types.h"
+
+#ifndef MIN
+#define MIN(a,b) (a < b ? a : b)
+#endif
+
+void serializer_reset_read(serialized_buffer_t *buffer, size_t data_len)
+{
+ buffer->buffer_end = buffer->buffer + MIN(MAX_PAYLOAD, data_len);
+ buffer->buffer_pointer = buffer->buffer;
+}
+
+void serializer_reset_write(serialized_buffer_t *buffer)
+{
+ buffer->buffer_end = buffer->buffer + MAX_PAYLOAD;
+ buffer->buffer_pointer = buffer->buffer;
+}
+
+uint16_t serializer_get_uint16(serialized_buffer_t *buffer)
+{
+ uint16_t ret;
+ if (buffer->buffer_pointer + 2 > buffer->buffer_end) return 0;
+ memcpy(&ret, buffer->buffer_pointer, 2);
+ *buffer->buffer_pointer += 2;
+ return net_order_16(ret);
+}
+
+uint64_t serializer_get_uint64(serialized_buffer_t *buffer)
+{
+ uint64_t ret;
+ if (buffer->buffer_pointer + 8 > buffer->buffer_end) return 0;
+ memcpy(&ret, buffer->buffer_pointer, 8);
+ *buffer->buffer_pointer += 8;
+ return net_order_64(ret);
+}
+
+char* serializer_get_string(serialized_buffer_t *buffer)
+{
+ char *ptr = buffer->buffer_pointer, *start = buffer->buffer_pointer;
+ while (ptr < buffer->buffer_end && *ptr) ++ptr;
+ if (*ptr) return NULL; // String did not terminate within buffer (possibly corrupted/malicious packet)
+ buffer->buffer_pointer = ptr + 1;
+ return start;
+}
+
+void serializer_put_uint16(serialized_buffer_t *buffer, uint16_t value)
+{
+ if (buffer->buffer_pointer + 2 > buffer->buffer_end) return;
+ value = net_order_16(value);
+ memcpy(buffer->buffer_pointer, &value, 2);
+ buffer->buffer_pointer += 2;
+}
+
+void serializer_put_uint64(serialized_buffer_t *buffer, uint64_t value)
+{
+ if (buffer->buffer_pointer + 8 > buffer->buffer_end) return;
+ value = net_order_64(value);
+ memcpy(buffer->buffer_pointer, &value, 8);
+ buffer->buffer_pointer += 8;
+}
+
+void serializer_put_string(serialized_buffer_t *buffer, char *value)
+{
+ size_t len = strlen(value) + 1;
+ if (buffer->buffer_pointer + len > buffer->buffer_end) return;
+ memcpy(buffer->buffer_pointer, value, len);
+ buffer->buffer_pointer += len;
+}
+
+ssize_t serializer_get_written_length(serialized_buffer_t *buffer)
+{
+ return buffer->buffer_pointer - buffer->buffer;
+}
diff --git a/src/serialize.h b/src/serialize.h
new file mode 100644
index 0000000..eeb3b26
--- /dev/null
+++ b/src/serialize.h
@@ -0,0 +1,31 @@
+#ifndef SERIALIZER_H_
+#define SERIALIZER_H_
+
+#include "config.h"
+
+typedef struct
+{
+ char buffer[MAX_PAYLOAD]; // This MUST be the first member or send_reply() will blow up
+ char *buffer_end;
+ char *buffer_pointer;
+} serialized_buffer_t;
+
+void serializer_reset_read(serialized_buffer_t *buffer, size_t data_len);
+
+void serializer_reset_write(serialized_buffer_t *buffer);
+
+uint16_t serializer_get_uint16(serialized_buffer_t *buffer);
+
+uint64_t serializer_get_uint64(serialized_buffer_t *buffer);
+
+char* serializer_get_string(serialized_buffer_t *buffer);
+
+void serializer_put_uint16(serialized_buffer_t *buffer, uint16_t value);
+
+void serializer_put_uint64(serialized_buffer_t *buffer, uint64_t value);
+
+void serializer_put_string(serialized_buffer_t *buffer, char *value);
+
+ssize_t serializer_get_written_length(serialized_buffer_t *buffer);
+
+#endif
diff --git a/src/server/ipc.c b/src/server/ipc.c
index 917d6e5..5da811c 100644
--- a/src/server/ipc.c
+++ b/src/server/ipc.c
@@ -34,16 +34,17 @@
#include <libxml/xpath.h>
#include "ipc.h"
-#include "config.h"
+#include "../config.h"
#include "server.h"
#include "utils.h"
+#include "memlog.h"
void* dnbd3_ipc_receive()
{
GSList *iterator = NULL;
struct tm * timeinfo;
- char time_buff[64];
+ char time_buff[64], rid[20], ipaddr[100];
dnbd3_ipc_t header;
int server_sock, client_sock;
@@ -118,7 +119,7 @@ void* dnbd3_ipc_receive()
grp = getgrnam(UNIX_SOCKET_GROUP);
if (grp == NULL)
{
- printf("WARN: Group '%s' not found.\n", UNIX_SOCKET_GROUP);
+ memlogf("WARN: Group '%s' not found.\n", UNIX_SOCKET_GROUP);
}
else
{
@@ -129,9 +130,12 @@ void* dnbd3_ipc_receive()
while (1)
{
- int i = 0, size = 0;
+ int size;
char* buf;
xmlDocPtr doc;
+ xmlNodePtr root_node, images_node, clients_node, tmp_node, log_parent_node, log_node;
+ xmlChar *xmlbuff;
+ int buffersize;
// Accept connection
if ((client_sock = accept(server_sock, &client, &len)) < 0)
@@ -151,7 +155,7 @@ void* dnbd3_ipc_receive()
switch (header.cmd)
{
case IPC_EXIT:
- printf("INFO: Server shutdown...\n");
+ memlogf("INFO: Server shutdown...\n");
header.size = ntohl(0);
header.error = ntohl(0);
send(client_sock, (char *) &header, sizeof(header), MSG_WAITALL);
@@ -161,21 +165,13 @@ void* dnbd3_ipc_receive()
break;
case IPC_RELOAD:
- printf("INFO: Reloading configuration...\n");
- dnbd3_reload_config(_config_file_name);
- header.size = ntohl(0);
- header.error = ntohl(0);
+ header.size = ntohl(0);
+ header.error = ntohl(ERROR_UNKNOWN);
send(client_sock, (char *) &header, sizeof(header), MSG_WAITALL);
close(client_sock);
break;
case IPC_INFO:
- pthread_spin_lock(&_spinlock);
-
- xmlNodePtr root_node, images_node, clients_node, tmp_node;
- xmlChar *xmlbuff;
- int buffersize;
-
doc = xmlNewDoc(BAD_CAST "1.0");
root_node = xmlNewNode(NULL, BAD_CAST "info");
xmlDocSetRootElement(doc, root_node);
@@ -183,38 +179,46 @@ void* dnbd3_ipc_receive()
// Images
images_node = xmlNewNode(NULL, BAD_CAST "images");
xmlAddChild(root_node, images_node);
- for (i = 0; i < _num_images; i++)
- {
- char vid[20], rid[20];
- sprintf(vid,"%d",_images[i].vid);
- sprintf(rid,"%d",_images[i].rid);
- timeinfo = localtime(&_images[i].atime);
+ pthread_spin_lock(&_spinlock);
+ for (iterator = _dnbd3_images; iterator; iterator = iterator->next)
+ {
+ const dnbd3_image_t *image = iterator->data;
+ sprintf(rid,"%d",image->rid);
+ timeinfo = localtime(&image->atime);
strftime(time_buff,64,"%d.%m.%y %H:%M:%S",timeinfo);
tmp_node = xmlNewNode(NULL, BAD_CAST "image");
- xmlNewProp(tmp_node, BAD_CAST "group", BAD_CAST _images[i].group);
+ xmlNewProp(tmp_node, BAD_CAST "name", BAD_CAST image->name);
xmlNewProp(tmp_node, BAD_CAST "atime", BAD_CAST time_buff);
- xmlNewProp(tmp_node, BAD_CAST "vid", BAD_CAST vid);
xmlNewProp(tmp_node, BAD_CAST "rid", BAD_CAST rid);
- xmlNewProp(tmp_node, BAD_CAST "file", BAD_CAST _images[i].file);
- xmlNewProp(tmp_node, BAD_CAST "servers", BAD_CAST _images[i].serverss);
- xmlNewProp(tmp_node, BAD_CAST "cache", BAD_CAST _images[i].cache_file);
+ xmlNewProp(tmp_node, BAD_CAST "file", BAD_CAST image->file);
+ xmlNewProp(tmp_node, BAD_CAST "servers", BAD_CAST "???");
+ xmlNewProp(tmp_node, BAD_CAST "cache", BAD_CAST image->cache_file);
xmlAddChild(images_node, tmp_node);
}
-
// Clients
clients_node = xmlNewNode(NULL, BAD_CAST "clients");
- xmlAddChild(root_node, clients_node);
+ log_node = xmlAddChild(root_node, clients_node);
for (iterator = _dnbd3_clients; iterator; iterator = iterator->next)
{
dnbd3_client_t *client = iterator->data;
if (client->image)
{
tmp_node = xmlNewNode(NULL, BAD_CAST "client");
- xmlNewProp(tmp_node, BAD_CAST "ip", BAD_CAST client->ip);
+ *ipaddr = '\0';
+ inet_ntop(client->addrtype, client->ipaddr, ipaddr, 100);
+ xmlNewProp(tmp_node, BAD_CAST "ip", BAD_CAST ipaddr);
xmlNewProp(tmp_node, BAD_CAST "file", BAD_CAST client->image->file);
xmlAddChild(clients_node, tmp_node);
}
}
+ pthread_spin_unlock(&_spinlock);
+
+ // Log
+ log_parent_node = xmlNewChild(root_node, NULL, BAD_CAST "log", NULL);
+ char *log = fetchlog(0);
+ if (log == NULL) log = "LOG IS NULL";
+ log_node = xmlNewCDataBlock(doc, BAD_CAST log, strlen(log));
+ xmlAddChild(log_parent_node, log_node);
// Dump and send
xmlDocDumpFormatMemory(doc, &xmlbuff, &buffersize, 1);
@@ -224,10 +228,10 @@ void* dnbd3_ipc_receive()
send(client_sock, (char *) xmlbuff, buffersize, MSG_WAITALL);
// Cleanup
- pthread_spin_unlock(&_spinlock);
close(client_sock);
xmlFree(xmlbuff);
xmlFreeDoc(doc);
+ free(log);
break;
case IPC_ADDIMG:
@@ -257,11 +261,10 @@ void* dnbd3_ipc_receive()
if(cur->type == XML_ELEMENT_NODE)
{
dnbd3_image_t image;
- image.group = (char *) xmlGetNoNsProp(cur, BAD_CAST "group");
- image.vid = atoi((char *) xmlGetNoNsProp(cur, BAD_CAST "vid"));
+ memset(&image, 0, sizeof(dnbd3_image_t));
+ image.name = (char *) xmlGetNoNsProp(cur, BAD_CAST "name");
image.rid = atoi((char *) xmlGetNoNsProp(cur, BAD_CAST "rid"));
image.file = (char *) xmlGetNoNsProp(cur, BAD_CAST "file");
- image.serverss = (char *) xmlGetNoNsProp(cur, BAD_CAST "servers");
image.cache_file = (char *) xmlGetNoNsProp(cur, BAD_CAST "cache");
header.error = htonl(dnbd3_add_image(&image, _config_file_name));
}
@@ -308,11 +311,10 @@ void* dnbd3_ipc_receive()
if(cur->type == XML_ELEMENT_NODE)
{
dnbd3_image_t image;
- image.group = (char *) xmlGetNoNsProp(cur, BAD_CAST "group");
- image.vid = atoi((char *) xmlGetNoNsProp(cur, BAD_CAST "vid"));
+ memset(&image, 0, sizeof(dnbd3_image_t));
+ image.name = (char *) xmlGetNoNsProp(cur, BAD_CAST "name");
image.rid = atoi((char *) xmlGetNoNsProp(cur, BAD_CAST "rid"));
image.file = (char *) xmlGetNoNsProp(cur, BAD_CAST "file");
- image.serverss = (char *) xmlGetNoNsProp(cur, BAD_CAST "servers");
image.cache_file = (char *) xmlGetNoNsProp(cur, BAD_CAST "cache");
header.error = htonl(dnbd3_del_image(&image, _config_file_name));
}
@@ -333,7 +335,7 @@ void* dnbd3_ipc_receive()
break;
default:
- printf("ERROR: Unknown command: %i\n", header.cmd);
+ memlogf("ERROR: Unknown command: %i\n", header.cmd);
header.size = htonl(0);
header.error = htonl(ERROR_UNKNOWN);
send(client_sock, (char *) &header, sizeof(header), MSG_WAITALL);
@@ -343,6 +345,7 @@ void* dnbd3_ipc_receive()
}
}
close(server_sock);
+ pthread_exit((void *) 0);
}
void dnbd3_ipc_send(int cmd)
@@ -414,11 +417,21 @@ void dnbd3_ipc_send(int cmd)
xmlNodeSetPtr nodes;
xmlNodePtr cur;
+ // Print log
+ xpathExpr = BAD_CAST "/info/log";
+ xpathCtx = xmlXPathNewContext(doc);
+ xpathObj = xmlXPathEvalExpression(xpathExpr, xpathCtx);
+ if (xpathObj->nodesetval && xpathObj->nodesetval->nodeTab && xpathObj->nodesetval->nodeTab[0]) {
+ printf("--- Last log lines ----\n%s\n\n", xmlNodeGetContent(xpathObj->nodesetval->nodeTab[0]));
+ }
+ xmlXPathFreeObject(xpathObj);
+ xmlXPathFreeContext(xpathCtx);
+
// Print images
xpathExpr = BAD_CAST "/info/images/image";
xpathCtx = xmlXPathNewContext(doc);
xpathObj = xmlXPathEvalExpression(xpathExpr, xpathCtx);
- printf("Exported images (atime, vid, rid, file):\n");
+ printf("Exported images (atime, name, rid, file):\n");
printf("========================================\n");
nodes = xpathObj->nodesetval;
n = (nodes) ? nodes->nodeNr : 0;
@@ -428,7 +441,7 @@ void dnbd3_ipc_send(int cmd)
{
cur = nodes->nodeTab[i];
xmlChar *atime = xmlGetNoNsProp(cur, BAD_CAST "atime");
- xmlChar *vid = xmlGetNoNsProp(cur, BAD_CAST "vid");
+ xmlChar *vid = xmlGetNoNsProp(cur, BAD_CAST "name");
xmlChar *rid = xmlGetNoNsProp(cur, BAD_CAST "rid");
xmlChar *file = xmlGetNoNsProp(cur, BAD_CAST "file");
printf("%s\t%s\t%s\t%s\n", atime, vid, rid, file);
diff --git a/src/server/net.c b/src/server/net.c
index cd93c0d..9ae168a 100644
--- a/src/server/net.c
+++ b/src/server/net.c
@@ -34,6 +34,83 @@
#include "server.h"
#include "utils.h"
+#include "memlog.h"
+#include "../serialize.h"
+#include "../config.h"
+
+
+static char recv_request_header(int sock, dnbd3_request_t *request)
+{
+ // Read request heade from socket
+ if (recv(sock, request, sizeof(dnbd3_request_t), MSG_WAITALL) != sizeof(dnbd3_request_t))
+ {
+ printf("[DEBUG] Error receiving request: Could not read message header\n");
+ return 0;
+ }
+ // Make sure all bytes are in the right order (endianness)
+ fixup_request(*request);
+ if (request->magic != dnbd3_packet_magic)
+ {
+ printf("[DEBUG] Magic in client request incorrect\n");
+ return 0;
+ }
+ // Payload sanity check
+ if (request->size > MAX_PAYLOAD)
+ {
+ memlogf("[WARNING] Client tries to send a packet of type %d with %d bytes payload. Dropping client.", (int)request->cmd, (int)request->size);
+ return 0;
+ }
+ return 1;
+}
+
+static char recv_request_payload(int sock, uint32_t size, serialized_buffer_t *payload)
+{
+ if (size == 0)
+ {
+ memlogf("[BUG] Called recv_request_payload() to receive 0 bytes");
+ return 0;
+ }
+ if (size > MAX_PAYLOAD)
+ {
+ memlogf("[BUG] Called recv_request_payload() for more bytes than the passed buffer could hold!");
+ return 0;
+ }
+ if (recv(sock, payload->buffer, size, MSG_WAITALL) != size)
+ {
+ printf("[ERROR] Could not receive request payload of length %d\n", (int)size);
+ return 0;
+ }
+ // Prepare payload buffer for reading
+ serializer_reset_read(payload, size);
+ return 1;
+}
+
+static char send_reply(int sock, dnbd3_reply_t *reply, void *payload)
+{
+ fixup_reply(*reply);
+ if (!payload || reply->size == 0)
+ {
+ if (send(sock, reply, sizeof(dnbd3_reply_t), MSG_WAITALL) != sizeof(dnbd3_reply_t))
+ {
+ printf("[DEBUG] Send failed (header-only)\n");
+ return 0;
+ }
+ }
+ else
+ {
+ struct iovec iov[2];
+ iov[0].iov_base = reply;
+ iov[0].iov_len = sizeof(dnbd3_reply_t);
+ iov[1].iov_base = payload;
+ iov[1].iov_len = reply->size;
+ if (writev(sock, iov, 2) != sizeof(dnbd3_reply_t) + reply->size)
+ {
+ printf("[DEBUG] Send failed (reply with payload of %d bytes)\n", (int)reply->size);
+ return 0;
+ }
+ }
+ return 1;
+}
void *dnbd3_handle_query(void *dnbd3_client)
{
@@ -41,87 +118,146 @@ void *dnbd3_handle_query(void *dnbd3_client)
dnbd3_request_t request;
dnbd3_reply_t reply;
- int cork = 1;
- int uncork = 0;
+ const int cork = 1;
+ const int uncork = 0;
dnbd3_image_t *image = NULL;
- int image_file, image_cache = -1;
+ int image_file = -1, image_cache = -1;
- struct in_addr alt_server;
- int i = 0;
+ int i, num;
uint64_t map_y;
char map_x, bit_mask;
+ serialized_buffer_t payload;
+ char *image_name;
+ uint16_t rid, client_version;
uint64_t todo_size = 0;
uint64_t todo_offset = 0;
uint64_t cur_offset = 0;
uint64_t last_offset = 0;
+ dnbd3_server_entry_t server_list[NUMBER_SERVERS];
+
int dirty = 0;
- while (recv(client->sock, &request, sizeof(dnbd3_request_t), MSG_WAITALL) > 0)
+ reply.magic = dnbd3_packet_magic;
+
+ // Receive first packet. This must be CMD_GET_SIZE by protocol specification
+ if (recv_request_header(client->sock, &request))
+ {
+ if (request.cmd != CMD_GET_SIZE)
+ {
+ printf("[DEBUG] Client sent invalid handshake (%d). Dropping Client\n", (int)request.cmd);
+ }
+ else
+ {
+ if (recv_request_payload(client->sock, request.size, &payload))
+ {
+ client_version = serializer_get_uint16(&payload);
+ image_name = serializer_get_string(&payload);
+ rid = serializer_get_uint16(&payload);
+ if (request.size < 3 || !image_name || client_version < MIN_SUPPORTED_CLIENT)
+ {
+ if (client_version < MIN_SUPPORTED_CLIENT)
+ {
+ printf("[DEBUG] Client too old\n");
+ }
+ else
+ {
+ printf("[DEBUG] Incomplete handshake received\n");
+ }
+ }
+ else
+ {
+ pthread_spin_lock(&_spinlock);
+ image = dnbd3_get_image(image_name, rid, 0);
+ if (!image)
+ {
+ printf("[DEBUG] Client requested non-existent image '%s'\n", image_name);
+ }
+ else
+ {
+ serializer_put_uint16(&payload, PROTOCOL_VERSION);
+ serializer_put_string(&payload, image->low_name);
+ serializer_put_uint16(&payload, image->rid);
+ serializer_put_uint64(&payload, image->filesize);
+ reply.cmd = CMD_GET_SIZE;
+ reply.size = serializer_get_written_length(&payload);
+ if (!send_reply(client->sock, &reply, &payload))
+ {
+ image = NULL;
+ }
+ else
+ {
+ image_file = open(image->file, O_RDONLY);
+ if (image_file == -1)
+ {
+ image = NULL;
+ }
+ else
+ {
+ client->image = image;
+ image->atime = time(NULL); // TODO: check if mutex is needed
+
+ if (image->cache_map && image->cache_file)
+ image_cache = open(image->cache_file, O_RDWR);
+ }
+ }
+ }
+ pthread_spin_unlock(&_spinlock);
+ }
+ }
+ }
+ }
+
+ if (image) while (recv_request_header(client->sock, &request))
{
- reply.cmd = request.cmd;
- reply.size = 0;
- memcpy(reply.handle, request.handle, sizeof(request.handle));
- pthread_spin_lock(&client->spinlock);
switch (request.cmd)
{
- case CMD_GET_SERVERS:
- image = dnbd3_get_image(request.vid, request.rid);
- if(!image)
- goto error;
-
- int num = (image->num_servers < NUMBER_SERVERS) ? image->num_servers : NUMBER_SERVERS;
- reply.vid = image->vid;
- reply.rid = dnbd3_get_image(request.vid, 0)->rid;
- reply.size = num * sizeof(struct in_addr);
- send(client->sock, (char *) &reply, sizeof(dnbd3_reply_t), 0);
-
- for (i = 0; i < num; i++)
- {
- inet_aton(image->servers[i], &alt_server);
- send(client->sock, (char *) &alt_server, sizeof(struct in_addr), 0);
- }
- client->image = image;
- image->atime = time(NULL); // TODO: check if mutex is needed
- break;
-
- case CMD_GET_SIZE:
- image = dnbd3_get_image(request.vid, request.rid);
- if(!image)
- goto error;
-
- reply.vid = image->vid;
- reply.rid = image->rid;
- reply.size = sizeof(uint64_t);
- send(client->sock, (char *) &reply, sizeof(dnbd3_reply_t), 0);
-
- send(client->sock, &image->filesize, sizeof(uint64_t), 0);
- image_file = open(image->file, O_RDONLY);
- client->image = image;
- image->atime = time(NULL); // TODO: check if mutex is needed
-
- if (image->cache_file)
- image_cache = open(image->cache_file, O_RDWR);
-
- break;
case CMD_GET_BLOCK:
- if (image_file < 0)
- goto error;
+ if (request.offset >= image->filesize)
+ { // Sanity check
+ memlogf("[WARNING] Client requested non-existent block");
+ reply.size = 0;
+ reply.cmd = CMD_ERROR;
+ send_reply(client->sock, &reply, NULL);
+ break;
+ }
+ if (request.offset + request.size > image->filesize)
+ { // Sanity check
+ memlogf("[WARNING] Client requested data block that extends beyond image size");
+ reply.size = 0;
+ reply.cmd = CMD_ERROR;
+ send_reply(client->sock, &reply, NULL);
+ break;
+ }
+ if (request.size > image->filesize)
+ { // Sanity check
+ memlogf("[WARNING] Client requested data block that is bigger than the image size");
+ reply.size = 0;
+ reply.cmd = CMD_ERROR;
+ send_reply(client->sock, &reply, NULL);
+ break;
+ }
+ // TODO: Try MSG_MORE instead of cork+uncork if performance ever becomes an issue..
setsockopt(client->sock, SOL_TCP, TCP_CORK, &cork, sizeof(cork));
+ reply.cmd = CMD_GET_BLOCK;
reply.size = request.size;
- send(client->sock, (char *) &reply, sizeof(dnbd3_reply_t), 0);
+ reply.handle = request.handle;
+ send_reply(client->sock, &reply, NULL);
// caching is off
- if (!image->cache_file)
+ if (image_cache == -1)
{
- if (sendfile(client->sock, image_file, (off_t *) &request.offset, request.size) < 0)
- printf("ERROR: Sendfile failed (sock)\n");
+ if (sendfile(client->sock, image_file, (off_t *)&request.offset, request.size) != request.size)
+ {
+ printf("[ERROR] sendfile failed (image to net)\n");
+ close(client->sock);
+ }
setsockopt(client->sock, SOL_TCP, TCP_CORK, &uncork, sizeof(uncork));
break;
@@ -134,10 +270,11 @@ void *dnbd3_handle_query(void *dnbd3_client)
cur_offset = request.offset;
last_offset = request.offset + request.size;
+ // first make sure the whole requested part is in the local cache file
while(cur_offset < last_offset)
{
map_y = cur_offset >> 15;
- map_x = (cur_offset >> 12) & 7; // mod 8
+ map_x = (cur_offset >> 12) & 7; // mod 256
bit_mask = 0b00000001 << (map_x);
cur_offset += 4096;
@@ -147,8 +284,15 @@ void *dnbd3_handle_query(void *dnbd3_client)
if (todo_size != 0) // fetch missing chunks
{
lseek(image_cache, todo_offset, SEEK_SET);
- if (sendfile(image_cache, image_file, (off_t *) &todo_offset, todo_size) < 0)
- printf("ERROR: Sendfile failed (cache)\n");
+ if (sendfile(image_cache, image_file, (off_t *) &todo_offset, todo_size) != todo_size)
+ {
+ printf("[ERROR] sendfile failed (copy to cache 1)\n");
+ close(client->sock);
+ // Reset these so we don't update the cache map with false information
+ dirty = 0;
+ todo_size = 0;
+ break;
+ }
todo_size = 0;
dirty = 1;
}
@@ -164,13 +308,16 @@ void *dnbd3_handle_query(void *dnbd3_client)
if (todo_size != 0)
{
lseek(image_cache, todo_offset, SEEK_SET);
- if (sendfile(image_cache, image_file, (off_t *) &todo_offset, todo_size) < 0)
- printf("ERROR: Sendfile failed (cache)\n");
-
+ if (sendfile(image_cache, image_file, (off_t *) &todo_offset, todo_size) != todo_size)
+ {
+ printf("[ERROR] sendfile failed (copy to cache 2)\n");
+ close(client->sock);
+ break;
+ }
dirty = 1;
}
- if (dirty)
+ if (dirty) // cache map needs to be updated as something was missing locally
{
// set 1 in cache map for whole request
cur_offset = request.offset;
@@ -185,35 +332,42 @@ void *dnbd3_handle_query(void *dnbd3_client)
}
// send data to client
- if (sendfile(client->sock, image_cache, (off_t *) &request.offset, request.size) < 0)
- printf("ERROR: Sendfile failed (net)\n");
+ if (sendfile(client->sock, image_cache, (off_t *) &request.offset, request.size) != request.size)
+ {
+ memlogf("[ERROR] sendfile failed (cache to net)\n");
+ close(client->sock);
+ }
setsockopt(client->sock, SOL_TCP, TCP_CORK, &uncork, sizeof(uncork));
break;
+
+ case CMD_GET_SERVERS:
+ // Build list of known working alt servers
+ num = 0;
+ for (i = 0; i < NUMBER_SERVERS; i++)
+ {
+ if (image->servers[i].addrtype == 0 || image->servers[i].failures > 200) continue;
+ memcpy(server_list + num++, image->servers + i, sizeof(dnbd3_server_entry_t));
+ }
+ reply.cmd = CMD_GET_SERVERS;
+ reply.size = num * sizeof(dnbd3_server_entry_t);
+ send_reply(client->sock, &reply, server_list);
+ break;
+
default:
- printf("ERROR: Unknown command\n");
+ memlogf("ERROR: Unknown command\n");
break;
}
- pthread_spin_unlock(&client->spinlock);
- continue;
-
- error:
- printf("ERROR: Client requested an unknown image id.\n");
- send(client->sock, (char *) &reply, sizeof(dnbd3_reply_t), 0);
- pthread_spin_unlock(&client->spinlock);
- continue;
-
}
close(client->sock);
- close(image_file);
- close(image_cache);
+ if (image_file != -1) close(image_file);
+ if (image_cache != -1) close(image_cache);
pthread_spin_lock(&_spinlock);
_dnbd3_clients = g_slist_remove(_dnbd3_clients, client);
pthread_spin_unlock(&_spinlock);
- printf("INFO: Client %s exit\n", client->ip);
free(client);
pthread_exit((void *) 0);
}
@@ -227,7 +381,7 @@ int dnbd3_setup_socket()
sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
if (sock < 0)
{
- printf("ERROR: Socket failure\n");
+ memlogf("ERROR: Socket setup failure\n");
return -1;
}
@@ -239,14 +393,14 @@ int dnbd3_setup_socket()
// Bind to socket
if (bind(sock, (struct sockaddr*) &server, sizeof(server)) < 0)
{
- printf("ERROR: Bind failure\n");
+ memlogf("ERROR: Bind failure\n");
return -1;
}
// Listen on socket
if (listen(sock, 100) == -1)
{
- printf("ERROR: Listen failure\n");
+ memlogf("ERROR: Listen failure\n");
return -1;
}
diff --git a/src/server/serialize.c b/src/server/serialize.c
new file mode 100644
index 0000000..4934132
--- /dev/null
+++ b/src/server/serialize.c
@@ -0,0 +1,5 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "../serialize.c"
diff --git a/src/server/server.c b/src/server/server.c
index 371d27a..0a206d1 100644
--- a/src/server/server.c
+++ b/src/server/server.c
@@ -34,14 +34,14 @@
#include "utils.h"
#include "net.h"
#include "ipc.h"
+#include "memlog.h"
int _sock;
pthread_spinlock_t _spinlock;
GSList *_dnbd3_clients = NULL;
char *_config_file_name = DEFAULT_SERVER_CONFIG_FILE;
-dnbd3_image_t *_images;
-size_t _num_images = 0;
+GSList *_dnbd3_images; // of dnbd3_image_t
void dnbd3_print_help(char* argv_0)
{
@@ -65,8 +65,10 @@ void dnbd3_print_version()
void dnbd3_cleanup()
{
- int i, fd;
- printf("INFO: Cleanup...\n");
+ int fd;
+ memlogf("INFO: Cleanup...\n");
+
+ close(_sock);
pthread_spin_lock(&_spinlock);
GSList *iterator = NULL;
@@ -74,39 +76,38 @@ void dnbd3_cleanup()
{
dnbd3_client_t *client = iterator->data;
shutdown(client->sock, SHUT_RDWR);
- pthread_join(*client->thread, NULL);
+ pthread_join(client->thread, NULL);
+ g_free(client);
}
g_slist_free(_dnbd3_clients);
- for (i = 0; i < _num_images; i++)
+ for (iterator = _dnbd3_images; iterator; iterator = iterator->next)
{
// save cache maps to files
- if (_images[i].cache_file)
+ dnbd3_image_t *image = iterator->data;
+ if (image->cache_file)
{
- char tmp[strlen(_images[i].cache_file)+4];
- strcpy(tmp, _images[i].cache_file);
+ char tmp[strlen(image->cache_file)+4];
+ strcpy(tmp, image->cache_file);
strcat(tmp, ".map");
fd = open(tmp, O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR);
if (fd > 0)
- write(fd, _images[i].cache_map, (_images[i].filesize >> 15) * sizeof(char));
+ write(fd, image->cache_map, ((image->filesize + (1 << 15) - 1) >> 15) * sizeof(char));
close(fd);
}
- free(_images[i].group);
- free(_images[i].file);
- free(_images[i].servers);
- free(_images[i].serverss);
- free(_images[i].cache_file);
- free(_images[i].cache_map);
+ free(image->name);
+ g_free(image->file);
+ g_free(image->cache_file);
+ free(image->cache_map);
+ g_free(image);
}
+ g_slist_free(_dnbd3_images);
pthread_spin_unlock(&_spinlock);
-
- close(_sock);
- free(_images);
#ifndef IPC_TCP
unlink(UNIX_SOCKET);
#endif
@@ -161,6 +162,7 @@ int main(int argc, char* argv[])
break;
case '?':
dnbd3_print_help(argv[0]);
+ break;
}
opt = getopt_long(argc, argv, optString, longOpts, &longIndex);
}
@@ -168,6 +170,10 @@ int main(int argc, char* argv[])
if (demonize)
daemon(1, 0);
+ pthread_spin_init(&_spinlock, PTHREAD_PROCESS_PRIVATE);
+
+ initmemlog();
+
// load config file
dnbd3_load_config(_config_file_name);
@@ -191,9 +197,7 @@ int main(int argc, char* argv[])
pthread_t thread_ipc;
pthread_create(&(thread_ipc), NULL, dnbd3_ipc_receive, NULL);
- pthread_spin_init(&_spinlock, PTHREAD_PROCESS_PRIVATE);
-
- printf("INFO: Server is ready...\n");
+ memlogf("[INFO] Server is ready...");
// main loop
while (1)
@@ -201,28 +205,45 @@ int main(int argc, char* argv[])
fd = accept(_sock, (struct sockaddr*) &client, &len);
if (fd < 0)
{
- printf("ERROR: Accept failure\n");
+ memlogf("[ERROR] Accept failure");
continue;
}
- printf("INFO: Client %s connected\n", inet_ntoa(client.sin_addr));
+ //memlogf("INFO: Client %s connected\n", inet_ntoa(client.sin_addr));
setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *) &timeout, sizeof(timeout));
setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, (char *) &timeout, sizeof(timeout));
- pthread_t thread;
- dnbd3_client_t *dnbd3_client = (dnbd3_client_t *) malloc(sizeof(dnbd3_client_t));
- pthread_spin_init(&dnbd3_client->spinlock, PTHREAD_PROCESS_PRIVATE);
- strcpy(dnbd3_client->ip, inet_ntoa(client.sin_addr));
+ dnbd3_client_t *dnbd3_client = g_new0(dnbd3_client_t, 1);
+ if (dnbd3_client == NULL)
+ {
+ memlogf("[ERROR] Could not alloc dnbd3_client_t for new client.");
+ close(fd);
+ continue;
+ }
+ // TODO: Extend this if you ever want to add IPv6 (something like:)
+ // dnbd3_client->addrtype = AF_INET6;
+ // memcpy(dnbd3_client->ipaddr, &(client.sin6_addr), 16);
+ dnbd3_client->addrtype = AF_INET;
+ memcpy(dnbd3_client->ipaddr, &(client.sin_addr), 4);
dnbd3_client->sock = fd;
- dnbd3_client->thread = &thread;
dnbd3_client->image = NULL;
+ // This has to be done before creating the thread, otherwise a race condition might occur when the new thread dies faster than this thread adds the client to the list after creating the thread
pthread_spin_lock(&_spinlock);
_dnbd3_clients = g_slist_append(_dnbd3_clients, dnbd3_client);
pthread_spin_unlock(&_spinlock);
- pthread_create(&(thread), NULL, dnbd3_handle_query, (void *) (uintptr_t) dnbd3_client);
- pthread_detach(thread);
+ if (0 != pthread_create(&(dnbd3_client->thread), NULL, dnbd3_handle_query, (void *) (uintptr_t) dnbd3_client))
+ {
+ memlogf("[ERROR] Could not start thread for new client.");
+ pthread_spin_lock(&_spinlock);
+ _dnbd3_clients = g_slist_remove(_dnbd3_clients, dnbd3_client);
+ pthread_spin_unlock(&_spinlock);
+ g_free(dnbd3_client);
+ close(fd);
+ continue;
+ }
+ pthread_detach(dnbd3_client->thread);
}
dnbd3_cleanup();
diff --git a/src/server/server.h b/src/server/server.h
index f499acc..69d597b 100644
--- a/src/server/server.h
+++ b/src/server/server.h
@@ -30,33 +30,31 @@
typedef struct
{
- char *group;
- char *file;
- uint64_t filesize;
- size_t num_servers;
- char **servers;
- char *serverss;
- int vid;
- int rid;
- time_t atime;
- char *cache_map;
- char *cache_file;
+ char *name; // full name of image, eg. "uni-freiburg.ubuntu-12.04"
+ char *low_name; // full name of image, lowercased for comparison
+ int rid; // revision of provided image
+ char *file; // path to image file or device
+ uint64_t filesize; // size of image
+ dnbd3_server_entry_t servers[NUMBER_SERVERS]; // known alt servers that also offer that image
+ time_t atime; // last access time
+ uint8_t *cache_map; // cache map telling which parts are locally cached
+ char *cache_file; // path to local cache of image (in case the image is read from a dnbd3 device)
+ char working; // whether this image is considered working. local images are "working" if the local file exists, proxied images have to have at least one working upstream server or a complete local cache file
} dnbd3_image_t;
typedef struct
{
int sock;
- char ip[16];
- pthread_t *thread;
+ uint8_t ipaddr[16];
+ uint8_t addrtype; // ip version (AF_INET or AF_INET6)
+ pthread_t thread;
dnbd3_image_t *image;
- pthread_spinlock_t spinlock;
} dnbd3_client_t;
-extern GSList *_dnbd3_clients;
+extern GSList *_dnbd3_clients; // of dnbd3_client_t
extern pthread_spinlock_t _spinlock;
extern char *_config_file_name;
-extern dnbd3_image_t *_images;
-extern size_t _num_images;
+extern GSList *_dnbd3_images; // of dnbd3_image_t
void dnbd3_cleanup();
diff --git a/src/server/utils.c b/src/server/utils.c
index 0be1569..15e51f8 100644
--- a/src/server/utils.c
+++ b/src/server/utils.c
@@ -23,14 +23,124 @@
#include <sys/stat.h>
#include <pthread.h>
#include <string.h>
+#include <glib.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
#include "server.h"
#include "utils.h"
+#include "memlog.h"
+
+/**
+ * Parse IPv4 or IPv6 address in string representation to a suitable format usable by the BSD socket library
+ * @string eg. "1.2.3.4" or "2a01::10:5", optially with port appended, eg "1.2.3.4:6666" or "2a01::10:5:6666"
+ * @af will contain either AF_INET or AF_INET6
+ * @addr will contain the address in network representation
+ * @port will contain the port in network representation, defaulting to #define PORT if none was given
+ * returns 1 on success, 0 in failure. contents of af, addr and port are undefined in the latter case
+ */
+static char parse_address(char *string, uint8_t *af, uint8_t *addr, uint16_t *port)
+{
+ struct in_addr v4;
+ struct in6_addr v6;
+
+ // Try IPv4 without port
+ if (1 == inet_pton(AF_INET, string, &v4))
+ {
+ *af = AF_INET;
+ memcpy(addr, &v4, 4);
+ *port = htons(PORT);
+ return 1;
+ }
+ // Try IPv6 without port
+ if (1 == inet_pton(AF_INET6, string, &v6))
+ {
+ *af = AF_INET6;
+ memcpy(addr, &v6, 16);
+ *port = htons(PORT);
+ return 1;
+ }
+
+ // Scan for port
+ char *portpos = NULL, *ptr = string;
+ while (*ptr)
+ {
+ if (*ptr == ':') portpos = ptr;
+ ++ptr;
+ }
+ if (portpos == NULL) return 0; // No port in string
+ // Consider IP being surrounded by [ ]
+ if (*string == '[' && *(portpos-1) == ']')
+ {
+ ++string;
+ *(portpos-1) = '\0';
+ }
+ *portpos++ = '\0';
+ int p = atoi(portpos);
+ if (p < 1 || p > 65535) return 0; // Invalid port
+ *port = htons((uint16_t)p);
+
+ // Try IPv4 with port
+ if (1 == inet_pton(AF_INET, string, &v4))
+ {
+ *af = AF_INET;
+ memcpy(addr, &v4, 4);
+ return 1;
+ }
+ // Try IPv6 with port
+ if (1 == inet_pton(AF_INET6, string, &v6))
+ {
+ *af = AF_INET6;
+ memcpy(addr, &v6, 16);
+ return 1;
+ }
+
+ // FAIL
+ return 0;
+}
+
+static char is_valid_namespace(char *namespace)
+{
+ if (*namespace == '\0' || *namespace == '/') return 0; // Invalid: Length = 0 or starting with a slash
+ while (*namespace)
+ {
+ if (*namespace != '/' && *namespace != '-'
+ && (*namespace < 'a' || *namespace > 'z')
+ && (*namespace < 'A' || *namespace > 'Z')) return 0;
+ ++namespace;
+ }
+ if (*(namespace - 1) == '/') return 0; // Invalid: Ends in a slash
+ return 1;
+}
+
+static char is_valid_imagename(char *namespace)
+{
+ if (*namespace == '\0' || *namespace == ' ') return 0; // Invalid: Length = 0 or starting with a space
+ while (*namespace)
+ { // Check for invalid chars
+ if (*namespace != '.' && *namespace != '-' && *namespace != ' '
+ && *namespace != '(' && *namespace != ')'
+ && (*namespace < 'a' || *namespace > 'z')
+ && (*namespace < 'A' || *namespace > 'Z')) return 0;
+ ++namespace;
+ }
+ if (*(namespace - 1) == ' ') return 0; // Invalid: Ends in a space
+ return 1;
+}
+
+static void strtolower(char *string)
+{
+ while (*string)
+ {
+ if (*string >= 'A' && *string <= 'Z') *string += 32;
+ ++string;
+ }
+}
void dnbd3_load_config(char *file)
{
int fd;
- gint i;
+ gint i, j, k;
GKeyFile* gkf;
gkf = g_key_file_new();
@@ -40,111 +150,209 @@ void dnbd3_load_config(char *file)
exit(EXIT_FAILURE);
}
+ char *namespace = g_key_file_get_string(gkf, "settings", "default_namespace", NULL);
+ if (namespace && !is_valid_namespace(namespace))
+ {
+ memlogf("[ERROR] Ignoring default namespace: '%s' is not a valid namespace", namespace);
+ g_free(namespace);
+ namespace = NULL;
+ }
+
gchar **groups = NULL;
- groups = g_key_file_get_groups(gkf, &_num_images);
- _images = calloc(_num_images, sizeof(dnbd3_image_t));
+ gsize section_count;
+ groups = g_key_file_get_groups(gkf, &section_count);
- for (i = 0; i < _num_images; i++)
+ for (i = 0; i < section_count; i++)
{
- _images[i].group = malloc(strlen(groups[i]));
- strcpy(_images[i].group, groups[i]);
- _images[i].file = g_key_file_get_string(gkf, groups[i], "file", NULL);
- _images[i].servers = g_key_file_get_string_list(gkf, groups[i], "servers", &_images[i].num_servers, NULL);
- _images[i].serverss = g_key_file_get_string(gkf, groups[i], "servers", NULL);
- _images[i].vid = g_key_file_get_integer(gkf, groups[i], "vid", NULL);
- _images[i].rid = g_key_file_get_integer(gkf, groups[i], "rid", NULL);
- _images[i].cache_file = g_key_file_get_string(gkf, groups[i], "cache", NULL);
- _images[i].atime = 0;
-
- if (_images[i].num_servers > NUMBER_SERVERS)
- printf("WARN: Max allowed servers %i\n", NUMBER_SERVERS);
-
- fd = open(_images[i].file, O_RDONLY);
- if (fd > 0)
- _images[i].filesize = lseek(fd, 0, SEEK_END);
+ // Special group
+ if (strcmp(groups[i], "settings") == 0 || strcmp(groups[i], "trusted") == 0)
+ {
+ continue;
+ }
+
+ // An actual image definition
+
+ if (!is_valid_imagename(groups[i]))
+ {
+ memlogf("[ERROR] Invalid image name: '%s'", groups[i]);
+ continue;
+ }
+
+ int rid = g_key_file_get_integer(gkf, groups[i], "rid", NULL);
+ if (rid <= 0)
+ {
+ memlogf("[ERROR] Invalid rid '%d' for image '%s'", rid, groups[i]);
+ continue;
+ }
+
+ if (strchr(groups[i], '.') == NULL && namespace == NULL)
+ {
+ memlogf("[ERROR] Image '%s' has local name and no default namespace is defined; entry ignored.", groups[i]);
+ continue;
+ }
+
+ dnbd3_image_t *image = g_new0(dnbd3_image_t, 1);
+ if (image == NULL)
+ {
+ memlogf("[ERROR] Could not allocate dnbd3_image_t while reading config");
+ continue;
+ }
+
+ if (strchr(groups[i], '/') == NULL)
+ { // Local image, build global name
+ image->name = calloc(strlen(namespace) + strlen(groups[i]) + 2, sizeof(char));
+ sprintf(image->name, "%s/%s", namespace, groups[i]);
+ }
else
- printf("ERROR: Image file not found: %s\n", _images[i].file);
-
- close(fd);
-
- if (_images[i].cache_file)
{
- // read cache map from file
- _images[i].cache_map = calloc(_images[i].filesize >> 15, sizeof(char));
- memset(_images[i].cache_map, 0, (_images[i].filesize >> 15) * sizeof(char));
- char tmp[strlen(_images[i].cache_file)+4];
- strcpy(tmp, _images[i].cache_file);
- strcat(tmp, ".map");
- fd = open(tmp, O_RDONLY);
- if (fd > 0)
- read(fd, _images[i].cache_map, (_images[i].filesize >> 15) * sizeof(char));
- close(fd);
-
- // open cache file
- fd = open(_images[i].cache_file, O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR);
- if (fd < 1)
- printf("ERROR: Could't create cache file\n");
-
- if (_images[i].filesize != lseek(fd, 0, SEEK_END))
- fallocate(fd, 0, 0, _images[i].filesize);
-
- close(fd);
+ image->name = strdup(groups[i]);
}
- }
- g_strfreev(groups);
- g_key_file_free(gkf);
-}
-
-void dnbd3_reload_config(char* config_file_name)
-{
- int i, fd;
- pthread_spin_lock(&_spinlock);
- GSList *iterator = NULL;
- for (iterator = _dnbd3_clients; iterator; iterator = iterator->next)
- {
- dnbd3_client_t *client = iterator->data;
- pthread_spin_lock(&client->spinlock);
- client->image = NULL;
- }
-
- for (i = 0; i < _num_images; i++)
- {
- // save cache maps
- if (_images[i].cache_file)
+ if (dnbd3_get_image(image->name, rid, 0))
+ {
+ memlogf("[ERROR] Duplicate image in config: '%s' rid:%d", image->name, rid);
+ free(image->name);
+ g_free(image);
+ continue;
+ }
+
+ image->low_name = strdup(image->name);
+ strtolower(image->low_name);
+
+ image->rid = rid;
+ image->file = g_key_file_get_string(gkf, groups[i], "file", NULL);
+ char relayed = image->file == NULL || *image->file == '\0';
+ if (relayed && image->file)
+ {
+ g_free(image->file);
+ image->file = NULL;
+ }
+
+ if (relayed) // Image is relayed (this server acts as proxy)
+ {
+ if (strchr(groups[i], '.') == NULL)
+ {
+ memlogf("[ERROR] Relayed image without global name in config: '%s'", groups[i]);
+ g_free(image);
+ continue;
+ }
+ image->cache_file = g_key_file_get_string(gkf, groups[i], "cache", NULL);
+ if (image->cache_file && *image->cache_file == '\0') g_free(image->cache_file);
+ }
+ else // Image is a local one, open file to get size
{
- char tmp[strlen(_images[i].cache_file)+4];
- strcpy(tmp, _images[i].cache_file);
- strcat(tmp, ".map");
- fd = open(tmp, O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR);
-
- if (fd > 0)
- write(fd, _images[i].cache_map, (_images[i].filesize >> 15) * sizeof(char));
+ fd = open(image->file, O_RDONLY);
+ if (fd > 0) {
+ image->filesize = lseek(fd, 0, SEEK_END);
+ if (image->filesize & 4095) {
+ memlogf("[WARNING] Size of image '%s' is not a multiple of 4096. Last incomplete block will be ignored!", image->file);
+ image->filesize &= ~(uint64_t)4095;
+ }
+ close(fd);
+ image->working = 1;
+ } else {
+ memlogf("[ERROR] Image file not found: '%s'", image->file);
+ }
+ }
- close(fd);
+ // A list of servers that are known to also host or relay this image
+ gsize num_servers;
+ gchar **servers = g_key_file_get_string_list(gkf, groups[i], "servers", &num_servers, NULL);
+ if (servers) for (k = 0, j = 0; j < MIN(num_servers, NUMBER_SERVERS); ++j)
+ {
+ if (parse_address(servers[j], &(image->servers[k].addrtype), image->servers[k].ipaddr, &(image->servers[k].port)))
+ {
+ ++k; continue;
+ }
+ image->servers[k].addrtype = 0;
}
+ g_strfreev(servers);
- free(_images[i].group);
- free(_images[i].file);
- free(_images[i].servers);
- free(_images[i].serverss);
- free(_images[i].cache_file);
- free(_images[i].cache_map);
+ if (image->cache_file)
+ {
+ // Determine size of cached image
+ fd = open(image->cache_file, O_RDONLY);
+ if (fd > 0)
+ {
+ image->filesize = lseek(fd, 0, SEEK_END);
+ close(fd);
+ }
+ if (image->filesize & 4095)
+ { // Cache files should always be trincated to 4kib boundaries already
+ memlogf("[WARNING] Size of cache file '%s' is not a multiple of 4096. Something's fishy!", image->cache_file);
+ image->filesize = 0;
+ }
+ else if (image->filesize > 0)
+ {
+ const size_t map_len_bytes = (image->filesize + (1 << 15) - 1) >> 15;
+ image->cache_map = calloc(map_len_bytes, sizeof(uint8_t));
+ // read cache map from file
+ // one byte in the map covers 8 4kib blocks, so 32kib per byte
+ // "+ (1 << 15) - 1" is required to account for the last bit of
+ // the image that is smaller than 32kib
+ // this would be the case whenever the image file size is not a
+ // multiple of 32kib (= the number of blocks is not dividable by 8)
+ // ie: if the image is 49152 bytes and you do 49152 >> 15 you get 1,
+ // but you actually need 2 bytes to have a complete cache map
+ char tmp[strlen(image->cache_file)+4];
+ strcpy(tmp, image->cache_file);
+ strcat(tmp, ".map");
+ fd = open(tmp, O_RDONLY);
+ if (fd > 0)
+ {
+ read(fd, image->cache_map, map_len_bytes * sizeof(uint8_t));
+ close(fd);
+ // If the whole image is cached, mark it as working right away without waiting for an upstream server
+ image->working = 1;
+ for (j = 0; j < map_len_bytes - 1; ++j)
+ {
+ if (image->cache_map[j] != 0xFF)
+ {
+ image->working = 0;
+ break;
+ }
+ }
+ const int blocks_in_last_byte = (image->filesize >> 12) & 7;
+ uint8_t last_byte = 0;
+ if (blocks_in_last_byte == 0)
+ last_byte = 0xFF;
+ else
+ for (j = 0; j < k; ++j) last_byte = (last_byte << 1) | 1;
+ if ((image->cache_map[map_len_bytes-1] & last_byte) != last_byte)
+ image->working = 0;
+ else
+ memlogf("[INFO] Publishing relayed image '%s' because the local cache copy is complete", image->name);
+ }
+
+ /*
+ // TODO: Do this as soon as a connection to a upstream server is established
+ // open cache file
+ fd = open(_images[i].cache_file, O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR);
+ if (fd < 1)
+ memlogf("ERROR: Could't create cache file '%s'", _images[i].cache_file);
+
+ if (_images[i].filesize != lseek(fd, 0, SEEK_END))
+ fallocate(fd, 0, 0, _images[i].filesize);
+
+ close(fd);
+ */
+ }
+ } // end cache_file handling
+ pthread_spin_lock(&_spinlock);
+ _dnbd3_images = g_slist_append(_dnbd3_images, image);
+ pthread_spin_unlock(&_spinlock);
+ // DONE IMAGE
}
- _num_images = 0;
- free(_images);
- dnbd3_load_config(config_file_name);
- for (iterator = _dnbd3_clients; iterator; iterator = iterator->next)
- {
- dnbd3_client_t *client = iterator->data;
- pthread_spin_unlock(&client->spinlock);
- }
- pthread_spin_unlock(&_spinlock);
+ g_free(namespace);
+ g_strfreev(groups);
+ g_key_file_free(gkf);
}
int dnbd3_add_image(dnbd3_image_t *image, char *file)
{
+ return ERROR_IMAGE_ALREADY_EXISTS; // TODO: Make it work with image names
+ /*
FILE* f = fopen(image->file,"r");
if (f == NULL)
{
@@ -198,10 +406,13 @@ int dnbd3_add_image(dnbd3_image_t *image, char *file)
printf("ERROR: Config file is not writable: %s\n", file);
return ERROR_CONFIG_FILE_PERMISSIONS;
}
+ */
}
int dnbd3_del_image(dnbd3_image_t *image, char *file)
{
+ return ERROR_IMAGE_NOT_FOUND; // TODO: Make it work with image names
+ /*
if (image->rid == 0)
{
printf("ERROR: Delete with rid=0 is not allowed\n");
@@ -254,38 +465,47 @@ int dnbd3_del_image(dnbd3_image_t *image, char *file)
printf("ERROR: Config file is not writable: %s\n", file);
return ERROR_CONFIG_FILE_PERMISSIONS;
}
+ */
}
-dnbd3_image_t* dnbd3_get_image(int vid, int rid)
+dnbd3_image_t* dnbd3_get_image(char *name_orig, int rid, const char do_lock)
{
- int i, max = 0;
- dnbd3_image_t *result = NULL;
- for (i = 0; i < _num_images; ++i)
+ dnbd3_image_t *result = NULL, *image;
+ GSList *iterator;
+ char name[strlen(name_orig) + 1];
+ strcpy(name, name_orig);
+ strtolower(name);
+ if (do_lock) pthread_spin_lock(&_spinlock);
+ for (iterator = _dnbd3_images; iterator; iterator = iterator->next)
{
+ image = iterator->data;
if (rid != 0) // rid was specified
{
- if (_images[i].vid == vid && _images[i].rid == rid)
- result = &_images[i];
+ if (image->rid == rid && strcmp(name, image->low_name) == 0)
+ {
+ result = image;
+ break;
+ }
}
else // search max. rid available
{
- if (_images[i].vid == vid && _images[i].rid > max)
+ if (strcmp(name, image->low_name) == 0 && (result == NULL || result->rid < image->rid))
{
- result = &_images[i];
- max = _images[i].rid;
+ result = image;
}
}
}
+ if (do_lock) pthread_spin_unlock(&_spinlock);
return result;
}
void dnbd3_handle_sigpipe(int signum)
{
- printf("ERROR: SIGPIPE received!\n");
+ memlogf("ERROR: SIGPIPE received!\n");
}
void dnbd3_handle_sigterm(int signum)
{
- printf("INFO: SIGTERM or SIGINT received!\n");
+ memlogf("INFO: SIGTERM or SIGINT received!\n");
dnbd3_cleanup();
}
diff --git a/src/server/utils.h b/src/server/utils.h
index d9d3ebc..ec83e64 100644
--- a/src/server/utils.h
+++ b/src/server/utils.h
@@ -35,11 +35,10 @@
#define ERROR_UNKNOWN 10
void dnbd3_load_config(char *file);
-void dnbd3_reload_config(char* config_file_name);
int dnbd3_add_image(dnbd3_image_t *image, char *file);
int dnbd3_del_image(dnbd3_image_t *image, char *file);
-dnbd3_image_t* dnbd3_get_image(int vid, int rid);
+dnbd3_image_t* dnbd3_get_image(char *name, int rid, const char do_lock);
void dnbd3_handle_sigpipe(int signum);
void dnbd3_handle_sigterm(int signum);
diff --git a/src/types.h b/src/types.h
index 04dcf85..5332dc4 100644
--- a/src/types.h
+++ b/src/types.h
@@ -29,10 +29,41 @@
#define IOCTL_CLOSE _IO(0xab, 2)
#define IOCTL_SWITCH _IO(0xab, 3)
+#if defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && __BYTE_ORDER == __BIG_ENDIAN) || (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+const uint16_t dnbd3_packet_magic = (0x73 << 8) | (0x72);
+// Flip bytes around on big endian when putting stuff on the net
+#define net_order_64(a) ((uint64_t)((((a) & 0xFFull) << 56) | (((a) & 0xFF00ull) << 40) | (((a) & 0xFF0000ull) << 24) | (((a) & 0xFF000000ull) << 8) | (((a) & 0xFF00000000ull) >> 8) | (((a) & 0xFF0000000000ull) >> 24) | (((a) & 0xFF000000000000ull) >> 40) | (((a) & 0xFF00000000000000ull) >> 56)))
+#define net_order_32(a) ((uint32_t)((((a) & (uint32_t)0xFF) << 24) | (((a) & (uint32_t)0xFF00) << 8) | (((a) & (uint32_t)0xFF0000) >> 8) | (((a) & (uint32_t)0xFF000000) >> 24)))
+#define net_order_16(a) ((uint16_t)((((a) & (uint16_t)0xFF) << 8) | (((a) & (uint16_t)0xFF00) >> 8)))
+#define fixup_request(a) do { \
+ (a).cmd = net_order_16((a).cmd); \
+ (a).size = net_order_32((a).size); \
+ (a).offset = net_order_64((a).offset); \
+} while (0)
+#define fixup_reply(a) do { \
+ (a).cmd = net_order_16((a).cmd); \
+ (a).size = net_order_32((a).size); \
+} while (0)
+#elif defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN) || (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+static const uint16_t dnbd3_packet_magic = (0x73) | (0x72 << 8);
+// Make little endian our network byte order as probably 99.999% of machines this will be used on are LE
+#define net_order_64(a) (a)
+#define net_order_32(a) (a)
+#define net_order_16(a) (a)
+#define fixup_request(a) while(0)
+#define fixup_reply(a) while(0)
+#else
+#error "Unknown Endianness"
+#endif
+
typedef struct
{
- char *host;
- int vid;
+ uint16_t len;
+ uint8_t addrtype;
+ uint8_t addr[16]; // network representation
+ uint16_t port; // network representation
+ uint16_t imgnamelen;
+ char *imgname;
int rid;
int read_ahead_kb;
} dnbd3_ioctl_t;
@@ -41,28 +72,37 @@ typedef struct
#define CMD_GET_BLOCK 1
#define CMD_GET_SIZE 2
#define CMD_GET_SERVERS 3
+#define CMD_ERROR 4
#pragma pack(1)
typedef struct
{
+ uint16_t magic; // 2byte
uint16_t cmd; // 2byte
- uint16_t vid; // 2byte
- uint16_t rid; // 2byte
- uint64_t offset; // 8byte
- uint64_t size; // 8byte
- char handle[8]; // 8byte
+ uint32_t size; // 4byte
+ uint64_t offset; // 8byte
+ uint64_t handle; // 8byte
} dnbd3_request_t;
#pragma pack(0)
#pragma pack(1)
typedef struct
{
- uint16_t cmd; // 2byte
- uint16_t vid; // 2byte
- uint16_t rid; // 2byte
- uint64_t size; // 8byte
- char handle[8]; // 8byte
+ uint16_t magic; // 2byte
+ uint16_t cmd; // 2byte
+ uint32_t size; // 4byte
+ uint64_t handle; // 8byte
} dnbd3_reply_t;
#pragma pack(0)
+#pragma pack(1)
+typedef struct
+{
+ uint8_t ipaddr[16]; // 16byte (network representation, so it can be directly passed to socket functions)
+ uint16_t port; // 2byte (network representation, so it can be directly passed to socket functions)
+ uint8_t addrtype; // 1byte (ip version. AF_INET or AF_INET6. 0 means this struct is empty and should be ignored)
+ uint8_t failures; // 1byte (number of times server has been consecutively unreachable)
+} dnbd3_server_entry_t;
+#pragma pack(0)
+
#endif /* TYPES_H_ */