summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorsr2012-08-27 21:02:49 +0200
committersr2012-08-27 21:02:49 +0200
commitde01183aa40dbbd274e18f681d8a255a886f493e (patch)
treef614e764704aec26df15df68c633178064c60a41
parent[KERNEL] Make rtt threshold relative (diff)
downloaddnbd3-de01183aa40dbbd274e18f681d8a255a886f493e.tar.gz
dnbd3-de01183aa40dbbd274e18f681d8a255a886f493e.tar.xz
dnbd3-de01183aa40dbbd274e18f681d8a255a886f493e.zip
[KERNEL] Refactor and extend sysfs (add data the server will need in proxy mode)
[SERVER] Use MSG_MORE instead of cork/uncork to save two syscalls [KERNEL] Fail-Counter for alt servers, ignore servers that fail too often [KERNEL] Add new alt servers to list, instead of replacing the old list [*] Add CMD_LATEST_RID to tell client about new revisions
-rw-r--r--CMakeLists.txt4
-rw-r--r--src/client/client.c8
-rw-r--r--src/kernel/blk.c13
-rw-r--r--src/kernel/dnbd3.h4
-rw-r--r--src/kernel/net.c181
-rw-r--r--src/kernel/sysfs.c137
-rw-r--r--src/server/ipc.c8
-rw-r--r--src/server/net.c38
-rw-r--r--src/server/server.c19
-rw-r--r--src/server/server.h5
-rw-r--r--src/server/utils.c4
-rw-r--r--src/types.h9
12 files changed, 252 insertions, 178 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1c44769..a959f66 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,9 +6,9 @@ PROJECT(dnbd3)
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
SET(CMAKE_BUILD_TYPE Debug)
-SET(CMAKE_C_FLAGS_DEBUG "-O0 -g -Wall -Wno-unused-result -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64")
+SET(CMAKE_C_FLAGS_DEBUG "-O0 -g -Wall -Wno-unused-result -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_DEBUG")
SET(CMAKE_C_FLAGS_RELEASE "-O2 -Wno-unused-result -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64")
-SET(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -Wall -Wno-unused-result -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64")
+SET(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -Wall -Wno-unused-result -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_DEBUG")
SET(CMAKE_CXX_FLAGS_RELEASE "-O2 -Wno-unused-result -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64" )
ADD_DEFINITIONS(-DIPC_TCP)
diff --git a/src/client/client.c b/src/client/client.c
index 57762cc..93aaeb7 100644
--- a/src/client/client.c
+++ b/src/client/client.c
@@ -170,7 +170,7 @@ int main(int argc, char *argv[])
const int ret = ioctl(fd, IOCTL_CLOSE, &msg);
if (ret < 0)
{
- printf("ERROR: ioctl not successful (close, %s (%d))\n", strerror(ret), ret);
+ printf("ERROR: ioctl not successful (close, %s (%d))\n", strerror(-ret), ret);
exit(EXIT_FAILURE);
}
@@ -187,7 +187,7 @@ int main(int argc, char *argv[])
const int ret = ioctl(fd, IOCTL_SWITCH, &msg);
if (ret < 0)
{
- printf("ERROR: ioctl not successful (switch, %s (%d))\n", strerror(ret), ret);
+ printf("ERROR: ioctl not successful (switch, %s (%d))\n", strerror(-ret), ret);
exit(EXIT_FAILURE);
}
@@ -205,7 +205,7 @@ int main(int argc, char *argv[])
const int ret = ioctl(fd, IOCTL_OPEN, &msg);
if (ret < 0)
{
- printf("ERROR: ioctl not successful (connect, %s (%d))\n", strerror(ret), ret);
+ printf("ERROR: ioctl not successful (connect, %s (%d))\n", strerror(-ret), ret);
exit(EXIT_FAILURE);
}
@@ -242,7 +242,7 @@ int main(int argc, char *argv[])
const int ret = ioctl(fd, IOCTL_OPEN, &msg);
if (ret < 0)
{
- printf("ERROR: ioctl not successful (config file, %s (%d))\n", strerror(ret), ret);
+ printf("ERROR: ioctl not successful (config file, %s (%d))\n", strerror(-ret), ret);
exit(EXIT_FAILURE);
}
diff --git a/src/kernel/blk.c b/src/kernel/blk.c
index 70e8419..21a14f0 100644
--- a/src/kernel/blk.c
+++ b/src/kernel/blk.c
@@ -35,14 +35,14 @@ int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor)
INIT_LIST_HEAD(&dev->request_queue_send);
INIT_LIST_HEAD(&dev->request_queue_receive);
- memset(&dev->cur_server, 0, sizeof(dnbd3_server_t));
+ memset(&dev->cur_server, 0, sizeof(dev->cur_server));
+ memset(&dev->initial_server, 0, sizeof(dev->initial_server));
dev->better_sock = NULL;
dev->imgname = NULL;
dev->rid = 0;
dev->update_available = 0;
- dev->alt_servers_num = 0;
- memset(dev->alt_servers, 0, sizeof(dnbd3_server_t)*NUMBER_SERVERS);
+ memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])*NUMBER_SERVERS);
dev->thread_send = NULL;
dev->thread_receive = NULL;
dev->thread_discover = NULL;
@@ -103,16 +103,16 @@ int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, u
dnbd3_device_t *dev = bdev->bd_disk->private_data;
struct request_queue *blk_queue = dev->disk->queue;
char *imgname = NULL;
- dnbd3_ioctl_t *msg = kmalloc(sizeof(dnbd3_ioctl_t), GFP_KERNEL);
+ dnbd3_ioctl_t *msg = kmalloc(sizeof(*msg), GFP_KERNEL);
if (msg == NULL) return -ENOMEM;
copy_from_user((char *)msg, (char *)arg, 2);
- if (msg->len != sizeof(dnbd3_ioctl_t))
+ if (msg->len != sizeof(*msg))
{
result = -ENOEXEC;
goto cleanup_return;
}
- copy_from_user((char *)msg, (char *)arg, sizeof(dnbd3_ioctl_t));
+ copy_from_user((char *)msg, (char *)arg, sizeof(*msg));
if (msg->imgname != NULL && msg->imgnamelen > 0)
{
imgname = kmalloc(msg->imgnamelen + 1, GFP_KERNEL);
@@ -142,6 +142,7 @@ int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, u
memcpy(dev->cur_server.hostaddr, msg->addr, 16);
dev->cur_server.port = msg->port;
dev->cur_server.hostaddrtype = msg->addrtype;
+ dev->cur_server.failures = 0;
memcpy(&dev->initial_server, &dev->cur_server, sizeof(dev->initial_server));
dev->imgname = imgname;
dev->rid = msg->rid;
diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h
index 51253a1..7c550e3 100644
--- a/src/kernel/dnbd3.h
+++ b/src/kernel/dnbd3.h
@@ -40,8 +40,7 @@ typedef struct
uint16_t protocol_version; // dnbd3 protocol version of this server
uint8_t hostaddr[16]; // Address in network representation (IPv4 or IPv6)
uint8_t hostaddrtype; // Address type (AF_INET or AF_INET6)
- uint8_t skip_count; // Do not check this server the next skip_count times
- struct kobject kobj; // SysFS
+ uint8_t failures; // How many times the server was unreachable
} dnbd3_server_t;
typedef struct
@@ -60,7 +59,6 @@ typedef struct
char *imgname;
serialized_buffer_t payload_buffer;
int rid, update_available;
- int alt_servers_num; // number of currently known alt servers
dnbd3_server_t alt_servers[NUMBER_SERVERS]; // array of alt servers
int new_servers_num; // number of new alt servers that are waiting to be copied to above array
dnbd3_server_entry_t new_servers[NUMBER_SERVERS]; // pending new alt servers
diff --git a/src/kernel/net.c b/src/kernel/net.c
index 6a3b02e..91ea7cd 100644
--- a/src/kernel/net.c
+++ b/src/kernel/net.c
@@ -36,6 +36,39 @@ static inline int is_same_server(const dnbd3_server_t * const a, const dnbd3_ser
&& (0 == memcmp(a->hostaddr, b->hostaddr, (a->hostaddrtype == AF_INET ? 4 : 16)));
}
+static inline dnbd3_server_t* get_existing_server(const dnbd3_server_entry_t * const newserver, dnbd3_device_t * const dev)
+{
+ int i;
+ for (i = 0; i < NUMBER_SERVERS; ++i)
+ {
+ if ((newserver->hostaddrtype == dev->alt_servers[i].hostaddrtype)
+ && (newserver->port == dev->alt_servers[i].port)
+ && (0 == memcmp(newserver->hostaddr, dev->alt_servers[i].hostaddr, (newserver->hostaddrtype == AF_INET ? 4 : 16))))
+ {
+ return &dev->alt_servers[i];
+ break;
+ }
+ }
+ return NULL;
+}
+
+static inline dnbd3_server_t* get_free_alt_server(dnbd3_device_t * const dev)
+{
+ int i;
+ for (i = 0; i < NUMBER_SERVERS; ++i)
+ {
+ if (dev->alt_servers[i].hostaddrtype == 0)
+ return &dev->alt_servers[i];
+ }
+ for (i = 0; i < NUMBER_SERVERS; ++i)
+ {
+ if (dev->alt_servers[i].failures > 10)
+ return &dev->alt_servers[i];
+ }
+ return NULL;
+}
+
+
int dnbd3_net_connect(dnbd3_device_t *dev)
{
struct sockaddr_in sin;
@@ -195,8 +228,9 @@ int dnbd3_net_connect(dnbd3_device_t *dev)
if (req1) // This connection is established to the initial server (from the ioctl call)
{
- // Set number of known alt servers to 0
- dev->alt_servers_num = 0;
+ // Forget all known alt servers
+ memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])*NUMBER_SERVERS);
+ memcpy(dev->alt_servers, &dev->initial_server, sizeof(dev->alt_servers[0]));
// And then enqueue request to request_queue_send for a fresh list of alt servers
req1->cmd_type = REQ_TYPE_SPECIAL;
req1->cmd_flags = CMD_GET_SERVERS;
@@ -324,6 +358,7 @@ int dnbd3_net_discover(void *data)
dnbd3_request_t dnbd3_request;
dnbd3_reply_t dnbd3_reply;
+ dnbd3_server_t *alt_server;
struct msghdr msg;
struct kvec iov[2];
@@ -334,6 +369,7 @@ int dnbd3_net_discover(void *data)
struct timeval start, end;
unsigned long rtt, best_rtt = 0;
+ unsigned long irqflags;
int i, best_server, current_server;
int turn = 0;
int ready = 0;
@@ -367,39 +403,46 @@ int dnbd3_net_discover(void *data)
dev->discover = 0;
// Check if the list of alt servers needs to be updated and do so if neccessary
- spin_lock_irq(&dev->blk_lock);
if (dev->new_servers_num)
{
+ spin_lock_irqsave(&dev->blk_lock, irqflags);
for (i = 0; i < dev->new_servers_num; ++i)
{
- memcpy(dev->alt_servers[i].hostaddr, dev->new_servers[i].ipaddr, 16);
- dev->alt_servers[i].hostaddrtype = dev->new_servers[i].addrtype;
- dev->alt_servers[i].port = dev->new_servers[i].port;
- dev->alt_servers[i].rtts[0] = dev->alt_servers[i].rtts[1]
- = dev->alt_servers[i].rtts[2] = dev->alt_servers[i].rtts[3]
+ if (dev->new_servers[i].hostaddrtype != AF_INET) // Invalid entry.. (Add IPv6)
+ continue;
+ alt_server = get_existing_server(&dev->new_servers[i], dev);
+ if (alt_server != NULL) // Server already known, reset fail counter
+ {
+ alt_server->failures = 0;
+ continue;
+ }
+ alt_server = get_free_alt_server(dev);
+ if (alt_server == NULL) // All NUMBER_SERVERS slots are taken, ignore entry
+ continue;
+ // Add new server entry
+ memcpy(alt_server->hostaddr, dev->new_servers[i].hostaddr, 16);
+ alt_server->hostaddrtype = dev->new_servers[i].hostaddrtype;
+ alt_server->port = dev->new_servers[i].port;
+ alt_server->rtts[0] = alt_server->rtts[1]
+ = alt_server->rtts[2] = alt_server->rtts[3]
= RTT_UNREACHABLE;
- dev->alt_servers[i].protocol_version = 0;
- dev->alt_servers[i].skip_count = 0;
+ alt_server->protocol_version = 0;
+ alt_server->failures = 0;
}
- dev->alt_servers_num = dev->new_servers_num;
dev->new_servers_num = 0;
+ spin_unlock_irqrestore(&dev->blk_lock, irqflags);
}
- spin_unlock_irq(&dev->blk_lock);
current_server = best_server = -1;
best_rtt = 0xFFFFFFFul;
- for (i=0; i < dev->alt_servers_num; ++i)
+ for (i=0; i < NUMBER_SERVERS; ++i)
{
- if (dev->alt_servers[i].hostaddrtype != AF_INET) // add IPv6....
+ if (dev->alt_servers[i].hostaddrtype == 0) // Empty slot
+ continue;
+ if (!dev->panic && dev->alt_servers[i].failures > 50) // If not in panic mode, skip server if it failed too many times
continue;
- if (!dev->panic && dev->alt_servers[i].skip_count) // If not in panic mode, skip server if indicated
- {
- --dev->alt_servers[i].skip_count;
- continue;
- }
-
// Initialize socket and connect
if (sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock) < 0)
{
@@ -569,9 +612,7 @@ int dnbd3_net_discover(void *data)
rtt = ( dev->alt_servers[i].rtts[0]
+ dev->alt_servers[i].rtts[1]
+ dev->alt_servers[i].rtts[2]
- + dev->alt_servers[i].rtts[3] ) >> 2; // ">> 2" == "/ 4", needed to prevent 64bit division on 32bit
- printk("RTT: %luµs\n", rtt);
-
+ + dev->alt_servers[i].rtts[3] ) / 4;
if (best_rtt > rtt)
{ // This one is better, keep socket open in case we switch
@@ -596,16 +637,17 @@ int dnbd3_net_discover(void *data)
continue;
- error:
- sock_release(sock);
- sock = NULL;
- dev->alt_servers[i].rtts[turn] = RTT_UNREACHABLE;
- if (is_same_server(&dev->cur_server, &dev->alt_servers[i]))
- {
- dev->cur_rtt = RTT_UNREACHABLE;
- current_server = i;
- }
- continue;
+ error:
+ ++dev->alt_servers[i].failures;
+ sock_release(sock);
+ sock = NULL;
+ dev->alt_servers[i].rtts[turn] = RTT_UNREACHABLE;
+ if (is_same_server(&dev->cur_server, &dev->alt_servers[i]))
+ {
+ dev->cur_rtt = RTT_UNREACHABLE;
+ current_server = i;
+ }
+ continue;
}
if (dev->panic && ++dev->panic_count == 21)
@@ -663,6 +705,8 @@ int dnbd3_net_send(void *data)
struct msghdr msg;
struct kvec iov;
+ unsigned long irqflags;
+
init_msghdr(msg);
dnbd3_request.magic = dnbd3_packet_magic;
@@ -678,14 +722,14 @@ int dnbd3_net_send(void *data)
break;
// extract block request
- spin_lock_irq(&dev->blk_lock); // TODO: http://www.linuxjournal.com/article/5833 says spin_lock_irq should not be used in general, but article is 10 years old
+ spin_lock_irqsave(&dev->blk_lock, irqflags);
if (list_empty(&dev->request_queue_send))
{
- spin_unlock_irq(&dev->blk_lock);
+ spin_unlock_irqrestore(&dev->blk_lock, irqflags);
continue;
}
blk_request = list_entry(dev->request_queue_send.next, struct request, queuelist);
- spin_unlock_irq(&dev->blk_lock);
+ spin_unlock_irqrestore(&dev->blk_lock, irqflags);
// what to do?
switch (blk_request->cmd_type)
@@ -695,25 +739,25 @@ int dnbd3_net_send(void *data)
dnbd3_request.offset = blk_rq_pos(blk_request) << 9; // *512
dnbd3_request.size = blk_rq_bytes(blk_request); // bytes left to complete entire request
// enqueue request to request_queue_receive
- spin_lock_irq(&dev->blk_lock);
+ spin_lock_irqsave(&dev->blk_lock, irqflags);
list_del_init(&blk_request->queuelist);
list_add_tail(&blk_request->queuelist, &dev->request_queue_receive);
- spin_unlock_irq(&dev->blk_lock);
+ spin_unlock_irqrestore(&dev->blk_lock, irqflags);
break;
case REQ_TYPE_SPECIAL:
dnbd3_request.cmd = blk_request->cmd_flags;
dnbd3_request.size = 0;
- spin_lock_irq(&dev->blk_lock);
+ spin_lock_irqsave(&dev->blk_lock, irqflags);
list_del_init(&blk_request->queuelist);
- spin_unlock_irq(&dev->blk_lock);
+ spin_unlock_irqrestore(&dev->blk_lock, irqflags);
break;
default:
printk("ERROR: Unknown command (send)\n");
- spin_lock_irq(&dev->blk_lock);
+ spin_lock_irqsave(&dev->blk_lock, irqflags);
list_del_init(&blk_request->queuelist);
- spin_unlock_irq(&dev->blk_lock);
+ spin_unlock_irqrestore(&dev->blk_lock, irqflags);
continue;
}
@@ -758,8 +802,9 @@ int dnbd3_net_receive(void *data)
struct req_iterator iter;
struct bio_vec *bvec;
void *kaddr;
- unsigned long flags;
+ unsigned long irqflags;
sigset_t blocked, oldset;
+ uint16_t rid;
int count, remaining, ret;
@@ -811,7 +856,7 @@ int dnbd3_net_receive(void *data)
case CMD_GET_BLOCK:
// search for replied request in queue
blk_request = NULL;
- spin_lock_irq(&dev->blk_lock);
+ spin_lock_irqsave(&dev->blk_lock, irqflags);
list_for_each_entry_safe(received_request, tmp_request, &dev->request_queue_receive, queuelist)
{
if ((uint64_t)(uintptr_t)received_request == dnbd3_reply.handle) // Double cast to prevent warning on 32bit
@@ -820,7 +865,7 @@ int dnbd3_net_receive(void *data)
break;
}
}
- spin_unlock_irq(&dev->blk_lock);
+ spin_unlock_irqrestore(&dev->blk_lock, irqflags);
if (blk_request == NULL)
{
printk("ERROR: Received block data for unrequested handle (%llu: %llu).\n",
@@ -846,10 +891,10 @@ int dnbd3_net_receive(void *data)
sigprocmask(SIG_SETMASK, &oldset, NULL);
}
- spin_lock_irqsave(&dev->blk_lock, flags);
+ spin_lock_irqsave(&dev->blk_lock, irqflags);
list_del_init(&blk_request->queuelist);
__blk_end_request_all(blk_request, 0);
- spin_unlock_irqrestore(&dev->blk_lock, flags);
+ spin_unlock_irqrestore(&dev->blk_lock, irqflags);
continue;
case CMD_GET_SERVERS:
@@ -858,9 +903,9 @@ int dnbd3_net_receive(void *data)
remaining = dnbd3_reply.size;
goto clear_remaining_payload;
}
- spin_lock_irq(&dev->blk_lock);
+ spin_lock_irqsave(&dev->blk_lock, irqflags);
dev->new_servers_num = 0;
- spin_unlock_irq(&dev->blk_lock);
+ spin_unlock_irqrestore(&dev->blk_lock, irqflags);
count = MIN(NUMBER_SERVERS, dnbd3_reply.size / sizeof(dnbd3_server_entry_t));
if (count != 0)
@@ -874,16 +919,16 @@ int dnbd3_net_receive(void *data)
}
for (remaining = 0; remaining < count; ++remaining)
{
- if (dev->new_servers[remaining].addrtype == AF_INET)
- printk("New Server: %pI4 : %d\n", dev->new_servers[remaining].ipaddr, (int)ntohs(dev->new_servers[remaining].port));
- else if (dev->new_servers[remaining].addrtype == AF_INET6)
- printk("New Server: %pI6 : %d\n", dev->new_servers[remaining].ipaddr, (int)ntohs(dev->new_servers[remaining].port));
+ if (dev->new_servers[remaining].hostaddrtype == AF_INET)
+ printk("New Server: %pI4 : %d\n", dev->new_servers[remaining].hostaddr, (int)ntohs(dev->new_servers[remaining].port));
+ else if (dev->new_servers[remaining].hostaddrtype == AF_INET6)
+ printk("New Server: %pI6 : %d\n", dev->new_servers[remaining].hostaddr, (int)ntohs(dev->new_servers[remaining].port));
else
- printk("New Server of unknown address type (%d)\n", (int)dev->new_servers[remaining].addrtype);
+ printk("New Server of unknown address type (%d)\n", (int)dev->new_servers[remaining].hostaddrtype);
}
- spin_lock_irq(&dev->blk_lock);
+ spin_lock_irqsave(&dev->blk_lock, irqflags);
dev->new_servers_num = count;
- spin_unlock_irq(&dev->blk_lock);
+ spin_unlock_irqrestore(&dev->blk_lock, irqflags);
// TODO: Re-Add update check
}
// If there were more servers than accepted, remove the remaining data from the socket buffer
@@ -904,6 +949,26 @@ clear_remaining_payload:
}
continue;
+ case CMD_LATEST_RID:
+ if (dnbd3_reply.size != 2)
+ {
+ printk("Error: CMD_LATEST_RID.size != 2.\n");
+ continue;
+ }
+ iov.iov_base = &rid;
+ iov.iov_len = sizeof(rid);
+ if (kernel_recvmsg(dev->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags) <= 0)
+ {
+ printk("Error: Could not receive CMD_LATEST_RID payload.\n");
+ }
+ else
+ {
+ rid = net_order_16(rid);
+ printk("Latest rid of %s is %d (currently using %d)\n", dev->imgname, (int)rid, (int)dev->rid);
+ dev->update_available = (rid > dev->rid ? 1 : 0);
+ }
+ continue;
+
case CMD_KEEPALIVE:
if (dnbd3_reply.size != 0)
printk("Error: keep alive packet with payload.\n");
@@ -925,13 +990,13 @@ error:
while (!list_empty(&dev->request_queue_receive))
{
printk("WARN: Request queue was not empty on %s\n", dev->disk->disk_name);
- spin_lock_irq(&dev->blk_lock);
+ spin_lock_irqsave(&dev->blk_lock, irqflags);
list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_receive, queuelist)
{
list_del_init(&blk_request->queuelist);
list_add(&blk_request->queuelist, &dev->request_queue_send);
}
- spin_unlock_irq(&dev->blk_lock);
+ spin_unlock_irqrestore(&dev->blk_lock, irqflags);
}
if (dev->sock)
kernel_sock_shutdown(dev->sock, SHUT_RDWR);
diff --git a/src/kernel/sysfs.c b/src/kernel/sysfs.c
index df94d20..ff3b5f9 100644
--- a/src/kernel/sysfs.c
+++ b/src/kernel/sysfs.c
@@ -23,51 +23,84 @@
#include "sysfs.h"
#include "utils.h"
-ssize_t show_cur_server_ip(char *buf, dnbd3_device_t *dev)
+#ifndef MIN
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#endif
+
+ssize_t show_cur_server_addr(char *buf, dnbd3_device_t *dev)
{
- return sprintf(buf, "%pI4\n", dev->cur_server.hostaddr);
+ if (dev->cur_server.hostaddrtype == AF_INET)
+ return MIN(snprintf(buf, PAGE_SIZE, "%pI4,%d\n", dev->cur_server.hostaddr, (int)ntohs(dev->cur_server.port)), PAGE_SIZE);
+ return MIN(snprintf(buf, PAGE_SIZE, "%pI6,%d\n", dev->cur_server.hostaddr, (int)ntohs(dev->cur_server.port)), PAGE_SIZE);
}
ssize_t show_cur_server_rtt(char *buf, dnbd3_device_t *dev)
{
- return sprintf(buf, "%llu\n", (unsigned long long)dev->cur_rtt);
+ return MIN(snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)dev->cur_rtt), PAGE_SIZE);
}
ssize_t show_alt_server_num(char *buf, dnbd3_device_t *dev)
{
- return sprintf(buf, "%d\n", dev->alt_servers_num);
+ int i, num = 0;
+ for (i = 0; i < NUMBER_SERVERS; ++i)
+ {
+ if (dev->alt_servers[i].hostaddrtype) ++num;
+ }
+ return MIN(snprintf(buf, PAGE_SIZE, "%d\n", num), PAGE_SIZE);
+}
+
+ssize_t show_alt_servers(char *buf, dnbd3_device_t *dev)
+{
+ int i, size = PAGE_SIZE, ret;
+ for (i = 0; i < NUMBER_SERVERS; ++i)
+ {
+ if (dev->alt_servers[i].hostaddrtype == AF_INET)
+ ret = MIN(snprintf(buf, size, "%pI4,%d,%llu,%d\n",
+ dev->alt_servers[i].hostaddr,
+ (int)ntohs(dev->alt_servers[i].port),
+ (unsigned long long)((dev->alt_servers[i].rtts[0] + dev->alt_servers[i].rtts[1] + dev->alt_servers[i].rtts[2] + dev->alt_servers[i].rtts[3]) / 4),
+ (int)dev->alt_servers[i].failures)
+ , size);
+ else if (dev->alt_servers[i].hostaddrtype == AF_INET6)
+ ret = MIN(snprintf(buf, size, "%pI6,%d,%llu,%d\n",
+ dev->alt_servers[i].hostaddr,
+ (int)ntohs(dev->alt_servers[i].port),
+ (unsigned long long)((dev->alt_servers[i].rtts[0] + dev->alt_servers[i].rtts[1] + dev->alt_servers[i].rtts[2] + dev->alt_servers[i].rtts[3]) / 4),
+ (int)dev->alt_servers[i].failures)
+ , size);
+ else
+ continue;
+ size -= ret;
+ buf += ret;
+ if (size <= 0)
+ {
+ size = 0;
+ break;
+ }
+ }
+ return PAGE_SIZE - size;
}
ssize_t show_image_name(char *buf, dnbd3_device_t *dev)
{
if (dev->imgname == NULL) return sprintf(buf, "(null)");
- return sprintf(buf, "%s\n", dev->imgname);
+ return MIN(snprintf(buf, PAGE_SIZE, "%s\n", dev->imgname), PAGE_SIZE);
}
ssize_t show_rid(char *buf, dnbd3_device_t *dev)
{
- return sprintf(buf, "%d\n", dev->rid);
+ return MIN(snprintf(buf, PAGE_SIZE, "%d\n", dev->rid), PAGE_SIZE);
}
ssize_t show_update_available(char *buf, dnbd3_device_t *dev)
{
- return sprintf(buf, "%d\n", dev->update_available);
-}
-
-ssize_t show_alt_server_ip(char *buf, dnbd3_server_t *srv)
-{
- return sprintf(buf, "%pI4\n", srv->hostaddr);
-}
-
-ssize_t show_alt_server_rtt(char *buf, dnbd3_server_t *srv)
-{
- return sprintf(buf, "%llu\n", (uint64_t)((srv->rtts[0]+srv->rtts[1]+srv->rtts[2]+srv->rtts[3]) / 4));
+ return MIN(snprintf(buf, PAGE_SIZE, "%d\n", dev->update_available), PAGE_SIZE);
}
-device_attr_t cur_server_ip =
+device_attr_t cur_server_addr =
{
- .attr = {.name = "cur_server_ip", .mode = 0444 },
- .show = show_cur_server_ip,
+ .attr = {.name = "cur_server_addr", .mode = 0444 },
+ .show = show_cur_server_addr,
.store = NULL,
};
@@ -85,6 +118,13 @@ device_attr_t alt_server_num =
.store = NULL,
};
+device_attr_t alt_servers =
+{
+ .attr = {.name = "alt_servers", .mode = 0444 },
+ .show = show_alt_servers,
+ .store = NULL,
+};
+
device_attr_t image_name =
{
.attr = {.name = "image_name", .mode = 0444 },
@@ -106,20 +146,6 @@ device_attr_t update_available =
.store = NULL,
};
-server_attr_t alt_server_ip =
-{
- .attr = {.name = "alt_server_ip", .mode = 0444 },
- .show = show_alt_server_ip,
- .store = NULL,
-};
-
-server_attr_t alt_server_rtt =
-{
- .attr = {.name = "alt_server_rtt", .mode = 0444 },
- .show = show_alt_server_rtt,
- .store = NULL,
-};
-
ssize_t device_show(struct kobject *kobj, struct attribute *attr, char *buf)
{
device_attr_t *device_attr = container_of(attr, device_attr_t, attr);
@@ -127,41 +153,24 @@ ssize_t device_show(struct kobject *kobj, struct attribute *attr, char *buf)
return device_attr->show(buf, dev);
}
-ssize_t server_show(struct kobject *kobj, struct attribute *attr, char *buf)
-{
- server_attr_t *server_attr = container_of(attr, server_attr_t, attr);
- dnbd3_server_t *srv = container_of(kobj, dnbd3_server_t, kobj);
- return server_attr->show(buf, srv);
-}
-
struct attribute *device_attrs[] =
{
- &cur_server_ip.attr,
+ &cur_server_addr.attr,
&cur_server_rtt.attr,
&alt_server_num.attr,
+ &alt_servers.attr,
&image_name.attr,
&rid.attr,
&update_available.attr,
NULL,
};
-struct attribute *server_attrs[] =
-{
- &alt_server_ip.attr,
- &alt_server_rtt.attr,
- NULL,
-};
struct sysfs_ops device_ops =
{
.show = device_show,
};
-struct sysfs_ops server_ops =
-{
- .show = server_show,
-};
-
void release(struct kobject *kobj)
{
kobj->state_initialized = 0;
@@ -174,39 +183,17 @@ struct kobj_type device_ktype =
.release = release,
};
-struct kobj_type server_ktype =
-{
- .default_attrs = server_attrs,
- .sysfs_ops = &server_ops,
- .release = release,
-};
void dnbd3_sysfs_init(dnbd3_device_t *dev)
{
- int i;
- char name[15] = "alt_server99";
struct kobject *kobj = &dev->kobj;
struct kobj_type *ktype = &device_ktype;
struct kobject *parent = &disk_to_dev(dev->disk)->kobj;
kobject_init_and_add(kobj, ktype, parent, "net");
-
- for (i = 0; i < NUMBER_SERVERS; i++)
- {
- sprintf(name, "alt_server%d", i);
- kobj = &dev->alt_servers[i].kobj;
- ktype = &server_ktype;
- parent = &dev->kobj;
- kobject_init_and_add(kobj, ktype, parent, name);
- }
}
void dnbd3_sysfs_exit(dnbd3_device_t *dev)
{
- int i;
- for (i = 0; i < NUMBER_SERVERS; i++)
- {
- kobject_put(&dev->alt_servers[i].kobj);
- }
kobject_put(&dev->kobj);
}
diff --git a/src/server/ipc.c b/src/server/ipc.c
index 5da811c..ece6f07 100644
--- a/src/server/ipc.c
+++ b/src/server/ipc.c
@@ -403,9 +403,10 @@ void dnbd3_ipc_send(int cmd)
if (cmd == IPC_INFO && header.size > 0)
{
- char* buf = malloc(header.size);
+ char* buf = malloc(header.size+1);
size = recv(client_sock, buf, header.size, MSG_WAITALL);
xmlDocPtr doc = xmlReadMemory(buf, size, "noname.xml", NULL, 0);
+ buf[header.size] = 0;
if (doc)
{
@@ -479,9 +480,10 @@ void dnbd3_ipc_send(int cmd)
// xmlDocDump(stdout, doc);
- } else
+ }
+ else
{
- printf("ERROR: Failed to parse reply\n");
+ printf("ERROR: Failed to parse reply\n-----------\n%s\n-------------\n", buf);
}
}
diff --git a/src/server/net.c b/src/server/net.c
index c145beb..49cfb24 100644
--- a/src/server/net.c
+++ b/src/server/net.c
@@ -39,7 +39,7 @@
#include "../config.h"
-static char recv_request_header(int sock, dnbd3_request_t *request)
+static inline char recv_request_header(int sock, dnbd3_request_t *request)
{
int ret;
// Read request header from socket
@@ -62,10 +62,13 @@ static char recv_request_header(int sock, dnbd3_request_t *request)
memlogf("[WARNING] Client tries to send a packet of type %d with %d bytes payload. Dropping client.", (int)request->cmd, (int)request->size);
return 0;
}
+#ifdef _DEBUG
+ if (_fake_delay) usleep(_fake_delay);
+#endif
return 1;
}
-static char recv_request_payload(int sock, uint32_t size, serialized_buffer_t *payload)
+static inline char recv_request_payload(int sock, uint32_t size, serialized_buffer_t *payload)
{
if (size == 0)
{
@@ -87,10 +90,11 @@ static char recv_request_payload(int sock, uint32_t size, serialized_buffer_t *p
return 1;
}
-static char send_reply(int sock, dnbd3_reply_t *reply, void *payload)
+static inline char send_reply(int sock, dnbd3_reply_t *reply, void *payload)
{
+ const unsigned int size = reply->size;
fixup_reply(*reply);
- if (!payload || reply->size == 0)
+ if (!payload || size == 0)
{
if (send(sock, reply, sizeof(dnbd3_reply_t), MSG_WAITALL) != sizeof(dnbd3_reply_t))
{
@@ -104,10 +108,10 @@ static char send_reply(int sock, dnbd3_reply_t *reply, void *payload)
iov[0].iov_base = reply;
iov[0].iov_len = sizeof(dnbd3_reply_t);
iov[1].iov_base = payload;
- iov[1].iov_len = reply->size;
- if (writev(sock, iov, 2) != sizeof(dnbd3_reply_t) + reply->size)
+ iov[1].iov_len = size;
+ if (writev(sock, iov, 2) != sizeof(dnbd3_reply_t) + size)
{
- printf("[DEBUG] Send failed (reply with payload of %d bytes)\n", (int)reply->size);
+ printf("[DEBUG] Send failed (reply with payload of %u bytes)\n", size);
return 0;
}
}
@@ -120,9 +124,6 @@ void *dnbd3_handle_query(void *dnbd3_client)
dnbd3_request_t request;
dnbd3_reply_t reply;
- const int cork = 1;
- const int uncork = 0;
-
dnbd3_image_t *image = NULL;
int image_file = -1, image_cache = -1;
@@ -216,7 +217,6 @@ void *dnbd3_handle_query(void *dnbd3_client)
if (image) while (recv_request_header(client->sock, &request))
{
-
switch (request.cmd)
{
@@ -246,12 +246,16 @@ void *dnbd3_handle_query(void *dnbd3_client)
break;
}
- // TODO: Try MSG_MORE instead of cork+uncork if performance ever becomes an issue..
- setsockopt(client->sock, SOL_TCP, TCP_CORK, &cork, sizeof(cork));
reply.cmd = CMD_GET_BLOCK;
reply.size = request.size;
reply.handle = request.handle;
- send_reply(client->sock, &reply, NULL);
+
+ fixup_reply(reply);
+ if (send(client->sock, &reply, sizeof(dnbd3_reply_t), MSG_MORE) != sizeof(dnbd3_reply_t))
+ {
+ printf("[DEBUG] Sending CMD_GET_BLOCK header failed\n");
+ return 0;
+ }
if (request.size == 0) // Request for 0 bytes, done after sending header
break;
@@ -264,8 +268,6 @@ void *dnbd3_handle_query(void *dnbd3_client)
printf("[ERROR] sendfile failed (image to net)\n");
close(client->sock);
}
-
- setsockopt(client->sock, SOL_TCP, TCP_CORK, &uncork, sizeof(uncork));
break;
}
@@ -343,8 +345,6 @@ void *dnbd3_handle_query(void *dnbd3_client)
memlogf("[ERROR] sendfile failed (cache to net)\n");
close(client->sock);
}
-
- setsockopt(client->sock, SOL_TCP, TCP_CORK, &uncork, sizeof(uncork));
break;
@@ -353,7 +353,7 @@ void *dnbd3_handle_query(void *dnbd3_client)
num = 0;
for (i = 0; i < NUMBER_SERVERS; i++)
{
- if (image->servers[i].addrtype == 0 || image->servers[i].failures > 200) continue;
+ if (image->servers[i].hostaddrtype == 0 || image->servers[i].failures > 200) continue;
memcpy(server_list + num++, image->servers + i, sizeof(dnbd3_server_entry_t));
}
reply.cmd = CMD_GET_SERVERS;
diff --git a/src/server/server.c b/src/server/server.c
index b2a36a5..965c368 100644
--- a/src/server/server.c
+++ b/src/server/server.c
@@ -37,6 +37,9 @@
#include "memlog.h"
int _sock;
+#ifdef _DEBUG
+int _fake_delay = 0;
+#endif
pthread_spinlock_t _spinlock;
GSList *_dnbd3_clients = NULL;
@@ -48,6 +51,9 @@ void dnbd3_print_help(char* argv_0)
printf("Usage: %s [OPTIONS]...\n", argv_0);
printf("Start the DNBD3 server\n");
printf("-f or --file \t\t Configuration file (default /etc/dnbd3-server.conf)\n");
+#ifdef _DEBUG
+ printf("-d or --delay \t\t Add a fake network delay of X µs\n");
+#endif
printf("-n or --nodaemon \t Start server in foreground\n");
printf("-r or --reload \t\t Reload configuration file\n");
printf("-s or --stop \t\t Stop running dnbd3-server\n");
@@ -119,10 +125,11 @@ int main(int argc, char* argv[])
int demonize = 1;
int opt = 0;
int longIndex = 0;
- static const char *optString = "f:nrsiHV?";
+ static const char *optString = "f:d:nrsiHV?";
static const struct option longOpts[] =
{
{ "file", required_argument, NULL, 'f' },
+ { "delay", required_argument, NULL, 'd' },
{ "nodaemon", no_argument, NULL, 'n' },
{ "reload", no_argument, NULL, 'r' },
{ "stop", no_argument, NULL, 's' },
@@ -137,8 +144,16 @@ int main(int argc, char* argv[])
switch (opt)
{
case 'f':
- _config_file_name = optarg;
+ _config_file_name = strdup(optarg);
break;
+ case 'd':
+#ifdef _DEBUG
+ _fake_delay = atoi(optarg);
+ break;
+#else
+ printf("This option is only available in debug builds.\n\n");
+ return EXIT_FAILURE;
+#endif
case 'n':
demonize = 0;
break;
diff --git a/src/server/server.h b/src/server/server.h
index 69d597b..7e19f27 100644
--- a/src/server/server.h
+++ b/src/server/server.h
@@ -56,6 +56,11 @@ extern pthread_spinlock_t _spinlock;
extern char *_config_file_name;
extern GSList *_dnbd3_images; // of dnbd3_image_t
+
+#ifdef _DEBUG
+extern int _fake_delay;
+#endif
+
void dnbd3_cleanup();
#endif /* SERVER_H_ */
diff --git a/src/server/utils.c b/src/server/utils.c
index 15e51f8..3fe1900 100644
--- a/src/server/utils.c
+++ b/src/server/utils.c
@@ -260,11 +260,11 @@ void dnbd3_load_config(char *file)
gchar **servers = g_key_file_get_string_list(gkf, groups[i], "servers", &num_servers, NULL);
if (servers) for (k = 0, j = 0; j < MIN(num_servers, NUMBER_SERVERS); ++j)
{
- if (parse_address(servers[j], &(image->servers[k].addrtype), image->servers[k].ipaddr, &(image->servers[k].port)))
+ if (parse_address(servers[j], &(image->servers[k].hostaddrtype), image->servers[k].hostaddr, &(image->servers[k].port)))
{
++k; continue;
}
- image->servers[k].addrtype = 0;
+ image->servers[k].hostaddrtype = 0;
}
g_strfreev(servers);
diff --git a/src/types.h b/src/types.h
index 0227433..4f44f32 100644
--- a/src/types.h
+++ b/src/types.h
@@ -76,6 +76,7 @@ typedef struct
#define CMD_GET_SERVERS 3
#define CMD_ERROR 4
#define CMD_KEEPALIVE 5
+#define CMD_LATEST_RID 6
#pragma pack(1)
typedef struct
@@ -101,10 +102,10 @@ typedef struct
#pragma pack(1)
typedef struct
{
- uint8_t ipaddr[16]; // 16byte (network representation, so it can be directly passed to socket functions)
- uint16_t port; // 2byte (network representation, so it can be directly passed to socket functions)
- uint8_t addrtype; // 1byte (ip version. AF_INET or AF_INET6. 0 means this struct is empty and should be ignored)
- uint8_t failures; // 1byte (number of times server has been consecutively unreachable)
+ uint8_t hostaddr[16]; // 16byte (network representation, so it can be directly passed to socket functions)
+ uint16_t port; // 2byte (network representation, so it can be directly passed to socket functions)
+ uint8_t hostaddrtype; // 1byte (ip version. AF_INET or AF_INET6. 0 means this struct is empty and should be ignored)
+ uint8_t failures; // 1byte (number of times server has been consecutively unreachable)
} dnbd3_server_entry_t;
#pragma pack(0)