From b618a4af91bf03a2828b867c7009338972a169d6 Mon Sep 17 00:00:00 2001 From: Johann Latocha Date: Thu, 16 Feb 2012 16:51:15 +0100 Subject: [KERNEL] Switch server on connection lost --- src/config.h | 3 +- src/kernel/blk.c | 2 + src/kernel/dnbd3.h | 1 + src/kernel/net.c | 119 ++++++++++++++++++++++++++++++++--------------------- 4 files changed, 78 insertions(+), 47 deletions(-) (limited to 'src') diff --git a/src/config.h b/src/config.h index 8506ed6..4b4fb6f 100644 --- a/src/config.h +++ b/src/config.h @@ -25,7 +25,8 @@ #define PORT 5003 #define PORTSTR "5003" #define SERVER_SOCKET_TIMEOUT 30 -#define CLIENT_SOCKET_TIMEOUT 5 +#define CLIENT_SOCKET_TIMEOUT_DATA 5 +#define CLIENT_SOCKET_TIMEOUT_DISCOVERY 1 #define HB_INTERVAL 10*HZ #define MAX_NUMBER_SERVERS 8 diff --git a/src/kernel/blk.c b/src/kernel/blk.c index 3729d5f..abb3f48 100644 --- a/src/kernel/blk.c +++ b/src/kernel/blk.c @@ -39,6 +39,8 @@ int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor) dev->thread_send = NULL; dev->thread_receive = NULL; dev->thread_discover = NULL; + dev->discover = 0; + dev->panic = 0; if (!(disk = alloc_disk(1))) { diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h index 45b1cac..0231306 100644 --- a/src/kernel/dnbd3.h +++ b/src/kernel/dnbd3.h @@ -46,6 +46,7 @@ typedef struct struct timer_list hb_timer; int num_servers; struct in_addr servers[MAX_NUMBER_SERVERS]; + int discover, panic; // process struct task_struct *thread_send; diff --git a/src/kernel/net.c b/src/kernel/net.c index 39068e3..aa93a47 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -30,7 +30,7 @@ void dnbd3_net_connect(dnbd3_device_t *dev) struct request *req = kmalloc(sizeof(struct request), GFP_ATOMIC); struct timeval timeout; - timeout.tv_sec = CLIENT_SOCKET_TIMEOUT; + timeout.tv_sec = CLIENT_SOCKET_TIMEOUT_DATA; timeout.tv_usec = 0; // do some checks before connecting @@ -71,6 +71,8 @@ void dnbd3_net_connect(dnbd3_device_t *dev) return; } + dev->panic = 0; + // enqueue request to request_queue_send (ask file size) req->cmd_type = REQ_TYPE_SPECIAL; req->cmd_flags = REQ_GET_FILESIZE; @@ -108,12 +110,21 @@ void dnbd3_net_disconnect(dnbd3_device_t *dev) del_timer(&dev->hb_timer); // kill sending and receiving threads - if (dev->thread_send && dev->thread_receive && dev->thread_discover) - { +// if (dev->thread_send && dev->thread_receive && dev->thread_discover) +// { +// kthread_stop(dev->thread_send); +// kthread_stop(dev->thread_receive); +// kthread_stop(dev->thread_discover); +// } + + if (dev->thread_send) kthread_stop(dev->thread_send); + + if (dev->thread_receive) kthread_stop(dev->thread_receive); + + if (dev->thread_discover) kthread_stop(dev->thread_discover); - } // clear socket if (dev->sock) @@ -136,6 +147,9 @@ void dnbd3_net_heartbeat(unsigned long arg) wake_up(&dev->process_queue_send); } + dev->discover = 1; + wake_up(&dev->process_queue_discover); + dev->hb_timer.expires = jiffies + HB_INTERVAL; add_timer(&dev->hb_timer); } @@ -157,10 +171,10 @@ int dnbd3_net_discover(void *data) struct timeval start, end; uint64_t t1, t2 = 0; - int a, i, num = 0; + int i, num = 0; struct timeval timeout; - timeout.tv_sec = 1; + timeout.tv_sec = CLIENT_SOCKET_TIMEOUT_DISCOVERY; timeout.tv_usec = 0; init_msghdr(msg); @@ -168,7 +182,7 @@ int dnbd3_net_discover(void *data) buf = kmalloc(4096, GFP_KERNEL); if (!buf) { - printk("ERROR: kmalloc failed"); + printk("ERROR: Kmalloc failed (discover)\n"); return -1; } @@ -176,26 +190,24 @@ int dnbd3_net_discover(void *data) while (!kthread_should_stop()) { - wait_event_interruptible(dev->process_queue_discover, kthread_should_stop() || dev->num_servers); + wait_event_interruptible(dev->process_queue_discover, kthread_should_stop() || dev->discover || dev->panic); - num = dev->num_servers; - dev->num_servers = 0; - - if (!num) + if (!&dev->discover && !dev->panic) continue; + num = dev->num_servers; + dev->discover = 0; + for (i=0; i < num && i < MAX_NUMBER_SERVERS; i++) { - // initialize socket + // initialize socket and connect if (sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock) < 0) { - printk("ERROR: Couldn't create socket.\n"); + printk("ERROR: Couldn't create socket (discover)\n"); sock = NULL; continue; } - kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &timeout, sizeof(timeout)); - inet_ntoa(dev->servers[i], ip); sock->sk->sk_allocation = GFP_NOIO; sin.sin_family = AF_INET; @@ -203,71 +215,86 @@ int dnbd3_net_discover(void *data) sin.sin_port = htons(simple_strtol(dev->port, NULL, 10)); if (kernel_connect(sock, (struct sockaddr *) &sin, sizeof(sin), 0) < 0) { - printk("ERROR: Couldn't connect to host %s:%s\n", ip, dev->port); + printk("ERROR: Couldn't connect to host %s:%s (discover)\n", ip, dev->port); sock = NULL; continue; } + // panic mode, take first responding server + if (dev->panic) + { + printk("WARN: Panic mode, taking server %s\n", ip); + sock_release(sock); + kfree(buf); + dev->thread_discover = NULL; + dnbd3_net_disconnect(dev); + strcpy(dev->host, ip); + dnbd3_net_connect(dev); + return 0; + } + // Request filesize dnbd3_request.cmd = CMD_GET_SIZE; dnbd3_request.vid = dev->vid; dnbd3_request.rid = dev->rid; - // send net request iov.iov_base = &dnbd3_request; iov.iov_len = sizeof(dnbd3_request); if (kernel_sendmsg(sock, &msg, &iov, 1, sizeof(dnbd3_request)) <= 0) - { - printk("ERROR: kernel_sendmsg (discover)\n"); - sock_release(sock); - sock = NULL; - continue; - } + goto error; + // receive net replay iov.iov_base = &dnbd3_reply; iov.iov_len = sizeof(dnbd3_reply); - kernel_recvmsg(sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags); + if (kernel_recvmsg(sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags) <= 0) + goto error; + // receive data iov.iov_base = &filesize; iov.iov_len = sizeof(uint64_t); - kernel_recvmsg(sock, &msg, &iov, 1, dnbd3_reply.size, msg.msg_flags); + if (kernel_recvmsg(sock, &msg, &iov, 1, dnbd3_reply.size, msg.msg_flags) <= 0) + goto error; - do_gettimeofday(&start); + do_gettimeofday(&start); // start rtt measurement // Request block dnbd3_request.cmd = CMD_GET_BLOCK; - dnbd3_request.offset = 0; + dnbd3_request.offset = 0; // TODO: take random block dnbd3_request.size = 4096; - // send net request iov.iov_base = &dnbd3_request; iov.iov_len = sizeof(dnbd3_request); if (kernel_sendmsg(sock, &msg, &iov, 1, sizeof(dnbd3_request)) <= 0) - { - printk("ERROR: kernel_sendmsg (discover)\n"); - sock_release(sock); - sock = NULL; - continue; - } + goto error; + // receive net replay iov.iov_base = &dnbd3_reply; iov.iov_len = sizeof(dnbd3_reply); - kernel_recvmsg(sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags); + if (kernel_recvmsg(sock, &msg, &iov, 1, sizeof(dnbd3_reply), msg.msg_flags) <= 0) + goto error; + // receive data iov.iov_base = buf; iov.iov_len = 4096; - a = kernel_recvmsg(sock, &msg, &iov, 1, dnbd3_reply.size, msg.msg_flags); + if (kernel_recvmsg(sock, &msg, &iov, 1, dnbd3_reply.size, msg.msg_flags) <= 0) + goto error; - do_gettimeofday(&end); + do_gettimeofday(&end); // end rtt measurement // clear socket - if (sock) - { - sock_release(sock); - sock = NULL; - } + sock_release(sock); + sock = NULL; + // TODO: work here t1 = (start.tv_sec*1000000ull) + start.tv_usec; t2 = (end.tv_sec*1000000ull) + end.tv_usec; - //printk("DEBUG: Server: %s RTT: %llums received bytes: %i\n", ip,t2 - t1, a); + printk("DEBUG: Server: %s RTT: %llums\n", ip,t2 - t1); + + + continue; + error: + printk("ERROR: kernel_sendmsg or kernel_recvmsg (discover)\n"); + sock_release(sock); + sock = NULL; + continue; } } kfree(buf); @@ -352,6 +379,7 @@ int dnbd3_net_send(void *data) if (dev->sock) kernel_sock_shutdown(dev->sock, SHUT_RDWR); dev->thread_send = NULL; + dev->panic = 1; return -1; } @@ -475,8 +503,6 @@ int dnbd3_net_receive(void *data) list_del_init(&blk_request->queuelist); spin_unlock_irq(&dev->blk_lock); kfree(blk_request); -// if (dev->num_servers > 1) -// wake_up(&dev->process_queue_discover); continue; default: @@ -507,5 +533,6 @@ int dnbd3_net_receive(void *data) if (dev->sock) kernel_sock_shutdown(dev->sock, SHUT_RDWR); dev->thread_receive = NULL; + dev->panic = 1; return -1; } -- cgit v1.2.3-55-g7522