summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/kernel/dnbd3.h86
-rw-r--r--src/kernel/net.c841
2 files changed, 621 insertions, 306 deletions
diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h
index ccd3bf1..3ae42fd 100644
--- a/src/kernel/dnbd3.h
+++ b/src/kernel/dnbd3.h
@@ -33,15 +33,22 @@
/**
* the number of parallel connections
*/
-#define NUMBER_CONNECTIONS 4 // power of 2
+#define NUMBER_CONNECTIONS 4
/**
- * limit to which the other connected servers are only allowed to be that worser then the best rtt
+ * upper limit for how much worse than the best rtt the other connected
+ * servers are allowed to be
*/
#define RTT_THRESOULD_LIMIT(best_rtt) ((best_rtt) * 10)
-#define DEBUG
-#define DEBUG_FILE
+/**
+ * turn on/off debug information (1/0)
+ */
+#define DEBUG 1
+/**
+ * turn on/off file and line information (1/0)
+ */
+#define DEBUG_FILE 1
extern struct workqueue_struct *dnbd3_wq;
@@ -86,7 +93,8 @@ struct dnbd3_server {
* @tx_lock: mutex to lock when sending
* @sock: the socket, 'NULL' if not connected
* @panic: 'true' if it is not possible to send or receive
- * @cookie: is incremented for every send, used to find the mq request in the receiver
+ * @cookie: is incremented for every send, used to find the mq request in the
+ * receiver
* @keepalive_worker: worker to send a keepalive package
* @receive_worker: worker to handle the incoming packages
* @pending: the pending request which is going to be send
@@ -118,16 +126,21 @@ struct dnbd3_sock {
* @device_lock: mutex to lock when device changes
* @socks: array of dnbd3_sock to connect to
* @imgname: the connected image name
- * @initial_server: the server which was configured with ioctl, will not be overriden
+ * @initial_server: the server which was configured with ioctl, will not be
+ * overridden
* @alt_servers: array of alternative servers
- * @new_servers_num: number of new alternative servers that are waiting to be copied to above array
+ * @new_servers_num: number of new alternative servers that are waiting to be
+ * copied to above array
* @new_servers: pending new alternative servers
* @update_available: 'true' if the rid has changes
- * @use_server_provided_alts: 'true' if the alt_servers array is upated by the alternatives provided by the server
+ * @use_server_provided_alts: 'true' if the alt_servers array is updated by the
+ * alternatives provided by the server
* @rid: the revision ID? TODO
* @reported_size: the size of the image
- * @panic_worker: worker to handle panics and to connect if all connections are down
- * @discovery_worker: worker to update the alt_servers, make rtt meassurement and reconnect to better servers
+ * @panic_worker: worker to handle panics and to connect if all connections are
+ * down
+ * @discovery_worker: worker to update the alt_servers, make rtt measurement
+ * and reconnect to better servers
* @discovery_count: counter for the discovery worker
* @timer: timer to start the appropriate workers
* @timer_count: counter for the timer
@@ -137,21 +150,18 @@ struct dnbd3_device {
struct blk_mq_tag_set tag_set;
struct list_head list;
- // block
struct gendisk *disk;
- // sysfs
struct kobject kobj;
struct mutex device_lock;
- // network
struct dnbd3_sock socks[NUMBER_CONNECTIONS];
char *imgname;
struct dnbd3_server initial_server;
- struct dnbd3_server alt_servers[NUMBER_SERVERS]; // array of alt servers
- int new_servers_num; // number of new alt servers that are waiting to be copied to above array
- dnbd3_server_entry_t new_servers[NUMBER_SERVERS]; // pending new alt servers
+ struct dnbd3_server alt_servers[NUMBER_SERVERS];
+ int new_servers_num;
+ dnbd3_server_entry_t new_servers[NUMBER_SERVERS];
bool update_available;
bool use_server_provided_alts;
uint16_t rid;
@@ -198,12 +208,12 @@ struct dnbd3_cmd {
* print fmt, adds sock and device information to log
* <level>_server(dev, server, fmt,...)
* print fmt, adds device and appends server information to the log
- * DEBUG - if not defined switch of all debug messages
- * DEBUG_FILE - if not defined switch of file and line number information
+ * DEBUG - set to 1/0 to switch all debug messages on/off
+ * DEBUG_FILE - set to 1/0 to switch file and line number information on/off
*/
-#ifdef DEBUG_FILE
+#if IS_ENABLED(DEBUG_FILE)
#define __print(level, fmt,...) \
printk(level "%s:%d " fmt "\n", __FILE__, __LINE__, ## __VA_ARGS__)
@@ -211,30 +221,56 @@ struct dnbd3_cmd {
#else
#define __print(level, fmt,...) \
- printk(fmt, ## __VA_ARGS__)
+ printk(level fmt "\n", ## __VA_ARGS__)
#endif
#define __print_sock(level, sock, fmt, ...) \
do { \
if ((sock)->server->host.type == HOST_IP4) { \
- __print(level, "dnbd%d/%d %pI4:%d: " fmt, (sock)->device->minor, (sock)->sock_nr, (sock)->server->host.addr, (sock)->server->host.port, ## __VA_ARGS__); \
+ __print( \
+ level, \
+ "dnbd%d/%d %pI4:%d: " fmt, \
+ (sock)->device->minor, \
+ (sock)->sock_nr, \
+ (sock)->server->host.addr, \
+ (sock)->server->host.port, \
+ ## __VA_ARGS__); \
} else { \
- __print(level, "dnbd%d/%d %pI6:%d: " fmt, (sock)->device->minor, (sock)->sock_nr, (sock)->server->host.addr, (sock)->server->host.port, ## __VA_ARGS__); \
+ __print( \
+ level, \
+ "dnbd%d/%d %pI6:%d: " fmt, \
+ (sock)->device->minor, \
+ (sock)->sock_nr, \
+ (sock)->server->host.addr, \
+ (sock)->server->host.port, \
+ ## __VA_ARGS__); \
} \
} while (0)
#define __print_server(level, dev, server, fmt, ...) \
do { \
if ((server)->host.type == HOST_IP4) { \
- __print(level, "dnbd%d: " fmt " %pI4:%d", (dev)->minor, ## __VA_ARGS__, (server)->host.addr, (server)->host.port); \
+ __print( \
+ level, \
+ "dnbd%d: " fmt " %pI4:%d", \
+ (dev)->minor, \
+ ## __VA_ARGS__, \
+ (server)->host.addr, \
+ (server)->host.port); \
} else { \
- __print(level, "dnbd%d: " fmt " %pI6:%d", (dev)->minor, ## __VA_ARGS__, (server)->host.addr, (server)->host.port); \
+ __print( \
+ level, \
+ "dnbd%d: " fmt " %pI6:%d", \
+ (dev)->minor, \
+ ## __VA_ARGS__, \
+ (server)->host.addr, \
+ (server)->host.port); \
} \
} while (0)
-#ifdef DEBUG
+#if IS_ENABLED(DEBUG)
#define debug(fmt, ...) \
__print(KERN_DEBUG, "dnbd: " fmt, ## __VA_ARGS__)
diff --git a/src/kernel/net.c b/src/kernel/net.c
index da9b897..dfde203 100644
--- a/src/kernel/net.c
+++ b/src/kernel/net.c
@@ -20,7 +20,6 @@
*/
-
#include <net/sock.h>
#include <linux/wait.h>
@@ -32,102 +31,136 @@
#define DNBD3_REQ_OP_SPECIAL REQ_OP_DRV_IN
#define DNBD3_REQ_OP_CONNECT REQ_OP_DRV_OUT
-#define dnbd3_cmd_to_priv(req, cmd) (req)->cmd_flags = DNBD3_REQ_OP_SPECIAL | ((cmd) << REQ_FLAG_BITS)
-#define dnbd3_connect(req) (req)->cmd_flags = DNBD3_REQ_OP_CONNECT | ((CMD_SELECT_IMAGE) << REQ_FLAG_BITS)
-#define dnbd3_priv_to_cmd(req) ((req)->cmd_flags >> REQ_FLAG_BITS)
+#define dnbd3_cmd_to_op_special(req, cmd) \
+ (req)->cmd_flags = DNBD3_REQ_OP_SPECIAL | ((cmd) << REQ_FLAG_BITS)
+
+#define dnbd3_op_special_to_cmd(req) \
+ ((req)->cmd_flags >> REQ_FLAG_BITS)
+
+#define dnbd3_connect_to_req(req) \
+ (req)->cmd_flags = DNBD3_REQ_OP_CONNECT \
+ | ((CMD_SELECT_IMAGE) << REQ_FLAG_BITS)
+
#define dnbd3_test_block_to_req(req) \
do { \
(req)->cmd_flags = REQ_OP_READ; \
(req)->__data_len = RTT_BLOCK_SIZE; \
(req)->__sector = 0; \
} while (0)
-#define dnbd3_sock_create(af,type,proto,sock) sock_create_kern(&init_net, (af) == HOST_IP4 ? AF_INET : AF_INET6, type, proto, sock)
-#define REQUEST_TIMEOUT (HZ * SOCKET_TIMEOUT_CLIENT_DATA)
+#define dnbd3_sock_create(af,type,proto,sock) \
+ sock_create_kern(&init_net, (af) == HOST_IP4 ? AF_INET : AF_INET6, \
+ type, proto, sock)
+#define REQUEST_TIMEOUT \
+ (HZ * SOCKET_TIMEOUT_CLIENT_DATA)
-#define init_msghdr(h) do { \
- h.msg_name = NULL; \
- h.msg_namelen = 0; \
- h.msg_control = NULL; \
- h.msg_controllen = 0; \
- h.msg_flags = MSG_WAITALL | MSG_NOSIGNAL; \
+
+#define dnbd3_init_msghdr(h) \
+ do { \
+ (h).msg_name = NULL; \
+ (h).msg_namelen = 0; \
+ (h).msg_control = NULL; \
+ (h).msg_controllen = 0; \
+ (h).msg_flags = MSG_WAITALL | MSG_NOSIGNAL; \
} while (0)
-static DECLARE_WAIT_QUEUE_HEAD(send_wq);
+#define dnbd3_avg_rtt(server) \
+ (( (server)->rtts[0] + (server)->rtts[1] \
+ + (server)->rtts[2] + (server)->rtts[3] ) / 4 )
-static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *server);
-static int __dnbd3_socket_connect(struct dnbd3_server * server, struct dnbd3_sock *sock);
-static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server *server, struct dnbd3_sock *sock);
+#define dnbd3_set_rtt_unreachable(server) \
+ (server)->rtts[0] = (server)->rtts[1] = (server)->rtts[2] \
+ = (server)->rtts[3] = RTT_UNREACHABLE;
-static void dnbd3_print_server_list(struct dnbd3_device *dev)
-{
- int i;
- info_server(dev, &dev->initial_server, "initial server is");
- for (i = 0; i < NUMBER_SERVERS; i++) {
- if (dev->alt_servers[i].host.addr[0] != 0) {
- info_server(dev, &dev->alt_servers[i], "alternative server is");
- }
- }
-}
+static int dnbd3_server_connect(struct dnbd3_device *dev,
+ struct dnbd3_server *server);
+static int dnbd3_socket_connect(struct dnbd3_sock *sock,
+ struct dnbd3_server * server);
+static int dnbd3_socket_disconnect(struct dnbd3_device *dev,
+ struct dnbd3_server *server, struct dnbd3_sock *sock);
-static inline uint64_t dnbd3_to_wq_signal(int minor, uint16_t dnbd3_cmd, uint16_t sock_nr) {
- return ((uint64_t) minor << 32) | ((uint32_t) dnbd3_cmd << 16) | sock_nr;
-}
+/*
+ * Methods for request and receive commands
+ */
+/**
+ * dnbd3_to_handle - convert tag and cookie to handle
+ * @tag: the tag to convert
+ * @cookie: the cookie to convert
+ */
static inline uint64_t dnbd3_to_handle(uint32_t tag, uint32_t cookie)
{
	/* the tag occupies the upper 32 bits, the cookie the lower 32 bits */
	uint64_t handle = tag;

	handle <<= 32;
	handle |= cookie;
	return handle;
}
+/**
+ * dnbd3_tag_from_handle - get tag from handle
+ * @handle: the handle
+ */
static inline uint32_t dnbd3_tag_from_handle(uint64_t handle)
{
	/* the tag is stored in the upper 32 bits of the handle */
	const uint64_t upper = handle >> 32;

	return (uint32_t) upper;
}
+/**
+ * dnbd3_cookie_from_handle - get cookie from handle
+ * @handle: the handle
+ */
static inline uint32_t dnbd3_cookie_from_handle(uint64_t handle)
{
	/* the cookie is stored in the lower 32 bits of the handle */
	const uint64_t lower = handle & 0xffffffffULL;

	return (uint32_t) lower;
}
-int dnbd3_send_request(struct dnbd3_sock *sock, struct request *req, struct dnbd3_cmd *cmd)
+/**
+ * dnbd3_send_request - send a request
+ * @sock: the socket on which the request is sent
+ * @req: the request to send
+ * @cmd: optional - the dnbd3_cmd from mq
+ *
+ * the tx_lock of the socket must be held
+ */
+int dnbd3_send_request(struct dnbd3_sock *sock, struct request *req,
+ struct dnbd3_cmd *cmd)
{
- dnbd3_request_t dnbd3_request;
+ dnbd3_request_t request;
struct msghdr msg;
struct kvec iov[2];
size_t iov_num = 1;
- size_t send_len;
+ size_t lng;
int result;
uint32_t tag;
uint64_t handle;
serialized_buffer_t payload_buffer;
sock->pending = req;
- init_msghdr(msg);
+ dnbd3_init_msghdr(msg);
- dnbd3_request.magic = dnbd3_packet_magic;
+ request.magic = dnbd3_packet_magic;
switch (req_op(req)) {
case REQ_OP_READ:
debug_sock(sock, "request operation read");
- dnbd3_request.cmd = CMD_GET_BLOCK;
- dnbd3_request.offset = blk_rq_pos(req) << 9; // *512
- dnbd3_request.size = blk_rq_bytes(req); // bytes left to complete entire request
+ request.cmd = CMD_GET_BLOCK;
+ request.offset = blk_rq_pos(req) << 9; // * 512
+ request.size = blk_rq_bytes(req);
break;
case DNBD3_REQ_OP_SPECIAL:
debug_sock(sock, "request operation special");
- dnbd3_request.cmd = dnbd3_priv_to_cmd(req);
- dnbd3_request.size = 0;
+ request.cmd = dnbd3_op_special_to_cmd(req);
+ request.size = 0;
break;
case DNBD3_REQ_OP_CONNECT:
- debug_sock(sock, "request operation connect to %s", sock->device->imgname);
- dnbd3_request.cmd = CMD_SELECT_IMAGE;
+ debug_sock(sock, "request operation connect to %s",
+ sock->device->imgname);
+ request.cmd = CMD_SELECT_IMAGE;
serializer_reset_write(&payload_buffer);
serializer_put_uint16(&payload_buffer, PROTOCOL_VERSION);
serializer_put_string(&payload_buffer, sock->device->imgname);
serializer_put_uint16(&payload_buffer, sock->device->rid);
serializer_put_uint8(&payload_buffer, 0); // is_server = false
iov[1].iov_base = &payload_buffer;
- dnbd3_request.size = iov[1].iov_len = serializer_get_written_length(&payload_buffer);
+ request.size = serializer_get_written_length(&payload_buffer);
+ iov[1].iov_len = request.size;
iov_num = 2;
break;
default:
@@ -137,18 +170,18 @@ int dnbd3_send_request(struct dnbd3_sock *sock, struct request *req, struct dnbd
if (cmd != NULL) {
cmd->cookie = sock->cookie;
tag = blk_mq_unique_tag(req);
- handle = dnbd3_to_handle(tag, sock->cookie);// ((uint64_t) tag << 32) | sock->cookie;
+ handle = dnbd3_to_handle(tag, sock->cookie);
} else {
handle = sock->cookie;
}
- memcpy(&dnbd3_request.handle, &handle, sizeof(handle));
+ memcpy(&request.handle, &handle, sizeof(handle));
- fixup_request(dnbd3_request);
- iov[0].iov_base = &dnbd3_request;
- iov[0].iov_len = sizeof(dnbd3_request);
- send_len = iov_num == 1 ? sizeof(dnbd3_request) : iov[0].iov_len + iov[1].iov_len;
- result = kernel_sendmsg(sock->sock, &msg, iov, iov_num, send_len);
- if (result != send_len) {
+ fixup_request(request);
+ iov[0].iov_base = &request;
+ iov[0].iov_len = sizeof(request);
+ lng = iov_num == 1 ? iov[0].iov_len : iov[0].iov_len + iov[1].iov_len;
+ result = kernel_sendmsg(sock->sock, &msg, iov, iov_num, lng);
+ if (result != lng) {
error_sock(sock, "connection to server lost");
sock->server->failures++;
goto error;
@@ -160,6 +193,11 @@ error:
}
+/**
+ * dnbd3_send_request_cmd - send a dnbd3 cmd
+ * @sock: the socket on which the request is sent
+ * @dnbd3_cmd: the dnbd3 cmd to send
+ */
static int dnbd3_send_request_cmd(struct dnbd3_sock *sock, uint16_t dnbd3_cmd)
{
int result;
@@ -173,21 +211,22 @@ static int dnbd3_send_request_cmd(struct dnbd3_sock *sock, uint16_t dnbd3_cmd)
switch (dnbd3_cmd) {
case CMD_KEEPALIVE:
case CMD_GET_SERVERS:
- dnbd3_cmd_to_priv(req, dnbd3_cmd);
+ dnbd3_cmd_to_op_special(req, dnbd3_cmd);
break;
case CMD_SELECT_IMAGE:
- dnbd3_connect(req);
+ dnbd3_connect_to_req(req);
break;
case CMD_GET_BLOCK:
dnbd3_test_block_to_req(req);
break;
default:
- warn_sock(sock, "unsupported command for blocking %d", dnbd3_cmd);
+ warn_sock(sock, "unsupported command %d", dnbd3_cmd);
result = -EINVAL;
goto error;
}
mutex_lock(&sock->tx_lock);
+ sock->pending = req;
result = dnbd3_send_request(sock, req, NULL);
if (result <= 0) {
mutex_unlock(&sock->tx_lock);
@@ -202,21 +241,28 @@ error:
return result;
}
+/**
+ * dnbd3_receive_cmd - receive a command
+ * @sock: the socket where the request is received
+ * @reply: an unused reply will be filled with the reply of the server
+ *
+ * this method should be called directly after the dnbd3_send_request_ method
+ */
static int dnbd3_receive_cmd(struct dnbd3_sock *sock, dnbd3_reply_t *reply)
{
int result;
struct msghdr msg;
struct kvec iov;
- init_msghdr(msg);
+ dnbd3_init_msghdr(msg);
iov.iov_base = reply;
iov.iov_len = sizeof(dnbd3_reply_t);
- result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags);
+ result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len,
+ msg.msg_flags);
if (result <= 0) {
return result;
}
fixup_reply(dnbd3_reply);
- // check error
if (reply->magic != dnbd3_packet_magic) {
error_sock(sock, "receive cmd wrong magic packet");
return -EIO;
@@ -229,7 +275,18 @@ static int dnbd3_receive_cmd(struct dnbd3_sock *sock, dnbd3_reply_t *reply)
return result;
}
-static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_device *dev, struct dnbd3_sock *sock, dnbd3_reply_t *reply)
+/**
+ * dnbd3_receive_cmd_get_block_mq - receive a block for mq
+ * @sock: the socket where the request is received
+ * @reply: the reply initialized by dnbd3_receive_cmd
+ *
+ * this method should be called directly after the dnbd3_receive_cmd method
+ *
+ * this method copies the data to user space according to the request which is
+ * encoded in the handle by the send request method and decoded here.
+ */
+static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock,
+ dnbd3_reply_t *reply)
{
struct dnbd3_cmd *cmd;
struct msghdr msg;
@@ -237,6 +294,7 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_device *dev, struct dnbd3
struct kvec iov;
struct req_iterator iter;
struct bio_vec bvec_inst;
+ struct dnbd3_device *dev = sock->device;
struct bio_vec *bvec = &bvec_inst;
sigset_t blocked, oldset;
void *kaddr;
@@ -244,7 +302,7 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_device *dev, struct dnbd3
uint16_t hwq;
int result = 0;
uint64_t handle;
- init_msghdr(msg);
+ dnbd3_init_msghdr(msg);
memcpy(&handle, &reply->handle, sizeof(handle));
cookie = dnbd3_cookie_from_handle(handle);
@@ -252,10 +310,12 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_device *dev, struct dnbd3
hwq = blk_mq_unique_tag_to_hwq(tag);
if (hwq < dev->tag_set.nr_hw_queues) {
- req = blk_mq_tag_to_rq(dev->tag_set.tags[hwq], blk_mq_unique_tag_to_tag(tag));
+ req = blk_mq_tag_to_rq(dev->tag_set.tags[hwq],
+ blk_mq_unique_tag_to_tag(tag));
}
if (!req || !blk_mq_request_started(req)) {
- dev_err(disk_to_dev(dev->disk), "unexpected reply (%d) %p\n", tag, req);
+ dev_err(disk_to_dev(dev->disk), "unexpected reply (%d) %p\n",
+ tag, req);
return -EIO;
}
cmd = blk_mq_rq_to_pdu(req);
@@ -263,12 +323,11 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_device *dev, struct dnbd3
mutex_lock(&cmd->lock);
if (cmd->cookie != cookie) {
dev_err(disk_to_dev(dev->disk), "double reply on req %p, cookie %u, handle cookie %u\n",
- req, cmd->cookie, cookie);
+ req, cmd->cookie, cookie);
mutex_unlock(&cmd->lock);
return -EIO;
}
-
rq_for_each_segment(bvec_inst, req, iter) {
siginitsetinv(&blocked, sigmask(SIGKILL));
sigprocmask(SIG_SETMASK, &blocked, &oldset);
@@ -276,11 +335,12 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_device *dev, struct dnbd3
kaddr = kmap(bvec->bv_page) + bvec->bv_offset;
iov.iov_base = kaddr;
iov.iov_len = bvec->bv_len;
- result = kernel_recvmsg(sock->sock, &msg, &iov, 1, bvec->bv_len, msg.msg_flags);
+ result = kernel_recvmsg(sock->sock, &msg, &iov, 1, bvec->bv_len,
+ msg.msg_flags);
if (result != bvec->bv_len) {
kunmap(bvec->bv_page);
sigprocmask(SIG_SETMASK, &oldset, NULL );
- error_sock(sock, "could not receive form net to block layer");
+ error_sock(sock, "could not receive from net to block layer");
mutex_unlock(&cmd->lock);
return result;
}
@@ -293,7 +353,18 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_device *dev, struct dnbd3
return result;
}
-static int dnbd3_receive_cmd_get_block_test(struct dnbd3_sock *sock, dnbd3_reply_t *reply)
+
+/**
+ * dnbd3_receive_cmd_get_block_test - receive a test block
+ * @sock: the socket where the request is received
+ * @reply: the reply initialized by dnbd3_receive_cmd
+ *
+ * this method should be called directly after the dnbd3_receive_cmd method
+ *
+ * the received data is just thrown away
+ */
+static int dnbd3_receive_cmd_get_block_test(struct dnbd3_sock *sock,
+ dnbd3_reply_t *reply)
{
struct msghdr msg;
struct kvec iov;
@@ -304,10 +375,11 @@ static int dnbd3_receive_cmd_get_block_test(struct dnbd3_sock *sock, dnbd3_reply
goto error;
}
- init_msghdr(msg);
+ dnbd3_init_msghdr(msg);
iov.iov_base = buf;
iov.iov_len = reply->size;
- result = kernel_recvmsg(sock->sock, &msg, &iov, 1, reply->size, msg.msg_flags);
+ result = kernel_recvmsg(sock->sock, &msg, &iov, 1, reply->size,
+ msg.msg_flags);
if (result != RTT_BLOCK_SIZE) {
error_sock(sock, "receive test block failed");
goto error;
@@ -320,14 +392,25 @@ error:
return result;
}
-static int dnbd3_receive_cmd_get_servers(struct dnbd3_device *dev, struct dnbd3_sock *sock, dnbd3_reply_t *reply)
+/**
+ * dnbd3_receive_cmd_get_servers - receive new servers
+ * @sock: the socket where the request is received
+ * @reply: the reply initialized by dnbd3_receive_cmd
+ *
+ * this method should be called directly after the dnbd3_receive_cmd method
+ *
+ * the new servers are copied to dnbd3_device.new_servers and
+ * dnbd3_device.new_servers_num is set accordingly
+ */
+static int dnbd3_receive_cmd_get_servers(struct dnbd3_sock *sock,
+ dnbd3_reply_t *reply)
{
struct msghdr msg;
struct kvec iov;
- /* return true if did not receive servers, not an error*/
+ struct dnbd3_device *dev = sock->device;
int result = 1;
int count, remaining;
- init_msghdr(msg);
+ dnbd3_init_msghdr(msg);
debug_sock(sock, "get servers received");
mutex_lock(&dev->device_lock);
@@ -341,9 +424,11 @@ static int dnbd3_receive_cmd_get_servers(struct dnbd3_device *dev, struct dnbd3_
if (count != 0) {
iov.iov_base = dev->new_servers;
iov.iov_len = count * sizeof(dnbd3_server_entry_t);
- result = kernel_recvmsg(sock->sock, &msg, &iov, 1, (count * sizeof(dnbd3_server_entry_t)), msg.msg_flags);
+ result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len,
+ msg.msg_flags);
if (result <= 0) {
- error_sock(sock, "failed to receive get servers %d", result);
+ error_sock(sock, "failed to receive get servers %d",
+ result);
return result;
} else if (result != (count * sizeof(dnbd3_server_entry_t))) {
error_sock(sock, "failed to get servers");
@@ -352,14 +437,19 @@ static int dnbd3_receive_cmd_get_servers(struct dnbd3_device *dev, struct dnbd3_
}
dev->new_servers_num = count;
}
- // If there were more servers than accepted, remove the remaining data from the socket buffer
+ /*
+ * if there were more servers than accepted, remove the remaining data
+ * from the socket buffer
+ * abuse the reply struct as the receive buffer
+ */
remaining = reply->size - (count * sizeof(dnbd3_server_entry_t));
consume_payload:
while (remaining > 0) {
- count = MIN(sizeof(dnbd3_reply_t), remaining); // Abuse the reply struct as the receive buffer
+ count = MIN(sizeof(dnbd3_reply_t), remaining);
iov.iov_base = reply;
iov.iov_len = count;
- result = kernel_recvmsg(sock->sock, &msg, &iov, 1, count, msg.msg_flags);
+ result = kernel_recvmsg(sock->sock, &msg, &iov, 1, count,
+ msg.msg_flags);
if (result <= 0) {
error_sock(sock, "failed to receive payload from get servers");
mutex_unlock(&dev->device_lock);
@@ -370,13 +460,24 @@ consume_payload:
return result;
}
-static int dnbd3_receive_cmd_latest_rid(struct dnbd3_device *dev, struct dnbd3_sock *sock, dnbd3_reply_t *reply)
+/**
+ * dnbd3_receive_cmd_latest_rid - receive latest rid
+ * @sock: the socket where the request is received
+ * @reply: the reply initialized by dnbd3_receive_cmd
+ *
+ * this method should be called directly after the dnbd3_receive_cmd method
+ *
+ * dnbd3_device.update_available is set if a new RID is received
+ */
+static int dnbd3_receive_cmd_latest_rid(struct dnbd3_sock *sock,
+ dnbd3_reply_t *reply)
{
struct kvec iov;
uint16_t rid;
int result;
struct msghdr msg;
- init_msghdr(msg);
+ struct dnbd3_device *dev = sock->device;
+ dnbd3_init_msghdr(msg);
debug_sock(sock, "latest rid received");
if (reply->size != 2) {
@@ -385,18 +486,32 @@ static int dnbd3_receive_cmd_latest_rid(struct dnbd3_device *dev, struct dnbd3_s
}
iov.iov_base = &rid;
iov.iov_len = sizeof(rid);
- result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags);
+ result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len,
+ msg.msg_flags);
if (result <= 0) {
error_sock(sock, "failed to receive latest rid");
return result;
}
rid = net_order_16(rid);
- debug_sock(sock, "latest rid of %s is %d (currently using %d)", dev->imgname, (int)rid, (int)dev->rid);
+ debug_sock(sock, "latest rid of %s is %d (currently using %d)",
+ dev->imgname, (int)rid, (int)dev->rid);
dev->update_available = (rid > dev->rid ? true : false);
return result;
}
-static int dnbd3_receive_cmd_select_image(struct dnbd3_device *dev, struct dnbd3_sock *sock, dnbd3_reply_t *reply)
+
+/**
+ * dnbd3_receive_cmd_select_image - select the image
+ * @sock: the socket where the request is received
+ * @reply: the reply initialized by dnbd3_receive_cmd
+ *
+ * this method should be called directly after the dnbd3_receive_cmd method
+ *
+ * if this is the first connection the image name, file size and rid will be set
+ * if this is a further connection image name, file size and rid will be checked
+ */
+static int dnbd3_receive_cmd_select_image(struct dnbd3_sock *sock,
+ dnbd3_reply_t *reply)
{
struct kvec iov;
uint16_t rid;
@@ -405,12 +520,13 @@ static int dnbd3_receive_cmd_select_image(struct dnbd3_device *dev, struct dnbd3
struct msghdr msg;
serialized_buffer_t payload_buffer;
uint64_t reported_size;
- init_msghdr(msg);
+ struct dnbd3_device *dev = sock->device;
+ dnbd3_init_msghdr(msg);
debug_sock(sock, "select image received");
- // receive reply payload
iov.iov_base = &payload_buffer;
iov.iov_len = reply->size;
- result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags);
+ result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len,
+ msg.msg_flags);
if (result <= 0) {
error_sock(sock, "failed to receive select image %d", result);
return result;
@@ -420,7 +536,7 @@ static int dnbd3_receive_cmd_select_image(struct dnbd3_device *dev, struct dnbd3
return -EIO;
}
- // handle/check reply payload
+ /* handle/check reply payload */
serializer_reset_read(&payload_buffer, reply->size);
sock->server->protocol_version = serializer_get_uint16(&payload_buffer);
if (sock->server->protocol_version < MIN_SUPPORTED_SERVER) {
@@ -433,7 +549,8 @@ static int dnbd3_receive_cmd_select_image(struct dnbd3_device *dev, struct dnbd3
name = serializer_get_string(&payload_buffer);
rid = serializer_get_uint16(&payload_buffer);
if (dev->rid != rid && strcmp(name, dev->imgname) != 0) {
- error_sock(sock, "server offers image '%s', requested '%s'", name, dev->imgname);
+ error_sock(sock, "server offers image '%s', requested '%s'",
+ name, dev->imgname);
return -EIO;
}
@@ -444,24 +561,71 @@ static int dnbd3_receive_cmd_select_image(struct dnbd3_device *dev, struct dnbd3
return -EIO;
}
dev->reported_size = reported_size;
- set_capacity(dev->disk, dev->reported_size >> 9); /* 512 Byte blocks */
+ set_capacity(dev->disk, dev->reported_size >> 9); /* 512 Byte */
} else if (dev->reported_size != reported_size) {
- error_sock(sock, "reported size by server is %llu but should be %llu", reported_size, dev->reported_size);
+ error_sock(sock, "reported size by server is %llu but should be %llu",
+ reported_size, dev->reported_size);
return -EIO;
}
return result;
}
+
+
+/*
+ * Timer and workers
+ */
+
+/**
+ * dnbd3_timer - the timer to start different workers
+ * @arg: the timer_list used to get the dnbd3_device
+ *
+ * workers to start:
+ * - panic_worker
+ * - keepalive_worker for each connected socket
+ * - discovery_worker
+ */
+static void dnbd3_timer(struct timer_list *arg)
+{
+ struct dnbd3_device *dev;
+ int i;
+ dev = container_of(arg, struct dnbd3_device, timer);
+ queue_work(dnbd3_wq, &dev->panic_worker);
+
+ if (dev->timer_count % TIMER_INTERVAL_KEEPALIVE_PACKET == 0) {
+ for (i = 0; i < NUMBER_CONNECTIONS; i++) {
+ if (dnbd3_is_sock_alive(dev->socks[i])) {
+ queue_work(dnbd3_wq, &dev->socks[i].keepalive_worker);
+ }
+ }
+ }
+ /* start after 4 seconds */
+ if (dev->timer_count % TIMER_INTERVAL_PROBE_NORMAL == 4) {
+ queue_work(dnbd3_wq, &dev->discovery_worker);
+ }
+
+
+ dev->timer_count++;
+ dev->timer.expires = jiffies + HZ;
+ add_timer(&dev->timer);
+}
+
+/**
+ * dnbd3_receive_worker - receives data from a socket
+ * @work: the work used to get the dnbd3_sock
+ *
+ * receives data until the socket is closed (returns 0)
+ */
static void dnbd3_receive_worker(struct work_struct *work)
{
- struct dnbd3_sock *sock = container_of(work, struct dnbd3_sock, receive_worker);
- struct dnbd3_device *dev = sock->device;
- dnbd3_reply_t dnbd3_reply;
+ struct dnbd3_sock *sock;
+ dnbd3_reply_t reply;
int result;
+ sock = container_of(work, struct dnbd3_sock, receive_worker);
debug_sock(sock, "receive worker is starting");
while(1) { // loop until socket returns 0
- result = dnbd3_receive_cmd(sock, &dnbd3_reply);
+ result = dnbd3_receive_cmd(sock, &reply);
if (result == -EAGAIN) {
continue;
} else if (result <= 0) {
@@ -470,39 +634,43 @@ static void dnbd3_receive_worker(struct work_struct *work)
}
- switch (dnbd3_reply.cmd) {
+ switch (reply.cmd) {
case CMD_GET_BLOCK:
- result = dnbd3_receive_cmd_get_block_mq(dev, sock, &dnbd3_reply);
+ result = dnbd3_receive_cmd_get_block_mq(sock, &reply);
if (result <= 0) {
- error_sock(sock, "receive cmd get block mq failed %d", result);
+ error_sock(sock, "receive cmd get block mq failed %d",
+ result);
goto error;
}
- continue; // we do not need to wake up anyone, wait for next cmd (block)
+ continue;
case CMD_GET_SERVERS:
- result = dnbd3_receive_cmd_get_servers(dev, sock, &dnbd3_reply);
+ result = dnbd3_receive_cmd_get_servers(sock, &reply);
if (result <= 0) {
- error_sock(sock, "receive cmd get servers failed %d", result);
+ error_sock(sock, "receive cmd get servers failed %d",
+ result);
goto error;
}
break;
case CMD_LATEST_RID:
- result = dnbd3_receive_cmd_latest_rid(dev, sock, &dnbd3_reply);
+ result = dnbd3_receive_cmd_latest_rid(sock, &reply);
if (result <= 0) {
- error_sock(sock, "receive cmd latest rid failed %d", result);
+ error_sock(sock, "receive cmd latest rid failed %d",
+ result);
goto error;
}
break;
case CMD_KEEPALIVE:
- if (dnbd3_reply.size != 0) {
+ if (reply.size != 0) {
error_sock(sock, "got keep alive packet with payload");
goto error;
}
debug_sock(sock, "keep alive received");
break;
case CMD_SELECT_IMAGE:
- result = dnbd3_receive_cmd_select_image(dev, sock, &dnbd3_reply);
+ result = dnbd3_receive_cmd_select_image(sock, &reply);
if (result <= 0) {
- error_sock(sock, "receive cmd select image failed %d", result);
+ error_sock(sock, "receive cmd select image failed %d",
+ result);
goto error;
}
break;
@@ -514,10 +682,12 @@ error:
if (result == 0) {
info_sock(sock, "result is 0, socket seems to be down");
sock->panic = true;
- break; //the socket seems to be down
+ break;
} else if (result < 0) {
- sock->server->failures++; // discovery takes care of to many failures
- warn_sock(sock, "receive error happened %d, total failures %d", result, sock->server->failures);
+ /* discovery takes care of too many failures */
+ sock->server->failures++;
+ warn_sock(sock, "receive error happened %d, total failures %d",
+ result, sock->server->failures);
}
debug_sock(sock, "receive completed, waiting for next receive");
}
@@ -526,49 +696,37 @@ error:
}
-static void dnbd3_timer(struct timer_list *arg)
-{
- struct dnbd3_device *dev = container_of(arg, struct dnbd3_device, timer);
- int i;
-
- queue_work(dnbd3_wq, &dev->panic_worker);
-
- if (dev->timer_count % TIMER_INTERVAL_KEEPALIVE_PACKET == 0) {
- for (i = 0; i < NUMBER_CONNECTIONS; i++) {
- if (dnbd3_is_sock_alive(dev->socks[i])) {
- queue_work(dnbd3_wq, &dev->socks[i].keepalive_worker);
- }
- }
- }
- if (dev->timer_count % TIMER_INTERVAL_PROBE_NORMAL == 4) { // wait for 4 seconds
- queue_work(dnbd3_wq, &dev->discovery_worker);
- }
-
-
- dev->timer_count++;
- dev->timer.expires = jiffies + HZ;
- add_timer(&dev->timer);
-}
-
-
+/**
+ * dnbd3_keepalive_worker - sends a keepalive
+ * @work: the work used to get the dnbd3_sock
+ */
static void dnbd3_keepalive_worker(struct work_struct *work)
{
- struct dnbd3_sock *sock = container_of(work, struct dnbd3_sock, keepalive_worker);
+ struct dnbd3_sock *sock;
+ sock = container_of(work, struct dnbd3_sock, keepalive_worker);
debug_sock(sock, "starting keepalive worker");
dnbd3_send_request_cmd(sock, CMD_KEEPALIVE);
}
-static struct dnbd3_server *dnbd3_find_best_alt_server(struct dnbd3_device *dev) {
+/**
+ * dnbd3_find_best_alt_server - find best alternative server
+ * @dev: the device where to search for alternative servers
+ *
+ * searches for an alternative server which has an rtt better than
+ * RTT_THRESOULD_LIMIT of the best connected server
+ */
+static struct dnbd3_server *dnbd3_find_best_alt_server(struct dnbd3_device *dev)
+{
int i, j;
uint64_t rtt = 0;
uint64_t best_rtt = RTT_UNREACHABLE;
uint64_t current_best_rtt = RTT_UNREACHABLE;
- struct dnbd3_server *best_alt_server = NULL;
- struct dnbd3_server *better_alt_server = NULL;
+ struct dnbd3_server *best_server = NULL;
+ struct dnbd3_server *server = NULL;
for (i = 0; i < NUMBER_CONNECTIONS; i++) {
if (dnbd3_is_sock_alive(dev->socks[i])) {
- rtt = (dev->socks[i].server->rtts[0] + dev->socks[i].server->rtts[1] + dev->socks[i].server->rtts[2] + dev->socks[i].server->rtts[3]) / 4;
+ rtt = dnbd3_avg_rtt(dev->socks[i].server);
if (rtt <= current_best_rtt) {
current_best_rtt = rtt;
}
@@ -576,37 +734,48 @@ static struct dnbd3_server *dnbd3_find_best_alt_server(struct dnbd3_device *dev)
}
best_rtt = RTT_THRESOULD_LIMIT(current_best_rtt);
- debug_dev(dev, "best connected rtt is %llu, searching for rtt better than %llu", current_best_rtt, best_rtt);
+ debug_dev(dev, "best connected rtt is %llu, searching for rtt better than %llu",
+ current_best_rtt, best_rtt);
for (i = 0; i < NUMBER_SERVERS; i++) {
if (dev->alt_servers[i].host.type != 0) {
- rtt = (dev->alt_servers[i].rtts[0] + dev->alt_servers[i].rtts[1] + dev->alt_servers[i].rtts[2] + dev->alt_servers[i].rtts[3]) / 4;
+ rtt = dnbd3_avg_rtt(&dev->alt_servers[i]);
if (rtt <= best_rtt) {
- better_alt_server = &dev->alt_servers[i];
- for (j = 0; j < NUMBER_CONNECTIONS; j++) { // check if already connected
- if (better_alt_server == dev->socks[j].server) {
- better_alt_server = NULL; // found already connected server
+ server = &dev->alt_servers[i];
+ /* check if already connected */
+ for (j = 0; j < NUMBER_CONNECTIONS; j++) {
+ if (server == dev->socks[j].server) {
+ server = NULL;
break;
}
}
- if (better_alt_server) {
- best_alt_server = better_alt_server;
+ if (server) {
+ best_server = server;
best_rtt = rtt;
}
}
}
}
- if (best_alt_server) {
- info_server(dev, best_alt_server, "found best alt server with rtt %llu", best_rtt);
+ if (best_server) {
+ debug_server(dev, best_server, "found best alt server with rtt %llu",
+ best_rtt);
} else {
debug_dev(dev, "did not find any alternative server");
}
- return best_alt_server;
+ return best_server;
}
-static bool dnbd3_better_rtt(struct dnbd3_server *new_server, struct dnbd3_server *existing_server) {
- uint64_t new_rtt = (new_server->rtts[0] + new_server->rtts[1] + new_server->rtts[2] + new_server->rtts[3]) / 4;
- uint64_t existing_rtt = (existing_server->rtts[0] + existing_server->rtts[1] + existing_server->rtts[2] + existing_server->rtts[3]) / 4;
+/**
+ * dnbd3_better_rtt - checks if the rtt is better
+ * @new_server: the server to check
+ * @existing_server: current server
+ *
+ * true if new_server's rtt is below RTT_THRESHOLD_FACTOR(existing rtt)
+ */
+static bool dnbd3_better_rtt(struct dnbd3_server *new_server,
+ struct dnbd3_server *existing_server) {
+ uint64_t new_rtt = dnbd3_avg_rtt(new_server);
+ uint64_t existing_rtt = dnbd3_avg_rtt(existing_server);
if (new_rtt < RTT_THRESHOLD_FACTOR(existing_rtt)) {
return true;
@@ -614,6 +783,14 @@ static bool dnbd3_better_rtt(struct dnbd3_server *new_server, struct dnbd3_serve
return false;
}
+/**
+ * dnbd3_adjust_connections - adjust the connections of the device
+ * @dev: the device
+ *
+ * 1. connect empty sockets if best alternative server is found
+ * 2. replace slow socket with better server if available
+ * 3. remove socket if one is slow
+ */
static void dnbd3_adjust_connections(struct dnbd3_device *dev) {
int i;
int sock_alive = 0;
@@ -621,13 +798,13 @@ static void dnbd3_adjust_connections(struct dnbd3_device *dev) {
uint64_t best_rtt = RTT_UNREACHABLE;
struct dnbd3_server *server, *existing_server;
- // connect empty sockets
+ /* connect empty sockets */
sock_alive = 0;
for (i = 0; i < NUMBER_CONNECTIONS; i++) {
if (!dnbd3_is_sock_alive(dev->socks[i])) {
server = dnbd3_find_best_alt_server(dev);
if (server) {
- if (dnbd3_socket_connect(dev, server) == 0) {
+ if (dnbd3_server_connect(dev, server) == 0) {
sock_alive++;
} else {
warn_server(dev, server, "failed to connect");
@@ -638,7 +815,7 @@ static void dnbd3_adjust_connections(struct dnbd3_device *dev) {
}
}
- // replace socket with better server
+ /* replace socket with better server */
if (sock_alive == NUMBER_CONNECTIONS) {
for (i = 0; i < NUMBER_CONNECTIONS; i++) {
if (dnbd3_is_sock_alive(dev->socks[i])) {
@@ -647,20 +824,21 @@ static void dnbd3_adjust_connections(struct dnbd3_device *dev) {
if (server && dnbd3_better_rtt(server, dev->socks[i].server)) {
dnbd3_socket_disconnect(dev, NULL, &dev->socks[i]);
- if (dnbd3_socket_connect(dev, server) != 0) {
+ /* if it fails reconnect to existing */
+ if (dnbd3_server_connect(dev, server) != 0) {
warn_server(dev, server, "failed to connect");
- dnbd3_socket_connect(dev, existing_server);
+ dnbd3_server_connect(dev, existing_server);
}
}
}
}
}
- // remove a socket if it is much slower than the others
+ /* remove a socket if it is much slower than the others */
if (sock_alive > 1) {
for (i = 0; i < NUMBER_CONNECTIONS; i++) {
if (dnbd3_is_sock_alive(dev->socks[i])) {
- rtt = (dev->socks[i].server->rtts[0] + dev->socks[i].server->rtts[1] + dev->socks[i].server->rtts[2] + dev->socks[i].server->rtts[3]) / 4;
+ rtt = dnbd3_avg_rtt(dev->socks[i].server);
if (rtt <= best_rtt) {
best_rtt = rtt;
}
@@ -668,7 +846,7 @@ static void dnbd3_adjust_connections(struct dnbd3_device *dev) {
}
for (i = 0; i < NUMBER_CONNECTIONS; i++) {
if (dnbd3_is_sock_alive(dev->socks[i])) {
- rtt = (dev->socks[i].server->rtts[0] + dev->socks[i].server->rtts[1] + dev->socks[i].server->rtts[2] + dev->socks[i].server->rtts[3]) / 4;
+ rtt = dnbd3_avg_rtt(dev->socks[i].server);
if (rtt > RTT_THRESOULD_LIMIT(best_rtt)) {
info_sock(&dev->socks[i], "removing connection with rtt %llu", rtt);
dnbd3_socket_disconnect(dev, NULL, &dev->socks[i]);
@@ -678,16 +856,21 @@ static void dnbd3_adjust_connections(struct dnbd3_device *dev) {
}
}
- debug_dev(dev, "connected to %d/%d sockets", sock_alive, NUMBER_CONNECTIONS);
+ info_dev(dev, "connected to %d/%d sockets", sock_alive, NUMBER_CONNECTIONS);
}
+/**
+ * dnbd3_panic_connect - connect to the first available server
+ * @dev: the device
+ */
static int dnbd3_panic_connect(struct dnbd3_device *dev) {
int result, i;
- result = dnbd3_socket_connect(dev, &dev->initial_server);
+ result = dnbd3_server_connect(dev, &dev->initial_server);
if (result) {
for (i = 0; i < NUMBER_SERVERS; i++) {
if (dev->alt_servers[i].host.type != 0) {
- result = dnbd3_socket_connect(dev, &dev->alt_servers[i]);
+ result = dnbd3_server_connect(dev,
+ &dev->alt_servers[i]);
if (!result) {
info_server(dev, &dev->alt_servers[i], "found server to connect to");
break;
@@ -698,13 +881,22 @@ static int dnbd3_panic_connect(struct dnbd3_device *dev) {
return result;
}
+/**
+ * dnbd3_panic_worker - handle panicked sockets
+ * @work: the work used to get the dnbd3_device
+ *
+ * 1. disconnect panicked socket
+ * 2. reconnect to good alternative
+ * 3. if no socket is connected do a panic_connect
+ */
static void dnbd3_panic_worker(struct work_struct *work)
{
- struct dnbd3_device *dev = container_of(work, struct dnbd3_device, panic_worker);
+ struct dnbd3_device *dev;
struct dnbd3_sock *panicked_sock = NULL;
struct dnbd3_server *new_server, *panicked_server;
int i;
int sock_alive = 0;
+ dev = container_of(work, struct dnbd3_device, panic_worker);
for (i = 0; i < NUMBER_CONNECTIONS; i++) {
if (dev->socks[i].panic) {
panicked_sock = &dev->socks[i];
@@ -712,8 +904,10 @@ static void dnbd3_panic_worker(struct work_struct *work)
sock_alive++;
}
}
+
if (panicked_sock) {
- warn_sock(panicked_sock, "panicked, connections still alive %d", sock_alive);
+ warn_sock(panicked_sock, "panicked, connections still alive %d",
+ sock_alive);
panicked_server = panicked_sock->server;
new_server = dnbd3_find_best_alt_server(dev);
@@ -721,19 +915,12 @@ static void dnbd3_panic_worker(struct work_struct *work)
if (new_server != NULL && new_server != panicked_server) {
info_server(dev, new_server, "found replacement");
- if (!dnbd3_socket_connect(dev, new_server)) {
+ if (!dnbd3_server_connect(dev, new_server)) {
sock_alive++;
}
} else if (sock_alive > 0) {
- info_dev(dev, "found no replacement server but still connected to %d servers", sock_alive);
- }
- } else if (sock_alive == 0) {
- new_server = dnbd3_find_best_alt_server(dev);
- if (new_server != NULL) {
- info_server(dev, new_server, "reconnect to server");
- if (!dnbd3_socket_connect(dev, new_server)) {
- sock_alive++;
- }
+ info_dev(dev, "found no replacement server but still connected to %d servers",
+ sock_alive);
}
}
@@ -748,7 +935,13 @@ static void dnbd3_panic_worker(struct work_struct *work)
}
}
-static int dnbd3_meassure_rtt(struct dnbd3_device *dev, struct dnbd3_server *server)
+/**
+ * dnbd3_meassure_rtt - measure the rtt of a server
+ * @dev: the device this server belongs to
+ * @server: the server to measure
+ */
+static int dnbd3_meassure_rtt(struct dnbd3_device *dev,
+ struct dnbd3_server *server)
{
struct timeval start, end;
dnbd3_reply_t reply;
@@ -762,12 +955,12 @@ static int dnbd3_meassure_rtt(struct dnbd3_device *dev, struct dnbd3_server *ser
.server = server
};
- result = __dnbd3_socket_connect(server, &sock);
+ result = dnbd3_socket_connect(&sock, server);
if (result) {
error_sock(&sock, "socket connect failed in rtt measurement");
goto error;
}
- dnbd3_connect(&req);
+ dnbd3_connect_to_req(&req);
result = dnbd3_send_request_cmd(&sock, CMD_SELECT_IMAGE);
if (result <= 0) {
error_sock(&sock, "request select image failed in rtt measurement");
@@ -780,12 +973,14 @@ static int dnbd3_meassure_rtt(struct dnbd3_device *dev, struct dnbd3_server *ser
goto error;
}
- if (reply.magic != dnbd3_packet_magic || reply.cmd != CMD_SELECT_IMAGE || reply.size < 4) {
+ if (reply.magic != dnbd3_packet_magic || reply.cmd != CMD_SELECT_IMAGE
+ || reply.size < 4) {
error_sock(&sock, "receive select image wrong header in rtt measurement");
result = -EIO;
goto error;
}
- result = dnbd3_receive_cmd_select_image(dev, &sock, &reply);
+
+ result = dnbd3_receive_cmd_select_image(&sock, &reply);
if (result <= 0) {
error_sock(&sock, "receive data select image failed in rtt measurement");
goto error;
@@ -799,7 +994,8 @@ static int dnbd3_meassure_rtt(struct dnbd3_device *dev, struct dnbd3_server *ser
goto error;
}
result = dnbd3_receive_cmd(&sock, &reply);
- if (reply.magic != dnbd3_packet_magic|| reply.cmd != CMD_GET_BLOCK || reply.size != RTT_BLOCK_SIZE) {
+ if (reply.magic != dnbd3_packet_magic|| reply.cmd != CMD_GET_BLOCK
+ || reply.size != RTT_BLOCK_SIZE) {
error_sock(&sock, "receive header cmd test block failed in rtt measurement");
result = -EIO;
goto error;
@@ -811,9 +1007,10 @@ static int dnbd3_meassure_rtt(struct dnbd3_device *dev, struct dnbd3_server *ser
}
do_gettimeofday(&end); // end rtt measurement
- rtt = (uint64_t)((end.tv_sec - start.tv_sec) * 1000000ull + (end.tv_usec - start.tv_usec));
+ rtt = (uint64_t)((end.tv_sec - start.tv_sec) * 1000000ull
+ + (end.tv_usec - start.tv_usec));
- info_sock(&sock, "new rrt is %llu", rtt);
+ debug_sock(&sock, "new rrt is %llu", rtt);
error:
sock.server->rtts[dev->discovery_count % 4] = rtt;
@@ -830,63 +1027,82 @@ error:
}
-static void dnbd3_discovery_worker(struct work_struct *work)
+static void dnbd3_merge_new_server(struct dnbd3_device *dev,
+ dnbd3_server_entry_t *new_server)
{
- struct dnbd3_device *dev = container_of(work, struct dnbd3_device, discovery_worker);
- int i, j;
+ int i;
struct dnbd3_server *existing_server, *free_server, *failed_server;
- dnbd3_server_entry_t *new_server;
+ existing_server = NULL;
+ free_server = NULL;
+ failed_server = NULL;
+
+ /* find servers in alternative servers */
+ for (i = 0; i < NUMBER_SERVERS; i++) {
+ if ((new_server->host.type == dev->alt_servers[i].host.type)
+ && (new_server->host.port == dev->alt_servers[i].host.port)
+ && (0 == memcmp(new_server->host.addr,
+ dev->alt_servers[i].host.addr,
+ (new_server->host.type == HOST_IP4 ? 4 : 16)
+ ))) {
+
+ existing_server = &dev->alt_servers[i];
+ } else if (dev->alt_servers[i].host.type == 0) {
+ free_server = &dev->alt_servers[i];
+ } else if (dev->alt_servers[i].failures > 20) {
+ failed_server = &dev->alt_servers[i];
+ }
+ }
+
+ if (existing_server) {
+ if (new_server->failures == 1) { /* remove is requested */
+ info_server(dev, new_server,
+ "remove server is requested");
+ dnbd3_socket_disconnect(dev, existing_server, NULL);
+ existing_server->host.type = 0;
+ }
+// existing_server->failures = 0; // reset failure count
+ return;
+ } else if (free_server) {
+ free_server->host = new_server->host;
+ } else if (failed_server) {
+ failed_server->host = new_server->host;
+ free_server = failed_server;
+ } else {
+ /* no server found to replace */
+ return;
+ }
+ info_server(dev, free_server, "got new alternative server");
+ free_server->failures = 0;
+ free_server->protocol_version = 0;
+ dnbd3_set_rtt_unreachable(free_server);
+}
+/**
+ * dnbd3_discovery_worker - handle discovery
+ * @work: the work used to get the dnbd3_device
+ *
+ * 1. check if new servers are available and set them to alternative servers
+ * 2. measure the rtt for all available servers
+ * 3. adjust the connections
+ */
+static void dnbd3_discovery_worker(struct work_struct *work)
+{
+ struct dnbd3_device *dev;
+ int i;
+ struct dnbd3_server *server;
+ dnbd3_server_entry_t *new_server;
+ dev = container_of(work, struct dnbd3_device, discovery_worker);
- debug_dev(dev, "starting discovery worker new server num is %d", dev->new_servers_num);
+ debug_dev(dev, "starting discovery worker new server num is %d",
+ dev->new_servers_num);
if (dev->new_servers_num) {
mutex_lock(&dev->device_lock);
-
for (i = 0; i < dev->new_servers_num; i++) {
new_server = &dev->new_servers[i];
- if (new_server->host.type == HOST_IP4 || new_server->host.type == HOST_IP6) {
- existing_server = NULL;
- free_server = NULL;
- failed_server = NULL;
-
- // find servers in alt servers
- for (j = 0; j < NUMBER_SERVERS; j++) {
- if ((new_server->host.type == dev->alt_servers[j].host.type)
- && (new_server->host.port == dev->alt_servers[j].host.port)
- && (0 == memcmp(new_server->host.addr, dev->alt_servers[j].host.addr,
- (new_server->host.type == HOST_IP4 ? 4 : 16)))) {
-
- existing_server = &dev->alt_servers[j];
- } else if (dev->alt_servers[j].host.type == 0) {
- free_server = &dev->alt_servers[j];
- } else if (dev->alt_servers[j].failures > 20) {
- failed_server = &dev->alt_servers[j];
- }
- }
-
- if (existing_server) {
- if (new_server->failures == 1) { // remove is requested
- info_server(dev, new_server, "remove server is requested");
- dnbd3_socket_disconnect(dev, existing_server, NULL); // TODO what to do when only one connection?
- existing_server->host.type = 0;
- }
-// existing_server->failures = 0; // reset failure count
- continue;
- } else if (free_server) {
- free_server->host = new_server->host;
- } else if (failed_server) {
- failed_server->host = new_server->host;
- free_server = failed_server;
- } else {
- //no server found to replace
- continue;
- }
- info_server(dev, free_server, "got new alt server");
- free_server->failures = 0;
- free_server->protocol_version = 0;
- free_server->rtts[0] = free_server->rtts[1] = free_server->rtts[2] = free_server->rtts[3] = RTT_UNREACHABLE;
+ if (new_server->host.type != 0) {
+ dnbd3_merge_new_server(dev, new_server);
}
}
dev->new_servers_num = 0;
@@ -894,11 +1110,12 @@ static void dnbd3_discovery_worker(struct work_struct *work)
}
// measure rtt for all alt servers
for (i = 0; i < NUMBER_SERVERS; i++) {
- existing_server = &dev->alt_servers[i];
- if (existing_server->host.type) {
- if (dnbd3_meassure_rtt(dev, existing_server) <= 0) {
- existing_server->failures++;
- warn_server(dev, existing_server, "failed to meassure rtt");
+ server = &dev->alt_servers[i];
+ if (server->host.type) {
+ if (dnbd3_meassure_rtt(dev, server) <= 0) {
+ server->failures++;
+ warn_server(dev, server,
+ "failed to meassure rtt");
}
}
}
@@ -909,7 +1126,18 @@ static void dnbd3_discovery_worker(struct work_struct *work)
}
-static int __dnbd3_socket_connect(struct dnbd3_server *server, struct dnbd3_sock *sock)
+/*
+ * Connect and disconnect
+ */
+
+
+/**
+ * dnbd3_socket_connect - connect a socket to a server
+ * @sock: the socket to connect
+ * @server: the server
+ */
+static int dnbd3_socket_connect(struct dnbd3_sock *sock,
+ struct dnbd3_server *server)
{
int result = 0;
struct timeval timeout;
@@ -925,14 +1153,17 @@ static int __dnbd3_socket_connect(struct dnbd3_server *server, struct dnbd3_sock
timeout.tv_sec = SOCKET_TIMEOUT_CLIENT_DATA;
timeout.tv_usec = 0;
-
- if ((result = dnbd3_sock_create(server->host.type, SOCK_STREAM, IPPROTO_TCP, &sock->sock)) < 0) {
+ result = dnbd3_sock_create(server->host.type, SOCK_STREAM, IPPROTO_TCP,
+ &sock->sock);
+ if (result < 0) {
error_sock(sock, "could not create socket");
goto error;
}
- kernel_setsockopt(sock->sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout, sizeof(timeout));
- kernel_setsockopt(sock->sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, sizeof(timeout));
+ kernel_setsockopt(sock->sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout,
+ sizeof(timeout));
+ kernel_setsockopt(sock->sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
+ sizeof(timeout));
sock->sock->sk->sk_allocation = GFP_NOIO;
if (server->host.type == HOST_IP4) {
struct sockaddr_in sin;
@@ -940,7 +1171,9 @@ static int __dnbd3_socket_connect(struct dnbd3_server *server, struct dnbd3_sock
sin.sin_family = AF_INET;
memcpy(&(sin.sin_addr), server->host.addr, 4);
sin.sin_port = server->host.port;
- if ((result = kernel_connect(sock->sock, (struct sockaddr *)&sin, sizeof(sin), 0)) != 0) {
+ result = kernel_connect(sock->sock, (struct sockaddr *)&sin,
+ sizeof(sin), 0);
+ if (result != 0) {
error_sock(sock, "connection to host failed");
goto error;
}
@@ -950,7 +1183,9 @@ static int __dnbd3_socket_connect(struct dnbd3_server *server, struct dnbd3_sock
sin.sin6_family = AF_INET6;
memcpy(&(sin.sin6_addr), server->host.addr, 16);
sin.sin6_port = server->host.port;
- if ((result = kernel_connect(sock->sock, (struct sockaddr *)&sin, sizeof(sin), 0)) != 0){
+ result = kernel_connect(sock->sock, (struct sockaddr *)&sin,
+ sizeof(sin), 0);
+ if (result != 0){
error_sock(sock, "connection to host failed");
goto error;
}
@@ -966,9 +1201,19 @@ error:
}
/**
- * connect a dnbd3 device to a server
+ * dnbd3_server_connect - connect a server to a device
+ * @dev: the device
+ * @server: the server to connect
+ *
+ * 1. connects the server to a free socket if available
+ * 2. select the image
+ * 3. start receiver_worker and keepalive_worker
+ * 4. if it is the first connection start timer, panic_worker and
+ * discovery_worker
+ * 5. update the mq queues to the number of sockets alive
*/
-static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *server)
+static int dnbd3_server_connect(struct dnbd3_device *dev,
+ struct dnbd3_server *server)
{
int i;
int sock_alive = 0;
@@ -993,7 +1238,12 @@ static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *s
mutex_init(&sock->tx_lock);
mutex_lock(&sock->tx_lock);
- result = __dnbd3_socket_connect(server, sock);
+ result = dnbd3_socket_connect(sock, server);
+ if (result) {
+ error_sock(sock, "connection to socket failed");
+ result = -EIO;
+ goto error;
+ }
mutex_unlock(&sock->tx_lock);
sock->panic = false;
@@ -1014,22 +1264,27 @@ static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *s
}
result = dnbd3_receive_cmd(sock, &reply);
if (result <= 0) {
- error_sock(sock, "receive cmd to image %s failed", dev->imgname);
+ error_sock(sock, "receive cmd to image %s failed",
+ dev->imgname);
result = -EIO;
goto error;
}
- if (reply.magic != dnbd3_packet_magic || reply.cmd != CMD_SELECT_IMAGE || reply.size < 4) {
- error_sock(sock, "receive select image wrong header %s", dev->imgname);
+ if (reply.magic != dnbd3_packet_magic || reply.cmd != CMD_SELECT_IMAGE
+ || reply.size < 4) {
+ error_sock(sock, "receive select image wrong header %s",
+ dev->imgname);
result = -EIO;
goto error;
}
- result = dnbd3_receive_cmd_select_image(dev, sock, &reply);
+ result = dnbd3_receive_cmd_select_image(sock, &reply);
if (result <= 0) {
- error_sock(sock, "receive cmd select image %s failed", dev->imgname);
+ error_sock(sock, "receive cmd select image %s failed",
+ dev->imgname);
result = -EIO;
goto error;
}
- debug_sock(sock, "connected to image %s, filesize %llu", dev->imgname, dev->reported_size);
+ debug_sock(sock, "connected to image %s, filesize %llu", dev->imgname,
+ dev->reported_size);
// start the receiver
INIT_WORK(&sock->receive_worker, dnbd3_receive_worker);
@@ -1042,7 +1297,8 @@ static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *s
sock_alive++;
}
}
- if (sock_alive == 1) { // first socket to connect, start timer and workers
+ /* if first socket to connect, start timer and workers */
+ if (sock_alive == 1) {
debug_sock(sock, "first connection to server, starting workers");
INIT_WORK(&dev->discovery_worker, dnbd3_discovery_worker);
INIT_WORK(&dev->panic_worker, dnbd3_panic_worker);
@@ -1052,7 +1308,7 @@ static int dnbd3_socket_connect(struct dnbd3_device *dev, struct dnbd3_server *s
}
blk_mq_update_nr_hw_queues(&dev->tag_set, sock_alive);
- // request alternative servers receiver will handle this
+ /* request alternative servers receiver will handle this */
if (dnbd3_send_request_cmd(sock, CMD_GET_SERVERS) <= 0) {
error_sock(sock, "failed to get servers in discovery");
}
@@ -1071,7 +1327,18 @@ error:
}
-static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server *server, struct dnbd3_sock *sock)
+/**
+ * dnbd3_socket_disconnect - disconnect a socket or server
+ * @dev: the device
+ * @server: optional the server to disconnect
+ * @sock: optional the socket to disconnect
+ *
+ * 1. update nr of mq queues
+ * 2. if last socket remove timer
+ * 3. disconnect socket
+ */
+static int dnbd3_socket_disconnect(struct dnbd3_device *dev,
+ struct dnbd3_server *server, struct dnbd3_sock *sock)
{
int i;
int sock_alive = 0;
@@ -1091,10 +1358,10 @@ static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server
if (sock_alive <= 1) {
info_sock(sock, "shutting down last socket and stopping timer");
del_timer_sync(&dev->timer);
-// dev->timer_count = 0;
-// dev->discovery_count = 0;
-// cancel_work_sync(&dev->discovery_worker); // do not wait
-// cancel_work_sync(&dev->panic_worker); // do not wait for panic_worker, probably we are called from panic_worker
+ /*
+ * do not wait for discovery and panic worker as they may have
+ * called this method
+ */
}
cancel_work_sync(&sock->keepalive_worker);
@@ -1105,9 +1372,11 @@ static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server
/*
* Important sequence to shut down socket
* 1. kernel_sock_shutdown
- * socket shutdown, receiver which hangs in kernel_recvmsg returns 0
+ * socket shutdown, receiver which blocks in socket receive
+ * returns 0
* 2. cancel_work_sync(receiver)
- * wait for the receiver to finish, so the socket is not used anymore
+ * wait for the receiver to finish, so the socket is not used
+ * anymore
* 3. sock_release
* release the socket and set to NULL
*/
@@ -1128,18 +1397,50 @@ static int dnbd3_socket_disconnect(struct dnbd3_device *dev, struct dnbd3_server
return 0;
}
+/**
+ * dnbd3_net_connect - connect device
+ * @dev: the device to connect
+ *
+ * dnbd3_device.alt_servers[0] must be set
+ */
+int dnbd3_net_connect(struct dnbd3_device *dev)
+{
+ int result;
+ debug_dev(dev, "connecting to server");
+
+ if (dev->alt_servers[0].host.type == 0) {
+ return -ENONET;
+ }
+
+ // alt_server[0] is the initial server
+ result = dnbd3_server_connect(dev, &dev->alt_servers[0]);
+ if (result) {
+ error_dev(dev, "failed to connect to initial server");
+ result = -ENOENT;
+ dev->imgname = NULL;
+ dev->socks[0].server = NULL;
+ }
+ return result;
+}
+
+/**
+ * dnbd3_net_disconnect - disconnect device
+ * @dev: the device to disconnect
+ */
int dnbd3_net_disconnect(struct dnbd3_device *dev)
{
int i;
int result = 0;
del_timer_sync(&dev->timer);
+ /* be sure it does not recover while disconnecting */
cancel_work_sync(&dev->discovery_worker);
- cancel_work_sync(&dev->panic_worker); // be sure it does not recover while disconnecting
+ cancel_work_sync(&dev->panic_worker);
for (i = 0; i < NUMBER_CONNECTIONS; i++) {
if (dev->socks[i].sock) {
- if (dnbd3_socket_disconnect(dev, NULL, &dev->socks[i])) {
+ if (dnbd3_socket_disconnect(dev, NULL,
+ &dev->socks[i])) {
result = -EIO;
}
}
@@ -1148,25 +1449,3 @@ int dnbd3_net_disconnect(struct dnbd3_device *dev)
}
-int dnbd3_net_connect(struct dnbd3_device *dev)
-{
- int result;
- debug_dev(dev, "connecting to server");
-
- // alt_server[0] is the initial server
- if (dnbd3_socket_connect(dev, &dev->alt_servers[0]) == 0) {
- dnbd3_print_server_list(dev);
-
-
- result = 0;
- } else {
- error_dev(dev, "failed to connect to initial server");
- result = -ENOENT;
- dev->imgname = NULL;
- dev->socks[0].server = NULL;
- }
- return result;
-}
-
-
-