summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Simon Rettberg, 2021-03-24 16:07:15 +0100
committer: Simon Rettberg, 2021-03-24 16:07:15 +0100
commit: c73bc9413da8bc964d82e0a58525c7280cc1a456 (patch)
tree: c2ec982722fd79c413d97edefa089a6176ebef9b
parent: [KERNEL] Set fake low RTT after manual server switch (diff)
download: dnbd3-c73bc9413da8bc964d82e0a58525c7280cc1a456.tar.gz
dnbd3-c73bc9413da8bc964d82e0a58525c7280cc1a456.tar.xz
dnbd3-c73bc9413da8bc964d82e0a58525c7280cc1a456.zip
[KERNEL] Use sockaddr instead of dnbd3_host_t where possible
Convert dnbd3_host_t to struct sockaddr_storage immediately when adding alt servers, so we don't have to convert it every time we establish a connection. Additionally, we can now use %pISpc in printf-like functions instead of having if/else constructs whenever we want to print an address.
-rw-r--r-- src/kernel/blk.c 113
-rw-r--r-- src/kernel/dnbd3_main.c 103
-rw-r--r-- src/kernel/dnbd3_main.h 25
-rw-r--r-- src/kernel/net.c 169
-rw-r--r-- src/kernel/sysfs.c 43
-rw-r--r-- src/kernel/sysfs.h 4
6 files changed, 216 insertions, 241 deletions
diff --git a/src/kernel/blk.c b/src/kernel/blk.c
index 90e9b34..c313d63 100644
--- a/src/kernel/blk.c
+++ b/src/kernel/blk.c
@@ -107,9 +107,6 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
} else if (msg == NULL) {
result = -EINVAL;
} else {
- if (sizeof(msg->hosts[0]) != sizeof(dev->cur_server.host))
- dev_warn(dnbd3_device_to_dev(dev), "odd size bug triggered in IOCTL\n");
-
/* assert that at least one and not to many hosts are given */
if (msg->hosts_num < 1 || msg->hosts_num > NUMBER_SERVERS) {
result = -EINVAL;
@@ -127,16 +124,12 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
/* add specified servers to alt server list */
for (i = 0; i < msg->hosts_num; i++) {
/* copy provided host into corresponding alt server slot */
- memset(&dev->alt_servers[i], 0, sizeof(dev->alt_servers[i]));
- memcpy(&dev->alt_servers[i].host, &msg->hosts[i], sizeof(msg->hosts[i]));
- dev->alt_servers[i].failures = 0;
-
- if (dev->alt_servers[i].host.type == HOST_IP4)
- dev_dbg(dnbd3_device_to_dev(dev), "adding server %pI4\n",
- dev->alt_servers[i].host.addr);
+ if (dnbd3_add_server(dev, &msg->hosts[i]) == 0)
+ dev_dbg(dnbd3_device_to_dev(dev), "adding server %pISpc\n",
+ &dev->alt_servers[i].host);
else
- dev_dbg(dnbd3_device_to_dev(dev), "adding server [%pI6]\n",
- dev->alt_servers[i].host.addr);
+ dev_warn(dnbd3_device_to_dev(dev), "could not add alt server %pISpc\n",
+ &dev->alt_servers[i].host);
}
/*
@@ -145,8 +138,10 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
*/
for (i = 0; i < msg->hosts_num; i++) {
/* probe added alt server */
- memcpy(&dev->cur_server, &dev->alt_servers[i], sizeof(dev->cur_server));
+ if (dev->alt_servers[i].host.ss_family == 0)
+ continue; // Empty slot
+ dev->cur_server.host = dev->alt_servers[i].host;
if (dnbd3_net_connect(dev) != 0) {
/*
* probing server failed, cleanup connection and
@@ -163,13 +158,8 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
if (result >= 0) {
/* probing was successful */
- if (dev->cur_server.host.type == HOST_IP4)
- dev_dbg(dnbd3_device_to_dev(dev), "server %pI4 is initial server\n",
- dev->cur_server.host.addr);
- else
- dev_dbg(dnbd3_device_to_dev(dev), "server [%pI6] is initial server\n",
- dev->cur_server.host.addr);
-
+ dev_dbg(dnbd3_device_to_dev(dev), "server %pISpc is initial server\n",
+ &dev->cur_server.host);
imgname = NULL; // Prevent kfree at the end
} else {
/* probing failed */
@@ -198,44 +188,43 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
} else if (msg == NULL) {
result = -EINVAL;
} else {
- dnbd3_server_t *alt_server;
+ dnbd3_alt_server_t *alt_server;
+ struct sockaddr_storage new_addr;
mutex_lock(&dev->alt_servers_lock);
- alt_server = get_existing_server(&msg->hosts[0], dev);
+ alt_server = get_existing_alt_from_host(&msg->hosts[0], dev);
if (alt_server == NULL) {
mutex_unlock(&dev->alt_servers_lock);
/* specified server is not known, so do not switch */
result = -ENOENT;
} else {
/* specified server is known, so try to switch to it */
- dnbd3_server_t new_server = *alt_server;
-
- new_server = *alt_server;
+ new_addr = alt_server->host;
mutex_unlock(&dev->alt_servers_lock);
- if (!is_same_server(&dev->cur_server, &new_server)) {
- dnbd3_server_t old_server;
-
- if (new_server.host.type == HOST_IP4)
- dev_info(dnbd3_device_to_dev(dev), "manual server switch to %pI4\n",
- new_server.host.addr);
- else
- dev_info(dnbd3_device_to_dev(dev), "manual server switch to [%pI6]\n",
- new_server.host.addr);
+ if (!is_same_server(&dev->cur_server.host, &new_addr)) {
+ struct sockaddr_storage old_server;
+
+ dev_info(dnbd3_device_to_dev(dev), "manual server switch to %pISpc\n",
+ &new_addr);
/* save current working server */
/* lock device to get consistent copy of current working server */
spin_lock_irqsave(&dev->blk_lock, irqflags);
- memcpy(&old_server, &dev->cur_server, sizeof(old_server));
+ old_server = dev->cur_server.host;
spin_unlock_irqrestore(&dev->blk_lock, irqflags);
/* disconnect old server */
dnbd3_net_disconnect(dev);
/* connect to new specified server (switching) */
- memcpy(&dev->cur_server, &new_server, sizeof(dev->cur_server));
+ spin_lock_irqsave(&dev->blk_lock, irqflags);
+ dev->cur_server.host = new_addr;
+ spin_unlock_irqrestore(&dev->blk_lock, irqflags);
result = dnbd3_net_connect(dev);
if (result != 0) {
/* reconnect with old server if switching has failed */
- memcpy(&dev->cur_server, &old_server, sizeof(dev->cur_server));
+ spin_lock_irqsave(&dev->blk_lock, irqflags);
+ dev->cur_server.host = old_server;
+ spin_unlock_irqrestore(&dev->blk_lock, irqflags);
if (dnbd3_net_connect(dev) != 0) {
/* we couldn't reconnect to the old server */
/* device is dangling now and needs another SWITCH call */
@@ -250,7 +239,7 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
} else {
/* switch succeeded, fake very low RTT so we don't switch away again soon */
mutex_lock(&dev->alt_servers_lock);
- if (is_same_server(alt_server, &new_server)) {
+ if (is_same_server(&alt_server->host, &new_addr)) {
alt_server->rtts[0] = alt_server->rtts[1] = alt_server->rtts[2] = alt_server->rtts[3] = 4;
}
mutex_unlock(&dev->alt_servers_lock);
@@ -264,7 +253,10 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
break;
case IOCTL_ADD_SRV:
- case IOCTL_REM_SRV:
+ case IOCTL_REM_SRV: {
+ struct sockaddr_storage addr;
+ dnbd3_host_t *host;
+
if (dev->imgname == NULL) {
result = -ENOTCONN;
break;
@@ -273,44 +265,31 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
result = -EINVAL;
break;
}
- if (cmd == IOCTL_ADD_SRV) {
- dnbd3_host_t *host = &msg->hosts[0];
+ host = &msg->hosts[0];
+ if (!dnbd3_host_to_sockaddr(host, &addr)) {
+ result = -EINVAL;
+ break;
+ }
+ if (cmd == IOCTL_ADD_SRV) {
result = dnbd3_add_server(dev, host);
if (result == -EEXIST) {
- // Exists
- if (host->type == HOST_IP4) {
- dev_info(dnbd3_device_to_dev(dev), "alt server %pI4 already exists\n",
- host->addr);
- } else {
- dev_info(dnbd3_device_to_dev(dev), "alt server [%pI6] already exists\n",
- host->addr);
- }
+ dev_info(dnbd3_device_to_dev(dev), "alt server %pISpc already exists\n", &addr);
} else if (result == -ENOSPC) {
- if (host->type == HOST_IP4) {
- dev_info(dnbd3_device_to_dev(dev), "cannot add %pI4; no free slot\n",
- host->addr);
- } else {
- dev_info(dnbd3_device_to_dev(dev), "cannot add [%pI6]; no free slot\n",
- host->addr);
- }
+ dev_info(dnbd3_device_to_dev(dev), "cannot add %pISpc; no free slot\n", &addr);
+ } else {
+ dev_info(dnbd3_device_to_dev(dev), "added alt server %pISpc\n", &addr);
}
} else { // IOCTL_REM_SRV
- dnbd3_host_t *host = &msg->hosts[0];
-
- result = dnbd3_rem_server(dev, &msg->hosts[0]);
+ result = dnbd3_rem_server(dev, host);
if (result == -ENOENT) {
- if (host->type == HOST_IP4) {
- dev_info(dnbd3_device_to_dev(dev), "alt server %pI4 not found\n",
- host->addr);
- } else {
- dev_info(dnbd3_device_to_dev(dev), "alt server [%pI6] not found\n",
- host->addr);
- }
+ dev_info(dnbd3_device_to_dev(dev), "alt server %pISpc not found\n", &addr);
+ } else {
+ dev_info(dnbd3_device_to_dev(dev), "removed alt server %pISpc\n", &addr);
}
}
break;
-
+ }
case BLKFLSBUF:
result = 0;
break;
diff --git a/src/kernel/dnbd3_main.c b/src/kernel/dnbd3_main.c
index 7a3b1d5..9b5591d 100644
--- a/src/kernel/dnbd3_main.c
+++ b/src/kernel/dnbd3_main.c
@@ -23,6 +23,7 @@
#include <dnbd3/config/client.h>
#include <dnbd3/version.h>
+#include <net/ipv6.h>
#include "dnbd3_main.h"
#include "blk.h"
@@ -35,10 +36,54 @@ struct device *dnbd3_device_to_dev(dnbd3_device_t *dev)
return disk_to_dev(dev->disk);
}
-int is_same_server(const dnbd3_server_t *const a, const dnbd3_server_t *const b)
+int dnbd3_host_to_sockaddr(const dnbd3_host_t *host, struct sockaddr_storage *dest)
{
- return (a->host.type == b->host.type) && (a->host.port == b->host.port) &&
- (0 == memcmp(a->host.addr, b->host.addr, (a->host.type == HOST_IP4 ? 4 : 16)));
+ struct sockaddr_in *sin4;
+ struct sockaddr_in6 *sin6;
+
+ memset(dest, 0, sizeof(*dest));
+ if (host->type == HOST_IP4) {
+ sin4 = (struct sockaddr_in*)dest;
+ sin4->sin_family = AF_INET;
+ memcpy(&(sin4->sin_addr), host->addr, 4);
+ sin4->sin_port = host->port;
+ } else if (host->type == HOST_IP6) {
+ sin6 = (struct sockaddr_in6*)dest;
+ sin6->sin6_family = AF_INET6;
+ memcpy(&(sin6->sin6_addr), host->addr, 16);
+ sin6->sin6_port = host->port;
+ } else
+ return 0;
+ return 1;
+}
+
+int is_same_server(const struct sockaddr_storage *const x, const struct sockaddr_storage *const y)
+{
+ if (x->ss_family != y->ss_family)
+ return 0;
+ switch (x->ss_family) {
+ case AF_INET: {
+ const struct sockaddr_in *sinx = (const struct sockaddr_in *)x;
+ const struct sockaddr_in *siny = (const struct sockaddr_in *)y;
+ if (sinx->sin_port != siny->sin_port)
+ return 0;
+ if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr)
+ return 0;
+ break;
+ }
+ case AF_INET6: {
+ const struct sockaddr_in6 *sinx = (const struct sockaddr_in6 *)x;
+ const struct sockaddr_in6 *siny = (const struct sockaddr_in6 *)y;
+ if (sinx->sin6_port != siny->sin6_port)
+ return 0;
+ if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr))
+ return 0;
+ break;
+ }
+ default:
+ return 0;
+ }
+ return 1;
}
/**
@@ -48,12 +93,12 @@ int is_same_server(const dnbd3_server_t *const a, const dnbd3_server_t *const b)
* conditions match.
* The caller has to hold dev->alt_servers_lock.
*/
-static inline dnbd3_server_t *get_free_alt_server(dnbd3_device_t *const dev)
+static dnbd3_alt_server_t *get_free_alt_server(dnbd3_device_t *const dev)
{
int i;
for (i = 0; i < NUMBER_SERVERS; ++i) {
- if (dev->alt_servers[i].host.type == 0)
+ if (dev->alt_servers[i].host.ss_family == 0)
return &dev->alt_servers[i];
}
for (i = 0; i < NUMBER_SERVERS; ++i) {
@@ -63,33 +108,43 @@ static inline dnbd3_server_t *get_free_alt_server(dnbd3_device_t *const dev)
return NULL;
}
-/**
- * Returns pointer to existing entry in alt_servers that matches the given
- * alt server, or NULL if not found.
- * The caller has to hold dev->alt_servers_lock.
- */
-dnbd3_server_t *get_existing_server(const dnbd3_host_t *const newserver, dnbd3_device_t *const dev)
+dnbd3_alt_server_t *get_existing_alt_from_addr(const struct sockaddr_storage *const addr,
+ dnbd3_device_t *const dev)
{
int i;
for (i = 0; i < NUMBER_SERVERS; ++i) {
- if ((newserver->type == dev->alt_servers[i].host.type) &&
- (newserver->port == dev->alt_servers[i].host.port) &&
- (0 == memcmp(newserver->addr, dev->alt_servers[i].host.addr,
- (newserver->type == HOST_IP4 ? 4 : 16)))) {
+ if (is_same_server(addr, &dev->alt_servers[i].host))
return &dev->alt_servers[i];
- }
}
return NULL;
}
+/**
+ * Returns pointer to existing entry in alt_servers that matches the given
+ * alt server, or NULL if not found.
+ * The caller has to hold dev->alt_servers_lock.
+ */
+dnbd3_alt_server_t *get_existing_alt_from_host(const dnbd3_host_t *const host, dnbd3_device_t *const dev)
+{
+ struct sockaddr_storage addr;
+
+ if (!dnbd3_host_to_sockaddr(host, &addr))
+ return NULL;
+ return get_existing_alt_from_addr(&addr, dev);
+}
+
int dnbd3_add_server(dnbd3_device_t *dev, dnbd3_host_t *host)
{
int result;
- dnbd3_server_t *alt_server;
+ dnbd3_alt_server_t *alt_server;
+
+ if (host->type != HOST_IP4 && host->type != HOST_IP6)
+ return -EINVAL;
+
/* protect access to 'alt_servers' */
mutex_lock(&dev->alt_servers_lock);
- alt_server = get_existing_server(host, dev);
+ alt_server = get_existing_alt_from_host(host, dev);
// ADD
if (alt_server != NULL) {
// Exists
@@ -100,7 +155,10 @@ int dnbd3_add_server(dnbd3_device_t *dev, dnbd3_host_t *host)
if (alt_server == NULL) {
result = -ENOSPC;
} else {
- alt_server->host = *host;
+ dnbd3_host_to_sockaddr(host, &alt_server->host);
+ alt_server->protocol_version = 0;
+ alt_server->rtts[0] = alt_server->rtts[1] = alt_server->rtts[2]
+ = alt_server->rtts[3] = RTT_UNREACHABLE;
alt_server->failures = 0;
result = 0;
}
@@ -111,18 +169,19 @@ int dnbd3_add_server(dnbd3_device_t *dev, dnbd3_host_t *host)
int dnbd3_rem_server(dnbd3_device_t *dev, dnbd3_host_t *host)
{
- dnbd3_server_t *alt_server;
+ dnbd3_alt_server_t *alt_server;
int result;
+
/* protect access to 'alt_servers' */
mutex_lock(&dev->alt_servers_lock);
- alt_server = get_existing_server(host, dev);
+ alt_server = get_existing_alt_from_host(host, dev);
// REMOVE
if (alt_server == NULL) {
// Not found
result = -ENOENT;
} else {
// Remove
- alt_server->host.type = 0;
+ alt_server->host.ss_family = 0;
result = 0;
}
mutex_unlock(&dev->alt_servers_lock);
diff --git a/src/kernel/dnbd3_main.h b/src/kernel/dnbd3_main.h
index a69d588..42b9f58 100644
--- a/src/kernel/dnbd3_main.h
+++ b/src/kernel/dnbd3_main.h
@@ -37,11 +37,11 @@
extern int major;
typedef struct {
- dnbd3_host_t host;
- unsigned long rtts[4]; // Last four round trip time measurements in µs
+ unsigned long rtts[4]; // Last four round trip time measurements in µs
uint16_t protocol_version; // dnbd3 protocol version of this server
- uint8_t failures; // How many times the server was unreachable
-} dnbd3_server_t;
+ uint8_t failures; // How many times the server was unreachable
+ struct sockaddr_storage host; // Address of server
+} dnbd3_alt_server_t;
typedef struct {
// block
@@ -57,10 +57,13 @@ typedef struct {
struct mutex alt_servers_lock;
char *imgname;
struct socket *sock;
- dnbd3_server_t cur_server;
- unsigned long cur_rtt;
+ struct {
+ unsigned long rtt;
+ struct sockaddr_storage host;
+ uint16_t protocol_version;
+ } cur_server;
serialized_buffer_t payload_buffer;
- dnbd3_server_t alt_servers[NUMBER_SERVERS]; // array of alt servers, protected by altservers_lock
+ dnbd3_alt_server_t alt_servers[NUMBER_SERVERS]; // array of alt servers, protected by alt_servers_lock
uint8_t discover, panic, update_available, panic_count;
atomic_t connection_lock;
uint8_t use_server_provided_alts;
@@ -84,9 +87,13 @@ typedef struct {
extern inline struct device *dnbd3_device_to_dev(dnbd3_device_t *dev);
-extern inline int is_same_server(const dnbd3_server_t *const a, const dnbd3_server_t *const b);
+extern inline int is_same_server(const struct sockaddr_storage *const x, const struct sockaddr_storage *const y);
-extern dnbd3_server_t *get_existing_server(const dnbd3_host_t *const newserver, dnbd3_device_t *const dev);
+extern int dnbd3_host_to_sockaddr(const dnbd3_host_t *host, struct sockaddr_storage *dest);
+
+extern dnbd3_alt_server_t *get_existing_alt_from_host(const dnbd3_host_t *const host, dnbd3_device_t *const dev);
+
+extern dnbd3_alt_server_t *get_existing_alt_from_addr(const struct sockaddr_storage *const addr, dnbd3_device_t *const dev);
extern int dnbd3_add_server(dnbd3_device_t *dev, dnbd3_host_t *host);
diff --git a/src/kernel/net.c b/src/kernel/net.c
index 6d821fc..49a4fe7 100644
--- a/src/kernel/net.c
+++ b/src/kernel/net.c
@@ -51,9 +51,6 @@
} while (0)
#endif
-#define dnbd3_sock_create(af, type, proto, sock) \
- sock_create_kern(&init_net, (af) == HOST_IP4 ? AF_INET : AF_INET6, type, proto, sock)
-
// cmd_flags and cmd_type are merged into cmd_flags now
#if REQ_FLAG_BITS > 24
#error "Fix CMD bitshift"
@@ -65,40 +62,17 @@
#define DNBD3_DEV_READ REQ_OP_READ
#define DNBD3_REQ_OP_SPECIAL REQ_OP_DRV_IN
-/**
- * Some macros for easier debug output.
- * Server IP:port info will be printed.
- */
-#define __dnbd3_dev_dbg_host(dev, host, fmt, ...) \
- do { \
- if ((host).type == HOST_IP4) { \
- dev_dbg(dnbd3_device_to_dev((dev)), "(%pI4:%d): " fmt, (host).addr, (int)ntohs((host).port), \
- ##__VA_ARGS__); \
- } else { \
- dev_dbg(dnbd3_device_to_dev((dev)), "([%pI6]:%d): " fmt, (host).addr, (int)ntohs((host).port), \
- ##__VA_ARGS__); \
- } \
- } while (0)
-
-#define __dnbd3_dev_err_host(dev, host, fmt, ...) \
- do { \
- if ((host).type == HOST_IP4) { \
- dev_err(dnbd3_device_to_dev((dev)), "(%pI4:%d): " fmt, (host).addr, (int)ntohs((host).port), \
- ##__VA_ARGS__); \
- } else { \
- dev_err(dnbd3_device_to_dev((dev)), "([%pI6]:%d): " fmt, (host).addr, (int)ntohs((host).port), \
- ##__VA_ARGS__); \
- } \
- } while (0)
+#define dnbd3_dev_dbg_host_cur(dev, fmt, ...) \
+ dev_dbg(dnbd3_device_to_dev(dev), "(%pISpc): " fmt, &(dev)->cur_server.host, ##__VA_ARGS__)
+#define dnbd3_dev_err_host_cur(dev, fmt, ...) \
+ dev_err(dnbd3_device_to_dev(dev), "(%pISpc): " fmt, &(dev)->cur_server.host, ##__VA_ARGS__)
-#define dnbd3_dev_dbg_host_cur(dev, fmt, ...) __dnbd3_dev_dbg_host((dev), (dev)->cur_server.host, fmt, ##__VA_ARGS__)
-#define dnbd3_dev_err_host_cur(dev, fmt, ...) __dnbd3_dev_err_host((dev), (dev)->cur_server.host, fmt, ##__VA_ARGS__)
-#define dnbd3_dev_dbg_host_alt(dev, fmt, ...) \
- __dnbd3_dev_dbg_host((dev), (dev)->alt_servers[i].host, fmt, ##__VA_ARGS__)
-#define dnbd3_dev_err_host_alt(dev, fmt, ...) \
- __dnbd3_dev_err_host((dev), (dev)->alt_servers[i].host, fmt, ##__VA_ARGS__)
+#define dnbd3_dev_dbg_host_alt(dev, fmt, ...) \
+ dev_dbg(dnbd3_device_to_dev(dev), "(%pISpc): " fmt, &(dev)->alt_servers[i].host, ##__VA_ARGS__)
+#define dnbd3_dev_err_host_alt(dev, fmt, ...) \
+ dev_err(dnbd3_device_to_dev(dev), "(%pISpc): " fmt, &(dev)->alt_servers[i].host, ##__VA_ARGS__)
-static struct socket *dnbd3_connect(dnbd3_device_t *dev, dnbd3_host_t *host);
+static struct socket *dnbd3_connect(dnbd3_device_t *dev, struct sockaddr_storage *addr);
static void dnbd3_net_heartbeat(struct timer_list *arg)
{
@@ -153,7 +127,7 @@ static int dnbd3_net_discover(void *data)
dnbd3_request_t dnbd3_request;
dnbd3_reply_t dnbd3_reply;
- dnbd3_server_t host_compare, best_server;
+ struct sockaddr_storage host_compare, best_server;
struct msghdr msg;
struct kvec iov[2];
@@ -166,7 +140,7 @@ static int dnbd3_net_discover(void *data)
ktime_t start = 0, end = 0;
unsigned long rtt, best_rtt = 0;
unsigned long irqflags;
- int i, j, isize;
+ int i, j, isize, fails;
int turn = 0;
int ready = 0, do_change = 0;
char check_order[NUMBER_SERVERS];
@@ -204,7 +178,7 @@ static int dnbd3_net_discover(void *data)
if (dev->reported_size < 4096)
continue;
- best_server.host.type = 0;
+ best_server.ss_family = 0;
best_rtt = 0xFFFFFFFul;
if (dev->heartbeat_count < STARTUP_MODE_DURATION || dev->panic)
@@ -226,22 +200,21 @@ static int dnbd3_net_discover(void *data)
for (j = 0; j < NUMBER_SERVERS; ++j) {
i = check_order[j];
mutex_lock(&dev->alt_servers_lock);
- host_compare = dev->alt_servers[i];
+ host_compare = dev->alt_servers[i].host;
+ fails = dev->alt_servers[i].failures;
mutex_unlock(&dev->alt_servers_lock);
- if (host_compare.host.type == 0)
+ if (host_compare.ss_family == 0)
continue; // Empty slot
- if (!dev->panic && host_compare.failures > 50
+ if (!dev->panic && fails > 50
&& (ktime_to_us(start) & 7) != 0)
continue; // If not in panic mode, skip server if it failed too many times
- if (isize-- <= 0 && !is_same_server(&dev->cur_server, &host_compare))
+ if (isize-- <= 0 && !is_same_server(&dev->cur_server.host, &host_compare))
continue; // Only test isize servers plus current server
// Initialize socket and connect
- sock = dnbd3_connect(dev, &host_compare.host);
- if (sock == NULL) {
- dnbd3_dev_dbg_host_alt(dev, "%s: Couldn't connect\n", __func__);
+ sock = dnbd3_connect(dev, &host_compare);
+ if (sock == NULL)
goto error;
- }
// Request filesize
dnbd3_request.cmd = CMD_SELECT_IMAGE;
@@ -345,7 +318,7 @@ static int dnbd3_net_discover(void *data)
dev->thread_discover = NULL;
dnbd3_net_disconnect(dev);
spin_lock_irqsave(&dev->blk_lock, irqflags);
- dev->cur_server = host_compare;
+ dev->cur_server.host = host_compare;
spin_unlock_irqrestore(&dev->blk_lock, irqflags);
dnbd3_net_connect(dev);
atomic_set(&dev->connection_lock, 0);
@@ -400,7 +373,7 @@ static int dnbd3_net_discover(void *data)
end = ktime_get_real(); // end rtt measurement
mutex_lock(&dev->alt_servers_lock);
- if (is_same_server(&dev->alt_servers[i], &host_compare)) {
+ if (is_same_server(&dev->alt_servers[i].host, &host_compare)) {
dev->alt_servers[i].protocol_version = remote_version;
dev->alt_servers[i].rtts[turn] = (unsigned long)ktime_us_delta(end, start);
@@ -426,8 +399,8 @@ static int dnbd3_net_discover(void *data)
}
// update cur servers rtt
- if (is_same_server(&dev->cur_server, &host_compare))
- dev->cur_rtt = rtt;
+ if (is_same_server(&dev->cur_server.host, &host_compare))
+ dev->cur_server.rtt = rtt;
continue;
@@ -437,16 +410,14 @@ error:
sock = NULL;
}
mutex_lock(&dev->alt_servers_lock);
- if (is_same_server(&dev->alt_servers[i], &host_compare)) {
+ if (is_same_server(&dev->alt_servers[i].host, &host_compare)) {
++dev->alt_servers[i].failures;
dev->alt_servers[i].rtts[turn] = RTT_UNREACHABLE;
}
mutex_unlock(&dev->alt_servers_lock);
- if (is_same_server(&dev->cur_server, &host_compare))
- dev->cur_rtt = RTT_UNREACHABLE;
-
- continue;
- }
+ if (is_same_server(&dev->cur_server.host, &host_compare))
+ dev->cur_server.rtt = RTT_UNREACHABLE;
+ } // for loop over alt_servers
if (dev->panic) {
// If probe timeout is set, report error to block layer
@@ -455,7 +426,7 @@ error:
dnbd3_blk_fail_all_requests(dev);
}
- if (best_server.host.type == 0 || kthread_should_stop() || dev->thread_discover == NULL) {
+ if (best_server.ss_family == 0 || kthread_should_stop() || dev->thread_discover == NULL) {
// No alt server could be reached at all or thread should stop
if (best_sock != NULL) {
// Should never happen actually
@@ -465,16 +436,16 @@ error:
continue;
}
- do_change = ready && !is_same_server(&best_server, &dev->cur_server) && (ktime_to_us(start) & 3) != 0
- && RTT_THRESHOLD_FACTOR(dev->cur_rtt) > best_rtt + 1500;
+ do_change = ready && !is_same_server(&best_server, &dev->cur_server.host)
+ && (ktime_to_us(start) & 3) != 0 && RTT_THRESHOLD_FACTOR(dev->cur_server.rtt) > best_rtt + 1500;
- if (ready && !do_change) {
+ if (ready && !do_change && best_sock != NULL) {
spin_lock_irqsave(&dev->blk_lock, irqflags);
if (!list_empty(&dev->request_queue_send)) {
cur_request = list_entry(dev->request_queue_send.next, struct request, queuelist);
do_change = (cur_request == last_request);
if (do_change)
- dev_warn(dnbd3_device_to_dev(dev), "hung request\n");
+ dev_warn(dnbd3_device_to_dev(dev), "hung request, triggering change\n");
} else {
cur_request = (struct request *)123;
}
@@ -485,17 +456,17 @@ error:
// take server with lowest rtt
// if a (dis)connect is already in progress, we do nothing, this is not panic mode
if (do_change && atomic_cmpxchg(&dev->connection_lock, 0, 1) == 0) {
- dev_info(dnbd3_device_to_dev(dev), "server %d is faster (%lluµs vs. %lluµs)\n", -1, // XXX
- (unsigned long long)best_rtt, (unsigned long long)dev->cur_rtt);
+ dev_info(dnbd3_device_to_dev(dev), "server %pISpc is faster (%lluµs vs. %lluµs)\n", &best_server,
+ (unsigned long long)best_rtt, (unsigned long long)dev->cur_server.rtt);
kfree(buf);
dev->better_sock = best_sock; // Take shortcut by continuing to use open connection
put_task_struct(dev->thread_discover);
dev->thread_discover = NULL;
dnbd3_net_disconnect(dev);
spin_lock_irqsave(&dev->blk_lock, irqflags);
- dev->cur_server = best_server;
+ dev->cur_server.host = best_server;
spin_unlock_irqrestore(&dev->blk_lock, irqflags);
- dev->cur_rtt = best_rtt;
+ dev->cur_server.rtt = best_rtt;
dnbd3_net_connect(dev);
atomic_set(&dev->connection_lock, 0);
return 0;
@@ -507,6 +478,7 @@ error:
best_sock = NULL;
}
+ // Increase rtt array index pointer, low probability that it doesn't advance
if (!ready || (ktime_to_us(start) & 15) != 0)
turn = (turn + 1) % 4;
if (turn == 2) // Set ready when we only have 2 of 4 measurements for quicker load balancing
@@ -851,8 +823,9 @@ cleanup:
return 0;
}
-static struct socket *dnbd3_connect(dnbd3_device_t *dev, dnbd3_host_t *host)
+static struct socket *dnbd3_connect(dnbd3_device_t *dev, struct sockaddr_storage *addr)
{
+ ktime_t start;
int ret;
struct socket *sock;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0)
@@ -873,7 +846,7 @@ static struct socket *dnbd3_connect(dnbd3_device_t *dev, dnbd3_host_t *host)
timeout.tv_sec = SOCKET_TIMEOUT_CLIENT_DATA;
timeout.tv_usec = 0;
- if (dnbd3_sock_create(host->type, SOCK_STREAM, IPPROTO_TCP, &sock) < 0) {
+ if (sock_create_kern(&init_net, addr->ss_family, SOCK_STREAM, IPPROTO_TCP, &sock) < 0) {
dev_err(dnbd3_device_to_dev(dev), "couldn't create socket\n");
return NULL;
}
@@ -886,34 +859,16 @@ static struct socket *dnbd3_connect(dnbd3_device_t *dev, dnbd3_host_t *host)
sock_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, timeout_ptr, sizeof(timeout));
#endif
sock->sk->sk_allocation = GFP_NOIO;
- if (host->type == HOST_IP4) {
- struct sockaddr_in sin;
-
- memset(&sin, 0, sizeof(sin));
- sin.sin_family = AF_INET;
- memcpy(&(sin.sin_addr), host->addr, 4);
- sin.sin_port = host->port;
- ret = kernel_connect(sock, (struct sockaddr *)&sin, sizeof(sin), O_NONBLOCK);
- if (ret != 0 && ret != -EINPROGRESS) {
- dev_err(dnbd3_device_to_dev(dev), "connection to host failed (v4)\n");
- goto error;
- }
- } else {
- struct sockaddr_in6 sin;
-
- memset(&sin, 0, sizeof(sin));
- sin.sin6_family = AF_INET6;
- memcpy(&(sin.sin6_addr), host->addr, 16);
- sin.sin6_port = host->port;
- ret = kernel_connect(sock, (struct sockaddr *)&sin, sizeof(sin), O_NONBLOCK);
- if (ret != 0 && ret != -EINPROGRESS) {
- dev_err(dnbd3_device_to_dev(dev), "connection to host failed (v6)\n");
- goto error;
- }
+ start = ktime_get_real();
+ ret = kernel_connect(sock, (struct sockaddr *)addr, sizeof(*addr), O_NONBLOCK);
+ if (ret != 0 && ret != -EINPROGRESS) {
+ dev_dbg(dnbd3_device_to_dev(dev), "%pISpc connect failed (%d, blocked %dms)\n",
+ addr, ret, (int)ktime_ms_delta(ktime_get_real(), start));
+ goto error;
}
if (ret != 0) {
/* XXX How can we do a connect with short timeout? This is dumb */
- ktime_t start = ktime_get_real();
+ start = ktime_get_real();
while (ktime_ms_delta(ktime_get_real(), start) < SOCKET_TIMEOUT_CLIENT_DATA * 1000) {
struct sockaddr_storage addr;
@@ -924,7 +879,8 @@ static struct socket *dnbd3_connect(dnbd3_device_t *dev, dnbd3_host_t *host)
msleep(1);
}
if (ret < 0) {
- dev_dbg(dnbd3_device_to_dev(dev), "connect timed out (%d)\n", ret);
+ dev_dbg(dnbd3_device_to_dev(dev), "%pISpc: connect timed out (%d, %dms)\n",
+ addr, ret, (int)ktime_ms_delta(ktime_get_real(), start));
goto error;
}
}
@@ -948,8 +904,8 @@ int dnbd3_net_connect(dnbd3_device_t *dev)
goto error;
}
- if (dev->cur_server.host.port == 0 || dev->cur_server.host.type == 0 || dev->imgname == NULL) {
- dnbd3_dev_err_host_cur(dev, "host, port or image name not set\n");
+ if (dev->cur_server.host.ss_family == 0 || dev->imgname == NULL) {
+ dnbd3_dev_err_host_cur(dev, "connect: host or image name not set\n");
goto error;
}
@@ -958,18 +914,18 @@ int dnbd3_net_connect(dnbd3_device_t *dev)
goto error;
}
- if (dev->cur_server.host.type != HOST_IP4 && dev->cur_server.host.type != HOST_IP6) {
- dnbd3_dev_err_host_cur(dev, "unknown address type %d\n", (int)dev->cur_server.host.type);
- goto error;
- }
-
ASSERT(dev->thread_send == NULL);
ASSERT(dev->thread_receive == NULL);
ASSERT(dev->thread_discover == NULL);
dnbd3_dev_dbg_host_cur(dev, "connecting ...\n");
- if (dev->better_sock == NULL) {
+ if (dev->better_sock != NULL) {
+ // Switching server, connection is already established and size request was executed
+ dnbd3_dev_dbg_host_cur(dev, "on-the-fly server change ...\n");
+ dev->sock = dev->better_sock;
+ dev->better_sock = NULL;
+ } else {
// no established connection yet from discovery thread, start new one
uint64_t reported_size;
dnbd3_request_t dnbd3_request;
@@ -1075,11 +1031,6 @@ int dnbd3_net_connect(dnbd3_device_t *dev)
dnbd3_dev_dbg_host_cur(dev, "image size: %llu\n", dev->reported_size);
dev->update_available = 0;
}
- } else {
- // Switching server, connection is already established and size request was executed
- dnbd3_dev_dbg_host_cur(dev, "on-the-fly server change ...\n");
- dev->sock = dev->better_sock;
- dev->better_sock = NULL;
}
// create required threads
@@ -1158,8 +1109,7 @@ error:
dev->sock = NULL;
}
spin_lock_irqsave(&dev->blk_lock, irqflags);
- dev->cur_server.host.type = 0;
- dev->cur_server.host.port = 0;
+ dev->cur_server.host.ss_family = 0;
spin_unlock_irqrestore(&dev->blk_lock, irqflags);
kfree(req1);
@@ -1239,8 +1189,7 @@ int dnbd3_net_disconnect(dnbd3_device_t *dev)
dev->sock = NULL;
}
spin_lock_irqsave(&dev->blk_lock, irqflags);
- dev->cur_server.host.type = 0;
- dev->cur_server.host.port = 0;
+ dev->cur_server.host.ss_family = 0;
spin_unlock_irqrestore(&dev->blk_lock, irqflags);
return 0;
diff --git a/src/kernel/sysfs.c b/src/kernel/sysfs.c
index 5b9a5ce..3355080 100644
--- a/src/kernel/sysfs.c
+++ b/src/kernel/sysfs.c
@@ -30,21 +30,12 @@
ssize_t show_cur_server_addr(char *buf, dnbd3_device_t *dev)
{
- if (dev->cur_server.host.type == HOST_IP4)
- return MIN(snprintf(buf, PAGE_SIZE, "%pI4:%d\n", dev->cur_server.host.addr,
- (int)ntohs(dev->cur_server.host.port)),
- PAGE_SIZE);
- else if (dev->cur_server.host.type == HOST_IP6)
- return MIN(snprintf(buf, PAGE_SIZE, "[%pI6]:%d\n", dev->cur_server.host.addr,
- (int)ntohs(dev->cur_server.host.port)),
- PAGE_SIZE);
- *buf = '\0';
- return 0;
+ return MIN(snprintf(buf, PAGE_SIZE, "%pISpc\n", &dev->cur_server.host), PAGE_SIZE);
}
ssize_t show_cur_server_rtt(char *buf, dnbd3_device_t *dev)
{
- return MIN(snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)dev->cur_rtt), PAGE_SIZE);
+ return MIN(snprintf(buf, PAGE_SIZE, "%lu\n", dev->cur_server.rtt), PAGE_SIZE);
}
ssize_t show_alt_server_num(char *buf, dnbd3_device_t *dev)
@@ -52,7 +43,7 @@ ssize_t show_alt_server_num(char *buf, dnbd3_device_t *dev)
int i, num = 0;
for (i = 0; i < NUMBER_SERVERS; ++i) {
- if (dev->alt_servers[i].host.type)
+ if (dev->alt_servers[i].host.ss_family != 0)
++num;
}
return MIN(snprintf(buf, PAGE_SIZE, "%d\n", num), PAGE_SIZE);
@@ -63,26 +54,16 @@ ssize_t show_alt_servers(char *buf, dnbd3_device_t *dev)
int i, size = PAGE_SIZE, ret;
for (i = 0; i < NUMBER_SERVERS; ++i) {
- if (dev->alt_servers[i].host.type == HOST_IP4)
- ret = MIN(snprintf(buf, size, "%pI4:%d,%llu,%d\n", dev->alt_servers[i].host.addr,
- (int)ntohs(dev->alt_servers[i].host.port),
- (unsigned long long)((dev->alt_servers[i].rtts[0] +
- dev->alt_servers[i].rtts[1] +
- dev->alt_servers[i].rtts[2] +
- dev->alt_servers[i].rtts[3]) / 4),
- (int)dev->alt_servers[i].failures),
- size);
- else if (dev->alt_servers[i].host.type == HOST_IP6)
- ret = MIN(snprintf(buf, size, "[%pI6]:%d,%llu,%d\n", dev->alt_servers[i].host.addr,
- (int)ntohs(dev->alt_servers[i].host.port),
- (unsigned long long)((dev->alt_servers[i].rtts[0] +
- dev->alt_servers[i].rtts[1] +
- dev->alt_servers[i].rtts[2] +
- dev->alt_servers[i].rtts[3]) / 4),
- (int)dev->alt_servers[i].failures),
- size);
- else
+ if (dev->alt_servers[i].host.ss_family == 0)
continue;
+
+ ret = MIN(snprintf(buf, size, "%pISpc,%llu,%d\n", &dev->alt_servers[i].host,
+ (unsigned long long)((dev->alt_servers[i].rtts[0] +
+ dev->alt_servers[i].rtts[1] +
+ dev->alt_servers[i].rtts[2] +
+ dev->alt_servers[i].rtts[3]) / 4),
+ (int)dev->alt_servers[i].failures),
+ size);
size -= ret;
buf += ret;
if (size <= 0) {
diff --git a/src/kernel/sysfs.h b/src/kernel/sysfs.h
index ec6c261..b744c8b 100644
--- a/src/kernel/sysfs.h
+++ b/src/kernel/sysfs.h
@@ -36,8 +36,8 @@ typedef struct {
typedef struct {
struct attribute attr;
- ssize_t (*show)(char *buf, dnbd3_server_t *dev);
- ssize_t (*store)(const char *buf, size_t len, dnbd3_server_t *dev);
+ ssize_t (*show)(char *buf, dnbd3_alt_server_t *dev);
+ ssize_t (*store)(const char *buf, size_t len, dnbd3_alt_server_t *dev);
} server_attr_t;
#endif /* SYSFS_H_ */