diff options
author | Simon Rettberg | 2021-03-24 16:07:15 +0100 |
---|---|---|
committer | Simon Rettberg | 2021-03-24 16:07:15 +0100 |
commit | c73bc9413da8bc964d82e0a58525c7280cc1a456 (patch) | |
tree | c2ec982722fd79c413d97edefa089a6176ebef9b /src | |
parent | [KERNEL] Set fake low RTT after manual server switch (diff) | |
download | dnbd3-c73bc9413da8bc964d82e0a58525c7280cc1a456.tar.gz dnbd3-c73bc9413da8bc964d82e0a58525c7280cc1a456.tar.xz dnbd3-c73bc9413da8bc964d82e0a58525c7280cc1a456.zip |
[KERNEL] Use sockaddr instead of dnbd3_host_t where possible
Convert dnbd3_host_t to struct sockaddr immediately when
adding alt servers, so we don't have to convert it every time
we establish a connection. Additionally we can now use %pISpc
in printf-like functions instead of having if/else constructs
whenever we want to print an address.
Diffstat (limited to 'src')
-rw-r--r-- | src/kernel/blk.c | 113 | ||||
-rw-r--r-- | src/kernel/dnbd3_main.c | 103 | ||||
-rw-r--r-- | src/kernel/dnbd3_main.h | 25 | ||||
-rw-r--r-- | src/kernel/net.c | 169 | ||||
-rw-r--r-- | src/kernel/sysfs.c | 43 | ||||
-rw-r--r-- | src/kernel/sysfs.h | 4 |
6 files changed, 216 insertions, 241 deletions
diff --git a/src/kernel/blk.c b/src/kernel/blk.c index 90e9b34..c313d63 100644 --- a/src/kernel/blk.c +++ b/src/kernel/blk.c @@ -107,9 +107,6 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int } else if (msg == NULL) { result = -EINVAL; } else { - if (sizeof(msg->hosts[0]) != sizeof(dev->cur_server.host)) - dev_warn(dnbd3_device_to_dev(dev), "odd size bug triggered in IOCTL\n"); - /* assert that at least one and not to many hosts are given */ if (msg->hosts_num < 1 || msg->hosts_num > NUMBER_SERVERS) { result = -EINVAL; @@ -127,16 +124,12 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int /* add specified servers to alt server list */ for (i = 0; i < msg->hosts_num; i++) { /* copy provided host into corresponding alt server slot */ - memset(&dev->alt_servers[i], 0, sizeof(dev->alt_servers[i])); - memcpy(&dev->alt_servers[i].host, &msg->hosts[i], sizeof(msg->hosts[i])); - dev->alt_servers[i].failures = 0; - - if (dev->alt_servers[i].host.type == HOST_IP4) - dev_dbg(dnbd3_device_to_dev(dev), "adding server %pI4\n", - dev->alt_servers[i].host.addr); + if (dnbd3_add_server(dev, &msg->hosts[i]) == 0) + dev_dbg(dnbd3_device_to_dev(dev), "adding server %pISpc\n", + &dev->alt_servers[i].host); else - dev_dbg(dnbd3_device_to_dev(dev), "adding server [%pI6]\n", - dev->alt_servers[i].host.addr); + dev_warn(dnbd3_device_to_dev(dev), "could not add alt server %pISpc\n", + &dev->alt_servers[i].host); } /* @@ -145,8 +138,10 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int */ for (i = 0; i < msg->hosts_num; i++) { /* probe added alt server */ - memcpy(&dev->cur_server, &dev->alt_servers[i], sizeof(dev->cur_server)); + if (dev->alt_servers[i].host.ss_family == 0) + continue; // Empty slot + dev->cur_server.host = dev->alt_servers[i].host; if (dnbd3_net_connect(dev) != 0) { /* * probing server failed, cleanup connection and @@ -163,13 +158,8 @@ static int dnbd3_blk_ioctl(struct 
block_device *bdev, fmode_t mode, unsigned int if (result >= 0) { /* probing was successful */ - if (dev->cur_server.host.type == HOST_IP4) - dev_dbg(dnbd3_device_to_dev(dev), "server %pI4 is initial server\n", - dev->cur_server.host.addr); - else - dev_dbg(dnbd3_device_to_dev(dev), "server [%pI6] is initial server\n", - dev->cur_server.host.addr); - + dev_dbg(dnbd3_device_to_dev(dev), "server %pISpc is initial server\n", + &dev->cur_server.host); imgname = NULL; // Prevent kfree at the end } else { /* probing failed */ @@ -198,44 +188,43 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int } else if (msg == NULL) { result = -EINVAL; } else { - dnbd3_server_t *alt_server; + dnbd3_alt_server_t *alt_server; + struct sockaddr_storage new_addr; mutex_lock(&dev->alt_servers_lock); - alt_server = get_existing_server(&msg->hosts[0], dev); + alt_server = get_existing_alt_from_host(&msg->hosts[0], dev); if (alt_server == NULL) { mutex_unlock(&dev->alt_servers_lock); /* specified server is not known, so do not switch */ result = -ENOENT; } else { /* specified server is known, so try to switch to it */ - dnbd3_server_t new_server = *alt_server; - - new_server = *alt_server; + new_addr = alt_server->host; mutex_unlock(&dev->alt_servers_lock); - if (!is_same_server(&dev->cur_server, &new_server)) { - dnbd3_server_t old_server; - - if (new_server.host.type == HOST_IP4) - dev_info(dnbd3_device_to_dev(dev), "manual server switch to %pI4\n", - new_server.host.addr); - else - dev_info(dnbd3_device_to_dev(dev), "manual server switch to [%pI6]\n", - new_server.host.addr); + if (!is_same_server(&dev->cur_server.host, &new_addr)) { + struct sockaddr_storage old_server; + + dev_info(dnbd3_device_to_dev(dev), "manual server switch to %pISpc\n", + &new_addr); /* save current working server */ /* lock device to get consistent copy of current working server */ spin_lock_irqsave(&dev->blk_lock, irqflags); - memcpy(&old_server, &dev->cur_server, 
sizeof(old_server)); + old_server = dev->cur_server.host; spin_unlock_irqrestore(&dev->blk_lock, irqflags); /* disconnect old server */ dnbd3_net_disconnect(dev); /* connect to new specified server (switching) */ - memcpy(&dev->cur_server, &new_server, sizeof(dev->cur_server)); + spin_lock_irqsave(&dev->blk_lock, irqflags); + dev->cur_server.host = new_addr; + spin_unlock_irqrestore(&dev->blk_lock, irqflags); result = dnbd3_net_connect(dev); if (result != 0) { /* reconnect with old server if switching has failed */ - memcpy(&dev->cur_server, &old_server, sizeof(dev->cur_server)); + spin_lock_irqsave(&dev->blk_lock, irqflags); + dev->cur_server.host = old_server; + spin_unlock_irqrestore(&dev->blk_lock, irqflags); if (dnbd3_net_connect(dev) != 0) { /* we couldn't reconnect to the old server */ /* device is dangling now and needs another SWITCH call */ @@ -250,7 +239,7 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int } else { /* switch succeeded, fake very low RTT so we don't switch away again soon */ mutex_lock(&dev->alt_servers_lock); - if (is_same_server(alt_server, &new_server)) { + if (is_same_server(&alt_server->host, &new_addr)) { alt_server->rtts[0] = alt_server->rtts[1] = alt_server->rtts[2] = alt_server->rtts[3] = 4; } mutex_unlock(&dev->alt_servers_lock); @@ -264,7 +253,10 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int break; case IOCTL_ADD_SRV: - case IOCTL_REM_SRV: + case IOCTL_REM_SRV: { + struct sockaddr_storage addr; + dnbd3_host_t *host; + if (dev->imgname == NULL) { result = -ENOTCONN; break; @@ -273,44 +265,31 @@ static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int result = -EINVAL; break; } - if (cmd == IOCTL_ADD_SRV) { - dnbd3_host_t *host = &msg->hosts[0]; + host = &msg->hosts[0]; + if (!dnbd3_host_to_sockaddr(host, &addr)) { + result = -EINVAL; + break; + } + if (cmd == IOCTL_ADD_SRV) { result = dnbd3_add_server(dev, host); if (result == -EEXIST) 
{ - // Exists - if (host->type == HOST_IP4) { - dev_info(dnbd3_device_to_dev(dev), "alt server %pI4 already exists\n", - host->addr); - } else { - dev_info(dnbd3_device_to_dev(dev), "alt server [%pI6] already exists\n", - host->addr); - } + dev_info(dnbd3_device_to_dev(dev), "alt server %pISpc already exists\n", &addr); } else if (result == -ENOSPC) { - if (host->type == HOST_IP4) { - dev_info(dnbd3_device_to_dev(dev), "cannot add %pI4; no free slot\n", - host->addr); - } else { - dev_info(dnbd3_device_to_dev(dev), "cannot add [%pI6]; no free slot\n", - host->addr); - } + dev_info(dnbd3_device_to_dev(dev), "cannot add %pISpc; no free slot\n", &addr); + } else { + dev_info(dnbd3_device_to_dev(dev), "added alt server %pISpc\n", &addr); } } else { // IOCTL_REM_SRV - dnbd3_host_t *host = &msg->hosts[0]; - - result = dnbd3_rem_server(dev, &msg->hosts[0]); + result = dnbd3_rem_server(dev, host); if (result == -ENOENT) { - if (host->type == HOST_IP4) { - dev_info(dnbd3_device_to_dev(dev), "alt server %pI4 not found\n", - host->addr); - } else { - dev_info(dnbd3_device_to_dev(dev), "alt server [%pI6] not found\n", - host->addr); - } + dev_info(dnbd3_device_to_dev(dev), "alt server %pISpc not found\n", &addr); + } else { + dev_info(dnbd3_device_to_dev(dev), "removed alt server %pISpc\n", &addr); } } break; - + } case BLKFLSBUF: result = 0; break; diff --git a/src/kernel/dnbd3_main.c b/src/kernel/dnbd3_main.c index 7a3b1d5..9b5591d 100644 --- a/src/kernel/dnbd3_main.c +++ b/src/kernel/dnbd3_main.c @@ -23,6 +23,7 @@ #include <dnbd3/config/client.h> #include <dnbd3/version.h> +#include <net/ipv6.h> #include "dnbd3_main.h" #include "blk.h" @@ -35,10 +36,54 @@ struct device *dnbd3_device_to_dev(dnbd3_device_t *dev) return disk_to_dev(dev->disk); } -int is_same_server(const dnbd3_server_t *const a, const dnbd3_server_t *const b) +int dnbd3_host_to_sockaddr(const dnbd3_host_t *host, struct sockaddr_storage *dest) { - return (a->host.type == b->host.type) && (a->host.port == 
b->host.port) && - (0 == memcmp(a->host.addr, b->host.addr, (a->host.type == HOST_IP4 ? 4 : 16))); + struct sockaddr_in *sin4; + struct sockaddr_in6 *sin6; + + memset(dest, 0, sizeof(*dest)); + if (host->type == HOST_IP4) { + sin4 = (struct sockaddr_in*)dest; + sin4->sin_family = AF_INET; + memcpy(&(sin4->sin_addr), host->addr, 4); + sin4->sin_port = host->port; + } else if (host->type == HOST_IP6) { + sin6 = (struct sockaddr_in6*)dest; + sin6->sin6_family = AF_INET6; + memcpy(&(sin6->sin6_addr), host->addr, 16); + sin6->sin6_port = host->port; + } else + return 0; + return 1; +} + +int is_same_server(const struct sockaddr_storage *const x, const struct sockaddr_storage *const y) +{ + if (x->ss_family != y->ss_family) + return 0; + switch (x->ss_family) { + case AF_INET: { + const struct sockaddr_in *sinx = (const struct sockaddr_in *)x; + const struct sockaddr_in *siny = (const struct sockaddr_in *)y; + if (sinx->sin_port != siny->sin_port) + return 0; + if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) + return 0; + break; + } + case AF_INET6: { + const struct sockaddr_in6 *sinx = (const struct sockaddr_in6 *)x; + const struct sockaddr_in6 *siny = (const struct sockaddr_in6 *)y; + if (sinx->sin6_port != siny->sin6_port) + return 0; + if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) + return 0; + break; + } + default: + return 0; + } + return 1; } /** @@ -48,12 +93,12 @@ int is_same_server(const dnbd3_server_t *const a, const dnbd3_server_t *const b) * conditions match. * The caller has to hold dev->alt_servers_lock. 
*/ -static inline dnbd3_server_t *get_free_alt_server(dnbd3_device_t *const dev) +static dnbd3_alt_server_t *get_free_alt_server(dnbd3_device_t *const dev) { int i; for (i = 0; i < NUMBER_SERVERS; ++i) { - if (dev->alt_servers[i].host.type == 0) + if (dev->alt_servers[i].host.ss_family == 0) return &dev->alt_servers[i]; } for (i = 0; i < NUMBER_SERVERS; ++i) { @@ -63,33 +108,43 @@ static inline dnbd3_server_t *get_free_alt_server(dnbd3_device_t *const dev) return NULL; } -/** - * Returns pointer to existing entry in alt_servers that matches the given - * alt server, or NULL if not found. - * The caller has to hold dev->alt_servers_lock. - */ -dnbd3_server_t *get_existing_server(const dnbd3_host_t *const newserver, dnbd3_device_t *const dev) +dnbd3_alt_server_t *get_existing_alt_from_addr(const struct sockaddr_storage *const addr, + dnbd3_device_t *const dev) { int i; for (i = 0; i < NUMBER_SERVERS; ++i) { - if ((newserver->type == dev->alt_servers[i].host.type) && - (newserver->port == dev->alt_servers[i].host.port) && - (0 == memcmp(newserver->addr, dev->alt_servers[i].host.addr, - (newserver->type == HOST_IP4 ? 4 : 16)))) { + if (is_same_server(addr, &dev->alt_servers[i].host)) return &dev->alt_servers[i]; - } } return NULL; } +/** + * Returns pointer to existing entry in alt_servers that matches the given + * alt server, or NULL if not found. + * The caller has to hold dev->alt_servers_lock. 
+ */ +dnbd3_alt_server_t *get_existing_alt_from_host(const dnbd3_host_t *const host, dnbd3_device_t *const dev) +{ + struct sockaddr_storage addr; + + if (!dnbd3_host_to_sockaddr(host, &addr)) + return NULL; + return get_existing_alt_from_addr(&addr, dev); +} + int dnbd3_add_server(dnbd3_device_t *dev, dnbd3_host_t *host) { int result; - dnbd3_server_t *alt_server; + dnbd3_alt_server_t *alt_server; + + if (host->type != HOST_IP4 && host->type != HOST_IP6) + return -EINVAL; + /* protect access to 'alt_servers' */ mutex_lock(&dev->alt_servers_lock); - alt_server = get_existing_server(host, dev); + alt_server = get_existing_alt_from_host(host, dev); // ADD if (alt_server != NULL) { // Exists @@ -100,7 +155,10 @@ int dnbd3_add_server(dnbd3_device_t *dev, dnbd3_host_t *host) if (alt_server == NULL) { result = -ENOSPC; } else { - alt_server->host = *host; + dnbd3_host_to_sockaddr(host, &alt_server->host); + alt_server->protocol_version = 0; + alt_server->rtts[0] = alt_server->rtts[1] = alt_server->rtts[2] + = alt_server->rtts[3] = RTT_UNREACHABLE; alt_server->failures = 0; result = 0; } @@ -111,18 +169,19 @@ int dnbd3_add_server(dnbd3_device_t *dev, dnbd3_host_t *host) int dnbd3_rem_server(dnbd3_device_t *dev, dnbd3_host_t *host) { - dnbd3_server_t *alt_server; + dnbd3_alt_server_t *alt_server; int result; + /* protect access to 'alt_servers' */ mutex_lock(&dev->alt_servers_lock); - alt_server = get_existing_server(host, dev); + alt_server = get_existing_alt_from_host(host, dev); // REMOVE if (alt_server == NULL) { // Not found result = -ENOENT; } else { // Remove - alt_server->host.type = 0; + alt_server->host.ss_family = 0; result = 0; } mutex_unlock(&dev->alt_servers_lock); diff --git a/src/kernel/dnbd3_main.h b/src/kernel/dnbd3_main.h index a69d588..42b9f58 100644 --- a/src/kernel/dnbd3_main.h +++ b/src/kernel/dnbd3_main.h @@ -37,11 +37,11 @@ extern int major; typedef struct { - dnbd3_host_t host; - unsigned long rtts[4]; // Last four round trip time measurements in 
µs + unsigned long rtts[4]; // Last four round trip time measurements in µs uint16_t protocol_version; // dnbd3 protocol version of this server - uint8_t failures; // How many times the server was unreachable -} dnbd3_server_t; + uint8_t failures; // How many times the server was unreachable + struct sockaddr_storage host; // Address of server +} dnbd3_alt_server_t; typedef struct { // block @@ -57,10 +57,13 @@ typedef struct { struct mutex alt_servers_lock; char *imgname; struct socket *sock; - dnbd3_server_t cur_server; - unsigned long cur_rtt; + struct { + unsigned long rtt; + struct sockaddr_storage host; + uint16_t protocol_version; + } cur_server; serialized_buffer_t payload_buffer; - dnbd3_server_t alt_servers[NUMBER_SERVERS]; // array of alt servers, protected by altservers_lock + dnbd3_alt_server_t alt_servers[NUMBER_SERVERS]; // array of alt servers, protected by alt_servers_lock uint8_t discover, panic, update_available, panic_count; atomic_t connection_lock; uint8_t use_server_provided_alts; @@ -84,9 +87,13 @@ typedef struct { extern inline struct device *dnbd3_device_to_dev(dnbd3_device_t *dev); -extern inline int is_same_server(const dnbd3_server_t *const a, const dnbd3_server_t *const b); +extern inline int is_same_server(const struct sockaddr_storage *const x, const struct sockaddr_storage *const y); -extern dnbd3_server_t *get_existing_server(const dnbd3_host_t *const newserver, dnbd3_device_t *const dev); +extern int dnbd3_host_to_sockaddr(const dnbd3_host_t *host, struct sockaddr_storage *dest); + +extern dnbd3_alt_server_t *get_existing_alt_from_host(const dnbd3_host_t *const host, dnbd3_device_t *const dev); + +extern dnbd3_alt_server_t *get_existing_alt_from_addr(const struct sockaddr_storage *const addr, dnbd3_device_t *const dev); extern int dnbd3_add_server(dnbd3_device_t *dev, dnbd3_host_t *host); diff --git a/src/kernel/net.c b/src/kernel/net.c index 6d821fc..49a4fe7 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -51,9 +51,6 @@ } 
while (0) #endif -#define dnbd3_sock_create(af, type, proto, sock) \ - sock_create_kern(&init_net, (af) == HOST_IP4 ? AF_INET : AF_INET6, type, proto, sock) - // cmd_flags and cmd_type are merged into cmd_flags now #if REQ_FLAG_BITS > 24 #error "Fix CMD bitshift" @@ -65,40 +62,17 @@ #define DNBD3_DEV_READ REQ_OP_READ #define DNBD3_REQ_OP_SPECIAL REQ_OP_DRV_IN -/** - * Some macros for easier debug output. - * Server IP:port info will be printed. - */ -#define __dnbd3_dev_dbg_host(dev, host, fmt, ...) \ - do { \ - if ((host).type == HOST_IP4) { \ - dev_dbg(dnbd3_device_to_dev((dev)), "(%pI4:%d): " fmt, (host).addr, (int)ntohs((host).port), \ - ##__VA_ARGS__); \ - } else { \ - dev_dbg(dnbd3_device_to_dev((dev)), "([%pI6]:%d): " fmt, (host).addr, (int)ntohs((host).port), \ - ##__VA_ARGS__); \ - } \ - } while (0) - -#define __dnbd3_dev_err_host(dev, host, fmt, ...) \ - do { \ - if ((host).type == HOST_IP4) { \ - dev_err(dnbd3_device_to_dev((dev)), "(%pI4:%d): " fmt, (host).addr, (int)ntohs((host).port), \ - ##__VA_ARGS__); \ - } else { \ - dev_err(dnbd3_device_to_dev((dev)), "([%pI6]:%d): " fmt, (host).addr, (int)ntohs((host).port), \ - ##__VA_ARGS__); \ - } \ - } while (0) +#define dnbd3_dev_dbg_host_cur(dev, fmt, ...) \ + dev_dbg(dnbd3_device_to_dev(dev), "(%pISpc): " fmt, &(dev)->cur_server.host, ##__VA_ARGS__) +#define dnbd3_dev_err_host_cur(dev, fmt, ...) \ + dev_err(dnbd3_device_to_dev(dev), "(%pISpc): " fmt, &(dev)->cur_server.host, ##__VA_ARGS__) -#define dnbd3_dev_dbg_host_cur(dev, fmt, ...) __dnbd3_dev_dbg_host((dev), (dev)->cur_server.host, fmt, ##__VA_ARGS__) -#define dnbd3_dev_err_host_cur(dev, fmt, ...) __dnbd3_dev_err_host((dev), (dev)->cur_server.host, fmt, ##__VA_ARGS__) -#define dnbd3_dev_dbg_host_alt(dev, fmt, ...) \ - __dnbd3_dev_dbg_host((dev), (dev)->alt_servers[i].host, fmt, ##__VA_ARGS__) -#define dnbd3_dev_err_host_alt(dev, fmt, ...) 
\ - __dnbd3_dev_err_host((dev), (dev)->alt_servers[i].host, fmt, ##__VA_ARGS__) +#define dnbd3_dev_dbg_host_alt(dev, fmt, ...) \ + dev_dbg(dnbd3_device_to_dev(dev), "(%pISpc): " fmt, &(dev)->alt_servers[i].host, ##__VA_ARGS__) +#define dnbd3_dev_err_host_alt(dev, fmt, ...) \ + dev_err(dnbd3_device_to_dev(dev), "(%pISpc): " fmt, &(dev)->alt_servers[i].host, ##__VA_ARGS__) -static struct socket *dnbd3_connect(dnbd3_device_t *dev, dnbd3_host_t *host); +static struct socket *dnbd3_connect(dnbd3_device_t *dev, struct sockaddr_storage *addr); static void dnbd3_net_heartbeat(struct timer_list *arg) { @@ -153,7 +127,7 @@ static int dnbd3_net_discover(void *data) dnbd3_request_t dnbd3_request; dnbd3_reply_t dnbd3_reply; - dnbd3_server_t host_compare, best_server; + struct sockaddr_storage host_compare, best_server; struct msghdr msg; struct kvec iov[2]; @@ -166,7 +140,7 @@ static int dnbd3_net_discover(void *data) ktime_t start = 0, end = 0; unsigned long rtt, best_rtt = 0; unsigned long irqflags; - int i, j, isize; + int i, j, isize, fails; int turn = 0; int ready = 0, do_change = 0; char check_order[NUMBER_SERVERS]; @@ -204,7 +178,7 @@ static int dnbd3_net_discover(void *data) if (dev->reported_size < 4096) continue; - best_server.host.type = 0; + best_server.ss_family = 0; best_rtt = 0xFFFFFFFul; if (dev->heartbeat_count < STARTUP_MODE_DURATION || dev->panic) @@ -226,22 +200,21 @@ static int dnbd3_net_discover(void *data) for (j = 0; j < NUMBER_SERVERS; ++j) { i = check_order[j]; mutex_lock(&dev->alt_servers_lock); - host_compare = dev->alt_servers[i]; + host_compare = dev->alt_servers[i].host; + fails = dev->alt_servers[i].failures; mutex_unlock(&dev->alt_servers_lock); - if (host_compare.host.type == 0) + if (host_compare.ss_family == 0) continue; // Empty slot - if (!dev->panic && host_compare.failures > 50 + if (!dev->panic && fails > 50 && (ktime_to_us(start) & 7) != 0) continue; // If not in panic mode, skip server if it failed too many times - if (isize-- <= 0 && 
!is_same_server(&dev->cur_server, &host_compare)) + if (isize-- <= 0 && !is_same_server(&dev->cur_server.host, &host_compare)) continue; // Only test isize servers plus current server // Initialize socket and connect - sock = dnbd3_connect(dev, &host_compare.host); - if (sock == NULL) { - dnbd3_dev_dbg_host_alt(dev, "%s: Couldn't connect\n", __func__); + sock = dnbd3_connect(dev, &host_compare); + if (sock == NULL) goto error; - } // Request filesize dnbd3_request.cmd = CMD_SELECT_IMAGE; @@ -345,7 +318,7 @@ static int dnbd3_net_discover(void *data) dev->thread_discover = NULL; dnbd3_net_disconnect(dev); spin_lock_irqsave(&dev->blk_lock, irqflags); - dev->cur_server = host_compare; + dev->cur_server.host = host_compare; spin_unlock_irqrestore(&dev->blk_lock, irqflags); dnbd3_net_connect(dev); atomic_set(&dev->connection_lock, 0); @@ -400,7 +373,7 @@ static int dnbd3_net_discover(void *data) end = ktime_get_real(); // end rtt measurement mutex_lock(&dev->alt_servers_lock); - if (is_same_server(&dev->alt_servers[i], &host_compare)) { + if (is_same_server(&dev->alt_servers[i].host, &host_compare)) { dev->alt_servers[i].protocol_version = remote_version; dev->alt_servers[i].rtts[turn] = (unsigned long)ktime_us_delta(end, start); @@ -426,8 +399,8 @@ static int dnbd3_net_discover(void *data) } // update cur servers rtt - if (is_same_server(&dev->cur_server, &host_compare)) - dev->cur_rtt = rtt; + if (is_same_server(&dev->cur_server.host, &host_compare)) + dev->cur_server.rtt = rtt; continue; @@ -437,16 +410,14 @@ error: sock = NULL; } mutex_lock(&dev->alt_servers_lock); - if (is_same_server(&dev->alt_servers[i], &host_compare)) { + if (is_same_server(&dev->alt_servers[i].host, &host_compare)) { ++dev->alt_servers[i].failures; dev->alt_servers[i].rtts[turn] = RTT_UNREACHABLE; } mutex_unlock(&dev->alt_servers_lock); - if (is_same_server(&dev->cur_server, &host_compare)) - dev->cur_rtt = RTT_UNREACHABLE; - - continue; - } + if (is_same_server(&dev->cur_server.host, 
&host_compare)) + dev->cur_server.rtt = RTT_UNREACHABLE; + } // for loop over alt_servers if (dev->panic) { // If probe timeout is set, report error to block layer @@ -455,7 +426,7 @@ error: dnbd3_blk_fail_all_requests(dev); } - if (best_server.host.type == 0 || kthread_should_stop() || dev->thread_discover == NULL) { + if (best_server.ss_family == 0 || kthread_should_stop() || dev->thread_discover == NULL) { // No alt server could be reached at all or thread should stop if (best_sock != NULL) { // Should never happen actually @@ -465,16 +436,16 @@ error: continue; } - do_change = ready && !is_same_server(&best_server, &dev->cur_server) && (ktime_to_us(start) & 3) != 0 - && RTT_THRESHOLD_FACTOR(dev->cur_rtt) > best_rtt + 1500; + do_change = ready && !is_same_server(&best_server, &dev->cur_server.host) + && (ktime_to_us(start) & 3) != 0 && RTT_THRESHOLD_FACTOR(dev->cur_server.rtt) > best_rtt + 1500; - if (ready && !do_change) { + if (ready && !do_change && best_sock != NULL) { spin_lock_irqsave(&dev->blk_lock, irqflags); if (!list_empty(&dev->request_queue_send)) { cur_request = list_entry(dev->request_queue_send.next, struct request, queuelist); do_change = (cur_request == last_request); if (do_change) - dev_warn(dnbd3_device_to_dev(dev), "hung request\n"); + dev_warn(dnbd3_device_to_dev(dev), "hung request, triggering change\n"); } else { cur_request = (struct request *)123; } @@ -485,17 +456,17 @@ error: // take server with lowest rtt // if a (dis)connect is already in progress, we do nothing, this is not panic mode if (do_change && atomic_cmpxchg(&dev->connection_lock, 0, 1) == 0) { - dev_info(dnbd3_device_to_dev(dev), "server %d is faster (%lluµs vs. %lluµs)\n", -1, // XXX - (unsigned long long)best_rtt, (unsigned long long)dev->cur_rtt); + dev_info(dnbd3_device_to_dev(dev), "server %pISpc is faster (%lluµs vs. 
%lluµs)\n", &best_server, + (unsigned long long)best_rtt, (unsigned long long)dev->cur_server.rtt); kfree(buf); dev->better_sock = best_sock; // Take shortcut by continuing to use open connection put_task_struct(dev->thread_discover); dev->thread_discover = NULL; dnbd3_net_disconnect(dev); spin_lock_irqsave(&dev->blk_lock, irqflags); - dev->cur_server = best_server; + dev->cur_server.host = best_server; spin_unlock_irqrestore(&dev->blk_lock, irqflags); - dev->cur_rtt = best_rtt; + dev->cur_server.rtt = best_rtt; dnbd3_net_connect(dev); atomic_set(&dev->connection_lock, 0); return 0; @@ -507,6 +478,7 @@ error: best_sock = NULL; } + // Increase rtt array index pointer, low probability that it doesn't advance if (!ready || (ktime_to_us(start) & 15) != 0) turn = (turn + 1) % 4; if (turn == 2) // Set ready when we only have 2 of 4 measurements for quicker load balancing @@ -851,8 +823,9 @@ cleanup: return 0; } -static struct socket *dnbd3_connect(dnbd3_device_t *dev, dnbd3_host_t *host) +static struct socket *dnbd3_connect(dnbd3_device_t *dev, struct sockaddr_storage *addr) { + ktime_t start; int ret; struct socket *sock; #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0) @@ -873,7 +846,7 @@ static struct socket *dnbd3_connect(dnbd3_device_t *dev, dnbd3_host_t *host) timeout.tv_sec = SOCKET_TIMEOUT_CLIENT_DATA; timeout.tv_usec = 0; - if (dnbd3_sock_create(host->type, SOCK_STREAM, IPPROTO_TCP, &sock) < 0) { + if (sock_create_kern(&init_net, addr->ss_family, SOCK_STREAM, IPPROTO_TCP, &sock) < 0) { dev_err(dnbd3_device_to_dev(dev), "couldn't create socket\n"); return NULL; } @@ -886,34 +859,16 @@ static struct socket *dnbd3_connect(dnbd3_device_t *dev, dnbd3_host_t *host) sock_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, timeout_ptr, sizeof(timeout)); #endif sock->sk->sk_allocation = GFP_NOIO; - if (host->type == HOST_IP4) { - struct sockaddr_in sin; - - memset(&sin, 0, sizeof(sin)); - sin.sin_family = AF_INET; - memcpy(&(sin.sin_addr), host->addr, 4); - sin.sin_port = 
host->port; - ret = kernel_connect(sock, (struct sockaddr *)&sin, sizeof(sin), O_NONBLOCK); - if (ret != 0 && ret != -EINPROGRESS) { - dev_err(dnbd3_device_to_dev(dev), "connection to host failed (v4)\n"); - goto error; - } - } else { - struct sockaddr_in6 sin; - - memset(&sin, 0, sizeof(sin)); - sin.sin6_family = AF_INET6; - memcpy(&(sin.sin6_addr), host->addr, 16); - sin.sin6_port = host->port; - ret = kernel_connect(sock, (struct sockaddr *)&sin, sizeof(sin), O_NONBLOCK); - if (ret != 0 && ret != -EINPROGRESS) { - dev_err(dnbd3_device_to_dev(dev), "connection to host failed (v6)\n"); - goto error; - } + start = ktime_get_real(); + ret = kernel_connect(sock, (struct sockaddr *)addr, sizeof(*addr), O_NONBLOCK); + if (ret != 0 && ret != -EINPROGRESS) { + dev_dbg(dnbd3_device_to_dev(dev), "%pISpc connect failed (%d, blocked %dms)\n", + addr, ret, (int)ktime_ms_delta(ktime_get_real(), start)); + goto error; } if (ret != 0) { /* XXX How can we do a connect with short timeout? This is dumb */ - ktime_t start = ktime_get_real(); + start = ktime_get_real(); while (ktime_ms_delta(ktime_get_real(), start) < SOCKET_TIMEOUT_CLIENT_DATA * 1000) { struct sockaddr_storage addr; @@ -924,7 +879,8 @@ static struct socket *dnbd3_connect(dnbd3_device_t *dev, dnbd3_host_t *host) msleep(1); } if (ret < 0) { - dev_dbg(dnbd3_device_to_dev(dev), "connect timed out (%d)\n", ret); + dev_dbg(dnbd3_device_to_dev(dev), "%pISpc: connect timed out (%d, %dms)\n", + addr, ret, (int)ktime_ms_delta(ktime_get_real(), start)); goto error; } } @@ -948,8 +904,8 @@ int dnbd3_net_connect(dnbd3_device_t *dev) goto error; } - if (dev->cur_server.host.port == 0 || dev->cur_server.host.type == 0 || dev->imgname == NULL) { - dnbd3_dev_err_host_cur(dev, "host, port or image name not set\n"); + if (dev->cur_server.host.ss_family == 0 || dev->imgname == NULL) { + dnbd3_dev_err_host_cur(dev, "connect: host or image name not set\n"); goto error; } @@ -958,18 +914,18 @@ int dnbd3_net_connect(dnbd3_device_t *dev) goto 
error; } - if (dev->cur_server.host.type != HOST_IP4 && dev->cur_server.host.type != HOST_IP6) { - dnbd3_dev_err_host_cur(dev, "unknown address type %d\n", (int)dev->cur_server.host.type); - goto error; - } - ASSERT(dev->thread_send == NULL); ASSERT(dev->thread_receive == NULL); ASSERT(dev->thread_discover == NULL); dnbd3_dev_dbg_host_cur(dev, "connecting ...\n"); - if (dev->better_sock == NULL) { + if (dev->better_sock != NULL) { + // Switching server, connection is already established and size request was executed + dnbd3_dev_dbg_host_cur(dev, "on-the-fly server change ...\n"); + dev->sock = dev->better_sock; + dev->better_sock = NULL; + } else { // no established connection yet from discovery thread, start new one uint64_t reported_size; dnbd3_request_t dnbd3_request; @@ -1075,11 +1031,6 @@ int dnbd3_net_connect(dnbd3_device_t *dev) dnbd3_dev_dbg_host_cur(dev, "image size: %llu\n", dev->reported_size); dev->update_available = 0; } - } else { - // Switching server, connection is already established and size request was executed - dnbd3_dev_dbg_host_cur(dev, "on-the-fly server change ...\n"); - dev->sock = dev->better_sock; - dev->better_sock = NULL; } // create required threads @@ -1158,8 +1109,7 @@ error: dev->sock = NULL; } spin_lock_irqsave(&dev->blk_lock, irqflags); - dev->cur_server.host.type = 0; - dev->cur_server.host.port = 0; + dev->cur_server.host.ss_family = 0; spin_unlock_irqrestore(&dev->blk_lock, irqflags); kfree(req1); @@ -1239,8 +1189,7 @@ int dnbd3_net_disconnect(dnbd3_device_t *dev) dev->sock = NULL; } spin_lock_irqsave(&dev->blk_lock, irqflags); - dev->cur_server.host.type = 0; - dev->cur_server.host.port = 0; + dev->cur_server.host.ss_family = 0; spin_unlock_irqrestore(&dev->blk_lock, irqflags); return 0; diff --git a/src/kernel/sysfs.c b/src/kernel/sysfs.c index 5b9a5ce..3355080 100644 --- a/src/kernel/sysfs.c +++ b/src/kernel/sysfs.c @@ -30,21 +30,12 @@ ssize_t show_cur_server_addr(char *buf, dnbd3_device_t *dev) { - if 
(dev->cur_server.host.type == HOST_IP4) - return MIN(snprintf(buf, PAGE_SIZE, "%pI4:%d\n", dev->cur_server.host.addr, - (int)ntohs(dev->cur_server.host.port)), - PAGE_SIZE); - else if (dev->cur_server.host.type == HOST_IP6) - return MIN(snprintf(buf, PAGE_SIZE, "[%pI6]:%d\n", dev->cur_server.host.addr, - (int)ntohs(dev->cur_server.host.port)), - PAGE_SIZE); - *buf = '\0'; - return 0; + return MIN(snprintf(buf, PAGE_SIZE, "%pISpc\n", &dev->cur_server.host), PAGE_SIZE); } ssize_t show_cur_server_rtt(char *buf, dnbd3_device_t *dev) { - return MIN(snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)dev->cur_rtt), PAGE_SIZE); + return MIN(snprintf(buf, PAGE_SIZE, "%lu\n", dev->cur_server.rtt), PAGE_SIZE); } ssize_t show_alt_server_num(char *buf, dnbd3_device_t *dev) @@ -52,7 +43,7 @@ ssize_t show_alt_server_num(char *buf, dnbd3_device_t *dev) int i, num = 0; for (i = 0; i < NUMBER_SERVERS; ++i) { - if (dev->alt_servers[i].host.type) + if (dev->alt_servers[i].host.ss_family != 0) ++num; } return MIN(snprintf(buf, PAGE_SIZE, "%d\n", num), PAGE_SIZE); @@ -63,26 +54,16 @@ ssize_t show_alt_servers(char *buf, dnbd3_device_t *dev) int i, size = PAGE_SIZE, ret; for (i = 0; i < NUMBER_SERVERS; ++i) { - if (dev->alt_servers[i].host.type == HOST_IP4) - ret = MIN(snprintf(buf, size, "%pI4:%d,%llu,%d\n", dev->alt_servers[i].host.addr, - (int)ntohs(dev->alt_servers[i].host.port), - (unsigned long long)((dev->alt_servers[i].rtts[0] + - dev->alt_servers[i].rtts[1] + - dev->alt_servers[i].rtts[2] + - dev->alt_servers[i].rtts[3]) / 4), - (int)dev->alt_servers[i].failures), - size); - else if (dev->alt_servers[i].host.type == HOST_IP6) - ret = MIN(snprintf(buf, size, "[%pI6]:%d,%llu,%d\n", dev->alt_servers[i].host.addr, - (int)ntohs(dev->alt_servers[i].host.port), - (unsigned long long)((dev->alt_servers[i].rtts[0] + - dev->alt_servers[i].rtts[1] + - dev->alt_servers[i].rtts[2] + - dev->alt_servers[i].rtts[3]) / 4), - (int)dev->alt_servers[i].failures), - size); - else + if 
(dev->alt_servers[i].host.ss_family == 0) continue; + + ret = MIN(snprintf(buf, size, "%pISpc,%llu,%d\n", &dev->alt_servers[i].host, + (unsigned long long)((dev->alt_servers[i].rtts[0] + + dev->alt_servers[i].rtts[1] + + dev->alt_servers[i].rtts[2] + + dev->alt_servers[i].rtts[3]) / 4), + (int)dev->alt_servers[i].failures), + size); size -= ret; buf += ret; if (size <= 0) { diff --git a/src/kernel/sysfs.h b/src/kernel/sysfs.h index ec6c261..b744c8b 100644 --- a/src/kernel/sysfs.h +++ b/src/kernel/sysfs.h @@ -36,8 +36,8 @@ typedef struct { typedef struct { struct attribute attr; - ssize_t (*show)(char *buf, dnbd3_server_t *dev); - ssize_t (*store)(const char *buf, size_t len, dnbd3_server_t *dev); + ssize_t (*show)(char *buf, dnbd3_alt_server_t *dev); + ssize_t (*store)(const char *buf, size_t len, dnbd3_alt_server_t *dev); } server_attr_t; #endif /* SYSFS_H_ */ |