From a231e6283e1c79a33d6f3bdf786832f10e355d0c Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Fri, 17 Jul 2020 18:19:58 +0200 Subject: [KERNEL] Trying to get locking under control Current code doesn't pay much attention to properly locking when accessing structures that are being used in multiple workers/threads. Try to fix this, slowly. --- src/kernel/net.c | 86 ++++++++++++++++++++++---------------------------------- 1 file changed, 33 insertions(+), 53 deletions(-) (limited to 'src/kernel/net.c') diff --git a/src/kernel/net.c b/src/kernel/net.c index 6801659..1dd6725 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -35,12 +35,10 @@ sock_create_kern(&init_net, (af) == HOST_IP4 ? AF_INET : AF_INET6, \ type, proto, sock) -#define dnbd3_sock_release(sock) \ - do { \ - struct socket *s = (sock)->sock; \ - sock->sock = NULL; \ - sock_release(s); \ - }while (0) +static void dnbd3_sock_release_nolock(struct dnbd3_sock *sock) { + sock_release(sock->sock); + sock->sock = NULL; +} #define dnbd3_is_sock_alive(s) ((s).sock && (s).server) @@ -49,7 +47,7 @@ static int __dnbd3_socket_connect(struct dnbd3_sock *sock, struct dnbd3_server *server); static int dnbd3_socket_connect(struct dnbd3_sock *sock, struct dnbd3_server *server); -static int dnbd3_socket_disconnect(struct dnbd3_sock *sock); +static void dnbd3_socket_disconnect(struct dnbd3_sock *sock); /* * Timer and workers @@ -160,11 +158,6 @@ static void dnbd3_receive_worker(struct work_struct *work) error: if (result == 0) { info_sock(sock, "result is 0, socket seems to be down"); - /* be sure nobody is sending on the socket */ - while (unlikely(sock->pending)) { - msleep(50); - } - dnbd3_sock_release(sock); break; } else if (result < 0) { /* discovery takes care of to many failures */ @@ -187,12 +180,12 @@ static void dnbd3_keepalive_worker(struct work_struct *work) { struct dnbd3_sock *sock; sock = container_of(work, struct dnbd3_sock, keepalive_worker); - if (dnbd3_is_sock_alive(*sock)) { - debug_sock(sock, "starting keepalive worker"); - if (mutex_trylock(&sock->tx_lock)) { + if (mutex_trylock(&sock->tx_lock)) { + if (dnbd3_is_sock_alive(*sock)) { + debug_sock(sock, "starting keepalive worker"); dnbd3_send_request_cmd(sock, CMD_KEEPALIVE); - mutex_unlock(&sock->tx_lock); } + mutex_unlock(&sock->tx_lock); } } @@ -321,12 +314,9 @@ static int dnbd3_adjust_connections(struct dnbd3_device *dev) { alive = 0; for (i = 0; i < dev->number_connections; i++) { - if (plan[i] != dev->socks[i].server || - !dnbd3_is_sock_alive(dev->socks[i])) { - - if (dnbd3_is_sock_alive(dev->socks[i])) { - dnbd3_socket_disconnect(&dev->socks[i]); - } + if (plan[i] != dev->socks[i].server + || !dnbd3_is_sock_alive(dev->socks[i])) { + dnbd3_socket_disconnect(&dev->socks[i]); if (!dnbd3_socket_connect(&dev->socks[i], plan[i])) { alive++; @@ -414,7 +404,7 @@ static int dnbd3_meassure_rtt(struct dnbd3_device *dev, }; result = __dnbd3_socket_connect(&sock, server); - if (result) { + if (result != 0) { error_sock(&sock, "socket connect failed in rtt measurement"); goto error; } @@ -658,13 +648,14 @@ static int __dnbd3_socket_connect(struct dnbd3_sock *sock, return 0; error: if (sock->sock) { - dnbd3_sock_release(sock); + dnbd3_sock_release_nolock(sock); } return result; } /** * dnbd3_socket_connect - connect a socket to a server + * !! HOLD TX_LOCK WHEN CALLING !! * @sock: the socket * @server: the server to connect * @@ -745,7 +736,7 @@ error: if (sock->sock) { kernel_sock_shutdown(sock->sock, SHUT_RDWR); cancel_work_sync(&sock->receive_worker); - dnbd3_sock_release(sock); + dnbd3_sock_release_nolock(sock); } return result; } @@ -753,14 +744,15 @@ error: /** * dnbd3_socket_disconnect - disconnect a socket + * !! HOLD TX_LOCK WHEN CALLING !! * @sock: the socket to disconnect */ -static int dnbd3_socket_disconnect(struct dnbd3_sock *sock) +static void dnbd3_socket_disconnect(struct dnbd3_sock *sock) { + if (!sock->sock) + return; cancel_work_sync(&sock->keepalive_worker); - debug_sock(sock, "socket disconnect"); - /* * Important sequence to shut down socket * 1. kernel_sock_shutdown @@ -772,25 +764,18 @@ static int dnbd3_socket_disconnect(struct dnbd3_sock *sock) * 3. sock_release * release the socket and set to NULL */ - if (sock->sock) { - kernel_sock_shutdown(sock->sock, SHUT_RDWR); - } - + kernel_sock_shutdown(sock->sock, SHUT_RDWR); cancel_work_sync(&sock->receive_worker); - dndb3_reque_busy_requests(sock); - - if (sock->sock) { - dnbd3_sock_release(sock); - } - + dnbd3_sock_release_nolock(sock); sock->server = NULL; - return 0; + return; } /** * dnbd3_net_connect - connect device + * Hold the device_lock when calling this! * @dev: the device to connect * * dnbd3_device.alt_servers[0] must be set @@ -799,6 +784,10 @@ int dnbd3_net_connect(struct dnbd3_device *dev) { int i, result; debug_dev(dev, "connecting to server"); + + if (dev->alt_servers[0].host.type == 0) { + return -ENONET; + } dev->socks = kmalloc(sizeof(struct dnbd3_sock) * dev->number_connections, GFP_KERNEL); if (!dev->socks) { @@ -815,17 +804,12 @@ int dnbd3_net_connect(struct dnbd3_device *dev) debug_dev(dev, "set nr hw queues to %d", dev->number_connections); blk_mq_update_nr_hw_queues(&dev->tag_set, dev->number_connections); - if (dev->alt_servers[0].host.type == 0) { - return -ENONET; - } - result = dnbd3_adjust_connections(dev); if (result) { error_dev(dev, "failed to connect to initial server"); dnbd3_net_disconnect(dev); return -ENOENT; } - dev->connected = true; debug_dev(dev, "connected, starting workers"); INIT_WORK(&dev->discovery_worker, dnbd3_discovery_worker); @@ -839,6 +823,7 @@ int dnbd3_net_connect(struct dnbd3_device *dev) /** * dnbd3_net_disconnect - disconnect device + * Hold the device_lock when calling this! * @dev: the device to disconnect */ int dnbd3_net_disconnect(struct dnbd3_device *dev) @@ -851,20 +836,15 @@ int dnbd3_net_disconnect(struct dnbd3_device *dev) return 0; } - dev->connected = false; del_timer_sync(&dev->timer); /* be sure it does not recover while disconnecting */ cancel_work_sync(&dev->discovery_worker); cancel_work_sync(&dev->panic_worker); for (i = 0; i < dev->number_connections; i++) { - if (dev->socks[i].sock) { - mutex_lock(&dev->socks[i].tx_lock); - if (dnbd3_socket_disconnect(&dev->socks[i])) { - result = -EIO; - } - mutex_unlock(&dev->socks[i].tx_lock); - mutex_destroy(&dev->socks[i].tx_lock); - } + mutex_lock(&dev->socks[i].tx_lock); + dnbd3_socket_disconnect(&dev->socks[i]); + mutex_unlock(&dev->socks[i].tx_lock); + mutex_destroy(&dev->socks[i].tx_lock); } kfree(dev->socks); dev->socks = NULL; -- cgit v1.2.3-55-g7522