summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Rettberg2020-07-17 18:19:58 +0200
committerSimon Rettberg2020-07-17 18:19:58 +0200
commita231e6283e1c79a33d6f3bdf786832f10e355d0c (patch)
treea5be40fa9fe93230d06de56ccac3c378f80c3833
parent[KERNEL] After nullptr check, MAYBE RETURN!? (diff)
downloaddnbd3-ng-a231e6283e1c79a33d6f3bdf786832f10e355d0c.tar.gz
dnbd3-ng-a231e6283e1c79a33d6f3bdf786832f10e355d0c.tar.xz
dnbd3-ng-a231e6283e1c79a33d6f3bdf786832f10e355d0c.zip
[KERNEL] Trying to get locking under control
Current code doesn't pay much attention to properly locking when accessing structures that are being used in multiple workers/threads. Try to fix this, slowly.
-rw-r--r--src/kernel/core.c12
-rw-r--r--src/kernel/dnbd3.h1
-rw-r--r--src/kernel/mq.c55
-rw-r--r--src/kernel/net-txrx.c2
-rw-r--r--src/kernel/net.c86
5 files changed, 71 insertions, 85 deletions
diff --git a/src/kernel/core.c b/src/kernel/core.c
index 35ea1ef..8da2b39 100644
--- a/src/kernel/core.c
+++ b/src/kernel/core.c
@@ -124,6 +124,7 @@ static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode,
switch (cmd) {
case IOCTL_OPEN:
debug_dev(dev, "ioctl open");
+ mutex_lock(&dev->device_lock);
if (dev->imgname != NULL) {
result = -EBUSY;
} else if (imgname == NULL) {
@@ -131,7 +132,6 @@ static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode,
} else if (msg == NULL) {
result = -EINVAL;
} else {
- mutex_lock(&dev->device_lock);
if (sizeof(msg->host) != sizeof(dnbd3_host_t)) {
warn_dev(dev, "odd size bug#1 triggered in ioctl");
}
@@ -154,7 +154,6 @@ static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode,
} else {
dev->number_connections = msg->number_connections;
}
- mutex_unlock(&dev->device_lock);
result = dnbd3_net_connect(dev);
if (result) {
@@ -165,10 +164,12 @@ static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode,
}
imgname = NULL;
}
+ mutex_unlock(&dev->device_lock);
break;
case IOCTL_CLOSE:
debug_dev(dev, "ioctl close");
+ mutex_lock(&dev->device_lock);
result = dnbd3_net_disconnect(dev);
set_capacity(dev->disk, 0);
if (dev->imgname) {
@@ -177,6 +178,7 @@ static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode,
}
dev->rid = 0;
dev->reported_size = 0;
+ mutex_unlock(&dev->device_lock);
break;
case IOCTL_SWITCH:
@@ -187,6 +189,7 @@ static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode,
case IOCTL_ADD_SRV:
case IOCTL_REM_SRV:
debug_dev(dev, "ioctl add/rem srv");
+ mutex_lock(&dev->device_lock);
if (dev->imgname == NULL) {
result = -ENOENT;
} else if (dev->new_servers_num >= NUMBER_SERVERS) {
@@ -194,16 +197,15 @@ static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode,
} else if (msg == NULL) {
result = -EINVAL;
} else {
- mutex_lock(&dev->device_lock);
memcpy(&dev->new_servers[dev->new_servers_num].host,
&msg->host, sizeof(msg->host));
/* 0 = ADD, 1 = REM */
dev->new_servers[dev->new_servers_num].failures =
(cmd == IOCTL_ADD_SRV ? 0 : 1);
dev->new_servers_num++;
- mutex_unlock(&dev->device_lock);
result = 0;
}
+ mutex_unlock(&dev->device_lock);
break;
case BLKFLSBUF:
@@ -417,7 +419,7 @@ static void dnbd3_dev_remove(struct dnbd3_device *dev)
struct gendisk *disk = dev->disk;
struct request_queue *q;
- if (dev->connected) {
+ if (dev->socks) {
dnbd3_net_disconnect(dev);
}
diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h
index f8d4bae..87145f8 100644
--- a/src/kernel/dnbd3.h
+++ b/src/kernel/dnbd3.h
@@ -138,7 +138,6 @@ struct dnbd3_device {
struct mutex device_lock;
- bool connected;
uint8_t number_connections;
struct dnbd3_sock *socks;
char *imgname;
diff --git a/src/kernel/mq.c b/src/kernel/mq.c
index f9e1bd2..cc710b2 100644
--- a/src/kernel/mq.c
+++ b/src/kernel/mq.c
@@ -89,14 +89,14 @@ static void dnbd3_busy_iter_requeue(struct request *req, void *priv, bool arg)
/* request is in sending, so we will not requeue */
return;
}
- if (!cmd->dnbd3->connected) {
+ if (!cmd->dnbd3->socks) {
dnbd3_end_cmd(cmd, BLK_STS_IOERR);
mutex_unlock(&cmd->lock);
return;
}
debug_sock(sock, "requeue busy request %p", req);
- dnbd3_requeue_cmd(cmd);
mutex_unlock(&cmd->lock);
+ dnbd3_requeue_cmd(cmd);
}
/**
@@ -116,16 +116,27 @@ void dndb3_reque_busy_requests(struct dnbd3_sock *sock)
static void dnbd3_wait_for_sockets(struct dnbd3_device *dev) {
int i;
bool wait = true;
- while (wait && dev->connected) {
+ bool msg = false;
+ mutex_lock(&dev->device_lock);
+ while (dev->socks) {
for (i = 0; i < dev->number_connections; i++) {
if (dnbd3_is_sock_alive(dev->socks[i])) {
wait = false;
}
}
- if (wait && dev->connected) {
- debug("no socket alive sleeping");
- msleep(100);
+ if (!wait)
+ break;
+ mutex_unlock(&dev->device_lock);
+ if (!msg) {
+ debug_dev(dev, "no socket alive sleeping");
+ msg = true;
}
+ msleep(100);
+ mutex_lock(&dev->device_lock);
+ }
+ mutex_unlock(&dev->device_lock);
+ if (msg) {
+ debug_dev(dev, "done sleeping");
}
}
@@ -136,7 +147,7 @@ static void dnbd3_wait_for_sockets(struct dnbd3_device *dev) {
void dnbd3_requeue_cmd(struct dnbd3_cmd *cmd)
{
struct request *req = blk_mq_rq_from_pdu(cmd);
- dnbd3_wait_for_sockets(cmd->dnbd3);
+ //dnbd3_wait_for_sockets(cmd->dnbd3);
if (!cmd->requed/* && blk_queued_rq(req)*/) {
cmd->requed = true;
@@ -158,6 +169,7 @@ void dnbd3_end_cmd(struct dnbd3_cmd *cmd, blk_status_t error)
/**
* dnbd3_handle_cmd - handles a mq command
+ * !! cmd->lock is held when being called !!
* @cmd: the cmd to send
* @index: the index of the queue
*/
@@ -165,25 +177,19 @@ static int dnbd3_handle_cmd(struct dnbd3_cmd *cmd, int index)
{
struct request *req = blk_mq_rq_from_pdu(cmd);
struct dnbd3_device *dev = cmd->dnbd3;
- struct dnbd3_sock *sock = &dev->socks[index];
+ struct dnbd3_sock *sock;
int ret = -1;
// debug_dev(dev, "handle request at position %lu, size %d, index %d",
// blk_rq_pos(req), blk_rq_bytes(req), index);
- if (!dev->socks || !dnbd3_is_sock_alive(*sock)) {
-
- if (dev->connected) {
- info_dev(dev, "reset request to new socket");
- dnbd3_requeue_cmd(cmd);
- return 0;
- } else {
- error_dev(dev, "ending request device not connected");
- dnbd3_end_cmd(cmd, BLK_STS_IOERR);
- return -EIO;
- }
+ if (!dev->socks || !dnbd3_is_sock_alive(dev->socks[index])) {
+ error_dev(dev, "ending request device not connected");
+ dnbd3_end_cmd(cmd, BLK_STS_IOERR);
+ return -EIO;
}
+ sock = &dev->socks[index];
mutex_lock(&sock->tx_lock);
if (unlikely(!sock->sock)) {
@@ -270,16 +276,15 @@ static enum blk_eh_timer_return dnbd3_xmit_timeout(struct request *req,
return BLK_EH_RESET_TIMER;
}
- if (dev->connected) {
+ if (dev->socks) {
info_dev(dev, "reset request to new socket");
+ mutex_unlock(&cmd->lock);
dnbd3_requeue_cmd(cmd);
+ } else {
+ error_dev(dev, "connection timed out");
+ dnbd3_end_cmd(cmd, BLK_STS_TIMEOUT);
mutex_unlock(&cmd->lock);
- return BLK_EH_DONE;
}
-
- error_dev(dev, "connection timed out");
- dnbd3_end_cmd(cmd, BLK_STS_TIMEOUT);
- mutex_unlock(&cmd->lock);
return BLK_EH_DONE;
}
diff --git a/src/kernel/net-txrx.c b/src/kernel/net-txrx.c
index c839561..51c684d 100644
--- a/src/kernel/net-txrx.c
+++ b/src/kernel/net-txrx.c
@@ -345,8 +345,8 @@ int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock,
error_sock(sock, "unexpected reply (%d) %p", tag, req);
if (req) {
cmd = blk_mq_rq_to_pdu(req);
- mutex_lock(&cmd->lock);
debug_sock(sock, "requeue request");
+ mutex_lock(&cmd->lock);
dnbd3_requeue_cmd(cmd);
mutex_unlock(&cmd->lock);
}
diff --git a/src/kernel/net.c b/src/kernel/net.c
index 6801659..1dd6725 100644
--- a/src/kernel/net.c
+++ b/src/kernel/net.c
@@ -35,12 +35,10 @@
sock_create_kern(&init_net, (af) == HOST_IP4 ? AF_INET : AF_INET6, \
type, proto, sock)
-#define dnbd3_sock_release(sock) \
- do { \
- struct socket *s = (sock)->sock; \
- sock->sock = NULL; \
- sock_release(s); \
- }while (0)
+static void dnbd3_sock_release_nolock(struct dnbd3_sock *sock) {
+ sock_release(sock->sock);
+ sock->sock = NULL;
+}
#define dnbd3_is_sock_alive(s) ((s).sock && (s).server)
@@ -49,7 +47,7 @@ static int __dnbd3_socket_connect(struct dnbd3_sock *sock,
struct dnbd3_server *server);
static int dnbd3_socket_connect(struct dnbd3_sock *sock,
struct dnbd3_server *server);
-static int dnbd3_socket_disconnect(struct dnbd3_sock *sock);
+static void dnbd3_socket_disconnect(struct dnbd3_sock *sock);
/*
* Timer and workers
@@ -160,11 +158,6 @@ static void dnbd3_receive_worker(struct work_struct *work)
error:
if (result == 0) {
info_sock(sock, "result is 0, socket seems to be down");
- /* be sure nobody is sending on the socket */
- while (unlikely(sock->pending)) {
- msleep(50);
- }
- dnbd3_sock_release(sock);
break;
} else if (result < 0) {
/* discovery takes care of to many failures */
@@ -187,12 +180,12 @@ static void dnbd3_keepalive_worker(struct work_struct *work)
{
struct dnbd3_sock *sock;
sock = container_of(work, struct dnbd3_sock, keepalive_worker);
- if (dnbd3_is_sock_alive(*sock)) {
- debug_sock(sock, "starting keepalive worker");
- if (mutex_trylock(&sock->tx_lock)) {
+ if (mutex_trylock(&sock->tx_lock)) {
+ if (dnbd3_is_sock_alive(*sock)) {
+ debug_sock(sock, "starting keepalive worker");
dnbd3_send_request_cmd(sock, CMD_KEEPALIVE);
- mutex_unlock(&sock->tx_lock);
}
+ mutex_unlock(&sock->tx_lock);
}
}
@@ -321,12 +314,9 @@ static int dnbd3_adjust_connections(struct dnbd3_device *dev) {
alive = 0;
for (i = 0; i < dev->number_connections; i++) {
- if (plan[i] != dev->socks[i].server ||
- !dnbd3_is_sock_alive(dev->socks[i])) {
-
- if (dnbd3_is_sock_alive(dev->socks[i])) {
- dnbd3_socket_disconnect(&dev->socks[i]);
- }
+ if (plan[i] != dev->socks[i].server
+ || !dnbd3_is_sock_alive(dev->socks[i])) {
+ dnbd3_socket_disconnect(&dev->socks[i]);
if (!dnbd3_socket_connect(&dev->socks[i],
plan[i])) {
alive++;
@@ -414,7 +404,7 @@ static int dnbd3_meassure_rtt(struct dnbd3_device *dev,
};
result = __dnbd3_socket_connect(&sock, server);
- if (result) {
+ if (result != 0) {
error_sock(&sock, "socket connect failed in rtt measurement");
goto error;
}
@@ -658,13 +648,14 @@ static int __dnbd3_socket_connect(struct dnbd3_sock *sock,
return 0;
error:
if (sock->sock) {
- dnbd3_sock_release(sock);
+ dnbd3_sock_release_nolock(sock);
}
return result;
}
/**
* dnbd3_socket_connect - connect a socket to a server
+ * !! HOLD TX_LOCK WHEN CALLING !!
* @sock: the socket
* @server: the server to connect
*
@@ -745,7 +736,7 @@ error:
if (sock->sock) {
kernel_sock_shutdown(sock->sock, SHUT_RDWR);
cancel_work_sync(&sock->receive_worker);
- dnbd3_sock_release(sock);
+ dnbd3_sock_release_nolock(sock);
}
return result;
}
@@ -753,14 +744,15 @@ error:
/**
* dnbd3_socket_disconnect - disconnect a socket
+ * !! HOLD TX_LOCK WHEN CALLING !!
* @sock: the socket to disconnect
*/
-static int dnbd3_socket_disconnect(struct dnbd3_sock *sock)
+static void dnbd3_socket_disconnect(struct dnbd3_sock *sock)
{
+ if (!sock->sock)
+ return;
cancel_work_sync(&sock->keepalive_worker);
-
debug_sock(sock, "socket disconnect");
-
/*
* Important sequence to shut down socket
* 1. kernel_sock_shutdown
@@ -772,25 +764,18 @@ static int dnbd3_socket_disconnect(struct dnbd3_sock *sock)
* 3. sock_release
* release the socket and set to NULL
*/
- if (sock->sock) {
- kernel_sock_shutdown(sock->sock, SHUT_RDWR);
- }
-
+ kernel_sock_shutdown(sock->sock, SHUT_RDWR);
cancel_work_sync(&sock->receive_worker);
-
dndb3_reque_busy_requests(sock);
-
- if (sock->sock) {
- dnbd3_sock_release(sock);
- }
-
+ dnbd3_sock_release_nolock(sock);
sock->server = NULL;
- return 0;
+ return;
}
/**
* dnbd3_net_connect - connect device
+ * Hold the device_lock when calling this!
* @dev: the device to connect
*
* dnbd3_device.alt_servers[0] must be set
@@ -799,6 +784,10 @@ int dnbd3_net_connect(struct dnbd3_device *dev)
{
int i, result;
debug_dev(dev, "connecting to server");
+
+ if (dev->alt_servers[0].host.type == 0) {
+ return -ENONET;
+ }
dev->socks = kmalloc(sizeof(struct dnbd3_sock) *
dev->number_connections, GFP_KERNEL);
if (!dev->socks) {
@@ -815,17 +804,12 @@ int dnbd3_net_connect(struct dnbd3_device *dev)
debug_dev(dev, "set nr hw queues to %d", dev->number_connections);
blk_mq_update_nr_hw_queues(&dev->tag_set, dev->number_connections);
- if (dev->alt_servers[0].host.type == 0) {
- return -ENONET;
- }
-
result = dnbd3_adjust_connections(dev);
if (result) {
error_dev(dev, "failed to connect to initial server");
dnbd3_net_disconnect(dev);
return -ENOENT;
}
- dev->connected = true;
debug_dev(dev, "connected, starting workers");
INIT_WORK(&dev->discovery_worker, dnbd3_discovery_worker);
@@ -839,6 +823,7 @@ int dnbd3_net_connect(struct dnbd3_device *dev)
/**
* dnbd3_net_disconnect - disconnect device
+ * Hold the device_lock when calling this!
* @dev: the device to disconnect
*/
int dnbd3_net_disconnect(struct dnbd3_device *dev)
@@ -851,20 +836,15 @@ int dnbd3_net_disconnect(struct dnbd3_device *dev)
return 0;
}
- dev->connected = false;
del_timer_sync(&dev->timer);
/* be sure it does not recover while disconnecting */
cancel_work_sync(&dev->discovery_worker);
cancel_work_sync(&dev->panic_worker);
for (i = 0; i < dev->number_connections; i++) {
- if (dev->socks[i].sock) {
- mutex_lock(&dev->socks[i].tx_lock);
- if (dnbd3_socket_disconnect(&dev->socks[i])) {
- result = -EIO;
- }
- mutex_unlock(&dev->socks[i].tx_lock);
- mutex_destroy(&dev->socks[i].tx_lock);
- }
+ mutex_lock(&dev->socks[i].tx_lock);
+ dnbd3_socket_disconnect(&dev->socks[i]);
+ mutex_unlock(&dev->socks[i].tx_lock);
+ mutex_destroy(&dev->socks[i].tx_lock);
}
kfree(dev->socks);
dev->socks = NULL;