summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRichard Henderson2021-12-23 16:56:01 +0100
committerRichard Henderson2021-12-23 16:56:01 +0100
commit1bd88c4542e97f49955c142f8dc04dd32df9e91f (patch)
treedb76f6a4ac0ebefcf14e4e7e189f1ec34b3c16e6
parentMerge tag 'pull-block-2021-12-22' of https://gitlab.com/hreitz/qemu into staging (diff)
parentiotests: add nbd-reconnect-on-open test (diff)
downloadqemu-1bd88c4542e97f49955c142f8dc04dd32df9e91f.tar.gz
qemu-1bd88c4542e97f49955c142f8dc04dd32df9e91f.tar.xz
qemu-1bd88c4542e97f49955c142f8dc04dd32df9e91f.zip
Merge tag 'pull-nbd-2021-12-22-v2' of https://src.openvz.org/scm/~vsementsov/qemu into staging
nbd: reconnect-on-open feature v2: simple fix for mypy and pylint complains on patch 04 # gpg: Signature made Thu 23 Dec 2021 12:45:20 AM PST # gpg: using RSA key 8B9C26CDB2FD147C880E86A1561F24C1F19F79FB # gpg: Good signature from "Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>" [unknown] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: 8B9C 26CD B2FD 147C 880E 86A1 561F 24C1 F19F 79FB * tag 'pull-nbd-2021-12-22-v2' of https://src.openvz.org/scm/~vsementsov/qemu: iotests: add nbd-reconnect-on-open test iotests.py: add qemu_io_popen() iotests.py: add and use qemu_io_wrap_args() iotests.py: add qemu_tool_popen() nbd/client-connection: improve error message of cancelled attempt nbd/client-connection: nbd_co_establish_connection(): return real error nbd: allow reconnect on open, with corresponding new options Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r--block/nbd.c45
-rw-r--r--nbd/client-connection.c57
-rw-r--r--qapi/block-core.json9
-rw-r--r--tests/qemu-iotests/iotests.py37
-rwxr-xr-xtests/qemu-iotests/tests/nbd-reconnect-on-open71
-rw-r--r--tests/qemu-iotests/tests/nbd-reconnect-on-open.out11
6 files changed, 199 insertions, 31 deletions
diff --git a/block/nbd.c b/block/nbd.c
index 5ef462db1b..63dbfa807d 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -80,6 +80,7 @@ typedef struct BDRVNBDState {
NBDClientState state;
QEMUTimer *reconnect_delay_timer;
+ QEMUTimer *open_timer;
NBDClientRequest requests[MAX_NBD_REQUESTS];
NBDReply reply;
@@ -87,6 +88,7 @@ typedef struct BDRVNBDState {
/* Connection parameters */
uint32_t reconnect_delay;
+ uint32_t open_timeout;
SocketAddress *saddr;
char *export, *tlscredsid;
QCryptoTLSCreds *tlscreds;
@@ -218,6 +220,32 @@ static void nbd_teardown_connection(BlockDriverState *bs)
s->state = NBD_CLIENT_QUIT;
}
+static void open_timer_del(BDRVNBDState *s)
+{
+ if (s->open_timer) {
+ timer_free(s->open_timer);
+ s->open_timer = NULL;
+ }
+}
+
+static void open_timer_cb(void *opaque)
+{
+ BDRVNBDState *s = opaque;
+
+ nbd_co_establish_connection_cancel(s->conn);
+ open_timer_del(s);
+}
+
+static void open_timer_init(BDRVNBDState *s, uint64_t expire_time_ns)
+{
+ assert(!s->open_timer);
+ s->open_timer = aio_timer_new(bdrv_get_aio_context(s->bs),
+ QEMU_CLOCK_REALTIME,
+ SCALE_NS,
+ open_timer_cb, s);
+ timer_mod(s->open_timer, expire_time_ns);
+}
+
static bool nbd_client_connecting(BDRVNBDState *s)
{
NBDClientState state = qatomic_load_acquire(&s->state);
@@ -1742,6 +1770,15 @@ static QemuOptsList nbd_runtime_opts = {
"future requests before a successful reconnect will "
"immediately fail. Default 0",
},
+ {
+ .name = "open-timeout",
+ .type = QEMU_OPT_NUMBER,
+ .help = "In seconds. If zero, the nbd driver tries the connection "
+ "only once, and fails to open if the connection fails. "
+ "If non-zero, the nbd driver will repeat connection "
+ "attempts until successful or until @open-timeout seconds "
+ "have elapsed. Default 0",
+ },
{ /* end of list */ }
},
};
@@ -1797,6 +1834,7 @@ static int nbd_process_options(BlockDriverState *bs, QDict *options,
}
s->reconnect_delay = qemu_opt_get_number(opts, "reconnect-delay", 0);
+ s->open_timeout = qemu_opt_get_number(opts, "open-timeout", 0);
ret = 0;
@@ -1828,7 +1866,12 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
s->conn = nbd_client_connection_new(s->saddr, true, s->export,
s->x_dirty_bitmap, s->tlscreds);
- /* TODO: Configurable retry-until-timeout behaviour. */
+ if (s->open_timeout) {
+ nbd_client_connection_enable_retry(s->conn);
+ open_timer_init(s, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
+ s->open_timeout * NANOSECONDS_PER_SECOND);
+ }
+
s->state = NBD_CLIENT_CONNECTING_WAIT;
ret = nbd_do_establish_connection(bs, errp);
if (ret < 0) {
diff --git a/nbd/client-connection.c b/nbd/client-connection.c
index 695f855754..2bda42641d 100644
--- a/nbd/client-connection.c
+++ b/nbd/client-connection.c
@@ -39,16 +39,18 @@ struct NBDClientConnection {
QemuMutex mutex;
+ NBDExportInfo updated_info;
/*
- * @sioc and @err represent a connection attempt. While running
- * is true, they are only used by the connection thread, and mutex
- * locking is not needed. Once the thread finishes,
- * nbd_co_establish_connection then steals these pointers while
- * under the mutex.
+ * @sioc represents a successful result. While the thread is running, it is
+ * used only by the thread and is not protected by the mutex. When the thread
+ * is not running, nbd_co_establish_connection() steals @sioc under the mutex.
*/
- NBDExportInfo updated_info;
QIOChannelSocket *sioc;
QIOChannel *ioc;
+ /*
+ * @err holds the error from the previous attempt, if any. It may be copied
+ * by nbd_co_establish_connection() when it reports failure.
+ */
Error *err;
/* All further fields are accessed only under mutex */
@@ -170,18 +172,18 @@ static void *connect_thread_func(void *opaque)
qemu_mutex_lock(&conn->mutex);
while (!conn->detached) {
+ Error *local_err = NULL;
+
assert(!conn->sioc);
conn->sioc = qio_channel_socket_new();
qemu_mutex_unlock(&conn->mutex);
- error_free(conn->err);
- conn->err = NULL;
conn->updated_info = conn->initial_info;
ret = nbd_connect(conn->sioc, conn->saddr,
conn->do_negotiation ? &conn->updated_info : NULL,
- conn->tlscreds, &conn->ioc, &conn->err);
+ conn->tlscreds, &conn->ioc, &local_err);
/*
* conn->updated_info will finally be returned to the user. Clear the
@@ -194,6 +196,10 @@ static void *connect_thread_func(void *opaque)
qemu_mutex_lock(&conn->mutex);
+ error_free(conn->err);
+ conn->err = NULL;
+ error_propagate(&conn->err, local_err);
+
if (ret < 0) {
object_unref(OBJECT(conn->sioc));
conn->sioc = NULL;
@@ -311,14 +317,17 @@ nbd_co_establish_connection(NBDClientConnection *conn, NBDExportInfo *info,
}
conn->running = true;
- error_free(conn->err);
- conn->err = NULL;
qemu_thread_create(&thread, "nbd-connect",
connect_thread_func, conn, QEMU_THREAD_DETACHED);
}
if (!blocking) {
- error_setg(errp, "No connection at the moment");
+ if (conn->err) {
+ error_propagate(errp, error_copy(conn->err));
+ } else {
+ error_setg(errp, "No connection at the moment");
+ }
+
return NULL;
}
@@ -339,14 +348,30 @@ nbd_co_establish_connection(NBDClientConnection *conn, NBDExportInfo *info,
* attempt as failed, but leave the connection thread running,
* to reuse it for the next connection attempt.
*/
- error_setg(errp, "Connection attempt cancelled by other operation");
+ if (conn->err) {
+ error_propagate(errp, error_copy(conn->err));
+ } else {
+ /*
+ * The only possible case here is cancelling by open_timer
+ * during nbd_open(). So, the error message is for that case.
+ * If we have more use cases, we can refactor
+ * nbd_co_establish_connection_cancel() to take an additional
+ * parameter cancel_reason, that would then be passed to the
+ * caller of cancelled nbd_co_establish_connection().
+ */
+ error_setg(errp, "Connection attempt cancelled by timeout");
+ }
+
return NULL;
} else {
- error_propagate(errp, conn->err);
- conn->err = NULL;
- if (!conn->sioc) {
+ /* Thread finished. There must be either error or sioc */
+ assert(!conn->err != !conn->sioc);
+
+ if (conn->err) {
+ error_propagate(errp, error_copy(conn->err));
return NULL;
}
+
if (conn->do_negotiation) {
memcpy(info, &conn->updated_info, sizeof(*info));
if (conn->ioc) {
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 1d3dd9cb48..bd0b285245 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4096,6 +4096,12 @@
# future requests before a successful reconnect will
# immediately fail. Default 0 (Since 4.2)
#
+# @open-timeout: In seconds. If zero, the nbd driver tries the connection
+# only once, and fails to open if the connection fails.
+# If non-zero, the nbd driver will repeat connection attempts
+# until successful or until @open-timeout seconds have elapsed.
+# Default 0 (Since 7.0)
+#
# Features:
# @unstable: Member @x-dirty-bitmap is experimental.
#
@@ -4106,7 +4112,8 @@
'*export': 'str',
'*tls-creds': 'str',
'*x-dirty-bitmap': { 'type': 'str', 'features': [ 'unstable' ] },
- '*reconnect-delay': 'uint32' } }
+ '*reconnect-delay': 'uint32',
+ '*open-timeout': 'uint32' } }
##
# @BlockdevOptionsRaw:
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 83bfedb902..1e2f2391d1 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -138,14 +138,22 @@ def unarchive_sample_image(sample, fname):
shutil.copyfileobj(f_in, f_out)
+def qemu_tool_popen(args: Sequence[str],
+ connect_stderr: bool = True) -> 'subprocess.Popen[str]':
+ stderr = subprocess.STDOUT if connect_stderr else None
+ # pylint: disable=consider-using-with
+ return subprocess.Popen(args,
+ stdout=subprocess.PIPE,
+ stderr=stderr,
+ universal_newlines=True)
+
+
def qemu_tool_pipe_and_status(tool: str, args: Sequence[str],
connect_stderr: bool = True) -> Tuple[str, int]:
"""
Run a tool and return both its output and its exit code
"""
- stderr = subprocess.STDOUT if connect_stderr else None
- with subprocess.Popen(args, stdout=subprocess.PIPE,
- stderr=stderr, universal_newlines=True) as subp:
+ with qemu_tool_popen(args, connect_stderr) as subp:
output = subp.communicate()[0]
if subp.returncode < 0:
cmd = ' '.join(args)
@@ -233,10 +241,18 @@ def img_info_log(filename, filter_path=None, imgopts=False, extra_args=()):
filter_path = filename
log(filter_img_info(output, filter_path))
+def qemu_io_wrap_args(args: Sequence[str]) -> List[str]:
+ if '-f' in args or '--image-opts' in args:
+ return qemu_io_args_no_fmt + list(args)
+ else:
+ return qemu_io_args + list(args)
+
+def qemu_io_popen(*args):
+ return qemu_tool_popen(qemu_io_wrap_args(args))
+
def qemu_io(*args):
'''Run qemu-io and return the stdout data'''
- args = qemu_io_args + list(args)
- return qemu_tool_pipe_and_status('qemu-io', args)[0]
+ return qemu_tool_pipe_and_status('qemu-io', qemu_io_wrap_args(args))[0]
def qemu_io_log(*args):
result = qemu_io(*args)
@@ -245,12 +261,7 @@ def qemu_io_log(*args):
def qemu_io_silent(*args):
'''Run qemu-io and return the exit code, suppressing stdout'''
- if '-f' in args or '--image-opts' in args:
- default_args = qemu_io_args_no_fmt
- else:
- default_args = qemu_io_args
-
- args = default_args + list(args)
+ args = qemu_io_wrap_args(args)
result = subprocess.run(args, stdout=subprocess.DEVNULL, check=False)
if result.returncode < 0:
sys.stderr.write('qemu-io received signal %i: %s\n' %
@@ -259,14 +270,14 @@ def qemu_io_silent(*args):
def qemu_io_silent_check(*args):
'''Run qemu-io and return True if the subprocess returned 0'''
- args = qemu_io_args + list(args)
+ args = qemu_io_wrap_args(args)
result = subprocess.run(args, stdout=subprocess.DEVNULL,
stderr=subprocess.STDOUT, check=False)
return result.returncode == 0
class QemuIoInteractive:
def __init__(self, *args):
- self.args = qemu_io_args_no_fmt + list(args)
+ self.args = qemu_io_wrap_args(args)
# We need to keep the Popen object around, and not
# close it immediately. Therefore, disable the pylint check:
# pylint: disable=consider-using-with
diff --git a/tests/qemu-iotests/tests/nbd-reconnect-on-open b/tests/qemu-iotests/tests/nbd-reconnect-on-open
new file mode 100755
index 0000000000..8be721a24f
--- /dev/null
+++ b/tests/qemu-iotests/tests/nbd-reconnect-on-open
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+#
+# Test nbd reconnect on open
+#
+# Copyright (c) 2020 Virtuozzo International GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import time
+
+import iotests
+from iotests import qemu_img_create, file_path, qemu_io_popen, qemu_nbd, \
+ qemu_io_log, log
+
+iotests.script_initialize(supported_fmts=['qcow2'])
+
+disk, nbd_sock = file_path('disk', 'nbd-sock')
+
+
+def create_args(open_timeout):
+ return ['--image-opts', '-c', 'read 0 1M',
+ f'driver=nbd,open-timeout={open_timeout},'
+ f'server.type=unix,server.path={nbd_sock}']
+
+
+def check_fail_to_connect(open_timeout):
+ log(f'Check fail to connect with {open_timeout} seconds of timeout')
+
+ start_t = time.time()
+ qemu_io_log(*create_args(open_timeout))
+ delta_t = time.time() - start_t
+
+ max_delta = open_timeout + 0.2
+ if open_timeout <= delta_t <= max_delta:
+ log(f'qemu_io finished in {open_timeout}..{max_delta} seconds, OK')
+ else:
+ note = 'too early' if delta_t < open_timeout else 'too long'
+ log(f'qemu_io finished in {delta_t:.1f} seconds, {note}')
+
+
+qemu_img_create('-f', iotests.imgfmt, disk, '1M')
+
+# Start NBD client when NBD server is not yet running. It should not fail, but
+# wait for 5 seconds for the server to be available.
+client = qemu_io_popen(*create_args(5))
+
+time.sleep(1)
+qemu_nbd('-k', nbd_sock, '-f', iotests.imgfmt, disk)
+
+# client should succeed
+log(client.communicate()[0], filters=[iotests.filter_qemu_io])
+
+# Server was started without --persistent flag, so it should be off now. Let's
+# check it and at the same time check that with open-timeout=0 client fails
+# immediately.
+check_fail_to_connect(0)
+
+# Check that we will fail after non-zero timeout if server is still unavailable
+check_fail_to_connect(1)
diff --git a/tests/qemu-iotests/tests/nbd-reconnect-on-open.out b/tests/qemu-iotests/tests/nbd-reconnect-on-open.out
new file mode 100644
index 0000000000..a35ae30ea4
--- /dev/null
+++ b/tests/qemu-iotests/tests/nbd-reconnect-on-open.out
@@ -0,0 +1,11 @@
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+Check fail to connect with 0 seconds of timeout
+qemu-io: can't open: Failed to connect to 'TEST_DIR/PID-nbd-sock': No such file or directory
+
+qemu_io finished in 0..0.2 seconds, OK
+Check fail to connect with 1 seconds of timeout
+qemu-io: can't open: Failed to connect to 'TEST_DIR/PID-nbd-sock': No such file or directory
+
+qemu_io finished in 1..1.2 seconds, OK