From 61cc872456492da24e36cfbd668abfbe127c739f Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 15 Aug 2019 13:50:24 -0500 Subject: nbd: Advertise multi-conn for shared read-only connections The NBD specification defines NBD_FLAG_CAN_MULTI_CONN, which can be advertised when the server promises cache consistency between simultaneous clients (basically, rules that determine what FUA and flush from one client are able to guarantee for reads from another client). When we don't permit simultaneous clients (such as qemu-nbd without -e), the bit makes no sense; and for writable images, we probably have a lot more work before we can declare that actions from one client are cache-consistent with actions from another. But for read-only images, where flush isn't changing any data, we might as well advertise multi-conn support. What's more, advertisement of the bit makes it easier for clients to determine if 'qemu-nbd -e' was in use, where a second connection will succeed rather than hang until the first client goes away. This patch affects qemu as server in advertising the bit. We may want to consider patches to qemu as client to attempt parallel connections for higher throughput by spreading the load over those connections when a server advertises multi-conn, but for now sticking to one connection per nbd:// BDS is okay. See also: https://bugzilla.redhat.com/1708300 Signed-off-by: Eric Blake Message-Id: <20190815185024.7010-1-eblake@redhat.com> [eblake: tweak blockdev-nbd.c to not request shared when writable, fix iotest 233] Reviewed-by: John Snow --- docs/interop/nbd.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'docs') diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt index fc64473e02..6dfec7f476 100644 --- a/docs/interop/nbd.txt +++ b/docs/interop/nbd.txt @@ -53,3 +53,4 @@ the operation of that feature. * 2.12: NBD_CMD_BLOCK_STATUS for "base:allocation" * 3.0: NBD_OPT_STARTTLS with TLS Pre-Shared Keys (PSK), NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE +* 4.2: NBD_FLAG_CAN_MULTI_CONN for sharable read-only exports -- cgit v1.2.3-55-g7522 From 0a4795455ceeebdca999a60583dd42434f2ec0d1 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 23 Aug 2019 09:37:23 -0500 Subject: nbd: Prepare for NBD_CMD_FLAG_FAST_ZERO Commit fe0480d6 and friends added BDRV_REQ_NO_FALLBACK as a way to avoid wasting time on a preliminary write-zero request that will later be rewritten by actual data, if it is known that the write-zero request will use a slow fallback; but in doing so, could not optimize for NBD. The NBD specification is now considering an extension that will allow passing on those semantics; this patch updates the new protocol bits and 'qemu-nbd --list' output to recognize the bit, as well as the new errno value possible when using the new flag; while upcoming patches will improve the client to use the feature when present, and the server to advertise support for it. The NBD spec recommends (but not requires) that ENOTSUP be avoided for all but failures of a fast zero (the only time it is mandatory to avoid an ENOTSUP failure is when fast zero is supported but not requested during write zeroes; the questionable use is for ENOTSUP to other actions like a normal write request). However, clients that get an unexpected ENOTSUP will either already be treating it the same as EINVAL, or may appreciate the extra bit of information. We were equally loose for returning EOVERFLOW in more situations than recommended by the spec, so if it turns out to be a problem in practice, a later patch can tighten handling for both error codes. Signed-off-by: Eric Blake Message-Id: <20190823143726.27062-3-eblake@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy [eblake: tweak commit message, also handle EOPNOTSUPP] --- docs/interop/nbd.txt | 3 ++- include/block/nbd.h | 4 ++++ nbd/common.c | 5 +++++ nbd/server.c | 5 +++++ qemu-nbd.c | 1 + 5 files changed, 17 insertions(+), 1 deletion(-) (limited to 'docs') diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt index 6dfec7f476..4511880961 100644 --- a/docs/interop/nbd.txt +++ b/docs/interop/nbd.txt @@ -53,4 +53,5 @@ the operation of that feature. * 2.12: NBD_CMD_BLOCK_STATUS for "base:allocation" * 3.0: NBD_OPT_STARTTLS with TLS Pre-Shared Keys (PSK), NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE -* 4.2: NBD_FLAG_CAN_MULTI_CONN for sharable read-only exports +* 4.2: NBD_FLAG_CAN_MULTI_CONN for sharable read-only exports, +NBD_CMD_FLAG_FAST_ZERO diff --git a/include/block/nbd.h b/include/block/nbd.h index 2c87b42dfd..21550747cf 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -140,6 +140,7 @@ enum { NBD_FLAG_CAN_MULTI_CONN_BIT = 8, /* Multi-client cache consistent */ NBD_FLAG_SEND_RESIZE_BIT = 9, /* Send resize */ NBD_FLAG_SEND_CACHE_BIT = 10, /* Send CACHE (prefetch) */ + NBD_FLAG_SEND_FAST_ZERO_BIT = 11, /* FAST_ZERO flag for WRITE_ZEROES */ }; #define NBD_FLAG_HAS_FLAGS (1 << NBD_FLAG_HAS_FLAGS_BIT) @@ -153,6 +154,7 @@ enum { #define NBD_FLAG_CAN_MULTI_CONN (1 << NBD_FLAG_CAN_MULTI_CONN_BIT) #define NBD_FLAG_SEND_RESIZE (1 << NBD_FLAG_SEND_RESIZE_BIT) #define NBD_FLAG_SEND_CACHE (1 << NBD_FLAG_SEND_CACHE_BIT) +#define NBD_FLAG_SEND_FAST_ZERO (1 << NBD_FLAG_SEND_FAST_ZERO_BIT) /* New-style handshake (global) flags, sent from server to client, and control what will happen during handshake phase. */ @@ -205,6 +207,7 @@ enum { #define NBD_CMD_FLAG_DF (1 << 2) /* don't fragment structured read */ #define NBD_CMD_FLAG_REQ_ONE (1 << 3) /* only one extent in BLOCK_STATUS * reply chunk */ +#define NBD_CMD_FLAG_FAST_ZERO (1 << 4) /* fail if WRITE_ZEROES is not fast */ /* Supported request types */ enum { @@ -270,6 +273,7 @@ static inline bool nbd_reply_type_is_error(int type) #define NBD_EINVAL 22 #define NBD_ENOSPC 28 #define NBD_EOVERFLOW 75 +#define NBD_ENOTSUP 95 #define NBD_ESHUTDOWN 108 /* Details collected by NBD_OPT_EXPORT_NAME and NBD_OPT_GO */ diff --git a/nbd/common.c b/nbd/common.c index cc8b278e54..ddfe7d1183 100644 --- a/nbd/common.c +++ b/nbd/common.c @@ -201,6 +201,8 @@ const char *nbd_err_lookup(int err) return "ENOSPC"; case NBD_EOVERFLOW: return "EOVERFLOW"; + case NBD_ENOTSUP: + return "ENOTSUP"; case NBD_ESHUTDOWN: return "ESHUTDOWN"; default: @@ -231,6 +233,9 @@ int nbd_errno_to_system_errno(int err) case NBD_EOVERFLOW: ret = EOVERFLOW; break; + case NBD_ENOTSUP: + ret = ENOTSUP; + break; case NBD_ESHUTDOWN: ret = ESHUTDOWN; break; diff --git a/nbd/server.c b/nbd/server.c index d5078f7468..4992148de1 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -55,6 +55,11 @@ static int system_errno_to_nbd_errno(int err) return NBD_ENOSPC; case EOVERFLOW: return NBD_EOVERFLOW; + case ENOTSUP: +#if ENOTSUP != EOPNOTSUPP + case EOPNOTSUPP: +#endif + return NBD_ENOTSUP; case ESHUTDOWN: return NBD_ESHUTDOWN; case EINVAL: diff --git a/qemu-nbd.c b/qemu-nbd.c index ae84115076..9032b6de2a 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -294,6 +294,7 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls, [NBD_FLAG_CAN_MULTI_CONN_BIT] = "multi", [NBD_FLAG_SEND_RESIZE_BIT] = "resize", [NBD_FLAG_SEND_CACHE_BIT] = "cache", + [NBD_FLAG_SEND_FAST_ZERO_BIT] = "fast-zero", }; printf(" size: %" PRIu64 "\n", list[i].size); -- cgit v1.2.3-55-g7522