From f25e7ab2b091fae1ae3e9f55b5244ddcdb1653bb Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 1 Oct 2020 18:29:39 +0200 Subject: block/nvme: Add driver statistics for access alignment and hw errors Keep statistics of some hardware errors, and number of aligned/unaligned I/O accesses. QMP example booting a full RHEL 8.3 aarch64 guest: { "execute": "query-blockstats" } { "return": [ { "device": "", "node-name": "drive0", "stats": { "flush_total_time_ns": 6026948, "wr_highest_offset": 3383991230464, "wr_total_time_ns": 807450995, "failed_wr_operations": 0, "failed_rd_operations": 0, "wr_merged": 3, "wr_bytes": 50133504, "failed_unmap_operations": 0, "failed_flush_operations": 0, "account_invalid": false, "rd_total_time_ns": 1846979900, "flush_operations": 130, "wr_operations": 659, "rd_merged": 1192, "rd_bytes": 218244096, "account_failed": false, "idle_time_ns": 2678641497, "rd_operations": 7406 }, "driver-specific": { "driver": "nvme", "completion-errors": 0, "unaligned-accesses": 2959, "aligned-accesses": 4477 }, "qdev": "/machine/peripheral-anon/device[0]/virtio-backend" } ] } Suggested-by: Stefan Hajnoczi Signed-off-by: Philippe Mathieu-Daudé Acked-by: Markus Armbruster Message-id: 20201001162939.1567915-1-philmd@redhat.com Signed-off-by: Stefan Hajnoczi --- qapi/block-core.json | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'qapi') diff --git a/qapi/block-core.json b/qapi/block-core.json index ee5ebef7f2..e00fc27b5e 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -947,6 +947,27 @@ 'discard-nb-failed': 'uint64', 'discard-bytes-ok': 'uint64' } } +## +# @BlockStatsSpecificNvme: +# +# NVMe driver statistics +# +# @completion-errors: The number of completion errors. +# +# @aligned-accesses: The number of aligned accesses performed by +# the driver. +# +# @unaligned-accesses: The number of unaligned accesses performed by +# the driver. 
+# +# Since: 5.2 +## +{ 'struct': 'BlockStatsSpecificNvme', + 'data': { + 'completion-errors': 'uint64', + 'aligned-accesses': 'uint64', + 'unaligned-accesses': 'uint64' } } + ## # @BlockStatsSpecific: # @@ -959,7 +980,8 @@ 'discriminator': 'driver', 'data': { 'file': 'BlockStatsSpecificFile', - 'host_device': 'BlockStatsSpecificFile' } } + 'host_device': 'BlockStatsSpecificFile', + 'nvme': 'BlockStatsSpecificNvme' } } ## # @BlockStats: -- cgit v1.2.3-55-g7522 From 90fc91d50b76cf3f32e2ad5b79484ac9680e1d6a Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 24 Sep 2020 16:15:47 +0100 Subject: block/export: convert vhost-user-blk server to block export API Use the new QAPI block exports API instead of defining our own QOM objects. This is a large change because the lifecycle of VuBlockDev needs to follow BlockExportDriver. QOM properties are replaced by QAPI options objects. VuBlockDev is renamed VuBlkExport and contains a BlockExport field. Several fields can be dropped since BlockExport already has equivalents. The file names and meson build integration will be adjusted in a future patch. libvhost-user should probably be built as a static library that is linked into QEMU instead of as a .c file that results in duplicate compilation. The new command-line syntax is: $ qemu-storage-daemon \ --blockdev file,node-name=drive0,filename=test.img \ --export vhost-user-blk,node-name=drive0,id=export0,unix-socket=/tmp/vhost-user-blk.sock Note that unix-socket is optional because we may wish to accept chardevs too in the future. Markus noted that supported address families are not explicit in the QAPI schema. It is unlikely that support for more address families will be added since file descriptor passing is required and few address families support it. If a new address family needs to be added, then the QAPI 'features' syntax can be used to advertize them. 
Signed-off-by: Stefan Hajnoczi Acked-by: Markus Armbruster Message-id: 20200924151549.913737-12-stefanha@redhat.com [Skip test on big-endian host architectures because this device doesn't support them yet (as already mentioned in a code comment). --Stefan] Signed-off-by: Stefan Hajnoczi --- block/export/export.c | 6 + block/export/meson.build | 1 + block/export/vhost-user-blk-server.c | 448 +++++++++-------------------------- block/export/vhost-user-blk-server.h | 23 +- block/meson.build | 1 - qapi/block-export.json | 21 +- util/vhost-user-server.c | 10 +- 7 files changed, 154 insertions(+), 356 deletions(-) (limited to 'qapi') diff --git a/block/export/export.c b/block/export/export.c index f2c00d13bf..bd7cac241f 100644 --- a/block/export/export.c +++ b/block/export/export.c @@ -17,6 +17,9 @@ #include "sysemu/block-backend.h" #include "block/export.h" #include "block/nbd.h" +#if CONFIG_LINUX +#include "block/export/vhost-user-blk-server.h" +#endif #include "qapi/error.h" #include "qapi/qapi-commands-block-export.h" #include "qapi/qapi-events-block-export.h" @@ -24,6 +27,9 @@ static const BlockExportDriver *blk_exp_drivers[] = { &blk_exp_nbd, +#if CONFIG_LINUX + &blk_exp_vhost_user_blk, +#endif }; /* Only accessed from the main thread */ diff --git a/block/export/meson.build b/block/export/meson.build index 558ef35d38..ef3a9576f7 100644 --- a/block/export/meson.build +++ b/block/export/meson.build @@ -1 +1,2 @@ block_ss.add(files('export.c')) +block_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-blk-server.c', '../../contrib/libvhost-user/libvhost-user.c')) diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index faefcfcaea..3e5bd6caee 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -11,6 +11,9 @@ */ #include "qemu/osdep.h" #include "block/block.h" +#include "contrib/libvhost-user/libvhost-user.h" +#include "standard-headers/linux/virtio_blk.h" +#include 
"util/vhost-user-server.h" #include "vhost-user-blk-server.h" #include "qapi/error.h" #include "qom/object_interfaces.h" @@ -24,7 +27,7 @@ struct virtio_blk_inhdr { unsigned char status; }; -typedef struct VuBlockReq { +typedef struct VuBlkReq { VuVirtqElement elem; int64_t sector_num; size_t size; @@ -32,9 +35,19 @@ typedef struct VuBlockReq { struct virtio_blk_outhdr out; VuServer *server; struct VuVirtq *vq; -} VuBlockReq; +} VuBlkReq; -static void vu_block_req_complete(VuBlockReq *req) +/* vhost user block device */ +typedef struct { + BlockExport export; + VuServer vu_server; + uint32_t blk_size; + QIOChannelSocket *sioc; + struct virtio_blk_config blkcfg; + bool writable; +} VuBlkExport; + +static void vu_blk_req_complete(VuBlkReq *req) { VuDev *vu_dev = &req->server->vu_dev; @@ -45,14 +58,9 @@ static void vu_block_req_complete(VuBlockReq *req) free(req); } -static VuBlockDev *get_vu_block_device_by_server(VuServer *server) -{ - return container_of(server, VuBlockDev, vu_server); -} - static int coroutine_fn -vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov, - uint32_t iovcnt, uint32_t type) +vu_blk_discard_write_zeroes(BlockBackend *blk, struct iovec *iov, + uint32_t iovcnt, uint32_t type) { struct virtio_blk_discard_write_zeroes desc; ssize_t size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc)); @@ -61,16 +69,14 @@ vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov, return -EINVAL; } - VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server); uint64_t range[2] = { le64_to_cpu(desc.sector) << 9, le32_to_cpu(desc.num_sectors) << 9 }; if (type == VIRTIO_BLK_T_DISCARD) { - if (blk_co_pdiscard(vdev_blk->backend, range[0], range[1]) == 0) { + if (blk_co_pdiscard(blk, range[0], range[1]) == 0) { return 0; } } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) { - if (blk_co_pwrite_zeroes(vdev_blk->backend, - range[0], range[1], 0) == 0) { + if (blk_co_pwrite_zeroes(blk, range[0], range[1], 0) == 0) { return 0; } } @@ -78,22 +84,15 
@@ vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov, return -EINVAL; } -static int coroutine_fn vu_block_flush(VuBlockReq *req) +static void coroutine_fn vu_blk_virtio_process_req(void *opaque) { - VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server); - BlockBackend *backend = vdev_blk->backend; - return blk_co_flush(backend); -} - -static void coroutine_fn vu_block_virtio_process_req(void *opaque) -{ - VuBlockReq *req = opaque; + VuBlkReq *req = opaque; VuServer *server = req->server; VuVirtqElement *elem = &req->elem; uint32_t type; - VuBlockDev *vdev_blk = get_vu_block_device_by_server(server); - BlockBackend *backend = vdev_blk->backend; + VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server); + BlockBackend *blk = vexp->export.blk; struct iovec *in_iov = elem->in_sg; struct iovec *out_iov = elem->out_sg; @@ -133,16 +132,19 @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque) bool is_write = type & VIRTIO_BLK_T_OUT; req->sector_num = le64_to_cpu(req->out.sector); - int64_t offset = req->sector_num * vdev_blk->blk_size; + if (is_write && !vexp->writable) { + req->in->status = VIRTIO_BLK_S_IOERR; + break; + } + + int64_t offset = req->sector_num * vexp->blk_size; QEMUIOVector qiov; if (is_write) { qemu_iovec_init_external(&qiov, out_iov, out_num); - ret = blk_co_pwritev(backend, offset, qiov.size, - &qiov, 0); + ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0); } else { qemu_iovec_init_external(&qiov, in_iov, in_num); - ret = blk_co_preadv(backend, offset, qiov.size, - &qiov, 0); + ret = blk_co_preadv(blk, offset, qiov.size, &qiov, 0); } if (ret >= 0) { req->in->status = VIRTIO_BLK_S_OK; @@ -152,7 +154,7 @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque) break; } case VIRTIO_BLK_T_FLUSH: - if (vu_block_flush(req) == 0) { + if (blk_co_flush(blk) == 0) { req->in->status = VIRTIO_BLK_S_OK; } else { req->in->status = VIRTIO_BLK_S_IOERR; @@ -169,8 +171,13 @@ static void coroutine_fn 
vu_block_virtio_process_req(void *opaque) case VIRTIO_BLK_T_DISCARD: case VIRTIO_BLK_T_WRITE_ZEROES: { int rc; - rc = vu_block_discard_write_zeroes(req, &elem->out_sg[1], - out_num, type); + + if (!vexp->writable) { + req->in->status = VIRTIO_BLK_S_IOERR; + break; + } + + rc = vu_blk_discard_write_zeroes(blk, &elem->out_sg[1], out_num, type); if (rc == 0) { req->in->status = VIRTIO_BLK_S_OK; } else { @@ -183,22 +190,22 @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque) break; } - vu_block_req_complete(req); + vu_blk_req_complete(req); return; err: - free(elem); + free(req); } -static void vu_block_process_vq(VuDev *vu_dev, int idx) +static void vu_blk_process_vq(VuDev *vu_dev, int idx) { VuServer *server = container_of(vu_dev, VuServer, vu_dev); VuVirtq *vq = vu_get_queue(vu_dev, idx); while (1) { - VuBlockReq *req; + VuBlkReq *req; - req = vu_queue_pop(vu_dev, vq, sizeof(VuBlockReq)); + req = vu_queue_pop(vu_dev, vq, sizeof(VuBlkReq)); if (!req) { break; } @@ -207,26 +214,26 @@ static void vu_block_process_vq(VuDev *vu_dev, int idx) req->vq = vq; Coroutine *co = - qemu_coroutine_create(vu_block_virtio_process_req, req); + qemu_coroutine_create(vu_blk_virtio_process_req, req); qemu_coroutine_enter(co); } } -static void vu_block_queue_set_started(VuDev *vu_dev, int idx, bool started) +static void vu_blk_queue_set_started(VuDev *vu_dev, int idx, bool started) { VuVirtq *vq; assert(vu_dev); vq = vu_get_queue(vu_dev, idx); - vu_set_queue_handler(vu_dev, vq, started ? vu_block_process_vq : NULL); + vu_set_queue_handler(vu_dev, vq, started ? 
vu_blk_process_vq : NULL); } -static uint64_t vu_block_get_features(VuDev *dev) +static uint64_t vu_blk_get_features(VuDev *dev) { uint64_t features; VuServer *server = container_of(dev, VuServer, vu_dev); - VuBlockDev *vdev_blk = get_vu_block_device_by_server(server); + VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server); features = 1ull << VIRTIO_BLK_F_SIZE_MAX | 1ull << VIRTIO_BLK_F_SEG_MAX | 1ull << VIRTIO_BLK_F_TOPOLOGY | @@ -240,35 +247,35 @@ static uint64_t vu_block_get_features(VuDev *dev) 1ull << VIRTIO_RING_F_EVENT_IDX | 1ull << VHOST_USER_F_PROTOCOL_FEATURES; - if (!vdev_blk->writable) { + if (!vexp->writable) { features |= 1ull << VIRTIO_BLK_F_RO; } return features; } -static uint64_t vu_block_get_protocol_features(VuDev *dev) +static uint64_t vu_blk_get_protocol_features(VuDev *dev) { return 1ull << VHOST_USER_PROTOCOL_F_CONFIG | 1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD; } static int -vu_block_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len) +vu_blk_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len) { + /* TODO blkcfg must be little-endian for VIRTIO 1.0 */ VuServer *server = container_of(vu_dev, VuServer, vu_dev); - VuBlockDev *vdev_blk = get_vu_block_device_by_server(server); - memcpy(config, &vdev_blk->blkcfg, len); - + VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server); + memcpy(config, &vexp->blkcfg, len); return 0; } static int -vu_block_set_config(VuDev *vu_dev, const uint8_t *data, +vu_blk_set_config(VuDev *vu_dev, const uint8_t *data, uint32_t offset, uint32_t size, uint32_t flags) { VuServer *server = container_of(vu_dev, VuServer, vu_dev); - VuBlockDev *vdev_blk = get_vu_block_device_by_server(server); + VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server); uint8_t wce; /* don't support live migration */ @@ -282,8 +289,8 @@ vu_block_set_config(VuDev *vu_dev, const uint8_t *data, } wce = *data; - vdev_blk->blkcfg.wce = wce; - blk_set_enable_write_cache(vdev_blk->backend, wce); + 
vexp->blkcfg.wce = wce; + blk_set_enable_write_cache(vexp->export.blk, wce); return 0; } @@ -295,7 +302,7 @@ vu_block_set_config(VuDev *vu_dev, const uint8_t *data, * of vu_process_message. * */ -static int vu_block_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply) +static int vu_blk_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply) { if (vmsg->request == VHOST_USER_NONE) { dev->panic(dev, "disconnect"); @@ -304,29 +311,29 @@ static int vu_block_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply) return false; } -static const VuDevIface vu_block_iface = { - .get_features = vu_block_get_features, - .queue_set_started = vu_block_queue_set_started, - .get_protocol_features = vu_block_get_protocol_features, - .get_config = vu_block_get_config, - .set_config = vu_block_set_config, - .process_msg = vu_block_process_msg, +static const VuDevIface vu_blk_iface = { + .get_features = vu_blk_get_features, + .queue_set_started = vu_blk_queue_set_started, + .get_protocol_features = vu_blk_get_protocol_features, + .get_config = vu_blk_get_config, + .set_config = vu_blk_set_config, + .process_msg = vu_blk_process_msg, }; static void blk_aio_attached(AioContext *ctx, void *opaque) { - VuBlockDev *vub_dev = opaque; - vhost_user_server_attach_aio_context(&vub_dev->vu_server, ctx); + VuBlkExport *vexp = opaque; + vhost_user_server_attach_aio_context(&vexp->vu_server, ctx); } static void blk_aio_detach(void *opaque) { - VuBlockDev *vub_dev = opaque; - vhost_user_server_detach_aio_context(&vub_dev->vu_server); + VuBlkExport *vexp = opaque; + vhost_user_server_detach_aio_context(&vexp->vu_server); } static void -vu_block_initialize_config(BlockDriverState *bs, +vu_blk_initialize_config(BlockDriverState *bs, struct virtio_blk_config *config, uint32_t blk_size) { config->capacity = bdrv_getlength(bs) >> BDRV_SECTOR_BITS; @@ -343,290 +350,67 @@ vu_block_initialize_config(BlockDriverState *bs, config->max_write_zeroes_seg = 1; } -static VuBlockDev 
*vu_block_init(VuBlockDev *vu_block_device, Error **errp) +static void vu_blk_exp_request_shutdown(BlockExport *exp) { + VuBlkExport *vexp = container_of(exp, VuBlkExport, export); - BlockBackend *blk; - Error *local_error = NULL; - const char *node_name = vu_block_device->node_name; - bool writable = vu_block_device->writable; - uint64_t perm = BLK_PERM_CONSISTENT_READ; - int ret; - - AioContext *ctx; - - BlockDriverState *bs = bdrv_lookup_bs(node_name, node_name, &local_error); - - if (!bs) { - error_propagate(errp, local_error); - return NULL; - } - - if (bdrv_is_read_only(bs)) { - writable = false; - } - - if (writable) { - perm |= BLK_PERM_WRITE; - } - - ctx = bdrv_get_aio_context(bs); - aio_context_acquire(ctx); - bdrv_invalidate_cache(bs, NULL); - aio_context_release(ctx); - - /* - * Don't allow resize while the vhost user server is running, - * otherwise we don't care what happens with the node. - */ - blk = blk_new(bdrv_get_aio_context(bs), perm, - BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | - BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD); - ret = blk_insert_bs(blk, bs, errp); - - if (ret < 0) { - goto fail; - } - - blk_set_enable_write_cache(blk, false); - - blk_set_allow_aio_context_change(blk, true); - - vu_block_device->blkcfg.wce = 0; - vu_block_device->backend = blk; - if (!vu_block_device->blk_size) { - vu_block_device->blk_size = BDRV_SECTOR_SIZE; - } - vu_block_device->blkcfg.blk_size = vu_block_device->blk_size; - blk_set_guest_block_size(blk, vu_block_device->blk_size); - vu_block_initialize_config(bs, &vu_block_device->blkcfg, - vu_block_device->blk_size); - return vu_block_device; - -fail: - blk_unref(blk); - return NULL; -} - -static void vu_block_deinit(VuBlockDev *vu_block_device) -{ - if (vu_block_device->backend) { - blk_remove_aio_context_notifier(vu_block_device->backend, blk_aio_attached, - blk_aio_detach, vu_block_device); - } - - blk_unref(vu_block_device->backend); + vhost_user_server_stop(&vexp->vu_server); } -static void 
vhost_user_blk_server_stop(VuBlockDev *vu_block_device) +static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, + Error **errp) { - vhost_user_server_stop(&vu_block_device->vu_server); - vu_block_deinit(vu_block_device); -} - -static void vhost_user_blk_server_start(VuBlockDev *vu_block_device, - Error **errp) -{ - AioContext *ctx; - SocketAddress *addr = vu_block_device->addr; - - if (!vu_block_init(vu_block_device, errp)) { - return; - } - - ctx = bdrv_get_aio_context(blk_bs(vu_block_device->backend)); - - if (!vhost_user_server_start(&vu_block_device->vu_server, addr, ctx, - VHOST_USER_BLK_MAX_QUEUES, &vu_block_iface, - errp)) { - goto error; - } - - blk_add_aio_context_notifier(vu_block_device->backend, blk_aio_attached, - blk_aio_detach, vu_block_device); - vu_block_device->running = true; - return; - - error: - vu_block_deinit(vu_block_device); -} - -static bool vu_prop_modifiable(VuBlockDev *vus, Error **errp) -{ - if (vus->running) { - error_setg(errp, "The property can't be modified " - "while the server is running"); - return false; - } - return true; -} - -static void vu_set_node_name(Object *obj, const char *value, Error **errp) -{ - VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj); - - if (!vu_prop_modifiable(vus, errp)) { - return; - } - - if (vus->node_name) { - g_free(vus->node_name); - } - - vus->node_name = g_strdup(value); -} - -static char *vu_get_node_name(Object *obj, Error **errp) -{ - VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj); - return g_strdup(vus->node_name); -} - -static void free_socket_addr(SocketAddress *addr) -{ - g_free(addr->u.q_unix.path); - g_free(addr); -} - -static void vu_set_unix_socket(Object *obj, const char *value, - Error **errp) -{ - VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj); - - if (!vu_prop_modifiable(vus, errp)) { - return; - } - - if (vus->addr) { - free_socket_addr(vus->addr); - } - - SocketAddress *addr = g_new0(SocketAddress, 1); - addr->type = SOCKET_ADDRESS_TYPE_UNIX; - addr->u.q_unix.path = 
g_strdup(value); - vus->addr = addr; -} - -static char *vu_get_unix_socket(Object *obj, Error **errp) -{ - VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj); - return g_strdup(vus->addr->u.q_unix.path); -} - -static bool vu_get_block_writable(Object *obj, Error **errp) -{ - VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj); - return vus->writable; -} - -static void vu_set_block_writable(Object *obj, bool value, Error **errp) -{ - VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj); - - if (!vu_prop_modifiable(vus, errp)) { - return; - } - - vus->writable = value; -} - -static void vu_get_blk_size(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj); - uint32_t value = vus->blk_size; - - visit_type_uint32(v, name, &value, errp); -} - -static void vu_set_blk_size(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj); - + VuBlkExport *vexp = container_of(exp, VuBlkExport, export); + BlockExportOptionsVhostUserBlk *vu_opts = &opts->u.vhost_user_blk; Error *local_err = NULL; - uint32_t value; + uint64_t logical_block_size; - if (!vu_prop_modifiable(vus, errp)) { - return; - } + vexp->writable = opts->writable; + vexp->blkcfg.wce = 0; - visit_type_uint32(v, name, &value, &local_err); - if (local_err) { - goto out; + if (vu_opts->has_logical_block_size) { + logical_block_size = vu_opts->logical_block_size; + } else { + logical_block_size = BDRV_SECTOR_SIZE; } - - check_block_size(object_get_typename(obj), name, value, &local_err); + check_block_size(exp->id, "logical-block-size", logical_block_size, + &local_err); if (local_err) { - goto out; + error_propagate(errp, local_err); + return -EINVAL; } + vexp->blk_size = logical_block_size; + blk_set_guest_block_size(exp->blk, logical_block_size); + vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg, + logical_block_size); - vus->blk_size = value; - -out: - error_propagate(errp, local_err); -} - 
-static void vhost_user_blk_server_instance_finalize(Object *obj) -{ - VuBlockDev *vub = VHOST_USER_BLK_SERVER(obj); + blk_set_allow_aio_context_change(exp->blk, true); + blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach, + vexp); - vhost_user_blk_server_stop(vub); - - /* - * Unlike object_property_add_str, object_class_property_add_str - * doesn't have a release method. Thus manual memory freeing is - * needed. - */ - free_socket_addr(vub->addr); - g_free(vub->node_name); -} - -static void vhost_user_blk_server_complete(UserCreatable *obj, Error **errp) -{ - VuBlockDev *vub = VHOST_USER_BLK_SERVER(obj); + if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx, + VHOST_USER_BLK_MAX_QUEUES, &vu_blk_iface, + errp)) { + blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, + blk_aio_detach, vexp); + return -EADDRNOTAVAIL; + } - vhost_user_blk_server_start(vub, errp); + return 0; } -static void vhost_user_blk_server_class_init(ObjectClass *klass, - void *class_data) +static void vu_blk_exp_delete(BlockExport *exp) { - UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); - ucc->complete = vhost_user_blk_server_complete; + VuBlkExport *vexp = container_of(exp, VuBlkExport, export); - object_class_property_add_bool(klass, "writable", - vu_get_block_writable, - vu_set_block_writable); - - object_class_property_add_str(klass, "node-name", - vu_get_node_name, - vu_set_node_name); - - object_class_property_add_str(klass, "unix-socket", - vu_get_unix_socket, - vu_set_unix_socket); - - object_class_property_add(klass, "logical-block-size", "uint32", - vu_get_blk_size, vu_set_blk_size, - NULL, NULL); + blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach, + vexp); } -static const TypeInfo vhost_user_blk_server_info = { - .name = TYPE_VHOST_USER_BLK_SERVER, - .parent = TYPE_OBJECT, - .instance_size = sizeof(VuBlockDev), - .instance_finalize = vhost_user_blk_server_instance_finalize, - .class_init = 
vhost_user_blk_server_class_init, - .interfaces = (InterfaceInfo[]) { - {TYPE_USER_CREATABLE}, - {} - }, +const BlockExportDriver blk_exp_vhost_user_blk = { + .type = BLOCK_EXPORT_TYPE_VHOST_USER_BLK, + .instance_size = sizeof(VuBlkExport), + .create = vu_blk_exp_create, + .delete = vu_blk_exp_delete, + .request_shutdown = vu_blk_exp_request_shutdown, }; - -static void vhost_user_blk_server_register_types(void) -{ - type_register_static(&vhost_user_blk_server_info); -} - -type_init(vhost_user_blk_server_register_types) diff --git a/block/export/vhost-user-blk-server.h b/block/export/vhost-user-blk-server.h index f06f37c4c8..fcf46fc8a5 100644 --- a/block/export/vhost-user-blk-server.h +++ b/block/export/vhost-user-blk-server.h @@ -10,27 +10,10 @@ #ifndef VHOST_USER_BLK_SERVER_H #define VHOST_USER_BLK_SERVER_H -#include "util/vhost-user-server.h" -typedef struct VuBlockDev VuBlockDev; -#define TYPE_VHOST_USER_BLK_SERVER "vhost-user-blk-server" -#define VHOST_USER_BLK_SERVER(obj) \ - OBJECT_CHECK(VuBlockDev, obj, TYPE_VHOST_USER_BLK_SERVER) +#include "block/export.h" -/* vhost user block device */ -struct VuBlockDev { - Object parent_obj; - char *node_name; - SocketAddress *addr; - AioContext *ctx; - VuServer vu_server; - bool running; - uint32_t blk_size; - BlockBackend *backend; - QIOChannelSocket *sioc; - QTAILQ_ENTRY(VuBlockDev) next; - struct virtio_blk_config blkcfg; - bool writable; -}; +/* For block/export/export.c */ +extern const BlockExportDriver blk_exp_vhost_user_blk; #endif /* VHOST_USER_BLK_SERVER_H */ diff --git a/block/meson.build b/block/meson.build index 6e6c1dc479..78e8b25232 100644 --- a/block/meson.build +++ b/block/meson.build @@ -60,7 +60,6 @@ block_ss.add(when: 'CONFIG_WIN32', if_true: files('file-win32.c', 'win32-aio.c') block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, iokit]) block_ss.add(when: 'CONFIG_LIBISCSI', if_true: files('iscsi-opts.c')) block_ss.add(when: 'CONFIG_LINUX', if_true: files('nvme.c')) 
-block_ss.add(when: 'CONFIG_LINUX', if_true: files('export/vhost-user-blk-server.c', '../contrib/libvhost-user/libvhost-user.c')) block_ss.add(when: 'CONFIG_REPLICATION', if_true: files('replication.c')) block_ss.add(when: 'CONFIG_SHEEPDOG', if_true: files('sheepdog.c')) block_ss.add(when: ['CONFIG_LINUX_AIO', libaio], if_true: files('linux-aio.c')) diff --git a/qapi/block-export.json b/qapi/block-export.json index 65804834d9..a793e34af9 100644 --- a/qapi/block-export.json +++ b/qapi/block-export.json @@ -84,6 +84,21 @@ 'data': { '*name': 'str', '*description': 'str', '*bitmap': 'str' } } +## +# @BlockExportOptionsVhostUserBlk: +# +# A vhost-user-blk block export. +# +# @addr: The vhost-user socket on which to listen. Both 'unix' and 'fd' +# SocketAddress types are supported. Passed fds must be UNIX domain +# sockets. +# @logical-block-size: Logical block size in bytes. Defaults to 512 bytes. +# +# Since: 5.2 +## +{ 'struct': 'BlockExportOptionsVhostUserBlk', + 'data': { 'addr': 'SocketAddress', '*logical-block-size': 'size' } } + ## # @NbdServerAddOptions: # @@ -180,11 +195,12 @@ # An enumeration of block export types # # @nbd: NBD export +# @vhost-user-blk: vhost-user-blk export (since 5.2) # # Since: 4.2 ## { 'enum': 'BlockExportType', - 'data': [ 'nbd' ] } + 'data': [ 'nbd', 'vhost-user-blk' ] } ## # @BlockExportOptions: @@ -213,7 +229,8 @@ '*writethrough': 'bool' }, 'discriminator': 'type', 'data': { - 'nbd': 'BlockExportOptionsNbd' + 'nbd': 'BlockExportOptionsNbd', + 'vhost-user-blk': 'BlockExportOptionsVhostUserBlk' } } ## diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c index c448800e58..516999b38a 100644 --- a/util/vhost-user-server.c +++ b/util/vhost-user-server.c @@ -408,7 +408,15 @@ bool vhost_user_server_start(VuServer *server, Error **errp) { QEMUBH *bh; - QIONetListener *listener = qio_net_listener_new(); + QIONetListener *listener; + + if (socket_addr->type != SOCKET_ADDRESS_TYPE_UNIX && + socket_addr->type != SOCKET_ADDRESS_TYPE_FD) 
{ + error_setg(errp, "Only socket address types 'unix' and 'fd' are supported"); + return false; + } + + listener = qio_net_listener_new(); if (qio_net_listener_open_sync(listener, socket_addr, 1, errp) < 0) { object_unref(OBJECT(listener)); -- cgit v1.2.3-55-g7522 From f51d23c80af73c95e0ce703ad06a300f1b3d63ef Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 29 Sep 2020 13:55:16 +0100 Subject: block/export: add iothread and fixed-iothread options Make it possible to specify the iothread where the export will run. By default the block node can be moved to other AioContexts later and the export will follow. The fixed-iothread option forces strict behavior that prevents changing AioContext while the export is active. See the QAPI docs for details. Signed-off-by: Stefan Hajnoczi Message-id: 20200929125516.186715-5-stefanha@redhat.com [Fix stray '#' character in block-export.json and add missing "(since: 5.2)" as suggested by Eric Blake. --Stefan] Signed-off-by: Stefan Hajnoczi --- block/export/export.c | 31 ++++++++++++++++++++++++++++++- block/export/vhost-user-blk-server.c | 5 ++++- nbd/server.c | 2 -- qapi/block-export.json | 11 +++++++++++ 4 files changed, 45 insertions(+), 4 deletions(-) (limited to 'qapi') diff --git a/block/export/export.c b/block/export/export.c index a24fadcfb5..c3478c6c97 100644 --- a/block/export/export.c +++ b/block/export/export.c @@ -15,6 +15,7 @@ #include "block/block.h" #include "sysemu/block-backend.h" +#include "sysemu/iothread.h" #include "block/export.h" #include "block/nbd.h" #include "qapi/error.h" @@ -63,10 +64,11 @@ static const BlockExportDriver *blk_exp_find_driver(BlockExportType type) BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) { + bool fixed_iothread = export->has_fixed_iothread && export->fixed_iothread; const BlockExportDriver *drv; BlockExport *exp = NULL; BlockDriverState *bs; - BlockBackend *blk; + BlockBackend *blk = NULL; AioContext *ctx; uint64_t perm; int ret; @@ -102,6 +104,28 @@ 
BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) ctx = bdrv_get_aio_context(bs); aio_context_acquire(ctx); + if (export->has_iothread) { + IOThread *iothread; + AioContext *new_ctx; + + iothread = iothread_by_id(export->iothread); + if (!iothread) { + error_setg(errp, "iothread \"%s\" not found", export->iothread); + goto fail; + } + + new_ctx = iothread_get_aio_context(iothread); + + ret = bdrv_try_set_aio_context(bs, new_ctx, errp); + if (ret == 0) { + aio_context_release(ctx); + aio_context_acquire(new_ctx); + ctx = new_ctx; + } else if (fixed_iothread) { + goto fail; + } + } + /* * Block exports are used for non-shared storage migration. Make sure * that BDRV_O_INACTIVE is cleared and the image is ready for write @@ -116,6 +140,11 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) } blk = blk_new(ctx, perm, BLK_PERM_ALL); + + if (!fixed_iothread) { + blk_set_allow_aio_context_change(blk, true); + } + ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { goto fail; diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index f7021cbd7b..286eb5fb9a 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -323,13 +323,17 @@ static const VuDevIface vu_blk_iface = { static void blk_aio_attached(AioContext *ctx, void *opaque) { VuBlkExport *vexp = opaque; + + vexp->export.ctx = ctx; vhost_user_server_attach_aio_context(&vexp->vu_server, ctx); } static void blk_aio_detach(void *opaque) { VuBlkExport *vexp = opaque; + vhost_user_server_detach_aio_context(&vexp->vu_server); + vexp->export.ctx = NULL; } static void @@ -384,7 +388,6 @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg, logical_block_size); - blk_set_allow_aio_context_change(exp->blk, true); blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach, vexp); diff --git a/nbd/server.c b/nbd/server.c index 
e75c825879..08b621f70a 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1517,8 +1517,6 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, return ret; } - blk_set_allow_aio_context_change(blk, true); - QTAILQ_INIT(&exp->clients); exp->name = g_strdup(arg->name); exp->description = g_strdup(arg->description); diff --git a/qapi/block-export.json b/qapi/block-export.json index a793e34af9..8a4ced817f 100644 --- a/qapi/block-export.json +++ b/qapi/block-export.json @@ -219,11 +219,22 @@ # export before completion is signalled. (since: 5.2; # default: false) # +# @iothread: The name of the iothread object where the export will run. The +# default is to use the thread currently associated with the +# block node. (since: 5.2) +# +# @fixed-iothread: True prevents the block node from being moved to another +# thread while the export is active. If true and @iothread is +# given, export creation fails if the block node cannot be +# moved to the iothread. The default is false. (since: 5.2) +# # Since: 4.2 ## { 'union': 'BlockExportOptions', 'base': { 'type': 'BlockExportType', 'id': 'str', + '*fixed-iothread': 'bool', + '*iothread': 'str', 'node-name': 'str', '*writable': 'bool', '*writethrough': 'bool' }, -- cgit v1.2.3-55-g7522 From d9b495f9c6a943c9bbd50f7469efb645c23009c3 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 1 Oct 2020 15:46:03 +0100 Subject: block/export: add vhost-user-blk multi-queue support Allow the number of queues to be configured using --export vhost-user-blk,num-queues=N. This setting should match the QEMU --device vhost-user-blk-pci,num-queues=N setting but QEMU vhost-user-blk.c lowers its own value if the vhost-user-blk backend offers fewer queues than QEMU. The vhost-user-blk-server.c code is already capable of multi-queue. All virtqueue processing runs in the same AioContext. No new locking is needed. Add the num-queues=N option and set the VIRTIO_BLK_F_MQ feature bit. 
Note that the feature bit only announces the presence of the num_queues configuration space field. It does not promise that there is more than 1 virtqueue, so we can set it unconditionally. I tested multi-queue by running a random read fio test with numjobs=4 on an -smp 4 guest. After the benchmark finished the guest /proc/interrupts file showed activity on all 4 virtio-blk MSI-X. The /sys/block/vda/mq/ directory shows that Linux blk-mq has 4 queues configured. An automated test is included in the next commit. Signed-off-by: Stefan Hajnoczi Acked-by: Markus Armbruster Message-id: 20201001144604.559733-2-stefanha@redhat.com [Fixed accidental tab characters as suggested by Markus Armbruster --Stefan] Signed-off-by: Stefan Hajnoczi --- block/export/vhost-user-blk-server.c | 24 ++++++++++++++++++------ qapi/block-export.json | 10 +++++++--- 2 files changed, 25 insertions(+), 9 deletions(-) (limited to 'qapi') diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index 286eb5fb9a..41f4933d6e 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -21,7 +21,7 @@ #include "util/block-helpers.h" enum { - VHOST_USER_BLK_MAX_QUEUES = 1, + VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1, }; struct virtio_blk_inhdr { unsigned char status; @@ -242,6 +242,7 @@ static uint64_t vu_blk_get_features(VuDev *dev) 1ull << VIRTIO_BLK_F_DISCARD | 1ull << VIRTIO_BLK_F_WRITE_ZEROES | 1ull << VIRTIO_BLK_F_CONFIG_WCE | + 1ull << VIRTIO_BLK_F_MQ | 1ull << VIRTIO_F_VERSION_1 | 1ull << VIRTIO_RING_F_INDIRECT_DESC | 1ull << VIRTIO_RING_F_EVENT_IDX | @@ -338,7 +339,9 @@ static void blk_aio_detach(void *opaque) static void vu_blk_initialize_config(BlockDriverState *bs, - struct virtio_blk_config *config, uint32_t blk_size) + struct virtio_blk_config *config, + uint32_t blk_size, + uint16_t num_queues) { config->capacity = bdrv_getlength(bs) >> BDRV_SECTOR_BITS; config->blk_size = blk_size; @@ -346,7 +349,7 @@ 
vu_blk_initialize_config(BlockDriverState *bs, config->seg_max = 128 - 2; config->min_io_size = 1; config->opt_io_size = 1; - config->num_queues = VHOST_USER_BLK_MAX_QUEUES; + config->num_queues = num_queues; config->max_discard_sectors = 32768; config->max_discard_seg = 1; config->discard_sector_alignment = config->blk_size >> 9; @@ -368,6 +371,7 @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, BlockExportOptionsVhostUserBlk *vu_opts = &opts->u.vhost_user_blk; Error *local_err = NULL; uint64_t logical_block_size; + uint16_t num_queues = VHOST_USER_BLK_NUM_QUEUES_DEFAULT; vexp->writable = opts->writable; vexp->blkcfg.wce = 0; @@ -385,15 +389,23 @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, } vexp->blk_size = logical_block_size; blk_set_guest_block_size(exp->blk, logical_block_size); + + if (vu_opts->has_num_queues) { + num_queues = vu_opts->num_queues; + } + if (num_queues == 0) { + error_setg(errp, "num-queues must be greater than 0"); + return -EINVAL; + } + vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg, - logical_block_size); + logical_block_size, num_queues); blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach, vexp); if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx, - VHOST_USER_BLK_MAX_QUEUES, &vu_blk_iface, - errp)) { + num_queues, &vu_blk_iface, errp)) { blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach, vexp); return -EADDRNOTAVAIL; diff --git a/qapi/block-export.json b/qapi/block-export.json index 8a4ced817f..480c497690 100644 --- a/qapi/block-export.json +++ b/qapi/block-export.json @@ -93,11 +93,15 @@ # SocketAddress types are supported. Passed fds must be UNIX domain # sockets. # @logical-block-size: Logical block size in bytes. Defaults to 512 bytes. +# @num-queues: Number of request virtqueues. Must be greater than 0. Defaults +# to 1. 
# # Since: 5.2 ## { 'struct': 'BlockExportOptionsVhostUserBlk', - 'data': { 'addr': 'SocketAddress', '*logical-block-size': 'size' } } + 'data': { 'addr': 'SocketAddress', + '*logical-block-size': 'size', + '*num-queues': 'uint16'} } ## # @NbdServerAddOptions: @@ -233,8 +237,8 @@ { 'union': 'BlockExportOptions', 'base': { 'type': 'BlockExportType', 'id': 'str', - '*fixed-iothread': 'bool', - '*iothread': 'str', + '*fixed-iothread': 'bool', + '*iothread': 'str', 'node-name': 'str', '*writable': 'bool', '*writethrough': 'bool' }, -- cgit v1.2.3-55-g7522