From 8526e39e9974228f937d04e8eae140add8b17b43 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 29 Oct 2020 10:32:43 +0100 Subject: block/nvme: Use hex format to display offset in trace events Use the same format used for the hw/vfio/ trace events. Suggested-by: Eric Auger Reviewed-by: Eric Auger Reviewed-by: Stefan Hajnoczi Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-3-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/trace-events | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'block') diff --git a/block/trace-events b/block/trace-events index 0e351c3fa3..0955c85c78 100644 --- a/block/trace-events +++ b/block/trace-events @@ -144,13 +144,13 @@ nvme_submit_command(void *s, int index, int cid) "s %p queue %d cid %d" nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x" nvme_handle_event(void *s) "s %p" nvme_poll_cb(void *s) "s %p" -nvme_prw_aligned(void *s, int is_write, uint64_t offset, uint64_t bytes, int flags, int niov) "s %p is_write %d offset %"PRId64" bytes %"PRId64" flags %d niov %d" -nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p offset %"PRId64" bytes %"PRId64" flags %d" +nvme_prw_aligned(void *s, int is_write, uint64_t offset, uint64_t bytes, int flags, int niov) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" flags %d niov %d" +nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p offset 0x%"PRIx64" bytes %"PRId64" flags %d" nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x" -nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset %"PRId64" bytes %"PRId64" niov %d is_write %d" -nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s %p is_write %d offset %"PRId64" bytes %"PRId64" ret %d" -nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset %"PRId64" bytes %"PRId64"" -nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset %"PRId64" bytes %"PRId64" ret %d" +nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset 0x%"PRIx64" bytes %"PRId64" niov %d is_write %d" +nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" ret %d" +nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" bytes %"PRId64"" +nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d" nvme_dma_map_flush(void *s) "s %p" nvme_free_req_queue_wait(void *q) "q %p" nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d" -- cgit v1.2.3-55-g7522 From 58ad6ae0cbad626bd574bc1347f210a35a8316eb Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 29 Oct 2020 10:32:44 +0100 Subject: block/nvme: Report warning with warn_report() Instead of displaying warning on stderr, use warn_report() which also displays it on the monitor. Reviewed-by: Eric Auger Reviewed-by: Stefan Hajnoczi Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-4-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index 739a0a700c..6f1d7f9b2a 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -399,8 +399,8 @@ static bool nvme_process_completion(NVMeQueuePair *q) } cid = le16_to_cpu(c->cid); if (cid == 0 || cid > NVME_QUEUE_SIZE) { - fprintf(stderr, "Unexpected CID in completion queue: %" PRIu32 "\n", - cid); + warn_report("NVMe: Unexpected CID in completion queue: %"PRIu32", " + "queue size: %u", cid, NVME_QUEUE_SIZE); continue; } trace_nvme_complete_command(s, q->index, cid); -- cgit v1.2.3-55-g7522 From 15b2260bef333762904535affa7fb30390b3fb98 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 29 Oct 2020 10:32:45 +0100 Subject: block/nvme: Trace controller capabilities Controllers have different capabilities and report them in the CAP register. We are particularly interested by the page size limits. Reviewed-by: Stefan Hajnoczi Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-5-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 13 +++++++++++++ block/trace-events | 2 ++ 2 files changed, 15 insertions(+) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index 6f1d7f9b2a..361b5772b7 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -727,6 +727,19 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, * Initialization". */ cap = le64_to_cpu(regs->cap); + trace_nvme_controller_capability_raw(cap); + trace_nvme_controller_capability("Maximum Queue Entries Supported", + 1 + NVME_CAP_MQES(cap)); + trace_nvme_controller_capability("Contiguous Queues Required", + NVME_CAP_CQR(cap)); + trace_nvme_controller_capability("Doorbell Stride", + 2 << (2 + NVME_CAP_DSTRD(cap))); + trace_nvme_controller_capability("Subsystem Reset Supported", + NVME_CAP_NSSRS(cap)); + trace_nvme_controller_capability("Memory Page Size Minimum", + 1 << (12 + NVME_CAP_MPSMIN(cap))); + trace_nvme_controller_capability("Memory Page Size Maximum", + 1 << (12 + NVME_CAP_MPSMAX(cap))); if (!NVME_CAP_CSS(cap)) { error_setg(errp, "Device doesn't support NVMe command set"); ret = -EINVAL; diff --git a/block/trace-events b/block/trace-events index 0955c85c78..b90b07b15f 100644 --- a/block/trace-events +++ b/block/trace-events @@ -134,6 +134,8 @@ qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu" # nvme.c +nvme_controller_capability_raw(uint64_t value) "0x%08"PRIx64 +nvme_controller_capability(const char *desc, uint64_t value) "%s: %"PRIu64 nvme_kick(void *s, int queue) "s %p queue %d" nvme_dma_flush_queue_wait(void *s) "s %p" nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x" -- cgit v1.2.3-55-g7522 From 1c914cd1208fcf7fc61edb043f091b9412642ec8 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 29 Oct 2020 10:32:46 +0100 Subject: block/nvme: Trace nvme_poll_queue() per queue As we want to enable multiple queues, report the event in each nvme_poll_queue() call, rather than once in the callback calling nvme_poll_queues(). Reviewed-by: Eric Auger Reviewed-by: Stefan Hajnoczi Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-6-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 2 +- block/trace-events | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index 361b5772b7..8d74401ae7 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -594,6 +594,7 @@ static bool nvme_poll_queue(NVMeQueuePair *q) const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES; NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset]; + trace_nvme_poll_queue(q->s, q->index); /* * Do an early check for completions. q->lock isn't needed because * nvme_process_completion() only runs in the event loop thread and @@ -684,7 +685,6 @@ static bool nvme_poll_cb(void *opaque) BDRVNVMeState *s = container_of(e, BDRVNVMeState, irq_notifier[MSIX_SHARED_IRQ_IDX]); - trace_nvme_poll_cb(s); return nvme_poll_queues(s); } diff --git a/block/trace-events b/block/trace-events index b90b07b15f..86292f3312 100644 --- a/block/trace-events +++ b/block/trace-events @@ -145,7 +145,7 @@ nvme_complete_command(void *s, int index, int cid) "s %p queue %d cid %d" nvme_submit_command(void *s, int index, int cid) "s %p queue %d cid %d" nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x" nvme_handle_event(void *s) "s %p" -nvme_poll_cb(void *s) "s %p" +nvme_poll_queue(void *s, unsigned q_index) "s %p q #%u" nvme_prw_aligned(void *s, int is_write, uint64_t offset, uint64_t bytes, int flags, int niov) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" flags %d niov %d" nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p offset 0x%"PRIx64" bytes %"PRId64" flags %d" nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x" -- cgit v1.2.3-55-g7522 From 51e98b6d21dbaf0c6adcbf00b76bfbe0a0537c02 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 29 Oct 2020 10:32:47 +0100 Subject: block/nvme: Improve nvme_free_req_queue_wait() trace information What we want to trace is the block driver state and the queue index. Suggested-by: Stefan Hajnoczi Reviewed-by: Eric Auger Reviewed-by: Stefan Hajnoczi Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-7-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 2 +- block/trace-events | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index 8d74401ae7..29d2541b91 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -292,7 +292,7 @@ static NVMeRequest *nvme_get_free_req(NVMeQueuePair *q) while (q->free_req_head == -1) { if (qemu_in_coroutine()) { - trace_nvme_free_req_queue_wait(q); + trace_nvme_free_req_queue_wait(q->s, q->index); qemu_co_queue_wait(&q->free_req_queue, &q->lock); } else { qemu_mutex_unlock(&q->lock); diff --git a/block/trace-events b/block/trace-events index 86292f3312..cc5e2b55cb 100644 --- a/block/trace-events +++ b/block/trace-events @@ -154,7 +154,7 @@ nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" bytes %"PRId64"" nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d" nvme_dma_map_flush(void *s) "s %p" -nvme_free_req_queue_wait(void *q) "q %p" +nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u" nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d" nvme_cmd_map_qiov_pages(void *s, int i, uint64_t page) "s %p page[%d] 0x%"PRIx64 nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pages %d" -- cgit v1.2.3-55-g7522 From 6e1e9ff2d3709bb89156ea8e42239b6ee96a2a49 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 29 Oct 2020 10:32:48 +0100 Subject: block/nvme: Trace queue pair creation/deletion Reviewed-by: Eric Auger Reviewed-by: Stefan Hajnoczi Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-8-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 3 +++ block/trace-events | 2 ++ 2 files changed, 5 insertions(+) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index 29d2541b91..e95d59d312 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -181,6 +181,7 @@ static void nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q, static void nvme_free_queue_pair(NVMeQueuePair *q) { + trace_nvme_free_queue_pair(q->index, q); if (q->completion_bh) { qemu_bh_delete(q->completion_bh); } @@ -216,6 +217,8 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, if (!q) { return NULL; } + trace_nvme_create_queue_pair(idx, q, size, aio_context, + event_notifier_get_fd(s->irq_notifier)); q->prp_list_pages = qemu_try_memalign(s->page_size, s->page_size * NVME_NUM_REQS); if (!q->prp_list_pages) { diff --git a/block/trace-events b/block/trace-events index cc5e2b55cb..f6a0f99df1 100644 --- a/block/trace-events +++ b/block/trace-events @@ -155,6 +155,8 @@ nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" byte nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d" nvme_dma_map_flush(void *s) "s %p" nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u" +nvme_create_queue_pair(unsigned q_index, void *q, unsigned size, void *aio_context, int fd) "index %u q %p size %u aioctx %p fd %d" +nvme_free_queue_pair(unsigned q_index, void *q) "index %u q %p" nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d" nvme_cmd_map_qiov_pages(void *s, int i, uint64_t page) "s %p page[%d] 0x%"PRIx64 nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pages %d" -- cgit v1.2.3-55-g7522 From 3214b0f0948d5e01ccca62730e60a69e2ded8774 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 29 Oct 2020 10:32:49 +0100 Subject: block/nvme: Move definitions before structure declarations To be able to use some definitions in structure declarations, move them earlier. No logical change. Reviewed-by: Eric Auger Reviewed-by: Stefan Hajnoczi Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-9-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index e95d59d312..b0629f5de8 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -41,6 +41,16 @@ typedef struct BDRVNVMeState BDRVNVMeState; +/* Same index is used for queues and IRQs */ +#define INDEX_ADMIN 0 +#define INDEX_IO(n) (1 + n) + +/* This driver shares a single MSIX IRQ for the admin and I/O queues */ +enum { + MSIX_SHARED_IRQ_IDX = 0, + MSIX_IRQ_COUNT = 1 +}; + typedef struct { int32_t head, tail; uint8_t *queue; @@ -81,15 +91,6 @@ typedef struct { QEMUBH *completion_bh; } NVMeQueuePair; -#define INDEX_ADMIN 0 -#define INDEX_IO(n) (1 + n) - -/* This driver shares a single MSIX IRQ for the admin and I/O queues */ -enum { - MSIX_SHARED_IRQ_IDX = 0, - MSIX_IRQ_COUNT = 1 -}; - struct BDRVNVMeState { AioContext *aio_context; QEMUVFIOState *vfio; -- cgit v1.2.3-55-g7522 From 1b539bd6dbe1459f160e25610ec2fc3388f700e8 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 29 Oct 2020 10:32:50 +0100 Subject: block/nvme: Use unsigned integer for queue counter/size We can not have negative queue count/size/index, use unsigned type. Rename 'nr_queues' as 'queue_count' to match the spec naming. Reviewed-by: Eric Auger Reviewed-by: Stefan Hajnoczi Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-10-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 38 ++++++++++++++++++-------------------- block/trace-events | 10 +++++----- 2 files changed, 23 insertions(+), 25 deletions(-) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index b0629f5de8..c450499111 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -104,7 +104,7 @@ struct BDRVNVMeState { * [1..]: io queues. */ NVMeQueuePair **queues; - int nr_queues; + unsigned queue_count; size_t page_size; /* How many uint32_t elements does each doorbell entry take. */ size_t doorbell_scale; @@ -161,7 +161,7 @@ static QemuOptsList runtime_opts = { }; static void nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q, - int nentries, int entry_bytes, Error **errp) + unsigned nentries, size_t entry_bytes, Error **errp) { size_t bytes; int r; @@ -206,7 +206,7 @@ static void nvme_free_req_queue_cb(void *opaque) static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, AioContext *aio_context, - int idx, int size, + unsigned idx, size_t size, Error **errp) { int i, r; @@ -623,7 +623,7 @@ static bool nvme_poll_queues(BDRVNVMeState *s) bool progress = false; int i; - for (i = 0; i < s->nr_queues; i++) { + for (i = 0; i < s->queue_count; i++) { if (nvme_poll_queue(s->queues[i])) { progress = true; } @@ -644,10 +644,10 @@ static void nvme_handle_event(EventNotifier *n) static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) { BDRVNVMeState *s = bs->opaque; - int n = s->nr_queues; + unsigned n = s->queue_count; NVMeQueuePair *q; NvmeCmd cmd; - int queue_size = NVME_QUEUE_SIZE; + unsigned queue_size = NVME_QUEUE_SIZE; q = nvme_create_queue_pair(s, bdrv_get_aio_context(bs), n, queue_size, errp); @@ -661,7 +661,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) .cdw11 = cpu_to_le32(0x3), }; if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) { - error_setg(errp, "Failed to create CQ io queue [%d]", n); + error_setg(errp, "Failed to create CQ io queue [%u]", n); goto out_error; } cmd = (NvmeCmd) { @@ -671,12 +671,12 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) .cdw11 = cpu_to_le32(0x1 | (n << 16)), }; if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) { - error_setg(errp, "Failed to create SQ io queue [%d]", n); + error_setg(errp, "Failed to create SQ io queue [%u]", n); goto out_error; } s->queues = g_renew(NVMeQueuePair *, s->queues, n + 1); s->queues[n] = q; - s->nr_queues++; + s->queue_count++; return true; out_error: nvme_free_queue_pair(q); @@ -785,7 +785,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, ret = -EINVAL; goto out; } - s->nr_queues = 1; + s->queue_count = 1; QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000); regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << AQA_ACQS_SHIFT) | (NVME_QUEUE_SIZE << AQA_ASQS_SHIFT)); @@ -895,10 +895,9 @@ static int nvme_enable_disable_write_cache(BlockDriverState *bs, bool enable, static void nvme_close(BlockDriverState *bs) { - int i; BDRVNVMeState *s = bs->opaque; - for (i = 0; i < s->nr_queues; ++i) { + for (unsigned i = 0; i < s->queue_count; ++i) { nvme_free_queue_pair(s->queues[i]); } g_free(s->queues); @@ -1123,7 +1122,7 @@ static coroutine_fn int nvme_co_prw_aligned(BlockDriverState *bs, }; trace_nvme_prw_aligned(s, is_write, offset, bytes, flags, qiov->niov); - assert(s->nr_queues > 1); + assert(s->queue_count > 1); req = nvme_get_free_req(ioq); assert(req); @@ -1233,7 +1232,7 @@ static coroutine_fn int nvme_co_flush(BlockDriverState *bs) .ret = -EINPROGRESS, }; - assert(s->nr_queues > 1); + assert(s->queue_count > 1); req = nvme_get_free_req(ioq); assert(req); nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data); @@ -1285,7 +1284,7 @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs, cmd.cdw12 = cpu_to_le32(cdw12); trace_nvme_write_zeroes(s, offset, bytes, flags); - assert(s->nr_queues > 1); + assert(s->queue_count > 1); req = nvme_get_free_req(ioq); assert(req); @@ -1328,7 +1327,7 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs, return -ENOTSUP; } - assert(s->nr_queues > 1); + assert(s->queue_count > 1); buf = qemu_try_memalign(s->page_size, s->page_size); if (!buf) { @@ -1408,7 +1407,7 @@ static void nvme_detach_aio_context(BlockDriverState *bs) { BDRVNVMeState *s = bs->opaque; - for (int i = 0; i < s->nr_queues; i++) { + for (unsigned i = 0; i < s->queue_count; i++) { NVMeQueuePair *q = s->queues[i]; qemu_bh_delete(q->completion_bh); @@ -1429,7 +1428,7 @@ static void nvme_attach_aio_context(BlockDriverState *bs, aio_set_event_notifier(new_context, &s->irq_notifier[MSIX_SHARED_IRQ_IDX], false, nvme_handle_event, nvme_poll_cb); - for (int i = 0; i < s->nr_queues; i++) { + for (unsigned i = 0; i < s->queue_count; i++) { NVMeQueuePair *q = s->queues[i]; q->completion_bh = @@ -1446,11 +1445,10 @@ static void nvme_aio_plug(BlockDriverState *bs) static void nvme_aio_unplug(BlockDriverState *bs) { - int i; BDRVNVMeState *s = bs->opaque; assert(s->plugged); s->plugged = false; - for (i = INDEX_IO(0); i < s->nr_queues; i++) { + for (unsigned i = INDEX_IO(0); i < s->queue_count; i++) { NVMeQueuePair *q = s->queues[i]; qemu_mutex_lock(&q->lock); nvme_kick(q); diff --git a/block/trace-events b/block/trace-events index f6a0f99df1..8368f4acb0 100644 --- a/block/trace-events +++ b/block/trace-events @@ -136,13 +136,13 @@ qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s # nvme.c nvme_controller_capability_raw(uint64_t value) "0x%08"PRIx64 nvme_controller_capability(const char *desc, uint64_t value) "%s: %"PRIu64 -nvme_kick(void *s, int queue) "s %p queue %d" +nvme_kick(void *s, unsigned q_index) "s %p q #%u" nvme_dma_flush_queue_wait(void *s) "s %p" nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x" -nvme_process_completion(void *s, int index, int inflight) "s %p queue %d inflight %d" -nvme_process_completion_queue_plugged(void *s, int index) "s %p queue %d" -nvme_complete_command(void *s, int index, int cid) "s %p queue %d cid %d" -nvme_submit_command(void *s, int index, int cid) "s %p queue %d cid %d" +nvme_process_completion(void *s, unsigned q_index, int inflight) "s %p q #%u inflight %d" +nvme_process_completion_queue_plugged(void *s, unsigned q_index) "s %p q #%u" +nvme_complete_command(void *s, unsigned q_index, int cid) "s %p q #%u cid %d" +nvme_submit_command(void *s, unsigned q_index, int cid) "s %p q #%u cid %d" nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x" nvme_handle_event(void *s) "s %p" nvme_poll_queue(void *s, unsigned q_index) "s %p q #%u" -- cgit v1.2.3-55-g7522 From 7a5f00dde39c0120b1653f189de9bbd551a6b1d8 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 29 Oct 2020 10:32:51 +0100 Subject: block/nvme: Make nvme_identify() return boolean indicating error Just for consistency, following the example documented since commit e3fe3988d7 ("error: Document Error API usage rules"), return a boolean value indicating an error is set or not. Directly pass errp as the local_err is not requested in our case. Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Stefan Hajnoczi Message-id: 20201029093306.1063879-11-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index c450499111..9833501245 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -506,9 +506,11 @@ static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q, return ret; } -static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp) +/* Returns true on success, false on failure. */ +static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp) { BDRVNVMeState *s = bs->opaque; + bool ret = false; union { NvmeIdCtrl ctrl; NvmeIdNs ns; @@ -585,10 +587,13 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp) goto out; } + ret = true; s->blkshift = lbaf->ds; out: qemu_vfio_dma_unmap(s->vfio, id); qemu_vfree(id); + + return ret; } static bool nvme_poll_queue(NVMeQueuePair *q) @@ -701,7 +706,6 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, uint64_t cap; uint64_t timeout_ms; uint64_t deadline, now; - Error *local_err = NULL; volatile NvmeBar *regs = NULL; qemu_co_mutex_init(&s->dma_map_lock); @@ -818,9 +822,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, &s->irq_notifier[MSIX_SHARED_IRQ_IDX], false, nvme_handle_event, nvme_poll_cb); - nvme_identify(bs, namespace, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (!nvme_identify(bs, namespace, errp)) { ret = -EIO; goto out; } -- cgit v1.2.3-55-g7522 From dfa9c6c65666d431b20f6ae3f18ee1e729f80f19 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 29 Oct 2020 10:32:52 +0100 Subject: block/nvme: Make nvme_init_queue() return boolean indicating error Just for consistency, following the example documented since commit e3fe3988d7 ("error: Document Error API usage rules"), return a boolean value indicating an error is set or not. Directly pass errp as the local_err is not requested in our case. This simplifies a bit nvme_create_queue_pair(). Reviewed-by: Stefan Hajnoczi Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-12-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index 9833501245..6eaba4e703 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -160,7 +160,8 @@ static QemuOptsList runtime_opts = { }, }; -static void nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q, +/* Returns true on success, false on failure. */ +static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q, unsigned nentries, size_t entry_bytes, Error **errp) { size_t bytes; @@ -171,13 +172,15 @@ static void nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q, q->queue = qemu_try_memalign(s->page_size, bytes); if (!q->queue) { error_setg(errp, "Cannot allocate queue"); - return; + return false; } memset(q->queue, 0, bytes); r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova); if (r) { error_setg(errp, "Cannot map queue"); + return false; } + return true; } static void nvme_free_queue_pair(NVMeQueuePair *q) @@ -210,7 +213,6 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, Error **errp) { int i, r; - Error *local_err = NULL; NVMeQueuePair *q; uint64_t prp_list_iova; @@ -247,16 +249,12 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, req->prp_list_iova = prp_list_iova + i * s->page_size; } - nvme_init_queue(s, &q->sq, size, NVME_SQ_ENTRY_BYTES, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (!nvme_init_queue(s, &q->sq, size, NVME_SQ_ENTRY_BYTES, errp)) { goto fail; } q->sq.doorbell = &s->doorbells[idx * s->doorbell_scale].sq_tail; - nvme_init_queue(s, &q->cq, size, NVME_CQ_ENTRY_BYTES, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (!nvme_init_queue(s, &q->cq, size, NVME_CQ_ENTRY_BYTES, errp)) { goto fail; } q->cq.doorbell = &s->doorbells[idx * s->doorbell_scale].cq_head; -- cgit v1.2.3-55-g7522 From 76a24781cc82d99251f933c399d78af52e1d9b0e Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 29 Oct 2020 10:32:54 +0100 Subject: block/nvme: Use definitions instead of magic values in add_io_queue() Replace magic values by definitions, and simplifiy since the number of queues will never reach 64K. Reviewed-by: Eric Auger Reviewed-by: Stefan Hajnoczi Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-14-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index 6eaba4e703..7285bd2e27 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -652,6 +652,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) NvmeCmd cmd; unsigned queue_size = NVME_QUEUE_SIZE; + assert(n <= UINT16_MAX); q = nvme_create_queue_pair(s, bdrv_get_aio_context(bs), n, queue_size, errp); if (!q) { @@ -660,8 +661,8 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) cmd = (NvmeCmd) { .opcode = NVME_ADM_CMD_CREATE_CQ, .dptr.prp1 = cpu_to_le64(q->cq.iova), - .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)), - .cdw11 = cpu_to_le32(0x3), + .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | n), + .cdw11 = cpu_to_le32(NVME_CQ_IEN | NVME_CQ_PC), }; if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) { error_setg(errp, "Failed to create CQ io queue [%u]", n); @@ -670,8 +671,8 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) cmd = (NvmeCmd) { .opcode = NVME_ADM_CMD_CREATE_SQ, .dptr.prp1 = cpu_to_le64(q->sq.iova), - .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)), - .cdw11 = cpu_to_le32(0x1 | (n << 16)), + .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | n), + .cdw11 = cpu_to_le32(NVME_SQ_PC | (n << 16)), }; if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) { error_setg(errp, "Failed to create SQ io queue [%u]", n); -- cgit v1.2.3-55-g7522 From 3c363c073e024fae7b09cad916e63dae89ed46cc Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 29 Oct 2020 10:32:55 +0100 Subject: block/nvme: Correctly initialize Admin Queue Attributes From the specification chapter 3.1.8 "AQA - Admin Queue Attributes" the Admin Submission Queue Size field is a 0’s based value: Admin Submission Queue Size (ASQS): Defines the size of the Admin Submission Queue in entries. Enabling a controller while this field is cleared to 00h produces undefined results. The minimum size of the Admin Submission Queue is two entries. The maximum size of the Admin Submission Queue is 4096 entries. This is a 0’s based value. This bug has never been hit because the device initialization uses a single command synchronously :) Reviewed-by: Eric Auger Reviewed-by: Stefan Hajnoczi Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-15-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index 7285bd2e27..0902aa5542 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -789,9 +789,9 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, goto out; } s->queue_count = 1; - QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000); - regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << AQA_ACQS_SHIFT) | - (NVME_QUEUE_SIZE << AQA_ASQS_SHIFT)); + QEMU_BUILD_BUG_ON((NVME_QUEUE_SIZE - 1) & 0xF000); + regs->aqa = cpu_to_le32(((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) | + ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT)); regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova); regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova); -- cgit v1.2.3-55-g7522 From 52b75ea8ec4f6cd8028bd8406ebcf2e80ffc6e05 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 29 Oct 2020 10:32:56 +0100 Subject: block/nvme: Simplify ADMIN queue access We don't need to dereference from BDRVNVMeState each time. Use a NVMeQueuePair pointer on the admin queue. The nvme_init() becomes easier to review, matching the style of nvme_add_io_queue(). Reviewed-by: Eric Auger Reviewed-by: Stefan Hajnoczi Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-16-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index 0902aa5542..eed12f4933 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -700,6 +700,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, Error **errp) { BDRVNVMeState *s = bs->opaque; + NVMeQueuePair *q; AioContext *aio_context = bdrv_get_aio_context(bs); int ret; uint64_t cap; @@ -781,19 +782,18 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, /* Set up admin queue. */ s->queues = g_new(NVMeQueuePair *, 1); - s->queues[INDEX_ADMIN] = nvme_create_queue_pair(s, aio_context, 0, - NVME_QUEUE_SIZE, - errp); - if (!s->queues[INDEX_ADMIN]) { + q = nvme_create_queue_pair(s, aio_context, 0, NVME_QUEUE_SIZE, errp); + if (!q) { ret = -EINVAL; goto out; } + s->queues[INDEX_ADMIN] = q; s->queue_count = 1; QEMU_BUILD_BUG_ON((NVME_QUEUE_SIZE - 1) & 0xF000); regs->aqa = cpu_to_le32(((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) | ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT)); - regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova); - regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova); + regs->asq = cpu_to_le64(q->sq.iova); + regs->acq = cpu_to_le64(q->cq.iova); /* After setting up all control registers we can enable device now. */ regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) | -- cgit v1.2.3-55-g7522 From 08d5406798f7729de8344ef0ed16d8bad8cdfc84 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 29 Oct 2020 10:32:57 +0100 Subject: block/nvme: Simplify nvme_cmd_sync() As all commands use the ADMIN queue, it is pointless to pass it as argument each time. Remove the argument, and rename the function as nvme_admin_cmd_sync() to make this new behavior clearer. Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Stefan Hajnoczi Message-id: 20201029093306.1063879-17-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index eed12f4933..cd875555ca 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -481,16 +481,17 @@ static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req, qemu_mutex_unlock(&q->lock); } -static void nvme_cmd_sync_cb(void *opaque, int ret) +static void nvme_admin_cmd_sync_cb(void *opaque, int ret) { int *pret = opaque; *pret = ret; aio_wait_kick(); } -static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q, - NvmeCmd *cmd) +static int nvme_admin_cmd_sync(BlockDriverState *bs, NvmeCmd *cmd) { + BDRVNVMeState *s = bs->opaque; + NVMeQueuePair *q = s->queues[INDEX_ADMIN]; AioContext *aio_context = bdrv_get_aio_context(bs); NVMeRequest *req; int ret = -EINPROGRESS; @@ -498,7 +499,7 @@ static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q, if (!req) { return -EBUSY; } - nvme_submit_command(q, req, cmd, nvme_cmd_sync_cb, &ret); + nvme_submit_command(q, req, cmd, nvme_admin_cmd_sync_cb, &ret); AIO_WAIT_WHILE(aio_context, ret == -EINPROGRESS); return ret; @@ -535,7 +536,7 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp) memset(id, 0, sizeof(*id)); cmd.dptr.prp1 = cpu_to_le64(iova); - if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) { + if (nvme_admin_cmd_sync(bs, &cmd)) { error_setg(errp, "Failed to identify controller"); goto out; } @@ -558,7 +559,7 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp) memset(id, 0, sizeof(*id)); cmd.cdw10 = 0; cmd.nsid = cpu_to_le32(namespace); - if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) { + if (nvme_admin_cmd_sync(bs, &cmd)) { error_setg(errp, "Failed to identify namespace"); goto out; } @@ -664,7 +665,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | n), .cdw11 = cpu_to_le32(NVME_CQ_IEN | NVME_CQ_PC), }; - if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) { + if (nvme_admin_cmd_sync(bs, &cmd)) { error_setg(errp, "Failed to create CQ io queue [%u]", n); goto out_error; } @@ -674,7 +675,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | n), .cdw11 = cpu_to_le32(NVME_SQ_PC | (n << 16)), }; - if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) { + if (nvme_admin_cmd_sync(bs, &cmd)) { error_setg(errp, "Failed to create SQ io queue [%u]", n); goto out_error; } @@ -887,7 +888,7 @@ static int nvme_enable_disable_write_cache(BlockDriverState *bs, bool enable, .cdw11 = cpu_to_le32(enable ? 0x01 : 0x00), }; - ret = nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd); + ret = nvme_admin_cmd_sync(bs, &cmd); if (ret) { error_setg(errp, "Failed to configure NVMe write cache"); } -- cgit v1.2.3-55-g7522 From c8228ac35541564c46bf1c91ebe7951f367e2ce6 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 29 Oct 2020 10:32:58 +0100 Subject: block/nvme: Set request_alignment at initialization Commit bdd6a90a9e5 ("block: Add VFIO based NVMe driver") sets the request_alignment in nvme_refresh_limits(). For consistency, also set it during initialization. Reported-by: Stefan Hajnoczi Reviewed-by: Eric Auger Reviewed-by: Stefan Hajnoczi Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-18-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 1 + 1 file changed, 1 insertion(+) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index cd875555ca..bb75448a09 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -758,6 +758,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, s->page_size = MAX(4096, 1 << NVME_CAP_MPSMIN(cap)); s->doorbell_scale = (4 << NVME_CAP_DSTRD(cap)) / sizeof(uint32_t); bs->bl.opt_mem_alignment = s->page_size; + bs->bl.request_alignment = s->page_size; timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000); /* Reset device to get a clean state. */ -- cgit v1.2.3-55-g7522 From a652a3ec697d5b4cfce44599d3540aae535b9545 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 29 Oct 2020 10:32:59 +0100 Subject: block/nvme: Correct minimum device page size While trying to simplify the code using a macro, we forgot the 12-bit shift... Correct that. Fixes: fad1eb68862 ("block/nvme: Use register definitions from 'block/nvme.h'") Reported-by: Eric Auger Reviewed-by: Stefan Hajnoczi Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-19-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index bb75448a09..bd3860ac4e 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -755,7 +755,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, goto out; } - s->page_size = MAX(4096, 1 << NVME_CAP_MPSMIN(cap)); + s->page_size = 1u << (12 + NVME_CAP_MPSMIN(cap)); s->doorbell_scale = (4 << NVME_CAP_DSTRD(cap)) / sizeof(uint32_t); bs->bl.opt_mem_alignment = s->page_size; bs->bl.request_alignment = s->page_size; -- cgit v1.2.3-55-g7522 From 0aecd06049ced6c0f863e99510b7a7cbff54aa56 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 29 Oct 2020 10:33:00 +0100 Subject: block/nvme: Change size and alignment of IDENTIFY response buffer In preparation of 64kB host page support, let's change the size and alignment of the IDENTIFY command response buffer so that the VFIO DMA MAP succeeds. We align on the host page size. Signed-off-by: Eric Auger Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Stefan Hajnoczi Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-20-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index bd3860ac4e..7628623c05 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -522,19 +522,20 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp) .opcode = NVME_ADM_CMD_IDENTIFY, .cdw10 = cpu_to_le32(0x1), }; + size_t id_size = QEMU_ALIGN_UP(sizeof(*id), qemu_real_host_page_size); - id = qemu_try_memalign(s->page_size, sizeof(*id)); + id = qemu_try_memalign(qemu_real_host_page_size, id_size); if (!id) { error_setg(errp, "Cannot allocate buffer for identify response"); goto out; } - r = qemu_vfio_dma_map(s->vfio, id, sizeof(*id), true, &iova); + r = qemu_vfio_dma_map(s->vfio, id, id_size, true, &iova); if (r) { error_setg(errp, "Cannot map buffer for DMA"); goto out; } - memset(id, 0, sizeof(*id)); + memset(id, 0, id_size); cmd.dptr.prp1 = cpu_to_le64(iova); if (nvme_admin_cmd_sync(bs, &cmd)) { error_setg(errp, "Failed to identify controller"); @@ -556,7 +557,7 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp) s->supports_write_zeroes = !!(oncs & NVME_ONCS_WRITE_ZEROES); s->supports_discard = !!(oncs & NVME_ONCS_DSM); - memset(id, 0, sizeof(*id)); + memset(id, 0, id_size); cmd.cdw10 = 0; cmd.nsid = cpu_to_le32(namespace); if (nvme_admin_cmd_sync(bs, &cmd)) { -- cgit v1.2.3-55-g7522 From 2387aaced7209872238eaf594997009cffd5501d Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 29 Oct 2020 10:33:01 +0100 Subject: block/nvme: Change size and alignment of queue In preparation of 64kB host page support, let's change the size and alignment of the queue so that the VFIO DMA MAP succeeds. We align on the host page size. Signed-off-by: Eric Auger Reviewed-by: Stefan Hajnoczi Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-21-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index 7628623c05..4a8589d2d2 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -167,9 +167,9 @@ static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q, size_t bytes; int r; - bytes = ROUND_UP(nentries * entry_bytes, s->page_size); + bytes = ROUND_UP(nentries * entry_bytes, qemu_real_host_page_size); q->head = q->tail = 0; - q->queue = qemu_try_memalign(s->page_size, bytes); + q->queue = qemu_try_memalign(qemu_real_host_page_size, bytes); if (!q->queue) { error_setg(errp, "Cannot allocate queue"); return false; -- cgit v1.2.3-55-g7522 From f8fd3ebac358c187d0aba7f922450ed6addf41a8 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 29 Oct 2020 10:33:02 +0100 Subject: block/nvme: Change size and alignment of prp_list_pages In preparation of 64kB host page support, let's change the size and alignment of the prp_list_pages so that the VFIO DMA MAP succeeds with 64kB host page size. We align on the host page size. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Eric Auger Reviewed-by: Stefan Hajnoczi Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-22-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index 4a8589d2d2..e807dd56df 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -215,6 +215,7 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, int i, r; NVMeQueuePair *q; uint64_t prp_list_iova; + size_t bytes; q = g_try_new0(NVMeQueuePair, 1); if (!q) { @@ -222,19 +223,19 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, } trace_nvme_create_queue_pair(idx, q, size, aio_context, event_notifier_get_fd(s->irq_notifier)); - q->prp_list_pages = qemu_try_memalign(s->page_size, - s->page_size * NVME_NUM_REQS); + bytes = QEMU_ALIGN_UP(s->page_size * NVME_NUM_REQS, + qemu_real_host_page_size); + q->prp_list_pages = qemu_try_memalign(qemu_real_host_page_size, bytes); if (!q->prp_list_pages) { goto fail; } - memset(q->prp_list_pages, 0, s->page_size * NVME_NUM_REQS); + memset(q->prp_list_pages, 0, bytes); qemu_mutex_init(&q->lock); q->s = s; q->index = idx; qemu_co_queue_init(&q->free_req_queue); q->completion_bh = aio_bh_new(aio_context, nvme_process_completion_bh, q); - r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, - s->page_size * NVME_NUM_REQS, + r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, bytes, false, &prp_list_iova); if (r) { goto fail; -- cgit v1.2.3-55-g7522 From 9e13d598843cca1cdab7b7bdcb9cc0868ebf7fed Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 29 Oct 2020 10:33:03 +0100 Subject: block/nvme: Align iov's va and size on host page size Make sure iov's va and size are properly aligned on the host page size. Signed-off-by: Eric Auger Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Stefan Hajnoczi Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-23-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index e807dd56df..f1e2fd34cd 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -1015,11 +1015,12 @@ static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd, for (i = 0; i < qiov->niov; ++i) { bool retry = true; uint64_t iova; + size_t len = QEMU_ALIGN_UP(qiov->iov[i].iov_len, + qemu_real_host_page_size); try_map: r = qemu_vfio_dma_map(s->vfio, qiov->iov[i].iov_base, - qiov->iov[i].iov_len, - true, &iova); + len, true, &iova); if (r == -ENOMEM && retry) { retry = false; trace_nvme_dma_flush_queue_wait(s); @@ -1163,8 +1164,9 @@ static inline bool nvme_qiov_aligned(BlockDriverState *bs, BDRVNVMeState *s = bs->opaque; for (i = 0; i < qiov->niov; ++i) { - if (!QEMU_PTR_IS_ALIGNED(qiov->iov[i].iov_base, s->page_size) || - !QEMU_IS_ALIGNED(qiov->iov[i].iov_len, s->page_size)) { + if (!QEMU_PTR_IS_ALIGNED(qiov->iov[i].iov_base, + qemu_real_host_page_size) || + !QEMU_IS_ALIGNED(qiov->iov[i].iov_len, qemu_real_host_page_size)) { trace_nvme_qiov_unaligned(qiov, i, qiov->iov[i].iov_base, qiov->iov[i].iov_len, s->page_size); return false; @@ -1180,7 +1182,7 @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes, int r; uint8_t *buf = NULL; QEMUIOVector local_qiov; - + size_t len = QEMU_ALIGN_UP(bytes, qemu_real_host_page_size); assert(QEMU_IS_ALIGNED(offset, s->page_size)); assert(QEMU_IS_ALIGNED(bytes, s->page_size)); assert(bytes <= s->max_transfer); @@ -1190,7 +1192,7 @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes, } s->stats.unaligned_accesses++; trace_nvme_prw_buffered(s, offset, bytes, qiov->niov, is_write); - buf = qemu_try_memalign(s->page_size, bytes); + buf = qemu_try_memalign(qemu_real_host_page_size, len); if (!buf) { return -ENOMEM; -- cgit v1.2.3-55-g7522 From 4b19e9b8159b5eecf8e90c81c56c1a13547e411a Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 29 Oct 2020 10:33:04 +0100 Subject: block/nvme: Fix use of write-only doorbells page on Aarch64 arch qemu_vfio_pci_map_bar() calls mmap(), and mmap(2) states: 'offset' must be a multiple of the page size as returned by sysconf(_SC_PAGE_SIZE). In commit f68453237b9 we started to use an offset of 4K which broke this contract on Aarch64 arch. Fix by mapping at offset 0, and and accessing doorbells at offset=4K. Fixes: f68453237b9 ("block/nvme: Map doorbells pages write-only") Reported-by: Eric Auger Reviewed-by: Eric Auger Reviewed-by: Stefan Hajnoczi Tested-by: Eric Auger Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201029093306.1063879-24-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index f1e2fd34cd..c8ef69cbb2 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -94,6 +94,7 @@ typedef struct { struct BDRVNVMeState { AioContext *aio_context; QEMUVFIOState *vfio; + void *bar0_wo_map; /* Memory mapped registers */ volatile struct { uint32_t sq_tail; @@ -777,8 +778,10 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, } } - s->doorbells = qemu_vfio_pci_map_bar(s->vfio, 0, sizeof(NvmeBar), - NVME_DOORBELL_SIZE, PROT_WRITE, errp); + s->bar0_wo_map = qemu_vfio_pci_map_bar(s->vfio, 0, 0, + sizeof(NvmeBar) + NVME_DOORBELL_SIZE, + PROT_WRITE, errp); + s->doorbells = (void *)((uintptr_t)s->bar0_wo_map + sizeof(NvmeBar)); if (!s->doorbells) { ret = -EINVAL; goto out; @@ -910,8 +913,8 @@ static void nvme_close(BlockDriverState *bs) &s->irq_notifier[MSIX_SHARED_IRQ_IDX], false, NULL, NULL); event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]); - qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->doorbells, - sizeof(NvmeBar), NVME_DOORBELL_SIZE); + qemu_vfio_pci_unmap_bar(s->vfio, 0, s->bar0_wo_map, + 0, sizeof(NvmeBar) + NVME_DOORBELL_SIZE); qemu_vfio_close(s->vfio); g_free(s->device); -- cgit v1.2.3-55-g7522 From a0546a7b6f14a919b67cea1d1fc4362ac2901868 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Thu, 29 Oct 2020 10:33:05 +0100 Subject: block/nvme: Fix nvme_submit_command() on big-endian host The Completion Queue Command Identifier is a 16-bit value, so nvme_submit_command() is unlikely to work on big-endian hosts, as the relevant bits are truncated. Fix by using the correct byte-swap function. Fixes: bdd6a90a9e5 ("block: Add VFIO based NVMe driver") Reported-by: Keith Busch Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Stefan Hajnoczi Message-id: 20201029093306.1063879-25-philmd@redhat.com Signed-off-by: Stefan Hajnoczi Tested-by: Eric Auger --- block/nvme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/nvme.c b/block/nvme.c index c8ef69cbb2..a06a188d53 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -469,7 +469,7 @@ static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req, assert(!req->cb); req->cb = cb; req->opaque = opaque; - cmd->cid = cpu_to_le32(req->cid); + cmd->cid = cpu_to_le16(req->cid); trace_nvme_submit_command(q->s, q->index, req->cid); nvme_trace_command(cmd); -- cgit v1.2.3-55-g7522