From 433c71e494ec66a7455b8ef2e6b2b42118426e50 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Thu, 10 Nov 2022 07:59:40 +0100 Subject: hw/nvme: fix aio cancel in format There are several bugs in the async cancel code for the Format command. Firstly, cancelling a format operation neglects to set iocb->ret as well as clearing the iocb->aiocb after cancelling the underlying aiocb which causes the aio callback to ignore the cancellation. Trivial fix. Secondly, and worse, because the request is queued up for posting to the CQ in a bottom half, if the cancellation is due to the submission queue being deleted (which calls blk_aio_cancel), the req structure is deallocated in nvme_del_sq prior to the bottom half being schedulued. Fix this by simply removing the bottom half, there is no reason to defer it anyway. Fixes: 3bcf26d3d619 ("hw/nvme: reimplement format nvm to allow cancellation") Reported-by: Jonathan Derrick Reviewed-by: Keith Busch Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index ac3885ce50..9bc56075f6 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -5741,7 +5741,6 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) typedef struct NvmeFormatAIOCB { BlockAIOCB common; BlockAIOCB *aiocb; - QEMUBH *bh; NvmeRequest *req; int ret; @@ -5756,14 +5755,15 @@ typedef struct NvmeFormatAIOCB { uint8_t pil; } NvmeFormatAIOCB; -static void nvme_format_bh(void *opaque); - static void nvme_format_cancel(BlockAIOCB *aiocb) { NvmeFormatAIOCB *iocb = container_of(aiocb, NvmeFormatAIOCB, common); + iocb->ret = -ECANCELED; + if (iocb->aiocb) { blk_aio_cancel_async(iocb->aiocb); + iocb->aiocb = NULL; } } @@ -5787,13 +5787,17 @@ static void nvme_format_set(NvmeNamespace *ns, uint8_t lbaf, uint8_t mset, nvme_ns_init_format(ns); } +static void nvme_do_format(NvmeFormatAIOCB *iocb); + static void nvme_format_ns_cb(void *opaque, int ret) { NvmeFormatAIOCB *iocb = opaque; NvmeNamespace *ns = iocb->ns; int bytes; - if (ret < 0) { + if (iocb->ret < 0) { + goto done; + } else if (ret < 0) { iocb->ret = ret; goto done; } @@ -5817,8 +5821,7 @@ static void nvme_format_ns_cb(void *opaque, int ret) iocb->offset = 0; done: - iocb->aiocb = NULL; - qemu_bh_schedule(iocb->bh); + nvme_do_format(iocb); } static uint16_t nvme_format_check(NvmeNamespace *ns, uint8_t lbaf, uint8_t pi) @@ -5842,9 +5845,8 @@ static uint16_t nvme_format_check(NvmeNamespace *ns, uint8_t lbaf, uint8_t pi) return NVME_SUCCESS; } -static void nvme_format_bh(void *opaque) +static void nvme_do_format(NvmeFormatAIOCB *iocb) { - NvmeFormatAIOCB *iocb = opaque; NvmeRequest *req = iocb->req; NvmeCtrl *n = nvme_ctrl(req); uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); @@ -5882,11 +5884,7 @@ static void nvme_format_bh(void *opaque) return; done: - qemu_bh_delete(iocb->bh); - iocb->bh = NULL; - iocb->common.cb(iocb->common.opaque, iocb->ret); - qemu_aio_unref(iocb); } @@ -5905,7 +5903,6 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) iocb = qemu_aio_get(&nvme_format_aiocb_info, NULL, nvme_misc_cb, req); iocb->req = req; - iocb->bh = qemu_bh_new(nvme_format_bh, iocb); iocb->ret = 0; iocb->ns = NULL; iocb->nsid = 0; @@ -5934,14 +5931,13 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) } req->aiocb = &iocb->common; - qemu_bh_schedule(iocb->bh); + nvme_do_format(iocb); return NVME_NO_COMPLETE; out: - qemu_bh_delete(iocb->bh); - iocb->bh = NULL; qemu_aio_unref(iocb); + return status; } -- cgit v1.2.3-55-g7522 From 3dbc1708ea37d03dd18ce498039e31d8565e673a Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Thu, 10 Nov 2022 07:59:44 +0100 Subject: hw/nvme: fix aio cancel in flush Make sure that iocb->aiocb is NULL'ed when cancelling. Fix a potential use-after-free by removing the bottom half and enqueuing the completion directly. Fixes: 38f4ac65ac88 ("hw/nvme: reimplement flush to allow cancellation") Reviewed-by: Keith Busch Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 9bc56075f6..fede5af6af 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -3160,7 +3160,6 @@ typedef struct NvmeFlushAIOCB { BlockAIOCB common; BlockAIOCB *aiocb; NvmeRequest *req; - QEMUBH *bh; int ret; NvmeNamespace *ns; @@ -3176,6 +3175,7 @@ static void nvme_flush_cancel(BlockAIOCB *acb) if (iocb->aiocb) { blk_aio_cancel_async(iocb->aiocb); + iocb->aiocb = NULL; } } @@ -3185,6 +3185,8 @@ static const AIOCBInfo nvme_flush_aiocb_info = { .get_aio_context = nvme_get_aio_context, }; +static void nvme_do_flush(NvmeFlushAIOCB *iocb); + static void nvme_flush_ns_cb(void *opaque, int ret) { NvmeFlushAIOCB *iocb = opaque; @@ -3206,13 +3208,11 @@ static void nvme_flush_ns_cb(void *opaque, int ret) } out: - iocb->aiocb = NULL; - qemu_bh_schedule(iocb->bh); + nvme_do_flush(iocb); } -static void nvme_flush_bh(void *opaque) +static void nvme_do_flush(NvmeFlushAIOCB *iocb) { - NvmeFlushAIOCB *iocb = opaque; NvmeRequest *req = iocb->req; NvmeCtrl *n = nvme_ctrl(req); int i; @@ -3239,14 +3239,8 @@ static void nvme_flush_bh(void *opaque) return; done: - qemu_bh_delete(iocb->bh); - iocb->bh = NULL; - iocb->common.cb(iocb->common.opaque, iocb->ret); - qemu_aio_unref(iocb); - - return; } static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req) @@ -3258,7 +3252,6 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req) iocb = qemu_aio_get(&nvme_flush_aiocb_info, NULL, nvme_misc_cb, req); iocb->req = req; - iocb->bh = qemu_bh_new(nvme_flush_bh, iocb); iocb->ret = 0; iocb->ns = NULL; iocb->nsid = 0; @@ -3280,13 +3273,11 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req) } req->aiocb = &iocb->common; - qemu_bh_schedule(iocb->bh); + nvme_do_flush(iocb); return NVME_NO_COMPLETE; out: - qemu_bh_delete(iocb->bh); - iocb->bh = NULL; qemu_aio_unref(iocb); return status; -- cgit v1.2.3-55-g7522 From 36a251c3468f34a2486dd49836e397534a1bb189 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Thu, 10 Nov 2022 07:59:47 +0100 Subject: hw/nvme: fix aio cancel in zone reset If the zone reset operation is cancelled but the block unmap operation completes normally, the callback will continue resetting the next zone since it neglects to check iocb->ret which will have been set to -ECANCELED. Make sure that this is checked and bail out if an error is present. Secondly, fix a potential use-after-free by removing the bottom half and enqueuing the completion directly. Fixes: 63d96e4ffd71 ("hw/nvme: reimplement zone reset to allow cancellation") Reviewed-by: Keith Busch Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index fede5af6af..bf4abf73f7 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -3712,7 +3712,6 @@ typedef struct NvmeZoneResetAIOCB { BlockAIOCB common; BlockAIOCB *aiocb; NvmeRequest *req; - QEMUBH *bh; int ret; bool all; @@ -3741,17 +3740,6 @@ static const AIOCBInfo nvme_zone_reset_aiocb_info = { .cancel_async = nvme_zone_reset_cancel, }; -static void nvme_zone_reset_bh(void *opaque) -{ - NvmeZoneResetAIOCB *iocb = opaque; - - iocb->common.cb(iocb->common.opaque, iocb->ret); - - qemu_bh_delete(iocb->bh); - iocb->bh = NULL; - qemu_aio_unref(iocb); -} - static void nvme_zone_reset_cb(void *opaque, int ret); static void nvme_zone_reset_epilogue_cb(void *opaque, int ret) @@ -3762,14 +3750,8 @@ static void nvme_zone_reset_epilogue_cb(void *opaque, int ret) int64_t moff; int count; - if (ret < 0) { - nvme_zone_reset_cb(iocb, ret); - return; - } - - if (!ns->lbaf.ms) { - nvme_zone_reset_cb(iocb, 0); - return; + if (ret < 0 || iocb->ret < 0 || !ns->lbaf.ms) { + goto out; } moff = nvme_moff(ns, iocb->zone->d.zslba); @@ -3779,6 +3761,9 @@ static void nvme_zone_reset_epilogue_cb(void *opaque, int ret) BDRV_REQ_MAY_UNMAP, nvme_zone_reset_cb, iocb); return; + +out: + nvme_zone_reset_cb(iocb, ret); } static void nvme_zone_reset_cb(void *opaque, int ret) @@ -3787,7 +3772,9 @@ static void nvme_zone_reset_cb(void *opaque, int ret) NvmeRequest *req = iocb->req; NvmeNamespace *ns = req->ns; - if (ret < 0) { + if (iocb->ret < 0) { + goto done; + } else if (ret < 0) { iocb->ret = ret; goto done; } @@ -3835,9 +3822,9 @@ static void nvme_zone_reset_cb(void *opaque, int ret) done: iocb->aiocb = NULL; - if (iocb->bh) { - qemu_bh_schedule(iocb->bh); - } + + iocb->common.cb(iocb->common.opaque, iocb->ret); + qemu_aio_unref(iocb); } static uint16_t nvme_zone_mgmt_send_zrwa_flush(NvmeCtrl *n, NvmeZone *zone, @@ -3942,7 +3929,6 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req) nvme_misc_cb, req); iocb->req = req; - iocb->bh = qemu_bh_new(nvme_zone_reset_bh, iocb); iocb->ret = 0; iocb->all = all; iocb->idx = zone_idx; -- cgit v1.2.3-55-g7522 From 818b9b8f5efb728793b9a2c124adab371d2c16e5 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Thu, 10 Nov 2022 07:59:50 +0100 Subject: hw/nvme: fix aio cancel in dsm When the DSM operation is cancelled asynchronously, we set iocb->ret to -ECANCELED. However, the callback function only checks the return value of the completed aio, which may have completed succesfully prior to the cancellation and thus the callback ends up continuing the dsm operation instead of bailing out. Fix this. Secondly, fix a potential use-after-free by removing the bottom half and enqueuing the completion directly. Fixes: d7d1474fd85d ("hw/nvme: reimplement dsm to allow cancellation") Reviewed-by: Keith Busch Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 34 ++++++++-------------------------- 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index bf4abf73f7..e847b89461 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -2329,7 +2329,6 @@ typedef struct NvmeDSMAIOCB { BlockAIOCB common; BlockAIOCB *aiocb; NvmeRequest *req; - QEMUBH *bh; int ret; NvmeDsmRange *range; @@ -2351,7 +2350,7 @@ static void nvme_dsm_cancel(BlockAIOCB *aiocb) } else { /* * We only reach this if nvme_dsm_cancel() has already been called or - * the command ran to completion and nvme_dsm_bh is scheduled to run. + * the command ran to completion. */ assert(iocb->idx == iocb->nr); } @@ -2362,17 +2361,6 @@ static const AIOCBInfo nvme_dsm_aiocb_info = { .cancel_async = nvme_dsm_cancel, }; -static void nvme_dsm_bh(void *opaque) -{ - NvmeDSMAIOCB *iocb = opaque; - - iocb->common.cb(iocb->common.opaque, iocb->ret); - - qemu_bh_delete(iocb->bh); - iocb->bh = NULL; - qemu_aio_unref(iocb); -} - static void nvme_dsm_cb(void *opaque, int ret); static void nvme_dsm_md_cb(void *opaque, int ret) @@ -2384,16 +2372,10 @@ static void nvme_dsm_md_cb(void *opaque, int ret) uint64_t slba; uint32_t nlb; - if (ret < 0) { - iocb->ret = ret; + if (ret < 0 || iocb->ret < 0 || !ns->lbaf.ms) { goto done; } - if (!ns->lbaf.ms) { - nvme_dsm_cb(iocb, 0); - return; - } - range = &iocb->range[iocb->idx - 1]; slba = le64_to_cpu(range->slba); nlb = le32_to_cpu(range->nlb); @@ -2406,7 +2388,6 @@ static void nvme_dsm_md_cb(void *opaque, int ret) ret = nvme_block_status_all(ns, slba, nlb, BDRV_BLOCK_ZERO); if (ret) { if (ret < 0) { - iocb->ret = ret; goto done; } @@ -2420,8 +2401,7 @@ static void nvme_dsm_md_cb(void *opaque, int ret) return; done: - iocb->aiocb = NULL; - qemu_bh_schedule(iocb->bh); + nvme_dsm_cb(iocb, ret); } static void nvme_dsm_cb(void *opaque, int ret) @@ -2434,7 +2414,9 @@ static void nvme_dsm_cb(void *opaque, int ret) uint64_t slba; uint32_t nlb; - if (ret < 0) { + if (iocb->ret < 0) { + goto done; + } else if (ret < 0) { iocb->ret = ret; goto done; } @@ -2468,7 +2450,8 @@ next: done: iocb->aiocb = NULL; - qemu_bh_schedule(iocb->bh); + iocb->common.cb(iocb->common.opaque, iocb->ret); + qemu_aio_unref(iocb); } static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req) @@ -2486,7 +2469,6 @@ static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req) nvme_misc_cb, req); iocb->req = req; - iocb->bh = qemu_bh_new(nvme_dsm_bh, iocb); iocb->ret = 0; iocb->range = g_new(NvmeDsmRange, nr); iocb->nr = nr; -- cgit v1.2.3-55-g7522 From 83f56ac321ca2301f00e63b6acbde5c692916a1d Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Thu, 14 Jul 2022 09:37:20 +0200 Subject: hw/nvme: remove copy bh scheduling Fix a potential use-after-free by removing the bottom half and enqueuing the completion directly. Fixes: 796d20681d9b ("hw/nvme: reimplement the copy command to allow aio cancellation") Reviewed-by: Keith Busch Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 63 +++++++++++++--------------------------------------------- 1 file changed, 14 insertions(+), 49 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index e847b89461..e54276dc1d 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -2552,7 +2552,6 @@ typedef struct NvmeCopyAIOCB { BlockAIOCB common; BlockAIOCB *aiocb; NvmeRequest *req; - QEMUBH *bh; int ret; void *ranges; @@ -2590,9 +2589,8 @@ static const AIOCBInfo nvme_copy_aiocb_info = { .cancel_async = nvme_copy_cancel, }; -static void nvme_copy_bh(void *opaque) +static void nvme_copy_done(NvmeCopyAIOCB *iocb) { - NvmeCopyAIOCB *iocb = opaque; NvmeRequest *req = iocb->req; NvmeNamespace *ns = req->ns; BlockAcctStats *stats = blk_get_stats(ns->blkconf.blk); @@ -2604,9 +2602,6 @@ static void nvme_copy_bh(void *opaque) qemu_iovec_destroy(&iocb->iov); g_free(iocb->bounce); - qemu_bh_delete(iocb->bh); - iocb->bh = NULL; - if (iocb->ret < 0) { block_acct_failed(stats, &iocb->acct.read); block_acct_failed(stats, &iocb->acct.write); @@ -2619,7 +2614,7 @@ static void nvme_copy_bh(void *opaque) qemu_aio_unref(iocb); } -static void nvme_copy_cb(void *opaque, int ret); +static void nvme_do_copy(NvmeCopyAIOCB *iocb); static void nvme_copy_source_range_parse_format0(void *ranges, int idx, uint64_t *slba, uint32_t *nlb, @@ -2731,7 +2726,7 @@ static void nvme_copy_out_completed_cb(void *opaque, int ret) iocb->idx++; iocb->slba += nlb; out: - nvme_copy_cb(iocb, iocb->ret); + nvme_do_copy(iocb); } static void nvme_copy_out_cb(void *opaque, int ret) @@ -2743,18 +2738,10 @@ static void nvme_copy_out_cb(void *opaque, int ret) size_t mlen; uint8_t *mbounce; - if (ret < 0) { - iocb->ret = ret; - goto out; - } else if (iocb->ret < 0) { + if (ret < 0 || iocb->ret < 0 || !ns->lbaf.ms) { goto out; } - if (!ns->lbaf.ms) { - nvme_copy_out_completed_cb(iocb, 0); - return; - } - nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, NULL, &nlb, NULL, NULL, NULL); @@ -2771,7 +2758,7 @@ static void nvme_copy_out_cb(void *opaque, int ret) return; out: - nvme_copy_cb(iocb, ret); + nvme_copy_out_completed_cb(iocb, ret); } static void nvme_copy_in_completed_cb(void *opaque, int ret) @@ -2865,15 +2852,9 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret) invalid: req->status = status; - iocb->aiocb = NULL; - if (iocb->bh) { - qemu_bh_schedule(iocb->bh); - } - - return; - + iocb->ret = -1; out: - nvme_copy_cb(iocb, ret); + nvme_do_copy(iocb); } static void nvme_copy_in_cb(void *opaque, int ret) @@ -2884,18 +2865,10 @@ static void nvme_copy_in_cb(void *opaque, int ret) uint64_t slba; uint32_t nlb; - if (ret < 0) { - iocb->ret = ret; - goto out; - } else if (iocb->ret < 0) { + if (ret < 0 || iocb->ret < 0 || !ns->lbaf.ms) { goto out; } - if (!ns->lbaf.ms) { - nvme_copy_in_completed_cb(iocb, 0); - return; - } - nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba, &nlb, NULL, NULL, NULL); @@ -2909,12 +2882,11 @@ static void nvme_copy_in_cb(void *opaque, int ret) return; out: - nvme_copy_cb(iocb, iocb->ret); + nvme_copy_in_completed_cb(iocb, ret); } -static void nvme_copy_cb(void *opaque, int ret) +static void nvme_do_copy(NvmeCopyAIOCB *iocb) { - NvmeCopyAIOCB *iocb = opaque; NvmeRequest *req = iocb->req; NvmeNamespace *ns = req->ns; uint64_t slba; @@ -2922,10 +2894,7 @@ static void nvme_copy_cb(void *opaque, int ret) size_t len; uint16_t status; - if (ret < 0) { - iocb->ret = ret; - goto done; - } else if (iocb->ret < 0) { + if (iocb->ret < 0) { goto done; } @@ -2972,14 +2941,11 @@ static void nvme_copy_cb(void *opaque, int ret) invalid: req->status = status; + iocb->ret = -1; done: - iocb->aiocb = NULL; - if (iocb->bh) { - qemu_bh_schedule(iocb->bh); - } + nvme_copy_done(iocb); } - static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) { NvmeNamespace *ns = req->ns; @@ -3049,7 +3015,6 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) } iocb->req = req; - iocb->bh = qemu_bh_new(nvme_copy_bh, iocb); iocb->ret = 0; iocb->nr = nr; iocb->idx = 0; @@ -3066,7 +3031,7 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) BLOCK_ACCT_WRITE); req->aiocb = &iocb->common; - nvme_copy_cb(iocb, 0); + nvme_do_copy(iocb); return NVME_NO_COMPLETE; -- cgit v1.2.3-55-g7522