From 66b7e9bed0aee4342aa7cb824b8c46a42cacf7e2 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sun, 24 Jan 2021 11:54:47 +0900 Subject: hw/block/nvme: add CMIC enum value for Identify Controller Added Controller Multi-path I/O and Namespace Sharing Capabilities (CMIC) field to support multi-controller in the following patches. This field is in Identify Controller data structure in [76]. Signed-off-by: Minwoo Im Tested-by: Klaus Jensen Reviewed-by: Klaus Jensen Reviewed-by: Keith Busch Signed-off-by: Klaus Jensen --- include/block/nvme.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/block/nvme.h b/include/block/nvme.h index 07cfc92936..f1d3a78658 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -1034,6 +1034,10 @@ enum NvmeIdCtrlLpa { NVME_LPA_EXTENDED = 1 << 2, }; +enum NvmeIdCtrlCmic { + NVME_CMIC_MULTI_CTRL = 1 << 1, +}; + #define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf) #define NVME_CTRL_SQES_MAX(sqes) (((sqes) >> 4) & 0xf) #define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf) -- cgit v1.2.3-55-g7522 From adc36b8d21204c00643016d8766a5214e3d54b5b Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sun, 24 Jan 2021 11:54:49 +0900 Subject: hw/block/nvme: add NMIC enum value for Identify Namespace Added Namespace Multi-path I/O and Namespace Sharing Capabilities (NMIC) field to support shared namespace from controller(s). This field is in Identify Namespace data structure in [30]. Signed-off-by: Minwoo Im Tested-by: Klaus Jensen Reviewed-by: Klaus Jensen Reviewed-by: Keith Busch Signed-off-by: Klaus Jensen --- include/block/nvme.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/block/nvme.h b/include/block/nvme.h index f1d3a78658..3db2b9b4cb 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -1203,6 +1203,10 @@ enum NvmeNsIdentifierType { NVME_NIDT_CSI = 0x04, }; +enum NvmeIdNsNmic { + NVME_NMIC_NS_SHARED = 1 << 0, +}; + enum NvmeCsi { NVME_CSI_NVM = 0x00, NVME_CSI_ZONED = 0x02, -- cgit v1.2.3-55-g7522 From 3862efff316c1d02b41d1362f97dfba812050e53 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Mon, 16 Nov 2020 13:40:02 +0100 Subject: nvme: updated shared header for copy command Add new data structures and types for the Simple Copy command. Signed-off-by: Klaus Jensen Reviewed-by: Minwoo Im Acked-by: Stefan Hajnoczi Reviewed-by: Keith Busch --- include/block/nvme.h | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/block/nvme.h b/include/block/nvme.h index 3db2b9b4cb..9f8eb3988c 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -579,6 +579,7 @@ enum NvmeIoCommands { NVME_CMD_COMPARE = 0x05, NVME_CMD_WRITE_ZEROES = 0x08, NVME_CMD_DSM = 0x09, + NVME_CMD_COPY = 0x19, NVME_CMD_ZONE_MGMT_SEND = 0x79, NVME_CMD_ZONE_MGMT_RECV = 0x7a, NVME_CMD_ZONE_APPEND = 0x7d, @@ -724,6 +725,37 @@ typedef struct QEMU_PACKED NvmeDsmRange { uint64_t slba; } NvmeDsmRange; +enum { + NVME_COPY_FORMAT_0 = 0x0, +}; + +typedef struct QEMU_PACKED NvmeCopyCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint32_t rsvd2[4]; + NvmeCmdDptr dptr; + uint64_t sdlba; + uint8_t nr; + uint8_t control[3]; + uint16_t rsvd13; + uint16_t dspec; + uint32_t reftag; + uint16_t apptag; + uint16_t appmask; +} NvmeCopyCmd; + +typedef struct QEMU_PACKED NvmeCopySourceRange { + uint8_t rsvd0[8]; + uint64_t slba; + uint16_t nlb; + uint8_t rsvd18[6]; + uint32_t reftag; + uint16_t apptag; + uint16_t appmask; +} NvmeCopySourceRange; + enum NvmeAsyncEventRequest { NVME_AER_TYPE_ERROR = 0, NVME_AER_TYPE_SMART = 1, @@ -807,6 +839,7 @@ enum NvmeStatusCodes { NVME_CONFLICTING_ATTRS = 0x0180, NVME_INVALID_PROT_INFO = 0x0181, NVME_WRITE_TO_RO = 0x0182, + NVME_CMD_SIZE_LIMIT = 0x0183, NVME_ZONE_BOUNDARY_ERROR = 0x01b8, NVME_ZONE_FULL = 0x01b9, NVME_ZONE_READ_ONLY = 0x01ba, @@ -994,7 +1027,7 @@ typedef struct QEMU_PACKED NvmeIdCtrl { uint8_t nvscc; uint8_t rsvd531; uint16_t acwu; - uint8_t rsvd534[2]; + uint16_t ocfs; uint32_t sgls; uint8_t rsvd540[228]; uint8_t subnqn[256]; @@ -1022,6 +1055,11 @@ enum NvmeIdCtrlOncs { NVME_ONCS_FEATURES = 1 << 4, NVME_ONCS_RESRVATIONS = 1 << 5, NVME_ONCS_TIMESTAMP = 1 << 6, + NVME_ONCS_COPY = 1 << 8, +}; + +enum NvmeIdCtrlOcfs { + NVME_OCFS_COPY_FORMAT_0 = 1 << 0, }; enum NvmeIdCtrlFrmw { @@ -1175,7 +1213,10 @@ typedef struct QEMU_PACKED NvmeIdNs { uint16_t npdg; uint16_t npda; uint16_t nows; - uint8_t rsvd74[30]; + uint16_t mssrl; + uint32_t mcl; + uint8_t msrc; + uint8_t rsvd81[23]; uint8_t nguid[16]; uint64_t eui64; NvmeLBAF lbaf[16]; @@ -1331,6 +1372,7 @@ static inline void _nvme_check_size(void) QEMU_BUILD_BUG_ON(sizeof(NvmeZonedResult) != 8); QEMU_BUILD_BUG_ON(sizeof(NvmeCqe) != 16); QEMU_BUILD_BUG_ON(sizeof(NvmeDsmRange) != 16); + QEMU_BUILD_BUG_ON(sizeof(NvmeCopySourceRange) != 32); QEMU_BUILD_BUG_ON(sizeof(NvmeCmd) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeDeleteQ) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeCreateCq) != 64); @@ -1338,6 +1380,7 @@ static inline void _nvme_check_size(void) QEMU_BUILD_BUG_ON(sizeof(NvmeIdentify) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeRwCmd) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeDsmCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCopyCmd) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeRangeType) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512); -- cgit v1.2.3-55-g7522 From c94973288cd9cfdb0dc23ae84ba256a7345c372e Mon Sep 17 00:00:00 2001 From: Gollu Appalanaidu Date: Mon, 25 Jan 2021 15:09:24 +0530 Subject: hw/block/nvme: add broadcast nsid support flush command Add support for using the broadcast nsid to issue a flush on all namespaces through a single command. Signed-off-by: Gollu Appalanaidu Reviewed-by: Klaus Jensen Acked-by: Stefan Hajnoczi Acked-by: Keith Busch Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++--- hw/block/trace-events | 2 + include/block/nvme.h | 8 ++++ 3 files changed, 127 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/hw/block/nvme.c b/hw/block/nvme.c index a54ef34ce5..db1a3aabd8 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1466,6 +1466,41 @@ static void nvme_rw_cb(void *opaque, int ret) nvme_enqueue_req_completion(nvme_cq(req), req); } +struct nvme_aio_flush_ctx { + NvmeRequest *req; + NvmeNamespace *ns; + BlockAcctCookie acct; +}; + +static void nvme_aio_flush_cb(void *opaque, int ret) +{ + struct nvme_aio_flush_ctx *ctx = opaque; + NvmeRequest *req = ctx->req; + uintptr_t *num_flushes = (uintptr_t *)&req->opaque; + + BlockBackend *blk = ctx->ns->blkconf.blk; + BlockAcctCookie *acct = &ctx->acct; + BlockAcctStats *stats = blk_get_stats(blk); + + trace_pci_nvme_aio_flush_cb(nvme_cid(req), blk_name(blk)); + + if (!ret) { + block_acct_done(stats, acct); + } else { + block_acct_failed(stats, acct); + nvme_aio_err(req, ret); + } + + (*num_flushes)--; + g_free(ctx); + + if (*num_flushes) { + return; + } + + nvme_enqueue_req_completion(nvme_cq(req), req); +} + static void nvme_aio_discard_cb(void *opaque, int ret) { NvmeRequest *req = opaque; @@ -1949,10 +1984,56 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req) { - block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0, - BLOCK_ACCT_FLUSH); - req->aiocb = blk_aio_flush(req->ns->blkconf.blk, nvme_rw_cb, req); - return NVME_NO_COMPLETE; + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uintptr_t *num_flushes = (uintptr_t *)&req->opaque; + uint16_t status; + struct nvme_aio_flush_ctx *ctx; + NvmeNamespace *ns; + + trace_pci_nvme_flush(nvme_cid(req), nsid); + + if (nsid != NVME_NSID_BROADCAST) { + req->ns = nvme_ns(n, nsid); + if (unlikely(!req->ns)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0, + BLOCK_ACCT_FLUSH); + req->aiocb = blk_aio_flush(req->ns->blkconf.blk, nvme_rw_cb, req); + return NVME_NO_COMPLETE; + } + + /* 1-initialize; see comment in nvme_dsm */ + *num_flushes = 1; + + for (int i = 1; i <= n->num_namespaces; i++) { + ns = nvme_ns(n, i); + if (!ns) { + continue; + } + + ctx = g_new(struct nvme_aio_flush_ctx, 1); + ctx->req = req; + ctx->ns = ns; + + (*num_flushes)++; + + block_acct_start(blk_get_stats(ns->blkconf.blk), &ctx->acct, 0, + BLOCK_ACCT_FLUSH); + blk_aio_flush(ns->blkconf.blk, nvme_aio_flush_cb, ctx); + } + + /* account for the 1-initialization */ + (*num_flushes)--; + + if (*num_flushes) { + status = NVME_NO_COMPLETE; + } else { + status = req->status; + } + + return status; } static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) @@ -2608,6 +2689,29 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) return NVME_INVALID_NSID | NVME_DNR; } + /* + * In the base NVM command set, Flush may apply to all namespaces + * (indicated by NSID being set to 0xFFFFFFFF). But if that feature is used + * along with TP 4056 (Namespace Types), it may be pretty screwed up. + * + * If NSID is indeed set to 0xFFFFFFFF, we simply cannot associate the + * opcode with a specific command since we cannot determine a unique I/O + * command set. Opcode 0x0 could have any other meaning than something + * equivalent to flushing and say it DOES have completely different + * semantics in some other command set - does an NSID of 0xFFFFFFFF then + * mean "for all namespaces, apply whatever command set specific command + * that uses the 0x0 opcode?" Or does it mean "for all namespaces, apply + * whatever command that uses the 0x0 opcode if, and only if, it allows + * NSID to be 0xFFFFFFFF"? + * + * Anyway (and luckily), for now, we do not care about this since the + * device only supports namespace types that includes the NVM Flush command + * (NVM and Zoned), so always do an NVM Flush. + */ + if (req->cmd.opcode == NVME_CMD_FLUSH) { + return nvme_flush(n, req); + } + req->ns = nvme_ns(n, nsid); if (unlikely(!req->ns)) { return NVME_INVALID_FIELD | NVME_DNR; @@ -2619,8 +2723,6 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) } switch (req->cmd.opcode) { - case NVME_CMD_FLUSH: - return nvme_flush(n, req); case NVME_CMD_WRITE_ZEROES: return nvme_write_zeroes(n, req); case NVME_CMD_ZONE_APPEND: @@ -4750,7 +4852,15 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) NVME_ONCS_FEATURES | NVME_ONCS_DSM | NVME_ONCS_COMPARE | NVME_ONCS_COPY); - id->vwc = (0x2 << 1) | 0x1; + /* + * NOTE: If this device ever supports a command set that does NOT use 0x0 + * as a Flush-equivalent operation, support for the broadcast NSID in Flush + * should probably be removed. + * + * See comment in nvme_io_cmd. + */ + id->vwc = NVME_VWC_NSID_BROADCAST_SUPPORT | NVME_VWC_PRESENT; + id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0); id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN | NVME_CTRL_SGLS_BITBUCKET); diff --git a/hw/block/trace-events b/hw/block/trace-events index 4b5ee04024..b04f7a3e18 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -40,6 +40,7 @@ pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, pci_nvme_map_sgl(uint16_t cid, uint8_t typ, uint64_t len) "cid %"PRIu16" type 0x%"PRIx8" len %"PRIu64"" pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" +pci_nvme_flush(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" @@ -55,6 +56,7 @@ pci_nvme_compare_cb(uint16_t cid) "cid %"PRIu16"" pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16"" pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16"" pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64"" +pci_nvme_aio_flush_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" diff --git a/include/block/nvme.h b/include/block/nvme.h index 9f8eb3988c..b23f3ae227 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -1062,6 +1062,14 @@ enum NvmeIdCtrlOcfs { NVME_OCFS_COPY_FORMAT_0 = 1 << 0, }; +enum NvmeIdctrlVwc { + NVME_VWC_PRESENT = 1 << 0, + NVME_VWC_NSID_BROADCAST_NO_SUPPORT = 0 << 1, + NVME_VWC_NSID_BROADCAST_RESERVED = 1 << 1, + NVME_VWC_NSID_BROADCAST_CTRL_SPEC = 2 << 1, + NVME_VWC_NSID_BROADCAST_SUPPORT = 3 << 1, +}; + enum NvmeIdCtrlFrmw { NVME_FRMW_SLOT1_RO = 1 << 0, }; -- cgit v1.2.3-55-g7522 From 67ce28a1fdcf73e2c026dbc43bb8fb6dc9a56aed Mon Sep 17 00:00:00 2001 From: Gollu Appalanaidu Date: Sun, 21 Feb 2021 19:39:36 +0100 Subject: hw/block/nvme: report non-mdts command size limit for dsm Dataset Management is not subject to MDTS, but exceeded a certain size per range causes internal looping. Report this limit (DMRSL) in the NVM command set specific identify controller data structure. Signed-off-by: Gollu Appalanaidu Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- hw/block/nvme.c | 27 +++++++++++++++++++-------- hw/block/nvme.h | 2 ++ hw/block/trace-events | 1 + include/block/nvme.h | 11 +++++++++++ 4 files changed, 33 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/hw/block/nvme.c b/hw/block/nvme.c index b81c4c3705..30aef5b09e 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1789,6 +1789,10 @@ static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_dsm_deallocate(nvme_cid(req), nvme_nsid(ns), slba, nlb); + if (nlb > n->dmrsl) { + trace_pci_nvme_dsm_single_range_limit_exceeded(nlb, n->dmrsl); + } + offset = nvme_l2b(ns, slba); len = nvme_l2b(ns, nlb); @@ -3208,20 +3212,24 @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req) static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req) { NvmeIdentify *c = (NvmeIdentify *)&req->cmd; - NvmeIdCtrlZoned id = {}; + uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {}; trace_pci_nvme_identify_ctrl_csi(c->csi); - if (c->csi == NVME_CSI_NVM) { - return nvme_rpt_empty_id_struct(n, req); - } else if (c->csi == NVME_CSI_ZONED) { - id.zasl = n->params.zasl; + switch (c->csi) { + case NVME_CSI_NVM: + ((NvmeIdCtrlNvm *)&id)->dmrsl = cpu_to_le32(n->dmrsl); + break; - return nvme_dma(n, (uint8_t *)&id, sizeof(id), - DMA_DIRECTION_FROM_DEVICE, req); + case NVME_CSI_ZONED: + ((NvmeIdCtrlZoned *)&id)->zasl = n->params.zasl; + break; + + default: + return NVME_INVALID_FIELD | NVME_DNR; } - return NVME_INVALID_FIELD | NVME_DNR; + return nvme_dma(n, id, sizeof(id), DMA_DIRECTION_FROM_DEVICE, req); } static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req) @@ -4655,6 +4663,9 @@ int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) n->namespaces[nsid - 1] = ns; + n->dmrsl = MIN_NON_ZERO(n->dmrsl, + BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); + return 0; } diff --git a/hw/block/nvme.h b/hw/block/nvme.h index f45ace0cff..294fac1def 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -171,6 +171,8 @@ typedef struct NvmeCtrl { QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue; int aer_queued; + uint32_t dmrsl; + NvmeSubsystem *subsys; NvmeNamespace namespace; diff --git a/hw/block/trace-events b/hw/block/trace-events index c165ee2a97..8deeacc8c3 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -51,6 +51,7 @@ pci_nvme_copy_cb(uint16_t cid) "cid %"PRIu16"" pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d" pci_nvme_dsm(uint16_t cid, uint32_t nsid, uint32_t nr, uint32_t attr) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu32" attr 0x%"PRIx32"" pci_nvme_dsm_deallocate(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32"" +pci_nvme_dsm_single_range_limit_exceeded(uint32_t nlb, uint32_t dmrsl) "nlb %"PRIu32" dmrsl %"PRIu32"" pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" pci_nvme_compare_cb(uint16_t cid) "cid %"PRIu16"" pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16"" diff --git a/include/block/nvme.h b/include/block/nvme.h index b23f3ae227..16d8c4c90f 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -1041,6 +1041,16 @@ typedef struct NvmeIdCtrlZoned { uint8_t rsvd1[4095]; } NvmeIdCtrlZoned; +typedef struct NvmeIdCtrlNvm { + uint8_t vsl; + uint8_t wzsl; + uint8_t wusl; + uint8_t dmrl; + uint32_t dmrsl; + uint64_t dmsl; + uint8_t rsvd16[4080]; +} NvmeIdCtrlNvm; + enum NvmeIdCtrlOacs { NVME_OACS_SECURITY = 1 << 0, NVME_OACS_FORMAT = 1 << 1, @@ -1396,6 +1406,7 @@ static inline void _nvme_check_size(void) QEMU_BUILD_BUG_ON(sizeof(NvmeEffectsLog) != 4096); QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrl) != 4096); QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrlZoned) != 4096); + QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrlNvm) != 4096); QEMU_BUILD_BUG_ON(sizeof(NvmeLBAF) != 4); QEMU_BUILD_BUG_ON(sizeof(NvmeLBAFE) != 16); QEMU_BUILD_BUG_ON(sizeof(NvmeIdNs) != 4096); -- cgit v1.2.3-55-g7522 From 645ce1a70cb6bedc85a11edb547db091375dea55 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sat, 6 Feb 2021 12:18:09 +0900 Subject: hw/block/nvme: support namespace attachment command This patch supports Namespace Attachment command for the pre-defined nvme-ns device nodes. Of course, attach/detach namespace should only be supported in case 'subsys' is given. This is because if we detach a namespace from a controller, somebody needs to manage the detached, but allocated namespace in the NVMe subsystem. As command effect for the namespace attachment command is registered, the host will be notified that namespace inventory is changed so that host will rescan the namespace inventory after this command. For example, kernel driver manages this command effect via passthru IOCTL. Signed-off-by: Minwoo Im Reviewed-by: Keith Busch Reviewed-by: Klaus Jensen Tested-by: Klaus Jensen [k.jensen: rebased for dma refactor] Signed-off-by: Klaus Jensen --- hw/block/nvme-subsys.h | 10 +++++++++ hw/block/nvme.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++- hw/block/nvme.h | 5 +++++ hw/block/trace-events | 2 ++ include/block/nvme.h | 6 +++++ 5 files changed, 82 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h index 83a6427b6e..fb66ae752a 100644 --- a/hw/block/nvme-subsys.h +++ b/hw/block/nvme-subsys.h @@ -34,6 +34,16 @@ typedef struct NvmeSubsystem { int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp); int nvme_subsys_register_ns(NvmeNamespace *ns, Error **errp); +static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys, + uint32_t cntlid) +{ + if (!subsys) { + return NULL; + } + + return subsys->ctrls[cntlid]; +} + /* * Return allocated namespace of the specified nsid in the subsystem. */ diff --git a/hw/block/nvme.c b/hw/block/nvme.c index b4843d3bcf..86fbab1fc4 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -196,6 +196,7 @@ static const uint32_t nvme_cse_acs[256] = { [NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC, }; static const uint32_t nvme_cse_iocs_none[256]; @@ -3905,6 +3906,61 @@ static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req) return NVME_NO_COMPLETE; } +static void __nvme_select_ns_iocs(NvmeCtrl *n, NvmeNamespace *ns); +static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns; + NvmeCtrl *ctrl; + uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {}; + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); + bool attach = !(dw10 & 0xf); + uint16_t *nr_ids = &list[0]; + uint16_t *ids = &list[1]; + uint16_t ret; + int i; + + trace_pci_nvme_ns_attachment(nvme_cid(req), dw10 & 0xf); + + ns = nvme_subsys_ns(n->subsys, nsid); + if (!ns) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ret = nvme_h2c(n, (uint8_t *)list, 4096, req); + if (ret) { + return ret; + } + + if (!*nr_ids) { + return NVME_NS_CTRL_LIST_INVALID | NVME_DNR; + } + + for (i = 0; i < *nr_ids; i++) { + ctrl = nvme_subsys_ctrl(n->subsys, ids[i]); + if (!ctrl) { + return NVME_NS_CTRL_LIST_INVALID | NVME_DNR; + } + + if (attach) { + if (nvme_ns_is_attached(ctrl, ns)) { + return NVME_NS_ALREADY_ATTACHED | NVME_DNR; + } + + nvme_ns_attach(ctrl, ns); + __nvme_select_ns_iocs(ctrl, ns); + } else { + if (!nvme_ns_is_attached(ctrl, ns)) { + return NVME_NS_NOT_ATTACHED | NVME_DNR; + } + + nvme_ns_detach(ctrl, ns); + } + } + + return NVME_SUCCESS; +} + static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) { trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode, @@ -3941,6 +3997,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) return nvme_get_feature(n, req); case NVME_ADM_CMD_ASYNC_EV_REQ: return nvme_aer(n, req); + case NVME_ADM_CMD_NS_ATTACHMENT: + return nvme_ns_attachment(n, req); default: assert(false); } @@ -4910,7 +4968,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->mdts = n->params.mdts; id->ver = cpu_to_le32(NVME_SPEC_VER); - id->oacs = cpu_to_le16(0); + id->oacs = cpu_to_le16(NVME_OACS_NS_MGMT); id->cntrltype = 0x1; /* diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 85a7b5a14f..1287bc2cd1 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -235,6 +235,11 @@ static inline void nvme_ns_attach(NvmeCtrl *n, NvmeNamespace *ns) n->namespaces[nvme_nsid(ns) - 1] = ns; } +static inline void nvme_ns_detach(NvmeCtrl *n, NvmeNamespace *ns) +{ + n->namespaces[nvme_nsid(ns) - 1] = NULL; +} + static inline NvmeCQueue *nvme_cq(NvmeRequest *req) { NvmeSQueue *sq = req->sq; diff --git a/hw/block/trace-events b/hw/block/trace-events index 60a076cea5..c5dba935a0 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -84,6 +84,8 @@ pci_nvme_aer(uint16_t cid) "cid %"PRIu16"" pci_nvme_aer_aerl_exceeded(void) "aerl exceeded" pci_nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8"" pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" +pci_nvme_ns_attachment(uint16_t cid, uint8_t sel) "cid %"PRIu16", sel=0x%"PRIx8"" +pci_nvme_ns_attachment_attach(uint16_t cntlid, uint32_t nsid) "cntlid=0x%"PRIx16", nsid=0x%"PRIx32"" pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" pci_nvme_enqueue_event_noqueue(int queued) "queued %d" pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8"" diff --git a/include/block/nvme.h b/include/block/nvme.h index 16d8c4c90f..03471a4d5a 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -566,6 +566,7 @@ enum NvmeAdminCommands { NVME_ADM_CMD_ASYNC_EV_REQ = 0x0c, NVME_ADM_CMD_ACTIVATE_FW = 0x10, NVME_ADM_CMD_DOWNLOAD_FW = 0x11, + NVME_ADM_CMD_NS_ATTACHMENT = 0x15, NVME_ADM_CMD_FORMAT_NVM = 0x80, NVME_ADM_CMD_SECURITY_SEND = 0x81, NVME_ADM_CMD_SECURITY_RECV = 0x82, @@ -836,6 +837,9 @@ enum NvmeStatusCodes { NVME_FEAT_NOT_CHANGEABLE = 0x010e, NVME_FEAT_NOT_NS_SPEC = 0x010f, NVME_FW_REQ_SUSYSTEM_RESET = 0x0110, + NVME_NS_ALREADY_ATTACHED = 0x0118, + NVME_NS_NOT_ATTACHED = 0x011A, + NVME_NS_CTRL_LIST_INVALID = 0x011C, NVME_CONFLICTING_ATTRS = 0x0180, NVME_INVALID_PROT_INFO = 0x0181, NVME_WRITE_TO_RO = 0x0182, @@ -951,6 +955,7 @@ typedef struct QEMU_PACKED NvmePSD { uint8_t resv[16]; } NvmePSD; +#define NVME_CONTROLLER_LIST_SIZE 2048 #define NVME_IDENTIFY_DATA_SIZE 4096 enum NvmeIdCns { @@ -1055,6 +1060,7 @@ enum NvmeIdCtrlOacs { NVME_OACS_SECURITY = 1 << 0, NVME_OACS_FORMAT = 1 << 1, NVME_OACS_FW = 1 << 2, + NVME_OACS_NS_MGMT = 1 << 3, }; enum NvmeIdCtrlOncs { -- cgit v1.2.3-55-g7522 From f432fdfa1215bc3a00468b2e711176be279b0fd2 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sun, 28 Feb 2021 17:51:02 +0900 Subject: hw/block/nvme: support changed namespace asynchronous event If namespace inventory is changed due to some reasons (e.g., namespace attachment/detachment), controller can send out event notifier to the host to manage namespaces. This patch sends out the AEN to the host after either attach or detach namespaces from controllers. To support clear of the event from the controller, this patch also implemented Get Log Page command for Changed Namespace List log type. To return namespace id list through the command, when namespace inventory is updated, id is added to the per-controller list (changed_ns_list). To indicate the support of this async event, this patch set OAES(Optional Asynchronous Events Supported) in Identify Controller data structure. Signed-off-by: Minwoo Im Reviewed-by: Keith Busch Reviewed-by: Klaus Jensen Tested-by: Klaus Jensen Signed-off-by: Klaus Jensen --- hw/block/nvme-ns.h | 1 + hw/block/nvme.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++ hw/block/nvme.h | 4 ++++ include/block/nvme.h | 7 +++++++ 4 files changed, 68 insertions(+) (limited to 'include') diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h index b0c00e115d..318d3aebe1 100644 --- a/hw/block/nvme-ns.h +++ b/hw/block/nvme-ns.h @@ -53,6 +53,7 @@ typedef struct NvmeNamespace { uint8_t csi; NvmeSubsystem *subsys; + QTAILQ_ENTRY(NvmeNamespace) entry; NvmeIdNsZoned *id_ns_zoned; NvmeZone *zone_array; diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 86fbab1fc4..cf0bb508aa 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -3015,6 +3015,48 @@ static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, return nvme_c2h(n, (uint8_t *)&errlog, trans_len, req); } +static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + uint32_t nslist[1024]; + uint32_t trans_len; + int i = 0; + uint32_t nsid; + + memset(nslist, 0x0, sizeof(nslist)); + trans_len = MIN(sizeof(nslist) - off, buf_len); + + while ((nsid = find_first_bit(n->changed_nsids, NVME_CHANGED_NSID_SIZE)) != + NVME_CHANGED_NSID_SIZE) { + /* + * If more than 1024 namespaces, the first entry in the log page should + * be set to 0xffffffff and the others to 0 as spec. + */ + if (i == ARRAY_SIZE(nslist)) { + memset(nslist, 0x0, sizeof(nslist)); + nslist[0] = 0xffffffff; + break; + } + + nslist[i++] = nsid; + clear_bit(nsid, n->changed_nsids); + } + + /* + * Remove all the remaining list entries in case returns directly due to + * more than 1024 namespaces. + */ + if (nslist[0] == 0xffffffff) { + bitmap_zero(n->changed_nsids, NVME_CHANGED_NSID_SIZE); + } + + if (!rae) { + nvme_clear_events(n, NVME_AER_TYPE_NOTICE); + } + + return nvme_c2h(n, ((uint8_t *)nslist) + off, trans_len, req); +} + static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, uint64_t off, NvmeRequest *req) { @@ -3098,6 +3140,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) return nvme_smart_info(n, rae, len, off, req); case NVME_LOG_FW_SLOT_INFO: return nvme_fw_log_info(n, len, off, req); + case NVME_LOG_CHANGED_NSLIST: + return nvme_changed_nslist(n, rae, len, off, req); case NVME_LOG_CMD_EFFECTS: return nvme_cmd_effects(n, csi, len, off, req); default: @@ -3956,6 +4000,16 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) nvme_ns_detach(ctrl, ns); } + + /* + * Add namespace id to the changed namespace id list for event clearing + * via Get Log Page command. + */ + if (!test_and_set_bit(nsid, ctrl->changed_nsids)) { + nvme_enqueue_event(ctrl, NVME_AER_TYPE_NOTICE, + NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED, + NVME_LOG_CHANGED_NSLIST); + } } return NVME_SUCCESS; @@ -4954,6 +5008,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->cntlid = cpu_to_le16(n->cntlid); + id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR); + id->rab = 6; if (n->params.use_intel_id) { diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 1287bc2cd1..4955d649c7 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -192,6 +192,10 @@ typedef struct NvmeCtrl { uint32_t dmrsl; + /* Namespace ID is started with 1 so bitmap should be 1-based */ +#define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1) + DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE); + NvmeSubsystem *subsys; NvmeNamespace namespace; diff --git a/include/block/nvme.h b/include/block/nvme.h index 03471a4d5a..7ee887022a 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -760,6 +760,7 @@ typedef struct QEMU_PACKED NvmeCopySourceRange { enum NvmeAsyncEventRequest { NVME_AER_TYPE_ERROR = 0, NVME_AER_TYPE_SMART = 1, + NVME_AER_TYPE_NOTICE = 2, NVME_AER_TYPE_IO_SPECIFIC = 6, NVME_AER_TYPE_VENDOR_SPECIFIC = 7, NVME_AER_INFO_ERR_INVALID_DB_REGISTER = 0, @@ -771,6 +772,7 @@ enum NvmeAsyncEventRequest { NVME_AER_INFO_SMART_RELIABILITY = 0, NVME_AER_INFO_SMART_TEMP_THRESH = 1, NVME_AER_INFO_SMART_SPARE_THRESH = 2, + NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED = 0, }; typedef struct QEMU_PACKED NvmeAerResult { @@ -940,6 +942,7 @@ enum NvmeLogIdentifier { NVME_LOG_ERROR_INFO = 0x01, NVME_LOG_SMART_INFO = 0x02, NVME_LOG_FW_SLOT_INFO = 0x03, + NVME_LOG_CHANGED_NSLIST = 0x04, NVME_LOG_CMD_EFFECTS = 0x05, }; @@ -1056,6 +1059,10 @@ typedef struct NvmeIdCtrlNvm { uint8_t rsvd16[4080]; } NvmeIdCtrlNvm; +enum NvmeIdCtrlOaes { + NVME_OAES_NS_ATTR = 1 << 8, +}; + enum NvmeIdCtrlOacs { NVME_OACS_SECURITY = 1 << 0, NVME_OACS_FORMAT = 1 << 1, -- cgit v1.2.3-55-g7522 From 23fb7dfeca17c55e4329ca98459d33fc204c1f59 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Thu, 11 Feb 2021 00:10:25 +0900 Subject: hw/block/nvme: support Identify NS Attached Controller List Support Identify command for Namespace attached controller list. This command handler will traverse the controller instances in the given subsystem to figure out whether the specified nsid is attached to the controllers or not. The 4096bytes Identify data will return with the first entry (16bits) indicating the number of the controller id entries. So, the data can hold up to 2047 entries for the controller ids. Signed-off-by: Minwoo Im Reviewed-by: Keith Busch Reviewed-by: Klaus Jensen Tested-by: Klaus Jensen [k.jensen: rebased for dma refactor] Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 41 +++++++++++++++++++++++++++++++++++++++++ hw/block/trace-events | 1 + include/block/nvme.h | 1 + 3 files changed, 43 insertions(+) (limited to 'include') diff --git a/hw/block/nvme.c b/hw/block/nvme.c index cf0bb508aa..d439e44db8 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -3338,6 +3338,45 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active) return NVME_INVALID_CMD_SET | NVME_DNR; } +static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + uint16_t min_id = le16_to_cpu(c->ctrlid); + uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {}; + uint16_t *ids = &list[1]; + NvmeNamespace *ns; + NvmeCtrl *ctrl; + int cntlid, nr_ids = 0; + + trace_pci_nvme_identify_ns_attached_list(min_id); + + if (c->nsid == NVME_NSID_BROADCAST) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ns = nvme_subsys_ns(n->subsys, c->nsid); + if (!ns) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + for (cntlid = min_id; cntlid < ARRAY_SIZE(n->subsys->ctrls); cntlid++) { + ctrl = nvme_subsys_ctrl(n->subsys, cntlid); + if (!ctrl) { + continue; + } + + if (!nvme_ns_is_attached(ctrl, ns)) { + continue; + } + + ids[nr_ids++] = cntlid; + } + + list[0] = nr_ids; + + return nvme_c2h(n, (uint8_t *)list, sizeof(list), req); +} + static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, bool active) { @@ -3540,6 +3579,8 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req) return nvme_identify_ns(n, req, true); case NVME_ID_CNS_NS_PRESENT: return nvme_identify_ns(n, req, false); + case NVME_ID_CNS_NS_ATTACHED_CTRL_LIST: + return nvme_identify_ns_attached_list(n, req); case NVME_ID_CNS_CS_NS: return nvme_identify_ns_csi(n, req, true); case NVME_ID_CNS_CS_NS_PRESENT: diff --git a/hw/block/trace-events b/hw/block/trace-events index c5dba935a0..ef06d2ea74 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -66,6 +66,7 @@ pci_nvme_identify(uint16_t cid, uint8_t cns, uint16_t ctrlid, uint8_t csi) "cid pci_nvme_identify_ctrl(void) "identify controller" pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8"" pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32"" +pci_nvme_identify_ns_attached_list(uint16_t cntid) "cntid=%"PRIu16"" pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8"" pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32"" pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8"" diff --git a/include/block/nvme.h b/include/block/nvme.h index 7ee887022a..372d0f2799 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -971,6 +971,7 @@ enum NvmeIdCns { NVME_ID_CNS_CS_NS_ACTIVE_LIST = 0x07, NVME_ID_CNS_NS_PRESENT_LIST = 0x10, NVME_ID_CNS_NS_PRESENT = 0x11, + NVME_ID_CNS_NS_ATTACHED_CTRL_LIST = 0x12, NVME_ID_CNS_CS_NS_PRESENT_LIST = 0x1a, NVME_ID_CNS_CS_NS_PRESENT = 0x1b, NVME_ID_CNS_IO_COMMAND_SET = 0x1c, -- cgit v1.2.3-55-g7522