From 146f720c55637410062041f68dc908645cd18aaa Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Thu, 4 Feb 2021 09:55:48 +0100 Subject: hw/block/nvme: end-to-end data protection Add support for namespaces formatted with protection information. The type of end-to-end data protection (i.e. Type 1, Type 2 or Type 3) is selected with the `pi` nvme-ns device parameter. If the number of metadata bytes is larger than 8, the `pil` nvme-ns device parameter may be used to control the location of the 8-byte DIF tuple. The default `pil` value of '0' causes the DIF tuple to be transferred as the last 8 bytes of the metadata. Set `pil` to 1 to store it in the first eight bytes instead. Co-authored-by: Gollu Appalanaidu Signed-off-by: Gollu Appalanaidu Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- include/block/nvme.h | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/block/nvme.h b/include/block/nvme.h index 372d0f2799..288038dc95 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -696,12 +696,17 @@ enum { NVME_RW_DSM_LATENCY_LOW = 3 << 4, NVME_RW_DSM_SEQ_REQ = 1 << 6, NVME_RW_DSM_COMPRESSED = 1 << 7, + NVME_RW_PIREMAP = 1 << 9, NVME_RW_PRINFO_PRACT = 1 << 13, NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, NVME_RW_PRINFO_PRCHK_APP = 1 << 11, NVME_RW_PRINFO_PRCHK_REF = 1 << 10, + NVME_RW_PRINFO_PRCHK_MASK = 7 << 10, + }; +#define NVME_RW_PRINFO(control) ((control >> 10) & 0xf) + typedef struct QEMU_PACKED NvmeDsmCmd { uint8_t opcode; uint8_t flags; @@ -1324,14 +1329,22 @@ typedef struct QEMU_PACKED NvmeIdNsZoned { #define NVME_ID_NS_DPC_TYPE_MASK 0x7 enum NvmeIdNsDps { - DPS_TYPE_NONE = 0, - DPS_TYPE_1 = 1, - DPS_TYPE_2 = 2, - DPS_TYPE_3 = 3, - DPS_TYPE_MASK = 0x7, - DPS_FIRST_EIGHT = 8, + NVME_ID_NS_DPS_TYPE_NONE = 0, + NVME_ID_NS_DPS_TYPE_1 = 1, + NVME_ID_NS_DPS_TYPE_2 = 2, + NVME_ID_NS_DPS_TYPE_3 = 3, + NVME_ID_NS_DPS_TYPE_MASK = 0x7, + NVME_ID_NS_DPS_FIRST_EIGHT = 8, }; +#define 
NVME_ID_NS_DPS_TYPE(dps) (dps & NVME_ID_NS_DPS_TYPE_MASK) + +typedef struct NvmeDifTuple { + uint16_t guard; + uint16_t apptag; + uint32_t reftag; +} NvmeDifTuple; + enum NvmeZoneAttr { NVME_ZA_FINISHED_BY_CTLR = 1 << 0, NVME_ZA_FINISH_RECOMMENDED = 1 << 1, @@ -1428,5 +1441,6 @@ static inline void _nvme_check_size(void) QEMU_BUILD_BUG_ON(sizeof(NvmeSglDescriptor) != 16); QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsDescr) != 4); QEMU_BUILD_BUG_ON(sizeof(NvmeZoneDescr) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeDifTuple) != 8); } #endif -- cgit v1.2.3-55-g7522 From 3e1da158c47f3a6f5d48794f99fe01096531ec2e Mon Sep 17 00:00:00 2001 From: Gollu Appalanaidu Date: Tue, 9 Feb 2021 18:29:42 +0100 Subject: hw/block/nvme: add verify command See NVM Express 1.4, section 6.14 ("Verify Command"). Signed-off-by: Gollu Appalanaidu [k.jensen: rebased, refactored for e2e] Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- hw/block/nvme-dif.c | 4 +- hw/block/nvme-dif.h | 2 + hw/block/nvme.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++++- hw/block/nvme.h | 1 + hw/block/trace-events | 3 ++ include/block/nvme.h | 2 + 6 files changed, 156 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/hw/block/nvme-dif.c b/hw/block/nvme-dif.c index 1b628dead7..2038d724bd 100644 --- a/hw/block/nvme-dif.c +++ b/hw/block/nvme-dif.c @@ -162,8 +162,8 @@ uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, return NVME_SUCCESS; } -static uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, - size_t mlen, uint64_t slba) +uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, + uint64_t slba) { BlockBackend *blk = ns->blkconf.blk; BlockDriverState *bs = blk_bs(blk); diff --git a/hw/block/nvme-dif.h b/hw/block/nvme-dif.h index 793829782c..5a8e37c852 100644 --- a/hw/block/nvme-dif.h +++ b/hw/block/nvme-dif.h @@ -39,6 +39,8 @@ static const uint16_t t10_dif_crc_table[256] = { uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t 
slba, uint32_t reftag); +uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, + uint64_t slba); void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, uint8_t *mbuf, size_t mlen, uint16_t apptag, uint32_t reftag); diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 7af651dc30..d754bd8e04 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -208,6 +208,7 @@ static const uint32_t nvme_cse_iocs_nvm[256] = { [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_READ] = NVME_CMD_EFF_CSUPP, [NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP, [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, }; @@ -218,6 +219,7 @@ static const uint32_t nvme_cse_iocs_zoned[256] = { [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_READ] = NVME_CMD_EFF_CSUPP, [NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP, [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, [NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, @@ -1884,6 +1886,90 @@ static void nvme_aio_flush_cb(void *opaque, int ret) nvme_enqueue_req_completion(nvme_cq(req), req); } +static void nvme_verify_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + BlockAcctCookie *acct = &req->acct; + BlockAcctStats *stats = blk_get_stats(blk); + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint16_t ctrl = le16_to_cpu(rw->control); + uint16_t apptag = le16_to_cpu(rw->apptag); + uint16_t appmask = le16_to_cpu(rw->appmask); + uint32_t reftag = le32_to_cpu(rw->reftag); + uint16_t status; + + trace_pci_nvme_verify_cb(nvme_cid(req), NVME_RW_PRINFO(ctrl), apptag, + appmask, reftag); + + if (ret) { + 
block_acct_failed(stats, acct); + nvme_aio_err(req, ret); + goto out; + } + + block_acct_done(stats, acct); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, + ctx->mdata.iov.size, slba); + if (status) { + req->status = status; + goto out; + } + + req->status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, + ctx->mdata.bounce, ctx->mdata.iov.size, + ctrl, slba, apptag, appmask, reftag); + } + +out: + qemu_iovec_destroy(&ctx->data.iov); + g_free(ctx->data.bounce); + + qemu_iovec_destroy(&ctx->mdata.iov); + g_free(ctx->mdata.bounce); + + g_free(ctx); + + nvme_enqueue_req_completion(nvme_cq(req), req); +} + + +static void nvme_verify_mdata_in_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + size_t mlen = nvme_m2b(ns, nlb); + uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); + BlockBackend *blk = ns->blkconf.blk; + + trace_pci_nvme_verify_mdata_in_cb(nvme_cid(req), blk_name(blk)); + + if (ret) { + goto out; + } + + ctx->mdata.bounce = g_malloc(mlen); + + qemu_iovec_reset(&ctx->mdata.iov); + qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); + + req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0, + nvme_verify_cb, ctx); + return; + +out: + nvme_verify_cb(ctx, ret); +} + static void nvme_aio_discard_cb(void *opaque, int ret) { NvmeRequest *req = opaque; @@ -2431,6 +2517,62 @@ static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req) return status; } +static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + size_t len = nvme_l2b(ns, nlb); + int64_t offset = nvme_l2b(ns, slba); + uint16_t 
ctrl = le16_to_cpu(rw->control); + uint32_t reftag = le32_to_cpu(rw->reftag); + NvmeBounceContext *ctx = NULL; + uint16_t status; + + trace_pci_nvme_verify(nvme_cid(req), nvme_nsid(ns), slba, nlb); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + status = nvme_check_prinfo(ns, ctrl, slba, reftag); + if (status) { + return status; + } + + if (ctrl & NVME_RW_PRINFO_PRACT) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + } + + status = nvme_check_bounds(ns, slba, nlb); + if (status) { + trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); + return status; + } + + if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { + status = nvme_check_dulbe(ns, slba, nlb); + if (status) { + return status; + } + } + + ctx = g_new0(NvmeBounceContext, 1); + ctx->req = req; + + ctx->data.bounce = g_malloc(len); + + qemu_iovec_init(&ctx->data.iov, 1); + qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len); + + block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size, + BLOCK_ACCT_READ); + + req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0, + nvme_verify_mdata_in_cb, ctx); + return NVME_NO_COMPLETE; +} + static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) { NvmeNamespace *ns = req->ns; @@ -3449,6 +3591,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) return nvme_compare(n, req); case NVME_CMD_DSM: return nvme_dsm(n, req); + case NVME_CMD_VERIFY: + return nvme_verify(n, req); case NVME_CMD_COPY: return nvme_copy(n, req); case NVME_CMD_ZONE_MGMT_SEND: @@ -5770,7 +5914,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->nn = cpu_to_le32(n->num_namespaces); id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP | NVME_ONCS_FEATURES | NVME_ONCS_DSM | - NVME_ONCS_COMPARE | NVME_ONCS_COPY); + NVME_ONCS_COMPARE | NVME_ONCS_COPY | + NVME_ONCS_VERIFY); /* * NOTE: If this device ever supports a command set that does NOT use 0x0 diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 115d017765..11c7fcc317 
100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -98,6 +98,7 @@ static inline const char *nvme_io_opc_str(uint8_t opc) case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE"; case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES"; case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM"; + case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY"; case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY"; case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND"; case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV"; diff --git a/hw/block/trace-events b/hw/block/trace-events index fc243d0b46..72114a5946 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -60,6 +60,9 @@ pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid %"P pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32"" pci_nvme_copy_in_complete(uint16_t cid) "cid %"PRIu16"" pci_nvme_copy_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_verify(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" +pci_nvme_verify_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_verify_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" pci_nvme_rw_complete_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d" pci_nvme_dsm(uint16_t cid, uint32_t nsid, uint32_t nr, uint32_t attr) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu32" attr 0x%"PRIx32"" diff --git a/include/block/nvme.h b/include/block/nvme.h index 288038dc95..ba757b32db 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -580,6 +580,7 @@ enum NvmeIoCommands { NVME_CMD_COMPARE = 0x05, NVME_CMD_WRITE_ZEROES = 0x08, 
NVME_CMD_DSM = 0x09, + NVME_CMD_VERIFY = 0x0c, NVME_CMD_COPY = 0x19, NVME_CMD_ZONE_MGMT_SEND = 0x79, NVME_CMD_ZONE_MGMT_RECV = 0x7a, @@ -1084,6 +1085,7 @@ enum NvmeIdCtrlOncs { NVME_ONCS_FEATURES = 1 << 4, NVME_ONCS_RESRVATIONS = 1 << 5, NVME_ONCS_TIMESTAMP = 1 << 6, + NVME_ONCS_VERIFY = 1 << 7, NVME_ONCS_COPY = 1 << 8, }; -- cgit v1.2.3-55-g7522 From dc04d25e2f3f7e26f7f97b860992076b5f04afdb Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Fri, 12 Feb 2021 13:11:39 +0100 Subject: hw/block/nvme: add support for the format nvm command The Format NVM admin command can reformat one or more namespaces with a different LBA size, metadata size and protection information type. This patch introduces the Format NVM command with support for the LBA format, metadata settings and protection information fields. Secure erase and support for formatting zoned namespaces are yet to be added. The parameter checks in this patch are derived from Keith's old branch. Signed-off-by: Minwoo Im [anaidu.gollu: rebased on e2e] Signed-off-by: Gollu Appalanaidu [k.jensen: rebased for reworked aio tracking] Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- hw/block/nvme-ns.c | 3 +- hw/block/nvme-ns.h | 7 ++ hw/block/nvme.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++++- hw/block/nvme.h | 1 + hw/block/trace-events | 3 + include/block/nvme.h | 1 + 6 files changed, 188 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c index 1e8ef36ba5..7f8d139a86 100644 --- a/hw/block/nvme-ns.c +++ b/hw/block/nvme-ns.c @@ -32,7 +32,7 @@ #define MIN_DISCARD_GRANULARITY (4 * KiB) -static void nvme_ns_init_format(NvmeNamespace *ns) +void nvme_ns_init_format(NvmeNamespace *ns) { NvmeIdNs *id_ns = &ns->id_ns; BlockDriverInfo bdi; @@ -66,6 +66,7 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) int i; ns->csi = NVME_CSI_NVM; + ns->status = 0x0; ns->id_ns.dlfeat = 0x1; diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h 
index 07e1688080..9ab7894fc8 100644 --- a/hw/block/nvme-ns.h +++ b/hw/block/nvme-ns.h @@ -59,6 +59,7 @@ typedef struct NvmeNamespace { NvmeIdNs id_ns; const uint32_t *iocs; uint8_t csi; + uint16_t status; NvmeSubsystem *subsys; QTAILQ_ENTRY(NvmeNamespace) entry; @@ -84,6 +85,11 @@ typedef struct NvmeNamespace { } features; } NvmeNamespace; +static inline uint16_t nvme_ns_status(NvmeNamespace *ns) +{ + return ns->status; +} + static inline uint32_t nvme_nsid(NvmeNamespace *ns) { if (ns) { @@ -218,6 +224,7 @@ static inline void nvme_aor_dec_active(NvmeNamespace *ns) assert(ns->nr_active_zones >= 0); } +void nvme_ns_init_format(NvmeNamespace *ns); int nvme_ns_setup(NvmeNamespace *ns, Error **errp); void nvme_ns_drain(NvmeNamespace *ns); void nvme_ns_shutdown(NvmeNamespace *ns); diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 64cb966ab6..6842b01ab5 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -213,6 +213,7 @@ static const uint32_t nvme_cse_acs[256] = { [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC, + [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, }; static const uint32_t nvme_cse_iocs_none[256]; @@ -1866,6 +1867,42 @@ out: nvme_rw_complete_cb(req, ret); } +struct nvme_aio_format_ctx { + NvmeRequest *req; + NvmeNamespace *ns; + + /* number of outstanding write zeroes for this namespace */ + int *count; +}; + +static void nvme_aio_format_cb(void *opaque, int ret) +{ + struct nvme_aio_format_ctx *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = ctx->ns; + uintptr_t *num_formats = (uintptr_t *)&req->opaque; + int *count = ctx->count; + + g_free(ctx); + + if (ret) { + nvme_aio_err(req, ret); + } + + if (--(*count)) { + return; + } + + g_free(count); + ns->status = 0x0; + + if (--(*num_formats)) { + return; + } + + nvme_enqueue_req_completion(nvme_cq(req), req); +} + struct nvme_aio_flush_ctx { 
NvmeRequest *req; NvmeNamespace *ns; @@ -3556,6 +3593,7 @@ static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req) static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) { uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint16_t status; trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req), req->cmd.opcode, nvme_io_opc_str(req->cmd.opcode)); @@ -3597,6 +3635,11 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) return NVME_INVALID_OPCODE | NVME_DNR; } + status = nvme_ns_status(req->ns); + if (unlikely(status)) { + return status; + } + switch (req->cmd.opcode) { case NVME_CMD_WRITE_ZEROES: return nvme_write_zeroes(n, req); @@ -4898,6 +4941,134 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) return NVME_SUCCESS; } +static uint16_t nvme_format_ns(NvmeCtrl *n, NvmeNamespace *ns, uint8_t lbaf, + uint8_t mset, uint8_t pi, uint8_t pil, + NvmeRequest *req) +{ + int64_t len, offset; + struct nvme_aio_format_ctx *ctx; + BlockBackend *blk = ns->blkconf.blk; + uint16_t ms; + uintptr_t *num_formats = (uintptr_t *)&req->opaque; + int *count; + + if (ns->params.zoned) { + return NVME_INVALID_FORMAT | NVME_DNR; + } + + trace_pci_nvme_format_ns(nvme_cid(req), nvme_nsid(ns), lbaf, mset, pi, pil); + + if (lbaf > ns->id_ns.nlbaf) { + return NVME_INVALID_FORMAT | NVME_DNR; + } + + ms = ns->id_ns.lbaf[lbaf].ms; + + if (pi && (ms < sizeof(NvmeDifTuple))) { + return NVME_INVALID_FORMAT | NVME_DNR; + } + + if (pi && pi > NVME_ID_NS_DPS_TYPE_3) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + nvme_ns_drain(ns); + nvme_ns_shutdown(ns); + nvme_ns_cleanup(ns); + + ns->id_ns.dps = (pil << 3) | pi; + ns->id_ns.flbas = lbaf | (mset << 4); + + nvme_ns_init_format(ns); + + ns->status = NVME_FORMAT_IN_PROGRESS; + + len = ns->size; + offset = 0; + + count = g_new(int, 1); + *count = 1; + + (*num_formats)++; + + while (len) { + ctx = g_new(struct nvme_aio_format_ctx, 1); + ctx->req = req; + ctx->ns = ns; + ctx->count = count; + + size_t bytes = 
MIN(BDRV_REQUEST_MAX_BYTES, len); + + (*count)++; + + blk_aio_pwrite_zeroes(blk, offset, bytes, BDRV_REQ_MAY_UNMAP, + nvme_aio_format_cb, ctx); + + offset += bytes; + len -= bytes; + + } + + (*count)--; + + return NVME_NO_COMPLETE; +} + +static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns; + uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint8_t lbaf = dw10 & 0xf; + uint8_t mset = (dw10 >> 4) & 0x1; + uint8_t pi = (dw10 >> 5) & 0x7; + uint8_t pil = (dw10 >> 8) & 0x1; + uintptr_t *num_formats = (uintptr_t *)&req->opaque; + uint16_t status; + int i; + + trace_pci_nvme_format(nvme_cid(req), nsid, lbaf, mset, pi, pil); + + /* 1-initialize; see the comment in nvme_dsm */ + *num_formats = 1; + + if (nsid != NVME_NSID_BROADCAST) { + if (!nvme_nsid_valid(n, nsid)) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = nvme_ns(n, nsid); + if (!ns) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + status = nvme_format_ns(n, ns, lbaf, mset, pi, pil, req); + if (status && status != NVME_NO_COMPLETE) { + req->status = status; + } + } else { + for (i = 1; i <= n->num_namespaces; i++) { + ns = nvme_ns(n, i); + if (!ns) { + continue; + } + + status = nvme_format_ns(n, ns, lbaf, mset, pi, pil, req); + if (status && status != NVME_NO_COMPLETE) { + req->status = status; + break; + } + } + } + + /* account for the 1-initialization */ + if (--(*num_formats)) { + return NVME_NO_COMPLETE; + } + + return req->status; +} + static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) { trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode, @@ -4936,6 +5107,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) return nvme_aer(n, req); case NVME_ADM_CMD_NS_ATTACHMENT: return nvme_ns_attachment(n, req); + case NVME_ADM_CMD_FORMAT_NVM: + return nvme_format(n, req); default: assert(false); } @@ -5912,7 +6085,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->mdts = 
n->params.mdts; id->ver = cpu_to_le32(NVME_SPEC_VER); - id->oacs = cpu_to_le16(NVME_OACS_NS_MGMT); + id->oacs = cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT); id->cntrltype = 0x1; /* diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 7b082212db..5b0031b11d 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -86,6 +86,7 @@ static inline const char *nvme_adm_opc_str(uint8_t opc) case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES"; case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES"; case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ"; + case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM"; default: return "NVME_ADM_CMD_UNKNOWN"; } } diff --git a/hw/block/trace-events b/hw/block/trace-events index 72114a5946..b71cf7a087 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -41,6 +41,9 @@ pci_nvme_map_sgl(uint8_t typ, uint64_t len) "type 0x%"PRIx8" len %"PRIu64"" pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" pci_nvme_flush(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" +pci_nvme_format(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" +pci_nvme_format_ns(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" +pci_nvme_format_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" pci_nvme_write(uint16_t cid, const char *verb, 
uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" diff --git a/include/block/nvme.h b/include/block/nvme.h index ba757b32db..b0a4e42916 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -828,6 +828,7 @@ enum NvmeStatusCodes { NVME_CAP_EXCEEDED = 0x0081, NVME_NS_NOT_READY = 0x0082, NVME_NS_RESV_CONFLICT = 0x0083, + NVME_FORMAT_IN_PROGRESS = 0x0084, NVME_INVALID_CQID = 0x0100, NVME_INVALID_QID = 0x0101, NVME_MAX_QSIZE_EXCEEDED = 0x0102, -- cgit v1.2.3-55-g7522