author      Klaus Jensen    2021-03-04 08:40:11 +0100
committer   Klaus Jensen    2022-02-14 08:58:29 +0100
commit      e321b4cdc2dd0b5e806ecf759138be7f83774142 (patch)
tree        26b2ef800fefc07b33a88ace23f7784a0404b712 /hw/nvme/ctrl.c
parent      hw/nvme: add ozcs enum (diff)
hw/nvme: add support for zoned random write area
Add support for TP 4076 ("Zoned Random Write Area"), v2021.08.23
("Ratified").
This adds three new namespace parameters: "zoned.numzrwa" (the number of
ZRWA resources, i.e. the number of zones that can have a ZRWA at any one
time), "zoned.zrwas" (the ZRWA size in LBAs) and "zoned.zrwafg" (the ZRWA
flush granularity in LBAs).
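For orientation, here is a minimal sketch of the per-namespace state these parameters feed, restricted to the fields the diff below actually touches (ns->params.numzrwa, ns->zns.numzrwa, ns->zns.zrwas, ns->zns.zrwafg). The wrapper type name is hypothetical; the authoritative definitions live outside this diff (presumably in hw/nvme/nvme.h):

    #include <stdint.h>

    /* Hedged sketch only -- not the real QEMU structs. Field names mirror
     * those referenced by the patch below. */
    typedef struct NvmeNamespaceZrwaState {
        struct {
            uint32_t numzrwa;   /* zoned.numzrwa: ZRWA resources to expose */
        } params;

        struct {
            uint32_t numzrwa;   /* ZRWA resources currently available */
            uint64_t zrwas;     /* zoned.zrwas: ZRWA size in LBAs */
            uint64_t zrwafg;    /* zoned.zrwafg: flush granularity in LBAs */
        } zns;
    } NvmeNamespaceZrwaState;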
Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Diffstat (limited to 'hw/nvme/ctrl.c')
-rw-r--r--   hw/nvme/ctrl.c   171
1 file changed, 152 insertions(+), 19 deletions(-)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 7cb4974c5e..98aac98bef 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -299,26 +299,37 @@ static void nvme_assign_zone_state(NvmeNamespace *ns, NvmeZone *zone,
     }
 }
 
-/*
- * Check if we can open a zone without exceeding open/active limits.
- * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5).
- */
-static int nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn)
+static uint16_t nvme_zns_check_resources(NvmeNamespace *ns, uint32_t act,
+                                         uint32_t opn, uint32_t zrwa)
 {
     if (ns->params.max_active_zones != 0 &&
         ns->nr_active_zones + act > ns->params.max_active_zones) {
         trace_pci_nvme_err_insuff_active_res(ns->params.max_active_zones);
         return NVME_ZONE_TOO_MANY_ACTIVE | NVME_DNR;
     }
+
     if (ns->params.max_open_zones != 0 &&
         ns->nr_open_zones + opn > ns->params.max_open_zones) {
         trace_pci_nvme_err_insuff_open_res(ns->params.max_open_zones);
         return NVME_ZONE_TOO_MANY_OPEN | NVME_DNR;
     }
 
+    if (zrwa > ns->zns.numzrwa) {
+        return NVME_NOZRWA | NVME_DNR;
+    }
+
     return NVME_SUCCESS;
 }
 
+/*
+ * Check if we can open a zone without exceeding open/active limits.
+ * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5).
+ */
+static uint16_t nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn)
+{
+    return nvme_zns_check_resources(ns, act, opn, 0);
+}
+
 static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
 {
     hwaddr hi, lo;
@@ -1628,9 +1639,19 @@ static uint16_t nvme_check_zone_write(NvmeNamespace *ns, NvmeZone *zone,
         return status;
     }
 
-    if (unlikely(slba != zone->w_ptr)) {
-        trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba, zone->w_ptr);
-        return NVME_ZONE_INVALID_WRITE;
+    if (zone->d.za & NVME_ZA_ZRWA_VALID) {
+        uint64_t ezrwa = zone->w_ptr + 2 * ns->zns.zrwas;
+
+        if (slba < zone->w_ptr || slba + nlb > ezrwa) {
+            trace_pci_nvme_err_zone_invalid_write(slba, zone->w_ptr);
+            return NVME_ZONE_INVALID_WRITE;
+        }
+    } else {
+        if (unlikely(slba != zone->w_ptr)) {
+            trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba,
+                                               zone->w_ptr);
+            return NVME_ZONE_INVALID_WRITE;
+        }
     }
 
     if (unlikely((slba + nlb) > zcap)) {
@@ -1710,6 +1731,14 @@ static uint16_t nvme_zrm_finish(NvmeNamespace *ns, NvmeZone *zone)
         /* fallthrough */
     case NVME_ZONE_STATE_CLOSED:
         nvme_aor_dec_active(ns);
+
+        if (zone->d.za & NVME_ZA_ZRWA_VALID) {
+            zone->d.za &= ~NVME_ZA_ZRWA_VALID;
+            if (ns->params.numzrwa) {
+                ns->zns.numzrwa++;
+            }
+        }
+
         /* fallthrough */
     case NVME_ZONE_STATE_EMPTY:
         nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_FULL);
@@ -1745,6 +1774,13 @@ static uint16_t nvme_zrm_reset(NvmeNamespace *ns, NvmeZone *zone)
         /* fallthrough */
     case NVME_ZONE_STATE_CLOSED:
         nvme_aor_dec_active(ns);
+
+        if (zone->d.za & NVME_ZA_ZRWA_VALID) {
+            if (ns->params.numzrwa) {
+                ns->zns.numzrwa++;
+            }
+        }
+
         /* fallthrough */
     case NVME_ZONE_STATE_FULL:
         zone->w_ptr = zone->d.zslba;
@@ -1778,6 +1814,7 @@ static void nvme_zrm_auto_transition_zone(NvmeNamespace *ns)
 
 enum {
     NVME_ZRM_AUTO = 1 << 0,
+    NVME_ZRM_ZRWA = 1 << 1,
 };
 
 static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, NvmeNamespace *ns,
@@ -1796,7 +1833,8 @@ static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, NvmeNamespace *ns,
         if (n->params.auto_transition_zones) {
             nvme_zrm_auto_transition_zone(ns);
         }
-        status = nvme_aor_check(ns, act, 1);
+        status = nvme_zns_check_resources(ns, act, 1,
+                                          (flags & NVME_ZRM_ZRWA) ? 1 : 0);
         if (status) {
             return status;
         }
@@ -1824,6 +1862,12 @@ static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, NvmeNamespace *ns,
         /* fallthrough */
 
     case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+        if (flags & NVME_ZRM_ZRWA) {
+            ns->zns.numzrwa--;
+
+            zone->d.za |= NVME_ZA_ZRWA_VALID;
+        }
+
         return NVME_SUCCESS;
 
     default:
@@ -1837,12 +1881,6 @@ static inline uint16_t nvme_zrm_auto(NvmeCtrl *n, NvmeNamespace *ns,
     return nvme_zrm_open_flags(n, ns, zone, NVME_ZRM_AUTO);
 }
 
-static inline uint16_t nvme_zrm_open(NvmeCtrl *n, NvmeNamespace *ns,
-                                     NvmeZone *zone)
-{
-    return nvme_zrm_open_flags(n, ns, zone, 0);
-}
-
 static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
                                  uint32_t nlb)
 {
@@ -1853,6 +1891,20 @@ static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
     }
 }
 
+static void nvme_zoned_zrwa_implicit_flush(NvmeNamespace *ns, NvmeZone *zone,
+                                           uint32_t nlbc)
+{
+    uint16_t nzrwafgs = DIV_ROUND_UP(nlbc, ns->zns.zrwafg);
+
+    nlbc = nzrwafgs * ns->zns.zrwafg;
+
+    trace_pci_nvme_zoned_zrwa_implicit_flush(zone->d.zslba, nlbc);
+
+    zone->w_ptr += nlbc;
+
+    nvme_advance_zone_wp(ns, zone, nlbc);
+}
+
 static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req)
 {
     NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
@@ -1865,6 +1917,17 @@ static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req)
     zone = nvme_get_zone_by_slba(ns, slba);
     assert(zone);
 
+    if (zone->d.za & NVME_ZA_ZRWA_VALID) {
+        uint64_t ezrwa = zone->w_ptr + ns->zns.zrwas - 1;
+        uint64_t elba = slba + nlb - 1;
+
+        if (elba > ezrwa) {
+            nvme_zoned_zrwa_implicit_flush(ns, zone, elba - ezrwa);
+        }
+
+        return;
+    }
+
     nvme_advance_zone_wp(ns, zone, nlb);
 }
 
@@ -2665,7 +2728,9 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret)
             goto invalid;
         }
 
-        iocb->zone->w_ptr += nlb;
+        if (!(iocb->zone->d.za & NVME_ZA_ZRWA_VALID)) {
+            iocb->zone->w_ptr += nlb;
+        }
     }
 
     qemu_iovec_reset(&iocb->iov);
@@ -3204,6 +3269,10 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
         if (append) {
             bool piremap = !!(ctrl & NVME_RW_PIREMAP);
 
+            if (unlikely(zone->d.za & NVME_ZA_ZRWA_VALID)) {
+                return NVME_INVALID_ZONE_OP | NVME_DNR;
+            }
+
             if (unlikely(slba != zone->d.zslba)) {
                 trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba);
                 status = NVME_INVALID_FIELD;
@@ -3255,7 +3324,9 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
             goto invalid;
         }
 
-        zone->w_ptr += nlb;
+        if (!(zone->d.za & NVME_ZA_ZRWA_VALID)) {
+            zone->w_ptr += nlb;
+        }
     }
 
     data_offset = nvme_l2b(ns, slba);
@@ -3339,7 +3410,24 @@ enum NvmeZoneProcessingMask {
 static uint16_t nvme_open_zone(NvmeNamespace *ns, NvmeZone *zone,
                                NvmeZoneState state, NvmeRequest *req)
 {
-    return nvme_zrm_open(nvme_ctrl(req), ns, zone);
+    NvmeZoneSendCmd *cmd = (NvmeZoneSendCmd *)&req->cmd;
+    int flags = 0;
+
+    if (cmd->zsflags & NVME_ZSFLAG_ZRWA_ALLOC) {
+        uint16_t ozcs = le16_to_cpu(ns->id_ns_zoned->ozcs);
+
+        if (!(ozcs & NVME_ID_NS_ZONED_OZCS_ZRWASUP)) {
+            return NVME_INVALID_ZONE_OP | NVME_DNR;
+        }
+
+        if (zone->w_ptr % ns->zns.zrwafg) {
+            return NVME_NOZRWA | NVME_DNR;
+        }
+
+        flags = NVME_ZRM_ZRWA;
+    }
+
+    return nvme_zrm_open_flags(nvme_ctrl(req), ns, zone, flags);
 }
 
 static uint16_t nvme_close_zone(NvmeNamespace *ns, NvmeZone *zone,
@@ -3614,6 +3702,44 @@ done:
     }
 }
 
+static uint16_t nvme_zone_mgmt_send_zrwa_flush(NvmeCtrl *n, NvmeZone *zone,
+                                               uint64_t elba, NvmeRequest *req)
+{
+    NvmeNamespace *ns = req->ns;
+    uint16_t ozcs = le16_to_cpu(ns->id_ns_zoned->ozcs);
+    uint64_t wp = zone->d.wp;
+    uint32_t nlb = elba - wp + 1;
+    uint16_t status;
+
+
+    if (!(ozcs & NVME_ID_NS_ZONED_OZCS_ZRWASUP)) {
+        return NVME_INVALID_ZONE_OP | NVME_DNR;
+    }
+
+    if (!(zone->d.za & NVME_ZA_ZRWA_VALID)) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    if (elba < wp || elba > wp + ns->zns.zrwas) {
+        return NVME_ZONE_BOUNDARY_ERROR | NVME_DNR;
+    }
+
+    if (nlb % ns->zns.zrwafg) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    status = nvme_zrm_auto(n, ns, zone);
+    if (status) {
+        return status;
+    }
+
+    zone->w_ptr += nlb;
+
+    nvme_advance_zone_wp(ns, zone, nlb);
+
+    return NVME_SUCCESS;
+}
+
 static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
 {
     NvmeZoneSendCmd *cmd = (NvmeZoneSendCmd *)&req->cmd;
@@ -3640,7 +3766,7 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
     }
 
     zone = &ns->zone_array[zone_idx];
-    if (slba != zone->d.zslba) {
+    if (slba != zone->d.zslba && action != NVME_ZONE_ACTION_ZRWA_FLUSH) {
         trace_pci_nvme_err_unaligned_zone_cmd(action, slba, zone->d.zslba);
         return NVME_INVALID_FIELD | NVME_DNR;
     }
@@ -3716,6 +3842,13 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
         }
         break;
 
+    case NVME_ZONE_ACTION_ZRWA_FLUSH:
+        if (all) {
+            return NVME_INVALID_FIELD | NVME_DNR;
+        }
+
+        return nvme_zone_mgmt_send_zrwa_flush(n, zone, slba, req);
+
     default:
         trace_pci_nvme_err_invalid_mgmt_action(action);
         status = NVME_INVALID_FIELD;
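A note on the arithmetic in nvme_zoned_zrwa_implicit_flush() above: when a write lands beyond the current effective ZRWA end, the number of LBAs to flush is rounded up to a whole number of flush granules (zrwafg) before zone->w_ptr and the zone's write pointer are advanced. Below is a self-contained sketch of that rounding, using made-up example values and a local reimplementation of the DIV_ROUND_UP() idiom so it compiles outside QEMU:

    #include <stdint.h>
    #include <stdio.h>

    /* Same round-up-division idiom as QEMU's DIV_ROUND_UP(). */
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        uint64_t zrwafg = 8;  /* hypothetical flush granularity, in LBAs */
        uint32_t nlbc = 13;   /* LBAs by which the write passed the ZRWA end */

        /* Round the implicit flush up to whole granules, as the patch does
         * before advancing the write pointer. */
        uint16_t nzrwafgs = DIV_ROUND_UP(nlbc, zrwafg);
        uint32_t flushed = nzrwafgs * zrwafg;

        /* Prints: flushing 16 LBAs (2 granules of 8 LBAs) */
        printf("flushing %u LBAs (%u granules of %u LBAs)\n",
               (unsigned)flushed, (unsigned)nzrwafgs, (unsigned)zrwafg);
        return 0;
    }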