Diffstat (limited to 'hw/block/nvme-dif.c')
-rw-r--r--  hw/block/nvme-dif.c | 508
1 file changed, 508 insertions(+), 0 deletions(-)
diff --git a/hw/block/nvme-dif.c b/hw/block/nvme-dif.c
new file mode 100644
index 0000000000..2038d724bd
--- /dev/null
+++ b/hw/block/nvme-dif.c
@@ -0,0 +1,508 @@
+#include "qemu/osdep.h"
+#include "hw/block/block.h"
+#include "sysemu/dma.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+#include "trace.h"
+#include "nvme.h"
+#include "nvme-dif.h"
+
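+/*
+ * For Type 1 protection, reference tag checking requires that the Initial
+ * Logical Block Reference Tag matches the least significant 32 bits of the
+ * starting LBA.
+ */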
+uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba,
+ uint32_t reftag)
+{
+ if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) &&
+ (ctrl & NVME_RW_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) {
+ return NVME_INVALID_PROT_INFO | NVME_DNR;
+ }
+
+ return NVME_SUCCESS;
+}
+
+/* from Linux kernel (crypto/crct10dif_common.c) */
+static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer,
+ size_t len)
+{
+ unsigned int i;
+
+ for (i = 0; i < len; i++) {
+ crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
+ }
+
+ return crc;
+}
+
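+/*
+ * Generate protection information for each logical block in the data bounce
+ * buffer. When the DIF tuple is stored in the last eight bytes of the
+ * metadata, 'pil' offsets the tuple within the metadata and the guard CRC
+ * additionally covers the metadata bytes preceding it.
+ */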
+void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
+ uint8_t *mbuf, size_t mlen, uint16_t apptag,
+ uint32_t reftag)
+{
+ uint8_t *end = buf + len;
+ size_t lsize = nvme_lsize(ns);
+ size_t msize = nvme_msize(ns);
+ int16_t pil = 0;
+
+ if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
+ pil = nvme_msize(ns) - sizeof(NvmeDifTuple);
+ }
+
+ trace_pci_nvme_dif_pract_generate_dif(len, lsize, lsize + pil, apptag,
+ reftag);
+
+ for (; buf < end; buf += lsize, mbuf += msize) {
+ NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
+ uint16_t crc = crc_t10dif(0x0, buf, lsize);
+
+ if (pil) {
+ crc = crc_t10dif(crc, mbuf, pil);
+ }
+
+ dif->guard = cpu_to_be16(crc);
+ dif->apptag = cpu_to_be16(apptag);
+ dif->reftag = cpu_to_be32(reftag);
+
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
+ reftag++;
+ }
+ }
+}
+
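+/*
+ * Check a single DIF tuple. An application tag of 0xffff (and, for Type 3
+ * protection, a reference tag of 0xffffffff as well) marks checking as
+ * disabled for that logical block.
+ */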
+static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
+ uint8_t *buf, uint8_t *mbuf, size_t pil,
+ uint16_t ctrl, uint16_t apptag,
+ uint16_t appmask, uint32_t reftag)
+{
+ switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ case NVME_ID_NS_DPS_TYPE_3:
+ if (be32_to_cpu(dif->reftag) != 0xffffffff) {
+ break;
+ }
+
+ /* fallthrough */
+ case NVME_ID_NS_DPS_TYPE_1:
+ case NVME_ID_NS_DPS_TYPE_2:
+ if (be16_to_cpu(dif->apptag) != 0xffff) {
+ break;
+ }
+
+ trace_pci_nvme_dif_prchk_disabled(be16_to_cpu(dif->apptag),
+ be32_to_cpu(dif->reftag));
+
+ return NVME_SUCCESS;
+ }
+
+ if (ctrl & NVME_RW_PRINFO_PRCHK_GUARD) {
+ uint16_t crc = crc_t10dif(0x0, buf, nvme_lsize(ns));
+
+ if (pil) {
+ crc = crc_t10dif(crc, mbuf, pil);
+ }
+
+ trace_pci_nvme_dif_prchk_guard(be16_to_cpu(dif->guard), crc);
+
+ if (be16_to_cpu(dif->guard) != crc) {
+ return NVME_E2E_GUARD_ERROR;
+ }
+ }
+
+ if (ctrl & NVME_RW_PRINFO_PRCHK_APP) {
+ trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag,
+ appmask);
+
+ if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) {
+ return NVME_E2E_APP_ERROR;
+ }
+ }
+
+ if (ctrl & NVME_RW_PRINFO_PRCHK_REF) {
+ trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag);
+
+ if (be32_to_cpu(dif->reftag) != reftag) {
+ return NVME_E2E_REF_ERROR;
+ }
+ }
+
+ return NVME_SUCCESS;
+}
+
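+/*
+ * Verify protection information for every logical block in the bounce
+ * buffers, advancing the expected reference tag per block for Type 1 and
+ * Type 2 protection.
+ */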
+uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
+ uint8_t *mbuf, size_t mlen, uint16_t ctrl,
+ uint64_t slba, uint16_t apptag,
+ uint16_t appmask, uint32_t reftag)
+{
+ uint8_t *end = buf + len;
+ size_t lsize = nvme_lsize(ns);
+ size_t msize = nvme_msize(ns);
+ int16_t pil = 0;
+ uint16_t status;
+
+ status = nvme_check_prinfo(ns, ctrl, slba, reftag);
+ if (status) {
+ return status;
+ }
+
+ if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
+ pil = nvme_msize(ns) - sizeof(NvmeDifTuple);
+ }
+
+ trace_pci_nvme_dif_check(NVME_RW_PRINFO(ctrl), lsize + pil);
+
+ for (; buf < end; buf += lsize, mbuf += msize) {
+ NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
+
+ status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, ctrl, apptag,
+ appmask, reftag);
+ if (status) {
+ return status;
+ }
+
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
+ reftag++;
+ }
+ }
+
+ return NVME_SUCCESS;
+}
+
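+/*
+ * For ranges that the block layer reports as zeroed (e.g. unwritten or
+ * discarded blocks), force the stored DIF tuples to all ones so that the
+ * end-to-end checks treat them as "checking disabled" rather than failing
+ * with spurious guard errors.
+ */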
+uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
+ uint64_t slba)
+{
+ BlockBackend *blk = ns->blkconf.blk;
+ BlockDriverState *bs = blk_bs(blk);
+
+ size_t msize = nvme_msize(ns);
+ size_t lsize = nvme_lsize(ns);
+ int64_t moffset = 0, offset = nvme_l2b(ns, slba);
+ uint8_t *mbufp, *end;
+ bool zeroed;
+ int16_t pil = 0;
+ int64_t bytes = (mlen / msize) * lsize;
+ int64_t pnum = 0;
+
+ Error *err = NULL;
+
+ if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
+ pil = nvme_msize(ns) - sizeof(NvmeDifTuple);
+ }
+
+ do {
+ int ret;
+
+ bytes -= pnum;
+
+ ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL);
+ if (ret < 0) {
+ error_setg_errno(&err, -ret, "unable to get block status");
+ error_report_err(err);
+
+ return NVME_INTERNAL_DEV_ERROR;
+ }
+
+ zeroed = !!(ret & BDRV_BLOCK_ZERO);
+
+ trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed);
+
+ if (zeroed) {
+ mbufp = mbuf + moffset;
+ mlen = (pnum / lsize) * msize;
+ end = mbufp + mlen;
+
+ for (; mbufp < end; mbufp += msize) {
+ memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple));
+ }
+ }
+
+ moffset += (pnum / lsize) * msize;
+ offset += pnum;
+ } while (pnum != bytes);
+
+ return NVME_SUCCESS;
+}
+
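+/* Final completion: release the bounce buffers and complete the request. */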
+static void nvme_dif_rw_cb(void *opaque, int ret)
+{
+ NvmeBounceContext *ctx = opaque;
+ NvmeRequest *req = ctx->req;
+ NvmeNamespace *ns = req->ns;
+ BlockBackend *blk = ns->blkconf.blk;
+
+ trace_pci_nvme_dif_rw_cb(nvme_cid(req), blk_name(blk));
+
+ qemu_iovec_destroy(&ctx->data.iov);
+ g_free(ctx->data.bounce);
+
+ qemu_iovec_destroy(&ctx->mdata.iov);
+ g_free(ctx->mdata.bounce);
+
+ g_free(ctx);
+
+ nvme_rw_complete_cb(req, ret);
+}
+
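+/*
+ * Called when the metadata read has completed. Mangle the metadata of zeroed
+ * blocks, run the end-to-end checks and copy data (and metadata, when it is
+ * transferred) back to the host buffers.
+ */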
+static void nvme_dif_rw_check_cb(void *opaque, int ret)
+{
+ NvmeBounceContext *ctx = opaque;
+ NvmeRequest *req = ctx->req;
+ NvmeNamespace *ns = req->ns;
+ NvmeCtrl *n = nvme_ctrl(req);
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ uint64_t slba = le64_to_cpu(rw->slba);
+ uint16_t ctrl = le16_to_cpu(rw->control);
+ uint16_t apptag = le16_to_cpu(rw->apptag);
+ uint16_t appmask = le16_to_cpu(rw->appmask);
+ uint32_t reftag = le32_to_cpu(rw->reftag);
+ uint16_t status;
+
+ trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), NVME_RW_PRINFO(ctrl), apptag,
+ appmask, reftag);
+
+ if (ret) {
+ goto out;
+ }
+
+ status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, ctx->mdata.iov.size,
+ slba);
+ if (status) {
+ req->status = status;
+ goto out;
+ }
+
+ status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
+ ctx->mdata.bounce, ctx->mdata.iov.size, ctrl,
+ slba, apptag, appmask, reftag);
+ if (status) {
+ req->status = status;
+ goto out;
+ }
+
+ status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
+ NVME_TX_DIRECTION_FROM_DEVICE, req);
+ if (status) {
+ req->status = status;
+ goto out;
+ }
+
+ if (ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8) {
+ goto out;
+ }
+
+ status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
+ NVME_TX_DIRECTION_FROM_DEVICE, req);
+ if (status) {
+ req->status = status;
+ }
+
+out:
+ nvme_dif_rw_cb(ctx, ret);
+}
+
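+/*
+ * Called when the data read has completed; issue the read of the associated
+ * metadata area so the protection information can be checked.
+ */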
+static void nvme_dif_rw_mdata_in_cb(void *opaque, int ret)
+{
+ NvmeBounceContext *ctx = opaque;
+ NvmeRequest *req = ctx->req;
+ NvmeNamespace *ns = req->ns;
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ uint64_t slba = le64_to_cpu(rw->slba);
+ uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
+ size_t mlen = nvme_m2b(ns, nlb);
+ uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba);
+ BlockBackend *blk = ns->blkconf.blk;
+
+ trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk));
+
+ if (ret) {
+ goto out;
+ }
+
+ ctx->mdata.bounce = g_malloc(mlen);
+
+ qemu_iovec_reset(&ctx->mdata.iov);
+ qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
+
+ req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0,
+ nvme_dif_rw_check_cb, ctx);
+ return;
+
+out:
+ nvme_dif_rw_cb(ctx, ret);
+}
+
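+/*
+ * Called when the data (or zero) write has completed; write out the
+ * associated metadata area.
+ */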
+static void nvme_dif_rw_mdata_out_cb(void *opaque, int ret)
+{
+ NvmeBounceContext *ctx = opaque;
+ NvmeRequest *req = ctx->req;
+ NvmeNamespace *ns = req->ns;
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ uint64_t slba = le64_to_cpu(rw->slba);
+ uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba);
+ BlockBackend *blk = ns->blkconf.blk;
+
+ trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk));
+
+ if (ret) {
+ goto out;
+ }
+
+ req->aiocb = blk_aio_pwritev(blk, offset, &ctx->mdata.iov, 0,
+ nvme_dif_rw_cb, ctx);
+ return;
+
+out:
+ nvme_dif_rw_cb(ctx, ret);
+}
+
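+/*
+ * Entry point for reads and writes on protected namespaces. Sets up the data
+ * and metadata bounce buffers, handles Write Zeroes (optionally generating
+ * protection information when PRACT is set) and chains the remaining I/O
+ * through the callbacks above.
+ */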
+uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ NvmeNamespace *ns = req->ns;
+ BlockBackend *blk = ns->blkconf.blk;
+ bool wrz = rw->opcode == NVME_CMD_WRITE_ZEROES;
+ uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
+ uint64_t slba = le64_to_cpu(rw->slba);
+ size_t len = nvme_l2b(ns, nlb);
+ size_t mlen = nvme_m2b(ns, nlb);
+ size_t mapped_len = len;
+ int64_t offset = nvme_l2b(ns, slba);
+ uint16_t ctrl = le16_to_cpu(rw->control);
+ uint16_t apptag = le16_to_cpu(rw->apptag);
+ uint16_t appmask = le16_to_cpu(rw->appmask);
+ uint32_t reftag = le32_to_cpu(rw->reftag);
+ bool pract = !!(ctrl & NVME_RW_PRINFO_PRACT);
+ NvmeBounceContext *ctx;
+ uint16_t status;
+
+ trace_pci_nvme_dif_rw(pract, NVME_RW_PRINFO(ctrl));
+
+ ctx = g_new0(NvmeBounceContext, 1);
+ ctx->req = req;
+
+ if (wrz) {
+ BdrvRequestFlags flags = BDRV_REQ_MAY_UNMAP;
+
+ if (ctrl & NVME_RW_PRINFO_PRCHK_MASK) {
+ status = NVME_INVALID_PROT_INFO | NVME_DNR;
+ goto err;
+ }
+
+ if (pract) {
+ uint8_t *mbuf, *end;
+ size_t msize = nvme_msize(ns);
+ int16_t pil = msize - sizeof(NvmeDifTuple);
+
+ status = nvme_check_prinfo(ns, ctrl, slba, reftag);
+ if (status) {
+ goto err;
+ }
+
+ flags = 0;
+
+ ctx->mdata.bounce = g_malloc0(mlen);
+
+ qemu_iovec_init(&ctx->mdata.iov, 1);
+ qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
+
+ mbuf = ctx->mdata.bounce;
+ end = mbuf + mlen;
+
+ if (ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT) {
+ pil = 0;
+ }
+
+ for (; mbuf < end; mbuf += msize) {
+ NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
+
+ dif->apptag = cpu_to_be16(apptag);
+ dif->reftag = cpu_to_be32(reftag);
+
+ switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ case NVME_ID_NS_DPS_TYPE_1:
+ case NVME_ID_NS_DPS_TYPE_2:
+ reftag++;
+ }
+ }
+ }
+
+ req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len, flags,
+ nvme_dif_rw_mdata_out_cb, ctx);
+ return NVME_NO_COMPLETE;
+ }
+
+ if (nvme_ns_ext(ns) && !(pract && nvme_msize(ns) == 8)) {
+ mapped_len += mlen;
+ }
+
+ status = nvme_map_dptr(n, &req->sg, mapped_len, &req->cmd);
+ if (status) {
+ return status;
+ }
+
+ ctx->data.bounce = g_malloc(len);
+
+ qemu_iovec_init(&ctx->data.iov, 1);
+ qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len);
+
+ if (req->cmd.opcode == NVME_CMD_READ) {
+ block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
+ BLOCK_ACCT_READ);
+
+ req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0,
+ nvme_dif_rw_mdata_in_cb, ctx);
+ return NVME_NO_COMPLETE;
+ }
+
+ status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
+ NVME_TX_DIRECTION_TO_DEVICE, req);
+ if (status) {
+ goto err;
+ }
+
+ ctx->mdata.bounce = g_malloc(mlen);
+
+ qemu_iovec_init(&ctx->mdata.iov, 1);
+ qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
+
+ if (!(pract && nvme_msize(ns) == 8)) {
+ status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
+ NVME_TX_DIRECTION_TO_DEVICE, req);
+ if (status) {
+ goto err;
+ }
+ }
+
+ status = nvme_check_prinfo(ns, ctrl, slba, reftag);
+ if (status) {
+ goto err;
+ }
+
+ if (pract) {
+ /* splice generated protection information into the buffer */
+ nvme_dif_pract_generate_dif(ns, ctx->data.bounce, ctx->data.iov.size,
+ ctx->mdata.bounce, ctx->mdata.iov.size,
+ apptag, reftag);
+ } else {
+ status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
+ ctx->mdata.bounce, ctx->mdata.iov.size, ctrl,
+ slba, apptag, appmask, reftag);
+ if (status) {
+ goto err;
+ }
+ }
+
+ block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
+ BLOCK_ACCT_WRITE);
+
+ req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->data.iov, 0,
+ nvme_dif_rw_mdata_out_cb, ctx);
+
+ return NVME_NO_COMPLETE;
+
+err:
+ qemu_iovec_destroy(&ctx->data.iov);
+ g_free(ctx->data.bounce);
+
+ qemu_iovec_destroy(&ctx->mdata.iov);
+ g_free(ctx->mdata.bounce);
+
+ g_free(ctx);
+
+ return status;
+}