summaryrefslogtreecommitdiffstats
path: root/hw/rdma/rdma_rm.c
diff options
context:
space:
mode:
authorYuval Shaia2018-02-09 14:23:18 +0100
committerMarcel Apfelbaum2018-02-19 12:03:24 +0100
commitef6d4ccdc9eba3c184da08e76d52e5003325680b (patch)
tree7dc1abe7280c3a3ffad1534f473d009f8a564cf4 /hw/rdma/rdma_rm.c
parenthw/rdma: Definitions for rdma device and rdma resource manager (diff)
downloadqemu-ef6d4ccdc9eba3c184da08e76d52e5003325680b.tar.gz
qemu-ef6d4ccdc9eba3c184da08e76d52e5003325680b.tar.xz
qemu-ef6d4ccdc9eba3c184da08e76d52e5003325680b.zip
hw/rdma: Implementation of generic rdma device layers
This layer is composed of two sub-modules, backend and resource manager. Backend sub-module is responsible for all the interaction with IB layers such as ibverbs and umad (external libraries). Resource manager is a collection of functions and structures to manage RDMA resources such as QPs, CQs and MRs. Reviewed-by: Dotan Barak <dotanb@mellanox.com> Reviewed-by: Zhu Yanjun <yanjun.zhu@oracle.com> Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Diffstat (limited to 'hw/rdma/rdma_rm.c')
-rw-r--r--hw/rdma/rdma_rm.c544
1 files changed, 544 insertions, 0 deletions
diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
new file mode 100644
index 0000000000..b5fc45ddab
--- /dev/null
+++ b/hw/rdma/rdma_rm.c
@@ -0,0 +1,544 @@
+/*
+ * QEMU paravirtual RDMA - Resource Manager Implementation
+ *
+ * Copyright (C) 2018 Oracle
+ * Copyright (C) 2018 Red Hat Inc
+ *
+ * Authors:
+ * Yuval Shaia <yuval.shaia@oracle.com>
+ * Marcel Apfelbaum <marcel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include <qemu/osdep.h>
+#include <qapi/error.h>
+#include <cpu.h>
+
+#include "rdma_utils.h"
+#include "rdma_backend.h"
+#include "rdma_rm.h"
+
+#define MAX_RM_TBL_NAME 16
+
+/* Page directory and page tables */
+#define PG_DIR_SZ { TARGET_PAGE_SIZE / sizeof(__u64) }
+#define PG_TBL_SZ { TARGET_PAGE_SIZE / sizeof(__u64) }
+
+static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl,
+ uint32_t tbl_sz, uint32_t res_sz)
+{
+ tbl->tbl = g_malloc(tbl_sz * res_sz);
+
+ strncpy(tbl->name, name, MAX_RM_TBL_NAME);
+ tbl->name[MAX_RM_TBL_NAME - 1] = 0;
+
+ tbl->bitmap = bitmap_new(tbl_sz);
+ tbl->tbl_sz = tbl_sz;
+ tbl->res_sz = res_sz;
+ qemu_mutex_init(&tbl->lock);
+}
+
+static inline void res_tbl_free(RdmaRmResTbl *tbl)
+{
+ qemu_mutex_destroy(&tbl->lock);
+ g_free(tbl->tbl);
+ bitmap_zero_extend(tbl->bitmap, tbl->tbl_sz, 0);
+}
+
+static inline void *res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle)
+{
+ pr_dbg("%s, handle=%d\n", tbl->name, handle);
+
+ if ((handle < tbl->tbl_sz) && (test_bit(handle, tbl->bitmap))) {
+ return tbl->tbl + handle * tbl->res_sz;
+ } else {
+ pr_dbg("Invalid handle %d\n", handle);
+ return NULL;
+ }
+}
+
+static inline void *res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle)
+{
+ qemu_mutex_lock(&tbl->lock);
+
+ *handle = find_first_zero_bit(tbl->bitmap, tbl->tbl_sz);
+ if (*handle > tbl->tbl_sz) {
+ pr_dbg("Failed to alloc, bitmap is full\n");
+ qemu_mutex_unlock(&tbl->lock);
+ return NULL;
+ }
+
+ set_bit(*handle, tbl->bitmap);
+
+ qemu_mutex_unlock(&tbl->lock);
+
+ memset(tbl->tbl + *handle * tbl->res_sz, 0, tbl->res_sz);
+
+ pr_dbg("%s, handle=%d\n", tbl->name, *handle);
+
+ return tbl->tbl + *handle * tbl->res_sz;
+}
+
+static inline void res_tbl_dealloc(RdmaRmResTbl *tbl, uint32_t handle)
+{
+ pr_dbg("%s, handle=%d\n", tbl->name, handle);
+
+ qemu_mutex_lock(&tbl->lock);
+
+ if (handle < tbl->tbl_sz) {
+ clear_bit(handle, tbl->bitmap);
+ }
+
+ qemu_mutex_unlock(&tbl->lock);
+}
+
+int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
+ uint32_t *pd_handle, uint32_t ctx_handle)
+{
+ RdmaRmPD *pd;
+ int ret = -ENOMEM;
+
+ pd = res_tbl_alloc(&dev_res->pd_tbl, pd_handle);
+ if (!pd) {
+ goto out;
+ }
+
+ ret = rdma_backend_create_pd(backend_dev, &pd->backend_pd);
+ if (ret) {
+ ret = -EIO;
+ goto out_tbl_dealloc;
+ }
+
+ pd->ctx_handle = ctx_handle;
+
+ return 0;
+
+out_tbl_dealloc:
+ res_tbl_dealloc(&dev_res->pd_tbl, *pd_handle);
+
+out:
+ return ret;
+}
+
+RdmaRmPD *rdma_rm_get_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
+{
+ return res_tbl_get(&dev_res->pd_tbl, pd_handle);
+}
+
+void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
+{
+ RdmaRmPD *pd = rdma_rm_get_pd(dev_res, pd_handle);
+
+ if (pd) {
+ rdma_backend_destroy_pd(&pd->backend_pd);
+ res_tbl_dealloc(&dev_res->pd_tbl, pd_handle);
+ }
+}
+
+int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
+ uint64_t guest_start, size_t guest_length, void *host_virt,
+ int access_flags, uint32_t *mr_handle, uint32_t *lkey,
+ uint32_t *rkey)
+{
+ RdmaRmMR *mr;
+ int ret = 0;
+ RdmaRmPD *pd;
+ uint64_t addr;
+ size_t length;
+
+ pd = rdma_rm_get_pd(dev_res, pd_handle);
+ if (!pd) {
+ pr_dbg("Invalid PD\n");
+ return -EINVAL;
+ }
+
+ mr = res_tbl_alloc(&dev_res->mr_tbl, mr_handle);
+ if (!mr) {
+ pr_dbg("Failed to allocate obj in table\n");
+ return -ENOMEM;
+ }
+
+ if (!host_virt) {
+ /* TODO: This is my guess but not so sure that this needs to be
+ * done */
+ length = TARGET_PAGE_SIZE;
+ addr = (uint64_t)g_malloc(length);
+ } else {
+ mr->user_mr.host_virt = (uint64_t) host_virt;
+ pr_dbg("host_virt=0x%lx\n", mr->user_mr.host_virt);
+ mr->user_mr.length = guest_length;
+ pr_dbg("length=0x%lx\n", guest_length);
+ mr->user_mr.guest_start = guest_start;
+ pr_dbg("guest_start=0x%lx\n", mr->user_mr.guest_start);
+
+ length = mr->user_mr.length;
+ addr = mr->user_mr.host_virt;
+ }
+
+ ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, addr, length,
+ access_flags);
+ if (ret) {
+ pr_dbg("Fail in rdma_backend_create_mr, err=%d\n", ret);
+ ret = -EIO;
+ goto out_dealloc_mr;
+ }
+
+ if (!host_virt) {
+ *lkey = mr->lkey = rdma_backend_mr_lkey(&mr->backend_mr);
+ *rkey = mr->rkey = rdma_backend_mr_rkey(&mr->backend_mr);
+ } else {
+ /* We keep mr_handle in lkey so send and recv get get mr ptr */
+ *lkey = *mr_handle;
+ *rkey = -1;
+ }
+
+ mr->pd_handle = pd_handle;
+
+ return 0;
+
+out_dealloc_mr:
+ res_tbl_dealloc(&dev_res->mr_tbl, *mr_handle);
+
+ return ret;
+}
+
+RdmaRmMR *rdma_rm_get_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
+{
+ return res_tbl_get(&dev_res->mr_tbl, mr_handle);
+}
+
+void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
+{
+ RdmaRmMR *mr = rdma_rm_get_mr(dev_res, mr_handle);
+
+ if (mr) {
+ rdma_backend_destroy_mr(&mr->backend_mr);
+ munmap((void *)mr->user_mr.host_virt, mr->user_mr.length);
+ res_tbl_dealloc(&dev_res->mr_tbl, mr_handle);
+ }
+}
+
+int rdma_rm_alloc_uc(RdmaDeviceResources *dev_res, uint32_t pfn,
+ uint32_t *uc_handle)
+{
+ RdmaRmUC *uc;
+
+ /* TODO: Need to make sure pfn is between bar start address and
+ * bsd+RDMA_BAR2_UAR_SIZE
+ if (pfn > RDMA_BAR2_UAR_SIZE) {
+ pr_err("pfn out of range (%d > %d)\n", pfn, RDMA_BAR2_UAR_SIZE);
+ return -ENOMEM;
+ }
+ */
+
+ uc = res_tbl_alloc(&dev_res->uc_tbl, uc_handle);
+ if (!uc) {
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+RdmaRmUC *rdma_rm_get_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
+{
+ return res_tbl_get(&dev_res->uc_tbl, uc_handle);
+}
+
+void rdma_rm_dealloc_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
+{
+ RdmaRmUC *uc = rdma_rm_get_uc(dev_res, uc_handle);
+
+ if (uc) {
+ res_tbl_dealloc(&dev_res->uc_tbl, uc_handle);
+ }
+}
+
+RdmaRmCQ *rdma_rm_get_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
+{
+ return res_tbl_get(&dev_res->cq_tbl, cq_handle);
+}
+
+int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
+ uint32_t cqe, uint32_t *cq_handle, void *opaque)
+{
+ int rc;
+ RdmaRmCQ *cq;
+
+ cq = res_tbl_alloc(&dev_res->cq_tbl, cq_handle);
+ if (!cq) {
+ return -ENOMEM;
+ }
+
+ cq->opaque = opaque;
+ cq->notify = false;
+
+ rc = rdma_backend_create_cq(backend_dev, &cq->backend_cq, cqe);
+ if (rc) {
+ rc = -EIO;
+ goto out_dealloc_cq;
+ }
+
+ return 0;
+
+out_dealloc_cq:
+ rdma_rm_dealloc_cq(dev_res, *cq_handle);
+
+ return rc;
+}
+
+void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle,
+ bool notify)
+{
+ RdmaRmCQ *cq;
+
+ pr_dbg("cq_handle=%d, notify=0x%x\n", cq_handle, notify);
+
+ cq = rdma_rm_get_cq(dev_res, cq_handle);
+ if (!cq) {
+ return;
+ }
+
+ cq->notify = notify;
+ pr_dbg("notify=%d\n", cq->notify);
+}
+
+void rdma_rm_dealloc_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
+{
+ RdmaRmCQ *cq;
+
+ cq = rdma_rm_get_cq(dev_res, cq_handle);
+ if (!cq) {
+ return;
+ }
+
+ rdma_backend_destroy_cq(&cq->backend_cq);
+
+ res_tbl_dealloc(&dev_res->cq_tbl, cq_handle);
+}
+
+RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn)
+{
+ GBytes *key = g_bytes_new(&qpn, sizeof(qpn));
+
+ RdmaRmQP *qp = g_hash_table_lookup(dev_res->qp_hash, key);
+
+ g_bytes_unref(key);
+
+ return qp;
+}
+
+int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
+ uint8_t qp_type, uint32_t max_send_wr,
+ uint32_t max_send_sge, uint32_t send_cq_handle,
+ uint32_t max_recv_wr, uint32_t max_recv_sge,
+ uint32_t recv_cq_handle, void *opaque, uint32_t *qpn)
+{
+ int rc;
+ RdmaRmQP *qp;
+ RdmaRmCQ *scq, *rcq;
+ RdmaRmPD *pd;
+ uint32_t rm_qpn;
+
+ pr_dbg("qp_type=%d\n", qp_type);
+
+ pd = rdma_rm_get_pd(dev_res, pd_handle);
+ if (!pd) {
+ pr_err("Invalid pd handle (%d)\n", pd_handle);
+ return -EINVAL;
+ }
+
+ scq = rdma_rm_get_cq(dev_res, send_cq_handle);
+ rcq = rdma_rm_get_cq(dev_res, recv_cq_handle);
+
+ if (!scq || !rcq) {
+ pr_err("Invalid send_cqn or recv_cqn (%d, %d)\n",
+ send_cq_handle, recv_cq_handle);
+ return -EINVAL;
+ }
+
+ qp = res_tbl_alloc(&dev_res->qp_tbl, &rm_qpn);
+ if (!qp) {
+ return -ENOMEM;
+ }
+ pr_dbg("rm_qpn=%d\n", rm_qpn);
+
+ qp->qpn = rm_qpn;
+ qp->qp_state = IBV_QPS_RESET;
+ qp->qp_type = qp_type;
+ qp->send_cq_handle = send_cq_handle;
+ qp->recv_cq_handle = recv_cq_handle;
+ qp->opaque = opaque;
+
+ rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd,
+ &scq->backend_cq, &rcq->backend_cq, max_send_wr,
+ max_recv_wr, max_send_sge, max_recv_sge);
+ if (rc) {
+ rc = -EIO;
+ goto out_dealloc_qp;
+ }
+
+ *qpn = rdma_backend_qpn(&qp->backend_qp);
+ pr_dbg("rm_qpn=%d, backend_qpn=0x%x\n", rm_qpn, *qpn);
+ g_hash_table_insert(dev_res->qp_hash, g_bytes_new(qpn, sizeof(*qpn)), qp);
+
+ return 0;
+
+out_dealloc_qp:
+ res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
+
+ return rc;
+}
+
+int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
+ uint32_t qp_handle, uint32_t attr_mask,
+ union ibv_gid *dgid, uint32_t dqpn,
+ enum ibv_qp_state qp_state, uint32_t qkey,
+ uint32_t rq_psn, uint32_t sq_psn)
+{
+ RdmaRmQP *qp;
+ int ret;
+
+ pr_dbg("qpn=%d\n", qp_handle);
+
+ qp = rdma_rm_get_qp(dev_res, qp_handle);
+ if (!qp) {
+ return -EINVAL;
+ }
+
+ pr_dbg("qp_type=%d\n", qp->qp_type);
+ pr_dbg("attr_mask=0x%x\n", attr_mask);
+
+ if (qp->qp_type == IBV_QPT_SMI) {
+ pr_dbg("QP0 unsupported\n");
+ return -EPERM;
+ } else if (qp->qp_type == IBV_QPT_GSI) {
+ pr_dbg("QP1\n");
+ return 0;
+ }
+
+ if (attr_mask & IBV_QP_STATE) {
+ qp->qp_state = qp_state;
+ pr_dbg("qp_state=%d\n", qp->qp_state);
+
+ if (qp->qp_state == IBV_QPS_INIT) {
+ ret = rdma_backend_qp_state_init(backend_dev, &qp->backend_qp,
+ qp->qp_type, qkey);
+ if (ret) {
+ return -EIO;
+ }
+ }
+
+ if (qp->qp_state == IBV_QPS_RTR) {
+ ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp,
+ qp->qp_type, dgid, dqpn, rq_psn,
+ qkey, attr_mask & IBV_QP_QKEY);
+ if (ret) {
+ return -EIO;
+ }
+ }
+
+ if (qp->qp_state == IBV_QPS_RTS) {
+ ret = rdma_backend_qp_state_rts(&qp->backend_qp, qp->qp_type,
+ sq_psn, qkey,
+ attr_mask & IBV_QP_QKEY);
+ if (ret) {
+ return -EIO;
+ }
+ }
+ }
+
+ return 0;
+}
+
+void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
+{
+ RdmaRmQP *qp;
+ GBytes *key;
+
+ key = g_bytes_new(&qp_handle, sizeof(qp_handle));
+ qp = g_hash_table_lookup(dev_res->qp_hash, key);
+ g_hash_table_remove(dev_res->qp_hash, key);
+ g_bytes_unref(key);
+
+ if (!qp) {
+ return;
+ }
+
+ rdma_backend_destroy_qp(&qp->backend_qp);
+
+ res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
+}
+
+void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
+{
+ void **cqe_ctx;
+
+ cqe_ctx = res_tbl_get(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
+ if (!cqe_ctx) {
+ return NULL;
+ }
+
+ pr_dbg("ctx=%p\n", *cqe_ctx);
+
+ return *cqe_ctx;
+}
+
+int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
+ void *ctx)
+{
+ void **cqe_ctx;
+
+ cqe_ctx = res_tbl_alloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
+ if (!cqe_ctx) {
+ return -ENOMEM;
+ }
+
+ pr_dbg("ctx=%p\n", ctx);
+ *cqe_ctx = ctx;
+
+ return 0;
+}
+
+void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
+{
+ res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
+}
+
+static void destroy_qp_hash_key(gpointer data)
+{
+ g_bytes_unref(data);
+}
+
+int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
+ Error **errp)
+{
+ dev_res->qp_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal,
+ destroy_qp_hash_key, NULL);
+ if (!dev_res->qp_hash) {
+ return -ENOMEM;
+ }
+
+ res_tbl_init("PD", &dev_res->pd_tbl, dev_attr->max_pd, sizeof(RdmaRmPD));
+ res_tbl_init("CQ", &dev_res->cq_tbl, dev_attr->max_cq, sizeof(RdmaRmCQ));
+ res_tbl_init("MR", &dev_res->mr_tbl, dev_attr->max_mr, sizeof(RdmaRmMR));
+ res_tbl_init("QP", &dev_res->qp_tbl, dev_attr->max_qp, sizeof(RdmaRmQP));
+ res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp *
+ dev_attr->max_qp_wr, sizeof(void *));
+ res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
+
+ return 0;
+}
+
+void rdma_rm_fini(RdmaDeviceResources *dev_res)
+{
+ res_tbl_free(&dev_res->uc_tbl);
+ res_tbl_free(&dev_res->cqe_ctx_tbl);
+ res_tbl_free(&dev_res->qp_tbl);
+ res_tbl_free(&dev_res->cq_tbl);
+ res_tbl_free(&dev_res->mr_tbl);
+ res_tbl_free(&dev_res->pd_tbl);
+ g_hash_table_destroy(dev_res->qp_hash);
+}