diff options
author | Yuval Shaia | 2018-02-09 14:23:18 +0100 |
---|---|---|
committer | Marcel Apfelbaum | 2018-02-19 12:03:24 +0100 |
commit | ef6d4ccdc9eba3c184da08e76d52e5003325680b (patch) | |
tree | 7dc1abe7280c3a3ffad1534f473d009f8a564cf4 /hw/rdma/rdma_rm.c | |
parent | hw/rdma: Definitions for rdma device and rdma resource manager (diff) | |
download | qemu-ef6d4ccdc9eba3c184da08e76d52e5003325680b.tar.gz qemu-ef6d4ccdc9eba3c184da08e76d52e5003325680b.tar.xz qemu-ef6d4ccdc9eba3c184da08e76d52e5003325680b.zip |
hw/rdma: Implementation of generic rdma device layers
This layer is composed of two sub-modules, backend and resource manager.
Backend sub-module is responsible for all the interaction with IB layers
such as ibverbs and umad (external libraries).
Resource manager is a collection of functions and structures to manage
RDMA resources such as QPs, CQs and MRs.
Reviewed-by: Dotan Barak <dotanb@mellanox.com>
Reviewed-by: Zhu Yanjun <yanjun.zhu@oracle.com>
Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Diffstat (limited to 'hw/rdma/rdma_rm.c')
-rw-r--r-- | hw/rdma/rdma_rm.c | 544 |
1 files changed, 544 insertions, 0 deletions
diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c new file mode 100644 index 0000000000..b5fc45ddab --- /dev/null +++ b/hw/rdma/rdma_rm.c @@ -0,0 +1,544 @@ +/* + * QEMU paravirtual RDMA - Resource Manager Implementation + * + * Copyright (C) 2018 Oracle + * Copyright (C) 2018 Red Hat Inc + * + * Authors: + * Yuval Shaia <yuval.shaia@oracle.com> + * Marcel Apfelbaum <marcel@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include <qemu/osdep.h> +#include <qapi/error.h> +#include <cpu.h> + +#include "rdma_utils.h" +#include "rdma_backend.h" +#include "rdma_rm.h" + +#define MAX_RM_TBL_NAME 16 + +/* Page directory and page tables */ +#define PG_DIR_SZ { TARGET_PAGE_SIZE / sizeof(__u64) } +#define PG_TBL_SZ { TARGET_PAGE_SIZE / sizeof(__u64) } + +static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl, + uint32_t tbl_sz, uint32_t res_sz) +{ + tbl->tbl = g_malloc(tbl_sz * res_sz); + + strncpy(tbl->name, name, MAX_RM_TBL_NAME); + tbl->name[MAX_RM_TBL_NAME - 1] = 0; + + tbl->bitmap = bitmap_new(tbl_sz); + tbl->tbl_sz = tbl_sz; + tbl->res_sz = res_sz; + qemu_mutex_init(&tbl->lock); +} + +static inline void res_tbl_free(RdmaRmResTbl *tbl) +{ + qemu_mutex_destroy(&tbl->lock); + g_free(tbl->tbl); + bitmap_zero_extend(tbl->bitmap, tbl->tbl_sz, 0); +} + +static inline void *res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle) +{ + pr_dbg("%s, handle=%d\n", tbl->name, handle); + + if ((handle < tbl->tbl_sz) && (test_bit(handle, tbl->bitmap))) { + return tbl->tbl + handle * tbl->res_sz; + } else { + pr_dbg("Invalid handle %d\n", handle); + return NULL; + } +} + +static inline void *res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle) +{ + qemu_mutex_lock(&tbl->lock); + + *handle = find_first_zero_bit(tbl->bitmap, tbl->tbl_sz); + if (*handle > tbl->tbl_sz) { + pr_dbg("Failed to alloc, bitmap is full\n"); + qemu_mutex_unlock(&tbl->lock); + return NULL; + } + + set_bit(*handle, tbl->bitmap); + + qemu_mutex_unlock(&tbl->lock); + + memset(tbl->tbl + *handle * tbl->res_sz, 0, tbl->res_sz); + + pr_dbg("%s, handle=%d\n", tbl->name, *handle); + + return tbl->tbl + *handle * tbl->res_sz; +} + +static inline void res_tbl_dealloc(RdmaRmResTbl *tbl, uint32_t handle) +{ + pr_dbg("%s, handle=%d\n", tbl->name, handle); + + qemu_mutex_lock(&tbl->lock); + + if (handle < tbl->tbl_sz) { + clear_bit(handle, tbl->bitmap); + } + + qemu_mutex_unlock(&tbl->lock); +} + +int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, + uint32_t *pd_handle, uint32_t ctx_handle) +{ + RdmaRmPD *pd; + int ret = -ENOMEM; + + pd = res_tbl_alloc(&dev_res->pd_tbl, pd_handle); + if (!pd) { + goto out; + } + + ret = rdma_backend_create_pd(backend_dev, &pd->backend_pd); + if (ret) { + ret = -EIO; + goto out_tbl_dealloc; + } + + pd->ctx_handle = ctx_handle; + + return 0; + +out_tbl_dealloc: + res_tbl_dealloc(&dev_res->pd_tbl, *pd_handle); + +out: + return ret; +} + +RdmaRmPD *rdma_rm_get_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle) +{ + return res_tbl_get(&dev_res->pd_tbl, pd_handle); +} + +void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle) +{ + RdmaRmPD *pd = rdma_rm_get_pd(dev_res, pd_handle); + + if (pd) { + rdma_backend_destroy_pd(&pd->backend_pd); + res_tbl_dealloc(&dev_res->pd_tbl, pd_handle); + } +} + +int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle, + uint64_t guest_start, size_t guest_length, void *host_virt, + int access_flags, uint32_t *mr_handle, uint32_t *lkey, + uint32_t *rkey) +{ + RdmaRmMR *mr; + int ret = 0; + RdmaRmPD *pd; + uint64_t addr; + size_t length; + + pd = rdma_rm_get_pd(dev_res, pd_handle); + if (!pd) { + pr_dbg("Invalid PD\n"); + return -EINVAL; + } + + mr = res_tbl_alloc(&dev_res->mr_tbl, mr_handle); + if (!mr) { + pr_dbg("Failed to allocate obj in table\n"); + return -ENOMEM; + } + + if (!host_virt) { + /* TODO: This is my guess but not so sure that this needs to be + * done */ + length = TARGET_PAGE_SIZE; + addr = (uint64_t)g_malloc(length); + } else { + mr->user_mr.host_virt = (uint64_t) host_virt; + pr_dbg("host_virt=0x%lx\n", mr->user_mr.host_virt); + mr->user_mr.length = guest_length; + pr_dbg("length=0x%lx\n", guest_length); + mr->user_mr.guest_start = guest_start; + pr_dbg("guest_start=0x%lx\n", mr->user_mr.guest_start); + + length = mr->user_mr.length; + addr = mr->user_mr.host_virt; + } + + ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, addr, length, + access_flags); + if (ret) { + pr_dbg("Fail in rdma_backend_create_mr, err=%d\n", ret); + ret = -EIO; + goto out_dealloc_mr; + } + + if (!host_virt) { + *lkey = mr->lkey = rdma_backend_mr_lkey(&mr->backend_mr); + *rkey = mr->rkey = rdma_backend_mr_rkey(&mr->backend_mr); + } else { + /* We keep mr_handle in lkey so send and recv get get mr ptr */ + *lkey = *mr_handle; + *rkey = -1; + } + + mr->pd_handle = pd_handle; + + return 0; + +out_dealloc_mr: + res_tbl_dealloc(&dev_res->mr_tbl, *mr_handle); + + return ret; +} + +RdmaRmMR *rdma_rm_get_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle) +{ + return res_tbl_get(&dev_res->mr_tbl, mr_handle); +} + +void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle) +{ + RdmaRmMR *mr = rdma_rm_get_mr(dev_res, mr_handle); + + if (mr) { + rdma_backend_destroy_mr(&mr->backend_mr); + munmap((void *)mr->user_mr.host_virt, mr->user_mr.length); + res_tbl_dealloc(&dev_res->mr_tbl, mr_handle); + } +} + +int rdma_rm_alloc_uc(RdmaDeviceResources *dev_res, uint32_t pfn, + uint32_t *uc_handle) +{ + RdmaRmUC *uc; + + /* TODO: Need to make sure pfn is between bar start address and + * bsd+RDMA_BAR2_UAR_SIZE + if (pfn > RDMA_BAR2_UAR_SIZE) { + pr_err("pfn out of range (%d > %d)\n", pfn, RDMA_BAR2_UAR_SIZE); + return -ENOMEM; + } + */ + + uc = res_tbl_alloc(&dev_res->uc_tbl, uc_handle); + if (!uc) { + return -ENOMEM; + } + + return 0; +} + +RdmaRmUC *rdma_rm_get_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle) +{ + return res_tbl_get(&dev_res->uc_tbl, uc_handle); +} + +void rdma_rm_dealloc_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle) +{ + RdmaRmUC *uc = rdma_rm_get_uc(dev_res, uc_handle); + + if (uc) { + res_tbl_dealloc(&dev_res->uc_tbl, uc_handle); + } +} + +RdmaRmCQ *rdma_rm_get_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle) +{ + return res_tbl_get(&dev_res->cq_tbl, cq_handle); +} + +int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, + uint32_t cqe, uint32_t *cq_handle, void *opaque) +{ + int rc; + RdmaRmCQ *cq; + + cq = res_tbl_alloc(&dev_res->cq_tbl, cq_handle); + if (!cq) { + return -ENOMEM; + } + + cq->opaque = opaque; + cq->notify = false; + + rc = rdma_backend_create_cq(backend_dev, &cq->backend_cq, cqe); + if (rc) { + rc = -EIO; + goto out_dealloc_cq; + } + + return 0; + +out_dealloc_cq: + rdma_rm_dealloc_cq(dev_res, *cq_handle); + + return rc; +} + +void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle, + bool notify) +{ + RdmaRmCQ *cq; + + pr_dbg("cq_handle=%d, notify=0x%x\n", cq_handle, notify); + + cq = rdma_rm_get_cq(dev_res, cq_handle); + if (!cq) { + return; + } + + cq->notify = notify; + pr_dbg("notify=%d\n", cq->notify); +} + +void rdma_rm_dealloc_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle) +{ + RdmaRmCQ *cq; + + cq = rdma_rm_get_cq(dev_res, cq_handle); + if (!cq) { + return; + } + + rdma_backend_destroy_cq(&cq->backend_cq); + + res_tbl_dealloc(&dev_res->cq_tbl, cq_handle); +} + +RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn) +{ + GBytes *key = g_bytes_new(&qpn, sizeof(qpn)); + + RdmaRmQP *qp = g_hash_table_lookup(dev_res->qp_hash, key); + + g_bytes_unref(key); + + return qp; +} + +int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle, + uint8_t qp_type, uint32_t max_send_wr, + uint32_t max_send_sge, uint32_t send_cq_handle, + uint32_t max_recv_wr, uint32_t max_recv_sge, + uint32_t recv_cq_handle, void *opaque, uint32_t *qpn) +{ + int rc; + RdmaRmQP *qp; + RdmaRmCQ *scq, *rcq; + RdmaRmPD *pd; + uint32_t rm_qpn; + + pr_dbg("qp_type=%d\n", qp_type); + + pd = rdma_rm_get_pd(dev_res, pd_handle); + if (!pd) { + pr_err("Invalid pd handle (%d)\n", pd_handle); + return -EINVAL; + } + + scq = rdma_rm_get_cq(dev_res, send_cq_handle); + rcq = rdma_rm_get_cq(dev_res, recv_cq_handle); + + if (!scq || !rcq) { + pr_err("Invalid send_cqn or recv_cqn (%d, %d)\n", + send_cq_handle, recv_cq_handle); + return -EINVAL; + } + + qp = res_tbl_alloc(&dev_res->qp_tbl, &rm_qpn); + if (!qp) { + return -ENOMEM; + } + pr_dbg("rm_qpn=%d\n", rm_qpn); + + qp->qpn = rm_qpn; + qp->qp_state = IBV_QPS_RESET; + qp->qp_type = qp_type; + qp->send_cq_handle = send_cq_handle; + qp->recv_cq_handle = recv_cq_handle; + qp->opaque = opaque; + + rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd, + &scq->backend_cq, &rcq->backend_cq, max_send_wr, + max_recv_wr, max_send_sge, max_recv_sge); + if (rc) { + rc = -EIO; + goto out_dealloc_qp; + } + + *qpn = rdma_backend_qpn(&qp->backend_qp); + pr_dbg("rm_qpn=%d, backend_qpn=0x%x\n", rm_qpn, *qpn); + g_hash_table_insert(dev_res->qp_hash, g_bytes_new(qpn, sizeof(*qpn)), qp); + + return 0; + +out_dealloc_qp: + res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn); + + return rc; +} + +int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, + uint32_t qp_handle, uint32_t attr_mask, + union ibv_gid *dgid, uint32_t dqpn, + enum ibv_qp_state qp_state, uint32_t qkey, + uint32_t rq_psn, uint32_t sq_psn) +{ + RdmaRmQP *qp; + int ret; + + pr_dbg("qpn=%d\n", qp_handle); + + qp = rdma_rm_get_qp(dev_res, qp_handle); + if (!qp) { + return -EINVAL; + } + + pr_dbg("qp_type=%d\n", qp->qp_type); + pr_dbg("attr_mask=0x%x\n", attr_mask); + + if (qp->qp_type == IBV_QPT_SMI) { + pr_dbg("QP0 unsupported\n"); + return -EPERM; + } else if (qp->qp_type == IBV_QPT_GSI) { + pr_dbg("QP1\n"); + return 0; + } + + if (attr_mask & IBV_QP_STATE) { + qp->qp_state = qp_state; + pr_dbg("qp_state=%d\n", qp->qp_state); + + if (qp->qp_state == IBV_QPS_INIT) { + ret = rdma_backend_qp_state_init(backend_dev, &qp->backend_qp, + qp->qp_type, qkey); + if (ret) { + return -EIO; + } + } + + if (qp->qp_state == IBV_QPS_RTR) { + ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp, + qp->qp_type, dgid, dqpn, rq_psn, + qkey, attr_mask & IBV_QP_QKEY); + if (ret) { + return -EIO; + } + } + + if (qp->qp_state == IBV_QPS_RTS) { + ret = rdma_backend_qp_state_rts(&qp->backend_qp, qp->qp_type, + sq_psn, qkey, + attr_mask & IBV_QP_QKEY); + if (ret) { + return -EIO; + } + } + } + + return 0; +} + +void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle) +{ + RdmaRmQP *qp; + GBytes *key; + + key = g_bytes_new(&qp_handle, sizeof(qp_handle)); + qp = g_hash_table_lookup(dev_res->qp_hash, key); + g_hash_table_remove(dev_res->qp_hash, key); + g_bytes_unref(key); + + if (!qp) { + return; + } + + rdma_backend_destroy_qp(&qp->backend_qp); + + res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn); +} + +void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id) +{ + void **cqe_ctx; + + cqe_ctx = res_tbl_get(&dev_res->cqe_ctx_tbl, cqe_ctx_id); + if (!cqe_ctx) { + return NULL; + } + + pr_dbg("ctx=%p\n", *cqe_ctx); + + return *cqe_ctx; +} + +int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id, + void *ctx) +{ + void **cqe_ctx; + + cqe_ctx = res_tbl_alloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id); + if (!cqe_ctx) { + return -ENOMEM; + } + + pr_dbg("ctx=%p\n", ctx); + *cqe_ctx = ctx; + + return 0; +} + +void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id) +{ + res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id); +} + +static void destroy_qp_hash_key(gpointer data) +{ + g_bytes_unref(data); +} + +int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr, + Error **errp) +{ + dev_res->qp_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal, + destroy_qp_hash_key, NULL); + if (!dev_res->qp_hash) { + return -ENOMEM; + } + + res_tbl_init("PD", &dev_res->pd_tbl, dev_attr->max_pd, sizeof(RdmaRmPD)); + res_tbl_init("CQ", &dev_res->cq_tbl, dev_attr->max_cq, sizeof(RdmaRmCQ)); + res_tbl_init("MR", &dev_res->mr_tbl, dev_attr->max_mr, sizeof(RdmaRmMR)); + res_tbl_init("QP", &dev_res->qp_tbl, dev_attr->max_qp, sizeof(RdmaRmQP)); + res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp * + dev_attr->max_qp_wr, sizeof(void *)); + res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC)); + + return 0; +} + +void rdma_rm_fini(RdmaDeviceResources *dev_res) +{ + res_tbl_free(&dev_res->uc_tbl); + res_tbl_free(&dev_res->cqe_ctx_tbl); + res_tbl_free(&dev_res->qp_tbl); + res_tbl_free(&dev_res->cq_tbl); + res_tbl_free(&dev_res->mr_tbl); + res_tbl_free(&dev_res->pd_tbl); + g_hash_table_destroy(dev_res->qp_hash); +} |