summaryrefslogblamecommitdiffstats
path: root/hw/rdma/rdma_rm.c
blob: 8bf241e91f38bafd2e270c67d04d1a240d937efd (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15














                                                                            


                       




                         



















                                                                    


                       

                                   
                        



































































































                                                                                











                                                    
                                         
 




                                                       
                             
                                
                                  
                                                         
 






                                                                                

     


                                                                   





















                                                                          

                                                   
                                                             

                                         























































                                                                               
                           


























                                                                            



                                                  
























































                                                                          




                                 

































                                                                                
                                                                               






                                                                
                                    
                                





























                                                                          








                                                                              
                                                                         


                                                                              

















                                                                         





                                                                               
                                    










                                                                              





















































                                                                               










                                                                              
                                                                   








                                                                              



                                                                  
                                                  
                                                                   




                                       


                                                          











                                                                            

                                                                            
                                               
                                                                         


                                    
                                                              
 
                                                             

 




                                              

                                                    
          
 
                                                     
 


                                                        







                                                                       
                                        




                                                         
















                                                                                

                        


             

                                                                            
 

                                             


                                        
                                   
                                   
                                   
 


                                               
 
/*
 * QEMU paravirtual RDMA - Resource Manager Implementation
 *
 * Copyright (C) 2018 Oracle
 * Copyright (C) 2018 Red Hat Inc
 *
 * Authors:
 *     Yuval Shaia <yuval.shaia@oracle.com>
 *     Marcel Apfelbaum <marcel@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "cpu.h"

#include "rdma_utils.h"
#include "rdma_backend.h"
#include "rdma_rm.h"

/* Page directory and page tables */
#define PG_DIR_SZ { TARGET_PAGE_SIZE / sizeof(__u64) }
#define PG_TBL_SZ { TARGET_PAGE_SIZE / sizeof(__u64) }

static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl,
                                uint32_t tbl_sz, uint32_t res_sz)
{
    tbl->tbl = g_malloc(tbl_sz * res_sz);

    strncpy(tbl->name, name, MAX_RM_TBL_NAME);
    tbl->name[MAX_RM_TBL_NAME - 1] = 0;

    tbl->bitmap = bitmap_new(tbl_sz);
    tbl->tbl_sz = tbl_sz;
    tbl->res_sz = res_sz;
    qemu_mutex_init(&tbl->lock);
}

static inline void res_tbl_free(RdmaRmResTbl *tbl)
{
    if (!tbl->bitmap) {
        return;
    }
    qemu_mutex_destroy(&tbl->lock);
    g_free(tbl->tbl);
    g_free(tbl->bitmap);
}

static inline void *res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle)
{
    pr_dbg("%s, handle=%d\n", tbl->name, handle);

    if ((handle < tbl->tbl_sz) && (test_bit(handle, tbl->bitmap))) {
        return tbl->tbl + handle * tbl->res_sz;
    } else {
        pr_dbg("Invalid handle %d\n", handle);
        return NULL;
    }
}

static inline void *res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle)
{
    qemu_mutex_lock(&tbl->lock);

    *handle = find_first_zero_bit(tbl->bitmap, tbl->tbl_sz);
    if (*handle > tbl->tbl_sz) {
        pr_dbg("Failed to alloc, bitmap is full\n");
        qemu_mutex_unlock(&tbl->lock);
        return NULL;
    }

    set_bit(*handle, tbl->bitmap);

    qemu_mutex_unlock(&tbl->lock);

    memset(tbl->tbl + *handle * tbl->res_sz, 0, tbl->res_sz);

    pr_dbg("%s, handle=%d\n", tbl->name, *handle);

    return tbl->tbl + *handle * tbl->res_sz;
}

static inline void res_tbl_dealloc(RdmaRmResTbl *tbl, uint32_t handle)
{
    pr_dbg("%s, handle=%d\n", tbl->name, handle);

    qemu_mutex_lock(&tbl->lock);

    if (handle < tbl->tbl_sz) {
        clear_bit(handle, tbl->bitmap);
    }

    qemu_mutex_unlock(&tbl->lock);
}

int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t *pd_handle, uint32_t ctx_handle)
{
    RdmaRmPD *pd;
    int ret = -ENOMEM;

    pd = res_tbl_alloc(&dev_res->pd_tbl, pd_handle);
    if (!pd) {
        goto out;
    }

    ret = rdma_backend_create_pd(backend_dev, &pd->backend_pd);
    if (ret) {
        ret = -EIO;
        goto out_tbl_dealloc;
    }

    pd->ctx_handle = ctx_handle;

    return 0;

out_tbl_dealloc:
    res_tbl_dealloc(&dev_res->pd_tbl, *pd_handle);

out:
    return ret;
}

RdmaRmPD *rdma_rm_get_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
{
    return res_tbl_get(&dev_res->pd_tbl, pd_handle);
}

void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
{
    RdmaRmPD *pd = rdma_rm_get_pd(dev_res, pd_handle);

    if (pd) {
        rdma_backend_destroy_pd(&pd->backend_pd);
        res_tbl_dealloc(&dev_res->pd_tbl, pd_handle);
    }
}

int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                     uint64_t guest_start, size_t guest_length, void *host_virt,
                     int access_flags, uint32_t *mr_handle, uint32_t *lkey,
                     uint32_t *rkey)
{
    RdmaRmMR *mr;
    int ret = 0;
    RdmaRmPD *pd;

    pd = rdma_rm_get_pd(dev_res, pd_handle);
    if (!pd) {
        pr_dbg("Invalid PD\n");
        return -EINVAL;
    }

    mr = res_tbl_alloc(&dev_res->mr_tbl, mr_handle);
    if (!mr) {
        pr_dbg("Failed to allocate obj in table\n");
        return -ENOMEM;
    }
    pr_dbg("mr_handle=%d\n", *mr_handle);

    pr_dbg("host_virt=0x%p\n", host_virt);
    pr_dbg("guest_start=0x%" PRIx64 "\n", guest_start);
    pr_dbg("length=%zu\n", guest_length);

    if (host_virt) {
        mr->virt = host_virt;
        mr->start = guest_start;
        mr->length = guest_length;
        mr->virt += (mr->start & (TARGET_PAGE_SIZE - 1));

        ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt,
                                     mr->length, access_flags);
        if (ret) {
            pr_dbg("Fail in rdma_backend_create_mr, err=%d\n", ret);
            ret = -EIO;
            goto out_dealloc_mr;
        }
    }

    /* We keep mr_handle in lkey so send and recv get get mr ptr */
    *lkey = *mr_handle;
    *rkey = -1;

    mr->pd_handle = pd_handle;

    return 0;

out_dealloc_mr:
    res_tbl_dealloc(&dev_res->mr_tbl, *mr_handle);

    return ret;
}

RdmaRmMR *rdma_rm_get_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
{
    return res_tbl_get(&dev_res->mr_tbl, mr_handle);
}

void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
{
    RdmaRmMR *mr = rdma_rm_get_mr(dev_res, mr_handle);

    if (mr) {
        rdma_backend_destroy_mr(&mr->backend_mr);
        pr_dbg("start=0x%" PRIx64 "\n", mr->start);
        if (mr->start) {
            mr->virt -= (mr->start & (TARGET_PAGE_SIZE - 1));
            munmap(mr->virt, mr->length);
        }
        res_tbl_dealloc(&dev_res->mr_tbl, mr_handle);
    }
}

int rdma_rm_alloc_uc(RdmaDeviceResources *dev_res, uint32_t pfn,
                     uint32_t *uc_handle)
{
    RdmaRmUC *uc;

    /* TODO: Need to make sure pfn is between bar start address and
     * bsd+RDMA_BAR2_UAR_SIZE
    if (pfn > RDMA_BAR2_UAR_SIZE) {
        pr_err("pfn out of range (%d > %d)\n", pfn, RDMA_BAR2_UAR_SIZE);
        return -ENOMEM;
    }
    */

    uc = res_tbl_alloc(&dev_res->uc_tbl, uc_handle);
    if (!uc) {
        return -ENOMEM;
    }

    return 0;
}

RdmaRmUC *rdma_rm_get_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
{
    return res_tbl_get(&dev_res->uc_tbl, uc_handle);
}

void rdma_rm_dealloc_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
{
    RdmaRmUC *uc = rdma_rm_get_uc(dev_res, uc_handle);

    if (uc) {
        res_tbl_dealloc(&dev_res->uc_tbl, uc_handle);
    }
}

RdmaRmCQ *rdma_rm_get_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
    return res_tbl_get(&dev_res->cq_tbl, cq_handle);
}

int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t cqe, uint32_t *cq_handle, void *opaque)
{
    int rc;
    RdmaRmCQ *cq;

    cq = res_tbl_alloc(&dev_res->cq_tbl, cq_handle);
    if (!cq) {
        return -ENOMEM;
    }

    cq->opaque = opaque;
    cq->notify = CNT_CLEAR;

    rc = rdma_backend_create_cq(backend_dev, &cq->backend_cq, cqe);
    if (rc) {
        rc = -EIO;
        goto out_dealloc_cq;
    }

    return 0;

out_dealloc_cq:
    rdma_rm_dealloc_cq(dev_res, *cq_handle);

    return rc;
}

void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle,
                           bool notify)
{
    RdmaRmCQ *cq;

    pr_dbg("cq_handle=%d, notify=0x%x\n", cq_handle, notify);

    cq = rdma_rm_get_cq(dev_res, cq_handle);
    if (!cq) {
        return;
    }

    if (cq->notify != CNT_SET) {
        cq->notify = notify ? CNT_ARM : CNT_CLEAR;
    }

    pr_dbg("notify=%d\n", cq->notify);
}

void rdma_rm_dealloc_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
    RdmaRmCQ *cq;

    cq = rdma_rm_get_cq(dev_res, cq_handle);
    if (!cq) {
        return;
    }

    rdma_backend_destroy_cq(&cq->backend_cq);

    res_tbl_dealloc(&dev_res->cq_tbl, cq_handle);
}

RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn)
{
    GBytes *key = g_bytes_new(&qpn, sizeof(qpn));

    RdmaRmQP *qp = g_hash_table_lookup(dev_res->qp_hash, key);

    g_bytes_unref(key);

    return qp;
}

int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                     uint8_t qp_type, uint32_t max_send_wr,
                     uint32_t max_send_sge, uint32_t send_cq_handle,
                     uint32_t max_recv_wr, uint32_t max_recv_sge,
                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn)
{
    int rc;
    RdmaRmQP *qp;
    RdmaRmCQ *scq, *rcq;
    RdmaRmPD *pd;
    uint32_t rm_qpn;

    pr_dbg("qp_type=%d\n", qp_type);

    pd = rdma_rm_get_pd(dev_res, pd_handle);
    if (!pd) {
        pr_err("Invalid pd handle (%d)\n", pd_handle);
        return -EINVAL;
    }

    scq = rdma_rm_get_cq(dev_res, send_cq_handle);
    rcq = rdma_rm_get_cq(dev_res, recv_cq_handle);

    if (!scq || !rcq) {
        pr_err("Invalid send_cqn or recv_cqn (%d, %d)\n",
               send_cq_handle, recv_cq_handle);
        return -EINVAL;
    }

    if (qp_type == IBV_QPT_GSI) {
        scq->notify = CNT_SET;
        rcq->notify = CNT_SET;
    }

    qp = res_tbl_alloc(&dev_res->qp_tbl, &rm_qpn);
    if (!qp) {
        return -ENOMEM;
    }
    pr_dbg("rm_qpn=%d\n", rm_qpn);

    qp->qpn = rm_qpn;
    qp->qp_state = IBV_QPS_RESET;
    qp->qp_type = qp_type;
    qp->send_cq_handle = send_cq_handle;
    qp->recv_cq_handle = recv_cq_handle;
    qp->opaque = opaque;

    rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd,
                                &scq->backend_cq, &rcq->backend_cq, max_send_wr,
                                max_recv_wr, max_send_sge, max_recv_sge);
    if (rc) {
        rc = -EIO;
        goto out_dealloc_qp;
    }

    *qpn = rdma_backend_qpn(&qp->backend_qp);
    pr_dbg("rm_qpn=%d, backend_qpn=0x%x\n", rm_qpn, *qpn);
    g_hash_table_insert(dev_res->qp_hash, g_bytes_new(qpn, sizeof(*qpn)), qp);

    return 0;

out_dealloc_qp:
    res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);

    return rc;
}

int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                      uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
                      union ibv_gid *dgid, uint32_t dqpn,
                      enum ibv_qp_state qp_state, uint32_t qkey,
                      uint32_t rq_psn, uint32_t sq_psn)
{
    RdmaRmQP *qp;
    int ret;

    pr_dbg("qpn=0x%x\n", qp_handle);
    pr_dbg("qkey=0x%x\n", qkey);

    qp = rdma_rm_get_qp(dev_res, qp_handle);
    if (!qp) {
        return -EINVAL;
    }

    pr_dbg("qp_type=%d\n", qp->qp_type);
    pr_dbg("attr_mask=0x%x\n", attr_mask);

    if (qp->qp_type == IBV_QPT_SMI) {
        pr_dbg("QP0 unsupported\n");
        return -EPERM;
    } else if (qp->qp_type == IBV_QPT_GSI) {
        pr_dbg("QP1\n");
        return 0;
    }

    if (attr_mask & IBV_QP_STATE) {
        qp->qp_state = qp_state;
        pr_dbg("qp_state=%d\n", qp->qp_state);

        if (qp->qp_state == IBV_QPS_INIT) {
            ret = rdma_backend_qp_state_init(backend_dev, &qp->backend_qp,
                                             qp->qp_type, qkey);
            if (ret) {
                return -EIO;
            }
        }

        if (qp->qp_state == IBV_QPS_RTR) {
            /* Get backend gid index */
            pr_dbg("Guest sgid_idx=%d\n", sgid_idx);
            sgid_idx = rdma_rm_get_backend_gid_index(dev_res, backend_dev,
                                                     sgid_idx);
            if (sgid_idx <= 0) { /* TODO check also less than bk.max_sgid */
                pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n", sgid_idx);
                return -EIO;
            }

            ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp,
                                            qp->qp_type, sgid_idx, dgid, dqpn,
                                            rq_psn, qkey,
                                            attr_mask & IBV_QP_QKEY);
            if (ret) {
                return -EIO;
            }
        }

        if (qp->qp_state == IBV_QPS_RTS) {
            ret = rdma_backend_qp_state_rts(&qp->backend_qp, qp->qp_type,
                                            sq_psn, qkey,
                                            attr_mask & IBV_QP_QKEY);
            if (ret) {
                return -EIO;
            }
        }
    }

    return 0;
}

int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t qp_handle, struct ibv_qp_attr *attr,
                     int attr_mask, struct ibv_qp_init_attr *init_attr)
{
    RdmaRmQP *qp;

    pr_dbg("qpn=0x%x\n", qp_handle);

    qp = rdma_rm_get_qp(dev_res, qp_handle);
    if (!qp) {
        return -EINVAL;
    }

    pr_dbg("qp_type=%d\n", qp->qp_type);

    return rdma_backend_query_qp(&qp->backend_qp, attr, attr_mask, init_attr);
}

void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
{
    RdmaRmQP *qp;
    GBytes *key;

    key = g_bytes_new(&qp_handle, sizeof(qp_handle));
    qp = g_hash_table_lookup(dev_res->qp_hash, key);
    g_hash_table_remove(dev_res->qp_hash, key);
    g_bytes_unref(key);

    if (!qp) {
        return;
    }

    rdma_backend_destroy_qp(&qp->backend_qp);

    res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
}

void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
    void **cqe_ctx;

    cqe_ctx = res_tbl_get(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
    if (!cqe_ctx) {
        return NULL;
    }

    pr_dbg("ctx=%p\n", *cqe_ctx);

    return *cqe_ctx;
}

int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
                          void *ctx)
{
    void **cqe_ctx;

    cqe_ctx = res_tbl_alloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
    if (!cqe_ctx) {
        return -ENOMEM;
    }

    pr_dbg("ctx=%p\n", ctx);
    *cqe_ctx = ctx;

    return 0;
}

void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
    res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
}

int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                    const char *ifname, union ibv_gid *gid, int gid_idx)
{
    int rc;

    rc = rdma_backend_add_gid(backend_dev, ifname, gid);
    if (rc) {
        pr_dbg("Fail to add gid\n");
        return -EINVAL;
    }

    memcpy(&dev_res->port.gid_tbl[gid_idx].gid, gid, sizeof(*gid));

    return 0;
}

int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                    const char *ifname, int gid_idx)
{
    int rc;

    if (!dev_res->port.gid_tbl[gid_idx].gid.global.interface_id) {
        return 0;
    }

    rc = rdma_backend_del_gid(backend_dev, ifname,
                              &dev_res->port.gid_tbl[gid_idx].gid);
    if (rc) {
        pr_dbg("Fail to delete gid\n");
        return -EINVAL;
    }

    memset(dev_res->port.gid_tbl[gid_idx].gid.raw, 0,
           sizeof(dev_res->port.gid_tbl[gid_idx].gid));
    dev_res->port.gid_tbl[gid_idx].backend_gid_index = -1;

    return 0;
}

int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res,
                                  RdmaBackendDev *backend_dev, int sgid_idx)
{
    if (unlikely(sgid_idx < 0 || sgid_idx > MAX_PORT_GIDS)) {
        pr_dbg("Got invalid sgid_idx %d\n", sgid_idx);
        return -EINVAL;
    }

    if (unlikely(dev_res->port.gid_tbl[sgid_idx].backend_gid_index == -1)) {
        dev_res->port.gid_tbl[sgid_idx].backend_gid_index =
        rdma_backend_get_gid_index(backend_dev,
                                   &dev_res->port.gid_tbl[sgid_idx].gid);
    }

    pr_dbg("backend_gid_index=%d\n",
           dev_res->port.gid_tbl[sgid_idx].backend_gid_index);

    return dev_res->port.gid_tbl[sgid_idx].backend_gid_index;
}

static void destroy_qp_hash_key(gpointer data)
{
    g_bytes_unref(data);
}

static void init_ports(RdmaDeviceResources *dev_res)
{
    int i;

    memset(&dev_res->port, 0, sizeof(dev_res->port));

    dev_res->port.state = IBV_PORT_DOWN;
    for (i = 0; i < MAX_PORT_GIDS; i++) {
        dev_res->port.gid_tbl[i].backend_gid_index = -1;
    }
}

static void fini_ports(RdmaDeviceResources *dev_res,
                       RdmaBackendDev *backend_dev, const char *ifname)
{
    int i;

    dev_res->port.state = IBV_PORT_DOWN;
    for (i = 0; i < MAX_PORT_GIDS; i++) {
        rdma_rm_del_gid(dev_res, backend_dev, ifname, i);
    }
}

int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
                 Error **errp)
{
    dev_res->qp_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal,
                                             destroy_qp_hash_key, NULL);
    if (!dev_res->qp_hash) {
        return -ENOMEM;
    }

    res_tbl_init("PD", &dev_res->pd_tbl, dev_attr->max_pd, sizeof(RdmaRmPD));
    res_tbl_init("CQ", &dev_res->cq_tbl, dev_attr->max_cq, sizeof(RdmaRmCQ));
    res_tbl_init("MR", &dev_res->mr_tbl, dev_attr->max_mr, sizeof(RdmaRmMR));
    res_tbl_init("QP", &dev_res->qp_tbl, dev_attr->max_qp, sizeof(RdmaRmQP));
    res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp *
                       dev_attr->max_qp_wr, sizeof(void *));
    res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));

    init_ports(dev_res);

    return 0;
}

void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                  const char *ifname)
{
    fini_ports(dev_res, backend_dev, ifname);

    res_tbl_free(&dev_res->uc_tbl);
    res_tbl_free(&dev_res->cqe_ctx_tbl);
    res_tbl_free(&dev_res->qp_tbl);
    res_tbl_free(&dev_res->mr_tbl);
    res_tbl_free(&dev_res->cq_tbl);
    res_tbl_free(&dev_res->pd_tbl);

    if (dev_res->qp_hash) {
        g_hash_table_destroy(dev_res->qp_hash);
    }
}