summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/trans_rdma.c4
-rw-r--r--net/rds/ib.c2
-rw-r--r--net/rds/ib.h6
-rw-r--r--net/rds/ib_cm.c2
-rw-r--r--net/rds/ib_send.c71
-rw-r--r--net/rds/iw.c2
-rw-r--r--net/rds/iw.h9
-rw-r--r--net/rds/iw_cm.c2
-rw-r--r--net/rds/iw_rdma.c129
-rw-r--r--net/rds/iw_send.c154
-rw-r--r--net/rds/rdma_transport.c4
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c119
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c123
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c18
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c38
-rw-r--r--net/sunrpc/xprtrdma/verbs.c3
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h3
17 files changed, 339 insertions, 350 deletions
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index ba1210253f5e..52b4a2f993f2 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -655,8 +655,8 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
return -ENOMEM;
/* Create the RDMA CM ID */
- rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP,
- IB_QPT_RC);
+ rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client,
+ RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(rdma->cm_id))
goto error;
diff --git a/net/rds/ib.c b/net/rds/ib.c
index a833ab7898fe..f222885ac0c7 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -336,7 +336,7 @@ static int rds_ib_laddr_check(struct net *net, __be32 addr)
/* Create a CMA ID and try to bind it. This catches both
* IB and iWARP capable NICs.
*/
- cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
+ cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);
diff --git a/net/rds/ib.h b/net/rds/ib.h
index f17d09567890..b3fdebb57460 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -75,7 +75,11 @@ struct rds_ib_connect_private {
struct rds_ib_send_work {
void *s_op;
- struct ib_send_wr s_wr;
+ union {
+ struct ib_send_wr s_wr;
+ struct ib_rdma_wr s_rdma_wr;
+ struct ib_atomic_wr s_atomic_wr;
+ };
struct ib_sge s_sge[RDS_IB_MAX_SGE];
unsigned long s_queued;
};
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 2b2370e7f356..da5a7fb98c77 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -668,7 +668,7 @@ int rds_ib_conn_connect(struct rds_connection *conn)
/* XXX I wonder what affect the port space has */
/* delegate cm event handler to rdma_transport */
- ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
+ ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(ic->i_cm_id)) {
ret = PTR_ERR(ic->i_cm_id);
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 670882c752e9..eac30bf486d7 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -777,23 +777,23 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
send->s_queued = jiffies;
if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
- send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
- send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare;
- send->s_wr.wr.atomic.swap = op->op_m_cswp.swap;
- send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask;
- send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask;
+ send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
+ send->s_atomic_wr.compare_add = op->op_m_cswp.compare;
+ send->s_atomic_wr.swap = op->op_m_cswp.swap;
+ send->s_atomic_wr.compare_add_mask = op->op_m_cswp.compare_mask;
+ send->s_atomic_wr.swap_mask = op->op_m_cswp.swap_mask;
} else { /* FADD */
- send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
- send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add;
- send->s_wr.wr.atomic.swap = 0;
- send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask;
- send->s_wr.wr.atomic.swap_mask = 0;
+ send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
+ send->s_atomic_wr.compare_add = op->op_m_fadd.add;
+ send->s_atomic_wr.swap = 0;
+ send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask;
+ send->s_atomic_wr.swap_mask = 0;
}
nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
- send->s_wr.num_sge = 1;
- send->s_wr.next = NULL;
- send->s_wr.wr.atomic.remote_addr = op->op_remote_addr;
- send->s_wr.wr.atomic.rkey = op->op_rkey;
+ send->s_atomic_wr.wr.num_sge = 1;
+ send->s_atomic_wr.wr.next = NULL;
+ send->s_atomic_wr.remote_addr = op->op_remote_addr;
+ send->s_atomic_wr.rkey = op->op_rkey;
send->s_op = op;
rds_message_addref(container_of(send->s_op, struct rds_message, atomic));
@@ -818,11 +818,11 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
if (nr_sig)
atomic_add(nr_sig, &ic->i_signaled_sends);
- failed_wr = &send->s_wr;
- ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr);
+ failed_wr = &send->s_atomic_wr.wr;
+ ret = ib_post_send(ic->i_cm_id->qp, &send->s_atomic_wr.wr, &failed_wr);
rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
- send, &send->s_wr, ret, failed_wr);
- BUG_ON(failed_wr != &send->s_wr);
+ send, &send->s_atomic_wr, ret, failed_wr);
+ BUG_ON(failed_wr != &send->s_atomic_wr.wr);
if (ret) {
printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
"returned %d\n", &conn->c_faddr, ret);
@@ -831,9 +831,9 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
goto out;
}
- if (unlikely(failed_wr != &send->s_wr)) {
+ if (unlikely(failed_wr != &send->s_atomic_wr.wr)) {
printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
- BUG_ON(failed_wr != &send->s_wr);
+ BUG_ON(failed_wr != &send->s_atomic_wr.wr);
}
out:
@@ -904,22 +904,23 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);
send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
- send->s_wr.wr.rdma.remote_addr = remote_addr;
- send->s_wr.wr.rdma.rkey = op->op_rkey;
+ send->s_rdma_wr.remote_addr = remote_addr;
+ send->s_rdma_wr.rkey = op->op_rkey;
if (num_sge > max_sge) {
- send->s_wr.num_sge = max_sge;
+ send->s_rdma_wr.wr.num_sge = max_sge;
num_sge -= max_sge;
} else {
- send->s_wr.num_sge = num_sge;
+ send->s_rdma_wr.wr.num_sge = num_sge;
}
- send->s_wr.next = NULL;
+ send->s_rdma_wr.wr.next = NULL;
if (prev)
- prev->s_wr.next = &send->s_wr;
+ prev->s_rdma_wr.wr.next = &send->s_rdma_wr.wr;
- for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
+ for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
+ scat != &op->op_sg[op->op_count]; j++) {
len = ib_sg_dma_len(ic->i_cm_id->device, scat);
send->s_sge[j].addr =
ib_sg_dma_address(ic->i_cm_id->device, scat);
@@ -934,7 +935,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
}
rdsdebug("send %p wr %p num_sge %u next %p\n", send,
- &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
+ &send->s_rdma_wr.wr,
+ send->s_rdma_wr.wr.num_sge,
+ send->s_rdma_wr.wr.next);
prev = send;
if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
@@ -955,11 +958,11 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
if (nr_sig)
atomic_add(nr_sig, &ic->i_signaled_sends);
- failed_wr = &first->s_wr;
- ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
+ failed_wr = &first->s_rdma_wr.wr;
+ ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
- first, &first->s_wr, ret, failed_wr);
- BUG_ON(failed_wr != &first->s_wr);
+ first, &first->s_rdma_wr.wr, ret, failed_wr);
+ BUG_ON(failed_wr != &first->s_rdma_wr.wr);
if (ret) {
printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
"returned %d\n", &conn->c_faddr, ret);
@@ -968,9 +971,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
goto out;
}
- if (unlikely(failed_wr != &first->s_wr)) {
+ if (unlikely(failed_wr != &first->s_rdma_wr.wr)) {
printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
- BUG_ON(failed_wr != &first->s_wr);
+ BUG_ON(failed_wr != &first->s_rdma_wr.wr);
}
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 3df0295c6659..576f1825fc55 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -223,7 +223,7 @@ static int rds_iw_laddr_check(struct net *net, __be32 addr)
/* Create a CMA ID and try to bind it. This catches both
* IB and iWARP capable NICs.
*/
- cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
+ cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);
diff --git a/net/rds/iw.h b/net/rds/iw.h
index cbe6674e31ee..5af01d1758b3 100644
--- a/net/rds/iw.h
+++ b/net/rds/iw.h
@@ -74,10 +74,13 @@ struct rds_iw_send_work {
struct rm_rdma_op *s_op;
struct rds_iw_mapping *s_mapping;
struct ib_mr *s_mr;
- struct ib_fast_reg_page_list *s_page_list;
unsigned char s_remap_count;
- struct ib_send_wr s_wr;
+ union {
+ struct ib_send_wr s_send_wr;
+ struct ib_rdma_wr s_rdma_wr;
+ struct ib_reg_wr s_reg_wr;
+ };
struct ib_sge s_sge[RDS_IW_MAX_SGE];
unsigned long s_queued;
};
@@ -195,7 +198,7 @@ struct rds_iw_device {
/* Magic WR_ID for ACKs */
#define RDS_IW_ACK_WR_ID ((u64)0xffffffffffffffffULL)
-#define RDS_IW_FAST_REG_WR_ID ((u64)0xefefefefefefefefULL)
+#define RDS_IW_REG_WR_ID ((u64)0xefefefefefefefefULL)
#define RDS_IW_LOCAL_INV_WR_ID ((u64)0xdfdfdfdfdfdfdfdfULL)
struct rds_iw_statistics {
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index a6553a6fb2bc..aea4c911bc76 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -524,7 +524,7 @@ int rds_iw_conn_connect(struct rds_connection *conn)
/* XXX I wonder what affect the port space has */
/* delegate cm event handler to rdma_transport */
- ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
+ ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(ic->i_cm_id)) {
ret = PTR_ERR(ic->i_cm_id);
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index d3d4454ffc84..b09a40c1adce 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -47,7 +47,6 @@ struct rds_iw_mr {
struct rdma_cm_id *cm_id;
struct ib_mr *mr;
- struct ib_fast_reg_page_list *page_list;
struct rds_iw_mapping mapping;
unsigned char remap_count;
@@ -77,8 +76,8 @@ struct rds_iw_mr_pool {
static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all);
static void rds_iw_mr_pool_flush_worker(struct work_struct *work);
-static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
-static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
+static int rds_iw_init_reg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
+static int rds_iw_map_reg(struct rds_iw_mr_pool *pool,
struct rds_iw_mr *ibmr,
struct scatterlist *sg, unsigned int nents);
static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
@@ -258,19 +257,18 @@ static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg,
sg->bytes = 0;
}
-static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
- struct rds_iw_scatterlist *sg)
+static int rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
+ struct rds_iw_scatterlist *sg)
{
struct ib_device *dev = rds_iwdev->dev;
- u64 *dma_pages = NULL;
- int i, j, ret;
+ int i, ret;
WARN_ON(sg->dma_len);
sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
if (unlikely(!sg->dma_len)) {
printk(KERN_WARNING "RDS/IW: dma_map_sg failed!\n");
- return ERR_PTR(-EBUSY);
+ return -EBUSY;
}
sg->bytes = 0;
@@ -303,31 +301,14 @@ static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
if (sg->dma_npages > fastreg_message_size)
goto out_unmap;
- dma_pages = kmalloc(sizeof(u64) * sg->dma_npages, GFP_ATOMIC);
- if (!dma_pages) {
- ret = -ENOMEM;
- goto out_unmap;
- }
- for (i = j = 0; i < sg->dma_len; ++i) {
- unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]);
- u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]);
- u64 end_addr;
- end_addr = dma_addr + dma_len;
- dma_addr &= ~PAGE_MASK;
- for (; dma_addr < end_addr; dma_addr += PAGE_SIZE)
- dma_pages[j++] = dma_addr;
- BUG_ON(j > sg->dma_npages);
- }
-
- return dma_pages;
+ return 0;
out_unmap:
ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
sg->dma_len = 0;
- kfree(dma_pages);
- return ERR_PTR(ret);
+ return ret;
}
@@ -440,7 +421,7 @@ static struct rds_iw_mr *rds_iw_alloc_mr(struct rds_iw_device *rds_iwdev)
INIT_LIST_HEAD(&ibmr->mapping.m_list);
ibmr->mapping.m_mr = ibmr;
- err = rds_iw_init_fastreg(pool, ibmr);
+ err = rds_iw_init_reg(pool, ibmr);
if (err)
goto out_no_cigar;
@@ -620,7 +601,7 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
ibmr->cm_id = cm_id;
ibmr->device = rds_iwdev;
- ret = rds_iw_map_fastreg(rds_iwdev->mr_pool, ibmr, sg, nents);
+ ret = rds_iw_map_reg(rds_iwdev->mr_pool, ibmr, sg, nents);
if (ret == 0)
*key_ret = ibmr->mr->rkey;
else
@@ -636,7 +617,7 @@ out:
}
/*
- * iWARP fastreg handling
+ * iWARP reg handling
*
* The life cycle of a fastreg registration is a bit different from
* FMRs.
@@ -648,7 +629,7 @@ out:
* This creates a bit of a problem for us, as we do not have the destination
* IP in GET_MR, so the connection must be setup prior to the GET_MR call for
* RDMA to be correctly setup. If a fastreg request is present, rds_iw_xmit
- * will try to queue a LOCAL_INV (if needed) and a FAST_REG_MR work request
+ * will try to queue a LOCAL_INV (if needed) and a REG_MR work request
* before queuing the SEND. When completions for these arrive, they are
* dispatched to the MR has a bit set showing that RDMa can be performed.
*
@@ -657,11 +638,10 @@ out:
* The expectation there is that this invalidation step includes ALL
* PREVIOUSLY FREED MRs.
*/
-static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool,
- struct rds_iw_mr *ibmr)
+static int rds_iw_init_reg(struct rds_iw_mr_pool *pool,
+ struct rds_iw_mr *ibmr)
{
struct rds_iw_device *rds_iwdev = pool->device;
- struct ib_fast_reg_page_list *page_list = NULL;
struct ib_mr *mr;
int err;
@@ -674,55 +654,44 @@ static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool,
return err;
}
- /* FIXME - this is overkill, but mapping->m_sg.dma_len/mapping->m_sg.dma_npages
- * is not filled in.
- */
- page_list = ib_alloc_fast_reg_page_list(rds_iwdev->dev, pool->max_message_size);
- if (IS_ERR(page_list)) {
- err = PTR_ERR(page_list);
-
- printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed (err=%d)\n", err);
- ib_dereg_mr(mr);
- return err;
- }
-
- ibmr->page_list = page_list;
ibmr->mr = mr;
return 0;
}
-static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping)
+static int rds_iw_rdma_reg_mr(struct rds_iw_mapping *mapping)
{
struct rds_iw_mr *ibmr = mapping->m_mr;
- struct ib_send_wr f_wr, *failed_wr;
- int ret;
+ struct rds_iw_scatterlist *m_sg = &mapping->m_sg;
+ struct ib_reg_wr reg_wr;
+ struct ib_send_wr *failed_wr;
+ int ret, n;
+
+ n = ib_map_mr_sg_zbva(ibmr->mr, m_sg->list, m_sg->len, PAGE_SIZE);
+ if (unlikely(n != m_sg->len))
+ return n < 0 ? n : -EINVAL;
+
+ reg_wr.wr.next = NULL;
+ reg_wr.wr.opcode = IB_WR_REG_MR;
+ reg_wr.wr.wr_id = RDS_IW_REG_WR_ID;
+ reg_wr.wr.num_sge = 0;
+ reg_wr.mr = ibmr->mr;
+ reg_wr.key = mapping->m_rkey;
+ reg_wr.access = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE;
/*
- * Perform a WR for the fast_reg_mr. Each individual page
+ * Perform a WR for the reg_mr. Each individual page
* in the sg list is added to the fast reg page list and placed
- * inside the fast_reg_mr WR. The key used is a rolling 8bit
+ * inside the reg_mr WR. The key used is a rolling 8bit
* counter, which should guarantee uniqueness.
*/
ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++);
mapping->m_rkey = ibmr->mr->rkey;
- memset(&f_wr, 0, sizeof(f_wr));
- f_wr.wr_id = RDS_IW_FAST_REG_WR_ID;
- f_wr.opcode = IB_WR_FAST_REG_MR;
- f_wr.wr.fast_reg.length = mapping->m_sg.bytes;
- f_wr.wr.fast_reg.rkey = mapping->m_rkey;
- f_wr.wr.fast_reg.page_list = ibmr->page_list;
- f_wr.wr.fast_reg.page_list_len = mapping->m_sg.dma_len;
- f_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
- f_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_READ |
- IB_ACCESS_REMOTE_WRITE;
- f_wr.wr.fast_reg.iova_start = 0;
- f_wr.send_flags = IB_SEND_SIGNALED;
-
- failed_wr = &f_wr;
- ret = ib_post_send(ibmr->cm_id->qp, &f_wr, &failed_wr);
- BUG_ON(failed_wr != &f_wr);
+ failed_wr = &reg_wr.wr;
+ ret = ib_post_send(ibmr->cm_id->qp, &reg_wr.wr, &failed_wr);
+ BUG_ON(failed_wr != &reg_wr.wr);
if (ret)
printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n",
__func__, __LINE__, ret);
@@ -754,21 +723,20 @@ out:
return ret;
}
-static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
- struct rds_iw_mr *ibmr,
- struct scatterlist *sg,
- unsigned int sg_len)
+static int rds_iw_map_reg(struct rds_iw_mr_pool *pool,
+ struct rds_iw_mr *ibmr,
+ struct scatterlist *sg,
+ unsigned int sg_len)
{
struct rds_iw_device *rds_iwdev = pool->device;
struct rds_iw_mapping *mapping = &ibmr->mapping;
u64 *dma_pages;
- int i, ret = 0;
+ int ret = 0;
rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len);
- dma_pages = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg);
- if (IS_ERR(dma_pages)) {
- ret = PTR_ERR(dma_pages);
+ ret = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg);
+ if (ret) {
dma_pages = NULL;
goto out;
}
@@ -778,10 +746,7 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
goto out;
}
- for (i = 0; i < mapping->m_sg.dma_npages; ++i)
- ibmr->page_list->page_list[i] = dma_pages[i];
-
- ret = rds_iw_rdma_build_fastreg(mapping);
+ ret = rds_iw_rdma_reg_mr(mapping);
if (ret)
goto out;
@@ -867,8 +832,6 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool,
struct rds_iw_mr *ibmr)
{
- if (ibmr->page_list)
- ib_free_fast_reg_page_list(ibmr->page_list);
if (ibmr->mr)
ib_dereg_mr(ibmr->mr);
}
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 86152ec3b887..e20bd503f4bd 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -137,13 +137,13 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic)
send->s_op = NULL;
send->s_mapping = NULL;
- send->s_wr.next = NULL;
- send->s_wr.wr_id = i;
- send->s_wr.sg_list = send->s_sge;
- send->s_wr.num_sge = 1;
- send->s_wr.opcode = IB_WR_SEND;
- send->s_wr.send_flags = 0;
- send->s_wr.ex.imm_data = 0;
+ send->s_send_wr.next = NULL;
+ send->s_send_wr.wr_id = i;
+ send->s_send_wr.sg_list = send->s_sge;
+ send->s_send_wr.num_sge = 1;
+ send->s_send_wr.opcode = IB_WR_SEND;
+ send->s_send_wr.send_flags = 0;
+ send->s_send_wr.ex.imm_data = 0;
sge = rds_iw_data_sge(ic, send->s_sge);
sge->lkey = 0;
@@ -159,13 +159,6 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic)
printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed\n");
break;
}
-
- send->s_page_list = ib_alloc_fast_reg_page_list(
- ic->i_cm_id->device, fastreg_message_size);
- if (IS_ERR(send->s_page_list)) {
- printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed\n");
- break;
- }
}
}
@@ -177,9 +170,7 @@ void rds_iw_send_clear_ring(struct rds_iw_connection *ic)
for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
BUG_ON(!send->s_mr);
ib_dereg_mr(send->s_mr);
- BUG_ON(!send->s_page_list);
- ib_free_fast_reg_page_list(send->s_page_list);
- if (send->s_wr.opcode == 0xdead)
+ if (send->s_send_wr.opcode == 0xdead)
continue;
if (send->s_rm)
rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
@@ -227,7 +218,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
continue;
}
- if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == RDS_IW_FAST_REG_WR_ID) {
+ if (wc.opcode == IB_WC_REG_MR && wc.wr_id == RDS_IW_REG_WR_ID) {
ic->i_fastreg_posted = 1;
continue;
}
@@ -247,12 +238,12 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
send = &ic->i_sends[oldest];
/* In the error case, wc.opcode sometimes contains garbage */
- switch (send->s_wr.opcode) {
+ switch (send->s_send_wr.opcode) {
case IB_WR_SEND:
if (send->s_rm)
rds_iw_send_unmap_rm(ic, send, wc.status);
break;
- case IB_WR_FAST_REG_MR:
+ case IB_WR_REG_MR:
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_READ:
case IB_WR_RDMA_READ_WITH_INV:
@@ -262,12 +253,12 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
default:
printk_ratelimited(KERN_NOTICE
"RDS/IW: %s: unexpected opcode 0x%x in WR!\n",
- __func__, send->s_wr.opcode);
+ __func__, send->s_send_wr.opcode);
break;
}
- send->s_wr.opcode = 0xdead;
- send->s_wr.num_sge = 1;
+ send->s_send_wr.opcode = 0xdead;
+ send->s_send_wr.num_sge = 1;
if (time_after(jiffies, send->s_queued + HZ/2))
rds_iw_stats_inc(s_iw_tx_stalled);
@@ -455,10 +446,10 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
WARN_ON(pos != send - ic->i_sends);
- send->s_wr.send_flags = send_flags;
- send->s_wr.opcode = IB_WR_SEND;
- send->s_wr.num_sge = 2;
- send->s_wr.next = NULL;
+ send->s_send_wr.send_flags = send_flags;
+ send->s_send_wr.opcode = IB_WR_SEND;
+ send->s_send_wr.num_sge = 2;
+ send->s_send_wr.next = NULL;
send->s_queued = jiffies;
send->s_op = NULL;
@@ -472,7 +463,7 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
} else {
/* We're sending a packet with no payload. There is only
* one SGE */
- send->s_wr.num_sge = 1;
+ send->s_send_wr.num_sge = 1;
sge = &send->s_sge[0];
}
@@ -672,23 +663,23 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
*/
if (ic->i_unsignaled_wrs-- == 0) {
ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
- send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
}
ic->i_unsignaled_bytes -= len;
if (ic->i_unsignaled_bytes <= 0) {
ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
- send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
}
/*
* Always signal the last one if we're stopping due to flow control.
*/
if (flow_controlled && i == (work_alloc-1))
- send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
rdsdebug("send %p wr %p num_sge %u next %p\n", send,
- &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
+ &send->s_send_wr, send->s_send_wr.num_sge, send->s_send_wr.next);
sent += len;
rm->data.op_dmaoff += len;
@@ -722,7 +713,7 @@ add_header:
}
if (prev)
- prev->s_wr.next = &send->s_wr;
+ prev->s_send_wr.next = &send->s_send_wr;
prev = send;
pos = (pos + 1) % ic->i_send_ring.w_nr;
@@ -736,7 +727,7 @@ add_header:
/* if we finished the message then send completion owns it */
if (scat == &rm->data.op_sg[rm->data.op_count]) {
prev->s_rm = ic->i_rm;
- prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ prev->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
ic->i_rm = NULL;
}
@@ -748,11 +739,11 @@ add_header:
rds_iw_send_add_credits(conn, credit_alloc - i);
/* XXX need to worry about failed_wr and partial sends. */
- failed_wr = &first->s_wr;
- ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
+ failed_wr = &first->s_send_wr;
+ ret = ib_post_send(ic->i_cm_id->qp, &first->s_send_wr, &failed_wr);
rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
- first, &first->s_wr, ret, failed_wr);
- BUG_ON(failed_wr != &first->s_wr);
+ first, &first->s_send_wr, ret, failed_wr);
+ BUG_ON(failed_wr != &first->s_send_wr);
if (ret) {
printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 "
"returned %d\n", &conn->c_faddr, ret);
@@ -770,24 +761,26 @@ out:
return ret;
}
-static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rds_iw_connection *ic, struct rds_iw_send_work *send, int nent, int len, u64 sg_addr)
+static int rds_iw_build_send_reg(struct rds_iw_send_work *send,
+ struct scatterlist *sg,
+ int sg_nents)
{
- BUG_ON(nent > send->s_page_list->max_page_list_len);
- /*
- * Perform a WR for the fast_reg_mr. Each individual page
- * in the sg list is added to the fast reg page list and placed
- * inside the fast_reg_mr WR.
- */
- send->s_wr.opcode = IB_WR_FAST_REG_MR;
- send->s_wr.wr.fast_reg.length = len;
- send->s_wr.wr.fast_reg.rkey = send->s_mr->rkey;
- send->s_wr.wr.fast_reg.page_list = send->s_page_list;
- send->s_wr.wr.fast_reg.page_list_len = nent;
- send->s_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
- send->s_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE;
- send->s_wr.wr.fast_reg.iova_start = sg_addr;
+ int n;
+
+ n = ib_map_mr_sg(send->s_mr, sg, sg_nents, PAGE_SIZE);
+ if (unlikely(n != sg_nents))
+ return n < 0 ? n : -EINVAL;
+
+ send->s_reg_wr.wr.opcode = IB_WR_REG_MR;
+ send->s_reg_wr.wr.wr_id = 0;
+ send->s_reg_wr.wr.num_sge = 0;
+ send->s_reg_wr.mr = send->s_mr;
+ send->s_reg_wr.key = send->s_mr->rkey;
+ send->s_reg_wr.access = IB_ACCESS_REMOTE_WRITE;
ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
+
+ return 0;
}
int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
@@ -808,6 +801,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
int sent;
int ret;
int num_sge;
+ int sg_nents;
rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
@@ -861,9 +855,10 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
scat = &op->op_sg[0];
sent = 0;
num_sge = op->op_count;
+ sg_nents = 0;
for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
- send->s_wr.send_flags = 0;
+ send->s_rdma_wr.wr.send_flags = 0;
send->s_queued = jiffies;
/*
@@ -872,7 +867,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
*/
if (ic->i_unsignaled_wrs-- == 0) {
ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
- send->s_wr.send_flags = IB_SEND_SIGNALED;
+ send->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
}
/* To avoid the need to have the plumbing to invalidate the fastreg_mr used
@@ -880,30 +875,31 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
* IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed.
*/
if (op->op_write)
- send->s_wr.opcode = IB_WR_RDMA_WRITE;
+ send->s_rdma_wr.wr.opcode = IB_WR_RDMA_WRITE;
else
- send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+ send->s_rdma_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
- send->s_wr.wr.rdma.remote_addr = remote_addr;
- send->s_wr.wr.rdma.rkey = op->op_rkey;
+ send->s_rdma_wr.remote_addr = remote_addr;
+ send->s_rdma_wr.rkey = op->op_rkey;
send->s_op = op;
if (num_sge > rds_iwdev->max_sge) {
- send->s_wr.num_sge = rds_iwdev->max_sge;
+ send->s_rdma_wr.wr.num_sge = rds_iwdev->max_sge;
num_sge -= rds_iwdev->max_sge;
} else
- send->s_wr.num_sge = num_sge;
+ send->s_rdma_wr.wr.num_sge = num_sge;
- send->s_wr.next = NULL;
+ send->s_rdma_wr.wr.next = NULL;
if (prev)
- prev->s_wr.next = &send->s_wr;
+ prev->s_send_wr.next = &send->s_rdma_wr.wr;
- for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
+ for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
+ scat != &op->op_sg[op->op_count]; j++) {
len = ib_sg_dma_len(ic->i_cm_id->device, scat);
- if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV)
- send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat);
+ if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV)
+ sg_nents++;
else {
send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat);
send->s_sge[j].length = len;
@@ -917,15 +913,17 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
scat++;
}
- if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
- send->s_wr.num_sge = 1;
+ if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
+ send->s_rdma_wr.wr.num_sge = 1;
send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr;
send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes;
send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey;
}
rdsdebug("send %p wr %p num_sge %u next %p\n", send,
- &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
+ &send->s_rdma_wr,
+ send->s_rdma_wr.wr.num_sge,
+ send->s_rdma_wr.wr.next);
prev = send;
if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
@@ -934,7 +932,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
/* if we finished the message then send completion owns it */
if (scat == &op->op_sg[op->op_count])
- first->s_wr.send_flags = IB_SEND_SIGNALED;
+ first->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
if (i < work_alloc) {
rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
@@ -948,16 +946,20 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
* fastreg_mr (or possibly a dma_mr)
*/
if (!op->op_write) {
- rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos],
- op->op_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr);
+ ret = rds_iw_build_send_reg(&ic->i_sends[fr_pos],
+ &op->op_sg[0], sg_nents);
+ if (ret) {
+ printk(KERN_WARNING "RDS/IW: failed to reg send mem\n");
+ goto out;
+ }
work_alloc++;
}
- failed_wr = &first->s_wr;
- ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
+ failed_wr = &first->s_rdma_wr.wr;
+ ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
- first, &first->s_wr, ret, failed_wr);
- BUG_ON(failed_wr != &first->s_wr);
+ first, &first->s_rdma_wr, ret, failed_wr);
+ BUG_ON(failed_wr != &first->s_rdma_wr.wr);
if (ret) {
printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 "
"returned %d\n", &conn->c_faddr, ret);
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index b9b40af5345b..9c1fed81bf0f 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -142,8 +142,8 @@ static int rds_rdma_listen_init(void)
struct rdma_cm_id *cm_id;
int ret;
- cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP,
- IB_QPT_RC);
+ cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, NULL,
+ RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cm_id)) {
ret = PTR_ERR(cm_id);
printk(KERN_ERR "RDS/RDMA: failed to setup listener, "
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 5318951b3b53..a1434447b0d6 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -151,9 +151,13 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
if (IS_ERR(f->fr_mr))
goto out_mr_err;
- f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth);
- if (IS_ERR(f->fr_pgl))
+
+ f->sg = kcalloc(depth, sizeof(*f->sg), GFP_KERNEL);
+ if (!f->sg)
goto out_list_err;
+
+ sg_init_table(f->sg, depth);
+
return 0;
out_mr_err:
@@ -163,9 +167,9 @@ out_mr_err:
return rc;
out_list_err:
- rc = PTR_ERR(f->fr_pgl);
- dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n",
- __func__, rc);
+ rc = -ENOMEM;
+ dprintk("RPC: %s: sg allocation failure\n",
+ __func__);
ib_dereg_mr(f->fr_mr);
return rc;
}
@@ -179,7 +183,7 @@ __frwr_release(struct rpcrdma_mw *r)
if (rc)
dprintk("RPC: %s: ib_dereg_mr status %i\n",
__func__, rc);
- ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+ kfree(r->r.frmr.sg);
}
static int
@@ -312,13 +316,10 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
struct rpcrdma_mw *mw;
struct rpcrdma_frmr *frmr;
struct ib_mr *mr;
- struct ib_send_wr fastreg_wr, *bad_wr;
+ struct ib_reg_wr reg_wr;
+ struct ib_send_wr *bad_wr;
+ int rc, i, n, dma_nents;
u8 key;
- int len, pageoff;
- int i, rc;
- int seg_len;
- u64 pa;
- int page_no;
mw = seg1->rl_mw;
seg1->rl_mw = NULL;
@@ -331,64 +332,80 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
} while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
frmr = &mw->r.frmr;
frmr->fr_state = FRMR_IS_VALID;
+ mr = frmr->fr_mr;
- pageoff = offset_in_page(seg1->mr_offset);
- seg1->mr_offset -= pageoff; /* start of page */
- seg1->mr_len += pageoff;
- len = -pageoff;
if (nsegs > ia->ri_max_frmr_depth)
nsegs = ia->ri_max_frmr_depth;
- for (page_no = i = 0; i < nsegs;) {
- rpcrdma_map_one(device, seg, direction);
- pa = seg->mr_dma;
- for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
- frmr->fr_pgl->page_list[page_no++] = pa;
- pa += PAGE_SIZE;
- }
- len += seg->mr_len;
+ for (i = 0; i < nsegs;) {
+ if (seg->mr_page)
+ sg_set_page(&frmr->sg[i],
+ seg->mr_page,
+ seg->mr_len,
+ offset_in_page(seg->mr_offset));
+ else
+ sg_set_buf(&frmr->sg[i], seg->mr_offset,
+ seg->mr_len);
+
++seg;
++i;
+
/* Check for holes */
if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
- dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n",
- __func__, mw, i, len);
-
- memset(&fastreg_wr, 0, sizeof(fastreg_wr));
- fastreg_wr.wr_id = (unsigned long)(void *)mw;
- fastreg_wr.opcode = IB_WR_FAST_REG_MR;
- fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff;
- fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
- fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
- fastreg_wr.wr.fast_reg.page_list_len = page_no;
- fastreg_wr.wr.fast_reg.length = len;
- fastreg_wr.wr.fast_reg.access_flags = writing ?
- IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
- IB_ACCESS_REMOTE_READ;
- mr = frmr->fr_mr;
+ frmr->sg_nents = i;
+
+ dma_nents = ib_dma_map_sg(device, frmr->sg, frmr->sg_nents, direction);
+ if (!dma_nents) {
+ pr_err("RPC: %s: failed to dma map sg %p sg_nents %u\n",
+ __func__, frmr->sg, frmr->sg_nents);
+ return -ENOMEM;
+ }
+
+ n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
+ if (unlikely(n != frmr->sg_nents)) {
+ pr_err("RPC: %s: failed to map mr %p (%u/%u)\n",
+ __func__, frmr->fr_mr, n, frmr->sg_nents);
+ rc = n < 0 ? n : -EINVAL;
+ goto out_senderr;
+ }
+
+ dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n",
+ __func__, mw, frmr->sg_nents, mr->length);
+
key = (u8)(mr->rkey & 0x000000FF);
ib_update_fast_reg_key(mr, ++key);
- fastreg_wr.wr.fast_reg.rkey = mr->rkey;
+
+ reg_wr.wr.next = NULL;
+ reg_wr.wr.opcode = IB_WR_REG_MR;
+ reg_wr.wr.wr_id = (uintptr_t)mw;
+ reg_wr.wr.num_sge = 0;
+ reg_wr.wr.send_flags = 0;
+ reg_wr.mr = mr;
+ reg_wr.key = mr->rkey;
+ reg_wr.access = writing ?
+ IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
+ IB_ACCESS_REMOTE_READ;
DECR_CQCOUNT(&r_xprt->rx_ep);
- rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
+ rc = ib_post_send(ia->ri_id->qp, &reg_wr.wr, &bad_wr);
if (rc)
goto out_senderr;
+ seg1->mr_dir = direction;
seg1->rl_mw = mw;
seg1->mr_rkey = mr->rkey;
- seg1->mr_base = seg1->mr_dma + pageoff;
- seg1->mr_nsegs = i;
- seg1->mr_len = len;
- return i;
+ seg1->mr_base = mr->iova;
+ seg1->mr_nsegs = frmr->sg_nents;
+ seg1->mr_len = mr->length;
+
+ return frmr->sg_nents;
out_senderr:
dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc);
- while (i--)
- rpcrdma_unmap_one(device, --seg);
+ ib_dma_unmap_sg(device, frmr->sg, dma_nents, direction);
__frwr_queue_recovery(mw);
return rc;
}
@@ -402,22 +419,22 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
struct rpcrdma_mr_seg *seg1 = seg;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_mw *mw = seg1->rl_mw;
+ struct rpcrdma_frmr *frmr = &mw->r.frmr;
struct ib_send_wr invalidate_wr, *bad_wr;
int rc, nsegs = seg->mr_nsegs;
dprintk("RPC: %s: FRMR %p\n", __func__, mw);
seg1->rl_mw = NULL;
- mw->r.frmr.fr_state = FRMR_IS_INVALID;
+ frmr->fr_state = FRMR_IS_INVALID;
memset(&invalidate_wr, 0, sizeof(invalidate_wr));
invalidate_wr.wr_id = (unsigned long)(void *)mw;
invalidate_wr.opcode = IB_WR_LOCAL_INV;
- invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey;
+ invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey;
DECR_CQCOUNT(&r_xprt->rx_ep);
- while (seg1->mr_nsegs--)
- rpcrdma_unmap_one(ia->ri_device, seg++);
+ ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);
read_lock(&ia->ri_qplock);
rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
read_unlock(&ia->ri_qplock);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index f0c3ff67ca98..ff4f01e527ec 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -126,7 +126,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
u64 rs_offset,
bool last)
{
- struct ib_send_wr read_wr;
+ struct ib_rdma_wr read_wr;
int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
int ret, read, pno;
@@ -180,16 +180,16 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
memset(&read_wr, 0, sizeof(read_wr));
- read_wr.wr_id = (unsigned long)ctxt;
- read_wr.opcode = IB_WR_RDMA_READ;
- ctxt->wr_op = read_wr.opcode;
- read_wr.send_flags = IB_SEND_SIGNALED;
- read_wr.wr.rdma.rkey = rs_handle;
- read_wr.wr.rdma.remote_addr = rs_offset;
- read_wr.sg_list = ctxt->sge;
- read_wr.num_sge = pages_needed;
-
- ret = svc_rdma_send(xprt, &read_wr);
+ read_wr.wr.wr_id = (unsigned long)ctxt;
+ read_wr.wr.opcode = IB_WR_RDMA_READ;
+ ctxt->wr_op = read_wr.wr.opcode;
+ read_wr.wr.send_flags = IB_SEND_SIGNALED;
+ read_wr.rkey = rs_handle;
+ read_wr.remote_addr = rs_offset;
+ read_wr.wr.sg_list = ctxt->sge;
+ read_wr.wr.num_sge = pages_needed;
+
+ ret = svc_rdma_send(xprt, &read_wr.wr);
if (ret) {
pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -219,14 +219,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
u64 rs_offset,
bool last)
{
- struct ib_send_wr read_wr;
+ struct ib_rdma_wr read_wr;
struct ib_send_wr inv_wr;
- struct ib_send_wr fastreg_wr;
+ struct ib_reg_wr reg_wr;
u8 key;
- int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
+ int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
- int ret, read, pno;
+ int ret, read, pno, dma_nents, n;
u32 pg_off = *page_offset;
u32 pg_no = *page_no;
@@ -235,17 +235,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
ctxt->direction = DMA_FROM_DEVICE;
ctxt->frmr = frmr;
- pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);
- read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset,
- rs_length);
+ nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len);
+ read = min_t(int, (nents << PAGE_SHIFT) - *page_offset, rs_length);
- frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
frmr->direction = DMA_FROM_DEVICE;
frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
- frmr->map_len = pages_needed << PAGE_SHIFT;
- frmr->page_list_len = pages_needed;
+ frmr->sg_nents = nents;
- for (pno = 0; pno < pages_needed; pno++) {
+ for (pno = 0; pno < nents; pno++) {
int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
@@ -253,17 +250,12 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
head->arg.len += len;
if (!pg_off)
head->count++;
+
+ sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no],
+ len, pg_off);
+
rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
rqstp->rq_next_page = rqstp->rq_respages + 1;
- frmr->page_list->page_list[pno] =
- ib_dma_map_page(xprt->sc_cm_id->device,
- head->arg.pages[pg_no], 0,
- PAGE_SIZE, DMA_FROM_DEVICE);
- ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
- frmr->page_list->page_list[pno]);
- if (ret)
- goto err;
- atomic_inc(&xprt->sc_dma_used);
/* adjust offset and wrap to next page if needed */
pg_off += len;
@@ -279,43 +271,57 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
else
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+ dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device,
+ frmr->sg, frmr->sg_nents,
+ frmr->direction);
+ if (!dma_nents) {
+ pr_err("svcrdma: failed to dma map sg %p\n",
+ frmr->sg);
+ return -ENOMEM;
+ }
+ atomic_inc(&xprt->sc_dma_used);
+
+ n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
+ if (unlikely(n != frmr->sg_nents)) {
+ pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n",
+ frmr->mr, n, frmr->sg_nents);
+ return n < 0 ? n : -EINVAL;
+ }
+
/* Bump the key */
key = (u8)(frmr->mr->lkey & 0x000000FF);
ib_update_fast_reg_key(frmr->mr, ++key);
- ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
+ ctxt->sge[0].addr = frmr->mr->iova;
ctxt->sge[0].lkey = frmr->mr->lkey;
- ctxt->sge[0].length = read;
+ ctxt->sge[0].length = frmr->mr->length;
ctxt->count = 1;
ctxt->read_hdr = head;
- /* Prepare FASTREG WR */
- memset(&fastreg_wr, 0, sizeof(fastreg_wr));
- fastreg_wr.opcode = IB_WR_FAST_REG_MR;
- fastreg_wr.send_flags = IB_SEND_SIGNALED;
- fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
- fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
- fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
- fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
- fastreg_wr.wr.fast_reg.length = frmr->map_len;
- fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
- fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
- fastreg_wr.next = &read_wr;
+ /* Prepare REG WR */
+ reg_wr.wr.opcode = IB_WR_REG_MR;
+ reg_wr.wr.wr_id = 0;
+ reg_wr.wr.send_flags = IB_SEND_SIGNALED;
+ reg_wr.wr.num_sge = 0;
+ reg_wr.mr = frmr->mr;
+ reg_wr.key = frmr->mr->lkey;
+ reg_wr.access = frmr->access_flags;
+ reg_wr.wr.next = &read_wr.wr;
/* Prepare RDMA_READ */
memset(&read_wr, 0, sizeof(read_wr));
- read_wr.send_flags = IB_SEND_SIGNALED;
- read_wr.wr.rdma.rkey = rs_handle;
- read_wr.wr.rdma.remote_addr = rs_offset;
- read_wr.sg_list = ctxt->sge;
- read_wr.num_sge = 1;
+ read_wr.wr.send_flags = IB_SEND_SIGNALED;
+ read_wr.rkey = rs_handle;
+ read_wr.remote_addr = rs_offset;
+ read_wr.wr.sg_list = ctxt->sge;
+ read_wr.wr.num_sge = 1;
if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
- read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
- read_wr.wr_id = (unsigned long)ctxt;
- read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
+ read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+ read_wr.wr.wr_id = (unsigned long)ctxt;
+ read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
} else {
- read_wr.opcode = IB_WR_RDMA_READ;
- read_wr.next = &inv_wr;
+ read_wr.wr.opcode = IB_WR_RDMA_READ;
+ read_wr.wr.next = &inv_wr;
/* Prepare invalidate */
memset(&inv_wr, 0, sizeof(inv_wr));
inv_wr.wr_id = (unsigned long)ctxt;
@@ -323,10 +329,10 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
}
- ctxt->wr_op = read_wr.opcode;
+ ctxt->wr_op = read_wr.wr.opcode;
/* Post the chain */
- ret = svc_rdma_send(xprt, &fastreg_wr);
+ ret = svc_rdma_send(xprt, &reg_wr.wr);
if (ret) {
pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -340,7 +346,8 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
atomic_inc(&rdma_stat_read);
return ret;
err:
- svc_rdma_unmap_dma(ctxt);
+ ib_dma_unmap_sg(xprt->sc_cm_id->device,
+ frmr->sg, frmr->sg_nents, frmr->direction);
svc_rdma_put_context(ctxt, 0);
svc_rdma_put_frmr(xprt, frmr);
return ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 1dfae8317065..969a1ab75fc3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -217,7 +217,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
u32 xdr_off, int write_len,
struct svc_rdma_req_map *vec)
{
- struct ib_send_wr write_wr;
+ struct ib_rdma_wr write_wr;
struct ib_sge *sge;
int xdr_sge_no;
int sge_no;
@@ -282,17 +282,17 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
/* Prepare WRITE WR */
memset(&write_wr, 0, sizeof write_wr);
ctxt->wr_op = IB_WR_RDMA_WRITE;
- write_wr.wr_id = (unsigned long)ctxt;
- write_wr.sg_list = &sge[0];
- write_wr.num_sge = sge_no;
- write_wr.opcode = IB_WR_RDMA_WRITE;
- write_wr.send_flags = IB_SEND_SIGNALED;
- write_wr.wr.rdma.rkey = rmr;
- write_wr.wr.rdma.remote_addr = to;
+ write_wr.wr.wr_id = (unsigned long)ctxt;
+ write_wr.wr.sg_list = &sge[0];
+ write_wr.wr.num_sge = sge_no;
+ write_wr.wr.opcode = IB_WR_RDMA_WRITE;
+ write_wr.wr.send_flags = IB_SEND_SIGNALED;
+ write_wr.rkey = rmr;
+ write_wr.remote_addr = to;
/* Post It */
atomic_inc(&rdma_stat_write);
- if (svc_rdma_send(xprt, &write_wr))
+ if (svc_rdma_send(xprt, &write_wr.wr))
goto err;
return write_len - bc;
err:
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index fcc3eb80c265..a266e870d870 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -692,8 +692,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
if (!cma_xprt)
return ERR_PTR(-ENOMEM);
- listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP,
- IB_QPT_RC);
+ listen_id = rdma_create_id(&init_net, rdma_listen_handler, cma_xprt,
+ RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(listen_id)) {
ret = PTR_ERR(listen_id);
dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
@@ -732,7 +732,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
{
struct ib_mr *mr;
- struct ib_fast_reg_page_list *pl;
+ struct scatterlist *sg;
struct svc_rdma_fastreg_mr *frmr;
u32 num_sg;
@@ -745,13 +745,14 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
if (IS_ERR(mr))
goto err_free_frmr;
- pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
- num_sg);
- if (IS_ERR(pl))
+ sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL);
+ if (!sg)
goto err_free_mr;
+ sg_init_table(sg, RPCSVC_MAXPAGES);
+
frmr->mr = mr;
- frmr->page_list = pl;
+ frmr->sg = sg;
INIT_LIST_HEAD(&frmr->frmr_list);
return frmr;
@@ -771,8 +772,8 @@ static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
frmr = list_entry(xprt->sc_frmr_q.next,
struct svc_rdma_fastreg_mr, frmr_list);
list_del_init(&frmr->frmr_list);
+ kfree(frmr->sg);
ib_dereg_mr(frmr->mr);
- ib_free_fast_reg_page_list(frmr->page_list);
kfree(frmr);
}
}
@@ -786,8 +787,7 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
frmr = list_entry(rdma->sc_frmr_q.next,
struct svc_rdma_fastreg_mr, frmr_list);
list_del_init(&frmr->frmr_list);
- frmr->map_len = 0;
- frmr->page_list_len = 0;
+ frmr->sg_nents = 0;
}
spin_unlock_bh(&rdma->sc_frmr_q_lock);
if (frmr)
@@ -796,25 +796,13 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
return rdma_alloc_frmr(rdma);
}
-static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
- struct svc_rdma_fastreg_mr *frmr)
-{
- int page_no;
- for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
- dma_addr_t addr = frmr->page_list->page_list[page_no];
- if (ib_dma_mapping_error(frmr->mr->device, addr))
- continue;
- atomic_dec(&xprt->sc_dma_used);
- ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE,
- frmr->direction);
- }
-}
-
void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
struct svc_rdma_fastreg_mr *frmr)
{
if (frmr) {
- frmr_unmap_dma(rdma, frmr);
+ ib_dma_unmap_sg(rdma->sc_cm_id->device,
+ frmr->sg, frmr->sg_nents, frmr->direction);
+ atomic_dec(&rdma->sc_dma_used);
spin_lock_bh(&rdma->sc_frmr_q_lock);
WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 5502d4dade74..f63369bd01c5 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -432,7 +432,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
init_completion(&ia->ri_done);
- id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
+ id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
+ IB_QPT_RC);
if (IS_ERR(id)) {
rc = PTR_ERR(id);
dprintk("RPC: %s: rdma_create_id() failed %i\n",
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index c09414e6f91b..c82abf44e39d 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -193,7 +193,8 @@ enum rpcrdma_frmr_state {
};
struct rpcrdma_frmr {
- struct ib_fast_reg_page_list *fr_pgl;
+ struct scatterlist *sg;
+ int sg_nents;
struct ib_mr *fr_mr;
enum rpcrdma_frmr_state fr_state;
struct work_struct fr_work;