path: root/net/sunrpc/xprtrdma/verbs.c
author		Chuck Lever		2016-09-15 16:57:24 +0200
committer	Anna Schumaker		2016-09-19 19:08:38 +0200
commit		655fec6987be05964e70c2e2efcbb253710e282f (patch)
tree		f35806548be49c1c1a1b9955af53b4657f449e9c /net/sunrpc/xprtrdma/verbs.c
parent		xprtrdma: Basic support for Remote Invalidation (diff)
xprtrdma: Use gathered Send for large inline messages
An RPC Call message that is sent inline but that has a data payload (i.e., one or more items in rq_snd_buf's page list) must be "pulled up":

- call_allocate has to reserve enough RPC Call buffer space to
  accommodate the data payload

- call_transmit has to memcpy rq_snd_buf's page list and tail into
  its head iovec before it is sent

As the inline threshold is increased beyond its current 1KB default, however, this means data payloads of more than a few KB are copied by the host CPU. For example, if the inline threshold is increased just to 4KB, then NFS WRITE requests up to 4KB would involve a memcpy of the NFS WRITE's payload data into the RPC Call buffer. This is an undesirable amount of participation by the host CPU.

The inline threshold may be much larger than 4KB in the future, after negotiation with a peer server.

Instead of copying the components of rq_snd_buf into its head iovec, construct a gather list of these components, and send them all in place. The same approach is already used in the Linux server's RPC-over-RDMA reply path.

This mechanism also eliminates the need for rpcrdma_tail_pullup, which is used to manage the XDR pad and trailing inline content when a Read list is present.

This requires that the pages in rq_snd_buf's page list be DMA-mapped during marshaling, and unmapped when a data-bearing RPC is completed. This is slightly less efficient for very small I/O payloads, but significantly more efficient as data payload size and inline threshold increase past a kilobyte.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
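To illustrate the gather-list approach described above, here is a minimal editor's sketch of how rq_snd_buf's head iovec and page list could be DMA-mapped as individual send SGEs, assuming a caller-owned sge[] array sized to RPCRDMA_MAX_SEND_SGES. This is not the patch's code: the function name sketch_map_send_sges and its signature are hypothetical; only the ib_dma_* verbs, struct ib_sge, and the xdr_buf fields are real kernel interfaces. Tail mapping and unmap-on-completion are omitted for brevity.

/* Editor's sketch (not part of the patch): map the components of an
 * xdr_buf as a gather list so the payload is sent in place, with no
 * pull-up memcpy into the head iovec.
 */
#include <rdma/ib_verbs.h>
#include <linux/sunrpc/xdr.h>

static int sketch_map_send_sges(struct ib_device *device,
				struct xdr_buf *xdr, u32 lkey,
				struct ib_sge *sge,
				struct ib_send_wr *send_wr)
{
	struct page **ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
	unsigned int page_base = offset_in_page(xdr->page_base);
	unsigned int remaining = xdr->page_len;
	int count = 0;

	/* One SGE covers the head iovec. */
	sge[count].addr = ib_dma_map_single(device, xdr->head[0].iov_base,
					    xdr->head[0].iov_len,
					    DMA_TO_DEVICE);
	if (ib_dma_mapping_error(device, sge[count].addr))
		return -EIO;
	sge[count].length = xdr->head[0].iov_len;
	sge[count].lkey = lkey;
	count++;

	/* One SGE per page in the page list: sent in place, no memcpy. */
	while (remaining) {
		unsigned int len = min_t(unsigned int, remaining,
					 PAGE_SIZE - page_base);

		sge[count].addr = ib_dma_map_page(device, *ppages++,
						  page_base, len,
						  DMA_TO_DEVICE);
		if (ib_dma_mapping_error(device, sge[count].addr))
			return -EIO;	/* caller unmaps sge[0..count) */
		sge[count].length = len;
		sge[count].lkey = lkey;
		count++;
		remaining -= len;
		page_base = 0;
	}

	send_wr->sg_list = sge;
	send_wr->num_sge = count;
	return 0;
}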
Diffstat (limited to 'net/sunrpc/xprtrdma/verbs.c')
-rw-r--r--	net/sunrpc/xprtrdma/verbs.c | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index e2d639062450..eeaca9c83635 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -493,7 +493,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 	unsigned int max_qp_wr;
 	int rc;
 
-	if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) {
+	if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) {
 		dprintk("RPC: %s: insufficient sge's available\n",
 			__func__);
 		return -ENOMEM;
@@ -522,7 +522,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
 	ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
 	ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */
-	ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS;
+	ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES;
 	ep->rep_attr.cap.max_recv_sge = 1;
 	ep->rep_attr.cap.max_inline_data = 0;
 	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -891,7 +891,7 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
 	INIT_LIST_HEAD(&req->rl_registered);
 	req->rl_send_wr.next = NULL;
 	req->rl_send_wr.wr_cqe = &req->rl_cqe;
-	req->rl_send_wr.sg_list = req->rl_send_iov;
+	req->rl_send_wr.sg_list = req->rl_send_sge;
 	req->rl_send_wr.opcode = IB_WR_SEND;
 	return req;
 }
@@ -1306,11 +1306,9 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
 		   struct rpcrdma_ep *ep,
 		   struct rpcrdma_req *req)
 {
-	struct ib_device *device = ia->ri_device;
 	struct ib_send_wr *send_wr = &req->rl_send_wr;
 	struct ib_send_wr *send_wr_fail;
-	struct ib_sge *sge = req->rl_send_iov;
-	int i, rc;
+	int rc;
 
 	if (req->rl_reply) {
 		rc = rpcrdma_ep_post_recv(ia, req->rl_reply);
@@ -1319,9 +1317,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
 		req->rl_reply = NULL;
 	}
 
-	for (i = 0; i < send_wr->num_sge; i++)
-		ib_dma_sync_single_for_device(device, sge[i].addr,
-					      sge[i].length, DMA_TO_DEVICE);
 	dprintk("RPC: %s: posting %d s/g entries\n",
 		__func__, send_wr->num_sge);
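Because the patch DMA-maps the send SGEs during marshaling, the ib_dma_sync_single_for_device() loop removed above is no longer needed at post time. A hedged sketch of the remaining post step, reusing the declarations from the hunk above; the error handling shown here is illustrative, not the function's actual exit path:

	/* Sketch: with SGEs mapped at marshal time, posting reduces to
	 * handing the prebuilt gather list to the device. */
	rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
	if (rc)
		dprintk("RPC: %s: ib_post_send returned %i\n",
			__func__, rc);
	return rc;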