summary | refs | log | tree | commit | diff | stats
path: root/drivers/staging/rdma/hfi1/rc.c
diff options
context:
space:
mode:
author: Mike Marciniszyn, 2016-02-14 21:10:04 +0100
committer: Doug Ledford, 2016-03-11 02:38:07 +0100
commit: 46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7 (patch)
tree: 077ea38ac2f7fd75c1334deadcc141ad6753a009 /drivers/staging/rdma/hfi1/rc.c
parent: IB/qib: Rename several functions by adding a "qib_" prefix (diff)
downloadkernel-qcow2-linux-46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7.tar.gz
kernel-qcow2-linux-46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7.tar.xz
kernel-qcow2-linux-46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7.zip
IB/qib, staging/rdma/hfi1: add s_hlock for use in post send
This patch adds an additional lock, s_hlock, to reduce contention on the s_lock.

The new lock is used in post_send() so that post_send() is no longer serialized with the send engine and other send-related processing. To make this possible, s_next_psn is now maintained in post_send(), and the fields used by post_send() are moved to a new cache line. An s_avail count is maintained for post_send() to reduce the trading of cache lines with the send engine.

The lock is released and re-acquired around handing the just-built packet to the egress mechanism.

Reviewed-by: Jubin John <jubin.john@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Harish Chegondi <harish.chegondi@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/staging/rdma/hfi1/rc.c')
-rw-r--r-- drivers/staging/rdma/hfi1/rc.c 44
1 files changed, 9 insertions, 35 deletions
diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c
index a4a44d33d857..a62c9424fa86 100644
--- a/drivers/staging/rdma/hfi1/rc.c
+++ b/drivers/staging/rdma/hfi1/rc.c
@@ -367,6 +367,8 @@ bail:
* hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
* @qp: a pointer to the QP
*
+ * Assumes s_lock is held.
+ *
* Return 1 if constructed; otherwise, return 0.
*/
int hfi1_make_rc_req(struct rvt_qp *qp)
@@ -383,7 +385,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
u32 bth2;
u32 pmtu = qp->pmtu;
char newreq;
- unsigned long flags;
int ret = 0;
int middle = 0;
int delta;
@@ -392,12 +393,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
ohdr = &priv->s_hdr->ibh.u.l.oth;
- /*
- * The lock is needed to synchronize between the sending tasklet,
- * the receive interrupt handler, and timeout re-sends.
- */
- spin_lock_irqsave(&qp->s_lock, flags);
-
/* Sending responses has higher priority over sending requests. */
if ((qp->s_flags & RVT_S_RESP_PENDING) &&
make_rc_ack(dev, qp, ohdr, pmtu))
@@ -407,7 +402,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
- if (qp->s_last == qp->s_head)
+ smp_read_barrier_depends(); /* see post_one_send() */
+ if (qp->s_last == ACCESS_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&priv->s_iowait.sdma_busy)) {
@@ -463,8 +459,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
qp->s_flags |= RVT_S_WAIT_FENCE;
goto bail;
}
- wqe->psn = qp->s_next_psn;
newreq = 1;
+ qp->s_psn = wqe->psn;
}
/*
* Note that we have to be careful not to modify the
@@ -483,9 +479,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail;
}
- wqe->lpsn = wqe->psn;
if (len > pmtu) {
- wqe->lpsn += (len - 1) / pmtu;
qp->s_state = OP(SEND_FIRST);
len = pmtu;
break;
@@ -522,9 +516,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / sizeof(u32);
- wqe->lpsn = wqe->psn;
if (len > pmtu) {
- wqe->lpsn += (len - 1) / pmtu;
qp->s_state = OP(RDMA_WRITE_FIRST);
len = pmtu;
break;
@@ -559,13 +551,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
qp->s_num_rd_atomic++;
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
- /*
- * Adjust s_next_psn to count the
- * expected number of responses.
- */
- if (len > pmtu)
- qp->s_next_psn += (len - 1) / pmtu;
- wqe->lpsn = qp->s_next_psn++;
}
ohdr->u.rc.reth.vaddr =
cpu_to_be64(wqe->rdma_wr.remote_addr);
@@ -596,7 +581,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
qp->s_num_rd_atomic++;
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
- wqe->lpsn = wqe->psn;
}
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP);
@@ -639,11 +623,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
}
if (wqe->wr.opcode == IB_WR_RDMA_READ)
qp->s_psn = wqe->lpsn + 1;
- else {
+ else
qp->s_psn++;
- if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0)
- qp->s_next_psn = qp->s_psn;
- }
break;
case OP(RDMA_READ_RESPONSE_FIRST):
@@ -663,8 +644,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
/* FALLTHROUGH */
case OP(SEND_MIDDLE):
bth2 = mask_psn(qp->s_psn++);
- if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0)
- qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
@@ -705,8 +684,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
/* FALLTHROUGH */
case OP(RDMA_WRITE_MIDDLE):
bth2 = mask_psn(qp->s_psn++);
- if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0)
- qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
@@ -777,13 +754,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
bth2,
middle);
done:
- ret = 1;
- goto unlock;
-
+ return 1;
bail:
qp->s_flags &= ~RVT_S_BUSY;
-unlock:
- spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
@@ -1563,7 +1536,8 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
trace_hfi1_rc_ack(qp, psn);
/* Ignore invalid responses. */
- if (cmp_psn(psn, qp->s_next_psn) >= 0)
+ smp_read_barrier_depends(); /* see post_one_send */
+ if (cmp_psn(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
goto ack_done;
/* Ignore duplicate responses. */