diff options
author | Mike Marciniszyn | 2016-02-14 21:10:04 +0100 |
---|---|---|
committer | Doug Ledford | 2016-03-11 02:38:07 +0100 |
commit | 46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7 (patch) | |
tree | 077ea38ac2f7fd75c1334deadcc141ad6753a009 /drivers/staging/rdma/hfi1/rc.c | |
parent | IB/qib: Rename several functions by adding a "qib_" prefix (diff) | |
download | kernel-qcow2-linux-46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7.tar.gz kernel-qcow2-linux-46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7.tar.xz kernel-qcow2-linux-46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7.zip |
IB/qib, staging/rdma/hfi1: add s_hlock for use in post send
This patch adds an additional lock to reduce contention on the s_lock.
This lock is used in post_send() so that the post_send is not
serialized with the send engine and other send related processing.
To do this the s_next_psn is now maintained on post_send() while
post_send() related fields are moved to a new cache line. There is
an s_avail maintained for the post_send() to mitigate trading cache
lines with the send engine. The lock is released/acquired around
releasing the just built packet to the egress mechanism.
Reviewed-by: Jubin John <jubin.john@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Harish Chegondi <harish.chegondi@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/staging/rdma/hfi1/rc.c')
-rw-r--r-- | drivers/staging/rdma/hfi1/rc.c | 44 |
1 files changed, 9 insertions, 35 deletions
diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index a4a44d33d857..a62c9424fa86 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -367,6 +367,8 @@ bail: * hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * @qp: a pointer to the QP * + * Assumes s_lock is held. + * * Return 1 if constructed; otherwise, return 0. */ int hfi1_make_rc_req(struct rvt_qp *qp) @@ -383,7 +385,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) u32 bth2; u32 pmtu = qp->pmtu; char newreq; - unsigned long flags; int ret = 0; int middle = 0; int delta; @@ -392,12 +393,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) ohdr = &priv->s_hdr->ibh.u.l.oth; - /* - * The lock is needed to synchronize between the sending tasklet, - * the receive interrupt handler, and timeout re-sends. - */ - spin_lock_irqsave(&qp->s_lock, flags); - /* Sending responses has higher priority over sending requests. */ if ((qp->s_flags & RVT_S_RESP_PENDING) && make_rc_ack(dev, qp, ohdr, pmtu)) @@ -407,7 +402,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_iowait.sdma_busy)) { @@ -463,8 +459,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_FENCE; goto bail; } - wqe->psn = qp->s_next_psn; newreq = 1; + qp->s_psn = wqe->psn; } /* * Note that we have to be careful not to modify the @@ -483,9 +479,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } - wqe->lpsn = wqe->psn; if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; qp->s_state = OP(SEND_FIRST); len = pmtu; break; @@ -522,9 +516,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp) cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / sizeof(u32); - wqe->lpsn = wqe->psn; if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; qp->s_state = OP(RDMA_WRITE_FIRST); len = pmtu; break; @@ -559,13 +551,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) qp->s_num_rd_atomic++; if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - /* - * Adjust s_next_psn to count the - * expected number of responses. - */ - if (len > pmtu) - qp->s_next_psn += (len - 1) / pmtu; - wqe->lpsn = qp->s_next_psn++; } ohdr->u.rc.reth.vaddr = cpu_to_be64(wqe->rdma_wr.remote_addr); @@ -596,7 +581,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) qp->s_num_rd_atomic++; if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - wqe->lpsn = wqe->psn; } if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); @@ -639,11 +623,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp) } if (wqe->wr.opcode == IB_WR_RDMA_READ) qp->s_psn = wqe->lpsn + 1; - else { + else qp->s_psn++; - if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; - } break; case OP(RDMA_READ_RESPONSE_FIRST): @@ -663,8 +644,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) /* FALLTHROUGH */ case OP(SEND_MIDDLE): bth2 = mask_psn(qp->s_psn++); - if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { @@ -705,8 +684,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) /* FALLTHROUGH */ case OP(RDMA_WRITE_MIDDLE): bth2 = mask_psn(qp->s_psn++); - if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { @@ -777,13 +754,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp) bth2, middle); done: - ret = 1; - goto unlock; - + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -1563,7 +1536,8 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, trace_hfi1_rc_ack(qp, psn); /* Ignore invalid responses. */ - if (cmp_psn(psn, qp->s_next_psn) >= 0) + smp_read_barrier_depends(); /* see post_one_send */ + if (cmp_psn(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0) goto ack_done; /* Ignore duplicate responses. */ |