summaryrefslogtreecommitdiffstats
path: root/drivers/staging/rdma/hfi1/chip.c
diff options
context:
space:
mode:
authorMike Marciniszyn2016-02-14 21:45:36 +0100
committerDoug Ledford2016-03-11 02:38:14 +0100
commit14553ca11039732bcba3c160a26d702dbe71dd49 (patch)
tree22415d51860e0fa1c0b11f4510d2550cb793e23c /drivers/staging/rdma/hfi1/chip.c
parentstaging/rdma/hfi1: use u8 for vl/sl (diff)
downloadkernel-qcow2-linux-14553ca11039732bcba3c160a26d702dbe71dd49.tar.gz
kernel-qcow2-linux-14553ca11039732bcba3c160a26d702dbe71dd49.tar.xz
kernel-qcow2-linux-14553ca11039732bcba3c160a26d702dbe71dd49.zip
staging/rdma/hfi1: Adaptive PIO for short messages
The change requires a new pio_busy field in the iowait structure to track the number of outstanding pios. The new counter together with the sdma counter serve as the basis for a packet by packet decision as to which egress mechanism to use. Since packets given to different egress mechanisms are not ordered, this scheme will preserve the order. The iowait drain/wait mechanisms are extended for a pio case. An additional qp wait flag is added for the PIO drain wait case. Currently the only pio wait is for buffers, so the no_bufs_available() routine name is changed to pio_wait() and a third argument is passed with one of the two pio wait flags to generalize the routine. A module parameter is added to hold a configurable threshold. For now, the module parameter is zero. A heuristic routine is added to return the func pointer of the proper egress routine to use. The heuristic is as follows: - SMI always uses pio - GSI,UD qps <= threshold use pio - UD qps > threadhold use sdma o No coordination with sdma is required because order is not required and this qp pio count is not maintained for UD - RC/UC ONLY packets <= threshold chose as follows: o If sdmas pending, use SDMA o Otherwise use pio and enable the pio tracking count at the time the pio buffer is allocated - RC/UC ONLY packets > threshold use SDMA o If pio's are pending the pio_wait with the new wait flag is called to delay for pios to drain The threshold is potentially reduced by the QP's mtu. The sc_buffer_alloc() has two additional args (a callback, a void *) which are exploited by the RC/UC cases to pass a new complete routine and a qp *. When the shadow ring completes the credit associated with a packet, the new complete routine is called. The verbs_pio_complete() will then decrement the busy count and trigger any drain waiters in qp destroy or reset. Reviewed-by: Jubin John <jubin.john@intel.com> Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/staging/rdma/hfi1/chip.c')
-rw-r--r--drivers/staging/rdma/hfi1/chip.c10
1 files changed, 10 insertions, 0 deletions
diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c
index 129461770186..36e8e3e9b012 100644
--- a/drivers/staging/rdma/hfi1/chip.c
+++ b/drivers/staging/rdma/hfi1/chip.c
@@ -1588,6 +1588,14 @@ static u64 access_sw_pio_wait(const struct cntr_entry *entry,
return dd->verbs_dev.n_piowait;
}
+static u64 access_sw_pio_drain(const struct cntr_entry *entry,
+ void *context, int vl, int mode, u64 data)
+{
+ struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+ return dd->verbs_dev.n_piodrain;
+}
+
static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
void *context, int vl, int mode, u64 data)
{
@@ -4129,6 +4137,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
access_sw_vtx_wait),
[C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
access_sw_pio_wait),
+[C_SW_PIO_DRAIN] = CNTR_ELEM("PioDrain", 0, 0, CNTR_NORMAL,
+ access_sw_pio_drain),
[C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
access_sw_kmem_wait),
[C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,