summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/hfi1
diff options
context:
space:
mode:
authorLinus Torvalds2017-05-03 21:45:55 +0200
committerLinus Torvalds2017-05-03 21:45:55 +0200
commit1684096b1ed813f621fb6cbd06e72235c1c2a0ca (patch)
tree13a228c35d6344f5d23b2c195aa3b026e42aac4b /drivers/infiniband/hw/hfi1
parentMerge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dto... (diff)
parentinfiniband: avoid dereferencing uninitialized dst on error path (diff)
downloadkernel-qcow2-linux-1684096b1ed813f621fb6cbd06e72235c1c2a0ca.tar.gz
kernel-qcow2-linux-1684096b1ed813f621fb6cbd06e72235c1c2a0ca.tar.xz
kernel-qcow2-linux-1684096b1ed813f621fb6cbd06e72235c1c2a0ca.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull rdma updates from Doug Ledford: "More exchaustive description of primary updates in this release: - Lots of driver fixes and misc fixes across the board. - I had to base on a net-next tree because the IPoIB Accelorator patches needed it. Unfortunately, it was known to Mellanox that there would need to be an IPoIB accelorator patch to the net tree (which left some functions turned off by an #ifdef construct to avoid warnings about defined but unused functions), then one to the RDMA tree, then a fixup that went back and re-enabled the functions in the net tree and enabled their use in the rdma tree Also, a sparse fix was sent to the net tree after I did my pull, and the fixup patch conflicts quite directly with that sparse fix, so I'm going to submit the fixup patch towards the end of the merge window by itself and based upon your master branch at the time. - Two separate rounds of hfi1 fixes, one that got dropped from last release because it came in just a day or two before the end of the merge window and then the one from this release cycle. Of note is that I now have a third series that just landed from Intel yesterday. It is not included in this pull request, but I may submit it by the end of the week. I'll talk to Intel about improving the timing of thier submissions for my workflow. - Changes to our idr usage in the RDMA subsystem that will tie into our cgroup management and also into the upcoming changes for the RDMA kernel<->userspace API. - Addition of support for a netdev to be tied to an RDMA device at the core level - Addition of the VNIC driver from Intel. While IPoIB provides IP over InfiniBand (and *only* IP, no lower layer protocol headers are allowed or supported), the VNIC driver presents a virtual Ethernet device with support for things like varying Ethertypes, VLANs, priorities and other features of Ethernet. The virtual devices are centrally managed by the OPA fabric manager, making this (for the time being) a strictly OPA specific feature. - Improvements to the On-Demand Paging support in the RDMA subsystem. - Addition of three significant OPA changes. While we added OPA support some time ago (via the hfi1 driver), the RDMA subsystem has so far glossed over the areas where OPA and InfiniBand differ. With this release we are starting to add support for the OPA extensions into the RDMA core in the following area: Extended port information for OPA is now supported, extended Address Handle attributes for OPA are now supported, and extended SA Queries to get OPA specific subnet information is now supported. Concise summary from the tag: - idr usage and locking changes - build fix for hns - ipoib debug path record file fix - hfi1 updates - core RDMA netdev addition - Intel VNIC driver addition - Enhanced accelerators for IPoIB addition - Debug cleanups in cxgb3/4 - Trivial cleanups from SF Markus Elfring - Misc rxe fixes from Mellanox - Misc ipoib fixes from Mellanox - Lots of mlx4/mlx5 changes from Mellanox - Misc fixes across the RDMA subsystem - ODP paging fixes and improvements - qedr updates - hfi1 updates - OPA port info patches - OPA AH patches - OPA SA Query patches" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (191 commits) infiniband: avoid dereferencing uninitialized dst on error path IB/SA: Add OPA addr header IB/mlx5: Add port_xmit_wait to counter registers read IB/ocrdma: fix out of bounds access to local buffer IB/mlx4: Fix incorrect order of formal and actual parameters IB/mlx4: Change flush logic so it adheres to the variable name mlx5: Fix mlx5_ib_map_mr_sg mr length IB/rxe: Don't clamp residual length to mtu IB/SA: Add support to query OPA path records IB/SA: Add OPA path record type IB/SA: Split struct sa_path_rec based on IB and ROCE specific fields IB/SA: Introduce path record specific types IB/SA: Rename ib_sa_path_rec to sa_path_rec IB/CM: Add braces when using sizeof IB/core: Define 'opa' rdma_ah_attr type IB/core: Define 'ib' and 'roce' rdma_ah_attr types IB/core: Use rdma_ah_attr accessor functions IB/core: Add accessor functions for rdma_ah_attr fields IB/PVRDMA: Rename ib_ah_attr related functions IB/mthca: Rename to_ib_ah_attr to to_rdma_ah_attr ...
Diffstat (limited to 'drivers/infiniband/hw/hfi1')
-rw-r--r--drivers/infiniband/hw/hfi1/Makefile2
-rw-r--r--drivers/infiniband/hw/hfi1/aspm.h15
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c590
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h20
-rw-r--r--drivers/infiniband/hw/hfi1/common.h15
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c238
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.h62
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c128
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c31
-rw-r--r--drivers/infiniband/hw/hfi1/firmware.c14
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h95
-rw-r--r--drivers/infiniband/hw/hfi1/init.c69
-rw-r--r--drivers/infiniband/hw/hfi1/intr.c27
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c95
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c2
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c19
-rw-r--r--drivers/infiniband/hw/hfi1/pio.h34
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c12
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c55
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c137
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c43
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h46
-rw-r--r--drivers/infiniband/hw/hfi1/sysfs.c4
-rw-r--r--drivers/infiniband/hw/hfi1/trace.c5
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ibhdrs.h8
-rw-r--r--drivers/infiniband/hw/hfi1/trace_misc.h48
-rw-r--r--drivers/infiniband/hw/hfi1/trace_rc.h7
-rw-r--r--drivers/infiniband/hw/hfi1/trace_tx.h43
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c14
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c73
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c16
-rw-r--r--drivers/infiniband/hw/hfi1/user_pages.c5
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c22
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c153
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h15
-rw-r--r--drivers/infiniband/hw/hfi1/vnic.h184
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_main.c907
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_sdma.c323
38 files changed, 3006 insertions, 570 deletions
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index 0cf97a09b64b..88085f65432e 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -12,7 +12,7 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
- verbs_txreq.o
+ verbs_txreq.o vnic_main.o vnic_sdma.o
hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h
index 0d58fe3b49b5..794e6814a531 100644
--- a/drivers/infiniband/hw/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -229,14 +229,17 @@ static inline void aspm_ctx_timer_function(unsigned long data)
spin_unlock_irqrestore(&rcd->aspm_lock, flags);
}
-/* Disable interrupt processing for verbs contexts when PSM contexts are open */
+/*
+ * Disable interrupt processing for verbs contexts when PSM or VNIC contexts
+ * are open.
+ */
static inline void aspm_disable_all(struct hfi1_devdata *dd)
{
struct hfi1_ctxtdata *rcd;
unsigned long flags;
unsigned i;
- for (i = 0; i < dd->first_user_ctxt; i++) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
del_timer_sync(&rcd->aspm_timer);
spin_lock_irqsave(&rcd->aspm_lock, flags);
@@ -260,7 +263,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd)
if (aspm_mode != ASPM_MODE_DYNAMIC)
return;
- for (i = 0; i < dd->first_user_ctxt; i++) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
spin_lock_irqsave(&rcd->aspm_lock, flags);
rcd->aspm_intr_enable = true;
@@ -276,7 +279,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
(unsigned long)rcd);
rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
aspm_mode == ASPM_MODE_DYNAMIC &&
- rcd->ctxt < rcd->dd->first_user_ctxt;
+ rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt;
}
static inline void aspm_init(struct hfi1_devdata *dd)
@@ -286,7 +289,7 @@ static inline void aspm_init(struct hfi1_devdata *dd)
spin_lock_init(&dd->aspm_lock);
dd->aspm_supported = aspm_hw_l1_supported(dd);
- for (i = 0; i < dd->first_user_ctxt; i++)
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++)
aspm_ctx_init(dd->rcd[i]);
/* Start with ASPM disabled */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 121a4c920f1b..0f6916d2d549 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -64,6 +64,7 @@
#include "platform.h"
#include "aspm.h"
#include "affinity.h"
+#include "debugfs.h"
#define NUM_IB_PORTS 1
@@ -125,9 +126,16 @@ struct flag_table {
#define DEFAULT_KRCVQS 2
#define MIN_KERNEL_KCTXTS 2
#define FIRST_KERNEL_KCTXT 1
-/* sizes for both the QP and RSM map tables */
-#define NUM_MAP_ENTRIES 256
-#define NUM_MAP_REGS 32
+
+/*
+ * RSM instance allocation
+ * 0 - Verbs
+ * 1 - User Fecn Handling
+ * 2 - Vnic
+ */
+#define RSM_INS_VERBS 0
+#define RSM_INS_FECN 1
+#define RSM_INS_VNIC 2
/* Bit offset into the GUID which carries HFI id information */
#define GUID_HFI_INDEX_SHIFT 39
@@ -138,8 +146,7 @@ struct flag_table {
#define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3)
#define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4)
-/* RSM fields */
-
+/* RSM fields for Verbs */
/* packet type */
#define IB_PACKET_TYPE 2ull
#define QW_SHIFT 6ull
@@ -169,6 +176,28 @@ struct flag_table {
/* QPN[m+n:1] QW 1, OFFSET 1 */
#define QPN_SELECT_OFFSET ((1ull << QW_SHIFT) | (1ull))
+/* RSM fields for Vnic */
+/* L2_TYPE: QW 0, OFFSET 61 - for match */
+#define L2_TYPE_QW 0ull
+#define L2_TYPE_BIT_OFFSET 61ull
+#define L2_TYPE_OFFSET(off) ((L2_TYPE_QW << QW_SHIFT) | (off))
+#define L2_TYPE_MATCH_OFFSET L2_TYPE_OFFSET(L2_TYPE_BIT_OFFSET)
+#define L2_TYPE_MASK 3ull
+#define L2_16B_VALUE 2ull
+
+/* L4_TYPE QW 1, OFFSET 0 - for match */
+#define L4_TYPE_QW 1ull
+#define L4_TYPE_BIT_OFFSET 0ull
+#define L4_TYPE_OFFSET(off) ((L4_TYPE_QW << QW_SHIFT) | (off))
+#define L4_TYPE_MATCH_OFFSET L4_TYPE_OFFSET(L4_TYPE_BIT_OFFSET)
+#define L4_16B_TYPE_MASK 0xFFull
+#define L4_16B_ETH_VALUE 0x78ull
+
+/* 16B VESWID - for select */
+#define L4_16B_HDR_VESWID_OFFSET ((2 << QW_SHIFT) | (16ull))
+/* 16B ENTROPY - for select */
+#define L2_16B_ENTROPY_OFFSET ((1 << QW_SHIFT) | (32ull))
+
/* defines to build power on SC2VL table */
#define SC2VL_VAL( \
num, \
@@ -1045,6 +1074,8 @@ static void dc_start(struct hfi1_devdata *);
static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
unsigned int *np);
static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
+static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms);
+static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index);
/*
* Error interrupt table entry. This is used as input to the interrupt
@@ -6379,18 +6410,17 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
*
* The expectation is that the caller of this routine would have taken
* care of properly transitioning the link into the correct state.
+ * NOTE: the caller needs to acquire the dd->dc8051_lock lock
+ * before calling this function.
*/
-static void dc_shutdown(struct hfi1_devdata *dd)
+static void _dc_shutdown(struct hfi1_devdata *dd)
{
- unsigned long flags;
+ lockdep_assert_held(&dd->dc8051_lock);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
- if (dd->dc_shutdown) {
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
+ if (dd->dc_shutdown)
return;
- }
+
dd->dc_shutdown = 1;
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
/* Shutdown the LCB */
lcb_shutdown(dd, 1);
/*
@@ -6401,35 +6431,45 @@ static void dc_shutdown(struct hfi1_devdata *dd)
write_csr(dd, DC_DC8051_CFG_RST, 0x1);
}
+static void dc_shutdown(struct hfi1_devdata *dd)
+{
+ mutex_lock(&dd->dc8051_lock);
+ _dc_shutdown(dd);
+ mutex_unlock(&dd->dc8051_lock);
+}
+
/*
* Calling this after the DC has been brought out of reset should not
* do any damage.
+ * NOTE: the caller needs to acquire the dd->dc8051_lock lock
+ * before calling this function.
*/
-static void dc_start(struct hfi1_devdata *dd)
+static void _dc_start(struct hfi1_devdata *dd)
{
- unsigned long flags;
- int ret;
+ lockdep_assert_held(&dd->dc8051_lock);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
if (!dd->dc_shutdown)
- goto done;
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
+ return;
+
/* Take the 8051 out of reset */
write_csr(dd, DC_DC8051_CFG_RST, 0ull);
/* Wait until 8051 is ready */
- ret = wait_fm_ready(dd, TIMEOUT_8051_START);
- if (ret) {
+ if (wait_fm_ready(dd, TIMEOUT_8051_START))
dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
__func__);
- }
+
/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
write_csr(dd, DCC_CFG_RESET, 0x10);
/* lcb_shutdown() with abort=1 does not restore these */
write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
dd->dc_shutdown = 0;
-done:
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
+}
+
+static void dc_start(struct hfi1_devdata *dd)
+{
+ mutex_lock(&dd->dc8051_lock);
+ _dc_start(dd);
+ mutex_unlock(&dd->dc8051_lock);
}
/*
@@ -6701,7 +6741,13 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
int i;
/* enable all kernel contexts */
- for (i = 0; i < dd->n_krcv_queues; i++) {
+ for (i = 0; i < dd->num_rcv_contexts; i++) {
+ struct hfi1_ctxtdata *rcd = dd->rcd[i];
+
+ /* Ensure all non-user contexts(including vnic) are enabled */
+ if (!rcd || !rcd->sc || (rcd->sc->type == SC_USER))
+ continue;
+
rcvmask = HFI1_RCVCTRL_CTXT_ENB;
/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
@@ -7077,7 +7123,7 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
{
struct hfi1_devdata *dd = ppd->dd;
- /* Sanity check - ppd->pkeys[2] should be 0, or already initalized */
+ /* Sanity check - ppd->pkeys[2] should be 0, or already initialized */
if (!((ppd->pkeys[2] == 0) || (ppd->pkeys[2] == FULL_MGMT_P_KEY)))
dd_dev_warn(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n",
__func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
@@ -7165,7 +7211,7 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
* set the max_rate field in handle_verify_cap until v0.19.
*/
if ((dd->icode == ICODE_RTL_SILICON) &&
- (dd->dc8051_ver < dc8051_ver(0, 19))) {
+ (dd->dc8051_ver < dc8051_ver(0, 19, 0))) {
/* max_rate: 0 = 12.5G, 1 = 25G */
switch (max_rate) {
case 0:
@@ -7277,15 +7323,6 @@ void handle_verify_cap(struct work_struct *work)
lcb_shutdown(dd, 0);
adjust_lcb_for_fpga_serdes(dd);
- /*
- * These are now valid:
- * remote VerifyCap fields in the general LNI config
- * CSR DC8051_STS_REMOTE_GUID
- * CSR DC8051_STS_REMOTE_NODE_TYPE
- * CSR DC8051_STS_REMOTE_FM_SECURITY
- * CSR DC8051_STS_REMOTE_PORT_NO
- */
-
read_vc_remote_phy(dd, &power_management, &continious);
read_vc_remote_fabric(dd, &vau, &z, &vcu, &vl15buf,
&partner_supported_crc);
@@ -7350,7 +7387,7 @@ void handle_verify_cap(struct work_struct *work)
}
ppd->link_speed_active = 0; /* invalid value */
- if (dd->dc8051_ver < dc8051_ver(0, 20)) {
+ if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) {
/* remote_tx_rate: 0 = 12.5G, 1 = 25G */
switch (remote_tx_rate) {
case 0:
@@ -7416,20 +7453,6 @@ void handle_verify_cap(struct work_struct *work)
write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
set_8051_lcb_access(dd);
- ppd->neighbor_guid =
- read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
- ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
- DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
- ppd->neighbor_type =
- read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
- DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
- ppd->neighbor_fm_security =
- read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
- DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
- dd_dev_info(dd,
- "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
- ppd->neighbor_guid, ppd->neighbor_type,
- ppd->mgmt_allowed, ppd->neighbor_fm_security);
if (ppd->mgmt_allowed)
add_full_mgmt_pkey(ppd);
@@ -7897,6 +7920,9 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
}
+ if (unlikely(hfi1_dbg_fault_suppress_err(&dd->verbs_dev)))
+ reg &= ~DCC_ERR_FLG_LATE_EBP_ERR_SMASK;
+
/* report any remaining errors */
if (reg)
dd_dev_info_ratelimited(dd, "DCC Error: %s\n",
@@ -7995,7 +8021,9 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
if (likely(source < dd->num_rcv_contexts)) {
rcd = dd->rcd[source];
if (rcd) {
- if (source < dd->first_user_ctxt)
+ /* Check for non-user contexts, including vnic */
+ if ((source < dd->first_dyn_alloc_ctxt) ||
+ (rcd->sc && (rcd->sc->type == SC_KERNEL)))
rcd->do_interrupt(rcd, 0);
else
handle_user_interrupt(rcd);
@@ -8023,7 +8051,8 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
rcd = dd->rcd[source];
if (rcd) {
/* only pay attention to user urgent interrupts */
- if (source >= dd->first_user_ctxt)
+ if ((source >= dd->first_dyn_alloc_ctxt) &&
+ (!rcd->sc || (rcd->sc->type == SC_USER)))
handle_user_interrupt(rcd);
return; /* OK */
}
@@ -8156,10 +8185,10 @@ static irqreturn_t sdma_interrupt(int irq, void *data)
/* handle the interrupt(s) */
sdma_engine_interrupt(sde, status);
- } else
+ } else {
dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
sde->this_idx);
-
+ }
return IRQ_HANDLED;
}
@@ -8344,6 +8373,52 @@ static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data)
}
/*
+ * Provide a cache for some of the LCB registers in case the LCB is
+ * unavailable.
+ * (The LCB is unavailable in certain link states, for example.)
+ */
+struct lcb_datum {
+ u32 off;
+ u64 val;
+};
+
+static struct lcb_datum lcb_cache[] = {
+ { DC_LCB_ERR_INFO_RX_REPLAY_CNT, 0},
+ { DC_LCB_ERR_INFO_SEQ_CRC_CNT, 0 },
+ { DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT, 0 },
+};
+
+static void update_lcb_cache(struct hfi1_devdata *dd)
+{
+ int i;
+ int ret;
+ u64 val;
+
+ for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
+ ret = read_lcb_csr(dd, lcb_cache[i].off, &val);
+
+ /* Update if we get good data */
+ if (likely(ret != -EBUSY))
+ lcb_cache[i].val = val;
+ }
+}
+
+static int read_lcb_cache(u32 off, u64 *val)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
+ if (lcb_cache[i].off == off) {
+ *val = lcb_cache[i].val;
+ return 0;
+ }
+ }
+
+ pr_warn("%s bad offset 0x%x\n", __func__, off);
+ return -1;
+}
+
+/*
* Read an LCB CSR. Access may not be in host control, so check.
* Return 0 on success, -EBUSY on failure.
*/
@@ -8354,9 +8429,13 @@ int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data)
/* if up, go through the 8051 for the value */
if (ppd->host_link_state & HLS_UP)
return read_lcb_via_8051(dd, addr, data);
- /* if going up or down, no access */
- if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
- return -EBUSY;
+ /* if going up or down, check the cache, otherwise, no access */
+ if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE)) {
+ if (read_lcb_cache(addr, data))
+ return -EBUSY;
+ return 0;
+ }
+
/* otherwise, host has access */
*data = read_csr(dd, addr);
return 0;
@@ -8371,7 +8450,7 @@ static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data)
int ret;
if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR ||
- (dd->dc8051_ver < dc8051_ver(0, 20))) {
+ (dd->dc8051_ver < dc8051_ver(0, 20, 0))) {
if (acquire_lcb_access(dd, 0) == 0) {
write_csr(dd, addr, data);
release_lcb_access(dd, 0);
@@ -8420,16 +8499,11 @@ static int do_8051_command(
{
u64 reg, completed;
int return_code;
- unsigned long flags;
unsigned long timeout;
hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
- /*
- * Alternative to holding the lock for a long time:
- * - keep busy wait - have other users bounce off
- */
- spin_lock_irqsave(&dd->dc8051_lock, flags);
+ mutex_lock(&dd->dc8051_lock);
/* We can't send any commands to the 8051 if it's in reset */
if (dd->dc_shutdown) {
@@ -8455,10 +8529,8 @@ static int do_8051_command(
return_code = -ENXIO;
goto fail;
}
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
- dc_shutdown(dd);
- dc_start(dd);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
+ _dc_shutdown(dd);
+ _dc_start(dd);
}
/*
@@ -8539,8 +8611,7 @@ static int do_8051_command(
write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
fail:
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
-
+ mutex_unlock(&dd->dc8051_lock);
return return_code;
}
@@ -8677,13 +8748,20 @@ static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
& REMOTE_DEVICE_REV_MASK;
}
-void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b)
+void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
+ u8 *ver_patch)
{
u32 frame;
read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame);
- *ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK;
- *ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK;
+ *ver_major = (frame >> STS_FM_VERSION_MAJOR_SHIFT) &
+ STS_FM_VERSION_MAJOR_MASK;
+ *ver_minor = (frame >> STS_FM_VERSION_MINOR_SHIFT) &
+ STS_FM_VERSION_MINOR_MASK;
+
+ read_8051_config(dd, VERSION_PATCH, GENERAL_CONFIG, &frame);
+ *ver_patch = (frame >> STS_FM_VERSION_PATCH_SHIFT) &
+ STS_FM_VERSION_PATCH_MASK;
}
static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
@@ -8891,8 +8969,6 @@ int send_idle_sma(struct hfi1_devdata *dd, u64 message)
*/
static int do_quick_linkup(struct hfi1_devdata *dd)
{
- u64 reg;
- unsigned long timeout;
int ret;
lcb_shutdown(dd, 0);
@@ -8915,19 +8991,9 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
write_csr(dd, DC_LCB_CFG_RUN,
1ull << DC_LCB_CFG_RUN_EN_SHIFT);
- /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
- timeout = jiffies + msecs_to_jiffies(10);
- while (1) {
- reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
- if (reg)
- break;
- if (time_after(jiffies, timeout)) {
- dd_dev_err(dd,
- "timeout waiting for LINK_TRANSFER_ACTIVE\n");
- return -ETIMEDOUT;
- }
- udelay(2);
- }
+ ret = wait_link_transfer_active(dd, 10);
+ if (ret)
+ return ret;
write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
@@ -9091,7 +9157,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
if (ret)
goto set_local_link_attributes_fail;
- if (dd->dc8051_ver < dc8051_ver(0, 20)) {
+ if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) {
/* set the tx rate to the fastest enabled */
if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
ppd->local_tx_rate = 1;
@@ -9274,7 +9340,7 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
(qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
- dd_dev_info(dd, "%s: QSFP cable on fire\n",
+ dd_dev_info(dd, "%s: QSFP cable temperature too high\n",
__func__);
if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
@@ -9494,8 +9560,11 @@ static int test_qsfp_read(struct hfi1_pportdata *ppd)
int ret;
u8 status;
- /* report success if not a QSFP */
- if (ppd->port_type != PORT_TYPE_QSFP)
+ /*
+ * Report success if not a QSFP or, if it is a QSFP, but the cable is
+ * not present
+ */
+ if (ppd->port_type != PORT_TYPE_QSFP || !qsfp_mod_present(ppd))
return 0;
/* read byte 2, the status byte */
@@ -10082,6 +10151,64 @@ static void check_lni_states(struct hfi1_pportdata *ppd)
decode_state_complete(ppd, last_remote_state, "received");
}
+/* wait for wait_ms for LINK_TRANSFER_ACTIVE to go to 1 */
+static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms)
+{
+ u64 reg;
+ unsigned long timeout;
+
+ /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
+ timeout = jiffies + msecs_to_jiffies(wait_ms);
+ while (1) {
+ reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
+ if (reg)
+ break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(dd,
+ "timeout waiting for LINK_TRANSFER_ACTIVE\n");
+ return -ETIMEDOUT;
+ }
+ udelay(2);
+ }
+ return 0;
+}
+
+/* called when the logical link state is not down as it should be */
+static void force_logical_link_state_down(struct hfi1_pportdata *ppd)
+{
+ struct hfi1_devdata *dd = ppd->dd;
+
+ /*
+ * Bring link up in LCB loopback
+ */
+ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1);
+ write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
+ DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
+
+ write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
+ write_csr(dd, DC_LCB_CFG_REINIT_AS_SLAVE, 0);
+ write_csr(dd, DC_LCB_CFG_CNT_FOR_SKIP_STALL, 0x110);
+ write_csr(dd, DC_LCB_CFG_LOOPBACK, 0x2);
+
+ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
+ (void)read_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET);
+ udelay(3);
+ write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 1);
+ write_csr(dd, DC_LCB_CFG_RUN, 1ull << DC_LCB_CFG_RUN_EN_SHIFT);
+
+ wait_link_transfer_active(dd, 100);
+
+ /*
+ * Bring the link down again.
+ */
+ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1);
+ write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 0);
+ write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, 0);
+
+ /* call again to adjust ppd->statusp, if needed */
+ get_logical_state(ppd);
+}
+
/*
* Helper for set_link_state(). Do not call except from that routine.
* Expects ppd->hls_mutex to be held.
@@ -10098,6 +10225,8 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
int do_transition;
int do_wait;
+ update_lcb_cache(dd);
+
previous_state = ppd->host_link_state;
ppd->host_link_state = HLS_GOING_OFFLINE;
pstate = read_physical_state(dd);
@@ -10135,15 +10264,18 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
return ret;
}
- /* make sure the logical state is also down */
- wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
-
/*
* Now in charge of LCB - must be after the physical state is
* offline.quiet and before host_link_state is changed.
*/
set_host_lcb_access(dd);
write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
+
+ /* make sure the logical state is also down */
+ ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
+ if (ret)
+ force_logical_link_state_down(ppd);
+
ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
if (ppd->port_type == PORT_TYPE_QSFP &&
@@ -10380,11 +10512,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
goto unexpected;
}
- ppd->host_link_state = HLS_UP_INIT;
ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
if (ret) {
- /* logical state didn't change, stay at going_up */
- ppd->host_link_state = HLS_GOING_UP;
dd_dev_err(dd,
"%s: logical state did not change to INIT\n",
__func__);
@@ -10398,6 +10527,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
handle_linkup_change(dd, 1);
+ ppd->host_link_state = HLS_UP_INIT;
}
break;
case HLS_UP_ARMED:
@@ -11853,6 +11983,10 @@ static void free_cntrs(struct hfi1_devdata *dd)
dd->scntrs = NULL;
kfree(dd->cntrnames);
dd->cntrnames = NULL;
+ if (dd->update_cntr_wq) {
+ destroy_workqueue(dd->update_cntr_wq);
+ dd->update_cntr_wq = NULL;
+ }
}
static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry,
@@ -12008,7 +12142,7 @@ u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data)
return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data);
}
-static void update_synth_timer(unsigned long opaque)
+static void do_update_synth_timer(struct work_struct *work)
{
u64 cur_tx;
u64 cur_rx;
@@ -12017,8 +12151,8 @@ static void update_synth_timer(unsigned long opaque)
int i, j, vl;
struct hfi1_pportdata *ppd;
struct cntr_entry *entry;
-
- struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
+ struct hfi1_devdata *dd = container_of(work, struct hfi1_devdata,
+ update_cntr_work);
/*
* Rather than keep beating on the CSRs pick a minimal set that we can
@@ -12101,7 +12235,13 @@ static void update_synth_timer(unsigned long opaque)
} else {
hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit);
}
+}
+static void update_synth_timer(unsigned long opaque)
+{
+ struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
+
+ queue_work(dd->update_cntr_wq, &dd->update_cntr_work);
mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
}
@@ -12337,6 +12477,13 @@ static int init_cntrs(struct hfi1_devdata *dd)
if (init_cpu_counters(dd))
goto bail;
+ dd->update_cntr_wq = alloc_ordered_workqueue("hfi1_update_cntr_%d",
+ WQ_MEM_RECLAIM, dd->unit);
+ if (!dd->update_cntr_wq)
+ goto bail;
+
+ INIT_WORK(&dd->update_cntr_work, do_update_synth_timer);
+
mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
return 0;
bail:
@@ -12726,7 +12873,10 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
first_sdma = last_general;
last_sdma = first_sdma + dd->num_sdma;
first_rx = last_sdma;
- last_rx = first_rx + dd->n_krcv_queues;
+ last_rx = first_rx + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT;
+
+ /* VNIC MSIx interrupts get mapped when VNIC contexts are created */
+ dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues;
/*
* Sanity check - the code expects all SDMA chip source
@@ -12740,7 +12890,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
const char *err_info;
irq_handler_t handler;
irq_handler_t thread = NULL;
- void *arg;
+ void *arg = NULL;
int idx;
struct hfi1_ctxtdata *rcd = NULL;
struct sdma_engine *sde = NULL;
@@ -12767,24 +12917,25 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
} else if (first_rx <= i && i < last_rx) {
idx = i - first_rx;
rcd = dd->rcd[idx];
- /* no interrupt if no rcd */
- if (!rcd)
- continue;
- /*
- * Set the interrupt register and mask for this
- * context's interrupt.
- */
- rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
- rcd->imask = ((u64)1) <<
- ((IS_RCVAVAIL_START + idx) % 64);
- handler = receive_context_interrupt;
- thread = receive_context_thread;
- arg = rcd;
- snprintf(me->name, sizeof(me->name),
- DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
- err_info = "receive context";
- remap_intr(dd, IS_RCVAVAIL_START + idx, i);
- me->type = IRQ_RCVCTXT;
+ if (rcd) {
+ /*
+ * Set the interrupt register and mask for this
+ * context's interrupt.
+ */
+ rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
+ rcd->imask = ((u64)1) <<
+ ((IS_RCVAVAIL_START + idx) % 64);
+ handler = receive_context_interrupt;
+ thread = receive_context_thread;
+ arg = rcd;
+ snprintf(me->name, sizeof(me->name),
+ DRIVER_NAME "_%d kctxt%d",
+ dd->unit, idx);
+ err_info = "receive context";
+ remap_intr(dd, IS_RCVAVAIL_START + idx, i);
+ me->type = IRQ_RCVCTXT;
+ rcd->msix_intr = i;
+ }
} else {
/* not in our expected range - complain, then
* ignore it
@@ -12822,6 +12973,84 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
return ret;
}
+void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
+{
+ int i;
+
+ if (!dd->num_msix_entries) {
+ synchronize_irq(dd->pcidev->irq);
+ return;
+ }
+
+ for (i = 0; i < dd->vnic.num_ctxt; i++) {
+ struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
+ struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
+
+ synchronize_irq(me->msix.vector);
+ }
+}
+
+void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+ struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
+
+ if (!me->arg) /* => no irq, no affinity */
+ return;
+
+ hfi1_put_irq_affinity(dd, me);
+ free_irq(me->msix.vector, me->arg);
+
+ me->arg = NULL;
+}
+
+void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+ struct hfi1_msix_entry *me;
+ int idx = rcd->ctxt;
+ void *arg = rcd;
+ int ret;
+
+ rcd->msix_intr = dd->vnic.msix_idx++;
+ me = &dd->msix_entries[rcd->msix_intr];
+
+ /*
+ * Set the interrupt register and mask for this
+ * context's interrupt.
+ */
+ rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
+ rcd->imask = ((u64)1) <<
+ ((IS_RCVAVAIL_START + idx) % 64);
+
+ snprintf(me->name, sizeof(me->name),
+ DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
+ me->name[sizeof(me->name) - 1] = 0;
+ me->type = IRQ_RCVCTXT;
+
+ remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
+
+ ret = request_threaded_irq(me->msix.vector, receive_context_interrupt,
+ receive_context_thread, 0, me->name, arg);
+ if (ret) {
+ dd_dev_err(dd, "vnic irq request (vector %d, idx %d) fail %d\n",
+ me->msix.vector, idx, ret);
+ return;
+ }
+ /*
+ * assign arg after request_irq call, so it will be
+ * cleaned up
+ */
+ me->arg = arg;
+
+ ret = hfi1_get_irq_affinity(dd, me);
+ if (ret) {
+ dd_dev_err(dd,
+ "unable to pin IRQ %d\n", ret);
+ free_irq(me->msix.vector, me->arg);
+ }
+}
+
/*
* Set the general handler to accept all interrupts, remap all
* chip interrupts back to MSI-X 0.
@@ -12853,7 +13082,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
* N interrupts - one per used SDMA engine
* M interrupt - one per kernel receive context
*/
- total = 1 + dd->num_sdma + dd->n_krcv_queues;
+ total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT;
entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
if (!entries) {
@@ -12918,7 +13147,8 @@ fail:
*
* num_rcv_contexts - number of contexts being used
* n_krcv_queues - number of kernel contexts
- * first_user_ctxt - first non-kernel context in array of contexts
+ * first_dyn_alloc_ctxt - first dynamically allocated context
+ * in array of contexts
* freectxts - number of free user contexts
* num_send_contexts - number of PIO send contexts being used
*/
@@ -12995,10 +13225,14 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
total_contexts = num_kernel_contexts + num_user_contexts;
}
- /* the first N are kernel contexts, the rest are user contexts */
+ /* Accommodate VNIC contexts */
+ if ((total_contexts + HFI1_NUM_VNIC_CTXT) <= dd->chip_rcv_contexts)
+ total_contexts += HFI1_NUM_VNIC_CTXT;
+
+ /* the first N are kernel contexts, the rest are user/vnic contexts */
dd->num_rcv_contexts = total_contexts;
dd->n_krcv_queues = num_kernel_contexts;
- dd->first_user_ctxt = num_kernel_contexts;
+ dd->first_dyn_alloc_ctxt = num_kernel_contexts;
dd->num_user_contexts = num_user_contexts;
dd->freectxts = num_user_contexts;
dd_dev_info(dd,
@@ -13454,11 +13688,8 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd)
write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0);
for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++)
write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0);
- for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) {
- write_csr(dd, RCV_RSM_CFG + (8 * i), 0);
- write_csr(dd, RCV_RSM_SELECT + (8 * i), 0);
- write_csr(dd, RCV_RSM_MATCH + (8 * i), 0);
- }
+ for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++)
+ clear_rsm_rule(dd, i);
for (i = 0; i < 32; i++)
write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0);
@@ -13817,6 +14048,16 @@ static void add_rsm_rule(struct hfi1_devdata *dd, u8 rule_index,
(u64)rrd->value2 << RCV_RSM_MATCH_VALUE2_SHIFT);
}
+/*
+ * Clear a receive side mapping rule.
+ */
+static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index)
+{
+ write_csr(dd, RCV_RSM_CFG + (8 * rule_index), 0);
+ write_csr(dd, RCV_RSM_SELECT + (8 * rule_index), 0);
+ write_csr(dd, RCV_RSM_MATCH + (8 * rule_index), 0);
+}
+
/* return the number of RSM map table entries that will be used for QOS */
static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
unsigned int *np)
@@ -13932,7 +14173,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt)
rrd.value2 = LRH_SC_VALUE;
/* add rule 0 */
- add_rsm_rule(dd, 0, &rrd);
+ add_rsm_rule(dd, RSM_INS_VERBS, &rrd);
/* mark RSM map entries as used */
rmt->used += rmt_entries;
@@ -13962,7 +14203,7 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
/*
* RSM will extract the destination context as an index into the
* map table. The destination contexts are a sequential block
- * in the range first_user_ctxt...num_rcv_contexts-1 (inclusive).
+ * in the range first_dyn_alloc_ctxt...num_rcv_contexts-1 (inclusive).
* Map entries are accessed as offset + extracted value. Adjust
* the added offset so this sequence can be placed anywhere in
* the table - as long as the entries themselves do not wrap.
@@ -13970,9 +14211,9 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
* start with that to allow for a "negative" offset.
*/
offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used -
- (int)dd->first_user_ctxt);
+ (int)dd->first_dyn_alloc_ctxt);
- for (i = dd->first_user_ctxt, idx = rmt->used;
+ for (i = dd->first_dyn_alloc_ctxt, idx = rmt->used;
i < dd->num_rcv_contexts; i++, idx++) {
/* replace with identity mapping */
regoff = (idx % 8) * 8;
@@ -14006,11 +14247,84 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
rrd.value2 = 1;
/* add rule 1 */
- add_rsm_rule(dd, 1, &rrd);
+ add_rsm_rule(dd, RSM_INS_FECN, &rrd);
rmt->used += dd->num_user_contexts;
}
+/* Initialize RSM for VNIC */
+void hfi1_init_vnic_rsm(struct hfi1_devdata *dd)
+{
+ u8 i, j;
+ u8 ctx_id = 0;
+ u64 reg;
+ u32 regoff;
+ struct rsm_rule_data rrd;
+
+ if (hfi1_vnic_is_rsm_full(dd, NUM_VNIC_MAP_ENTRIES)) {
+ dd_dev_err(dd, "Vnic RSM disabled, rmt entries used = %d\n",
+ dd->vnic.rmt_start);
+ return;
+ }
+
+ dev_dbg(&(dd)->pcidev->dev, "Vnic rsm start = %d, end %d\n",
+ dd->vnic.rmt_start,
+ dd->vnic.rmt_start + NUM_VNIC_MAP_ENTRIES);
+
+ /* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */
+ regoff = RCV_RSM_MAP_TABLE + (dd->vnic.rmt_start / 8) * 8;
+ reg = read_csr(dd, regoff);
+ for (i = 0; i < NUM_VNIC_MAP_ENTRIES; i++) {
+ /* Update map register with vnic context */
+ j = (dd->vnic.rmt_start + i) % 8;
+ reg &= ~(0xffllu << (j * 8));
+ reg |= (u64)dd->vnic.ctxt[ctx_id++]->ctxt << (j * 8);
+ /* Wrap up vnic ctx index */
+ ctx_id %= dd->vnic.num_ctxt;
+ /* Write back map register */
+ if (j == 7 || ((i + 1) == NUM_VNIC_MAP_ENTRIES)) {
+ dev_dbg(&(dd)->pcidev->dev,
+ "Vnic rsm map reg[%d] =0x%llx\n",
+ regoff - RCV_RSM_MAP_TABLE, reg);
+
+ write_csr(dd, regoff, reg);
+ regoff += 8;
+ if (i < (NUM_VNIC_MAP_ENTRIES - 1))
+ reg = read_csr(dd, regoff);
+ }
+ }
+
+ /* Add rule for vnic */
+ rrd.offset = dd->vnic.rmt_start;
+ rrd.pkt_type = 4;
+ /* Match 16B packets */
+ rrd.field1_off = L2_TYPE_MATCH_OFFSET;
+ rrd.mask1 = L2_TYPE_MASK;
+ rrd.value1 = L2_16B_VALUE;
+ /* Match ETH L4 packets */
+ rrd.field2_off = L4_TYPE_MATCH_OFFSET;
+ rrd.mask2 = L4_16B_TYPE_MASK;
+ rrd.value2 = L4_16B_ETH_VALUE;
+ /* Calc context from veswid and entropy */
+ rrd.index1_off = L4_16B_HDR_VESWID_OFFSET;
+ rrd.index1_width = ilog2(NUM_VNIC_MAP_ENTRIES);
+ rrd.index2_off = L2_16B_ENTROPY_OFFSET;
+ rrd.index2_width = ilog2(NUM_VNIC_MAP_ENTRIES);
+ add_rsm_rule(dd, RSM_INS_VNIC, &rrd);
+
+ /* Enable RSM if not already enabled */
+ add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
+}
+
+void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd)
+{
+ clear_rsm_rule(dd, RSM_INS_VNIC);
+
+ /* Disable RSM if used only by vnic */
+ if (dd->vnic.rmt_start == 0)
+ clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
+}
+
static void init_rxe(struct hfi1_devdata *dd)
{
struct rsm_map_table *rmt;
@@ -14023,6 +14337,8 @@ static void init_rxe(struct hfi1_devdata *dd)
init_qos(dd, rmt);
init_user_fecn_handling(dd, rmt);
complete_rsm_map_table(dd, rmt);
+ /* record number of used rsm map entries for vnic */
+ dd->vnic.rmt_start = rmt->used;
kfree(rmt);
/*
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 043fd21dc5f3..b9dbf16d7703 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -1,7 +1,7 @@
#ifndef _CHIP_H
#define _CHIP_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -394,7 +394,8 @@
#define LAST_REMOTE_STATE_COMPLETE 0x13
#define LINK_QUALITY_INFO 0x14
#define REMOTE_DEVICE_ID 0x15
-#define LINK_DOWN_REASON 0x16
+#define LINK_DOWN_REASON 0x16 /* first byte of offset 0x16 */
+#define VERSION_PATCH 0x16 /* last byte of offset 0x16 */
/* 8051 lane specific register field IDs */
#define TX_EQ_SETTINGS 0x00
@@ -524,10 +525,12 @@ enum {
#define SUPPORTED_CRCS (CAP_CRC_14B | CAP_CRC_48B)
/* misc status version fields */
-#define STS_FM_VERSION_A_SHIFT 16
-#define STS_FM_VERSION_A_MASK 0xff
-#define STS_FM_VERSION_B_SHIFT 24
-#define STS_FM_VERSION_B_MASK 0xff
+#define STS_FM_VERSION_MINOR_SHIFT 16
+#define STS_FM_VERSION_MINOR_MASK 0xff
+#define STS_FM_VERSION_MAJOR_SHIFT 24
+#define STS_FM_VERSION_MAJOR_MASK 0xff
+#define STS_FM_VERSION_PATCH_SHIFT 24
+#define STS_FM_VERSION_PATCH_MASK 0xff
/* LCB_CFG_CRC_MODE TX_VAL and RX_VAL CRC mode values */
#define LCB_CRC_16B 0x0 /* 16b CRC */
@@ -698,7 +701,8 @@ void fabric_serdes_reset(struct hfi1_devdata *dd);
int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result);
/* chip.c */
-void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b);
+void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
+ u8 *ver_patch);
void read_guid(struct hfi1_devdata *dd);
int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout);
void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
@@ -1358,6 +1362,8 @@ int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt);
int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey);
int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt);
void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality);
+void hfi1_init_vnic_rsm(struct hfi1_devdata *dd);
+void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd);
/*
* Interrupt source table.
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index 1b783bbee4bb..995d62c7f9a7 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -331,12 +331,15 @@ struct diag_pkt {
#define FULL_MGMT_P_KEY 0xFFFF
#define DEFAULT_P_KEY LIM_MGMT_P_KEY
-#define HFI1_FECN_SHIFT 31
-#define HFI1_FECN_MASK 1
-#define HFI1_FECN_SMASK BIT(HFI1_FECN_SHIFT)
-#define HFI1_BECN_SHIFT 30
-#define HFI1_BECN_MASK 1
-#define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT)
+
+/**
+ * 0xF8 - 4 bits of multicast range and 1 bit for collective range
+ * Example: For 24 bit LID space,
+ * Multicast range: 0xF00000 to 0xF7FFFF
+ * Collective range: 0xF80000 to 0xFFFFFE
+ */
+#define HFI1_MCAST_NR 0x4 /* Number of top bits set */
+#define HFI1_COLLECTIVE_NR 0x1 /* Number of bits after MCAST_NR */
#define HFI1_PSM_IOC_BASE_SEQ 0x0
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 7fe9dd885746..e9fa3c293e42 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -1,6 +1,6 @@
#ifdef CONFIG_DEBUG_FS
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -51,8 +51,12 @@
#include <linux/export.h>
#include <linux/module.h>
#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ratelimit.h>
+#include <linux/fault-inject.h>
#include "hfi.h"
+#include "trace.h"
#include "debugfs.h"
#include "device.h"
#include "qp.h"
@@ -170,7 +174,7 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v)
struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
struct hfi1_devdata *dd = dd_from_dev(ibd);
- for (j = 0; j < dd->first_user_ctxt; j++) {
+ for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
if (!dd->rcd[j])
continue;
n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
@@ -196,7 +200,7 @@ static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos)
if (!*pos)
return SEQ_START_TOKEN;
- if (*pos >= dd->first_user_ctxt)
+ if (*pos >= dd->first_dyn_alloc_ctxt)
return NULL;
return pos;
}
@@ -210,7 +214,7 @@ static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
return pos;
++*pos;
- if (*pos >= dd->first_user_ctxt)
+ if (*pos >= dd->first_dyn_alloc_ctxt)
return NULL;
return pos;
}
@@ -1063,6 +1067,222 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list);
DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list)
DEBUGFS_FILE_OPS(sdma_cpu_list);
+#ifdef CONFIG_FAULT_INJECTION
+static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct hfi1_opcode_stats_perctx *opstats;
+
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct hfi1_opcode_stats_perctx *opstats;
+
+ ++*pos;
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void _fault_stats_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int _fault_stats_seq_show(struct seq_file *s, void *v)
+{
+ loff_t *spos = v;
+ loff_t i = *spos, j;
+ u64 n_packets = 0, n_bytes = 0;
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+
+ for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
+ if (!dd->rcd[j])
+ continue;
+ n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
+ n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
+ }
+ if (!n_packets && !n_bytes)
+ return SEQ_SKIP;
+ if (!ibd->fault_opcode->n_rxfaults[i] &&
+ !ibd->fault_opcode->n_txfaults[i])
+ return SEQ_SKIP;
+ seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
+ (unsigned long long)n_packets,
+ (unsigned long long)n_bytes,
+ (unsigned long long)ibd->fault_opcode->n_rxfaults[i],
+ (unsigned long long)ibd->fault_opcode->n_txfaults[i]);
+ return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(fault_stats);
+DEBUGFS_SEQ_FILE_OPEN(fault_stats);
+DEBUGFS_FILE_OPS(fault_stats);
+
+static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd)
+{
+ debugfs_remove_recursive(ibd->fault_opcode->dir);
+ kfree(ibd->fault_opcode);
+ ibd->fault_opcode = NULL;
+}
+
+static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd)
+{
+ struct dentry *parent = ibd->hfi1_ibdev_dbg;
+
+ ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL);
+ if (!ibd->fault_opcode)
+ return -ENOMEM;
+
+ ibd->fault_opcode->attr.interval = 1;
+ ibd->fault_opcode->attr.require_end = ULONG_MAX;
+ ibd->fault_opcode->attr.stacktrace_depth = 32;
+ ibd->fault_opcode->attr.dname = NULL;
+ ibd->fault_opcode->attr.verbose = 0;
+ ibd->fault_opcode->fault_by_opcode = false;
+ ibd->fault_opcode->opcode = 0;
+ ibd->fault_opcode->mask = 0xff;
+
+ ibd->fault_opcode->dir =
+ fault_create_debugfs_attr("fault_opcode",
+ parent,
+ &ibd->fault_opcode->attr);
+ if (IS_ERR(ibd->fault_opcode->dir)) {
+ kfree(ibd->fault_opcode);
+ return -ENOENT;
+ }
+
+ DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd);
+ if (!debugfs_create_bool("fault_by_opcode", 0600,
+ ibd->fault_opcode->dir,
+ &ibd->fault_opcode->fault_by_opcode))
+ goto fail;
+ if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir,
+ &ibd->fault_opcode->opcode))
+ goto fail;
+ if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir,
+ &ibd->fault_opcode->mask))
+ goto fail;
+
+ return 0;
+fail:
+ fault_exit_opcode_debugfs(ibd);
+ return -ENOMEM;
+}
+
+static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd)
+{
+ debugfs_remove_recursive(ibd->fault_packet->dir);
+ kfree(ibd->fault_packet);
+ ibd->fault_packet = NULL;
+}
+
+static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd)
+{
+ struct dentry *parent = ibd->hfi1_ibdev_dbg;
+
+ ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL);
+ if (!ibd->fault_packet)
+ return -ENOMEM;
+
+ ibd->fault_packet->attr.interval = 1;
+ ibd->fault_packet->attr.require_end = ULONG_MAX;
+ ibd->fault_packet->attr.stacktrace_depth = 32;
+ ibd->fault_packet->attr.dname = NULL;
+ ibd->fault_packet->attr.verbose = 0;
+ ibd->fault_packet->fault_by_packet = false;
+
+ ibd->fault_packet->dir =
+ fault_create_debugfs_attr("fault_packet",
+ parent,
+ &ibd->fault_opcode->attr);
+ if (IS_ERR(ibd->fault_packet->dir)) {
+ kfree(ibd->fault_packet);
+ return -ENOENT;
+ }
+
+ if (!debugfs_create_bool("fault_by_packet", 0600,
+ ibd->fault_packet->dir,
+ &ibd->fault_packet->fault_by_packet))
+ goto fail;
+ if (!debugfs_create_u64("fault_stats", 0400,
+ ibd->fault_packet->dir,
+ &ibd->fault_packet->n_faults))
+ goto fail;
+
+ return 0;
+fail:
+ fault_exit_packet_debugfs(ibd);
+ return -ENOMEM;
+}
+
+static void fault_exit_debugfs(struct hfi1_ibdev *ibd)
+{
+ fault_exit_opcode_debugfs(ibd);
+ fault_exit_packet_debugfs(ibd);
+}
+
+static int fault_init_debugfs(struct hfi1_ibdev *ibd)
+{
+ int ret = 0;
+
+ ret = fault_init_opcode_debugfs(ibd);
+ if (ret)
+ return ret;
+
+ ret = fault_init_packet_debugfs(ibd);
+ if (ret)
+ fault_exit_opcode_debugfs(ibd);
+
+ return ret;
+}
+
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return ibd->fault_suppress_err;
+}
+
+bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx)
+{
+ bool ret = false;
+ struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
+
+ if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode)
+ return false;
+ if (ibd->fault_opcode->opcode != (opcode & ibd->fault_opcode->mask))
+ return false;
+ ret = should_fail(&ibd->fault_opcode->attr, 1);
+ if (ret) {
+ trace_hfi1_fault_opcode(qp, opcode);
+ if (rx)
+ ibd->fault_opcode->n_rxfaults[opcode]++;
+ else
+ ibd->fault_opcode->n_txfaults[opcode]++;
+ }
+ return ret;
+}
+
+bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
+{
+ struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi;
+ struct hfi1_ibdev *ibd = dev_from_rdi(rdi);
+ bool ret = false;
+
+ if (!ibd->fault_packet || !ibd->fault_packet->fault_by_packet)
+ return false;
+
+ ret = should_fail(&ibd->fault_packet->attr, 1);
+ if (ret) {
+ ++ibd->fault_packet->n_faults;
+ trace_hfi1_fault_packet(packet);
+ }
+ return ret;
+}
+#endif
+
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
char name[sizeof("port0counters") + 1];
@@ -1112,12 +1332,22 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
!port_cntr_ops[i].ops.write ?
S_IRUGO : S_IRUGO | S_IWUSR);
}
+
+#ifdef CONFIG_FAULT_INJECTION
+ debugfs_create_bool("fault_suppress_err", 0600,
+ ibd->hfi1_ibdev_dbg,
+ &ibd->fault_suppress_err);
+ fault_init_debugfs(ibd);
+#endif
}
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{
if (!hfi1_dbg_root)
goto out;
+#ifdef CONFIG_FAULT_INJECTION
+ fault_exit_debugfs(ibd);
+#endif
debugfs_remove(ibd->hfi1_ibdev_link);
debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
out:
diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h
index b6fb6814f1b8..38c38a98156d 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.h
+++ b/drivers/infiniband/hw/hfi1/debugfs.h
@@ -53,23 +53,79 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd);
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd);
void hfi1_dbg_init(void);
void hfi1_dbg_exit(void);
+
+#ifdef CONFIG_FAULT_INJECTION
+#include <linux/fault-inject.h>
+struct fault_opcode {
+ struct fault_attr attr;
+ struct dentry *dir;
+ bool fault_by_opcode;
+ u64 n_rxfaults[256];
+ u64 n_txfaults[256];
+ u8 opcode;
+ u8 mask;
+};
+
+struct fault_packet {
+ struct fault_attr attr;
+ struct dentry *dir;
+ bool fault_by_packet;
+ u64 n_faults;
+};
+
+bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx);
+bool hfi1_dbg_fault_packet(struct hfi1_packet *packet);
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd);
+#else
+static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
+ u32 opcode, bool rx)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return false;
+}
+#endif
+
#else
static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
}
-void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
+static inline void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
+{
+}
+
+static inline void hfi1_dbg_init(void)
{
}
-void hfi1_dbg_init(void)
+static inline void hfi1_dbg_exit(void)
{
}
-void hfi1_dbg_exit(void)
+static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{
+ return false;
}
+static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
+ u32 opcode, bool rx)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return false;
+}
#endif
#endif /* _HFI1_DEBUGFS_H */
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 3881c951f6af..527895487175 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -59,6 +59,8 @@
#include "trace.h"
#include "qp.h"
#include "sdma.h"
+#include "debugfs.h"
+#include "vnic.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -283,7 +285,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
{
struct ib_header *rhdr = packet->hdr;
u32 rte = rhf_rcv_type_err(packet->rhf);
- int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
+ int lnh = ib_get_lnh(rhdr);
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct hfi1_devdata *dd = ppd->dd;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
@@ -295,7 +297,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
/* For TIDERR and RC QPs preemptively schedule a NAK */
struct ib_other_headers *ohdr = NULL;
u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
- u16 lid = be16_to_cpu(rhdr->lrh[1]);
+ u16 lid = ib_get_dlid(rhdr);
u32 qp_num;
u32 rcv_flags = 0;
@@ -396,7 +398,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
u16 rlid;
u8 svc_type, sl, sc5;
- sc5 = hdr2sc(rhdr, packet->rhf);
+ sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
@@ -414,7 +416,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
svc_type = IB_CC_SVCTYPE_UD;
break;
case IB_QPT_UC:
- rlid = be16_to_cpu(rhdr->lrh[3]);
+ rlid = ib_get_slid(rhdr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_UC;
break;
@@ -460,7 +462,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
struct ib_other_headers *ohdr = pkt->ohdr;
struct ib_grh *grh = NULL;
u32 rqpn = 0, bth1;
- u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]);
+ u16 rlid, dlid = ib_get_dlid(hdr);
u8 sc, svc_type;
bool is_mcast = false;
@@ -471,19 +473,19 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_UD:
- rlid = be16_to_cpu(hdr->lrh[3]);
+ rlid = ib_get_slid(hdr);
rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
svc_type = IB_CC_SVCTYPE_UD;
is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
(dlid != be16_to_cpu(IB_LID_PERMISSIVE));
break;
case IB_QPT_UC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_UC;
break;
case IB_QPT_RC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_RC;
break;
@@ -491,16 +493,16 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
return;
}
- sc = hdr2sc(hdr, pkt->rhf);
+ sc = hfi1_9B_get_sc5(hdr, pkt->rhf);
bth1 = be32_to_cpu(ohdr->bth[1]);
- if (do_cnp && (bth1 & HFI1_FECN_SMASK)) {
+ if (do_cnp && (bth1 & IB_FECN_SMASK)) {
u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh);
}
- if (!is_mcast && (bth1 & HFI1_BECN_SMASK)) {
+ if (!is_mcast && (bth1 & IB_BECN_SMASK)) {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 lqpn = bth1 & RVT_QPN_MASK;
u8 sl = ibp->sc_to_sl[sc];
@@ -621,8 +623,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
packet->hdr = hfi1_get_msgheader(dd, rhf_addr);
hdr = packet->hdr;
-
- lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ lnh = ib_get_lnh(hdr);
if (lnh == HFI1_LRH_BTH) {
packet->ohdr = &hdr->u.oth;
@@ -634,7 +635,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
}
bth1 = be32_to_cpu(packet->ohdr->bth[1]);
- is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
+ is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK));
if (!is_ecn)
goto next;
@@ -652,7 +653,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
rcu_read_unlock();
/* turn off BECN, FECN */
- bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK);
+ bth1 &= ~(IB_FECN_SMASK | IB_BECN_SMASK);
packet->ohdr->bth[1] = cpu_to_be32(bth1);
next:
update_ps_mdata(&mdata, rcd);
@@ -872,20 +873,42 @@ bail:
return last;
}
-static inline void set_all_nodma_rtail(struct hfi1_devdata *dd)
+static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt)
{
int i;
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
+ /*
+ * For dynamically allocated kernel contexts (like vnic) switch
+ * interrupt handler only for that context. Otherwise, switch
+ * interrupt handler for all statically allocated kernel contexts.
+ */
+ if (ctxt >= dd->first_dyn_alloc_ctxt) {
+ dd->rcd[ctxt]->do_interrupt =
+ &handle_receive_interrupt_nodma_rtail;
+ return;
+ }
+
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++)
dd->rcd[i]->do_interrupt =
&handle_receive_interrupt_nodma_rtail;
}
-static inline void set_all_dma_rtail(struct hfi1_devdata *dd)
+static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt)
{
int i;
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
+ /*
+ * For dynamically allocated kernel contexts (like vnic) switch
+ * interrupt handler only for that context. Otherwise, switch
+ * interrupt handler for all statically allocated kernel contexts.
+ */
+ if (ctxt >= dd->first_dyn_alloc_ctxt) {
+ dd->rcd[ctxt]->do_interrupt =
+ &handle_receive_interrupt_dma_rtail;
+ return;
+ }
+
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++)
dd->rcd[i]->do_interrupt =
&handle_receive_interrupt_dma_rtail;
}
@@ -895,8 +918,13 @@ void set_all_slowpath(struct hfi1_devdata *dd)
int i;
/* HFI1_CTRL_CTXT must always use the slow path interrupt handler */
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
- dd->rcd[i]->do_interrupt = &handle_receive_interrupt;
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
+ struct hfi1_ctxtdata *rcd = dd->rcd[i];
+
+ if ((i < dd->first_dyn_alloc_ctxt) ||
+ (rcd && rcd->sc && (rcd->sc->type == SC_KERNEL)))
+ rcd->do_interrupt = &handle_receive_interrupt;
+ }
}
static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
@@ -908,7 +936,8 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
packet->rhf_addr);
u8 etype = rhf_rcv_type(packet->rhf);
- if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) {
+ if (etype == RHF_RCV_TYPE_IB &&
+ hfi1_9B_get_sc5(hdr, packet->rhf) != 0xf) {
int hwstate = read_logical_state(dd);
if (hwstate != LSTATE_ACTIVE) {
@@ -1006,7 +1035,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
last = RCV_PKT_DONE;
if (needset) {
dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
- set_all_nodma_rtail(dd);
+ set_nodma_rtail(dd, rcd->ctxt);
needset = 0;
}
} else {
@@ -1028,7 +1057,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
if (needset) {
dd_dev_info(dd,
"Switching to DMA_RTAIL\n");
- set_all_dma_rtail(dd);
+ set_dma_rtail(dd, rcd->ctxt);
needset = 0;
}
}
@@ -1077,10 +1106,10 @@ void receive_interrupt_work(struct work_struct *work)
set_link_state(ppd, HLS_UP_ACTIVE);
/*
- * Interrupt all kernel contexts that could have had an
- * interrupt during auto activation.
+ * Interrupt all statically allocated kernel contexts that could
+ * have had an interrupt during auto activation.
*/
- for (i = HFI1_CTRL_CTXT; i < dd->first_user_ctxt; i++)
+ for (i = HFI1_CTRL_CTXT; i < dd->first_dyn_alloc_ctxt; i++)
force_recv_intr(dd->rcd[i]);
}
@@ -1294,7 +1323,8 @@ int hfi1_reset_device(int unit)
spin_lock_irqsave(&dd->uctxt_lock, flags);
if (dd->rcd)
- for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) {
+ for (i = dd->first_dyn_alloc_ctxt;
+ i < dd->num_rcv_contexts; i++) {
if (!dd->rcd[i] || !dd->rcd[i]->cnt)
continue;
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
@@ -1354,6 +1384,9 @@ void handle_eflags(struct hfi1_packet *packet)
*/
int process_receive_ib(struct hfi1_packet *packet)
{
+ if (unlikely(hfi1_dbg_fault_packet(packet)))
+ return RHF_RCV_CONTINUE;
+
trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
packet->rcd->ctxt,
rhf_err_flags(packet->rhf),
@@ -1363,6 +1396,11 @@ int process_receive_ib(struct hfi1_packet *packet)
packet->updegr,
rhf_egr_index(packet->rhf));
+ if (unlikely(
+ (hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
+ (packet->rhf & RHF_DC_ERR))))
+ return RHF_RCV_CONTINUE;
+
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
return RHF_RCV_CONTINUE;
@@ -1372,15 +1410,31 @@ int process_receive_ib(struct hfi1_packet *packet)
return RHF_RCV_CONTINUE;
}
+static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
+{
+ /* Packet received in VNIC context via RSM */
+ if (packet->rcd->is_vnic)
+ return true;
+
+ if ((HFI1_GET_L2_TYPE(packet->ebuf) == OPA_VNIC_L2_TYPE) &&
+ (HFI1_GET_L4_TYPE(packet->ebuf) == OPA_VNIC_L4_ETHR))
+ return true;
+
+ return false;
+}
+
int process_receive_bypass(struct hfi1_packet *packet)
{
struct hfi1_devdata *dd = packet->rcd->dd;
- if (unlikely(rhf_err_flags(packet->rhf)))
+ if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
+ } else if (hfi1_is_vnic_packet(packet)) {
+ hfi1_vnic_bypass_rcv(packet);
+ return RHF_RCV_CONTINUE;
+ }
- dd_dev_err(dd,
- "Bypass packets are not supported in normal operation. Dropping\n");
+ dd_dev_err(dd, "Unsupported bypass packet. Dropping\n");
incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) {
u64 *flits = packet->ebuf;
@@ -1398,6 +1452,12 @@ int process_receive_bypass(struct hfi1_packet *packet)
int process_receive_error(struct hfi1_packet *packet)
{
+ /* KHdrHCRCErr -- KDETH packet with a bad HCRC */
+ if (unlikely(
+ hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
+ rhf_rcv_type_err(packet->rhf) == 3))
+ return RHF_RCV_CONTINUE;
+
handle_eflags(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
@@ -1409,6 +1469,8 @@ int process_receive_error(struct hfi1_packet *packet)
int kdeth_process_expected(struct hfi1_packet *packet)
{
+ if (unlikely(hfi1_dbg_fault_packet(packet)))
+ return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
@@ -1421,6 +1483,8 @@ int kdeth_process_eager(struct hfi1_packet *packet)
{
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
+ if (unlikely(hfi1_dbg_fault_packet(packet)))
+ return RHF_RCV_CONTINUE;
dd_dev_err(packet->rcd->dd,
"Unhandled eager packet received. Dropping.\n");
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index f78c739b330a..3d9bce4bfcc7 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -586,8 +586,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
* knows where it's own bitmap is within the page.
*/
memaddr = (unsigned long)(dd->events +
- ((uctxt->ctxt - dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
+ ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
+ HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
memlen = PAGE_SIZE;
/*
* v3.7 removes VM_RESERVED but the effect is kept by
@@ -597,6 +597,10 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
vmf = 1;
break;
case STATUS:
+ if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) {
+ ret = -EPERM;
+ goto done;
+ }
memaddr = kvirt_to_phys((void *)dd->status);
memlen = PAGE_SIZE;
flags |= VM_IO | VM_DONTEXPAND;
@@ -756,7 +760,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
* Clear any left over, unhandled events so the next process that
* gets this context doesn't get confused.
*/
- ev = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) *
+ ev = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + fdata->subctxt;
*ev = 0;
@@ -909,12 +913,18 @@ static int find_shared_ctxt(struct file *fp,
if (!(dd && (dd->flags & HFI1_PRESENT) && dd->kregbase))
continue;
- for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) {
+ for (i = dd->first_dyn_alloc_ctxt;
+ i < dd->num_rcv_contexts; i++) {
struct hfi1_ctxtdata *uctxt = dd->rcd[i];
/* Skip ctxts which are not yet open */
if (!uctxt || !uctxt->cnt)
continue;
+
+ /* Skip dynamically allocted kernel contexts */
+ if (uctxt->sc && (uctxt->sc->type == SC_KERNEL))
+ continue;
+
/* Skip ctxt if it doesn't match the requested one */
if (memcmp(uctxt->uuid, uinfo->uuid,
sizeof(uctxt->uuid)) ||
@@ -960,7 +970,8 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
return -EIO;
}
- for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts; ctxt++)
+ for (ctxt = dd->first_dyn_alloc_ctxt;
+ ctxt < dd->num_rcv_contexts; ctxt++)
if (!dd->rcd[ctxt])
break;
@@ -1306,7 +1317,7 @@ static int get_base_info(struct file *fp, void __user *ubase, __u32 len)
*/
binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt,
fd->subctxt, 0);
- offset = offset_in_page((((uctxt->ctxt - dd->first_user_ctxt) *
+ offset = offset_in_page((((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + fd->subctxt) *
sizeof(*dd->events));
binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt,
@@ -1400,12 +1411,12 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
}
spin_lock_irqsave(&dd->uctxt_lock, flags);
- for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts;
+ for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts;
ctxt++) {
uctxt = dd->rcd[ctxt];
if (uctxt) {
unsigned long *evs = dd->events +
- (uctxt->ctxt - dd->first_user_ctxt) *
+ (uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS;
int i;
/*
@@ -1477,7 +1488,7 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
if (!dd->events)
return 0;
- evs = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) *
+ evs = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + subctxt;
for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) {
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 0dd50cdb039a..4042c11b2742 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -1004,7 +1004,9 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
{
u64 reg;
int ret;
- u8 ver_a, ver_b;
+ u8 ver_major;
+ u8 ver_minor;
+ u8 ver_patch;
/*
* DC Reset sequence
@@ -1073,10 +1075,10 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
return -ETIMEDOUT;
}
- read_misc_status(dd, &ver_a, &ver_b);
- dd_dev_info(dd, "8051 firmware version %d.%d\n",
- (int)ver_b, (int)ver_a);
- dd->dc8051_ver = dc8051_ver(ver_b, ver_a);
+ read_misc_status(dd, &ver_major, &ver_minor, &ver_patch);
+ dd_dev_info(dd, "8051 firmware version %d.%d.%d\n",
+ (int)ver_major, (int)ver_minor, (int)ver_patch);
+ dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch);
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 0808e3c3ba39..f06674317abf 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_KERNEL_H
#define _HFI1_KERNEL_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -54,6 +54,7 @@
#include <linux/list.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
+#include <linux/idr.h>
#include <linux/io.h>
#include <linux/fs.h>
#include <linux/completion.h>
@@ -66,6 +67,7 @@
#include <linux/i2c-algo-bit.h>
#include <rdma/ib_hdrs.h>
#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
#include <rdma/rdma_vt.h>
#include "chip_registers.h"
@@ -278,6 +280,8 @@ struct hfi1_ctxtdata {
struct hfi1_devdata *dd;
/* so functions that need physical port can get it easily */
struct hfi1_pportdata *ppd;
+ /* associated msix interrupt */
+ u32 msix_intr;
/* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
void *subctxt_uregbase;
/* An array of pages for the eager receive buffers * N */
@@ -337,6 +341,12 @@ struct hfi1_ctxtdata {
* packets with the wrong interrupt handler.
*/
int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+
+ /* Indicates that this is vnic context */
+ bool is_vnic;
+
+ /* vnic queue index this context is mapped to */
+ u8 vnic_q_idx;
};
/*
@@ -474,7 +484,7 @@ struct rvt_sge_state;
#define HFI1_PART_ENFORCE_OUT 0x2
/* how often we check for synthetic counter wrap around */
-#define SYNTH_CNT_TIME 2
+#define SYNTH_CNT_TIME 3
/* Counter flags */
#define CNTR_NORMAL 0x0 /* Normal counters, just read register */
@@ -808,6 +818,32 @@ struct hfi1_asic_data {
struct hfi1_i2c_bus *i2c_bus1;
};
+/* sizes for both the QP and RSM map tables */
+#define NUM_MAP_ENTRIES 256
+#define NUM_MAP_REGS 32
+
+/*
+ * Number of VNIC contexts used. Ensure it is less than or equal to
+ * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
+ */
+#define HFI1_NUM_VNIC_CTXT 8
+
+/* Number of VNIC RSM entries */
+#define NUM_VNIC_MAP_ENTRIES 8
+
+/* Virtual NIC information */
+struct hfi1_vnic_data {
+ struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT];
+ struct kmem_cache *txreq_cache;
+ u8 num_vports;
+ struct idr vesw_idr;
+ u8 rmt_start;
+ u8 num_ctxt;
+ u32 msix_idx;
+};
+
+struct hfi1_vnic_vport_info;
+
/* device data struct now contains only "general per-device" info.
* fields related to a physical IB port are in a hfi1_pportdata struct.
*/
@@ -926,8 +962,9 @@ struct hfi1_devdata {
spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
/* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */
spinlock_t uctxt_lock; /* rcd and user context changes */
- /* exclusive access to 8051 */
- spinlock_t dc8051_lock;
+ struct mutex dc8051_lock; /* exclusive access to 8051 */
+ struct workqueue_struct *update_cntr_wq;
+ struct work_struct update_cntr_work;
/* exclusive access to 8051 memory */
spinlock_t dc8051_memlock;
int dc8051_timed_out; /* remember if the 8051 timed out */
@@ -1020,7 +1057,7 @@ struct hfi1_devdata {
u8 qos_shift;
u16 irev; /* implementation revision */
- u16 dc8051_ver; /* 8051 firmware version */
+ u32 dc8051_ver; /* 8051 firmware version */
spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
struct platform_config platform_config;
@@ -1031,6 +1068,7 @@ struct hfi1_devdata {
/* MSI-X information */
struct hfi1_msix_entry *msix_entries;
u32 num_msix_entries;
+ u32 first_dyn_msix_idx;
/* INTx information */
u32 requested_intx_irq; /* did we request one? */
@@ -1115,6 +1153,9 @@ struct hfi1_devdata {
send_routine process_dma_send;
void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
u64 pbc, const void *from, size_t count);
+ int (*process_vnic_dma_send)(struct hfi1_devdata *dd, u8 q_idx,
+ struct hfi1_vnic_vport_info *vinfo,
+ struct sk_buff *skb, u64 pbc, u8 plen);
/* hfi1_pportdata, points to array of (physical) port-specific
* data structs, indexed by pidx (0..n-1)
*/
@@ -1126,8 +1167,8 @@ struct hfi1_devdata {
u16 flags;
/* Number of physical ports available */
u8 num_pports;
- /* Lowest context number which can be used by user processes */
- u8 first_user_ctxt;
+ /* Lowest context number which can be used by user processes or VNIC */
+ u8 first_dyn_alloc_ctxt;
/* adding a new field here would make it part of this cacheline */
/* seqlock for sc2vl */
@@ -1167,15 +1208,24 @@ struct hfi1_devdata {
bool eprom_available; /* true if EPROM is available for this device */
bool aspm_supported; /* Does HW support ASPM */
bool aspm_enabled; /* ASPM state: enabled/disabled */
- struct rhashtable sdma_rht;
+ struct rhashtable *sdma_rht;
struct kobject kobj;
+
+ /* vnic data */
+ struct hfi1_vnic_data vnic;
};
+static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
+{
+ return (dd->vnic.rmt_start + spare) > NUM_MAP_ENTRIES;
+}
+
/* 8051 firmware version helper */
-#define dc8051_ver(a, b) ((a) << 8 | (b))
-#define dc8051_ver_maj(a) ((a & 0xff00) >> 8)
-#define dc8051_ver_min(a) (a & 0x00ff)
+#define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c))
+#define dc8051_ver_maj(a) (((a) & 0xff0000) >> 16)
+#define dc8051_ver_min(a) (((a) & 0x00ff00) >> 8)
+#define dc8051_ver_patch(a) ((a) & 0x0000ff)
/* f_put_tid types */
#define PT_EXPECTED 0
@@ -1235,6 +1285,9 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *, int);
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int);
void set_all_slowpath(struct hfi1_devdata *dd);
+void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd);
+void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd);
+void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd);
extern const struct pci_device_id hfi1_pci_tbl[];
@@ -1254,16 +1307,24 @@ int hfi1_reset_device(int);
/* return the driver's idea of the logical OPA port state */
static inline u32 driver_lstate(struct hfi1_pportdata *ppd)
{
- return ppd->lstate; /* use the cached value */
+ /*
+ * The driver does some processing from the time the logical
+ * link state is at INIT to the time the SM can be notified
+ * as such. Return IB_PORT_DOWN until the software state
+ * is ready.
+ */
+ if (ppd->lstate == IB_PORT_INIT && !(ppd->host_link_state & HLS_UP))
+ return IB_PORT_DOWN;
+ else
+ return ppd->lstate;
}
void receive_interrupt_work(struct work_struct *work);
/* extract service channel from header and rhf */
-static inline int hdr2sc(struct ib_header *hdr, u64 rhf)
+static inline int hfi1_9B_get_sc5(struct ib_header *hdr, u64 rhf)
{
- return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
- ((!!(rhf_dc_info(rhf))) << 4);
+ return ib_get_sc(hdr) | ((!!(rhf_dc_info(rhf))) << 4);
}
#define HFI1_JKEY_WIDTH 16
@@ -1597,9 +1658,9 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
u32 bth1;
bth1 = be32_to_cpu(ohdr->bth[1]);
- if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) {
+ if (unlikely(bth1 & (IB_BECN_SMASK | IB_FECN_SMASK))) {
hfi1_process_ecn_slowpath(qp, pkt, do_cnp);
- return bth1 & HFI1_FECN_SMASK;
+ return !!(bth1 & IB_FECN_SMASK);
}
return false;
}
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index f40864e9a3b2..4d6b9f82efa3 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -65,6 +65,7 @@
#include "verbs.h"
#include "aspm.h"
#include "affinity.h"
+#include "vnic.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -139,7 +140,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
goto nomem;
/* create one or more kernel contexts */
- for (i = 0; i < dd->first_user_ctxt; ++i) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) {
struct hfi1_pportdata *ppd;
struct hfi1_ctxtdata *rcd;
@@ -214,9 +215,9 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
u32 base;
if (dd->rcv_entries.nctxt_extra >
- dd->num_rcv_contexts - dd->first_user_ctxt)
+ dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt)
kctxt_ngroups = (dd->rcv_entries.nctxt_extra -
- (dd->num_rcv_contexts - dd->first_user_ctxt));
+ (dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt));
rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa);
if (rcd) {
u32 rcvtids, max_entries;
@@ -238,27 +239,29 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
* Calculate the context's RcvArray entry starting point.
* We do this here because we have to take into account all
* the RcvArray entries that previous context would have
- * taken and we have to account for any extra groups
- * assigned to the kernel or user contexts.
+ * taken and we have to account for any extra groups assigned
+ * to the static (kernel) or dynamic (vnic/user) contexts.
*/
- if (ctxt < dd->first_user_ctxt) {
+ if (ctxt < dd->first_dyn_alloc_ctxt) {
if (ctxt < kctxt_ngroups) {
base = ctxt * (dd->rcv_entries.ngroups + 1);
rcd->rcv_array_groups++;
- } else
+ } else {
base = kctxt_ngroups +
(ctxt * dd->rcv_entries.ngroups);
+ }
} else {
- u16 ct = ctxt - dd->first_user_ctxt;
+ u16 ct = ctxt - dd->first_dyn_alloc_ctxt;
base = ((dd->n_krcv_queues * dd->rcv_entries.ngroups) +
kctxt_ngroups);
if (ct < dd->rcv_entries.nctxt_extra) {
base += ct * (dd->rcv_entries.ngroups + 1);
rcd->rcv_array_groups++;
- } else
+ } else {
base += dd->rcv_entries.nctxt_extra +
(ct * dd->rcv_entries.ngroups);
+ }
}
rcd->eager_base = base * dd->rcv_entries.group_size;
@@ -322,7 +325,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
}
rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE;
- if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */
+ /* Applicable only for statically created kernel contexts */
+ if (ctxt < dd->first_dyn_alloc_ctxt) {
rcd->opstats = kzalloc_node(sizeof(*rcd->opstats),
GFP_KERNEL, numa);
if (!rcd->opstats)
@@ -482,6 +486,9 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
default_pkey_idx = 1;
ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY;
+ ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
+ ppd->part_enforce |= HFI1_PART_ENFORCE_OUT;
+
if (loopback) {
hfi1_early_err(&pdev->dev,
"Faking data partition 0x8001 in idx %u\n",
@@ -585,7 +592,7 @@ static void enable_chip(struct hfi1_devdata *dd)
* Enable kernel ctxts' receive and receive interrupt.
* Other ctxts done as user opens and initializes them.
*/
- for (i = 0; i < dd->first_user_ctxt; ++i) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) {
rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB;
rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
@@ -679,6 +686,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
dd->process_pio_send = hfi1_verbs_send_pio;
dd->process_dma_send = hfi1_verbs_send_dma;
dd->pio_inline_send = pio_copy;
+ dd->process_vnic_dma_send = hfi1_vnic_send_dma;
if (is_ax(dd)) {
atomic_set(&dd->drop_packet, DROP_PACKET_ON);
@@ -714,7 +722,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
}
/* dd->rcd can be NULL if early initialization failed */
- for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) {
+ for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) {
/*
* Set up the (kernel) rcvhdr queue and egr TIDs. If doing
* re-init, the simplest way to handle this is to free
@@ -1078,11 +1086,11 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
spin_lock_init(&dd->uctxt_lock);
spin_lock_init(&dd->hfi1_diag_trans_lock);
spin_lock_init(&dd->sc_init_lock);
- spin_lock_init(&dd->dc8051_lock);
spin_lock_init(&dd->dc8051_memlock);
seqlock_init(&dd->sc2vl_lock);
spin_lock_init(&dd->sde_map_lock);
spin_lock_init(&dd->pio_map_lock);
+ mutex_init(&dd->dc8051_lock);
init_waitqueue_head(&dd->event_queue);
dd->int_counter = alloc_percpu(u64);
@@ -1425,6 +1433,16 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* First, lock the non-writable module parameters */
HFI1_CAP_LOCK();
+ /* Validate dev ids */
+ if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
+ ent->device == PCI_DEVICE_ID_INTEL1)) {
+ hfi1_early_err(&pdev->dev,
+ "Failing on unknown Intel deviceid 0x%x\n",
+ ent->device);
+ ret = -ENODEV;
+ goto bail;
+ }
+
/* Validate some global module parameters */
ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
if (ret)
@@ -1470,15 +1488,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (ret)
goto bail;
- if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
- ent->device == PCI_DEVICE_ID_INTEL1)) {
- hfi1_early_err(&pdev->dev,
- "Failing on unknown Intel deviceid 0x%x\n",
- ent->device);
- ret = -ENODEV;
- goto clean_bail;
- }
-
/*
* Do device-specific initialization, function table setup, dd
* allocation, etc.
@@ -1497,6 +1506,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* do the generic initialization */
initfail = hfi1_init(dd, 0);
+ /* setup vnic */
+ hfi1_vnic_setup(dd);
+
ret = hfi1_register_ib_device(dd);
/*
@@ -1530,6 +1542,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
hfi1_device_remove(dd);
if (!ret)
hfi1_unregister_ib_device(dd);
+ hfi1_vnic_cleanup(dd);
postinit_cleanup(dd);
if (initfail)
ret = initfail;
@@ -1574,6 +1587,9 @@ static void remove_one(struct pci_dev *pdev)
/* unregister from IB core */
hfi1_unregister_ib_device(dd);
+ /* cleanup vnic */
+ hfi1_vnic_cleanup(dd);
+
/*
* Disable the IB link, disable interrupts on the device,
* clear dma engines, etc.
@@ -1613,8 +1629,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize *
sizeof(u32));
- gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
- GFP_USER : GFP_KERNEL;
+ if ((rcd->ctxt < dd->first_dyn_alloc_ctxt) ||
+ (rcd->sc && (rcd->sc->type == SC_KERNEL)))
+ gfp_flags = GFP_KERNEL;
+ else
+ gfp_flags = GFP_USER;
rcd->rcvhdrq = dma_zalloc_coherent(
&dd->pcidev->dev, amt, &rcd->rcvhdrq_dma,
gfp_flags | __GFP_COMP);
diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index 65348d16ab2f..232014d46f79 100644
--- a/drivers/infiniband/hw/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
@@ -131,19 +131,24 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
if (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
set_up_vl15(dd, dd->vau, dd->vl15_init);
assign_remote_cm_au_table(dd, dd->vcu);
- ppd->neighbor_guid =
- read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
- ppd->neighbor_type =
- read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
- DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
- ppd->neighbor_port_number =
- read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
- DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
- dd_dev_info(dd, "Neighbor GUID: %llx Neighbor type %d\n",
- ppd->neighbor_guid,
- ppd->neighbor_type);
}
+ ppd->neighbor_guid =
+ read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
+ ppd->neighbor_type =
+ read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
+ DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
+ ppd->neighbor_port_number =
+ read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
+ DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
+ ppd->neighbor_fm_security =
+ read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
+ DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
+ dd_dev_info(dd,
+ "Neighbor Guid %llx, Type %d, Port Num %d\n",
+ ppd->neighbor_guid, ppd->neighbor_type,
+ ppd->neighbor_port_number);
+
/* physical link went up */
ppd->linkup = 1;
ppd->offline_disabled_reason =
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 09cda3c35e82..5977673a52d4 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -53,6 +53,7 @@
#include "mad.h"
#include "trace.h"
#include "qp.h"
+#include "vnic.h"
/* the reset value from the FM is supposed to be 0xffff, handle both */
#define OPA_LINK_WIDTH_RESET_OLD 0x0fff
@@ -650,9 +651,11 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);
pi->port_packet_format.supported =
- cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
+ cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
+ OPA_PORT_PACKET_FORMAT_16B);
pi->port_packet_format.enabled =
- cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
+ cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
+ OPA_PORT_PACKET_FORMAT_16B);
/* flit_control.interleave is (OPA V1, version .76):
* bits use
@@ -701,7 +704,13 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
pi->buffer_units = cpu_to_be32(buffer_units);
- pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported);
+ pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported |
+ OPA_CAP_MASK3_IsEthOnFabricSupported);
+ /* Driver does not support mcast/collective configuration */
+ pi->opa_cap_mask &=
+ cpu_to_be16(~OPA_CAP_MASK3_IsAddrRangeConfigSupported);
+ pi->collectivemask_multicastmask = ((HFI1_COLLECTIVE_NR & 0x7)
+ << 3 | (HFI1_MCAST_NR & 0x7));
/* HFI supports a replay buffer 128 LTPs in size */
pi->replay_depth.buffer = 0x80;
@@ -1146,16 +1155,6 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
ppd->linkinit_reason =
(pi->partenforce_filterraw &
OPA_PI_MASK_LINKINIT_REASON);
- /* enable/disable SW pkey checking as per FM control */
- if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_IN)
- ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
- else
- ppd->part_enforce &= ~HFI1_PART_ENFORCE_IN;
-
- if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_OUT)
- ppd->part_enforce |= HFI1_PART_ENFORCE_OUT;
- else
- ppd->part_enforce &= ~HFI1_PART_ENFORCE_OUT;
/* Must be a valid unicast LID address. */
if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
@@ -1167,9 +1166,9 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
spin_lock_irqsave(&ibp->rvp.lock, flags);
if (ibp->rvp.sm_ah) {
if (smlid != ibp->rvp.sm_lid)
- ibp->rvp.sm_ah->attr.dlid = smlid;
+ rdma_ah_set_dlid(&ibp->rvp.sm_ah->attr, smlid);
if (msl != ibp->rvp.sm_sl)
- ibp->rvp.sm_ah->attr.sl = msl;
+ rdma_ah_set_sl(&ibp->rvp.sm_ah->attr, msl);
}
spin_unlock_irqrestore(&ibp->rvp.lock, flags);
if (smlid != ibp->rvp.sm_lid)
@@ -1465,25 +1464,15 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len);
}
-static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
-{
- u64 *val = data;
-
- *val++ = read_csr(dd, SEND_SC2VLT0);
- *val++ = read_csr(dd, SEND_SC2VLT1);
- *val++ = read_csr(dd, SEND_SC2VLT2);
- *val++ = read_csr(dd, SEND_SC2VLT3);
- return 0;
-}
-
#define ILLEGAL_VL 12
/*
* filter_sc2vlt changes mappings to VL15 to ILLEGAL_VL (except
* for SC15, which must map to VL15). If we don't remap things this
* way it is possible for VL15 counters to increment when we try to
* send on a SC which is mapped to an invalid VL.
+ * When getting the table convert ILLEGAL_VL back to VL15.
*/
-static void filter_sc2vlt(void *data)
+static void filter_sc2vlt(void *data, bool set)
{
int i;
u8 *pd = data;
@@ -1491,8 +1480,14 @@ static void filter_sc2vlt(void *data)
for (i = 0; i < OPA_MAX_SCS; i++) {
if (i == 15)
continue;
- if ((pd[i] & 0x1f) == 0xf)
- pd[i] = ILLEGAL_VL;
+
+ if (set) {
+ if ((pd[i] & 0x1f) == 0xf)
+ pd[i] = ILLEGAL_VL;
+ } else {
+ if ((pd[i] & 0x1f) == ILLEGAL_VL)
+ pd[i] = 0xf;
+ }
}
}
@@ -1500,7 +1495,7 @@ static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
{
u64 *val = data;
- filter_sc2vlt(data);
+ filter_sc2vlt(data, true);
write_csr(dd, SEND_SC2VLT0, *val++);
write_csr(dd, SEND_SC2VLT1, *val++);
@@ -1512,6 +1507,19 @@ static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
return 0;
}
+static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
+{
+ u64 *val = (u64 *)data;
+
+ *val++ = read_csr(dd, SEND_SC2VLT0);
+ *val++ = read_csr(dd, SEND_SC2VLT1);
+ *val++ = read_csr(dd, SEND_SC2VLT2);
+ *val++ = read_csr(dd, SEND_SC2VLT3);
+
+ filter_sc2vlt((u64 *)data, false);
+ return 0;
+}
+
static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
u32 *resp_len)
@@ -1986,31 +1994,6 @@ struct opa_pma_mad {
u8 data[2024];
} __packed;
-struct opa_class_port_info {
- u8 base_version;
- u8 class_version;
- __be16 cap_mask;
- __be32 cap_mask2_resp_time;
-
- u8 redirect_gid[16];
- __be32 redirect_tc_fl;
- __be32 redirect_lid;
- __be32 redirect_sl_qp;
- __be32 redirect_qkey;
-
- u8 trap_gid[16];
- __be32 trap_tc_fl;
- __be32 trap_lid;
- __be32 trap_hl_qp;
- __be32 trap_qkey;
-
- __be16 trap_pkey;
- __be16 redirect_pkey;
-
- u8 trap_sl_rsvd;
- u8 reserved[3];
-} __packed;
-
struct opa_port_status_req {
__u8 port_num;
__u8 reserved[3];
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 0829fce06172..e39e01b79382 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -583,7 +583,7 @@ pci_mmio_enabled(struct pci_dev *pdev)
if (words == ~0ULL)
ret = PCI_ERS_RESULT_NEED_RESET;
dd_dev_info(dd,
- "HFI1 mmio_enabled function called, read wordscntr %Lx, returning %d\n",
+ "HFI1 mmio_enabled function called, read wordscntr %llx, returning %d\n",
words, ret);
}
return ret;
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 615be68e40b3..ed72b5aca139 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -703,6 +703,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
{
struct send_context_info *sci;
struct send_context *sc = NULL;
+ int req_type = type;
dma_addr_t dma;
unsigned long flags;
u64 reg;
@@ -729,6 +730,13 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
return NULL;
}
+ /*
+ * VNIC contexts are dynamically allocated.
+ * Hence, pick a user context for VNIC.
+ */
+ if (type == SC_VNIC)
+ type = SC_USER;
+
spin_lock_irqsave(&dd->sc_lock, flags);
ret = sc_hw_alloc(dd, type, &sw_index, &hw_context);
if (ret) {
@@ -738,6 +746,15 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
return NULL;
}
+ /*
+ * VNIC contexts are used by kernel driver.
+ * Hence, mark them as kernel contexts.
+ */
+ if (req_type == SC_VNIC) {
+ dd->send_contexts[sw_index].type = SC_KERNEL;
+ type = SC_KERNEL;
+ }
+
sci = &dd->send_contexts[sw_index];
sci->sc = sc;
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 867e5ffc3595..99ca5edb0b43 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -1,7 +1,7 @@
#ifndef _PIO_H
#define _PIO_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -54,6 +54,12 @@
#define SC_USER 3 /* must be the last one: it may take all left */
#define SC_MAX 4 /* count of send context types */
+/*
+ * SC_VNIC types are allocated (dynamically) from the user context pool,
+ * (SC_USER) and used by kernel driver as kernel contexts (SC_KERNEL).
+ */
+#define SC_VNIC SC_MAX
+
/* invalid send context index */
#define INVALID_SCI 0xff
@@ -195,7 +201,7 @@ struct sc_config_sizes {
* | mask | --/ |--------------------|
* |--------------------------| -/ | * |
* | actual_vls (max 8) | -/ |--------------------|
- * |--------------------------| --/ | ksc[n] -> sc n |
+ * |--------------------------| --/ | ksc[n-1] -> sc n |
* | vls (max 8) | -/ +--------------------+
* |--------------------------| --/
* | map[0] |-/
@@ -208,21 +214,21 @@ struct sc_config_sizes {
* |--------------------------| |--------------------|
* | map[vls - 1] |- | * |
* +--------------------------+ \- |--------------------|
- * \- | ksc[m] -> sc m+n |
+ * \- | ksc[m-1] -> sc m+n |
* \ +--------------------+
* \-
* \
- * \- +--------------------+
- * \- | mask |
- * \ |--------------------|
- * \- | ksc[0] -> sc 1+m+n |
- * \- |--------------------|
- * >| ksc[1] -> sc 2+m+n |
- * |--------------------|
- * | * |
- * |--------------------|
- * | ksc[o] -> sc o+m+n |
- * +--------------------+
+ * \- +----------------------+
+ * \- | mask |
+ * \ |----------------------|
+ * \- | ksc[0] -> sc 1+m+n |
+ * \- |----------------------|
+ * >| ksc[1] -> sc 2+m+n |
+ * |----------------------|
+ * | * |
+ * |----------------------|
+ * | ksc[o-1] -> sc o+m+n |
+ * +----------------------+
*
*/
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index c4ebd097b20f..4573e4c9f35c 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -294,7 +294,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
ah = ibah_to_rvtah(wqe->ud_wr.ah);
if (wqe->length > (1 << ah->log_pmtu))
return -EINVAL;
- if (ibp->sl_to_sc[ah->attr.sl] == 0xf)
+ if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf)
return -EINVAL;
default:
break;
@@ -631,8 +631,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
qp->s_tail, qp->s_head, qp->s_size,
qp->s_avail,
qp->remote_qpn,
- qp->remote_ah_attr.dlid,
- qp->remote_ah_attr.sl,
+ rdma_ah_get_dlid(&qp->remote_ah_attr),
+ rdma_ah_get_sl(&qp->remote_ah_attr),
qp->pmtu,
qp->s_retry,
qp->s_retry_cnt,
@@ -748,7 +748,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp)
qp->s_mig_state = IB_MIG_MIGRATED;
qp->remote_ah_attr = qp->alt_ah_attr;
- qp->port_num = qp->alt_ah_attr.port_num;
+ qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
qp->s_pkey_index = qp->s_alt_pkey_index;
qp->s_flags |= RVT_S_AHG_CLEAR;
priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
@@ -778,7 +778,7 @@ u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
u8 sc, vl;
ibp = &dd->pport[qp->port_num - 1].ibport_data;
- sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ sc = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
vl = sc_to_vlt(dd, sc);
mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu);
@@ -861,7 +861,7 @@ void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl)
if (qp->port_num == ppd->port &&
(qp->ibqp.qp_type == IB_QPT_UC ||
qp->ibqp.qp_type == IB_QPT_RC) &&
- qp->remote_ah_attr.sl == sl &&
+ rdma_ah_get_sl(&qp->remote_ah_attr) == sl &&
(ib_rvt_state_ops[qp->state] &
RVT_POST_SEND_OK)) {
spin_lock_irq(&qp->r_lock);
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 7382be11afca..75a729cd0c3d 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -274,7 +274,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
goto bail_no_tx;
ohdr = &ps->s_txreq->phdr.hdr.u.oth;
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
/* Sending responses has higher priority over sending requests. */
@@ -744,9 +744,10 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
/* Construct the header */
/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */
hwords = 6;
- if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
+ if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
hwords += hfi1_make_grh(ibp, &hdr.u.l.grh,
- &qp->remote_ah_attr.grh, hwords, 0);
+ rdma_ah_read_grh(&qp->remote_ah_attr),
+ hwords, 0);
ohdr = &hdr.u.l.oth;
lrh0 = HFI1_LRH_GRH;
} else {
@@ -763,17 +764,19 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
IB_AETH_CREDIT_SHIFT));
else
ohdr->u.aeth = rvt_compute_aeth(qp);
- sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT);
- lrh0 |= (sc5 & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
+ lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr)
+ & 0xf) << 4;
hdr.lrh[0] = cpu_to_be16(lrh0);
- hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ hdr.lrh[1] = cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr));
hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
- hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits);
+ hdr.lrh[3] = cpu_to_be16(ppd->lid |
+ rdma_ah_get_path_bits(&qp->remote_ah_attr));
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
- ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << HFI1_BECN_SHIFT);
+ ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << IB_BECN_SHIFT);
ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn));
/* Don't try to send ACKs if the link isn't ACTIVE */
@@ -994,12 +997,12 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
return;
/* Find out where the BTH is */
- if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH)
+ if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
ohdr = &hdr->u.oth;
else
ohdr = &hdr->u.l.oth;
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ opcode = ib_bth_get_opcode(ohdr);
if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
WARN_ON(!qp->s_rdma_ack_cnt);
@@ -1028,13 +1031,17 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
break;
s_last = qp->s_last;
+ trace_hfi1_qp_send_completion(qp, wqe, s_last);
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
rvt_put_swqe(wqe);
- rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_hfi1_wc_opcode[wqe->wr.opcode],
+ IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before re-sending,
@@ -1076,12 +1083,16 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
rvt_put_swqe(wqe);
s_last = qp->s_last;
+ trace_hfi1_qp_send_completion(qp, wqe, s_last);
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
- rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_hfi1_wc_opcode[wqe->wr.opcode],
+ IB_WC_SUCCESS);
} else {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -1092,10 +1103,11 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
*/
if (ppd->dd->flags & HFI1_HAS_SEND_DMA) {
struct sdma_engine *engine;
+ u8 sl = rdma_ah_get_sl(&qp->remote_ah_attr);
u8 sc5;
/* For now use sc to find engine */
- sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ sc5 = ibp->sl_to_sc[sl];
engine = qp_to_sdma_engine(qp, sc5);
sdma_engine_progress_schedule(engine);
}
@@ -1516,7 +1528,7 @@ read_middle:
if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
goto ack_done;
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/*
* Check that the data size is >= 0 && <= pmtu.
* Remember to account for ICRC (4).
@@ -1540,7 +1552,7 @@ read_middle:
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
goto ack_op_err;
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/*
* Check that the data size is >= 1 && <= pmtu.
* Remember to account for ICRC (4).
@@ -1922,7 +1934,8 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
int diff;
struct ib_reth *reth;
unsigned long flags;
- int ret, is_fecn = 0;
+ int ret;
+ bool is_fecn = false;
bool copy_last = false;
u32 rkey;
@@ -1934,7 +1947,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
is_fecn = process_ecn(qp, packet, false);
psn = be32_to_cpu(ohdr->bth[2]);
- opcode = (bth0 >> 24) & 0xff;
+ opcode = ib_bth_get_opcode(ohdr);
/*
* Process responses (ACKs) before anything else. Note that the
@@ -2065,7 +2078,7 @@ no_immediate_data:
wc.ex.imm_data = 0;
send_last:
/* Get the number of bytes the message was padded by. */
- pad = (bth0 >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -2089,7 +2102,7 @@ send_last:
wc.opcode = IB_WC_RECV;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
/*
* It seems that IB mandates the presence of an SL in a
* work completion only for the UD transport (see section
@@ -2101,7 +2114,7 @@ send_last:
*
* See also OPA Vol. 1, section 9.7.6, and table 9-17.
*/
- wc.sl = qp->remote_ah_attr.sl;
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
/* zero fields that are N/A */
wc.vendor_err = 0;
wc.pkey_index = 0;
@@ -2378,7 +2391,7 @@ void hfi1_rc_hdrerr(
return;
psn = be32_to_cpu(ohdr->bth[2]);
- opcode = (bth0 >> 24) & 0xff;
+ opcode = ib_bth_get_opcode(ohdr);
/* Only deal with RDMA Writes for now */
if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index aa15bcbfb079..891ba0a81bbd 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -219,72 +219,84 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
{
__be64 guid;
unsigned long flags;
- u8 sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
if (!has_grh) {
- if (qp->alt_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
+ IB_AH_GRH)
goto err;
} else {
- if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
+ const struct ib_global_route *grh;
+
+ if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
+ IB_AH_GRH))
goto err;
- guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
+ grh = rdma_ah_read_grh(&qp->alt_ah_attr);
+ guid = get_sguid(ibp, grh->sgid_index);
if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
guid))
goto err;
if (!gid_ok(
&hdr->u.l.grh.sgid,
- qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
- qp->alt_ah_attr.grh.dgid.global.interface_id))
+ grh->dgid.global.subnet_prefix,
+ grh->dgid.global.interface_id))
goto err;
}
- if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
- sc5, be16_to_cpu(hdr->lrh[3])))) {
+ if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
+ ib_get_slid(hdr)))) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
(u16)bth0,
- (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ ib_get_sl(hdr),
0, qp->ibqp.qp_num,
- be16_to_cpu(hdr->lrh[3]),
- be16_to_cpu(hdr->lrh[1]));
+ ib_get_slid(hdr),
+ ib_get_dlid(hdr));
goto err;
}
/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
- if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
- ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
+ if (ib_get_slid(hdr) !=
+ rdma_ah_get_dlid(&qp->alt_ah_attr) ||
+ ppd_from_ibp(ibp)->port !=
+ rdma_ah_get_port_num(&qp->alt_ah_attr))
goto err;
spin_lock_irqsave(&qp->s_lock, flags);
hfi1_migrate_qp(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
} else {
if (!has_grh) {
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
+ IB_AH_GRH)
goto err;
} else {
- if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH))
+ const struct ib_global_route *grh;
+
+ if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
+ IB_AH_GRH))
goto err;
- guid = get_sguid(ibp,
- qp->remote_ah_attr.grh.sgid_index);
+ grh = rdma_ah_read_grh(&qp->remote_ah_attr);
+ guid = get_sguid(ibp, grh->sgid_index);
if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
guid))
goto err;
if (!gid_ok(
&hdr->u.l.grh.sgid,
- qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
- qp->remote_ah_attr.grh.dgid.global.interface_id))
+ grh->dgid.global.subnet_prefix,
+ grh->dgid.global.interface_id))
goto err;
}
- if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
- sc5, be16_to_cpu(hdr->lrh[3])))) {
+ if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
+ ib_get_slid(hdr)))) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
(u16)bth0,
- (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ ib_get_sl(hdr),
0, qp->ibqp.qp_num,
- be16_to_cpu(hdr->lrh[3]),
- be16_to_cpu(hdr->lrh[1]));
+ ib_get_slid(hdr),
+ ib_get_dlid(hdr));
goto err;
}
/* Validate the SLID. See Ch. 9.6.1.5 */
- if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid ||
+ if (ib_get_slid(hdr) !=
+ rdma_ah_get_dlid(&qp->remote_ah_attr) ||
ppd_from_ibp(ibp)->port != qp->port_num)
goto err;
if (qp->s_mig_state == IB_MIG_REARM &&
@@ -542,8 +554,8 @@ do_write:
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
- wc.sl = qp->remote_ah_attr.sl;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
@@ -637,7 +649,7 @@ done:
* Return the size of the header in 32 bit words.
*/
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
- struct ib_global_route *grh, u32 hwords, u32 nwords)
+ const struct ib_global_route *grh, u32 hwords, u32 nwords)
{
hdr->version_tclass_flow =
cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
@@ -731,15 +743,17 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
extra_bytes = -ps->s_txreq->s_cur_size & 3;
nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2;
lrh0 = HFI1_LRH_BTH;
- if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
- qp->s_hdrwords += hfi1_make_grh(ibp,
- &ps->s_txreq->phdr.hdr.u.l.grh,
- &qp->remote_ah_attr.grh,
- qp->s_hdrwords, nwords);
+ if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
+ qp->s_hdrwords +=
+ hfi1_make_grh(ibp,
+ &ps->s_txreq->phdr.hdr.u.l.grh,
+ rdma_ah_read_grh(&qp->remote_ah_attr),
+ qp->s_hdrwords, nwords);
lrh0 = HFI1_LRH_GRH;
middle = 0;
}
- lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
+ lrh0 |= (priv->s_sc & 0xf) << 12 |
+ (rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4;
/*
* reset s_ahg/AHG fields
*
@@ -763,11 +777,13 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
else
qp->s_flags &= ~RVT_S_AHG_VALID;
ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
- ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ ps->s_txreq->phdr.hdr.lrh[1] =
+ cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr));
ps->s_txreq->phdr.hdr.lrh[2] =
cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
- ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
- qp->remote_ah_attr.src_path_bits);
+ ps->s_txreq->phdr.hdr.lrh[3] =
+ cpu_to_be16(ppd_from_ibp(ibp)->lid |
+ rdma_ah_get_path_bits(&qp->remote_ah_attr));
bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
bth0 |= extra_bytes << 20;
ohdr->bth[0] = cpu_to_be32(bth0);
@@ -775,7 +791,7 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
if (qp->s_flags & RVT_S_ECN) {
qp->s_flags &= ~RVT_S_ECN;
/* we recently received a FECN, so return a BECN */
- bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT);
+ bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT);
}
ohdr->bth[1] = cpu_to_be32(bth1);
ohdr->bth[2] = cpu_to_be32(bth2);
@@ -784,23 +800,29 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
/* when sending, force a reschedule every one of these periods */
#define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */
+void hfi1_do_send_from_rvt(struct rvt_qp *qp)
+{
+ hfi1_do_send(qp, false);
+}
+
void _hfi1_do_send(struct work_struct *work)
{
struct iowait *wait = container_of(work, struct iowait, iowork);
struct rvt_qp *qp = iowait_to_qp(wait);
- hfi1_do_send(qp);
+ hfi1_do_send(qp, true);
}
/**
* hfi1_do_send - perform a send on a QP
* @work: contains a pointer to the QP
+ * @in_thread: true if in a workqueue thread
*
* Process entries in the send work queue until credit or queue is
* exhausted. Only allow one CPU to send a packet per QP.
* Otherwise, two threads could send packets out of order.
*/
-void hfi1_do_send(struct rvt_qp *qp)
+void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
{
struct hfi1_pkt_state ps;
struct hfi1_qp_priv *priv = qp->priv;
@@ -815,9 +837,9 @@ void hfi1_do_send(struct rvt_qp *qp)
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
- if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
- ) - 1)) ==
- ps.ppd->lid)) {
+ if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
+ ~((1 << ps.ppd->lmc) - 1)) ==
+ ps.ppd->lid)) {
ruc_loopback(qp);
return;
}
@@ -825,9 +847,9 @@ void hfi1_do_send(struct rvt_qp *qp)
timeout_int = (qp->timeout_jiffies);
break;
case IB_QPT_UC:
- if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
- ) - 1)) ==
- ps.ppd->lid)) {
+ if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
+ ~((1 << ps.ppd->lmc) - 1)) ==
+ ps.ppd->lid)) {
ruc_loopback(qp);
return;
}
@@ -868,8 +890,10 @@ void hfi1_do_send(struct rvt_qp *qp)
qp->s_hdrwords = 0;
/* allow other tasks to run */
if (unlikely(time_after(jiffies, timeout))) {
- if (workqueue_congested(cpu,
- ps.ppd->hfi1_wq)) {
+ if (!in_thread ||
+ workqueue_congested(
+ cpu,
+ ps.ppd->hfi1_wq)) {
spin_lock_irqsave(
&qp->s_lock,
ps.flags);
@@ -882,11 +906,9 @@ void hfi1_do_send(struct rvt_qp *qp)
*ps.ppd->dd->send_schedule);
return;
}
- if (!irqs_disabled()) {
- cond_resched();
- this_cpu_inc(
- *ps.ppd->dd->send_schedule);
- }
+ cond_resched();
+ this_cpu_inc(
+ *ps.ppd->dd->send_schedule);
timeout = jiffies + (timeout_int) / 8;
}
spin_lock_irqsave(&qp->s_lock, ps.flags);
@@ -909,8 +931,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
last = qp->s_last;
old_last = last;
+ trace_hfi1_qp_send_completion(qp, wqe, last);
if (++last >= qp->s_size)
last = 0;
+ trace_hfi1_qp_send_completion(qp, wqe, last);
qp->s_last = last;
/* See post_send() */
barrier();
@@ -920,7 +944,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
- rvt_qp_swqe_complete(qp, wqe, status);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_hfi1_wc_opcode[wqe->wr.opcode],
+ status);
if (qp->s_acked == old_last)
qp->s_acked = last;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 5cde1ecda0fe..bfd0d5187e9b 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -868,7 +868,7 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
cpu_id = smp_processor_id();
rcu_read_lock();
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id,
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id,
sdma_rht_params);
if (rht_node && rht_node->map[vl]) {
@@ -962,7 +962,12 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
continue;
}
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ if (vl >= ARRAY_SIZE(rht_node->map)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
sdma_rht_params);
if (!rht_node) {
rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
@@ -982,7 +987,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
rht_node->map[vl]->ctr = 1;
rht_node->map[vl]->sde[0] = sde;
- ret = rhashtable_insert_fast(&dd->sdma_rht,
+ ret = rhashtable_insert_fast(dd->sdma_rht,
&rht_node->node,
sdma_rht_params);
if (ret) {
@@ -1025,7 +1030,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
if (cpumask_test_cpu(cpu, mask))
continue;
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
sdma_rht_params);
if (rht_node) {
bool empty = true;
@@ -1049,7 +1054,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
}
if (empty) {
- ret = rhashtable_remove_fast(&dd->sdma_rht,
+ ret = rhashtable_remove_fast(dd->sdma_rht,
&rht_node->node,
sdma_rht_params);
WARN_ON(ret);
@@ -1108,7 +1113,7 @@ void sdma_seqfile_dump_cpu_list(struct seq_file *s,
struct sdma_rht_node *rht_node;
int i, j;
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid,
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpuid,
sdma_rht_params);
if (!rht_node)
return;
@@ -1322,6 +1327,12 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
synchronize_rcu();
kfree(dd->per_sdma);
dd->per_sdma = NULL;
+
+ if (dd->sdma_rht) {
+ rhashtable_free_and_destroy(dd->sdma_rht, sdma_rht_free, NULL);
+ kfree(dd->sdma_rht);
+ dd->sdma_rht = NULL;
+ }
}
/**
@@ -1341,12 +1352,14 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
{
unsigned this_idx;
struct sdma_engine *sde;
+ struct rhashtable *tmp_sdma_rht;
u16 descq_cnt;
void *curr_head;
struct hfi1_pportdata *ppd = dd->pport + port;
u32 per_sdma_credits;
uint idle_cnt = sdma_idle_cnt;
size_t num_engines = dd->chip_sdma_engines;
+ int ret = -ENOMEM;
if (!HFI1_CAP_IS_KSET(SDMA)) {
HFI1_CAP_CLEAR(SDMA_AHG);
@@ -1378,7 +1391,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
/* alloc memory for array of send engines */
dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL);
if (!dd->per_sdma)
- return -ENOMEM;
+ return ret;
idle_cnt = ns_to_cclock(dd, idle_cnt);
if (!sdma_desct_intr)
@@ -1507,18 +1520,27 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
dd->flags |= HFI1_HAS_SEND_DMA;
dd->flags |= idle_cnt ? HFI1_HAS_SDMA_TIMEOUT : 0;
dd->num_sdma = num_engines;
- if (sdma_map_init(dd, port, ppd->vls_operational, NULL))
+ ret = sdma_map_init(dd, port, ppd->vls_operational, NULL);
+ if (ret < 0)
goto bail;
- if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params))
+ tmp_sdma_rht = kzalloc(sizeof(*tmp_sdma_rht), GFP_KERNEL);
+ if (!tmp_sdma_rht) {
+ ret = -ENOMEM;
goto bail;
+ }
+
+ ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
+ if (ret < 0)
+ goto bail;
+ dd->sdma_rht = tmp_sdma_rht;
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
return 0;
bail:
sdma_clean(dd, num_engines);
- return -ENOMEM;
+ return ret;
}
/**
@@ -1604,7 +1626,6 @@ void sdma_exit(struct hfi1_devdata *dd)
sdma_finalput(&sde->state);
}
sdma_clean(dd, dd->num_sdma);
- rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL);
}
/*
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 21f1e2834f37..64f10b8b5db8 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -966,34 +966,34 @@ void sdma_engine_interrupt(struct sdma_engine *sde, u64 status);
* | mask | --/ |--------------------|
* |--------------------------| -/ | * |
* | actual_vls (max 8) | -/ |--------------------|
- * |--------------------------| --/ | sde[n] -> eng n |
+ * |--------------------------| --/ | sde[n-1] -> eng n |
* | vls (max 8) | -/ +--------------------+
* |--------------------------| --/
* | map[0] |-/
- * |--------------------------| +--------------------+
- * | map[1] |--- | mask |
- * |--------------------------| \---- |--------------------|
- * | * | \-- | sde[0] -> eng 1+n |
- * | * | \---- |--------------------|
- * | * | \->| sde[1] -> eng 2+n |
- * |--------------------------| |--------------------|
- * | map[vls - 1] |- | * |
- * +--------------------------+ \- |--------------------|
- * \- | sde[m] -> eng m+n |
- * \ +--------------------+
+ * |--------------------------| +---------------------+
+ * | map[1] |--- | mask |
+ * |--------------------------| \---- |---------------------|
+ * | * | \-- | sde[0] -> eng 1+n |
+ * | * | \---- |---------------------|
+ * | * | \->| sde[1] -> eng 2+n |
+ * |--------------------------| |---------------------|
+ * | map[vls - 1] |- | * |
+ * +--------------------------+ \- |---------------------|
+ * \- | sde[m-1] -> eng m+n |
+ * \ +---------------------+
* \-
* \
- * \- +--------------------+
- * \- | mask |
- * \ |--------------------|
- * \- | sde[0] -> eng 1+m+n|
- * \- |--------------------|
- * >| sde[1] -> eng 2+m+n|
- * |--------------------|
- * | * |
- * |--------------------|
- * | sde[o] -> eng o+m+n|
- * +--------------------+
+ * \- +----------------------+
+ * \- | mask |
+ * \ |----------------------|
+ * \- | sde[0] -> eng 1+m+n |
+ * \- |----------------------|
+ * >| sde[1] -> eng 2+m+n |
+ * |----------------------|
+ * | * |
+ * |----------------------|
+ * | sde[o-1] -> eng o+m+n|
+ * +----------------------+
*
*/
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 919a5474e651..50d140d25e38 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -542,7 +542,7 @@ static ssize_t show_nctxts(struct device *device,
* give a more accurate picture of total contexts available.
*/
return scnprintf(buf, PAGE_SIZE, "%u\n",
- min(dd->num_rcv_contexts - dd->first_user_ctxt,
+ min(dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt,
(u32)dd->sc_sizes[SC_USER].count));
}
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index e86798af6903..eafae487face 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -51,13 +51,12 @@ u8 ibhdr_exhdr_len(struct ib_header *hdr)
{
struct ib_other_headers *ohdr;
u8 opcode;
- u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
- if (lnh == HFI1_LRH_BTH)
+ if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
ohdr = &hdr->u.oth;
else
ohdr = &hdr->u.l.oth;
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ opcode = ib_bth_get_opcode(ohdr);
return hdr_len_by_opcode[opcode] == 0 ?
0 : hdr_len_by_opcode[opcode] - (12 + 8);
}
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index 382fcda3a5f6..090f6b506953 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -139,11 +139,11 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
__entry->pkey =
be32_to_cpu(ohdr->bth[0]) & 0xffff;
__entry->f =
- (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) &
- HFI1_FECN_MASK;
+ (be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) &
+ IB_FECN_MASK;
__entry->b =
- (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) &
- HFI1_BECN_MASK;
+ (be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) &
+ IB_BECN_MASK;
__entry->qpn =
be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
__entry->a =
diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h
index d308454af7fd..deac77ddaeab 100644
--- a/drivers/infiniband/hw/hfi1/trace_misc.h
+++ b/drivers/infiniband/hw/hfi1/trace_misc.h
@@ -72,6 +72,54 @@ TRACE_EVENT(hfi1_interrupt,
__entry->src)
);
+#ifdef CONFIG_FAULT_INJECTION
+TRACE_EVENT(hfi1_fault_opcode,
+ TP_PROTO(struct rvt_qp *qp, u8 opcode),
+ TP_ARGS(qp, opcode),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u8, opcode)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->opcode = opcode;
+ ),
+ TP_printk("[%s] qpn 0x%x opcode 0x%x",
+ __get_str(dev), __entry->qpn, __entry->opcode)
+);
+
+TRACE_EVENT(hfi1_fault_packet,
+ TP_PROTO(struct hfi1_packet *packet),
+ TP_ARGS(packet),
+ TP_STRUCT__entry(DD_DEV_ENTRY(packet->rcd->ppd->dd)
+ __field(u64, eflags)
+ __field(u32, ctxt)
+ __field(u32, hlen)
+ __field(u32, tlen)
+ __field(u32, updegr)
+ __field(u32, etail)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(packet->rcd->ppd->dd);
+ __entry->eflags = rhf_err_flags(packet->rhf);
+ __entry->ctxt = packet->rcd->ctxt;
+ __entry->hlen = packet->hlen;
+ __entry->tlen = packet->tlen;
+ __entry->updegr = packet->updegr;
+ __entry->etail = rhf_egr_index(packet->rhf);
+ ),
+ TP_printk(
+ "[%s] ctxt %d eflags 0x%llx hlen %d tlen %d updegr %d etail %d",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->eflags,
+ __entry->hlen,
+ __entry->tlen,
+ __entry->updegr,
+ __entry->etail
+ )
+);
+#endif
+
#endif /* __HFI1_TRACE_MISC_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/trace_rc.h b/drivers/infiniband/hw/hfi1/trace_rc.h
index 5ea5005f9f41..8ce476570462 100644
--- a/drivers/infiniband/hw/hfi1/trace_rc.h
+++ b/drivers/infiniband/hw/hfi1/trace_rc.h
@@ -1,5 +1,5 @@
/*
-* Copyright(c) 2015, 2016 Intel Corporation.
+* Copyright(c) 2015, 2016, 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -104,11 +104,6 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_ack,
TP_ARGS(qp, psn)
);
-DEFINE_EVENT(hfi1_rc_template, hfi1_timeout,
- TP_PROTO(struct rvt_qp *qp, u32 psn),
- TP_ARGS(qp, psn)
-);
-
DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h
index 415d6be42c5d..2c9ac57657d3 100644
--- a/drivers/infiniband/hw/hfi1/trace_tx.h
+++ b/drivers/infiniband/hw/hfi1/trace_tx.h
@@ -633,6 +633,49 @@ DEFINE_EVENT(hfi1_bct_template, bct_get,
TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
TP_ARGS(dd, bc));
+TRACE_EVENT(
+ hfi1_qp_send_completion,
+ TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, u32 idx),
+ TP_ARGS(qp, wqe, idx),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(struct rvt_swqe *, wqe)
+ __field(u64, wr_id)
+ __field(u32, qpn)
+ __field(u32, qpt)
+ __field(u32, length)
+ __field(u32, idx)
+ __field(u32, ssn)
+ __field(enum ib_wr_opcode, opcode)
+ __field(int, send_flags)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ __entry->wqe = wqe;
+ __entry->wr_id = wqe->wr.wr_id;
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->qpt = qp->ibqp.qp_type;
+ __entry->length = wqe->length;
+ __entry->idx = idx;
+ __entry->ssn = wqe->ssn;
+ __entry->opcode = wqe->wr.opcode;
+ __entry->send_flags = wqe->wr.send_flags;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x qpt %u wqe %p idx %u wr_id %llx length %u ssn %u opcode %x send_flags %x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->qpt,
+ __entry->wqe,
+ __entry->idx,
+ __entry->wr_id,
+ __entry->length,
+ __entry->ssn,
+ __entry->opcode,
+ __entry->send_flags
+ )
+);
+
#endif /* __HFI1_TRACE_TX_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 4b2a8400c823..5da1e4546543 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -94,7 +94,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
ohdr = &ps->s_txreq->phdr.hdr.u.oth;
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
/* Get the next send request. */
@@ -320,7 +320,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
process_ecn(qp, packet, true);
psn = be32_to_cpu(ohdr->bth[2]);
- opcode = (bth0 >> 24) & 0xff;
+ opcode = ib_bth_get_opcode(ohdr);
/* Compare the PSN verses the expected PSN. */
if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
@@ -433,7 +433,7 @@ no_immediate_data:
wc.wc_flags = 0;
send_last:
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -451,7 +451,7 @@ last_imm:
wc.status = IB_WC_SUCCESS;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
/*
* It seems that IB mandates the presence of an SL in a
* work completion only for the UD transport (see section
@@ -463,7 +463,7 @@ last_imm:
*
* See also OPA Vol. 1, section 9.7.6, and table 9-17.
*/
- wc.sl = qp->remote_ah_attr.sl;
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
/* zero fields that are N/A */
wc.vendor_err = 0;
wc.pkey_index = 0;
@@ -528,7 +528,7 @@ rdma_last_imm:
wc.wc_flags = IB_WC_WITH_IMM;
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -555,7 +555,7 @@ rdma_last_imm:
case OP(RDMA_WRITE_LAST):
rdma_last:
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 13ea4eb6ef3d..6a4e95cefae5 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -68,7 +68,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
struct hfi1_pportdata *ppd;
struct rvt_qp *qp;
- struct ib_ah_attr *ah_attr;
+ struct rdma_ah_attr *ah_attr;
unsigned long flags;
struct rvt_sge_state ssge;
struct rvt_sge *sge;
@@ -103,17 +103,17 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (qp->ibqp.qp_num > 1) {
u16 pkey;
u16 slid;
- u8 sc5 = ibp->sl_to_sc[ah_attr->sl];
+ u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
pkey = hfi1_get_pkey(ibp, sqp->s_pkey_index);
- slid = ppd->lid | (ah_attr->src_path_bits &
+ slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1));
if (unlikely(ingress_pkey_check(ppd, pkey, sc5,
qp->s_pkey_index, slid))) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, pkey,
- ah_attr->sl,
+ rdma_ah_get_sl(ah_attr),
sqp->ibqp.qp_num, qp->ibqp.qp_num,
- slid, ah_attr->dlid);
+ slid, rdma_ah_get_dlid(ah_attr));
goto drop;
}
}
@@ -131,13 +131,13 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (unlikely(qkey != qp->qkey)) {
u16 lid;
- lid = ppd->lid | (ah_attr->src_path_bits &
+ lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1));
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey,
- ah_attr->sl,
+ rdma_ah_get_sl(ah_attr),
sqp->ibqp.qp_num, qp->ibqp.qp_num,
lid,
- ah_attr->dlid);
+ rdma_ah_get_dlid(ah_attr));
goto drop;
}
}
@@ -183,11 +183,11 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
goto bail_unlock;
}
- if (ah_attr->ah_flags & IB_AH_GRH) {
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
struct ib_grh grh;
- struct ib_global_route grd = ah_attr->grh;
+ const struct ib_global_route *grd = rdma_ah_read_grh(ah_attr);
- hfi1_make_grh(ibp, &grh, &grd, 0, 0);
+ hfi1_make_grh(ibp, &grh, grd, 0, 0);
hfi1_copy_sge(&qp->r_sge, &grh,
sizeof(grh), true, false);
wc.wc_flags |= IB_WC_GRH;
@@ -243,12 +243,13 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
} else {
wc.pkey_index = 0;
}
- wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
+ wc.slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1));
/* Check for loopback when the port lid is not set */
if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI)
wc.slid = be16_to_cpu(IB_LID_PERMISSIVE);
- wc.sl = ah_attr->sl;
- wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
+ wc.sl = rdma_ah_get_sl(ah_attr);
+ wc.dlid_path_bits = rdma_ah_get_dlid(ah_attr) & ((1 << ppd->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
@@ -272,7 +273,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
struct ib_other_headers *ohdr;
- struct ib_ah_attr *ah_attr;
+ struct rdma_ah_attr *ah_attr;
struct hfi1_pportdata *ppd;
struct hfi1_ibport *ibp;
struct rvt_swqe *wqe;
@@ -319,9 +320,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
- if (ah_attr->dlid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
- ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
- lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
+ if (rdma_ah_get_dlid(ah_attr) < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) {
+ lid = rdma_ah_get_dlid(ah_attr) & ~((1 << ppd->lmc) - 1);
if (unlikely(!loopback &&
(lid == ppd->lid ||
(lid == be16_to_cpu(IB_LID_PERMISSIVE) &&
@@ -356,7 +357,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_hdrwords = 7;
ps->s_txreq->s_cur_size = wqe->length;
ps->s_txreq->ss = &qp->s_sge;
- qp->s_srate = ah_attr->static_rate;
+ qp->s_srate = rdma_ah_get_static_rate(ah_attr);
qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
qp->s_wqe = wqe;
qp->s_sge.sge = wqe->sg_list[0];
@@ -364,11 +365,11 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_sge.num_sge = wqe->wr.num_sge;
qp->s_sge.total_len = wqe->length;
- if (ah_attr->ah_flags & IB_AH_GRH) {
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
/* Header size in 32-bit words. */
qp->s_hdrwords += hfi1_make_grh(ibp,
&ps->s_txreq->phdr.hdr.u.l.grh,
- &ah_attr->grh,
+ rdma_ah_read_grh(ah_attr),
qp->s_hdrwords, nwords);
lrh0 = HFI1_LRH_GRH;
ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
@@ -388,8 +389,8 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
} else {
bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
}
- sc5 = ibp->sl_to_sc[ah_attr->sl];
- lrh0 |= (ah_attr->sl & 0xf) << 4;
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
+ lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4;
if (qp->ibqp.qp_type == IB_QPT_SMI) {
lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
priv->s_sc = 0xf;
@@ -402,15 +403,17 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
ps->s_txreq->psc = priv->s_sendcontext;
ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
- ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);
+ ps->s_txreq->phdr.hdr.lrh[1] =
+ cpu_to_be16(rdma_ah_get_dlid(ah_attr));
ps->s_txreq->phdr.hdr.lrh[2] =
cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
- if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
+ if (rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) {
ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
} else {
lid = ppd->lid;
if (lid) {
- lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
+ lid |= rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1);
ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(lid);
} else {
ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
@@ -537,7 +540,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
bth0 = pkey | (IB_OPCODE_CNP << 24);
ohdr->bth[0] = cpu_to_be32(bth0);
- ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << HFI1_BECN_SHIFT));
+ ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT));
ohdr->bth[2] = 0; /* PSN 0 */
hdr.lrh[0] = cpu_to_be16(lrh0);
@@ -680,7 +683,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
bool has_grh = rcv_flags & HFI1_HAS_GRH;
- u8 sc5 = hdr2sc(hdr, packet->rhf);
+ u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf);
u32 bth1;
u8 sl_from_sc, sl;
u16 slid;
@@ -688,18 +691,16 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
- dlid = be16_to_cpu(hdr->lrh[1]);
+ dlid = ib_get_dlid(hdr);
bth1 = be32_to_cpu(ohdr->bth[1]);
- slid = be16_to_cpu(hdr->lrh[3]);
- pkey = (u16)be32_to_cpu(ohdr->bth[0]);
- sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
- extra_bytes = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ slid = ib_get_slid(hdr);
+ pkey = ib_bth_get_pkey(ohdr);
+ opcode = ib_bth_get_opcode(ohdr);
+ sl = ib_get_sl(hdr);
+ extra_bytes = ib_bth_get_pad(ohdr);
extra_bytes += (SIZE_OF_CRC << 2);
sl_from_sc = ibp->sc_to_sl[sc5];
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
- opcode &= 0xff;
-
process_ecn(qp, packet, (opcode != IB_OPCODE_CNP));
/*
* Get the number of bytes the message was padded by
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 4a8295399e71..35c6e7ec8ad6 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -200,8 +200,9 @@ int hfi1_user_exp_rcv_init(struct file *fp)
if (!HFI1_CAP_UGET_MASK(uctxt->flags, TID_UNMAP)) {
fd->invalid_tid_idx = 0;
- fd->invalid_tids = kzalloc(uctxt->expected_count *
- sizeof(u32), GFP_KERNEL);
+ fd->invalid_tids = kcalloc(uctxt->expected_count,
+ sizeof(*fd->invalid_tids),
+ GFP_KERNEL);
if (!fd->invalid_tids) {
ret = -ENOMEM;
goto done;
@@ -578,6 +579,9 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
u32 *tidinfo;
unsigned tididx;
+ if (unlikely(tinfo->tidcnt > fd->tid_used))
+ return -EINVAL;
+
tidinfo = memdup_user((void __user *)(unsigned long)tinfo->tidlist,
sizeof(tidinfo[0]) * tinfo->tidcnt);
if (IS_ERR(tidinfo))
@@ -607,7 +611,7 @@ int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo)
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
unsigned long *ev = uctxt->dd->events +
- (((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
+ (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + fd->subctxt);
u32 *array;
int ret = 0;
@@ -1011,8 +1015,8 @@ static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
* process in question.
*/
ev = uctxt->dd->events +
- (((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
+ (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
+ HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
}
fdata->invalid_tid_idx++;
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 68295a12b771..e341e6dcc388 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -73,7 +73,8 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
{
unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit,
size = (cache_size * (1UL << 20)); /* convert to bytes */
- unsigned usr_ctxts = dd->num_rcv_contexts - dd->first_user_ctxt;
+ unsigned int usr_ctxts =
+ dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
bool can_lock = capable(CAP_IPC_LOCK);
/*
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index e6811c4edc73..0749689d7643 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -376,7 +376,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
{
struct hfi1_filedata *fd;
int ret = 0;
- unsigned memsize;
char buf[64];
struct hfi1_devdata *dd;
struct hfi1_user_sdma_comp_q *cq;
@@ -401,13 +400,15 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
if (!pq)
goto pq_nomem;
- memsize = sizeof(*pq->reqs) * hfi1_sdma_comp_ring_size;
- pq->reqs = kzalloc(memsize, GFP_KERNEL);
+ pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
+ sizeof(*pq->reqs),
+ GFP_KERNEL);
if (!pq->reqs)
goto pq_reqs_nomem;
- memsize = BITS_TO_LONGS(hfi1_sdma_comp_ring_size) * sizeof(long);
- pq->req_in_use = kzalloc(memsize, GFP_KERNEL);
+ pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
+ sizeof(*pq->req_in_use),
+ GFP_KERNEL);
if (!pq->req_in_use)
goto pq_reqs_no_in_use;
@@ -442,8 +443,8 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
if (!cq)
goto cq_nomem;
- memsize = PAGE_ALIGN(sizeof(*cq->comps) * hfi1_sdma_comp_ring_size);
- cq->comps = vmalloc_user(memsize);
+ cq->comps = vmalloc_user(PAGE_ALIGN(sizeof(*cq->comps)
+ * hfi1_sdma_comp_ring_size));
if (!cq->comps)
goto cq_comps_nomem;
@@ -704,7 +705,9 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
/* Save all the IO vector structures */
for (i = 0; i < req->data_iovs; i++) {
INIT_LIST_HEAD(&req->iovs[i].list);
- memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec));
+ memcpy(&req->iovs[i].iov,
+ iovec + idx++,
+ sizeof(req->iovs[i].iov));
ret = pin_vector_pages(req, &req->iovs[i]);
if (ret) {
req->status = ret;
@@ -1615,9 +1618,10 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
{
hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d",
pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret);
- cq->comps[idx].status = state;
if (state == ERROR)
cq->comps[idx].errcode = -ret;
+ smp_wmb(); /* make sure errcode is visible first */
+ cq->comps[idx].status = state;
trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
idx, state, ret);
}
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 222315fadab1..90e7b77d68e8 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -60,6 +60,8 @@
#include "trace.h"
#include "qp.h"
#include "verbs_txreq.h"
+#include "debugfs.h"
+#include "vnic.h"
static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@@ -297,6 +299,22 @@ static inline bool wss_exceeds_threshold(void)
}
/*
+ * Translate ib_wr_opcode into ib_wc_opcode.
+ */
+const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
+ [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+ [IB_WR_SEND] = IB_WC_SEND,
+ [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
+ [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
+ [IB_WR_SEND_WITH_INV] = IB_WC_SEND,
+ [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
+ [IB_WR_REG_MR] = IB_WC_REG_MR
+};
+
+/*
* Length of header by opcode, 0 --> not supported
*/
const u8 hdr_len_by_opcode[256] = {
@@ -501,6 +519,35 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
return NULL;
}
+static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
+{
+#ifdef CONFIG_FAULT_INJECTION
+ if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP)
+ /*
+ * In order to drop non-IB traffic we
+ * set PbcInsertHrc to NONE (0x2).
+ * The packet will still be delivered
+ * to the receiving node but a
+ * KHdrHCRCErr (KDETH packet with a bad
+ * HCRC) will be triggered and the
+ * packet will not be delivered to the
+ * correct context.
+ */
+ pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT;
+ else
+ /*
+ * In order to drop regular verbs
+ * traffic we set the PbcTestEbp
+ * flag. The packet will still be
+ * delivered to the receiving node but
+ * a 'late ebp error' will be
+ * triggered and will be dropped.
+ */
+ pbc |= PBC_TEST_EBP;
+#endif
+ return pbc;
+}
+
/**
* hfi1_ib_rcv - process an incoming packet
* @packet: data packet information
@@ -525,7 +572,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
u16 lid;
/* Check for GRH */
- lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ lnh = ib_get_lnh(hdr);
if (lnh == HFI1_LRH_BTH) {
packet->ohdr = &hdr->u.oth;
} else if (lnh == HFI1_LRH_GRH) {
@@ -544,12 +591,12 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
trace_input_ibhdr(rcd->dd, hdr);
- opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
+ opcode = ib_bth_get_opcode(packet->ohdr);
inc_opstats(tlen, &rcd->opstats->stats[opcode]);
/* Get the destination QP number. */
qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK;
- lid = be16_to_cpu(hdr->lrh[1]);
+ lid = ib_get_dlid(hdr);
if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
(lid != be16_to_cpu(IB_LID_PERMISSIVE)))) {
struct rvt_mcast *mcast;
@@ -557,7 +604,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
if (lnh != HFI1_LRH_GRH)
goto drop;
- mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
+ mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid);
if (!mcast)
goto drop;
list_for_each_entry_rcu(p, &mcast->qp_list, list) {
@@ -583,6 +630,11 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
rcu_read_unlock();
goto drop;
}
+ if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode,
+ true))) {
+ rcu_read_unlock();
+ goto drop;
+ }
spin_lock_irqsave(&packet->qp->r_lock, flags);
packet_handler = qp_ok(opcode, packet);
if (likely(packet_handler))
@@ -781,7 +833,6 @@ static int build_verbs_tx_desc(
if (ret)
goto bail_txadd;
}
-
/* add the ulp payload - if any. tx->ss can be NULL for acks */
if (tx->ss)
ret = build_verbs_ulp_payload(sde, length, tx);
@@ -800,7 +851,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
struct hfi1_ibdev *dev = ps->dev;
struct hfi1_pportdata *ppd = ps->ppd;
struct verbs_txreq *tx;
- u64 pbc_flags = 0;
u8 sc5 = priv->s_sc;
int ret;
@@ -809,12 +859,16 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (!sdma_txreq_built(&tx->txreq)) {
if (likely(pbc == 0)) {
u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+ u8 opcode = get_opcode(&tx->phdr.hdr);
+
/* No vl15 here */
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
+ pbc = hfi1_fault_tx(qp, opcode, pbc);
pbc = create_pbc(ppd,
- pbc_flags,
+ pbc,
qp->srate_mbps,
vl,
plen);
@@ -917,7 +971,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u32 plen = hdrwords + dwords + 2; /* includes pbc */
struct hfi1_pportdata *ppd = ps->ppd;
u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
- u64 pbc_flags = 0;
u8 sc5;
unsigned long flags = 0;
struct send_context *sc;
@@ -942,9 +995,14 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (likely(pbc == 0)) {
u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+ struct verbs_txreq *tx = ps->s_txreq;
+ u8 opcode = get_opcode(&tx->phdr.hdr);
+
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
- pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
+ pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
+ pbc = hfi1_fault_tx(qp, opcode, pbc);
+ pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
}
if (cb)
iowait_pio_inc(&priv->s_iowait);
@@ -1173,7 +1231,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
hdr = &ps->s_txreq->phdr.hdr;
/* locate the pkey within the headers */
- lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ lnh = ib_get_lnh(hdr);
if (lnh == HFI1_LRH_GRH)
ohdr = &hdr->u.l.oth;
else
@@ -1220,17 +1278,20 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
{
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
- u16 ver = dd->dc8051_ver;
+ u32 ver = dd->dc8051_ver;
memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
- rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) |
- (u64)dc8051_ver_min(ver);
+ rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 32) |
+ ((u64)(dc8051_ver_min(ver)) << 16) |
+ (u64)dc8051_ver_patch(ver);
+
rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
- IB_DEVICE_MEM_MGT_EXTENSIONS;
+ IB_DEVICE_MEM_MGT_EXTENSIONS |
+ IB_DEVICE_RDMA_NETDEV_OPA_VNIC;
rdi->dparms.props.page_size_cap = PAGE_SIZE;
rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
rdi->dparms.props.vendor_part_id = dd->pcidev->device;
@@ -1398,14 +1459,14 @@ static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
/*
* convert ah port,sl to sc
*/
-u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah)
+u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah)
{
- struct hfi1_ibport *ibp = to_iport(ibdev, ah->port_num);
+ struct hfi1_ibport *ibp = to_iport(ibdev, rdma_ah_get_port_num(ah));
- return ibp->sl_to_sc[ah->sl];
+ return ibp->sl_to_sc[rdma_ah_get_sl(ah)];
}
-static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
+static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr)
{
struct hfi1_ibport *ibp;
struct hfi1_pportdata *ppd;
@@ -1413,9 +1474,9 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
u8 sc5;
/* test the mapping for validity */
- ibp = to_iport(ibdev, ah_attr->port_num);
+ ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
ppd = ppd_from_ibp(ibp);
- sc5 = ibp->sl_to_sc[ah_attr->sl];
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
dd = dd_from_ppd(ppd);
if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
return -EINVAL;
@@ -1423,7 +1484,7 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
}
static void hfi1_notify_new_ah(struct ib_device *ibdev,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct rvt_ah *ah)
{
struct hfi1_ibport *ibp;
@@ -1436,9 +1497,9 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev,
* done being setup. We can however modify things which we need to set.
*/
- ibp = to_iport(ibdev, ah_attr->port_num);
+ ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
ppd = ppd_from_ibp(ibp);
- sc5 = ibp->sl_to_sc[ah->attr.sl];
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)];
dd = dd_from_ppd(ppd);
ah->vl = sc_to_vlt(dd, sc5);
if (ah->vl < num_vls || ah->vl == 15)
@@ -1447,17 +1508,21 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev,
struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid)
{
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
struct ib_ah *ah = ERR_PTR(-EINVAL);
struct rvt_qp *qp0;
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ struct hfi1_devdata *dd = dd_from_ppd(ppd);
+ u8 port_num = ppd->port;
memset(&attr, 0, sizeof(attr));
- attr.dlid = dlid;
- attr.port_num = ppd_from_ibp(ibp)->port;
+ attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num);
+ rdma_ah_set_dlid(&attr, dlid);
+ rdma_ah_set_port_num(&attr, ppd_from_ibp(ibp)->port);
rcu_read_lock();
qp0 = rcu_dereference(ibp->rvp.qp[0]);
if (qp0)
- ah = ib_create_ah(qp0->ibqp.pd, &attr);
+ ah = rdma_create_ah(qp0->ibqp.pd, &attr);
rcu_read_unlock();
return ah;
}
@@ -1504,10 +1569,10 @@ static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str,
{
struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
struct hfi1_ibdev *dev = dev_from_rdi(rdi);
- u16 ver = dd_from_dev(dev)->dc8051_ver;
+ u32 ver = dd_from_dev(dev)->dc8051_ver;
- snprintf(str, str_len, "%u.%u", dc8051_ver_maj(ver),
- dc8051_ver_min(ver));
+ snprintf(str, str_len, "%u.%u.%u", dc8051_ver_maj(ver),
+ dc8051_ver_min(ver), dc8051_ver_patch(ver));
}
static const char * const driver_cntr_names[] = {
@@ -1524,6 +1589,7 @@ static const char * const driver_cntr_names[] = {
"DRIVER_EgrHdrFull"
};
+static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */
static const char **dev_cntr_names;
static const char **port_cntr_names;
static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
@@ -1578,6 +1644,7 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
{
int i, err;
+ mutex_lock(&cntr_names_lock);
if (!cntr_names_initialized) {
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -1586,8 +1653,10 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
num_driver_cntrs,
&num_dev_cntrs,
&dev_cntr_names);
- if (err)
+ if (err) {
+ mutex_unlock(&cntr_names_lock);
return NULL;
+ }
for (i = 0; i < num_driver_cntrs; i++)
dev_cntr_names[num_dev_cntrs + i] =
@@ -1601,10 +1670,12 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
if (err) {
kfree(dev_cntr_names);
dev_cntr_names = NULL;
+ mutex_unlock(&cntr_names_lock);
return NULL;
}
cntr_names_initialized = 1;
}
+ mutex_unlock(&cntr_names_lock);
if (!port_num)
return rdma_alloc_hw_stats_struct(
@@ -1707,6 +1778,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
ibdev->modify_device = modify_device;
ibdev->alloc_hw_stats = alloc_hw_stats;
ibdev->get_hw_stats = get_hw_stats;
+ ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn;
+ ibdev->free_rdma_netdev = hfi1_vnic_free_rn;
/* keep process mad in the driver */
ibdev->process_mad = hfi1_process_mad;
@@ -1751,7 +1824,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
- dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send;
+ dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send_from_rvt;
dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send;
dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send;
dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr;
@@ -1823,9 +1896,13 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
del_timer_sync(&dev->mem_timer);
verbs_txreq_exit(dev);
+ mutex_lock(&cntr_names_lock);
kfree(dev_cntr_names);
kfree(port_cntr_names);
+ dev_cntr_names = NULL;
+ port_cntr_names = NULL;
cntr_names_initialized = 0;
+ mutex_unlock(&cntr_names_lock);
}
void hfi1_cnp_rcv(struct hfi1_packet *packet)
@@ -1840,12 +1917,12 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
switch (packet->qp->ibqp.qp_type) {
case IB_QPT_UC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_UC;
break;
case IB_QPT_RC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_RC;
break;
@@ -1859,7 +1936,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
return;
}
- sc5 = hdr2sc(hdr, packet->rhf);
+ sc5 = hfi1_9B_get_sc5(hdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = qp->ibqp.qp_num;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 3a0b589e41c2..52ff275caf54 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -195,6 +195,11 @@ struct hfi1_ibdev {
struct dentry *hfi1_ibdev_dbg;
/* per HFI symlinks to above */
struct dentry *hfi1_ibdev_link;
+#ifdef CONFIG_FAULT_INJECTION
+ struct fault_opcode *fault_opcode;
+ struct fault_packet *fault_packet;
+ bool fault_suppress_err;
+#endif
#endif
};
@@ -303,7 +308,7 @@ void hfi1_rc_hdrerr(
u32 rcv_flags,
struct rvt_qp *qp);
-u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
+u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid);
@@ -342,7 +347,7 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0);
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
- struct ib_global_route *grh, u32 hwords, u32 nwords);
+ const struct ib_global_route *grh, u32 hwords, u32 nwords);
void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
@@ -350,7 +355,9 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
void _hfi1_do_send(struct work_struct *work);
-void hfi1_do_send(struct rvt_qp *qp);
+void hfi1_do_send_from_rvt(struct rvt_qp *qp);
+
+void hfi1_do_send(struct rvt_qp *qp, bool in_thread);
void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status);
diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h
new file mode 100644
index 000000000000..e2c455299b53
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -0,0 +1,184 @@
+#ifndef _HFI1_VNIC_H
+#define _HFI1_VNIC_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/opa_vnic.h>
+#include "hfi.h"
+#include "sdma.h"
+
+#define HFI1_VNIC_MAX_TXQ 16
+#define HFI1_VNIC_MAX_PAD 12
+
+/* L2 header definitions */
+#define HFI1_L2_TYPE_OFFSET 0x7
+#define HFI1_L2_TYPE_SHFT 0x5
+#define HFI1_L2_TYPE_MASK 0x3
+
+#define HFI1_GET_L2_TYPE(hdr) \
+ ((*((u8 *)(hdr) + HFI1_L2_TYPE_OFFSET) >> HFI1_L2_TYPE_SHFT) & \
+ HFI1_L2_TYPE_MASK)
+
+/* L4 type definitions */
+#define HFI1_L4_TYPE_OFFSET 8
+
+#define HFI1_GET_L4_TYPE(data) \
+ (*((u8 *)(data) + HFI1_L4_TYPE_OFFSET))
+
+/* L4 header definitions */
+#define HFI1_VNIC_L4_HDR_OFFSET OPA_VNIC_L2_HDR_LEN
+
+#define HFI1_VNIC_GET_L4_HDR(data) \
+ (*((u16 *)((u8 *)(data) + HFI1_VNIC_L4_HDR_OFFSET)))
+
+#define HFI1_VNIC_GET_VESWID(data) \
+ (HFI1_VNIC_GET_L4_HDR(data) & 0xFFF)
+
+/* Service class */
+#define HFI1_VNIC_SC_OFFSET_LOW 6
+#define HFI1_VNIC_SC_OFFSET_HI 7
+#define HFI1_VNIC_SC_SHIFT 4
+
+#define HFI1_VNIC_MAX_QUEUE 16
+
+/**
+ * struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information
+ * @dd - device data pointer
+ * @sde - sdma engine
+ * @vinfo - vnic info pointer
+ * @wait - iowait structure
+ * @stx - sdma tx request
+ * @state - vnic Tx ring SDMA state
+ * @q_idx - vnic Tx queue index
+ */
+struct hfi1_vnic_sdma {
+ struct hfi1_devdata *dd;
+ struct sdma_engine *sde;
+ struct hfi1_vnic_vport_info *vinfo;
+ struct iowait wait;
+ struct sdma_txreq stx;
+ unsigned int state;
+ u8 q_idx;
+};
+
+/**
+ * struct hfi1_vnic_rx_queue - HFI1 VNIC receive queue
+ * @idx: queue index
+ * @vinfo: pointer to vport information
+ * @netdev: network device
+ * @napi: netdev napi structure
+ * @skbq: queue of received socket buffers
+ */
+struct hfi1_vnic_rx_queue {
+ u8 idx;
+ struct hfi1_vnic_vport_info *vinfo;
+ struct net_device *netdev;
+ struct napi_struct napi;
+ struct sk_buff_head skbq;
+};
+
+/**
+ * struct hfi1_vnic_vport_info - HFI1 VNIC virtual port information
+ * @dd: device data pointer
+ * @netdev: net device pointer
+ * @flags: state flags
+ * @lock: vport lock
+ * @num_tx_q: number of transmit queues
+ * @num_rx_q: number of receive queues
+ * @vesw_id: virtual switch id
+ * @rxq: Array of receive queues
+ * @stats: per queue stats
+ * @sdma: VNIC SDMA structure per TXQ
+ */
+struct hfi1_vnic_vport_info {
+ struct hfi1_devdata *dd;
+ struct net_device *netdev;
+ unsigned long flags;
+
+ /* Lock used around state updates */
+ struct mutex lock;
+
+ u8 num_tx_q;
+ u8 num_rx_q;
+ u16 vesw_id;
+ struct hfi1_vnic_rx_queue rxq[HFI1_NUM_VNIC_CTXT];
+
+ struct opa_vnic_stats stats[HFI1_VNIC_MAX_QUEUE];
+ struct hfi1_vnic_sdma sdma[HFI1_VNIC_MAX_TXQ];
+};
+
+#define v_dbg(format, arg...) \
+ netdev_dbg(vinfo->netdev, format, ## arg)
+#define v_err(format, arg...) \
+ netdev_err(vinfo->netdev, format, ## arg)
+#define v_info(format, arg...) \
+ netdev_info(vinfo->netdev, format, ## arg)
+
+/* vnic hfi1 internal functions */
+void hfi1_vnic_setup(struct hfi1_devdata *dd);
+void hfi1_vnic_cleanup(struct hfi1_devdata *dd);
+int hfi1_vnic_txreq_init(struct hfi1_devdata *dd);
+void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd);
+
+void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet);
+void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo);
+bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx);
+
+/* vnic rdma netdev operations */
+struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
+ u8 port_num,
+ enum rdma_netdev_t type,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *));
+void hfi1_vnic_free_rn(struct net_device *netdev);
+int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
+ struct hfi1_vnic_vport_info *vinfo,
+ struct sk_buff *skb, u64 pbc, u8 plen);
+
+#endif /* _HFI1_VNIC_H */
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
new file mode 100644
index 000000000000..392f4d57f3e3
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -0,0 +1,907 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for VNIC functionality
+ */
+
+#include <linux/io.h>
+#include <linux/if_vlan.h>
+
+#include "vnic.h"
+
+#define HFI_TX_TIMEOUT_MS 1000
+
+#define HFI1_VNIC_RCV_Q_SIZE 1024
+
+#define HFI1_VNIC_UP 0
+
+static DEFINE_SPINLOCK(vport_cntr_lock);
+
+static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
+{
+ unsigned int rcvctrl_ops = 0;
+ int ret;
+
+ ret = hfi1_init_ctxt(uctxt->sc);
+ if (ret)
+ goto done;
+
+ uctxt->do_interrupt = &handle_receive_interrupt;
+
+ /* Now allocate the RcvHdr queue and eager buffers. */
+ ret = hfi1_create_rcvhdrq(dd, uctxt);
+ if (ret)
+ goto done;
+
+ ret = hfi1_setup_eagerbufs(uctxt);
+ if (ret)
+ goto done;
+
+ set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags);
+
+ if (uctxt->rcvhdrtail_kvaddr)
+ clear_rcvhdrtail(uctxt);
+
+ rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
+ rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;
+
+ if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
+ rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
+ rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
+ rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
+ rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
+
+ hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
+
+ uctxt->is_vnic = true;
+done:
+ return ret;
+}
+
+static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata **vnic_ctxt)
+{
+ struct hfi1_ctxtdata *uctxt;
+ unsigned int ctxt;
+ int ret;
+
+ if (dd->flags & HFI1_FROZEN)
+ return -EIO;
+
+ for (ctxt = dd->first_dyn_alloc_ctxt;
+ ctxt < dd->num_rcv_contexts; ctxt++)
+ if (!dd->rcd[ctxt])
+ break;
+
+ if (ctxt == dd->num_rcv_contexts)
+ return -EBUSY;
+
+ uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, dd->node);
+ if (!uctxt) {
+ dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
+ return -ENOMEM;
+ }
+
+ uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
+ HFI1_CAP_KGET(NODROP_RHQ_FULL) |
+ HFI1_CAP_KGET(NODROP_EGR_FULL) |
+ HFI1_CAP_KGET(DMA_RTAIL);
+ uctxt->seq_cnt = 1;
+
+ /* Allocate and enable a PIO send context */
+ uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize,
+ uctxt->numa_id);
+
+ ret = uctxt->sc ? 0 : -ENOMEM;
+ if (ret)
+ goto bail;
+
+ dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n",
+ uctxt->sc->sw_index, uctxt->sc->hw_context);
+ ret = sc_enable(uctxt->sc);
+ if (ret)
+ goto bail;
+
+ if (dd->num_msix_entries)
+ hfi1_set_vnic_msix_info(uctxt);
+
+ hfi1_stats.sps_ctxts++;
+ dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
+ *vnic_ctxt = uctxt;
+
+ return ret;
+bail:
+ /*
+ * hfi1_free_ctxtdata() also releases send_context
+ * structure if uctxt->sc is not null
+ */
+ dd->rcd[uctxt->ctxt] = NULL;
+ hfi1_free_ctxtdata(dd, uctxt);
+ dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret);
+ return ret;
+}
+
+static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata *uctxt)
+{
+ unsigned long flags;
+
+ dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
+ flush_wc();
+
+ if (dd->num_msix_entries)
+ hfi1_reset_vnic_msix_info(uctxt);
+
+ spin_lock_irqsave(&dd->uctxt_lock, flags);
+ /*
+ * Disable receive context and interrupt available, reset all
+ * RcvCtxtCtrl bits to default values.
+ */
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
+ HFI1_RCVCTRL_TIDFLOW_DIS |
+ HFI1_RCVCTRL_INTRAVAIL_DIS |
+ HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
+ HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
+ HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
+ /*
+ * VNIC contexts are allocated from user context pool.
+ * Release them back to user context pool.
+ *
+ * Reset context integrity checks to default.
+ * (writes to CSRs probably belong in chip.c)
+ */
+ write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
+ hfi1_pkt_default_send_ctxt_mask(dd, SC_USER));
+ sc_disable(uctxt->sc);
+
+ dd->send_contexts[uctxt->sc->sw_index].type = SC_USER;
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+
+ dd->rcd[uctxt->ctxt] = NULL;
+ uctxt->event_flags = 0;
+
+ hfi1_clear_tids(uctxt);
+ hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
+
+ hfi1_stats.sps_ctxts--;
+ hfi1_free_ctxtdata(dd, uctxt);
+}
+
+void hfi1_vnic_setup(struct hfi1_devdata *dd)
+{
+ idr_init(&dd->vnic.vesw_idr);
+}
+
+void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
+{
+ idr_destroy(&dd->vnic.vesw_idr);
+}
+
+#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \
+ u64 *src64, *dst64; \
+ for (src64 = &qstats->x_grp.unicast, \
+ dst64 = &stats->x_grp.unicast; \
+ dst64 <= &stats->x_grp.s_1519_max;) { \
+ *dst64++ += *src64++; \
+ } \
+ } while (0)
+
+/* hfi1_vnic_update_stats - update statistics */
+static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
+ struct opa_vnic_stats *stats)
+{
+ struct net_device *netdev = vinfo->netdev;
+ u8 i;
+
+ /* add tx counters on different queues */
+ for (i = 0; i < vinfo->num_tx_q; i++) {
+ struct opa_vnic_stats *qstats = &vinfo->stats[i];
+ struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
+
+ stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
+ stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
+ stats->tx_drop_state += qstats->tx_drop_state;
+ stats->tx_dlid_zero += qstats->tx_dlid_zero;
+
+ SUM_GRP_COUNTERS(stats, qstats, tx_grp);
+ stats->netstats.tx_packets += qnstats->tx_packets;
+ stats->netstats.tx_bytes += qnstats->tx_bytes;
+ }
+
+ /* add rx counters on different queues */
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct opa_vnic_stats *qstats = &vinfo->stats[i];
+ struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
+
+ stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
+ stats->netstats.rx_nohandler += qnstats->rx_nohandler;
+ stats->rx_drop_state += qstats->rx_drop_state;
+ stats->rx_oversize += qstats->rx_oversize;
+ stats->rx_runt += qstats->rx_runt;
+
+ SUM_GRP_COUNTERS(stats, qstats, rx_grp);
+ stats->netstats.rx_packets += qnstats->rx_packets;
+ stats->netstats.rx_bytes += qnstats->rx_bytes;
+ }
+
+ stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
+ stats->netstats.tx_carrier_errors +
+ stats->tx_drop_state + stats->tx_dlid_zero;
+ stats->netstats.tx_dropped = stats->netstats.tx_errors;
+
+ stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
+ stats->netstats.rx_nohandler +
+ stats->rx_drop_state + stats->rx_oversize +
+ stats->rx_runt;
+ stats->netstats.rx_dropped = stats->netstats.rx_errors;
+
+ netdev->stats.tx_packets = stats->netstats.tx_packets;
+ netdev->stats.tx_bytes = stats->netstats.tx_bytes;
+ netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
+ netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
+ netdev->stats.tx_errors = stats->netstats.tx_errors;
+ netdev->stats.tx_dropped = stats->netstats.tx_dropped;
+
+ netdev->stats.rx_packets = stats->netstats.rx_packets;
+ netdev->stats.rx_bytes = stats->netstats.rx_bytes;
+ netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
+ netdev->stats.multicast = stats->rx_grp.mcastbcast;
+ netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
+ netdev->stats.rx_errors = stats->netstats.rx_errors;
+ netdev->stats.rx_dropped = stats->netstats.rx_dropped;
+}
+
+/* update_len_counters - update pkt's len histogram counters */
+static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
+ int len)
+{
+ /* account for 4 byte FCS */
+ if (len >= 1515)
+ grp->s_1519_max++;
+ else if (len >= 1020)
+ grp->s_1024_1518++;
+ else if (len >= 508)
+ grp->s_512_1023++;
+ else if (len >= 252)
+ grp->s_256_511++;
+ else if (len >= 124)
+ grp->s_128_255++;
+ else if (len >= 61)
+ grp->s_65_127++;
+ else
+ grp->s_64++;
+}
+
+/* hfi1_vnic_update_tx_counters - update transmit counters */
+static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx, struct sk_buff *skb, int err)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
+ struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
+ u16 vlan_tci;
+
+ stats->netstats.tx_packets++;
+ stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;
+
+ update_len_counters(tx_grp, skb->len);
+
+ /* rest of the counts are for good packets only */
+ if (unlikely(err))
+ return;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ tx_grp->mcastbcast++;
+ else
+ tx_grp->unicast++;
+
+ if (!__vlan_get_tag(skb, &vlan_tci))
+ tx_grp->vlan++;
+ else
+ tx_grp->untagged++;
+}
+
+/* hfi1_vnic_update_rx_counters - update receive counters */
+static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx, struct sk_buff *skb, int err)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
+ struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
+ struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
+ u16 vlan_tci;
+
+ stats->netstats.rx_packets++;
+ stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;
+
+ update_len_counters(rx_grp, skb->len);
+
+ /* rest of the counts are for good packets only */
+ if (unlikely(err))
+ return;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ rx_grp->mcastbcast++;
+ else
+ rx_grp->unicast++;
+
+ if (!__vlan_get_tag(skb, &vlan_tci))
+ rx_grp->vlan++;
+ else
+ rx_grp->untagged++;
+}
+
+/* This function is overloaded for opa_vnic specific implementation */
+static void hfi1_vnic_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+
+ hfi1_vnic_update_stats(vinfo, vstats);
+}
+
+static u64 create_bypass_pbc(u32 vl, u32 dw_len)
+{
+ u64 pbc;
+
+ pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
+ | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
+ | PBC_PACKET_BYPASS
+ | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
+ | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;
+
+ return pbc;
+}
+
+/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
+static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx)
+{
+ netif_stop_subqueue(vinfo->netdev, q_idx);
+ if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
+ return;
+
+ netif_start_subqueue(vinfo->netdev, q_idx);
+}
+
+static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ u8 pad_len, q_idx = skb->queue_mapping;
+ struct hfi1_devdata *dd = vinfo->dd;
+ struct opa_vnic_skb_mdata *mdata;
+ u32 pkt_len, total_len;
+ int err = -EINVAL;
+ u64 pbc;
+
+ v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
+ if (unlikely(!netif_oper_up(netdev))) {
+ vinfo->stats[q_idx].tx_drop_state++;
+ goto tx_finish;
+ }
+
+ /* take out meta data */
+ mdata = (struct opa_vnic_skb_mdata *)skb->data;
+ skb_pull(skb, sizeof(*mdata));
+ if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
+ vinfo->stats[q_idx].tx_dlid_zero++;
+ goto tx_finish;
+ }
+
+ /* add tail padding (for 8 bytes size alignment) and icrc */
+ pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
+ pad_len += OPA_VNIC_ICRC_TAIL_LEN;
+
+ /*
+ * pkt_len is how much data we have to write, includes header and data.
+ * total_len is length of the packet in Dwords plus the PBC should not
+ * include the CRC.
+ */
+ pkt_len = (skb->len + pad_len) >> 2;
+ total_len = pkt_len + 2; /* PBC + packet */
+
+ pbc = create_bypass_pbc(mdata->vl, total_len);
+
+ skb_get(skb);
+ v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
+ err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
+ if (unlikely(err)) {
+ if (err == -ENOMEM)
+ vinfo->stats[q_idx].netstats.tx_fifo_errors++;
+ else if (err != -EBUSY)
+ vinfo->stats[q_idx].netstats.tx_carrier_errors++;
+ }
+ /* remove the header before updating tx counters */
+ skb_pull(skb, OPA_VNIC_HDR_LEN);
+
+ if (unlikely(err == -EBUSY)) {
+ hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_BUSY;
+ }
+
+tx_finish:
+ /* update tx counters */
+ hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+}
+
+static u16 hfi1_vnic_select_queue(struct net_device *netdev,
+ struct sk_buff *skb,
+ void *accel_priv,
+ select_queue_fallback_t fallback)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ struct opa_vnic_skb_mdata *mdata;
+ struct sdma_engine *sde;
+
+ mdata = (struct opa_vnic_skb_mdata *)skb->data;
+ sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
+ return sde->this_idx;
+}
+
+/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
+static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
+ struct sk_buff *skb)
+{
+ struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
+ int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
+ int rc = -EFAULT;
+
+ skb_pull(skb, OPA_VNIC_HDR_LEN);
+
+ /* Validate Packet length */
+ if (unlikely(skb->len > max_len))
+ vinfo->stats[rxq->idx].rx_oversize++;
+ else if (unlikely(skb->len < ETH_ZLEN))
+ vinfo->stats[rxq->idx].rx_runt++;
+ else
+ rc = 0;
+ return rc;
+}
+
+static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
+{
+ unsigned char *pad_info;
+ struct sk_buff *skb;
+
+ skb = skb_dequeue(&rxq->skbq);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* remove tail padding and icrc */
+ pad_info = skb->data + skb->len - 1;
+ skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
+ ((*pad_info) & 0x7)));
+
+ return skb;
+}
+
+/* hfi1_vnic_handle_rx - handle skb receive */
+static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
+ int *work_done, int work_to_do)
+{
+ struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
+ struct sk_buff *skb;
+ int rc;
+
+ while (1) {
+ if (*work_done >= work_to_do)
+ break;
+
+ skb = hfi1_vnic_get_skb(rxq);
+ if (unlikely(!skb))
+ break;
+
+ rc = hfi1_vnic_decap_skb(rxq, skb);
+ /* update rx counters */
+ hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
+ if (unlikely(rc)) {
+ dev_kfree_skb_any(skb);
+ continue;
+ }
+
+ skb_checksum_none_assert(skb);
+ skb->protocol = eth_type_trans(skb, rxq->netdev);
+
+ napi_gro_receive(&rxq->napi, skb);
+ (*work_done)++;
+ }
+}
+
+/* hfi1_vnic_napi - napi receive polling callback function */
+static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
+{
+ struct hfi1_vnic_rx_queue *rxq = container_of(napi,
+ struct hfi1_vnic_rx_queue, napi);
+ struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
+ int work_done = 0;
+
+ v_dbg("napi %d budget %d\n", rxq->idx, budget);
+ hfi1_vnic_handle_rx(rxq, &work_done, budget);
+
+ v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
+ if (work_done < budget)
+ napi_complete(napi);
+
+ return work_done;
+}
+
+void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
+{
+ struct hfi1_devdata *dd = packet->rcd->dd;
+ struct hfi1_vnic_vport_info *vinfo = NULL;
+ struct hfi1_vnic_rx_queue *rxq;
+ struct sk_buff *skb;
+ int l4_type, vesw_id = -1;
+ u8 q_idx;
+
+ l4_type = HFI1_GET_L4_TYPE(packet->ebuf);
+ if (likely(l4_type == OPA_VNIC_L4_ETHR)) {
+ vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
+ vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id);
+
+ /*
+ * In case of invalid vesw id, count the error on
+ * the first available vport.
+ */
+ if (unlikely(!vinfo)) {
+ struct hfi1_vnic_vport_info *vinfo_tmp;
+ int id_tmp = 0;
+
+ vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp);
+ if (vinfo_tmp) {
+ spin_lock(&vport_cntr_lock);
+ vinfo_tmp->stats[0].netstats.rx_nohandler++;
+ spin_unlock(&vport_cntr_lock);
+ }
+ }
+ }
+
+ if (unlikely(!vinfo)) {
+ dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
+ l4_type, vesw_id, packet->rcd->ctxt);
+ return;
+ }
+
+ q_idx = packet->rcd->vnic_q_idx;
+ rxq = &vinfo->rxq[q_idx];
+ if (unlikely(!netif_oper_up(vinfo->netdev))) {
+ vinfo->stats[q_idx].rx_drop_state++;
+ skb_queue_purge(&rxq->skbq);
+ return;
+ }
+
+ if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
+ vinfo->stats[q_idx].netstats.rx_fifo_errors++;
+ return;
+ }
+
+ skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
+ if (unlikely(!skb)) {
+ vinfo->stats[q_idx].netstats.rx_fifo_errors++;
+ return;
+ }
+
+ memcpy(skb->data, packet->ebuf, packet->tlen);
+ skb_put(skb, packet->tlen);
+ skb_queue_tail(&rxq->skbq, skb);
+
+ if (napi_schedule_prep(&rxq->napi)) {
+ v_dbg("napi %d scheduling\n", q_idx);
+ __napi_schedule(&rxq->napi);
+ }
+}
+
+static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ struct net_device *netdev = vinfo->netdev;
+ int i, rc;
+
+ /* ensure virtual eth switch id is valid */
+ if (!vinfo->vesw_id)
+ return -EINVAL;
+
+ rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id,
+ vinfo->vesw_id + 1, GFP_NOWAIT);
+ if (rc < 0)
+ return rc;
+
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
+
+ skb_queue_head_init(&rxq->skbq);
+ napi_enable(&rxq->napi);
+ }
+
+ netif_carrier_on(netdev);
+ netif_tx_start_all_queues(netdev);
+ set_bit(HFI1_VNIC_UP, &vinfo->flags);
+
+ return 0;
+}
+
+static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ u8 i;
+
+ clear_bit(HFI1_VNIC_UP, &vinfo->flags);
+ netif_carrier_off(vinfo->netdev);
+ netif_tx_disable(vinfo->netdev);
+ idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);
+
+ /* ensure irqs see the change */
+ hfi1_vnic_synchronize_irq(dd);
+
+ /* remove unread skbs */
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
+
+ napi_disable(&rxq->napi);
+ skb_queue_purge(&rxq->skbq);
+ }
+}
+
+static int hfi1_netdev_open(struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ int rc;
+
+ mutex_lock(&vinfo->lock);
+ rc = hfi1_vnic_up(vinfo);
+ mutex_unlock(&vinfo->lock);
+ return rc;
+}
+
+static int hfi1_netdev_close(struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+
+ mutex_lock(&vinfo->lock);
+ if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
+ hfi1_vnic_down(vinfo);
+ mutex_unlock(&vinfo->lock);
+ return 0;
+}
+
+static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata **vnic_ctxt)
+{
+ int rc;
+
+ rc = allocate_vnic_ctxt(dd, vnic_ctxt);
+ if (rc) {
+ dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
+ return rc;
+ }
+
+ rc = setup_vnic_ctxt(dd, *vnic_ctxt);
+ if (rc) {
+ dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
+ deallocate_vnic_ctxt(dd, *vnic_ctxt);
+ *vnic_ctxt = NULL;
+ }
+
+ return rc;
+}
+
+static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ int i, rc = 0;
+
+ mutex_lock(&hfi1_mutex);
+ if (!dd->vnic.num_vports) {
+ rc = hfi1_vnic_txreq_init(dd);
+ if (rc)
+ goto txreq_fail;
+
+ dd->vnic.msix_idx = dd->first_dyn_msix_idx;
+ }
+
+ for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
+ rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
+ if (rc)
+ break;
+ dd->vnic.ctxt[i]->vnic_q_idx = i;
+ }
+
+ if (i < vinfo->num_rx_q) {
+ /*
+ * If required amount of contexts is not
+ * allocated successfully then remaining contexts
+ * are released.
+ */
+ while (i-- > dd->vnic.num_ctxt) {
+ deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
+ dd->vnic.ctxt[i] = NULL;
+ }
+ goto alloc_fail;
+ }
+
+ if (dd->vnic.num_ctxt != i) {
+ dd->vnic.num_ctxt = i;
+ hfi1_init_vnic_rsm(dd);
+ }
+
+ dd->vnic.num_vports++;
+ hfi1_vnic_sdma_init(vinfo);
+alloc_fail:
+ if (!dd->vnic.num_vports)
+ hfi1_vnic_txreq_deinit(dd);
+txreq_fail:
+ mutex_unlock(&hfi1_mutex);
+ return rc;
+}
+
+static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ int i;
+
+ mutex_lock(&hfi1_mutex);
+ if (--dd->vnic.num_vports == 0) {
+ for (i = 0; i < dd->vnic.num_ctxt; i++) {
+ deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
+ dd->vnic.ctxt[i] = NULL;
+ }
+ hfi1_deinit_vnic_rsm(dd);
+ dd->vnic.num_ctxt = 0;
+ hfi1_vnic_txreq_deinit(dd);
+ }
+ mutex_unlock(&hfi1_mutex);
+}
+
+static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ bool reopen = false;
+
+ /*
+ * If vesw_id is being changed, and if the vnic port is up,
+ * reset the vnic port to ensure new vesw_id gets picked up
+ */
+ if (id != vinfo->vesw_id) {
+ mutex_lock(&vinfo->lock);
+ if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
+ hfi1_vnic_down(vinfo);
+ reopen = true;
+ }
+
+ vinfo->vesw_id = id;
+ if (reopen)
+ hfi1_vnic_up(vinfo);
+
+ mutex_unlock(&vinfo->lock);
+ }
+}
+
+/* netdev ops */
+static const struct net_device_ops hfi1_netdev_ops = {
+ .ndo_open = hfi1_netdev_open,
+ .ndo_stop = hfi1_netdev_close,
+ .ndo_start_xmit = hfi1_netdev_start_xmit,
+ .ndo_select_queue = hfi1_vnic_select_queue,
+ .ndo_get_stats64 = hfi1_vnic_get_stats64,
+};
+
+struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
+ u8 port_num,
+ enum rdma_netdev_t type,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *))
+{
+ struct hfi1_devdata *dd = dd_from_ibdev(device);
+ struct hfi1_vnic_vport_info *vinfo;
+ struct net_device *netdev;
+ struct rdma_netdev *rn;
+ int i, size, rc;
+
+ if (!port_num || (port_num > dd->num_pports))
+ return ERR_PTR(-EINVAL);
+
+ if (type != RDMA_NETDEV_OPA_VNIC)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
+ netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
+ dd->chip_sdma_engines, HFI1_NUM_VNIC_CTXT);
+ if (!netdev)
+ return ERR_PTR(-ENOMEM);
+
+ rn = netdev_priv(netdev);
+ vinfo = opa_vnic_dev_priv(netdev);
+ vinfo->dd = dd;
+ vinfo->num_tx_q = dd->chip_sdma_engines;
+ vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT;
+ vinfo->netdev = netdev;
+ rn->set_id = hfi1_vnic_set_vesw_id;
+
+ netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
+ netdev->hw_features = netdev->features;
+ netdev->vlan_features = netdev->features;
+ netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
+ netdev->netdev_ops = &hfi1_netdev_ops;
+ mutex_init(&vinfo->lock);
+
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
+
+ rxq->idx = i;
+ rxq->vinfo = vinfo;
+ rxq->netdev = netdev;
+ netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
+ }
+
+ rc = hfi1_vnic_init(vinfo);
+ if (rc)
+ goto init_fail;
+
+ return netdev;
+init_fail:
+ mutex_destroy(&vinfo->lock);
+ free_netdev(netdev);
+ return ERR_PTR(rc);
+}
+
+void hfi1_vnic_free_rn(struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+
+ hfi1_vnic_deinit(vinfo);
+ mutex_destroy(&vinfo->lock);
+ free_netdev(netdev);
+}
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
new file mode 100644
index 000000000000..51a817d3aa14
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for VNIC SDMA functionality
+ */
+
+#include "sdma.h"
+#include "vnic.h"
+
+#define HFI1_VNIC_SDMA_Q_ACTIVE BIT(0)
+#define HFI1_VNIC_SDMA_Q_DEFERRED BIT(1)
+
+#define HFI1_VNIC_TXREQ_NAME_LEN 32
+#define HFI1_VNIC_SDMA_DESC_WTRMRK 64
+#define HFI1_VNIC_SDMA_RETRY_COUNT 1
+
+/*
+ * struct vnic_txreq - VNIC transmit descriptor
+ * @txreq: sdma transmit request
+ * @sdma: vnic sdma pointer
+ * @skb: skb to send
+ * @pad: pad buffer
+ * @plen: pad length
+ * @pbc_val: pbc value
+ * @retry_count: tx retry count
+ */
+struct vnic_txreq {
+ struct sdma_txreq txreq;
+ struct hfi1_vnic_sdma *sdma;
+
+ struct sk_buff *skb;
+ unsigned char pad[HFI1_VNIC_MAX_PAD];
+ u16 plen;
+ __le64 pbc_val;
+
+ u32 retry_count;
+};
+
+static void vnic_sdma_complete(struct sdma_txreq *txreq,
+ int status)
+{
+ struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
+ struct hfi1_vnic_sdma *vnic_sdma = tx->sdma;
+
+ sdma_txclean(vnic_sdma->dd, txreq);
+ dev_kfree_skb_any(tx->skb);
+ kmem_cache_free(vnic_sdma->dd->vnic.txreq_cache, tx);
+}
+
+static noinline int build_vnic_ulp_payload(struct sdma_engine *sde,
+ struct vnic_txreq *tx)
+{
+ int i, ret = 0;
+
+ ret = sdma_txadd_kvaddr(
+ sde->dd,
+ &tx->txreq,
+ tx->skb->data,
+ skb_headlen(tx->skb));
+ if (unlikely(ret))
+ goto bail_txadd;
+
+ for (i = 0; i < skb_shinfo(tx->skb)->nr_frags; i++) {
+ struct skb_frag_struct *frag = &skb_shinfo(tx->skb)->frags[i];
+
+ /* combine physically continuous fragments later? */
+ ret = sdma_txadd_page(sde->dd,
+ &tx->txreq,
+ skb_frag_page(frag),
+ frag->page_offset,
+ skb_frag_size(frag));
+ if (unlikely(ret))
+ goto bail_txadd;
+ }
+
+ if (tx->plen)
+ ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq,
+ tx->pad + HFI1_VNIC_MAX_PAD - tx->plen,
+ tx->plen);
+
+bail_txadd:
+ return ret;
+}
+
+static int build_vnic_tx_desc(struct sdma_engine *sde,
+ struct vnic_txreq *tx,
+ u64 pbc)
+{
+ int ret = 0;
+ u16 hdrbytes = 2 << 2; /* PBC */
+
+ ret = sdma_txinit_ahg(
+ &tx->txreq,
+ 0,
+ hdrbytes + tx->skb->len + tx->plen,
+ 0,
+ 0,
+ NULL,
+ 0,
+ vnic_sdma_complete);
+ if (unlikely(ret))
+ goto bail_txadd;
+
+ /* add pbc */
+ tx->pbc_val = cpu_to_le64(pbc);
+ ret = sdma_txadd_kvaddr(
+ sde->dd,
+ &tx->txreq,
+ &tx->pbc_val,
+ hdrbytes);
+ if (unlikely(ret))
+ goto bail_txadd;
+
+ /* add the ulp payload */
+ ret = build_vnic_ulp_payload(sde, tx);
+bail_txadd:
+ return ret;
+}
+
+/* setup the last plen bypes of pad */
+static inline void hfi1_vnic_update_pad(unsigned char *pad, u8 plen)
+{
+ pad[HFI1_VNIC_MAX_PAD - 1] = plen - OPA_VNIC_ICRC_TAIL_LEN;
+}
+
+int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
+ struct hfi1_vnic_vport_info *vinfo,
+ struct sk_buff *skb, u64 pbc, u8 plen)
+{
+ struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[q_idx];
+ struct sdma_engine *sde = vnic_sdma->sde;
+ struct vnic_txreq *tx;
+ int ret = -ECOMM;
+
+ if (unlikely(READ_ONCE(vnic_sdma->state) != HFI1_VNIC_SDMA_Q_ACTIVE))
+ goto tx_err;
+
+ if (unlikely(!sde || !sdma_running(sde)))
+ goto tx_err;
+
+ tx = kmem_cache_alloc(dd->vnic.txreq_cache, GFP_ATOMIC);
+ if (unlikely(!tx)) {
+ ret = -ENOMEM;
+ goto tx_err;
+ }
+
+ tx->sdma = vnic_sdma;
+ tx->skb = skb;
+ hfi1_vnic_update_pad(tx->pad, plen);
+ tx->plen = plen;
+ ret = build_vnic_tx_desc(sde, tx, pbc);
+ if (unlikely(ret))
+ goto free_desc;
+ tx->retry_count = 0;
+
+ ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq);
+ /* When -ECOMM, sdma callback will be called with ABORT status */
+ if (unlikely(ret && unlikely(ret != -ECOMM)))
+ goto free_desc;
+
+ return ret;
+
+free_desc:
+ sdma_txclean(dd, &tx->txreq);
+ kmem_cache_free(dd->vnic.txreq_cache, tx);
+tx_err:
+ if (ret != -EBUSY)
+ dev_kfree_skb_any(skb);
+ return ret;
+}
+
+/*
+ * hfi1_vnic_sdma_sleep - vnic sdma sleep function
+ *
+ * This function gets called from sdma_send_txreq() when there are not enough
+ * sdma descriptors available to send the packet. It adds Tx queue's wait
+ * structure to sdma engine's dmawait list to be woken up when descriptors
+ * become available.
+ */
+static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
+ struct iowait *wait,
+ struct sdma_txreq *txreq,
+ unsigned int seq)
+{
+ struct hfi1_vnic_sdma *vnic_sdma =
+ container_of(wait, struct hfi1_vnic_sdma, wait);
+ struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev;
+ struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
+
+ if (sdma_progress(sde, seq, txreq))
+ if (tx->retry_count++ < HFI1_VNIC_SDMA_RETRY_COUNT)
+ return -EAGAIN;
+
+ vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
+ write_seqlock(&dev->iowait_lock);
+ if (list_empty(&vnic_sdma->wait.list))
+ list_add_tail(&vnic_sdma->wait.list, &sde->dmawait);
+ write_sequnlock(&dev->iowait_lock);
+ return -EBUSY;
+}
+
+/*
+ * hfi1_vnic_sdma_wakeup - vnic sdma wakeup function
+ *
+ * This function gets called when SDMA descriptors becomes available and Tx
+ * queue's wait structure was previously added to sdma engine's dmawait list.
+ * It notifies the upper driver about Tx queue wakeup.
+ */
+static void hfi1_vnic_sdma_wakeup(struct iowait *wait, int reason)
+{
+ struct hfi1_vnic_sdma *vnic_sdma =
+ container_of(wait, struct hfi1_vnic_sdma, wait);
+ struct hfi1_vnic_vport_info *vinfo = vnic_sdma->vinfo;
+
+ vnic_sdma->state = HFI1_VNIC_SDMA_Q_ACTIVE;
+ if (__netif_subqueue_stopped(vinfo->netdev, vnic_sdma->q_idx))
+ netif_wake_subqueue(vinfo->netdev, vnic_sdma->q_idx);
+};
+
+inline bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx)
+{
+ struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[q_idx];
+
+ return (READ_ONCE(vnic_sdma->state) == HFI1_VNIC_SDMA_Q_ACTIVE);
+}
+
+void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
+{
+ int i;
+
+ for (i = 0; i < vinfo->num_tx_q; i++) {
+ struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i];
+
+ iowait_init(&vnic_sdma->wait, 0, NULL, hfi1_vnic_sdma_sleep,
+ hfi1_vnic_sdma_wakeup, NULL);
+ vnic_sdma->sde = &vinfo->dd->per_sdma[i];
+ vnic_sdma->dd = vinfo->dd;
+ vnic_sdma->vinfo = vinfo;
+ vnic_sdma->q_idx = i;
+ vnic_sdma->state = HFI1_VNIC_SDMA_Q_ACTIVE;
+
+ /* Add a free descriptor watermark for wakeups */
+ if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) {
+ INIT_LIST_HEAD(&vnic_sdma->stx.list);
+ vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK;
+ list_add_tail(&vnic_sdma->stx.list,
+ &vnic_sdma->wait.tx_head);
+ }
+ }
+}
+
+static void hfi1_vnic_txreq_kmem_cache_ctor(void *obj)
+{
+ struct vnic_txreq *tx = (struct vnic_txreq *)obj;
+
+ memset(tx, 0, sizeof(*tx));
+}
+
+int hfi1_vnic_txreq_init(struct hfi1_devdata *dd)
+{
+ char buf[HFI1_VNIC_TXREQ_NAME_LEN];
+
+ snprintf(buf, sizeof(buf), "hfi1_%u_vnic_txreq_cache", dd->unit);
+ dd->vnic.txreq_cache = kmem_cache_create(buf,
+ sizeof(struct vnic_txreq),
+ 0, SLAB_HWCACHE_ALIGN,
+ hfi1_vnic_txreq_kmem_cache_ctor);
+ if (!dd->vnic.txreq_cache)
+ return -ENOMEM;
+ return 0;
+}
+
+void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd)
+{
+ kmem_cache_destroy(dd->vnic.txreq_cache);
+ dd->vnic.txreq_cache = NULL;
+}