From 5a52a7acf7e2a812d2852342992cee3dc22ad25d Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Mon, 20 Mar 2017 17:24:58 -0700 Subject: IB/hfi1: NULL pointer dereference when freeing rhashtable A NULL pointer dereference occurs when the driver is unloaded and the SDMA rhashtable is freed even though the rhashtable_init() function has not been called. Prevent this by changing sdma_rht to be a pointer to a dynamically allocated hash table. A non-NULL pointer serves as an indication that the hash table was initialized and that it needs to be destroyed. Fixes: 0cb2aa690c7e ("IB/hfi1: Add sysfs interface for affinity setup") Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/hfi1/hfi.h') diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 0808e3c3ba39..b69ab4736c86 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1167,7 +1167,7 @@ struct hfi1_devdata { bool eprom_available; /* true if EPROM is available for this device */ bool aspm_supported; /* Does HW support ASPM */ bool aspm_enabled; /* ASPM state: enabled/disabled */ - struct rhashtable sdma_rht; + struct rhashtable *sdma_rht; struct kobject kobj; }; -- cgit v1.2.3-55-g7522 From 5e6e94244bba1eb5be3c5ac9ceb3af87280b56d1 Mon Sep 17 00:00:00 2001 From: Michael J. Ruhl Date: Mon, 20 Mar 2017 17:25:48 -0700 Subject: IB/hfi1: Add a patch value to the firmware version string The HFI firmware now includes a patch level in its version. Update the necessary code to include the patch version in the firmware string. Reviewed-by: Easwar Hariharan Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 23 +++++++++++++++-------- drivers/infiniband/hw/hfi1/chip.h | 18 +++++++++++------- drivers/infiniband/hw/hfi1/firmware.c | 14 ++++++++------ drivers/infiniband/hw/hfi1/hfi.h | 9 +++++---- drivers/infiniband/hw/hfi1/verbs.c | 14 ++++++++------ 5 files changed, 47 insertions(+), 31 deletions(-) (limited to 'drivers/infiniband/hw/hfi1/hfi.h') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index f9d0d8c09785..77f4b41de2b0 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -7166,7 +7166,7 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width, * set the max_rate field in handle_verify_cap until v0.19. 
*/ if ((dd->icode == ICODE_RTL_SILICON) && - (dd->dc8051_ver < dc8051_ver(0, 19))) { + (dd->dc8051_ver < dc8051_ver(0, 19, 0))) { /* max_rate: 0 = 12.5G, 1 = 25G */ switch (max_rate) { case 0: @@ -7351,7 +7351,7 @@ void handle_verify_cap(struct work_struct *work) } ppd->link_speed_active = 0; /* invalid value */ - if (dd->dc8051_ver < dc8051_ver(0, 20)) { + if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) { /* remote_tx_rate: 0 = 12.5G, 1 = 25G */ switch (remote_tx_rate) { case 0: @@ -8422,7 +8422,7 @@ static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data) int ret; if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR || - (dd->dc8051_ver < dc8051_ver(0, 20))) { + (dd->dc8051_ver < dc8051_ver(0, 20, 0))) { if (acquire_lcb_access(dd, 0) == 0) { write_csr(dd, addr, data); release_lcb_access(dd, 0); @@ -8728,13 +8728,20 @@ static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id, & REMOTE_DEVICE_REV_MASK; } -void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b) +void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, + u8 *ver_patch) { u32 frame; read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame); - *ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK; - *ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK; + *ver_major = (frame >> STS_FM_VERSION_MAJOR_SHIFT) & + STS_FM_VERSION_MAJOR_MASK; + *ver_minor = (frame >> STS_FM_VERSION_MINOR_SHIFT) & + STS_FM_VERSION_MINOR_MASK; + + read_8051_config(dd, VERSION_PATCH, GENERAL_CONFIG, &frame); + *ver_patch = (frame >> STS_FM_VERSION_PATCH_SHIFT) & + STS_FM_VERSION_PATCH_MASK; } static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management, @@ -9130,7 +9137,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd) if (ret) goto set_local_link_attributes_fail; - if (dd->dc8051_ver < dc8051_ver(0, 20)) { + if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) { /* set the tx rate to the fastest enabled */ if 
(ppd->link_speed_enabled & OPA_LINK_SPEED_25G) ppd->local_tx_rate = 1; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 043fd21dc5f3..24df45fc8722 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1,7 +1,7 @@ #ifndef _CHIP_H #define _CHIP_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -394,7 +394,8 @@ #define LAST_REMOTE_STATE_COMPLETE 0x13 #define LINK_QUALITY_INFO 0x14 #define REMOTE_DEVICE_ID 0x15 -#define LINK_DOWN_REASON 0x16 +#define LINK_DOWN_REASON 0x16 /* first byte of offset 0x16 */ +#define VERSION_PATCH 0x16 /* last byte of offset 0x16 */ /* 8051 lane specific register field IDs */ #define TX_EQ_SETTINGS 0x00 @@ -524,10 +525,12 @@ enum { #define SUPPORTED_CRCS (CAP_CRC_14B | CAP_CRC_48B) /* misc status version fields */ -#define STS_FM_VERSION_A_SHIFT 16 -#define STS_FM_VERSION_A_MASK 0xff -#define STS_FM_VERSION_B_SHIFT 24 -#define STS_FM_VERSION_B_MASK 0xff +#define STS_FM_VERSION_MINOR_SHIFT 16 +#define STS_FM_VERSION_MINOR_MASK 0xff +#define STS_FM_VERSION_MAJOR_SHIFT 24 +#define STS_FM_VERSION_MAJOR_MASK 0xff +#define STS_FM_VERSION_PATCH_SHIFT 24 +#define STS_FM_VERSION_PATCH_MASK 0xff /* LCB_CFG_CRC_MODE TX_VAL and RX_VAL CRC mode values */ #define LCB_CRC_16B 0x0 /* 16b CRC */ @@ -698,7 +701,8 @@ void fabric_serdes_reset(struct hfi1_devdata *dd); int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result); /* chip.c */ -void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b); +void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, + u8 *ver_patch); void read_guid(struct hfi1_devdata *dd); int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout); void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason, diff --git 
a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 0dd50cdb039a..4042c11b2742 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -1004,7 +1004,9 @@ static int load_8051_firmware(struct hfi1_devdata *dd, { u64 reg; int ret; - u8 ver_a, ver_b; + u8 ver_major; + u8 ver_minor; + u8 ver_patch; /* * DC Reset sequence @@ -1073,10 +1075,10 @@ static int load_8051_firmware(struct hfi1_devdata *dd, return -ETIMEDOUT; } - read_misc_status(dd, &ver_a, &ver_b); - dd_dev_info(dd, "8051 firmware version %d.%d\n", - (int)ver_b, (int)ver_a); - dd->dc8051_ver = dc8051_ver(ver_b, ver_a); + read_misc_status(dd, &ver_major, &ver_minor, &ver_patch); + dd_dev_info(dd, "8051 firmware version %d.%d.%d\n", + (int)ver_major, (int)ver_minor, (int)ver_patch); + dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch); return 0; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index b69ab4736c86..a31638cc30ff 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1020,7 +1020,7 @@ struct hfi1_devdata { u8 qos_shift; u16 irev; /* implementation revision */ - u16 dc8051_ver; /* 8051 firmware version */ + u32 dc8051_ver; /* 8051 firmware version */ spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */ struct platform_config platform_config; @@ -1173,9 +1173,10 @@ struct hfi1_devdata { }; /* 8051 firmware version helper */ -#define dc8051_ver(a, b) ((a) << 8 | (b)) -#define dc8051_ver_maj(a) ((a & 0xff00) >> 8) -#define dc8051_ver_min(a) (a & 0x00ff) +#define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c)) +#define dc8051_ver_maj(a) (((a) & 0xff0000) >> 16) +#define dc8051_ver_min(a) (((a) 
& 0x00ff00) >> 8) +#define dc8051_ver_patch(a) ((a) & 0x0000ff) /* f_put_tid types */ #define PT_EXPECTED 0 diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 8d716547da9d..928918cc7d80 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1236,12 +1236,14 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) static void hfi1_fill_device_attr(struct hfi1_devdata *dd) { struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; - u16 ver = dd->dc8051_ver; + u32 ver = dd->dc8051_ver; memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props)); - rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) | - (u64)dc8051_ver_min(ver); + rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 32) | + ((u64)(dc8051_ver_min(ver)) << 16) | + (u64)dc8051_ver_patch(ver); + rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | @@ -1520,10 +1522,10 @@ static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str, { struct rvt_dev_info *rdi = ib_to_rvt(ibdev); struct hfi1_ibdev *dev = dev_from_rdi(rdi); - u16 ver = dd_from_dev(dev)->dc8051_ver; + u32 ver = dd_from_dev(dev)->dc8051_ver; - snprintf(str, str_len, "%u.%u", dc8051_ver_maj(ver), - dc8051_ver_min(ver)); + snprintf(str, str_len, "%u.%u.%u", dc8051_ver_maj(ver), + dc8051_ver_min(ver), dc8051_ver_patch(ver)); } static const char * const driver_cntr_names[] = { -- cgit v1.2.3-55-g7522 From d4829ea6035b89dcddfdcb72d325ca2139f23730 Mon Sep 17 00:00:00 2001 From: Vishwanathapura, Niranjana Date: Wed, 12 Apr 2017 20:29:28 -0700 Subject: IB/hfi1: OPA_VNIC RDMA netdev support Add support to create and free OPA_VNIC rdma netdev devices. Implement netstack interface functionality including xmit_skb, receive side NAPI etc. Also implement rdma netdev control functions. 
Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Andrzej Kacprowski Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/Makefile | 2 +- drivers/infiniband/hw/hfi1/driver.c | 25 +- drivers/infiniband/hw/hfi1/hfi.h | 27 +- drivers/infiniband/hw/hfi1/init.c | 9 +- drivers/infiniband/hw/hfi1/vnic.h | 153 ++++++++ drivers/infiniband/hw/hfi1/vnic_main.c | 644 +++++++++++++++++++++++++++++++++ 6 files changed, 853 insertions(+), 7 deletions(-) create mode 100644 drivers/infiniband/hw/hfi1/vnic.h create mode 100644 drivers/infiniband/hw/hfi1/vnic_main.c (limited to 'drivers/infiniband/hw/hfi1/hfi.h') diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 0cf97a09b64b..22805383de4f 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -12,7 +12,7 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ - verbs_txreq.o + verbs_txreq.o vnic_main.o hfi1-$(CONFIG_DEBUG_FS) += debugfs.o CFLAGS_trace.o = -I$(src) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 64bdbcef5f05..e4dc6a5997e5 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -60,6 +60,7 @@ #include "qp.h" #include "sdma.h" #include "debugfs.h" +#include "vnic.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -1381,15 +1382,31 @@ int process_receive_ib(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } +static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet) +{ + /* Packet received in VNIC context via RSM */ + if (packet->rcd->is_vnic) + return true; + + if ((HFI1_GET_L2_TYPE(packet->ebuf) == OPA_VNIC_L2_TYPE) && + (HFI1_GET_L4_TYPE(packet->ebuf) == OPA_VNIC_L4_ETHR)) + return true; + + return false; +} + int process_receive_bypass(struct hfi1_packet *packet) { struct hfi1_devdata *dd = packet->rcd->dd; - if (unlikely(rhf_err_flags(packet->rhf))) + if (unlikely(rhf_err_flags(packet->rhf))) { handle_eflags(packet); + } else if (hfi1_is_vnic_packet(packet)) { + hfi1_vnic_bypass_rcv(packet); + return RHF_RCV_CONTINUE; + } - dd_dev_err(dd, - "Bypass packets are not supported in normal operation. Dropping\n"); + dd_dev_err(dd, "Unsupported bypass packet. Dropping\n"); incr_cntr64(&dd->sw_rcv_bypass_packet_errors); if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) { u64 *flits = packet->ebuf; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index a31638cc30ff..f85e8f4eae69 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1,7 +1,7 @@ #ifndef _HFI1_KERNEL_H #define _HFI1_KERNEL_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -337,6 +337,12 @@ struct hfi1_ctxtdata { * packets with the wrong interrupt handler. 
*/ int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded); + + /* Indicates that this is vnic context */ + bool is_vnic; + + /* vnic queue index this context is mapped to */ + u8 vnic_q_idx; }; /* @@ -808,6 +814,19 @@ struct hfi1_asic_data { struct hfi1_i2c_bus *i2c_bus1; }; +/* + * Number of VNIC contexts used. Ensure it is less than or equal to + * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE). + */ +#define HFI1_NUM_VNIC_CTXT 8 + +/* Virtual NIC information */ +struct hfi1_vnic_data { + struct idr vesw_idr; +}; + +struct hfi1_vnic_vport_info; + /* device data struct now contains only "general per-device" info. * fields related to a physical IB port are in a hfi1_pportdata struct. */ @@ -1115,6 +1134,9 @@ struct hfi1_devdata { send_routine process_dma_send; void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, const void *from, size_t count); + int (*process_vnic_dma_send)(struct hfi1_devdata *dd, u8 q_idx, + struct hfi1_vnic_vport_info *vinfo, + struct sk_buff *skb, u64 pbc, u8 plen); /* hfi1_pportdata, points to array of (physical) port-specific * data structs, indexed by pidx (0..n-1) */ @@ -1170,6 +1192,9 @@ struct hfi1_devdata { struct rhashtable *sdma_rht; struct kobject kobj; + + /* vnic data */ + struct hfi1_vnic_data vnic; }; /* 8051 firmware version helper */ diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 9bfb8ebe28b1..e84f95d50e79 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -65,6 +65,7 @@ #include "verbs.h" #include "aspm.h" #include "affinity.h" +#include "vnic.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -1498,6 +1499,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* do the generic initialization */ initfail = hfi1_init(dd, 0); + /* setup vnic */ + hfi1_vnic_setup(dd); + ret = hfi1_register_ib_device(dd); /* @@ -1575,6 +1579,9 @@ static void remove_one(struct pci_dev *pdev) /* unregister from IB core */ hfi1_unregister_ib_device(dd); + /* cleanup vnic */ + hfi1_vnic_cleanup(dd); + /* * Disable the IB link, disable interrupts on the device, * clear dma engines, etc. diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h new file mode 100644 index 000000000000..04723b1ab246 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/vnic.h @@ -0,0 +1,153 @@ +#ifndef _HFI1_VNIC_H +#define _HFI1_VNIC_H +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include +#include "hfi.h" + +#define HFI1_VNIC_MAX_TXQ 16 +#define HFI1_VNIC_MAX_PAD 12 + +/* L2 header definitions */ +#define HFI1_L2_TYPE_OFFSET 0x7 +#define HFI1_L2_TYPE_SHFT 0x5 +#define HFI1_L2_TYPE_MASK 0x3 + +#define HFI1_GET_L2_TYPE(hdr) \ + ((*((u8 *)(hdr) + HFI1_L2_TYPE_OFFSET) >> HFI1_L2_TYPE_SHFT) & \ + HFI1_L2_TYPE_MASK) + +/* L4 type definitions */ +#define HFI1_L4_TYPE_OFFSET 8 + +#define HFI1_GET_L4_TYPE(data) \ + (*((u8 *)(data) + HFI1_L4_TYPE_OFFSET)) + +/* L4 header definitions */ +#define HFI1_VNIC_L4_HDR_OFFSET OPA_VNIC_L2_HDR_LEN + +#define HFI1_VNIC_GET_L4_HDR(data) \ + (*((u16 *)((u8 *)(data) + HFI1_VNIC_L4_HDR_OFFSET))) + +#define HFI1_VNIC_GET_VESWID(data) \ + (HFI1_VNIC_GET_L4_HDR(data) & 0xFFF) + +/* Service class */ +#define HFI1_VNIC_SC_OFFSET_LOW 6 +#define HFI1_VNIC_SC_OFFSET_HI 7 +#define HFI1_VNIC_SC_SHIFT 4 + +#define HFI1_VNIC_MAX_QUEUE 16 + +/** + * struct hfi1_vnic_rx_queue - HFI1 VNIC receive queue + * @idx: queue index + * @vinfo: pointer to vport information + * @netdev: network device + * @napi: netdev napi structure + * @skbq: queue of received socket buffers + */ +struct hfi1_vnic_rx_queue { + u8 idx; + struct hfi1_vnic_vport_info *vinfo; + struct net_device *netdev; + struct napi_struct napi; + struct sk_buff_head skbq; +}; + +/** + * struct hfi1_vnic_vport_info - HFI1 VNIC virtual port information + * @dd: device data pointer + * @netdev: net device pointer + * @flags: state flags + * @lock: vport lock + * @num_tx_q: number of transmit queues + * @num_rx_q: number of receive queues + * @vesw_id: virtual switch id + * @rxq: Array of receive queues + * @stats: per queue stats + */ +struct hfi1_vnic_vport_info { + struct hfi1_devdata *dd; + struct net_device *netdev; + unsigned long flags; + + /* Lock used around state updates */ + struct mutex lock; + + u8 num_tx_q; + u8 num_rx_q; + u16 vesw_id; + struct hfi1_vnic_rx_queue rxq[HFI1_NUM_VNIC_CTXT]; + + struct opa_vnic_stats stats[HFI1_VNIC_MAX_QUEUE]; +}; + 
+#define v_dbg(format, arg...) \ + netdev_dbg(vinfo->netdev, format, ## arg) +#define v_err(format, arg...) \ + netdev_err(vinfo->netdev, format, ## arg) +#define v_info(format, arg...) \ + netdev_info(vinfo->netdev, format, ## arg) + +/* vnic hfi1 internal functions */ +void hfi1_vnic_setup(struct hfi1_devdata *dd); +void hfi1_vnic_cleanup(struct hfi1_devdata *dd); + +void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet); + +/* vnic rdma netdev operations */ +struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, + u8 port_num, + enum rdma_netdev_t type, + const char *name, + unsigned char name_assign_type, + void (*setup)(struct net_device *)); +void hfi1_vnic_free_rn(struct net_device *netdev); + +#endif /* _HFI1_VNIC_H */ diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c new file mode 100644 index 000000000000..fb23f9ff6bc1 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -0,0 +1,644 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +/* + * This file contains HFI1 support for VNIC functionality + */ + +#include +#include + +#include "vnic.h" + +#define HFI_TX_TIMEOUT_MS 1000 + +#define HFI1_VNIC_RCV_Q_SIZE 1024 + +#define HFI1_VNIC_UP 0 + +static DEFINE_SPINLOCK(vport_cntr_lock); + +void hfi1_vnic_setup(struct hfi1_devdata *dd) +{ + idr_init(&dd->vnic.vesw_idr); +} + +void hfi1_vnic_cleanup(struct hfi1_devdata *dd) +{ + idr_destroy(&dd->vnic.vesw_idr); +} + +#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \ + u64 *src64, *dst64; \ + for (src64 = &qstats->x_grp.unicast, \ + dst64 = &stats->x_grp.unicast; \ + dst64 <= &stats->x_grp.s_1519_max;) { \ + *dst64++ += *src64++; \ + } \ + } while (0) + +/* hfi1_vnic_update_stats - update statistics */ +static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo, + struct opa_vnic_stats *stats) +{ + struct net_device *netdev = vinfo->netdev; + u8 i; + + /* add tx counters on different queues */ + for (i = 0; i < vinfo->num_tx_q; i++) { + struct opa_vnic_stats *qstats = &vinfo->stats[i]; + struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats; + + stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors; + stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors; + stats->tx_drop_state += qstats->tx_drop_state; + stats->tx_dlid_zero += qstats->tx_dlid_zero; + + SUM_GRP_COUNTERS(stats, qstats, tx_grp); + stats->netstats.tx_packets += qnstats->tx_packets; + stats->netstats.tx_bytes += qnstats->tx_bytes; + } + + /* add rx counters on different queues */ + for (i = 0; i < vinfo->num_rx_q; i++) { + struct opa_vnic_stats *qstats = &vinfo->stats[i]; + struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats; + + stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors; + stats->netstats.rx_nohandler += qnstats->rx_nohandler; + stats->rx_drop_state += qstats->rx_drop_state; + stats->rx_oversize += qstats->rx_oversize; + stats->rx_runt += qstats->rx_runt; + + SUM_GRP_COUNTERS(stats, qstats, rx_grp); + 
stats->netstats.rx_packets += qnstats->rx_packets; + stats->netstats.rx_bytes += qnstats->rx_bytes; + } + + stats->netstats.tx_errors = stats->netstats.tx_fifo_errors + + stats->netstats.tx_carrier_errors + + stats->tx_drop_state + stats->tx_dlid_zero; + stats->netstats.tx_dropped = stats->netstats.tx_errors; + + stats->netstats.rx_errors = stats->netstats.rx_fifo_errors + + stats->netstats.rx_nohandler + + stats->rx_drop_state + stats->rx_oversize + + stats->rx_runt; + stats->netstats.rx_dropped = stats->netstats.rx_errors; + + netdev->stats.tx_packets = stats->netstats.tx_packets; + netdev->stats.tx_bytes = stats->netstats.tx_bytes; + netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors; + netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors; + netdev->stats.tx_errors = stats->netstats.tx_errors; + netdev->stats.tx_dropped = stats->netstats.tx_dropped; + + netdev->stats.rx_packets = stats->netstats.rx_packets; + netdev->stats.rx_bytes = stats->netstats.rx_bytes; + netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors; + netdev->stats.multicast = stats->rx_grp.mcastbcast; + netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt; + netdev->stats.rx_errors = stats->netstats.rx_errors; + netdev->stats.rx_dropped = stats->netstats.rx_dropped; +} + +/* update_len_counters - update pkt's len histogram counters */ +static inline void update_len_counters(struct opa_vnic_grp_stats *grp, + int len) +{ + /* account for 4 byte FCS */ + if (len >= 1515) + grp->s_1519_max++; + else if (len >= 1020) + grp->s_1024_1518++; + else if (len >= 508) + grp->s_512_1023++; + else if (len >= 252) + grp->s_256_511++; + else if (len >= 124) + grp->s_128_255++; + else if (len >= 61) + grp->s_65_127++; + else + grp->s_64++; +} + +/* hfi1_vnic_update_tx_counters - update transmit counters */ +static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo, + u8 q_idx, struct sk_buff *skb, int err) +{ + struct ethhdr *mac_hdr = (struct 
ethhdr *)skb_mac_header(skb); + struct opa_vnic_stats *stats = &vinfo->stats[q_idx]; + struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp; + u16 vlan_tci; + + stats->netstats.tx_packets++; + stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN; + + update_len_counters(tx_grp, skb->len); + + /* rest of the counts are for good packets only */ + if (unlikely(err)) + return; + + if (is_multicast_ether_addr(mac_hdr->h_dest)) + tx_grp->mcastbcast++; + else + tx_grp->unicast++; + + if (!__vlan_get_tag(skb, &vlan_tci)) + tx_grp->vlan++; + else + tx_grp->untagged++; +} + +/* hfi1_vnic_update_rx_counters - update receive counters */ +static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo, + u8 q_idx, struct sk_buff *skb, int err) +{ + struct ethhdr *mac_hdr = (struct ethhdr *)skb->data; + struct opa_vnic_stats *stats = &vinfo->stats[q_idx]; + struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp; + u16 vlan_tci; + + stats->netstats.rx_packets++; + stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN; + + update_len_counters(rx_grp, skb->len); + + /* rest of the counts are for good packets only */ + if (unlikely(err)) + return; + + if (is_multicast_ether_addr(mac_hdr->h_dest)) + rx_grp->mcastbcast++; + else + rx_grp->unicast++; + + if (!__vlan_get_tag(skb, &vlan_tci)) + rx_grp->vlan++; + else + rx_grp->untagged++; +} + +/* This function is overloaded for opa_vnic specific implementation */ +static void hfi1_vnic_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats; + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + + hfi1_vnic_update_stats(vinfo, vstats); +} + +static u64 create_bypass_pbc(u32 vl, u32 dw_len) +{ + u64 pbc; + + pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT) + | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN + | PBC_PACKET_BYPASS + | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT) + | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT; + + return 
pbc; +} + +/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */ +static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo, + u8 q_idx) +{ + netif_stop_subqueue(vinfo->netdev, q_idx); +} + +static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + u8 pad_len, q_idx = skb->queue_mapping; + struct hfi1_devdata *dd = vinfo->dd; + struct opa_vnic_skb_mdata *mdata; + u32 pkt_len, total_len; + int err = -EINVAL; + u64 pbc; + + v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len); + if (unlikely(!netif_oper_up(netdev))) { + vinfo->stats[q_idx].tx_drop_state++; + goto tx_finish; + } + + /* take out meta data */ + mdata = (struct opa_vnic_skb_mdata *)skb->data; + skb_pull(skb, sizeof(*mdata)); + if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) { + vinfo->stats[q_idx].tx_dlid_zero++; + goto tx_finish; + } + + /* add tail padding (for 8 bytes size alignment) and icrc */ + pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7; + pad_len += OPA_VNIC_ICRC_TAIL_LEN; + + /* + * pkt_len is how much data we have to write, includes header and data. + * total_len is length of the packet in Dwords plus the PBC should not + * include the CRC. 
+ */ + pkt_len = (skb->len + pad_len) >> 2; + total_len = pkt_len + 2; /* PBC + packet */ + + pbc = create_bypass_pbc(mdata->vl, total_len); + + skb_get(skb); + v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len); + err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len); + if (unlikely(err)) { + if (err == -ENOMEM) + vinfo->stats[q_idx].netstats.tx_fifo_errors++; + else if (err != -EBUSY) + vinfo->stats[q_idx].netstats.tx_carrier_errors++; + } + /* remove the header before updating tx counters */ + skb_pull(skb, OPA_VNIC_HDR_LEN); + + if (unlikely(err == -EBUSY)) { + hfi1_vnic_maybe_stop_tx(vinfo, q_idx); + dev_kfree_skb_any(skb); + return NETDEV_TX_BUSY; + } + +tx_finish: + /* update tx counters */ + hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +static u16 hfi1_vnic_select_queue(struct net_device *netdev, + struct sk_buff *skb, + void *accel_priv, + select_queue_fallback_t fallback) +{ + return 0; +} + +/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */ +static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq, + struct sk_buff *skb) +{ + struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; + int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN; + int rc = -EFAULT; + + skb_pull(skb, OPA_VNIC_HDR_LEN); + + /* Validate Packet length */ + if (unlikely(skb->len > max_len)) + vinfo->stats[rxq->idx].rx_oversize++; + else if (unlikely(skb->len < ETH_ZLEN)) + vinfo->stats[rxq->idx].rx_runt++; + else + rc = 0; + return rc; +} + +static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq) +{ + unsigned char *pad_info; + struct sk_buff *skb; + + skb = skb_dequeue(&rxq->skbq); + if (unlikely(!skb)) + return NULL; + + /* remove tail padding and icrc */ + pad_info = skb->data + skb->len - 1; + skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN - + ((*pad_info) & 0x7))); + + return skb; +} + +/* hfi1_vnic_handle_rx - handle skb receive */ 
+static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq, + int *work_done, int work_to_do) +{ + struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; + struct sk_buff *skb; + int rc; + + while (1) { + if (*work_done >= work_to_do) + break; + + skb = hfi1_vnic_get_skb(rxq); + if (unlikely(!skb)) + break; + + rc = hfi1_vnic_decap_skb(rxq, skb); + /* update rx counters */ + hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc); + if (unlikely(rc)) { + dev_kfree_skb_any(skb); + continue; + } + + skb_checksum_none_assert(skb); + skb->protocol = eth_type_trans(skb, rxq->netdev); + + napi_gro_receive(&rxq->napi, skb); + (*work_done)++; + } +} + +/* hfi1_vnic_napi - napi receive polling callback function */ +static int hfi1_vnic_napi(struct napi_struct *napi, int budget) +{ + struct hfi1_vnic_rx_queue *rxq = container_of(napi, + struct hfi1_vnic_rx_queue, napi); + struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; + int work_done = 0; + + v_dbg("napi %d budget %d\n", rxq->idx, budget); + hfi1_vnic_handle_rx(rxq, &work_done, budget); + + v_dbg("napi %d work_done %d\n", rxq->idx, work_done); + if (work_done < budget) + napi_complete(napi); + + return work_done; +} + +void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) +{ + struct hfi1_devdata *dd = packet->rcd->dd; + struct hfi1_vnic_vport_info *vinfo = NULL; + struct hfi1_vnic_rx_queue *rxq; + struct sk_buff *skb; + int l4_type, vesw_id = -1; + u8 q_idx; + + l4_type = HFI1_GET_L4_TYPE(packet->ebuf); + if (likely(l4_type == OPA_VNIC_L4_ETHR)) { + vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf); + vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id); + + /* + * In case of invalid vesw id, count the error on + * the first available vport. 
+ */ + if (unlikely(!vinfo)) { + struct hfi1_vnic_vport_info *vinfo_tmp; + int id_tmp = 0; + + vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp); + if (vinfo_tmp) { + spin_lock(&vport_cntr_lock); + vinfo_tmp->stats[0].netstats.rx_nohandler++; + spin_unlock(&vport_cntr_lock); + } + } + } + + if (unlikely(!vinfo)) { + dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n", + l4_type, vesw_id, packet->rcd->ctxt); + return; + } + + q_idx = packet->rcd->vnic_q_idx; + rxq = &vinfo->rxq[q_idx]; + if (unlikely(!netif_oper_up(vinfo->netdev))) { + vinfo->stats[q_idx].rx_drop_state++; + skb_queue_purge(&rxq->skbq); + return; + } + + if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) { + vinfo->stats[q_idx].netstats.rx_fifo_errors++; + return; + } + + skb = netdev_alloc_skb(vinfo->netdev, packet->tlen); + if (unlikely(!skb)) { + vinfo->stats[q_idx].netstats.rx_fifo_errors++; + return; + } + + memcpy(skb->data, packet->ebuf, packet->tlen); + skb_put(skb, packet->tlen); + skb_queue_tail(&rxq->skbq, skb); + + if (napi_schedule_prep(&rxq->napi)) { + v_dbg("napi %d scheduling\n", q_idx); + __napi_schedule(&rxq->napi); + } +} + +static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo) +{ + struct hfi1_devdata *dd = vinfo->dd; + struct net_device *netdev = vinfo->netdev; + int i, rc; + + /* ensure virtual eth switch id is valid */ + if (!vinfo->vesw_id) + return -EINVAL; + + rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id, + vinfo->vesw_id + 1, GFP_NOWAIT); + if (rc < 0) + return rc; + + for (i = 0; i < vinfo->num_rx_q; i++) { + struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; + + skb_queue_head_init(&rxq->skbq); + napi_enable(&rxq->napi); + } + + netif_carrier_on(netdev); + netif_tx_start_all_queues(netdev); + set_bit(HFI1_VNIC_UP, &vinfo->flags); + + return 0; +} + +static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo) +{ + struct hfi1_devdata *dd = vinfo->dd; + u8 i; + + clear_bit(HFI1_VNIC_UP, &vinfo->flags); + 
netif_carrier_off(vinfo->netdev); + netif_tx_disable(vinfo->netdev); + idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id); + + /* remove unread skbs */ + for (i = 0; i < vinfo->num_rx_q; i++) { + struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; + + napi_disable(&rxq->napi); + skb_queue_purge(&rxq->skbq); + } +} + +static int hfi1_netdev_open(struct net_device *netdev) +{ + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + int rc; + + mutex_lock(&vinfo->lock); + rc = hfi1_vnic_up(vinfo); + mutex_unlock(&vinfo->lock); + return rc; +} + +static int hfi1_netdev_close(struct net_device *netdev) +{ + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + + mutex_lock(&vinfo->lock); + if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) + hfi1_vnic_down(vinfo); + mutex_unlock(&vinfo->lock); + return 0; +} + +static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id) +{ + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + bool reopen = false; + + /* + * If vesw_id is being changed, and if the vnic port is up, + * reset the vnic port to ensure new vesw_id gets picked up + */ + if (id != vinfo->vesw_id) { + mutex_lock(&vinfo->lock); + if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) { + hfi1_vnic_down(vinfo); + reopen = true; + } + + vinfo->vesw_id = id; + if (reopen) + hfi1_vnic_up(vinfo); + + mutex_unlock(&vinfo->lock); + } +} + +/* netdev ops */ +static const struct net_device_ops hfi1_netdev_ops = { + .ndo_open = hfi1_netdev_open, + .ndo_stop = hfi1_netdev_close, + .ndo_start_xmit = hfi1_netdev_start_xmit, + .ndo_select_queue = hfi1_vnic_select_queue, + .ndo_get_stats64 = hfi1_vnic_get_stats64, +}; + +struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, + u8 port_num, + enum rdma_netdev_t type, + const char *name, + unsigned char name_assign_type, + void (*setup)(struct net_device *)) +{ + struct hfi1_devdata *dd = dd_from_ibdev(device); + struct hfi1_vnic_vport_info *vinfo; + struct net_device *netdev; + struct rdma_netdev *rn; 
+ int i, size; + + if (!port_num || (port_num > dd->num_pports)) + return ERR_PTR(-EINVAL); + + if (type != RDMA_NETDEV_OPA_VNIC) + return ERR_PTR(-EOPNOTSUPP); + + size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); + netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, + dd->chip_sdma_engines, HFI1_NUM_VNIC_CTXT); + if (!netdev) + return ERR_PTR(-ENOMEM); + + rn = netdev_priv(netdev); + vinfo = opa_vnic_dev_priv(netdev); + vinfo->dd = dd; + vinfo->num_tx_q = dd->chip_sdma_engines; + vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT; + vinfo->netdev = netdev; + rn->set_id = hfi1_vnic_set_vesw_id; + + netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG; + netdev->hw_features = netdev->features; + netdev->vlan_features = netdev->features; + netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS); + netdev->netdev_ops = &hfi1_netdev_ops; + mutex_init(&vinfo->lock); + + for (i = 0; i < vinfo->num_rx_q; i++) { + struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; + + rxq->idx = i; + rxq->vinfo = vinfo; + rxq->netdev = netdev; + netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64); + } + + return netdev; +} + +void hfi1_vnic_free_rn(struct net_device *netdev) +{ + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + + mutex_destroy(&vinfo->lock); + free_netdev(netdev); +} -- cgit v1.2.3-55-g7522 From 2280740f01aee0883a2885f332aee27449390a4b Mon Sep 17 00:00:00 2001 From: Vishwanathapura, Niranjana Date: Wed, 12 Apr 2017 20:29:29 -0700 Subject: IB/hfi1: Virtual Network Interface Controller (VNIC) HW support HFI1 HW specific support for VNIC functionality. Dynamically allocate a set of contexts for VNIC when the first vnic port is instantiated. Allocate VNIC contexts from user contexts pool and return them back to the same pool while freeing up. Set aside enough MSI-X interrupts for VNIC contexts and assign them when the contexts are allocated. On the receive side, use an RSM rule to spread TCP/UDP streams among VNIC contexts. 
Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Andrzej Kacprowski Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/aspm.h | 15 +- drivers/infiniband/hw/hfi1/chip.c | 291 +++++++++++++++++++++++++----- drivers/infiniband/hw/hfi1/chip.h | 2 + drivers/infiniband/hw/hfi1/debugfs.c | 10 +- drivers/infiniband/hw/hfi1/driver.c | 52 ++++-- drivers/infiniband/hw/hfi1/file_ops.c | 27 ++- drivers/infiniband/hw/hfi1/hfi.h | 29 ++- drivers/infiniband/hw/hfi1/init.c | 29 +-- drivers/infiniband/hw/hfi1/mad.c | 10 +- drivers/infiniband/hw/hfi1/pio.c | 19 +- drivers/infiniband/hw/hfi1/pio.h | 8 +- drivers/infiniband/hw/hfi1/sysfs.c | 4 +- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 8 +- drivers/infiniband/hw/hfi1/user_pages.c | 5 +- drivers/infiniband/hw/hfi1/verbs.c | 6 +- drivers/infiniband/hw/hfi1/vnic.h | 3 + drivers/infiniband/hw/hfi1/vnic_main.c | 245 ++++++++++++++++++++++++- include/rdma/opa_port_info.h | 3 +- 18 files changed, 661 insertions(+), 105 deletions(-) (limited to 'drivers/infiniband/hw/hfi1/hfi.h') diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h index 0d58fe3b49b5..794e6814a531 100644 --- a/drivers/infiniband/hw/hfi1/aspm.h +++ b/drivers/infiniband/hw/hfi1/aspm.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -229,14 +229,17 @@ static inline void aspm_ctx_timer_function(unsigned long data) spin_unlock_irqrestore(&rcd->aspm_lock, flags); } -/* Disable interrupt processing for verbs contexts when PSM contexts are open */ +/* + * Disable interrupt processing for verbs contexts when PSM or VNIC contexts + * are open. 
+ */ static inline void aspm_disable_all(struct hfi1_devdata *dd) { struct hfi1_ctxtdata *rcd; unsigned long flags; unsigned i; - for (i = 0; i < dd->first_user_ctxt; i++) { + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { rcd = dd->rcd[i]; del_timer_sync(&rcd->aspm_timer); spin_lock_irqsave(&rcd->aspm_lock, flags); @@ -260,7 +263,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd) if (aspm_mode != ASPM_MODE_DYNAMIC) return; - for (i = 0; i < dd->first_user_ctxt; i++) { + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { rcd = dd->rcd[i]; spin_lock_irqsave(&rcd->aspm_lock, flags); rcd->aspm_intr_enable = true; @@ -276,7 +279,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd) (unsigned long)rcd); rcd->aspm_intr_supported = rcd->dd->aspm_supported && aspm_mode == ASPM_MODE_DYNAMIC && - rcd->ctxt < rcd->dd->first_user_ctxt; + rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt; } static inline void aspm_init(struct hfi1_devdata *dd) @@ -286,7 +289,7 @@ static inline void aspm_init(struct hfi1_devdata *dd) spin_lock_init(&dd->aspm_lock); dd->aspm_supported = aspm_hw_l1_supported(dd); - for (i = 0; i < dd->first_user_ctxt; i++) + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) aspm_ctx_init(dd->rcd[i]); /* Start with ASPM disabled */ diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 79a316acb8f4..e520929ac501 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -126,9 +126,16 @@ struct flag_table { #define DEFAULT_KRCVQS 2 #define MIN_KERNEL_KCTXTS 2 #define FIRST_KERNEL_KCTXT 1 -/* sizes for both the QP and RSM map tables */ -#define NUM_MAP_ENTRIES 256 -#define NUM_MAP_REGS 32 + +/* + * RSM instance allocation + * 0 - Verbs + * 1 - User Fecn Handling + * 2 - Vnic + */ +#define RSM_INS_VERBS 0 +#define RSM_INS_FECN 1 +#define RSM_INS_VNIC 2 /* Bit offset into the GUID which carries HFI id information */ #define GUID_HFI_INDEX_SHIFT 39 @@ -139,8 +146,7 @@ struct flag_table { 
#define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3) #define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4) -/* RSM fields */ - +/* RSM fields for Verbs */ /* packet type */ #define IB_PACKET_TYPE 2ull #define QW_SHIFT 6ull @@ -170,6 +176,28 @@ struct flag_table { /* QPN[m+n:1] QW 1, OFFSET 1 */ #define QPN_SELECT_OFFSET ((1ull << QW_SHIFT) | (1ull)) +/* RSM fields for Vnic */ +/* L2_TYPE: QW 0, OFFSET 61 - for match */ +#define L2_TYPE_QW 0ull +#define L2_TYPE_BIT_OFFSET 61ull +#define L2_TYPE_OFFSET(off) ((L2_TYPE_QW << QW_SHIFT) | (off)) +#define L2_TYPE_MATCH_OFFSET L2_TYPE_OFFSET(L2_TYPE_BIT_OFFSET) +#define L2_TYPE_MASK 3ull +#define L2_16B_VALUE 2ull + +/* L4_TYPE QW 1, OFFSET 0 - for match */ +#define L4_TYPE_QW 1ull +#define L4_TYPE_BIT_OFFSET 0ull +#define L4_TYPE_OFFSET(off) ((L4_TYPE_QW << QW_SHIFT) | (off)) +#define L4_TYPE_MATCH_OFFSET L4_TYPE_OFFSET(L4_TYPE_BIT_OFFSET) +#define L4_16B_TYPE_MASK 0xFFull +#define L4_16B_ETH_VALUE 0x78ull + +/* 16B VESWID - for select */ +#define L4_16B_HDR_VESWID_OFFSET ((2 << QW_SHIFT) | (16ull)) +/* 16B ENTROPY - for select */ +#define L2_16B_ENTROPY_OFFSET ((1 << QW_SHIFT) | (32ull)) + /* defines to build power on SC2VL table */ #define SC2VL_VAL( \ num, \ @@ -1047,6 +1075,7 @@ static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, unsigned int *np); static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd); static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms); +static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index); /* * Error interrupt table entry. 
This is used as input to the interrupt @@ -6703,7 +6732,13 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd) int i; /* enable all kernel contexts */ - for (i = 0; i < dd->n_krcv_queues; i++) { + for (i = 0; i < dd->num_rcv_contexts; i++) { + struct hfi1_ctxtdata *rcd = dd->rcd[i]; + + /* Ensure all non-user contexts(including vnic) are enabled */ + if (!rcd || !rcd->sc || (rcd->sc->type == SC_USER)) + continue; + rcvmask = HFI1_RCVCTRL_CTXT_ENB; /* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */ rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ? @@ -8000,7 +8035,9 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) if (likely(source < dd->num_rcv_contexts)) { rcd = dd->rcd[source]; if (rcd) { - if (source < dd->first_user_ctxt) + /* Check for non-user contexts, including vnic */ + if ((source < dd->first_dyn_alloc_ctxt) || + (rcd->sc && (rcd->sc->type == SC_KERNEL))) rcd->do_interrupt(rcd, 0); else handle_user_interrupt(rcd); @@ -8028,7 +8065,8 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source) rcd = dd->rcd[source]; if (rcd) { /* only pay attention to user urgent interrupts */ - if (source >= dd->first_user_ctxt) + if ((source >= dd->first_dyn_alloc_ctxt) && + (!rcd->sc || (rcd->sc->type == SC_USER))) handle_user_interrupt(rcd); return; /* OK */ } @@ -12842,7 +12880,10 @@ static int request_msix_irqs(struct hfi1_devdata *dd) first_sdma = last_general; last_sdma = first_sdma + dd->num_sdma; first_rx = last_sdma; - last_rx = first_rx + dd->n_krcv_queues; + last_rx = first_rx + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT; + + /* VNIC MSIx interrupts get mapped when VNIC contexts are created */ + dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues; /* * Sanity check - the code expects all SDMA chip source @@ -12856,7 +12897,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) const char *err_info; irq_handler_t handler; irq_handler_t thread = NULL; - void *arg; + void *arg = 
NULL; int idx; struct hfi1_ctxtdata *rcd = NULL; struct sdma_engine *sde = NULL; @@ -12883,24 +12924,25 @@ static int request_msix_irqs(struct hfi1_devdata *dd) } else if (first_rx <= i && i < last_rx) { idx = i - first_rx; rcd = dd->rcd[idx]; - /* no interrupt if no rcd */ - if (!rcd) - continue; - /* - * Set the interrupt register and mask for this - * context's interrupt. - */ - rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; - rcd->imask = ((u64)1) << - ((IS_RCVAVAIL_START + idx) % 64); - handler = receive_context_interrupt; - thread = receive_context_thread; - arg = rcd; - snprintf(me->name, sizeof(me->name), - DRIVER_NAME "_%d kctxt%d", dd->unit, idx); - err_info = "receive context"; - remap_intr(dd, IS_RCVAVAIL_START + idx, i); - me->type = IRQ_RCVCTXT; + if (rcd) { + /* + * Set the interrupt register and mask for this + * context's interrupt. + */ + rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; + rcd->imask = ((u64)1) << + ((IS_RCVAVAIL_START + idx) % 64); + handler = receive_context_interrupt; + thread = receive_context_thread; + arg = rcd; + snprintf(me->name, sizeof(me->name), + DRIVER_NAME "_%d kctxt%d", + dd->unit, idx); + err_info = "receive context"; + remap_intr(dd, IS_RCVAVAIL_START + idx, i); + me->type = IRQ_RCVCTXT; + rcd->msix_intr = i; + } } else { /* not in our expected range - complain, then * ignore it @@ -12938,6 +12980,84 @@ static int request_msix_irqs(struct hfi1_devdata *dd) return ret; } +void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd) +{ + int i; + + if (!dd->num_msix_entries) { + synchronize_irq(dd->pcidev->irq); + return; + } + + for (i = 0; i < dd->vnic.num_ctxt; i++) { + struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; + struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; + + synchronize_irq(me->msix.vector); + } +} + +void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd) +{ + struct hfi1_devdata *dd = rcd->dd; + struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; + + if (!me->arg) /* => no irq, no 
affinity */ + return; + + hfi1_put_irq_affinity(dd, me); + free_irq(me->msix.vector, me->arg); + + me->arg = NULL; +} + +/* + * hfi1_set_vnic_msix_info - attach the next dynamic MSI-X entry to @rcd. + * Takes the entry at dd->vnic.msix_idx, programs the receive-available + * interrupt register/mask for the context, remaps the chip source to that + * entry and requests the threaded receive IRQ, then asks for IRQ affinity. + * Failures are logged only; me->arg is non-NULL on return only while the + * IRQ is still held, which is what hfi1_reset_vnic_msix_info() keys off. + */ +void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd) +{ + struct hfi1_devdata *dd = rcd->dd; + struct hfi1_msix_entry *me; + int idx = rcd->ctxt; + void *arg = rcd; + int ret; + + rcd->msix_intr = dd->vnic.msix_idx++; + me = &dd->msix_entries[rcd->msix_intr]; + + /* + * Set the interrupt register and mask for this + * context's interrupt. + */ + rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; + rcd->imask = ((u64)1) << + ((IS_RCVAVAIL_START + idx) % 64); + + snprintf(me->name, sizeof(me->name), + DRIVER_NAME "_%d kctxt%d", dd->unit, idx); + me->name[sizeof(me->name) - 1] = 0; + me->type = IRQ_RCVCTXT; + + remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr); + + ret = request_threaded_irq(me->msix.vector, receive_context_interrupt, + receive_context_thread, 0, me->name, arg); + if (ret) { + dd_dev_err(dd, "vnic irq request (vector %d, idx %d) fail %d\n", + me->msix.vector, idx, ret); + return; + } + /* + * assign arg after request_irq call, so it will be + * cleaned up + */ + me->arg = arg; + + ret = hfi1_get_irq_affinity(dd, me); + if (ret) { + dd_dev_err(dd, + "unable to pin IRQ %d\n", ret); + free_irq(me->msix.vector, me->arg); + /* + * The IRQ is gone: clear the marker so that + * hfi1_reset_vnic_msix_info() does not free it again. + */ + me->arg = NULL; + } +} + /* * Set the general handler to accept all interrupts, remap all * chip interrupts back to MSI-X 0.
@@ -12969,7 +13089,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd) * N interrupts - one per used SDMA engine * M interrupt - one per kernel receive context */ - total = 1 + dd->num_sdma + dd->n_krcv_queues; + total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT; entries = kcalloc(total, sizeof(*entries), GFP_KERNEL); if (!entries) { @@ -13034,7 +13154,8 @@ fail: * * num_rcv_contexts - number of contexts being used * n_krcv_queues - number of kernel contexts - * first_user_ctxt - first non-kernel context in array of contexts + * first_dyn_alloc_ctxt - first dynamically allocated context + * in array of contexts * freectxts - number of free user contexts * num_send_contexts - number of PIO send contexts being used */ @@ -13111,10 +13232,14 @@ static int set_up_context_variables(struct hfi1_devdata *dd) total_contexts = num_kernel_contexts + num_user_contexts; } - /* the first N are kernel contexts, the rest are user contexts */ + /* Accommodate VNIC contexts */ + if ((total_contexts + HFI1_NUM_VNIC_CTXT) <= dd->chip_rcv_contexts) + total_contexts += HFI1_NUM_VNIC_CTXT; + + /* the first N are kernel contexts, the rest are user/vnic contexts */ dd->num_rcv_contexts = total_contexts; dd->n_krcv_queues = num_kernel_contexts; - dd->first_user_ctxt = num_kernel_contexts; + dd->first_dyn_alloc_ctxt = num_kernel_contexts; dd->num_user_contexts = num_user_contexts; dd->freectxts = num_user_contexts; dd_dev_info(dd, @@ -13570,11 +13695,8 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd) write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0); for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++) write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0); - for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) { - write_csr(dd, RCV_RSM_CFG + (8 * i), 0); - write_csr(dd, RCV_RSM_SELECT + (8 * i), 0); - write_csr(dd, RCV_RSM_MATCH + (8 * i), 0); - } + for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) + clear_rsm_rule(dd, i); for (i = 0; i < 32; i++) write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 
0); @@ -13933,6 +14055,16 @@ static void add_rsm_rule(struct hfi1_devdata *dd, u8 rule_index, (u64)rrd->value2 << RCV_RSM_MATCH_VALUE2_SHIFT); } +/* + * Clear a receive side mapping rule. + */ +static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index) +{ + write_csr(dd, RCV_RSM_CFG + (8 * rule_index), 0); + write_csr(dd, RCV_RSM_SELECT + (8 * rule_index), 0); + write_csr(dd, RCV_RSM_MATCH + (8 * rule_index), 0); +} + /* return the number of RSM map table entries that will be used for QOS */ static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, unsigned int *np) @@ -14048,7 +14180,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt) rrd.value2 = LRH_SC_VALUE; /* add rule 0 */ - add_rsm_rule(dd, 0, &rrd); + add_rsm_rule(dd, RSM_INS_VERBS, &rrd); /* mark RSM map entries as used */ rmt->used += rmt_entries; @@ -14078,7 +14210,7 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd, /* * RSM will extract the destination context as an index into the * map table. The destination contexts are a sequential block - * in the range first_user_ctxt...num_rcv_contexts-1 (inclusive). + * in the range first_dyn_alloc_ctxt...num_rcv_contexts-1 (inclusive). * Map entries are accessed as offset + extracted value. Adjust * the added offset so this sequence can be placed anywhere in * the table - as long as the entries themselves do not wrap. @@ -14086,9 +14218,9 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd, * start with that to allow for a "negative" offset. 
*/ offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used - - (int)dd->first_user_ctxt); + (int)dd->first_dyn_alloc_ctxt); - for (i = dd->first_user_ctxt, idx = rmt->used; + for (i = dd->first_dyn_alloc_ctxt, idx = rmt->used; i < dd->num_rcv_contexts; i++, idx++) { /* replace with identity mapping */ regoff = (idx % 8) * 8; @@ -14122,11 +14254,84 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd, rrd.value2 = 1; /* add rule 1 */ - add_rsm_rule(dd, 1, &rrd); + add_rsm_rule(dd, RSM_INS_FECN, &rrd); rmt->used += dd->num_user_contexts; } +/* Initialize RSM for VNIC */ +void hfi1_init_vnic_rsm(struct hfi1_devdata *dd) +{ + u8 i, j; + u8 ctx_id = 0; + u64 reg; + u32 regoff; + struct rsm_rule_data rrd; + + if (hfi1_vnic_is_rsm_full(dd, NUM_VNIC_MAP_ENTRIES)) { + dd_dev_err(dd, "Vnic RSM disabled, rmt entries used = %d\n", + dd->vnic.rmt_start); + return; + } + + dev_dbg(&(dd)->pcidev->dev, "Vnic rsm start = %d, end %d\n", + dd->vnic.rmt_start, + dd->vnic.rmt_start + NUM_VNIC_MAP_ENTRIES); + + /* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */ + regoff = RCV_RSM_MAP_TABLE + (dd->vnic.rmt_start / 8) * 8; + reg = read_csr(dd, regoff); + for (i = 0; i < NUM_VNIC_MAP_ENTRIES; i++) { + /* Update map register with vnic context */ + j = (dd->vnic.rmt_start + i) % 8; + reg &= ~(0xffllu << (j * 8)); + reg |= (u64)dd->vnic.ctxt[ctx_id++]->ctxt << (j * 8); + /* Wrap up vnic ctx index */ + ctx_id %= dd->vnic.num_ctxt; + /* Write back map register */ + if (j == 7 || ((i + 1) == NUM_VNIC_MAP_ENTRIES)) { + dev_dbg(&(dd)->pcidev->dev, + "Vnic rsm map reg[%d] =0x%llx\n", + regoff - RCV_RSM_MAP_TABLE, reg); + + write_csr(dd, regoff, reg); + regoff += 8; + if (i < (NUM_VNIC_MAP_ENTRIES - 1)) + reg = read_csr(dd, regoff); + } + } + + /* Add rule for vnic */ + rrd.offset = dd->vnic.rmt_start; + rrd.pkt_type = 4; + /* Match 16B packets */ + rrd.field1_off = L2_TYPE_MATCH_OFFSET; + rrd.mask1 = L2_TYPE_MASK; + rrd.value1 = L2_16B_VALUE; + /* Match ETH L4 packets */ + 
rrd.field2_off = L4_TYPE_MATCH_OFFSET; + rrd.mask2 = L4_16B_TYPE_MASK; + rrd.value2 = L4_16B_ETH_VALUE; + /* Calc context from veswid and entropy */ + rrd.index1_off = L4_16B_HDR_VESWID_OFFSET; + rrd.index1_width = ilog2(NUM_VNIC_MAP_ENTRIES); + rrd.index2_off = L2_16B_ENTROPY_OFFSET; + rrd.index2_width = ilog2(NUM_VNIC_MAP_ENTRIES); + add_rsm_rule(dd, RSM_INS_VNIC, &rrd); + + /* Enable RSM if not already enabled */ + add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK); +} + +void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd) +{ + clear_rsm_rule(dd, RSM_INS_VNIC); + + /* Disable RSM if used only by vnic */ + if (dd->vnic.rmt_start == 0) + clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK); +} + static void init_rxe(struct hfi1_devdata *dd) { struct rsm_map_table *rmt; @@ -14139,6 +14344,8 @@ static void init_rxe(struct hfi1_devdata *dd) init_qos(dd, rmt); init_user_fecn_handling(dd, rmt); complete_rsm_map_table(dd, rmt); + /* record number of used rsm map entries for vnic */ + dd->vnic.rmt_start = rmt->used; kfree(rmt); /* diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 24df45fc8722..b9dbf16d7703 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1362,6 +1362,8 @@ int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt); int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey); int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt); void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality); +void hfi1_init_vnic_rsm(struct hfi1_devdata *dd); +void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd); /* * Interrupt source table. diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index dc2c1c993f04..e9fa3c293e42 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -1,6 +1,6 @@ #ifdef CONFIG_DEBUG_FS /* - * Copyright(c) 2015, 2016 Intel Corporation. 
+ * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -174,7 +174,7 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v) struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; struct hfi1_devdata *dd = dd_from_dev(ibd); - for (j = 0; j < dd->first_user_ctxt; j++) { + for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) { if (!dd->rcd[j]) continue; n_packets += dd->rcd[j]->opstats->stats[i].n_packets; @@ -200,7 +200,7 @@ static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos) if (!*pos) return SEQ_START_TOKEN; - if (*pos >= dd->first_user_ctxt) + if (*pos >= dd->first_dyn_alloc_ctxt) return NULL; return pos; } @@ -214,7 +214,7 @@ static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) return pos; ++*pos; - if (*pos >= dd->first_user_ctxt) + if (*pos >= dd->first_dyn_alloc_ctxt) return NULL; return pos; } @@ -1099,7 +1099,7 @@ static int _fault_stats_seq_show(struct seq_file *s, void *v) struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; struct hfi1_devdata *dd = dd_from_dev(ibd); - for (j = 0; j < dd->first_user_ctxt; j++) { + for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) { if (!dd->rcd[j]) continue; n_packets += dd->rcd[j]->opstats->stats[i].n_packets; diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index e4dc6a5997e5..6b3869529d5e 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -874,20 +874,42 @@ bail: return last; } -static inline void set_all_nodma_rtail(struct hfi1_devdata *dd) +static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt) { int i; - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++) + /* + * For dynamically allocated kernel contexts (like vnic) switch + * interrupt handler only for that context. 
Otherwise, switch + * interrupt handler for all statically allocated kernel contexts. + */ + if (ctxt >= dd->first_dyn_alloc_ctxt) { + dd->rcd[ctxt]->do_interrupt = + &handle_receive_interrupt_nodma_rtail; + return; + } + + for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) dd->rcd[i]->do_interrupt = &handle_receive_interrupt_nodma_rtail; } -static inline void set_all_dma_rtail(struct hfi1_devdata *dd) +static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt) { int i; - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++) + /* + * For dynamically allocated kernel contexts (like vnic) switch + * interrupt handler only for that context. Otherwise, switch + * interrupt handler for all statically allocated kernel contexts. + */ + if (ctxt >= dd->first_dyn_alloc_ctxt) { + dd->rcd[ctxt]->do_interrupt = + &handle_receive_interrupt_dma_rtail; + return; + } + + for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) dd->rcd[i]->do_interrupt = &handle_receive_interrupt_dma_rtail; } @@ -897,8 +919,13 @@ void set_all_slowpath(struct hfi1_devdata *dd) int i; /* HFI1_CTRL_CTXT must always use the slow path interrupt handler */ - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++) - dd->rcd[i]->do_interrupt = &handle_receive_interrupt; + for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) { + struct hfi1_ctxtdata *rcd = dd->rcd[i]; + + if ((i < dd->first_dyn_alloc_ctxt) || + (rcd && rcd->sc && (rcd->sc->type == SC_KERNEL))) + rcd->do_interrupt = &handle_receive_interrupt; + } } static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, @@ -1008,7 +1035,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) last = RCV_PKT_DONE; if (needset) { dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n"); - set_all_nodma_rtail(dd); + set_nodma_rtail(dd, rcd->ctxt); needset = 0; } } else { @@ -1030,7 +1057,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) if (needset) { dd_dev_info(dd, "Switching to 
DMA_RTAIL\n"); - set_all_dma_rtail(dd); + set_dma_rtail(dd, rcd->ctxt); needset = 0; } } @@ -1079,10 +1106,10 @@ void receive_interrupt_work(struct work_struct *work) set_link_state(ppd, HLS_UP_ACTIVE); /* - * Interrupt all kernel contexts that could have had an - * interrupt during auto activation. + * Interrupt all statically allocated kernel contexts that could + * have had an interrupt during auto activation. */ - for (i = HFI1_CTRL_CTXT; i < dd->first_user_ctxt; i++) + for (i = HFI1_CTRL_CTXT; i < dd->first_dyn_alloc_ctxt; i++) force_recv_intr(dd->rcd[i]); } @@ -1296,7 +1323,8 @@ int hfi1_reset_device(int unit) spin_lock_irqsave(&dd->uctxt_lock, flags); if (dd->rcd) - for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) { + for (i = dd->first_dyn_alloc_ctxt; + i < dd->num_rcv_contexts; i++) { if (!dd->rcd[i] || !dd->rcd[i]->cnt) continue; spin_unlock_irqrestore(&dd->uctxt_lock, flags); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index f78c739b330a..60598867f948 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -586,8 +586,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) * knows where it's own bitmap is within the page. */ memaddr = (unsigned long)(dd->events + - ((uctxt->ctxt - dd->first_user_ctxt) * - HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK; + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * + HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK; memlen = PAGE_SIZE; /* * v3.7 removes VM_RESERVED but the effect is kept by @@ -756,7 +756,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) * Clear any left over, unhandled events so the next process that * gets this context doesn't get confused. 
*/ - ev = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) * + ev = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * HFI1_MAX_SHARED_CTXTS) + fdata->subctxt; *ev = 0; @@ -909,12 +909,18 @@ static int find_shared_ctxt(struct file *fp, if (!(dd && (dd->flags & HFI1_PRESENT) && dd->kregbase)) continue; - for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) { + for (i = dd->first_dyn_alloc_ctxt; + i < dd->num_rcv_contexts; i++) { struct hfi1_ctxtdata *uctxt = dd->rcd[i]; /* Skip ctxts which are not yet open */ if (!uctxt || !uctxt->cnt) continue; + + /* Skip dynamically allocated kernel contexts */ + if (uctxt->sc && (uctxt->sc->type == SC_KERNEL)) + continue; + /* Skip ctxt if it doesn't match the requested one */ if (memcmp(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)) || @@ -960,7 +966,8 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, return -EIO; } - for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts; ctxt++) + for (ctxt = dd->first_dyn_alloc_ctxt; + ctxt < dd->num_rcv_contexts; ctxt++) if (!dd->rcd[ctxt]) break; @@ -1306,7 +1313,7 @@ static int get_base_info(struct file *fp, void __user *ubase, __u32 len) */ binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt, fd->subctxt, 0); - offset = offset_in_page((((uctxt->ctxt - dd->first_user_ctxt) * + offset = offset_in_page((((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * HFI1_MAX_SHARED_CTXTS) + fd->subctxt) * sizeof(*dd->events)); binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt, @@ -1400,12 +1407,12 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit) } spin_lock_irqsave(&dd->uctxt_lock, flags); - for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts; + for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts; ctxt++) { uctxt = dd->rcd[ctxt]; if (uctxt) { unsigned long *evs = dd->events + - (uctxt->ctxt - dd->first_user_ctxt) * + (uctxt->ctxt - dd->first_dyn_alloc_ctxt) * HFI1_MAX_SHARED_CTXTS; int i; /* @@ -1477,7 +1484,7
@@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt, if (!dd->events) return 0; - evs = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) * + evs = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * HFI1_MAX_SHARED_CTXTS) + subctxt; for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) { diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index f85e8f4eae69..a12bb462d83f 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -66,6 +67,7 @@ #include #include #include +#include #include #include "chip_registers.h" @@ -278,6 +280,8 @@ struct hfi1_ctxtdata { struct hfi1_devdata *dd; /* so functions that need physical port can get it easily */ struct hfi1_pportdata *ppd; + /* associated msix interrupt */ + u32 msix_intr; /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */ void *subctxt_uregbase; /* An array of pages for the eager receive buffers * N */ @@ -814,15 +818,27 @@ struct hfi1_asic_data { struct hfi1_i2c_bus *i2c_bus1; }; +/* sizes for both the QP and RSM map tables */ +#define NUM_MAP_ENTRIES 256 +#define NUM_MAP_REGS 32 + /* * Number of VNIC contexts used. Ensure it is less than or equal to * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE). */ #define HFI1_NUM_VNIC_CTXT 8 +/* Number of VNIC RSM entries */ +#define NUM_VNIC_MAP_ENTRIES 8 + /* Virtual NIC information */ struct hfi1_vnic_data { + struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT]; + u8 num_vports; struct idr vesw_idr; + u8 rmt_start; + u8 num_ctxt; + u32 msix_idx; }; struct hfi1_vnic_vport_info; @@ -1050,6 +1066,7 @@ struct hfi1_devdata { /* MSI-X information */ struct hfi1_msix_entry *msix_entries; u32 num_msix_entries; + u32 first_dyn_msix_idx; /* INTx information */ u32 requested_intx_irq; /* did we request one? 
*/ @@ -1148,8 +1165,8 @@ struct hfi1_devdata { u16 flags; /* Number of physical ports available */ u8 num_pports; - /* Lowest context number which can be used by user processes */ - u8 first_user_ctxt; + /* Lowest context number which can be used by user processes or VNIC */ + u8 first_dyn_alloc_ctxt; /* adding a new field here would make it part of this cacheline */ /* seqlock for sc2vl */ @@ -1197,6 +1214,11 @@ struct hfi1_devdata { struct hfi1_vnic_data vnic; }; +static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare) +{ + return (dd->vnic.rmt_start + spare) > NUM_MAP_ENTRIES; +} + /* 8051 firmware version helper */ #define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c)) #define dc8051_ver_maj(a) (((a) & 0xff0000) >> 16) @@ -1261,6 +1283,9 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *, int); int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int); int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int); void set_all_slowpath(struct hfi1_devdata *dd); +void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd); +void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd); +void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd); extern const struct pci_device_id hfi1_pci_tbl[]; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index e84f95d50e79..de2eec40f2a0 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -140,7 +140,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) goto nomem; /* create one or more kernel contexts */ - for (i = 0; i < dd->first_user_ctxt; ++i) { + for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) { struct hfi1_pportdata *ppd; struct hfi1_ctxtdata *rcd; @@ -215,9 +215,9 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, u32 base; if (dd->rcv_entries.nctxt_extra > - dd->num_rcv_contexts - dd->first_user_ctxt) + dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt) kctxt_ngroups = 
(dd->rcv_entries.nctxt_extra - - (dd->num_rcv_contexts - dd->first_user_ctxt)); + (dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt)); rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa); if (rcd) { u32 rcvtids, max_entries; @@ -239,10 +239,10 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, * Calculate the context's RcvArray entry starting point. * We do this here because we have to take into account all * the RcvArray entries that previous context would have - * taken and we have to account for any extra groups - * assigned to the kernel or user contexts. + * taken and we have to account for any extra groups assigned + * to the static (kernel) or dynamic (vnic/user) contexts. */ - if (ctxt < dd->first_user_ctxt) { + if (ctxt < dd->first_dyn_alloc_ctxt) { if (ctxt < kctxt_ngroups) { base = ctxt * (dd->rcv_entries.ngroups + 1); rcd->rcv_array_groups++; @@ -250,7 +250,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, base = kctxt_ngroups + (ctxt * dd->rcv_entries.ngroups); } else { - u16 ct = ctxt - dd->first_user_ctxt; + u16 ct = ctxt - dd->first_dyn_alloc_ctxt; base = ((dd->n_krcv_queues * dd->rcv_entries.ngroups) + kctxt_ngroups); @@ -323,7 +323,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, } rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE; - if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */ + /* Applicable only for statically created kernel contexts */ + if (ctxt < dd->first_dyn_alloc_ctxt) { rcd->opstats = kzalloc_node(sizeof(*rcd->opstats), GFP_KERNEL, numa); if (!rcd->opstats) @@ -586,7 +587,7 @@ static void enable_chip(struct hfi1_devdata *dd) * Enable kernel ctxts' receive and receive interrupt. * Other ctxts done as user opens and initializes them. 
*/ - for (i = 0; i < dd->first_user_ctxt; ++i) { + for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) { rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB; rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ? HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS; @@ -715,7 +716,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) } /* dd->rcd can be NULL if early initialization failed */ - for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) { + for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) { /* * Set up the (kernel) rcvhdr queue and egr TIDs. If doing * re-init, the simplest way to handle this is to free @@ -1535,6 +1536,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) hfi1_device_remove(dd); if (!ret) hfi1_unregister_ib_device(dd); + hfi1_vnic_cleanup(dd); postinit_cleanup(dd); if (initfail) ret = initfail; @@ -1621,8 +1623,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize * sizeof(u32)); - gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? 
- GFP_USER : GFP_KERNEL; + if ((rcd->ctxt < dd->first_dyn_alloc_ctxt) || + (rcd->sc && (rcd->sc->type == SC_KERNEL))) + gfp_flags = GFP_KERNEL; + else + gfp_flags = GFP_USER; rcd->rcvhdrq = dma_zalloc_coherent( &dd->pcidev->dev, amt, &rcd->rcvhdrq_dma, gfp_flags | __GFP_COMP); diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 09cda3c35e82..955e5fce6573 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -53,6 +53,7 @@ #include "mad.h" #include "trace.h" #include "qp.h" +#include "vnic.h" /* the reset value from the FM is supposed to be 0xffff, handle both */ #define OPA_LINK_WIDTH_RESET_OLD 0x0fff @@ -650,9 +651,11 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0); pi->port_packet_format.supported = - cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B); + cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B | + OPA_PORT_PACKET_FORMAT_16B); pi->port_packet_format.enabled = - cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B); + cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B | + OPA_PORT_PACKET_FORMAT_16B); /* flit_control.interleave is (OPA V1, version .76): * bits use @@ -701,7 +704,8 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT; pi->buffer_units = cpu_to_be32(buffer_units); - pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported); + pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported | + OPA_CAP_MASK3_IsEthOnFabricSupported); /* HFI supports a replay buffer 128 LTPs in size */ pi->replay_depth.buffer = 0x80; diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 615be68e40b3..ed72b5aca139 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. 
* * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -703,6 +703,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, { struct send_context_info *sci; struct send_context *sc = NULL; + int req_type = type; dma_addr_t dma; unsigned long flags; u64 reg; @@ -729,6 +730,13 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, return NULL; } + /* + * VNIC contexts are dynamically allocated. + * Hence, pick a user context for VNIC. + */ + if (type == SC_VNIC) + type = SC_USER; + spin_lock_irqsave(&dd->sc_lock, flags); ret = sc_hw_alloc(dd, type, &sw_index, &hw_context); if (ret) { @@ -738,6 +746,15 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, return NULL; } + /* + * VNIC contexts are used by kernel driver. + * Hence, mark them as kernel contexts. + */ + if (req_type == SC_VNIC) { + dd->send_contexts[sw_index].type = SC_KERNEL; + type = SC_KERNEL; + } + sci = &dd->send_contexts[sw_index]; sci->sc = sc; diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index 867e5ffc3595..a6fb70093a70 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -1,7 +1,7 @@ #ifndef _PIO_H #define _PIO_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -54,6 +54,12 @@ #define SC_USER 3 /* must be the last one: it may take all left */ #define SC_MAX 4 /* count of send context types */ +/* + * SC_VNIC types are allocated (dynamically) from the user context pool, + * (SC_USER) and used by kernel driver as kernel contexts (SC_KERNEL). 
+ */ +#define SC_VNIC SC_MAX + /* invalid send context index */ #define INVALID_SCI 0xff diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 919a5474e651..50d140d25e38 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -542,7 +542,7 @@ static ssize_t show_nctxts(struct device *device, * give a more accurate picture of total contexts available. */ return scnprintf(buf, PAGE_SIZE, "%u\n", - min(dd->num_rcv_contexts - dd->first_user_ctxt, + min(dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt, (u32)dd->sc_sizes[SC_USER].count)); } diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 4a8295399e71..25a8698f7db9 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -607,7 +607,7 @@ int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo) struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; unsigned long *ev = uctxt->dd->events + - (((uctxt->ctxt - uctxt->dd->first_user_ctxt) * + (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) * HFI1_MAX_SHARED_CTXTS) + fd->subctxt); u32 *array; int ret = 0; @@ -1011,8 +1011,8 @@ static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode) * process in question. 
*/ ev = uctxt->dd->events + - (((uctxt->ctxt - uctxt->dd->first_user_ctxt) * - HFI1_MAX_SHARED_CTXTS) + fdata->subctxt); + (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) * + HFI1_MAX_SHARED_CTXTS) + fdata->subctxt); set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev); } fdata->invalid_tid_idx++; diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index 68295a12b771..e341e6dcc388 100644 --- a/drivers/infiniband/hw/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -73,7 +73,8 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm, { unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit, size = (cache_size * (1UL << 20)); /* convert to bytes */ - unsigned usr_ctxts = dd->num_rcv_contexts - dd->first_user_ctxt; + unsigned int usr_ctxts = + dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt; bool can_lock = capable(CAP_IPC_LOCK); /* diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 070a349afd78..239fa480555f 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -61,6 +61,7 @@ #include "qp.h" #include "verbs_txreq.h" #include "debugfs.h" +#include "vnic.h" static unsigned int hfi1_lkey_table_size = 16; module_param_named(lkey_table_size, hfi1_lkey_table_size, uint, @@ -1289,7 +1290,8 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE | - IB_DEVICE_MEM_MGT_EXTENSIONS; + IB_DEVICE_MEM_MGT_EXTENSIONS | + IB_DEVICE_RDMA_NETDEV_OPA_VNIC; rdi->dparms.props.page_size_cap = PAGE_SIZE; rdi->dparms.props.vendor_id 
= dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; rdi->dparms.props.vendor_part_id = dd->pcidev->device; @@ -1772,6 +1774,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->modify_device = modify_device; ibdev->alloc_hw_stats = alloc_hw_stats; ibdev->get_hw_stats = get_hw_stats; + ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn; + ibdev->free_rdma_netdev = hfi1_vnic_free_rn; /* keep process mad in the driver */ ibdev->process_mad = hfi1_process_mad; diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h index 04723b1ab246..9bed40d85cff 100644 --- a/drivers/infiniband/hw/hfi1/vnic.h +++ b/drivers/infiniband/hw/hfi1/vnic.h @@ -149,5 +149,8 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, unsigned char name_assign_type, void (*setup)(struct net_device *)); void hfi1_vnic_free_rn(struct net_device *netdev); +int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx, + struct hfi1_vnic_vport_info *vinfo, + struct sk_buff *skb, u64 pbc, u8 plen); #endif /* _HFI1_VNIC_H */ diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index fb23f9ff6bc1..32d91b60972b 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -62,6 +62,159 @@ static DEFINE_SPINLOCK(vport_cntr_lock); +static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) +{ + unsigned int rcvctrl_ops = 0; + int ret; + + ret = hfi1_init_ctxt(uctxt->sc); + if (ret) + goto done; + + uctxt->do_interrupt = &handle_receive_interrupt; + + /* Now allocate the RcvHdr queue and eager buffers. 
*/ + ret = hfi1_create_rcvhdrq(dd, uctxt); + if (ret) + goto done; + + ret = hfi1_setup_eagerbufs(uctxt); + if (ret) + goto done; + + set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags); + + if (uctxt->rcvhdrtail_kvaddr) + clear_rcvhdrtail(uctxt); + + rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; + rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB; + + if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR)) + rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; + if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL)) + rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; + if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) + rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; + if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) + rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; + + hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt); + + uctxt->is_vnic = true; +done: + return ret; +} + +static int allocate_vnic_ctxt(struct hfi1_devdata *dd, + struct hfi1_ctxtdata **vnic_ctxt) +{ + struct hfi1_ctxtdata *uctxt; + unsigned int ctxt; + int ret; + + if (dd->flags & HFI1_FROZEN) + return -EIO; + + for (ctxt = dd->first_dyn_alloc_ctxt; + ctxt < dd->num_rcv_contexts; ctxt++) + if (!dd->rcd[ctxt]) + break; + + if (ctxt == dd->num_rcv_contexts) + return -EBUSY; + + uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, dd->node); + if (!uctxt) { + dd_dev_err(dd, "Unable to create ctxtdata, failing open\n"); + return -ENOMEM; + } + + uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) | + HFI1_CAP_KGET(NODROP_RHQ_FULL) | + HFI1_CAP_KGET(NODROP_EGR_FULL) | + HFI1_CAP_KGET(DMA_RTAIL); + uctxt->seq_cnt = 1; + + /* Allocate and enable a PIO send context */ + uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize, + uctxt->numa_id); + + ret = uctxt->sc ? 
0 : -ENOMEM; + if (ret) + goto bail; + + dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n", + uctxt->sc->sw_index, uctxt->sc->hw_context); + ret = sc_enable(uctxt->sc); + if (ret) + goto bail; + + if (dd->num_msix_entries) + hfi1_set_vnic_msix_info(uctxt); + + hfi1_stats.sps_ctxts++; + dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); + *vnic_ctxt = uctxt; + + return ret; +bail: + /* + * hfi1_free_ctxtdata() also releases send_context + * structure if uctxt->sc is not null + */ + dd->rcd[uctxt->ctxt] = NULL; + hfi1_free_ctxtdata(dd, uctxt); + dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret); + return ret; +} + +static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, + struct hfi1_ctxtdata *uctxt) +{ + unsigned long flags; + + dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); + flush_wc(); + + if (dd->num_msix_entries) + hfi1_reset_vnic_msix_info(uctxt); + + spin_lock_irqsave(&dd->uctxt_lock, flags); + /* + * Disable receive context and interrupt available, reset all + * RcvCtxtCtrl bits to default values. + */ + hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | + HFI1_RCVCTRL_TIDFLOW_DIS | + HFI1_RCVCTRL_INTRAVAIL_DIS | + HFI1_RCVCTRL_ONE_PKT_EGR_DIS | + HFI1_RCVCTRL_NO_RHQ_DROP_DIS | + HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt); + /* + * VNIC contexts are allocated from user context pool. + * Release them back to user context pool. + * + * Reset context integrity checks to default. 
+ * (writes to CSRs probably belong in chip.c) + */ + write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE, + hfi1_pkt_default_send_ctxt_mask(dd, SC_USER)); + sc_disable(uctxt->sc); + + dd->send_contexts[uctxt->sc->sw_index].type = SC_USER; + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + + dd->rcd[uctxt->ctxt] = NULL; + uctxt->event_flags = 0; + + hfi1_clear_tids(uctxt); + hfi1_clear_ctxt_pkey(dd, uctxt->ctxt); + + hfi1_stats.sps_ctxts--; + hfi1_free_ctxtdata(dd, uctxt); +} + void hfi1_vnic_setup(struct hfi1_devdata *dd) { idr_init(&dd->vnic.vesw_idr); @@ -519,6 +672,9 @@ static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo) netif_tx_disable(vinfo->netdev); idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id); + /* ensure irqs see the change */ + hfi1_vnic_synchronize_irq(dd); + /* remove unread skbs */ for (i = 0; i < vinfo->num_rx_q; i++) { struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; @@ -550,6 +706,84 @@ static int hfi1_netdev_close(struct net_device *netdev) return 0; } +static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd, + struct hfi1_ctxtdata **vnic_ctxt) +{ + int rc; + + rc = allocate_vnic_ctxt(dd, vnic_ctxt); + if (rc) { + dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc); + return rc; + } + + rc = setup_vnic_ctxt(dd, *vnic_ctxt); + if (rc) { + dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc); + deallocate_vnic_ctxt(dd, *vnic_ctxt); + *vnic_ctxt = NULL; + } + + return rc; +} + +static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) +{ + struct hfi1_devdata *dd = vinfo->dd; + int i, rc = 0; + + mutex_lock(&hfi1_mutex); + if (!dd->vnic.num_vports) + dd->vnic.msix_idx = dd->first_dyn_msix_idx; + + for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) { + rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]); + if (rc) + break; + dd->vnic.ctxt[i]->vnic_q_idx = i; + } + + if (i < vinfo->num_rx_q) { + /* + * If required amount of contexts is not + * allocated successfully then remaining contexts + * are released. 
+ */ + while (i-- > dd->vnic.num_ctxt) { + deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); + dd->vnic.ctxt[i] = NULL; + } + goto alloc_fail; + } + + if (dd->vnic.num_ctxt != i) { + dd->vnic.num_ctxt = i; + hfi1_init_vnic_rsm(dd); + } + + dd->vnic.num_vports++; +alloc_fail: + mutex_unlock(&hfi1_mutex); + return rc; +} + +static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo) +{ + struct hfi1_devdata *dd = vinfo->dd; + int i; + + mutex_lock(&hfi1_mutex); + if (--dd->vnic.num_vports == 0) { + for (i = 0; i < dd->vnic.num_ctxt; i++) { + deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); + dd->vnic.ctxt[i] = NULL; + } + hfi1_deinit_vnic_rsm(dd); + dd->vnic.num_ctxt = 0; + } + mutex_unlock(&hfi1_mutex); +} + static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id) { struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); @@ -594,7 +828,7 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, struct hfi1_vnic_vport_info *vinfo; struct net_device *netdev; struct rdma_netdev *rn; - int i, size; + int i, size, rc; if (!port_num || (port_num > dd->num_pports)) return ERR_PTR(-EINVAL); @@ -632,13 +866,22 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64); } + rc = hfi1_vnic_init(vinfo); + if (rc) + goto init_fail; + return netdev; +init_fail: + mutex_destroy(&vinfo->lock); + free_netdev(netdev); + return ERR_PTR(rc); } void hfi1_vnic_free_rn(struct net_device *netdev) { struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + hfi1_vnic_deinit(vinfo); mutex_destroy(&vinfo->lock); free_netdev(netdev); } diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h index 9303e0e4f508..b4f0ac02f283 100644 --- a/include/rdma/opa_port_info.h +++ b/include/rdma/opa_port_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Intel Corporation. All rights reserved. + * Copyright (c) 2014-2017 Intel Corporation. All rights reserved. 
* * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -127,6 +127,7 @@ #define OPA_LINK_WIDTH_3X 0x0004 #define OPA_LINK_WIDTH_4X 0x0008 +#define OPA_CAP_MASK3_IsEthOnFabricSupported (1 << 13) #define OPA_CAP_MASK3_IsSnoopSupported (1 << 7) #define OPA_CAP_MASK3_IsAsyncSC2VLSupported (1 << 6) #define OPA_CAP_MASK3_IsAddrRangeConfigSupported (1 << 5) -- cgit v1.2.3-55-g7522 From 64551ede6cd1c9f814951914e23ef1d2a9498f71 Mon Sep 17 00:00:00 2001 From: Vishwanathapura, Niranjana Date: Wed, 12 Apr 2017 20:29:30 -0700 Subject: IB/hfi1: VNIC SDMA support HFI1 VNIC SDMA support enables transmission of VNIC packets over SDMA. Map VNIC queues to SDMA engines and support halting and wakeup of the VNIC queues. Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/Makefile | 2 +- drivers/infiniband/hw/hfi1/hfi.h | 1 + drivers/infiniband/hw/hfi1/init.c | 1 + drivers/infiniband/hw/hfi1/vnic.h | 28 +++ drivers/infiniband/hw/hfi1/vnic_main.c | 24 ++- drivers/infiniband/hw/hfi1/vnic_sdma.c | 323 +++++++++++++++++++++++++++++++++ 6 files changed, 376 insertions(+), 3 deletions(-) create mode 100644 drivers/infiniband/hw/hfi1/vnic_sdma.c (limited to 'drivers/infiniband/hw/hfi1/hfi.h') diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 22805383de4f..88085f65432e 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -12,7 +12,7 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ - verbs_txreq.o vnic_main.o + verbs_txreq.o vnic_main.o vnic_sdma.o hfi1-$(CONFIG_DEBUG_FS) += debugfs.o CFLAGS_trace.o = -I$(src) diff --git 
a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index a12bb462d83f..2862b14b8414 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -834,6 +834,7 @@ struct hfi1_asic_data { /* Virtual NIC information */ struct hfi1_vnic_data { struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT]; + struct kmem_cache *txreq_cache; u8 num_vports; struct idr vesw_idr; u8 rmt_start; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index de2eec40f2a0..b4c7e04f4578 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -681,6 +681,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) dd->process_pio_send = hfi1_verbs_send_pio; dd->process_dma_send = hfi1_verbs_send_dma; dd->pio_inline_send = pio_copy; + dd->process_vnic_dma_send = hfi1_vnic_send_dma; if (is_ax(dd)) { atomic_set(&dd->drop_packet, DROP_PACKET_ON); diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h index 9bed40d85cff..e2c455299b53 100644 --- a/drivers/infiniband/hw/hfi1/vnic.h +++ b/drivers/infiniband/hw/hfi1/vnic.h @@ -49,6 +49,7 @@ #include #include "hfi.h" +#include "sdma.h" #define HFI1_VNIC_MAX_TXQ 16 #define HFI1_VNIC_MAX_PAD 12 @@ -84,6 +85,26 @@ #define HFI1_VNIC_MAX_QUEUE 16 +/** + * struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information + * @dd - device data pointer + * @sde - sdma engine + * @vinfo - vnic info pointer + * @wait - iowait structure + * @stx - sdma tx request + * @state - vnic Tx ring SDMA state + * @q_idx - vnic Tx queue index + */ +struct hfi1_vnic_sdma { + struct hfi1_devdata *dd; + struct sdma_engine *sde; + struct hfi1_vnic_vport_info *vinfo; + struct iowait wait; + struct sdma_txreq stx; + unsigned int state; + u8 q_idx; +}; + /** * struct hfi1_vnic_rx_queue - HFI1 VNIC receive queue * @idx: queue index @@ -111,6 +132,7 @@ struct hfi1_vnic_rx_queue { * @vesw_id: virtual switch id * @rxq: Array of receive queues * @stats: per queue stats 
+ * @sdma: VNIC SDMA structure per TXQ */ struct hfi1_vnic_vport_info { struct hfi1_devdata *dd; @@ -126,6 +148,7 @@ struct hfi1_vnic_vport_info { struct hfi1_vnic_rx_queue rxq[HFI1_NUM_VNIC_CTXT]; struct opa_vnic_stats stats[HFI1_VNIC_MAX_QUEUE]; + struct hfi1_vnic_sdma sdma[HFI1_VNIC_MAX_TXQ]; }; #define v_dbg(format, arg...) \ @@ -138,8 +161,13 @@ struct hfi1_vnic_vport_info { /* vnic hfi1 internal functions */ void hfi1_vnic_setup(struct hfi1_devdata *dd); void hfi1_vnic_cleanup(struct hfi1_devdata *dd); +int hfi1_vnic_txreq_init(struct hfi1_devdata *dd); +void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd); void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet); +void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo); +bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo, + u8 q_idx); /* vnic rdma netdev operations */ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 32d91b60972b..392f4d57f3e3 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -406,6 +406,10 @@ static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo, u8 q_idx) { netif_stop_subqueue(vinfo->netdev, q_idx); + if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx)) + return; + + netif_start_subqueue(vinfo->netdev, q_idx); } static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb, @@ -477,7 +481,13 @@ static u16 hfi1_vnic_select_queue(struct net_device *netdev, void *accel_priv, select_queue_fallback_t fallback) { - return 0; + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + struct opa_vnic_skb_mdata *mdata; + struct sdma_engine *sde; + + mdata = (struct opa_vnic_skb_mdata *)skb->data; + sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl); + return sde->this_idx; } /* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */ @@ -733,8 +743,13 @@ static int 
hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) int i, rc = 0; mutex_lock(&hfi1_mutex); - if (!dd->vnic.num_vports) + if (!dd->vnic.num_vports) { + rc = hfi1_vnic_txreq_init(dd); + if (rc) + goto txreq_fail; + dd->vnic.msix_idx = dd->first_dyn_msix_idx; + } for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) { rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]); @@ -762,7 +777,11 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) } dd->vnic.num_vports++; + hfi1_vnic_sdma_init(vinfo); alloc_fail: + if (!dd->vnic.num_vports) + hfi1_vnic_txreq_deinit(dd); +txreq_fail: mutex_unlock(&hfi1_mutex); return rc; } @@ -780,6 +799,7 @@ static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo) } hfi1_deinit_vnic_rsm(dd); dd->vnic.num_ctxt = 0; + hfi1_vnic_txreq_deinit(dd); } mutex_unlock(&hfi1_mutex); } diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c new file mode 100644 index 000000000000..51a817d3aa14 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c @@ -0,0 +1,323 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +/* + * This file contains HFI1 support for VNIC SDMA functionality + */ + +#include "sdma.h" +#include "vnic.h" + +#define HFI1_VNIC_SDMA_Q_ACTIVE BIT(0) +#define HFI1_VNIC_SDMA_Q_DEFERRED BIT(1) + +#define HFI1_VNIC_TXREQ_NAME_LEN 32 +#define HFI1_VNIC_SDMA_DESC_WTRMRK 64 +#define HFI1_VNIC_SDMA_RETRY_COUNT 1 + +/* + * struct vnic_txreq - VNIC transmit descriptor + * @txreq: sdma transmit request + * @sdma: vnic sdma pointer + * @skb: skb to send + * @pad: pad buffer + * @plen: pad length + * @pbc_val: pbc value + * @retry_count: tx retry count + */ +struct vnic_txreq { + struct sdma_txreq txreq; + struct hfi1_vnic_sdma *sdma; + + struct sk_buff *skb; + unsigned char pad[HFI1_VNIC_MAX_PAD]; + u16 plen; + __le64 pbc_val; + + u32 retry_count; +}; + +static void vnic_sdma_complete(struct sdma_txreq *txreq, + int status) +{ + struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq); + struct hfi1_vnic_sdma *vnic_sdma = tx->sdma; + + sdma_txclean(vnic_sdma->dd, txreq); + dev_kfree_skb_any(tx->skb); + kmem_cache_free(vnic_sdma->dd->vnic.txreq_cache, tx); +} + +static noinline int build_vnic_ulp_payload(struct sdma_engine *sde, + struct vnic_txreq *tx) +{ + int i, ret = 0; + + ret = sdma_txadd_kvaddr( + sde->dd, + &tx->txreq, + tx->skb->data, + skb_headlen(tx->skb)); + if (unlikely(ret)) + goto bail_txadd; + + for (i = 0; i < skb_shinfo(tx->skb)->nr_frags; i++) { + struct skb_frag_struct *frag = &skb_shinfo(tx->skb)->frags[i]; + + /* combine physically continuous fragments later? 
*/ + ret = sdma_txadd_page(sde->dd, + &tx->txreq, + skb_frag_page(frag), + frag->page_offset, + skb_frag_size(frag)); + if (unlikely(ret)) + goto bail_txadd; + } + + if (tx->plen) + ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq, + tx->pad + HFI1_VNIC_MAX_PAD - tx->plen, + tx->plen); + +bail_txadd: + return ret; +} + +static int build_vnic_tx_desc(struct sdma_engine *sde, + struct vnic_txreq *tx, + u64 pbc) +{ + int ret = 0; + u16 hdrbytes = 2 << 2; /* PBC */ + + ret = sdma_txinit_ahg( + &tx->txreq, + 0, + hdrbytes + tx->skb->len + tx->plen, + 0, + 0, + NULL, + 0, + vnic_sdma_complete); + if (unlikely(ret)) + goto bail_txadd; + + /* add pbc */ + tx->pbc_val = cpu_to_le64(pbc); + ret = sdma_txadd_kvaddr( + sde->dd, + &tx->txreq, + &tx->pbc_val, + hdrbytes); + if (unlikely(ret)) + goto bail_txadd; + + /* add the ulp payload */ + ret = build_vnic_ulp_payload(sde, tx); +bail_txadd: + return ret; +} + +/* setup the last plen bytes of pad */ +static inline void hfi1_vnic_update_pad(unsigned char *pad, u8 plen) +{ + pad[HFI1_VNIC_MAX_PAD - 1] = plen - OPA_VNIC_ICRC_TAIL_LEN; +} + +int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx, + struct hfi1_vnic_vport_info *vinfo, + struct sk_buff *skb, u64 pbc, u8 plen) +{ + struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[q_idx]; + struct sdma_engine *sde = vnic_sdma->sde; + struct vnic_txreq *tx; + int ret = -ECOMM; + + if (unlikely(READ_ONCE(vnic_sdma->state) != HFI1_VNIC_SDMA_Q_ACTIVE)) + goto tx_err; + + if (unlikely(!sde || !sdma_running(sde))) + goto tx_err; + + tx = kmem_cache_alloc(dd->vnic.txreq_cache, GFP_ATOMIC); + if (unlikely(!tx)) { + ret = -ENOMEM; + goto tx_err; + } + + tx->sdma = vnic_sdma; + tx->skb = skb; + hfi1_vnic_update_pad(tx->pad, plen); + tx->plen = plen; + ret = build_vnic_tx_desc(sde, tx, pbc); + if (unlikely(ret)) + goto free_desc; + tx->retry_count = 0; + + ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq); + /* When -ECOMM, sdma callback will be called with ABORT status */ + if (unlikely(ret
&& unlikely(ret != -ECOMM))) + goto free_desc; + + return ret; + +free_desc: + sdma_txclean(dd, &tx->txreq); + kmem_cache_free(dd->vnic.txreq_cache, tx); +tx_err: + if (ret != -EBUSY) + dev_kfree_skb_any(skb); + return ret; +} + +/* + * hfi1_vnic_sdma_sleep - vnic sdma sleep function + * + * This function gets called from sdma_send_txreq() when there are not enough + * sdma descriptors available to send the packet. It adds Tx queue's wait + * structure to sdma engine's dmawait list to be woken up when descriptors + * become available. + */ +static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, + struct iowait *wait, + struct sdma_txreq *txreq, + unsigned int seq) +{ + struct hfi1_vnic_sdma *vnic_sdma = + container_of(wait, struct hfi1_vnic_sdma, wait); + struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev; + struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq); + + if (sdma_progress(sde, seq, txreq)) + if (tx->retry_count++ < HFI1_VNIC_SDMA_RETRY_COUNT) + return -EAGAIN; + + vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED; + write_seqlock(&dev->iowait_lock); + if (list_empty(&vnic_sdma->wait.list)) + list_add_tail(&vnic_sdma->wait.list, &sde->dmawait); + write_sequnlock(&dev->iowait_lock); + return -EBUSY; +} + +/* + * hfi1_vnic_sdma_wakeup - vnic sdma wakeup function + * + * This function gets called when SDMA descriptors becomes available and Tx + * queue's wait structure was previously added to sdma engine's dmawait list. + * It notifies the upper driver about Tx queue wakeup. 
+ */ +static void hfi1_vnic_sdma_wakeup(struct iowait *wait, int reason) +{ + struct hfi1_vnic_sdma *vnic_sdma = + container_of(wait, struct hfi1_vnic_sdma, wait); + struct hfi1_vnic_vport_info *vinfo = vnic_sdma->vinfo; + + vnic_sdma->state = HFI1_VNIC_SDMA_Q_ACTIVE; + if (__netif_subqueue_stopped(vinfo->netdev, vnic_sdma->q_idx)) + netif_wake_subqueue(vinfo->netdev, vnic_sdma->q_idx); +}; + +inline bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo, + u8 q_idx) +{ + struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[q_idx]; + + return (READ_ONCE(vnic_sdma->state) == HFI1_VNIC_SDMA_Q_ACTIVE); +} + +void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo) +{ + int i; + + for (i = 0; i < vinfo->num_tx_q; i++) { + struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i]; + + iowait_init(&vnic_sdma->wait, 0, NULL, hfi1_vnic_sdma_sleep, + hfi1_vnic_sdma_wakeup, NULL); + vnic_sdma->sde = &vinfo->dd->per_sdma[i]; + vnic_sdma->dd = vinfo->dd; + vnic_sdma->vinfo = vinfo; + vnic_sdma->q_idx = i; + vnic_sdma->state = HFI1_VNIC_SDMA_Q_ACTIVE; + + /* Add a free descriptor watermark for wakeups */ + if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) { + INIT_LIST_HEAD(&vnic_sdma->stx.list); + vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK; + list_add_tail(&vnic_sdma->stx.list, + &vnic_sdma->wait.tx_head); + } + } +} + +static void hfi1_vnic_txreq_kmem_cache_ctor(void *obj) +{ + struct vnic_txreq *tx = (struct vnic_txreq *)obj; + + memset(tx, 0, sizeof(*tx)); +} + +int hfi1_vnic_txreq_init(struct hfi1_devdata *dd) +{ + char buf[HFI1_VNIC_TXREQ_NAME_LEN]; + + snprintf(buf, sizeof(buf), "hfi1_%u_vnic_txreq_cache", dd->unit); + dd->vnic.txreq_cache = kmem_cache_create(buf, + sizeof(struct vnic_txreq), + 0, SLAB_HWCACHE_ALIGN, + hfi1_vnic_txreq_kmem_cache_ctor); + if (!dd->vnic.txreq_cache) + return -ENOMEM; + return 0; +} + +void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd) +{ + kmem_cache_destroy(dd->vnic.txreq_cache); + dd->vnic.txreq_cache = NULL; +} 
-- cgit v1.2.3-55-g7522 From aad559c21d7b536444701cb798100d33ff027f70 Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Sun, 9 Apr 2017 10:16:15 -0700 Subject: IB/hfi1: Rename hdr2sc to hfi1_9B_get_sc5 The function really returned the 5-bit sc value from the header and rhf. hdr2sc didn't quite describe what it did. Reviewed-by: Dennis Dalessandro Reviewed-by: Don Hiatt Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 7 ++++--- drivers/infiniband/hw/hfi1/hfi.h | 2 +- drivers/infiniband/hw/hfi1/ud.c | 2 +- drivers/infiniband/hw/hfi1/verbs.c | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw/hfi1/hfi.h') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 6b3869529d5e..2afb9cca2bee 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -398,7 +398,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, u16 rlid; u8 svc_type, sl, sc5; - sc5 = hdr2sc(rhdr, packet->rhf); + sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK; @@ -493,7 +493,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, return; } - sc = hdr2sc(hdr, pkt->rhf); + sc = hfi1_9B_get_sc5(hdr, pkt->rhf); bth1 = be32_to_cpu(ohdr->bth[1]); if (do_cnp && (bth1 & HFI1_FECN_SMASK)) { @@ -937,7 +937,8 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, packet->rhf_addr); u8 etype = rhf_rcv_type(packet->rhf); - if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) { + if (etype == RHF_RCV_TYPE_IB && + hfi1_9B_get_sc5(hdr, packet->rhf) != 0xf) { int hwstate = read_logical_state(dd); if (hwstate != LSTATE_ACTIVE) { diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 2862b14b8414..376e4c7547d1 100644 --- 
a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1312,7 +1312,7 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd) void receive_interrupt_work(struct work_struct *work); /* extract service channel from header and rhf */ -static inline int hdr2sc(struct ib_header *hdr, u64 rhf) +static inline int hfi1_9B_get_sc5(struct ib_header *hdr, u64 rhf) { return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) | ((!!(rhf_dc_info(rhf))) << 4); diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 13ea4eb6ef3d..fd105cf20885 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -680,7 +680,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; bool has_grh = rcv_flags & HFI1_HAS_GRH; - u8 sc5 = hdr2sc(hdr, packet->rhf); + u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf); u32 bth1; u8 sl_from_sc, sl; u16 slid; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 202572a618a2..f3b063c4df24 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1932,7 +1932,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) return; } - sc5 = hdr2sc(hdr, packet->rhf); + sc5 = hfi1_9B_get_sc5(hdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; lqpn = qp->ibqp.qp_num; -- cgit v1.2.3-55-g7522 From cb42705792d809b108c76f1c97665382dc89531e Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Sun, 9 Apr 2017 10:16:22 -0700 Subject: IB/hfi1: Add functions to parse 9B headers These inline functions improve code readability by enabling callers to read specific fields from the header without knowledge of byte offsets. 
Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 13 ++++---- drivers/infiniband/hw/hfi1/hfi.h | 3 +- drivers/infiniband/hw/hfi1/rc.c | 14 ++++----- drivers/infiniband/hw/hfi1/ruc.c | 24 +++++++-------- drivers/infiniband/hw/hfi1/trace.c | 5 ++-- drivers/infiniband/hw/hfi1/uc.c | 8 ++--- drivers/infiniband/hw/hfi1/ud.c | 14 ++++----- drivers/infiniband/hw/hfi1/verbs.c | 8 ++--- include/rdma/ib_hdrs.h | 60 +++++++++++++++++++++++++++++++++++++ 9 files changed, 102 insertions(+), 47 deletions(-) (limited to 'drivers/infiniband/hw/hfi1/hfi.h') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 2afb9cca2bee..51db53b11d6c 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -285,7 +285,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, { struct ib_header *rhdr = packet->hdr; u32 rte = rhf_rcv_type_err(packet->rhf); - int lnh = be16_to_cpu(rhdr->lrh[0]) & 3; + int lnh = ib_get_lnh(rhdr); struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct hfi1_devdata *dd = ppd->dd; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; @@ -297,7 +297,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, /* For TIDERR and RC QPs preemptively schedule a NAK */ struct ib_other_headers *ohdr = NULL; u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */ - u16 lid = be16_to_cpu(rhdr->lrh[1]); + u16 lid = ib_get_dlid(rhdr); u32 qp_num; u32 rcv_flags = 0; @@ -416,7 +416,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, svc_type = IB_CC_SVCTYPE_UD; break; case IB_QPT_UC: - rlid = be16_to_cpu(rhdr->lrh[3]); + rlid = ib_get_slid(rhdr); rqpn = qp->remote_qpn; svc_type = IB_CC_SVCTYPE_UC; break; @@ -462,7 +462,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet 
*pkt, struct ib_other_headers *ohdr = pkt->ohdr; struct ib_grh *grh = NULL; u32 rqpn = 0, bth1; - u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]); + u16 rlid, dlid = ib_get_dlid(hdr); u8 sc, svc_type; bool is_mcast = false; @@ -473,7 +473,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: - rlid = be16_to_cpu(hdr->lrh[3]); + rlid = ib_get_slid(hdr); rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; svc_type = IB_CC_SVCTYPE_UD; is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && @@ -623,8 +623,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) packet->hdr = hfi1_get_msgheader(dd, rhf_addr); hdr = packet->hdr; - - lnh = be16_to_cpu(hdr->lrh[0]) & 3; + lnh = ib_get_lnh(hdr); if (lnh == HFI1_LRH_BTH) { packet->ohdr = &hdr->u.oth; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 376e4c7547d1..c32bc49cc0cb 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1314,8 +1314,7 @@ void receive_interrupt_work(struct work_struct *work); /* extract service channel from header and rhf */ static inline int hfi1_9B_get_sc5(struct ib_header *hdr, u64 rhf) { - return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) | - ((!!(rhf_dc_info(rhf))) << 4); + return ib_get_sc(hdr) | ((!!(rhf_dc_info(rhf))) << 4); } #define HFI1_JKEY_WIDTH 16 diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 0e5657803a54..623209a21927 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -994,12 +994,12 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) return; /* Find out where the BTH is */ - if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH) + if (ib_get_lnh(hdr) == HFI1_LRH_BTH) ohdr = &hdr->u.oth; else ohdr = &hdr->u.l.oth; - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + opcode = ib_bth_get_opcode(ohdr); if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && opcode <= 
OP(ATOMIC_ACKNOWLEDGE)) { WARN_ON(!qp->s_rdma_ack_cnt); @@ -1524,7 +1524,7 @@ read_middle: if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd)) goto ack_done; /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + pad = ib_bth_get_pad(ohdr); /* * Check that the data size is >= 0 && <= pmtu. * Remember to account for ICRC (4). @@ -1548,7 +1548,7 @@ read_middle: if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) goto ack_op_err; /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + pad = ib_bth_get_pad(ohdr); /* * Check that the data size is >= 1 && <= pmtu. * Remember to account for ICRC (4). @@ -1942,7 +1942,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) is_fecn = process_ecn(qp, packet, false); psn = be32_to_cpu(ohdr->bth[2]); - opcode = (bth0 >> 24) & 0xff; + opcode = ib_bth_get_opcode(ohdr); /* * Process responses (ACKs) before anything else. Note that the @@ -2073,7 +2073,7 @@ no_immediate_data: wc.ex.imm_data = 0; send_last: /* Get the number of bytes the message was padded by. */ - pad = (bth0 >> 20) & 3; + pad = ib_bth_get_pad(ohdr); /* Check for invalid length. 
*/ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) @@ -2386,7 +2386,7 @@ void hfi1_rc_hdrerr( return; psn = be32_to_cpu(ohdr->bth[2]); - opcode = (bth0 >> 24) & 0xff; + opcode = ib_bth_get_opcode(ohdr); /* Only deal with RDMA Writes for now */ if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) { diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index eeb650dde776..27309157cef5 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -238,18 +238,18 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, qp->alt_ah_attr.grh.dgid.global.interface_id)) goto err; } - if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, - sc5, be16_to_cpu(hdr->lrh[3])))) { + if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, + ib_get_slid(hdr)))) { hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, - (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + ib_get_sl(hdr), 0, qp->ibqp.qp_num, - be16_to_cpu(hdr->lrh[3]), - be16_to_cpu(hdr->lrh[1])); + ib_get_slid(hdr), + ib_get_dlid(hdr)); goto err; } /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */ - if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid || + if (ib_get_slid(hdr) != qp->alt_ah_attr.dlid || ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num) goto err; spin_lock_irqsave(&qp->s_lock, flags); @@ -273,18 +273,18 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, qp->remote_ah_attr.grh.dgid.global.interface_id)) goto err; } - if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, - sc5, be16_to_cpu(hdr->lrh[3])))) { + if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, + ib_get_slid(hdr)))) { hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, - (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + ib_get_sl(hdr), 0, qp->ibqp.qp_num, - be16_to_cpu(hdr->lrh[3]), - be16_to_cpu(hdr->lrh[1])); + ib_get_slid(hdr), + ib_get_dlid(hdr)); goto err; } /* Validate the SLID. See Ch. 
9.6.1.5 */ - if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid || + if (ib_get_slid(hdr) != qp->remote_ah_attr.dlid || ppd_from_ibp(ibp)->port != qp->port_num) goto err; if (qp->s_mig_state == IB_MIG_REARM && diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index e86798af6903..eafae487face 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -51,13 +51,12 @@ u8 ibhdr_exhdr_len(struct ib_header *hdr) { struct ib_other_headers *ohdr; u8 opcode; - u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); - if (lnh == HFI1_LRH_BTH) + if (ib_get_lnh(hdr) == HFI1_LRH_BTH) ohdr = &hdr->u.oth; else ohdr = &hdr->u.l.oth; - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + opcode = ib_bth_get_opcode(ohdr); return hdr_len_by_opcode[opcode] == 0 ? 0 : hdr_len_by_opcode[opcode] - (12 + 8); } diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 4b2a8400c823..f0bdb100e005 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -320,7 +320,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) process_ecn(qp, packet, true); psn = be32_to_cpu(ohdr->bth[2]); - opcode = (bth0 >> 24) & 0xff; + opcode = ib_bth_get_opcode(ohdr); /* Compare the PSN verses the expected PSN. */ if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) { @@ -433,7 +433,7 @@ no_immediate_data: wc.wc_flags = 0; send_last: /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) @@ -528,7 +528,7 @@ rdma_last_imm: wc.wc_flags = IB_WC_WITH_IMM; /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + pad = ib_bth_get_pad(ohdr); /* Check for invalid length. 
*/ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) @@ -555,7 +555,7 @@ rdma_last_imm: case OP(RDMA_WRITE_LAST): rdma_last: /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index fd105cf20885..8b3403a1f199 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -688,18 +688,16 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) qkey = be32_to_cpu(ohdr->u.ud.deth[0]); src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; - dlid = be16_to_cpu(hdr->lrh[1]); + dlid = ib_get_dlid(hdr); bth1 = be32_to_cpu(ohdr->bth[1]); - slid = be16_to_cpu(hdr->lrh[3]); - pkey = (u16)be32_to_cpu(ohdr->bth[0]); - sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; - extra_bytes = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + slid = ib_get_slid(hdr); + pkey = ib_bth_get_pkey(ohdr); + opcode = ib_bth_get_opcode(ohdr); + sl = ib_get_sl(hdr); + extra_bytes = ib_bth_get_pad(ohdr); extra_bytes += (SIZE_OF_CRC << 2); sl_from_sc = ibp->sc_to_sl[sc5]; - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - opcode &= 0xff; - process_ecn(qp, packet, (opcode != IB_OPCODE_CNP)); /* * Get the number of bytes the message was padded by diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index f3b063c4df24..57036e545bdb 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -572,7 +572,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) u16 lid; /* Check for GRH */ - lnh = be16_to_cpu(hdr->lrh[0]) & 3; + lnh = ib_get_lnh(hdr); if (lnh == HFI1_LRH_BTH) { packet->ohdr = &hdr->u.oth; } else if (lnh == HFI1_LRH_GRH) { @@ -591,12 +591,12 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) trace_input_ibhdr(rcd->dd, hdr); - opcode = 
(be32_to_cpu(packet->ohdr->bth[0]) >> 24); + opcode = ib_bth_get_opcode(packet->ohdr); inc_opstats(tlen, &rcd->opstats->stats[opcode]); /* Get the destination QP number. */ qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK; - lid = be16_to_cpu(hdr->lrh[1]); + lid = ib_get_dlid(hdr); if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) { struct rvt_mcast *mcast; @@ -1231,7 +1231,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) hdr = &ps->s_txreq->phdr.hdr; /* locate the pkey within the headers */ - lnh = be16_to_cpu(hdr->lrh[0]) & 3; + lnh = ib_get_lnh(hdr); if (lnh == HFI1_LRH_GRH) ohdr = &hdr->u.l.oth; else diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h index c755325f0831..9cf42bcc8317 100644 --- a/include/rdma/ib_hdrs.h +++ b/include/rdma/ib_hdrs.h @@ -181,4 +181,64 @@ static inline void put_ib_ateth_compare(u64 val, struct ib_atomic_eth *ateth) ib_u64_put(val, &ateth->compare_data); } +/* + * 9B/IB Packet Format + */ +#define IB_LNH_MASK 3 +#define IB_SC_MASK 0xf +#define IB_SC_SHIFT 12 +#define IB_SL_MASK 0xf +#define IB_SL_SHIFT 4 + +static inline u8 ib_get_lnh(struct ib_header *hdr) +{ + return (be16_to_cpu(hdr->lrh[0]) & IB_LNH_MASK); +} + +static inline u8 ib_get_sc(struct ib_header *hdr) +{ + return ((be16_to_cpu(hdr->lrh[0]) >> IB_SC_SHIFT) & IB_SC_MASK); +} + +static inline u8 ib_get_sl(struct ib_header *hdr) +{ + return ((be16_to_cpu(hdr->lrh[0]) >> IB_SL_SHIFT) & IB_SL_MASK); +} + +static inline u16 ib_get_dlid(struct ib_header *hdr) +{ + return (be16_to_cpu(hdr->lrh[1])); +} + +static inline u16 ib_get_slid(struct ib_header *hdr) +{ + return (be16_to_cpu(hdr->lrh[3])); +} + +/* + * BTH + */ +#define IB_BTH_OPCODE_MASK 0xff +#define IB_BTH_OPCODE_SHIFT 24 +#define IB_BTH_PAD_MASK 3 +#define IB_BTH_PKEY_MASK 0xffff +#define IB_BTH_PAD_SHIFT 20 + +static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr) +{ + return ((be32_to_cpu(ohdr->bth[0]) >> 
IB_BTH_PAD_SHIFT) & + IB_BTH_PAD_MASK); +} + +static inline u16 ib_bth_get_pkey(struct ib_other_headers *ohdr) +{ + return (be32_to_cpu(ohdr->bth[0]) & IB_BTH_PKEY_MASK); +} + +static inline u8 ib_bth_get_opcode(struct ib_other_headers *ohdr) +{ + return ((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_OPCODE_SHIFT) & + IB_BTH_OPCODE_MASK); +} + #endif /* IB_HDRS_H */ -- cgit v1.2.3-55-g7522 From 3d591099a0a2b45a50913130f0599ab838002fc3 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Sun, 9 Apr 2017 10:16:28 -0700 Subject: IB/hfi1: Use defines from common headers Move FECN and BECN related defines to common header files Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/common.h | 6 ------ drivers/infiniband/hw/hfi1/driver.c | 8 ++++---- drivers/infiniband/hw/hfi1/hfi.h | 4 ++-- drivers/infiniband/hw/hfi1/rc.c | 2 +- drivers/infiniband/hw/hfi1/ruc.c | 2 +- drivers/infiniband/hw/hfi1/trace_ibhdrs.h | 8 ++++---- drivers/infiniband/hw/hfi1/ud.c | 2 +- include/rdma/ib_hdrs.h | 6 ++++++ 8 files changed, 19 insertions(+), 19 deletions(-) (limited to 'drivers/infiniband/hw/hfi1/hfi.h') diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 804150febec6..995d62c7f9a7 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -331,12 +331,6 @@ struct diag_pkt { #define FULL_MGMT_P_KEY 0xFFFF #define DEFAULT_P_KEY LIM_MGMT_P_KEY -#define HFI1_FECN_SHIFT 31 -#define HFI1_FECN_MASK 1 -#define HFI1_FECN_SMASK BIT(HFI1_FECN_SHIFT) -#define HFI1_BECN_SHIFT 30 -#define HFI1_BECN_MASK 1 -#define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT) /** * 0xF8 - 4 bits of multicast range and 1 bit for collective range diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 51db53b11d6c..500b129ed565 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ 
b/drivers/infiniband/hw/hfi1/driver.c @@ -496,13 +496,13 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, sc = hfi1_9B_get_sc5(hdr, pkt->rhf); bth1 = be32_to_cpu(ohdr->bth[1]); - if (do_cnp && (bth1 & HFI1_FECN_SMASK)) { + if (do_cnp && (bth1 & IB_FECN_SMASK)) { u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]); return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh); } - if (!is_mcast && (bth1 & HFI1_BECN_SMASK)) { + if (!is_mcast && (bth1 & IB_BECN_SMASK)) { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u32 lqpn = bth1 & RVT_QPN_MASK; u8 sl = ibp->sc_to_sl[sc]; @@ -635,7 +635,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) } bth1 = be32_to_cpu(packet->ohdr->bth[1]); - is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK)); + is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK)); if (!is_ecn) goto next; @@ -653,7 +653,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) rcu_read_unlock(); /* turn off BECN, FECN */ - bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK); + bth1 &= ~(IB_FECN_SMASK | IB_BECN_SMASK); packet->ohdr->bth[1] = cpu_to_be32(bth1); next: update_ps_mdata(&mdata, rcd); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index c32bc49cc0cb..550116fd9d48 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1648,9 +1648,9 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, u32 bth1; bth1 = be32_to_cpu(ohdr->bth[1]); - if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) { + if (unlikely(bth1 & (IB_BECN_SMASK | IB_FECN_SMASK))) { hfi1_process_ecn_slowpath(qp, pkt, do_cnp); - return bth1 & HFI1_FECN_SMASK; + return bth1 & IB_FECN_SMASK; } return false; } diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 623209a21927..da968b76ba62 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -773,7 +773,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp 
*qp, hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits); ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); - ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << HFI1_BECN_SHIFT); + ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << IB_BECN_SHIFT); ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn)); /* Don't try to send ACKs if the link isn't ACTIVE */ diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 27309157cef5..879eb9b31954 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -775,7 +775,7 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, if (qp->s_flags & RVT_S_ECN) { qp->s_flags &= ~RVT_S_ECN; /* we recently received a FECN, so return a BECN */ - bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT); + bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT); } ohdr->bth[1] = cpu_to_be32(bth1); ohdr->bth[2] = cpu_to_be32(bth2); diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index 382fcda3a5f6..090f6b506953 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -139,11 +139,11 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template, __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff; __entry->f = - (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) & - HFI1_FECN_MASK; + (be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) & + IB_FECN_MASK; __entry->b = - (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) & - HFI1_BECN_MASK; + (be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) & + IB_BECN_MASK; __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; __entry->a = diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 8b3403a1f199..45bc3f04793e 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -537,7 +537,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, bth0 = pkey | (IB_OPCODE_CNP << 24); 
ohdr->bth[0] = cpu_to_be32(bth0); - ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << HFI1_BECN_SHIFT)); + ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT)); ohdr->bth[2] = 0; /* PSN 0 */ hdr.lrh[0] = cpu_to_be16(lrh0); diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h index 9cf42bcc8317..5519f31f043a 100644 --- a/include/rdma/ib_hdrs.h +++ b/include/rdma/ib_hdrs.h @@ -74,6 +74,12 @@ #define IB_GRH_FLOW_MASK 0xFFFFF #define IB_GRH_FLOW_SHIFT 0 #define IB_GRH_NEXT_HDR 0x1B +#define IB_FECN_SHIFT 31 +#define IB_FECN_MASK 1 +#define IB_FECN_SMASK BIT(IB_FECN_SHIFT) +#define IB_BECN_SHIFT 30 +#define IB_BECN_MASK 1 +#define IB_BECN_SMASK BIT(IB_BECN_SHIFT) #define IB_AETH_CREDIT_SHIFT 24 #define IB_AETH_CREDIT_MASK 0x1F -- cgit v1.2.3-55-g7522 From 22546b741af8355cd2e16739b6af4a8f17081839 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Fri, 28 Apr 2017 10:40:02 -0700 Subject: IB/hfi1: Fix softlockup issue Soft lockups can occur because the mad processing on different CPUs acquire the spin lock dc8051_lock: [534552.835870] [] ? read_dev_port_cntr.isra.37+0x23/0x160 [hfi1] [534552.835880] [] read_dev_cntr+0x4f/0x60 [hfi1] [534552.835893] [] pma_get_opa_portstatus+0x64d/0x8c0 [hfi1] [534552.835904] [] hfi1_process_mad+0x48d/0x18c0 [hfi1] [534552.835908] [] ? __slab_free+0x81/0x2f0 [534552.835936] [] ? ib_mad_recv_done+0x21e/0xa30 [ib_core] [534552.835939] [] ? __kmalloc+0x1f3/0x240 [534552.835947] [] ib_mad_recv_done+0x2cb/0xa30 [ib_core] [534552.835955] [] __ib_process_cq+0x55/0xd0 [ib_core] [534552.835962] [] ib_cq_poll_work+0x20/0x60 [ib_core] [534552.835964] [] process_one_work+0x17b/0x470 [534552.835966] [] worker_thread+0x126/0x410 [534552.835969] [] ? rescuer_thread+0x460/0x460 [534552.835971] [] kthread+0xcf/0xe0 [534552.835974] [] ? kthread_create_on_node+0x140/0x140 [534552.835977] [] ret_from_fork+0x58/0x90 [534552.835980] [] ? kthread_create_on_node+0x140/0x140 This issue is made worse when the 8051 is busy and the reads take longer. 
Fix by using a non-spinning lock procedure. Reviewed-by: Michael J. Ruhl Reviewed-by: Mike Marciszyn Signed-off-by: Tadeusz Struk Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 86 +++++++++++++++++++++++---------------- drivers/infiniband/hw/hfi1/hfi.h | 7 ++-- drivers/infiniband/hw/hfi1/init.c | 2 +- 3 files changed, 57 insertions(+), 38 deletions(-) (limited to 'drivers/infiniband/hw/hfi1/hfi.h') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index e520929ac501..07aa76a74f64 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6410,18 +6410,17 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort) * * The expectation is that the caller of this routine would have taken * care of properly transitioning the link into the correct state. + * NOTE: the caller needs to acquire the dd->dc8051_lock lock + * before calling this function. */ -static void dc_shutdown(struct hfi1_devdata *dd) +static void _dc_shutdown(struct hfi1_devdata *dd) { - unsigned long flags; + lockdep_assert_held(&dd->dc8051_lock); - spin_lock_irqsave(&dd->dc8051_lock, flags); - if (dd->dc_shutdown) { - spin_unlock_irqrestore(&dd->dc8051_lock, flags); + if (dd->dc_shutdown) return; - } + dd->dc_shutdown = 1; - spin_unlock_irqrestore(&dd->dc8051_lock, flags); /* Shutdown the LCB */ lcb_shutdown(dd, 1); /* @@ -6432,35 +6431,45 @@ static void dc_shutdown(struct hfi1_devdata *dd) write_csr(dd, DC_DC8051_CFG_RST, 0x1); } +static void dc_shutdown(struct hfi1_devdata *dd) +{ + mutex_lock(&dd->dc8051_lock); + _dc_shutdown(dd); + mutex_unlock(&dd->dc8051_lock); +} + /* * Calling this after the DC has been brought out of reset should not * do any damage. + * NOTE: the caller needs to acquire the dd->dc8051_lock lock + * before calling this function.
*/ -static void dc_start(struct hfi1_devdata *dd) +static void _dc_start(struct hfi1_devdata *dd) { - unsigned long flags; - int ret; + lockdep_assert_held(&dd->dc8051_lock); - spin_lock_irqsave(&dd->dc8051_lock, flags); if (!dd->dc_shutdown) - goto done; - spin_unlock_irqrestore(&dd->dc8051_lock, flags); + return; + /* Take the 8051 out of reset */ write_csr(dd, DC_DC8051_CFG_RST, 0ull); /* Wait until 8051 is ready */ - ret = wait_fm_ready(dd, TIMEOUT_8051_START); - if (ret) { + if (wait_fm_ready(dd, TIMEOUT_8051_START)) dd_dev_err(dd, "%s: timeout starting 8051 firmware\n", __func__); - } + /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */ write_csr(dd, DCC_CFG_RESET, 0x10); /* lcb_shutdown() with abort=1 does not restore these */ write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en); - spin_lock_irqsave(&dd->dc8051_lock, flags); dd->dc_shutdown = 0; -done: - spin_unlock_irqrestore(&dd->dc8051_lock, flags); +} + +static void dc_start(struct hfi1_devdata *dd) +{ + mutex_lock(&dd->dc8051_lock); + _dc_start(dd); + mutex_unlock(&dd->dc8051_lock); } /* @@ -8513,16 +8522,11 @@ static int do_8051_command( { u64 reg, completed; int return_code; - unsigned long flags; unsigned long timeout; hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data); - /* - * Alternative to holding the lock for a long time: - * - keep busy wait - have other users bounce off - */ - spin_lock_irqsave(&dd->dc8051_lock, flags); + mutex_lock(&dd->dc8051_lock); /* We can't send any commands to the 8051 if it's in reset */ if (dd->dc_shutdown) { @@ -8548,10 +8552,8 @@ static int do_8051_command( return_code = -ENXIO; goto fail; } - spin_unlock_irqrestore(&dd->dc8051_lock, flags); - dc_shutdown(dd); - dc_start(dd); - spin_lock_irqsave(&dd->dc8051_lock, flags); + _dc_shutdown(dd); + _dc_start(dd); } /* @@ -8632,8 +8634,7 @@ static int do_8051_command( write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0); fail: - spin_unlock_irqrestore(&dd->dc8051_lock, flags); - + mutex_unlock(&dd->dc8051_lock); return 
return_code; } @@ -12007,6 +12008,10 @@ static void free_cntrs(struct hfi1_devdata *dd) dd->scntrs = NULL; kfree(dd->cntrnames); dd->cntrnames = NULL; + if (dd->update_cntr_wq) { + destroy_workqueue(dd->update_cntr_wq); + dd->update_cntr_wq = NULL; + } } static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry, @@ -12162,7 +12167,7 @@ u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data) return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data); } -static void update_synth_timer(unsigned long opaque) +static void do_update_synth_timer(struct work_struct *work) { u64 cur_tx; u64 cur_rx; @@ -12171,8 +12176,8 @@ static void update_synth_timer(unsigned long opaque) int i, j, vl; struct hfi1_pportdata *ppd; struct cntr_entry *entry; - - struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque; + struct hfi1_devdata *dd = container_of(work, struct hfi1_devdata, + update_cntr_work); /* * Rather than keep beating on the CSRs pick a minimal set that we can @@ -12255,7 +12260,13 @@ static void update_synth_timer(unsigned long opaque) } else { hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit); } +} +static void update_synth_timer(unsigned long opaque) +{ + struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque; + + queue_work(dd->update_cntr_wq, &dd->update_cntr_work); mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); } @@ -12491,6 +12502,13 @@ static int init_cntrs(struct hfi1_devdata *dd) if (init_cpu_counters(dd)) goto bail; + dd->update_cntr_wq = alloc_ordered_workqueue("hfi1_update_cntr_%d", + WQ_MEM_RECLAIM, dd->unit); + if (!dd->update_cntr_wq) + goto bail; + + INIT_WORK(&dd->update_cntr_work, do_update_synth_timer); + mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); return 0; bail: diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 550116fd9d48..d253ea2521a0 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ 
-484,7 +484,7 @@ struct rvt_sge_state; #define HFI1_PART_ENFORCE_OUT 0x2 /* how often we check for synthetic counter wrap around */ -#define SYNTH_CNT_TIME 2 +#define SYNTH_CNT_TIME 3 /* Counter flags */ #define CNTR_NORMAL 0x0 /* Normal counters, just read register */ @@ -962,8 +962,9 @@ struct hfi1_devdata { spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ /* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */ spinlock_t uctxt_lock; /* rcd and user context changes */ - /* exclusive access to 8051 */ - spinlock_t dc8051_lock; + struct mutex dc8051_lock; /* exclusive access to 8051 */ + struct workqueue_struct *update_cntr_wq; + struct work_struct update_cntr_work; /* exclusive access to 8051 memory */ spinlock_t dc8051_memlock; int dc8051_timed_out; /* remember if the 8051 timed out */ diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index b4c7e04f4578..21dca7ac059c 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1081,11 +1081,11 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) spin_lock_init(&dd->uctxt_lock); spin_lock_init(&dd->hfi1_diag_trans_lock); spin_lock_init(&dd->sc_init_lock); - spin_lock_init(&dd->dc8051_lock); spin_lock_init(&dd->dc8051_memlock); seqlock_init(&dd->sc2vl_lock); spin_lock_init(&dd->sde_map_lock); spin_lock_init(&dd->pio_map_lock); + mutex_init(&dd->dc8051_lock); init_waitqueue_head(&dd->event_queue); dd->int_counter = alloc_percpu(u64); -- cgit v1.2.3-55-g7522 From 98b9ee2002a836acacd34df8df92184b83049df3 Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Sun, 9 Apr 2017 10:16:53 -0700 Subject: IB/hfi1: Cache neighbor secure data after link up Secure data is transferred across the link during verify cap. This includes Neighbor Guid, Type, and Port Number. This transfer is not guaranteed to complete until the 8051 firmware has completed processing of the state_complete frame. 
Move the consumption of this data from verify cap handling to link up handling to ensure the data is finalized. Additionally, do not notify the SM that the link is up until after this data is actually available. Reviewed-by: Ira Weiny Reviewed-by: Easwar Hariharan Reviewed-by: Mike Marciniszyn Signed-off-by: Stuart Summers Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 27 +-------------------------- drivers/infiniband/hw/hfi1/hfi.h | 11 ++++++++++- drivers/infiniband/hw/hfi1/intr.c | 27 ++++++++++++++++----------- 3 files changed, 27 insertions(+), 38 deletions(-) (limited to 'drivers/infiniband/hw/hfi1/hfi.h') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index de33cb670182..b926d2ad47fb 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -7323,15 +7323,6 @@ void handle_verify_cap(struct work_struct *work) lcb_shutdown(dd, 0); adjust_lcb_for_fpga_serdes(dd); - /* - * These are now valid: - * remote VerifyCap fields in the general LNI config - * CSR DC8051_STS_REMOTE_GUID - * CSR DC8051_STS_REMOTE_NODE_TYPE - * CSR DC8051_STS_REMOTE_FM_SECURITY - * CSR DC8051_STS_REMOTE_PORT_NO - */ - read_vc_remote_phy(dd, &power_management, &continious); read_vc_remote_fabric(dd, &vau, &z, &vcu, &vl15buf, &partner_supported_crc); @@ -7462,20 +7453,6 @@ void handle_verify_cap(struct work_struct *work) write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */ set_8051_lcb_access(dd); - ppd->neighbor_guid = - read_csr(dd, DC_DC8051_STS_REMOTE_GUID); - ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) & - DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK; - ppd->neighbor_type = - read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) & - DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK; - ppd->neighbor_fm_security = - read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) & - DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK; - dd_dev_info(dd, - "Neighbor Guid: %llx Neighbor type 
%d MgmtAllowed %d FM security bypass %d\n", - ppd->neighbor_guid, ppd->neighbor_type, - ppd->mgmt_allowed, ppd->neighbor_fm_security); if (ppd->mgmt_allowed) add_full_mgmt_pkey(ppd); @@ -10535,11 +10512,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) goto unexpected; } - ppd->host_link_state = HLS_UP_INIT; ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000); if (ret) { - /* logical state didn't change, stay at going_up */ - ppd->host_link_state = HLS_GOING_UP; dd_dev_err(dd, "%s: logical state did not change to INIT\n", __func__); @@ -10553,6 +10527,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); handle_linkup_change(dd, 1); + ppd->host_link_state = HLS_UP_INIT; } break; case HLS_UP_ARMED: diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index d253ea2521a0..9d7c65c7f939 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1307,7 +1307,16 @@ int hfi1_reset_device(int); /* return the driver's idea of the logical OPA port state */ static inline u32 driver_lstate(struct hfi1_pportdata *ppd) { - return ppd->lstate; /* use the cached value */ + /* + * The driver does some processing from the time the logical + * link state is at INIT to the time the SM can be notified + * as such. Return IB_PORT_DOWN until the software state + * is ready. 
+ */ + if (ppd->lstate == IB_PORT_INIT && !(ppd->host_link_state & HLS_UP)) + return IB_PORT_DOWN; + else + return ppd->lstate; } void receive_interrupt_work(struct work_struct *work); diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 65348d16ab2f..232014d46f79 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -131,19 +131,24 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) if (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) { set_up_vl15(dd, dd->vau, dd->vl15_init); assign_remote_cm_au_table(dd, dd->vcu); - ppd->neighbor_guid = - read_csr(dd, DC_DC8051_STS_REMOTE_GUID); - ppd->neighbor_type = - read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) & - DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK; - ppd->neighbor_port_number = - read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) & - DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK; - dd_dev_info(dd, "Neighbor GUID: %llx Neighbor type %d\n", - ppd->neighbor_guid, - ppd->neighbor_type); } + ppd->neighbor_guid = + read_csr(dd, DC_DC8051_STS_REMOTE_GUID); + ppd->neighbor_type = + read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) & + DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK; + ppd->neighbor_port_number = + read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) & + DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK; + ppd->neighbor_fm_security = + read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) & + DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK; + dd_dev_info(dd, + "Neighbor Guid %llx, Type %d, Port Num %d\n", + ppd->neighbor_guid, ppd->neighbor_type, + ppd->neighbor_port_number); + /* physical link went up */ ppd->linkup = 1; ppd->offline_disabled_reason = -- cgit v1.2.3-55-g7522 From 4608e4c8f2417a5a2985da9890710033c434c387 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 9 Apr 2017 10:17:30 -0700 Subject: IB/hfi1: Use bool in process_ecn The process_ecn intends to return a bool value. However it is doing so incorrectly by ANDing the fecn mask. 
The fecn bit is bit 31. Bool is not a native data type, and it is up to the compiler to implement it how it sees fit. It is conceivable that this upper bit gets washed out. Fix by converting to a bool properly. Cc: stable@vger.kernel.org Fixes: fd2b562edca6 ("IB/hfi1: Pull FECN/BECN processing to a common place") Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 2 +- drivers/infiniband/hw/hfi1/rc.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw/hfi1/hfi.h') diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 9d7c65c7f939..f06674317abf 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1660,7 +1660,7 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, bth1 = be32_to_cpu(ohdr->bth[1]); if (unlikely(bth1 & (IB_BECN_SMASK | IB_FECN_SMASK))) { hfi1_process_ecn_slowpath(qp, pkt, do_cnp); - return bth1 & IB_FECN_SMASK; + return !!(bth1 & IB_FECN_SMASK); } return false; } diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index da968b76ba62..9b3333fd9dc0 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1930,7 +1930,8 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) int diff; struct ib_reth *reth; unsigned long flags; - int ret, is_fecn = 0; + int ret; + bool is_fecn = false; bool copy_last = false; u32 rkey; -- cgit v1.2.3-55-g7522