summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/mlx5
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile2
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c20
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h4
-rw-r--r--drivers/infiniband/hw/mlx5/cong.c421
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c8
-rw-r--r--drivers/infiniband/hw/mlx5/ib_virt.c9
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c4
-rw-r--r--drivers/infiniband/hw/mlx5/main.c458
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h86
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c121
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c171
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c33
12 files changed, 1179 insertions, 158 deletions
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 90ad2adc752f..bc6299697dda 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,4 +1,4 @@
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
-mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o
+mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 18d5e1db93ed..470995fa38d2 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -57,3 +57,23 @@ int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
MLX5_SET(query_cong_statistics_in, in, clear, reset);
return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
}
+
+int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
+ void *out, int out_size)
+{
+ u32 in[MLX5_ST_SZ_DW(query_cong_params_in)] = { };
+
+ MLX5_SET(query_cong_params_in, in, opcode,
+ MLX5_CMD_OP_QUERY_CONG_PARAMS);
+ MLX5_SET(query_cong_params_in, in, cong_protocol, cong_point);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
+}
+
+int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev,
+ void *in, int in_size)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_cong_params_out)] = { };
+
+ return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out));
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index fa09228193a6..af4c24596274 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -39,4 +39,8 @@
int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
bool reset, void *out, int out_size);
+int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
+ void *out, int out_size);
+int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
+ void *in, int in_size);
#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c
new file mode 100644
index 000000000000..2d32b519bb61
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright (c) 2013-2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/debugfs.h>
+
+#include "mlx5_ib.h"
+#include "cmd.h"
+
+enum mlx5_ib_cong_node_type {
+ MLX5_IB_RROCE_ECN_RP = 1,
+ MLX5_IB_RROCE_ECN_NP = 2,
+};
+
+static const char * const mlx5_ib_dbg_cc_name[] = {
+ "rp_clamp_tgt_rate",
+ "rp_clamp_tgt_rate_ati",
+ "rp_time_reset",
+ "rp_byte_reset",
+ "rp_threshold",
+ "rp_ai_rate",
+ "rp_hai_rate",
+ "rp_min_dec_fac",
+ "rp_min_rate",
+ "rp_rate_to_set_on_first_cnp",
+ "rp_dce_tcp_g",
+ "rp_dce_tcp_rtt",
+ "rp_rate_reduce_monitor_period",
+ "rp_initial_alpha_value",
+ "rp_gd",
+ "np_cnp_dscp",
+ "np_cnp_prio_mode",
+ "np_cnp_prio",
+};
+
+#define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR BIT(1)
+#define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR BIT(2)
+#define MLX5_IB_RP_TIME_RESET_ATTR BIT(3)
+#define MLX5_IB_RP_BYTE_RESET_ATTR BIT(4)
+#define MLX5_IB_RP_THRESHOLD_ATTR BIT(5)
+#define MLX5_IB_RP_AI_RATE_ATTR BIT(7)
+#define MLX5_IB_RP_HAI_RATE_ATTR BIT(8)
+#define MLX5_IB_RP_MIN_DEC_FAC_ATTR BIT(9)
+#define MLX5_IB_RP_MIN_RATE_ATTR BIT(10)
+#define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR BIT(11)
+#define MLX5_IB_RP_DCE_TCP_G_ATTR BIT(12)
+#define MLX5_IB_RP_DCE_TCP_RTT_ATTR BIT(13)
+#define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR BIT(14)
+#define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR BIT(15)
+#define MLX5_IB_RP_GD_ATTR BIT(16)
+
+#define MLX5_IB_NP_CNP_DSCP_ATTR BIT(3)
+#define MLX5_IB_NP_CNP_PRIO_MODE_ATTR BIT(4)
+
+static enum mlx5_ib_cong_node_type
+mlx5_ib_param_to_node(enum mlx5_ib_dbg_cc_types param_offset)
+{
+ if (param_offset >= MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE &&
+ param_offset <= MLX5_IB_DBG_CC_RP_GD)
+ return MLX5_IB_RROCE_ECN_RP;
+ else
+ return MLX5_IB_RROCE_ECN_NP;
+}
+
+static u32 mlx5_get_cc_param_val(void *field, int offset)
+{
+ switch (offset) {
+ case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ clamp_tgt_rate);
+ case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ clamp_tgt_rate_after_time_inc);
+ case MLX5_IB_DBG_CC_RP_TIME_RESET:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_time_reset);
+ case MLX5_IB_DBG_CC_RP_BYTE_RESET:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_byte_reset);
+ case MLX5_IB_DBG_CC_RP_THRESHOLD:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_threshold);
+ case MLX5_IB_DBG_CC_RP_AI_RATE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_ai_rate);
+ case MLX5_IB_DBG_CC_RP_HAI_RATE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_hai_rate);
+ case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_min_dec_fac);
+ case MLX5_IB_DBG_CC_RP_MIN_RATE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_min_rate);
+ case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rate_to_set_on_first_cnp);
+ case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ dce_tcp_g);
+ case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ dce_tcp_rtt);
+ case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rate_reduce_monitor_period);
+ case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ initial_alpha_value);
+ case MLX5_IB_DBG_CC_RP_GD:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_gd);
+ case MLX5_IB_DBG_CC_NP_CNP_DSCP:
+ return MLX5_GET(cong_control_r_roce_ecn_np, field,
+ cnp_dscp);
+ case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
+ return MLX5_GET(cong_control_r_roce_ecn_np, field,
+ cnp_prio_mode);
+ case MLX5_IB_DBG_CC_NP_CNP_PRIO:
+ return MLX5_GET(cong_control_r_roce_ecn_np, field,
+ cnp_802p_prio);
+ default:
+ return 0;
+ }
+}
+
+static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
+ u32 var, u32 *attr_mask)
+{
+ switch (offset) {
+ case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
+ *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ clamp_tgt_rate, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
+ *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ clamp_tgt_rate_after_time_inc, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_TIME_RESET:
+ *attr_mask |= MLX5_IB_RP_TIME_RESET_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_time_reset, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_BYTE_RESET:
+ *attr_mask |= MLX5_IB_RP_BYTE_RESET_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_byte_reset, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_THRESHOLD:
+ *attr_mask |= MLX5_IB_RP_THRESHOLD_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_threshold, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_AI_RATE:
+ *attr_mask |= MLX5_IB_RP_AI_RATE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_ai_rate, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_HAI_RATE:
+ *attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_hai_rate, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
+ *attr_mask |= MLX5_IB_RP_MIN_DEC_FAC_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_min_dec_fac, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_MIN_RATE:
+ *attr_mask |= MLX5_IB_RP_MIN_RATE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_min_rate, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
+ *attr_mask |= MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rate_to_set_on_first_cnp, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
+ *attr_mask |= MLX5_IB_RP_DCE_TCP_G_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ dce_tcp_g, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
+ *attr_mask |= MLX5_IB_RP_DCE_TCP_RTT_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ dce_tcp_rtt, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
+ *attr_mask |= MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rate_reduce_monitor_period, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
+ *attr_mask |= MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ initial_alpha_value, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_GD:
+ *attr_mask |= MLX5_IB_RP_GD_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_gd, var);
+ break;
+ case MLX5_IB_DBG_CC_NP_CNP_DSCP:
+ *attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_dscp, var);
+ break;
+ case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
+ *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var);
+ break;
+ case MLX5_IB_DBG_CC_NP_CNP_PRIO:
+ *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0);
+ MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var);
+ break;
+ }
+}
+
+static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, int offset, u32 *var)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out);
+ void *out;
+ void *field;
+ int err;
+ enum mlx5_ib_cong_node_type node;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ node = mlx5_ib_param_to_node(offset);
+
+ err = mlx5_cmd_query_cong_params(dev->mdev, node, out, outlen);
+ if (err)
+ goto free;
+
+ field = MLX5_ADDR_OF(query_cong_params_out, out, congestion_parameters);
+ *var = mlx5_get_cc_param_val(field, offset);
+
+free:
+ kvfree(out);
+ return err;
+}
+
+static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, int offset, u32 var)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in);
+ void *in;
+ void *field;
+ enum mlx5_ib_cong_node_type node;
+ u32 attr_mask = 0;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_cong_params_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_CONG_PARAMS);
+
+ node = mlx5_ib_param_to_node(offset);
+ MLX5_SET(modify_cong_params_in, in, cong_protocol, node);
+
+ field = MLX5_ADDR_OF(modify_cong_params_in, in, congestion_parameters);
+ mlx5_ib_set_cc_param_mask_val(field, offset, var, &attr_mask);
+
+ field = MLX5_ADDR_OF(modify_cong_params_in, in, field_select);
+ MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp,
+ attr_mask);
+
+ err = mlx5_cmd_modify_cong_params(dev->mdev, in, inlen);
+ kvfree(in);
+ return err;
+}
+
+static ssize_t set_param(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_ib_dbg_param *param = filp->private_data;
+ int offset = param->offset;
+ char lbuf[11] = { };
+ u32 var;
+ int ret;
+
+ if (count > sizeof(lbuf))
+ return -EINVAL;
+
+ if (copy_from_user(lbuf, buf, count))
+ return -EFAULT;
+
+ lbuf[sizeof(lbuf) - 1] = '\0';
+
+ if (kstrtou32(lbuf, 0, &var))
+ return -EINVAL;
+
+ ret = mlx5_ib_set_cc_params(param->dev, offset, var);
+ return ret ? ret : count;
+}
+
+static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_ib_dbg_param *param = filp->private_data;
+ int offset = param->offset;
+ u32 var = 0;
+ int ret;
+ char lbuf[11];
+
+ if (*pos)
+ return 0;
+
+ ret = mlx5_ib_get_cc_params(param->dev, offset, &var);
+ if (ret)
+ return ret;
+
+ ret = snprintf(lbuf, sizeof(lbuf), "%d\n", var);
+ if (ret < 0)
+ return ret;
+
+ if (copy_to_user(buf, lbuf, ret))
+ return -EFAULT;
+
+ *pos += ret;
+ return ret;
+}
+
+static const struct file_operations dbg_cc_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = set_param,
+ .read = get_param,
+};
+
+void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev)
+{
+ if (!mlx5_debugfs_root ||
+ !dev->dbg_cc_params ||
+ !dev->dbg_cc_params->root)
+ return;
+
+ debugfs_remove_recursive(dev->dbg_cc_params->root);
+ kfree(dev->dbg_cc_params);
+ dev->dbg_cc_params = NULL;
+}
+
+int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_dbg_cc_params *dbg_cc_params;
+ int i;
+
+ if (!mlx5_debugfs_root)
+ goto out;
+
+ if (!MLX5_CAP_GEN(dev->mdev, cc_query_allowed) ||
+ !MLX5_CAP_GEN(dev->mdev, cc_modify_allowed))
+ goto out;
+
+ dbg_cc_params = kzalloc(sizeof(*dbg_cc_params), GFP_KERNEL);
+ if (!dbg_cc_params)
+ goto out;
+
+ dev->dbg_cc_params = dbg_cc_params;
+
+ dbg_cc_params->root = debugfs_create_dir("cc_params",
+ dev->mdev->priv.dbg_root);
+ if (!dbg_cc_params->root)
+ goto err;
+
+ for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
+ dbg_cc_params->params[i].offset = i;
+ dbg_cc_params->params[i].dev = dev;
+ dbg_cc_params->params[i].dentry =
+ debugfs_create_file(mlx5_ib_dbg_cc_name[i],
+ 0600, dbg_cc_params->root,
+ &dbg_cc_params->params[i],
+ &dbg_cc_fops);
+ if (!dbg_cc_params->params[i].dentry)
+ goto err;
+ }
+out: return 0;
+
+err:
+ mlx5_ib_warn(dev, "cong debugfs failure\n");
+ mlx5_ib_cleanup_cong_debugfs(dev);
+ /*
+ * We don't want to fail driver if debugfs failed to initialize,
+ * so we are not forwarding error to the user.
+ */
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index a384d72ea3cd..2aa53f427685 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -499,7 +499,7 @@ static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries,
struct mlx5_ib_qp *qp;
*npolled = 0;
- /* Find uncompleted WQEs belonging to that cq and retrun mmics ones */
+ /* Find uncompleted WQEs belonging to that cq and return mmics ones */
list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
sw_send_comp(qp, num_entries, wc + *npolled, npolled);
if (*npolled >= num_entries)
@@ -751,10 +751,8 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
void *cqc;
int err;
- ucmdlen =
- (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
- sizeof(ucmd)) ? (sizeof(ucmd) -
- sizeof(ucmd.reserved)) : sizeof(ucmd);
+ ucmdlen = udata->inlen < sizeof(ucmd) ?
+ (sizeof(ucmd) - sizeof(ucmd.reserved)) : sizeof(ucmd);
if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
return -EFAULT;
diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c
index c1b9de800fe5..649a3364f838 100644
--- a/drivers/infiniband/hw/mlx5/ib_virt.c
+++ b/drivers/infiniband/hw/mlx5/ib_virt.c
@@ -96,6 +96,7 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *in;
+ struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
int err;
in = kzalloc(sizeof(*in), GFP_KERNEL);
@@ -109,6 +110,8 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
}
in->field_select = MLX5_HCA_VPORT_SEL_STATE_POLICY;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+ if (!err)
+ vfs_ctx[vf].policy = in->policy;
out:
kfree(in);
@@ -151,6 +154,7 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *in;
+ struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
int err;
in = kzalloc(sizeof(*in), GFP_KERNEL);
@@ -160,6 +164,8 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID;
in->node_guid = guid;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+ if (!err)
+ vfs_ctx[vf].node_guid = guid;
kfree(in);
return err;
}
@@ -169,6 +175,7 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *in;
+ struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
int err;
in = kzalloc(sizeof(*in), GFP_KERNEL);
@@ -178,6 +185,8 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID;
in->port_guid = guid;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+ if (!err)
+ vfs_ctx[vf].port_guid = guid;
kfree(in);
return err;
}
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 95db929bdc34..1003b0133a49 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -78,7 +78,7 @@ static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
u16 slid;
int err;
- slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
+ slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE);
if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0)
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
@@ -204,7 +204,7 @@ static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
int err;
void *out_cnt;
- /* Decalring support of extended counters */
+ /* Declaring support of extended counters */
if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) {
struct ib_class_port_info cpi = {};
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index f7fcde1ff0aa..ab3c562d5ba7 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -30,6 +30,7 @@
* SOFTWARE.
*/
+#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -58,6 +59,7 @@
#include <linux/mlx5/vport.h>
#include "mlx5_ib.h"
#include "cmd.h"
+#include <linux/mlx5/vport.h>
#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "5.0-0"
@@ -65,7 +67,6 @@
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION(DRIVER_VERSION);
static char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
@@ -97,6 +98,20 @@ mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
}
+static int get_port_state(struct ib_device *ibdev,
+ u8 port_num,
+ enum ib_port_state *state)
+{
+ struct ib_port_attr attr;
+ int ret;
+
+ memset(&attr, 0, sizeof(attr));
+ ret = mlx5_ib_query_port(ibdev, port_num, &attr);
+ if (!ret)
+ *state = attr.state;
+ return ret;
+}
+
static int mlx5_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -114,6 +129,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
write_unlock(&ibdev->roce.netdev_lock);
break;
+ case NETDEV_CHANGE:
case NETDEV_UP:
case NETDEV_DOWN: {
struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
@@ -127,10 +143,23 @@ static int mlx5_netdev_event(struct notifier_block *this,
if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
&& ibdev->ib_active) {
struct ib_event ibev = { };
+ enum ib_port_state port_state;
+ if (get_port_state(&ibdev->ib_dev, 1, &port_state))
+ return NOTIFY_DONE;
+
+ if (ibdev->roce.last_port_state == port_state)
+ return NOTIFY_DONE;
+
+ ibdev->roce.last_port_state = port_state;
ibev.device = &ibdev->ib_dev;
- ibev.event = (event == NETDEV_UP) ?
- IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+ if (port_state == IB_PORT_DOWN)
+ ibev.event = IB_EVENT_PORT_ERR;
+ else if (port_state == IB_PORT_ACTIVE)
+ ibev.event = IB_EVENT_PORT_ACTIVE;
+ else
+ return NOTIFY_DONE;
+
ibev.element.port_num = 1;
ib_dispatch_event(&ibev);
}
@@ -668,6 +697,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_UD_TSO;
}
+ if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) &&
+ MLX5_CAP_GEN(dev->mdev, general_notification_event))
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP;
+
+ if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
+ MLX5_CAP_IPOIB_ENHANCED(mdev, csum_cap))
+ props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
+
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
/* Legacy bit to support old userspace libraries */
@@ -740,6 +777,16 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
}
+ if (MLX5_CAP_GEN(mdev, tag_matching)) {
+ props->xrq_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
+ props->xrq_caps.max_num_tags =
+ (1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
+ props->xrq_caps.flags = IB_TM_CAP_RC;
+ props->xrq_caps.max_ops =
+ 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
+ props->xrq_caps.max_sge = MLX5_TM_MAX_SGE;
+ }
+
if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
resp.cqe_comp_caps.max_num =
MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
@@ -765,8 +812,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
uhw->outlen)) {
- resp.mlx5_ib_support_multi_pkt_send_wqes =
- MLX5_CAP_ETH(mdev, multi_pkt_send_wqe);
+ if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe))
+ resp.mlx5_ib_support_multi_pkt_send_wqes =
+ MLX5_IB_ALLOW_MPW;
+
+ if (MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe))
+ resp.mlx5_ib_support_multi_pkt_send_wqes |=
+ MLX5_IB_SUPPORT_EMPW;
+
resp.response_length +=
sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
}
@@ -774,6 +827,27 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (field_avail(typeof(resp), reserved, uhw->outlen))
resp.response_length += sizeof(resp.reserved);
+ if (field_avail(typeof(resp), sw_parsing_caps,
+ uhw->outlen)) {
+ resp.response_length += sizeof(resp.sw_parsing_caps);
+ if (MLX5_CAP_ETH(mdev, swp)) {
+ resp.sw_parsing_caps.sw_parsing_offloads |=
+ MLX5_IB_SW_PARSING;
+
+ if (MLX5_CAP_ETH(mdev, swp_csum))
+ resp.sw_parsing_caps.sw_parsing_offloads |=
+ MLX5_IB_SW_PARSING_CSUM;
+
+ if (MLX5_CAP_ETH(mdev, swp_lso))
+ resp.sw_parsing_caps.sw_parsing_offloads |=
+ MLX5_IB_SW_PARSING_LSO;
+
+ if (resp.sw_parsing_caps.sw_parsing_offloads)
+ resp.sw_parsing_caps.supported_qpts =
+ BIT(IB_QPT_RAW_PACKET);
+ }
+ }
+
if (uhw->outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
@@ -1144,7 +1218,7 @@ static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
return -EINVAL;
- mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, alloated %d, using %d sys pages\n",
+ mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, using %d sys pages\n",
MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no",
lib_uar_4k ? "yes" : "no", ref_bfregs,
req->total_num_bfregs, *num_sys_pages);
@@ -1193,6 +1267,45 @@ static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *con
return 0;
}
+static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
+{
+ int err;
+
+ err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);
+ if (err)
+ return err;
+
+ if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
+ !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
+ return err;
+
+ mutex_lock(&dev->lb_mutex);
+ dev->user_td++;
+
+ if (dev->user_td == 2)
+ err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
+
+ mutex_unlock(&dev->lb_mutex);
+ return err;
+}
+
+static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
+{
+ mlx5_core_dealloc_transport_domain(dev->mdev, tdn);
+
+ if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
+ !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
+ return;
+
+ mutex_lock(&dev->lb_mutex);
+ dev->user_td--;
+
+ if (dev->user_td < 2)
+ mlx5_nic_vport_update_local_lb(dev->mdev, false);
+
+ mutex_unlock(&dev->lb_mutex);
+}
+
static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
@@ -1203,7 +1316,6 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct mlx5_bfreg_info *bfregi;
int ver;
int err;
- size_t reqlen;
size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
max_cqe_version);
bool lib_uar_4k;
@@ -1211,18 +1323,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
- if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr))
- return ERR_PTR(-EINVAL);
-
- reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
- if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
+ if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
ver = 0;
- else if (reqlen >= min_req_v2)
+ else if (udata->inlen >= min_req_v2)
ver = 2;
else
return ERR_PTR(-EINVAL);
- err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req)));
+ err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
if (err)
return ERR_PTR(err);
@@ -1301,8 +1409,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
mutex_init(&context->upd_xlt_page_mutex);
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
- err = mlx5_core_alloc_transport_domain(dev->mdev,
- &context->tdn);
+ err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
if (err)
goto out_page;
}
@@ -1368,7 +1475,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
out_td:
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
- mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn);
out_page:
free_page(context->upd_xlt_page);
@@ -1396,7 +1503,7 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
bfregi = &context->bfregi;
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
- mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn);
free_page(context->upd_xlt_page);
deallocate_uars(dev, context);
@@ -2034,23 +2141,34 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
* it won't fall into the multicast flow steering table and this rule
* could steal other multicast packets.
*/
-static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr)
+static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
{
- struct ib_flow_spec_eth *eth_spec;
+ union ib_flow_spec *flow_spec;
if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
- ib_attr->size < sizeof(struct ib_flow_attr) +
- sizeof(struct ib_flow_spec_eth) ||
ib_attr->num_of_specs < 1)
return false;
- eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1);
- if (eth_spec->type != IB_FLOW_SPEC_ETH ||
- eth_spec->size != sizeof(*eth_spec))
+ flow_spec = (union ib_flow_spec *)(ib_attr + 1);
+ if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
+ struct ib_flow_spec_ipv4 *ipv4_spec;
+
+ ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
+ if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
+ return true;
+
return false;
+ }
+
+ if (flow_spec->type == IB_FLOW_SPEC_ETH) {
+ struct ib_flow_spec_eth *eth_spec;
- return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
- is_multicast_ether_addr(eth_spec->val.dst_mac);
+ eth_spec = (struct ib_flow_spec_eth *)flow_spec;
+ return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
+ is_multicast_ether_addr(eth_spec->val.dst_mac);
+ }
+
+ return false;
}
static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
@@ -2235,10 +2353,31 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
return err ? ERR_PTR(err) : prio;
}
-static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst)
+static void set_underlay_qp(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_spec *spec,
+ u32 underlay_qpn)
+{
+ void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
+ spec->match_criteria,
+ misc_parameters);
+ void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ if (underlay_qpn &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ ft_field_support.bth_dst_qp)) {
+ MLX5_SET(fte_match_set_misc,
+ misc_params_v, bth_dst_qp, underlay_qpn);
+ MLX5_SET(fte_match_set_misc,
+ misc_params_c, bth_dst_qp, 0xffffff);
+ }
+}
+
+static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst,
+ u32 underlay_qpn)
{
struct mlx5_flow_table *ft = ft_prio->flow_table;
struct mlx5_ib_flow_handler *handler;
@@ -2274,6 +2413,9 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
ib_flow += ((union ib_flow_spec *)ib_flow)->size;
}
+ if (!flow_is_multicast_only(flow_attr))
+ set_underlay_qp(dev, spec, underlay_qpn);
+
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
if (is_drop) {
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
@@ -2313,6 +2455,14 @@ free:
return err ? ERR_PTR(err) : handler;
}
+static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0);
+}
+
static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
struct ib_flow_attr *flow_attr,
@@ -2449,6 +2599,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
struct mlx5_ib_flow_prio *ft_prio;
int err;
+ int underlay_qpn;
if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
return ERR_PTR(-ENOMEM);
@@ -2489,8 +2640,10 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
handler = create_dont_trap_rule(dev, ft_prio,
flow_attr, dst);
} else {
- handler = create_flow_rule(dev, ft_prio, flow_attr,
- dst);
+ underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ?
+ mqp->underlay_qpn : 0;
+ handler = _create_flow_rule(dev, ft_prio, flow_attr,
+ dst, underlay_qpn);
}
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
@@ -2528,8 +2681,14 @@ unlock:
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(ibqp);
int err;
+ if (mqp->flags & MLX5_IB_QP_UNDERLAY) {
+ mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
if (err)
mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
@@ -2691,6 +2850,26 @@ static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
}
+static void delay_drop_handler(struct work_struct *work)
+{
+ int err;
+ struct mlx5_ib_delay_drop *delay_drop =
+ container_of(work, struct mlx5_ib_delay_drop,
+ delay_drop_work);
+
+ atomic_inc(&delay_drop->events_cnt);
+
+ mutex_lock(&delay_drop->lock);
+ err = mlx5_core_set_delay_drop(delay_drop->dev->mdev,
+ delay_drop->timeout);
+ if (err) {
+ mlx5_ib_warn(delay_drop->dev, "Failed to set delay drop, timeout=%u\n",
+ delay_drop->timeout);
+ delay_drop->activate = false;
+ }
+ mutex_unlock(&delay_drop->lock);
+}
+
static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param)
{
@@ -2743,8 +2922,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
ibev.event = IB_EVENT_CLIENT_REREGISTER;
port = (u8)param;
break;
+ case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
+ schedule_work(&ibdev->delay_drop.delay_drop_work);
+ goto out;
default:
- return;
+ goto out;
}
ibev.device = &ibdev->ib_dev;
@@ -2752,7 +2934,7 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
if (port < 1 || port > ibdev->num_ports) {
mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
- return;
+ goto out;
}
if (ibdev->ib_active)
@@ -2760,6 +2942,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
if (fatal)
ibdev->ib_active = false;
+
+out:
+ return;
}
static int set_has_smi_cap(struct mlx5_ib_dev *dev)
@@ -3042,7 +3227,7 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
attr.attr.max_sge = 1;
attr.attr.max_wr = 1;
attr.srq_type = IB_SRQT_XRC;
- attr.ext.xrc.cq = devr->c0;
+ attr.ext.cq = devr->c0;
attr.ext.xrc.xrcd = devr->x0;
devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
@@ -3057,9 +3242,9 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
devr->s0->srq_context = NULL;
devr->s0->srq_type = IB_SRQT_XRC;
devr->s0->ext.xrc.xrcd = devr->x0;
- devr->s0->ext.xrc.cq = devr->c0;
+ devr->s0->ext.cq = devr->c0;
atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
- atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
+ atomic_inc(&devr->s0->ext.cq->usecnt);
atomic_inc(&devr->p0->usecnt);
atomic_set(&devr->s0->usecnt, 0);
@@ -3078,9 +3263,9 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
devr->s1->event_handler = NULL;
devr->s1->srq_context = NULL;
devr->s1->srq_type = IB_SRQT_BASIC;
- devr->s1->ext.xrc.cq = devr->c0;
+ devr->s1->ext.cq = devr->c0;
atomic_inc(&devr->p0->usecnt);
- atomic_set(&devr->s0->usecnt, 0);
+ atomic_set(&devr->s1->usecnt, 0);
for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
INIT_WORK(&devr->ports[port].pkey_change_work,
@@ -3173,13 +3358,13 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
-static void get_dev_fw_str(struct ib_device *ibdev, char *str,
- size_t str_len)
+static void get_dev_fw_str(struct ib_device *ibdev, char *str)
{
struct mlx5_ib_dev *dev =
container_of(ibdev, struct mlx5_ib_dev, ib_dev);
- snprintf(str, str_len, "%d.%d.%04d", fw_rev_maj(dev->mdev),
- fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%04d",
+ fw_rev_maj(dev->mdev), fw_rev_min(dev->mdev),
+ fw_rev_sub(dev->mdev));
}
static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
@@ -3319,6 +3504,17 @@ static const struct mlx5_ib_counter cong_cnts[] = {
INIT_CONG_COUNTER(np_cnp_sent),
};
+static const struct mlx5_ib_counter extended_err_cnts[] = {
+ INIT_Q_COUNTER(resp_local_length_error),
+ INIT_Q_COUNTER(resp_cqe_error),
+ INIT_Q_COUNTER(req_cqe_error),
+ INIT_Q_COUNTER(req_remote_invalid_request),
+ INIT_Q_COUNTER(req_remote_access_errors),
+ INIT_Q_COUNTER(resp_remote_access_errors),
+ INIT_Q_COUNTER(resp_cqe_flush_error),
+ INIT_Q_COUNTER(req_cqe_flush_error),
+};
+
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
unsigned int i;
@@ -3343,6 +3539,10 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
num_counters += ARRAY_SIZE(retrans_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
+ num_counters += ARRAY_SIZE(extended_err_cnts);
+
cnts->num_q_counters = num_counters;
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
@@ -3392,6 +3592,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
}
}
+ if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
+ for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
+ names[j] = extended_err_cnts[i].name;
+ offsets[j] = extended_err_cnts[i].offset;
+ }
+ }
+
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
names[j] = cong_cnts[i].name;
@@ -3562,6 +3769,136 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
return netdev;
}
+static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!dev->delay_drop.dbg)
+ return;
+ debugfs_remove_recursive(dev->delay_drop.dbg->dir_debugfs);
+ kfree(dev->delay_drop.dbg);
+ dev->delay_drop.dbg = NULL;
+}
+
+static void cancel_delay_drop(struct mlx5_ib_dev *dev)
+{
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return;
+
+ cancel_work_sync(&dev->delay_drop.delay_drop_work);
+ delay_drop_debugfs_cleanup(dev);
+}
+
+static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
+ char lbuf[20];
+ int len;
+
+ len = snprintf(lbuf, sizeof(lbuf), "%u\n", delay_drop->timeout);
+ return simple_read_from_buffer(buf, count, pos, lbuf, len);
+}
+
+static ssize_t delay_drop_timeout_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
+ u32 timeout;
+ u32 var;
+
+ if (kstrtouint_from_user(buf, count, 0, &var))
+ return -EFAULT;
+
+ timeout = min_t(u32, roundup(var, 100), MLX5_MAX_DELAY_DROP_TIMEOUT_MS *
+ 1000);
+ if (timeout != var)
+ mlx5_ib_dbg(delay_drop->dev, "Round delay drop timeout to %u usec\n",
+ timeout);
+
+ delay_drop->timeout = timeout;
+
+ return count;
+}
+
+static const struct file_operations fops_delay_drop_timeout = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = delay_drop_timeout_write,
+ .read = delay_drop_timeout_read,
+};
+
+static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_dbg_delay_drop *dbg;
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ dbg = kzalloc(sizeof(*dbg), GFP_KERNEL);
+ if (!dbg)
+ return -ENOMEM;
+
+ dbg->dir_debugfs =
+ debugfs_create_dir("delay_drop",
+ dev->mdev->priv.dbg_root);
+ if (!dbg->dir_debugfs)
+ return -ENOMEM;
+
+ dbg->events_cnt_debugfs =
+ debugfs_create_atomic_t("num_timeout_events", 0400,
+ dbg->dir_debugfs,
+ &dev->delay_drop.events_cnt);
+ if (!dbg->events_cnt_debugfs)
+ goto out_debugfs;
+
+ dbg->rqs_cnt_debugfs =
+ debugfs_create_atomic_t("num_rqs", 0400,
+ dbg->dir_debugfs,
+ &dev->delay_drop.rqs_cnt);
+ if (!dbg->rqs_cnt_debugfs)
+ goto out_debugfs;
+
+ dbg->timeout_debugfs =
+ debugfs_create_file("timeout", 0600,
+ dbg->dir_debugfs,
+ &dev->delay_drop,
+ &fops_delay_drop_timeout);
+ if (!dbg->timeout_debugfs)
+ goto out_debugfs;
+
+ dev->delay_drop.dbg = dbg;
+
+ return 0;
+
+out_debugfs:
+ delay_drop_debugfs_cleanup(dev);
+ return -ENOMEM;
+}
+
+static void init_delay_drop(struct mlx5_ib_dev *dev)
+{
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return;
+
+ mutex_init(&dev->delay_drop.lock);
+ dev->delay_drop.dev = dev;
+ dev->delay_drop.activate = false;
+ dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
+ INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
+ atomic_set(&dev->delay_drop.rqs_cnt, 0);
+ atomic_set(&dev->delay_drop.events_cnt, 0);
+
+ if (delay_drop_debugfs_init(dev))
+ mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
+}
+
+static const struct cpumask *
+mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+
+ return mlx5_get_vector_affinity(dev->mdev, comp_vector);
+}
+
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_dev *dev;
@@ -3692,6 +4029,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
+ dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev;
@@ -3729,18 +4067,20 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
+ dev->ib_dev.create_flow = mlx5_ib_create_flow;
+ dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
+ dev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+
if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET) {
- dev->ib_dev.create_flow = mlx5_ib_create_flow;
- dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
dev->ib_dev.create_wq = mlx5_ib_create_wq;
dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
@@ -3760,6 +4100,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
err = mlx5_enable_eth(dev);
if (err)
goto err_free_port;
+ dev->roce.last_port_state = IB_PORT_DOWN;
}
err = create_dev_resources(&dev->devr);
@@ -3776,9 +4117,13 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
goto err_odp;
}
+ err = mlx5_ib_init_cong_debugfs(dev);
+ if (err)
+ goto err_cnt;
+
dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
if (!dev->mdev->priv.uar)
- goto err_cnt;
+ goto err_cong;
err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
if (err)
@@ -3796,18 +4141,25 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (err)
goto err_dev;
+ init_delay_drop(dev);
+
for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
err = device_create_file(&dev->ib_dev.dev,
mlx5_class_attributes[i]);
if (err)
- goto err_umrc;
+ goto err_delay_drop;
}
+ if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
+ MLX5_CAP_GEN(mdev, disable_local_lb))
+ mutex_init(&dev->lb_mutex);
+
dev->ib_active = true;
return dev;
-err_umrc:
+err_delay_drop:
+ cancel_delay_drop(dev);
destroy_umrc_res(dev);
err_dev:
@@ -3823,6 +4175,8 @@ err_uar_page:
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
err_cnt:
+ mlx5_ib_cleanup_cong_debugfs(dev);
+err_cong:
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
@@ -3852,11 +4206,13 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
struct mlx5_ib_dev *dev = context;
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
+ cancel_delay_drop(dev);
mlx5_remove_netdev_notifier(dev);
ib_unregister_device(&dev->ib_dev);
mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
+ mlx5_ib_cleanup_cong_debugfs(dev);
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
destroy_umrc_res(dev);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index bdcf25410c99..189e80cd6b2f 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -107,6 +107,11 @@ enum {
MLX5_CQE_VERSION_V1,
};
+enum {
+ MLX5_TM_MAX_RNDV_MSG_SIZE = 64,
+ MLX5_TM_MAX_SGE = 1,
+};
+
struct mlx5_ib_vma_private_data {
struct list_head list;
struct vm_area_struct *vma;
@@ -247,6 +252,10 @@ struct mlx5_ib_wq {
void *qend;
};
+enum mlx5_ib_wq_flags {
+ MLX5_IB_WQ_FLAGS_DELAY_DROP = 0x1,
+};
+
struct mlx5_ib_rwq {
struct ib_wq ibwq;
struct mlx5_core_qp core_qp;
@@ -264,6 +273,7 @@ struct mlx5_ib_rwq {
u32 wqe_count;
u32 wqe_shift;
int wq_sig;
+ u32 create_flags; /* Use enum mlx5_ib_wq_flags */
};
enum {
@@ -378,6 +388,7 @@ struct mlx5_ib_qp {
struct list_head cq_recv_list;
struct list_head cq_send_list;
u32 rate_limit;
+ u32 underlay_qpn;
};
struct mlx5_ib_cq_buf {
@@ -399,6 +410,7 @@ enum mlx5_ib_qp_flags {
MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
MLX5_IB_QP_RSS = 1 << 8,
MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9,
+ MLX5_IB_QP_UNDERLAY = 1 << 10,
};
struct mlx5_umr_wr {
@@ -496,7 +508,7 @@ struct mlx5_ib_mr {
struct mlx5_shared_mr_info *smr_info;
struct list_head list;
int order;
- int umred;
+ bool allocated_from_cache;
int npages;
struct mlx5_ib_dev *dev;
u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
@@ -616,6 +628,63 @@ struct mlx5_roce {
struct net_device *netdev;
struct notifier_block nb;
atomic_t next_port;
+ enum ib_port_state last_port_state;
+};
+
+struct mlx5_ib_dbg_param {
+ int offset;
+ struct mlx5_ib_dev *dev;
+ struct dentry *dentry;
+};
+
+enum mlx5_ib_dbg_cc_types {
+ MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE,
+ MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI,
+ MLX5_IB_DBG_CC_RP_TIME_RESET,
+ MLX5_IB_DBG_CC_RP_BYTE_RESET,
+ MLX5_IB_DBG_CC_RP_THRESHOLD,
+ MLX5_IB_DBG_CC_RP_AI_RATE,
+ MLX5_IB_DBG_CC_RP_HAI_RATE,
+ MLX5_IB_DBG_CC_RP_MIN_DEC_FAC,
+ MLX5_IB_DBG_CC_RP_MIN_RATE,
+ MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP,
+ MLX5_IB_DBG_CC_RP_DCE_TCP_G,
+ MLX5_IB_DBG_CC_RP_DCE_TCP_RTT,
+ MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD,
+ MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE,
+ MLX5_IB_DBG_CC_RP_GD,
+ MLX5_IB_DBG_CC_NP_CNP_DSCP,
+ MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE,
+ MLX5_IB_DBG_CC_NP_CNP_PRIO,
+ MLX5_IB_DBG_CC_MAX,
+};
+
+struct mlx5_ib_dbg_cc_params {
+ struct dentry *root;
+ struct mlx5_ib_dbg_param params[MLX5_IB_DBG_CC_MAX];
+};
+
+enum {
+ MLX5_MAX_DELAY_DROP_TIMEOUT_MS = 100,
+};
+
+struct mlx5_ib_dbg_delay_drop {
+ struct dentry *dir_debugfs;
+ struct dentry *rqs_cnt_debugfs;
+ struct dentry *events_cnt_debugfs;
+ struct dentry *timeout_debugfs;
+};
+
+struct mlx5_ib_delay_drop {
+ struct mlx5_ib_dev *dev;
+ struct work_struct delay_drop_work;
+ /* serialize setting of delay drop */
+ struct mutex lock;
+ u32 timeout;
+ bool activate;
+ atomic_t events_cnt;
+ atomic_t rqs_cnt;
+ struct mlx5_ib_dbg_delay_drop *dbg;
};
struct mlx5_ib_dev {
@@ -652,9 +721,15 @@ struct mlx5_ib_dev {
struct list_head qp_list;
/* Array with num_ports elements */
struct mlx5_ib_port *port;
- struct mlx5_sq_bfreg bfreg;
- struct mlx5_sq_bfreg fp_bfreg;
- u8 umr_fence;
+ struct mlx5_sq_bfreg bfreg;
+ struct mlx5_sq_bfreg fp_bfreg;
+ struct mlx5_ib_delay_drop delay_drop;
+ struct mlx5_ib_dbg_cc_params *dbg_cc_params;
+
+ /* protect the user_td */
+ struct mutex lb_mutex;
+ u32 user_td;
+ u8 umr_fence;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -904,6 +979,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
int index, enum ib_gid_type *gid_type);
+void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev);
+int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev);
+
/* GSI QP helper functions */
struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 2c40a2e989d2..0e2789d9bb4d 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -48,7 +48,7 @@ enum {
#define MLX5_UMR_ALIGN 2048
static int clean_mr(struct mlx5_ib_mr *mr);
-static int use_umr(struct mlx5_ib_dev *dev, int order);
+static int mr_cache_max_order(struct mlx5_ib_dev *dev);
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
@@ -183,7 +183,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
break;
}
mr->order = ent->order;
- mr->umred = 1;
+ mr->allocated_from_cache = 1;
mr->dev = dev;
MLX5_SET(mkc, mkc, free, 1);
@@ -491,16 +491,18 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_ib_mr *mr = NULL;
struct mlx5_cache_ent *ent;
+ int last_umr_cache_entry;
int c;
int i;
c = order2idx(dev, order);
- if (c < 0 || c > MAX_UMR_CACHE_ENTRY) {
+ last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev));
+ if (c < 0 || c > last_umr_cache_entry) {
mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
return NULL;
}
- for (i = c; i < MAX_UMR_CACHE_ENTRY; i++) {
+ for (i = c; i <= last_umr_cache_entry; i++) {
ent = &cache->ent[i];
mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
@@ -674,12 +676,12 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
queue_work(cache->wq, &ent->work);
- if (i > MAX_UMR_CACHE_ENTRY) {
+ if (i > MR_CACHE_LAST_STD_ENTRY) {
mlx5_odp_init_mr_cache_entry(ent);
continue;
}
- if (!use_umr(dev, ent->order))
+ if (ent->order > mr_cache_max_order(dev))
continue;
ent->page = PAGE_SHIFT;
@@ -806,21 +808,22 @@ err_free:
return ERR_PTR(err);
}
-static int get_octo_len(u64 addr, u64 len, int page_size)
+static int get_octo_len(u64 addr, u64 len, int page_shift)
{
+ u64 page_size = 1ULL << page_shift;
u64 offset;
int npages;
offset = addr & (page_size - 1);
- npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
+ npages = ALIGN(len + offset, page_size) >> page_shift;
return (npages + 1) / 2;
}
-static int use_umr(struct mlx5_ib_dev *dev, int order)
+static int mr_cache_max_order(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
- return order <= MAX_UMR_CACHE_ENTRY + 2;
- return order <= MLX5_MAX_UMR_SHIFT;
+ return MR_CACHE_LAST_STD_ENTRY + 2;
+ return MLX5_MAX_UMR_SHIFT;
}
static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
@@ -896,7 +899,8 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
return err;
}
-static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
+static struct mlx5_ib_mr *alloc_mr_from_cache(
+ struct ib_pd *pd, struct ib_umem *umem,
u64 virt_addr, u64 len, int npages,
int page_shift, int order, int access_flags)
{
@@ -928,16 +932,6 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
mr->mmkey.size = len;
mr->mmkey.pd = to_mpd(pd)->pdn;
- err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
- MLX5_IB_UPD_XLT_ENABLE);
-
- if (err) {
- mlx5_mr_cache_free(dev, mr);
- return ERR_PTR(err);
- }
-
- mr->live = 1;
-
return mr;
}
@@ -1103,7 +1097,8 @@ free_xlt:
static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
u64 virt_addr, u64 length,
struct ib_umem *umem, int npages,
- int page_shift, int access_flags)
+ int page_shift, int access_flags,
+ bool populate)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr;
@@ -1118,15 +1113,19 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
if (!mr)
return ERR_PTR(-ENOMEM);
- inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
- sizeof(*pas) * ((npages + 1) / 2) * 2;
+ mr->ibmr.pd = pd;
+ mr->access_flags = access_flags;
+
+ inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ if (populate)
+ inlen += sizeof(*pas) * roundup(npages, 2);
in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err_1;
}
pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
- if (!(access_flags & IB_ACCESS_ON_DEMAND))
+ if (populate && !(access_flags & IB_ACCESS_ON_DEMAND))
mlx5_ib_populate_pas(dev, umem, page_shift, pas,
pg_cap ? MLX5_IB_MTT_PRESENT : 0);
@@ -1135,23 +1134,27 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, free, !populate);
MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET64(mkc, mkc, start_addr, virt_addr);
MLX5_SET64(mkc, mkc, len, length);
MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
MLX5_SET(mkc, mkc, bsf_octword_size, 0);
MLX5_SET(mkc, mkc, translations_octword_size,
- get_octo_len(virt_addr, length, 1 << page_shift));
+ get_octo_len(virt_addr, length, page_shift));
MLX5_SET(mkc, mkc, log_page_size, page_shift);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
- get_octo_len(virt_addr, length, 1 << page_shift));
+ if (populate) {
+ MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
+ get_octo_len(virt_addr, length, page_shift));
+ }
err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
if (err) {
@@ -1160,9 +1163,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
}
mr->mmkey.type = MLX5_MKEY_MR;
mr->desc_size = sizeof(struct mlx5_mtt);
- mr->umem = umem;
mr->dev = dev;
- mr->live = 1;
kvfree(in);
mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
@@ -1202,6 +1203,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int ncont;
int order;
int err;
+ bool use_umr = true;
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
@@ -1220,27 +1222,29 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
&page_shift, &ncont, &order);
- if (err < 0)
+ if (err < 0)
return ERR_PTR(err);
- if (use_umr(dev, order)) {
- mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
- order, access_flags);
+ if (order <= mr_cache_max_order(dev)) {
+ mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
+ page_shift, order, access_flags);
if (PTR_ERR(mr) == -EAGAIN) {
mlx5_ib_dbg(dev, "cache empty for order %d", order);
mr = NULL;
}
- } else if (access_flags & IB_ACCESS_ON_DEMAND &&
- !MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
- err = -EINVAL;
- pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
- goto error;
+ } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
+ if (access_flags & IB_ACCESS_ON_DEMAND) {
+ err = -EINVAL;
+ pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
+ goto error;
+ }
+ use_umr = false;
}
if (!mr) {
mutex_lock(&dev->slow_path_mutex);
mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
- page_shift, access_flags);
+ page_shift, access_flags, !use_umr);
mutex_unlock(&dev->slow_path_mutex);
}
@@ -1258,8 +1262,22 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
update_odp_mr(mr);
#endif
- return &mr->ibmr;
+ if (use_umr) {
+ int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
+ if (access_flags & IB_ACCESS_ON_DEMAND)
+ update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP;
+
+ err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
+ update_xlt_flags);
+ if (err) {
+ mlx5_ib_dereg_mr(&mr->ibmr);
+ return ERR_PTR(err);
+ }
+ }
+
+ mr->live = 1;
+ return &mr->ibmr;
error:
ib_umem_release(umem);
return ERR_PTR(err);
@@ -1347,7 +1365,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
/*
* UMR can't be used - MKey needs to be replaced.
*/
- if (mr->umred) {
+ if (mr->allocated_from_cache) {
err = unreg_umr(dev, mr);
if (err)
mlx5_ib_warn(dev, "Failed to unregister MR\n");
@@ -1360,12 +1378,13 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
return err;
mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
- page_shift, access_flags);
+ page_shift, access_flags, true);
if (IS_ERR(mr))
return PTR_ERR(mr);
- mr->umred = 0;
+ mr->allocated_from_cache = 0;
+ mr->live = 1;
} else {
/*
* Send a UMR WQE
@@ -1453,7 +1472,7 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
static int clean_mr(struct mlx5_ib_mr *mr)
{
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
- int umred = mr->umred;
+ int allocated_from_cache = mr->allocated_from_cache;
int err;
if (mr->sig) {
@@ -1471,20 +1490,20 @@ static int clean_mr(struct mlx5_ib_mr *mr)
mlx5_free_priv_descs(mr);
- if (!umred) {
+ if (!allocated_from_cache) {
+ u32 key = mr->mmkey.key;
+
err = destroy_mkey(dev, mr);
+ kfree(mr);
if (err) {
mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
- mr->mmkey.key, err);
+ key, err);
return err;
}
} else {
mlx5_mr_cache_free(dev, mr);
}
- if (!umred)
- kfree(mr);
-
return 0;
}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index f58f8f5f3ebe..acb79d3a4f1d 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -34,6 +34,7 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_user_verbs.h>
+#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
/* not supported currently */
@@ -453,7 +454,8 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
return -EINVAL;
}
- if (attr->qp_type == IB_QPT_RAW_PACKET) {
+ if (attr->qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) {
base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
} else {
@@ -675,10 +677,14 @@ err_umem:
return err;
}
-static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq)
+static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_rwq *rwq)
{
struct mlx5_ib_ucontext *context;
+ if (rwq->create_flags & MLX5_IB_WQ_FLAGS_DELAY_DROP)
+ atomic_dec(&dev->delay_drop.rqs_cnt);
+
context = to_mucontext(pd->uobject->context);
mlx5_ib_db_unmap_user(context, &rwq->db);
if (rwq->umem)
@@ -959,11 +965,16 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
goto err_free;
}
- qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid), GFP_KERNEL);
- qp->sq.wr_data = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data), GFP_KERNEL);
- qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(*qp->rq.wrid), GFP_KERNEL);
- qp->sq.w_list = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.w_list), GFP_KERNEL);
- qp->sq.wqe_head = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head), GFP_KERNEL);
+ qp->sq.wrid = kvmalloc_array(qp->sq.wqe_cnt,
+ sizeof(*qp->sq.wrid), GFP_KERNEL);
+ qp->sq.wr_data = kvmalloc_array(qp->sq.wqe_cnt,
+ sizeof(*qp->sq.wr_data), GFP_KERNEL);
+ qp->rq.wrid = kvmalloc_array(qp->rq.wqe_cnt,
+ sizeof(*qp->rq.wrid), GFP_KERNEL);
+ qp->sq.w_list = kvmalloc_array(qp->sq.wqe_cnt,
+ sizeof(*qp->sq.w_list), GFP_KERNEL);
+ qp->sq.wqe_head = kvmalloc_array(qp->sq.wqe_cnt,
+ sizeof(*qp->sq.wqe_head), GFP_KERNEL);
if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid ||
!qp->sq.w_list || !qp->sq.wqe_head) {
@@ -975,11 +986,11 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
return 0;
err_wrid:
- kfree(qp->sq.wqe_head);
- kfree(qp->sq.w_list);
- kfree(qp->sq.wrid);
- kfree(qp->sq.wr_data);
- kfree(qp->rq.wrid);
+ kvfree(qp->sq.wqe_head);
+ kvfree(qp->sq.w_list);
+ kvfree(qp->sq.wrid);
+ kvfree(qp->sq.wr_data);
+ kvfree(qp->rq.wrid);
mlx5_db_free(dev->mdev, &qp->db);
err_free:
@@ -992,11 +1003,11 @@ err_buf:
static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
- kfree(qp->sq.wqe_head);
- kfree(qp->sq.w_list);
- kfree(qp->sq.wrid);
- kfree(qp->sq.wr_data);
- kfree(qp->rq.wrid);
+ kvfree(qp->sq.wqe_head);
+ kvfree(qp->sq.w_list);
+ kvfree(qp->sq.wrid);
+ kvfree(qp->sq.wr_data);
+ kvfree(qp->rq.wrid);
mlx5_db_free(dev->mdev, &qp->db);
mlx5_buf_free(dev->mdev, &qp->buf);
}
@@ -1021,12 +1032,16 @@ static int is_connected(enum ib_qp_type qp_type)
}
static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
struct mlx5_ib_sq *sq, u32 tdn)
{
u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
MLX5_SET(tisc, tisc, transport_domain, tdn);
+ if (qp->flags & MLX5_IB_QP_UNDERLAY)
+ MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);
+
return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
}
@@ -1068,11 +1083,16 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+ if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe))
+ MLX5_SET(sqc, sqc, allow_multi_pkt_send_wqe, 1);
MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index));
MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd));
MLX5_SET(sqc, sqc, tis_lst_sz, 1);
MLX5_SET(sqc, sqc, tis_num_0, sq->tisn);
+ if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+ MLX5_CAP_ETH(dev->mdev, swp))
+ MLX5_SET(sqc, sqc, allow_swp, 1);
wq = MLX5_ADDR_OF(sqc, sqc, wq);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
@@ -1229,7 +1249,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
u32 tdn = mucontext->tdn;
if (qp->sq.wqe_cnt) {
- err = create_raw_packet_qp_tis(dev, sq, tdn);
+ err = create_raw_packet_qp_tis(dev, qp, sq, tdn);
if (err)
return err;
@@ -1503,10 +1523,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
u32 *in;
int err;
- base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
- &qp->raw_packet_qp.rq.base :
- &qp->trans_qp.base;
-
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
@@ -1588,10 +1604,28 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
+
+ if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) {
+ if (init_attr->qp_type != IB_QPT_UD ||
+ (MLX5_CAP_GEN(dev->mdev, port_type) !=
+ MLX5_CAP_PORT_TYPE_IB) ||
+ !mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) {
+ mlx5_ib_dbg(dev, "Source QP option isn't supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ qp->flags |= MLX5_IB_QP_UNDERLAY;
+ qp->underlay_qpn = init_attr->source_qpn;
+ }
} else {
qp->wq_sig = !!wq_signature;
}
+ base = (init_attr->qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) ?
+ &qp->raw_packet_qp.rq.base :
+ &qp->trans_qp.base;
+
qp->has_rq = qp_has_rq(init_attr);
err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
qp, (pd && pd->uobject) ? &ucmd : NULL);
@@ -1695,10 +1729,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));
- if (qp->sq.wqe_cnt)
+ if (qp->sq.wqe_cnt) {
MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
- else
+ } else {
MLX5_SET(qpc, qpc, no_sq, 1);
+ if (init_attr->srq &&
+ init_attr->srq->srq_type == IB_SRQT_TM)
+ MLX5_SET(qpc, qpc, offload_type,
+ MLX5_QPC_OFFLOAD_TYPE_RNDV);
+ }
/* Set default resources */
switch (init_attr->qp_type) {
@@ -1742,7 +1781,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->flags |= MLX5_IB_QP_LSO;
}
- if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+ if (init_attr->qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) {
qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
err = create_raw_packet_qp(dev, qp, in, pd);
@@ -1894,7 +1934,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_cq *send_cq, *recv_cq;
- struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
+ struct mlx5_ib_qp_base *base;
unsigned long flags;
int err;
@@ -1903,12 +1943,14 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
return;
}
- base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
+ base = (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) ?
&qp->raw_packet_qp.rq.base :
&qp->trans_qp.base;
if (qp->state != IB_QPS_RESET) {
- if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
+ if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET &&
+ !(qp->flags & MLX5_IB_QP_UNDERLAY)) {
err = mlx5_core_qp_modify(dev->mdev,
MLX5_CMD_OP_2RST_QP, 0,
NULL, &base->mqp);
@@ -1947,7 +1989,8 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
mlx5_ib_unlock_cqs(send_cq, recv_cq);
spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) {
destroy_raw_packet_qp(dev, qp);
} else {
err = mlx5_core_destroy_qp(dev->mdev, &base->mqp);
@@ -2703,7 +2746,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (is_sqp(ibqp->qp_type)) {
context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
- } else if (ibqp->qp_type == IB_QPT_UD ||
+ } else if ((ibqp->qp_type == IB_QPT_UD &&
+ !(qp->flags & MLX5_IB_QP_UNDERLAY)) ||
ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
} else if (attr_mask & IB_QP_PATH_MTU) {
@@ -2800,6 +2844,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
qp->port) - 1;
+
+ /* Underlay port should be used - index 0 function per port */
+ if (qp->flags & MLX5_IB_QP_UNDERLAY)
+ port_num = 0;
+
mibport = &dev->port[port_num];
context->qp_counter_set_usr_page |=
cpu_to_be32((u32)(mibport->cnts.set_id) << 24);
@@ -2825,7 +2874,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
optpar = ib_mask_to_mlx5_opt(attr_mask);
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) {
struct mlx5_modify_raw_qp_param raw_qp_param = {};
raw_qp_param.operation = op;
@@ -2914,7 +2964,13 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
}
- if (qp_type != MLX5_IB_QPT_REG_UMR &&
+ if (qp->flags & MLX5_IB_QP_UNDERLAY) {
+ if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) {
+ mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n",
+ attr_mask);
+ goto out;
+ }
+ } else if (qp_type != MLX5_IB_QPT_REG_UMR &&
!ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) {
mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
cur_state, new_state, ibqp->qp_type, attr_mask);
@@ -4478,9 +4534,14 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
qp_init_attr);
+ /* Not all of output fields are applicable, make sure to zero them */
+ memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+ memset(qp_attr, 0, sizeof(*qp_attr));
+
mutex_lock(&qp->mutex);
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) {
err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
if (err)
goto out;
@@ -4598,6 +4659,27 @@ static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
}
}
+static int set_delay_drop(struct mlx5_ib_dev *dev)
+{
+ int err = 0;
+
+ mutex_lock(&dev->delay_drop.lock);
+ if (dev->delay_drop.activate)
+ goto out;
+
+ err = mlx5_core_set_delay_drop(dev->mdev, dev->delay_drop.timeout);
+ if (err)
+ goto out;
+
+ dev->delay_drop.activate = true;
+out:
+ mutex_unlock(&dev->delay_drop.lock);
+
+ if (!err)
+ atomic_inc(&dev->delay_drop.rqs_cnt);
+ return err;
+}
+
static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
struct ib_wq_init_attr *init_attr)
{
@@ -4652,9 +4734,28 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
}
MLX5_SET(rqc, rqc, scatter_fcs, 1);
}
+ if (init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
+ if (!(dev->ib_dev.attrs.raw_packet_caps &
+ IB_RAW_PACKET_CAP_DELAY_DROP)) {
+ mlx5_ib_dbg(dev, "Delay drop is not supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ MLX5_SET(rqc, rqc, delay_drop_en, 1);
+ }
rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp);
+ if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
+ err = set_delay_drop(dev);
+ if (err) {
+ mlx5_ib_warn(dev, "Failed to enable delay drop err=%d\n",
+ err);
+ mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
+ } else {
+ rwq->create_flags |= MLX5_IB_WQ_FLAGS_DELAY_DROP;
+ }
+ }
out:
kvfree(in);
return err;
@@ -4788,7 +4889,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
err_copy:
mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
err_user_rq:
- destroy_user_rq(pd, rwq);
+ destroy_user_rq(dev, pd, rwq);
err:
kfree(rwq);
return ERR_PTR(err);
@@ -4800,7 +4901,7 @@ int mlx5_ib_destroy_wq(struct ib_wq *wq)
struct mlx5_ib_rwq *rwq = to_mrwq(wq);
mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
- destroy_user_rq(wq->pd, rwq);
+ destroy_user_rq(dev, wq->pd, rwq);
kfree(rwq);
return 0;
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 43707b101f47..6d5fadad9090 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -101,7 +101,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
udata->inlen - sizeof(ucmd)))
return -EINVAL;
- if (in->type == IB_SRQT_XRC) {
+ if (in->type != IB_SRQT_BASIC) {
err = get_srq_user_index(to_mucontext(pd->uobject->context),
&ucmd, udata->inlen, &uidx);
if (err)
@@ -145,7 +145,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
in->page_offset = offset;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
- in->type == IB_SRQT_XRC)
+ in->type != IB_SRQT_BASIC)
in->user_index = uidx;
return 0;
@@ -196,7 +196,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
}
mlx5_fill_page_array(&srq->buf, in->pas);
- srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL);
+ srq->wrid = kvmalloc_array(srq->msrq.max, sizeof(u64), GFP_KERNEL);
if (!srq->wrid) {
err = -ENOMEM;
goto err_in;
@@ -205,7 +205,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
- in->type == IB_SRQT_XRC)
+ in->type != IB_SRQT_BASIC)
in->user_index = MLX5_IB_DEFAULT_UIDX;
return 0;
@@ -230,7 +230,7 @@ static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq)
static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq)
{
- kfree(srq->wrid);
+ kvfree(srq->wrid);
mlx5_buf_free(dev->mdev, &srq->buf);
mlx5_db_free(dev->mdev, &srq->db);
}
@@ -292,14 +292,29 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
in.wqe_shift = srq->msrq.wqe_shift - 4;
if (srq->wq_sig)
in.flags |= MLX5_SRQ_FLAG_WQ_SIG;
- if (init_attr->srq_type == IB_SRQT_XRC) {
+
+ if (init_attr->srq_type == IB_SRQT_XRC)
in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
- in.cqn = to_mcq(init_attr->ext.xrc.cq)->mcq.cqn;
- } else if (init_attr->srq_type == IB_SRQT_BASIC) {
+ else
in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn;
- in.cqn = to_mcq(dev->devr.c0)->mcq.cqn;
+
+ if (init_attr->srq_type == IB_SRQT_TM) {
+ in.tm_log_list_size =
+ ilog2(init_attr->ext.tag_matching.max_num_tags) + 1;
+ if (in.tm_log_list_size >
+ MLX5_CAP_GEN(dev->mdev, log_tag_matching_list_sz)) {
+ mlx5_ib_dbg(dev, "TM SRQ max_num_tags exceeding limit\n");
+ err = -EINVAL;
+ goto err_usr_kern_srq;
+ }
+ in.flags |= MLX5_SRQ_FLAG_RNDV;
}
+ if (ib_srq_has_cq(init_attr->srq_type))
+ in.cqn = to_mcq(init_attr->ext.cq)->mcq.cqn;
+ else
+ in.cqn = to_mcq(dev->devr.c0)->mcq.cqn;
+
in.pd = to_mpd(pd)->pdn;
in.db_record = srq->db.dma;
err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in);