summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx5/core
diff options
context:
space:
mode:
authorLinus Torvalds2018-12-27 22:04:52 +0100
committerLinus Torvalds2018-12-27 22:04:52 +0100
commite0c38a4d1f196a4b17d2eba36afff8f656a4f1de (patch)
treeb26a69fabef0160adb127416a9744217700feeb7 /drivers/net/ethernet/mellanox/mlx5/core
parentMerge tag 'modules-for-v4.21' of git://git.kernel.org/pub/scm/linux/kernel/gi... (diff)
parentMerge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (diff)
downloadkernel-qcow2-linux-e0c38a4d1f196a4b17d2eba36afff8f656a4f1de.tar.gz
kernel-qcow2-linux-e0c38a4d1f196a4b17d2eba36afff8f656a4f1de.tar.xz
kernel-qcow2-linux-e0c38a4d1f196a4b17d2eba36afff8f656a4f1de.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) New ipset extensions for matching on destination MAC addresses, from Stefano Brivio. 2) Add ipv4 ttl and tos, plus ipv6 flow label and hop limit offloads to nfp driver. From Stefano Brivio. 3) Implement GRO for plain UDP sockets, from Paolo Abeni. 4) Lots of work from Michał Mirosław to eliminate the VLAN_TAG_PRESENT bit so that we could support the entire vlan_tci value. 5) Rework the IPSEC policy lookups to better optimize more usecases, from Florian Westphal. 6) Infrastructure changes eliminating direct manipulation of SKB lists wherever possible, and to always use the appropriate SKB list helpers. This work is still ongoing... 7) Lots of PHY driver and state machine improvements and simplifications, from Heiner Kallweit. 8) Various TSO deferral refinements, from Eric Dumazet. 9) Add ntuple filter support to aquantia driver, from Dmitry Bogdanov. 10) Batch dropping of XDP packets in tuntap, from Jason Wang. 11) Lots of cleanups and improvements to the r8169 driver from Heiner Kallweit, including support for ->xmit_more. This driver has been getting some much needed love since he started working on it. 12) Lots of new forwarding selftests from Petr Machata. 13) Enable VXLAN learning in mlxsw driver, from Ido Schimmel. 14) Packed ring support for virtio, from Tiwei Bie. 15) Add new Aquantia AQtion USB driver, from Dmitry Bezrukov. 16) Add XDP support to dpaa2-eth driver, from Ioana Ciocoi Radulescu. 17) Implement coalescing on TCP backlog queue, from Eric Dumazet. 18) Implement carrier change in tun driver, from Nicolas Dichtel. 19) Support msg_zerocopy in UDP, from Willem de Bruijn. 20) Significantly improve garbage collection of neighbor objects when the table has many PERMANENT entries, from David Ahern. 21) Remove egdev usage from nfp and mlx5, and remove the facility completely from the tree as it no longer has any users. From Oz Shlomo and others. 22) Add a NETDEV_PRE_CHANGEADDR so that drivers can veto the change and therefore abort the operation before the commit phase (which is the NETDEV_CHANGEADDR event). From Petr Machata. 23) Add indirect call wrappers to avoid retpoline overhead, and use them in the GRO code paths. From Paolo Abeni. 24) Add support for netlink FDB get operations, from Roopa Prabhu. 25) Support bloom filter in mlxsw driver, from Nir Dotan. 26) Add SKB extension infrastructure. This consolidates the handling of the auxiliary SKB data used by IPSEC and bridge netfilter, and is designed to support the needs to MPTCP which could be integrated in the future. 27) Lots of XDP TX optimizations in mlx5 from Tariq Toukan. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1845 commits) net: dccp: fix kernel crash on module load drivers/net: appletalk/cops: remove redundant if statement and mask bnx2x: Fix NULL pointer dereference in bnx2x_del_all_vlans() on some hw net/net_namespace: Check the return value of register_pernet_subsys() net/netlink_compat: Fix a missing check of nla_parse_nested ieee802154: lowpan_header_create check must check daddr net/mlx4_core: drop useless LIST_HEAD mlxsw: spectrum: drop useless LIST_HEAD net/mlx5e: drop useless LIST_HEAD iptunnel: Set tun_flags in the iptunnel_metadata_reply from src net/mlx5e: fix semicolon.cocci warnings staging: octeon: fix build failure with XFRM enabled net: Revert recent Spectre-v1 patches. can: af_can: Fix Spectre v1 vulnerability packet: validate address length if non-zero nfc: af_nfc: Fix Spectre v1 vulnerability phonet: af_phonet: Fix Spectre v1 vulnerability net: core: Fix Spectre v1 vulnerability net: minor cleanup in skb_ext_add() net: drop the unused helper skb_ext_get() ...
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c54
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cq.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/debugfs.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c162
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h110
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs.h16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c169
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c634
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h44
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c222
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h55
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c231
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c112
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c483
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c766
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h41
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c981
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c1260
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c70
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h28
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c283
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/events.c325
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c83
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.c285
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c48
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c255
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h46
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h98
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c368
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c44
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/port.c57
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qp.c86
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sriov.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/srq.c716
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/transobj.c109
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.h9
61 files changed, 5161 insertions, 3542 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index d324a3884462..9de9abacf7f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -12,17 +12,17 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
# mlx5 core basic
#
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
- health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
+ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
- fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \
- diag/fs_tracepoint.o diag/fw_tracer.o
+ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
+ lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o
#
# Netdev basic
#
mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
- en_selftest.o en/port.o
+ en_selftest.o en/port.o en/monitor_stats.o
#
# Netdev extra
@@ -30,7 +30,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
-mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o
#
# Core extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index a5a0823e5ada..d3125cdf69db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -40,9 +40,11 @@
#include <linux/random.h>
#include <linux/io-mapping.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
#include <linux/debugfs.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
enum {
CMD_IF_REV = 5,
@@ -313,6 +315,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_FPGA_DESTROY_QP:
case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
case MLX5_CMD_OP_DEALLOC_MEMIC:
+ case MLX5_CMD_OP_PAGE_FAULT_RESUME:
return MLX5_CMD_STAT_OK;
case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -326,7 +329,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_CREATE_MKEY:
case MLX5_CMD_OP_QUERY_MKEY:
case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
- case MLX5_CMD_OP_PAGE_FAULT_RESUME:
case MLX5_CMD_OP_CREATE_EQ:
case MLX5_CMD_OP_QUERY_EQ:
case MLX5_CMD_OP_GEN_EQE:
@@ -371,6 +373,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
case MLX5_CMD_OP_ALLOC_Q_COUNTER:
case MLX5_CMD_OP_QUERY_Q_COUNTER:
+ case MLX5_CMD_OP_SET_MONITOR_COUNTER:
+ case MLX5_CMD_OP_ARM_MONITOR_COUNTER:
case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
case MLX5_CMD_OP_QUERY_RATE_LIMIT:
case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
@@ -520,6 +524,8 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
+ MLX5_COMMAND_STR_CASE(SET_MONITOR_COUNTER);
+ MLX5_COMMAND_STR_CASE(ARM_MONITOR_COUNTER);
MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
@@ -805,6 +811,8 @@ static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
return MLX5_GET(mbox_in, in->first.data, opcode);
}
+static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
+
static void cb_timeout_handler(struct work_struct *work)
{
struct delayed_work *dwork = container_of(work, struct delayed_work,
@@ -1412,14 +1420,32 @@ static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
up(&cmd->sem);
}
+static int cmd_comp_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_core_dev *dev;
+ struct mlx5_cmd *cmd;
+ struct mlx5_eqe *eqe;
+
+ cmd = mlx5_nb_cof(nb, struct mlx5_cmd, nb);
+ dev = container_of(cmd, struct mlx5_core_dev, cmd);
+ eqe = data;
+
+ mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
+
+ return NOTIFY_OK;
+}
void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
{
+ MLX5_NB_INIT(&dev->cmd.nb, cmd_comp_notifier, CMD);
+ mlx5_eq_notifier_register(dev, &dev->cmd.nb);
mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS);
}
void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
{
mlx5_cmd_change_mod(dev, CMD_MODE_POLLING);
+ mlx5_eq_notifier_unregister(dev, &dev->cmd.nb);
}
static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
@@ -1435,7 +1461,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
}
}
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
+static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
{
struct mlx5_cmd *cmd = &dev->cmd;
struct mlx5_cmd_work_ent *ent;
@@ -1533,7 +1559,29 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
}
}
}
-EXPORT_SYMBOL(mlx5_cmd_comp_handler);
+
+void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
+{
+ unsigned long flags;
+ u64 vector;
+
+ /* wait for pending handlers to complete */
+ mlx5_eq_synchronize_cmd_irq(dev);
+ spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
+ vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
+ if (!vector)
+ goto no_trig;
+
+ vector |= MLX5_TRIGGERED_CMD_COMP;
+ spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+
+ mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
+ mlx5_cmd_comp_handler(dev, vector, true);
+ return;
+
+no_trig:
+ spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+}
static int status_to_err(u8 status)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 4b85abb5c9f7..713a17ee3751 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -38,6 +38,7 @@
#include <rdma/ib_verbs.h>
#include <linux/mlx5/cq.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
#define TASKLET_MAX_TIME 2
#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
@@ -92,10 +93,10 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
u32 out[MLX5_ST_SZ_DW(create_cq_out)];
u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
- struct mlx5_eq *eq;
+ struct mlx5_eq_comp *eq;
int err;
- eq = mlx5_eqn2eq(dev, eqn);
+ eq = mlx5_eqn2comp_eq(dev, eqn);
if (IS_ERR(eq))
return PTR_ERR(eq);
@@ -119,12 +120,12 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
INIT_LIST_HEAD(&cq->tasklet_ctx.list);
/* Add to comp EQ CQ tree to recv comp events */
- err = mlx5_eq_add_cq(eq, cq);
+ err = mlx5_eq_add_cq(&eq->core, cq);
if (err)
goto err_cmd;
/* Add to async EQ CQ tree to recv async events */
- err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq);
+ err = mlx5_eq_add_cq(mlx5_get_async_eq(dev), cq);
if (err)
goto err_cq_add;
@@ -139,7 +140,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
return 0;
err_cq_add:
- mlx5_eq_del_cq(eq, cq);
+ mlx5_eq_del_cq(&eq->core, cq);
err_cmd:
memset(din, 0, sizeof(din));
memset(dout, 0, sizeof(dout));
@@ -157,11 +158,11 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
int err;
- err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq);
+ err = mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq);
if (err)
return err;
- err = mlx5_eq_del_cq(cq->eq, cq);
+ err = mlx5_eq_del_cq(&cq->eq->core, cq);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 90fabd612b6c..a11e22d0b0cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -36,6 +36,7 @@
#include <linux/mlx5/cq.h>
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
enum {
QP_PID,
@@ -349,6 +350,16 @@ out:
return param;
}
+static int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {};
+
+ MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
+ MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
int index)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 37ba7c78859d..ebc046fa97d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -45,75 +45,11 @@ struct mlx5_device_context {
unsigned long state;
};
-struct mlx5_delayed_event {
- struct list_head list;
- struct mlx5_core_dev *dev;
- enum mlx5_dev_event event;
- unsigned long param;
-};
-
enum {
MLX5_INTERFACE_ADDED,
MLX5_INTERFACE_ATTACHED,
};
-static void add_delayed_event(struct mlx5_priv *priv,
- struct mlx5_core_dev *dev,
- enum mlx5_dev_event event,
- unsigned long param)
-{
- struct mlx5_delayed_event *delayed_event;
-
- delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC);
- if (!delayed_event) {
- mlx5_core_err(dev, "event %d is missed\n", event);
- return;
- }
-
- mlx5_core_dbg(dev, "Accumulating event %d\n", event);
- delayed_event->dev = dev;
- delayed_event->event = event;
- delayed_event->param = param;
- list_add_tail(&delayed_event->list, &priv->waiting_events_list);
-}
-
-static void delayed_event_release(struct mlx5_device_context *dev_ctx,
- struct mlx5_priv *priv)
-{
- struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
- struct mlx5_delayed_event *de;
- struct mlx5_delayed_event *n;
- struct list_head temp;
-
- INIT_LIST_HEAD(&temp);
-
- spin_lock_irq(&priv->ctx_lock);
-
- priv->is_accum_events = false;
- list_splice_init(&priv->waiting_events_list, &temp);
- if (!dev_ctx->context)
- goto out;
- list_for_each_entry_safe(de, n, &temp, list)
- dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param);
-
-out:
- spin_unlock_irq(&priv->ctx_lock);
-
- list_for_each_entry_safe(de, n, &temp, list) {
- list_del(&de->list);
- kfree(de);
- }
-}
-
-/* accumulating events that can come after mlx5_ib calls to
- * ib_register_device, till adding that interface to the events list.
- */
-static void delayed_event_start(struct mlx5_priv *priv)
-{
- spin_lock_irq(&priv->ctx_lock);
- priv->is_accum_events = true;
- spin_unlock_irq(&priv->ctx_lock);
-}
void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
@@ -129,8 +65,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
dev_ctx->intf = intf;
- delayed_event_start(priv);
-
dev_ctx->context = intf->add(dev);
if (dev_ctx->context) {
set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
@@ -139,22 +73,9 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
spin_lock_irq(&priv->ctx_lock);
list_add_tail(&dev_ctx->list, &priv->ctx_list);
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (dev_ctx->intf->pfault) {
- if (priv->pfault) {
- mlx5_core_err(dev, "multiple page fault handlers not supported");
- } else {
- priv->pfault_ctx = dev_ctx->context;
- priv->pfault = dev_ctx->intf->pfault;
- }
- }
-#endif
spin_unlock_irq(&priv->ctx_lock);
}
- delayed_event_release(dev_ctx, priv);
-
if (!dev_ctx->context)
kfree(dev_ctx);
}
@@ -179,15 +100,6 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
if (!dev_ctx)
return;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- spin_lock_irq(&priv->ctx_lock);
- if (priv->pfault == dev_ctx->intf->pfault)
- priv->pfault = NULL;
- spin_unlock_irq(&priv->ctx_lock);
-
- synchronize_srcu(&priv->pfault_srcu);
-#endif
-
spin_lock_irq(&priv->ctx_lock);
list_del(&dev_ctx->list);
spin_unlock_irq(&priv->ctx_lock);
@@ -207,26 +119,20 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv
if (!dev_ctx)
return;
- delayed_event_start(priv);
if (intf->attach) {
if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
- goto out;
+ return;
if (intf->attach(dev, dev_ctx->context))
- goto out;
-
+ return;
set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
} else {
if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
- goto out;
+ return;
dev_ctx->context = intf->add(dev);
if (!dev_ctx->context)
- goto out;
-
+ return;
set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
}
-
-out:
- delayed_event_release(dev_ctx, priv);
}
void mlx5_attach_device(struct mlx5_core_dev *dev)
@@ -350,28 +256,6 @@ void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
mutex_unlock(&mlx5_intf_mutex);
}
-void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
-{
- struct mlx5_priv *priv = &mdev->priv;
- struct mlx5_device_context *dev_ctx;
- unsigned long flags;
- void *result = NULL;
-
- spin_lock_irqsave(&priv->ctx_lock, flags);
-
- list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
- if ((dev_ctx->intf->protocol == protocol) &&
- dev_ctx->intf->get_dev) {
- result = dev_ctx->intf->get_dev(dev_ctx->context);
- break;
- }
-
- spin_unlock_irqrestore(&priv->ctx_lock, flags);
-
- return result;
-}
-EXPORT_SYMBOL(mlx5_get_protocol_dev);
-
/* Must be called with intf_mutex held */
void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
{
@@ -422,44 +306,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
return res;
}
-void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
- unsigned long param)
-{
- struct mlx5_priv *priv = &dev->priv;
- struct mlx5_device_context *dev_ctx;
- unsigned long flags;
-
- spin_lock_irqsave(&priv->ctx_lock, flags);
-
- if (priv->is_accum_events)
- add_delayed_event(priv, dev, event, param);
-
- /* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is
- * still in priv->ctx_list. In this case, only notify the dev_ctx if its
- * ADDED or ATTACHED bit are set.
- */
- list_for_each_entry(dev_ctx, &priv->ctx_list, list)
- if (dev_ctx->intf->event &&
- (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) ||
- test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)))
- dev_ctx->intf->event(dev, dev_ctx->context, event, param);
-
- spin_unlock_irqrestore(&priv->ctx_lock, flags);
-}
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-void mlx5_core_page_fault(struct mlx5_core_dev *dev,
- struct mlx5_pagefault *pfault)
-{
- struct mlx5_priv *priv = &dev->priv;
- int srcu_idx;
-
- srcu_idx = srcu_read_lock(&priv->pfault_srcu);
- if (priv->pfault)
- priv->pfault(dev, priv->pfault_ctx, pfault);
- srcu_read_unlock(&priv->pfault_srcu, srcu_idx);
-}
-#endif
void mlx5_dev_list_lock(void)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index 0f11fff32a9b..424457ff9759 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -161,10 +161,10 @@ static void print_misc_parameters_hdrs(struct trace_seq *p,
PRINT_MASKED_VAL(name, p, format); \
}
DECLARE_MASK_VAL(u64, gre_key) = {
- .m = MLX5_GET(fte_match_set_misc, mask, gre_key_h) << 8 |
- MLX5_GET(fte_match_set_misc, mask, gre_key_l),
- .v = MLX5_GET(fte_match_set_misc, value, gre_key_h) << 8 |
- MLX5_GET(fte_match_set_misc, value, gre_key_l)};
+ .m = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi) << 8 |
+ MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo),
+ .v = MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.hi) << 8 |
+ MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.lo)};
PRINT_MASKED_VAL(gre_key, p, "%llu");
PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index d4ec93bde4de..6999f4486e9e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -30,6 +30,7 @@
* SOFTWARE.
*/
#define CREATE_TRACE_POINTS
+#include "lib/eq.h"
#include "fw_tracer.h"
#include "fw_tracer_tracepoint.h"
@@ -846,9 +847,9 @@ free_tracer:
return ERR_PTR(err);
}
-/* Create HW resources + start tracer
- * must be called before Async EQ is created
- */
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data);
+
+/* Create HW resources + start tracer */
int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
{
struct mlx5_core_dev *dev;
@@ -874,6 +875,9 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
goto err_dealloc_pd;
}
+ MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER);
+ mlx5_eq_notifier_register(dev, &tracer->nb);
+
mlx5_fw_tracer_start(tracer);
return 0;
@@ -883,9 +887,7 @@ err_dealloc_pd:
return err;
}
-/* Stop tracer + Cleanup HW resources
- * must be called after Async EQ is destroyed
- */
+/* Stop tracer + Cleanup HW resources */
void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
{
if (IS_ERR_OR_NULL(tracer))
@@ -893,7 +895,7 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n",
tracer->owner);
-
+ mlx5_eq_notifier_unregister(tracer->dev, &tracer->nb);
cancel_work_sync(&tracer->ownership_change_work);
cancel_work_sync(&tracer->handle_traces_work);
@@ -922,12 +924,11 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
kfree(tracer);
}
-void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data)
{
- struct mlx5_fw_tracer *tracer = dev->tracer;
-
- if (!tracer)
- return;
+ struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb);
+ struct mlx5_core_dev *dev = tracer->dev;
+ struct mlx5_eqe *eqe = data;
switch (eqe->sub_type) {
case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
@@ -942,6 +943,8 @@ void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
eqe->sub_type);
}
+
+ return NOTIFY_OK;
}
EXPORT_TRACEPOINT_SYMBOL(mlx5_fw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
index 0347f2dd5cee..a8b8747f2b61 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -55,6 +55,7 @@
struct mlx5_fw_tracer {
struct mlx5_core_dev *dev;
+ struct mlx5_nb nb;
bool owner;
u8 trc_ver;
struct workqueue_struct *work_queue;
@@ -170,6 +171,5 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer);
void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer);
void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
-void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 118324802926..8fa8fdd30b85 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -49,6 +49,7 @@
#include <net/switchdev.h>
#include <net/xdp.h>
#include <linux/net_dim.h>
+#include <linux/bits.h>
#include "wq.h"
#include "mlx5_core.h"
#include "en_stats.h"
@@ -147,9 +148,6 @@ struct page_pool;
MLX5_UMR_MTT_ALIGNMENT))
#define MLX5E_UMR_WQEBBS \
(DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB))
-#define MLX5E_ICOSQ_MAX_WQEBBS MLX5E_UMR_WQEBBS
-
-#define MLX5E_NUM_MAIN_GROUPS 9
#define MLX5E_MSG_LEVEL NETIF_MSG_LINK
@@ -178,8 +176,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
{
return is_kdump_kernel() ?
MLX5E_MIN_NUM_CHANNELS :
- min_t(int, mdev->priv.eq_table.num_comp_vectors,
- MLX5E_MAX_NUM_CHANNELS);
+ min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS);
}
/* Use this function to get max num channels after netdev was created */
@@ -214,22 +211,24 @@ struct mlx5e_umr_wqe {
extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
enum mlx5e_priv_flag {
- MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0),
- MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1),
- MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2),
- MLX5E_PFLAG_RX_STRIDING_RQ = (1 << 3),
- MLX5E_PFLAG_RX_NO_CSUM_COMPLETE = (1 << 4),
+ MLX5E_PFLAG_RX_CQE_BASED_MODER,
+ MLX5E_PFLAG_TX_CQE_BASED_MODER,
+ MLX5E_PFLAG_RX_CQE_COMPRESS,
+ MLX5E_PFLAG_RX_STRIDING_RQ,
+ MLX5E_PFLAG_RX_NO_CSUM_COMPLETE,
+ MLX5E_PFLAG_XDP_TX_MPWQE,
+ MLX5E_NUM_PFLAGS, /* Keep last */
};
#define MLX5E_SET_PFLAG(params, pflag, enable) \
do { \
if (enable) \
- (params)->pflags |= (pflag); \
+ (params)->pflags |= BIT(pflag); \
else \
- (params)->pflags &= ~(pflag); \
+ (params)->pflags &= ~(BIT(pflag)); \
} while (0)
-#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (pflag)))
+#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag))))
#ifdef CONFIG_MLX5_CORE_EN_DCB
#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
@@ -247,9 +246,6 @@ struct mlx5e_params {
bool lro_en;
u32 lro_wqe_sz;
u8 tx_min_inline_mode;
- u8 rss_hfunc;
- u8 toeplitz_hash_key[40];
- u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE];
bool vlan_strip_disable;
bool scatter_fcs_en;
bool rx_dim_enabled;
@@ -349,7 +345,6 @@ enum {
MLX5E_SQ_STATE_IPSEC,
MLX5E_SQ_STATE_AM,
MLX5E_SQ_STATE_TLS,
- MLX5E_SQ_STATE_REDIRECT,
};
struct mlx5e_sq_wqe_info {
@@ -410,24 +405,51 @@ struct mlx5e_xdp_info {
struct mlx5e_dma_info di;
};
+struct mlx5e_xdp_info_fifo {
+ struct mlx5e_xdp_info *xi;
+ u32 *cc;
+ u32 *pc;
+ u32 mask;
+};
+
+struct mlx5e_xdp_wqe_info {
+ u8 num_wqebbs;
+ u8 num_ds;
+};
+
+struct mlx5e_xdp_mpwqe {
+ /* Current MPWQE session */
+ struct mlx5e_tx_wqe *wqe;
+ u8 ds_count;
+ u8 max_ds_count;
+};
+
+struct mlx5e_xdpsq;
+typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq*,
+ struct mlx5e_xdp_info*);
struct mlx5e_xdpsq {
/* data path */
/* dirtied @completion */
+ u32 xdpi_fifo_cc;
u16 cc;
bool redirect_flush;
/* dirtied @xmit */
- u16 pc ____cacheline_aligned_in_smp;
- bool doorbell;
+ u32 xdpi_fifo_pc ____cacheline_aligned_in_smp;
+ u16 pc;
+ struct mlx5_wqe_ctrl_seg *doorbell_cseg;
+ struct mlx5e_xdp_mpwqe mpwqe;
struct mlx5e_cq cq;
/* read only */
struct mlx5_wq_cyc wq;
struct mlx5e_xdpsq_stats *stats;
+ mlx5e_fp_xmit_xdp_frame xmit_xdp_frame;
struct {
- struct mlx5e_xdp_info *xdpi;
+ struct mlx5e_xdp_wqe_info *wqe_info;
+ struct mlx5e_xdp_info_fifo xdpi_fifo;
} db;
void __iomem *uar_map;
u32 sqn;
@@ -633,7 +655,6 @@ struct mlx5e_channel_stats {
} ____cacheline_aligned_in_smp;
enum {
- MLX5E_STATE_ASYNC_EVENTS_ENABLED,
MLX5E_STATE_OPENED,
MLX5E_STATE_DESTROYING,
};
@@ -654,6 +675,13 @@ enum {
MLX5E_NIC_PRIO
};
+struct mlx5e_rss_params {
+ u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE];
+ u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS];
+ u8 toeplitz_hash_key[40];
+ u8 hfunc;
+};
+
struct mlx5e_priv {
/* priv data path fields - start */
struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
@@ -674,6 +702,7 @@ struct mlx5e_priv {
struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS];
+ struct mlx5e_rss_params rss_params;
u32 tx_rates[MLX5E_MAX_NUM_SQS];
struct mlx5e_flow_steering fs;
@@ -683,6 +712,8 @@ struct mlx5e_priv {
struct work_struct set_rx_mode_work;
struct work_struct tx_timeout_work;
struct work_struct update_stats_work;
+ struct work_struct monitor_counters_work;
+ struct mlx5_nb monitor_counters_nb;
struct mlx5_core_dev *mdev;
struct net_device *netdev;
@@ -692,6 +723,8 @@ struct mlx5e_priv {
struct hwtstamp_config tstamp;
u16 q_counter;
u16 drop_rq_q_counter;
+ struct notifier_block events_nb;
+
#ifdef CONFIG_MLX5_CORE_EN_DCB
struct mlx5e_dcbx dcbx;
#endif
@@ -769,6 +802,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt);
void mlx5e_update_stats(struct mlx5e_priv *priv);
+void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
void mlx5e_init_l2_addr(struct mlx5e_priv *priv);
int mlx5e_self_test_num(struct mlx5e_priv *priv);
@@ -799,9 +833,11 @@ struct mlx5e_redirect_rqt_param {
int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
struct mlx5e_redirect_rqt_param rrp);
-void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params,
- enum mlx5e_traffic_types tt,
+void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
+ const struct mlx5e_tirc_config *ttconfig,
void *tirc, bool inner);
+void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen);
+struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt);
int mlx5e_open_locked(struct net_device *netdev);
int mlx5e_close_locked(struct net_device *netdev);
@@ -931,14 +967,16 @@ int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc,
void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
int mlx5e_create_tises(struct mlx5e_priv *priv);
-void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv);
+void mlx5e_update_carrier(struct mlx5e_priv *priv);
int mlx5e_close(struct net_device *netdev);
int mlx5e_open(struct net_device *netdev);
+void mlx5e_update_ndo_stats(struct mlx5e_priv *priv);
void mlx5e_queue_update_stats(struct mlx5e_priv *priv);
int mlx5e_bits_invert(unsigned long a, int size);
typedef int (*change_hw_mtu_cb)(struct mlx5e_priv *priv);
+int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv);
int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
change_hw_mtu_cb set_mtu_cb);
@@ -962,12 +1000,20 @@ int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
struct ethtool_coalesce *coal);
int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
struct ethtool_coalesce *coal);
+int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
+ struct ethtool_link_ksettings *link_ksettings);
+int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
+ const struct ethtool_link_ksettings *link_ksettings);
u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv);
u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv);
int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
struct ethtool_ts_info *info);
int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
struct ethtool_flash *flash);
+void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv,
+ struct ethtool_pauseparam *pauseparam);
+int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv,
+ struct ethtool_pauseparam *pauseparam);
/* mlx5e generic netdev management API */
int mlx5e_netdev_init(struct net_device *netdev,
@@ -983,12 +1029,26 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv);
void mlx5e_detach_netdev(struct mlx5e_priv *priv);
void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_rss_params *rss_params,
struct mlx5e_params *params,
u16 max_channels, u16 mtu);
void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params);
-void mlx5e_build_rss_params(struct mlx5e_params *params);
+void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
+ u16 num_channels);
u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
void mlx5e_rx_dim_work(struct work_struct *work);
void mlx5e_tx_dim_work(struct work_struct *work);
+
+void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti);
+void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti);
+netdev_features_t mlx5e_features_check(struct sk_buff *skb,
+ struct net_device *netdev,
+ netdev_features_t features);
+#ifdef CONFIG_MLX5_ESWITCH
+int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac);
+int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate);
+int mlx5e_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi);
+int mlx5e_get_vf_stats(struct net_device *dev, int vf, struct ifla_vf_stats *vf_stats);
+#endif
#endif /* __MLX5_EN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 1431232c9a09..be5961ff24cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -73,6 +73,22 @@ enum mlx5e_traffic_types {
MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY,
};
+struct mlx5e_tirc_config {
+ u8 l3_prot_type;
+ u8 l4_prot_type;
+ u32 rx_hash_fields;
+};
+
+#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP)
+#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP |\
+ MLX5_HASH_FIELD_SEL_L4_SPORT |\
+ MLX5_HASH_FIELD_SEL_L4_DPORT)
+#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP |\
+ MLX5_HASH_FIELD_SEL_IPSEC_SPI)
+
enum mlx5e_tunnel_types {
MLX5E_TT_IPV4_GRE,
MLX5E_TT_IPV6_GRE,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
new file mode 100644
index 000000000000..2ce420851e77
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include "en.h"
+#include "monitor_stats.h"
+#include "lib/eq.h"
+
+/* Driver will set the following watch counters list:
+ * Ppcnt.802_3:
+ * a_in_range_length_errors Type: 0x0, Counter: 0x0, group_id = N/A
+ * a_out_of_range_length_field Type: 0x0, Counter: 0x1, group_id = N/A
+ * a_frame_too_long_errors Type: 0x0, Counter: 0x2, group_id = N/A
+ * a_frame_check_sequence_errors Type: 0x0, Counter: 0x3, group_id = N/A
+ * a_alignment_errors Type: 0x0, Counter: 0x4, group_id = N/A
+ * if_out_discards Type: 0x0, Counter: 0x5, group_id = N/A
+ * Q_Counters:
+ * Q[index].rx_out_of_buffer Type: 0x1, Counter: 0x4, group_id = counter_ix
+ */
+
+#define NUM_REQ_PPCNT_COUNTER_S1 MLX5_CMD_SET_MONITOR_NUM_PPCNT_COUNTER_SET1
+#define NUM_REQ_Q_COUNTERS_S1 MLX5_CMD_SET_MONITOR_NUM_Q_COUNTERS_SET1
+
+int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!MLX5_CAP_GEN(mdev, max_num_of_monitor_counters))
+ return false;
+ if (MLX5_CAP_PCAM_REG(mdev, ppcnt) &&
+ MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters) <
+ NUM_REQ_PPCNT_COUNTER_S1)
+ return false;
+ if (MLX5_CAP_GEN(mdev, num_q_monitor_counters) <
+ NUM_REQ_Q_COUNTERS_S1)
+ return false;
+ return true;
+}
+
+void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv)
+{
+ u32 in[MLX5_ST_SZ_DW(arm_monitor_counter_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(arm_monitor_counter_out)] = {};
+
+ MLX5_SET(arm_monitor_counter_in, in, opcode,
+ MLX5_CMD_OP_ARM_MONITOR_COUNTER);
+ mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static void mlx5e_monitor_counters_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ monitor_counters_work);
+
+ mutex_lock(&priv->state_lock);
+ mlx5e_update_ndo_stats(priv);
+ mutex_unlock(&priv->state_lock);
+ mlx5e_monitor_counter_arm(priv);
+}
+
+static int mlx5e_monitor_event_handler(struct notifier_block *nb,
+ unsigned long event, void *eqe)
+{
+ struct mlx5e_priv *priv = mlx5_nb_cof(nb, struct mlx5e_priv,
+ monitor_counters_nb);
+ queue_work(priv->wq, &priv->monitor_counters_work);
+ return NOTIFY_OK;
+}
+
+void mlx5e_monitor_counter_start(struct mlx5e_priv *priv)
+{
+ MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler,
+ MONITOR_COUNTER);
+ mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb);
+}
+
+static void mlx5e_monitor_counter_stop(struct mlx5e_priv *priv)
+{
+ mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb);
+ cancel_work_sync(&priv->monitor_counters_work);
+}
+
+static int fill_monitor_counter_ppcnt_set1(int cnt, u32 *in)
+{
+ enum mlx5_monitor_counter_ppcnt ppcnt_cnt;
+
+ for (ppcnt_cnt = 0;
+ ppcnt_cnt < NUM_REQ_PPCNT_COUNTER_S1;
+ ppcnt_cnt++, cnt++) {
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].type,
+ MLX5_QUERY_MONITOR_CNT_TYPE_PPCNT);
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].counter,
+ ppcnt_cnt);
+ }
+ return ppcnt_cnt;
+}
+
+static int fill_monitor_counter_q_counter_set1(int cnt, int q_counter, u32 *in)
+{
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].type,
+ MLX5_QUERY_MONITOR_CNT_TYPE_Q_COUNTER);
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].counter,
+ MLX5_QUERY_MONITOR_Q_COUNTER_RX_OUT_OF_BUFFER);
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].counter_group_id,
+ q_counter);
+ return 1;
+}
+
+/* check if mlx5e_monitor_counter_supported before calling this function*/
+static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int max_num_of_counters = MLX5_CAP_GEN(mdev, max_num_of_monitor_counters);
+ int num_q_counters = MLX5_CAP_GEN(mdev, num_q_monitor_counters);
+ int num_ppcnt_counters = !MLX5_CAP_PCAM_REG(mdev, ppcnt) ? 0 :
+ MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters);
+ u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {};
+ int q_counter = priv->q_counter;
+ int cnt = 0;
+
+ if (num_ppcnt_counters >= NUM_REQ_PPCNT_COUNTER_S1 &&
+ max_num_of_counters >= (NUM_REQ_PPCNT_COUNTER_S1 + cnt))
+ cnt += fill_monitor_counter_ppcnt_set1(cnt, in);
+
+ if (num_q_counters >= NUM_REQ_Q_COUNTERS_S1 &&
+ max_num_of_counters >= (NUM_REQ_Q_COUNTERS_S1 + cnt) &&
+ q_counter)
+ cnt += fill_monitor_counter_q_counter_set1(cnt, q_counter, in);
+
+ MLX5_SET(set_monitor_counter_in, in, num_of_counters, cnt);
+ MLX5_SET(set_monitor_counter_in, in, opcode,
+ MLX5_CMD_OP_SET_MONITOR_COUNTER);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+/* check if mlx5e_monitor_counter_supported before calling this function*/
+void mlx5e_monitor_counter_init(struct mlx5e_priv *priv)
+{
+ INIT_WORK(&priv->monitor_counters_work, mlx5e_monitor_counters_work);
+ mlx5e_monitor_counter_start(priv);
+ mlx5e_set_monitor_counter(priv);
+ mlx5e_monitor_counter_arm(priv);
+ queue_work(priv->wq, &priv->update_stats_work);
+}
+
+static void mlx5e_monitor_counter_disable(struct mlx5e_priv *priv)
+{
+ u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {};
+
+ MLX5_SET(set_monitor_counter_in, in, num_of_counters, 0);
+ MLX5_SET(set_monitor_counter_in, in, opcode,
+ MLX5_CMD_OP_SET_MONITOR_COUNTER);
+
+ mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+/* check if mlx5e_monitor_counter_supported before calling this function*/
+void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv)
+{
+ mlx5e_monitor_counter_disable(priv);
+ mlx5e_monitor_counter_stop(priv);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h
new file mode 100644
index 000000000000..e1ac4b3d22fb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5_MONITOR_H__
+#define __MLX5_MONITOR_H__
+
+int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv);
+void mlx5e_monitor_counter_init(struct mlx5e_priv *priv);
+void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv);
+void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv);
+
+#endif /* __MLX5_MONITOR_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
new file mode 100644
index 000000000000..046948ead152
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -0,0 +1,634 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/vxlan.h>
+#include <net/gre.h>
+#include "lib/vxlan.h"
+#include "en/tc_tun.h"
+
+static int get_route_and_out_devs(struct mlx5e_priv *priv,
+ struct net_device *dev,
+ struct net_device **route_dev,
+ struct net_device **out_dev)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct net_device *uplink_dev, *uplink_upper;
+ bool dst_is_lag_dev;
+
+ uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+ uplink_upper = netdev_master_upper_dev_get(uplink_dev);
+ dst_is_lag_dev = (uplink_upper &&
+ netif_is_lag_master(uplink_upper) &&
+ dev == uplink_upper &&
+ mlx5_lag_is_sriov(priv->mdev));
+
+ /* if the egress device isn't on the same HW e-switch or
+ * it's a LAG device, use the uplink
+ */
+ if (!switchdev_port_same_parent_id(priv->netdev, dev) ||
+ dst_is_lag_dev) {
+ *route_dev = uplink_dev;
+ *out_dev = *route_dev;
+ } else {
+ *route_dev = dev;
+ if (is_vlan_dev(*route_dev))
+ *out_dev = uplink_dev;
+ else if (mlx5e_eswitch_rep(dev))
+ *out_dev = *route_dev;
+ else
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct net_device **out_dev,
+ struct net_device **route_dev,
+ struct flowi4 *fl4,
+ struct neighbour **out_n,
+ u8 *out_ttl)
+{
+ struct rtable *rt;
+ struct neighbour *n = NULL;
+
+#if IS_ENABLED(CONFIG_INET)
+ int ret;
+
+ rt = ip_route_output_key(dev_net(mirred_dev), fl4);
+ ret = PTR_ERR_OR_ZERO(rt);
+ if (ret)
+ return ret;
+#else
+ return -EOPNOTSUPP;
+#endif
+
+ ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev);
+ if (ret < 0)
+ return ret;
+
+ if (!(*out_ttl))
+ *out_ttl = ip4_dst_hoplimit(&rt->dst);
+ n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
+ ip_rt_put(rt);
+ if (!n)
+ return -ENOMEM;
+
+ *out_n = n;
+ return 0;
+}
+
+static const char *mlx5e_netdev_kind(struct net_device *dev)
+{
+ if (dev->rtnl_link_ops)
+ return dev->rtnl_link_ops->kind;
+ else
+ return "";
+}
+
+static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct net_device **out_dev,
+ struct net_device **route_dev,
+ struct flowi6 *fl6,
+ struct neighbour **out_n,
+ u8 *out_ttl)
+{
+ struct neighbour *n = NULL;
+ struct dst_entry *dst;
+
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ int ret;
+
+ ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst,
+ fl6);
+ if (ret < 0)
+ return ret;
+
+ if (!(*out_ttl))
+ *out_ttl = ip6_dst_hoplimit(dst);
+
+ ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev);
+ if (ret < 0)
+ return ret;
+#else
+ return -EOPNOTSUPP;
+#endif
+
+ n = dst_neigh_lookup(dst, &fl6->daddr);
+ dst_release(dst);
+ if (!n)
+ return -ENOMEM;
+
+ *out_n = n;
+ return 0;
+}
+
+static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key)
+{
+ __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
+ struct udphdr *udp = (struct udphdr *)(buf);
+ struct vxlanhdr *vxh = (struct vxlanhdr *)
+ ((char *)udp + sizeof(struct udphdr));
+
+ udp->dest = tun_key->tp_dst;
+ vxh->vx_flags = VXLAN_HF_VNI;
+ vxh->vx_vni = vxlan_vni_field(tun_id);
+
+ return 0;
+}
+
+static int mlx5e_gen_gre_header(char buf[], struct ip_tunnel_key *tun_key)
+{
+ __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
+ int hdr_len;
+ struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf);
+
+ /* the HW does not calculate GRE csum or sequences */
+ if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ))
+ return -EOPNOTSUPP;
+
+ greh->protocol = htons(ETH_P_TEB);
+
+ /* GRE key */
+ hdr_len = gre_calc_hlen(tun_key->tun_flags);
+ greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags);
+ if (tun_key->tun_flags & TUNNEL_KEY) {
+ __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
+
+ *ptr = tun_id;
+ }
+
+ return 0;
+}
+
+static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto,
+ struct mlx5e_encap_entry *e)
+{
+ int err = 0;
+ struct ip_tunnel_key *key = &e->tun_info.key;
+
+ if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
+ *ip_proto = IPPROTO_UDP;
+ err = mlx5e_gen_vxlan_header(buf, key);
+ } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) {
+ *ip_proto = IPPROTO_GRE;
+ err = mlx5e_gen_gre_header(buf, key);
+ } else {
+ pr_warn("mlx5: Cannot generate tunnel header for tunnel type (%d)\n"
+ , e->tunnel_type);
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev,
+ struct mlx5e_encap_entry *e,
+ u16 proto)
+{
+ struct ethhdr *eth = (struct ethhdr *)buf;
+ char *ip;
+
+ ether_addr_copy(eth->h_dest, e->h_dest);
+ ether_addr_copy(eth->h_source, dev->dev_addr);
+ if (is_vlan_dev(dev)) {
+ struct vlan_hdr *vlan = (struct vlan_hdr *)
+ ((char *)eth + ETH_HLEN);
+ ip = (char *)vlan + VLAN_HLEN;
+ eth->h_proto = vlan_dev_vlan_proto(dev);
+ vlan->h_vlan_TCI = htons(vlan_dev_vlan_id(dev));
+ vlan->h_vlan_encapsulated_proto = htons(proto);
+ } else {
+ eth->h_proto = htons(proto);
+ ip = (char *)eth + ETH_HLEN;
+ }
+
+ return ip;
+}
+
+int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ struct ip_tunnel_key *tun_key = &e->tun_info.key;
+ struct net_device *out_dev, *route_dev;
+ struct neighbour *n = NULL;
+ struct flowi4 fl4 = {};
+ int ipv4_encap_size;
+ char *encap_header;
+ u8 nud_state, ttl;
+ struct iphdr *ip;
+ int err;
+
+ /* add the IP fields */
+ fl4.flowi4_tos = tun_key->tos;
+ fl4.daddr = tun_key->u.ipv4.dst;
+ fl4.saddr = tun_key->u.ipv4.src;
+ ttl = tun_key->ttl;
+
+ err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &route_dev,
+ &fl4, &n, &ttl);
+ if (err)
+ return err;
+
+ ipv4_encap_size =
+ (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct iphdr) +
+ e->tunnel_hlen;
+
+ if (max_encap_size < ipv4_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv4_encap_size, max_encap_size);
+ return -EOPNOTSUPP;
+ }
+
+ encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
+ if (!encap_header)
+ return -ENOMEM;
+
+ /* used by mlx5e_detach_encap to lookup a neigh hash table
+ * entry in the neigh hash table when a user deletes a rule
+ */
+ e->m_neigh.dev = n->dev;
+ e->m_neigh.family = n->ops->family;
+ memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+ e->out_dev = out_dev;
+
+ /* It's important to add the neigh to the hash table before checking
+ * the neigh validity state. So if we'll get a notification, in case the
+ * neigh changes it's validity state, we would find the relevant neigh
+ * in the hash.
+ */
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
+ if (err)
+ goto free_encap;
+
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ ether_addr_copy(e->h_dest, n->ha);
+ read_unlock_bh(&n->lock);
+
+ /* add ethernet header */
+ ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, route_dev, e,
+ ETH_P_IP);
+
+ /* add ip header */
+ ip->tos = tun_key->tos;
+ ip->version = 0x4;
+ ip->ihl = 0x5;
+ ip->ttl = ttl;
+ ip->daddr = fl4.daddr;
+ ip->saddr = fl4.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
+ &ip->protocol, e);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->encap_size = ipv4_encap_size;
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(n, NULL);
+ err = -EAGAIN;
+ goto out;
+ }
+
+ err = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
+ ipv4_encap_size, encap_header,
+ MLX5_FLOW_NAMESPACE_FDB,
+ &e->encap_id);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
+ neigh_release(n);
+ return err;
+
+destroy_neigh_entry:
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+free_encap:
+ kfree(encap_header);
+out:
+ if (n)
+ neigh_release(n);
+ return err;
+}
+
+int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ struct ip_tunnel_key *tun_key = &e->tun_info.key;
+ struct net_device *out_dev, *route_dev;
+ struct neighbour *n = NULL;
+ struct flowi6 fl6 = {};
+ struct ipv6hdr *ip6h;
+ int ipv6_encap_size;
+ char *encap_header;
+ u8 nud_state, ttl;
+ int err;
+
+ ttl = tun_key->ttl;
+
+ fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
+ fl6.daddr = tun_key->u.ipv6.dst;
+ fl6.saddr = tun_key->u.ipv6.src;
+
+ err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &route_dev,
+ &fl6, &n, &ttl);
+ if (err)
+ return err;
+
+ ipv6_encap_size =
+ (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct ipv6hdr) +
+ e->tunnel_hlen;
+
+ if (max_encap_size < ipv6_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv6_encap_size, max_encap_size);
+ return -EOPNOTSUPP;
+ }
+
+ encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
+ if (!encap_header)
+ return -ENOMEM;
+
+ /* used by mlx5e_detach_encap to lookup a neigh hash table
+ * entry in the neigh hash table when a user deletes a rule
+ */
+ e->m_neigh.dev = n->dev;
+ e->m_neigh.family = n->ops->family;
+ memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+ e->out_dev = out_dev;
+
+ /* It's importent to add the neigh to the hash table before checking
+ * the neigh validity state. So if we'll get a notification, in case the
+ * neigh changes it's validity state, we would find the relevant neigh
+ * in the hash.
+ */
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
+ if (err)
+ goto free_encap;
+
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ ether_addr_copy(e->h_dest, n->ha);
+ read_unlock_bh(&n->lock);
+
+ /* add ethernet header */
+ ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, route_dev, e,
+ ETH_P_IPV6);
+
+ /* add ip header */
+ ip6_flow_hdr(ip6h, tun_key->tos, 0);
+ /* the HW fills up ipv6 payload len */
+ ip6h->hop_limit = ttl;
+ ip6h->daddr = fl6.daddr;
+ ip6h->saddr = fl6.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
+ &ip6h->nexthdr, e);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->encap_size = ipv6_encap_size;
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(n, NULL);
+ err = -EAGAIN;
+ goto out;
+ }
+
+ err = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
+ ipv6_encap_size, encap_header,
+ MLX5_FLOW_NAMESPACE_FDB,
+ &e->encap_id);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
+ neigh_release(n);
+ return err;
+
+destroy_neigh_entry:
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+free_encap:
+ kfree(encap_header);
+out:
+ if (n)
+ neigh_release(n);
+ return err;
+}
+
+int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev)
+{
+ if (netif_is_vxlan(tunnel_dev))
+ return MLX5E_TC_TUNNEL_TYPE_VXLAN;
+ else if (netif_is_gretap(tunnel_dev) ||
+ netif_is_ip6gretap(tunnel_dev))
+ return MLX5E_TC_TUNNEL_TYPE_GRETAP;
+ else
+ return MLX5E_TC_TUNNEL_TYPE_UNKNOWN;
+}
+
+bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
+ struct net_device *netdev)
+{
+ int tunnel_type = mlx5e_tc_tun_get_type(netdev);
+
+ if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN &&
+ MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
+ return true;
+ else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP &&
+ MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap))
+ return true;
+ else
+ return false;
+}
+
+int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ e->tunnel_type = mlx5e_tc_tun_get_type(tunnel_dev);
+
+ if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
+ int dst_port = be16_to_cpu(e->tun_info.key.tp_dst);
+
+ if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "vxlan udp dport was not registered with the HW");
+ netdev_warn(priv->netdev,
+ "%d isn't an offloaded vxlan udp dport\n",
+ dst_port);
+ return -EOPNOTSUPP;
+ }
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
+ e->tunnel_hlen = VXLAN_HLEN;
+ } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) {
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE;
+ e->tunnel_hlen = gre_calc_hlen(e->tun_info.key.tun_flags);
+ } else {
+ e->reformat_type = -1;
+ e->tunnel_hlen = -1;
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_dissector_key_ports *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS,
+ f->key);
+ struct flow_dissector_key_ports *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS,
+ f->mask);
+ void *misc_c = MLX5_ADDR_OF(fte_match_param,
+ spec->match_criteria,
+ misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param,
+ spec->match_value,
+ misc_parameters);
+
+ /* Full udp dst port must be given */
+ if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS) ||
+ memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "VXLAN decap filter must include enc_dst_port condition");
+ netdev_warn(priv->netdev,
+ "VXLAN decap filter must include enc_dst_port condition\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* udp dst port must be knonwn as a VXLAN port */
+ if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matched UDP port is not registered as a VXLAN port");
+ netdev_warn(priv->netdev,
+ "UDP port %d is not registered as a VXLAN port\n",
+ be16_to_cpu(key->dst));
+ return -EOPNOTSUPP;
+ }
+
+ /* dst UDP port is valid here */
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, ntohs(mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(key->dst));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, ntohs(key->src));
+
+ /* match on VNI */
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_dissector_key_keyid *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ f->key);
+ struct flow_dissector_key_keyid *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ f->mask);
+ MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
+ be32_to_cpu(mask->keyid));
+ MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
+ be32_to_cpu(key->keyid));
+ }
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f,
+ void *outer_headers_c,
+ void *outer_headers_v)
+{
+ void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ if (!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) {
+ NL_SET_ERR_MSG_MOD(f->common.extack,
+ "GRE HW offloading is not supported");
+ netdev_warn(priv->netdev, "GRE HW offloading is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ ip_protocol, IPPROTO_GRE);
+
+ /* gre protocol*/
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol);
+ MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB);
+
+ /* gre key */
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_dissector_key_keyid *mask = NULL;
+ struct flow_dissector_key_keyid *key = NULL;
+
+ mask = skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ f->mask);
+ MLX5_SET(fte_match_set_misc, misc_c,
+ gre_key.key, be32_to_cpu(mask->keyid));
+
+ key = skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ f->key);
+ MLX5_SET(fte_match_set_misc, misc_v,
+ gre_key.key, be32_to_cpu(key->keyid));
+ }
+
+ return 0;
+}
+
+int mlx5e_tc_tun_parse(struct net_device *filter_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ int tunnel_type;
+ int err = 0;
+
+ tunnel_type = mlx5e_tc_tun_get_type(filter_dev);
+ if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
+ err = mlx5e_tc_tun_parse_vxlan(priv, spec, f,
+ headers_c, headers_v);
+ } else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) {
+ err = mlx5e_tc_tun_parse_gretap(priv, spec, f,
+ headers_c, headers_v);
+ } else {
+ netdev_warn(priv->netdev,
+ "decapsulation offload is not supported for %s net device (%d)\n",
+ mlx5e_netdev_kind(filter_dev), tunnel_type);
+ return -EOPNOTSUPP;
+ }
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
new file mode 100644
index 000000000000..706ce7bf15e7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_TUNNEL_H__
+#define __MLX5_EN_TC_TUNNEL_H__
+
+#include <linux/netdevice.h>
+#include <linux/mlx5/fs.h>
+#include <net/pkt_cls.h>
+#include <linux/netlink.h>
+#include "en.h"
+#include "en_rep.h"
+
+enum {
+ MLX5E_TC_TUNNEL_TYPE_UNKNOWN,
+ MLX5E_TC_TUNNEL_TYPE_VXLAN,
+ MLX5E_TC_TUNNEL_TYPE_GRETAP
+};
+
+int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack);
+
+int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
+
+int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
+
+int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev);
+bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
+ struct net_device *netdev);
+
+int mlx5e_tc_tun_parse(struct net_device *filter_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f,
+ void *headers_c,
+ void *headers_v);
+
+#endif //__MLX5_EN_TC_TUNNEL_H__
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index ad6d471d00dd..3740177eed09 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -47,7 +47,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
xdpi.xdpf->len, PCI_DMA_TODEVICE);
xdpi.di = *di;
- return mlx5e_xmit_xdp_frame(sq, &xdpi);
+ return sq->xmit_xdp_frame(sq, &xdpi);
}
/* returns true if packet was consumed by xdp */
@@ -102,7 +102,98 @@ xdp_abort:
}
}
-bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
+static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u8 wqebbs;
+ u16 pi;
+
+ mlx5e_xdpsq_fetch_wqe(sq, &session->wqe);
+
+ prefetchw(session->wqe->data);
+ session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
+ * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
+ * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a
+ * full-session WQE be cache-aligned.
+ */
+#if L1_CACHE_BYTES < 128
+#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
+#else
+#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
+#endif
+
+ wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi),
+ MLX5E_XDP_MPW_MAX_WQEBBS);
+
+ session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs;
+}
+
+static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
+ u16 ds_count = session->ds_count;
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
+
+ cseg->opmod_idx_opcode =
+ cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);
+
+ wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
+ wi->num_ds = ds_count - MLX5E_XDP_TX_EMPTY_DS_COUNT;
+
+ sq->pc += wi->num_wqebbs;
+
+ sq->doorbell_cseg = cseg;
+
+ session->wqe = NULL; /* Close session */
+}
+
+static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xdp_info *xdpi)
+{
+ struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_xdpsq_stats *stats = sq->stats;
+
+ dma_addr_t dma_addr = xdpi->dma_addr;
+ struct xdp_frame *xdpf = xdpi->xdpf;
+ unsigned int dma_len = xdpf->len;
+
+ if (unlikely(sq->hw_mtu < dma_len)) {
+ stats->err++;
+ return false;
+ }
+
+ if (unlikely(!session->wqe)) {
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
+ MLX5_SEND_WQE_MAX_WQEBBS))) {
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
+ stats->full++;
+ return false;
+ }
+
+ mlx5e_xdp_mpwqe_session_start(sq);
+ }
+
+ mlx5e_xdp_mpwqe_add_dseg(sq, dma_addr, dma_len);
+
+ if (unlikely(session->ds_count == session->max_ds_count))
+ mlx5e_xdp_mpwqe_complete(sq);
+
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
+ stats->xmit++;
+ return true;
+}
+
+static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
{
struct mlx5_wq_cyc *wq = &sq->wq;
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
@@ -126,11 +217,8 @@ bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
}
if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
- if (sq->doorbell) {
- /* SQ is full, ring doorbell */
- mlx5e_xmit_xdp_doorbell(sq);
- sq->doorbell = false;
- }
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
stats->full++;
return false;
}
@@ -152,23 +240,20 @@ bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
- /* move page to reference to sq responsibility,
- * and mark so it's not put back in page-cache.
- */
- sq->db.xdpi[pi] = *xdpi;
sq->pc++;
- sq->doorbell = true;
+ sq->doorbell_cseg = cseg;
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
stats->xmit++;
return true;
}
-bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
{
+ struct mlx5e_xdp_info_fifo *xdpi_fifo;
struct mlx5e_xdpsq *sq;
struct mlx5_cqe64 *cqe;
- struct mlx5e_rq *rq;
bool is_redirect;
u16 sqcc;
int i;
@@ -182,8 +267,8 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
if (!cqe)
return false;
- is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
- rq = container_of(sq, struct mlx5e_rq, xdpsq);
+ is_redirect = !rq;
+ xdpi_fifo = &sq->db.xdpi_fifo;
/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
* otherwise a cq overrun may occur
@@ -199,20 +284,33 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
wqe_counter = be16_to_cpu(cqe->wqe_counter);
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
+ netdev_WARN_ONCE(sq->channel->netdev,
+ "Bad OP in XDPSQ CQE: 0x%x\n",
+ get_cqe_opcode(cqe));
+
do {
- u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
- struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci];
+ struct mlx5e_xdp_wqe_info *wi;
+ u16 ci, j;
last_wqe = (sqcc == wqe_counter);
- sqcc++;
-
- if (is_redirect) {
- xdp_return_frame(xdpi->xdpf);
- dma_unmap_single(sq->pdev, xdpi->dma_addr,
- xdpi->xdpf->len, DMA_TO_DEVICE);
- } else {
- /* Recycle RX page */
- mlx5e_page_release(rq, &xdpi->di, true);
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+ wi = &sq->db.wqe_info[ci];
+
+ sqcc += wi->num_wqebbs;
+
+ for (j = 0; j < wi->num_ds; j++) {
+ struct mlx5e_xdp_info xdpi =
+ mlx5e_xdpi_fifo_pop(xdpi_fifo);
+
+ if (is_redirect) {
+ xdp_return_frame(xdpi.xdpf);
+ dma_unmap_single(sq->pdev, xdpi.dma_addr,
+ xdpi.xdpf->len, DMA_TO_DEVICE);
+ } else {
+ /* Recycle RX page */
+ mlx5e_page_release(rq, &xdpi.di, true);
+ }
}
} while (!last_wqe);
} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
@@ -228,27 +326,32 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
-void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
{
- struct mlx5e_rq *rq;
- bool is_redirect;
-
- is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
- rq = is_redirect ? NULL : container_of(sq, struct mlx5e_rq, xdpsq);
+ struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
+ bool is_redirect = !rq;
while (sq->cc != sq->pc) {
- u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
- struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci];
-
- sq->cc++;
-
- if (is_redirect) {
- xdp_return_frame(xdpi->xdpf);
- dma_unmap_single(sq->pdev, xdpi->dma_addr,
- xdpi->xdpf->len, DMA_TO_DEVICE);
- } else {
- /* Recycle RX page */
- mlx5e_page_release(rq, &xdpi->di, false);
+ struct mlx5e_xdp_wqe_info *wi;
+ u16 ci, i;
+
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
+ wi = &sq->db.wqe_info[ci];
+
+ sq->cc += wi->num_wqebbs;
+
+ for (i = 0; i < wi->num_ds; i++) {
+ struct mlx5e_xdp_info xdpi =
+ mlx5e_xdpi_fifo_pop(xdpi_fifo);
+
+ if (is_redirect) {
+ xdp_return_frame(xdpi.xdpf);
+ dma_unmap_single(sq->pdev, xdpi.dma_addr,
+ xdpi.xdpf->len, DMA_TO_DEVICE);
+ } else {
+ /* Recycle RX page */
+ mlx5e_page_release(rq, &xdpi.di, false);
+ }
}
}
}
@@ -292,7 +395,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
xdpi.xdpf = xdpf;
- if (unlikely(!mlx5e_xmit_xdp_frame(sq, &xdpi))) {
+ if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) {
dma_unmap_single(sq->pdev, xdpi.dma_addr,
xdpf->len, DMA_TO_DEVICE);
xdp_return_frame_rx_napi(xdpf);
@@ -300,8 +403,33 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
}
}
- if (flags & XDP_XMIT_FLUSH)
+ if (flags & XDP_XMIT_FLUSH) {
+ if (sq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xmit_xdp_doorbell(sq);
+ }
return n - drops;
}
+
+void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
+{
+ struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
+
+ if (xdpsq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(xdpsq);
+
+ mlx5e_xmit_xdp_doorbell(xdpsq);
+
+ if (xdpsq->redirect_flush) {
+ xdp_do_flush_map();
+ xdpsq->redirect_flush = false;
+ }
+}
+
+void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
+{
+ sq->xmit_xdp_frame = is_mpw ?
+ mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 6dfab045925f..3a67cb3cd179 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -37,27 +37,62 @@
#define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \
MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM)))
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
-#define MLX5E_XDP_TX_DS_COUNT \
- ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
+#define MLX5E_XDP_TX_EMPTY_DS_COUNT \
+ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
+#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
void *va, u16 *rx_headroom, u32 *len);
-bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
-void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
-
-bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi);
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq);
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq);
+void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw);
+void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq);
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
{
+ if (sq->doorbell_cseg) {
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg);
+ sq->doorbell_cseg = NULL;
+ }
+}
+
+static inline void
+mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, dma_addr_t dma_addr, u16 dma_len)
+{
+ struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5_wqe_data_seg *dseg =
+ (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count++;
+
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->byte_count = cpu_to_be32(dma_len);
+ dseg->lkey = sq->mkey_be;
+}
+
+static inline void mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq,
+ struct mlx5e_tx_wqe **wqe)
+{
struct mlx5_wq_cyc *wq = &sq->wq;
- struct mlx5e_tx_wqe *wqe;
- u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc - 1); /* last pi */
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+ *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ memset(*wqe, 0, sizeof(**wqe));
+}
- wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+static inline void
+mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
+ struct mlx5e_xdp_info *xi)
+{
+ u32 i = (*fifo->pc)++ & fifo->mask;
- mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &wqe->ctrl);
+ fifo->xi[i] = *xi;
+}
+
+static inline struct mlx5e_xdp_info
+mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo)
+{
+ return fifo->xi[(*fifo->cc)++ & fifo->mask];
}
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 128a82b1dbfc..53608afd39b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -254,11 +254,13 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
struct mlx5e_ipsec_metadata *mdata;
struct mlx5e_ipsec_sa_entry *sa_entry;
struct xfrm_state *x;
+ struct sec_path *sp;
if (!xo)
return skb;
- if (unlikely(skb->sp->len != 1)) {
+ sp = skb_sec_path(skb);
+ if (unlikely(sp->len != 1)) {
atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle);
goto drop;
}
@@ -305,10 +307,11 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
struct mlx5e_priv *priv = netdev_priv(netdev);
struct xfrm_offload *xo;
struct xfrm_state *xs;
+ struct sec_path *sp;
u32 sa_handle;
- skb->sp = secpath_dup(skb->sp);
- if (unlikely(!skb->sp)) {
+ sp = secpath_set(skb);
+ if (unlikely(!sp)) {
atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc);
return NULL;
}
@@ -320,8 +323,9 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
return NULL;
}
- skb->sp->xvec[skb->sp->len++] = xs;
- skb->sp->olen++;
+ sp = skb_sec_path(skb);
+ sp->xvec[sp->len++] = xs;
+ sp->olen++;
xo = xfrm_offload(skb);
xo->flags = CRYPTO_DONE;
@@ -372,10 +376,11 @@ struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
netdev_features_t features)
{
+ struct sec_path *sp = skb_sec_path(skb);
struct xfrm_state *x;
- if (skb->sp && skb->sp->len) {
- x = skb->sp->xvec[0];
+ if (sp && sp->len) {
+ x = sp->xvec[0];
if (x && x->xso.offload_handle)
return true;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index f480763dcd0d..c9df08133718 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -135,14 +135,15 @@ void mlx5e_build_ptys2ethtool_map(void)
ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT);
}
-static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = {
- "rx_cqe_moder",
- "tx_cqe_moder",
- "rx_cqe_compress",
- "rx_striding_rq",
- "rx_no_csum_complete",
+typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable);
+
+struct pflag_desc {
+ char name[ETH_GSTRING_LEN];
+ mlx5e_pflag_handler handler;
};
+static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS];
+
int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
{
int i, num_stats = 0;
@@ -153,7 +154,7 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
num_stats += mlx5e_stats_grps[i].get_num_stats(priv);
return num_stats;
case ETH_SS_PRIV_FLAGS:
- return ARRAY_SIZE(mlx5e_priv_flags);
+ return MLX5E_NUM_PFLAGS;
case ETH_SS_TEST:
return mlx5e_self_test_num(priv);
/* fallthrough */
@@ -183,8 +184,9 @@ void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data)
switch (stringset) {
case ETH_SS_PRIV_FLAGS:
- for (i = 0; i < ARRAY_SIZE(mlx5e_priv_flags); i++)
- strcpy(data + i * ETH_GSTRING_LEN, mlx5e_priv_flags[i]);
+ for (i = 0; i < MLX5E_NUM_PFLAGS; i++)
+ strcpy(data + i * ETH_GSTRING_LEN,
+ mlx5e_priv_flags[i].name);
break;
case ETH_SS_TEST:
@@ -353,7 +355,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
new_channels.params = priv->channels.params;
new_channels.params.num_channels = count;
if (!netif_is_rxfh_configured(priv->netdev))
- mlx5e_build_default_indir_rqt(new_channels.params.indirection_rqt,
+ mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
MLX5E_INDIR_RQT_SIZE, count);
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
@@ -785,10 +787,9 @@ static void get_lp_advertising(u32 eth_proto_lp,
ptys2ethtool_adver_link(lp_advertising, eth_proto_lp);
}
-static int mlx5e_get_link_ksettings(struct net_device *netdev,
- struct ethtool_link_ksettings *link_ksettings)
+int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
+ struct ethtool_link_ksettings *link_ksettings)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
u32 rx_pause = 0;
@@ -804,7 +805,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
if (err) {
- netdev_err(netdev, "%s: query port ptys failed: %d\n",
+ netdev_err(priv->netdev, "%s: query port ptys failed: %d\n",
__func__, err);
goto err_query_regs;
}
@@ -824,7 +825,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
get_supported(eth_proto_cap, link_ksettings);
get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings);
- get_speed_duplex(netdev, eth_proto_oper, link_ksettings);
+ get_speed_duplex(priv->netdev, eth_proto_oper, link_ksettings);
eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
@@ -844,7 +845,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
Autoneg);
if (get_fec_supported_advertised(mdev, link_ksettings))
- netdev_dbg(netdev, "%s: FEC caps query failed: %d\n",
+ netdev_dbg(priv->netdev, "%s: FEC caps query failed: %d\n",
__func__, err);
if (!an_disable_admin)
@@ -855,6 +856,14 @@ err_query_regs:
return err;
}
+static int mlx5e_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings);
+}
+
static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes)
{
u32 i, ptys_modes = 0;
@@ -869,10 +878,9 @@ static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes)
return ptys_modes;
}
-static int mlx5e_set_link_ksettings(struct net_device *netdev,
- const struct ethtool_link_ksettings *link_ksettings)
+int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
+ const struct ethtool_link_ksettings *link_ksettings)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
u32 eth_proto_cap, eth_proto_admin;
bool an_changes = false;
@@ -892,14 +900,14 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev,
err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
if (err) {
- netdev_err(netdev, "%s: query port eth proto cap failed: %d\n",
+ netdev_err(priv->netdev, "%s: query port eth proto cap failed: %d\n",
__func__, err);
goto out;
}
link_modes = link_modes & eth_proto_cap;
if (!link_modes) {
- netdev_err(netdev, "%s: Not supported link mode(s) requested",
+ netdev_err(priv->netdev, "%s: Not supported link mode(s) requested",
__func__);
err = -EINVAL;
goto out;
@@ -907,7 +915,7 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev,
err = mlx5_query_port_proto_admin(mdev, &eth_proto_admin, MLX5_PTYS_EN);
if (err) {
- netdev_err(netdev, "%s: query port eth proto admin failed: %d\n",
+ netdev_err(priv->netdev, "%s: query port eth proto admin failed: %d\n",
__func__, err);
goto out;
}
@@ -929,9 +937,17 @@ out:
return err;
}
+static int mlx5e_set_link_ksettings(struct net_device *netdev,
+ const struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings);
+}
+
u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv)
{
- return sizeof(priv->channels.params.toeplitz_hash_key);
+ return sizeof(priv->rss_params.toeplitz_hash_key);
}
static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev)
@@ -957,50 +973,27 @@ static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
u8 *hfunc)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rss_params *rss = &priv->rss_params;
if (indir)
- memcpy(indir, priv->channels.params.indirection_rqt,
- sizeof(priv->channels.params.indirection_rqt));
+ memcpy(indir, rss->indirection_rqt,
+ sizeof(rss->indirection_rqt));
if (key)
- memcpy(key, priv->channels.params.toeplitz_hash_key,
- sizeof(priv->channels.params.toeplitz_hash_key));
+ memcpy(key, rss->toeplitz_hash_key,
+ sizeof(rss->toeplitz_hash_key));
if (hfunc)
- *hfunc = priv->channels.params.rss_hfunc;
+ *hfunc = rss->hfunc;
return 0;
}
-static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
-{
- void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
- struct mlx5_core_dev *mdev = priv->mdev;
- int ctxlen = MLX5_ST_SZ_BYTES(tirc);
- int tt;
-
- MLX5_SET(modify_tir_in, in, bitmask.hash, 1);
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- memset(tirc, 0, ctxlen);
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
- mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen);
- }
-
- if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
- return;
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- memset(tirc, 0, ctxlen);
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true);
- mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, inlen);
- }
-}
-
static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
const u8 *key, const u8 hfunc)
{
struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_rss_params *rss = &priv->rss_params;
int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
bool hash_changed = false;
void *in;
@@ -1016,15 +1009,14 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
mutex_lock(&priv->state_lock);
- if (hfunc != ETH_RSS_HASH_NO_CHANGE &&
- hfunc != priv->channels.params.rss_hfunc) {
- priv->channels.params.rss_hfunc = hfunc;
+ if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hfunc) {
+ rss->hfunc = hfunc;
hash_changed = true;
}
if (indir) {
- memcpy(priv->channels.params.indirection_rqt, indir,
- sizeof(priv->channels.params.indirection_rqt));
+ memcpy(rss->indirection_rqt, indir,
+ sizeof(rss->indirection_rqt));
if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
u32 rqtn = priv->indir_rqt.rqtn;
@@ -1032,7 +1024,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
.is_rss = true,
{
.rss = {
- .hfunc = priv->channels.params.rss_hfunc,
+ .hfunc = rss->hfunc,
.channels = &priv->channels,
},
},
@@ -1043,10 +1035,9 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
}
if (key) {
- memcpy(priv->channels.params.toeplitz_hash_key, key,
- sizeof(priv->channels.params.toeplitz_hash_key));
- hash_changed = hash_changed ||
- priv->channels.params.rss_hfunc == ETH_RSS_HASH_TOP;
+ memcpy(rss->toeplitz_hash_key, key,
+ sizeof(rss->toeplitz_hash_key));
+ hash_changed = hash_changed || rss->hfunc == ETH_RSS_HASH_TOP;
}
if (hash_changed)
@@ -1150,25 +1141,31 @@ static int mlx5e_set_tunable(struct net_device *dev,
return err;
}
-static void mlx5e_get_pauseparam(struct net_device *netdev,
- struct ethtool_pauseparam *pauseparam)
+void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv,
+ struct ethtool_pauseparam *pauseparam)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
int err;
err = mlx5_query_port_pause(mdev, &pauseparam->rx_pause,
&pauseparam->tx_pause);
if (err) {
- netdev_err(netdev, "%s: mlx5_query_port_pause failed:0x%x\n",
+ netdev_err(priv->netdev, "%s: mlx5_query_port_pause failed:0x%x\n",
__func__, err);
}
}
-static int mlx5e_set_pauseparam(struct net_device *netdev,
- struct ethtool_pauseparam *pauseparam)
+static void mlx5e_get_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_ethtool_get_pauseparam(priv, pauseparam);
+}
+
+int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv,
+ struct ethtool_pauseparam *pauseparam)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
int err;
@@ -1179,13 +1176,21 @@ static int mlx5e_set_pauseparam(struct net_device *netdev,
pauseparam->rx_pause ? 1 : 0,
pauseparam->tx_pause ? 1 : 0);
if (err) {
- netdev_err(netdev, "%s: mlx5_set_port_pause failed:0x%x\n",
+ netdev_err(priv->netdev, "%s: mlx5_set_port_pause failed:0x%x\n",
__func__, err);
}
return err;
}
+static int mlx5e_set_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_pauseparam(priv, pauseparam);
+}
+
int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
struct ethtool_ts_info *info)
{
@@ -1505,8 +1510,6 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev,
return 0;
}
-typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable);
-
static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
bool is_rx_cq)
{
@@ -1669,23 +1672,58 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
return 0;
}
+static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_channels new_channels = {};
+ int err;
+
+ if (enable && !MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe))
+ return -EOPNOTSUPP;
+
+ new_channels.params = priv->channels.params;
+
+ MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_XDP_TX_MPWQE, enable);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ return 0;
+ }
+
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ return err;
+
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+ return 0;
+}
+
+static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = {
+ { "rx_cqe_moder", set_pflag_rx_cqe_based_moder },
+ { "tx_cqe_moder", set_pflag_tx_cqe_based_moder },
+ { "rx_cqe_compress", set_pflag_rx_cqe_compress },
+ { "rx_striding_rq", set_pflag_rx_striding_rq },
+ { "rx_no_csum_complete", set_pflag_rx_no_csum_complete },
+ { "xdp_tx_mpwqe", set_pflag_xdp_tx_mpwqe },
+};
+
static int mlx5e_handle_pflag(struct net_device *netdev,
u32 wanted_flags,
- enum mlx5e_priv_flag flag,
- mlx5e_pflag_handler pflag_handler)
+ enum mlx5e_priv_flag flag)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- bool enable = !!(wanted_flags & flag);
+ bool enable = !!(wanted_flags & BIT(flag));
u32 changes = wanted_flags ^ priv->channels.params.pflags;
int err;
- if (!(changes & flag))
+ if (!(changes & BIT(flag)))
return 0;
- err = pflag_handler(netdev, enable);
+ err = mlx5e_priv_flags[flag].handler(netdev, enable);
if (err) {
- netdev_err(netdev, "%s private flag 0x%x failed err %d\n",
- enable ? "Enable" : "Disable", flag, err);
+ netdev_err(netdev, "%s private flag '%s' failed err %d\n",
+ enable ? "Enable" : "Disable", mlx5e_priv_flags[flag].name, err);
return err;
}
@@ -1696,38 +1734,17 @@ static int mlx5e_handle_pflag(struct net_device *netdev,
static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ enum mlx5e_priv_flag pflag;
int err;
mutex_lock(&priv->state_lock);
- err = mlx5e_handle_pflag(netdev, pflags,
- MLX5E_PFLAG_RX_CQE_BASED_MODER,
- set_pflag_rx_cqe_based_moder);
- if (err)
- goto out;
- err = mlx5e_handle_pflag(netdev, pflags,
- MLX5E_PFLAG_TX_CQE_BASED_MODER,
- set_pflag_tx_cqe_based_moder);
- if (err)
- goto out;
-
- err = mlx5e_handle_pflag(netdev, pflags,
- MLX5E_PFLAG_RX_CQE_COMPRESS,
- set_pflag_rx_cqe_compress);
- if (err)
- goto out;
-
- err = mlx5e_handle_pflag(netdev, pflags,
- MLX5E_PFLAG_RX_STRIDING_RQ,
- set_pflag_rx_striding_rq);
- if (err)
- goto out;
-
- err = mlx5e_handle_pflag(netdev, pflags,
- MLX5E_PFLAG_RX_NO_CSUM_COMPLETE,
- set_pflag_rx_no_csum_complete);
+ for (pflag = 0; pflag < MLX5E_NUM_PFLAGS; pflag++) {
+ err = mlx5e_handle_pflag(netdev, pflags, pflag);
+ if (err)
+ break;
+ }
-out:
mutex_unlock(&priv->state_lock);
/* Need to fix some features.. */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index c18dcebe1462..4421c10f58ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -771,6 +771,112 @@ void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv)
INIT_LIST_HEAD(&priv->fs.ethtool.rules);
}
+static enum mlx5e_traffic_types flow_type_to_traffic_type(u32 flow_type)
+{
+ switch (flow_type) {
+ case TCP_V4_FLOW:
+ return MLX5E_TT_IPV4_TCP;
+ case TCP_V6_FLOW:
+ return MLX5E_TT_IPV6_TCP;
+ case UDP_V4_FLOW:
+ return MLX5E_TT_IPV4_UDP;
+ case UDP_V6_FLOW:
+ return MLX5E_TT_IPV6_UDP;
+ case AH_V4_FLOW:
+ return MLX5E_TT_IPV4_IPSEC_AH;
+ case AH_V6_FLOW:
+ return MLX5E_TT_IPV6_IPSEC_AH;
+ case ESP_V4_FLOW:
+ return MLX5E_TT_IPV4_IPSEC_ESP;
+ case ESP_V6_FLOW:
+ return MLX5E_TT_IPV6_IPSEC_ESP;
+ case IPV4_FLOW:
+ return MLX5E_TT_IPV4;
+ case IPV6_FLOW:
+ return MLX5E_TT_IPV6;
+ default:
+ return MLX5E_NUM_INDIR_TIRS;
+ }
+}
+
+static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *nfc)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+ enum mlx5e_traffic_types tt;
+ u8 rx_hash_field = 0;
+ void *in;
+
+ tt = flow_type_to_traffic_type(nfc->flow_type);
+ if (tt == MLX5E_NUM_INDIR_TIRS)
+ return -EINVAL;
+
+ /* RSS does not support anything other than hashing to queues
+ * on src IP, dest IP, TCP/UDP src port and TCP/UDP dest
+ * port.
+ */
+ if (nfc->flow_type != TCP_V4_FLOW &&
+ nfc->flow_type != TCP_V6_FLOW &&
+ nfc->flow_type != UDP_V4_FLOW &&
+ nfc->flow_type != UDP_V6_FLOW)
+ return -EOPNOTSUPP;
+
+ if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3))
+ return -EOPNOTSUPP;
+
+ if (nfc->data & RXH_IP_SRC)
+ rx_hash_field |= MLX5_HASH_FIELD_SEL_SRC_IP;
+ if (nfc->data & RXH_IP_DST)
+ rx_hash_field |= MLX5_HASH_FIELD_SEL_DST_IP;
+ if (nfc->data & RXH_L4_B_0_1)
+ rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_SPORT;
+ if (nfc->data & RXH_L4_B_2_3)
+ rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mutex_lock(&priv->state_lock);
+
+ if (rx_hash_field == priv->rss_params.rx_hash_fields[tt])
+ goto out;
+
+ priv->rss_params.rx_hash_fields[tt] = rx_hash_field;
+ mlx5e_modify_tirs_hash(priv, in, inlen);
+
+out:
+ mutex_unlock(&priv->state_lock);
+ kvfree(in);
+ return 0;
+}
+
+static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *nfc)
+{
+ enum mlx5e_traffic_types tt;
+ u32 hash_field = 0;
+
+ tt = flow_type_to_traffic_type(nfc->flow_type);
+ if (tt == MLX5E_NUM_INDIR_TIRS)
+ return -EINVAL;
+
+ hash_field = priv->rss_params.rx_hash_fields[tt];
+ nfc->data = 0;
+
+ if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP)
+ nfc->data |= RXH_IP_SRC;
+ if (hash_field & MLX5_HASH_FIELD_SEL_DST_IP)
+ nfc->data |= RXH_IP_DST;
+ if (hash_field & MLX5_HASH_FIELD_SEL_L4_SPORT)
+ nfc->data |= RXH_L4_B_0_1;
+ if (hash_field & MLX5_HASH_FIELD_SEL_L4_DPORT)
+ nfc->data |= RXH_L4_B_2_3;
+
+ return 0;
+}
+
int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
{
int err = 0;
@@ -783,6 +889,9 @@ int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
case ETHTOOL_SRXCLSRLDEL:
err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location);
break;
+ case ETHTOOL_SRXFH:
+ err = mlx5e_set_rss_hash_opt(priv, cmd);
+ break;
default:
err = -EOPNOTSUPP;
break;
@@ -810,6 +919,9 @@ int mlx5e_get_rxnfc(struct net_device *dev,
case ETHTOOL_GRXCLSRLALL:
err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs);
break;
+ case ETHTOOL_GRXFH:
+ err = mlx5e_get_rss_hash_opt(priv, info);
+ break;
default:
err = -EOPNOTSUPP;
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b70cb6fd164c..8cfd2ec7c0a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -49,6 +49,8 @@
#include "lib/clock.h"
#include "en/port.h"
#include "en/xdp.h"
+#include "lib/eq.h"
+#include "en/monitor_stats.h"
struct mlx5e_rq_param {
u32 rqc[MLX5_ST_SZ_DW(rqc)];
@@ -59,6 +61,7 @@ struct mlx5e_rq_param {
struct mlx5e_sq_param {
u32 sqc[MLX5_ST_SZ_DW(sqc)];
struct mlx5_wq_param wq;
+ bool is_mpw;
};
struct mlx5e_cq_param {
@@ -228,7 +231,7 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
MLX5_WQ_TYPE_CYCLIC;
}
-static void mlx5e_update_carrier(struct mlx5e_priv *priv)
+void mlx5e_update_carrier(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
u8 port_state;
@@ -267,7 +270,7 @@ void mlx5e_update_stats(struct mlx5e_priv *priv)
mlx5e_stats_grps[i].update_stats(priv);
}
-static void mlx5e_update_ndo_stats(struct mlx5e_priv *priv)
+void mlx5e_update_ndo_stats(struct mlx5e_priv *priv)
{
int i;
@@ -298,33 +301,35 @@ void mlx5e_queue_update_stats(struct mlx5e_priv *priv)
queue_work(priv->wq, &priv->update_stats_work);
}
-static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
- enum mlx5_dev_event event, unsigned long param)
+static int async_event(struct notifier_block *nb, unsigned long event, void *data)
{
- struct mlx5e_priv *priv = vpriv;
+ struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
+ struct mlx5_eqe *eqe = data;
- if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state))
- return;
+ if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
+ return NOTIFY_DONE;
- switch (event) {
- case MLX5_DEV_EVENT_PORT_UP:
- case MLX5_DEV_EVENT_PORT_DOWN:
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
queue_work(priv->wq, &priv->update_carrier_work);
break;
default:
- break;
+ return NOTIFY_DONE;
}
+
+ return NOTIFY_OK;
}
static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
{
- set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
+ priv->events_nb.notifier_call = async_event;
+ mlx5_notifier_register(priv->mdev, &priv->events_nb);
}
static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
{
- clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
- synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC));
+ mlx5_notifier_unregister(priv->mdev, &priv->events_nb);
}
static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
@@ -988,18 +993,42 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq)
static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq)
{
- kvfree(sq->db.xdpi);
+ kvfree(sq->db.xdpi_fifo.xi);
+ kvfree(sq->db.wqe_info);
+}
+
+static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa)
+{
+ struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
+ int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
+
+ xdpi_fifo->xi = kvzalloc_node(sizeof(*xdpi_fifo->xi) * dsegs_per_wq,
+ GFP_KERNEL, numa);
+ if (!xdpi_fifo->xi)
+ return -ENOMEM;
+
+ xdpi_fifo->pc = &sq->xdpi_fifo_pc;
+ xdpi_fifo->cc = &sq->xdpi_fifo_cc;
+ xdpi_fifo->mask = dsegs_per_wq - 1;
+
+ return 0;
}
static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
{
int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ int err;
- sq->db.xdpi = kvzalloc_node(array_size(wq_sz, sizeof(*sq->db.xdpi)),
- GFP_KERNEL, numa);
- if (!sq->db.xdpi) {
- mlx5e_free_xdpsq_db(sq);
+ sq->db.wqe_info = kvzalloc_node(sizeof(*sq->db.wqe_info) * wq_sz,
+ GFP_KERNEL, numa);
+ if (!sq->db.wqe_info)
return -ENOMEM;
+
+ err = mlx5e_alloc_xdpsq_fifo(sq, numa);
+ if (err) {
+ mlx5e_free_xdpsq_db(sq);
+ return err;
}
return 0;
@@ -1558,11 +1587,8 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
struct mlx5e_xdpsq *sq,
bool is_redirect)
{
- unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
struct mlx5e_create_sq_param csp = {};
- unsigned int inline_hdr_sz = 0;
int err;
- int i;
err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect);
if (err)
@@ -1573,30 +1599,40 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
csp.cqn = sq->cq.mcq.cqn;
csp.wq_ctrl = &sq->wq_ctrl;
csp.min_inline_mode = sq->min_inline_mode;
- if (is_redirect)
- set_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
if (err)
goto err_free_xdpsq;
- if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
- inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
- ds_cnt++;
- }
+ mlx5e_set_xmit_fp(sq, param->is_mpw);
- /* Pre initialize fixed WQE fields */
- for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
- struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
- struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
- struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
- struct mlx5_wqe_data_seg *dseg;
+ if (!param->is_mpw) {
+ unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
+ unsigned int inline_hdr_sz = 0;
+ int i;
- cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
- eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
+ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+ inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
+ ds_cnt++;
+ }
+
+ /* Pre initialize fixed WQE fields */
+ for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
+ struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[i];
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
+ struct mlx5_wqe_data_seg *dseg;
+
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+ eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
- dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
- dseg->lkey = sq->mkey_be;
+ dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
+ dseg->lkey = sq->mkey_be;
+
+ wi->num_wqebbs = 1;
+ wi->num_ds = 1;
+ }
}
return 0;
@@ -1608,7 +1644,7 @@ err_free_xdpsq:
return err;
}
-static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
+static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
{
struct mlx5e_channel *c = sq->channel;
@@ -1616,7 +1652,7 @@ static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
napi_synchronize(&c->napi);
mlx5e_destroy_sq(c->mdev, sq->sqn);
- mlx5e_free_xdpsq_descs(sq);
+ mlx5e_free_xdpsq_descs(sq, rq);
mlx5e_free_xdpsq(sq);
}
@@ -1769,11 +1805,6 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
mlx5e_free_cq(cq);
}
-static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
-{
- return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
-}
-
static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
struct mlx5e_params *params,
struct mlx5e_channel_param *cparam)
@@ -1924,9 +1955,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
struct mlx5e_channel_param *cparam,
struct mlx5e_channel **cp)
{
+ int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
struct net_dim_cq_moder icocq_moder = {0, 0};
struct net_device *netdev = priv->netdev;
- int cpu = mlx5e_get_cpu(priv, ix);
struct mlx5e_channel *c;
unsigned int irq;
int err;
@@ -2009,7 +2040,7 @@ err_close_rq:
err_close_xdp_sq:
if (c->xdp)
- mlx5e_close_xdpsq(&c->rq.xdpsq);
+ mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq);
err_close_sqs:
mlx5e_close_sqs(c);
@@ -2062,10 +2093,10 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
static void mlx5e_close_channel(struct mlx5e_channel *c)
{
- mlx5e_close_xdpsq(&c->xdpsq);
+ mlx5e_close_xdpsq(&c->xdpsq, NULL);
mlx5e_close_rq(&c->rq);
if (c->xdp)
- mlx5e_close_xdpsq(&c->rq.xdpsq);
+ mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq);
mlx5e_close_sqs(c);
mlx5e_close_icosq(&c->icosq);
napi_disable(&c->napi);
@@ -2232,6 +2263,8 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
void *cqc = param->cqc;
MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index);
+ if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128)
+ MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
}
static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
@@ -2308,6 +2341,7 @@ static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
mlx5e_build_sq_param_common(priv, param);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+ param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
}
static void mlx5e_build_channel_param(struct mlx5e_priv *priv,
@@ -2510,7 +2544,7 @@ static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz,
if (rrp.rss.hfunc == ETH_RSS_HASH_XOR)
ix = mlx5e_bits_invert(i, ilog2(sz));
- ix = priv->channels.params.indirection_rqt[ix];
+ ix = priv->rss_params.indirection_rqt[ix];
rqn = rrp.rss.channels->c[ix]->rq.rqn;
} else {
rqn = rrp.rqn;
@@ -2593,7 +2627,7 @@ static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv,
{
.rss = {
.channels = chs,
- .hfunc = chs->params.rss_hfunc,
+ .hfunc = priv->rss_params.hfunc,
}
},
};
@@ -2613,6 +2647,54 @@ static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv)
mlx5e_redirect_rqts(priv, drop_rrp);
}
+static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = {
+ [MLX5E_TT_IPV4_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5E_TT_IPV6_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5E_TT_IPV4_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5E_TT_IPV6_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5E_TT_IPV4_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5E_TT_IPV6_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5E_TT_IPV4_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5E_TT_IPV6_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5E_TT_IPV4] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP,
+ },
+ [MLX5E_TT_IPV6] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP,
+ },
+};
+
+struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt)
+{
+ return tirc_default_config[tt];
+}
+
static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc)
{
if (!params->lro_en)
@@ -2628,116 +2710,68 @@ static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc)
MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout);
}
-void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params,
- enum mlx5e_traffic_types tt,
+void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
+ const struct mlx5e_tirc_config *ttconfig,
void *tirc, bool inner)
{
void *hfso = inner ? MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) :
MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
-#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
- MLX5_HASH_FIELD_SEL_DST_IP)
-
-#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\
- MLX5_HASH_FIELD_SEL_DST_IP |\
- MLX5_HASH_FIELD_SEL_L4_SPORT |\
- MLX5_HASH_FIELD_SEL_L4_DPORT)
-
-#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\
- MLX5_HASH_FIELD_SEL_DST_IP |\
- MLX5_HASH_FIELD_SEL_IPSEC_SPI)
-
- MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(params->rss_hfunc));
- if (params->rss_hfunc == ETH_RSS_HASH_TOP) {
+ MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(rss_params->hfunc));
+ if (rss_params->hfunc == ETH_RSS_HASH_TOP) {
void *rss_key = MLX5_ADDR_OF(tirc, tirc,
rx_hash_toeplitz_key);
size_t len = MLX5_FLD_SZ_BYTES(tirc,
rx_hash_toeplitz_key);
MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
- memcpy(rss_key, params->toeplitz_hash_key, len);
+ memcpy(rss_key, rss_params->toeplitz_hash_key, len);
}
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ ttconfig->l3_prot_type);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ ttconfig->l4_prot_type);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ ttconfig->rx_hash_fields);
+}
- switch (tt) {
- case MLX5E_TT_IPV4_TCP:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV4);
- MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
- MLX5_L4_PROT_TYPE_TCP);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_L4PORTS);
- break;
-
- case MLX5E_TT_IPV6_TCP:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV6);
- MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
- MLX5_L4_PROT_TYPE_TCP);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_L4PORTS);
- break;
-
- case MLX5E_TT_IPV4_UDP:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV4);
- MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
- MLX5_L4_PROT_TYPE_UDP);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_L4PORTS);
- break;
-
- case MLX5E_TT_IPV6_UDP:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV6);
- MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
- MLX5_L4_PROT_TYPE_UDP);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_L4PORTS);
- break;
-
- case MLX5E_TT_IPV4_IPSEC_AH:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV4);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_IPSEC_SPI);
- break;
+static void mlx5e_update_rx_hash_fields(struct mlx5e_tirc_config *ttconfig,
+ enum mlx5e_traffic_types tt,
+ u32 rx_hash_fields)
+{
+ *ttconfig = tirc_default_config[tt];
+ ttconfig->rx_hash_fields = rx_hash_fields;
+}
- case MLX5E_TT_IPV6_IPSEC_AH:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV6);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_IPSEC_SPI);
- break;
+void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
+{
+ void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
+ struct mlx5e_rss_params *rss = &priv->rss_params;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int ctxlen = MLX5_ST_SZ_BYTES(tirc);
+ struct mlx5e_tirc_config ttconfig;
+ int tt;
- case MLX5E_TT_IPV4_IPSEC_ESP:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV4);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_IPSEC_SPI);
- break;
+ MLX5_SET(modify_tir_in, in, bitmask.hash, 1);
- case MLX5E_TT_IPV6_IPSEC_ESP:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV6);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_IPSEC_SPI);
- break;
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ memset(tirc, 0, ctxlen);
+ mlx5e_update_rx_hash_fields(&ttconfig, tt,
+ rss->rx_hash_fields[tt]);
+ mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, false);
+ mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen);
+ }
- case MLX5E_TT_IPV4:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV4);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP);
- break;
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ return;
- case MLX5E_TT_IPV6:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV6);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP);
- break;
- default:
- WARN_ONCE(true, "%s: bad traffic type!\n", __func__);
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ memset(tirc, 0, ctxlen);
+ mlx5e_update_rx_hash_fields(&ttconfig, tt,
+ rss->rx_hash_fields[tt]);
+ mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, true);
+ mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in,
+ inlen);
}
}
@@ -2794,7 +2828,8 @@ static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv,
MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
MLX5_SET(tirc, tirc, tunneled_offload_en, 0x1);
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true);
+ mlx5e_build_indir_tir_ctx_hash(&priv->rss_params,
+ &tirc_default_config[tt], tirc, true);
}
static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
@@ -2825,7 +2860,7 @@ static void mlx5e_query_mtu(struct mlx5_core_dev *mdev,
*mtu = MLX5E_HW2SW_MTU(params, hw_mtu);
}
-static int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
+int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
{
struct mlx5e_params *params = &priv->channels.params;
struct net_device *netdev = priv->netdev;
@@ -2905,7 +2940,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
mlx5e_activate_channels(&priv->channels);
netif_tx_start_all_queues(priv->netdev);
- if (MLX5_ESWITCH_MANAGER(priv->mdev))
+ if (mlx5e_is_vport_rep(priv))
mlx5e_add_sqs_fwd_rules(priv);
mlx5e_wait_channels_min_rx_wqes(&priv->channels);
@@ -2916,7 +2951,7 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
{
mlx5e_redirect_rqts_to_drop(priv);
- if (MLX5_ESWITCH_MANAGER(priv->mdev))
+ if (mlx5e_is_vport_rep(priv))
mlx5e_remove_sqs_fwd_rules(priv);
/* FIXME: This is a W/A only for tx timeout watch dog false alarm when
@@ -3168,7 +3203,7 @@ err_close_tises:
return err;
}
-void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
+static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
{
int tc;
@@ -3186,7 +3221,9 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv,
MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
+
+ mlx5e_build_indir_tir_ctx_hash(&priv->rss_params,
+ &tirc_default_config[tt], tirc, false);
}
static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc)
@@ -3391,11 +3428,14 @@ static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
{
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
- return mlx5e_configure_flower(priv, cls_flower, flags);
+ return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
+ flags);
case TC_CLSFLOWER_DESTROY:
- return mlx5e_delete_flower(priv, cls_flower, flags);
+ return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
+ flags);
case TC_CLSFLOWER_STATS:
- return mlx5e_stats_flower(priv, cls_flower, flags);
+ return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
+ flags);
default:
return -EOPNOTSUPP;
}
@@ -3408,7 +3448,8 @@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
switch (type) {
case TC_SETUP_CLSFLOWER:
- return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS);
+ return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS |
+ MLX5E_TC_NIC_OFFLOAD);
default:
return -EOPNOTSUPP;
}
@@ -3451,7 +3492,7 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
}
}
-static void
+void
mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
struct mlx5e_priv *priv = netdev_priv(dev);
@@ -3459,8 +3500,10 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
struct mlx5e_vport_stats *vstats = &priv->stats.vport;
struct mlx5e_pport_stats *pstats = &priv->stats.pport;
- /* update HW stats in background for next time */
- mlx5e_queue_update_stats(priv);
+ if (!mlx5e_monitor_counter_supported(priv)) {
+ /* update HW stats in background for next time */
+ mlx5e_queue_update_stats(priv);
+ }
if (mlx5e_is_uplink_rep(priv)) {
stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
@@ -3593,7 +3636,7 @@ static int set_feature_tc_num_filters(struct net_device *netdev, bool enable)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- if (!enable && mlx5e_tc_num_filters(priv)) {
+ if (!enable && mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD)) {
netdev_err(netdev,
"Active offloaded tc filters, can't turn hw_tc_offload off\n");
return -EINVAL;
@@ -3895,7 +3938,7 @@ static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
}
#ifdef CONFIG_MLX5_ESWITCH
-static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
@@ -3932,8 +3975,8 @@ static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting)
return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting);
}
-static int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
- int max_tx_rate)
+int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
+ int max_tx_rate)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
@@ -3974,8 +4017,8 @@ static int mlx5e_set_vf_link_state(struct net_device *dev, int vf,
mlx5_ifla_link2vport(link_state));
}
-static int mlx5e_get_vf_config(struct net_device *dev,
- int vf, struct ifla_vf_info *ivi)
+int mlx5e_get_vf_config(struct net_device *dev,
+ int vf, struct ifla_vf_info *ivi)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
@@ -3988,8 +4031,8 @@ static int mlx5e_get_vf_config(struct net_device *dev,
return 0;
}
-static int mlx5e_get_vf_stats(struct net_device *dev,
- int vf, struct ifla_vf_stats *vf_stats)
+int mlx5e_get_vf_stats(struct net_device *dev,
+ int vf, struct ifla_vf_stats *vf_stats)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
@@ -4050,8 +4093,7 @@ static void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, u16 port, int add)
queue_work(priv->wq, &vxlan_work->work);
}
-static void mlx5e_add_vxlan_port(struct net_device *netdev,
- struct udp_tunnel_info *ti)
+void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4064,8 +4106,7 @@ static void mlx5e_add_vxlan_port(struct net_device *netdev,
mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1);
}
-static void mlx5e_del_vxlan_port(struct net_device *netdev,
- struct udp_tunnel_info *ti)
+void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4115,9 +4156,9 @@ out:
return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
}
-static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
- struct net_device *netdev,
- netdev_features_t features)
+netdev_features_t mlx5e_features_check(struct sk_buff *skb,
+ struct net_device *netdev,
+ netdev_features_t features)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4140,17 +4181,17 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
struct mlx5e_txqsq *sq)
{
- struct mlx5_eq *eq = sq->cq.mcq.eq;
+ struct mlx5_eq_comp *eq = sq->cq.mcq.eq;
u32 eqe_count;
netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
- eq->eqn, eq->cons_index, eq->irqn);
+ eq->core.eqn, eq->core.cons_index, eq->core.irqn);
eqe_count = mlx5_eq_poll_irq_disabled(eq);
if (!eqe_count)
return false;
- netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn);
+ netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->core.eqn);
sq->channel->stats->eq_rearm++;
return true;
}
@@ -4377,8 +4418,6 @@ const struct net_device_ops mlx5e_netdev_ops = {
.ndo_get_vf_config = mlx5e_get_vf_config,
.ndo_set_vf_link_state = mlx5e_set_vf_link_state,
.ndo_get_vf_stats = mlx5e_get_vf_stats,
- .ndo_has_offload_stats = mlx5e_has_offload_stats,
- .ndo_get_offload_stats = mlx5e_get_offload_stats,
#endif
};
@@ -4524,15 +4563,23 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
mlx5e_init_rq_type_params(mdev, params);
}
-void mlx5e_build_rss_params(struct mlx5e_params *params)
+void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
+ u16 num_channels)
{
- params->rss_hfunc = ETH_RSS_HASH_XOR;
- netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key));
- mlx5e_build_default_indir_rqt(params->indirection_rqt,
- MLX5E_INDIR_RQT_SIZE, params->num_channels);
+ enum mlx5e_traffic_types tt;
+
+ rss_params->hfunc = ETH_RSS_HASH_XOR;
+ netdev_rss_key_fill(rss_params->toeplitz_hash_key,
+ sizeof(rss_params->toeplitz_hash_key));
+ mlx5e_build_default_indir_rqt(rss_params->indirection_rqt,
+ MLX5E_INDIR_RQT_SIZE, num_channels);
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ rss_params->rx_hash_fields[tt] =
+ tirc_default_config[tt].rx_hash_fields;
}
void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_rss_params *rss_params,
struct mlx5e_params *params,
u16 max_channels, u16 mtu)
{
@@ -4548,6 +4595,10 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+ /* XDP SQ */
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE,
+ MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe));
+
/* set CQE compression */
params->rx_cqe_compress_def = false;
if (MLX5_CAP_GEN(mdev, cqe_compression) &&
@@ -4581,7 +4632,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev);
/* RSS */
- mlx5e_build_rss_params(params);
+ mlx5e_build_rss_params(rss_params, params->num_channels);
}
static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
@@ -4596,12 +4647,6 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
}
}
-#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
-static const struct switchdev_ops mlx5e_switchdev_ops = {
- .switchdev_port_attr_get = mlx5e_attr_get,
-};
-#endif
-
static void mlx5e_build_nic_netdev(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4711,12 +4756,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->priv_flags |= IFF_UNICAST_FLT;
mlx5e_set_netdev_dev_addr(netdev);
-
-#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
- if (MLX5_ESWITCH_MANAGER(mdev))
- netdev->switchdev_ops = &mlx5e_switchdev_ops;
-#endif
-
mlx5e_ipsec_build_netdev(priv);
mlx5e_tls_build_netdev(priv);
}
@@ -4754,14 +4793,16 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
void *ppriv)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rss_params *rss = &priv->rss_params;
int err;
err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv);
if (err)
return err;
- mlx5e_build_nic_params(mdev, &priv->channels.params,
- mlx5e_get_netdev_max_channels(netdev), netdev->mtu);
+ mlx5e_build_nic_params(mdev, rss, &priv->channels.params,
+ mlx5e_get_netdev_max_channels(netdev),
+ netdev->mtu);
mlx5e_timestamp_init(priv);
@@ -4891,9 +4932,8 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
mlx5_lag_add(mdev, netdev);
mlx5e_enable_async_events(priv);
-
- if (MLX5_ESWITCH_MANAGER(priv->mdev))
- mlx5e_register_vport_reps(priv);
+ if (mlx5e_monitor_counter_supported(priv))
+ mlx5e_monitor_counter_init(priv);
if (netdev->reg_state != NETREG_REGISTERED)
return;
@@ -4927,8 +4967,8 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
queue_work(priv->wq, &priv->set_rx_mode_work);
- if (MLX5_ESWITCH_MANAGER(priv->mdev))
- mlx5e_unregister_vport_reps(priv);
+ if (mlx5e_monitor_counter_supported(priv))
+ mlx5e_monitor_counter_cleanup(priv);
mlx5e_disable_async_events(priv);
mlx5_lag_remove(mdev);
@@ -4981,7 +5021,7 @@ int mlx5e_netdev_init(struct net_device *netdev,
netif_carrier_off(netdev);
#ifdef CONFIG_MLX5_EN_ARFS
- netdev->rx_cpu_rmap = mdev->rmap;
+ netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(mdev);
#endif
return 0;
@@ -5036,7 +5076,7 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv)
if (priv->channels.params.num_channels > max_nch) {
mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch);
priv->channels.params.num_channels = max_nch;
- mlx5e_build_default_indir_rqt(priv->channels.params.indirection_rqt,
+ mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
MLX5E_INDIR_RQT_SIZE, max_nch);
}
@@ -5125,7 +5165,6 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv)
static void *mlx5e_add(struct mlx5_core_dev *mdev)
{
struct net_device *netdev;
- void *rpriv = NULL;
void *priv;
int err;
int nch;
@@ -5135,20 +5174,18 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
return NULL;
#ifdef CONFIG_MLX5_ESWITCH
- if (MLX5_ESWITCH_MANAGER(mdev)) {
- rpriv = mlx5e_alloc_nic_rep_priv(mdev);
- if (!rpriv) {
- mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n");
- return NULL;
- }
+ if (MLX5_ESWITCH_MANAGER(mdev) &&
+ mlx5_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+ mlx5e_rep_register_vport_reps(mdev);
+ return mdev;
}
#endif
nch = mlx5e_get_max_num_channels(mdev);
- netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, nch, rpriv);
+ netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, nch, NULL);
if (!netdev) {
mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
- goto err_free_rpriv;
+ return NULL;
}
priv = netdev_priv(netdev);
@@ -5174,30 +5211,26 @@ err_detach:
mlx5e_detach(mdev, priv);
err_destroy_netdev:
mlx5e_destroy_netdev(priv);
-err_free_rpriv:
- kfree(rpriv);
return NULL;
}
static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
{
- struct mlx5e_priv *priv = vpriv;
- void *ppriv = priv->ppriv;
+ struct mlx5e_priv *priv;
+#ifdef CONFIG_MLX5_ESWITCH
+ if (MLX5_ESWITCH_MANAGER(mdev) && vpriv == mdev) {
+ mlx5e_rep_unregister_vport_reps(mdev);
+ return;
+ }
+#endif
+ priv = vpriv;
#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_delete_app(priv);
#endif
unregister_netdev(priv->netdev);
mlx5e_detach(mdev, vpriv);
mlx5e_destroy_netdev(priv);
- kfree(ppriv);
-}
-
-static void *mlx5e_get_netdev(void *vpriv)
-{
- struct mlx5e_priv *priv = vpriv;
-
- return priv->netdev;
}
static struct mlx5_interface mlx5e_interface = {
@@ -5205,9 +5238,7 @@ static struct mlx5_interface mlx5e_interface = {
.remove = mlx5e_remove,
.attach = mlx5e_attach,
.detach = mlx5e_detach,
- .event = mlx5e_async_event,
.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
- .get_dev = mlx5e_get_netdev,
};
void mlx5e_init(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 820fe85100b0..96cc0c6a4014 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -42,14 +42,24 @@
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"
+#include "en/tc_tun.h"
#include "fs_core.h"
-#define MLX5E_REP_PARAMS_LOG_SQ_SIZE \
- max(0x6, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
+#define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \
+ max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
#define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1
static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
+struct mlx5e_rep_indr_block_priv {
+ struct net_device *netdev;
+ struct mlx5e_rep_priv *rpriv;
+
+ struct list_head list;
+};
+
+static void mlx5e_rep_indr_unregister_block(struct net_device *netdev);
+
static void mlx5e_rep_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
{
@@ -99,7 +109,7 @@ static void mlx5e_rep_get_strings(struct net_device *dev,
}
}
-static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
+static void mlx5e_vf_rep_update_hw_counters(struct mlx5e_priv *priv)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -122,6 +132,32 @@ static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
vport_stats->tx_bytes = vf_stats.rx_bytes;
}
+static void mlx5e_uplink_rep_update_hw_counters(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct rtnl_link_stats64 *vport_stats;
+
+ mlx5e_grp_802_3_update_stats(priv);
+
+ vport_stats = &priv->stats.vf_vport;
+
+ vport_stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
+ vport_stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok);
+ vport_stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
+ vport_stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
+}
+
+static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+
+ if (rep->vport == FDB_UPLINK_VPORT)
+ mlx5e_uplink_rep_update_hw_counters(priv);
+ else
+ mlx5e_vf_rep_update_hw_counters(priv);
+}
+
static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv)
{
struct mlx5e_sw_stats *s = &priv->stats.sw;
@@ -257,6 +293,22 @@ static int mlx5e_rep_set_channels(struct net_device *dev,
return 0;
}
+static int mlx5e_rep_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_coalesce(priv, coal);
+}
+
+static int mlx5e_rep_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_coalesce(priv, coal);
+}
+
static u32 mlx5e_rep_get_rxfh_key_size(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -271,7 +323,55 @@ static u32 mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev)
return mlx5e_ethtool_get_rxfh_indir_size(priv);
}
-static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
+static void mlx5e_uplink_rep_get_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_ethtool_get_pauseparam(priv, pauseparam);
+}
+
+static int mlx5e_uplink_rep_set_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_pauseparam(priv, pauseparam);
+}
+
+static int mlx5e_uplink_rep_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings);
+}
+
+static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev,
+ const struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings);
+}
+
+static const struct ethtool_ops mlx5e_vf_rep_ethtool_ops = {
+ .get_drvinfo = mlx5e_rep_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = mlx5e_rep_get_strings,
+ .get_sset_count = mlx5e_rep_get_sset_count,
+ .get_ethtool_stats = mlx5e_rep_get_ethtool_stats,
+ .get_ringparam = mlx5e_rep_get_ringparam,
+ .set_ringparam = mlx5e_rep_set_ringparam,
+ .get_channels = mlx5e_rep_get_channels,
+ .set_channels = mlx5e_rep_set_channels,
+ .get_coalesce = mlx5e_rep_get_coalesce,
+ .set_coalesce = mlx5e_rep_set_coalesce,
+ .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size,
+ .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size,
+};
+
+static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = {
.get_drvinfo = mlx5e_rep_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_strings = mlx5e_rep_get_strings,
@@ -281,24 +381,44 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
.set_ringparam = mlx5e_rep_set_ringparam,
.get_channels = mlx5e_rep_get_channels,
.set_channels = mlx5e_rep_set_channels,
+ .get_coalesce = mlx5e_rep_get_coalesce,
+ .set_coalesce = mlx5e_rep_set_coalesce,
+ .get_link_ksettings = mlx5e_uplink_rep_get_link_ksettings,
+ .set_link_ksettings = mlx5e_uplink_rep_set_link_ksettings,
.get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size,
.get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size,
+ .get_pauseparam = mlx5e_uplink_rep_get_pauseparam,
+ .set_pauseparam = mlx5e_uplink_rep_set_pauseparam,
};
-int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
+static int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct net_device *uplink_upper = NULL;
+ struct mlx5e_priv *uplink_priv = NULL;
+ struct net_device *uplink_dev;
if (esw->mode == SRIOV_NONE)
return -EOPNOTSUPP;
+ uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+ if (uplink_dev) {
+ uplink_upper = netdev_master_upper_dev_get(uplink_dev);
+ uplink_priv = netdev_priv(uplink_dev);
+ }
+
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
attr->u.ppid.id_len = ETH_ALEN;
- ether_addr_copy(attr->u.ppid.id, rep->hw_id);
+ if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) {
+ ether_addr_copy(attr->u.ppid.id, uplink_upper->dev_addr);
+ } else {
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+
+ ether_addr_copy(attr->u.ppid.id, rep->hw_id);
+ }
break;
default:
return -EOPNOTSUPP;
@@ -519,6 +639,184 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
neigh_release(n);
}
+static struct mlx5e_rep_indr_block_priv *
+mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
+ struct net_device *netdev)
+{
+ struct mlx5e_rep_indr_block_priv *cb_priv;
+
+ /* All callback list access should be protected by RTNL. */
+ ASSERT_RTNL();
+
+ list_for_each_entry(cb_priv,
+ &rpriv->uplink_priv.tc_indr_block_priv_list,
+ list)
+ if (cb_priv->netdev == netdev)
+ return cb_priv;
+
+ return NULL;
+}
+
+static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_rep_indr_block_priv *cb_priv, *temp;
+ struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list;
+
+ list_for_each_entry_safe(cb_priv, temp, head, list) {
+ mlx5e_rep_indr_unregister_block(cb_priv->netdev);
+ kfree(cb_priv);
+ }
+}
+
+static int
+mlx5e_rep_indr_offload(struct net_device *netdev,
+ struct tc_cls_flower_offload *flower,
+ struct mlx5e_rep_indr_block_priv *indr_priv)
+{
+ struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
+ int flags = MLX5E_TC_EGRESS | MLX5E_TC_ESW_OFFLOAD;
+ int err = 0;
+
+ switch (flower->command) {
+ case TC_CLSFLOWER_REPLACE:
+ err = mlx5e_configure_flower(netdev, priv, flower, flags);
+ break;
+ case TC_CLSFLOWER_DESTROY:
+ err = mlx5e_delete_flower(netdev, priv, flower, flags);
+ break;
+ case TC_CLSFLOWER_STATS:
+ err = mlx5e_stats_flower(netdev, priv, flower, flags);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static int mlx5e_rep_indr_setup_block_cb(enum tc_setup_type type,
+ void *type_data, void *indr_priv)
+{
+ struct mlx5e_rep_indr_block_priv *priv = indr_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_rep_indr_offload(priv->netdev, type_data, priv);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+mlx5e_rep_indr_setup_tc_block(struct net_device *netdev,
+ struct mlx5e_rep_priv *rpriv,
+ struct tc_block_offload *f)
+{
+ struct mlx5e_rep_indr_block_priv *indr_priv;
+ int err = 0;
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
+ if (indr_priv)
+ return -EEXIST;
+
+ indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
+ if (!indr_priv)
+ return -ENOMEM;
+
+ indr_priv->netdev = netdev;
+ indr_priv->rpriv = rpriv;
+ list_add(&indr_priv->list,
+ &rpriv->uplink_priv.tc_indr_block_priv_list);
+
+ err = tcf_block_cb_register(f->block,
+ mlx5e_rep_indr_setup_block_cb,
+ netdev, indr_priv, f->extack);
+ if (err) {
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+ }
+
+ return err;
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block,
+ mlx5e_rep_indr_setup_block_cb,
+ netdev);
+ indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
+ if (indr_priv) {
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+ }
+
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static
+int mlx5e_rep_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv,
+ enum tc_setup_type type, void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return mlx5e_rep_indr_setup_tc_block(netdev, cb_priv,
+ type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
+ struct net_device *netdev)
+{
+ int err;
+
+ err = __tc_indr_block_cb_register(netdev, rpriv,
+ mlx5e_rep_indr_setup_tc_cb,
+ netdev);
+ if (err) {
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n",
+ netdev_name(netdev), err);
+ }
+ return err;
+}
+
+static void mlx5e_rep_indr_unregister_block(struct net_device *netdev)
+{
+ __tc_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb,
+ netdev);
+}
+
+static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
+ uplink_priv.netdevice_nb);
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ if (!mlx5e_tc_tun_device_to_offload(priv, netdev))
+ return NOTIFY_OK;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ mlx5e_rep_indr_register_block(rpriv, netdev);
+ break;
+ case NETDEV_UNREGISTER:
+ mlx5e_rep_indr_unregister_block(netdev);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
static struct mlx5e_neigh_hash_entry *
mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
struct mlx5e_neigh *m_neigh);
@@ -780,7 +1078,7 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
mlx5e_rep_neigh_entry_destroy(priv, nhe);
}
-static int mlx5e_rep_open(struct net_device *dev)
+static int mlx5e_vf_rep_open(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -802,7 +1100,7 @@ unlock:
return err;
}
-static int mlx5e_rep_close(struct net_device *dev)
+static int mlx5e_vf_rep_close(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -839,24 +1137,14 @@ mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
{
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
- return mlx5e_configure_flower(priv, cls_flower, flags);
+ return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
+ flags);
case TC_CLSFLOWER_DESTROY:
- return mlx5e_delete_flower(priv, cls_flower, flags);
+ return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
+ flags);
case TC_CLSFLOWER_STATS:
- return mlx5e_stats_flower(priv, cls_flower, flags);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data,
- void *cb_priv)
-{
- struct mlx5e_priv *priv = cb_priv;
-
- switch (type) {
- case TC_SETUP_CLSFLOWER:
- return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS);
+ return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
+ flags);
default:
return -EOPNOTSUPP;
}
@@ -869,7 +1157,8 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
switch (type) {
case TC_SETUP_CLSFLOWER:
- return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS);
+ return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS |
+ MLX5E_TC_ESW_OFFLOAD);
default:
return -EOPNOTSUPP;
}
@@ -908,43 +1197,23 @@ static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep;
if (!MLX5_ESWITCH_MANAGER(priv->mdev))
return false;
- rep = rpriv->rep;
- if (esw->mode == SRIOV_OFFLOADS &&
- rep && rep->vport == FDB_UPLINK_VPORT)
- return true;
-
- return false;
-}
-
-static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
-{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_eswitch_rep *rep;
-
- if (!MLX5_ESWITCH_MANAGER(priv->mdev))
+ if (!rpriv) /* non vport rep mlx5e instances don't use this field */
return false;
rep = rpriv->rep;
- if (rep && rep->vport != FDB_UPLINK_VPORT)
- return true;
-
- return false;
+ return (rep->vport == FDB_UPLINK_VPORT);
}
-bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
+static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id)
{
- struct mlx5e_priv *priv = netdev_priv(dev);
-
switch (attr_id) {
case IFLA_OFFLOAD_XSTATS_CPU_HIT:
- if (mlx5e_is_vf_vport_rep(priv) || mlx5e_is_uplink_rep(priv))
return true;
}
@@ -970,8 +1239,8 @@ mlx5e_get_sw_stats64(const struct net_device *dev,
return 0;
}
-int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
- void *sp)
+static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
+ void *sp)
{
switch (attr_id) {
case IFLA_OFFLOAD_XSTATS_CPU_HIT:
@@ -982,7 +1251,7 @@ int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
}
static void
-mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+mlx5e_vf_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
struct mlx5e_priv *priv = netdev_priv(dev);
@@ -991,37 +1260,93 @@ mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
memcpy(stats, &priv->stats.vf_vport, sizeof(*stats));
}
+static int mlx5e_vf_rep_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ return mlx5e_change_mtu(netdev, new_mtu, NULL);
+}
+
+static int mlx5e_uplink_rep_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu);
+}
+
+static int mlx5e_uplink_rep_set_mac(struct net_device *netdev, void *addr)
+{
+ struct sockaddr *saddr = addr;
+
+ if (!is_valid_ether_addr(saddr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ ether_addr_copy(netdev->dev_addr, saddr->sa_data);
+ return 0;
+}
+
static const struct switchdev_ops mlx5e_rep_switchdev_ops = {
.switchdev_port_attr_get = mlx5e_attr_get,
};
-static int mlx5e_change_rep_mtu(struct net_device *netdev, int new_mtu)
-{
- return mlx5e_change_mtu(netdev, new_mtu, NULL);
-}
+static const struct net_device_ops mlx5e_netdev_ops_vf_rep = {
+ .ndo_open = mlx5e_vf_rep_open,
+ .ndo_stop = mlx5e_vf_rep_close,
+ .ndo_start_xmit = mlx5e_xmit,
+ .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name,
+ .ndo_setup_tc = mlx5e_rep_setup_tc,
+ .ndo_get_stats64 = mlx5e_vf_rep_get_stats,
+ .ndo_has_offload_stats = mlx5e_rep_has_offload_stats,
+ .ndo_get_offload_stats = mlx5e_rep_get_offload_stats,
+ .ndo_change_mtu = mlx5e_vf_rep_change_mtu,
+};
-static const struct net_device_ops mlx5e_netdev_ops_rep = {
- .ndo_open = mlx5e_rep_open,
- .ndo_stop = mlx5e_rep_close,
+static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = {
+ .ndo_open = mlx5e_open,
+ .ndo_stop = mlx5e_close,
.ndo_start_xmit = mlx5e_xmit,
+ .ndo_set_mac_address = mlx5e_uplink_rep_set_mac,
.ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name,
.ndo_setup_tc = mlx5e_rep_setup_tc,
- .ndo_get_stats64 = mlx5e_rep_get_stats,
- .ndo_has_offload_stats = mlx5e_has_offload_stats,
- .ndo_get_offload_stats = mlx5e_get_offload_stats,
- .ndo_change_mtu = mlx5e_change_rep_mtu,
+ .ndo_get_stats64 = mlx5e_get_stats,
+ .ndo_has_offload_stats = mlx5e_rep_has_offload_stats,
+ .ndo_get_offload_stats = mlx5e_rep_get_offload_stats,
+ .ndo_change_mtu = mlx5e_uplink_rep_change_mtu,
+ .ndo_udp_tunnel_add = mlx5e_add_vxlan_port,
+ .ndo_udp_tunnel_del = mlx5e_del_vxlan_port,
+ .ndo_features_check = mlx5e_features_check,
+ .ndo_set_vf_mac = mlx5e_set_vf_mac,
+ .ndo_set_vf_rate = mlx5e_set_vf_rate,
+ .ndo_get_vf_config = mlx5e_get_vf_config,
+ .ndo_get_vf_stats = mlx5e_get_vf_stats,
};
-static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params, u16 mtu)
+bool mlx5e_eswitch_rep(struct net_device *netdev)
+{
+ if (netdev->netdev_ops == &mlx5e_netdev_ops_vf_rep ||
+ netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep)
+ return true;
+
+ return false;
+}
+
+static void mlx5e_build_rep_params(struct net_device *netdev)
{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_params *params;
+
u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+ params = &priv->channels.params;
params->hard_mtu = MLX5E_ETH_HARD_MTU;
- params->sw_mtu = mtu;
- params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE;
+ params->sw_mtu = netdev->mtu;
+
+ /* SQ */
+ if (rep->vport == FDB_UPLINK_VPORT)
+ params->log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+ else
+ params->log_sq_size = MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE;
/* RQ */
mlx5e_build_rq_params(mdev, params);
@@ -1035,24 +1360,38 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
/* RSS */
- mlx5e_build_rss_params(params);
+ mlx5e_build_rss_params(&priv->rss_params, params->num_channels);
}
static void mlx5e_build_rep_netdev(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5_core_dev *mdev = priv->mdev;
- u16 max_mtu;
- netdev->netdev_ops = &mlx5e_netdev_ops_rep;
+ if (rep->vport == FDB_UPLINK_VPORT) {
+ SET_NETDEV_DEV(netdev, &priv->mdev->pdev->dev);
+ netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep;
+ /* we want a persistent mac for the uplink rep */
+ mlx5_query_nic_vport_mac_address(mdev, 0, netdev->dev_addr);
+ netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops;
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ if (MLX5_CAP_GEN(mdev, qos))
+ netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
+#endif
+ } else {
+ netdev->netdev_ops = &mlx5e_netdev_ops_vf_rep;
+ eth_hw_addr_random(netdev);
+ netdev->ethtool_ops = &mlx5e_vf_rep_ethtool_ops;
+ }
netdev->watchdog_timeo = 15 * HZ;
- netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
netdev->switchdev_ops = &mlx5e_rep_switchdev_ops;
- netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL;
+ netdev->features |= NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL;
netdev->hw_features |= NETIF_F_HW_TC;
netdev->hw_features |= NETIF_F_SG;
@@ -1063,13 +1402,10 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
netdev->hw_features |= NETIF_F_TSO6;
netdev->hw_features |= NETIF_F_RXCSUM;
- netdev->features |= netdev->hw_features;
-
- eth_hw_addr_random(netdev);
+ if (rep->vport != FDB_UPLINK_VPORT)
+ netdev->features |= NETIF_F_VLAN_CHALLENGED;
- netdev->min_mtu = ETH_MIN_MTU;
- mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
- netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+ netdev->features |= netdev->hw_features;
}
static int mlx5e_init_rep(struct mlx5_core_dev *mdev,
@@ -1086,7 +1422,7 @@ static int mlx5e_init_rep(struct mlx5_core_dev *mdev,
priv->channels.params.num_channels = MLX5E_REP_PARAMS_DEF_NUM_CHANNELS;
- mlx5e_build_rep_params(mdev, &priv->channels.params, netdev->mtu);
+ mlx5e_build_rep_params(netdev);
mlx5e_build_rep_netdev(netdev);
mlx5e_timestamp_init(priv);
@@ -1209,94 +1545,173 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
{
- int err;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ int tc, err;
err = mlx5e_create_tises(priv);
if (err) {
mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err);
return err;
}
- return 0;
-}
-static const struct mlx5e_profile mlx5e_rep_profile = {
- .init = mlx5e_init_rep,
- .cleanup = mlx5e_cleanup_rep,
- .init_rx = mlx5e_init_rep_rx,
- .cleanup_rx = mlx5e_cleanup_rep_rx,
- .init_tx = mlx5e_init_rep_tx,
- .cleanup_tx = mlx5e_cleanup_nic_tx,
- .update_stats = mlx5e_rep_update_hw_counters,
- .update_carrier = NULL,
- .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
- .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
- .max_tc = 1,
-};
+ if (rpriv->rep->vport == FDB_UPLINK_VPORT) {
+ uplink_priv = &rpriv->uplink_priv;
-/* e-Switch vport representors */
+ /* init shared tc flow table */
+ err = mlx5e_tc_esw_init(&uplink_priv->tc_ht);
+ if (err)
+ goto destroy_tises;
+
+ /* init indirect block notifications */
+ INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);
+ uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event;
+ err = register_netdevice_notifier(&uplink_priv->netdevice_nb);
+ if (err) {
+ mlx5_core_err(priv->mdev, "Failed to register netdev notifier\n");
+ goto tc_esw_cleanup;
+ }
+ }
-static int
-mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+ return 0;
+
+tc_esw_cleanup:
+ mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht);
+destroy_tises:
+ for (tc = 0; tc < priv->profile->max_tc; tc++)
+ mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
+ return err;
+}
+
+static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
- struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ int tc;
- int err;
+ for (tc = 0; tc < priv->profile->max_tc; tc++)
+ mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
- if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- err = mlx5e_add_sqs_fwd_rules(priv);
- if (err)
- return err;
+ if (rpriv->rep->vport == FDB_UPLINK_VPORT) {
+ /* clean indirect TC block notifications */
+ unregister_netdevice_notifier(&rpriv->uplink_priv.netdevice_nb);
+ mlx5e_rep_indr_clean_block_privs(rpriv);
+
+ /* delete shared tc flow table */
+ mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht);
}
+}
- err = mlx5e_rep_neigh_init(rpriv);
- if (err)
- goto err_remove_sqs;
+static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv)
+{
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 max_mtu;
- /* init shared tc flow table */
- err = mlx5e_tc_esw_init(&rpriv->tc_ht);
- if (err)
- goto err_neigh_cleanup;
+ netdev->min_mtu = ETH_MIN_MTU;
+ mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
+ netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+}
- return 0;
+static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
+ struct mlx5_eqe *eqe = data;
-err_neigh_cleanup:
- mlx5e_rep_neigh_cleanup(rpriv);
-err_remove_sqs:
- mlx5e_remove_sqs_fwd_rules(priv);
- return err;
+ if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
+ return NOTIFY_DONE;
+
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+ queue_work(priv->wq, &priv->update_carrier_work);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
}
-static void
-mlx5e_nic_rep_unload(struct mlx5_eswitch_rep *rep)
+static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
- struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 max_mtu;
- if (test_bit(MLX5E_STATE_OPENED, &priv->state))
- mlx5e_remove_sqs_fwd_rules(priv);
+ netdev->min_mtu = ETH_MIN_MTU;
+ mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1);
+ netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+ mlx5e_set_dev_port_mtu(priv);
+
+ mlx5_lag_add(mdev, netdev);
+ priv->events_nb.notifier_call = uplink_rep_async_event;
+ mlx5_notifier_register(mdev, &priv->events_nb);
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ mlx5e_dcbnl_initialize(priv);
+ mlx5e_dcbnl_init_app(priv);
+#endif
+}
- /* clean uplink offloaded TC rules, delete shared tc flow table */
- mlx5e_tc_esw_cleanup(&rpriv->tc_ht);
+static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
- mlx5e_rep_neigh_cleanup(rpriv);
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ mlx5e_dcbnl_delete_app(priv);
+#endif
+ mlx5_notifier_unregister(mdev, &priv->events_nb);
+ mlx5_lag_remove(mdev);
}
+static const struct mlx5e_profile mlx5e_vf_rep_profile = {
+ .init = mlx5e_init_rep,
+ .cleanup = mlx5e_cleanup_rep,
+ .init_rx = mlx5e_init_rep_rx,
+ .cleanup_rx = mlx5e_cleanup_rep_rx,
+ .init_tx = mlx5e_init_rep_tx,
+ .cleanup_tx = mlx5e_cleanup_rep_tx,
+ .enable = mlx5e_vf_rep_enable,
+ .update_stats = mlx5e_vf_rep_update_hw_counters,
+ .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
+ .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
+ .max_tc = 1,
+};
+
+static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
+ .init = mlx5e_init_rep,
+ .cleanup = mlx5e_cleanup_rep,
+ .init_rx = mlx5e_init_rep_rx,
+ .cleanup_rx = mlx5e_cleanup_rep_rx,
+ .init_tx = mlx5e_init_rep_tx,
+ .cleanup_tx = mlx5e_cleanup_rep_tx,
+ .enable = mlx5e_uplink_rep_enable,
+ .disable = mlx5e_uplink_rep_disable,
+ .update_stats = mlx5e_uplink_rep_update_hw_counters,
+ .update_carrier = mlx5e_update_carrier,
+ .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
+ .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
+ .max_tc = MLX5E_MAX_NUM_TC,
+};
+
+/* e-Switch vport representors */
static int
mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
- struct mlx5e_rep_priv *uplink_rpriv;
+ const struct mlx5e_profile *profile;
struct mlx5e_rep_priv *rpriv;
struct net_device *netdev;
- struct mlx5e_priv *upriv;
int nch, err;
rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
if (!rpriv)
return -ENOMEM;
+ /* rpriv->rep to be looked up when profile->init() is called */
+ rpriv->rep = rep;
+
nch = mlx5e_get_max_num_channels(dev);
- netdev = mlx5e_create_netdev(dev, &mlx5e_rep_profile, nch, rpriv);
+ profile = (rep->vport == FDB_UPLINK_VPORT) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile;
+ netdev = mlx5e_create_netdev(dev, profile, nch, rpriv);
if (!netdev) {
pr_warn("Failed to create representor netdev for vport %d\n",
rep->vport);
@@ -1305,15 +1720,20 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
}
rpriv->netdev = netdev;
- rpriv->rep = rep;
rep->rep_if[REP_ETH].priv = rpriv;
INIT_LIST_HEAD(&rpriv->vport_sqs_list);
+ if (rep->vport == FDB_UPLINK_VPORT) {
+ err = mlx5e_create_mdev_resources(dev);
+ if (err)
+ goto err_destroy_netdev;
+ }
+
err = mlx5e_attach_netdev(netdev_priv(netdev));
if (err) {
pr_warn("Failed to attach representor netdev for vport %d\n",
rep->vport);
- goto err_destroy_netdev;
+ goto err_destroy_mdev_resources;
}
err = mlx5e_rep_neigh_init(rpriv);
@@ -1323,32 +1743,25 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
goto err_detach_netdev;
}
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(dev->priv.eswitch, REP_ETH);
- upriv = netdev_priv(uplink_rpriv->netdev);
- err = tc_setup_cb_egdev_register(netdev, mlx5e_rep_setup_tc_cb_egdev,
- upriv);
- if (err)
- goto err_neigh_cleanup;
-
err = register_netdev(netdev);
if (err) {
pr_warn("Failed to register representor netdev for vport %d\n",
rep->vport);
- goto err_egdev_cleanup;
+ goto err_neigh_cleanup;
}
return 0;
-err_egdev_cleanup:
- tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev,
- upriv);
-
err_neigh_cleanup:
mlx5e_rep_neigh_cleanup(rpriv);
err_detach_netdev:
mlx5e_detach_netdev(netdev_priv(netdev));
+err_destroy_mdev_resources:
+ if (rep->vport == FDB_UPLINK_VPORT)
+ mlx5e_destroy_mdev_resources(dev);
+
err_destroy_netdev:
mlx5e_destroy_netdev(netdev_priv(netdev));
kfree(rpriv);
@@ -1361,18 +1774,13 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
struct net_device *netdev = rpriv->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_rep_priv *uplink_rpriv;
void *ppriv = priv->ppriv;
- struct mlx5e_priv *upriv;
unregister_netdev(netdev);
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch,
- REP_ETH);
- upriv = netdev_priv(uplink_rpriv->netdev);
- tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev,
- upriv);
mlx5e_rep_neigh_cleanup(rpriv);
mlx5e_detach_netdev(priv);
+ if (rep->vport == FDB_UPLINK_VPORT)
+ mlx5e_destroy_mdev_resources(priv->mdev);
mlx5e_destroy_netdev(priv);
kfree(ppriv); /* mlx5e_rep_priv */
}
@@ -1386,14 +1794,13 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
return rpriv->netdev;
}
-static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
+void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev)
{
- struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
int total_vfs = MLX5_TOTAL_VPORTS(mdev);
int vport;
- for (vport = 1; vport < total_vfs; vport++) {
+ for (vport = 0; vport < total_vfs; vport++) {
struct mlx5_eswitch_rep_if rep_if = {};
rep_if.load = mlx5e_vport_rep_load;
@@ -1403,55 +1810,12 @@ static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
}
}
-static void mlx5e_rep_unregister_vf_vports(struct mlx5e_priv *priv)
+void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev)
{
- struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5_eswitch *esw = mdev->priv.eswitch;
int total_vfs = MLX5_TOTAL_VPORTS(mdev);
int vport;
- for (vport = 1; vport < total_vfs; vport++)
+ for (vport = total_vfs - 1; vport >= 0; vport--)
mlx5_eswitch_unregister_vport_rep(esw, vport, REP_ETH);
}
-
-void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
-{
- struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
- struct mlx5_eswitch_rep_if rep_if;
- struct mlx5e_rep_priv *rpriv;
-
- rpriv = priv->ppriv;
- rpriv->netdev = priv->netdev;
-
- rep_if.load = mlx5e_nic_rep_load;
- rep_if.unload = mlx5e_nic_rep_unload;
- rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
- rep_if.priv = rpriv;
- INIT_LIST_HEAD(&rpriv->vport_sqs_list);
- mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/
-
- mlx5e_rep_register_vf_vports(priv); /* VFs vports */
-}
-
-void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv)
-{
- struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
-
- mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */
- mlx5_eswitch_unregister_vport_rep(esw, 0, REP_ETH); /* UPLINK PF*/
-}
-
-void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev)
-{
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
- struct mlx5e_rep_priv *rpriv;
-
- rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
- if (!rpriv)
- return NULL;
-
- rpriv->rep = &esw->offloads.vport_reps[0];
- return rpriv;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 844d32d5c29f..edd722824697 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -53,13 +53,33 @@ struct mlx5e_neigh_update_table {
unsigned long min_interval; /* jiffies */
};
+struct mlx5_rep_uplink_priv {
+ /* Filters DB - instantiated by the uplink representor and shared by
+ * the uplink's VFs
+ */
+ struct rhashtable tc_ht;
+
+ /* indirect block callbacks are invoked on bind/unbind events
+ * on registered higher level devices (e.g. tunnel devices)
+ *
+ * tc_indr_block_cb_priv_list is used to lookup indirect callback
+ * private data
+ *
+ * netdevice_nb is the netdev events notifier - used to register
+ * tunnel devices for block events
+ *
+ */
+ struct list_head tc_indr_block_priv_list;
+ struct notifier_block netdevice_nb;
+};
+
struct mlx5e_rep_priv {
struct mlx5_eswitch_rep *rep;
struct mlx5e_neigh_update_table neigh_update;
struct net_device *netdev;
struct mlx5_flow_handle *vport_rx_rule;
struct list_head vport_sqs_list;
- struct rhashtable tc_ht; /* valid for uplink rep */
+ struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */
};
static inline
@@ -129,6 +149,8 @@ struct mlx5e_encap_entry {
struct net_device *out_dev;
int tunnel_type;
+ int tunnel_hlen;
+ int reformat_type;
u8 flags;
char *encap_header;
int encap_size;
@@ -140,16 +162,12 @@ struct mlx5e_rep_sq {
};
void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev);
-void mlx5e_register_vport_reps(struct mlx5e_priv *priv);
-void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv);
+void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev);
+void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev);
bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
-int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, void *sp);
-bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id);
-
-int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr);
void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
@@ -158,12 +176,17 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e);
void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
+
+bool mlx5e_eswitch_rep(struct net_device *netdev);
+
#else /* CONFIG_MLX5_ESWITCH */
-static inline void mlx5e_register_vport_reps(struct mlx5e_priv *priv) {}
-static inline void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) {}
static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; }
static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {}
#endif
+static inline bool mlx5e_is_vport_rep(struct mlx5e_priv *priv)
+{
+ return (MLX5_ESWITCH_MANAGER(priv->mdev) && priv->ppriv);
+}
#endif /* __MLX5E_REP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 0b5ef6d4e815..1d0bb5ff8c26 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -554,9 +554,9 @@ static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq,
mlx5_cqwq_pop(&cq->wq);
- if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) {
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
netdev_WARN_ONCE(cq->channel->netdev,
- "Bad OP in ICOSQ CQE: 0x%x\n", cqe->op_own);
+ "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe));
return;
}
@@ -898,7 +898,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
prefetchw(va); /* xdp_frame data area */
prefetch(data);
- if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
rq->stats->wqe_err++;
return NULL;
}
@@ -930,7 +930,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
u16 byte_cnt = cqe_bcnt - headlen;
struct sk_buff *skb;
- if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
rq->stats->wqe_err++;
return NULL;
}
@@ -1154,7 +1154,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
wi->consumed_strides += cstrides;
- if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
rq->stats->wqe_err++;
goto mpwrq_cqe_out;
}
@@ -1190,7 +1190,6 @@ mpwrq_cqe_out:
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
{
struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
- struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
struct mlx5_cqe64 *cqe;
int work_done = 0;
@@ -1221,15 +1220,8 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
} while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
out:
- if (xdpsq->doorbell) {
- mlx5e_xmit_xdp_doorbell(xdpsq);
- xdpsq->doorbell = false;
- }
-
- if (xdpsq->redirect_flush) {
- xdp_do_flush_map();
- xdpsq->redirect_flush = false;
- }
+ if (rq->xdp_prog)
+ mlx5e_xdp_rx_poll_complete(rq);
mlx5_cqwq_update_db_record(&cq->wq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 4337afd610d7..d3fe48ff9da9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -30,6 +30,7 @@
* SOFTWARE.
*/
+#include "lib/mlx5.h"
#include "en.h"
#include "en_accel/ipsec.h"
#include "en_accel/tls.h"
@@ -480,7 +481,10 @@ static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data,
return idx;
}
-static void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv)
+#define MLX5_BASIC_PPCNT_SUPPORTED(mdev) \
+ (MLX5_CAP_GEN(mdev, pcam_reg) ? MLX5_CAP_PCAM_REG(mdev, ppcnt) : 1)
+
+void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv)
{
struct mlx5e_pport_stats *pstats = &priv->stats.pport;
struct mlx5_core_dev *mdev = priv->mdev;
@@ -488,6 +492,9 @@ static void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv)
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
void *out;
+ if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
+ return;
+
MLX5_SET(ppcnt_reg, in, local_port, 1);
out = pstats->IEEE_802_3_counters;
MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
@@ -600,6 +607,9 @@ static void mlx5e_grp_2819_update_stats(struct mlx5e_priv *priv)
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
void *out;
+ if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
+ return;
+
MLX5_SET(ppcnt_reg, in, local_port, 1);
out = pstats->RFC_2819_counters;
MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
@@ -934,7 +944,7 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = {
};
static const struct counter_desc pport_pfc_stall_stats_desc[] = {
- { "tx_pause_storm_warning_events ", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) },
+ { "tx_pause_storm_warning_events", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) },
{ "tx_pause_storm_error_events", PPORT_PER_PRIO_OFF(device_stall_critical_watermark_cnt) },
};
@@ -1075,6 +1085,9 @@ static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv)
int prio;
void *out;
+ if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
+ return;
+
MLX5_SET(ppcnt_reg, in, local_port, 1);
MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);
for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
@@ -1086,13 +1099,13 @@ static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv)
}
static const struct counter_desc mlx5e_pme_status_desc[] = {
- { "module_unplug", 8 },
+ { "module_unplug", sizeof(u64) * MLX5_MODULE_STATUS_UNPLUGGED },
};
static const struct counter_desc mlx5e_pme_error_desc[] = {
- { "module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */
- { "module_high_temp", 48 }, /* high temperature */
- { "module_bad_shorted", 56 }, /* bad or shorted cable/module */
+ { "module_bus_stuck", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BUS_STUCK },
+ { "module_high_temp", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE },
+ { "module_bad_shorted", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BAD_CABLE },
};
#define NUM_PME_STATUS_STATS ARRAY_SIZE(mlx5e_pme_status_desc)
@@ -1120,15 +1133,17 @@ static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data,
static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data,
int idx)
{
- struct mlx5_priv *mlx5_priv = &priv->mdev->priv;
+ struct mlx5_pme_stats pme_stats;
int i;
+ mlx5_get_pme_stats(priv->mdev, &pme_stats);
+
for (i = 0; i < NUM_PME_STATUS_STATS; i++)
- data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters,
+ data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.status_counters,
mlx5e_pme_status_desc, i);
for (i = 0; i < NUM_PME_ERR_STATS; i++)
- data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters,
+ data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.error_counters,
mlx5e_pme_error_desc, i);
return idx;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 3ff69ddae2d3..fe91ec06e3c7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -278,5 +278,6 @@ extern const struct mlx5e_stats_grp mlx5e_stats_grps[];
extern const int mlx5e_num_stats_grps;
void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv);
+void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv);
#endif /* __MLX5_EN_STATS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 9dabe9d4b279..cae6c6d48984 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -44,15 +44,15 @@
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_csum.h>
-#include <net/vxlan.h>
#include <net/arp.h>
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"
#include "eswitch.h"
-#include "lib/vxlan.h"
#include "fs_core.h"
#include "en/port.h"
+#include "en/tc_tun.h"
+#include "lib/devcom.h"
struct mlx5_nic_flow_attr {
u32 action;
@@ -69,25 +69,54 @@ struct mlx5_nic_flow_attr {
enum {
MLX5E_TC_FLOW_INGRESS = MLX5E_TC_INGRESS,
MLX5E_TC_FLOW_EGRESS = MLX5E_TC_EGRESS,
- MLX5E_TC_FLOW_ESWITCH = BIT(MLX5E_TC_FLOW_BASE),
- MLX5E_TC_FLOW_NIC = BIT(MLX5E_TC_FLOW_BASE + 1),
- MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2),
- MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3),
- MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4),
- MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 5),
+ MLX5E_TC_FLOW_ESWITCH = MLX5E_TC_ESW_OFFLOAD,
+ MLX5E_TC_FLOW_NIC = MLX5E_TC_NIC_OFFLOAD,
+ MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE),
+ MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 1),
+ MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 2),
+ MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 3),
+ MLX5E_TC_FLOW_DUP = BIT(MLX5E_TC_FLOW_BASE + 4),
};
#define MLX5E_TC_MAX_SPLITS 1
+/* Helper struct for accessing a struct containing list_head array.
+ * Containing struct
+ * |- Helper array
+ * [0] Helper item 0
+ * |- list_head item 0
+ * |- index (0)
+ * [1] Helper item 1
+ * |- list_head item 1
+ * |- index (1)
+ * To access the containing struct from one of the list_head items:
+ * 1. Get the helper item from the list_head item using
+ * helper item =
+ * container_of(list_head item, helper struct type, list_head field)
+ * 2. Get the contining struct from the helper item and its index in the array:
+ * containing struct =
+ * container_of(helper item, containing struct type, helper field[index])
+ */
+struct encap_flow_item {
+ struct list_head list;
+ int index;
+};
+
struct mlx5e_tc_flow {
struct rhash_head node;
struct mlx5e_priv *priv;
u64 cookie;
u16 flags;
struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
- struct list_head encap; /* flows sharing the same encap ID */
+ /* Flow can be associated with multiple encap IDs.
+ * The number of encaps is bounded by the number of supported
+ * destinations.
+ */
+ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5e_tc_flow *peer_flow;
struct list_head mod_hdr; /* flows sharing the same mod hdr ID */
struct list_head hairpin; /* flows sharing the same hairpin */
+ struct list_head peer; /* flows with peer flow */
union {
struct mlx5_esw_flow_attr esw_attr[0];
struct mlx5_nic_flow_attr nic_attr[0];
@@ -95,11 +124,12 @@ struct mlx5e_tc_flow {
};
struct mlx5e_tc_flow_parse_attr {
- struct ip_tunnel_info tun_info;
+ struct ip_tunnel_info tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct net_device *filter_dev;
struct mlx5_flow_spec spec;
int num_mod_hdr_actions;
void *mod_hdr_actions;
- int mirred_ifindex;
+ int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
};
#define MLX5E_TC_TABLE_NUM_GROUPS 4
@@ -316,7 +346,7 @@ static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
for (i = 0; i < sz; i++) {
ix = i;
- if (priv->channels.params.rss_hfunc == ETH_RSS_HASH_XOR)
+ if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
ix = mlx5e_bits_invert(i, ilog2(sz));
ix = indirection_rqt[ix];
rqn = hp->pair->rqn[ix];
@@ -360,13 +390,15 @@ static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
void *tirc;
for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);
+
memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
+ mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);
err = mlx5_core_create_tir(hp->func_mdev, in,
MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]);
@@ -569,7 +601,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct netlink_ext_ack *extack)
{
- int peer_ifindex = parse_attr->mirred_ifindex;
+ int peer_ifindex = parse_attr->mirred_ifindex[0];
struct mlx5_hairpin_params params;
struct mlx5_core_dev *peer_mdev;
struct mlx5e_hairpin_entry *hpe;
@@ -802,7 +834,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
mlx5_del_flow_rules(flow->rule[0]);
mlx5_fc_destroy(priv->mdev, counter);
- if (!mlx5e_tc_num_filters(priv) && priv->fs.tc.t) {
+ if (!mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD) && priv->fs.tc.t) {
mlx5_destroy_flow_table(priv->fs.tc.t);
priv->fs.tc.t = NULL;
}
@@ -815,14 +847,15 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
}
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow);
+ struct mlx5e_tc_flow *flow, int out_index);
static int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct ip_tunnel_info *tun_info,
struct net_device *mirred_dev,
struct net_device **encap_dev,
struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack);
+ struct netlink_ext_ack *extack,
+ int out_index);
static struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
@@ -836,7 +869,7 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
if (IS_ERR(rule))
return rule;
- if (attr->mirror_count) {
+ if (attr->split_count) {
flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
if (IS_ERR(flow->rule[1])) {
mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
@@ -855,7 +888,7 @@ mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
{
flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
- if (attr->mirror_count)
+ if (attr->split_count)
mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
@@ -871,7 +904,7 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- slow_attr->mirror_count = 0;
+ slow_attr->split_count = 0;
slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;
rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
@@ -888,7 +921,7 @@ mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
{
memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- slow_attr->mirror_count = 0;
+ slow_attr->split_count = 0;
slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;
mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
flow->flags &= ~MLX5E_TC_FLOW_SLOW;
@@ -909,6 +942,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_rep_priv *rpriv;
struct mlx5e_priv *out_priv;
int err = 0, encap_err = 0;
+ int out_index;
if (!mlx5_eswitch_prios_supported(esw) && attr->prio != 1) {
NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW");
@@ -927,20 +961,27 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
goto err_max_prio_chain;
}
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) {
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+ int mirred_ifindex;
+
+ if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+
+ mirred_ifindex = attr->parse_attr->mirred_ifindex[out_index];
out_dev = __dev_get_by_index(dev_net(priv->netdev),
- attr->parse_attr->mirred_ifindex);
- encap_err = mlx5e_attach_encap(priv, &parse_attr->tun_info,
- out_dev, &encap_dev, flow,
- extack);
- if (encap_err && encap_err != -EAGAIN) {
- err = encap_err;
+ mirred_ifindex);
+ err = mlx5e_attach_encap(priv,
+ &parse_attr->tun_info[out_index],
+ out_dev, &encap_dev, flow,
+ extack, out_index);
+ if (err && err != -EAGAIN)
goto err_attach_encap;
- }
+ if (err == -EAGAIN)
+ encap_err = err;
out_priv = netdev_priv(encap_dev);
rpriv = out_priv->ppriv;
- attr->out_rep[attr->out_count] = rpriv->rep;
- attr->out_mdev[attr->out_count++] = out_priv->mdev;
+ attr->dests[out_index].rep = rpriv->rep;
+ attr->dests[out_index].mdev = out_priv->mdev;
}
err = mlx5_eswitch_add_vlan_action(esw, attr);
@@ -955,7 +996,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- counter = mlx5_fc_create(esw->dev, true);
+ counter = mlx5_fc_create(attr->counter_dev, true);
if (IS_ERR(counter)) {
err = PTR_ERR(counter);
goto err_create_counter;
@@ -984,15 +1025,16 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
return 0;
err_add_rule:
- mlx5_fc_destroy(esw->dev, counter);
+ mlx5_fc_destroy(attr->counter_dev, counter);
err_create_counter:
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
mlx5e_detach_mod_hdr(priv, flow);
err_mod_hdr:
mlx5_eswitch_del_vlan_action(esw, attr);
err_add_vlan:
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
- mlx5e_detach_encap(priv, flow);
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
+ if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
+ mlx5e_detach_encap(priv, flow, out_index);
err_attach_encap:
err_max_prio_chain:
return err;
@@ -1004,6 +1046,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
struct mlx5_esw_flow_attr slow_attr;
+ int out_index;
if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
if (flow->flags & MLX5E_TC_FLOW_SLOW)
@@ -1014,16 +1057,16 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
mlx5_eswitch_del_vlan_action(esw, attr);
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) {
- mlx5e_detach_encap(priv, flow);
- kvfree(attr->parse_attr);
- }
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
+ if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
+ mlx5e_detach_encap(priv, flow, out_index);
+ kvfree(attr->parse_attr);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
mlx5e_detach_mod_hdr(priv, flow);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
- mlx5_fc_destroy(esw->dev, attr->counter);
+ mlx5_fc_destroy(attr->counter_dev, attr->counter);
}
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
@@ -1033,10 +1076,12 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
struct mlx5_esw_flow_attr slow_attr, *esw_attr;
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
+ struct encap_flow_item *efi;
struct mlx5e_tc_flow *flow;
int err;
- err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type,
+ err = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
e->encap_size, e->encap_header,
MLX5_FLOW_NAMESPACE_FDB,
&e->encap_id);
@@ -1048,11 +1093,31 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
e->flags |= MLX5_ENCAP_ENTRY_VALID;
mlx5e_rep_queue_neigh_stats_work(priv);
- list_for_each_entry(flow, &e->flows, encap) {
+ list_for_each_entry(efi, &e->flows, list) {
+ bool all_flow_encaps_valid = true;
+ int i;
+
+ flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
esw_attr = flow->esw_attr;
- esw_attr->encap_id = e->encap_id;
spec = &esw_attr->parse_attr->spec;
+ esw_attr->dests[efi->index].encap_id = e->encap_id;
+ esw_attr->dests[efi->index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ /* Flow can be associated with multiple encap entries.
+ * Before offloading the flow verify that all of them have
+ * a valid neighbour.
+ */
+ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
+ all_flow_encaps_valid = false;
+ break;
+ }
+ }
+ /* Do not offload flows with unresolved neighbors */
+ if (!all_flow_encaps_valid)
+ continue;
/* update from slow path rule to encap rule */
rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
if (IS_ERR(rule)) {
@@ -1075,14 +1140,18 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
struct mlx5_esw_flow_attr slow_attr;
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
+ struct encap_flow_item *efi;
struct mlx5e_tc_flow *flow;
int err;
- list_for_each_entry(flow, &e->flows, encap) {
+ list_for_each_entry(efi, &e->flows, list) {
+ flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
spec = &flow->esw_attr->parse_attr->spec;
/* update from encap rule to slow path rule */
rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr);
+ /* mark the flow's encap dest as non-valid */
+ flow->esw_attr->dests[efi->index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -1130,9 +1199,12 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
return;
list_for_each_entry(e, &nhe->encap_list, encap_list) {
+ struct encap_flow_item *efi;
if (!(e->flags & MLX5_ENCAP_ENTRY_VALID))
continue;
- list_for_each_entry(flow, &e->flows, encap) {
+ list_for_each_entry(efi, &e->flows, list) {
+ flow = container_of(efi, struct mlx5e_tc_flow,
+ encaps[efi->index]);
if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
counter = mlx5e_tc_get_counter(flow);
mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
@@ -1162,11 +1234,11 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
}
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow)
+ struct mlx5e_tc_flow *flow, int out_index)
{
- struct list_head *next = flow->encap.next;
+ struct list_head *next = flow->encaps[out_index].list.next;
- list_del(&flow->encap);
+ list_del(&flow->encaps[out_index].list);
if (list_empty(next)) {
struct mlx5e_encap_entry *e;
@@ -1182,49 +1254,55 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
}
}
-static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow)
+static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
- mlx5e_tc_del_fdb_flow(priv, flow);
- else
- mlx5e_tc_del_nic_flow(priv, flow);
+ struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
+
+ if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
+ !(flow->flags & MLX5E_TC_FLOW_DUP))
+ return;
+
+ mutex_lock(&esw->offloads.peer_mutex);
+ list_del(&flow->peer);
+ mutex_unlock(&esw->offloads.peer_mutex);
+
+ flow->flags &= ~MLX5E_TC_FLOW_DUP;
+
+ mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
+ kvfree(flow->peer_flow);
+ flow->peer_flow = NULL;
}
-static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f)
+static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
- void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- outer_headers);
- void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- outer_headers);
- void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
- void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
+ struct mlx5_core_dev *dev = flow->priv->mdev;
+ struct mlx5_devcom *devcom = dev->priv.devcom;
+ struct mlx5_eswitch *peer_esw;
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ return;
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
- struct flow_dissector_key_keyid *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_KEYID,
- f->key);
- struct flow_dissector_key_keyid *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_KEYID,
- f->mask);
- MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
- be32_to_cpu(mask->keyid));
- MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
- be32_to_cpu(key->keyid));
+ __mlx5e_tc_del_fdb_peer_flow(flow);
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+}
+
+static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
+ mlx5e_tc_del_fdb_peer_flow(flow);
+ mlx5e_tc_del_fdb_flow(priv, flow);
+ } else {
+ mlx5e_tc_del_nic_flow(priv, flow);
}
}
+
static int parse_tunnel_attr(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f)
+ struct tc_cls_flower_offload *f,
+ struct net_device *filter_dev)
{
struct netlink_ext_ack *extack = f->common.extack;
void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
@@ -1236,48 +1314,14 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
skb_flow_dissector_target(f->dissector,
FLOW_DISSECTOR_KEY_ENC_CONTROL,
f->key);
+ int err = 0;
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
- struct flow_dissector_key_ports *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_PORTS,
- f->key);
- struct flow_dissector_key_ports *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_PORTS,
- f->mask);
-
- /* Full udp dst port must be given */
- if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
- goto vxlan_match_offload_err;
-
- if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst)) &&
- MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
- parse_vxlan_attr(spec, f);
- else {
- NL_SET_ERR_MSG_MOD(extack,
- "port isn't an offloaded vxlan udp dport");
- netdev_warn(priv->netdev,
- "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
- return -EOPNOTSUPP;
- }
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- udp_dport, ntohs(mask->dst));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- udp_dport, ntohs(key->dst));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- udp_sport, ntohs(mask->src));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- udp_sport, ntohs(key->src));
- } else { /* udp dst port must be given */
-vxlan_match_offload_err:
+ err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
+ headers_c, headers_v);
+ if (err) {
NL_SET_ERR_MSG_MOD(extack,
- "IP tunnel decap offload supported only for vxlan, must set UDP dport");
- netdev_warn(priv->netdev,
- "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
- return -EOPNOTSUPP;
+ "failed to parse tunnel attributes");
+ return err;
}
if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
@@ -1381,6 +1425,7 @@ vxlan_match_offload_err:
static int __parse_cls_flower(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct tc_cls_flower_offload *f,
+ struct net_device *filter_dev,
u8 *match_level)
{
struct netlink_ext_ack *extack = f->common.extack;
@@ -1432,7 +1477,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
switch (key->addr_type) {
case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
- if (parse_tunnel_attr(priv, spec, f))
+ if (parse_tunnel_attr(priv, spec, f, filter_dev))
return -EOPNOTSUPP;
break;
default:
@@ -1774,7 +1819,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
static int parse_cls_flower(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f)
+ struct tc_cls_flower_offload *f,
+ struct net_device *filter_dev)
{
struct netlink_ext_ack *extack = f->common.extack;
struct mlx5_core_dev *dev = priv->mdev;
@@ -1784,7 +1830,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
u8 match_level;
int err;
- err = __parse_cls_flower(priv, spec, f, &match_level);
+ err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level);
if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) {
rep = rpriv->rep;
@@ -2137,7 +2183,6 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
{
const struct tc_action *a;
bool modify_ip_header;
- LIST_HEAD(actions);
u8 htype, ip_proto;
void *headers_v;
u16 ethertype;
@@ -2226,7 +2271,6 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
{
struct mlx5_nic_flow_attr *attr = flow->nic_attr;
const struct tc_action *a;
- LIST_HEAD(actions);
u32 action = 0;
int err, i;
@@ -2269,7 +2313,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
same_hw_devs(priv, netdev_priv(peer_dev))) {
- parse_attr->mirred_ifindex = peer_dev->ifindex;
+ parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
flow->flags |= MLX5E_TC_FLOW_HAIRPIN;
action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
@@ -2318,45 +2362,6 @@ static inline int hash_encap_info(struct ip_tunnel_key *key)
return jhash(key, sizeof(*key), 0);
}
-static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
- struct net_device **out_dev,
- struct flowi4 *fl4,
- struct neighbour **out_n,
- u8 *out_ttl)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5e_rep_priv *uplink_rpriv;
- struct rtable *rt;
- struct neighbour *n = NULL;
-
-#if IS_ENABLED(CONFIG_INET)
- int ret;
-
- rt = ip_route_output_key(dev_net(mirred_dev), fl4);
- ret = PTR_ERR_OR_ZERO(rt);
- if (ret)
- return ret;
-#else
- return -EOPNOTSUPP;
-#endif
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
- /* if the egress device isn't on the same HW e-switch, we use the uplink */
- if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
- *out_dev = uplink_rpriv->netdev;
- else
- *out_dev = rt->dst.dev;
-
- if (!(*out_ttl))
- *out_ttl = ip4_dst_hoplimit(&rt->dst);
- n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
- ip_rt_put(rt);
- if (!n)
- return -ENOMEM;
-
- *out_n = n;
- return 0;
-}
static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
struct net_device *peer_netdev)
@@ -2372,377 +2377,24 @@ static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
(peer_priv->mdev->priv.eswitch->mode == SRIOV_OFFLOADS));
}
-static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
- struct net_device **out_dev,
- struct flowi6 *fl6,
- struct neighbour **out_n,
- u8 *out_ttl)
-{
- struct neighbour *n = NULL;
- struct dst_entry *dst;
-
-#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
- struct mlx5e_rep_priv *uplink_rpriv;
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- int ret;
-
- ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst,
- fl6);
- if (ret < 0)
- return ret;
-
- if (!(*out_ttl))
- *out_ttl = ip6_dst_hoplimit(dst);
-
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
- /* if the egress device isn't on the same HW e-switch, we use the uplink */
- if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
- *out_dev = uplink_rpriv->netdev;
- else
- *out_dev = dst->dev;
-#else
- return -EOPNOTSUPP;
-#endif
-
- n = dst_neigh_lookup(dst, &fl6->daddr);
- dst_release(dst);
- if (!n)
- return -ENOMEM;
-
- *out_n = n;
- return 0;
-}
-
-static void gen_vxlan_header_ipv4(struct net_device *out_dev,
- char buf[], int encap_size,
- unsigned char h_dest[ETH_ALEN],
- u8 tos, u8 ttl,
- __be32 daddr,
- __be32 saddr,
- __be16 udp_dst_port,
- __be32 vx_vni)
-{
- struct ethhdr *eth = (struct ethhdr *)buf;
- struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
- struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
- struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
-
- memset(buf, 0, encap_size);
-
- ether_addr_copy(eth->h_dest, h_dest);
- ether_addr_copy(eth->h_source, out_dev->dev_addr);
- eth->h_proto = htons(ETH_P_IP);
-
- ip->daddr = daddr;
- ip->saddr = saddr;
-
- ip->tos = tos;
- ip->ttl = ttl;
- ip->protocol = IPPROTO_UDP;
- ip->version = 0x4;
- ip->ihl = 0x5;
-
- udp->dest = udp_dst_port;
- vxh->vx_flags = VXLAN_HF_VNI;
- vxh->vx_vni = vxlan_vni_field(vx_vni);
-}
-
-static void gen_vxlan_header_ipv6(struct net_device *out_dev,
- char buf[], int encap_size,
- unsigned char h_dest[ETH_ALEN],
- u8 tos, u8 ttl,
- struct in6_addr *daddr,
- struct in6_addr *saddr,
- __be16 udp_dst_port,
- __be32 vx_vni)
-{
- struct ethhdr *eth = (struct ethhdr *)buf;
- struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
- struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
- struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
-
- memset(buf, 0, encap_size);
-
- ether_addr_copy(eth->h_dest, h_dest);
- ether_addr_copy(eth->h_source, out_dev->dev_addr);
- eth->h_proto = htons(ETH_P_IPV6);
-
- ip6_flow_hdr(ip6h, tos, 0);
- /* the HW fills up ipv6 payload len */
- ip6h->nexthdr = IPPROTO_UDP;
- ip6h->hop_limit = ttl;
- ip6h->daddr = *daddr;
- ip6h->saddr = *saddr;
-
- udp->dest = udp_dst_port;
- vxh->vx_flags = VXLAN_HF_VNI;
- vxh->vx_vni = vxlan_vni_field(vx_vni);
-}
-
-static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
- struct mlx5e_encap_entry *e)
-{
- int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
- int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN;
- struct ip_tunnel_key *tun_key = &e->tun_info.key;
- struct net_device *out_dev;
- struct neighbour *n = NULL;
- struct flowi4 fl4 = {};
- u8 nud_state, tos, ttl;
- char *encap_header;
- int err;
- if (max_encap_size < ipv4_encap_size) {
- mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
- ipv4_encap_size, max_encap_size);
- return -EOPNOTSUPP;
- }
-
- encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
- if (!encap_header)
- return -ENOMEM;
-
- switch (e->tunnel_type) {
- case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
- fl4.flowi4_proto = IPPROTO_UDP;
- fl4.fl4_dport = tun_key->tp_dst;
- break;
- default:
- err = -EOPNOTSUPP;
- goto free_encap;
- }
-
- tos = tun_key->tos;
- ttl = tun_key->ttl;
-
- fl4.flowi4_tos = tun_key->tos;
- fl4.daddr = tun_key->u.ipv4.dst;
- fl4.saddr = tun_key->u.ipv4.src;
-
- err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev,
- &fl4, &n, &ttl);
- if (err)
- goto free_encap;
-
- /* used by mlx5e_detach_encap to lookup a neigh hash table
- * entry in the neigh hash table when a user deletes a rule
- */
- e->m_neigh.dev = n->dev;
- e->m_neigh.family = n->ops->family;
- memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
- e->out_dev = out_dev;
-
- /* It's importent to add the neigh to the hash table before checking
- * the neigh validity state. So if we'll get a notification, in case the
- * neigh changes it's validity state, we would find the relevant neigh
- * in the hash.
- */
- err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
- if (err)
- goto free_encap;
-
- read_lock_bh(&n->lock);
- nud_state = n->nud_state;
- ether_addr_copy(e->h_dest, n->ha);
- read_unlock_bh(&n->lock);
-
- switch (e->tunnel_type) {
- case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
- gen_vxlan_header_ipv4(out_dev, encap_header,
- ipv4_encap_size, e->h_dest, tos, ttl,
- fl4.daddr,
- fl4.saddr, tun_key->tp_dst,
- tunnel_id_to_key32(tun_key->tun_id));
- break;
- default:
- err = -EOPNOTSUPP;
- goto destroy_neigh_entry;
- }
- e->encap_size = ipv4_encap_size;
- e->encap_header = encap_header;
-
- if (!(nud_state & NUD_VALID)) {
- neigh_event_send(n, NULL);
- err = -EAGAIN;
- goto out;
- }
-
- err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type,
- ipv4_encap_size, encap_header,
- MLX5_FLOW_NAMESPACE_FDB,
- &e->encap_id);
- if (err)
- goto destroy_neigh_entry;
-
- e->flags |= MLX5_ENCAP_ENTRY_VALID;
- mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
- neigh_release(n);
- return err;
-
-destroy_neigh_entry:
- mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
-free_encap:
- kfree(encap_header);
-out:
- if (n)
- neigh_release(n);
- return err;
-}
-
-static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
- struct mlx5e_encap_entry *e)
-{
- int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
- int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN;
- struct ip_tunnel_key *tun_key = &e->tun_info.key;
- struct net_device *out_dev;
- struct neighbour *n = NULL;
- struct flowi6 fl6 = {};
- u8 nud_state, tos, ttl;
- char *encap_header;
- int err;
-
- if (max_encap_size < ipv6_encap_size) {
- mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
- ipv6_encap_size, max_encap_size);
- return -EOPNOTSUPP;
- }
-
- encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
- if (!encap_header)
- return -ENOMEM;
-
- switch (e->tunnel_type) {
- case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
- fl6.flowi6_proto = IPPROTO_UDP;
- fl6.fl6_dport = tun_key->tp_dst;
- break;
- default:
- err = -EOPNOTSUPP;
- goto free_encap;
- }
-
- tos = tun_key->tos;
- ttl = tun_key->ttl;
-
- fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
- fl6.daddr = tun_key->u.ipv6.dst;
- fl6.saddr = tun_key->u.ipv6.src;
-
- err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev,
- &fl6, &n, &ttl);
- if (err)
- goto free_encap;
-
- /* used by mlx5e_detach_encap to lookup a neigh hash table
- * entry in the neigh hash table when a user deletes a rule
- */
- e->m_neigh.dev = n->dev;
- e->m_neigh.family = n->ops->family;
- memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
- e->out_dev = out_dev;
-
- /* It's importent to add the neigh to the hash table before checking
- * the neigh validity state. So if we'll get a notification, in case the
- * neigh changes it's validity state, we would find the relevant neigh
- * in the hash.
- */
- err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
- if (err)
- goto free_encap;
-
- read_lock_bh(&n->lock);
- nud_state = n->nud_state;
- ether_addr_copy(e->h_dest, n->ha);
- read_unlock_bh(&n->lock);
-
- switch (e->tunnel_type) {
- case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
- gen_vxlan_header_ipv6(out_dev, encap_header,
- ipv6_encap_size, e->h_dest, tos, ttl,
- &fl6.daddr,
- &fl6.saddr, tun_key->tp_dst,
- tunnel_id_to_key32(tun_key->tun_id));
- break;
- default:
- err = -EOPNOTSUPP;
- goto destroy_neigh_entry;
- }
-
- e->encap_size = ipv6_encap_size;
- e->encap_header = encap_header;
-
- if (!(nud_state & NUD_VALID)) {
- neigh_event_send(n, NULL);
- err = -EAGAIN;
- goto out;
- }
-
- err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type,
- ipv6_encap_size, encap_header,
- MLX5_FLOW_NAMESPACE_FDB,
- &e->encap_id);
- if (err)
- goto destroy_neigh_entry;
-
- e->flags |= MLX5_ENCAP_ENTRY_VALID;
- mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
- neigh_release(n);
- return err;
-
-destroy_neigh_entry:
- mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
-free_encap:
- kfree(encap_header);
-out:
- if (n)
- neigh_release(n);
- return err;
-}
static int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct ip_tunnel_info *tun_info,
struct net_device *mirred_dev,
struct net_device **encap_dev,
struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ int out_index)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
unsigned short family = ip_tunnel_info_af(tun_info);
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
struct ip_tunnel_key *key = &tun_info->key;
struct mlx5e_encap_entry *e;
- int tunnel_type, err = 0;
uintptr_t hash_key;
bool found = false;
-
- /* udp dst port must be set */
- if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
- goto vxlan_encap_offload_err;
-
- /* setting udp src port isn't supported */
- if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
-vxlan_encap_offload_err:
- NL_SET_ERR_MSG_MOD(extack,
- "must set udp dst port and not set udp src port");
- netdev_warn(priv->netdev,
- "must set udp dst port and not set udp src port\n");
- return -EOPNOTSUPP;
- }
-
- if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) &&
- MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
- tunnel_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
- } else {
- NL_SET_ERR_MSG_MOD(extack,
- "port isn't an offloaded vxlan udp dport");
- netdev_warn(priv->netdev,
- "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
- return -EOPNOTSUPP;
- }
+ int err = 0;
hash_key = hash_encap_info(key);
@@ -2763,13 +2415,16 @@ vxlan_encap_offload_err:
return -ENOMEM;
e->tun_info = *tun_info;
- e->tunnel_type = tunnel_type;
+ err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
+ if (err)
+ goto out_err;
+
INIT_LIST_HEAD(&e->flows);
if (family == AF_INET)
- err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e);
+ err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
else if (family == AF_INET6)
- err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e);
+ err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
if (err && err != -EAGAIN)
goto out_err;
@@ -2777,12 +2432,15 @@ vxlan_encap_offload_err:
hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
attach_flow:
- list_add(&flow->encap, &e->flows);
+ list_add(&flow->encaps[out_index].list, &e->flows);
+ flow->encaps[out_index].index = out_index;
*encap_dev = e->out_dev;
- if (e->flags & MLX5_ENCAP_ENTRY_VALID)
- attr->encap_id = e->encap_id;
- else
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ attr->dests[out_index].encap_id = e->encap_id;
+ attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ } else {
err = -EAGAIN;
+ }
return err;
@@ -2851,7 +2509,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct ip_tunnel_info *info = NULL;
const struct tc_action *a;
- LIST_HEAD(actions);
bool encap = false;
u32 action = 0;
int err, i;
@@ -2876,7 +2533,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
return err;
action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- attr->mirror_count = attr->out_count;
+ attr->split_count = attr->out_count;
continue;
}
@@ -2894,6 +2551,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
struct net_device *out_dev;
out_dev = tcf_mirred_dev(a);
+ if (!out_dev) {
+ /* out_dev is NULL when filters with
+ * non-existing mirred device are replayed to
+ * the driver.
+ */
+ return -EINVAL;
+ }
if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
NL_SET_ERR_MSG_MOD(extack,
@@ -2903,23 +2567,47 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
return -EOPNOTSUPP;
}
+ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
if (switchdev_port_same_parent_id(priv->netdev,
out_dev) ||
is_merged_eswitch_dev(priv, out_dev)) {
- action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+ struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev);
+
+ if (uplink_upper &&
+ netif_is_lag_master(uplink_upper) &&
+ uplink_upper == out_dev)
+ out_dev = uplink_dev;
+
+ if (!mlx5e_eswitch_rep(out_dev))
+ return -EOPNOTSUPP;
+
out_priv = netdev_priv(out_dev);
rpriv = out_priv->ppriv;
- attr->out_rep[attr->out_count] = rpriv->rep;
- attr->out_mdev[attr->out_count++] = out_priv->mdev;
+ attr->dests[attr->out_count].rep = rpriv->rep;
+ attr->dests[attr->out_count].mdev = out_priv->mdev;
+ attr->out_count++;
} else if (encap) {
- parse_attr->mirred_ifindex = out_dev->ifindex;
- parse_attr->tun_info = *info;
+ parse_attr->mirred_ifindex[attr->out_count] =
+ out_dev->ifindex;
+ parse_attr->tun_info[attr->out_count] = *info;
+ encap = false;
attr->parse_attr = parse_attr;
- action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
- MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
- /* attr->out_rep is resolved when we handle encap */
+ attr->dests[attr->out_count].flags |=
+ MLX5_ESW_DEST_ENCAP;
+ attr->out_count++;
+ /* attr->dests[].rep is resolved when we
+ * handle encap
+ */
+ } else if (parse_attr->filter_dev != priv->netdev) {
+ /* All mlx5 devices are called to configure
+ * high level device filters. Therefore, the
+ * *attempt* to install a filter on invalid
+ * eswitch should not trigger an explicit error
+ */
+ return -EINVAL;
} else {
NL_SET_ERR_MSG_MOD(extack,
"devices are not on same switch HW, can't offload forwarding");
@@ -2936,7 +2624,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
encap = true;
else
return -EOPNOTSUPP;
- attr->mirror_count = attr->out_count;
continue;
}
@@ -2946,7 +2633,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
if (err)
return err;
- attr->mirror_count = attr->out_count;
+ attr->split_count = attr->out_count;
continue;
}
@@ -2988,7 +2675,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
}
- if (attr->mirror_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
+ if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
NL_SET_ERR_MSG_MOD(extack,
"current firmware doesn't support split rule for port mirroring");
netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
@@ -3007,6 +2694,11 @@ static void get_flags(int flags, u16 *flow_flags)
if (flags & MLX5E_TC_EGRESS)
__flow_flags |= MLX5E_TC_FLOW_EGRESS;
+ if (flags & MLX5E_TC_ESW_OFFLOAD)
+ __flow_flags |= MLX5E_TC_FLOW_ESWITCH;
+ if (flags & MLX5E_TC_NIC_OFFLOAD)
+ __flow_flags |= MLX5E_TC_FLOW_NIC;
+
*flow_flags = __flow_flags;
}
@@ -3017,18 +2709,32 @@ static const struct rhashtable_params tc_ht_params = {
.automatic_shrinking = true,
};
-static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv)
+static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_rep_priv *uplink_rpriv;
- if (MLX5_VPORT_MANAGER(priv->mdev) && esw->mode == SRIOV_OFFLOADS) {
+ if (flags & MLX5E_TC_ESW_OFFLOAD) {
uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
- return &uplink_rpriv->tc_ht;
- } else
+ return &uplink_rpriv->uplink_priv.tc_ht;
+ } else /* NIC offload */
return &priv->fs.tc.ht;
}
+static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ bool is_rep_ingress = attr->in_rep->vport != FDB_UPLINK_VPORT &&
+ flow->flags & MLX5E_TC_FLOW_INGRESS;
+ bool act_is_encap = !!(attr->action &
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
+ bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS);
+
+ return esw_paired && mlx5_lag_is_sriov(attr->in_mdev) &&
+ (is_rep_ingress || act_is_encap);
+}
+
static int
mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
struct tc_cls_flower_offload *f, u16 flow_flags,
@@ -3050,10 +2756,6 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
flow->flags = flow_flags;
flow->priv = priv;
- err = parse_cls_flower(priv, flow, &parse_attr->spec, f);
- if (err)
- goto err_free;
-
*__flow = flow;
*__parse_attr = parse_attr;
@@ -3066,12 +2768,16 @@ err_free:
}
static int
-mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
- struct tc_cls_flower_offload *f,
- u16 flow_flags,
- struct mlx5e_tc_flow **__flow)
+__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f,
+ u16 flow_flags,
+ struct net_device *filter_dev,
+ struct mlx5_eswitch_rep *in_rep,
+ struct mlx5_core_dev *in_mdev,
+ struct mlx5e_tc_flow **__flow)
{
struct netlink_ext_ack *extack = f->common.extack;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_tc_flow *flow;
int attr_size, err;
@@ -3082,6 +2788,12 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
&parse_attr, &flow);
if (err)
goto out;
+ parse_attr->filter_dev = filter_dev;
+ flow->esw_attr->parse_attr = parse_attr;
+ err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
+ f, filter_dev);
+ if (err)
+ goto err_free;
flow->esw_attr->chain = f->common.chain_index;
flow->esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16;
@@ -3089,14 +2801,19 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
if (err)
goto err_free;
+ flow->esw_attr->in_rep = in_rep;
+ flow->esw_attr->in_mdev = in_mdev;
+
+ if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
+ MLX5_COUNTER_SOURCE_ESWITCH)
+ flow->esw_attr->counter_dev = in_mdev;
+ else
+ flow->esw_attr->counter_dev = priv->mdev;
+
err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack);
if (err)
goto err_free;
- if (!(flow->esw_attr->action &
- MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT))
- kvfree(parse_attr);
-
*__flow = flow;
return 0;
@@ -3108,10 +2825,92 @@ out:
return err;
}
+static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5e_priv *priv = flow->priv, *peer_priv;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
+ struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5e_rep_priv *peer_urpriv;
+ struct mlx5e_tc_flow *peer_flow;
+ struct mlx5_core_dev *in_mdev;
+ int err = 0;
+
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ return -ENODEV;
+
+ peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
+ peer_priv = netdev_priv(peer_urpriv->netdev);
+
+ /* in_mdev is assigned of which the packet originated from.
+ * So packets redirected to uplink use the same mdev of the
+ * original flow and packets redirected from uplink use the
+ * peer mdev.
+ */
+ if (flow->esw_attr->in_rep->vport == FDB_UPLINK_VPORT)
+ in_mdev = peer_priv->mdev;
+ else
+ in_mdev = priv->mdev;
+
+ parse_attr = flow->esw_attr->parse_attr;
+ err = __mlx5e_add_fdb_flow(peer_priv, f, flow->flags,
+ parse_attr->filter_dev,
+ flow->esw_attr->in_rep, in_mdev, &peer_flow);
+ if (err)
+ goto out;
+
+ flow->peer_flow = peer_flow;
+ flow->flags |= MLX5E_TC_FLOW_DUP;
+ mutex_lock(&esw->offloads.peer_mutex);
+ list_add_tail(&flow->peer, &esw->offloads.peer_flows);
+ mutex_unlock(&esw->offloads.peer_mutex);
+
+out:
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ return err;
+}
+
+static int
+mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f,
+ u16 flow_flags,
+ struct net_device *filter_dev,
+ struct mlx5e_tc_flow **__flow)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *in_rep = rpriv->rep;
+ struct mlx5_core_dev *in_mdev = priv->mdev;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ err = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
+ in_mdev, &flow);
+ if (err)
+ goto out;
+
+ if (is_peer_flow_needed(flow)) {
+ err = mlx5e_tc_add_fdb_peer_flow(f, flow);
+ if (err) {
+ mlx5e_tc_del_fdb_flow(priv, flow);
+ goto out;
+ }
+ }
+
+ *__flow = flow;
+
+ return 0;
+
+out:
+ return err;
+}
+
static int
mlx5e_add_nic_flow(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f,
u16 flow_flags,
+ struct net_device *filter_dev,
struct mlx5e_tc_flow **__flow)
{
struct netlink_ext_ack *extack = f->common.extack;
@@ -3130,6 +2929,12 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv,
if (err)
goto out;
+ parse_attr->filter_dev = filter_dev;
+ err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
+ f, filter_dev);
+ if (err)
+ goto err_free;
+
err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow, extack);
if (err)
goto err_free;
@@ -3155,6 +2960,7 @@ static int
mlx5e_tc_add_flow(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f,
int flags,
+ struct net_device *filter_dev,
struct mlx5e_tc_flow **flow)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -3167,18 +2973,20 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
if (esw && esw->mode == SRIOV_OFFLOADS)
- err = mlx5e_add_fdb_flow(priv, f, flow_flags, flow);
+ err = mlx5e_add_fdb_flow(priv, f, flow_flags,
+ filter_dev, flow);
else
- err = mlx5e_add_nic_flow(priv, f, flow_flags, flow);
+ err = mlx5e_add_nic_flow(priv, f, flow_flags,
+ filter_dev, flow);
return err;
}
-int mlx5e_configure_flower(struct mlx5e_priv *priv,
+int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags)
{
struct netlink_ext_ack *extack = f->common.extack;
- struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
struct mlx5e_tc_flow *flow;
int err = 0;
@@ -3192,7 +3000,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,
goto out;
}
- err = mlx5e_tc_add_flow(priv, f, flags, &flow);
+ err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
if (err)
goto out;
@@ -3220,10 +3028,10 @@ static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
return false;
}
-int mlx5e_delete_flower(struct mlx5e_priv *priv,
+int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags)
{
- struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
struct mlx5e_tc_flow *flow;
flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
@@ -3239,10 +3047,12 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv,
return 0;
}
-int mlx5e_stats_flower(struct mlx5e_priv *priv,
+int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags)
{
- struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
+ struct mlx5_eswitch *peer_esw;
struct mlx5e_tc_flow *flow;
struct mlx5_fc *counter;
u64 bytes;
@@ -3262,6 +3072,27 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv,
mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ goto out;
+
+ if ((flow->flags & MLX5E_TC_FLOW_DUP) &&
+ (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) {
+ u64 bytes2;
+ u64 packets2;
+ u64 lastuse2;
+
+ counter = mlx5e_tc_get_counter(flow->peer_flow);
+ mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
+
+ bytes += bytes2;
+ packets += packets2;
+ lastuse = max_t(u64, lastuse, lastuse2);
+ }
+
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
+out:
tcf_exts_stats_update(f->exts, bytes, packets, lastuse);
return 0;
@@ -3350,7 +3181,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
if (tc->netdevice_nb.notifier_call)
unregister_netdevice_notifier(&tc->netdevice_nb);
- rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
+ rhashtable_destroy(&tc->ht);
if (!IS_ERR_OR_NULL(tc->t)) {
mlx5_destroy_flow_table(tc->t);
@@ -3368,9 +3199,17 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
}
-int mlx5e_tc_num_filters(struct mlx5e_priv *priv)
+int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags)
{
- struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
return atomic_read(&tc_ht->nelems);
}
+
+void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
+{
+ struct mlx5e_tc_flow *flow, *tmp;
+
+ list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
+ __mlx5e_tc_del_fdb_peer_flow(flow);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 49436bf3b80a..d2d87f978c06 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -42,7 +42,9 @@
enum {
MLX5E_TC_INGRESS = BIT(0),
MLX5E_TC_EGRESS = BIT(1),
- MLX5E_TC_LAST_EXPORTED_BIT = 1,
+ MLX5E_TC_NIC_OFFLOAD = BIT(2),
+ MLX5E_TC_ESW_OFFLOAD = BIT(3),
+ MLX5E_TC_LAST_EXPORTED_BIT = 3,
};
int mlx5e_tc_nic_init(struct mlx5e_priv *priv);
@@ -51,12 +53,12 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv);
int mlx5e_tc_esw_init(struct rhashtable *tc_ht);
void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht);
-int mlx5e_configure_flower(struct mlx5e_priv *priv,
+int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags);
-int mlx5e_delete_flower(struct mlx5e_priv *priv,
+int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags);
-int mlx5e_stats_flower(struct mlx5e_priv *priv,
+int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags);
struct mlx5e_encap_entry;
@@ -68,12 +70,13 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
struct mlx5e_neigh_hash_entry;
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
-int mlx5e_tc_num_filters(struct mlx5e_priv *priv);
+int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags);
+
#else /* CONFIG_MLX5_ESWITCH */
static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
-static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return 0; }
+static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags) { return 0; }
#endif
#endif /* __MLX5_EN_TC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 6dacaeba2fbf..598ad7e4d5c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -127,7 +127,7 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
else
#endif
if (skb_vlan_tag_present(skb))
- up = skb->vlan_tci >> VLAN_PRIO_SHIFT;
+ up = skb_vlan_tag_get_prio(skb);
/* channel_ix can be larger than num_channels since
* dev->num_real_tx_queues = num_channels * num_tc
@@ -459,9 +459,10 @@ static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq,
u32 ci = mlx5_cqwq_get_ci(&sq->cq.wq);
netdev_err(sq->channel->netdev,
- "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
- sq->cq.mcq.cqn, ci, sq->sqn, err_cqe->syndrome,
- err_cqe->vendor_err_synd);
+ "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
+ sq->cq.mcq.cqn, ci, sq->sqn,
+ get_cqe_opcode((struct mlx5_cqe64 *)err_cqe),
+ err_cqe->syndrome, err_cqe->vendor_err_synd);
mlx5_dump_err_cqe(sq->cq.mdev, err_cqe);
}
@@ -507,7 +508,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
wqe_counter = be16_to_cpu(cqe->wqe_counter);
- if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) {
+ if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
&sq->state)) {
mlx5e_dump_error_cqe(sq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 85d517360157..b4af5e19f6ac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -76,6 +76,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
napi);
struct mlx5e_ch_stats *ch_stats = c->stats;
+ struct mlx5e_rq *rq = &c->rq;
bool busy = false;
int work_done = 0;
int i;
@@ -85,17 +86,17 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
for (i = 0; i < c->num_tc; i++)
busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget);
- busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
+ busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq, NULL);
if (c->xdp)
- busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq);
+ busy |= mlx5e_poll_xdpsq_cq(&rq->xdpsq.cq, rq);
if (likely(budget)) { /* budget=0 means: don't poll rx rings */
- work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget);
+ work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
busy |= work_done == budget;
}
- busy |= c->rq.post_wqes(&c->rq);
+ busy |= c->rq.post_wqes(rq);
if (busy) {
if (likely(mlx5e_channel_no_affinity_change(c)))
@@ -115,9 +116,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
mlx5e_cq_arm(&c->sq[i].cq);
}
- mlx5e_handle_rx_dim(&c->rq);
+ mlx5e_handle_rx_dim(rq);
- mlx5e_cq_arm(&c->rq.cq);
+ mlx5e_cq_arm(&rq->cq);
mlx5e_cq_arm(&c->icosq.cq);
mlx5e_cq_arm(&c->xdpsq.cq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index c1e1a16a9b07..ee04aab65a9f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -31,20 +31,22 @@
*/
#include <linux/interrupt.h>
+#include <linux/notifier.h>
#include <linux/module.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
#include <linux/mlx5/cmd.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
#include "mlx5_core.h"
+#include "lib/eq.h"
#include "fpga/core.h"
#include "eswitch.h"
#include "lib/clock.h"
#include "diag/fw_tracer.h"
enum {
- MLX5_EQE_SIZE = sizeof(struct mlx5_eqe),
MLX5_EQE_OWNER_INIT_VAL = 0x1,
};
@@ -55,14 +57,32 @@ enum {
};
enum {
- MLX5_NUM_SPARE_EQE = 0x80,
- MLX5_NUM_ASYNC_EQE = 0x1000,
- MLX5_NUM_CMD_EQE = 32,
- MLX5_NUM_PF_DRAIN = 64,
+ MLX5_EQ_DOORBEL_OFFSET = 0x40,
};
-enum {
- MLX5_EQ_DOORBEL_OFFSET = 0x40,
+struct mlx5_irq_info {
+ cpumask_var_t mask;
+ char name[MLX5_MAX_IRQ_NAME];
+ void *context; /* dev_id provided to request_irq */
+};
+
+struct mlx5_eq_table {
+ struct list_head comp_eqs_list;
+ struct mlx5_eq pages_eq;
+ struct mlx5_eq cmd_eq;
+ struct mlx5_eq async_eq;
+
+ struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];
+
+ /* Since CQ DB is stored in async_eq */
+ struct mlx5_nb cq_err_nb;
+
+ struct mutex lock; /* sync async eqs creations */
+ int num_comp_vectors;
+ struct mlx5_irq_info *irq_info;
+#ifdef CONFIG_RFS_ACCEL
+ struct cpu_rmap *rmap;
+#endif
};
#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
@@ -78,17 +98,6 @@ enum {
(1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \
(1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
-struct map_eq_in {
- u64 mask;
- u32 reserved;
- u32 unmap_eqn;
-};
-
-struct cre_des_eq {
- u8 reserved[15];
- u8 eqn;
-};
-
static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
{
u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
@@ -99,213 +108,56 @@ static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
-static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
-{
- return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
-}
-
-static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
-{
- struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
-
- return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
-}
-
-static const char *eqe_type_str(u8 type)
-{
- switch (type) {
- case MLX5_EVENT_TYPE_COMP:
- return "MLX5_EVENT_TYPE_COMP";
- case MLX5_EVENT_TYPE_PATH_MIG:
- return "MLX5_EVENT_TYPE_PATH_MIG";
- case MLX5_EVENT_TYPE_COMM_EST:
- return "MLX5_EVENT_TYPE_COMM_EST";
- case MLX5_EVENT_TYPE_SQ_DRAINED:
- return "MLX5_EVENT_TYPE_SQ_DRAINED";
- case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
- return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
- case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
- return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
- case MLX5_EVENT_TYPE_CQ_ERROR:
- return "MLX5_EVENT_TYPE_CQ_ERROR";
- case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
- return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
- case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
- return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
- case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
- return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
- case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
- return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
- case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
- return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
- case MLX5_EVENT_TYPE_INTERNAL_ERROR:
- return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
- case MLX5_EVENT_TYPE_PORT_CHANGE:
- return "MLX5_EVENT_TYPE_PORT_CHANGE";
- case MLX5_EVENT_TYPE_GPIO_EVENT:
- return "MLX5_EVENT_TYPE_GPIO_EVENT";
- case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
- return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
- case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
- return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
- case MLX5_EVENT_TYPE_REMOTE_CONFIG:
- return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
- case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
- return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
- case MLX5_EVENT_TYPE_STALL_EVENT:
- return "MLX5_EVENT_TYPE_STALL_EVENT";
- case MLX5_EVENT_TYPE_CMD:
- return "MLX5_EVENT_TYPE_CMD";
- case MLX5_EVENT_TYPE_PAGE_REQUEST:
- return "MLX5_EVENT_TYPE_PAGE_REQUEST";
- case MLX5_EVENT_TYPE_PAGE_FAULT:
- return "MLX5_EVENT_TYPE_PAGE_FAULT";
- case MLX5_EVENT_TYPE_PPS_EVENT:
- return "MLX5_EVENT_TYPE_PPS_EVENT";
- case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
- return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
- case MLX5_EVENT_TYPE_FPGA_ERROR:
- return "MLX5_EVENT_TYPE_FPGA_ERROR";
- case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
- return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
- case MLX5_EVENT_TYPE_GENERAL_EVENT:
- return "MLX5_EVENT_TYPE_GENERAL_EVENT";
- case MLX5_EVENT_TYPE_DEVICE_TRACER:
- return "MLX5_EVENT_TYPE_DEVICE_TRACER";
- default:
- return "Unrecognized event";
- }
-}
-
-static enum mlx5_dev_event port_subtype_event(u8 subtype)
-{
- switch (subtype) {
- case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
- return MLX5_DEV_EVENT_PORT_DOWN;
- case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
- return MLX5_DEV_EVENT_PORT_UP;
- case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
- return MLX5_DEV_EVENT_PORT_INITIALIZED;
- case MLX5_PORT_CHANGE_SUBTYPE_LID:
- return MLX5_DEV_EVENT_LID_CHANGE;
- case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
- return MLX5_DEV_EVENT_PKEY_CHANGE;
- case MLX5_PORT_CHANGE_SUBTYPE_GUID:
- return MLX5_DEV_EVENT_GUID_CHANGE;
- case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
- return MLX5_DEV_EVENT_CLIENT_REREG;
- }
- return -1;
-}
-
-static void eq_update_ci(struct mlx5_eq *eq, int arm)
+/* caller must eventually call mlx5_cq_put on the returned cq */
+static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
{
- __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
- u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
-
- __raw_writel((__force u32)cpu_to_be32(val), addr);
- /* We still want ordering, just not swabbing, so add a barrier */
- mb();
-}
+ struct mlx5_cq_table *table = &eq->cq_table;
+ struct mlx5_core_cq *cq = NULL;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-static void eqe_pf_action(struct work_struct *work)
-{
- struct mlx5_pagefault *pfault = container_of(work,
- struct mlx5_pagefault,
- work);
- struct mlx5_eq *eq = pfault->eq;
+ spin_lock(&table->lock);
+ cq = radix_tree_lookup(&table->tree, cqn);
+ if (likely(cq))
+ mlx5_cq_hold(cq);
+ spin_unlock(&table->lock);
- mlx5_core_page_fault(eq->dev, pfault);
- mempool_free(pfault, eq->pf_ctx.pool);
+ return cq;
}
-static void eq_pf_process(struct mlx5_eq *eq)
+static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr)
{
- struct mlx5_core_dev *dev = eq->dev;
- struct mlx5_eqe_page_fault *pf_eqe;
- struct mlx5_pagefault *pfault;
+ struct mlx5_eq_comp *eq_comp = eq_ptr;
+ struct mlx5_eq *eq = eq_ptr;
struct mlx5_eqe *eqe;
int set_ci = 0;
+ u32 cqn = -1;
while ((eqe = next_eqe_sw(eq))) {
- pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC);
- if (!pfault) {
- schedule_work(&eq->pf_ctx.work);
- break;
- }
-
+ struct mlx5_core_cq *cq;
+ /* Make sure we read EQ entry contents after we've
+ * checked the ownership bit.
+ */
dma_rmb();
- pf_eqe = &eqe->data.page_fault;
- pfault->event_subtype = eqe->sub_type;
- pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
-
- mlx5_core_dbg(dev,
- "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
- eqe->sub_type, pfault->bytes_committed);
-
- switch (eqe->sub_type) {
- case MLX5_PFAULT_SUBTYPE_RDMA:
- /* RDMA based event */
- pfault->type =
- be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
- pfault->token =
- be32_to_cpu(pf_eqe->rdma.pftype_token) &
- MLX5_24BIT_MASK;
- pfault->rdma.r_key =
- be32_to_cpu(pf_eqe->rdma.r_key);
- pfault->rdma.packet_size =
- be16_to_cpu(pf_eqe->rdma.packet_length);
- pfault->rdma.rdma_op_len =
- be32_to_cpu(pf_eqe->rdma.rdma_op_len);
- pfault->rdma.rdma_va =
- be64_to_cpu(pf_eqe->rdma.rdma_va);
- mlx5_core_dbg(dev,
- "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
- pfault->type, pfault->token,
- pfault->rdma.r_key);
- mlx5_core_dbg(dev,
- "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
- pfault->rdma.rdma_op_len,
- pfault->rdma.rdma_va);
- break;
-
- case MLX5_PFAULT_SUBTYPE_WQE:
- /* WQE based event */
- pfault->type =
- (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
- pfault->token =
- be32_to_cpu(pf_eqe->wqe.token);
- pfault->wqe.wq_num =
- be32_to_cpu(pf_eqe->wqe.pftype_wq) &
- MLX5_24BIT_MASK;
- pfault->wqe.wqe_index =
- be16_to_cpu(pf_eqe->wqe.wqe_index);
- pfault->wqe.packet_size =
- be16_to_cpu(pf_eqe->wqe.packet_length);
- mlx5_core_dbg(dev,
- "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
- pfault->type, pfault->token,
- pfault->wqe.wq_num,
- pfault->wqe.wqe_index);
- break;
-
- default:
- mlx5_core_warn(dev,
- "Unsupported page fault event sub-type: 0x%02hhx\n",
- eqe->sub_type);
- /* Unsupported page faults should still be
- * resolved by the page fault handler
- */
+ /* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */
+ cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
+
+ cq = mlx5_eq_cq_get(eq, cqn);
+ if (likely(cq)) {
+ ++cq->arm_sn;
+ cq->comp(cq);
+ mlx5_cq_put(cq);
+ } else {
+ mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
}
- pfault->eq = eq;
- INIT_WORK(&pfault->work, eqe_pf_action);
- queue_work(eq->pf_ctx.wq, &pfault->work);
-
++eq->cons_index;
++set_ci;
+ /* The HCA will think the queue has overflowed if we
+ * don't tell it we've been processing events. We
+ * create our EQs with MLX5_NUM_SPARE_EQE extra
+ * entries, so we must update our consumer index at
+ * least that often.
+ */
if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
eq_update_ci(eq, 0);
set_ci = 0;
@@ -313,165 +165,41 @@ static void eq_pf_process(struct mlx5_eq *eq)
}
eq_update_ci(eq, 1);
-}
-static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr)
-{
- struct mlx5_eq *eq = eq_ptr;
- unsigned long flags;
-
- if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) {
- eq_pf_process(eq);
- spin_unlock_irqrestore(&eq->pf_ctx.lock, flags);
- } else {
- schedule_work(&eq->pf_ctx.work);
- }
+ if (cqn != -1)
+ tasklet_schedule(&eq_comp->tasklet_ctx.task);
return IRQ_HANDLED;
}
-/* mempool_refill() was proposed but unfortunately wasn't accepted
- * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
- * Chip workaround.
+/* Some architectures don't latch interrupts when they are disabled, so using
+ * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
+ * avoid losing them. It is not recommended to use it, unless this is the last
+ * resort.
*/
-static void mempool_refill(mempool_t *pool)
-{
- while (pool->curr_nr < pool->min_nr)
- mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
-}
-
-static void eq_pf_action(struct work_struct *work)
-{
- struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work);
-
- mempool_refill(eq->pf_ctx.pool);
-
- spin_lock_irq(&eq->pf_ctx.lock);
- eq_pf_process(eq);
- spin_unlock_irq(&eq->pf_ctx.lock);
-}
-
-static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name)
-{
- spin_lock_init(&pf_ctx->lock);
- INIT_WORK(&pf_ctx->work, eq_pf_action);
-
- pf_ctx->wq = alloc_ordered_workqueue(name,
- WQ_MEM_RECLAIM);
- if (!pf_ctx->wq)
- return -ENOMEM;
-
- pf_ctx->pool = mempool_create_kmalloc_pool
- (MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault));
- if (!pf_ctx->pool)
- goto err_wq;
-
- return 0;
-err_wq:
- destroy_workqueue(pf_ctx->wq);
- return -ENOMEM;
-}
-
-int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
- u32 wq_num, u8 type, int error)
-{
- u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0};
-
- MLX5_SET(page_fault_resume_in, in, opcode,
- MLX5_CMD_OP_PAGE_FAULT_RESUME);
- MLX5_SET(page_fault_resume_in, in, error, !!error);
- MLX5_SET(page_fault_resume_in, in, page_fault_type, type);
- MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
- MLX5_SET(page_fault_resume_in, in, token, token);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
-#endif
-
-static void general_event_handler(struct mlx5_core_dev *dev,
- struct mlx5_eqe *eqe)
-{
- switch (eqe->sub_type) {
- case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
- if (dev->event)
- dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0);
- break;
- default:
- mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n",
- eqe->sub_type);
- }
-}
-
-static void mlx5_temp_warning_event(struct mlx5_core_dev *dev,
- struct mlx5_eqe *eqe)
-{
- u64 value_lsb;
- u64 value_msb;
-
- value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
- value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
-
- mlx5_core_warn(dev,
- "High temperature on sensors with bit set %llx %llx",
- value_msb, value_lsb);
-}
-
-/* caller must eventually call mlx5_cq_put on the returned cq */
-static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
-{
- struct mlx5_cq_table *table = &eq->cq_table;
- struct mlx5_core_cq *cq = NULL;
-
- spin_lock(&table->lock);
- cq = radix_tree_lookup(&table->tree, cqn);
- if (likely(cq))
- mlx5_cq_hold(cq);
- spin_unlock(&table->lock);
-
- return cq;
-}
-
-static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn)
-{
- struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
-
- if (unlikely(!cq)) {
- mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
- return;
- }
-
- ++cq->arm_sn;
-
- cq->comp(cq);
-
- mlx5_cq_put(cq);
-}
-
-static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
{
- struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
-
- if (unlikely(!cq)) {
- mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
- return;
- }
+ u32 count_eqe;
- cq->event(cq, event_type);
+ disable_irq(eq->core.irqn);
+ count_eqe = eq->core.cons_index;
+ mlx5_eq_comp_int(eq->core.irqn, eq);
+ count_eqe = eq->core.cons_index - count_eqe;
+ enable_irq(eq->core.irqn);
- mlx5_cq_put(cq);
+ return count_eqe;
}
-static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
+static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr)
{
struct mlx5_eq *eq = eq_ptr;
- struct mlx5_core_dev *dev = eq->dev;
+ struct mlx5_eq_table *eqt;
+ struct mlx5_core_dev *dev;
struct mlx5_eqe *eqe;
int set_ci = 0;
- u32 cqn = -1;
- u32 rsn;
- u8 port;
+
+ dev = eq->dev;
+ eqt = dev->priv.eq_table;
while ((eqe = next_eqe_sw(eq))) {
/*
@@ -480,116 +208,12 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
*/
dma_rmb();
- mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n",
- eq->eqn, eqe_type_str(eqe->type));
- switch (eqe->type) {
- case MLX5_EVENT_TYPE_COMP:
- cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
- mlx5_eq_cq_completion(eq, cqn);
- break;
- case MLX5_EVENT_TYPE_DCT_DRAINED:
- rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
- rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
- mlx5_rsc_event(dev, rsn, eqe->type);
- break;
- case MLX5_EVENT_TYPE_PATH_MIG:
- case MLX5_EVENT_TYPE_COMM_EST:
- case MLX5_EVENT_TYPE_SQ_DRAINED:
- case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
- case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
- case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
- case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
- case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
- rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
- rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
- mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
- eqe_type_str(eqe->type), eqe->type, rsn);
- mlx5_rsc_event(dev, rsn, eqe->type);
- break;
-
- case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
- case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
- rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
- mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n",
- eqe_type_str(eqe->type), eqe->type, rsn);
- mlx5_srq_event(dev, rsn, eqe->type);
- break;
-
- case MLX5_EVENT_TYPE_CMD:
- mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
- break;
+ if (likely(eqe->type < MLX5_EVENT_TYPE_MAX))
+ atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe);
+ else
+ mlx5_core_warn_once(dev, "notifier_call_chain is not setup for eqe: %d\n", eqe->type);
- case MLX5_EVENT_TYPE_PORT_CHANGE:
- port = (eqe->data.port.port >> 4) & 0xf;
- switch (eqe->sub_type) {
- case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
- case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
- case MLX5_PORT_CHANGE_SUBTYPE_LID:
- case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
- case MLX5_PORT_CHANGE_SUBTYPE_GUID:
- case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
- case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
- if (dev->event)
- dev->event(dev, port_subtype_event(eqe->sub_type),
- (unsigned long)port);
- break;
- default:
- mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n",
- port, eqe->sub_type);
- }
- break;
- case MLX5_EVENT_TYPE_CQ_ERROR:
- cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
- mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
- cqn, eqe->data.cq_err.syndrome);
- mlx5_eq_cq_event(eq, cqn, eqe->type);
- break;
-
- case MLX5_EVENT_TYPE_PAGE_REQUEST:
- {
- u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
- s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages);
-
- mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
- func_id, npages);
- mlx5_core_req_pages_handler(dev, func_id, npages);
- }
- break;
-
- case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
- mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
- break;
-
- case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
- mlx5_port_module_event(dev, eqe);
- break;
-
- case MLX5_EVENT_TYPE_PPS_EVENT:
- mlx5_pps_event(dev, eqe);
- break;
-
- case MLX5_EVENT_TYPE_FPGA_ERROR:
- case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
- mlx5_fpga_event(dev, eqe->type, &eqe->data.raw);
- break;
-
- case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
- mlx5_temp_warning_event(dev, eqe);
- break;
-
- case MLX5_EVENT_TYPE_GENERAL_EVENT:
- general_event_handler(dev, eqe);
- break;
-
- case MLX5_EVENT_TYPE_DEVICE_TRACER:
- mlx5_fw_tracer_event(dev, eqe);
- break;
-
- default:
- mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
- eqe->type, eq->eqn);
- break;
- }
+ atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);
++eq->cons_index;
++set_ci;
@@ -608,30 +232,9 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
eq_update_ci(eq, 1);
- if (cqn != -1)
- tasklet_schedule(&eq->tasklet_ctx.task);
-
return IRQ_HANDLED;
}
-/* Some architectures don't latch interrupts when they are disabled, so using
- * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
- * avoid losing them. It is not recommended to use it, unless this is the last
- * resort.
- */
-u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq)
-{
- u32 count_eqe;
-
- disable_irq(eq->irqn);
- count_eqe = eq->cons_index;
- mlx5_eq_int(eq->irqn, eq);
- count_eqe = eq->cons_index - count_eqe;
- enable_irq(eq->irqn);
-
- return count_eqe;
-}
-
static void init_eq_buf(struct mlx5_eq *eq)
{
struct mlx5_eqe *eqe;
@@ -643,39 +246,35 @@ static void init_eq_buf(struct mlx5_eq *eq)
}
}
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
- int nent, u64 mask, const char *name,
- enum mlx5_eq_type type)
+static int
+create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
+ struct mlx5_eq_param *param)
{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
struct mlx5_cq_table *cq_table = &eq->cq_table;
u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
struct mlx5_priv *priv = &dev->priv;
- irq_handler_t handler;
+ u8 vecidx = param->index;
__be64 *pas;
void *eqc;
int inlen;
u32 *in;
int err;
+ if (eq_table->irq_info[vecidx].context)
+ return -EEXIST;
+
/* Init CQ table */
memset(cq_table, 0, sizeof(*cq_table));
spin_lock_init(&cq_table->lock);
INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
- eq->type = type;
- eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
+ eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE);
eq->cons_index = 0;
err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
if (err)
return err;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (type == MLX5_EQ_TYPE_PF)
- handler = mlx5_eq_pf_int;
- else
-#endif
- handler = mlx5_eq_int;
-
init_eq_buf(eq);
inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
@@ -691,7 +290,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
mlx5_fill_page_array(&eq->buf, pas);
MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
- MLX5_SET64(create_eq_in, in, event_bitmask, mask);
+ MLX5_SET64(create_eq_in, in, event_bitmask, param->mask);
eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
@@ -704,15 +303,17 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
if (err)
goto err_in;
- snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
+ snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
name, pci_name(dev->pdev));
+ eq_table->irq_info[vecidx].context = param->context;
+ eq->vecidx = vecidx;
eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
eq->irqn = pci_irq_vector(dev->pdev, vecidx);
eq->dev = dev;
eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
- err = request_irq(eq->irqn, handler, 0,
- priv->irq_info[vecidx].name, eq);
+ err = request_irq(eq->irqn, param->handler, 0,
+ eq_table->irq_info[vecidx].name, param->context);
if (err)
goto err_eq;
@@ -720,21 +321,6 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
if (err)
goto err_irq;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (type == MLX5_EQ_TYPE_PF) {
- err = init_pf_ctx(&eq->pf_ctx, name);
- if (err)
- goto err_irq;
- } else
-#endif
- {
- INIT_LIST_HEAD(&eq->tasklet_ctx.list);
- INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
- spin_lock_init(&eq->tasklet_ctx.lock);
- tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
- (unsigned long)&eq->tasklet_ctx);
- }
-
/* EQs are created in ARMED state
*/
eq_update_ci(eq, 1);
@@ -756,27 +342,25 @@ err_buf:
return err;
}
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+ struct mlx5_irq_info *irq_info;
int err;
+ irq_info = &eq_table->irq_info[eq->vecidx];
+
mlx5_debug_eq_remove(dev, eq);
- free_irq(eq->irqn, eq);
+
+ free_irq(eq->irqn, irq_info->context);
+ irq_info->context = NULL;
+
err = mlx5_cmd_destroy_eq(dev, eq->eqn);
if (err)
mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
eq->eqn);
synchronize_irq(eq->irqn);
- if (eq->type == MLX5_EQ_TYPE_COMP) {
- tasklet_disable(&eq->tasklet_ctx.task);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- } else if (eq->type == MLX5_EQ_TYPE_PF) {
- cancel_work_sync(&eq->pf_ctx.work);
- destroy_workqueue(eq->pf_ctx.wq);
- mempool_destroy(eq->pf_ctx.pool);
-#endif
- }
mlx5_buf_free(dev, &eq->buf);
return err;
@@ -816,28 +400,106 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
return 0;
}
-int mlx5_eq_init(struct mlx5_core_dev *dev)
+int mlx5_eq_table_init(struct mlx5_core_dev *dev)
{
- int err;
+ struct mlx5_eq_table *eq_table;
+ int i, err;
- spin_lock_init(&dev->priv.eq_table.lock);
+ eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL);
+ if (!eq_table)
+ return -ENOMEM;
+
+ dev->priv.eq_table = eq_table;
err = mlx5_eq_debugfs_init(dev);
+ if (err)
+ goto kvfree_eq_table;
+
+ mutex_init(&eq_table->lock);
+ for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
+ ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);
+ return 0;
+
+kvfree_eq_table:
+ kvfree(eq_table);
+ dev->priv.eq_table = NULL;
return err;
}
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
+void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
{
mlx5_eq_debugfs_cleanup(dev);
+ kvfree(dev->priv.eq_table);
}
-int mlx5_start_eqs(struct mlx5_core_dev *dev)
+/* Async EQs */
+
+static int create_async_eq(struct mlx5_core_dev *dev, const char *name,
+ struct mlx5_eq *eq, struct mlx5_eq_param *param)
{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+ int err;
+
+ mutex_lock(&eq_table->lock);
+ if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) {
+ err = -ENOSPC;
+ goto unlock;
+ }
+
+ err = create_map_eq(dev, eq, name, param);
+unlock:
+ mutex_unlock(&eq_table->lock);
+ return err;
+}
+
+static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
int err;
+ mutex_lock(&eq_table->lock);
+ err = destroy_unmap_eq(dev, eq);
+ mutex_unlock(&eq_table->lock);
+ return err;
+}
+
+static int cq_err_event_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_eq_table *eqt;
+ struct mlx5_core_cq *cq;
+ struct mlx5_eqe *eqe;
+ struct mlx5_eq *eq;
+ u32 cqn;
+
+ /* type == MLX5_EVENT_TYPE_CQ_ERROR */
+
+ eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
+ eq = &eqt->async_eq;
+ eqe = data;
+
+ cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
+ mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
+ cqn, eqe->data.cq_err.syndrome);
+
+ cq = mlx5_eq_cq_get(eq, cqn);
+ if (unlikely(!cq)) {
+ mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
+ return NOTIFY_OK;
+ }
+
+ cq->event(cq, type);
+
+ mlx5_cq_put(cq);
+
+ return NOTIFY_OK;
+}
+
+static u64 gather_async_events_mask(struct mlx5_core_dev *dev)
+{
+ u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
+
if (MLX5_VPORT_MANAGER(dev))
async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
@@ -865,127 +527,521 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);
- err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
- MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
- "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
+ if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER);
+
+ return async_event_mask;
+}
+
+static int create_async_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_param param = {};
+ int err;
+
+ MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
+ mlx5_eq_notifier_register(dev, &table->cq_err_nb);
+
+ param = (struct mlx5_eq_param) {
+ .index = MLX5_EQ_CMD_IDX,
+ .mask = 1ull << MLX5_EVENT_TYPE_CMD,
+ .nent = MLX5_NUM_CMD_EQE,
+ .context = &table->cmd_eq,
+ .handler = mlx5_eq_async_int,
+ };
+ err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, &param);
if (err) {
mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
- return err;
+ goto err0;
}
mlx5_cmd_use_events(dev);
- err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
- MLX5_NUM_ASYNC_EQE, async_event_mask,
- "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC);
+ param = (struct mlx5_eq_param) {
+ .index = MLX5_EQ_ASYNC_IDX,
+ .mask = gather_async_events_mask(dev),
+ .nent = MLX5_NUM_ASYNC_EQE,
+ .context = &table->async_eq,
+ .handler = mlx5_eq_async_int,
+ };
+ err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, &param);
if (err) {
mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
goto err1;
}
- err = mlx5_create_map_eq(dev, &table->pages_eq,
- MLX5_EQ_VEC_PAGES,
- /* TODO: sriov max_vf + */ 1,
- 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
- MLX5_EQ_TYPE_ASYNC);
+ param = (struct mlx5_eq_param) {
+ .index = MLX5_EQ_PAGEREQ_IDX,
+ .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST,
+ .nent = /* TODO: sriov max_vf + */ 1,
+ .context = &table->pages_eq,
+ .handler = mlx5_eq_async_int,
+ };
+ err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, &param);
if (err) {
mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
goto err2;
}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (MLX5_CAP_GEN(dev, pg)) {
- err = mlx5_create_map_eq(dev, &table->pfault_eq,
- MLX5_EQ_VEC_PFAULT,
- MLX5_NUM_ASYNC_EQE,
- 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
- "mlx5_page_fault_eq",
- MLX5_EQ_TYPE_PF);
- if (err) {
- mlx5_core_warn(dev, "failed to create page fault EQ %d\n",
- err);
- goto err3;
- }
- }
-
- return err;
-err3:
- mlx5_destroy_unmap_eq(dev, &table->pages_eq);
-#else
return err;
-#endif
err2:
- mlx5_destroy_unmap_eq(dev, &table->async_eq);
+ destroy_async_eq(dev, &table->async_eq);
err1:
mlx5_cmd_use_polling(dev);
- mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+ destroy_async_eq(dev, &table->cmd_eq);
+err0:
+ mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
return err;
}
-void mlx5_stop_eqs(struct mlx5_core_dev *dev)
+static void destroy_async_eqs(struct mlx5_core_dev *dev)
{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
+ struct mlx5_eq_table *table = dev->priv.eq_table;
int err;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (MLX5_CAP_GEN(dev, pg)) {
- err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
- if (err)
- mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
- err);
- }
-#endif
-
- err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
+ err = destroy_async_eq(dev, &table->pages_eq);
if (err)
mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
err);
- err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
+ err = destroy_async_eq(dev, &table->async_eq);
if (err)
mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
err);
+
mlx5_cmd_use_polling(dev);
- err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+ err = destroy_async_eq(dev, &table->cmd_eq);
if (err)
mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
err);
+
+ mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
}
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
- u32 *out, int outlen)
+struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
{
- u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0};
+ return &dev->priv.eq_table->async_eq;
+}
- MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
- MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
+{
+ synchronize_irq(dev->priv.eq_table->async_eq.irqn);
+}
+
+void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
+{
+ synchronize_irq(dev->priv.eq_table->cmd_eq.irqn);
+}
+
+/* Generic EQ API for mlx5_core consumers
+ * Needed For RDMA ODP EQ for now
+ */
+struct mlx5_eq *
+mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
+ struct mlx5_eq_param *param)
+{
+ struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
+ int err;
+
+ if (!eq)
+ return ERR_PTR(-ENOMEM);
+
+ err = create_async_eq(dev, name, eq, param);
+ if (err) {
+ kvfree(eq);
+ eq = ERR_PTR(err);
+ }
+
+ return eq;
+}
+EXPORT_SYMBOL(mlx5_eq_create_generic);
+
+int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ int err;
+
+ if (IS_ERR(eq))
+ return -EINVAL;
+
+ err = destroy_async_eq(dev, eq);
+ if (err)
+ goto out;
+
+ kvfree(eq);
+out:
+ return err;
+}
+EXPORT_SYMBOL(mlx5_eq_destroy_generic);
+
+struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc)
+{
+ u32 ci = eq->cons_index + cc;
+ struct mlx5_eqe *eqe;
+
+ eqe = get_eqe(eq, ci & (eq->nent - 1));
+ eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe;
+ /* Make sure we read EQ entry contents after we've
+ * checked the ownership bit.
+ */
+ if (eqe)
+ dma_rmb();
+
+ return eqe;
+}
+EXPORT_SYMBOL(mlx5_eq_get_eqe);
+
+void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
+{
+ __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+ u32 val;
+
+ eq->cons_index += cc;
+ val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+ __raw_writel((__force u32)cpu_to_be32(val), addr);
+ /* We still want ordering, just not swabbing, so add a barrier */
+ mb();
+}
+EXPORT_SYMBOL(mlx5_eq_update_ci);
+
+/* Completion EQs */
+
+static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ struct mlx5_priv *priv = &mdev->priv;
+ int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
+ int irq = pci_irq_vector(mdev->pdev, vecidx);
+ struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
+
+ if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) {
+ mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+ return -ENOMEM;
+ }
+
+ cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
+ irq_info->mask);
+
+ if (IS_ENABLED(CONFIG_SMP) &&
+ irq_set_affinity_hint(irq, irq_info->mask))
+ mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
+
+ return 0;
+}
+
+static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
+ struct mlx5_priv *priv = &mdev->priv;
+ int irq = pci_irq_vector(mdev->pdev, vecidx);
+ struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
+
+ irq_set_affinity_hint(irq, NULL);
+ free_cpumask_var(irq_info->mask);
+}
+
+static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) {
+ err = set_comp_irq_affinity_hint(mdev, i);
+ if (err)
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ for (i--; i >= 0; i--)
+ clear_comp_irq_affinity_hint(mdev, i);
+
+ return err;
+}
+
+static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int i;
+
+ for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++)
+ clear_comp_irq_affinity_hint(mdev, i);
+}
+
+static void destroy_comp_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq, *n;
+
+ clear_comp_irqs_affinity_hints(dev);
+
+#ifdef CONFIG_RFS_ACCEL
+ if (table->rmap) {
+ free_irq_cpu_rmap(table->rmap);
+ table->rmap = NULL;
+ }
+#endif
+ list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+ list_del(&eq->list);
+ if (destroy_unmap_eq(dev, &eq->core))
+ mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
+ eq->core.eqn);
+ tasklet_disable(&eq->tasklet_ctx.task);
+ kfree(eq);
+ }
+}
+
+static int create_comp_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ char name[MLX5_MAX_IRQ_NAME];
+ struct mlx5_eq_comp *eq;
+ int ncomp_vec;
+ int nent;
+ int err;
+ int i;
+
+ INIT_LIST_HEAD(&table->comp_eqs_list);
+ ncomp_vec = table->num_comp_vectors;
+ nent = MLX5_COMP_EQ_SIZE;
+#ifdef CONFIG_RFS_ACCEL
+ table->rmap = alloc_irq_cpu_rmap(ncomp_vec);
+ if (!table->rmap)
+ return -ENOMEM;
+#endif
+ for (i = 0; i < ncomp_vec; i++) {
+ int vecidx = i + MLX5_EQ_VEC_COMP_BASE;
+ struct mlx5_eq_param param = {};
+
+ eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+ if (!eq) {
+ err = -ENOMEM;
+ goto clean;
+ }
+
+ INIT_LIST_HEAD(&eq->tasklet_ctx.list);
+ INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
+ spin_lock_init(&eq->tasklet_ctx.lock);
+ tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
+ (unsigned long)&eq->tasklet_ctx);
+
+#ifdef CONFIG_RFS_ACCEL
+ irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx));
+#endif
+ snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
+ param = (struct mlx5_eq_param) {
+ .index = vecidx,
+ .mask = 0,
+ .nent = nent,
+ .context = &eq->core,
+ .handler = mlx5_eq_comp_int
+ };
+ err = create_map_eq(dev, &eq->core, name, &param);
+ if (err) {
+ kfree(eq);
+ goto clean;
+ }
+ mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
+ /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */
+ list_add_tail(&eq->list, &table->comp_eqs_list);
+ }
+
+ err = set_comp_irq_affinity_hints(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
+ goto clean;
+ }
+
+ return 0;
+
+clean:
+ destroy_comp_eqs(dev);
+ return err;
+}
+
+int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
+ unsigned int *irqn)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq, *n;
+ int err = -ENOENT;
+ int i = 0;
+
+ list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+ if (i++ == vector) {
+ *eqn = eq->core.eqn;
+ *irqn = eq->core.irqn;
+ err = 0;
+ break;
+ }
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_vector2eqn);
+
+unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
+{
+ return dev->priv.eq_table->num_comp_vectors;
+}
+EXPORT_SYMBOL(mlx5_comp_vectors_count);
+
+struct cpumask *
+mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
+{
+ /* TODO: consider irq_get_affinity_mask(irq) */
+ return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask;
+}
+EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
+
+struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
+{
+#ifdef CONFIG_RFS_ACCEL
+ return dev->priv.eq_table->rmap;
+#else
+ return NULL;
+#endif
+}
+
+struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq;
+
+ list_for_each_entry(eq, &table->comp_eqs_list, list) {
+ if (eq->core.eqn == eqn)
+ return eq;
+ }
+
+ return ERR_PTR(-ENOENT);
}
/* This function should only be called after mlx5_cmd_force_teardown_hca */
void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- struct mlx5_eq *eq;
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ int i, max_eqs;
+
+ clear_comp_irqs_affinity_hints(dev);
#ifdef CONFIG_RFS_ACCEL
- if (dev->rmap) {
- free_irq_cpu_rmap(dev->rmap);
- dev->rmap = NULL;
+ if (table->rmap) {
+ free_irq_cpu_rmap(table->rmap);
+ table->rmap = NULL;
}
#endif
- list_for_each_entry(eq, &table->comp_eqs_list, list)
- free_irq(eq->irqn, eq);
-
- free_irq(table->pages_eq.irqn, &table->pages_eq);
- free_irq(table->async_eq.irqn, &table->async_eq);
- free_irq(table->cmd_eq.irqn, &table->cmd_eq);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (MLX5_CAP_GEN(dev, pg))
- free_irq(table->pfault_eq.irqn, &table->pfault_eq);
-#endif
+
+ mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
+ max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE;
+ for (i = max_eqs - 1; i >= 0; i--) {
+ if (!table->irq_info[i].context)
+ continue;
+ free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context);
+ table->irq_info[i].context = NULL;
+ }
+ mutex_unlock(&table->lock);
+ pci_free_irq_vectors(dev->pdev);
+}
+
+static int alloc_irq_vectors(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_eq_table *table = priv->eq_table;
+ int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+ MLX5_CAP_GEN(dev, max_num_eqs) :
+ 1 << MLX5_CAP_GEN(dev, log_max_eq);
+ int nvec;
+ int err;
+
+ nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
+ MLX5_EQ_VEC_COMP_BASE;
+ nvec = min_t(int, nvec, num_eqs);
+ if (nvec <= MLX5_EQ_VEC_COMP_BASE)
+ return -ENOMEM;
+
+ table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL);
+ if (!table->irq_info)
+ return -ENOMEM;
+
+ nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1,
+ nvec, PCI_IRQ_MSIX);
+ if (nvec < 0) {
+ err = nvec;
+ goto err_free_irq_info;
+ }
+
+ table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
+
+ return 0;
+
+err_free_irq_info:
+ kfree(table->irq_info);
+ return err;
+}
+
+static void free_irq_vectors(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+
pci_free_irq_vectors(dev->pdev);
+ kfree(priv->eq_table->irq_info);
+}
+
+int mlx5_eq_table_create(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ err = alloc_irq_vectors(dev);
+ if (err) {
+ mlx5_core_err(dev, "alloc irq vectors failed\n");
+ return err;
+ }
+
+ err = create_async_eqs(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to create async EQs\n");
+ goto err_async_eqs;
+ }
+
+ err = create_comp_eqs(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to create completion EQs\n");
+ goto err_comp_eqs;
+ }
+
+ return 0;
+err_comp_eqs:
+ destroy_async_eqs(dev);
+err_async_eqs:
+ free_irq_vectors(dev);
+ return err;
+}
+
+void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
+{
+ destroy_comp_eqs(dev);
+ destroy_async_eqs(dev);
+ free_irq_vectors(dev);
+}
+
+int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
+{
+ struct mlx5_eq_table *eqt = dev->priv.eq_table;
+
+ if (nb->event_type >= MLX5_EVENT_TYPE_MAX)
+ return -EINVAL;
+
+ return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb);
+}
+
+int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
+{
+ struct mlx5_eq_table *eqt = dev->priv.eq_table;
+
+ if (nb->event_type >= MLX5_EVENT_TYPE_MAX)
+ return -EINVAL;
+
+ return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index d004957328f9..a44ea7b85614 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -36,6 +36,7 @@
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
#include "eswitch.h"
#include "fs_core.h"
@@ -1567,7 +1568,6 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
/* Mark this vport as disabled to discard new events */
vport->enabled = false;
- synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC));
/* Wait for current already scheduled events to complete */
flush_workqueue(esw->work_queue);
/* Disable events from this vport */
@@ -1593,10 +1593,25 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
mutex_unlock(&esw->state_lock);
}
+static int eswitch_vport_event(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_eswitch *esw = mlx5_nb_cof(nb, struct mlx5_eswitch, nb);
+ struct mlx5_eqe *eqe = data;
+ struct mlx5_vport *vport;
+ u16 vport_num;
+
+ vport_num = be16_to_cpu(eqe->data.vport_change.vport_num);
+ vport = &esw->vports[vport_num];
+ if (vport->enabled)
+ queue_work(esw->work_queue, &vport->vport_change_handler);
+
+ return NOTIFY_OK;
+}
+
/* Public E-Switch API */
#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
-
int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
{
int err;
@@ -1615,13 +1630,16 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n");
esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
+
esw->mode = mode;
+ mlx5_lag_update(esw->dev);
+
if (mode == SRIOV_LEGACY) {
err = esw_create_legacy_fdb_table(esw);
} else {
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-
err = esw_offloads_init(esw, nvfs + 1);
}
@@ -1640,6 +1658,11 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
for (i = 0; i <= nvfs; i++)
esw_enable_vport(esw, i, enabled_events);
+ if (mode == SRIOV_LEGACY) {
+ MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
+ mlx5_eq_notifier_register(esw->dev, &esw->nb);
+ }
+
esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n",
esw->enabled_vports);
return 0;
@@ -1647,8 +1670,10 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
abort:
esw->mode = SRIOV_NONE;
- if (mode == SRIOV_OFFLOADS)
+ if (mode == SRIOV_OFFLOADS) {
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
+ }
return err;
}
@@ -1669,6 +1694,9 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
mc_promisc = &esw->mc_promisc;
nvports = esw->enabled_vports;
+ if (esw->mode == SRIOV_LEGACY)
+ mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
+
for (i = 0; i < esw->total_vports; i++)
esw_disable_vport(esw, i);
@@ -1685,8 +1713,12 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
old_mode = esw->mode;
esw->mode = SRIOV_NONE;
- if (old_mode == SRIOV_OFFLOADS)
+ mlx5_lag_update(esw->dev);
+
+ if (old_mode == SRIOV_OFFLOADS) {
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
+ }
}
int mlx5_eswitch_init(struct mlx5_core_dev *dev)
@@ -1777,23 +1809,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
kfree(esw);
}
-void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe)
-{
- struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change;
- u16 vport_num = be16_to_cpu(vc_eqe->vport_num);
- struct mlx5_vport *vport;
-
- if (!esw) {
- pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n",
- vport_num);
- return;
- }
-
- vport = &esw->vports[vport_num];
- if (vport->enabled)
- queue_work(esw->work_queue, &vport->vport_change_handler);
-}
-
/* Vport Administration */
#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
@@ -2219,3 +2234,14 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
return ESW_ALLOWED(esw) ? esw->mode : SRIOV_NONE;
}
EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
+
+bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1)
+{
+ if ((dev0->priv.eswitch->mode == SRIOV_NONE &&
+ dev1->priv.eswitch->mode == SRIOV_NONE) ||
+ (dev0->priv.eswitch->mode == SRIOV_OFFLOADS &&
+ dev1->priv.eswitch->mode == SRIOV_OFFLOADS))
+ return true;
+
+ return false;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index aaafc9f17115..9c89eea9b2c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -143,6 +143,8 @@ struct mlx5_eswitch_fdb {
struct offloads_fdb {
struct mlx5_flow_table *slow_fdb;
struct mlx5_flow_group *send_to_vport_grp;
+ struct mlx5_flow_group *peer_miss_grp;
+ struct mlx5_flow_handle **peer_miss_rules;
struct mlx5_flow_group *miss_grp;
struct mlx5_flow_handle *miss_rule_uni;
struct mlx5_flow_handle *miss_rule_multi;
@@ -165,6 +167,8 @@ struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads;
struct mlx5_flow_group *vport_rx_group;
struct mlx5_eswitch_rep *vport_reps;
+ struct list_head peer_flows;
+ struct mutex peer_mutex;
DECLARE_HASHTABLE(encap_tbl, 8);
DECLARE_HASHTABLE(mod_hdr_tbl, 8);
u8 inline_mode;
@@ -181,6 +185,7 @@ struct esw_mc_addr { /* SRIOV only */
struct mlx5_eswitch {
struct mlx5_core_dev *dev;
+ struct mlx5_nb nb;
struct mlx5_eswitch_fdb fdb_table;
struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
struct workqueue_struct *work_queue;
@@ -211,7 +216,6 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw);
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
-void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
@@ -281,13 +285,17 @@ enum mlx5_flow_match_level {
/* current maximum for flow based vport multicasting */
#define MLX5_MAX_FLOW_FWD_VPORTS 2
+enum {
+ MLX5_ESW_DEST_ENCAP = BIT(0),
+ MLX5_ESW_DEST_ENCAP_VALID = BIT(1),
+};
+
struct mlx5_esw_flow_attr {
struct mlx5_eswitch_rep *in_rep;
- struct mlx5_eswitch_rep *out_rep[MLX5_MAX_FLOW_FWD_VPORTS];
- struct mlx5_core_dev *out_mdev[MLX5_MAX_FLOW_FWD_VPORTS];
struct mlx5_core_dev *in_mdev;
+ struct mlx5_core_dev *counter_dev;
- int mirror_count;
+ int split_count;
int out_count;
int action;
@@ -296,7 +304,12 @@ struct mlx5_esw_flow_attr {
u8 vlan_prio[MLX5_FS_VLAN_DEPTH];
u8 total_vlan;
bool vlan_handled;
- u32 encap_id;
+ struct {
+ u32 flags;
+ struct mlx5_eswitch_rep *rep;
+ struct mlx5_core_dev *mdev;
+ u32 encap_id;
+ } dests[MLX5_MAX_FLOW_FWD_VPORTS];
u32 mod_hdr_id;
u8 match_level;
struct mlx5_fc *counter;
@@ -338,6 +351,9 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev
MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2);
}
+bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0,
+ struct mlx5_core_dev *dev1);
+
#define MLX5_DEBUG_ESWITCH_MASK BIT(3)
#define esw_info(dev, format, ...) \
@@ -352,9 +368,9 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
-static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {}
static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; }
static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
+static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
#define FDB_MAX_CHAIN 1
#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 9eac137790f5..53065b6ae593 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -39,6 +39,7 @@
#include "eswitch.h"
#include "en.h"
#include "fs_core.h"
+#include "lib/devcom.h"
enum {
FDB_FAST_PATH = 0,
@@ -81,7 +82,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
{
struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
- bool mirror = !!(attr->mirror_count);
+ bool split = !!(attr->split_count);
struct mlx5_flow_handle *rule;
struct mlx5_flow_table *fdb;
int j, i = 0;
@@ -120,13 +121,21 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
dest[i].ft = ft;
i++;
} else {
- for (j = attr->mirror_count; j < attr->out_count; j++) {
+ for (j = attr->split_count; j < attr->out_count; j++) {
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
- dest[i].vport.num = attr->out_rep[j]->vport;
+ dest[i].vport.num = attr->dests[j].rep->vport;
dest[i].vport.vhca_id =
- MLX5_CAP_GEN(attr->out_mdev[j], vhca_id);
- dest[i].vport.vhca_id_valid =
- !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
+ MLX5_CAP_GEN(attr->dests[j].mdev, vhca_id);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ dest[i].vport.flags |=
+ MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ if (attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_act.reformat_id = attr->dests[j].encap_id;
+ dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
+ dest[i].vport.reformat_id =
+ attr->dests[j].encap_id;
+ }
i++;
}
}
@@ -163,10 +172,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
flow_act.modify_id = attr->mod_hdr_id;
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
- flow_act.reformat_id = attr->encap_id;
-
- fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!mirror);
+ fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!split);
if (IS_ERR(fdb)) {
rule = ERR_CAST(fdb);
goto err_esw_get;
@@ -181,7 +187,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
return rule;
err_add_rule:
- esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror);
+ esw_put_prio_table(esw, attr->chain, attr->prio, !!split);
err_esw_get:
if (attr->dest_chain)
esw_put_prio_table(esw, attr->dest_chain, 1, 0);
@@ -215,12 +221,17 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
}
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- for (i = 0; i < attr->mirror_count; i++) {
+ for (i = 0; i < attr->split_count; i++) {
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
- dest[i].vport.num = attr->out_rep[i]->vport;
+ dest[i].vport.num = attr->dests[i].rep->vport;
dest[i].vport.vhca_id =
- MLX5_CAP_GEN(attr->out_mdev[i], vhca_id);
- dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
+ MLX5_CAP_GEN(attr->dests[i].mdev, vhca_id);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ if (attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) {
+ dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
+ dest[i].vport.reformat_id = attr->dests[i].encap_id;
+ }
}
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest[i].ft = fwd_fdb,
@@ -268,7 +279,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
struct mlx5_esw_flow_attr *attr,
bool fwd_rule)
{
- bool mirror = (attr->mirror_count > 0);
+ bool split = (attr->split_count > 0);
mlx5_del_flow_rules(rule);
esw->offloads.num_flows--;
@@ -277,7 +288,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
esw_put_prio_table(esw, attr->chain, attr->prio, 1);
esw_put_prio_table(esw, attr->chain, attr->prio, 0);
} else {
- esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror);
+ esw_put_prio_table(esw, attr->chain, attr->prio, !!split);
if (attr->dest_chain)
esw_put_prio_table(esw, attr->dest_chain, 1, 0);
}
@@ -325,7 +336,7 @@ esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop)
struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL;
in_rep = attr->in_rep;
- out_rep = attr->out_rep[0];
+ out_rep = attr->dests[0].rep;
if (push)
vport = in_rep;
@@ -346,7 +357,7 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr,
goto out_notsupp;
in_rep = attr->in_rep;
- out_rep = attr->out_rep[0];
+ out_rep = attr->dests[0].rep;
if (push && in_rep->vport == FDB_UPLINK_VPORT)
goto out_notsupp;
@@ -398,7 +409,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
if (!push && !pop && fwd) {
/* tracks VF --> wire rules without vlan push action */
- if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT) {
+ if (attr->dests[0].rep->vport == FDB_UPLINK_VPORT) {
vport->vlan_refcount++;
attr->vlan_handled = true;
}
@@ -458,7 +469,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
if (!push && !pop && fwd) {
/* tracks VF --> wire rules without vlan push action */
- if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT)
+ if (attr->dests[0].rep->vport == FDB_UPLINK_VPORT)
vport->vlan_refcount--;
return 0;
@@ -531,6 +542,98 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
mlx5_del_flow_rules(rule);
}
+static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_destination *dest)
+{
+ void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(peer_dev, vhca_id));
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+
+ dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest->vport.num = 0;
+ dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id);
+ dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+}
+
+static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
+ struct mlx5_core_dev *peer_dev)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_handle **flows;
+ struct mlx5_flow_handle *flow;
+ struct mlx5_flow_spec *spec;
+ /* total vports is the same for both e-switches */
+ int nvports = esw->total_vports;
+ void *misc;
+ int err, i;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ peer_miss_rules_setup(peer_dev, spec, &dest);
+
+ flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL);
+ if (!flows) {
+ err = -ENOMEM;
+ goto alloc_flows_err;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ for (i = 1; i < nvports; i++) {
+ MLX5_SET(fte_match_set_misc, misc, source_port, i);
+ flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+ spec, &flow_act, &dest, 1);
+ if (IS_ERR(flow)) {
+ err = PTR_ERR(flow);
+ esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
+ goto add_flow_err;
+ }
+ flows[i] = flow;
+ }
+
+ esw->fdb_table.offloads.peer_miss_rules = flows;
+
+ kvfree(spec);
+ return 0;
+
+add_flow_err:
+ for (i--; i > 0; i--)
+ mlx5_del_flow_rules(flows[i]);
+ kvfree(flows);
+alloc_flows_err:
+ kvfree(spec);
+ return err;
+}
+
+static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_handle **flows;
+ int i;
+
+ flows = esw->fdb_table.offloads.peer_miss_rules;
+
+ for (i = 1; i < esw->total_vports; i++)
+ mlx5_del_flow_rules(flows[i]);
+
+ kvfree(flows);
+}
+
static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
{
struct mlx5_flow_act flow_act = {0};
@@ -801,7 +904,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
esw->fdb_table.offloads.fdb_left[i] =
ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0;
- table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2;
+ table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2 +
+ esw->total_vports;
/* create the slow path fdb with encap set, so further table instances
* can be created at run time while VFs are probed if the FW allows that.
@@ -856,6 +960,34 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
}
esw->fdb_table.offloads.send_to_vport_grp = g;
+ /* create peer esw miss group */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_port);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+ ix + esw->total_vports - 1);
+ ix += esw->total_vports;
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err);
+ goto peer_miss_err;
+ }
+ esw->fdb_table.offloads.peer_miss_grp = g;
+
/* create miss group */
memset(flow_group_in, 0, inlen);
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
@@ -888,6 +1020,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
miss_rule_err:
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
miss_err:
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
+peer_miss_err:
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
send_vport_err:
esw_destroy_offloads_fast_fdb_tables(esw);
@@ -907,6 +1041,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
@@ -1163,6 +1298,105 @@ err_reps:
return err;
}
+#define ESW_OFFLOADS_DEVCOM_PAIR (0)
+#define ESW_OFFLOADS_DEVCOM_UNPAIR (1)
+
+static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch *peer_esw)
+{
+ int err;
+
+ err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
+
+static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
+{
+ mlx5e_tc_clean_fdb_peer_flows(esw);
+ esw_del_fdb_peer_miss_rules(esw);
+}
+
+static int mlx5_esw_offloads_devcom_event(int event,
+ void *my_data,
+ void *event_data)
+{
+ struct mlx5_eswitch *esw = my_data;
+ struct mlx5_eswitch *peer_esw = event_data;
+ struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+ int err;
+
+ switch (event) {
+ case ESW_OFFLOADS_DEVCOM_PAIR:
+ err = mlx5_esw_offloads_pair(esw, peer_esw);
+ if (err)
+ goto err_out;
+
+ err = mlx5_esw_offloads_pair(peer_esw, esw);
+ if (err)
+ goto err_pair;
+
+ mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true);
+ break;
+
+ case ESW_OFFLOADS_DEVCOM_UNPAIR:
+ if (!mlx5_devcom_is_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
+ break;
+
+ mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false);
+ mlx5_esw_offloads_unpair(peer_esw);
+ mlx5_esw_offloads_unpair(esw);
+ break;
+ }
+
+ return 0;
+
+err_pair:
+ mlx5_esw_offloads_unpair(esw);
+
+err_out:
+ mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d",
+ event, err);
+ return err;
+}
+
+static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
+{
+ struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+
+ INIT_LIST_HEAD(&esw->offloads.peer_flows);
+ mutex_init(&esw->offloads.peer_mutex);
+
+ if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ return;
+
+ mlx5_devcom_register_component(devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS,
+ mlx5_esw_offloads_devcom_event,
+ esw);
+
+ mlx5_devcom_send_event(devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS,
+ ESW_OFFLOADS_DEVCOM_PAIR, esw);
+}
+
+static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
+{
+ struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+
+ if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ return;
+
+ mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
+ ESW_OFFLOADS_DEVCOM_UNPAIR, esw);
+
+ mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+}
+
int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
{
int err;
@@ -1185,6 +1419,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
if (err)
goto err_reps;
+ esw_offloads_devcom_init(esw);
return 0;
err_reps:
@@ -1215,14 +1450,12 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
}
}
- /* enable back PF RoCE */
- mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-
return err;
}
void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports)
{
+ esw_offloads_devcom_cleanup(esw);
esw_offloads_unload_reps(esw, nvports);
esw_destroy_vport_rx_group(esw);
esw_destroy_offloads_table(esw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c
new file mode 100644
index 000000000000..fbc42b7252a9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "lib/mlx5.h"
+
+struct mlx5_event_nb {
+ struct mlx5_nb nb;
+ void *ctx;
+};
+
+/* General events handlers for the low level mlx5_core driver
+ *
+ * Other Major feature specific events such as
+ * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with
+ * separate notifiers callbacks, specifically by those mlx5 components.
+ */
+static int any_notifier(struct notifier_block *, unsigned long, void *);
+static int temp_warn(struct notifier_block *, unsigned long, void *);
+static int port_module(struct notifier_block *, unsigned long, void *);
+
+/* handler which forwards the event to events->nh, driver notifiers */
+static int forward_event(struct notifier_block *, unsigned long, void *);
+
+static struct mlx5_nb events_nbs_ref[] = {
+ /* Events to be proccessed by mlx5_core */
+ {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
+ {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
+ {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
+
+ /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
+ /* QP/WQ resource events to forward */
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
+ /* SRQ events */
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
+};
+
+struct mlx5_events {
+ struct mlx5_core_dev *dev;
+ struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)];
+ /* driver notifier chain */
+ struct atomic_notifier_head nh;
+ /* port module events stats */
+ struct mlx5_pme_stats pme_stats;
+};
+
+static const char *eqe_type_str(u8 type)
+{
+ switch (type) {
+ case MLX5_EVENT_TYPE_COMP:
+ return "MLX5_EVENT_TYPE_COMP";
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ return "MLX5_EVENT_TYPE_PATH_MIG";
+ case MLX5_EVENT_TYPE_COMM_EST:
+ return "MLX5_EVENT_TYPE_COMM_EST";
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ return "MLX5_EVENT_TYPE_SQ_DRAINED";
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ return "MLX5_EVENT_TYPE_CQ_ERROR";
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
+ case MLX5_EVENT_TYPE_INTERNAL_ERROR:
+ return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
+ return "MLX5_EVENT_TYPE_PORT_CHANGE";
+ case MLX5_EVENT_TYPE_GPIO_EVENT:
+ return "MLX5_EVENT_TYPE_GPIO_EVENT";
+ case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+ return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
+ case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
+ return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
+ case MLX5_EVENT_TYPE_REMOTE_CONFIG:
+ return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
+ case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
+ return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
+ case MLX5_EVENT_TYPE_STALL_EVENT:
+ return "MLX5_EVENT_TYPE_STALL_EVENT";
+ case MLX5_EVENT_TYPE_CMD:
+ return "MLX5_EVENT_TYPE_CMD";
+ case MLX5_EVENT_TYPE_PAGE_REQUEST:
+ return "MLX5_EVENT_TYPE_PAGE_REQUEST";
+ case MLX5_EVENT_TYPE_PAGE_FAULT:
+ return "MLX5_EVENT_TYPE_PAGE_FAULT";
+ case MLX5_EVENT_TYPE_PPS_EVENT:
+ return "MLX5_EVENT_TYPE_PPS_EVENT";
+ case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
+ return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
+ case MLX5_EVENT_TYPE_FPGA_ERROR:
+ return "MLX5_EVENT_TYPE_FPGA_ERROR";
+ case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
+ return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
+ case MLX5_EVENT_TYPE_GENERAL_EVENT:
+ return "MLX5_EVENT_TYPE_GENERAL_EVENT";
+ case MLX5_EVENT_TYPE_MONITOR_COUNTER:
+ return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
+ case MLX5_EVENT_TYPE_DEVICE_TRACER:
+ return "MLX5_EVENT_TYPE_DEVICE_TRACER";
+ default:
+ return "Unrecognized event";
+ }
+}
+
+/* handles all FW events, type == eqe->type */
+static int any_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+
+ mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
+ eqe_type_str(eqe->type), eqe->sub_type);
+ return NOTIFY_OK;
+}
+
+/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
+static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+ u64 value_lsb;
+ u64 value_msb;
+
+ value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
+ value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
+
+ mlx5_core_warn(events->dev,
+ "High temperature on sensors with bit set %llx %llx",
+ value_msb, value_lsb);
+
+ return NOTIFY_OK;
+}
+
+/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
+static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
+{
+ switch (status) {
+ case MLX5_MODULE_STATUS_PLUGGED:
+ return "Cable plugged";
+ case MLX5_MODULE_STATUS_UNPLUGGED:
+ return "Cable unplugged";
+ case MLX5_MODULE_STATUS_ERROR:
+ return "Cable error";
+ case MLX5_MODULE_STATUS_DISABLED:
+ return "Cable disabled";
+ default:
+ return "Unknown status";
+ }
+}
+
+static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
+{
+ switch (error) {
+ case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
+ return "Power budget exceeded";
+ case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
+ return "Long Range for non MLNX cable";
+ case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
+ return "Bus stuck (I2C or data shorted)";
+ case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
+ return "No EEPROM/retry timeout";
+ case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
+ return "Enforce part number list";
+ case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
+ return "Unknown identifier";
+ case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
+ return "High Temperature";
+ case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
+ return "Bad or shorted cable/module";
+ case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED:
+ return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
+ default:
+ return "Unknown error";
+ }
+}
+
+/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
+static int port_module(struct notifier_block *nb, unsigned long type, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+
+ enum port_module_event_status_type module_status;
+ enum port_module_event_error_type error_type;
+ struct mlx5_eqe_port_module *module_event_eqe;
+ const char *status_str, *error_str;
+ u8 module_num;
+
+ module_event_eqe = &eqe->data.port_module;
+ module_num = module_event_eqe->module;
+ module_status = module_event_eqe->module_status &
+ PORT_MODULE_EVENT_MODULE_STATUS_MASK;
+ error_type = module_event_eqe->error_type &
+ PORT_MODULE_EVENT_ERROR_TYPE_MASK;
+
+ if (module_status < MLX5_MODULE_STATUS_NUM)
+ events->pme_stats.status_counters[module_status]++;
+ status_str = mlx5_pme_status_to_string(module_status);
+
+ if (module_status == MLX5_MODULE_STATUS_ERROR) {
+ if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
+ events->pme_stats.error_counters[error_type]++;
+ error_str = mlx5_pme_error_to_string(error_type);
+ }
+
+ if (!printk_ratelimit())
+ return NOTIFY_OK;
+
+ if (module_status == MLX5_MODULE_STATUS_ERROR)
+ mlx5_core_err(events->dev,
+ "Port module event[error]: module %u, %s, %s\n",
+ module_num, status_str, error_str);
+ else
+ mlx5_core_info(events->dev,
+ "Port module event: module %u, %s\n",
+ module_num, status_str);
+
+ return NOTIFY_OK;
+}
+
+void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
+{
+ *stats = dev->priv.events->pme_stats;
+}
+
+/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
+static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+
+ mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
+ eqe_type_str(eqe->type), eqe->sub_type);
+ atomic_notifier_call_chain(&events->nh, event, data);
+ return NOTIFY_OK;
+}
+
+int mlx5_events_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);
+
+ if (!events)
+ return -ENOMEM;
+
+ ATOMIC_INIT_NOTIFIER_HEAD(&events->nh);
+ events->dev = dev;
+ dev->priv.events = events;
+ return 0;
+}
+
+void mlx5_events_cleanup(struct mlx5_core_dev *dev)
+{
+ kvfree(dev->priv.events);
+}
+
+void mlx5_events_start(struct mlx5_core_dev *dev)
+{
+ struct mlx5_events *events = dev->priv.events;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
+ events->notifiers[i].nb = events_nbs_ref[i];
+ events->notifiers[i].ctx = events;
+ mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
+ }
+}
+
+void mlx5_events_stop(struct mlx5_core_dev *dev)
+{
+ struct mlx5_events *events = dev->priv.events;
+ int i;
+
+ for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--)
+ mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
+}
+
+int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+ struct mlx5_events *events = dev->priv.events;
+
+ return atomic_notifier_chain_register(&events->nh, nb);
+}
+EXPORT_SYMBOL(mlx5_notifier_register);
+
+int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+ struct mlx5_events *events = dev->priv.events;
+
+ return atomic_notifier_chain_unregister(&events->nh, nb);
+}
+EXPORT_SYMBOL(mlx5_notifier_unregister);
+
+int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
+{
+ return atomic_notifier_call_chain(&events->nh, event, data);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index 8ca1d1949d93..873541ef4c1b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -334,7 +334,7 @@ static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn,
{
u8 opcode, status = 0;
- opcode = cqe->op_own >> 4;
+ opcode = get_cqe_opcode(cqe);
switch (opcode) {
case MLX5_CQE_REQ_ERR:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
index 436a8136f26f..27c5f6c7d36a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -36,6 +36,7 @@
#include "mlx5_core.h"
#include "lib/mlx5.h"
+#include "lib/eq.h"
#include "fpga/core.h"
#include "fpga/conn.h"
@@ -145,6 +146,22 @@ static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
return 0;
}
+static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *);
+
+static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
+{
+ struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb);
+
+ return mlx5_fpga_event(fdev, event, eqe);
+}
+
+static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
+{
+ struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb);
+
+ return mlx5_fpga_event(fdev, event, eqe);
+}
+
int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
{
struct mlx5_fpga_device *fdev = mdev->fpga;
@@ -185,6 +202,11 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
if (err)
goto out;
+ MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR);
+ MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR);
+ mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb);
+ mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb);
+
err = mlx5_fpga_conn_device_init(fdev);
if (err)
goto err_rsvd_gid;
@@ -201,6 +223,8 @@ err_conn_init:
mlx5_fpga_conn_device_cleanup(fdev);
err_rsvd_gid:
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
mlx5_core_unreserve_gids(mdev, max_num_qps);
out:
spin_lock_irqsave(&fdev->state_lock, flags);
@@ -256,6 +280,9 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
}
mlx5_fpga_conn_device_cleanup(fdev);
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
+
max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
mlx5_core_unreserve_gids(mdev, max_num_qps);
}
@@ -283,9 +310,10 @@ static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
return "Unknown";
}
-void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
+static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
+ unsigned long event, void *eqe)
{
- struct mlx5_fpga_device *fdev = mdev->fpga;
+ void *data = ((struct mlx5_eqe *)eqe)->data.raw;
const char *event_name;
bool teardown = false;
unsigned long flags;
@@ -303,9 +331,7 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
fpga_qpn = MLX5_GET(fpga_qp_error_event, data, fpga_qpn);
break;
default:
- mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n",
- event);
- return;
+ return NOTIFY_DONE;
}
spin_lock_irqsave(&fdev->state_lock, flags);
@@ -326,4 +352,6 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
*/
if (teardown)
mlx5_trigger_health_work(fdev->mdev);
+
+ return NOTIFY_OK;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
index 3e2355c8df3f..7e2e871dbf83 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -35,11 +35,16 @@
#ifdef CONFIG_MLX5_FPGA
+#include <linux/mlx5/eq.h>
+
+#include "lib/eq.h"
#include "fpga/cmd.h"
/* Represents an Innova device */
struct mlx5_fpga_device {
struct mlx5_core_dev *mdev;
+ struct mlx5_nb fpga_err_nb;
+ struct mlx5_nb fpga_qp_err_nb;
spinlock_t state_lock; /* Protects state transitions */
enum mlx5_fpga_status state;
enum mlx5_fpga_image last_admin_image;
@@ -82,7 +87,6 @@ int mlx5_fpga_init(struct mlx5_core_dev *mdev);
void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev);
int mlx5_fpga_device_start(struct mlx5_core_dev *mdev);
void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev);
-void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data);
#else
@@ -104,11 +108,6 @@ static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
{
}
-static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event,
- void *data)
-{
-}
-
#endif
#endif /* __MLX5_FPGA_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 08a891f9aade..c44ccb67c4a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -308,22 +308,68 @@ static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
+static int mlx5_set_extended_dest(struct mlx5_core_dev *dev,
+ struct fs_fte *fte, bool *extended_dest)
+{
+ int fw_log_max_fdb_encap_uplink =
+ MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink);
+ int num_fwd_destinations = 0;
+ struct mlx5_flow_rule *dst;
+ int num_encap = 0;
+
+ *extended_dest = false;
+ if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+ return 0;
+
+ list_for_each_entry(dst, &fte->node.children, node.list) {
+ if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ continue;
+ if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+ dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID)
+ num_encap++;
+ num_fwd_destinations++;
+ }
+ if (num_fwd_destinations > 1 && num_encap > 0)
+ *extended_dest = true;
+
+ if (*extended_dest && !fw_log_max_fdb_encap_uplink) {
+ mlx5_core_warn(dev, "FW does not support extended destination");
+ return -EOPNOTSUPP;
+ }
+ if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) {
+ mlx5_core_warn(dev, "FW does not support more than %d encaps",
+ 1 << fw_log_max_fdb_encap_uplink);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
int opmod, int modify_mask,
struct mlx5_flow_table *ft,
unsigned group_id,
struct fs_fte *fte)
{
- unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) +
- fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct);
u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0};
+ bool extended_dest = false;
struct mlx5_flow_rule *dst;
void *in_flow_context, *vlan;
void *in_match_value;
+ unsigned int inlen;
+ int dst_cnt_size;
void *in_dests;
u32 *in;
int err;
+ if (mlx5_set_extended_dest(dev, fte, &extended_dest))
+ return -EOPNOTSUPP;
+
+ if (!extended_dest)
+ dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct);
+ else
+ dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format);
+
+ inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -343,9 +389,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(flow_context, in_flow_context, group_id, group_id);
MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
- MLX5_SET(flow_context, in_flow_context, action, fte->action.action);
- MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
- fte->action.reformat_id);
+ MLX5_SET(flow_context, in_flow_context, extended_destination,
+ extended_dest);
+ if (extended_dest) {
+ u32 action;
+
+ action = fte->action.action &
+ ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ MLX5_SET(flow_context, in_flow_context, action, action);
+ } else {
+ MLX5_SET(flow_context, in_flow_context, action,
+ fte->action.action);
+ MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
+ fte->action.reformat_id);
+ }
MLX5_SET(flow_context, in_flow_context, modify_header_id,
fte->action.modify_id);
@@ -387,10 +444,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
id = dst->dest_attr.vport.num;
MLX5_SET(dest_format_struct, in_dests,
destination_eswitch_owner_vhca_id_valid,
- dst->dest_attr.vport.vhca_id_valid);
+ !!(dst->dest_attr.vport.flags &
+ MLX5_FLOW_DEST_VPORT_VHCA_ID));
MLX5_SET(dest_format_struct, in_dests,
destination_eswitch_owner_vhca_id,
dst->dest_attr.vport.vhca_id);
+ if (extended_dest) {
+ MLX5_SET(dest_format_struct, in_dests,
+ packet_reformat,
+ !!(dst->dest_attr.vport.flags &
+ MLX5_FLOW_DEST_VPORT_REFORMAT_ID));
+ MLX5_SET(extended_dest_format, in_dests,
+ packet_reformat_id,
+ dst->dest_attr.vport.reformat_id);
+ }
break;
default:
id = dst->dest_attr.tir_num;
@@ -399,7 +466,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(dest_format_struct, in_dests, destination_type,
type);
MLX5_SET(dest_format_struct, in_dests, destination_id, id);
- in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+ in_dests += dst_cnt_size;
list_size++;
}
@@ -420,7 +487,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(flow_counter_list, in_dests, flow_counter_id,
dst->dest_attr.counter_id);
- in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+ in_dests += dst_cnt_size;
list_size++;
}
if (list_size > max_list_size) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 08233cf44871..79f122b45def 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1373,7 +1373,10 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
{
if (d1->type == d2->type) {
if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
- d1->vport.num == d2->vport.num) ||
+ d1->vport.num == d2->vport.num &&
+ d1->vport.flags == d2->vport.flags &&
+ ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
+ (d1->vport.reformat_id == d2->vport.reformat_id) : true)) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
d1->ft == d2->ft) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index b51ad217da32..2dc86347af58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -145,29 +145,6 @@ struct mlx5_flow_table {
struct rhltable fgs_hash;
};
-struct mlx5_fc_cache {
- u64 packets;
- u64 bytes;
- u64 lastuse;
-};
-
-struct mlx5_fc {
- struct list_head list;
- struct llist_node addlist;
- struct llist_node dellist;
-
- /* last{packets,bytes} members are used when calculating the delta since
- * last reading
- */
- u64 lastpackets;
- u64 lastbytes;
-
- u32 id;
- bool aging;
-
- struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
-};
-
struct mlx5_ft_underlay_qp {
struct list_head list;
u32 qpn;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 32accd6b041b..c6c28f56aa29 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -41,6 +41,29 @@
/* Max number of counters to query in bulk read is 32K */
#define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
+struct mlx5_fc_cache {
+ u64 packets;
+ u64 bytes;
+ u64 lastuse;
+};
+
+struct mlx5_fc {
+ struct list_head list;
+ struct llist_node addlist;
+ struct llist_node dellist;
+
+ /* last{packets,bytes} members are used when calculating the delta since
+ * last reading
+ */
+ u64 lastpackets;
+ u64 lastbytes;
+
+ u32 id;
+ bool aging;
+
+ struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
+};
+
/* locking scheme:
*
* It is the responsibility of the user to prevent concurrent calls or bad
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 43118de8ee99..196c07383082 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -38,6 +38,8 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "lib/mlx5.h"
enum {
MLX5_HEALTH_POLL_INTERVAL = 2 * HZ,
@@ -78,29 +80,6 @@ void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
&dev->iseg->cmdq_addr_l_sz);
}
-static void trigger_cmd_completions(struct mlx5_core_dev *dev)
-{
- unsigned long flags;
- u64 vector;
-
- /* wait for pending handlers to complete */
- synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD));
- spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
- vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
- if (!vector)
- goto no_trig;
-
- vector |= MLX5_TRIGGERED_CMD_COMP;
- spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
-
- mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
- mlx5_cmd_comp_handler(dev, vector, true);
- return;
-
-no_trig:
- spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
-}
-
static int in_fatal(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
@@ -124,10 +103,10 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
mlx5_core_err(dev, "start\n");
if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) {
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
- trigger_cmd_completions(dev);
+ mlx5_cmd_trigger_completions(dev);
}
- mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1);
+ mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
mlx5_core_err(dev, "end\n");
unlock:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 11dabd62e2c7..bfc0f6581729 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -87,7 +87,7 @@ int mlx5i_init(struct mlx5_core_dev *mdev,
mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
netdev->mtu = max_mtu;
- mlx5e_build_nic_params(mdev, &priv->channels.params,
+ mlx5e_build_nic_params(mdev, &priv->rss_params, &priv->channels.params,
mlx5e_get_netdev_max_channels(netdev),
netdev->mtu);
mlx5i_build_nic_params(mdev, &priv->channels.params);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 582b2f18010a..3a6baed722d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -34,11 +34,15 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
+#include "eswitch.h"
enum {
- MLX5_LAG_FLAG_BONDED = 1 << 0,
+ MLX5_LAG_FLAG_ROCE = 1 << 0,
+ MLX5_LAG_FLAG_SRIOV = 1 << 1,
};
+#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV)
+
struct lag_func {
struct mlx5_core_dev *dev;
struct net_device *netdev;
@@ -61,11 +65,6 @@ struct mlx5_lag {
struct lag_tracker tracker;
struct delayed_work bond_work;
struct notifier_block nb;
-
- /* Admin state. Allow lag only if allowed is true
- * even if network conditions for lag were met
- */
- bool allowed;
};
/* General purpose, use for short periods of time.
@@ -165,9 +164,19 @@ static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
return -1;
}
-static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev)
+static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
+{
+ return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
+}
+
+static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
+{
+ return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
+}
+
+static bool __mlx5_lag_is_active(struct mlx5_lag *ldev)
{
- return !!(ldev->flags & MLX5_LAG_FLAG_BONDED);
+ return !!(ldev->flags & MLX5_LAG_MODE_FLAGS);
}
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
@@ -186,36 +195,131 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
*port2 = 1;
}
-static void mlx5_activate_lag(struct mlx5_lag *ldev,
- struct lag_tracker *tracker)
+static void mlx5_modify_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker)
{
struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ u8 v2p_port1, v2p_port2;
int err;
- ldev->flags |= MLX5_LAG_FLAG_BONDED;
+ mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
+ &v2p_port2);
+
+ if (v2p_port1 != ldev->v2p_map[0] ||
+ v2p_port2 != ldev->v2p_map[1]) {
+ ldev->v2p_map[0] = v2p_port1;
+ ldev->v2p_map[1] = v2p_port2;
+
+ mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
+ ldev->v2p_map[0], ldev->v2p_map[1]);
+
+ err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
+ if (err)
+ mlx5_core_err(dev0,
+ "Failed to modify LAG (%d)\n",
+ err);
+ }
+}
+
+static int mlx5_create_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ int err;
mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0],
&ldev->v2p_map[1]);
+ mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
+ ldev->v2p_map[0], ldev->v2p_map[1]);
+
err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]);
if (err)
mlx5_core_err(dev0,
"Failed to create LAG (%d)\n",
err);
+ return err;
+}
+
+static int mlx5_activate_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ u8 flags)
+{
+ bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
+ struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ int err;
+
+ err = mlx5_create_lag(ldev, tracker);
+ if (err) {
+ if (roce_lag) {
+ mlx5_core_err(dev0,
+ "Failed to activate RoCE LAG\n");
+ } else {
+ mlx5_core_err(dev0,
+ "Failed to activate VF LAG\n"
+ "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
+ }
+ return err;
+ }
+
+ ldev->flags |= flags;
+ return 0;
}
-static void mlx5_deactivate_lag(struct mlx5_lag *ldev)
+static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ bool roce_lag = __mlx5_lag_is_roce(ldev);
int err;
- ldev->flags &= ~MLX5_LAG_FLAG_BONDED;
+ ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
err = mlx5_cmd_destroy_lag(dev0);
- if (err)
- mlx5_core_err(dev0,
- "Failed to destroy LAG (%d)\n",
- err);
+ if (err) {
+ if (roce_lag) {
+ mlx5_core_err(dev0,
+ "Failed to deactivate RoCE LAG; driver restart required\n");
+ } else {
+ mlx5_core_err(dev0,
+ "Failed to deactivate VF LAG; driver restart required\n"
+ "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
+ }
+ }
+
+ return err;
+}
+
+static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
+{
+ if (!ldev->pf[0].dev || !ldev->pf[1].dev)
+ return false;
+
+#ifdef CONFIG_MLX5_ESWITCH
+ return mlx5_esw_lag_prereq(ldev->pf[0].dev, ldev->pf[1].dev);
+#else
+ return (!mlx5_sriov_is_enabled(ldev->pf[0].dev) &&
+ !mlx5_sriov_is_enabled(ldev->pf[1].dev));
+#endif
+}
+
+static void mlx5_lag_add_ib_devices(struct mlx5_lag *ldev)
+{
+ int i;
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (ldev->pf[i].dev)
+ mlx5_add_dev_by_protocol(ldev->pf[i].dev,
+ MLX5_INTERFACE_PROTOCOL_IB);
+}
+
+static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev)
+{
+ int i;
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (ldev->pf[i].dev)
+ mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
+ MLX5_INTERFACE_PROTOCOL_IB);
}
static void mlx5_do_bond(struct mlx5_lag *ldev)
@@ -223,9 +327,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
struct mlx5_core_dev *dev1 = ldev->pf[1].dev;
struct lag_tracker tracker;
- u8 v2p_port1, v2p_port2;
- int i, err;
- bool do_bond;
+ bool do_bond, roce_lag;
+ int err;
if (!dev0 || !dev1)
return;
@@ -234,42 +337,45 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
tracker = ldev->tracker;
mutex_unlock(&lag_mutex);
- do_bond = tracker.is_bonded && ldev->allowed;
+ do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
- if (do_bond && !mlx5_lag_is_bonded(ldev)) {
- for (i = 0; i < MLX5_MAX_PORTS; i++)
- mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
- MLX5_INTERFACE_PROTOCOL_IB);
+ if (do_bond && !__mlx5_lag_is_active(ldev)) {
+ roce_lag = !mlx5_sriov_is_enabled(dev0) &&
+ !mlx5_sriov_is_enabled(dev1);
- mlx5_activate_lag(ldev, &tracker);
+ if (roce_lag)
+ mlx5_lag_remove_ib_devices(ldev);
- mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_nic_vport_enable_roce(dev1);
- } else if (do_bond && mlx5_lag_is_bonded(ldev)) {
- mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1,
- &v2p_port2);
+ err = mlx5_activate_lag(ldev, &tracker,
+ roce_lag ? MLX5_LAG_FLAG_ROCE :
+ MLX5_LAG_FLAG_SRIOV);
+ if (err) {
+ if (roce_lag)
+ mlx5_lag_add_ib_devices(ldev);
- if ((v2p_port1 != ldev->v2p_map[0]) ||
- (v2p_port2 != ldev->v2p_map[1])) {
- ldev->v2p_map[0] = v2p_port1;
- ldev->v2p_map[1] = v2p_port2;
+ return;
+ }
- err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
- if (err)
- mlx5_core_err(dev0,
- "Failed to modify LAG (%d)\n",
- err);
+ if (roce_lag) {
+ mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_nic_vport_enable_roce(dev1);
+ }
+ } else if (do_bond && __mlx5_lag_is_active(ldev)) {
+ mlx5_modify_lag(ldev, &tracker);
+ } else if (!do_bond && __mlx5_lag_is_active(ldev)) {
+ roce_lag = __mlx5_lag_is_roce(ldev);
+
+ if (roce_lag) {
+ mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_nic_vport_disable_roce(dev1);
}
- } else if (!do_bond && mlx5_lag_is_bonded(ldev)) {
- mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_nic_vport_disable_roce(dev1);
- mlx5_deactivate_lag(ldev);
+ err = mlx5_deactivate_lag(ldev);
+ if (err)
+ return;
- for (i = 0; i < MLX5_MAX_PORTS; i++)
- if (ldev->pf[i].dev)
- mlx5_add_dev_by_protocol(ldev->pf[i].dev,
- MLX5_INTERFACE_PROTOCOL_IB);
+ if (roce_lag)
+ mlx5_lag_add_ib_devices(ldev);
}
}
@@ -419,15 +525,6 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
return NOTIFY_DONE;
}
-static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
-{
- if ((ldev->pf[0].dev && mlx5_sriov_is_enabled(ldev->pf[0].dev)) ||
- (ldev->pf[1].dev && mlx5_sriov_is_enabled(ldev->pf[1].dev)))
- return false;
- else
- return true;
-}
-
static struct mlx5_lag *mlx5_lag_dev_alloc(void)
{
struct mlx5_lag *ldev;
@@ -437,7 +534,6 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(void)
return NULL;
INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
- ldev->allowed = mlx5_lag_check_prereq(ldev);
return ldev;
}
@@ -462,7 +558,6 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
ldev->tracker.netdev_state[fn].link_up = 0;
ldev->tracker.netdev_state[fn].tx_enabled = 0;
- ldev->allowed = mlx5_lag_check_prereq(ldev);
dev->priv.lag = ldev;
mutex_unlock(&lag_mutex);
@@ -484,7 +579,6 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
memset(&ldev->pf[i], 0, sizeof(*ldev->pf));
dev->priv.lag = NULL;
- ldev->allowed = mlx5_lag_check_prereq(ldev);
mutex_unlock(&lag_mutex);
}
@@ -532,7 +626,7 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev)
if (!ldev)
return;
- if (mlx5_lag_is_bonded(ldev))
+ if (__mlx5_lag_is_active(ldev))
mlx5_deactivate_lag(ldev);
mlx5_lag_dev_remove_pf(ldev, dev);
@@ -549,56 +643,61 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev)
}
}
-bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
+bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
bool res;
mutex_lock(&lag_mutex);
ldev = mlx5_lag_dev_get(dev);
- res = ldev && mlx5_lag_is_bonded(ldev);
+ res = ldev && __mlx5_lag_is_roce(ldev);
mutex_unlock(&lag_mutex);
return res;
}
-EXPORT_SYMBOL(mlx5_lag_is_active);
+EXPORT_SYMBOL(mlx5_lag_is_roce);
-static int mlx5_lag_set_state(struct mlx5_core_dev *dev, bool allow)
+bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
- int ret = 0;
- bool lag_active;
-
- mlx5_dev_list_lock();
+ bool res;
+ mutex_lock(&lag_mutex);
ldev = mlx5_lag_dev_get(dev);
- if (!ldev) {
- ret = -ENODEV;
- goto unlock;
- }
- lag_active = mlx5_lag_is_bonded(ldev);
- if (!mlx5_lag_check_prereq(ldev) && allow) {
- ret = -EINVAL;
- goto unlock;
- }
- if (ldev->allowed == allow)
- goto unlock;
- ldev->allowed = allow;
- if ((lag_active && !allow) || allow)
- mlx5_do_bond(ldev);
-unlock:
- mlx5_dev_list_unlock();
- return ret;
+ res = ldev && __mlx5_lag_is_active(ldev);
+ mutex_unlock(&lag_mutex);
+
+ return res;
}
+EXPORT_SYMBOL(mlx5_lag_is_active);
-int mlx5_lag_forbid(struct mlx5_core_dev *dev)
+bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
- return mlx5_lag_set_state(dev, false);
+ struct mlx5_lag *ldev;
+ bool res;
+
+ mutex_lock(&lag_mutex);
+ ldev = mlx5_lag_dev_get(dev);
+ res = ldev && __mlx5_lag_is_sriov(ldev);
+ mutex_unlock(&lag_mutex);
+
+ return res;
}
+EXPORT_SYMBOL(mlx5_lag_is_sriov);
-int mlx5_lag_allow(struct mlx5_core_dev *dev)
+void mlx5_lag_update(struct mlx5_core_dev *dev)
{
- return mlx5_lag_set_state(dev, true);
+ struct mlx5_lag *ldev;
+
+ mlx5_dev_list_lock();
+ ldev = mlx5_lag_dev_get(dev);
+ if (!ldev)
+ goto unlock;
+
+ mlx5_do_bond(ldev);
+
+unlock:
+ mlx5_dev_list_unlock();
}
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
@@ -609,7 +708,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
mutex_lock(&lag_mutex);
ldev = mlx5_lag_dev_get(dev);
- if (!(ldev && mlx5_lag_is_bonded(ldev)))
+ if (!(ldev && __mlx5_lag_is_roce(ldev)))
goto unlock;
if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
@@ -638,7 +737,7 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
return true;
ldev = mlx5_lag_dev_get(dev);
- if (!ldev || !mlx5_lag_is_bonded(ldev) || ldev->pf[0].dev == dev)
+ if (!ldev || !__mlx5_lag_is_roce(ldev) || ldev->pf[0].dev == dev)
return true;
/* If bonded, we do not add an IB device for PF1. */
@@ -665,7 +764,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
mutex_lock(&lag_mutex);
ldev = mlx5_lag_dev_get(dev);
- if (ldev && mlx5_lag_is_bonded(ldev)) {
+ if (ldev && __mlx5_lag_is_roce(ldev)) {
num_ports = MLX5_MAX_PORTS;
mdev[0] = ldev->pf[0].dev;
mdev[1] = ldev->pf[1].dev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 0d90b1b4a3d3..ca0ee9916e9e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -33,6 +33,7 @@
#include <linux/clocksource.h>
#include <linux/highmem.h>
#include <rdma/mlx5-abi.h>
+#include "lib/eq.h"
#include "en.h"
#include "clock.h"
@@ -71,7 +72,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc)
struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
clock);
- return mlx5_read_internal_timer(mdev) & cc->mask;
+ return mlx5_read_internal_timer(mdev, NULL) & cc->mask;
}
static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
@@ -155,15 +156,19 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
return 0;
}
-static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
+ struct ptp_system_timestamp *sts)
{
struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
ptp_info);
- u64 ns;
+ struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
+ clock);
unsigned long flags;
+ u64 cycles, ns;
write_seqlock_irqsave(&clock->lock, flags);
- ns = timecounter_read(&clock->tc);
+ cycles = mlx5_read_internal_timer(mdev, sts);
+ ns = timecounter_cyc2time(&clock->tc, cycles);
write_sequnlock_irqrestore(&clock->lock, flags);
*ts = ns_to_timespec64(ns);
@@ -306,7 +311,7 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
ts.tv_sec = rq->perout.start.sec;
ts.tv_nsec = rq->perout.start.nsec;
ns = timespec64_to_ns(&ts);
- cycles_now = mlx5_read_internal_timer(mdev);
+ cycles_now = mlx5_read_internal_timer(mdev, NULL);
write_seqlock_irqsave(&clock->lock, flags);
nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
nsec_delta = ns - nsec_now;
@@ -383,7 +388,7 @@ static const struct ptp_clock_info mlx5_ptp_clock_info = {
.pps = 0,
.adjfreq = mlx5_ptp_adjfreq,
.adjtime = mlx5_ptp_adjtime,
- .gettime64 = mlx5_ptp_gettime,
+ .gettimex64 = mlx5_ptp_gettimex,
.settime64 = mlx5_ptp_settime,
.enable = NULL,
.verify = NULL,
@@ -439,16 +444,17 @@ static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
}
-void mlx5_pps_event(struct mlx5_core_dev *mdev,
- struct mlx5_eqe *eqe)
+static int mlx5_pps_event(struct notifier_block *nb,
+ unsigned long type, void *data)
{
- struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb);
+ struct mlx5_core_dev *mdev = clock->mdev;
struct ptp_clock_event ptp_event;
- struct timespec64 ts;
- u64 nsec_now, nsec_delta;
u64 cycles_now, cycles_delta;
+ u64 nsec_now, nsec_delta, ns;
+ struct mlx5_eqe *eqe = data;
int pin = eqe->data.pps.pin;
- s64 ns;
+ struct timespec64 ts;
unsigned long flags;
switch (clock->ptp_info.pin_config[pin].func) {
@@ -463,11 +469,12 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev,
} else {
ptp_event.type = PTP_CLOCK_EXTTS;
}
+ /* TODOL clock->ptp can be NULL if ptp_clock_register failes */
ptp_clock_event(clock->ptp, &ptp_event);
break;
case PTP_PF_PEROUT:
- mlx5_ptp_gettime(&clock->ptp_info, &ts);
- cycles_now = mlx5_read_internal_timer(mdev);
+ mlx5_ptp_gettimex(&clock->ptp_info, &ts, NULL);
+ cycles_now = mlx5_read_internal_timer(mdev, NULL);
ts.tv_sec += 1;
ts.tv_nsec = 0;
ns = timespec64_to_ns(&ts);
@@ -481,8 +488,11 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev,
write_sequnlock_irqrestore(&clock->lock, flags);
break;
default:
- mlx5_core_err(mdev, " Unhandled event\n");
+ mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n",
+ clock->ptp_info.pin_config[pin].func);
}
+
+ return NOTIFY_OK;
}
void mlx5_init_clock(struct mlx5_core_dev *mdev)
@@ -511,14 +521,14 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
ktime_to_ns(ktime_get_real()));
/* Calculate period in seconds to call the overflow watchdog - to make
- * sure counter is checked at least once every wrap around.
+ * sure counter is checked at least twice every wrap around.
* The period is calculated as the minimum between max HW cycles count
* (The clock source mask) and max amount of cycles that can be
* multiplied by clock multiplier where the result doesn't exceed
* 64bits.
*/
overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult);
- overflow_cycles = min(overflow_cycles, clock->cycles.mask >> 1);
+ overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3));
ns = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles,
frac, &frac);
@@ -567,6 +577,9 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
PTR_ERR(clock->ptp));
clock->ptp = NULL;
}
+
+ MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT);
+ mlx5_eq_notifier_register(mdev, &clock->pps_nb);
}
void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
@@ -576,6 +589,7 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
return;
+ mlx5_eq_notifier_unregister(mdev, &clock->pps_nb);
if (clock->ptp) {
ptp_clock_unregister(clock->ptp);
clock->ptp = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
index 263cb6e2aeee..31600924bdc3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
@@ -36,7 +36,6 @@
#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
void mlx5_init_clock(struct mlx5_core_dev *mdev);
void mlx5_cleanup_clock(struct mlx5_core_dev *mdev);
-void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
{
@@ -60,8 +59,6 @@ static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
#else
static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {}
static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {}
-static inline void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) {}
-
static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
{
return -1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
new file mode 100644
index 000000000000..bced2efe9bef
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#include <linux/mlx5/vport.h>
+#include "lib/devcom.h"
+
+static LIST_HEAD(devcom_list);
+
+#define devcom_for_each_component(priv, comp, iter) \
+ for (iter = 0; \
+ comp = &(priv)->components[iter], iter < MLX5_DEVCOM_NUM_COMPONENTS; \
+ iter++)
+
+struct mlx5_devcom_component {
+ struct {
+ void *data;
+ } device[MLX5_MAX_PORTS];
+
+ mlx5_devcom_event_handler_t handler;
+ struct rw_semaphore sem;
+ bool paired;
+};
+
+struct mlx5_devcom_list {
+ struct list_head list;
+
+ struct mlx5_devcom_component components[MLX5_DEVCOM_NUM_COMPONENTS];
+ struct mlx5_core_dev *devs[MLX5_MAX_PORTS];
+};
+
+struct mlx5_devcom {
+ struct mlx5_devcom_list *priv;
+ int idx;
+};
+
+static struct mlx5_devcom_list *mlx5_devcom_list_alloc(void)
+{
+ struct mlx5_devcom_component *comp;
+ struct mlx5_devcom_list *priv;
+ int i;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return NULL;
+
+ devcom_for_each_component(priv, comp, i)
+ init_rwsem(&comp->sem);
+
+ return priv;
+}
+
+static struct mlx5_devcom *mlx5_devcom_alloc(struct mlx5_devcom_list *priv,
+ u8 idx)
+{
+ struct mlx5_devcom *devcom;
+
+ devcom = kzalloc(sizeof(*devcom), GFP_KERNEL);
+ if (!devcom)
+ return NULL;
+
+ devcom->priv = priv;
+ devcom->idx = idx;
+ return devcom;
+}
+
+/* Must be called with intf_mutex held */
+struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
+{
+ struct mlx5_devcom_list *priv = NULL, *iter;
+ struct mlx5_devcom *devcom = NULL;
+ bool new_priv = false;
+ u64 sguid0, sguid1;
+ int idx, i;
+
+ if (!mlx5_core_is_pf(dev))
+ return NULL;
+
+ sguid0 = mlx5_query_nic_system_image_guid(dev);
+ list_for_each_entry(iter, &devcom_list, list) {
+ struct mlx5_core_dev *tmp_dev = NULL;
+
+ idx = -1;
+ for (i = 0; i < MLX5_MAX_PORTS; i++) {
+ if (iter->devs[i])
+ tmp_dev = iter->devs[i];
+ else
+ idx = i;
+ }
+
+ if (idx == -1)
+ continue;
+
+ sguid1 = mlx5_query_nic_system_image_guid(tmp_dev);
+ if (sguid0 != sguid1)
+ continue;
+
+ priv = iter;
+ break;
+ }
+
+ if (!priv) {
+ priv = mlx5_devcom_list_alloc();
+ if (!priv)
+ return ERR_PTR(-ENOMEM);
+
+ idx = 0;
+ new_priv = true;
+ }
+
+ priv->devs[idx] = dev;
+ devcom = mlx5_devcom_alloc(priv, idx);
+ if (!devcom) {
+ kfree(priv);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ if (new_priv)
+ list_add(&priv->list, &devcom_list);
+
+ return devcom;
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom)
+{
+ struct mlx5_devcom_list *priv;
+ int i;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return;
+
+ priv = devcom->priv;
+ priv->devs[devcom->idx] = NULL;
+
+ kfree(devcom);
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (priv->devs[i])
+ break;
+
+ if (i != MLX5_MAX_PORTS)
+ return;
+
+ list_del(&priv->list);
+ kfree(priv);
+}
+
+void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ mlx5_devcom_event_handler_t handler,
+ void *data)
+{
+ struct mlx5_devcom_component *comp;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return;
+
+ WARN_ON(!data);
+
+ comp = &devcom->priv->components[id];
+ down_write(&comp->sem);
+ comp->handler = handler;
+ comp->device[devcom->idx].data = data;
+ up_write(&comp->sem);
+}
+
+void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id)
+{
+ struct mlx5_devcom_component *comp;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return;
+
+ comp = &devcom->priv->components[id];
+ down_write(&comp->sem);
+ comp->device[devcom->idx].data = NULL;
+ up_write(&comp->sem);
+}
+
+int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ int event,
+ void *event_data)
+{
+ struct mlx5_devcom_component *comp;
+ int err = -ENODEV, i;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return err;
+
+ comp = &devcom->priv->components[id];
+ down_write(&comp->sem);
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (i != devcom->idx && comp->device[i].data) {
+ err = comp->handler(event, comp->device[i].data,
+ event_data);
+ break;
+ }
+
+ up_write(&comp->sem);
+ return err;
+}
+
+void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ bool paired)
+{
+ struct mlx5_devcom_component *comp;
+
+ comp = &devcom->priv->components[id];
+ WARN_ON(!rwsem_is_locked(&comp->sem));
+
+ comp->paired = paired;
+}
+
+bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id)
+{
+ if (IS_ERR_OR_NULL(devcom))
+ return false;
+
+ return devcom->priv->components[id].paired;
+}
+
+void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id)
+{
+ struct mlx5_devcom_component *comp;
+ int i;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return NULL;
+
+ comp = &devcom->priv->components[id];
+ down_read(&comp->sem);
+ if (!comp->paired) {
+ up_read(&comp->sem);
+ return NULL;
+ }
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (i != devcom->idx)
+ break;
+
+ return comp->device[i].data;
+}
+
+void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id)
+{
+ struct mlx5_devcom_component *comp = &devcom->priv->components[id];
+
+ up_read(&comp->sem);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
new file mode 100644
index 000000000000..939d5bf1581b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#ifndef __LIB_MLX5_DEVCOM_H__
+#define __LIB_MLX5_DEVCOM_H__
+
+#include <linux/mlx5/driver.h>
+
+enum mlx5_devcom_components {
+ MLX5_DEVCOM_ESW_OFFLOADS,
+
+ MLX5_DEVCOM_NUM_COMPONENTS,
+};
+
+typedef int (*mlx5_devcom_event_handler_t)(int event,
+ void *my_data,
+ void *event_data);
+
+struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev);
+void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom);
+
+void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ mlx5_devcom_event_handler_t handler,
+ void *data);
+void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id);
+
+int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ int event,
+ void *event_data);
+
+void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ bool paired);
+bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id);
+
+void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id);
+void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id);
+
+#endif
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
new file mode 100644
index 000000000000..c0fb6d72b695
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#ifndef __LIB_MLX5_EQ_H__
+#define __LIB_MLX5_EQ_H__
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
+#include <linux/mlx5/cq.h>
+
+#define MLX5_MAX_IRQ_NAME (32)
+#define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe))
+
+struct mlx5_eq_tasklet {
+ struct list_head list;
+ struct list_head process_list;
+ struct tasklet_struct task;
+ spinlock_t lock; /* lock completion tasklet list */
+};
+
+struct mlx5_cq_table {
+ spinlock_t lock; /* protect radix tree */
+ struct radix_tree_root tree;
+};
+
+struct mlx5_eq {
+ struct mlx5_core_dev *dev;
+ struct mlx5_cq_table cq_table;
+ __be32 __iomem *doorbell;
+ u32 cons_index;
+ struct mlx5_frag_buf buf;
+ int size;
+ unsigned int vecidx;
+ unsigned int irqn;
+ u8 eqn;
+ int nent;
+ struct mlx5_rsc_debug *dbg;
+};
+
+struct mlx5_eq_comp {
+ struct mlx5_eq core; /* Must be first */
+ struct mlx5_eq_tasklet tasklet_ctx;
+ struct list_head list;
+};
+
+static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
+{
+ return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
+}
+
+static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
+{
+ struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
+
+ return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
+}
+
+static inline void eq_update_ci(struct mlx5_eq *eq, int arm)
+{
+ __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+ u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+ __raw_writel((__force u32)cpu_to_be32(val), addr);
+ /* We still want ordering, just not swabbing, so add a barrier */
+ mb();
+}
+
+int mlx5_eq_table_init(struct mlx5_core_dev *dev);
+void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_eq_table_create(struct mlx5_core_dev *dev);
+void mlx5_eq_table_destroy(struct mlx5_core_dev *dev);
+
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn);
+struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev);
+void mlx5_cq_tasklet_cb(unsigned long data);
+struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix);
+
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq);
+void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev);
+void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev);
+
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+
+/* This function should only be called after mlx5_cmd_force_teardown_hca */
+void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
+
+#ifdef CONFIG_RFS_ACCEL
+struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev);
+#endif
+
+int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb);
+int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
index 7550b1cc8c6a..397a2847867a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -33,6 +33,8 @@
#ifndef __LIB_MLX5_H__
#define __LIB_MLX5_H__
+#include "mlx5_core.h"
+
void mlx5_init_reserved_gids(struct mlx5_core_dev *dev);
void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev);
int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
@@ -40,4 +42,38 @@ void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
+/* TODO move to lib/events.h */
+
+#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF
+#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF
+
+enum port_module_event_status_type {
+ MLX5_MODULE_STATUS_PLUGGED = 0x1,
+ MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
+ MLX5_MODULE_STATUS_ERROR = 0x3,
+ MLX5_MODULE_STATUS_DISABLED = 0x4,
+ MLX5_MODULE_STATUS_NUM,
+};
+
+enum port_module_event_error_type {
+ MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED = 0x0,
+ MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX = 0x1,
+ MLX5_MODULE_EVENT_ERROR_BUS_STUCK = 0x2,
+ MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT = 0x3,
+ MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4,
+ MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER = 0x5,
+ MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6,
+ MLX5_MODULE_EVENT_ERROR_BAD_CABLE = 0x7,
+ MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED = 0xc,
+ MLX5_MODULE_EVENT_ERROR_NUM,
+};
+
+struct mlx5_pme_stats {
+ u64 status_counters[MLX5_MODULE_STATUS_NUM];
+ u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM];
+};
+
+void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats);
+int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data);
+
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 28132c7dc05f..77896c11f6f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -43,7 +43,6 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
-#include <linux/mlx5/srq.h>
#include <linux/debugfs.h>
#include <linux/kmod.h>
#include <linux/mlx5/mlx5_ifc.h>
@@ -53,6 +52,7 @@
#endif
#include <net/devlink.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
#include "fs_core.h"
#include "lib/mpfs.h"
#include "eswitch.h"
@@ -63,6 +63,7 @@
#include "accel/tls.h"
#include "lib/clock.h"
#include "lib/vxlan.h"
+#include "lib/devcom.h"
#include "diag/fw_tracer.h"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
@@ -319,51 +320,6 @@ static void release_bar(struct pci_dev *pdev)
pci_release_regions(pdev);
}
-static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
-{
- struct mlx5_priv *priv = &dev->priv;
- struct mlx5_eq_table *table = &priv->eq_table;
- int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
- MLX5_CAP_GEN(dev, max_num_eqs) :
- 1 << MLX5_CAP_GEN(dev, log_max_eq);
- int nvec;
- int err;
-
- nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
- MLX5_EQ_VEC_COMP_BASE;
- nvec = min_t(int, nvec, num_eqs);
- if (nvec <= MLX5_EQ_VEC_COMP_BASE)
- return -ENOMEM;
-
- priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL);
- if (!priv->irq_info)
- return -ENOMEM;
-
- nvec = pci_alloc_irq_vectors(dev->pdev,
- MLX5_EQ_VEC_COMP_BASE + 1, nvec,
- PCI_IRQ_MSIX);
- if (nvec < 0) {
- err = nvec;
- goto err_free_irq_info;
- }
-
- table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
-
- return 0;
-
-err_free_irq_info:
- kfree(priv->irq_info);
- return err;
-}
-
-static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev)
-{
- struct mlx5_priv *priv = &dev->priv;
-
- pci_free_irq_vectors(dev->pdev);
- kfree(priv->irq_info);
-}
-
struct mlx5_reg_host_endianness {
u8 he;
u8 rsvd[15];
@@ -624,188 +580,24 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
-u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
+u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
+ struct ptp_system_timestamp *sts)
{
u32 timer_h, timer_h1, timer_l;
timer_h = ioread32be(&dev->iseg->internal_timer_h);
+ ptp_read_system_prets(sts);
timer_l = ioread32be(&dev->iseg->internal_timer_l);
+ ptp_read_system_postts(sts);
timer_h1 = ioread32be(&dev->iseg->internal_timer_h);
- if (timer_h != timer_h1) /* wrap around */
+ if (timer_h != timer_h1) {
+ /* wrap around */
+ ptp_read_system_prets(sts);
timer_l = ioread32be(&dev->iseg->internal_timer_l);
-
- return (u64)timer_l | (u64)timer_h1 << 32;
-}
-
-static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
- struct mlx5_priv *priv = &mdev->priv;
- int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
-
- if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
- mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
- return -ENOMEM;
+ ptp_read_system_postts(sts);
}
- cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
- priv->irq_info[i].mask);
-
- if (IS_ENABLED(CONFIG_SMP) &&
- irq_set_affinity_hint(irq, priv->irq_info[i].mask))
- mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
-
- return 0;
-}
-
-static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
- struct mlx5_priv *priv = &mdev->priv;
- int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
-
- irq_set_affinity_hint(irq, NULL);
- free_cpumask_var(priv->irq_info[i].mask);
-}
-
-static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
-{
- int err;
- int i;
-
- for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
- err = mlx5_irq_set_affinity_hint(mdev, i);
- if (err)
- goto err_out;
- }
-
- return 0;
-
-err_out:
- for (i--; i >= 0; i--)
- mlx5_irq_clear_affinity_hint(mdev, i);
-
- return err;
-}
-
-static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
-{
- int i;
-
- for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
- mlx5_irq_clear_affinity_hint(mdev, i);
-}
-
-int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
- unsigned int *irqn)
-{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- struct mlx5_eq *eq, *n;
- int err = -ENOENT;
-
- spin_lock(&table->lock);
- list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
- if (eq->index == vector) {
- *eqn = eq->eqn;
- *irqn = eq->irqn;
- err = 0;
- break;
- }
- }
- spin_unlock(&table->lock);
-
- return err;
-}
-EXPORT_SYMBOL(mlx5_vector2eqn);
-
-struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn)
-{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- struct mlx5_eq *eq;
-
- spin_lock(&table->lock);
- list_for_each_entry(eq, &table->comp_eqs_list, list)
- if (eq->eqn == eqn) {
- spin_unlock(&table->lock);
- return eq;
- }
-
- spin_unlock(&table->lock);
-
- return ERR_PTR(-ENOENT);
-}
-
-static void free_comp_eqs(struct mlx5_core_dev *dev)
-{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- struct mlx5_eq *eq, *n;
-
-#ifdef CONFIG_RFS_ACCEL
- if (dev->rmap) {
- free_irq_cpu_rmap(dev->rmap);
- dev->rmap = NULL;
- }
-#endif
- spin_lock(&table->lock);
- list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
- list_del(&eq->list);
- spin_unlock(&table->lock);
- if (mlx5_destroy_unmap_eq(dev, eq))
- mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n",
- eq->eqn);
- kfree(eq);
- spin_lock(&table->lock);
- }
- spin_unlock(&table->lock);
-}
-
-static int alloc_comp_eqs(struct mlx5_core_dev *dev)
-{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- char name[MLX5_MAX_IRQ_NAME];
- struct mlx5_eq *eq;
- int ncomp_vec;
- int nent;
- int err;
- int i;
-
- INIT_LIST_HEAD(&table->comp_eqs_list);
- ncomp_vec = table->num_comp_vectors;
- nent = MLX5_COMP_EQ_SIZE;
-#ifdef CONFIG_RFS_ACCEL
- dev->rmap = alloc_irq_cpu_rmap(ncomp_vec);
- if (!dev->rmap)
- return -ENOMEM;
-#endif
- for (i = 0; i < ncomp_vec; i++) {
- eq = kzalloc(sizeof(*eq), GFP_KERNEL);
- if (!eq) {
- err = -ENOMEM;
- goto clean;
- }
-
-#ifdef CONFIG_RFS_ACCEL
- irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev,
- MLX5_EQ_VEC_COMP_BASE + i));
-#endif
- snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
- err = mlx5_create_map_eq(dev, eq,
- i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
- name, MLX5_EQ_TYPE_COMP);
- if (err) {
- kfree(eq);
- goto clean;
- }
- mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
- eq->index = i;
- spin_lock(&table->lock);
- list_add_tail(&eq->list, &table->comp_eqs_list);
- spin_unlock(&table->lock);
- }
-
- return 0;
-
-clean:
- free_comp_eqs(dev);
- return err;
+ return (u64)timer_l | (u64)timer_h1 << 32;
}
static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
@@ -938,28 +730,37 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
struct pci_dev *pdev = dev->pdev;
int err;
+ priv->devcom = mlx5_devcom_register_device(dev);
+ if (IS_ERR(priv->devcom))
+ dev_err(&pdev->dev, "failed to register with devcom (0x%p)\n",
+ priv->devcom);
+
err = mlx5_query_board_id(dev);
if (err) {
dev_err(&pdev->dev, "query board id failed\n");
- goto out;
+ goto err_devcom;
}
- err = mlx5_eq_init(dev);
+ err = mlx5_eq_table_init(dev);
if (err) {
dev_err(&pdev->dev, "failed to initialize eq\n");
- goto out;
+ goto err_devcom;
+ }
+
+ err = mlx5_events_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "failed to initialize events\n");
+ goto err_eq_cleanup;
}
err = mlx5_cq_debugfs_init(dev);
if (err) {
dev_err(&pdev->dev, "failed to initialize cq debugfs\n");
- goto err_eq_cleanup;
+ goto err_events_cleanup;
}
mlx5_init_qp_table(dev);
- mlx5_init_srq_table(dev);
-
mlx5_init_mkey_table(dev);
mlx5_init_reserved_gids(dev);
@@ -1013,14 +814,15 @@ err_rl_cleanup:
err_tables_cleanup:
mlx5_vxlan_destroy(dev->vxlan);
mlx5_cleanup_mkey_table(dev);
- mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
mlx5_cq_debugfs_cleanup(dev);
-
+err_events_cleanup:
+ mlx5_events_cleanup(dev);
err_eq_cleanup:
- mlx5_eq_cleanup(dev);
+ mlx5_eq_table_cleanup(dev);
+err_devcom:
+ mlx5_devcom_unregister_device(dev->priv.devcom);
-out:
return err;
}
@@ -1036,10 +838,11 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_cleanup_clock(dev);
mlx5_cleanup_reserved_gids(dev);
mlx5_cleanup_mkey_table(dev);
- mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
mlx5_cq_debugfs_cleanup(dev);
- mlx5_eq_cleanup(dev);
+ mlx5_events_cleanup(dev);
+ mlx5_eq_table_cleanup(dev);
+ mlx5_devcom_unregister_device(dev->priv.devcom);
}
static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
@@ -1131,16 +934,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto reclaim_boot_pages;
}
- err = mlx5_pagealloc_start(dev);
- if (err) {
- dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n");
- goto reclaim_boot_pages;
- }
-
err = mlx5_cmd_init_hca(dev, sw_owner_id);
if (err) {
dev_err(&pdev->dev, "init hca failed\n");
- goto err_pagealloc_stop;
+ goto reclaim_boot_pages;
}
mlx5_set_driver_version(dev);
@@ -1161,23 +958,20 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
}
}
- err = mlx5_alloc_irq_vectors(dev);
- if (err) {
- dev_err(&pdev->dev, "alloc irq vectors failed\n");
- goto err_cleanup_once;
- }
-
dev->priv.uar = mlx5_get_uars_page(dev);
if (IS_ERR(dev->priv.uar)) {
dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
err = PTR_ERR(dev->priv.uar);
- goto err_disable_msix;
+ goto err_get_uars;
}
- err = mlx5_start_eqs(dev);
+ mlx5_events_start(dev);
+ mlx5_pagealloc_start(dev);
+
+ err = mlx5_eq_table_create(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
- goto err_put_uars;
+ dev_err(&pdev->dev, "Failed to create EQs\n");
+ goto err_eq_table;
}
err = mlx5_fw_tracer_init(dev->tracer);
@@ -1186,18 +980,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_fw_tracer;
}
- err = alloc_comp_eqs(dev);
- if (err) {
- dev_err(&pdev->dev, "Failed to alloc completion EQs\n");
- goto err_comp_eqs;
- }
-
- err = mlx5_irq_set_affinity_hints(dev);
- if (err) {
- dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
- goto err_affinity_hints;
- }
-
err = mlx5_fpga_device_start(dev);
if (err) {
dev_err(&pdev->dev, "fpga device start failed %d\n", err);
@@ -1266,24 +1048,17 @@ err_ipsec_start:
mlx5_fpga_device_stop(dev);
err_fpga_start:
- mlx5_irq_clear_affinity_hints(dev);
-
-err_affinity_hints:
- free_comp_eqs(dev);
-
-err_comp_eqs:
mlx5_fw_tracer_cleanup(dev->tracer);
err_fw_tracer:
- mlx5_stop_eqs(dev);
+ mlx5_eq_table_destroy(dev);
-err_put_uars:
+err_eq_table:
+ mlx5_pagealloc_stop(dev);
+ mlx5_events_stop(dev);
mlx5_put_uars_page(dev, priv->uar);
-err_disable_msix:
- mlx5_free_irq_vectors(dev);
-
-err_cleanup_once:
+err_get_uars:
if (boot)
mlx5_cleanup_once(dev);
@@ -1294,9 +1069,6 @@ err_stop_poll:
goto out_err;
}
-err_pagealloc_stop:
- mlx5_pagealloc_stop(dev);
-
reclaim_boot_pages:
mlx5_reclaim_startup_pages(dev);
@@ -1340,21 +1112,20 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
mlx5_accel_ipsec_cleanup(dev);
mlx5_accel_tls_cleanup(dev);
mlx5_fpga_device_stop(dev);
- mlx5_irq_clear_affinity_hints(dev);
- free_comp_eqs(dev);
mlx5_fw_tracer_cleanup(dev->tracer);
- mlx5_stop_eqs(dev);
+ mlx5_eq_table_destroy(dev);
+ mlx5_pagealloc_stop(dev);
+ mlx5_events_stop(dev);
mlx5_put_uars_page(dev, priv->uar);
- mlx5_free_irq_vectors(dev);
if (cleanup)
mlx5_cleanup_once(dev);
mlx5_stop_health_poll(dev, cleanup);
+
err = mlx5_cmd_teardown_hca(dev);
if (err) {
dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
goto out;
}
- mlx5_pagealloc_stop(dev);
mlx5_reclaim_startup_pages(dev);
mlx5_core_disable_hca(dev, 0);
mlx5_cmd_cleanup(dev);
@@ -1364,12 +1135,6 @@ out:
return err;
}
-struct mlx5_core_event_handler {
- void (*event)(struct mlx5_core_dev *dev,
- enum mlx5_dev_event event,
- void *data);
-};
-
static const struct devlink_ops mlx5_devlink_ops = {
#ifdef CONFIG_MLX5_ESWITCH
.eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
@@ -1403,7 +1168,6 @@ static int init_one(struct pci_dev *pdev,
pci_set_drvdata(pdev, dev);
dev->pdev = pdev;
- dev->event = mlx5_core_event;
dev->profile = &profile[prof_sel];
INIT_LIST_HEAD(&priv->ctx_list);
@@ -1411,17 +1175,6 @@ static int init_one(struct pci_dev *pdev,
mutex_init(&dev->pci_status_mutex);
mutex_init(&dev->intf_state_mutex);
- INIT_LIST_HEAD(&priv->waiting_events_list);
- priv->is_accum_events = false;
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- err = init_srcu_struct(&priv->pfault_srcu);
- if (err) {
- dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n",
- err);
- goto clean_dev;
- }
-#endif
mutex_init(&priv->bfregs.reg_head.lock);
mutex_init(&priv->bfregs.wc_head.lock);
INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
@@ -1430,7 +1183,7 @@ static int init_one(struct pci_dev *pdev,
err = mlx5_pci_init(dev, priv);
if (err) {
dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
- goto clean_srcu;
+ goto clean_dev;
}
err = mlx5_health_init(dev);
@@ -1439,12 +1192,14 @@ static int init_one(struct pci_dev *pdev,
goto close_pci;
}
- mlx5_pagealloc_init(dev);
+ err = mlx5_pagealloc_init(dev);
+ if (err)
+ goto err_pagealloc_init;
err = mlx5_load_one(dev, priv, true);
if (err) {
dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err);
- goto clean_health;
+ goto err_load_one;
}
request_module_nowait(MLX5_IB_MOD);
@@ -1458,16 +1213,13 @@ static int init_one(struct pci_dev *pdev,
clean_load:
mlx5_unload_one(dev, priv, true);
-clean_health:
+err_load_one:
mlx5_pagealloc_cleanup(dev);
+err_pagealloc_init:
mlx5_health_cleanup(dev);
close_pci:
mlx5_pci_close(dev, priv);
-clean_srcu:
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- cleanup_srcu_struct(&priv->pfault_srcu);
clean_dev:
-#endif
devlink_free(devlink);
return err;
@@ -1491,9 +1243,6 @@ static void remove_one(struct pci_dev *pdev)
mlx5_pagealloc_cleanup(dev);
mlx5_health_cleanup(dev);
mlx5_pci_close(dev, priv);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- cleanup_srcu_struct(&priv->pfault_srcu);
-#endif
devlink_free(devlink);
}
@@ -1637,7 +1386,6 @@ succeed:
* kexec. There is no need to cleanup the mlx5_core software
* contexts.
*/
- mlx5_irq_clear_affinity_hints(dev);
mlx5_core_eq_free_irqs(dev);
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 0594d0961cb3..c68dcea5985b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -38,6 +38,7 @@
#include <linux/sched.h>
#include <linux/if_link.h>
#include <linux/firmware.h>
+#include <linux/ptp_clock_kernel.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/fs.h>
@@ -78,6 +79,11 @@ do { \
__func__, __LINE__, current->pid, \
##__VA_ARGS__)
+#define mlx5_core_warn_once(__dev, format, ...) \
+ dev_warn_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
#define mlx5_core_info(__dev, format, ...) \
dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__)
@@ -97,12 +103,6 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id);
int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev);
-
-void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
- unsigned long param);
-void mlx5_core_page_fault(struct mlx5_core_dev *dev,
- struct mlx5_pagefault *pfault);
-void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force);
void mlx5_disable_device(struct mlx5_core_dev *dev);
void mlx5_recover_device(struct mlx5_core_dev *dev);
@@ -122,30 +122,10 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
u32 element_id);
int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
-u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
-
-int mlx5_eq_init(struct mlx5_core_dev *dev);
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
- int nent, u64 mask, const char *name,
- enum mlx5_eq_type type);
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
-int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
- u32 *out, int outlen);
-int mlx5_start_eqs(struct mlx5_core_dev *dev);
-void mlx5_stop_eqs(struct mlx5_core_dev *dev);
-/* This function should only be called after mlx5_cmd_force_teardown_hca */
-void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
-struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
-u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq);
-void mlx5_cq_tasklet_cb(unsigned long data);
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
-int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
+ struct ptp_system_timestamp *sts);
+
+void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev);
int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
@@ -159,6 +139,11 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
void mlx5_lag_remove(struct mlx5_core_dev *dev);
+int mlx5_events_init(struct mlx5_core_dev *dev);
+void mlx5_events_cleanup(struct mlx5_core_dev *dev);
+void mlx5_events_start(struct mlx5_core_dev *dev);
+void mlx5_events_stop(struct mlx5_core_dev *dev);
+
void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
void mlx5_attach_device(struct mlx5_core_dev *dev);
@@ -202,10 +187,8 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
MLX5_CAP_GEN(dev, lag_master);
}
-int mlx5_lag_allow(struct mlx5_core_dev *dev);
-int mlx5_lag_forbid(struct mlx5_core_dev *dev);
-
void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
+void mlx5_lag_update(struct mlx5_core_dev *dev);
enum {
MLX5_NIC_IFC_FULL = 0,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index e36d3e3675f9..a83b517b0714 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -37,6 +37,7 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
enum {
MLX5_PAGES_CANT_GIVE = 0,
@@ -433,15 +434,28 @@ static void pages_work_handler(struct work_struct *work)
kfree(req);
}
-void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
- s32 npages)
+static int req_pages_handler(struct notifier_block *nb,
+ unsigned long type, void *data)
{
struct mlx5_pages_req *req;
-
+ struct mlx5_core_dev *dev;
+ struct mlx5_priv *priv;
+ struct mlx5_eqe *eqe;
+ u16 func_id;
+ s32 npages;
+
+ priv = mlx5_nb_cof(nb, struct mlx5_priv, pg_nb);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+ eqe = data;
+
+ func_id = be16_to_cpu(eqe->data.req_pages.func_id);
+ npages = be32_to_cpu(eqe->data.req_pages.num_pages);
+ mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
+ func_id, npages);
req = kzalloc(sizeof(*req), GFP_ATOMIC);
if (!req) {
mlx5_core_warn(dev, "failed to allocate pages request\n");
- return;
+ return NOTIFY_DONE;
}
req->dev = dev;
@@ -449,6 +463,7 @@ void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
req->npages = npages;
INIT_WORK(&req->work, pages_work_handler);
queue_work(dev->priv.pg_wq, &req->work);
+ return NOTIFY_OK;
}
int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
@@ -524,29 +539,32 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
return 0;
}
-void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
+int mlx5_pagealloc_init(struct mlx5_core_dev *dev)
{
dev->priv.page_root = RB_ROOT;
INIT_LIST_HEAD(&dev->priv.free_list);
+ dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
+ if (!dev->priv.pg_wq)
+ return -ENOMEM;
+
+ return 0;
}
void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
{
- /* nothing */
+ destroy_workqueue(dev->priv.pg_wq);
}
-int mlx5_pagealloc_start(struct mlx5_core_dev *dev)
+void mlx5_pagealloc_start(struct mlx5_core_dev *dev)
{
- dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
- if (!dev->priv.pg_wq)
- return -ENOMEM;
-
- return 0;
+ MLX5_NB_INIT(&dev->priv.pg_nb, req_pages_handler, PAGE_REQUEST);
+ mlx5_eq_notifier_register(dev, &dev->priv.pg_nb);
}
void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
{
- destroy_workqueue(dev->priv.pg_wq);
+ mlx5_eq_notifier_unregister(dev, &dev->priv.pg_nb);
+ flush_workqueue(dev->priv.pg_wq);
}
int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 31a9cbd85689..2b82f35f4c35 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -915,63 +915,6 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported,
*enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk));
}
-static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = {
- "Cable plugged", /* MLX5_MODULE_STATUS_PLUGGED = 0x1 */
- "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED = 0x2 */
- "Cable error", /* MLX5_MODULE_STATUS_ERROR = 0x3 */
-};
-
-static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = {
- "Power budget exceeded",
- "Long Range for non MLNX cable",
- "Bus stuck(I2C or data shorted)",
- "No EEPROM/retry timeout",
- "Enforce part number list",
- "Unknown identifier",
- "High Temperature",
- "Bad or shorted cable/module",
- "Unknown status",
-};
-
-void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
-{
- enum port_module_event_status_type module_status;
- enum port_module_event_error_type error_type;
- struct mlx5_eqe_port_module *module_event_eqe;
- struct mlx5_priv *priv = &dev->priv;
- u8 module_num;
-
- module_event_eqe = &eqe->data.port_module;
- module_num = module_event_eqe->module;
- module_status = module_event_eqe->module_status &
- PORT_MODULE_EVENT_MODULE_STATUS_MASK;
- error_type = module_event_eqe->error_type &
- PORT_MODULE_EVENT_ERROR_TYPE_MASK;
-
- if (module_status < MLX5_MODULE_STATUS_ERROR) {
- priv->pme_stats.status_counters[module_status - 1]++;
- } else if (module_status == MLX5_MODULE_STATUS_ERROR) {
- if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN)
- /* Unknown error type */
- error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN;
- priv->pme_stats.error_counters[error_type]++;
- }
-
- if (!printk_ratelimit())
- return;
-
- if (module_status < MLX5_MODULE_STATUS_ERROR)
- mlx5_core_info(dev,
- "Port module event: module %u, %s\n",
- module_num, mlx5_pme_status[module_status - 1]);
-
- else if (module_status == MLX5_MODULE_STATUS_ERROR)
- mlx5_core_info(dev,
- "Port module event[error]: module %u, %s, %s\n",
- module_num, mlx5_pme_status[module_status - 1],
- mlx5_pme_error[error_type]);
-}
-
int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size)
{
u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 91b8139a388d..388f205a497f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -38,11 +38,11 @@
#include <linux/mlx5/transobj.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
-static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev,
- u32 rsn)
+static struct mlx5_core_rsc_common *
+mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
{
- struct mlx5_qp_table *table = &dev->priv.qp_table;
struct mlx5_core_rsc_common *common;
spin_lock(&table->lock);
@@ -53,11 +53,6 @@ static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev,
spin_unlock(&table->lock);
- if (!common) {
- mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n",
- rsn);
- return NULL;
- }
return common;
}
@@ -120,19 +115,57 @@ static bool is_event_type_allowed(int rsc_type, int event_type)
}
}
-void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
+static int rsc_event_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
{
- struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
+ struct mlx5_core_rsc_common *common;
+ struct mlx5_qp_table *table;
+ struct mlx5_core_dev *dev;
struct mlx5_core_dct *dct;
+ u8 event_type = (u8)type;
struct mlx5_core_qp *qp;
+ struct mlx5_priv *priv;
+ struct mlx5_eqe *eqe;
+ u32 rsn;
+
+ switch (event_type) {
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ eqe = data;
+ rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
+ rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
+ break;
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ eqe = data;
+ rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ table = container_of(nb, struct mlx5_qp_table, nb);
+ priv = container_of(table, struct mlx5_priv, qp_table);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+
+ mlx5_core_dbg(dev, "event (%d) arrived on resource 0x%x\n", eqe->type, rsn);
- if (!common)
- return;
+ common = mlx5_get_rsc(table, rsn);
+ if (!common) {
+ mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", rsn);
+ return NOTIFY_OK;
+ }
if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
event_type, rsn);
- return;
+ goto out;
}
switch (common->res) {
@@ -150,8 +183,10 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
default:
mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn);
}
-
+out:
mlx5_core_put_rsc(common);
+
+ return NOTIFY_OK;
}
static int create_resource_common(struct mlx5_core_dev *dev,
@@ -487,10 +522,16 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev)
spin_lock_init(&table->lock);
INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
mlx5_qp_debugfs_init(dev);
+
+ table->nb.notifier_call = rsc_event_notifier;
+ mlx5_notifier_register(dev, &table->nb);
}
void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev)
{
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+
+ mlx5_notifier_unregister(dev, &table->nb);
mlx5_qp_debugfs_cleanup(dev);
}
@@ -670,3 +711,20 @@ int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
}
EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter);
+
+struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev,
+ int res_num,
+ enum mlx5_res_type res_type)
+{
+ u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN);
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+
+ return mlx5_get_rsc(table, rsn);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_res_hold);
+
+void mlx5_core_res_put(struct mlx5_core_rsc_common *res)
+{
+ mlx5_core_put_rsc(res);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_res_put);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index a0674962f02c..6e178030d8fb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -216,20 +216,10 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
if (!mlx5_core_is_pf(dev))
return -EPERM;
- if (num_vfs) {
- int ret;
-
- ret = mlx5_lag_forbid(dev);
- if (ret && (ret != -ENODEV))
- return ret;
- }
-
- if (num_vfs) {
+ if (num_vfs)
err = mlx5_sriov_enable(pdev, num_vfs);
- } else {
+ else
mlx5_sriov_disable(pdev);
- mlx5_lag_allow(dev);
- }
return err ? err : num_vfs;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
deleted file mode 100644
index 6a6fc9be01e6..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c
+++ /dev/null
@@ -1,716 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
-#include <linux/mlx5/srq.h>
-#include <rdma/ib_verbs.h>
-#include "mlx5_core.h"
-#include <linux/mlx5/transobj.h>
-
-void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
-{
- struct mlx5_srq_table *table = &dev->priv.srq_table;
- struct mlx5_core_srq *srq;
-
- spin_lock(&table->lock);
-
- srq = radix_tree_lookup(&table->tree, srqn);
- if (srq)
- atomic_inc(&srq->refcount);
-
- spin_unlock(&table->lock);
-
- if (!srq) {
- mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn);
- return;
- }
-
- srq->event(srq, event_type);
-
- if (atomic_dec_and_test(&srq->refcount))
- complete(&srq->free);
-}
-
-static int get_pas_size(struct mlx5_srq_attr *in)
-{
- u32 log_page_size = in->log_page_size + 12;
- u32 log_srq_size = in->log_size;
- u32 log_rq_stride = in->wqe_shift;
- u32 page_offset = in->page_offset;
- u32 po_quanta = 1 << (log_page_size - 6);
- u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride);
- u32 page_size = 1 << log_page_size;
- u32 rq_sz_po = rq_sz + (page_offset * po_quanta);
- u32 rq_num_pas = DIV_ROUND_UP(rq_sz_po, page_size);
-
- return rq_num_pas * sizeof(u64);
-}
-
-static void set_wq(void *wq, struct mlx5_srq_attr *in)
-{
- MLX5_SET(wq, wq, wq_signature, !!(in->flags
- & MLX5_SRQ_FLAG_WQ_SIG));
- MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size);
- MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4);
- MLX5_SET(wq, wq, log_wq_sz, in->log_size);
- MLX5_SET(wq, wq, page_offset, in->page_offset);
- MLX5_SET(wq, wq, lwm, in->lwm);
- MLX5_SET(wq, wq, pd, in->pd);
- MLX5_SET64(wq, wq, dbr_addr, in->db_record);
-}
-
-static void set_srqc(void *srqc, struct mlx5_srq_attr *in)
-{
- MLX5_SET(srqc, srqc, wq_signature, !!(in->flags
- & MLX5_SRQ_FLAG_WQ_SIG));
- MLX5_SET(srqc, srqc, log_page_size, in->log_page_size);
- MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift);
- MLX5_SET(srqc, srqc, log_srq_size, in->log_size);
- MLX5_SET(srqc, srqc, page_offset, in->page_offset);
- MLX5_SET(srqc, srqc, lwm, in->lwm);
- MLX5_SET(srqc, srqc, pd, in->pd);
- MLX5_SET64(srqc, srqc, dbr_addr, in->db_record);
- MLX5_SET(srqc, srqc, xrcd, in->xrcd);
- MLX5_SET(srqc, srqc, cqn, in->cqn);
-}
-
-static void get_wq(void *wq, struct mlx5_srq_attr *in)
-{
- if (MLX5_GET(wq, wq, wq_signature))
- in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
- in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz);
- in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4;
- in->log_size = MLX5_GET(wq, wq, log_wq_sz);
- in->page_offset = MLX5_GET(wq, wq, page_offset);
- in->lwm = MLX5_GET(wq, wq, lwm);
- in->pd = MLX5_GET(wq, wq, pd);
- in->db_record = MLX5_GET64(wq, wq, dbr_addr);
-}
-
-static void get_srqc(void *srqc, struct mlx5_srq_attr *in)
-{
- if (MLX5_GET(srqc, srqc, wq_signature))
- in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
- in->log_page_size = MLX5_GET(srqc, srqc, log_page_size);
- in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride);
- in->log_size = MLX5_GET(srqc, srqc, log_srq_size);
- in->page_offset = MLX5_GET(srqc, srqc, page_offset);
- in->lwm = MLX5_GET(srqc, srqc, lwm);
- in->pd = MLX5_GET(srqc, srqc, pd);
- in->db_record = MLX5_GET64(srqc, srqc, dbr_addr);
-}
-
-struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
-{
- struct mlx5_srq_table *table = &dev->priv.srq_table;
- struct mlx5_core_srq *srq;
-
- spin_lock(&table->lock);
-
- srq = radix_tree_lookup(&table->tree, srqn);
- if (srq)
- atomic_inc(&srq->refcount);
-
- spin_unlock(&table->lock);
-
- return srq;
-}
-EXPORT_SYMBOL(mlx5_core_get_srq);
-
-static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
-{
- u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0};
- void *create_in;
- void *srqc;
- void *pas;
- int pas_size;
- int inlen;
- int err;
-
- pas_size = get_pas_size(in);
- inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size;
- create_in = kvzalloc(inlen, GFP_KERNEL);
- if (!create_in)
- return -ENOMEM;
-
- MLX5_SET(create_srq_in, create_in, uid, in->uid);
- srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry);
- pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
-
- set_srqc(srqc, in);
- memcpy(pas, in->pas, pas_size);
-
- MLX5_SET(create_srq_in, create_in, opcode,
- MLX5_CMD_OP_CREATE_SRQ);
-
- err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
- sizeof(create_out));
- kvfree(create_in);
- if (!err) {
- srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
- srq->uid = in->uid;
- }
-
- return err;
-}
-
-static int destroy_srq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq)
-{
- u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0};
- u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0};
-
- MLX5_SET(destroy_srq_in, srq_in, opcode,
- MLX5_CMD_OP_DESTROY_SRQ);
- MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
- MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid);
-
- return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
- srq_out, sizeof(srq_out));
-}
-
-static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- u16 lwm, int is_srq)
-{
- u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
- u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
-
- MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ);
- MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ);
- MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn);
- MLX5_SET(arm_rq_in, srq_in, lwm, lwm);
- MLX5_SET(arm_rq_in, srq_in, uid, srq->uid);
-
- return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
- srq_out, sizeof(srq_out));
-}
-
-static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *out)
-{
- u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0};
- u32 *srq_out;
- void *srqc;
- int err;
-
- srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL);
- if (!srq_out)
- return -ENOMEM;
-
- MLX5_SET(query_srq_in, srq_in, opcode,
- MLX5_CMD_OP_QUERY_SRQ);
- MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn);
- err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
- srq_out, MLX5_ST_SZ_BYTES(query_srq_out));
- if (err)
- goto out;
-
- srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry);
- get_srqc(srqc, out);
- if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD)
- out->flags |= MLX5_SRQ_FLAG_ERR;
-out:
- kvfree(srq_out);
- return err;
-}
-
-static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
-{
- u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)];
- void *create_in;
- void *xrc_srqc;
- void *pas;
- int pas_size;
- int inlen;
- int err;
-
- pas_size = get_pas_size(in);
- inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size;
- create_in = kvzalloc(inlen, GFP_KERNEL);
- if (!create_in)
- return -ENOMEM;
-
- MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid);
- xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in,
- xrc_srq_context_entry);
- pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas);
-
- set_srqc(xrc_srqc, in);
- MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index);
- memcpy(pas, in->pas, pas_size);
- MLX5_SET(create_xrc_srq_in, create_in, opcode,
- MLX5_CMD_OP_CREATE_XRC_SRQ);
-
- memset(create_out, 0, sizeof(create_out));
- err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
- sizeof(create_out));
- if (err)
- goto out;
-
- srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn);
- srq->uid = in->uid;
-out:
- kvfree(create_in);
- return err;
-}
-
-static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq)
-{
- u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0};
- u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
-
- MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode,
- MLX5_CMD_OP_DESTROY_XRC_SRQ);
- MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
- MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid);
-
- return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
- xrcsrq_out, sizeof(xrcsrq_out));
-}
-
-static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq, u16 lwm)
-{
- u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
- u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
-
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm);
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid);
-
- return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
- xrcsrq_out, sizeof(xrcsrq_out));
-}
-
-static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *out)
-{
- u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)];
- u32 *xrcsrq_out;
- void *xrc_srqc;
- int err;
-
- xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL);
- if (!xrcsrq_out)
- return -ENOMEM;
- memset(xrcsrq_in, 0, sizeof(xrcsrq_in));
-
- MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode,
- MLX5_CMD_OP_QUERY_XRC_SRQ);
- MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
-
- err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out,
- MLX5_ST_SZ_BYTES(query_xrc_srq_out));
- if (err)
- goto out;
-
- xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out,
- xrc_srq_context_entry);
- get_srqc(xrc_srqc, out);
- if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD)
- out->flags |= MLX5_SRQ_FLAG_ERR;
-
-out:
- kvfree(xrcsrq_out);
- return err;
-}
-
-static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
-{
- void *create_in;
- void *rmpc;
- void *wq;
- int pas_size;
- int inlen;
- int err;
-
- pas_size = get_pas_size(in);
- inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
- create_in = kvzalloc(inlen, GFP_KERNEL);
- if (!create_in)
- return -ENOMEM;
-
- rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx);
- wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
-
- MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
- MLX5_SET(create_rmp_in, create_in, uid, in->uid);
- set_wq(wq, in);
- memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
-
- err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn);
- if (!err)
- srq->uid = in->uid;
-
- kvfree(create_in);
- return err;
-}
-
-static int destroy_rmp_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {};
- u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {};
-
- MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
- MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn);
- MLX5_SET(destroy_rmp_in, in, uid, srq->uid);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int arm_rmp_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq,
- u16 lwm)
-{
- void *in;
- void *rmpc;
- void *wq;
- void *bitmask;
- int err;
-
- in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
- rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx);
- bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask);
- wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
-
- MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY);
- MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn);
- MLX5_SET(modify_rmp_in, in, uid, srq->uid);
- MLX5_SET(wq, wq, lwm, lwm);
- MLX5_SET(rmp_bitmask, bitmask, lwm, 1);
- MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
-
- err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
-
- kvfree(in);
- return err;
-}
-
-static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *out)
-{
- u32 *rmp_out;
- void *rmpc;
- int err;
-
- rmp_out = kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL);
- if (!rmp_out)
- return -ENOMEM;
-
- err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out);
- if (err)
- goto out;
-
- rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context);
- get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out);
- if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY)
- out->flags |= MLX5_SRQ_FLAG_ERR;
-
-out:
- kvfree(rmp_out);
- return err;
-}
-
-static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
-{
- u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0};
- void *create_in;
- void *xrqc;
- void *wq;
- int pas_size;
- int inlen;
- int err;
-
- pas_size = get_pas_size(in);
- inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size;
- create_in = kvzalloc(inlen, GFP_KERNEL);
- if (!create_in)
- return -ENOMEM;
-
- xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context);
- wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
-
- set_wq(wq, in);
- memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size);
-
- if (in->type == IB_SRQT_TM) {
- MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING);
- if (in->flags & MLX5_SRQ_FLAG_RNDV)
- MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV);
- MLX5_SET(xrqc, xrqc,
- tag_matching_topology_context.log_matching_list_sz,
- in->tm_log_list_size);
- }
- MLX5_SET(xrqc, xrqc, user_index, in->user_index);
- MLX5_SET(xrqc, xrqc, cqn, in->cqn);
- MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ);
- MLX5_SET(create_xrq_in, create_in, uid, in->uid);
- err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
- sizeof(create_out));
- kvfree(create_in);
- if (!err) {
- srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn);
- srq->uid = in->uid;
- }
-
- return err;
-}
-
-static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0};
-
- MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ);
- MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn);
- MLX5_SET(destroy_xrq_in, in, uid, srq->uid);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int arm_xrq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq,
- u16 lwm)
-{
- u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
-
- MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ);
- MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ);
- MLX5_SET(arm_rq_in, in, srq_number, srq->srqn);
- MLX5_SET(arm_rq_in, in, lwm, lwm);
- MLX5_SET(arm_rq_in, in, uid, srq->uid);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *out)
-{
- u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0};
- u32 *xrq_out;
- int outlen = MLX5_ST_SZ_BYTES(query_xrq_out);
- void *xrqc;
- int err;
-
- xrq_out = kvzalloc(outlen, GFP_KERNEL);
- if (!xrq_out)
- return -ENOMEM;
-
- MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ);
- MLX5_SET(query_xrq_in, in, xrqn, srq->srqn);
-
- err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen);
- if (err)
- goto out;
-
- xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context);
- get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out);
- if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD)
- out->flags |= MLX5_SRQ_FLAG_ERR;
- out->tm_next_tag =
- MLX5_GET(xrqc, xrqc,
- tag_matching_topology_context.append_next_index);
- out->tm_hw_phase_cnt =
- MLX5_GET(xrqc, xrqc,
- tag_matching_topology_context.hw_phase_cnt);
- out->tm_sw_phase_cnt =
- MLX5_GET(xrqc, xrqc,
- tag_matching_topology_context.sw_phase_cnt);
-
-out:
- kvfree(xrq_out);
- return err;
-}
-
-static int create_srq_split(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
-{
- if (!dev->issi)
- return create_srq_cmd(dev, srq, in);
- switch (srq->common.res) {
- case MLX5_RES_XSRQ:
- return create_xrc_srq_cmd(dev, srq, in);
- case MLX5_RES_XRQ:
- return create_xrq_cmd(dev, srq, in);
- default:
- return create_rmp_cmd(dev, srq, in);
- }
-}
-
-static int destroy_srq_split(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq)
-{
- if (!dev->issi)
- return destroy_srq_cmd(dev, srq);
- switch (srq->common.res) {
- case MLX5_RES_XSRQ:
- return destroy_xrc_srq_cmd(dev, srq);
- case MLX5_RES_XRQ:
- return destroy_xrq_cmd(dev, srq);
- default:
- return destroy_rmp_cmd(dev, srq);
- }
-}
-
-int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
-{
- int err;
- struct mlx5_srq_table *table = &dev->priv.srq_table;
-
- switch (in->type) {
- case IB_SRQT_XRC:
- srq->common.res = MLX5_RES_XSRQ;
- break;
- case IB_SRQT_TM:
- srq->common.res = MLX5_RES_XRQ;
- break;
- default:
- srq->common.res = MLX5_RES_SRQ;
- }
-
- err = create_srq_split(dev, srq, in);
- if (err)
- return err;
-
- atomic_set(&srq->refcount, 1);
- init_completion(&srq->free);
-
- spin_lock_irq(&table->lock);
- err = radix_tree_insert(&table->tree, srq->srqn, srq);
- spin_unlock_irq(&table->lock);
- if (err) {
- mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn);
- goto err_destroy_srq_split;
- }
-
- return 0;
-
-err_destroy_srq_split:
- destroy_srq_split(dev, srq);
-
- return err;
-}
-EXPORT_SYMBOL(mlx5_core_create_srq);
-
-int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
-{
- struct mlx5_srq_table *table = &dev->priv.srq_table;
- struct mlx5_core_srq *tmp;
- int err;
-
- spin_lock_irq(&table->lock);
- tmp = radix_tree_delete(&table->tree, srq->srqn);
- spin_unlock_irq(&table->lock);
- if (!tmp) {
- mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn);
- return -EINVAL;
- }
- if (tmp != srq) {
- mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn);
- return -EINVAL;
- }
-
- err = destroy_srq_split(dev, srq);
- if (err)
- return err;
-
- if (atomic_dec_and_test(&srq->refcount))
- complete(&srq->free);
- wait_for_completion(&srq->free);
-
- return 0;
-}
-EXPORT_SYMBOL(mlx5_core_destroy_srq);
-
-int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *out)
-{
- if (!dev->issi)
- return query_srq_cmd(dev, srq, out);
- switch (srq->common.res) {
- case MLX5_RES_XSRQ:
- return query_xrc_srq_cmd(dev, srq, out);
- case MLX5_RES_XRQ:
- return query_xrq_cmd(dev, srq, out);
- default:
- return query_rmp_cmd(dev, srq, out);
- }
-}
-EXPORT_SYMBOL(mlx5_core_query_srq);
-
-int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- u16 lwm, int is_srq)
-{
- if (!dev->issi)
- return arm_srq_cmd(dev, srq, lwm, is_srq);
- switch (srq->common.res) {
- case MLX5_RES_XSRQ:
- return arm_xrc_srq_cmd(dev, srq, lwm);
- case MLX5_RES_XRQ:
- return arm_xrq_cmd(dev, srq, lwm);
- default:
- return arm_rmp_cmd(dev, srq, lwm);
- }
-}
-EXPORT_SYMBOL(mlx5_core_arm_srq);
-
-void mlx5_init_srq_table(struct mlx5_core_dev *dev)
-{
- struct mlx5_srq_table *table = &dev->priv.srq_table;
-
- memset(table, 0, sizeof(*table));
- spin_lock_init(&table->lock);
- INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
-}
-
-void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev)
-{
- /* nothing */
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
index a1ee9a8a769e..c4d4b76096dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -258,115 +258,6 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
}
EXPORT_SYMBOL(mlx5_core_destroy_tis);
-int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *rmpn)
-{
- u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0};
- int err;
-
- MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP);
- err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
- if (!err)
- *rmpn = MLX5_GET(create_rmp_out, out, rmpn);
-
- return err;
-}
-
-int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen)
-{
- u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0};
-
- MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
- return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
-}
-
-int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0};
-
- MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
- MLX5_SET(destroy_rmp_in, in, rmpn, rmpn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out,
- sizeof(out));
-}
-
-int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out)
-{
- u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0};
- int outlen = MLX5_ST_SZ_BYTES(query_rmp_out);
-
- MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP);
- MLX5_SET(query_rmp_in, in, rmpn, rmpn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
-}
-
-int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm)
-{
- void *in;
- void *rmpc;
- void *wq;
- void *bitmask;
- int err;
-
- in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
- rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx);
- bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask);
- wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
-
- MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY);
- MLX5_SET(modify_rmp_in, in, rmpn, rmpn);
- MLX5_SET(wq, wq, lwm, lwm);
- MLX5_SET(rmp_bitmask, bitmask, lwm, 1);
- MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
-
- err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
-
- kvfree(in);
-
- return err;
-}
-
-int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *xsrqn)
-{
- u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0};
- int err;
-
- MLX5_SET(create_xrc_srq_in, in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ);
- err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
- if (!err)
- *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn);
-
- return err;
-}
-
-int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
-
- MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ);
- MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm)
-{
- u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
-
- MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
- MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn);
- MLX5_SET(arm_xrc_srq_in, in, lwm, lwm);
- MLX5_SET(arm_xrc_srq_in, in, op_mod,
- MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *rqtn)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index cfbea66b4879..9b150ce9d315 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -1204,9 +1204,19 @@ EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport);
u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev)
{
- if (!mdev->sys_image_guid)
- mlx5_query_nic_vport_system_image_guid(mdev, &mdev->sys_image_guid);
+ int port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ u64 tmp = 0;
- return mdev->sys_image_guid;
+ if (mdev->sys_image_guid)
+ return mdev->sys_image_guid;
+
+ if (port_type_cap == MLX5_CAP_PORT_TYPE_ETH)
+ mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
+ else
+ mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
+
+ mdev->sys_image_guid = tmp;
+
+ return tmp;
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 2dcbf1ebfd6a..953cc8efba69 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -155,7 +155,8 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *cqc, struct mlx5_cqwq *wq,
struct mlx5_wq_ctrl *wq_ctrl)
{
- u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) + 6;
+ /* CQE_STRIDE_128 and CQE_STRIDE_128_PAD both mean 128B stride */
+ u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) == CQE_STRIDE_64 ? 6 : 7;
u8 log_wq_sz = MLX5_GET(cqc, cqc, log_cq_size);
int err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index b1293d153a58..ea934a48c90a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -177,9 +177,14 @@ static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq)
return mlx5_cqwq_ctr2ix(wq, wq->cc);
}
-static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
+static inline struct mlx5_cqe64 *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
{
- return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+ struct mlx5_cqe64 *cqe = mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+
+ /* For 128B CQEs the data is in the last 64B */
+ cqe += wq->fbc.log_stride == 7;
+
+ return cqe;
}
static inline u32 mlx5_cqwq_get_ctr_wrap_cnt(struct mlx5_cqwq *wq, u32 ctr)