Diffstat (limited to 'drivers/net/ethernet/mellanox')
151 files changed, 20275 insertions, 5434 deletions
diff --git a/drivers/net/ethernet/mellanox/Kconfig b/drivers/net/ethernet/mellanox/Kconfig index d54701047401..872548cd9431 100644 --- a/drivers/net/ethernet/mellanox/Kconfig +++ b/drivers/net/ethernet/mellanox/Kconfig @@ -5,9 +5,10 @@ config NET_VENDOR_MELLANOX bool "Mellanox devices" default y - depends on PCI + depends on PCI || I2C ---help--- - If you have a network (Ethernet) card belonging to this class, say Y. + If you have a network (Ethernet or RDMA) device belonging to this + class, say Y. Note that the answer to this question doesn't directly affect the kernel: saying N will just cause the configurator to skip all @@ -19,5 +20,6 @@ if NET_VENDOR_MELLANOX source "drivers/net/ethernet/mellanox/mlx4/Kconfig" source "drivers/net/ethernet/mellanox/mlx5/core/Kconfig" source "drivers/net/ethernet/mellanox/mlxsw/Kconfig" +source "drivers/net/ethernet/mellanox/mlxfw/Kconfig" endif # NET_VENDOR_MELLANOX diff --git a/drivers/net/ethernet/mellanox/Makefile b/drivers/net/ethernet/mellanox/Makefile index 2e2a5ec509ac..016aa263bc04 100644 --- a/drivers/net/ethernet/mellanox/Makefile +++ b/drivers/net/ethernet/mellanox/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_MLX4_CORE) += mlx4/ obj-$(CONFIG_MLX5_CORE) += mlx5/core/ obj-$(CONFIG_MLXSW_CORE) += mlxsw/ +obj-$(CONFIG_MLXFW) += mlxfw/ diff --git a/drivers/net/ethernet/mellanox/mlx4/Makefile b/drivers/net/ethernet/mellanox/mlx4/Makefile index c82217e0d22d..16b10d01fcf4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Makefile +++ b/drivers/net/ethernet/mellanox/mlx4/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_MLX4_CORE) += mlx4_core.o mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o fw_qos.o icm.o intf.o \ diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index 249a4584401a..6dabd983e7e0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -186,7 +186,7 @@ int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, bitmap->effective_len = bitmap->avail; spin_lock_init(&bitmap->lock); bitmap->table = kzalloc(BITS_TO_LONGS(bitmap->max) * - sizeof (long), GFP_KERNEL); + sizeof(long), GFP_KERNEL); if (!bitmap->table) return -ENOMEM; @@ -283,7 +283,7 @@ int mlx4_zone_add_one(struct mlx4_zone_allocator *zone_alloc, } /* Should be called under a lock */ -static int __mlx4_zone_remove_one_entry(struct mlx4_zone_entry *entry) +static void __mlx4_zone_remove_one_entry(struct mlx4_zone_entry *entry) { struct mlx4_zone_allocator *zone_alloc = entry->allocator; @@ -315,8 +315,6 @@ static int __mlx4_zone_remove_one_entry(struct mlx4_zone_entry *entry) } zone_alloc->mask = mask; } - - return 0; } void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc) @@ -457,7 +455,7 @@ struct mlx4_bitmap *mlx4_zone_get_bitmap(struct mlx4_zone_allocator *zones, u32 int mlx4_zone_remove_one(struct mlx4_zone_allocator *zones, u32 uid) { struct mlx4_zone_entry *zone; - int res; + int res = 0; spin_lock(&zones->lock); @@ -468,7 +466,7 @@ int mlx4_zone_remove_one(struct mlx4_zone_allocator *zones, u32 uid) goto out; } - res = __mlx4_zone_remove_one_entry(zone); + __mlx4_zone_remove_one_entry(zone); out: spin_unlock(&zones->lock); @@ -578,7 +576,7 @@ out: } static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size, - struct mlx4_buf *buf, gfp_t gfp) + struct mlx4_buf *buf) { dma_addr_t t; @@ -587,7 +585,7 @@ static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size, buf->page_shift = get_order(size) + PAGE_SHIFT; 
buf->direct.buf = dma_zalloc_coherent(&dev->persist->pdev->dev, - size, &t, gfp); + size, &t, GFP_KERNEL); if (!buf->direct.buf) return -ENOMEM; @@ -607,10 +605,10 @@ static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size, * multiple pages, so we don't require too much contiguous memory. */ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, - struct mlx4_buf *buf, gfp_t gfp) + struct mlx4_buf *buf) { if (size <= max_direct) { - return mlx4_buf_direct_alloc(dev, size, buf, gfp); + return mlx4_buf_direct_alloc(dev, size, buf); } else { dma_addr_t t; int i; @@ -620,14 +618,14 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, buf->npages = buf->nbufs; buf->page_shift = PAGE_SHIFT; buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), - gfp); + GFP_KERNEL); if (!buf->page_list) return -ENOMEM; for (i = 0; i < buf->nbufs; ++i) { buf->page_list[i].buf = dma_zalloc_coherent(&dev->persist->pdev->dev, - PAGE_SIZE, &t, gfp); + PAGE_SIZE, &t, GFP_KERNEL); if (!buf->page_list[i].buf) goto err_free; @@ -663,12 +661,11 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) } EXPORT_SYMBOL_GPL(mlx4_buf_free); -static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device, - gfp_t gfp) +static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device) { struct mlx4_db_pgdir *pgdir; - pgdir = kzalloc(sizeof *pgdir, gfp); + pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL); if (!pgdir) return NULL; @@ -676,7 +673,7 @@ static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device, pgdir->bits[0] = pgdir->order0; pgdir->bits[1] = pgdir->order1; pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE, - &pgdir->db_dma, gfp); + &pgdir->db_dma, GFP_KERNEL); if (!pgdir->db_page) { kfree(pgdir); return NULL; @@ -716,7 +713,7 @@ found: return 0; } -int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp) +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_db_pgdir *pgdir; @@ -728,7 +725,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp if (!mlx4_alloc_db_from_pgdir(pgdir, db, order)) goto out; - pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev, gfp); + pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev); if (!pgdir) { ret = -ENOMEM; goto out; @@ -780,13 +777,13 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, { int err; - err = mlx4_db_alloc(dev, &wqres->db, 1, GFP_KERNEL); + err = mlx4_db_alloc(dev, &wqres->db, 1); if (err) return err; *wqres->db.db = 0; - err = mlx4_buf_direct_alloc(dev, size, &wqres->buf, GFP_KERNEL); + err = mlx4_buf_direct_alloc(dev, size, &wqres->buf); if (err) goto err_db; @@ -795,7 +792,7 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf, GFP_KERNEL); + err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf); if (err) goto err_mtt; diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index c1af47e45d3f..6a9086dc1e92 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1958,19 +1958,19 @@ static void mlx4_allocate_port_vpps(struct mlx4_dev *dev, int port) int i; int err; int num_vfs; - u16 availible_vpp; + u16 available_vpp; u8 vpp_param[MLX4_NUM_UP]; struct mlx4_qos_manager *port_qos; struct mlx4_priv *priv = 
mlx4_priv(dev); - err = mlx4_ALLOCATE_VPP_get(dev, port, &availible_vpp, vpp_param); + err = mlx4_ALLOCATE_VPP_get(dev, port, &available_vpp, vpp_param); if (err) { - mlx4_info(dev, "Failed query availible VPPs\n"); + mlx4_info(dev, "Failed query available VPPs\n"); return; } port_qos = &priv->mfunc.master.qos_ctl[port]; - num_vfs = (availible_vpp / + num_vfs = (available_vpp / bitmap_weight(port_qos->priority_bm, MLX4_NUM_UP)); for (i = 0; i < MLX4_NUM_UP; i++) { @@ -1985,14 +1985,14 @@ static void mlx4_allocate_port_vpps(struct mlx4_dev *dev, int port) } /* Query actual allocated VPP, just to make sure */ - err = mlx4_ALLOCATE_VPP_get(dev, port, &availible_vpp, vpp_param); + err = mlx4_ALLOCATE_VPP_get(dev, port, &available_vpp, vpp_param); if (err) { - mlx4_info(dev, "Failed query availible VPPs\n"); + mlx4_info(dev, "Failed query available VPPs\n"); return; } port_qos->num_of_qos_vfs = num_vfs; - mlx4_dbg(dev, "Port %d Availible VPPs %d\n", port, availible_vpp); + mlx4_dbg(dev, "Port %d Available VPPs %d\n", port, available_vpp); for (i = 0; i < MLX4_NUM_UP; i++) mlx4_dbg(dev, "Port %d UP %d Allocated %d VPPs\n", port, i, @@ -2535,8 +2535,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } if (!priv->cmd.pool) { - priv->cmd.pool = pci_pool_create("mlx4_cmd", - dev->persist->pdev, + priv->cmd.pool = dma_pool_create("mlx4_cmd", + &dev->persist->pdev->dev, MLX4_MAILBOX_SIZE, MLX4_MAILBOX_SIZE, 0); if (!priv->cmd.pool) @@ -2607,7 +2607,7 @@ void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask) struct mlx4_priv *priv = mlx4_priv(dev); if (priv->cmd.pool && (cleanup_mask & MLX4_CMD_CLEANUP_POOL)) { - pci_pool_destroy(priv->cmd.pool); + dma_pool_destroy(priv->cmd.pool); priv->cmd.pool = NULL; } @@ -2637,7 +2637,7 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) int err = 0; priv->cmd.context = kmalloc(priv->cmd.max_cmds * - sizeof (struct mlx4_cmd_context), + sizeof(struct mlx4_cmd_context), GFP_KERNEL); if (!priv->cmd.context) return -ENOMEM; @@ -2695,11 +2695,11 @@ struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev) { struct mlx4_cmd_mailbox *mailbox; - mailbox = kmalloc(sizeof *mailbox, GFP_KERNEL); + mailbox = kmalloc(sizeof(*mailbox), GFP_KERNEL); if (!mailbox) return ERR_PTR(-ENOMEM); - mailbox->buf = pci_pool_zalloc(mlx4_priv(dev)->cmd.pool, GFP_KERNEL, + mailbox->buf = dma_pool_zalloc(mlx4_priv(dev)->cmd.pool, GFP_KERNEL, &mailbox->dma); if (!mailbox->buf) { kfree(mailbox); @@ -2716,7 +2716,7 @@ void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, if (!mailbox) return; - pci_pool_free(mlx4_priv(dev)->cmd.pool, mailbox->buf, mailbox->dma); + dma_pool_free(mlx4_priv(dev)->cmd.pool, mailbox->buf, mailbox->dma); kfree(mailbox); } EXPORT_SYMBOL_GPL(mlx4_free_cmd_mailbox); @@ -2891,7 +2891,7 @@ static int mlx4_set_vport_qos(struct mlx4_priv *priv, int slave, int port, memset(vpp_qos, 0, sizeof(struct mlx4_vport_qos_param) * MLX4_NUM_UP); if (slave > port_qos->num_of_qos_vfs) { - mlx4_info(dev, "No availible VPP resources for this VF\n"); + mlx4_info(dev, "No available VPP resources for this VF\n"); return -EINVAL; } @@ -3280,7 +3280,7 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port)) mlx4_dbg(dev, - "updating vf %d port %d no link state HW enforcment\n", + "updating vf %d port %d no link state HW enforcement\n", vf, port); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index fa6d2354a0e9..72eb50cd5ecd 100644 --- 
a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -224,11 +224,11 @@ int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn) if (*cqn == -1) return -ENOMEM; - err = mlx4_table_get(dev, &cq_table->table, *cqn, GFP_KERNEL); + err = mlx4_table_get(dev, &cq_table->table, *cqn); if (err) goto err_out; - err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn, GFP_KERNEL); + err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn); if (err) goto err_put; return 0; @@ -241,13 +241,14 @@ err_out: return err; } -static int mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn) +static int mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn, u8 usage) { + u32 in_modifier = RES_CQ | (((u32)usage & 3) << 30); u64 out_param; int err; if (mlx4_is_mfunc(dev)) { - err = mlx4_cmd_imm(dev, 0, &out_param, RES_CQ, + err = mlx4_cmd_imm(dev, 0, &out_param, in_modifier, RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) @@ -303,7 +304,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, cq->vector = vector; - err = mlx4_cq_alloc_icm(dev, &cq->cqn); + err = mlx4_cq_alloc_icm(dev, &cq->cqn, cq->usage); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 09dd3776db76..1e487acb4667 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -140,22 +140,32 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, (cq->type == RX && priv->hwtstamp_config.rx_filter)) timestamp_en = 1; + cq->mcq.usage = MLX4_RES_USAGE_DRIVER; err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar, cq->wqres.db.dma, &cq->mcq, cq->vector, 0, timestamp_en); if (err) goto free_eq; - cq->mcq.comp = cq->type != RX ? 
mlx4_en_tx_irq : mlx4_en_rx_irq; cq->mcq.event = mlx4_en_cq_event; - if (cq->type != RX) + switch (cq->type) { + case TX: + cq->mcq.comp = mlx4_en_tx_irq; netif_tx_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, NAPI_POLL_WEIGHT); - else + napi_enable(&cq->napi); + break; + case RX: + cq->mcq.comp = mlx4_en_rx_irq; netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64); - - napi_enable(&cq->napi); + napi_enable(&cq->napi); + break; + case TX_XDP: + /* nothing regarding napi, it's shared with rx ring */ + cq->xdp_busy = false; + break; + } return 0; @@ -184,8 +194,10 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq) void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) { - napi_disable(&cq->napi); - netif_napi_del(&cq->napi); + if (cq->type != TX_XDP) { + napi_disable(&cq->napi); + netif_napi_del(&cq->napi); + } mlx4_cq_free(priv->mdev->dev, &cq->mcq); } @@ -197,12 +209,10 @@ int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) cq->moder_cnt, cq->moder_time); } -int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) +void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) { mlx4_cq_arm(&cq->mcq, MLX4_CQ_DB_REQ_NOT, priv->mdev->uar_map, &priv->mdev->uar_lock); - - return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c index 1dae8e40fb25..5f41dc92aa68 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -238,7 +238,7 @@ static u8 mlx4_en_dcbnl_set_state(struct net_device *dev, u8 state) priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED; } - if (mlx4_en_setup_tc(dev, num_tcs)) + if (mlx4_en_alloc_tx_queue_per_tc(dev, num_tcs)) return 1; return 0; @@ -303,7 +303,7 @@ static int mlx4_en_ets_validate(struct mlx4_en_priv *priv, struct ieee_ets *ets) int has_ets_tc = 0; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - if (ets->prio_tc[i] >= MLX4_EN_NUM_UP) { + if (ets->prio_tc[i] >= MLX4_EN_NUM_UP_HIGH) { en_err(priv, "Bad priority in UP <=> TC mapping. TC: %d, UP: %d\n", i, ets->prio_tc[i]); return -EINVAL; @@ -472,7 +472,7 @@ static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode) goto err; if (mlx4_en_dcbnl_ieee_setpfc(dev, &pfc)) goto err; - if (mlx4_en_setup_tc(dev, 0)) + if (mlx4_en_alloc_tx_queue_per_tc(dev, 0)) goto err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index ffbcb27c05e5..3d4e4a5d00d1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -89,7 +89,7 @@ mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) struct mlx4_en_dev *mdev = priv->mdev; strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver)); - strlcpy(drvinfo->version, DRV_VERSION " (" DRV_RELDATE ")", + strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version)); snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d.%d", @@ -223,6 +223,7 @@ static void mlx4_en_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) { struct mlx4_en_priv *priv = netdev_priv(netdev); + struct mlx4_caps *caps = &priv->mdev->dev->caps; int err = 0; u64 config = 0; u64 mask; @@ -235,24 +236,24 @@ static void mlx4_en_get_wol(struct net_device *netdev, mask = (priv->port == 1) ? 
MLX4_DEV_CAP_FLAG_WOL_PORT1 : MLX4_DEV_CAP_FLAG_WOL_PORT2; - if (!(priv->mdev->dev->caps.flags & mask)) { + if (!(caps->flags & mask)) { wol->supported = 0; wol->wolopts = 0; return; } + if (caps->wol_port[priv->port]) + wol->supported = WAKE_MAGIC; + else + wol->supported = 0; + err = mlx4_wol_read(priv->mdev->dev, &config, priv->port); if (err) { en_err(priv, "Failed to get WoL information\n"); return; } - if (config & MLX4_EN_WOL_MAGIC) - wol->supported = WAKE_MAGIC; - else - wol->supported = 0; - - if (config & MLX4_EN_WOL_ENABLED) + if ((config & MLX4_EN_WOL_ENABLED) && (config & MLX4_EN_WOL_MAGIC)) wol->wolopts = WAKE_MAGIC; else wol->wolopts = 0; @@ -1750,7 +1751,8 @@ static void mlx4_en_get_channels(struct net_device *dev, channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP; channel->rx_count = priv->rx_ring_num; - channel->tx_count = priv->tx_ring_num[TX] / MLX4_EN_NUM_UP; + channel->tx_count = priv->tx_ring_num[TX] / + priv->prof->num_up; } static int mlx4_en_set_channels(struct net_device *dev, @@ -1763,6 +1765,7 @@ static int mlx4_en_set_channels(struct net_device *dev, int port_up = 0; int xdp_count; int err = 0; + u8 up; if (!channel->tx_count || !channel->rx_count) return -EINVAL; @@ -1773,18 +1776,19 @@ static int mlx4_en_set_channels(struct net_device *dev, mutex_lock(&mdev->state_lock); xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0; - if (channel->tx_count * MLX4_EN_NUM_UP + xdp_count > MAX_TX_RINGS) { + if (channel->tx_count * priv->prof->num_up + xdp_count > + MAX_TX_RINGS) { err = -EINVAL; en_err(priv, "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n", - channel->tx_count * MLX4_EN_NUM_UP + xdp_count, + channel->tx_count * priv->prof->num_up + xdp_count, MAX_TX_RINGS); goto out; } memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); new_prof.num_tx_rings_p_up = channel->tx_count; - new_prof.tx_ring_num[TX] = channel->tx_count * MLX4_EN_NUM_UP; + new_prof.tx_ring_num[TX] = channel->tx_count * priv->prof->num_up; new_prof.tx_ring_num[TX_XDP] = xdp_count; new_prof.rx_ring_num = channel->rx_count; @@ -1799,11 +1803,11 @@ static int mlx4_en_set_channels(struct net_device *dev, mlx4_en_safe_replace_resources(priv, tmp); - netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]); netif_set_real_num_rx_queues(dev, priv->rx_ring_num); - if (netdev_get_num_tc(dev)) - mlx4_en_setup_tc(dev, MLX4_EN_NUM_UP); + up = (priv->prof->num_up == MLX4_EN_NUM_UP_LOW) ? 
+ 0 : priv->prof->num_up; + mlx4_en_setup_tc(dev, up); en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num[TX]); en_warn(priv, "Using %d RX rings\n", priv->rx_ring_num); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 36a7a54bbb82..686e18de9a97 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -46,11 +46,11 @@ MODULE_AUTHOR("Liran Liss, Yevgeny Petrilin"); MODULE_DESCRIPTION("Mellanox ConnectX HCA Ethernet driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION " ("DRV_RELDATE")"); +MODULE_VERSION(DRV_VERSION); static const char mlx4_en_version[] = DRV_NAME ": Mellanox ConnectX HCA Ethernet driver v" - DRV_VERSION " (" DRV_RELDATE ")\n"; + DRV_VERSION "\n"; #define MLX4_EN_PARM_INT(X, def_val, desc) \ static unsigned int X = def_val;\ @@ -125,9 +125,9 @@ void mlx4_en_update_loopback_state(struct net_device *dev, priv->flags |= MLX4_EN_FLAG_ENABLE_HW_LOOPBACK; mutex_lock(&priv->mdev->state_lock); - if (priv->mdev->dev->caps.flags2 & - MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB && - priv->rss_map.indir_qp.qpn) { + if ((priv->mdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB) && + priv->rss_map.indir_qp && priv->rss_map.indir_qp->qpn) { int i; int err = 0; int loopback = !!(features & NETIF_F_LOOPBACK); @@ -147,7 +147,7 @@ void mlx4_en_update_loopback_state(struct net_device *dev, mutex_unlock(&priv->mdev->state_lock); } -static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) +static void mlx4_en_get_profile(struct mlx4_en_dev *mdev) { struct mlx4_en_profile *params = &mdev->profile; int i; @@ -169,13 +169,13 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) params->prof[i].tx_ppp = pfctx; params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE; params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE; + params->prof[i].num_up = MLX4_EN_NUM_UP_LOW; + params->prof[i].num_tx_rings_p_up = params->num_tx_rings_p_up; params->prof[i].tx_ring_num[TX] = params->num_tx_rings_p_up * - MLX4_EN_NUM_UP; + params->prof[i].num_up; params->prof[i].rss_rings = 0; params->prof[i].inline_thold = inline_thold; } - - return 0; } static void *mlx4_en_get_netdev(struct mlx4_dev *dev, void *ctx, u8 port) @@ -307,10 +307,7 @@ static void *mlx4_en_add(struct mlx4_dev *dev) } /* Build device profile according to supplied module parameters */ - if (mlx4_en_get_profile(mdev)) { - mlx4_err(mdev, "Bad module parameters, aborting\n"); - goto err_mr; - } + mlx4_en_get_profile(mdev); /* Configure which ports to start according to module parameters */ mdev->port_cnt = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 94fab20ef146..9c218f1cfc6c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -60,11 +60,11 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) int i; unsigned int offset = 0; - if (up && up != MLX4_EN_NUM_UP) + if (up && up != MLX4_EN_NUM_UP_HIGH) return -EINVAL; netdev_set_num_tc(dev, up); - + netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]); /* Partition Tx queues evenly amongst UP's */ for (i = 0; i < up; i++) { netdev_set_tc_queue(dev, i, priv->num_tx_rings_p_up, offset); @@ -86,15 +86,64 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) return 0; } -static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, __be16 proto, - struct tc_to_netdev *tc) +int 
mlx4_en_alloc_tx_queue_per_tc(struct net_device *dev, u8 tc) { - if (tc->type != TC_SETUP_MQPRIO) + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_port_profile new_prof; + struct mlx4_en_priv *tmp; + int port_up = 0; + int err = 0; + + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + mutex_lock(&mdev->state_lock); + memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); + new_prof.num_up = (tc == 0) ? MLX4_EN_NUM_UP_LOW : + MLX4_EN_NUM_UP_HIGH; + new_prof.tx_ring_num[TX] = new_prof.num_tx_rings_p_up * + new_prof.num_up; + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true); + if (err) + goto out; + + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev, 1); + } + + mlx4_en_safe_replace_resources(priv, tmp); + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) { + en_err(priv, "Failed starting port for setup TC\n"); + goto out; + } + } + + err = mlx4_en_setup_tc(dev, tc); +out: + mutex_unlock(&mdev->state_lock); + kfree(tmp); + return err; +} + +static int __mlx4_en_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + struct tc_mqprio_qopt *mqprio = type_data; + + if (type != TC_SETUP_MQPRIO) + return -EOPNOTSUPP; + + if (mqprio->num_tc && mqprio->num_tc != MLX4_EN_NUM_UP_HIGH) return -EINVAL; - tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; - return mlx4_en_setup_tc(dev, tc->mqprio->num_tc); + return mlx4_en_alloc_tx_queue_per_tc(dev, mqprio->num_tc); } #ifdef CONFIG_RFS_ACCEL @@ -595,13 +644,16 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv) return err; } + en_info(priv, "Steering Mode %d\n", dev->caps.steering_mode); + if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { int base_qpn = mlx4_get_base_qpn(dev, priv->port); *qpn = base_qpn + index; return 0; } - err = mlx4_qp_reserve_range(dev, 1, 1, qpn, MLX4_RESERVE_A0_QP); + err = mlx4_qp_reserve_range(dev, 1, 1, qpn, MLX4_RESERVE_A0_QP, + MLX4_RES_USAGE_DRIVER); en_dbg(DRV, priv, "Reserved qp %d\n", *qpn); if (err) { en_err(priv, "Failed to reserve qp for mac registration\n"); @@ -681,6 +733,21 @@ static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn, return __mlx4_replace_mac(dev, priv->port, qpn, new_mac_u64); } +static void mlx4_en_update_user_mac(struct mlx4_en_priv *priv, + unsigned char new_mac[ETH_ALEN + 2]) +{ + struct mlx4_en_dev *mdev = priv->mdev; + int err; + + if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_USER_MAC_EN)) + return; + + err = mlx4_SET_PORT_user_mac(mdev->dev, priv->port, new_mac); + if (err) + en_err(priv, "Failed to pass user MAC(%pM) to Firmware for port %d, with error %d\n", + new_mac, priv->port, err); +} + static int mlx4_en_do_set_mac(struct mlx4_en_priv *priv, unsigned char new_mac[ETH_ALEN + 2]) { @@ -715,8 +782,12 @@ static int mlx4_en_set_mac(struct net_device *dev, void *addr) mutex_lock(&mdev->state_lock); memcpy(new_mac, saddr->sa_data, ETH_ALEN); err = mlx4_en_do_set_mac(priv, new_mac); - if (!err) - memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN); + if (err) + goto out; + + memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN); + mlx4_en_update_user_mac(priv, new_mac); +out: mutex_unlock(&mdev->state_lock); return err; @@ -1009,7 +1080,7 @@ static void mlx4_en_do_multicast(struct mlx4_en_priv *priv, memcpy(&mc_list[10], mclist->addr, ETH_ALEN); mc_list[5] = priv->port; err = mlx4_multicast_detach(mdev->dev, - &priv->rss_map.indir_qp, + priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, 
mclist->reg_id); @@ -1031,7 +1102,7 @@ static void mlx4_en_do_multicast(struct mlx4_en_priv *priv, /* needed for B0 steering support */ mc_list[5] = priv->port; err = mlx4_multicast_attach(mdev->dev, - &priv->rss_map.indir_qp, + priv->rss_map.indir_qp, mc_list, priv->port, 0, MLX4_PROT_ETH, @@ -1349,7 +1420,7 @@ static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) priv->rx_usecs = MLX4_EN_RX_COAL_TIME; priv->tx_frames = MLX4_EN_TX_COAL_PKTS; priv->tx_usecs = MLX4_EN_TX_COAL_TIME; - en_dbg(INTR, priv, "Default coalesing params for mtu:%d - rx_frames:%d rx_usecs:%d\n", + en_dbg(INTR, priv, "Default coalescing params for mtu:%d - rx_frames:%d rx_usecs:%d\n", priv->dev->mtu, priv->rx_frames, priv->rx_usecs); /* Setup cq moderation params */ @@ -1676,13 +1747,15 @@ int mlx4_en_start_port(struct net_device *dev) if (t != TX_XDP) { tx_ring->tx_queue = netdev_get_tx_queue(dev, i); tx_ring->recycle_ring = NULL; + + /* Arm CQ for TX completions */ + mlx4_en_arm_cq(priv, cq); + } else { mlx4_en_init_recycle_ring(priv, i); + /* XDP TX CQ should never be armed */ } - /* Arm CQ for TX completions */ - mlx4_en_arm_cq(priv, cq); - /* Set initial ownership of all Tx TXBBs to SW (1) */ for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE) *((u32 *)(tx_ring->buf + j)) = 0xffffffff; @@ -1741,7 +1814,7 @@ int mlx4_en_start_port(struct net_device *dev) /* Attach rx QP to bradcast address */ eth_broadcast_addr(&mc_list[10]); mc_list[5] = priv->port; /* needed for B0 steering support */ - if (mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list, + if (mlx4_multicast_attach(mdev->dev, priv->rss_map.indir_qp, mc_list, priv->port, 0, MLX4_PROT_ETH, &priv->broadcast_id)) mlx4_warn(mdev, "Failed Attaching Broadcast\n"); @@ -1865,12 +1938,12 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) /* Detach All multicasts */ eth_broadcast_addr(&mc_list[10]); mc_list[5] = priv->port; /* needed for B0 steering support */ - mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, + mlx4_multicast_detach(mdev->dev, priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, priv->broadcast_id); list_for_each_entry(mclist, &priv->curr_list, list) { memcpy(&mc_list[10], mclist->addr, ETH_ALEN); mc_list[5] = priv->port; - mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, + mlx4_multicast_detach(mdev->dev, priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, mclist->reg_id); if (mclist->tunnel_reg_id) mlx4_flow_detach(mdev->dev, mclist->tunnel_reg_id); @@ -2139,7 +2212,7 @@ static int mlx4_en_copy_priv(struct mlx4_en_priv *dst, memcpy(&dst->hwtstamp_config, &prof->hwtstamp_config, sizeof(dst->hwtstamp_config)); - dst->num_tx_rings_p_up = src->mdev->profile.num_tx_rings_p_up; + dst->num_tx_rings_p_up = prof->num_tx_rings_p_up; dst->rx_ring_num = prof->rx_ring_num; dst->flags = prof->flags; dst->mdev = src->mdev; @@ -2192,6 +2265,7 @@ static void mlx4_en_update_priv(struct mlx4_en_priv *dst, dst->tx_ring[t] = src->tx_ring[t]; dst->tx_cq[t] = src->tx_cq[t]; } + dst->num_tx_rings_p_up = src->num_tx_rings_p_up; dst->rx_ring_num = src->rx_ring_num; memcpy(dst->prof, src->prof, sizeof(struct mlx4_en_port_profile)); } @@ -2375,6 +2449,7 @@ static int mlx4_en_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: config.rx_filter = HWTSTAMP_FILTER_ALL; break; default: @@ -2774,7 +2849,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) if 
(priv->tx_ring_num[TX] + xdp_ring_num > MAX_TX_RINGS) { tx_changed = 1; new_prof.tx_ring_num[TX] = - MAX_TX_RINGS - ALIGN(xdp_ring_num, MLX4_EN_NUM_UP); + MAX_TX_RINGS - ALIGN(xdp_ring_num, priv->prof->num_up); en_warn(priv, "Reducing the number of TX rings, to not exceed the max total rings number.\n"); } @@ -2819,11 +2894,25 @@ out: return err; } -static bool mlx4_xdp_attached(struct net_device *dev) +static u32 mlx4_xdp_query(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + const struct bpf_prog *xdp_prog; + u32 prog_id = 0; + + if (!priv->tx_ring_num[TX_XDP]) + return prog_id; + + mutex_lock(&mdev->state_lock); + xdp_prog = rcu_dereference_protected( + priv->rx_ring[0]->xdp_prog, + lockdep_is_held(&mdev->state_lock)); + if (xdp_prog) + prog_id = xdp_prog->aux->id; + mutex_unlock(&mdev->state_lock); - return !!priv->tx_ring_num[TX_XDP]; + return prog_id; } static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp) @@ -2832,7 +2921,8 @@ static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp) case XDP_SETUP_PROG: return mlx4_xdp_set(dev, xdp->prog); case XDP_QUERY_PROG: - xdp->prog_attached = mlx4_xdp_attached(dev); + xdp->prog_id = mlx4_xdp_query(dev); + xdp->prog_attached = !!xdp->prog_id; return 0; default: return -EINVAL; @@ -3250,7 +3340,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->flags |= MLX4_EN_DCB_ENABLED; priv->cee_config.pfc_state = false; - for (i = 0; i < MLX4_EN_NUM_UP; i++) + for (i = 0; i < MLX4_EN_NUM_UP_HIGH; i++) priv->cee_config.dcb_pfc[i] = pfc_disabled; if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index a6b0db0e0383..5a47f9669621 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -44,7 +44,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; - memset(context, 0, sizeof *context); + memset(context, 0, sizeof(*context)); context->flags = cpu_to_be32(7 << 16 | rss << MLX4_RSS_QPC_FLAG_OFFSET); context->pd = cpu_to_be32(mdev->priv_pdn); context->mtu_msgmax = 0xff; @@ -63,7 +63,8 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, context->local_qpn = cpu_to_be32(qpn); context->pri_path.ackto = 1 & 0x07; context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6; - if (user_prio >= 0) { + /* force user priority per tx ring */ + if (user_prio >= 0 && priv->prof->num_up == MLX4_EN_NUM_UP_HIGH) { context->pri_path.sched_queue |= user_prio << 3; context->pri_path.feup = MLX4_FEUP_FORCE_ETH_UP; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 77abd1813047..b97a55c827eb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -134,10 +134,11 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index, gfp_t gfp) { - struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride); + struct mlx4_en_rx_desc *rx_desc = ring->buf + + (index << ring->log_stride); struct mlx4_en_rx_alloc *frags = ring->rx_info + (index << priv->log_rx_info); - if (ring->page_cache.index > 0) { + if (likely(ring->page_cache.index > 0)) { /* XDP uses a single page per frame */ if (!frags->page) { 
ring->page_cache.index--; @@ -178,6 +179,7 @@ static void mlx4_en_free_rx_desc(const struct mlx4_en_priv *priv, } } +/* Function not in fast-path */ static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) { struct mlx4_en_rx_ring *ring; @@ -539,14 +541,14 @@ static void validate_loopback(struct mlx4_en_priv *priv, void *va) priv->loopback_ok = 1; } -static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, +static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { u32 missing = ring->actual_size - (ring->prod - ring->cons); /* Try to batch allocations, but not too much. */ if (missing < 8) - return false; + return; do { if (mlx4_en_prepare_rx_desc(priv, ring, ring->prod & ring->size_mask, @@ -554,9 +556,9 @@ static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, __GFP_MEMALLOC)) break; ring->prod++; - } while (--missing); + } while (likely(--missing)); - return true; + mlx4_en_update_rx_prod_db(ring); } /* When hardware doesn't strip the vlan, we need to calculate the checksum @@ -572,16 +574,21 @@ static inline __wsum get_fixed_vlan_csum(__wsum hw_checksum, * header, the HW adds it. To address that, we are subtracting the pseudo * header checksum from the checksum value provided by the HW. */ -static void get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb, - struct iphdr *iph) +static int get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb, + struct iphdr *iph) { __u16 length_for_csum = 0; __wsum csum_pseudo_header = 0; + __u8 ipproto = iph->protocol; + + if (unlikely(ipproto == IPPROTO_SCTP)) + return -1; length_for_csum = (be16_to_cpu(iph->tot_len) - (iph->ihl << 2)); csum_pseudo_header = csum_tcpudp_nofold(iph->saddr, iph->daddr, - length_for_csum, iph->protocol, 0); + length_for_csum, ipproto, 0); skb->csum = csum_sub(hw_checksum, csum_pseudo_header); + return 0; } #if IS_ENABLED(CONFIG_IPV6) @@ -592,17 +599,20 @@ static void get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb, static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, struct ipv6hdr *ipv6h) { + __u8 nexthdr = ipv6h->nexthdr; __wsum csum_pseudo_hdr = 0; - if (unlikely(ipv6h->nexthdr == IPPROTO_FRAGMENT || - ipv6h->nexthdr == IPPROTO_HOPOPTS)) + if (unlikely(nexthdr == IPPROTO_FRAGMENT || + nexthdr == IPPROTO_HOPOPTS || + nexthdr == IPPROTO_SCTP)) return -1; - hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr)); + hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(nexthdr)); csum_pseudo_hdr = csum_partial(&ipv6h->saddr, sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0); csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ipv6h->payload_len); - csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ntohs(ipv6h->nexthdr)); + csum_pseudo_hdr = csum_add(csum_pseudo_hdr, + (__force __wsum)htons(nexthdr)); skb->csum = csum_sub(hw_checksum, csum_pseudo_hdr); skb->csum = csum_add(skb->csum, csum_partial(ipv6h, sizeof(struct ipv6hdr), 0)); @@ -625,11 +635,10 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, } if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4)) - get_fixed_ipv4_csum(hw_checksum, skb, hdr); + return get_fixed_ipv4_csum(hw_checksum, skb, hdr); #if IS_ENABLED(CONFIG_IPV6) - else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) - if (unlikely(get_fixed_ipv6_csum(hw_checksum, skb, hdr))) - return -1; + if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) + return get_fixed_ipv6_csum(hw_checksum, skb, hdr); #endif return 0; } @@ -637,21 +646,14 @@ 
static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_dev *mdev = priv->mdev; - struct mlx4_cqe *cqe; - struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring]; - struct mlx4_en_rx_alloc *frags; + int factor = priv->cqe_factor; + struct mlx4_en_rx_ring *ring; struct bpf_prog *xdp_prog; - int doorbell_pending; - struct sk_buff *skb; - int index; - int nr; - unsigned int length; + int cq_ring = cq->ring; + bool doorbell_pending; + struct mlx4_cqe *cqe; int polled = 0; - int ip_summed; - int factor = priv->cqe_factor; - u64 timestamp; - bool l2_tunnel; + int index; if (unlikely(!priv->port_up)) return 0; @@ -659,6 +661,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (unlikely(budget <= 0)) return polled; + ring = priv->rx_ring[cq_ring]; + /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */ rcu_read_lock(); xdp_prog = rcu_dereference(ring->xdp_prog); @@ -673,10 +677,17 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cq->mcq.cons_index & cq->size)) { + struct mlx4_en_rx_alloc *frags; + enum pkt_hash_types hash_type; + struct sk_buff *skb; + unsigned int length; + int ip_summed; void *va; + int nr; frags = ring->rx_info + (index << priv->log_rx_info); va = page_address(frags[0].page) + frags[0].page_offset; + prefetchw(va); /* * make sure we read the CQE after we read the ownership bit */ @@ -768,7 +779,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud break; case XDP_TX: if (likely(!mlx4_en_xmit_frame(ring, frags, dev, - length, cq->ring, + length, cq_ring, &doorbell_pending))) { frags[0].page = NULL; goto next; @@ -790,24 +801,27 @@ xdp_drop_no_cnt: ring->packets++; skb = napi_get_frags(&cq->napi); - if (!skb) + if (unlikely(!skb)) goto next; if (unlikely(ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL)) { - timestamp = mlx4_en_get_cqe_ts(cqe); - mlx4_en_fill_hwtstamps(mdev, skb_hwtstamps(skb), + u64 timestamp = mlx4_en_get_cqe_ts(cqe); + + mlx4_en_fill_hwtstamps(priv->mdev, skb_hwtstamps(skb), timestamp); } - skb_record_rx_queue(skb, cq->ring); + skb_record_rx_queue(skb, cq_ring); if (likely(dev->features & NETIF_F_RXCSUM)) { if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP)) { if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && cqe->checksum == cpu_to_be16(0xffff)) { - ip_summed = CHECKSUM_UNNECESSARY; - l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) && + bool l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) && (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); + + ip_summed = CHECKSUM_UNNECESSARY; + hash_type = PKT_HASH_TYPE_L4; if (l2_tunnel) skb->csum_level = 1; ring->csum_ok++; @@ -822,6 +836,7 @@ xdp_drop_no_cnt: goto csum_none; } else { ip_summed = CHECKSUM_COMPLETE; + hash_type = PKT_HASH_TYPE_L3; ring->csum_complete++; } } else { @@ -831,16 +846,14 @@ xdp_drop_no_cnt: } else { csum_none: ip_summed = CHECKSUM_NONE; + hash_type = PKT_HASH_TYPE_L3; ring->csum_none++; } skb->ip_summed = ip_summed; if (dev->features & NETIF_F_RXHASH) skb_set_hash(skb, be32_to_cpu(cqe->immed_rss_invalid), - (ip_summed == CHECKSUM_UNNECESSARY) ? 
- PKT_HASH_TYPE_L4 : - PKT_HASH_TYPE_L3); - + hash_type); if ((cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK)) && @@ -867,15 +880,17 @@ next: ++cq->mcq.cons_index; index = (cq->mcq.cons_index) & ring->size_mask; cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor; - if (++polled == budget) + if (unlikely(++polled == budget)) break; } rcu_read_unlock(); - if (polled) { - if (doorbell_pending) - mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]); + if (likely(polled)) { + if (doorbell_pending) { + priv->tx_cq[TX_XDP][cq_ring]->xdp_busy = true; + mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq_ring]); + } mlx4_cq_set_ci(&cq->mcq); wmb(); /* ensure HW sees CQ consumer before we post new buffers */ @@ -883,8 +898,7 @@ next: } AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); - if (mlx4_en_refill_rx_buffers(priv, ring)) - mlx4_en_update_rx_prod_db(ring); + mlx4_en_refill_rx_buffers(priv, ring); return polled; } @@ -907,16 +921,30 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); struct net_device *dev = cq->dev; struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_cq *xdp_tx_cq = NULL; + bool clean_complete = true; int done; + if (priv->tx_ring_num[TX_XDP]) { + xdp_tx_cq = priv->tx_cq[TX_XDP][cq->ring]; + if (xdp_tx_cq->xdp_busy) { + clean_complete = mlx4_en_process_tx_cq(dev, xdp_tx_cq, + budget); + xdp_tx_cq->xdp_busy = !clean_complete; + } + } + done = mlx4_en_process_rx_cq(dev, cq, budget); /* If we used up all the quota - we're probably not done yet... */ - if (done == budget) { + if (done == budget || !clean_complete) { const struct cpumask *aff; struct irq_data *idata; int cpu_curr; + /* in case we got here because of !clean_complete */ + done = budget; + INC_PERF_COUNTER(priv->pstats.napi_quota); cpu_curr = smp_processor_id(); @@ -936,7 +964,7 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) done--; } /* Done for now */ - if (napi_complete_done(napi, done)) + if (likely(napi_complete_done(napi, done))) mlx4_en_arm_cq(priv, cq); return done; } @@ -1021,14 +1049,14 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, if (!context) return -ENOMEM; - err = mlx4_qp_alloc(mdev->dev, qpn, qp, GFP_KERNEL); + err = mlx4_qp_alloc(mdev->dev, qpn, qp); if (err) { en_err(priv, "Failed to allocate qp #%x\n", qpn); goto out; } qp->event = mlx4_en_sqp_event; - memset(context, 0, sizeof *context); + memset(context, 0, sizeof(*context)); mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0, qpn, ring->cqn, -1, context); context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma); @@ -1060,12 +1088,13 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) u32 qpn; err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn, - MLX4_RESERVE_A0_QP); + MLX4_RESERVE_A0_QP, + MLX4_RES_USAGE_DRIVER); if (err) { en_err(priv, "Failed reserving drop qpn\n"); return err; } - err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp, GFP_KERNEL); + err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp); if (err) { en_err(priv, "Failed allocating drop qp\n"); mlx4_qp_release_range(priv->mdev->dev, qpn, 1); @@ -1099,11 +1128,15 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) int i, qpn; int err = 0; int good_qps = 0; + u8 flags; en_dbg(DRV, priv, "Configuring rss steering\n"); + + flags = priv->rx_ring_num == 1 ? 
MLX4_RESERVE_A0_QP : 0; err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num, priv->rx_ring_num, - &rss_map->base_qpn, 0); + &rss_map->base_qpn, flags, + MLX4_RES_USAGE_DRIVER); if (err) { en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num); return err; @@ -1120,13 +1153,27 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) ++good_qps; } + if (priv->rx_ring_num == 1) { + rss_map->indir_qp = &rss_map->qps[0]; + priv->base_qpn = rss_map->indir_qp->qpn; + en_info(priv, "Optimized Non-RSS steering\n"); + return 0; + } + + rss_map->indir_qp = kzalloc(sizeof(*rss_map->indir_qp), GFP_KERNEL); + if (!rss_map->indir_qp) { + err = -ENOMEM; + goto rss_err; + } + /* Configure RSS indirection qp */ - err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp, GFP_KERNEL); + err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp); if (err) { en_err(priv, "Failed to allocate RSS indirection QP\n"); goto rss_err; } - rss_map->indir_qp.event = mlx4_en_sqp_event; + + rss_map->indir_qp->event = mlx4_en_sqp_event; mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn, priv->rx_ring[0]->cqn, -1, &context); @@ -1164,8 +1211,9 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) err = -EINVAL; goto indir_err; } + err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context, - &rss_map->indir_qp, &rss_map->indir_state); + rss_map->indir_qp, &rss_map->indir_state); if (err) goto indir_err; @@ -1173,9 +1221,11 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) indir_err: mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, - MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp); - mlx4_qp_remove(mdev->dev, &rss_map->indir_qp); - mlx4_qp_free(mdev->dev, &rss_map->indir_qp); + MLX4_QP_STATE_RST, NULL, 0, 0, rss_map->indir_qp); + mlx4_qp_remove(mdev->dev, rss_map->indir_qp); + mlx4_qp_free(mdev->dev, rss_map->indir_qp); + kfree(rss_map->indir_qp); + rss_map->indir_qp = NULL; rss_err: for (i = 0; i < good_qps; i++) { mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], @@ -1193,10 +1243,15 @@ void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv) struct mlx4_en_rss_map *rss_map = &priv->rss_map; int i; - mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, - MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp); - mlx4_qp_remove(mdev->dev, &rss_map->indir_qp); - mlx4_qp_free(mdev->dev, &rss_map->indir_qp); + if (priv->rx_ring_num > 1) { + mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, + MLX4_QP_STATE_RST, NULL, 0, 0, + rss_map->indir_qp); + mlx4_qp_remove(mdev->dev, rss_map->indir_qp); + mlx4_qp_free(mdev->dev, rss_map->indir_qp); + kfree(rss_map->indir_qp); + rss_map->indir_qp = NULL; + } for (i = 0; i < priv->rx_ring_num; i++) { mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c index 17112faafbcc..88699b181946 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c @@ -63,8 +63,8 @@ static int mlx4_en_test_loopback_xmit(struct mlx4_en_priv *priv) skb_reserve(skb, NET_IP_ALIGN); - ethh = (struct ethhdr *)skb_put(skb, sizeof(struct ethhdr)); - packet = (unsigned char *)skb_put(skb, packet_size); + ethh = skb_put(skb, sizeof(struct ethhdr)); + packet = skb_put(skb, packet_size); memcpy(ethh->h_dest, priv->dev->dev_addr, ETH_ALEN); eth_zero_addr(ethh->h_source); ethh->h_proto = htons(ETH_P_ARP); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c 
b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 6ffd1849a604..8a32a8f7f9c0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -105,13 +105,14 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, (unsigned long long) ring->sp_wqres.buf.direct.map); err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn, - MLX4_RESERVE_ETH_BF_QP); + MLX4_RESERVE_ETH_BF_QP, + MLX4_RES_USAGE_DRIVER); if (err) { en_err(priv, "failed reserving qp for TX ring\n"); goto err_hwq_res; } - err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->sp_qp, GFP_KERNEL); + err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->sp_qp); if (err) { en_err(priv, "Failed allocating qp %d\n", ring->qpn); goto err_reserve; @@ -234,23 +235,24 @@ static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, u8 owner) { __be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT)); - struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; + struct mlx4_en_tx_desc *tx_desc = ring->buf + (index << LOG_TXBB_SIZE); struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; void *end = ring->buf + ring->buf_size; __be32 *ptr = (__be32 *)tx_desc; int i; /* Optimize the common case when there are no wraparounds */ - if (likely((void *)tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { + if (likely((void *)tx_desc + + (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) { /* Stamp the freed descriptor */ - for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; + for (i = 0; i < tx_info->nr_txbb << LOG_TXBB_SIZE; i += STAMP_STRIDE) { *ptr = stamp; ptr += STAMP_DWORDS; } } else { /* Stamp the freed descriptor */ - for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; + for (i = 0; i < tx_info->nr_txbb << LOG_TXBB_SIZE; i += STAMP_STRIDE) { *ptr = stamp; ptr += STAMP_DWORDS; @@ -265,11 +267,11 @@ static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int index, u8 owner, u64 timestamp, + int index, u64 timestamp, int napi_mode) { struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; - struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; + struct mlx4_en_tx_desc *tx_desc = ring->buf + (index << LOG_TXBB_SIZE); struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset; void *end = ring->buf + ring->buf_size; struct sk_buff *skb = tx_info->skb; @@ -288,19 +290,20 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, skb_tstamp_tx(skb, &hwts); } - /* Optimize the common case when there are no wraparounds */ - if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { - if (!tx_info->inl) { - if (tx_info->linear) - dma_unmap_single(priv->ddev, - tx_info->map0_dma, - tx_info->map0_byte_count, - PCI_DMA_TODEVICE); - else - dma_unmap_page(priv->ddev, - tx_info->map0_dma, - tx_info->map0_byte_count, - PCI_DMA_TODEVICE); + if (!tx_info->inl) { + if (tx_info->linear) + dma_unmap_single(priv->ddev, + tx_info->map0_dma, + tx_info->map0_byte_count, + PCI_DMA_TODEVICE); + else + dma_unmap_page(priv->ddev, + tx_info->map0_dma, + tx_info->map0_byte_count, + PCI_DMA_TODEVICE); + /* Optimize the common case when there are no wraparounds */ + if (likely((void *)tx_desc + + (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) { for (i = 1; i < nr_maps; i++) { data++; dma_unmap_page(priv->ddev, @@ -308,23 +311,10 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, be32_to_cpu(data->byte_count), PCI_DMA_TODEVICE); } - } - } else { - if (!tx_info->inl) { - if ((void *) data >= end) { + } else { + if ((void *)data >= 
end) data = ring->buf + ((void *)data - end); - } - if (tx_info->linear) - dma_unmap_single(priv->ddev, - tx_info->map0_dma, - tx_info->map0_byte_count, - PCI_DMA_TODEVICE); - else - dma_unmap_page(priv->ddev, - tx_info->map0_dma, - tx_info->map0_byte_count, - PCI_DMA_TODEVICE); for (i = 1; i < nr_maps; i++) { data++; /* Check for wraparound before unmapping */ @@ -344,7 +334,7 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int index, u8 owner, u64 timestamp, + int index, u64 timestamp, int napi_mode) { struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; @@ -381,8 +371,7 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) while (ring->cons != ring->prod) { ring->last_nr_txbb = ring->free_tx_desc(priv, ring, ring->cons & ring->size_mask, - !!(ring->cons & ring->size), 0, - 0 /* Non-NAPI caller */); + 0, 0 /* Non-NAPI caller */); ring->cons += ring->last_nr_txbb; cnt++; } @@ -396,15 +385,14 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) return cnt; } -static bool mlx4_en_process_tx_cq(struct net_device *dev, - struct mlx4_en_cq *cq, int napi_budget) +bool mlx4_en_process_tx_cq(struct net_device *dev, + struct mlx4_en_cq *cq, int napi_budget) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cq *mcq = &cq->mcq; struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring]; struct mlx4_cqe *cqe; - u16 index; - u16 new_index, ring_index, stamp_index; + u16 index, ring_index, stamp_index; u32 txbbs_skipped = 0; u32 txbbs_stamp = 0; u32 cons_index = mcq->cons_index; @@ -419,7 +407,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, u32 last_nr_txbb; u32 ring_cons; - if (!priv->port_up) + if (unlikely(!priv->port_up)) return true; netdev_txq_bql_complete_prefetchw(ring->tx_queue); @@ -434,6 +422,8 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cons_index & size) && (done < budget)) { + u16 new_index; + /* * make sure we read the CQE after we read the * ownership bit @@ -464,8 +454,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, /* free next descriptor */ last_nr_txbb = ring->free_tx_desc( priv, ring, ring_index, - !!((ring_cons + txbbs_skipped) & - ring->size), timestamp, napi_budget); + timestamp, napi_budget); mlx4_en_stamp_wqe(priv, ring, stamp_index, !!((ring_cons + txbbs_stamp) & @@ -481,7 +470,6 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor; } - /* * To prevent CQ overflow we first update CQ consumer and only then * the ring consumer. 
@@ -494,7 +482,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb; ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped; - if (ring->free_tx_desc == mlx4_en_recycle_tx_desc) + if (cq->type == TX_XDP) return done < budget; netdev_tx_completed_queue(ring->tx_queue, packets, bytes); @@ -506,6 +494,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, netif_tx_wake_queue(ring->tx_queue); ring->wake_queue++; } + return done < budget; } @@ -526,7 +515,7 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget) struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); struct net_device *dev = cq->dev; struct mlx4_en_priv *priv = netdev_priv(dev); - int clean_complete; + bool clean_complete; clean_complete = mlx4_en_process_tx_cq(dev, cq, budget); if (!clean_complete) @@ -543,7 +532,7 @@ static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv, u32 index, unsigned int desc_size) { - u32 copy = (ring->size - index) * TXBB_SIZE; + u32 copy = (ring->size - index) << LOG_TXBB_SIZE; int i; for (i = desc_size - copy - 4; i >= 0; i -= 4) { @@ -558,12 +547,12 @@ static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv, if ((i & (TXBB_SIZE - 1)) == 0) wmb(); - *((u32 *) (ring->buf + index * TXBB_SIZE + i)) = + *((u32 *)(ring->buf + (index << LOG_TXBB_SIZE) + i)) = *((u32 *) (ring->bounce_buf + i)); } /* Return real descriptor location */ - return ring->buf + index * TXBB_SIZE; + return ring->buf + (index << LOG_TXBB_SIZE); } /* Decide if skb can be inlined in tx descriptor to avoid dma mapping @@ -655,7 +644,7 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, void *fragptr) { struct mlx4_wqe_inline_seg *inl = &tx_desc->inl; - int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl; + int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof(*inl); unsigned int hlen = skb_headlen(skb); if (skb->len <= spc) { @@ -703,15 +692,11 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, { struct mlx4_en_priv *priv = netdev_priv(dev); u16 rings_p_up = priv->num_tx_rings_p_up; - u8 up = 0; if (netdev_get_num_tc(dev)) return skb_tx_hash(dev, skb); - if (skb_vlan_tag_present(skb)) - up = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT; - - return fallback(dev, skb) % rings_p_up + up * rings_p_up; + return fallback(dev, skb) % rings_p_up; } static void mlx4_bf_copy(void __iomem *dst, const void *src, @@ -775,37 +760,101 @@ static void mlx4_en_tx_write_desc(struct mlx4_en_tx_ring *ring, } } +static bool mlx4_en_build_dma_wqe(struct mlx4_en_priv *priv, + struct skb_shared_info *shinfo, + struct mlx4_wqe_data_seg *data, + struct sk_buff *skb, + int lso_header_size, + __be32 mr_key, + struct mlx4_en_tx_info *tx_info) +{ + struct device *ddev = priv->ddev; + dma_addr_t dma = 0; + u32 byte_count = 0; + int i_frag; + + /* Map fragments if any */ + for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) { + const struct skb_frag_struct *frag; + + frag = &shinfo->frags[i_frag]; + byte_count = skb_frag_size(frag); + dma = skb_frag_dma_map(ddev, frag, + 0, byte_count, + DMA_TO_DEVICE); + if (dma_mapping_error(ddev, dma)) + goto tx_drop_unmap; + + data->addr = cpu_to_be64(dma); + data->lkey = mr_key; + dma_wmb(); + data->byte_count = cpu_to_be32(byte_count); + --data; + } + + /* Map linear part if needed */ + if (tx_info->linear) { + byte_count = skb_headlen(skb) - lso_header_size; + + dma = dma_map_single(ddev, skb->data + + lso_header_size, byte_count, + PCI_DMA_TODEVICE); + if 
(dma_mapping_error(ddev, dma)) + goto tx_drop_unmap; + + data->addr = cpu_to_be64(dma); + data->lkey = mr_key; + dma_wmb(); + data->byte_count = cpu_to_be32(byte_count); + } + /* tx completion can avoid cache line miss for common cases */ + tx_info->map0_dma = dma; + tx_info->map0_byte_count = byte_count; + + return true; + +tx_drop_unmap: + en_err(priv, "DMA mapping error\n"); + + while (++i_frag < shinfo->nr_frags) { + ++data; + dma_unmap_page(ddev, (dma_addr_t)be64_to_cpu(data->addr), + be32_to_cpu(data->byte_count), + PCI_DMA_TODEVICE); + } + + return false; +} + netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) { struct skb_shared_info *shinfo = skb_shinfo(skb); struct mlx4_en_priv *priv = netdev_priv(dev); union mlx4_wqe_qpn_vlan qpn_vlan = {}; - struct device *ddev = priv->ddev; struct mlx4_en_tx_ring *ring; struct mlx4_en_tx_desc *tx_desc; struct mlx4_wqe_data_seg *data; struct mlx4_en_tx_info *tx_info; - int tx_ind = 0; + int tx_ind; int nr_txbb; int desc_size; int real_size; u32 index, bf_index; __be32 op_own; - u16 vlan_proto = 0; - int i_frag; int lso_header_size; void *fragptr = NULL; bool bounce = false; bool send_doorbell; bool stop_queue; bool inline_ok; + u8 data_offset; u32 ring_cons; bool bf_ok; tx_ind = skb_get_queue_mapping(skb); ring = priv->tx_ring[TX][tx_ind]; - if (!priv->port_up) + if (unlikely(!priv->port_up)) goto tx_drop; /* fetch ring->cons far ahead before needing it to avoid stall */ @@ -818,7 +867,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) /* Align descriptor to TXBB size */ desc_size = ALIGN(real_size, TXBB_SIZE); - nr_txbb = desc_size / TXBB_SIZE; + nr_txbb = desc_size >> LOG_TXBB_SIZE; if (unlikely(nr_txbb > MAX_DESC_TXBBS)) { if (netif_msg_tx_err(priv)) en_warn(priv, "Oversized header or SG list\n"); @@ -827,6 +876,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) bf_ok = ring->bf_enabled; if (skb_vlan_tag_present(skb)) { + u16 vlan_proto; + qpn_vlan.vlan_tag = cpu_to_be16(skb_vlan_tag_get(skb)); vlan_proto = be16_to_cpu(skb->vlan_proto); if (vlan_proto == ETH_P_8021AD) @@ -851,7 +902,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) /* See if we have enough space for whole descriptor TXBB for setting * SW ownership on next descriptor; if not, use a bounce buffer. */ if (likely(index + nr_txbb <= ring->size)) - tx_desc = ring->buf + index * TXBB_SIZE; + tx_desc = ring->buf + (index << LOG_TXBB_SIZE); else { tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf; bounce = true; @@ -863,64 +914,31 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) tx_info->skb = skb; tx_info->nr_txbb = nr_txbb; - data = &tx_desc->data; - if (lso_header_size) - data = ((void *)&tx_desc->lso + ALIGN(lso_header_size + 4, - DS_SIZE)); + if (!lso_header_size) { + data = &tx_desc->data; + data_offset = offsetof(struct mlx4_en_tx_desc, data); + } else { + int lso_align = ALIGN(lso_header_size + 4, DS_SIZE); + + data = (void *)&tx_desc->lso + lso_align; + data_offset = offsetof(struct mlx4_en_tx_desc, lso) + lso_align; + } /* valid only for none inline segments */ - tx_info->data_offset = (void *)data - (void *)tx_desc; + tx_info->data_offset = data_offset; tx_info->inl = inline_ok; - tx_info->linear = (lso_header_size < skb_headlen(skb) && - !inline_ok) ? 
1 : 0; + tx_info->linear = lso_header_size < skb_headlen(skb) && !inline_ok; tx_info->nr_maps = shinfo->nr_frags + tx_info->linear; data += tx_info->nr_maps - 1; - if (!tx_info->inl) { - dma_addr_t dma = 0; - u32 byte_count = 0; - - /* Map fragments if any */ - for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) { - const struct skb_frag_struct *frag; - - frag = &shinfo->frags[i_frag]; - byte_count = skb_frag_size(frag); - dma = skb_frag_dma_map(ddev, frag, - 0, byte_count, - DMA_TO_DEVICE); - if (dma_mapping_error(ddev, dma)) - goto tx_drop_unmap; - - data->addr = cpu_to_be64(dma); - data->lkey = ring->mr_key; - dma_wmb(); - data->byte_count = cpu_to_be32(byte_count); - --data; - } - - /* Map linear part if needed */ - if (tx_info->linear) { - byte_count = skb_headlen(skb) - lso_header_size; - - dma = dma_map_single(ddev, skb->data + - lso_header_size, byte_count, - PCI_DMA_TODEVICE); - if (dma_mapping_error(ddev, dma)) - goto tx_drop_unmap; - - data->addr = cpu_to_be64(dma); - data->lkey = ring->mr_key; - dma_wmb(); - data->byte_count = cpu_to_be32(byte_count); - } - /* tx completion can avoid cache line miss for common cases */ - tx_info->map0_dma = dma; - tx_info->map0_byte_count = byte_count; - } + if (!tx_info->inl) + if (!mlx4_en_build_dma_wqe(priv, shinfo, data, skb, + lso_header_size, ring->mr_key, + tx_info)) + goto tx_drop_count; /* * For timestamping add flag to skb_shinfo and @@ -1056,16 +1074,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) } return NETDEV_TX_OK; -tx_drop_unmap: - en_err(priv, "DMA mapping error\n"); - - while (++i_frag < shinfo->nr_frags) { - ++data; - dma_unmap_page(ddev, (dma_addr_t) be64_to_cpu(data->addr), - be32_to_cpu(data->byte_count), - PCI_DMA_TODEVICE); - } - tx_drop_count: ring->tx_dropped++; tx_drop: @@ -1073,52 +1081,41 @@ tx_drop: return NETDEV_TX_OK; } +#define MLX4_EN_XDP_TX_NRTXBB 1 +#define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \ + / 16) & 0x3f) + netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, struct mlx4_en_rx_alloc *frame, struct net_device *dev, unsigned int length, - int tx_ind, int *doorbell_pending) + int tx_ind, bool *doorbell_pending) { struct mlx4_en_priv *priv = netdev_priv(dev); union mlx4_wqe_qpn_vlan qpn_vlan = {}; - struct mlx4_en_tx_ring *ring; struct mlx4_en_tx_desc *tx_desc; - struct mlx4_wqe_data_seg *data; struct mlx4_en_tx_info *tx_info; - int index, bf_index; - bool send_doorbell; - int nr_txbb = 1; - bool stop_queue; + struct mlx4_wqe_data_seg *data; + struct mlx4_en_tx_ring *ring; dma_addr_t dma; - int real_size; __be32 op_own; - u32 ring_cons; - bool bf_ok; + int index; - BUILD_BUG_ON_MSG(ALIGN(CTRL_SIZE + DS_SIZE, TXBB_SIZE) != TXBB_SIZE, - "mlx4_en_xmit_frame requires minimum size tx desc"); + if (unlikely(!priv->port_up)) + goto tx_drop; ring = priv->tx_ring[TX_XDP][tx_ind]; - if (!priv->port_up) - goto tx_drop; - - if (mlx4_en_is_tx_ring_full(ring)) + if (unlikely(mlx4_en_is_tx_ring_full(ring))) goto tx_drop_count; - /* fetch ring->cons far ahead before needing it to avoid stall */ - ring_cons = READ_ONCE(ring->cons); - index = ring->prod & ring->size_mask; tx_info = &ring->tx_info[index]; - bf_ok = ring->bf_enabled; - /* Track current inflight packets for performance analysis */ AVG_PERF_COUNTER(priv->pstats.inflight_avg, - (u32)(ring->prod - ring_cons - 1)); + (u32)(ring->prod - READ_ONCE(ring->cons) - 1)); - bf_index = ring->prod; - tx_desc = ring->buf + index * TXBB_SIZE; + tx_desc = ring->buf + (index << LOG_TXBB_SIZE); data = 
&tx_desc->data; dma = frame->dma; @@ -1127,9 +1124,9 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, frame->page = NULL; tx_info->map0_dma = dma; tx_info->map0_byte_count = PAGE_SIZE; - tx_info->nr_txbb = nr_txbb; + tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB; tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN); - tx_info->data_offset = (void *)data - (void *)tx_desc; + tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data); tx_info->ts_requested = 0; tx_info->nr_maps = 1; tx_info->linear = 1; @@ -1153,28 +1150,19 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, rx_ring->xdp_tx++; AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length); - ring->prod += nr_txbb; - - stop_queue = mlx4_en_is_tx_ring_full(ring); - send_doorbell = stop_queue || - *doorbell_pending > MLX4_EN_DOORBELL_BUDGET; - bf_ok &= send_doorbell; + ring->prod += MLX4_EN_XDP_TX_NRTXBB; - real_size = ((CTRL_SIZE + nr_txbb * DS_SIZE) / 16) & 0x3f; + qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ; - if (bf_ok) - qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size); - else - qpn_vlan.fence_size = real_size; - - mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, bf_index, - op_own, bf_ok, send_doorbell); - *doorbell_pending = send_doorbell ? 0 : *doorbell_pending + 1; + mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, 0, + op_own, false, false); + *doorbell_pending = true; return NETDEV_TX_OK; tx_drop_count: rx_ring->xdp_tx_full++; + *doorbell_pending = true; tx_drop: return NETDEV_TX_BUSY; } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 07406cf2eacd..6f57c052053e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -259,7 +259,7 @@ int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port) if (!s_slave->active) return 0; - memset(&eqe, 0, sizeof eqe); + memset(&eqe, 0, sizeof(eqe)); eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE; @@ -276,7 +276,7 @@ int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port) /*don't send if we don't have the that slave */ if (dev->persist->num_vfs < slave) return 0; - memset(&eqe, 0, sizeof eqe); + memset(&eqe, 0, sizeof(eqe)); eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; eqe.subtype = MLX4_DEV_PMC_SUBTYPE_GUID_INFO; @@ -295,7 +295,7 @@ int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, /*don't send if we don't have the that slave */ if (dev->persist->num_vfs < slave) return 0; - memset(&eqe, 0, sizeof eqe); + memset(&eqe, 0, sizeof(eqe)); eqe.type = MLX4_EVENT_TYPE_PORT_CHANGE; eqe.subtype = port_subtype_change; @@ -432,7 +432,7 @@ int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr) { struct mlx4_eqe eqe; - memset(&eqe, 0, sizeof eqe); + memset(&eqe, 0, sizeof(eqe)); eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PORT_INFO; @@ -726,7 +726,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) } memcpy(&priv->mfunc.master.comm_arm_bit_vector, eqe->event.comm_channel_arm.bit_vec, - sizeof eqe->event.comm_channel_arm.bit_vec); + sizeof(eqe->event.comm_channel_arm.bit_vec)); queue_work(priv->mfunc.master.comm_wq, &priv->mfunc.master.comm_work); break; @@ -984,15 +984,15 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent, */ npages = PAGE_ALIGN(eq->nent * dev->caps.eqe_size) / PAGE_SIZE; - eq->page_list = kmalloc(npages * sizeof *eq->page_list, - GFP_KERNEL); + 
eq->page_list = kmalloc_array(npages, sizeof(*eq->page_list), + GFP_KERNEL); if (!eq->page_list) goto err_out; for (i = 0; i < npages; ++i) eq->page_list[i].buf = NULL; - dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + dma_list = kmalloc_array(npages, sizeof(*dma_list), GFP_KERNEL); if (!dma_list) goto err_out_free; @@ -1161,7 +1161,7 @@ int mlx4_alloc_eq_table(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); priv->eq_table.eq = kcalloc(dev->caps.num_eqs - dev->caps.reserved_eqs, - sizeof *priv->eq_table.eq, GFP_KERNEL); + sizeof(*priv->eq_table.eq), GFP_KERNEL); if (!priv->eq_table.eq) return -ENOMEM; @@ -1180,7 +1180,7 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) int i; priv->eq_table.uar_map = kcalloc(mlx4_num_eq_uar(dev), - sizeof *priv->eq_table.uar_map, + sizeof(*priv->eq_table.uar_map), GFP_KERNEL); if (!priv->eq_table.uar_map) { err = -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 37e84a59e751..16c09949afd5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -36,6 +36,7 @@ #include <linux/mlx4/cmd.h> #include <linux/module.h> #include <linux/cache.h> +#include <linux/kernel.h> #include "fw.h" #include "icm.h" @@ -57,7 +58,7 @@ MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: off)"); do { \ void *__p = (char *) (source) + (offset); \ u64 val; \ - switch (sizeof (dest)) { \ + switch (sizeof(dest)) { \ case 1: (dest) = *(u8 *) __p; break; \ case 2: (dest) = be16_to_cpup(__p); break; \ case 4: (dest) = be32_to_cpup(__p); break; \ @@ -159,8 +160,10 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [32] = "Loopback source checks support", [33] = "RoCEv2 support", [34] = "DMFS Sniffer support (UC & MC)", - [35] = "QinQ VST mode support", - [36] = "sl to vl mapping table change event support" + [35] = "Diag counters per port", + [36] = "QinQ VST mode support", + [37] = "sl to vl mapping table change event support", + [38] = "user MAC support", }; int i; @@ -678,22 +681,22 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, if (func_cap->flags1 & QUERY_FUNC_CAP_VF_ENABLE_QP0) { MLX4_GET(qkey, outbox, QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET); - func_cap->qp0_qkey = qkey; + func_cap->spec_qps.qp0_qkey = qkey; } else { - func_cap->qp0_qkey = 0; + func_cap->spec_qps.qp0_qkey = 0; } MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL); - func_cap->qp0_tunnel_qpn = size & 0xFFFFFF; + func_cap->spec_qps.qp0_tunnel = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_PROXY); - func_cap->qp0_proxy_qpn = size & 0xFFFFFF; + func_cap->spec_qps.qp0_proxy = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_TUNNEL); - func_cap->qp1_tunnel_qpn = size & 0xFFFFFF; + func_cap->spec_qps.qp1_tunnel = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY); - func_cap->qp1_proxy_qpn = size & 0xFFFFFF; + func_cap->spec_qps.qp1_proxy = size & 0xFFFFFF; if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_NIC_INFO) MLX4_GET(func_cap->phys_port_id, outbox, @@ -764,6 +767,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET 0x3e #define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f #define QUERY_DEV_CAP_EXT_FLAGS_OFFSET 0x40 +#define QUERY_DEV_CAP_WOL_OFFSET 0x43 #define QUERY_DEV_CAP_FLAGS_OFFSET 0x44 #define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48 #define QUERY_DEV_CAP_UAR_SZ_OFFSET 0x49 @@ -776,6 +780,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct 
mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET 0x52 #define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET 0x55 #define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET 0x56 +#define QUERY_DEV_CAP_USER_MAC_EN_OFFSET 0x5C #define QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET 0x5D #define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET 0x61 #define QUERY_DEV_CAP_RSVD_MCG_OFFSET 0x62 @@ -920,6 +925,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(ext_flags, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); MLX4_GET(flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET); dev_cap->flags = flags | (u64)ext_flags << 32; + MLX4_GET(field, outbox, QUERY_DEV_CAP_WOL_OFFSET); + dev_cap->wol_port[1] = !!(field & 0x20); + dev_cap->wol_port[2] = !!(field & 0x40); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET); dev_cap->reserved_uars = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_UAR_SZ_OFFSET); @@ -944,6 +952,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET); dev_cap->max_sq_desc_sz = size; + MLX4_GET(field, outbox, QUERY_DEV_CAP_USER_MAC_EN_OFFSET); + if (field & (1 << 2)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_USER_MAC_EN; MLX4_GET(field, outbox, QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET); if (field & 0x1) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP; @@ -1529,7 +1540,7 @@ int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt) for (i = 0; i < mlx4_icm_size(&iter) >> lg; ++i) { if (virt != -1) { pages[nent * 2] = cpu_to_be64(virt); - virt += 1 << lg; + virt += 1ULL << lg; } pages[nent * 2 + 1] = @@ -2445,14 +2456,14 @@ int mlx4_config_dev_retrieval(struct mlx4_dev *dev, csum_mask = (config_dev.rx_checksum_val >> CONFIG_DEV_RX_CSUM_MODE_PORT1_BIT_OFFSET) & CONFIG_DEV_RX_CSUM_MODE_MASK; - if (csum_mask >= sizeof(config_dev_csum_flags)/sizeof(config_dev_csum_flags[0])) + if (csum_mask >= ARRAY_SIZE(config_dev_csum_flags)) return -EINVAL; params->rx_csum_flags_port_1 = config_dev_csum_flags[csum_mask]; csum_mask = (config_dev.rx_checksum_val >> CONFIG_DEV_RX_CSUM_MODE_PORT2_BIT_OFFSET) & CONFIG_DEV_RX_CSUM_MODE_MASK; - if (csum_mask >= sizeof(config_dev_csum_flags)/sizeof(config_dev_csum_flags[0])) + if (csum_mask >= ARRAY_SIZE(config_dev_csum_flags)) return -EINVAL; params->rx_csum_flags_port_2 = config_dev_csum_flags[csum_mask]; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 5343a0599253..cd6399c76bfd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -129,6 +129,7 @@ struct mlx4_dev_cap { u32 dmfs_high_rate_qpn_range; struct mlx4_rate_limit_caps rl_caps; struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1]; + bool wol_port[MLX4_MAX_PORTS + 1]; }; struct mlx4_func_cap { @@ -143,11 +144,7 @@ struct mlx4_func_cap { int max_eq; int reserved_eq; int mcg_quota; - u32 qp0_qkey; - u32 qp0_tunnel_qpn; - u32 qp0_proxy_qpn; - u32 qp1_tunnel_qpn; - u32 qp1_proxy_qpn; + struct mlx4_spec_qps spec_qps; u32 reserved_lkey; u8 physical_port; u8 flags0; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c index 8f2fde0487c4..3a09d7122d3b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c @@ -65,7 +65,7 @@ struct mlx4_set_port_scheduler_context { /* Granular Qos (per VF) section */ struct mlx4_alloc_vpp_param { - __be32 availible_vpp; + __be32 available_vpp; __be32 vpp_p_up[MLX4_NUM_UP]; }; @@ -157,7 +157,7 @@ int 
mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, EXPORT_SYMBOL(mlx4_SET_PORT_SCHEDULER); int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port, - u16 *availible_vpp, u8 *vpp_p_up) + u16 *available_vpp, u8 *vpp_p_up) { int i; int err; @@ -179,7 +179,7 @@ int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port, goto out; /* Total number of supported VPPs */ - *availible_vpp = (u16)be32_to_cpu(out_param->availible_vpp); + *available_vpp = (u16)be32_to_cpu(out_param->available_vpp); for (i = 0; i < MLX4_NUM_UP; i++) vpp_p_up[i] = (u8)be32_to_cpu(out_param->vpp_p_up[i]); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h index ac1f331878e6..582997577a04 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h @@ -84,23 +84,23 @@ int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc); int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, u8 *pg, u16 *ratelimit); /** - * mlx4_ALLOCATE_VPP_get - Query port VPP availible resources and allocation. - * Before distribution of VPPs to priorities, only availible_vpp is returned. + * mlx4_ALLOCATE_VPP_get - Query port VPP available resources and allocation. + * Before distribution of VPPs to priorities, only available_vpp is returned. * After initialization it returns the distribution of VPPs among priorities. * * @dev: mlx4_dev. * @port: Physical port number. - * @availible_vpp: Pointer to variable where number of availible VPPs is stored + * @available_vpp: Pointer to variable where number of available VPPs is stored * @vpp_p_up: Distribution of VPPs to priorities is stored in this array * * Returns 0 on success or a negative mlx4_core errno code. **/ int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port, - u16 *availible_vpp, u8 *vpp_p_up); + u16 *available_vpp, u8 *vpp_p_up); /** * mlx4_ALLOCATE_VPP_set - Distribution of VPPs among differnt priorities. * The total number of VPPs assigned to all for a port must not exceed - * the value reported by availible_vpp in mlx4_ALLOCATE_VPP_get. + * the value reported by available_vpp in mlx4_ALLOCATE_VPP_get. * VPP allocation is allowed only after the port type has been set, * and while no QPs are open for this port. * diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index e1f9e7cebf8f..a822f7a56bc5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -251,8 +251,7 @@ int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev) MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); } -int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj, - gfp_t gfp) +int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj) { u32 i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size); @@ -266,7 +265,7 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj, } table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT, - (table->lowmem ? gfp : GFP_HIGHUSER) | + (table->lowmem ? 
GFP_KERNEL : GFP_HIGHUSER) | __GFP_NOWARN, table->coherent); if (!table->icm[i]) { ret = -ENOMEM; @@ -363,7 +362,7 @@ int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 i; for (i = start; i <= end; i += inc) { - err = mlx4_table_get(dev, table, i, GFP_KERNEL); + err = mlx4_table_get(dev, table, i); if (err) goto fail; } @@ -401,7 +400,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size; num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk; - table->icm = kcalloc(num_icm, sizeof *table->icm, GFP_KERNEL); + table->icm = kcalloc(num_icm, sizeof(*table->icm), GFP_KERNEL); if (!table->icm) return -ENOMEM; table->virt = virt; diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h index 0c7364550150..c9169a490557 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.h +++ b/drivers/net/ethernet/mellanox/mlx4/icm.h @@ -39,8 +39,8 @@ #include <linux/mutex.h> #define MLX4_ICM_CHUNK_LEN \ - ((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \ - (sizeof (struct scatterlist))) + ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \ + (sizeof(struct scatterlist))) enum { MLX4_ICM_PAGE_SHIFT = 12, @@ -71,8 +71,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, gfp_t gfp_mask, int coherent); void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent); -int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj, - gfp_t gfp); +int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj); void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj); int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 start, u32 end); diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index e00f627331cb..2edcce98ab2d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -53,7 +53,7 @@ static void mlx4_add_device(struct mlx4_interface *intf, struct mlx4_priv *priv) { struct mlx4_device_context *dev_ctx; - dev_ctx = kmalloc(sizeof *dev_ctx, GFP_KERNEL); + dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL); if (!dev_ctx) return; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 83aab1e4c8c8..e61c99ef741d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -34,6 +34,7 @@ */ #include <linux/module.h> +#include <linux/kernel.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/pci.h> @@ -91,7 +92,7 @@ module_param_array(probe_vf, byte, &probe_vfs_argc, 0444); MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n" "probe_vf=port1,port2,port1+2"); -int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; +static int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; module_param_named(log_num_mgm_entry_size, mlx4_log_num_mgm_entry_size, int, 0444); MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" @@ -119,9 +120,9 @@ MODULE_PARM_DESC(enable_4k_uar, static char mlx4_version[] = DRV_NAME ": Mellanox ConnectX core driver v" - DRV_VERSION " (" DRV_RELDATE ")\n"; + DRV_VERSION "\n"; -static struct mlx4_profile default_profile = { +static const struct mlx4_profile default_profile = { .num_qp = 1 << 18, .num_srq = 1 << 16, .rdmarc_per_qp = 1 << 4, @@ -131,7 +132,7 @@ static 
struct mlx4_profile default_profile = { .num_mtt = 1 << 20, /* It is really num mtt segements */ }; -static struct mlx4_profile low_mem_profile = { +static const struct mlx4_profile low_mem_profile = { .num_qp = 1 << 17, .num_srq = 1 << 6, .rdmarc_per_qp = 1 << 4, @@ -424,13 +425,15 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.stat_rate_support = dev_cap->stat_rate_support; dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; + dev->caps.wol_port[1] = dev_cap->wol_port[1]; + dev->caps.wol_port[2] = dev_cap->wol_port[2]; /* Save uar page shift */ if (!mlx4_is_slave(dev)) { /* Virtual PCI function needs to determine UAR page size from * firmware. Only master PCI function can set the uar page size */ - if (enable_4k_uar) + if (enable_4k_uar || !dev->persist->num_vfs) dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT; else dev->uar_page_shift = PAGE_SHIFT; @@ -817,38 +820,93 @@ static void slave_adjust_steering_mode(struct mlx4_dev *dev, mlx4_steering_mode_str(dev->caps.steering_mode)); } +static void mlx4_slave_destroy_special_qp_cap(struct mlx4_dev *dev) +{ + kfree(dev->caps.spec_qps); + dev->caps.spec_qps = NULL; +} + +static int mlx4_slave_special_qp_cap(struct mlx4_dev *dev) +{ + struct mlx4_func_cap *func_cap = NULL; + struct mlx4_caps *caps = &dev->caps; + int i, err = 0; + + func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL); + caps->spec_qps = kcalloc(caps->num_ports, sizeof(*caps->spec_qps), GFP_KERNEL); + + if (!func_cap || !caps->spec_qps) { + mlx4_err(dev, "Failed to allocate memory for special qps cap\n"); + err = -ENOMEM; + goto err_mem; + } + + for (i = 1; i <= caps->num_ports; ++i) { + err = mlx4_QUERY_FUNC_CAP(dev, i, func_cap); + if (err) { + mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n", + i, err); + goto err_mem; + } + caps->spec_qps[i - 1] = func_cap->spec_qps; + caps->port_mask[i] = caps->port_type[i]; + caps->phys_port_id[i] = func_cap->phys_port_id; + err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, + &caps->gid_table_len[i], + &caps->pkey_table_len[i]); + if (err) { + mlx4_err(dev, "QUERY_PORT command failed for port %d, aborting (%d)\n", + i, err); + goto err_mem; + } + } + +err_mem: + if (err) + mlx4_slave_destroy_special_qp_cap(dev); + kfree(func_cap); + return err; +} + static int mlx4_slave_cap(struct mlx4_dev *dev) { int err; u32 page_size; - struct mlx4_dev_cap dev_cap; - struct mlx4_func_cap func_cap; - struct mlx4_init_hca_param hca_param; - u8 i; + struct mlx4_dev_cap *dev_cap = NULL; + struct mlx4_func_cap *func_cap = NULL; + struct mlx4_init_hca_param *hca_param = NULL; + + hca_param = kzalloc(sizeof(*hca_param), GFP_KERNEL); + func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL); + dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL); + if (!hca_param || !func_cap || !dev_cap) { + mlx4_err(dev, "Failed to allocate memory for slave_cap\n"); + err = -ENOMEM; + goto free_mem; + } - memset(&hca_param, 0, sizeof(hca_param)); - err = mlx4_QUERY_HCA(dev, &hca_param); + err = mlx4_QUERY_HCA(dev, hca_param); if (err) { mlx4_err(dev, "QUERY_HCA command failed, aborting\n"); - return err; + goto free_mem; } /* fail if the hca has an unknown global capability * at this time global_caps should be always zeroed */ - if (hca_param.global_caps) { + if (hca_param->global_caps) { mlx4_err(dev, "Unknown hca global capabilities\n"); - return -EINVAL; + err = -EINVAL; + goto free_mem; } - dev->caps.hca_core_clock = hca_param.hca_core_clock; + dev->caps.hca_core_clock = 
hca_param->hca_core_clock; - memset(&dev_cap, 0, sizeof(dev_cap)); - dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp; - err = mlx4_dev_cap(dev, &dev_cap); + dev->caps.max_qp_dest_rdma = 1 << hca_param->log_rd_per_qp; + err = mlx4_dev_cap(dev, dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n"); - return err; + goto free_mem; } err = mlx4_QUERY_FW(dev); @@ -860,21 +918,23 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) if (page_size > PAGE_SIZE) { mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n", page_size, PAGE_SIZE); - return -ENODEV; + err = -ENODEV; + goto free_mem; } /* Set uar_page_shift for VF */ - dev->uar_page_shift = hca_param.uar_page_sz + 12; + dev->uar_page_shift = hca_param->uar_page_sz + 12; /* Make sure the master uar page size is valid */ if (dev->uar_page_shift > PAGE_SHIFT) { mlx4_err(dev, "Invalid configuration: uar page size is larger than system page size\n"); - return -ENODEV; + err = -ENODEV; + goto free_mem; } /* Set reserved_uars based on the uar_page_shift */ - mlx4_set_num_reserved_uars(dev, &dev_cap); + mlx4_set_num_reserved_uars(dev, dev_cap); /* Although uar page size in FW differs from system page size, * upper software layers (mlx4_ib, mlx4_en and part of mlx4_core) @@ -882,34 +942,35 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) */ dev->caps.uar_page_size = PAGE_SIZE; - memset(&func_cap, 0, sizeof(func_cap)); - err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); + err = mlx4_QUERY_FUNC_CAP(dev, 0, func_cap); if (err) { mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n", err); - return err; + goto free_mem; } - if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != + if ((func_cap->pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != PF_CONTEXT_BEHAVIOUR_MASK) { mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n", - func_cap.pf_context_behaviour, PF_CONTEXT_BEHAVIOUR_MASK); - return -EINVAL; - } - - dev->caps.num_ports = func_cap.num_ports; - dev->quotas.qp = func_cap.qp_quota; - dev->quotas.srq = func_cap.srq_quota; - dev->quotas.cq = func_cap.cq_quota; - dev->quotas.mpt = func_cap.mpt_quota; - dev->quotas.mtt = func_cap.mtt_quota; - dev->caps.num_qps = 1 << hca_param.log_num_qps; - dev->caps.num_srqs = 1 << hca_param.log_num_srqs; - dev->caps.num_cqs = 1 << hca_param.log_num_cqs; - dev->caps.num_mpts = 1 << hca_param.log_mpt_sz; - dev->caps.num_eqs = func_cap.max_eq; - dev->caps.reserved_eqs = func_cap.reserved_eq; - dev->caps.reserved_lkey = func_cap.reserved_lkey; + func_cap->pf_context_behaviour, + PF_CONTEXT_BEHAVIOUR_MASK); + err = -EINVAL; + goto free_mem; + } + + dev->caps.num_ports = func_cap->num_ports; + dev->quotas.qp = func_cap->qp_quota; + dev->quotas.srq = func_cap->srq_quota; + dev->quotas.cq = func_cap->cq_quota; + dev->quotas.mpt = func_cap->mpt_quota; + dev->quotas.mtt = func_cap->mtt_quota; + dev->caps.num_qps = 1 << hca_param->log_num_qps; + dev->caps.num_srqs = 1 << hca_param->log_num_srqs; + dev->caps.num_cqs = 1 << hca_param->log_num_cqs; + dev->caps.num_mpts = 1 << hca_param->log_mpt_sz; + dev->caps.num_eqs = func_cap->max_eq; + dev->caps.reserved_eqs = func_cap->reserved_eq; + dev->caps.reserved_lkey = func_cap->reserved_lkey; dev->caps.num_pds = MLX4_NUM_PDS; dev->caps.num_mgms = 0; dev->caps.num_amgms = 0; @@ -917,43 +978,16 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) if (dev->caps.num_ports > MLX4_MAX_PORTS) { mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n", 
dev->caps.num_ports, MLX4_MAX_PORTS); - return -ENODEV; + err = -ENODEV; + goto free_mem; } mlx4_replace_zero_macs(dev); - dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL); - dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - - if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || - !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy || - !dev->caps.qp0_qkey) { - err = -ENOMEM; - goto err_mem; - } - - for (i = 1; i <= dev->caps.num_ports; ++i) { - err = mlx4_QUERY_FUNC_CAP(dev, i, &func_cap); - if (err) { - mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n", - i, err); - goto err_mem; - } - dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey; - dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; - dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; - dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; - dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; - dev->caps.port_mask[i] = dev->caps.port_type[i]; - dev->caps.phys_port_id[i] = func_cap.phys_port_id; - err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, - &dev->caps.gid_table_len[i], - &dev->caps.pkey_table_len[i]); - if (err) - goto err_mem; + err = mlx4_slave_special_qp_cap(dev); + if (err) { + mlx4_err(dev, "Set special QP caps failed. aborting\n"); + goto free_mem; } if (dev->caps.uar_page_size * (dev->caps.num_uars - @@ -968,7 +1002,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) goto err_mem; } - if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { + if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { dev->caps.eqe_size = 64; dev->caps.eqe_factor = 1; } else { @@ -976,20 +1010,20 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) dev->caps.eqe_factor = 0; } - if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { + if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { dev->caps.cqe_size = 64; dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE; } else { dev->caps.cqe_size = 32; } - if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) { - dev->caps.eqe_size = hca_param.eqe_size; + if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) { + dev->caps.eqe_size = hca_param->eqe_size; dev->caps.eqe_factor = 0; } - if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) { - dev->caps.cqe_size = hca_param.cqe_size; + if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) { + dev->caps.cqe_size = hca_param->cqe_size; /* User still need to know when CQE > 32B */ dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE; } @@ -997,31 +1031,27 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; mlx4_warn(dev, "Timestamping is not supported in slave mode\n"); - slave_adjust_steering_mode(dev, &dev_cap, &hca_param); + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_USER_MAC_EN; + mlx4_dbg(dev, "User MAC FW update is not supported in slave mode\n"); + + slave_adjust_steering_mode(dev, dev_cap, hca_param); mlx4_dbg(dev, "RSS support for IP fragments is %s\n", - hca_param.rss_ip_frags ? "on" : "off"); + hca_param->rss_ip_frags ? 
"on" : "off"); - if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP && + if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP && dev->caps.bf_reg_size) dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP; - if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP) + if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP) dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP; - return 0; - err_mem: - kfree(dev->caps.qp0_qkey); - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); - dev->caps.qp0_qkey = NULL; - dev->caps.qp0_tunnel = NULL; - dev->caps.qp0_proxy = NULL; - dev->caps.qp1_tunnel = NULL; - dev->caps.qp1_proxy = NULL; - + if (err) + mlx4_slave_destroy_special_qp_cap(dev); +free_mem: + kfree(hca_param); + kfree(func_cap); + kfree(dev_cap); return err; } @@ -2275,7 +2305,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev) dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1; - if (enable_4k_uar) { + if (enable_4k_uar || !dev->persist->num_vfs) { init_hca.log_uar_sz = ilog2(dev->caps.num_uars) + PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT; init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12; @@ -2356,8 +2386,8 @@ static int mlx4_init_hca(struct mlx4_dev *dev) MLX4_A0_STEERING_TABLE_SIZE; } - mlx4_dbg(dev, "DMFS high rate steer mode is: %s\n", - dmfs_high_rate_steering_mode_str( + mlx4_info(dev, "DMFS high rate steer mode is: %s\n", + dmfs_high_rate_steering_mode_str( dev->caps.dmfs_high_steer_mode)); } } else { @@ -2397,7 +2427,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev) dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2; } priv->eq_table.inta_pin = adapter.inta_pin; - memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id); + memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id)); return 0; @@ -2405,13 +2435,8 @@ unmap_bf: unmap_internal_clock(dev); unmap_bf_area(dev); - if (mlx4_is_slave(dev)) { - kfree(dev->caps.qp0_qkey); - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); - } + if (mlx4_is_slave(dev)) + mlx4_slave_destroy_special_qp_cap(dev); err_close: if (mlx4_is_slave(dev)) @@ -2475,7 +2500,7 @@ static int mlx4_allocate_default_counters(struct mlx4_dev *dev) priv->def_counter[port] = -1; for (port = 0; port < dev->caps.num_ports; port++) { - err = mlx4_counter_alloc(dev, &idx); + err = mlx4_counter_alloc(dev, &idx, MLX4_RES_USAGE_DRIVER); if (!err || err == -ENOSPC) { priv->def_counter[port] = idx; @@ -2517,13 +2542,14 @@ int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) return 0; } -int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) +int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx, u8 usage) { + u32 in_modifier = RES_COUNTER | (((u32)usage & 3) << 30); u64 out_param; int err; if (mlx4_is_mfunc(dev)) { - err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER, + err = mlx4_cmd_imm(dev, 0, &out_param, in_modifier, RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (!err) @@ -2867,7 +2893,7 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) dev->caps.num_eqs - dev->caps.reserved_eqs, MAX_MSIX); - entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL); + entries = kcalloc(nreq, sizeof(*entries), GFP_KERNEL); if (!entries) goto no_msi; @@ -3594,13 +3620,8 @@ err_master_mfunc: mlx4_multi_func_cleanup(dev); } - if (mlx4_is_slave(dev)) { - kfree(dev->caps.qp0_qkey); - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - 
kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); - } + if (mlx4_is_slave(dev)) + mlx4_slave_destroy_special_qp_cap(dev); err_close: mlx4_close_hca(dev); @@ -3656,7 +3677,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, * per port, we must limit the number of VFs to 63 (since their are * 128 MACs) */ - for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && i < num_vfs_argc; + for (i = 0; i < ARRAY_SIZE(nvfs) && i < num_vfs_argc; total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) { nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i]; if (nvfs[i] < 0) { @@ -3665,7 +3686,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, goto err_disable_pdev; } } - for (i = 0; i < sizeof(prb_vf)/sizeof(prb_vf[0]) && i < probe_vfs_argc; + for (i = 0; i < ARRAY_SIZE(prb_vf) && i < probe_vfs_argc; i++) { prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i]; if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) { @@ -3744,11 +3765,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, if (total_vfs) { unsigned vfs_offset = 0; - for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && + for (i = 0; i < ARRAY_SIZE(nvfs) && vfs_offset + nvfs[i] < extended_func_num(pdev); vfs_offset += nvfs[i], i++) ; - if (i == sizeof(nvfs)/sizeof(nvfs[0])) { + if (i == ARRAY_SIZE(nvfs)) { err = -ENODEV; goto err_release_regions; } @@ -3780,7 +3801,6 @@ err_release_regions: err_disable_pdev: mlx4_pci_disable_device(&priv->dev); - pci_set_drvdata(pdev, NULL); return err; } @@ -3941,11 +3961,7 @@ static void mlx4_unload_one(struct pci_dev *pdev) if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); - kfree(dev->caps.qp0_qkey); - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); + mlx4_slave_destroy_special_qp_cap(dev); kfree(dev->dev_vfs); mlx4_clean_dev(dev); @@ -3995,7 +4011,6 @@ static void mlx4_remove_one(struct pci_dev *pdev) devlink_unregister(devlink); kfree(dev->persist); devlink_free(devlink); - pci_set_drvdata(pdev, NULL); } static int restore_current_port_types(struct mlx4_dev *dev, diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 0710b3677464..4c5306dbcf11 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -162,7 +162,7 @@ static int new_steering_entry(struct mlx4_dev *dev, u8 port, return -EINVAL; s_steer = &mlx4_priv(dev)->steer[port - 1]; - new_entry = kzalloc(sizeof *new_entry, GFP_KERNEL); + new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL); if (!new_entry) return -ENOMEM; @@ -175,7 +175,7 @@ static int new_steering_entry(struct mlx4_dev *dev, u8 port, */ pqp = get_promisc_qp(dev, port, steer, qpn); if (pqp) { - dqp = kmalloc(sizeof *dqp, GFP_KERNEL); + dqp = kmalloc(sizeof(*dqp), GFP_KERNEL); if (!dqp) { err = -ENOMEM; goto out_alloc; @@ -274,7 +274,7 @@ static int existing_steering_entry(struct mlx4_dev *dev, u8 port, } /* add the qp as a duplicate on this index */ - dqp = kmalloc(sizeof *dqp, GFP_KERNEL); + dqp = kmalloc(sizeof(*dqp), GFP_KERNEL); if (!dqp) return -ENOMEM; dqp->qpn = qpn; @@ -443,7 +443,7 @@ static int add_promisc_qp(struct mlx4_dev *dev, u8 port, goto out_mutex; } - pqp = kmalloc(sizeof *pqp, GFP_KERNEL); + pqp = kmalloc(sizeof(*pqp), GFP_KERNEL); if (!pqp) { err = -ENOMEM; goto out_mutex; @@ -514,7 +514,7 @@ static int add_promisc_qp(struct mlx4_dev *dev, u8 port, /* add the new qpn to list of promisc qps */ list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]); /* now need 
to add all the promisc qps to default entry */ - memset(mgm, 0, sizeof *mgm); + memset(mgm, 0, sizeof(*mgm)); members_count = 0; list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list) { if (members_count == dev->caps.num_qp_per_mgm) { @@ -1144,7 +1144,7 @@ int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], index += dev->caps.num_mgms; new_entry = 1; - memset(mgm, 0, sizeof *mgm); + memset(mgm, 0, sizeof(*mgm)); memcpy(mgm->gid, gid, 16); } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index b4f1bc56cc68..c68da1986e51 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -56,8 +56,7 @@ #define DRV_NAME "mlx4_core" #define PFX DRV_NAME ": " -#define DRV_VERSION "2.2-1" -#define DRV_RELDATE "Feb, 2014" +#define DRV_VERSION "4.0-0" #define MLX4_FS_UDP_UC_EN (1 << 1) #define MLX4_FS_TCP_UC_EN (1 << 2) @@ -231,7 +230,6 @@ do { \ #define mlx4_warn(mdev, format, ...) \ dev_warn(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) -extern int mlx4_log_num_mgm_entry_size; extern int log_mtts_per_seg; extern int mlx4_internal_err_reset; @@ -628,7 +626,7 @@ struct mlx4_mgm { }; struct mlx4_cmd { - struct pci_pool *pool; + struct dma_pool *pool; void __iomem *hcr; struct mutex slave_cmd_mutex; struct semaphore poll_sem; @@ -809,6 +807,8 @@ struct mlx4_set_port_general_context { u8 phv_en; u8 reserved6[5]; __be16 user_mtu; + u16 reserved7; + u8 user_mac[6]; }; struct mlx4_set_port_rqp_calc_context { @@ -971,7 +971,7 @@ void mlx4_cleanup_cq_table(struct mlx4_dev *dev); void mlx4_cleanup_qp_table(struct mlx4_dev *dev); void mlx4_cleanup_srq_table(struct mlx4_dev *dev); void mlx4_cleanup_mcg_table(struct mlx4_dev *dev); -int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp); +int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn); void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn); int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn); void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn); @@ -979,7 +979,7 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn); void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn); int __mlx4_mpt_reserve(struct mlx4_dev *dev); void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index); -int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp); +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index); void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index); u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order); void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 39f401aa3047..fdb3ad0cbe54 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -58,8 +58,7 @@ #include "mlx4_stats.h" #define DRV_NAME "mlx4_en" -#define DRV_VERSION "2.2-1" -#define DRV_RELDATE "Feb 2014" +#define DRV_VERSION "4.0-0" #define MLX4_EN_MSG_LEVEL (NETIF_MSG_LINK | NETIF_MSG_IFDOWN) @@ -73,7 +72,8 @@ #define DEF_RX_RINGS 16 #define MAX_RX_RINGS 128 #define MIN_RX_RINGS 4 -#define TXBB_SIZE 64 +#define LOG_TXBB_SIZE 6 +#define TXBB_SIZE BIT(LOG_TXBB_SIZE) #define HEADROOM (2048 / TXBB_SIZE + 1) #define STAMP_STRIDE 64 #define STAMP_DWORDS (STAMP_STRIDE / 4) @@ -115,14 +115,14 @@ #define MLX4_EN_SMALL_PKT_SIZE 64 #define MLX4_EN_MIN_TX_RING_P_UP 1 #define MLX4_EN_MAX_TX_RING_P_UP 32 -#define MLX4_EN_NUM_UP 8 -#define 
MLX4_EN_DEF_TX_RING_SIZE 512 +#define MLX4_EN_NUM_UP_LOW 1 +#define MLX4_EN_NUM_UP_HIGH 8 #define MLX4_EN_DEF_RX_RING_SIZE 1024 +#define MLX4_EN_DEF_TX_RING_SIZE MLX4_EN_DEF_RX_RING_SIZE #define MAX_TX_RINGS (MLX4_EN_MAX_TX_RING_P_UP * \ - MLX4_EN_NUM_UP) + MLX4_EN_NUM_UP_HIGH) #define MLX4_EN_DEFAULT_TX_WORK 256 -#define MLX4_EN_DOORBELL_BUDGET 8 /* Target number of packets to coalesce with interrupt moderation */ #define MLX4_EN_RX_COAL_TARGET 44 @@ -277,7 +277,7 @@ struct mlx4_en_tx_ring { struct netdev_queue *tx_queue; u32 (*free_tx_desc)(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int index, u8 owner, + int index, u64 timestamp, int napi_mode); struct mlx4_en_rx_ring *recycle_ring; @@ -360,7 +360,10 @@ struct mlx4_en_cq { struct mlx4_hwq_resources wqres; int ring; struct net_device *dev; - struct napi_struct napi; + union { + struct napi_struct napi; + bool xdp_busy; + }; int size; int buf_size; int vector; @@ -384,6 +387,7 @@ struct mlx4_en_port_profile { u8 rx_ppp; u8 tx_pause; u8 tx_ppp; + u8 num_up; int rss_rings; int inline_thold; struct hwtstamp_config hwtstamp_config; @@ -432,7 +436,7 @@ struct mlx4_en_rss_map { int base_qpn; struct mlx4_qp qps[MAX_RX_RINGS]; enum mlx4_qp_state state[MAX_RX_RINGS]; - struct mlx4_qp indir_qp; + struct mlx4_qp *indir_qp; enum mlx4_qp_state indir_state; }; @@ -483,7 +487,7 @@ enum dcb_pfc_type { struct mlx4_en_cee_config { bool pfc_state; - enum dcb_pfc_type dcb_pfc[MLX4_EN_NUM_UP]; + enum dcb_pfc_type dcb_pfc[MLX4_EN_NUM_UP_HIGH]; }; #endif @@ -681,7 +685,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, int cq_idx); void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); -int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); +void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); void mlx4_en_tx_irq(struct mlx4_cq *mcq); u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, @@ -690,7 +694,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, struct mlx4_en_rx_alloc *frame, struct net_device *dev, unsigned int length, - int tx_ind, int *doorbell_pending); + int tx_ind, bool *doorbell_pending); void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring); bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_alloc *frame); @@ -722,13 +726,15 @@ int mlx4_en_process_rx_cq(struct net_device *dev, int budget); int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget); int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget); +bool mlx4_en_process_tx_cq(struct net_device *dev, + struct mlx4_en_cq *cq, int napi_budget); u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int index, u8 owner, u64 timestamp, + int index, u64 timestamp, int napi_mode); u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int index, u8 owner, u64 timestamp, + int index, u64 timestamp, int napi_mode); void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, int is_tx, int rss, int qpn, int cqn, int user_prio, @@ -757,6 +763,7 @@ extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops; #endif int mlx4_en_setup_tc(struct net_device *dev, u8 up); +int mlx4_en_alloc_tx_queue_per_tc(struct net_device *dev, u8 tc); #ifdef CONFIG_RFS_ACCEL void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv); diff 
--git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h index 926f3c3f3665..aab28eb27a30 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _MLX4_STATS_ #define _MLX4_STATS_ diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index ce852ca22a96..c7c0764991c9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -106,9 +106,9 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) buddy->max_order = max_order; spin_lock_init(&buddy->lock); - buddy->bits = kcalloc(buddy->max_order + 1, sizeof (long *), + buddy->bits = kcalloc(buddy->max_order + 1, sizeof(long *), GFP_KERNEL); - buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free, + buddy->num_free = kcalloc(buddy->max_order + 1, sizeof(*buddy->num_free), GFP_KERNEL); if (!buddy->bits || !buddy->num_free) goto err_out; @@ -479,14 +479,14 @@ static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index) __mlx4_mpt_release(dev, index); } -int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp) +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) { struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; - return mlx4_table_get(dev, &mr_table->dmpt_table, index, gfp); + return mlx4_table_get(dev, &mr_table->dmpt_table, index); } -static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp) +static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) { u64 param = 0; @@ -497,7 +497,7 @@ static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp) MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } - return __mlx4_mpt_alloc_icm(dev, index, gfp); + return __mlx4_mpt_alloc_icm(dev, index); } void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index) @@ -629,7 +629,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) struct mlx4_mpt_entry *mpt_entry; int err; - err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key), GFP_KERNEL); + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key)); if (err) return err; @@ -703,13 +703,13 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt, return -ENOMEM; dma_sync_single_for_cpu(&dev->persist->pdev->dev, dma_handle, - npages * sizeof (u64), DMA_TO_DEVICE); + npages * sizeof(u64), DMA_TO_DEVICE); for (i = 0; i < npages; ++i) mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); dma_sync_single_for_device(&dev->persist->pdev->dev, dma_handle, - npages * sizeof (u64), DMA_TO_DEVICE); + npages * sizeof(u64), DMA_TO_DEVICE); return 0; } @@ -787,14 +787,13 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, EXPORT_SYMBOL_GPL(mlx4_write_mtt); int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, - struct mlx4_buf *buf, gfp_t gfp) + struct mlx4_buf *buf) { u64 *page_list; int err; int i; - page_list = kmalloc(buf->npages * sizeof *page_list, - gfp); + page_list = kcalloc(buf->npages, sizeof(*page_list), GFP_KERNEL); if (!page_list) return -ENOMEM; @@ -841,7 +840,7 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw) struct mlx4_mpt_entry *mpt_entry; int err; - err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key), GFP_KERNEL); + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key)); if (err) return err; @@ -1053,7 +1052,7 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int 
max_pages, return -EINVAL; /* All MTTs must fit in the same page */ - if (max_pages * sizeof *fmr->mtts > PAGE_SIZE) + if (max_pages * sizeof(*fmr->mtts) > PAGE_SIZE) return -EINVAL; fmr->page_shift = page_shift; diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 4e36e287d605..3ef3406ff4cb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -52,6 +52,7 @@ #define MLX4_FLAG2_V_IGNORE_FCS_MASK BIT(1) #define MLX4_FLAG2_V_USER_MTU_MASK BIT(5) +#define MLX4_FLAG2_V_USER_MAC_MASK BIT(6) #define MLX4_FLAG_V_MTU_MASK BIT(0) #define MLX4_FLAG_V_PPRX_MASK BIT(1) #define MLX4_FLAG_V_PPTX_MASK BIT(2) @@ -1700,6 +1701,30 @@ int mlx4_SET_PORT_user_mtu(struct mlx4_dev *dev, u8 port, u16 user_mtu) } EXPORT_SYMBOL(mlx4_SET_PORT_user_mtu); +int mlx4_SET_PORT_user_mac(struct mlx4_dev *dev, u8 port, u8 *user_mac) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_general_context *context; + u32 in_mod; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + context->flags2 |= MLX4_FLAG2_V_USER_MAC_MASK; + memcpy(context->user_mac, user_mac, sizeof(context->user_mac)); + + in_mod = MLX4_SET_PORT_GENERAL << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_user_mac); + int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, u8 ignore_fcs_value) { struct mlx4_cmd_mailbox *mailbox; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 5a310d313e94..728a2fb1f5c0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -174,7 +174,7 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, cpu_to_be16(mlx4_qp_roce_entropy(dev, qp->qpn)); *(__be32 *) mailbox->buf = cpu_to_be32(optpar); - memcpy(mailbox->buf + 8, context, sizeof *context); + memcpy(mailbox->buf + 8, context, sizeof(*context)); ((struct mlx4_qp_context *) (mailbox->buf + 8))->local_qpn = cpu_to_be32(qp->qpn); @@ -245,8 +245,9 @@ int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, } int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, - int *base, u8 flags) + int *base, u8 flags, u8 usage) { + u32 in_modifier = RES_QP | (((u32)usage & 3) << 30); u64 in_param = 0; u64 out_param; int err; @@ -258,7 +259,7 @@ int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, set_param_l(&in_param, (((u32)flags) << 24) | (u32)cnt); set_param_h(&in_param, align); err = mlx4_cmd_imm(dev, in_param, &out_param, - RES_QP, RES_OP_RESERVE, + in_modifier, RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) @@ -301,29 +302,29 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) } EXPORT_SYMBOL_GPL(mlx4_qp_release_range); -int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp) +int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; int err; - err = mlx4_table_get(dev, &qp_table->qp_table, qpn, gfp); + err = mlx4_table_get(dev, &qp_table->qp_table, qpn); if (err) goto err_out; - err = mlx4_table_get(dev, &qp_table->auxc_table, qpn, gfp); + err = mlx4_table_get(dev, &qp_table->auxc_table, qpn); if 
(err) goto err_put_qp; - err = mlx4_table_get(dev, &qp_table->altc_table, qpn, gfp); + err = mlx4_table_get(dev, &qp_table->altc_table, qpn); if (err) goto err_put_auxc; - err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn, gfp); + err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn); if (err) goto err_put_altc; - err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn, gfp); + err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn); if (err) goto err_put_rdmarc; @@ -345,7 +346,7 @@ err_out: return err; } -static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp) +static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) { u64 param = 0; @@ -355,7 +356,7 @@ static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp) MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } - return __mlx4_qp_alloc_icm(dev, qpn, gfp); + return __mlx4_qp_alloc_icm(dev, qpn); } void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn) @@ -397,7 +398,7 @@ struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) return qp; } -int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp) +int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; @@ -408,7 +409,7 @@ int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp) qp->qpn = qpn; - err = mlx4_qp_alloc_icm(dev, qpn, gfp); + err = mlx4_qp_alloc_icm(dev, qpn); if (err) return err; @@ -844,24 +845,21 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) /* In mfunc, calculate proxy and tunnel qp offsets for the PF here, * since the PF does not call mlx4_slave_caps */ - dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - - if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || - !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { + dev->caps.spec_qps = kcalloc(dev->caps.num_ports, + sizeof(*dev->caps.spec_qps), + GFP_KERNEL); + if (!dev->caps.spec_qps) { err = -ENOMEM; goto err_mem; } for (k = 0; k < dev->caps.num_ports; k++) { - dev->caps.qp0_proxy[k] = dev->phys_caps.base_proxy_sqpn + + dev->caps.spec_qps[k].qp0_proxy = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev) + k; - dev->caps.qp0_tunnel[k] = dev->caps.qp0_proxy[k] + 8 * MLX4_MFUNC_MAX; - dev->caps.qp1_proxy[k] = dev->phys_caps.base_proxy_sqpn + + dev->caps.spec_qps[k].qp0_tunnel = dev->caps.spec_qps[k].qp0_proxy + 8 * MLX4_MFUNC_MAX; + dev->caps.spec_qps[k].qp1_proxy = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k; - dev->caps.qp1_tunnel[k] = dev->caps.qp1_proxy[k] + 8 * MLX4_MFUNC_MAX; + dev->caps.spec_qps[k].qp1_tunnel = dev->caps.spec_qps[k].qp1_proxy + 8 * MLX4_MFUNC_MAX; } } @@ -873,12 +871,8 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) return err; err_mem: - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); - dev->caps.qp0_tunnel = dev->caps.qp0_proxy = - dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; + kfree(dev->caps.spec_qps); + dev->caps.spec_qps = NULL; mlx4_cleanup_qp_zones(dev); return err; } @@ -907,7 +901,7 @@ int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp, MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A, 
MLX4_CMD_WRAPPED); if (!err) - memcpy(context, mailbox->buf + 8, sizeof *context); + memcpy(context, mailbox->buf + 8, sizeof(*context)); mlx4_free_cmd_mailbox(dev, mailbox); return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 812783865205..fabb53379727 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1040,7 +1040,7 @@ static struct res_common *alloc_qp_tr(int id) { struct res_qp *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1058,7 +1058,7 @@ static struct res_common *alloc_mtt_tr(int id, int order) { struct res_mtt *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1074,7 +1074,7 @@ static struct res_common *alloc_mpt_tr(int id, int key) { struct res_mpt *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1089,7 +1089,7 @@ static struct res_common *alloc_eq_tr(int id) { struct res_eq *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1103,7 +1103,7 @@ static struct res_common *alloc_cq_tr(int id) { struct res_cq *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1118,7 +1118,7 @@ static struct res_common *alloc_srq_tr(int id) { struct res_srq *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1133,7 +1133,7 @@ static struct res_common *alloc_counter_tr(int id, int port) { struct res_counter *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1148,7 +1148,7 @@ static struct res_common *alloc_xrcdn_tr(int id) { struct res_xrcdn *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1162,7 +1162,7 @@ static struct res_common *alloc_fs_rule_tr(u64 id, int qpn) { struct res_fs_rule *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1274,7 +1274,7 @@ static int add_res_range(struct mlx4_dev *dev, int slave, u64 base, int count, struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct rb_root *root = &tracker->res_tree[type]; - res_arr = kzalloc(count * sizeof *res_arr, GFP_KERNEL); + res_arr = kcalloc(count, sizeof(*res_arr), GFP_KERNEL); if (!res_arr) return -ENOMEM; @@ -1822,7 +1822,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, return err; if (!fw_reserved(dev, qpn)) { - err = __mlx4_qp_alloc_icm(dev, qpn, GFP_KERNEL); + err = __mlx4_qp_alloc_icm(dev, qpn); if (err) { res_abort_move(dev, slave, RES_QP, qpn); return err; @@ -1909,7 +1909,7 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - err = __mlx4_mpt_alloc_icm(dev, mpt->key, GFP_KERNEL); + err = __mlx4_mpt_alloc_icm(dev, mpt->key); if (err) { res_abort_move(dev, slave, RES_MPT, id); return err; @@ -2027,7 +2027,7 @@ static int mac_add_to_slave(struct mlx4_dev *dev, int slave, u64 mac, int port, if (mlx4_grant_resource(dev, slave, RES_MAC, 1, port)) return -EINVAL; - res = kzalloc(sizeof *res, GFP_KERNEL); + res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) { 
mlx4_release_resource(dev, slave, RES_MAC, 1, port); return -ENOMEM; @@ -4020,7 +4020,7 @@ static int add_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, struct res_gid *res; int err; - res = kzalloc(sizeof *res, GFP_KERNEL); + res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index f44d089e2ca6..bedf52126824 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -100,11 +100,11 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn) if (*srqn == -1) return -ENOMEM; - err = mlx4_table_get(dev, &srq_table->table, *srqn, GFP_KERNEL); + err = mlx4_table_get(dev, &srq_table->table, *srqn); if (err) goto err_out; - err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn, GFP_KERNEL); + err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn); if (err) goto err_put; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 27251a78075c..fdaef00465d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -11,6 +11,20 @@ config MLX5_CORE Core driver for low level functionality of the ConnectX-4 and Connect-IB cards by Mellanox Technologies. +config MLX5_ACCEL + bool + +config MLX5_FPGA + bool "Mellanox Technologies Innova support" + depends on MLX5_CORE + select MLX5_ACCEL + ---help--- + Build support for the Innova family of network cards by Mellanox + Technologies. Innova network cards are comprised of a ConnectX chip + and an FPGA chip on one board. If you select this option, the + mlx5_core driver will include the Innova FPGA core and allow building + sandbox-specific client drivers. + config MLX5_CORE_EN bool "Mellanox Technologies ConnectX-4 Ethernet support" depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE @@ -20,6 +34,27 @@ config MLX5_CORE_EN ---help--- Ethernet support in Mellanox Technologies ConnectX-4 NIC. +config MLX5_MPFS + bool "Mellanox Technologies MLX5 MPFS support" + depends on MLX5_CORE_EN + default y + ---help--- + Mellanox Technologies Ethernet Multi-Physical Function Switch (MPFS) + support in ConnectX NIC. MPFs is required for when multi-PF configuration + is enabled to allow passing user configured unicast MAC addresses to the + requesting PF. + +config MLX5_ESWITCH + bool "Mellanox Technologies MLX5 SRIOV E-Switch support" + depends on MLX5_CORE_EN + default y + ---help--- + Mellanox Technologies Ethernet SRIOV E-Switch support in ConnectX NIC. + E-Switch provides internal SRIOV packet steering and switching for the + enabled VFs and PF in two available modes: + Legacy SRIOV mode (L2 mac vlan steering based). + Switchdev mode (eswitch offloads). + config MLX5_CORE_EN_DCB bool "Data Center Bridging (DCB) Support" default y @@ -38,3 +73,15 @@ config MLX5_CORE_IPOIB default n ---help--- MLX5 IPoIB offloads & acceleration support. + +config MLX5_EN_IPSEC + bool "IPSec XFRM cryptography-offload accelaration" + depends on MLX5_ACCEL + depends on MLX5_CORE_EN + depends on XFRM_OFFLOAD + depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD + default n + ---help--- + Build support for IPsec cryptography-offload accelaration in the NIC. + Note: Support for hardware with this capability needs to be selected + for this option to become available. 
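The help text above leaves the relationship between the new symbols implicit: MLX5_ACCEL is a hidden bool that MLX5_FPGA selects, and MLX5_EN_IPSEC can only be turned on once MLX5_ACCEL, MLX5_CORE_EN and the XFRM/ESP offload options are all enabled, so IPsec offload ends up gated twice, at build time by Kconfig and at run time by the device capability. The following is a minimal, illustrative C sketch of that double gate; the example_enable_ipsec_offload() wrapper is hypothetical, while MLX5_IPSEC_DEV() and mlx5_accel_ipsec_init() are the real symbols introduced by the accel/ipsec.h header later in this patch.

#include <linux/kconfig.h>
#include "accel/ipsec.h"	/* added by this patch */

static int example_enable_ipsec_offload(struct mlx5_core_dev *mdev)
{
	/* Compiled out entirely unless CONFIG_MLX5_EN_IPSEC=y. */
	if (!IS_ENABLED(CONFIG_MLX5_EN_IPSEC))
		return 0;

	/* Even when built in, only devices reporting the IPsec
	 * capability (e.g. Innova) activate the offload.
	 */
	if (!MLX5_IPSEC_DEV(mdev))
		return 0;

	return mlx5_accel_ipsec_init(mdev);
}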
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9e644615f07a..714dd0dc5eef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -1,15 +1,31 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_MLX5_CORE) += mlx5_core.o +subdir-ccflags-y += -I$(src) mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ - fs_counters.o rl.o lag.o dev.o + fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \ + diag/fs_tracepoint.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ - en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ - en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ - en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o en_selftest.o +mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o + +mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \ + fpga/ipsec.o + +mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ + en_tx.o en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ + en_arfs.o en_fs_ethtool.o en_selftest.o + +mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o + +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o en_rep.o en_tc.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o -mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib.o +mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o + +mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ + en_accel/ipsec_stats.o + +CFLAGS_tracepoint.o := -I$(src) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile new file mode 100644 index 000000000000..d8e17110f25d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c new file mode 100644 index 000000000000..53e69edaedde --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/mlx5/device.h> + +#include "accel/ipsec.h" +#include "mlx5_core.h" +#include "fpga/ipsec.h" + +void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev, + struct mlx5_accel_ipsec_sa *cmd) +{ + if (!MLX5_IPSEC_DEV(mdev)) + return ERR_PTR(-EOPNOTSUPP); + + return mlx5_fpga_ipsec_sa_cmd_exec(mdev, cmd); +} + +int mlx5_accel_ipsec_sa_cmd_wait(void *ctx) +{ + return mlx5_fpga_ipsec_sa_cmd_wait(ctx); +} + +u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) +{ + return mlx5_fpga_ipsec_device_caps(mdev); +} + +unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev) +{ + return mlx5_fpga_ipsec_counters_count(mdev); +} + +int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, + unsigned int count) +{ + return mlx5_fpga_ipsec_counters_read(mdev, counters, count); +} + +int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) +{ + return mlx5_fpga_ipsec_init(mdev); +} + +void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) +{ + mlx5_fpga_ipsec_cleanup(mdev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h new file mode 100644 index 000000000000..d6e20fea9554 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef __MLX5_ACCEL_IPSEC_H__ +#define __MLX5_ACCEL_IPSEC_H__ + +#ifdef CONFIG_MLX5_ACCEL + +#include <linux/mlx5/driver.h> + +enum { + MLX5_ACCEL_IPSEC_DEVICE = BIT(1), + MLX5_ACCEL_IPSEC_IPV6 = BIT(2), + MLX5_ACCEL_IPSEC_ESP = BIT(3), + MLX5_ACCEL_IPSEC_LSO = BIT(4), +}; + +#define MLX5_IPSEC_SADB_IP_AH BIT(7) +#define MLX5_IPSEC_SADB_IP_ESP BIT(6) +#define MLX5_IPSEC_SADB_SA_VALID BIT(5) +#define MLX5_IPSEC_SADB_SPI_EN BIT(4) +#define MLX5_IPSEC_SADB_DIR_SX BIT(3) +#define MLX5_IPSEC_SADB_IPV6 BIT(2) + +enum { + MLX5_IPSEC_CMD_ADD_SA = 0, + MLX5_IPSEC_CMD_DEL_SA = 1, +}; + +enum mlx5_accel_ipsec_enc_mode { + MLX5_IPSEC_SADB_MODE_NONE = 0, + MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128 = 1, + MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128 = 3, +}; + +#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \ + MLX5_ACCEL_IPSEC_DEVICE) + +struct mlx5_accel_ipsec_sa { + __be32 cmd; + u8 key_enc[32]; + u8 key_auth[32]; + __be32 sip[4]; + __be32 dip[4]; + union { + struct { + __be32 reserved; + u8 salt_iv[8]; + __be32 salt; + } __packed gcm; + struct { + u8 salt[16]; + } __packed cbc; + }; + __be32 spi; + __be32 sw_sa_handle; + __be16 tfclen; + u8 enc_mode; + u8 sip_masklen; + u8 dip_masklen; + u8 flags; + u8 reserved[2]; +} __packed; + +/** + * mlx5_accel_ipsec_sa_cmd_exec - Execute an IPSec SADB command + * @mdev: mlx5 device + * @cmd: command to execute + * May be called from atomic context. Returns context pointer, or error + * Caller must eventually call mlx5_accel_ipsec_sa_cmd_wait from non-atomic + * context, to cleanup the context pointer + */ +void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev, + struct mlx5_accel_ipsec_sa *cmd); + +/** + * mlx5_accel_ipsec_sa_cmd_wait - Wait for command execution completion + * @context: Context pointer returned from call to mlx5_accel_ipsec_sa_cmd_exec + * Sleeps (killable) until command execution is complete. 
+ * Returns the command result, or -EINTR if killed + */ +int mlx5_accel_ipsec_sa_cmd_wait(void *context); + +u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev); + +unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev); +int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, + unsigned int count); + +int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev); +void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev); + +#else + +#define MLX5_IPSEC_DEV(mdev) false + +static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) +{ +} + +#endif + +#endif /* __MLX5_ACCEL_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 66bd213f35ce..47239bf7bf43 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -258,6 +258,7 @@ EXPORT_SYMBOL_GPL(mlx5_db_alloc); void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) { u32 db_per_page = PAGE_SIZE / cache_line_size(); + mutex_lock(&dev->priv.pgdir_mutex); __set_bit(db->index, db->u.pgdir->bitmap); @@ -274,7 +275,6 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) } EXPORT_SYMBOL_GPL(mlx5_db_free); - void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas) { u64 addr; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 10d282841f5b..1fffdebbc9e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -217,7 +217,6 @@ static void free_cmd(struct mlx5_cmd_work_ent *ent) kfree(ent); } - static int verify_signature(struct mlx5_cmd_work_ent *ent) { struct mlx5_cmd_mailbox *next = ent->out->next; @@ -308,6 +307,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER: case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_FPGA_DESTROY_QP: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -420,6 +420,10 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_FLOW_COUNTER: case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_FPGA_CREATE_QP: + case MLX5_CMD_OP_FPGA_MODIFY_QP: + case MLX5_CMD_OP_FPGA_QUERY_QP: + case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -586,6 +590,11 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER); MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP); + MLX5_COMMAND_STR_CASE(FPGA_MODIFY_QP); + MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP); + MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP_COUNTERS); + MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP); default: return "unknown command opcode"; } } @@ -777,6 +786,10 @@ static void cb_timeout_handler(struct work_struct *work) mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); } +static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg); +static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, + struct mlx5_cmd_msg *msg); + static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); @@ 
-786,16 +799,28 @@ static void cmd_work_handler(struct work_struct *work) struct mlx5_cmd_layout *lay; struct semaphore *sem; unsigned long flags; + bool poll_cmd = ent->polling; + int alloc_ret; sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); if (!ent->page_queue) { - ent->idx = alloc_ent(cmd); - if (ent->idx < 0) { + alloc_ret = alloc_ent(cmd); + if (alloc_ret < 0) { mlx5_core_err(dev, "failed to allocate command entry\n"); + if (ent->callback) { + ent->callback(-EAGAIN, ent->context); + mlx5_free_cmd_msg(dev, ent->out); + free_msg(dev, ent->in); + free_cmd(ent); + } else { + ent->ret = -EAGAIN; + complete(&ent->done); + } up(sem); return; } + ent->idx = alloc_ret; } else { ent->idx = cmd->max_reg_cmds; spin_lock_irqsave(&cmd->alloc_lock, flags); @@ -846,7 +871,7 @@ static void cmd_work_handler(struct work_struct *work) iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell); mmiowb(); /* if not in polling don't use ent after this point */ - if (cmd->mode == CMD_MODE_POLLING) { + if (cmd->mode == CMD_MODE_POLLING || poll_cmd) { poll_timeout(ent); /* make sure we read the descriptor after ownership is SW */ rmb(); @@ -874,7 +899,7 @@ static const char *deliv_status_to_str(u8 status) case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR: return "command input length error"; case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR: - return "command ouput length error"; + return "command output length error"; case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR: return "reserved fields not cleared"; case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR: @@ -890,7 +915,7 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) struct mlx5_cmd *cmd = &dev->cmd; int err; - if (cmd->mode == CMD_MODE_POLLING) { + if (cmd->mode == CMD_MODE_POLLING || ent->polling) { wait_for_completion(&ent->done); } else if (!wait_for_completion_timeout(&ent->done, timeout)) { ent->ret = -ETIMEDOUT; @@ -918,7 +943,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, struct mlx5_cmd_msg *out, void *uout, int uout_size, mlx5_cmd_cbk_t callback, void *context, int page_queue, u8 *status, - u8 token) + u8 token, bool force_polling) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; @@ -936,6 +961,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, return PTR_ERR(ent); ent->token = token; + ent->polling = force_polling; if (!callback) init_completion(&ent->done); @@ -955,7 +981,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, err = wait_func(dev, ent); if (err == -ETIMEDOUT) - goto out_free; + goto out; ds = ent->ts2 - ent->ts1; op = MLX5_GET(mbox_in, in->first.data, opcode); @@ -1001,7 +1027,6 @@ static ssize_t dbg_write(struct file *filp, const char __user *buf, return err ? 
err : count; } - static const struct file_operations fops = { .owner = THIS_MODULE, .open = simple_open, @@ -1084,7 +1109,7 @@ static struct mlx5_cmd_mailbox *alloc_cmd_box(struct mlx5_core_dev *dev, if (!mailbox) return ERR_PTR(-ENOMEM); - mailbox->buf = pci_pool_zalloc(dev->cmd.pool, flags, + mailbox->buf = dma_pool_zalloc(dev->cmd.pool, flags, &mailbox->dma); if (!mailbox->buf) { mlx5_core_dbg(dev, "failed allocation\n"); @@ -1099,7 +1124,7 @@ static struct mlx5_cmd_mailbox *alloc_cmd_box(struct mlx5_core_dev *dev, static void free_cmd_box(struct mlx5_core_dev *dev, struct mlx5_cmd_mailbox *mailbox) { - pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma); + dma_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma); kfree(mailbox); } @@ -1153,7 +1178,7 @@ err_alloc: } static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, - struct mlx5_cmd_msg *msg) + struct mlx5_cmd_msg *msg) { struct mlx5_cmd_mailbox *head = msg->next; struct mlx5_cmd_mailbox *next; @@ -1419,6 +1444,7 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n", ent->idx); free_ent(cmd, ent->idx); + free_cmd(ent); } continue; } @@ -1477,7 +1503,8 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) free_msg(dev, ent->in); err = err ? err : ent->status; - free_cmd(ent); + if (!forced) + free_cmd(ent); callback(err, context); } else { complete(&ent->done); @@ -1537,7 +1564,8 @@ static int is_manage_pages(void *in) } static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, - int out_size, mlx5_cmd_cbk_t callback, void *context) + int out_size, mlx5_cmd_cbk_t callback, void *context, + bool force_polling) { struct mlx5_cmd_msg *inb; struct mlx5_cmd_msg *outb; @@ -1582,7 +1610,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, } err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context, - pages_queue, &status, token); + pages_queue, &status, token, force_polling); if (err) goto out_out; @@ -1610,7 +1638,7 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, { int err; - err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL); + err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); return err ? : mlx5_cmd_check(dev, in, out); } EXPORT_SYMBOL(mlx5_cmd_exec); @@ -1619,10 +1647,22 @@ int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size, mlx5_cmd_cbk_t callback, void *context) { - return cmd_exec(dev, in, in_size, out, out_size, callback, context); + return cmd_exec(dev, in, in_size, out, out_size, callback, context, + false); } EXPORT_SYMBOL(mlx5_cmd_exec_cb); +int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, + void *out, int out_size) +{ + int err; + + err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); + + return err ? 
: mlx5_cmd_check(dev, in, out); +} +EXPORT_SYMBOL(mlx5_cmd_exec_polling); + static void destroy_msg_cache(struct mlx5_core_dev *dev) { struct cmd_msg_cache *ch; @@ -1735,7 +1775,8 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) return -EINVAL; } - cmd->pool = pci_pool_create("mlx5_cmd", dev->pdev, size, align, 0); + cmd->pool = dma_pool_create("mlx5_cmd", &dev->pdev->dev, size, align, + 0); if (!cmd->pool) return -ENOMEM; @@ -1825,7 +1866,7 @@ err_free_page: free_cmd_page(dev, cmd); err_free_pool: - pci_pool_destroy(cmd->pool); + dma_pool_destroy(cmd->pool); return err; } @@ -1839,6 +1880,6 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) destroy_workqueue(cmd->wq); destroy_msg_cache(dev); free_cmd_page(dev, cmd); - pci_pool_destroy(cmd->pool); + dma_pool_destroy(cmd->pool); } EXPORT_SYMBOL(mlx5_cmd_cleanup); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index e94a9532e218..7ecadb501743 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -168,7 +168,6 @@ static ssize_t average_read(struct file *filp, char __user *buf, size_t count, return ret; } - static ssize_t average_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { @@ -405,7 +404,7 @@ static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *out; int err; - out = mlx5_vzalloc(outlen); + out = kvzalloc(outlen, GFP_KERNEL); if (!out) return param; @@ -466,7 +465,6 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count, return -EINVAL; } - if (is_str) ret = snprintf(tbuf, sizeof(tbuf), "%s\n", (const char *)(unsigned long)field); else @@ -562,7 +560,6 @@ void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) rem_res_tree(qp->dbg); } - int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index a62f4b6a21a5..fc281712869b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -45,11 +45,76 @@ struct mlx5_device_context { unsigned long state; }; +struct mlx5_delayed_event { + struct list_head list; + struct mlx5_core_dev *dev; + enum mlx5_dev_event event; + unsigned long param; +}; + enum { MLX5_INTERFACE_ADDED, MLX5_INTERFACE_ATTACHED, }; +static void add_delayed_event(struct mlx5_priv *priv, + struct mlx5_core_dev *dev, + enum mlx5_dev_event event, + unsigned long param) +{ + struct mlx5_delayed_event *delayed_event; + + delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC); + if (!delayed_event) { + mlx5_core_err(dev, "event %d is missed\n", event); + return; + } + + mlx5_core_dbg(dev, "Accumulating event %d\n", event); + delayed_event->dev = dev; + delayed_event->event = event; + delayed_event->param = param; + list_add_tail(&delayed_event->list, &priv->waiting_events_list); +} + +static void delayed_event_release(struct mlx5_device_context *dev_ctx, + struct mlx5_priv *priv) +{ + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + struct mlx5_delayed_event *de; + struct mlx5_delayed_event *n; + struct list_head temp; + + INIT_LIST_HEAD(&temp); + + spin_lock_irq(&priv->ctx_lock); + + priv->is_accum_events = false; + list_splice_init(&priv->waiting_events_list, &temp); + if (!dev_ctx->context) + goto out; + list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) + 
dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param); + +out: + spin_unlock_irq(&priv->ctx_lock); + + list_for_each_entry_safe(de, n, &temp, list) { + list_del(&de->list); + kfree(de); + } +} + +/* accumulating events that can come after mlx5_ib calls to + * ib_register_device, till adding that interface to the events list. + */ +static void delayed_event_start(struct mlx5_priv *priv) +{ + spin_lock_irq(&priv->ctx_lock); + priv->is_accum_events = true; + spin_unlock_irq(&priv->ctx_lock); +} + void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) { struct mlx5_device_context *dev_ctx; @@ -63,6 +128,9 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) return; dev_ctx->intf = intf; + + delayed_event_start(priv); + dev_ctx->context = intf->add(dev); set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); if (intf->attach) @@ -71,6 +139,7 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) if (dev_ctx->context) { spin_lock_irq(&priv->ctx_lock); list_add_tail(&dev_ctx->list, &priv->ctx_list); + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING if (dev_ctx->intf->pfault) { if (priv->pfault) { @@ -82,9 +151,12 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) } #endif spin_unlock_irq(&priv->ctx_lock); - } else { - kfree(dev_ctx); } + + delayed_event_release(dev_ctx, priv); + + if (!dev_ctx->context) + kfree(dev_ctx); } static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf, @@ -135,17 +207,21 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv if (!dev_ctx) return; + delayed_event_start(priv); if (intf->attach) { if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) - return; + goto out; intf->attach(dev, dev_ctx->context); set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); } else { if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) - return; + goto out; dev_ctx->context = intf->add(dev); set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); } + +out: + delayed_event_release(dev_ctx, priv); } void mlx5_attach_device(struct mlx5_core_dev *dev) @@ -341,8 +417,17 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, spin_lock_irqsave(&priv->ctx_lock, flags); + if (priv->is_accum_events) + add_delayed_event(priv, dev, event, param); + + /* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is + * still in priv->ctx_list. In this case, only notify the dev_ctx if its + * ADDED or ATTACHED bit are set. + */ list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf->event) + if (dev_ctx->intf->event && + (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) || + test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))) dev_ctx->intf->event(dev, dev_ctx->context, event, param); spin_unlock_irqrestore(&priv->ctx_lock, flags); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile new file mode 100644 index 000000000000..d8e17110f25d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c new file mode 100644 index 000000000000..0be4575b58a2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define CREATE_TRACE_POINTS + +#include "fs_tracepoint.h" +#include <linux/stringify.h> + +#define DECLARE_MASK_VAL(type, name) struct {type m; type v; } name +#define MASK_VAL(type, spec, name, mask, val, fld) \ + DECLARE_MASK_VAL(type, name) = \ + {.m = MLX5_GET(spec, mask, fld),\ + .v = MLX5_GET(spec, val, fld)} +#define MASK_VAL_BE(type, spec, name, mask, val, fld) \ + DECLARE_MASK_VAL(type, name) = \ + {.m = MLX5_GET_BE(type, spec, mask, fld),\ + .v = MLX5_GET_BE(type, spec, val, fld)} +#define GET_MASKED_VAL(name) (name.m & name.v) + +#define GET_MASK_VAL(name, type, mask, val, fld) \ + (name.m = MLX5_GET(type, mask, fld), \ + name.v = MLX5_GET(type, val, fld), \ + name.m & name.v) +#define PRINT_MASKED_VAL(name, p, format) { \ + if (name.m) \ + trace_seq_printf(p, __stringify(name) "=" format " ", name.v); \ + } +#define PRINT_MASKED_VALP(name, cast, p, format) { \ + if (name.m) \ + trace_seq_printf(p, __stringify(name) "=" format " ", \ + (cast)&name.v);\ + } + +static void print_lyr_2_4_hdrs(struct trace_seq *p, + const u32 *mask, const u32 *value) +{ +#define MASK_VAL_L2(type, name, fld) \ + MASK_VAL(type, fte_match_set_lyr_2_4, name, mask, value, fld) + DECLARE_MASK_VAL(u64, smac) = { + .m = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, mask, smac_15_0), + .v = MLX5_GET(fte_match_set_lyr_2_4, value, smac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, value, smac_15_0)}; + DECLARE_MASK_VAL(u64, dmac) = { + .m = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_15_0), + .v = MLX5_GET(fte_match_set_lyr_2_4, value, dmac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, value, dmac_15_0)}; + MASK_VAL_L2(u16, ethertype, ethertype); + + PRINT_MASKED_VALP(smac, u8 *, p, "%pM"); + PRINT_MASKED_VALP(dmac, u8 *, p, "%pM"); + PRINT_MASKED_VAL(ethertype, p, "%04x"); + + if (ethertype.m == 0xffff) { + if (ethertype.v == ETH_P_IP) { +#define MASK_VAL_L2_BE(type, name, fld) \ + MASK_VAL_BE(type, fte_match_set_lyr_2_4, name, mask, value, fld) + MASK_VAL_L2_BE(u32, src_ipv4, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MASK_VAL_L2_BE(u32, dst_ipv4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + 
PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p, + "%pI4"); + PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p, + "%pI4"); + } else if (ethertype.v == ETH_P_IPV6) { + static const struct in6_addr full_ones = { + .in6_u.u6_addr32 = {htonl(0xffffffff), + htonl(0xffffffff), + htonl(0xffffffff), + htonl(0xffffffff)}, + }; + DECLARE_MASK_VAL(struct in6_addr, src_ipv6); + DECLARE_MASK_VAL(struct in6_addr, dst_ipv6); + + memcpy(src_ipv6.m.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(src_ipv6.m)); + memcpy(dst_ipv6.m.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(dst_ipv6.m)); + memcpy(src_ipv6.v.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, value, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(src_ipv6.v)); + memcpy(dst_ipv6.v.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, value, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(dst_ipv6.v)); + + if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones))) + trace_seq_printf(p, "src_ipv6=%pI6 ", + &src_ipv6.v); + if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones))) + trace_seq_printf(p, "dst_ipv6=%pI6 ", + &dst_ipv6.v); + } + } + +#define PRINT_MASKED_VAL_L2(type, name, fld, p, format) {\ + MASK_VAL_L2(type, name, fld); \ + PRINT_MASKED_VAL(name, p, format); \ +} + + PRINT_MASKED_VAL_L2(u8, ip_protocol, ip_protocol, p, "%02x"); + PRINT_MASKED_VAL_L2(u16, tcp_flags, tcp_flags, p, "%x"); + PRINT_MASKED_VAL_L2(u16, tcp_sport, tcp_sport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, tcp_dport, tcp_dport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, udp_sport, udp_sport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, udp_dport, udp_dport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, first_vid, first_vid, p, "%04x"); + PRINT_MASKED_VAL_L2(u8, first_prio, first_prio, p, "%x"); + PRINT_MASKED_VAL_L2(u8, first_cfi, first_cfi, p, "%d"); + PRINT_MASKED_VAL_L2(u8, ip_dscp, ip_dscp, p, "%02x"); + PRINT_MASKED_VAL_L2(u8, ip_ecn, ip_ecn, p, "%x"); + PRINT_MASKED_VAL_L2(u8, cvlan_tag, cvlan_tag, p, "%d"); + PRINT_MASKED_VAL_L2(u8, svlan_tag, svlan_tag, p, "%d"); + PRINT_MASKED_VAL_L2(u8, frag, frag, p, "%d"); +} + +static void print_misc_parameters_hdrs(struct trace_seq *p, + const u32 *mask, const u32 *value) +{ +#define MASK_VAL_MISC(type, name, fld) \ + MASK_VAL(type, fte_match_set_misc, name, mask, value, fld) +#define PRINT_MASKED_VAL_MISC(type, name, fld, p, format) {\ + MASK_VAL_MISC(type, name, fld); \ + PRINT_MASKED_VAL(name, p, format); \ +} + DECLARE_MASK_VAL(u64, gre_key) = { + .m = MLX5_GET(fte_match_set_misc, mask, gre_key_h) << 8 | + MLX5_GET(fte_match_set_misc, mask, gre_key_l), + .v = MLX5_GET(fte_match_set_misc, value, gre_key_h) << 8 | + MLX5_GET(fte_match_set_misc, value, gre_key_l)}; + + PRINT_MASKED_VAL(gre_key, p, "%llu"); + PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, source_port, source_port, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_prio, outer_second_prio, + p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_cfi, outer_second_cfi, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, outer_second_vid, outer_second_vid, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_prio, inner_second_prio, + p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_cfi, inner_second_cfi, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, inner_second_vid, inner_second_vid, p, "%u"); + + PRINT_MASKED_VAL_MISC(u8, outer_second_cvlan_tag, + outer_second_cvlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_cvlan_tag, 
+ inner_second_cvlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_svlan_tag, + outer_second_svlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_svlan_tag, + inner_second_svlan_tag, p, "%u"); + + PRINT_MASKED_VAL_MISC(u8, gre_protocol, gre_protocol, p, "%u"); + + PRINT_MASKED_VAL_MISC(u32, vxlan_vni, vxlan_vni, p, "%u"); + PRINT_MASKED_VAL_MISC(u32, outer_ipv6_flow_label, outer_ipv6_flow_label, + p, "%x"); + PRINT_MASKED_VAL_MISC(u32, inner_ipv6_flow_label, inner_ipv6_flow_label, + p, "%x"); +} + +const char *parse_fs_hdrs(struct trace_seq *p, + u8 match_criteria_enable, + const u32 *mask_outer, + const u32 *mask_misc, + const u32 *mask_inner, + const u32 *value_outer, + const u32 *value_misc, + const u32 *value_inner) +{ + const char *ret = trace_seq_buffer_ptr(p); + + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) { + trace_seq_printf(p, "[outer] "); + print_lyr_2_4_hdrs(p, mask_outer, value_outer); + } + + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) { + trace_seq_printf(p, "[misc] "); + print_misc_parameters_hdrs(p, mask_misc, value_misc); + } + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) { + trace_seq_printf(p, "[inner] "); + print_lyr_2_4_hdrs(p, mask_inner, value_inner); + } + trace_seq_putc(p, 0); + return ret; +} + +const char *parse_fs_dst(struct trace_seq *p, + const struct mlx5_flow_destination *dst, + u32 counter_id) +{ + const char *ret = trace_seq_buffer_ptr(p); + + switch (dst->type) { + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + trace_seq_printf(p, "vport=%u\n", dst->vport_num); + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + trace_seq_printf(p, "ft=%p\n", dst->ft); + break; + case MLX5_FLOW_DESTINATION_TYPE_TIR: + trace_seq_printf(p, "tir=%u\n", dst->tir_num); + break; + case MLX5_FLOW_DESTINATION_TYPE_COUNTER: + trace_seq_printf(p, "counter_id=%u\n", counter_id); + break; + } + + trace_seq_putc(p, 0); + return ret; +} + +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_fg); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fg); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_set_fte); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fte); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_rule); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_rule); + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h new file mode 100644 index 000000000000..80eef4163f52 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if !defined(_MLX5_FS_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_FS_TP_ + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> +#include "../fs_core.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#define __parse_fs_hdrs(match_criteria_enable, mouter, mmisc, minner, vouter, \ + vinner, vmisc) \ + parse_fs_hdrs(p, match_criteria_enable, mouter, mmisc, minner, vouter,\ + vinner, vmisc) + +const char *parse_fs_hdrs(struct trace_seq *p, + u8 match_criteria_enable, + const u32 *mask_outer, + const u32 *mask_misc, + const u32 *mask_inner, + const u32 *value_outer, + const u32 *value_misc, + const u32 *value_inner); + +#define __parse_fs_dst(dst, counter_id) \ + parse_fs_dst(p, (const struct mlx5_flow_destination *)dst, counter_id) + +const char *parse_fs_dst(struct trace_seq *p, + const struct mlx5_flow_destination *dst, + u32 counter_id); + +TRACE_EVENT(mlx5_fs_add_fg, + TP_PROTO(const struct mlx5_flow_group *fg), + TP_ARGS(fg), + TP_STRUCT__entry( + __field(const struct mlx5_flow_group *, fg) + __field(const struct mlx5_flow_table *, ft) + __field(u32, start_index) + __field(u32, end_index) + __field(u32, id) + __field(u8, mask_enable) + __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + ), + TP_fast_assign( + __entry->fg = fg; + fs_get_obj(__entry->ft, fg->node.parent); + __entry->start_index = fg->start_index; + __entry->end_index = fg->start_index + fg->max_ftes; + __entry->id = fg->id; + __entry->mask_enable = fg->mask.match_criteria_enable; + memcpy(__entry->mask_outer, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + outer_headers), + sizeof(__entry->mask_outer)); + memcpy(__entry->mask_inner, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + inner_headers), + sizeof(__entry->mask_inner)); + memcpy(__entry->mask_misc, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + misc_parameters), + sizeof(__entry->mask_misc)); + + ), + TP_printk("fg=%p ft=%p id=%u start=%u end=%u bit_mask=%02x %s\n", + __entry->fg, __entry->ft, __entry->id, + __entry->start_index, __entry->end_index, + __entry->mask_enable, + __parse_fs_hdrs(__entry->mask_enable, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner)) + ); + +TRACE_EVENT(mlx5_fs_del_fg, + TP_PROTO(const struct mlx5_flow_group *fg), + TP_ARGS(fg), + TP_STRUCT__entry( + __field(const struct mlx5_flow_group *, fg) + __field(u32, id) + ), + TP_fast_assign( + __entry->fg = fg; + __entry->id = fg->id; + + ), + TP_printk("fg=%p id=%u\n", + __entry->fg, __entry->id) + ); + +#define ACTION_FLAGS \ + {MLX5_FLOW_CONTEXT_ACTION_ALLOW, "ALLOW"},\ + {MLX5_FLOW_CONTEXT_ACTION_DROP, "DROP"},\ + {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, "FWD"},\ + {MLX5_FLOW_CONTEXT_ACTION_COUNT, "CNT"},\ + {MLX5_FLOW_CONTEXT_ACTION_ENCAP, "ENCAP"},\ + {MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\ + 
{MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\ + {MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"} + +TRACE_EVENT(mlx5_fs_set_fte, + TP_PROTO(const struct fs_fte *fte, int new_fte), + TP_ARGS(fte, new_fte), + TP_STRUCT__entry( + __field(const struct fs_fte *, fte) + __field(const struct mlx5_flow_group *, fg) + __field(u32, group_index) + __field(u32, index) + __field(u32, action) + __field(u32, flow_tag) + __field(u8, mask_enable) + __field(int, new_fte) + __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + __array(u32, value_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, value_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, value_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + ), + TP_fast_assign( + __entry->fte = fte; + __entry->new_fte = new_fte; + fs_get_obj(__entry->fg, fte->node.parent); + __entry->group_index = __entry->fg->id; + __entry->index = fte->index; + __entry->action = fte->action; + __entry->mask_enable = __entry->fg->mask.match_criteria_enable; + __entry->flow_tag = fte->flow_tag; + memcpy(__entry->mask_outer, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + outer_headers), + sizeof(__entry->mask_outer)); + memcpy(__entry->mask_inner, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + inner_headers), + sizeof(__entry->mask_inner)); + memcpy(__entry->mask_misc, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + misc_parameters), + sizeof(__entry->mask_misc)); + memcpy(__entry->value_outer, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + outer_headers), + sizeof(__entry->value_outer)); + memcpy(__entry->value_inner, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + inner_headers), + sizeof(__entry->value_inner)); + memcpy(__entry->value_misc, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + misc_parameters), + sizeof(__entry->value_misc)); + + ), + TP_printk("op=%s fte=%p fg=%p index=%u group_index=%u action=<%s> flow_tag=%x %s\n", + __entry->new_fte ? 
"add" : "set", + __entry->fte, __entry->fg, __entry->index, + __entry->group_index, __print_flags(__entry->action, "|", + ACTION_FLAGS), + __entry->flow_tag, + __parse_fs_hdrs(__entry->mask_enable, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner, + __entry->value_outer, + __entry->value_misc, + __entry->value_inner)) + ); + +TRACE_EVENT(mlx5_fs_del_fte, + TP_PROTO(const struct fs_fte *fte), + TP_ARGS(fte), + TP_STRUCT__entry( + __field(const struct fs_fte *, fte) + __field(u32, index) + ), + TP_fast_assign( + __entry->fte = fte; + __entry->index = fte->index; + + ), + TP_printk("fte=%p index=%u\n", + __entry->fte, __entry->index) + ); + +TRACE_EVENT(mlx5_fs_add_rule, + TP_PROTO(const struct mlx5_flow_rule *rule), + TP_ARGS(rule), + TP_STRUCT__entry( + __field(const struct mlx5_flow_rule *, rule) + __field(const struct fs_fte *, fte) + __field(u32, sw_action) + __field(u32, index) + __field(u32, counter_id) + __array(u8, destination, sizeof(struct mlx5_flow_destination)) + ), + TP_fast_assign( + __entry->rule = rule; + fs_get_obj(__entry->fte, rule->node.parent); + __entry->index = __entry->fte->dests_size - 1; + __entry->sw_action = rule->sw_action; + memcpy(__entry->destination, + &rule->dest_attr, + sizeof(__entry->destination)); + if (rule->dest_attr.type & MLX5_FLOW_DESTINATION_TYPE_COUNTER && + rule->dest_attr.counter) + __entry->counter_id = + rule->dest_attr.counter->id; + ), + TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n", + __entry->rule, __entry->fte, __entry->index, + __print_flags(__entry->sw_action, "|", ACTION_FLAGS), + __parse_fs_dst(__entry->destination, __entry->counter_id)) + ); + +TRACE_EVENT(mlx5_fs_del_rule, + TP_PROTO(const struct mlx5_flow_rule *rule), + TP_ARGS(rule), + TP_STRUCT__entry( + __field(const struct mlx5_flow_rule *, rule) + __field(const struct fs_fte *, fte) + ), + TP_fast_assign( + __entry->rule = rule; + fs_get_obj(__entry->fte, rule->node.parent); + ), + TP_printk("rule=%p fte=%p\n", + __entry->rule, __entry->fte) + ); +#endif + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE fs_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 944fc1742464..cc13d3dbd366 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -52,8 +52,10 @@ #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) -#define MLX5E_HW2SW_MTU(hwmtu) ((hwmtu) - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) -#define MLX5E_SW2HW_MTU(swmtu) ((swmtu) + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) +#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) + +#define MLX5E_HW2SW_MTU(priv, hwmtu) ((hwmtu) - ((priv)->hard_mtu)) +#define MLX5E_SW2HW_MTU(priv, swmtu) ((swmtu) + ((priv)->hard_mtu)) #define MLX5E_MAX_NUM_TC 8 @@ -70,6 +72,8 @@ #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 #define MLX5_RX_HEADROOM NET_SKB_PAD +#define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \ (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */ @@ -213,6 +217,7 @@ struct mlx5e_cq_moder { struct mlx5e_params { u8 log_sq_size; u8 rq_wq_type; + u16 rq_headroom; u8 mpwqe_log_stride_sz; u8 mpwqe_log_num_strides; u8 log_rq_size; @@ -258,9 +263,18 @@ struct mlx5e_dcbx { /* The only setting that cannot be read from FW */ u8 
tc_tsa[IEEE_8021QAZ_MAX_TCS]; + u8 cap; }; #endif +#define MAX_PIN_NUM 8 +struct mlx5e_pps { + u8 pin_caps[MAX_PIN_NUM]; + struct work_struct out_work; + u64 start[MAX_PIN_NUM]; + u8 enabled; +}; + struct mlx5e_tstamp { rwlock_t lock; struct cyclecounter cycles; @@ -272,15 +286,16 @@ struct mlx5e_tstamp { struct mlx5_core_dev *mdev; struct ptp_clock *ptp; struct ptp_clock_info ptp_info; - u8 *pps_pin_caps; + struct mlx5e_pps pps_info; }; enum { MLX5E_RQ_STATE_ENABLED, - MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, MLX5E_RQ_STATE_AM, }; +#define MLX5E_TEST_BIT(state, nr) (state & BIT(nr)) + struct mlx5e_cq { /* data path - accessed per cqe */ struct mlx5_cqwq wq; @@ -323,11 +338,11 @@ struct mlx5e_sq_dma { enum { MLX5E_SQ_STATE_ENABLED, + MLX5E_SQ_STATE_IPSEC, }; struct mlx5e_sq_wqe_info { u8 opcode; - u8 num_wqebbs; }; struct mlx5e_txqsq { @@ -403,13 +418,8 @@ struct mlx5e_xdpsq { struct mlx5e_icosq { /* data path */ - /* dirtied @completion */ - u16 cc; - /* dirtied @xmit */ u16 pc ____cacheline_aligned_in_smp; - u32 dma_fifo_pc; - u16 prev_cc; struct mlx5e_cq cq; @@ -423,7 +433,6 @@ struct mlx5e_icosq { void __iomem *uar_map; u32 sqn; u16 edge; - struct device *pdev; __be32 mkey_be; unsigned long state; @@ -443,6 +452,11 @@ struct mlx5e_dma_info { dma_addr_t addr; }; +struct mlx5e_wqe_frag_info { + struct mlx5e_dma_info di; + u32 offset; +}; + struct mlx5e_umr_dma_info { __be64 *mtt; dma_addr_t mtt_addr; @@ -487,7 +501,7 @@ struct mlx5e_rx_am { /* Adaptive Moderation */ */ #define MLX5E_CACHE_UNIT (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \ MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT) -#define MLX5E_CACHE_SIZE (2 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) +#define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) struct mlx5e_page_cache { u32 head; u32 tail; @@ -496,7 +510,7 @@ struct mlx5e_page_cache { struct mlx5e_rq; typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*); -typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq*, struct mlx5e_rx_wqe*, u16); +typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq); typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16); struct mlx5e_rq { @@ -504,19 +518,29 @@ struct mlx5e_rq { struct mlx5_wq_ll wq; union { - struct mlx5e_dma_info *dma_info; + struct { + struct mlx5e_wqe_frag_info *frag_info; + u32 frag_sz; /* max possible skb frag_sz */ + union { + bool page_reuse; + bool xdp_xmit; + }; + } wqe; struct { struct mlx5e_mpw_info *info; void *mtt_no_align; + u16 num_strides; + u8 log_stride_sz; + bool umr_in_progress; } mpwqe; }; struct { + u16 headroom; u8 page_order; - u32 wqe_sz; /* wqe data buffer size */ u8 map_dir; /* dma map direction */ } buff; - __be32 mkey_be; + struct mlx5e_channel *channel; struct device *pdev; struct net_device *netdev; struct mlx5e_tstamp *tstamp; @@ -525,12 +549,11 @@ struct mlx5e_rq { struct mlx5e_page_cache page_cache; mlx5e_fp_handle_rx_cqe handle_rx_cqe; - mlx5e_fp_alloc_wqe alloc_wqe; + mlx5e_fp_post_rx_wqes post_wqes; mlx5e_fp_dealloc_wqe dealloc_wqe; unsigned long state; int ix; - u16 rx_headroom; struct mlx5e_rx_am am; /* Adaptive Moderation */ @@ -540,19 +563,13 @@ struct mlx5e_rq { /* control */ struct mlx5_wq_ctrl wq_ctrl; + __be32 mkey_be; u8 wq_type; - u32 mpwqe_stride_sz; - u32 mpwqe_num_strides; u32 rqn; - struct mlx5e_channel *channel; struct mlx5_core_dev *mdev; struct mlx5_core_mkey umr_mkey; } ____cacheline_aligned_in_smp; -enum channel_flags { - MLX5E_CHANNEL_NAPI_SCHED = 1, -}; - struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; @@ -564,14 +581,15 @@ 
struct mlx5e_channel { struct net_device *netdev; __be32 mkey_be; u8 num_tc; - unsigned long flags; + + /* data path - accessed per napi poll */ + struct irq_desc *irq_desc; /* control */ struct mlx5e_priv *priv; struct mlx5_core_dev *mdev; struct mlx5e_tstamp *tstamp; int ix; - int cpu; }; struct mlx5e_channels { @@ -596,6 +614,12 @@ enum mlx5e_traffic_types { MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY, }; +enum mlx5e_tunnel_types { + MLX5E_TT_IPV4_GRE, + MLX5E_TT_IPV6_GRE, + MLX5E_NUM_TUNNEL_TT, +}; + enum { MLX5E_STATE_ASYNC_EVENTS_ENABLED, MLX5E_STATE_OPENED, @@ -625,6 +649,8 @@ struct mlx5e_tc_table { struct rhashtable_params ht_params; struct rhashtable ht; + + DECLARE_HASHTABLE(mod_hdr_tbl, 8); }; struct mlx5e_vlan_table { @@ -653,6 +679,7 @@ struct mlx5e_l2_table { struct mlx5e_ttc_table { struct mlx5e_flow_table ft; struct mlx5_flow_handle *rules[MLX5E_NUM_TT]; + struct mlx5_flow_handle *tunnel_rules[MLX5E_NUM_TUNNEL_TT]; }; #define ARFS_HASH_SHIFT BITS_PER_BYTE @@ -685,6 +712,7 @@ enum { MLX5E_VLAN_FT_LEVEL = 0, MLX5E_L2_FT_LEVEL, MLX5E_TTC_FT_LEVEL, + MLX5E_INNER_TTC_FT_LEVEL, MLX5E_ARFS_FT_LEVEL }; @@ -710,6 +738,7 @@ struct mlx5e_flow_steering { struct mlx5e_vlan_table vlan; struct mlx5e_l2_table l2; struct mlx5e_ttc_table ttc; + struct mlx5e_ttc_table inner_ttc; struct mlx5e_arfs_tables arfs; }; @@ -743,8 +772,10 @@ struct mlx5e_priv { u32 tisn[MLX5E_MAX_NUM_TC]; struct mlx5e_rqt indir_rqt; struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; u32 tx_rates[MLX5E_MAX_NUM_SQS]; + int hard_mtu; struct mlx5e_flow_steering fs; struct mlx5e_vxlan_db vxlan; @@ -766,6 +797,9 @@ struct mlx5e_priv { const struct mlx5e_profile *profile; void *ppriv; +#ifdef CONFIG_MLX5_EN_IPSEC + struct mlx5e_ipsec *ipsec; +#endif }; struct mlx5e_profile { @@ -780,6 +814,7 @@ struct mlx5e_profile { void (*enable)(struct mlx5e_priv *priv); void (*disable)(struct mlx5e_priv *priv); void (*update_stats)(struct mlx5e_priv *priv); + void (*update_carrier)(struct mlx5e_priv *priv); int (*max_nch)(struct mlx5_core_dev *mdev); struct { mlx5e_fp_handle_rx_cqe handle_rx_cqe; @@ -808,19 +843,16 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); -int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq); void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); -void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq); void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); -struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); void mlx5e_rx_am(struct mlx5e_rq *rq); void mlx5e_rx_am_work(struct work_struct *work); struct mlx5e_cq_moder mlx5e_am_get_def_profile(u8 rx_cq_period_mode); -void mlx5e_update_stats(struct mlx5e_priv *priv); +void mlx5e_update_stats(struct mlx5e_priv *priv, bool full); int mlx5e_create_flow_steering(struct mlx5e_priv *priv); void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); @@ -847,8 +879,8 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv); void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv); void mlx5e_pps_event_handler(struct mlx5e_priv *priv, 
struct ptp_clock_event *event); -int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr); -int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr); +int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr); +int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr); int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val); int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, @@ -873,7 +905,7 @@ int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, struct mlx5e_redirect_rqt_param rrp); void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, enum mlx5e_traffic_types tt, - void *tirc); + void *tirc, bool inner); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); @@ -892,8 +924,7 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); -void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, - u32 *indirection_rqt, int len, +void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, int num_channels); int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); @@ -902,6 +933,12 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u8 rq_type); +static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) +{ + return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version)); +} + static inline struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) { @@ -1019,6 +1056,31 @@ int mlx5e_open(struct net_device *netdev); void mlx5e_update_stats_work(struct work_struct *work); u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); +/* ethtool helpers */ +void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, + struct ethtool_drvinfo *drvinfo); +void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, + uint32_t stringset, uint8_t *data); +int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset); +void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, + struct ethtool_stats *stats, u64 *data); +void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param); +int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param); +void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch); +int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch); +int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal); +int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal); +int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, + struct ethtool_ts_info *info); +int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash); + /* mlx5e generic netdev management API */ struct net_device* mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile new file mode 100644 index 000000000000..d8e17110f25d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/.. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c new file mode 100644 index 000000000000..bac5103efad3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -0,0 +1,461 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <crypto/internal/geniv.h> +#include <crypto/aead.h> +#include <linux/inetdevice.h> +#include <linux/netdevice.h> +#include <linux/module.h> + +#include "en.h" +#include "accel/ipsec.h" +#include "en_accel/ipsec.h" +#include "en_accel/ipsec_rxtx.h" + +struct mlx5e_ipsec_sa_entry { + struct hlist_node hlist; /* Item in SADB_RX hashtable */ + unsigned int handle; /* Handle in SADB_RX */ + struct xfrm_state *x; + struct mlx5e_ipsec *ipsec; + void *context; +}; + +struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec, + unsigned int handle) +{ + struct mlx5e_ipsec_sa_entry *sa_entry; + struct xfrm_state *ret = NULL; + + rcu_read_lock(); + hash_for_each_possible_rcu(ipsec->sadb_rx, sa_entry, hlist, handle) + if (sa_entry->handle == handle) { + ret = sa_entry->x; + xfrm_state_hold(ret); + break; + } + rcu_read_unlock(); + + return ret; +} + +static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + unsigned long flags; + int ret; + + spin_lock_irqsave(&ipsec->sadb_rx_lock, flags); + ret = ida_simple_get(&ipsec->halloc, 1, 0, GFP_KERNEL); + if (ret < 0) + goto out; + + sa_entry->handle = ret; + hash_add_rcu(ipsec->sadb_rx, &sa_entry->hlist, sa_entry->handle); + ret = 0; + +out: + spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags); + return ret; +} + +static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + unsigned long flags; + + spin_lock_irqsave(&ipsec->sadb_rx_lock, flags); + hash_del_rcu(&sa_entry->hlist); + spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags); +} + +static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + unsigned long flags; + + /* Wait for the hash_del_rcu call in sadb_rx_del to 
affect data path */ + synchronize_rcu(); + spin_lock_irqsave(&ipsec->sadb_rx_lock, flags); + ida_simple_remove(&ipsec->halloc, sa_entry->handle); + spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags); +} + +static enum mlx5_accel_ipsec_enc_mode mlx5e_ipsec_enc_mode(struct xfrm_state *x) +{ + unsigned int key_len = (x->aead->alg_key_len + 7) / 8 - 4; + + switch (key_len) { + case 16: + return MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128; + case 32: + return MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128; + default: + netdev_warn(x->xso.dev, "Bad key len: %d for alg %s\n", + key_len, x->aead->alg_name); + return -1; + } +} + +static void mlx5e_ipsec_build_hw_sa(u32 op, struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_accel_ipsec_sa *hw_sa) +{ + struct xfrm_state *x = sa_entry->x; + struct aead_geniv_ctx *geniv_ctx; + unsigned int crypto_data_len; + struct crypto_aead *aead; + unsigned int key_len; + int ivsize; + + memset(hw_sa, 0, sizeof(*hw_sa)); + + if (op == MLX5_IPSEC_CMD_ADD_SA) { + crypto_data_len = (x->aead->alg_key_len + 7) / 8; + key_len = crypto_data_len - 4; /* 4 bytes salt at end */ + aead = x->data; + geniv_ctx = crypto_aead_ctx(aead); + ivsize = crypto_aead_ivsize(aead); + + memcpy(&hw_sa->key_enc, x->aead->alg_key, key_len); + /* Duplicate 128 bit key twice according to HW layout */ + if (key_len == 16) + memcpy(&hw_sa->key_enc[16], x->aead->alg_key, key_len); + memcpy(&hw_sa->gcm.salt_iv, geniv_ctx->salt, ivsize); + hw_sa->gcm.salt = *((__be32 *)(x->aead->alg_key + key_len)); + } + + hw_sa->cmd = htonl(op); + hw_sa->flags |= MLX5_IPSEC_SADB_SA_VALID | MLX5_IPSEC_SADB_SPI_EN; + if (x->props.family == AF_INET) { + hw_sa->sip[3] = x->props.saddr.a4; + hw_sa->dip[3] = x->id.daddr.a4; + hw_sa->sip_masklen = 32; + hw_sa->dip_masklen = 32; + } else { + memcpy(hw_sa->sip, x->props.saddr.a6, sizeof(hw_sa->sip)); + memcpy(hw_sa->dip, x->id.daddr.a6, sizeof(hw_sa->dip)); + hw_sa->sip_masklen = 128; + hw_sa->dip_masklen = 128; + hw_sa->flags |= MLX5_IPSEC_SADB_IPV6; + } + hw_sa->spi = x->id.spi; + hw_sa->sw_sa_handle = htonl(sa_entry->handle); + switch (x->id.proto) { + case IPPROTO_ESP: + hw_sa->flags |= MLX5_IPSEC_SADB_IP_ESP; + break; + case IPPROTO_AH: + hw_sa->flags |= MLX5_IPSEC_SADB_IP_AH; + break; + default: + break; + } + hw_sa->enc_mode = mlx5e_ipsec_enc_mode(x); + if (!(x->xso.flags & XFRM_OFFLOAD_INBOUND)) + hw_sa->flags |= MLX5_IPSEC_SADB_DIR_SX; +} + +static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x) +{ + struct net_device *netdev = x->xso.dev; + struct mlx5e_priv *priv; + + priv = netdev_priv(netdev); + + if (x->props.aalgo != SADB_AALG_NONE) { + netdev_info(netdev, "Cannot offload authenticated xfrm states\n"); + return -EINVAL; + } + if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) { + netdev_info(netdev, "Only AES-GCM-ICV16 xfrm state may be offloaded\n"); + return -EINVAL; + } + if (x->props.calgo != SADB_X_CALG_NONE) { + netdev_info(netdev, "Cannot offload compressed xfrm states\n"); + return -EINVAL; + } + if (x->props.flags & XFRM_STATE_ESN) { + netdev_info(netdev, "Cannot offload ESN xfrm states\n"); + return -EINVAL; + } + if (x->props.family != AF_INET && + x->props.family != AF_INET6) { + netdev_info(netdev, "Only IPv4/6 xfrm states may be offloaded\n"); + return -EINVAL; + } + if (x->props.mode != XFRM_MODE_TRANSPORT && + x->props.mode != XFRM_MODE_TUNNEL) { + dev_info(&netdev->dev, "Only transport and tunnel xfrm states may be offloaded\n"); + return -EINVAL; + } + if (x->id.proto != IPPROTO_ESP) { + netdev_info(netdev, "Only ESP xfrm state may be 
offloaded\n"); + return -EINVAL; + } + if (x->encap) { + netdev_info(netdev, "Encapsulated xfrm state may not be offloaded\n"); + return -EINVAL; + } + if (!x->aead) { + netdev_info(netdev, "Cannot offload xfrm states without aead\n"); + return -EINVAL; + } + if (x->aead->alg_icv_len != 128) { + netdev_info(netdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n"); + return -EINVAL; + } + if ((x->aead->alg_key_len != 128 + 32) && + (x->aead->alg_key_len != 256 + 32)) { + netdev_info(netdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n"); + return -EINVAL; + } + if (x->tfcpad) { + netdev_info(netdev, "Cannot offload xfrm states with tfc padding\n"); + return -EINVAL; + } + if (!x->geniv) { + netdev_info(netdev, "Cannot offload xfrm states without geniv\n"); + return -EINVAL; + } + if (strcmp(x->geniv, "seqiv")) { + netdev_info(netdev, "Cannot offload xfrm states with geniv other than seqiv\n"); + return -EINVAL; + } + if (x->props.family == AF_INET6 && + !(mlx5_accel_ipsec_device_caps(priv->mdev) & MLX5_ACCEL_IPSEC_IPV6)) { + netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n"); + return -EINVAL; + } + return 0; +} + +static int mlx5e_xfrm_add_state(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry = NULL; + struct net_device *netdev = x->xso.dev; + struct mlx5_accel_ipsec_sa hw_sa; + struct mlx5e_priv *priv; + void *context; + int err; + + priv = netdev_priv(netdev); + + err = mlx5e_xfrm_validate_state(x); + if (err) + return err; + + sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL); + if (!sa_entry) { + err = -ENOMEM; + goto out; + } + + sa_entry->x = x; + sa_entry->ipsec = priv->ipsec; + + /* Add the SA to handle processed incoming packets before the add SA + * completion was received + */ + if (x->xso.flags & XFRM_OFFLOAD_INBOUND) { + err = mlx5e_ipsec_sadb_rx_add(sa_entry); + if (err) { + netdev_info(netdev, "Failed adding to SADB_RX: %d\n", err); + goto err_entry; + } + } + + mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_ADD_SA, sa_entry, &hw_sa); + context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa); + if (IS_ERR(context)) { + err = PTR_ERR(context); + goto err_sadb_rx; + } + + err = mlx5_accel_ipsec_sa_cmd_wait(context); + if (err) + goto err_sadb_rx; + + x->xso.offload_handle = (unsigned long)sa_entry; + goto out; + +err_sadb_rx: + if (x->xso.flags & XFRM_OFFLOAD_INBOUND) { + mlx5e_ipsec_sadb_rx_del(sa_entry); + mlx5e_ipsec_sadb_rx_free(sa_entry); + } +err_entry: + kfree(sa_entry); +out: + return err; +} + +static void mlx5e_xfrm_del_state(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry; + struct mlx5_accel_ipsec_sa hw_sa; + void *context; + + if (!x->xso.offload_handle) + return; + + sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; + WARN_ON(sa_entry->x != x); + + if (x->xso.flags & XFRM_OFFLOAD_INBOUND) + mlx5e_ipsec_sadb_rx_del(sa_entry); + + mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_DEL_SA, sa_entry, &hw_sa); + context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa); + if (IS_ERR(context)) + return; + + sa_entry->context = context; +} + +static void mlx5e_xfrm_free_state(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry; + int res; + + if (!x->xso.offload_handle) + return; + + sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; + WARN_ON(sa_entry->x != x); + + res = mlx5_accel_ipsec_sa_cmd_wait(sa_entry->context); + sa_entry->context = NULL; + if (res) { + /* Leftover object will leak 
*/ + return; + } + + if (x->xso.flags & XFRM_OFFLOAD_INBOUND) + mlx5e_ipsec_sadb_rx_free(sa_entry); + + kfree(sa_entry); +} + +int mlx5e_ipsec_init(struct mlx5e_priv *priv) +{ + struct mlx5e_ipsec *ipsec = NULL; + + if (!MLX5_IPSEC_DEV(priv->mdev)) { + netdev_dbg(priv->netdev, "Not an IPSec offload device\n"); + return 0; + } + + ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL); + if (!ipsec) + return -ENOMEM; + + hash_init(ipsec->sadb_rx); + spin_lock_init(&ipsec->sadb_rx_lock); + ida_init(&ipsec->halloc); + ipsec->en_priv = priv; + ipsec->en_priv->ipsec = ipsec; + netdev_dbg(priv->netdev, "IPSec attached to netdevice\n"); + return 0; +} + +void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_ipsec *ipsec = priv->ipsec; + + if (!ipsec) + return; + + ida_destroy(&ipsec->halloc); + kfree(ipsec); + priv->ipsec = NULL; +} + +static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) +{ + if (x->props.family == AF_INET) { + /* Offload with IPv4 options is not supported yet */ + if (ip_hdr(skb)->ihl > 5) + return false; + } else { + /* Offload with IPv6 extension headers is not support yet */ + if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr)) + return false; + } + + return true; +} + +static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = { + .xdo_dev_state_add = mlx5e_xfrm_add_state, + .xdo_dev_state_delete = mlx5e_xfrm_del_state, + .xdo_dev_state_free = mlx5e_xfrm_free_state, + .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, +}; + +void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *netdev = priv->netdev; + + if (!priv->ipsec) + return; + + if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_ESP) || + !MLX5_CAP_ETH(mdev, swp)) { + mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n"); + return; + } + + mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n"); + netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops; + netdev->features |= NETIF_F_HW_ESP; + netdev->hw_enc_features |= NETIF_F_HW_ESP; + + if (!MLX5_CAP_ETH(mdev, swp_csum)) { + mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n"); + return; + } + + netdev->features |= NETIF_F_HW_ESP_TX_CSUM; + netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM; + + if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_LSO) || + !MLX5_CAP_ETH(mdev, swp_lso)) { + mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n"); + return; + } + + mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n"); + netdev->features |= NETIF_F_GSO_ESP; + netdev->hw_features |= NETIF_F_GSO_ESP; + netdev->hw_enc_features |= NETIF_F_GSO_ESP; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h new file mode 100644 index 000000000000..56e00baf16cc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5E_IPSEC_H__ +#define __MLX5E_IPSEC_H__ + +#ifdef CONFIG_MLX5_EN_IPSEC + +#include <linux/mlx5/device.h> +#include <net/xfrm.h> +#include <linux/idr.h> + +#define MLX5E_IPSEC_SADB_RX_BITS 10 +#define MLX5E_METADATA_ETHER_TYPE (0x8CE4) +#define MLX5E_METADATA_ETHER_LEN 8 + +struct mlx5e_priv; + +struct mlx5e_ipsec_sw_stats { + atomic64_t ipsec_rx_drop_sp_alloc; + atomic64_t ipsec_rx_drop_sadb_miss; + atomic64_t ipsec_rx_drop_syndrome; + atomic64_t ipsec_tx_drop_bundle; + atomic64_t ipsec_tx_drop_no_state; + atomic64_t ipsec_tx_drop_not_ip; + atomic64_t ipsec_tx_drop_trailer; + atomic64_t ipsec_tx_drop_metadata; +}; + +struct mlx5e_ipsec_stats { + u64 ipsec_dec_in_packets; + u64 ipsec_dec_out_packets; + u64 ipsec_dec_bypass_packets; + u64 ipsec_enc_in_packets; + u64 ipsec_enc_out_packets; + u64 ipsec_enc_bypass_packets; + u64 ipsec_dec_drop_packets; + u64 ipsec_dec_auth_fail_packets; + u64 ipsec_enc_drop_packets; + u64 ipsec_add_sa_success; + u64 ipsec_add_sa_fail; + u64 ipsec_del_sa_success; + u64 ipsec_del_sa_fail; + u64 ipsec_cmd_drop; +}; + +struct mlx5e_ipsec { + struct mlx5e_priv *en_priv; + DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS); + spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */ + struct ida halloc; + struct mlx5e_ipsec_sw_stats sw_stats; + struct mlx5e_ipsec_stats stats; +}; + +void mlx5e_ipsec_build_inverse_table(void); +int mlx5e_ipsec_init(struct mlx5e_priv *priv); +void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv); +void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv); + +int mlx5e_ipsec_get_count(struct mlx5e_priv *priv); +int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data); +void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv); +int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data); + +struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev, + unsigned int handle); + +#else + +static inline void mlx5e_ipsec_build_inverse_table(void) +{ +} + +static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) +{ +} + +static inline void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) +{ +} + +static inline int mlx5e_ipsec_get_count(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline 
int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, + uint8_t *data) +{ + return 0; +} + +static inline void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv) +{ +} + +static inline int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data) +{ + return 0; +} + +#endif + +#endif /* __MLX5E_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c new file mode 100644 index 000000000000..4614ddfa91eb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -0,0 +1,378 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include <crypto/aead.h> +#include <net/xfrm.h> +#include <net/esp.h> + +#include "en_accel/ipsec_rxtx.h" +#include "en_accel/ipsec.h" +#include "en.h" + +enum { + MLX5E_IPSEC_RX_SYNDROME_DECRYPTED = 0x11, + MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED = 0x12, +}; + +struct mlx5e_ipsec_rx_metadata { + unsigned char reserved; + __be32 sa_handle; +} __packed; + +enum { + MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8, + MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP = 0x9, +}; + +struct mlx5e_ipsec_tx_metadata { + __be16 mss_inv; /* 1/MSS in 16bit fixed point, only for LSO */ + __be16 seq; /* LSBs of the first TCP seq, only for LSO */ + u8 esp_next_proto; /* Next protocol of ESP */ +} __packed; + +struct mlx5e_ipsec_metadata { + unsigned char syndrome; + union { + unsigned char raw[5]; + /* from FPGA to host, on successful decrypt */ + struct mlx5e_ipsec_rx_metadata rx; + /* from host to FPGA */ + struct mlx5e_ipsec_tx_metadata tx; + } __packed content; + /* packet type ID field */ + __be16 ethertype; +} __packed; + +#define MAX_LSO_MSS 2048 + +/* Pre-calculated (Q0.16) fixed-point inverse 1/x function */ +static __be16 mlx5e_ipsec_inverse_table[MAX_LSO_MSS]; + +static inline __be16 mlx5e_ipsec_mss_inv(struct sk_buff *skb) +{ + return mlx5e_ipsec_inverse_table[skb_shinfo(skb)->gso_size]; +} + +static struct mlx5e_ipsec_metadata *mlx5e_ipsec_add_metadata(struct sk_buff *skb) +{ + struct mlx5e_ipsec_metadata *mdata; + struct ethhdr *eth; + + if (unlikely(skb_cow_head(skb, sizeof(*mdata)))) + return ERR_PTR(-ENOMEM); + + eth = (struct ethhdr *)skb_push(skb, sizeof(*mdata)); + skb->mac_header -= sizeof(*mdata); + mdata = (struct mlx5e_ipsec_metadata *)(eth + 1); + + memmove(skb->data, skb->data + sizeof(*mdata), + 2 * ETH_ALEN); + + eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE); + + memset(mdata->content.raw, 0, sizeof(mdata->content.raw)); + return mdata; +} + +static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x) +{ + unsigned int alen = crypto_aead_authsize(x->data); + struct ipv6hdr *ipv6hdr = ipv6_hdr(skb); + struct iphdr *ipv4hdr = ip_hdr(skb); + unsigned int trailer_len; + u8 plen; + int ret; + + ret = skb_copy_bits(skb, skb->len - alen - 2, &plen, 1); + if (unlikely(ret)) + return ret; + + trailer_len = alen + plen + 2; + + pskb_trim(skb, skb->len - trailer_len); + if (skb->protocol == htons(ETH_P_IP)) { + ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len); + ip_send_check(ipv4hdr); + } else { + ipv6hdr->payload_len = htons(ntohs(ipv6hdr->payload_len) - + trailer_len); + } + return 0; +} + +static void mlx5e_ipsec_set_swp(struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg, u8 mode, + struct xfrm_offload *xo) +{ + u8 proto; + + /* Tunnel Mode: + * SWP: OutL3 InL3 InL4 + * Pkt: MAC IP ESP IP L4 + * + * Transport Mode: + * SWP: OutL3 InL4 + * InL3 + * Pkt: MAC IP ESP L4 + * + * Offsets are in 2-byte words, counting from start of frame + */ + eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2; + if (skb->protocol == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6; + + if (mode == XFRM_MODE_TUNNEL) { + eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; + if (xo->proto == IPPROTO_IPV6) { + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + proto = inner_ipv6_hdr(skb)->nexthdr; + } else { + proto = inner_ip_hdr(skb)->protocol; + } + } else { + eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2; + if (skb->protocol == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + proto = 
xo->proto; + } + switch (proto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + /* Fall through */ + case IPPROTO_TCP: + eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + } +} + +static void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_offload *xo) +{ + int iv_offset; + __be64 seqno; + + /* Place the SN in the IV field */ + seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); + iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr); + skb_store_bits(skb, iv_offset, &seqno, 8); +} + +static void mlx5e_ipsec_set_metadata(struct sk_buff *skb, + struct mlx5e_ipsec_metadata *mdata, + struct xfrm_offload *xo) +{ + struct ip_esp_hdr *esph; + struct tcphdr *tcph; + + if (skb_is_gso(skb)) { + /* Add LSO metadata indication */ + esph = ip_esp_hdr(skb); + tcph = inner_tcp_hdr(skb); + netdev_dbg(skb->dev, " Offloading GSO packet outer L3 %u; L4 %u; Inner L3 %u; L4 %u\n", + skb->network_header, + skb->transport_header, + skb->inner_network_header, + skb->inner_transport_header); + netdev_dbg(skb->dev, " Offloading GSO packet of len %u; mss %u; TCP sp %u dp %u seq 0x%x ESP seq 0x%x\n", + skb->len, skb_shinfo(skb)->gso_size, + ntohs(tcph->source), ntohs(tcph->dest), + ntohl(tcph->seq), ntohl(esph->seq_no)); + mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP; + mdata->content.tx.mss_inv = mlx5e_ipsec_mss_inv(skb); + mdata->content.tx.seq = htons(ntohl(tcph->seq) & 0xFFFF); + } else { + mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD; + } + mdata->content.tx.esp_next_proto = xo->proto; + + netdev_dbg(skb->dev, " TX metadata syndrome %u proto %u mss_inv %04x seq %04x\n", + mdata->syndrome, mdata->content.tx.esp_next_proto, + ntohs(mdata->content.tx.mss_inv), + ntohs(mdata->content.tx.seq)); +} + +struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, + struct mlx5e_tx_wqe *wqe, + struct sk_buff *skb) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct xfrm_offload *xo = xfrm_offload(skb); + struct mlx5e_ipsec_metadata *mdata; + struct xfrm_state *x; + + if (!xo) + return skb; + + if (unlikely(skb->sp->len != 1)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle); + goto drop; + } + + x = xfrm_input_state(skb); + if (unlikely(!x)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_no_state); + goto drop; + } + + if (unlikely(!x->xso.offload_handle || + (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)))) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_not_ip); + goto drop; + } + + if (!skb_is_gso(skb)) + if (unlikely(mlx5e_ipsec_remove_trailer(skb, x))) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_trailer); + goto drop; + } + mdata = mlx5e_ipsec_add_metadata(skb); + if (unlikely(IS_ERR(mdata))) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata); + goto drop; + } + mlx5e_ipsec_set_swp(skb, &wqe->eth, x->props.mode, xo); + mlx5e_ipsec_set_iv(skb, xo); + mlx5e_ipsec_set_metadata(skb, mdata, xo); + + return skb; + +drop: + kfree_skb(skb); + return NULL; +} + +static inline struct xfrm_state * +mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb, + struct mlx5e_ipsec_metadata *mdata) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct xfrm_offload *xo; + struct xfrm_state *xs; + u32 sa_handle; + + skb->sp = secpath_dup(skb->sp); + if (unlikely(!skb->sp)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc); + return NULL; + } + + sa_handle = be32_to_cpu(mdata->content.rx.sa_handle); + xs = 
mlx5e_ipsec_sadb_rx_lookup(priv->ipsec, sa_handle); + if (unlikely(!xs)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sadb_miss); + return NULL; + } + + skb->sp->xvec[skb->sp->len++] = xs; + skb->sp->olen++; + + xo = xfrm_offload(skb); + xo->flags = CRYPTO_DONE; + switch (mdata->syndrome) { + case MLX5E_IPSEC_RX_SYNDROME_DECRYPTED: + xo->status = CRYPTO_SUCCESS; + break; + case MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED: + xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED; + break; + default: + atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome); + return NULL; + } + return xs; +} + +struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb) +{ + struct mlx5e_ipsec_metadata *mdata; + struct ethhdr *old_eth; + struct ethhdr *new_eth; + struct xfrm_state *xs; + __be16 *ethtype; + + /* Detect inline metadata */ + if (skb->len < ETH_HLEN + MLX5E_METADATA_ETHER_LEN) + return skb; + ethtype = (__be16 *)(skb->data + ETH_ALEN * 2); + if (*ethtype != cpu_to_be16(MLX5E_METADATA_ETHER_TYPE)) + return skb; + + /* Use the metadata */ + mdata = (struct mlx5e_ipsec_metadata *)(skb->data + ETH_HLEN); + xs = mlx5e_ipsec_build_sp(netdev, skb, mdata); + if (unlikely(!xs)) { + kfree_skb(skb); + return NULL; + } + + /* Remove the metadata from the buffer */ + old_eth = (struct ethhdr *)skb->data; + new_eth = (struct ethhdr *)(skb->data + MLX5E_METADATA_ETHER_LEN); + memmove(new_eth, old_eth, 2 * ETH_ALEN); + /* Ethertype is already in its new place */ + skb_pull_inline(skb, MLX5E_METADATA_ETHER_LEN); + + return skb; +} + +bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev, + netdev_features_t features) +{ + struct xfrm_state *x; + + if (skb->sp && skb->sp->len) { + x = skb->sp->xvec[0]; + if (x && x->xso.offload_handle) + return true; + } + return false; +} + +void mlx5e_ipsec_build_inverse_table(void) +{ + u16 mss_inv; + u32 mss; + + /* Calculate 1/x inverse table for use in GSO data path. + * Using this table, we provide the IPSec accelerator with the value of + * 1/gso_size so that it can infer the position of each segment inside + * the GSO, and increment the ESP sequence number, and generate the IV. + * The HW needs this value in Q0.16 fixed-point number format + */ + mlx5e_ipsec_inverse_table[1] = htons(0xFFFF); + for (mss = 2; mss < MAX_LSO_MSS; mss++) { + mss_inv = div_u64(1ULL << 32, mss) >> 16; + mlx5e_ipsec_inverse_table[mss] = htons(mss_inv); + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h new file mode 100644 index 000000000000..e37ae2598dbb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5E_IPSEC_RXTX_H__ +#define __MLX5E_IPSEC_RXTX_H__ + +#ifdef CONFIG_MLX5_EN_IPSEC + +#include <linux/skbuff.h> +#include "en.h" + +struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb); +void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); + +void mlx5e_ipsec_inverse_table_init(void); +bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev, + netdev_features_t features); +struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, + struct mlx5e_tx_wqe *wqe, + struct sk_buff *skb); + +#endif /* CONFIG_MLX5_EN_IPSEC */ + +#endif /* __MLX5E_IPSEC_RXTX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c new file mode 100644 index 000000000000..6fea59223dc4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include <linux/ethtool.h> +#include <net/sock.h> + +#include "en.h" +#include "accel/ipsec.h" +#include "fpga/sdk.h" +#include "en_accel/ipsec.h" + +static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_in_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_out_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_bypass_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_in_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_out_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_bypass_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_drop_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_auth_fail_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_drop_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_success) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_fail) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_success) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_fail) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_cmd_drop) }, +}; + +static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sadb_miss) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_syndrome) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_bundle) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_no_state) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_not_ip) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_trailer) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_metadata) }, +}; + +#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \ + atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset)) + +#define NUM_IPSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_hw_stats_desc) +#define NUM_IPSEC_SW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_sw_stats_desc) + +#define NUM_IPSEC_COUNTERS (NUM_IPSEC_HW_COUNTERS + NUM_IPSEC_SW_COUNTERS) + +int mlx5e_ipsec_get_count(struct mlx5e_priv *priv) +{ + if (!priv->ipsec) + return 0; + + return NUM_IPSEC_COUNTERS; +} + +int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data) +{ + unsigned int i, idx = 0; + + if (!priv->ipsec) + return 0; + + for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + mlx5e_ipsec_hw_stats_desc[i].format); + + for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + mlx5e_ipsec_sw_stats_desc[i].format); + + return NUM_IPSEC_COUNTERS; +} + +void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv) +{ + int ret; + + if (!priv->ipsec) + return; + + ret = mlx5_accel_ipsec_counters_read(priv->mdev, (u64 *)&priv->ipsec->stats, + NUM_IPSEC_HW_COUNTERS); + if (ret) + memset(&priv->ipsec->stats, 0, sizeof(priv->ipsec->stats)); +} + +int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data) +{ + int i, idx = 0; + + if (!priv->ipsec) + return 0; + + for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->ipsec->stats, + mlx5e_ipsec_hw_stats_desc, i); + + for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->sw_stats, + mlx5e_ipsec_sw_stats_desc, i); 
+ + return NUM_IPSEC_COUNTERS; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index c8a005326e30..12d3ced61114 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -178,34 +178,26 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, struct mlx5_flow_destination dest; MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_spec *spec; + enum mlx5e_traffic_types tt; int err = 0; - spec = mlx5_vzalloc(sizeof(*spec)); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { - netdev_err(priv->netdev, "%s: alloc failed\n", __func__); err = -ENOMEM; goto out; } dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - switch (type) { - case ARFS_IPV4_TCP: - dest.tir_num = tir[MLX5E_TT_IPV4_TCP].tirn; - break; - case ARFS_IPV4_UDP: - dest.tir_num = tir[MLX5E_TT_IPV4_UDP].tirn; - break; - case ARFS_IPV6_TCP: - dest.tir_num = tir[MLX5E_TT_IPV6_TCP].tirn; - break; - case ARFS_IPV6_UDP: - dest.tir_num = tir[MLX5E_TT_IPV6_UDP].tirn; - break; - default: + tt = arfs_get_tt(type); + if (tt == -EINVAL) { + netdev_err(priv->netdev, "%s: bad arfs_type: %d\n", + __func__, type); err = -EINVAL; goto out; } + dest.tir_num = tir[tt].tirn; + arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, spec, &flow_act, &dest, 1); @@ -237,7 +229,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in || !ft->g) { kvfree(ft->g); kvfree(in); @@ -481,9 +473,8 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, struct mlx5_flow_table *ft; int err = 0; - spec = mlx5_vzalloc(sizeof(*spec)); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { - netdev_err(priv->netdev, "%s: alloc failed\n", __func__); err = -ENOMEM; goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index e706a87fc8b2..84dd63e74041 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -53,6 +53,15 @@ enum { MLX5E_EVENT_MODE_ONCE_TILL_ARM = 0x2, }; +enum { + MLX5E_MTPPS_FS_ENABLE = BIT(0x0), + MLX5E_MTPPS_FS_PATTERN = BIT(0x2), + MLX5E_MTPPS_FS_PIN_MODE = BIT(0x3), + MLX5E_MTPPS_FS_TIME_STAMP = BIT(0x4), + MLX5E_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5), + MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7), +}; + void mlx5e_fill_hwstamp(struct mlx5e_tstamp *tstamp, u64 timestamp, struct skb_shared_hwtstamps *hwts) { @@ -73,22 +82,50 @@ static u64 mlx5e_read_internal_timer(const struct cyclecounter *cc) return mlx5_read_internal_timer(tstamp->mdev) & cc->mask; } +static void mlx5e_pps_out(struct work_struct *work) +{ + struct mlx5e_pps *pps_info = container_of(work, struct mlx5e_pps, + out_work); + struct mlx5e_tstamp *tstamp = container_of(pps_info, struct mlx5e_tstamp, + pps_info); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + unsigned long flags; + int i; + + for (i = 0; i < tstamp->ptp_info.n_pins; i++) { + u64 tstart; + + write_lock_irqsave(&tstamp->lock, flags); + tstart = tstamp->pps_info.start[i]; + tstamp->pps_info.start[i] = 0; + write_unlock_irqrestore(&tstamp->lock, flags); + if (!tstart) + continue; + + MLX5_SET(mtpps_reg, in, pin, i); + MLX5_SET64(mtpps_reg, in, time_stamp, tstart); + MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_TIME_STAMP); + mlx5_set_mtpps(tstamp->mdev, in, sizeof(in)); + } +} + static 
void mlx5e_timestamp_overflow(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct mlx5e_tstamp *tstamp = container_of(dwork, struct mlx5e_tstamp, overflow_work); + struct mlx5e_priv *priv = container_of(tstamp, struct mlx5e_priv, tstamp); unsigned long flags; write_lock_irqsave(&tstamp->lock, flags); timecounter_read(&tstamp->clock); write_unlock_irqrestore(&tstamp->lock, flags); - schedule_delayed_work(&tstamp->overflow_work, tstamp->overflow_period); + queue_delayed_work(priv->wq, &tstamp->overflow_work, + msecs_to_jiffies(tstamp->overflow_period * 1000)); } -int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) +int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) { - struct mlx5e_priv *priv = netdev_priv(dev); struct hwtstamp_config config; int err; @@ -128,11 +165,12 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: /* Disable CQE compression */ - netdev_warn(dev, "Disabling cqe compression"); + netdev_warn(priv->netdev, "Disabling cqe compression"); err = mlx5e_modify_rx_cqe_compression_locked(priv, false); if (err) { - netdev_err(dev, "Failed disabling cqe compression err=%d\n", err); + netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); mutex_unlock(&priv->state_lock); return err; } @@ -150,9 +188,8 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) sizeof(config)) ? -EFAULT : 0; } -int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr) +int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr) { - struct mlx5e_priv *priv = netdev_priv(dev); struct hwtstamp_config *cfg = &priv->tstamp.hwtstamp_config; if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) @@ -214,18 +251,6 @@ static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) int neg_adj = 0; struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, ptp_info); - struct mlx5e_priv *priv = - container_of(tstamp, struct mlx5e_priv, tstamp); - - if (MLX5_CAP_GEN(priv->mdev, pps_modify)) { - u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; - - /* For future use need to add a loop for finding all 1PPS out pins */ - MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT); - MLX5_SET(mtpps_reg, in, out_periodic_adjustment, delta & 0xFFFF); - - mlx5_set_mtpps(priv->mdev, in, sizeof(in)); - } if (delta < 0) { neg_adj = 1; @@ -254,12 +279,13 @@ static int mlx5e_extts_configure(struct ptp_clock_info *ptp, struct mlx5e_priv *priv = container_of(tstamp, struct mlx5e_priv, tstamp); u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + u32 field_select = 0; + u8 pin_mode = 0; u8 pattern = 0; int pin = -1; int err = 0; - if (!MLX5_CAP_GEN(priv->mdev, pps) || - !MLX5_CAP_GEN(priv->mdev, pps_modify)) + if (!MLX5_PPS_CAP(priv->mdev)) return -EOPNOTSUPP; if (rq->extts.index >= tstamp->ptp_info.n_pins) @@ -269,15 +295,21 @@ static int mlx5e_extts_configure(struct ptp_clock_info *ptp, pin = ptp_find_pin(tstamp->ptp, PTP_PF_EXTTS, rq->extts.index); if (pin < 0) return -EBUSY; + pin_mode = MLX5E_PIN_MODE_IN; + pattern = !!(rq->extts.flags & PTP_FALLING_EDGE); + field_select = MLX5E_MTPPS_FS_PIN_MODE | + MLX5E_MTPPS_FS_PATTERN | + MLX5E_MTPPS_FS_ENABLE; + } else { + pin = rq->extts.index; + field_select = MLX5E_MTPPS_FS_ENABLE; } - if (rq->extts.flags & PTP_FALLING_EDGE) - pattern = 1; - MLX5_SET(mtpps_reg, in, pin, pin); - MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_IN); + 
MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); MLX5_SET(mtpps_reg, in, pattern, pattern); MLX5_SET(mtpps_reg, in, enable, on); + MLX5_SET(mtpps_reg, in, field_select, field_select); err = mlx5_set_mtpps(priv->mdev, in, sizeof(in)); if (err) @@ -296,14 +328,18 @@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp, struct mlx5e_priv *priv = container_of(tstamp, struct mlx5e_priv, tstamp); u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; - u64 nsec_now, nsec_delta, time_stamp; + u64 nsec_now, nsec_delta, time_stamp = 0; u64 cycles_now, cycles_delta; struct timespec64 ts; unsigned long flags; + u32 field_select = 0; + u8 pin_mode = 0; + u8 pattern = 0; int pin = -1; + int err = 0; s64 ns; - if (!MLX5_CAP_GEN(priv->mdev, pps_modify)) + if (!MLX5_PPS_CAP(priv->mdev)) return -EOPNOTSUPP; if (rq->perout.index >= tstamp->ptp_info.n_pins) @@ -314,32 +350,60 @@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp, rq->perout.index); if (pin < 0) return -EBUSY; - } - ts.tv_sec = rq->perout.period.sec; - ts.tv_nsec = rq->perout.period.nsec; - ns = timespec64_to_ns(&ts); - if (on) + pin_mode = MLX5E_PIN_MODE_OUT; + pattern = MLX5E_OUT_PATTERN_PERIODIC; + ts.tv_sec = rq->perout.period.sec; + ts.tv_nsec = rq->perout.period.nsec; + ns = timespec64_to_ns(&ts); + if ((ns >> 1) != 500000000LL) return -EINVAL; - ts.tv_sec = rq->perout.start.sec; - ts.tv_nsec = rq->perout.start.nsec; - ns = timespec64_to_ns(&ts); - cycles_now = mlx5_read_internal_timer(tstamp->mdev); - write_lock_irqsave(&tstamp->lock, flags); - nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now); - nsec_delta = ns - nsec_now; - cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift, - tstamp->cycles.mult); - write_unlock_irqrestore(&tstamp->lock, flags); - time_stamp = cycles_now + cycles_delta; + + ts.tv_sec = rq->perout.start.sec; + ts.tv_nsec = rq->perout.start.nsec; + ns = timespec64_to_ns(&ts); + cycles_now = mlx5_read_internal_timer(tstamp->mdev); + write_lock_irqsave(&tstamp->lock, flags); + nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now); + nsec_delta = ns - nsec_now; + cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift, + tstamp->cycles.mult); + write_unlock_irqrestore(&tstamp->lock, flags); + time_stamp = cycles_now + cycles_delta; + field_select = MLX5E_MTPPS_FS_PIN_MODE | + MLX5E_MTPPS_FS_PATTERN | + MLX5E_MTPPS_FS_ENABLE | + MLX5E_MTPPS_FS_TIME_STAMP; + } else { + pin = rq->perout.index; + field_select = MLX5E_MTPPS_FS_ENABLE; + } + MLX5_SET(mtpps_reg, in, pin, pin); - MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT); - MLX5_SET(mtpps_reg, in, pattern, MLX5E_OUT_PATTERN_PERIODIC); + MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); + MLX5_SET(mtpps_reg, in, pattern, pattern); MLX5_SET(mtpps_reg, in, enable, on); MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp); + MLX5_SET(mtpps_reg, in, field_select, field_select); + + err = mlx5_set_mtpps(priv->mdev, in, sizeof(in)); + if (err) + return err; - return mlx5_set_mtpps(priv->mdev, in, sizeof(in)); + return mlx5_set_mtppse(priv->mdev, pin, 0, + MLX5E_EVENT_MODE_REPETETIVE & on); +} + +static int mlx5e_pps_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5e_tstamp *tstamp = + container_of(ptp, struct mlx5e_tstamp, ptp_info); + + tstamp->pps_info.enabled = !!on; + return 0; } static int mlx5e_ptp_enable(struct ptp_clock_info *ptp, @@ -351,6 +415,8 @@ static int mlx5e_ptp_enable(struct ptp_clock_info *ptp, return mlx5e_extts_configure(ptp, rq, on); case PTP_CLK_REQ_PEROUT: return 
mlx5e_perout_configure(ptp, rq, on); + case PTP_CLK_REQ_PPS: + return mlx5e_pps_configure(ptp, rq, on); default: return -EOPNOTSUPP; } @@ -396,6 +462,7 @@ static int mlx5e_init_pin_config(struct mlx5e_tstamp *tstamp) return -ENOMEM; tstamp->ptp_info.enable = mlx5e_ptp_enable; tstamp->ptp_info.verify = mlx5e_ptp_verify; + tstamp->ptp_info.pps = 1; for (i = 0; i < tstamp->ptp_info.n_pins; i++) { snprintf(tstamp->ptp_info.pin_config[i].name, @@ -423,22 +490,56 @@ static void mlx5e_get_pps_caps(struct mlx5e_priv *priv, tstamp->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out, cap_max_num_of_pps_out_pins); - tstamp->pps_pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode); - tstamp->pps_pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode); - tstamp->pps_pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode); - tstamp->pps_pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode); - tstamp->pps_pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode); - tstamp->pps_pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode); - tstamp->pps_pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode); - tstamp->pps_pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); + tstamp->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode); + tstamp->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode); + tstamp->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode); + tstamp->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode); + tstamp->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode); + tstamp->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode); + tstamp->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode); + tstamp->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); } void mlx5e_pps_event_handler(struct mlx5e_priv *priv, struct ptp_clock_event *event) { + struct net_device *netdev = priv->netdev; struct mlx5e_tstamp *tstamp = &priv->tstamp; + struct timespec64 ts; + u64 nsec_now, nsec_delta; + u64 cycles_now, cycles_delta; + int pin = event->index; + s64 ns; + unsigned long flags; - ptp_clock_event(tstamp->ptp, event); + switch (tstamp->ptp_info.pin_config[pin].func) { + case PTP_PF_EXTTS: + if (tstamp->pps_info.enabled) { + event->type = PTP_CLOCK_PPSUSR; + event->pps_times.ts_real = ns_to_timespec64(event->timestamp); + } else { + event->type = PTP_CLOCK_EXTTS; + } + ptp_clock_event(tstamp->ptp, event); + break; + case PTP_PF_PEROUT: + mlx5e_ptp_gettime(&tstamp->ptp_info, &ts); + cycles_now = mlx5_read_internal_timer(tstamp->mdev); + ts.tv_sec += 1; + ts.tv_nsec = 0; + ns = timespec64_to_ns(&ts); + write_lock_irqsave(&tstamp->lock, flags); + nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now); + nsec_delta = ns - nsec_now; + cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift, + tstamp->cycles.mult); + tstamp->pps_info.start[pin] = cycles_now + cycles_delta; + queue_work(priv->wq, &tstamp->pps_info.out_work); + write_unlock_irqrestore(&tstamp->lock, flags); + break; + default: + netdev_err(netdev, "%s: Unhandled event\n", __func__); + } } void mlx5e_timestamp_init(struct mlx5e_priv *priv) @@ -474,9 +575,10 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) do_div(ns, NSEC_PER_SEC / 2 / HZ); tstamp->overflow_period = ns; + INIT_WORK(&tstamp->pps_info.out_work, mlx5e_pps_out); INIT_DELAYED_WORK(&tstamp->overflow_work, mlx5e_timestamp_overflow); if (tstamp->overflow_period) - schedule_delayed_work(&tstamp->overflow_work, 0); + queue_delayed_work(priv->wq, &tstamp->overflow_work, 0); else 
mlx5_core_warn(priv->mdev, "invalid overflow period, overflow_work is not scheduled\n"); @@ -485,16 +587,10 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) snprintf(tstamp->ptp_info.name, 16, "mlx5 ptp"); /* Initialize 1PPS data structures */ -#define MAX_PIN_NUM 8 - tstamp->pps_pin_caps = kzalloc(sizeof(u8) * MAX_PIN_NUM, GFP_KERNEL); - if (tstamp->pps_pin_caps) { - if (MLX5_CAP_GEN(priv->mdev, pps)) - mlx5e_get_pps_caps(priv, tstamp); - if (tstamp->ptp_info.n_pins) - mlx5e_init_pin_config(tstamp); - } else { - mlx5_core_warn(priv->mdev, "1PPS initialization failed\n"); - } + if (MLX5_PPS_CAP(priv->mdev)) + mlx5e_get_pps_caps(priv, tstamp); + if (tstamp->ptp_info.n_pins) + mlx5e_init_pin_config(tstamp); tstamp->ptp = ptp_clock_register(&tstamp->ptp_info, &priv->mdev->pdev->dev); @@ -517,8 +613,7 @@ void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv) priv->tstamp.ptp = NULL; } - kfree(tstamp->pps_pin_caps); - kfree(tstamp->ptp_info.pin_config); - + cancel_work_sync(&tstamp->pps_info.out_work); cancel_delayed_work_sync(&tstamp->overflow_work); + kfree(tstamp->ptp_info.pin_config); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index f1f17f7a3cd0..ece3fb147e3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -65,7 +65,7 @@ static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *in; int err; - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -145,9 +145,8 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) int inlen; void *in; - inlen = MLX5_ST_SZ_BYTES(modify_tir_in); - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 8fa23f6a1f67..51c4cc00a186 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -41,6 +41,11 @@ #define MLX5E_CEE_STATE_UP 1 #define MLX5E_CEE_STATE_DOWN 0 +enum { + MLX5E_VENDOR_TC_GROUP_NUM = 7, + MLX5E_LOWEST_PRIO_GROUP = 0, +}; + /* If dcbx mode is non-host set the dcbx mode to host. 
*/ static int mlx5e_dcbnl_set_dcbx_mode(struct mlx5e_priv *priv, @@ -85,6 +90,9 @@ static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + u8 tc_group[IEEE_8021QAZ_MAX_TCS]; + bool is_tc_group_6_exist = false; + bool is_zero_bw_ets_tc = false; int err = 0; int i; @@ -96,37 +104,64 @@ static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev, err = mlx5_query_port_prio_tc(mdev, i, &ets->prio_tc[i]); if (err) return err; - } - for (i = 0; i < ets->ets_cap; i++) { + err = mlx5_query_port_tc_group(mdev, i, &tc_group[i]); + if (err) + return err; + err = mlx5_query_port_tc_bw_alloc(mdev, i, &ets->tc_tx_bw[i]); if (err) return err; + + if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC && + tc_group[i] == (MLX5E_LOWEST_PRIO_GROUP + 1)) + is_zero_bw_ets_tc = true; + + if (tc_group[i] == (MLX5E_VENDOR_TC_GROUP_NUM - 1)) + is_tc_group_6_exist = true; + } + + /* Report 0% ets tc if exits*/ + if (is_zero_bw_ets_tc) { + for (i = 0; i < ets->ets_cap; i++) + if (tc_group[i] == MLX5E_LOWEST_PRIO_GROUP) + ets->tc_tx_bw[i] = 0; + } + + /* Update tc_tsa based on fw setting*/ + for (i = 0; i < ets->ets_cap; i++) { if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC) priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS; + else if (tc_group[i] == MLX5E_VENDOR_TC_GROUP_NUM && + !is_tc_group_6_exist) + priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR; } - memcpy(ets->tc_tsa, priv->dcbx.tc_tsa, sizeof(ets->tc_tsa)); return err; } -enum { - MLX5E_VENDOR_TC_GROUP_NUM = 7, - MLX5E_ETS_TC_GROUP_NUM = 0, -}; - static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc) { bool any_tc_mapped_to_ets = false; + bool ets_zero_bw = false; int strict_group; int i; - for (i = 0; i <= max_tc; i++) - if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) + for (i = 0; i <= max_tc; i++) { + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { any_tc_mapped_to_ets = true; + if (!ets->tc_tx_bw[i]) + ets_zero_bw = true; + } + } - strict_group = any_tc_mapped_to_ets ? 1 : 0; + /* strict group has higher priority than ets group */ + strict_group = MLX5E_LOWEST_PRIO_GROUP; + if (any_tc_mapped_to_ets) + strict_group++; + if (ets_zero_bw) + strict_group++; for (i = 0; i <= max_tc; i++) { switch (ets->tc_tsa[i]) { @@ -137,7 +172,9 @@ static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc) tc_group[i] = strict_group++; break; case IEEE_8021QAZ_TSA_ETS: - tc_group[i] = MLX5E_ETS_TC_GROUP_NUM; + tc_group[i] = MLX5E_LOWEST_PRIO_GROUP; + if (ets->tc_tx_bw[i] && ets_zero_bw) + tc_group[i] = MLX5E_LOWEST_PRIO_GROUP + 1; break; } } @@ -146,9 +183,23 @@ static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc) static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw, u8 *tc_group, int max_tc) { + int bw_for_ets_zero_bw_tc = 0; + int last_ets_zero_bw_tc = -1; + int num_ets_zero_bw = 0; int i; for (i = 0; i <= max_tc; i++) { + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS && + !ets->tc_tx_bw[i]) { + num_ets_zero_bw++; + last_ets_zero_bw_tc = i; + } + } + + if (num_ets_zero_bw) + bw_for_ets_zero_bw_tc = MLX5E_MAX_BW_ALLOC / num_ets_zero_bw; + + for (i = 0; i <= max_tc; i++) { switch (ets->tc_tsa[i]) { case IEEE_8021QAZ_TSA_VENDOR: tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; @@ -157,12 +208,26 @@ static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw, tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; break; case IEEE_8021QAZ_TSA_ETS: - tc_tx_bw[i] = ets->tc_tx_bw[i]; + tc_tx_bw[i] = ets->tc_tx_bw[i] ? 
+ ets->tc_tx_bw[i] : + bw_for_ets_zero_bw_tc; break; } } + + /* Make sure the total bw for ets zero bw group is 100% */ + if (last_ets_zero_bw_tc != -1) + tc_tx_bw[last_ets_zero_bw_tc] += + MLX5E_MAX_BW_ALLOC % num_ets_zero_bw; } +/* If there are ETS BW 0, + * Set ETS group # to 1 for all ETS non zero BW tcs. Their sum must be 100%. + * Set group #0 to all the ETS BW 0 tcs and + * equally splits the 100% BW between them + * Report both group #0 and #1 as ETS type. + * All the tcs in group #0 will be reported with 0% BW. + */ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) { struct mlx5_core_dev *mdev = priv->mdev; @@ -188,7 +253,6 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) return err; memcpy(priv->dcbx.tc_tsa, ets->tc_tsa, sizeof(ets->tc_tsa)); - return err; } @@ -209,17 +273,9 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, } /* Validate Bandwidth Sum */ - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { - if (!ets->tc_tx_bw[i]) { - netdev_err(netdev, - "Failed to validate ETS: BW 0 is illegal\n"); - return -EINVAL; - } - + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) bw_sum += ets->tc_tx_bw[i]; - } - } if (bw_sum != 0 && bw_sum != 100) { netdev_err(netdev, @@ -288,13 +344,8 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, static u8 mlx5e_dcbnl_getdcbx(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_dcbx *dcbx = &priv->dcbx; - u8 mode = DCB_CAP_DCBX_VER_IEEE | DCB_CAP_DCBX_VER_CEE; - - if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_HOST) - mode |= DCB_CAP_DCBX_HOST; - return mode; + return priv->dcbx.cap; } static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode) @@ -312,6 +363,7 @@ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode) /* set dcbx to fw controlled */ if (!mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_AUTO)) { dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO; + dcbx->cap &= ~DCB_CAP_DCBX_HOST; return 0; } @@ -324,6 +376,8 @@ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode) if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev))) return 1; + dcbx->cap = mode; + return 0; } @@ -464,6 +518,8 @@ static void mlx5e_dcbnl_getpermhwaddr(struct net_device *netdev, if (!perm_addr) return; + memset(perm_addr, 0xff, MAX_ADDR_LEN); + mlx5_query_nic_vport_mac_address(priv->mdev, 0, perm_addr); } @@ -533,8 +589,7 @@ static void mlx5e_dcbnl_getpgtccfgtx(struct net_device *netdev, static void mlx5e_dcbnl_getpgbwgcfgtx(struct net_device *netdev, int pgid, u8 *bw_pct) { - struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_core_dev *mdev = priv->mdev; + struct ieee_ets ets; if (pgid >= CEE_DCBX_MAX_PGS) { netdev_err(netdev, @@ -542,8 +597,8 @@ static void mlx5e_dcbnl_getpgbwgcfgtx(struct net_device *netdev, return; } - if (mlx5_query_port_tc_bw_alloc(mdev, pgid, bw_pct)) - *bw_pct = 0; + mlx5e_dcbnl_ieee_getets(netdev, &ets); + *bw_pct = ets.tc_tx_bw[pgid]; } static void mlx5e_dcbnl_setpfccfg(struct net_device *netdev, @@ -626,9 +681,9 @@ static u8 mlx5e_dcbnl_getcap(struct net_device *netdev, *cap = false; break; case DCB_CAP_ATTR_DCBX: - *cap = (DCB_CAP_DCBX_LLD_MANAGED | - DCB_CAP_DCBX_VER_CEE | - DCB_CAP_DCBX_STATIC); + *cap = priv->dcbx.cap | + DCB_CAP_DCBX_VER_CEE | + DCB_CAP_DCBX_VER_IEEE; break; default: *cap = 0; @@ -739,8 +794,6 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv) ets.prio_tc[i] = i; } - 
memcpy(priv->dcbx.tc_tsa, ets.tc_tsa, sizeof(ets.tc_tsa)); - /* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */ ets.prio_tc[0] = 1; ets.prio_tc[1] = 0; @@ -752,8 +805,16 @@ void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) { struct mlx5e_dcbx *dcbx = &priv->dcbx; + if (!MLX5_CAP_GEN(priv->mdev, qos)) + return; + if (MLX5_CAP_GEN(priv->mdev, dcbx)) mlx5e_dcbnl_query_dcbx_mode(priv, &dcbx->mode); + priv->dcbx.cap = DCB_CAP_DCBX_VER_CEE | + DCB_CAP_DCBX_VER_IEEE; + if (priv->dcbx.mode == MLX5E_DCBX_PARAM_VER_OPER_HOST) + priv->dcbx.cap |= DCB_CAP_DCBX_HOST; + mlx5e_ets_init(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 16486dff1493..d12e9fc0d76b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -31,15 +31,15 @@ */ #include "en.h" +#include "en_accel/ipsec.h" -static void mlx5e_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *drvinfo) +void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, + struct ethtool_drvinfo *drvinfo) { - struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; strlcpy(drvinfo->driver, DRIVER_NAME, sizeof(drvinfo->driver)); - strlcpy(drvinfo->version, DRIVER_VERSION " (" DRIVER_RELDATE ")", + strlcpy(drvinfo->version, DRIVER_VERSION, sizeof(drvinfo->version)); snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d.%04d (%.16s)", @@ -49,6 +49,14 @@ static void mlx5e_get_drvinfo(struct net_device *dev, sizeof(drvinfo->bus_info)); } +static void mlx5e_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_drvinfo(priv, drvinfo); +} + struct ptys2ethtool_config { __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); @@ -135,6 +143,9 @@ static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) u8 pfc_en_rx; int err; + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + err = mlx5_query_port_pfc(mdev, &pfc_en_tx, &pfc_en_rx); return err ? 0 : pfc_en_tx | pfc_en_rx; @@ -147,6 +158,9 @@ static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) u32 tx_pause; int err; + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return false; + err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); return err ? 
false : rx_pause | tx_pause; @@ -160,10 +174,8 @@ static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) ((mlx5e_query_global_pause_combined(priv) + hweight8(mlx5e_query_pfc_combined(priv))) * \ NUM_PPORT_PER_PRIO_PFC_COUNTERS) -static int mlx5e_get_sset_count(struct net_device *dev, int sset) +int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) { - struct mlx5e_priv *priv = netdev_priv(dev); - switch (sset) { case ETH_SS_STATS: return NUM_SW_COUNTERS + @@ -174,7 +186,8 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) MLX5E_NUM_SQ_STATS(priv) + MLX5E_NUM_PFC_COUNTERS(priv) + ARRAY_SIZE(mlx5e_pme_status_desc) + - ARRAY_SIZE(mlx5e_pme_error_desc); + ARRAY_SIZE(mlx5e_pme_error_desc) + + mlx5e_ipsec_get_count(priv); case ETH_SS_PRIV_FLAGS: return ARRAY_SIZE(mlx5e_priv_flags); @@ -186,7 +199,14 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) } } -static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) +static int mlx5e_get_sset_count(struct net_device *dev, int sset) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_get_sset_count(priv, sset); +} + +static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) { int i, j, tc, prio, idx = 0; unsigned long pfc_combined; @@ -221,10 +241,22 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_phy_statistical_stats_desc[i].format); + for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_eth_ext_stats_desc[i].format); + for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, pcie_perf_stats_desc[i].format); + for (i = 0; i < NUM_PCIE_PERF_COUNTERS64(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stats_desc64[i].format); + + for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stall_stats_desc[i].format); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, @@ -256,6 +288,9 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) for (i = 0; i < ARRAY_SIZE(mlx5e_pme_error_desc); i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format); + /* IPSec counters */ + idx += mlx5e_ipsec_get_strings(priv, data + idx * ETH_GSTRING_LEN); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return; @@ -273,10 +308,8 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) priv->channel_tc2txq[i][tc]); } -static void mlx5e_get_strings(struct net_device *dev, - uint32_t stringset, uint8_t *data) +void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data) { - struct mlx5e_priv *priv = netdev_priv(dev); int i; switch (stringset) { @@ -297,10 +330,16 @@ static void mlx5e_get_strings(struct net_device *dev, } } -static void mlx5e_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, u64 *data) +static void mlx5e_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_strings(priv, stringset, data); +} + +void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, + struct ethtool_stats *stats, u64 *data) +{ struct mlx5e_channels *channels; struct mlx5_priv *mlx5_priv; int i, j, tc, prio, idx = 0; @@ -311,7 +350,7 @@ static void 
mlx5e_get_ethtool_stats(struct net_device *dev, mutex_lock(&priv->state_lock); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_update_stats(priv); + mlx5e_update_stats(priv, true); channels = &priv->channels; mutex_unlock(&priv->state_lock); @@ -343,10 +382,22 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, pport_phy_statistical_stats_desc, i); + for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS(priv); i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters, + pport_eth_ext_stats_desc, i); + for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++) data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, pcie_perf_stats_desc, i); + for (i = 0; i < NUM_PCIE_PERF_COUNTERS64(priv); i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stats_desc64, i); + + for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS(priv); i++) + data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stall_stats_desc, i); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], @@ -378,6 +429,9 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters, mlx5e_pme_error_desc, i); + /* IPSec counters */ + idx += mlx5e_ipsec_get_stats(priv, data + idx); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return; @@ -395,6 +449,15 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, sq_stats_desc, j); } +static void mlx5e_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + u64 *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_ethtool_stats(priv, stats, data); +} + static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type, int num_wqe) { @@ -439,10 +502,9 @@ static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type, return 1 << (order_base_2(num_wqes)); } -static void mlx5e_get_ringparam(struct net_device *dev, - struct ethtool_ringparam *param) +void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param) { - struct mlx5e_priv *priv = netdev_priv(dev); int rq_wq_type = priv->channels.params.rq_wq_type; param->rx_max_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, @@ -453,10 +515,17 @@ static void mlx5e_get_ringparam(struct net_device *dev, param->tx_pending = 1 << priv->channels.params.log_sq_size; } -static int mlx5e_set_ringparam(struct net_device *dev, - struct ethtool_ringparam *param) +static void mlx5e_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) { struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_ringparam(priv, param); +} + +int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param) +{ int rq_wq_type = priv->channels.params.rq_wq_type; struct mlx5e_channels new_channels = {}; u32 rx_pending_wqes; @@ -468,12 +537,12 @@ static int mlx5e_set_ringparam(struct net_device *dev, int err = 0; if (param->rx_jumbo_pending) { - netdev_info(dev, "%s: rx_jumbo_pending not supported\n", + netdev_info(priv->netdev, "%s: rx_jumbo_pending not supported\n", __func__); return -EINVAL; } if (param->rx_mini_pending) { - netdev_info(dev, "%s: rx_mini_pending not supported\n", + netdev_info(priv->netdev, "%s: rx_mini_pending not supported\n", 
__func__); return -EINVAL; } @@ -486,13 +555,13 @@ static int mlx5e_set_ringparam(struct net_device *dev, param->rx_pending); if (param->rx_pending < min_rq_size) { - netdev_info(dev, "%s: rx_pending (%d) < min (%d)\n", + netdev_info(priv->netdev, "%s: rx_pending (%d) < min (%d)\n", __func__, param->rx_pending, min_rq_size); return -EINVAL; } if (param->rx_pending > max_rq_size) { - netdev_info(dev, "%s: rx_pending (%d) > max (%d)\n", + netdev_info(priv->netdev, "%s: rx_pending (%d) > max (%d)\n", __func__, param->rx_pending, max_rq_size); return -EINVAL; @@ -501,19 +570,19 @@ static int mlx5e_set_ringparam(struct net_device *dev, num_mtts = MLX5E_REQUIRED_MTTS(rx_pending_wqes); if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && !MLX5E_VALID_NUM_MTTS(num_mtts)) { - netdev_info(dev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n", + netdev_info(priv->netdev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n", __func__, param->rx_pending); return -EINVAL; } if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) { - netdev_info(dev, "%s: tx_pending (%d) < min (%d)\n", + netdev_info(priv->netdev, "%s: tx_pending (%d) < min (%d)\n", __func__, param->tx_pending, 1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); return -EINVAL; } if (param->tx_pending > (1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE)) { - netdev_info(dev, "%s: tx_pending (%d) > max (%d)\n", + netdev_info(priv->netdev, "%s: tx_pending (%d) > max (%d)\n", __func__, param->tx_pending, 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE); return -EINVAL; @@ -549,26 +618,39 @@ unlock: return err; } -static void mlx5e_get_channels(struct net_device *dev, - struct ethtool_channels *ch) +static int mlx5e_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) { struct mlx5e_priv *priv = netdev_priv(dev); + return mlx5e_ethtool_set_ringparam(priv, param); +} + +void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch) +{ ch->max_combined = priv->profile->max_nch(priv->mdev); ch->combined_count = priv->channels.params.num_channels; } -static int mlx5e_set_channels(struct net_device *dev, - struct ethtool_channels *ch) +static void mlx5e_get_channels(struct net_device *dev, + struct ethtool_channels *ch) { struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_channels(priv, ch); +} + +int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch) +{ unsigned int count = ch->combined_count; struct mlx5e_channels new_channels = {}; bool arfs_enabled; int err = 0; if (!count) { - netdev_info(dev, "%s: combined_count=0 not supported\n", + netdev_info(priv->netdev, "%s: combined_count=0 not supported\n", __func__); return -EINVAL; } @@ -580,8 +662,9 @@ static int mlx5e_set_channels(struct net_device *dev, new_channels.params = priv->channels.params; new_channels.params.num_channels = count; - mlx5e_build_default_indir_rqt(priv->mdev, new_channels.params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, count); + if (!netif_is_rxfh_configured(priv->netdev)) + mlx5e_build_default_indir_rqt(new_channels.params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, count); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; @@ -593,7 +676,7 @@ static int mlx5e_set_channels(struct net_device *dev, if (err) goto out; - arfs_enabled = dev->features & NETIF_F_NTUPLE; + arfs_enabled = priv->netdev->features & NETIF_F_NTUPLE; if (arfs_enabled) mlx5e_arfs_disable(priv); @@ -603,7 +686,7 @@ static int 
mlx5e_set_channels(struct net_device *dev, if (arfs_enabled) { err = mlx5e_arfs_enable(priv); if (err) - netdev_err(dev, "%s: mlx5e_arfs_enable failed: %d\n", + netdev_err(priv->netdev, "%s: mlx5e_arfs_enable failed: %d\n", __func__, err); } @@ -613,11 +696,17 @@ out: return err; } -static int mlx5e_get_coalesce(struct net_device *netdev, - struct ethtool_coalesce *coal) +static int mlx5e_set_channels(struct net_device *dev, + struct ethtool_channels *ch) { - struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_priv *priv = netdev_priv(dev); + return mlx5e_ethtool_set_channels(priv, ch); +} + +int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal) +{ if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) return -EOPNOTSUPP; @@ -630,6 +719,14 @@ static int mlx5e_get_coalesce(struct net_device *netdev, return 0; } +static int mlx5e_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_coalesce(priv, coal); +} + static void mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { @@ -653,10 +750,9 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc } } -static int mlx5e_set_coalesce(struct net_device *netdev, - struct ethtool_coalesce *coal) +int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channels new_channels = {}; int err = 0; @@ -699,6 +795,14 @@ out: return err; } +static int mlx5e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_coalesce(priv, coal); +} + static void ptys2ethtool_supported_link(unsigned long *supported_modes, u32 eth_proto_cap) { @@ -723,24 +827,81 @@ static void ptys2ethtool_adver_link(unsigned long *advertising_modes, __ETHTOOL_LINK_MODE_MASK_NBITS); } -static void ptys2ethtool_supported_port(struct ethtool_link_ksettings *link_ksettings, - u32 eth_proto_cap) +static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings *link_ksettings, + u32 eth_proto_cap, + u8 connector_type) { - if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR) - | MLX5E_PROT_MASK(MLX5E_10GBASE_SR) - | MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) - | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) - | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) - | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { - ethtool_link_ksettings_add_link_mode(link_ksettings, supported, FIBRE); + if (!connector_type || connector_type >= MLX5E_CONNECTOR_TYPE_NUMBER) { + if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR) + | MLX5E_PROT_MASK(MLX5E_10GBASE_SR) + | MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) + | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, + FIBRE); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, + FIBRE); + } + + if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_100GBASE_KR4) + | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) + | MLX5E_PROT_MASK(MLX5E_10GBASE_KR) + | MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) + | MLX5E_PROT_MASK(MLX5E_1000BASE_KX))) { + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, + Backplane); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, + Backplane); + } + return; } - if 
(eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_100GBASE_KR4) - | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) - | MLX5E_PROT_MASK(MLX5E_10GBASE_KR) - | MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) - | MLX5E_PROT_MASK(MLX5E_1000BASE_KX))) { - ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Backplane); + switch (connector_type) { + case MLX5E_PORT_TP: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, TP); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, TP); + break; + case MLX5E_PORT_AUI: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, AUI); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, AUI); + break; + case MLX5E_PORT_BNC: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, BNC); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, BNC); + break; + case MLX5E_PORT_MII: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, MII); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, MII); + break; + case MLX5E_PORT_FIBRE: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, FIBRE); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, FIBRE); + break; + case MLX5E_PORT_DA: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, Backplane); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, Backplane); + break; + case MLX5E_PORT_NONE: + case MLX5E_PORT_OTHER: + default: + break; } } @@ -791,7 +952,6 @@ static void get_supported(u32 eth_proto_cap, { unsigned long *supported = link_ksettings->link_modes.supported; - ptys2ethtool_supported_port(link_ksettings, eth_proto_cap); ptys2ethtool_supported_link(supported, eth_proto_cap); ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause); } @@ -809,26 +969,44 @@ static void get_advertising(u32 eth_proto_cap, u8 tx_pause, ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Asym_Pause); } -static u8 get_connector_port(u32 eth_proto) +static int ptys2connector_type[MLX5E_CONNECTOR_TYPE_NUMBER] = { + [MLX5E_PORT_UNKNOWN] = PORT_OTHER, + [MLX5E_PORT_NONE] = PORT_NONE, + [MLX5E_PORT_TP] = PORT_TP, + [MLX5E_PORT_AUI] = PORT_AUI, + [MLX5E_PORT_BNC] = PORT_BNC, + [MLX5E_PORT_MII] = PORT_MII, + [MLX5E_PORT_FIBRE] = PORT_FIBRE, + [MLX5E_PORT_DA] = PORT_DA, + [MLX5E_PORT_OTHER] = PORT_OTHER, + }; + +static u8 get_connector_port(u32 eth_proto, u8 connector_type) { - if (eth_proto & (MLX5E_PROT_MASK(MLX5E_10GBASE_SR) - | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) - | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) - | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { - return PORT_FIBRE; + if (connector_type && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER) + return ptys2connector_type[connector_type]; + + if (eth_proto & + (MLX5E_PROT_MASK(MLX5E_10GBASE_SR) | + MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) | + MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) | + MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { + return PORT_FIBRE; } - if (eth_proto & (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) - | MLX5E_PROT_MASK(MLX5E_10GBASE_CR) - | MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) { - return PORT_DA; + if (eth_proto & + (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) | + MLX5E_PROT_MASK(MLX5E_10GBASE_CR) | + MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) { + return PORT_DA; } - if (eth_proto & (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) - | MLX5E_PROT_MASK(MLX5E_10GBASE_KR) - | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) - | MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) { - return PORT_NONE; + if (eth_proto & + (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) | + 
MLX5E_PROT_MASK(MLX5E_10GBASE_KR) | + MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) | + MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) { + return PORT_NONE; } return PORT_OTHER; @@ -856,6 +1034,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, u32 eth_proto_oper; u8 an_disable_admin; u8 an_status; + u8 connector_type; int err; err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); @@ -871,6 +1050,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); an_status = MLX5_GET(ptys_reg, out, an_status); + connector_type = MLX5_GET(ptys_reg, out, connector_type); mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); @@ -883,7 +1063,10 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; - link_ksettings->base.port = get_connector_port(eth_proto_oper); + link_ksettings->base.port = get_connector_port(eth_proto_oper, + connector_type); + ptys2ethtool_supported_advertised_port(link_ksettings, eth_proto_admin, + connector_type); get_lp_advertising(eth_proto_lp, link_ksettings); if (an_status == MLX5_AN_COMPLETE) @@ -1030,9 +1213,18 @@ static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { memset(tirc, 0, ctxlen); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); } + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(tirc, 0, ctxlen); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); + mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, inlen); + } } static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, @@ -1048,7 +1240,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, (hfunc != ETH_RSS_HASH_TOP)) return -EINVAL; - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -1222,13 +1414,12 @@ static int mlx5e_set_pauseparam(struct net_device *netdev, return err; } -static int mlx5e_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) +int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, + struct ethtool_ts_info *info) { - struct mlx5e_priv *priv = netdev_priv(dev); int ret; - ret = ethtool_op_get_ts_info(dev, info); + ret = ethtool_op_get_ts_info(priv->netdev, info); if (ret) return ret; @@ -1251,6 +1442,14 @@ static int mlx5e_get_ts_info(struct net_device *dev, return 0; } +static int mlx5e_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_get_ts_info(priv, info); +} + static __u32 mlx5e_get_wol_supported(struct mlx5_core_dev *mdev) { __u32 ret = 0; @@ -1638,6 +1837,40 @@ static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) return err; } +int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *dev = priv->netdev; + const struct firmware *fw; + int err; + + if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) + return -EOPNOTSUPP; + + err = request_firmware_direct(&fw, flash->data, &dev->dev); + if (err) + return err; + + dev_hold(dev); + rtnl_unlock(); + 
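get_connector_port() above now prefers the PTYS connector_type field and only falls back to guessing from the operational protocol mask when that field is zero or out of range. A standalone sketch of that lookup-with-fallback shape, using made-up enum values and mask bits rather than the real PTYS/ethtool constants:

#include <stdio.h>

enum { PORT_OTHER, PORT_NONE, PORT_TP, PORT_FIBRE, PORT_DA };	/* stand-ins */
enum { CONN_UNKNOWN, CONN_TP, CONN_FIBRE, CONN_DA, CONN_NUM };	/* stand-ins */

static const int conn2port[CONN_NUM] = {
	[CONN_UNKNOWN]	= PORT_OTHER,
	[CONN_TP]	= PORT_TP,
	[CONN_FIBRE]	= PORT_FIBRE,
	[CONN_DA]	= PORT_DA,
};

#define PROTO_FIBRE_MASK 0x1	/* pretend bit: a fibre-only speed */
#define PROTO_DA_MASK	 0x2	/* pretend bit: a copper-DA speed */

static int get_port(unsigned int eth_proto, unsigned int connector_type)
{
	/* trust the explicit connector type when it is valid and non-zero */
	if (connector_type && connector_type < CONN_NUM)
		return conn2port[connector_type];

	/* otherwise infer the port type from the operational protocol mask */
	if (eth_proto & PROTO_FIBRE_MASK)
		return PORT_FIBRE;
	if (eth_proto & PROTO_DA_MASK)
		return PORT_DA;
	return PORT_OTHER;
}

int main(void)
{
	printf("%d %d %d\n", get_port(0, CONN_TP),
	       get_port(PROTO_DA_MASK, 0), get_port(0, 0));
	return 0;
}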
+ err = mlx5_firmware_flash(mdev, fw); + release_firmware(fw); + + rtnl_lock(); + dev_put(dev); + return err; +} + +static int mlx5e_flash_device(struct net_device *dev, + struct ethtool_flash *flash) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_flash_device(priv, flash); +} + const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, @@ -1658,6 +1891,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .set_rxfh = mlx5e_set_rxfh, .get_rxnfc = mlx5e_get_rxnfc, .set_rxnfc = mlx5e_set_rxnfc, + .flash_device = mlx5e_flash_device, .get_tunable = mlx5e_get_tunable, .set_tunable = mlx5e_set_tunable, .get_pauseparam = mlx5e_get_pauseparam, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 53ed58320a24..4837045ffba3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -36,6 +36,7 @@ #include <linux/tcp.h> #include <linux/mlx5/fs.h> #include "en.h" +#include "lib/mpfs.h" static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, struct mlx5e_l2_rule *ai, int type); @@ -65,6 +66,7 @@ struct mlx5e_l2_hash_node { struct hlist_node hlist; u8 action; struct mlx5e_l2_rule ai; + bool mpfs; }; static inline int mlx5e_hash_l2(u8 *addr) @@ -170,7 +172,6 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: rule_p = &priv->fs.vlan.untagged_rule; @@ -218,11 +219,9 @@ static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec; int err = 0; - spec = mlx5_vzalloc(sizeof(*spec)); - if (!spec) { - netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) return -ENOMEM; - } if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_VID) mlx5e_vport_context_update_vlans(priv); @@ -292,7 +291,7 @@ void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) priv->fs.vlan.filter_disabled = false; if (priv->netdev->flags & IFF_PROMISC) return; - mlx5e_del_any_vid_rules(priv); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); } void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) @@ -303,7 +302,7 @@ void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) priv->fs.vlan.filter_disabled = true; if (priv->netdev->flags & IFF_PROMISC) return; - mlx5e_add_any_vid_rules(priv); + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); } int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, @@ -365,17 +364,33 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) static void mlx5e_execute_l2_action(struct mlx5e_priv *priv, struct mlx5e_l2_hash_node *hn) { - switch (hn->action) { + u8 action = hn->action; + u8 mac_addr[ETH_ALEN]; + int l2_err = 0; + + ether_addr_copy(mac_addr, hn->ai.addr); + + switch (action) { case MLX5E_ACTION_ADD: mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH); + if (!is_multicast_ether_addr(mac_addr)) { + l2_err = mlx5_mpfs_add_mac(priv->mdev, mac_addr); + hn->mpfs = !l2_err; + } hn->action = MLX5E_ACTION_NONE; break; case MLX5E_ACTION_DEL: + if (!is_multicast_ether_addr(mac_addr) && hn->mpfs) + l2_err = mlx5_mpfs_del_mac(priv->mdev, mac_addr); mlx5e_del_l2_flow_rule(priv, &hn->ai); mlx5e_del_l2_from_hash(hn); break; } + + if (l2_err) + netdev_warn(priv->netdev, "MPFS, failed to %s mac %pM, err(%d)\n", + action == MLX5E_ACTION_ADD ? 
"add" : "del", mac_addr, l2_err); } static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) @@ -596,12 +611,21 @@ static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc) ttc->rules[i] = NULL; } } + + for (i = 0; i < MLX5E_NUM_TUNNEL_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) { + mlx5_del_flow_rules(ttc->tunnel_rules[i]); + ttc->tunnel_rules[i] = NULL; + } + } } -static struct { +struct mlx5e_etype_proto { u16 etype; u8 proto; -} ttc_rules[] = { +}; + +static struct mlx5e_etype_proto ttc_rules[] = { [MLX5E_TT_IPV4_TCP] = { .etype = ETH_P_IP, .proto = IPPROTO_TCP, @@ -648,6 +672,28 @@ static struct { }, }; +static struct mlx5e_etype_proto ttc_tunnel_rules[] = { + [MLX5E_TT_IPV4_GRE] = { + .etype = ETH_P_IP, + .proto = IPPROTO_GRE, + }, + [MLX5E_TT_IPV6_GRE] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_GRE, + }, +}; + +static u8 mlx5e_etype_to_ipv(u16 ethertype) +{ + if (ethertype == ETH_P_IP) + return 4; + + if (ethertype == ETH_P_IPV6) + return 6; + + return 0; +} + static struct mlx5_flow_handle * mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, struct mlx5_flow_table *ft, @@ -655,23 +701,29 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, u16 etype, u8 proto) { + int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; int err = 0; + u8 ipv; - spec = mlx5_vzalloc(sizeof(*spec)); - if (!spec) { - netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) return ERR_PTR(-ENOMEM); - } if (proto) { spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto); } - if (etype) { + + ipv = mlx5e_etype_to_ipv(etype); + if (match_ipv_outer && ipv) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv); + } else if (etype) { spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); @@ -713,6 +765,20 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) goto del_rules; } + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return 0; + + rules = ttc->tunnel_rules; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fs.inner_ttc.ft.t; + for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { + rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, + ttc_tunnel_rules[tt].etype, + ttc_tunnel_rules[tt].proto); + if (IS_ERR(rules[tt])) + goto del_rules; + } + return 0; del_rules: @@ -723,13 +789,23 @@ del_rules: } #define MLX5E_TTC_NUM_GROUPS 3 -#define MLX5E_TTC_GROUP1_SIZE BIT(3) -#define MLX5E_TTC_GROUP2_SIZE BIT(1) -#define MLX5E_TTC_GROUP3_SIZE BIT(0) +#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT) +#define MLX5E_TTC_GROUP2_SIZE BIT(1) +#define MLX5E_TTC_GROUP3_SIZE BIT(0) #define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\ MLX5E_TTC_GROUP2_SIZE +\ MLX5E_TTC_GROUP3_SIZE) -static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc) + +#define MLX5E_INNER_TTC_NUM_GROUPS 3 +#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3) +#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1) 
+#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0) +#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\ + MLX5E_INNER_TTC_GROUP2_SIZE +\ + MLX5E_INNER_TTC_GROUP3_SIZE) + +static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc, + bool use_ipv) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5e_flow_table *ft = &ttc->ft; @@ -742,7 +818,7 @@ static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc) sizeof(*ft->g), GFP_KERNEL); if (!ft->g) return -ENOMEM; - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) { kfree(ft->g); return -ENOMEM; @@ -751,7 +827,10 @@ static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc) /* L4 Group */ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + if (use_ipv) + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version); + else + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_TTC_GROUP1_SIZE; @@ -792,6 +871,190 @@ err: return err; } +static struct mlx5_flow_handle * +mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv, + struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, + u16 etype, u8 proto) +{ + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + u8 ipv; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + ipv = mlx5e_etype_to_ipv(etype); + if (etype && ipv) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv); + } + + if (proto) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto); + } + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: add rule failed\n", __func__); + } + + kvfree(spec); + return err ? 
ERR_PTR(err) : rule; +} + +static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_handle **rules; + struct mlx5e_ttc_table *ttc; + struct mlx5_flow_table *ft; + int err; + int tt; + + ttc = &priv->fs.inner_ttc; + ft = ttc->ft.t; + rules = ttc->rules; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + for (tt = 0; tt < MLX5E_NUM_TT; tt++) { + if (tt == MLX5E_TT_ANY) + dest.tir_num = priv->direct_tir[0].tirn; + else + dest.tir_num = priv->inner_indir_tir[tt].tirn; + + rules[tt] = mlx5e_generate_inner_ttc_rule(priv, ft, &dest, + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rules[tt])) + goto del_rules; + } + + return 0; + +del_rules: + err = PTR_ERR(rules[tt]); + rules[tt] = NULL; + mlx5e_cleanup_ttc_rules(ttc); + return err; +} + +static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_flow_table *ft = &ttc->ft; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_INNER_TTC_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ft->g); + return -ENOMEM; + } + + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_TTC_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* L3 Group */ + MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_TTC_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Any Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_TTC_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5e_flow_table *ft = &ttc->ft; + int err; + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return 0; + + ft_attr.max_fte = MLX5E_INNER_TTC_TABLE_SIZE; + ft_attr.level = MLX5E_INNER_TTC_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = mlx5e_create_inner_ttc_table_groups(ttc); + if (err) + goto err; + + err = mlx5e_generate_inner_ttc_table_rules(priv); + if (err) + goto err; + + return 0; + +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc; + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; + + 
mlx5e_cleanup_ttc_rules(ttc); + mlx5e_destroy_flow_table(&ttc->ft); +} + void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) { struct mlx5e_ttc_table *ttc = &priv->fs.ttc; @@ -802,6 +1065,7 @@ void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) int mlx5e_create_ttc_table(struct mlx5e_priv *priv) { + bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); struct mlx5e_ttc_table *ttc = &priv->fs.ttc; struct mlx5_flow_table_attr ft_attr = {}; struct mlx5e_flow_table *ft = &ttc->ft; @@ -818,7 +1082,7 @@ int mlx5e_create_ttc_table(struct mlx5e_priv *priv) return err; } - err = mlx5e_create_ttc_table_groups(ttc); + err = mlx5e_create_ttc_table_groups(ttc, match_ipv_outer); if (err) goto err; @@ -852,11 +1116,9 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, u8 *mc_dmac; u8 *mv_dmac; - spec = mlx5_vzalloc(sizeof(*spec)); - if (!spec) { - netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) return -ENOMEM; - } mc_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers.dmac_47_16); @@ -916,7 +1178,7 @@ static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table) ft->g = kcalloc(MLX5E_NUM_L2_GROUPS, sizeof(*ft->g), GFP_KERNEL); if (!ft->g) return -ENOMEM; - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) { kfree(ft->g); return -ENOMEM; @@ -1071,7 +1333,7 @@ static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft) int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); int err; - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -1146,11 +1408,18 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } + err = mlx5e_create_inner_ttc_table(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", + err); + goto err_destroy_arfs_tables; + } + err = mlx5e_create_ttc_table(priv); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); - goto err_destroy_arfs_tables; + goto err_destroy_inner_ttc_table; } err = mlx5e_create_l2_table(priv); @@ -1175,6 +1444,8 @@ err_destroy_l2_table: mlx5e_destroy_l2_table(priv); err_destroy_ttc_table: mlx5e_destroy_ttc_table(priv); +err_destroy_inner_ttc_table: + mlx5e_destroy_inner_ttc_table(priv); err_destroy_arfs_tables: mlx5e_arfs_destroy_tables(priv); @@ -1186,6 +1457,7 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) mlx5e_destroy_vlan_table(priv); mlx5e_destroy_l2_table(priv); mlx5e_destroy_ttc_table(priv); + mlx5e_destroy_inner_ttc_table(priv); mlx5e_arfs_destroy_tables(priv); mlx5e_ethtool_cleanup_steering(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 85bf4a389295..eafc59280ada 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -276,7 +276,7 @@ static void add_rule_to_list(struct mlx5e_priv *priv, static bool outer_header_zero(u32 *match_criteria) { - int size = MLX5_ST_SZ_BYTES(fte_match_param); + int size = MLX5_FLD_SZ_BYTES(fte_match_param, outer_headers); char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers); @@ -296,7 +296,7 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, struct mlx5_flow_handle *rule; int err = 0; - spec = mlx5_vzalloc(sizeof(*spec)); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) 
return ERR_PTR(-ENOMEM); err = set_flow_attrs(spec->match_criteria, spec->match_value, @@ -320,7 +320,7 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)); flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; - rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, 1); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0); if (IS_ERR(rule)) { err = PTR_ERR(rule); netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 277f4de30375..cc11bbbd0309 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -39,6 +39,9 @@ #include "en.h" #include "en_tc.h" #include "en_rep.h" +#include "en_accel/ipsec.h" +#include "en_accel/ipsec_rxtx.h" +#include "accel/ipsec.h" #include "vxlan.h" struct mlx5e_rq_param { @@ -68,6 +71,11 @@ struct mlx5e_channel_param { struct mlx5e_cq_param icosq_cq; }; +static int mlx5e_get_node(struct mlx5e_priv *priv, int ix) +{ + return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix); +} + static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { return MLX5_CAP_GEN(mdev, striding_rq) && @@ -96,9 +104,12 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, params->log_rq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + params->rq_headroom = params->xdp_prog ? + XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM; + params->rq_headroom += NET_IP_ALIGN; /* Extra room needed for build_skb */ - params->lro_wqe_sz -= MLX5_RX_HEADROOM + + params->lro_wqe_sz -= params->rq_headroom + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); } @@ -112,7 +123,7 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) && - !params->xdp_prog ? + !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ? 
MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : MLX5_WQ_TYPE_LINKED_LIST; mlx5e_set_rq_type_params(mdev, params, rq_type); @@ -124,7 +135,8 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv) u8 port_state; port_state = mlx5_query_vport_state(mdev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0); + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, + 0); if (port_state == VPORT_STATE_UP) { netdev_info(priv->netdev, "Link up\n"); @@ -142,7 +154,8 @@ static void mlx5e_update_carrier_work(struct work_struct *work) mutex_lock(&priv->state_lock); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_update_carrier(priv); + if (priv->profile->update_carrier) + priv->profile->update_carrier(priv); mutex_unlock(&priv->state_lock); } @@ -171,7 +184,6 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) struct mlx5e_sw_stats temp, *s = &temp; struct mlx5e_rq_stats *rq_stats; struct mlx5e_sq_stats *sq_stats; - u64 tx_offload_none = 0; int i, j; memset(s, 0, sizeof(*s)); @@ -186,6 +198,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_lro_bytes += rq_stats->lro_bytes; s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; + s->rx_csum_unnecessary += rq_stats->csum_unnecessary; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; s->rx_xdp_drop += rq_stats->xdp_drop; s->rx_xdp_tx += rq_stats->xdp_tx; @@ -195,10 +208,12 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_buff_alloc_err += rq_stats->buff_alloc_err; s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; + s->rx_page_reuse += rq_stats->page_reuse; s->rx_cache_reuse += rq_stats->cache_reuse; s->rx_cache_full += rq_stats->cache_full; s->rx_cache_empty += rq_stats->cache_empty; s->rx_cache_busy += rq_stats->cache_busy; + s->rx_cache_waive += rq_stats->cache_waive; for (j = 0; j < priv->channels.params.num_tc; j++) { sq_stats = &c->sq[j].stats; @@ -214,14 +229,11 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->tx_queue_dropped += sq_stats->dropped; s->tx_xmit_more += sq_stats->xmit_more; s->tx_csum_partial_inner += sq_stats->csum_partial_inner; - tx_offload_none += sq_stats->csum_none; + s->tx_csum_none += sq_stats->csum_none; + s->tx_csum_partial += sq_stats->csum_partial; } } - /* Update calculated offload counters */ - s->tx_csum_partial = s->tx_packets - tx_offload_none - s->tx_csum_partial_inner; - s->rx_csum_unnecessary = s->rx_packets - s->rx_csum_none - s->rx_csum_complete; - s->link_down_events_phy = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, counter_set.phys_layer_cntrs.link_down_events); @@ -243,18 +255,14 @@ static void mlx5e_update_vport_counters(struct mlx5e_priv *priv) mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); } -static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) +static void mlx5e_update_pport_counters(struct mlx5e_priv *priv, bool full) { struct mlx5e_pport_stats *pstats = &priv->stats.pport; struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); int prio; void *out; - u32 *in; - - in = mlx5_vzalloc(sz); - if (!in) - goto free_out; MLX5_SET(ppcnt_reg, in, local_port, 1); @@ -262,6 +270,9 @@ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + if (!full) + return; + out = pstats->RFC_2863_counters; MLX5_SET(ppcnt_reg, 
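The RQ-type selection above now opts out of striding (MPWQE) RQs not only when an XDP program is attached but also when the device runs in IPsec offload mode, collapsing to the plain linked-list WQ in either case. A one-expression decision sketch; the predicate names are illustrative, not the driver's:

#include <stdbool.h>
#include <stdio.h>

enum rq_type { RQ_LINKED_LIST, RQ_STRIDING };

static enum rq_type pick_rq_type(bool hw_striding_cap, bool xdp_attached,
				 bool ipsec_offload)
{
	return (hw_striding_cap && !xdp_attached && !ipsec_offload) ?
	       RQ_STRIDING : RQ_LINKED_LIST;
}

int main(void)
{
	printf("%d %d %d\n",
	       pick_rq_type(true, false, false),	/* striding */
	       pick_rq_type(true, true, false),		/* linked list: XDP */
	       pick_rq_type(true, false, true));	/* linked list: IPsec */
	return 0;
}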
in, grp, MLX5_RFC_2863_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); @@ -280,6 +291,12 @@ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); } + if (MLX5_CAP_PCAM_FEATURE(mdev, rx_buffer_fullness_counters)) { + out = pstats->eth_ext_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + } + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { out = pstats->per_prio_counters[prio]; @@ -287,53 +304,57 @@ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); } - -free_out: - kvfree(in); } static void mlx5e_update_q_counter(struct mlx5e_priv *priv) { struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; + u32 out[MLX5_ST_SZ_DW(query_q_counter_out)]; + int err; if (!priv->q_counter) return; - mlx5_core_query_out_of_buffer(priv->mdev, priv->q_counter, - &qcnt->rx_out_of_buffer); + err = mlx5_core_query_q_counter(priv->mdev, priv->q_counter, 0, out, sizeof(out)); + if (err) + return; + + qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out, out, out_of_buffer); } static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv) { struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie; struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(mpcnt_reg)] = {0}; int sz = MLX5_ST_SZ_BYTES(mpcnt_reg); void *out; - u32 *in; if (!MLX5_CAP_MCAM_FEATURE(mdev, pcie_performance_group)) return; - in = mlx5_vzalloc(sz); - if (!in) - return; - out = pcie_stats->pcie_perf_counters; MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); - - kvfree(in); } -void mlx5e_update_stats(struct mlx5e_priv *priv) +void mlx5e_update_stats(struct mlx5e_priv *priv, bool full) { - mlx5e_update_pcie_counters(priv); - mlx5e_update_pport_counters(priv); + if (full) { + mlx5e_update_pcie_counters(priv); + mlx5e_ipsec_update_stats(priv); + } + mlx5e_update_pport_counters(priv, full); mlx5e_update_vport_counters(priv); mlx5e_update_q_counter(priv); mlx5e_update_sw_counters(priv); } +static void mlx5e_update_ndo_stats(struct mlx5e_priv *priv) +{ + mlx5e_update_stats(priv, false); +} + void mlx5e_update_stats_work(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); @@ -365,7 +386,6 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, break; case MLX5_DEV_EVENT_PPS: eqe = (struct mlx5_eqe *)param; - ptp_event.type = PTP_CLOCK_EXTTS; ptp_event.index = eqe->data.pps.pin; ptp_event.timestamp = timecounter_cyc2time(&priv->tstamp.clock, @@ -385,7 +405,7 @@ static void mlx5e_enable_async_events(struct mlx5e_priv *priv) static void mlx5e_disable_async_events(struct mlx5e_priv *priv) { clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); - synchronize_irq(mlx5_get_msix_vec(priv->mdev, MLX5_EQ_VEC_ASYNC)); + synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC)); } static inline int mlx5e_get_wqe_mtt_sz(void) @@ -432,16 +452,17 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int wq_sz = mlx5_wq_ll_get_size(&rq->wq); int mtt_sz = mlx5e_get_wqe_mtt_sz(); int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1; + int node = mlx5e_get_node(c->priv, c->ix); int i; rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info), - GFP_KERNEL, 
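mlx5e_update_stats() now takes a bool so the periodic refresh path can skip the expensive register dumps (PCIe, IPsec, the extra PPCNT groups) while an explicit ethtool query still pays for a full update. The shape of that split, sketched with placeholder update helpers rather than the driver's real functions:

#include <stdbool.h>
#include <stdio.h>

static void update_cheap_counters(void)     { puts("vport/q/sw counters"); }
static void update_expensive_counters(void) { puts("pcie/ipsec/ext pport"); }

/* full == true only for explicit ethtool stats queries; the periodic
 * work and the ndo stats path call with full == false */
static void update_stats(bool full)
{
	if (full)
		update_expensive_counters();
	update_cheap_counters();
}

int main(void)
{
	update_stats(false);	/* periodic refresh */
	update_stats(true);	/* ethtool -S */
	return 0;
}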
cpu_to_node(c->cpu)); + GFP_KERNEL, node); if (!rq->mpwqe.info) goto err_out; /* We allocate more than mtt_sz as we will align the pointer */ - rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, - cpu_to_node(c->cpu)); + rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, + GFP_KERNEL, node); if (unlikely(!rq->mpwqe.mtt_no_align)) goto err_free_wqe_info; @@ -503,7 +524,7 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, if (!MLX5E_VALID_NUM_MTTS(npages)) return -EINVAL; - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -544,13 +565,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, void *rqc = rqp->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); u32 byte_count; - u32 frag_sz; int npages; int wq_sz; int err; int i; - rqp->wq.db_numa_node = cpu_to_node(c->cpu); + rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq, &rq->wq_ctrl); @@ -576,32 +596,33 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - if (rq->xdp_prog) { - rq->buff.map_dir = DMA_BIDIRECTIONAL; - rq->rx_headroom = XDP_PACKET_HEADROOM; - } else { - rq->buff.map_dir = DMA_FROM_DEVICE; - rq->rx_headroom = MLX5_RX_HEADROOM; - } + rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + rq->buff.headroom = params->rq_headroom; switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; + rq->post_wqes = mlx5e_post_rx_mpwqes; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe; +#ifdef CONFIG_MLX5_EN_IPSEC + if (MLX5_IPSEC_DEV(mdev)) { + err = -EINVAL; + netdev_err(c->netdev, "MPWQE RQ with IPSec offload not supported\n"); + goto err_rq_wq_destroy; + } +#endif if (!rq->handle_rx_cqe) { err = -EINVAL; netdev_err(c->netdev, "RX handler of MPWQE RQ is not set, err %d\n", err); goto err_rq_wq_destroy; } - rq->mpwqe_stride_sz = BIT(params->mpwqe_log_stride_sz); - rq->mpwqe_num_strides = BIT(params->mpwqe_log_num_strides); + rq->mpwqe.log_stride_sz = params->mpwqe_log_stride_sz; + rq->mpwqe.num_strides = BIT(params->mpwqe_log_num_strides); - rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; - byte_count = rq->buff.wqe_sz; + byte_count = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; err = mlx5e_create_rq_umr_mkey(mdev, rq); if (err) @@ -613,35 +634,42 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_destroy_umr_mkey; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - rq->dma_info = kzalloc_node(wq_sz * sizeof(*rq->dma_info), - GFP_KERNEL, cpu_to_node(c->cpu)); - if (!rq->dma_info) { + rq->wqe.frag_info = + kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info), + GFP_KERNEL, + mlx5e_get_node(c->priv, c->ix)); + if (!rq->wqe.frag_info) { err = -ENOMEM; goto err_rq_wq_destroy; } - rq->alloc_wqe = mlx5e_alloc_rx_wqe; + rq->post_wqes = mlx5e_post_rx_wqes; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; - rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe; +#ifdef CONFIG_MLX5_EN_IPSEC + if (c->priv->ipsec) + rq->handle_rx_cqe = mlx5e_ipsec_handle_rx_cqe; + else +#endif + rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe; if (!rq->handle_rx_cqe) { - kfree(rq->dma_info); + kfree(rq->wqe.frag_info); err = -EINVAL; netdev_err(c->netdev, "RX handler of RQ is not set, err %d\n", err); goto err_rq_wq_destroy; } - rq->buff.wqe_sz = params->lro_en ? + byte_count = params->lro_en ? 
params->lro_wqe_sz : - MLX5E_SW2HW_MTU(c->netdev->mtu); - byte_count = rq->buff.wqe_sz; + MLX5E_SW2HW_MTU(c->priv, c->netdev->mtu); +#ifdef CONFIG_MLX5_EN_IPSEC + if (MLX5_IPSEC_DEV(mdev)) + byte_count += MLX5E_METADATA_ETHER_LEN; +#endif + rq->wqe.page_reuse = !params->xdp_prog && !params->lro_en; /* calc the required page order */ - frag_sz = rq->rx_headroom + - byte_count /* packet data */ + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - frag_sz = SKB_DATA_ALIGN(frag_sz); - - npages = DIV_ROUND_UP(frag_sz, PAGE_SIZE); + rq->wqe.frag_sz = MLX5_SKB_FRAG_SZ(rq->buff.headroom + byte_count); + npages = DIV_ROUND_UP(rq->wqe.frag_sz, PAGE_SIZE); rq->buff.page_order = order_base_2(npages); byte_count |= MLX5_HW_START_PADDING; @@ -651,6 +679,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, i) << PAGE_SHIFT; + + wqe->data.addr = cpu_to_be64(dma_offset); + } + wqe->data.byte_count = cpu_to_be32(byte_count); wqe->data.lkey = rq->mkey_be; } @@ -686,7 +720,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - kfree(rq->dma_info); + kfree(rq->wqe.frag_info); } for (i = rq->page_cache.head; i != rq->page_cache.tail; @@ -711,7 +745,7 @@ static int mlx5e_create_rq(struct mlx5e_rq *rq, inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rq->wq_ctrl.buf.npages; - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -748,7 +782,7 @@ static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int err; inlen = MLX5_ST_SZ_BYTES(modify_rq_in); - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -776,7 +810,7 @@ static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable) int err; inlen = MLX5_ST_SZ_BYTES(modify_rq_in); - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -805,7 +839,7 @@ static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) int err; inlen = MLX5_ST_SZ_BYTES(modify_rq_in); - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -857,7 +891,8 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) u16 wqe_ix; /* UMR WQE (if in progress) is always at wq->head */ - if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + rq->mpwqe.umr_in_progress) mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); while (!mlx5_wq_ll_is_empty(wq)) { @@ -868,6 +903,16 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) mlx5_wq_ll_pop(&rq->wq, wqe_ix_be, &wqe->next.next_wqe_index); } + + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST && rq->wqe.page_reuse) { + /* Clean outstanding pages on handled WQEs that decided to do page-reuse, + * but yet to be re-posted. 
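The legacy (non-striding) RQ path sizes its per-WQE fragment from headroom plus payload plus the skb_shared_info tail, then derives how many pages that needs and the buddy order to allocate. A standalone approximation of that sizing arithmetic; the exact alignment and shared-info accounting of MLX5_SKB_FRAG_SZ is simplified and the input values are hypothetical:

#include <stdio.h>

#define PAGE_SIZE_SZ	4096u
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* smallest power-of-two exponent covering n, like order_base_2() */
static unsigned int order_base_2(unsigned int n)
{
	unsigned int order = 0;

	while ((1u << order) < n)
		order++;
	return order;
}

int main(void)
{
	unsigned int headroom = 256, byte_count = 9216;	/* hypothetical LRO WQE */
	unsigned int shinfo = 320;			/* rough skb_shared_info size */
	unsigned int frag_sz, npages;

	frag_sz = ALIGN_UP(headroom + byte_count + shinfo, 64);
	npages = DIV_ROUND_UP(frag_sz, PAGE_SIZE_SZ);
	printf("frag_sz=%u npages=%u page_order=%u\n",
	       frag_sz, npages, order_base_2(npages));
	return 0;
}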
+ */ + int wq_sz = mlx5_wq_ll_get_size(&rq->wq); + + for (wqe_ix = 0; wqe_ix < wq_sz; wqe_ix++) + rq->dealloc_wqe(rq, wqe_ix); + } } static int mlx5e_open_rq(struct mlx5e_channel *c, @@ -890,7 +935,7 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, goto err_destroy_rq; if (params->rx_am_enabled) - set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); + c->rq.state |= BIT(MLX5E_RQ_STATE_AM); return 0; @@ -910,7 +955,6 @@ static void mlx5e_activate_rq(struct mlx5e_rq *rq) set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; - sq->db.ico_wqe[pi].num_wqebbs = 1; nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &nopwqe->ctrl); } @@ -963,13 +1007,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; - param->wq.db_numa_node = cpu_to_node(c->cpu); + param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) return err; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu)); + err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix)); if (err) goto err_sq_wq_destroy; @@ -1012,18 +1056,17 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = c->mdev; int err; - sq->pdev = c->pdev; sq->mkey_be = c->mkey_be; sq->channel = c; sq->uar_map = mdev->mlx5e_res.bfreg.map; - param->wq.db_numa_node = cpu_to_node(c->cpu); + param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) return err; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu)); + err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix)); if (err) goto err_sq_wq_destroy; @@ -1086,14 +1129,16 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->max_inline = params->tx_max_inline; sq->min_inline_mode = params->tx_min_inline_mode; + if (MLX5_IPSEC_DEV(c->priv->mdev)) + set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); - param->wq.db_numa_node = cpu_to_node(c->cpu); + param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) return err; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu)); + err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix)); if (err) goto err_sq_wq_destroy; @@ -1134,7 +1179,7 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev, inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * csp->wq_ctrl->buf.npages; - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -1182,7 +1227,7 @@ static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, int err; inlen = MLX5_ST_SZ_BYTES(modify_sq_in); - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -1465,8 +1510,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = c->priv->mdev; int err; - param->wq.buf_numa_node = cpu_to_node(c->cpu); - param->wq.db_numa_node = cpu_to_node(c->cpu); + param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix); + param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); param->eq_ix = c->ix; err = mlx5e_alloc_cq_common(mdev, param, cq); @@ -1496,7 +1541,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) 
inlen = MLX5_ST_SZ_BYTES(create_cq_in) + sizeof(u64) * cq->wq_ctrl.frag_buf.npages; - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -1565,11 +1610,6 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq) mlx5e_free_cq(cq); } -static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) -{ - return cpumask_first(priv->mdev->priv.irq_info[ix].mask); -} - static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_channel_param *cparam) @@ -1718,11 +1758,12 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, { struct mlx5e_cq_moder icocq_moder = {0, 0}; struct net_device *netdev = priv->netdev; - int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; + unsigned int irq; int err; + int eqn; - c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); + c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix)); if (!c) return -ENOMEM; @@ -1730,13 +1771,15 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->mdev = priv->mdev; c->tstamp = &priv->tstamp; c->ix = ix; - c->cpu = cpu; c->pdev = &priv->mdev->pdev->dev; c->netdev = priv->netdev; c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); c->num_tc = params->num_tc; c->xdp = !!params->xdp_prog; + mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); + c->irq_desc = irq_to_desc(irq); + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq); @@ -1816,7 +1859,8 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) for (tc = 0; tc < c->num_tc; tc++) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_rq(&c->rq); - netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix); + netif_set_xps_queue(c->netdev, + mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix); } static void mlx5e_deactivate_channel(struct mlx5e_channel *c) @@ -1905,6 +1949,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv, mlx5e_build_sq_param_common(priv, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); + MLX5_SET(sqc, sqc, allow_swp, !!MLX5_IPSEC_DEV(priv->mdev)); } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, @@ -1937,6 +1982,7 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, } mlx5e_build_common_cq_param(priv, param); + param->cq_period_mode = params->rx_cq_period_mode; } static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, @@ -2091,7 +2137,7 @@ mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, struct mlx5e_rqt *rqt) int i; inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -2210,7 +2256,7 @@ int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int err; inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * sz; - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -2311,9 +2357,10 @@ static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc) void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, enum mlx5e_traffic_types tt, - void *tirc) + void *tirc, bool inner) { - void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + void *hfso = inner ? 
MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) : + MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); #define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) @@ -2433,7 +2480,7 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) int ix; inlen = MLX5_ST_SZ_BYTES(modify_tir_in); - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -2462,10 +2509,25 @@ free_in: return err; } +static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv, + enum mlx5e_traffic_types tt, + u32 *tirc) +{ + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); + + mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); + + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); + MLX5_SET(tirc, tirc, tunneled_offload_en, 0x1); + + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); +} + static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu) { struct mlx5_core_dev *mdev = priv->mdev; - u16 hw_mtu = MLX5E_SW2HW_MTU(mtu); + u16 hw_mtu = MLX5E_SW2HW_MTU(priv, mtu); int err; err = mlx5_set_port_mtu(mdev, hw_mtu, 1); @@ -2487,7 +2549,7 @@ static void mlx5e_query_mtu(struct mlx5e_priv *priv, u16 *mtu) if (err || !hw_mtu) /* fallback to port oper mtu */ mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1); - *mtu = MLX5E_HW2SW_MTU(hw_mtu); + *mtu = MLX5E_HW2SW_MTU(priv, hw_mtu); } static int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) @@ -2549,12 +2611,6 @@ static void mlx5e_build_channels_tx_maps(struct mlx5e_priv *priv) } } -static bool mlx5e_is_eswitch_vport_mngr(struct mlx5_core_dev *mdev) -{ - return (MLX5_CAP_GEN(mdev, vport_group_manager) && - MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH); -} - void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) { int num_txqs = priv->channels.num * priv->channels.params.num_tc; @@ -2568,7 +2624,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_activate_channels(&priv->channels); netif_tx_start_all_queues(priv->netdev); - if (mlx5e_is_eswitch_vport_mngr(priv->mdev)) + if (MLX5_VPORT_MANAGER(priv->mdev)) mlx5e_add_sqs_fwd_rules(priv); mlx5e_wait_channels_min_rx_wqes(&priv->channels); @@ -2579,7 +2635,7 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { mlx5e_redirect_rqts_to_drop(priv); - if (mlx5e_is_eswitch_vport_mngr(priv->mdev)) + if (MLX5_VPORT_MANAGER(priv->mdev)) mlx5e_remove_sqs_fwd_rules(priv); /* FIXME: This is a W/A only for tx timeout watch dog false alarm when @@ -2596,9 +2652,10 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, { struct net_device *netdev = priv->netdev; int new_num_txqs; - + int carrier_ok; new_num_txqs = new_chs->num * new_chs->params.num_tc; + carrier_ok = netif_carrier_ok(netdev); netif_carrier_off(netdev); if (new_num_txqs < netdev->real_num_tx_queues) @@ -2616,7 +2673,9 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, mlx5e_refresh_tirs(priv, false); mlx5e_activate_priv_channels(priv); - mlx5e_update_carrier(priv); + /* return carrier back if needed */ + if (carrier_ok) + netif_carrier_on(netdev); } int mlx5e_open_locked(struct net_device *netdev) @@ -2632,7 +2691,8 @@ int mlx5e_open_locked(struct net_device *netdev) mlx5e_refresh_tirs(priv, false); mlx5e_activate_priv_channels(priv); - mlx5e_update_carrier(priv); + if (priv->profile->update_carrier) + priv->profile->update_carrier(priv); mlx5e_timestamp_init(priv); if (priv->profile->update_stats) @@ -2652,6 +2712,8 @@ int 
mlx5e_open(struct net_device *netdev) mutex_lock(&priv->state_lock); err = mlx5e_open_locked(netdev); + if (!err) + mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP); mutex_unlock(&priv->state_lock); return err; @@ -2686,6 +2748,7 @@ int mlx5e_close(struct net_device *netdev) return -ENODEV; mutex_lock(&priv->state_lock); + mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_DOWN); err = mlx5e_close_locked(netdev); mutex_unlock(&priv->state_lock); @@ -2826,7 +2889,7 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); } static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) @@ -2845,12 +2908,13 @@ int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) struct mlx5e_tir *tir; void *tirc; int inlen; + int i = 0; int err; u32 *in; int tt; inlen = MLX5_ST_SZ_BYTES(create_tir_in); - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -2860,16 +2924,36 @@ int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); mlx5e_build_indir_tir_ctx(priv, tt, tirc); err = mlx5e_create_tir(priv->mdev, tir, in, inlen); - if (err) - goto err_destroy_tirs; + if (err) { + mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err); + goto err_destroy_inner_tirs; + } } + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + goto out; + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) { + memset(in, 0, inlen); + tir = &priv->inner_indir_tir[i]; + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + mlx5e_build_inner_indir_tir_ctx(priv, i, tirc); + err = mlx5e_create_tir(priv->mdev, tir, in, inlen); + if (err) { + mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err); + goto err_destroy_inner_tirs; + } + } + +out: kvfree(in); return 0; -err_destroy_tirs: - mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err); +err_destroy_inner_tirs: + for (i--; i >= 0; i--) + mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); + for (tt--; tt >= 0; tt--) mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]); @@ -2889,7 +2973,7 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) int ix; inlen = MLX5_ST_SZ_BYTES(create_tir_in); - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -2923,6 +3007,12 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]); + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) + mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); } void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) @@ -2962,12 +3052,16 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) return 0; } -static int mlx5e_setup_tc(struct net_device *netdev, u8 tc) +static int mlx5e_setup_tc_mqprio(struct net_device *netdev, + struct tc_mqprio_qopt *mqprio) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_channels new_channels = {}; + u8 tc = mqprio->num_tc; int err = 0; + mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + if (tc && tc != MLX5E_MAX_NUM_TC) return -EINVAL; @@ -2991,35 +3085,42 @@ out: return err; } -static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle, - __be16 
proto, struct tc_to_netdev *tc) +#ifdef CONFIG_MLX5_ESWITCH +static int mlx5e_setup_tc_cls_flower(struct net_device *dev, + struct tc_cls_flower_offload *cls_flower) { struct mlx5e_priv *priv = netdev_priv(dev); - if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS)) - goto mqprio; + if (!is_classid_clsact_ingress(cls_flower->common.classid) || + cls_flower->common.chain_index) + return -EOPNOTSUPP; - switch (tc->type) { - case TC_SETUP_CLSFLOWER: - switch (tc->cls_flower->command) { - case TC_CLSFLOWER_REPLACE: - return mlx5e_configure_flower(priv, proto, tc->cls_flower); - case TC_CLSFLOWER_DESTROY: - return mlx5e_delete_flower(priv, tc->cls_flower); - case TC_CLSFLOWER_STATS: - return mlx5e_stats_flower(priv, tc->cls_flower); - } + switch (cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return mlx5e_configure_flower(priv, cls_flower); + case TC_CLSFLOWER_DESTROY: + return mlx5e_delete_flower(priv, cls_flower); + case TC_CLSFLOWER_STATS: + return mlx5e_stats_flower(priv, cls_flower); default: return -EOPNOTSUPP; } +} +#endif -mqprio: - if (tc->type != TC_SETUP_MQPRIO) - return -EINVAL; - - tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; - - return mlx5e_setup_tc(dev, tc->mqprio->num_tc); +static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { +#ifdef CONFIG_MLX5_ESWITCH + case TC_SETUP_CLSFLOWER: + return mlx5e_setup_tc_cls_flower(dev, type_data); +#endif + case TC_SETUP_MQPRIO: + return mlx5e_setup_tc_mqprio(dev, type_data); + default: + return -EOPNOTSUPP; + } } static void @@ -3053,8 +3154,6 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) PPORT_802_3_GET(pstats, a_frame_check_sequence_errors); stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors); stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards); - stats->tx_carrier_errors = - PPORT_802_3_GET(pstats, a_symbol_error_during_carrier); stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors + stats->rx_frame_errors; stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors; @@ -3064,7 +3163,6 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) */ stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); - } static void mlx5e_set_rx_mode(struct net_device *dev) @@ -3232,8 +3330,8 @@ static int mlx5e_handle_feature(struct net_device *netdev, err = feature_handler(netdev, enable); if (err) { - netdev_err(netdev, "%s feature 0x%llx failed err %d\n", - enable ? "Enable" : "Disable", feature, err); + netdev_err(netdev, "%s feature %pNF failed, err %d\n", + enable ? 
"Enable" : "Disable", &feature, err); return err; } @@ -3307,16 +3405,19 @@ out: static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { + struct mlx5e_priv *priv = netdev_priv(dev); + switch (cmd) { case SIOCSHWTSTAMP: - return mlx5e_hwstamp_set(dev, ifr); + return mlx5e_hwstamp_set(priv, ifr); case SIOCGHWTSTAMP: - return mlx5e_hwstamp_get(dev, ifr); + return mlx5e_hwstamp_get(priv, ifr); default: return -EOPNOTSUPP; } } +#ifdef CONFIG_MLX5_ESWITCH static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -3419,6 +3520,7 @@ static int mlx5e_get_vf_stats(struct net_device *dev, return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1, vf_stats); } +#endif static void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti) @@ -3448,13 +3550,13 @@ static void mlx5e_del_vxlan_port(struct net_device *netdev, mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 0); } -static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv, - struct sk_buff *skb, - netdev_features_t features) +static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, + struct sk_buff *skb, + netdev_features_t features) { struct udphdr *udph; - u16 proto; - u16 port = 0; + u8 proto; + u16 port; switch (vlan_get_protocol(skb)) { case htons(ETH_P_IP): @@ -3467,14 +3569,17 @@ static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv, goto out; } - if (proto == IPPROTO_UDP) { + switch (proto) { + case IPPROTO_GRE: + return features; + case IPPROTO_UDP: udph = udp_hdr(skb); port = be16_to_cpu(udph->dest); - } - /* Verify if UDP port is being offloaded by HW */ - if (port && mlx5e_vxlan_lookup_port(priv, port)) - return features; + /* Verify if UDP port is being offloaded by HW */ + if (mlx5e_vxlan_lookup_port(priv, port)) + return features; + } out: /* Disable CSUM and GSO if the udp dport is not offloaded by HW */ @@ -3490,10 +3595,15 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, features = vlan_features_check(skb, features); features = vxlan_features_check(skb, features); +#ifdef CONFIG_MLX5_EN_IPSEC + if (mlx5e_ipsec_feature_check(skb, netdev, features)) + return features; +#endif + /* Validate if the tunneled packet is being offloaded by HW */ if (skb->encapsulation && (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK)) - return mlx5e_vxlan_features_check(priv, skb, features); + return mlx5e_tunnel_features_check(priv, skb, features); return features; } @@ -3537,6 +3647,12 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) goto unlock; } + if ((netdev->features & NETIF_F_HW_ESP) && prog) { + netdev_warn(netdev, "can't set XDP with IPSec offload\n"); + err = -EINVAL; + goto unlock; + } + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); /* no need for full reset when exchanging programs */ reset = (!priv->channels.params.xdp_prog || !prog); @@ -3584,7 +3700,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); /* napi_schedule in case we have missed anything */ - set_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); napi_schedule(&c->napi); if (old_prog) @@ -3596,11 +3711,19 @@ unlock: return err; } -static bool mlx5e_xdp_attached(struct net_device *dev) +static u32 mlx5e_xdp_query(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); + const struct bpf_prog *xdp_prog; + u32 prog_id = 0; + + 
mutex_lock(&priv->state_lock); + xdp_prog = priv->channels.params.xdp_prog; + if (xdp_prog) + prog_id = xdp_prog->aux->id; + mutex_unlock(&priv->state_lock); - return !!priv->channels.params.xdp_prog; + return prog_id; } static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) @@ -3609,7 +3732,8 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) case XDP_SETUP_PROG: return mlx5e_xdp_set(dev, xdp->prog); case XDP_QUERY_PROG: - xdp->prog_attached = mlx5e_xdp_attached(dev); + xdp->prog_id = mlx5e_xdp_query(dev); + xdp->prog_attached = !!xdp->prog_id; return 0; default: return -EINVAL; @@ -3632,11 +3756,11 @@ static void mlx5e_netpoll(struct net_device *dev) } #endif -static const struct net_device_ops mlx5e_netdev_ops_basic = { +static const struct net_device_ops mlx5e_netdev_ops = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, .ndo_start_xmit = mlx5e_xmit, - .ndo_setup_tc = mlx5e_ndo_setup_tc, + .ndo_setup_tc = mlx5e_setup_tc, .ndo_select_queue = mlx5e_select_queue, .ndo_get_stats64 = mlx5e_get_stats, .ndo_set_rx_mode = mlx5e_set_rx_mode, @@ -3647,6 +3771,9 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_change_mtu = mlx5e_change_mtu, .ndo_do_ioctl = mlx5e_ioctl, .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, + .ndo_udp_tunnel_add = mlx5e_add_vxlan_port, + .ndo_udp_tunnel_del = mlx5e_del_vxlan_port, + .ndo_features_check = mlx5e_features_check, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif @@ -3655,29 +3782,8 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = mlx5e_netpoll, #endif -}; - -static const struct net_device_ops mlx5e_netdev_ops_sriov = { - .ndo_open = mlx5e_open, - .ndo_stop = mlx5e_close, - .ndo_start_xmit = mlx5e_xmit, - .ndo_setup_tc = mlx5e_ndo_setup_tc, - .ndo_select_queue = mlx5e_select_queue, - .ndo_get_stats64 = mlx5e_get_stats, - .ndo_set_rx_mode = mlx5e_set_rx_mode, - .ndo_set_mac_address = mlx5e_set_mac, - .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, - .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, - .ndo_set_features = mlx5e_set_features, - .ndo_change_mtu = mlx5e_change_mtu, - .ndo_do_ioctl = mlx5e_ioctl, - .ndo_udp_tunnel_add = mlx5e_add_vxlan_port, - .ndo_udp_tunnel_del = mlx5e_del_vxlan_port, - .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, - .ndo_features_check = mlx5e_features_check, -#ifdef CONFIG_RFS_ACCEL - .ndo_rx_flow_steer = mlx5e_rx_flow_steer, -#endif +#ifdef CONFIG_MLX5_ESWITCH + /* SRIOV E-Switch NDOs */ .ndo_set_vf_mac = mlx5e_set_vf_mac, .ndo_set_vf_vlan = mlx5e_set_vf_vlan, .ndo_set_vf_spoofchk = mlx5e_set_vf_spoofchk, @@ -3686,13 +3792,9 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, - .ndo_tx_timeout = mlx5e_tx_timeout, - .ndo_xdp = mlx5e_xdp, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = mlx5e_netpoll, -#endif .ndo_has_offload_stats = mlx5e_has_offload_stats, .ndo_get_offload_stats = mlx5e_get_offload_stats, +#endif }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) @@ -3715,7 +3817,7 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable)) mlx5_core_warn(mdev, "Self loop back prevention is not supported\n"); if (!MLX5_CAP_GEN(mdev, cq_moderation)) - mlx5_core_warn(mdev, "CQ modiration is not supported\n"); + mlx5_core_warn(mdev, "CQ moderation is not 
supported\n"); return 0; } @@ -3729,22 +3831,11 @@ u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev) 2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/; } -void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, - u32 *indirection_rqt, int len, +void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, int num_channels) { - int node = mdev->priv.numa_node; - int node_num_of_cores; int i; - if (node == -1) - node = first_online_node; - - node_num_of_cores = cpumask_weight(cpumask_of_node(node)); - - if (node_num_of_cores) - num_channels = min_t(int, num_channels, node_num_of_cores); - for (i = 0; i < len; i++) indirection_rqt[i] = i % num_channels; } @@ -3848,7 +3939,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, /* set CQE compression */ params->rx_cqe_compress_def = false; if (MLX5_CAP_GEN(mdev, cqe_compression) && - MLX5_CAP_GEN(mdev, vport_group_manager)) + MLX5_CAP_GEN(mdev, vport_group_manager)) params->rx_cqe_compress_def = cqe_compress_heuristic(link_speed, pci_bw); MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); @@ -3857,6 +3948,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, mlx5e_set_rq_params(mdev, params); /* HW LRO */ + /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */ if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) params->lro_en = hw_lro_heuristic(link_speed, pci_bw); @@ -3882,7 +3974,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, /* RSS */ params->rss_hfunc = ETH_RSS_HASH_XOR; netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key)); - mlx5e_build_default_indir_rqt(mdev, params->indirection_rqt, + mlx5e_build_default_indir_rqt(params->indirection_rqt, MLX5E_INDIR_RQT_SIZE, max_channels); } @@ -3897,6 +3989,7 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->netdev = netdev; priv->profile = profile; priv->ppriv = ppriv; + priv->hard_mtu = MLX5E_ETH_HARD_MTU; mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev)); @@ -3920,9 +4013,11 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) } } +#if IS_ENABLED(CONFIG_NET_SWITCHDEV) && IS_ENABLED(CONFIG_MLX5_ESWITCH) static const struct switchdev_ops mlx5e_switchdev_ops = { .switchdev_port_attr_get = mlx5e_attr_get, }; +#endif static void mlx5e_build_nic_netdev(struct net_device *netdev) { @@ -3933,15 +4028,12 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) SET_NETDEV_DEV(netdev, &mdev->pdev->dev); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) { - netdev->netdev_ops = &mlx5e_netdev_ops_sriov; + netdev->netdev_ops = &mlx5e_netdev_ops; + #ifdef CONFIG_MLX5_CORE_EN_DCB - if (MLX5_CAP_GEN(mdev, qos)) - netdev->dcbnl_ops = &mlx5e_dcbnl_ops; + if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos)) + netdev->dcbnl_ops = &mlx5e_dcbnl_ops; #endif - } else { - netdev->netdev_ops = &mlx5e_netdev_ops_basic; - } netdev->watchdog_timeo = 15 * HZ; @@ -3964,20 +4056,32 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; - if (mlx5e_vxlan_allowed(mdev)) { - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_PARTIAL; + if (mlx5e_vxlan_allowed(mdev) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { + netdev->hw_features |= NETIF_F_GSO_PARTIAL; netdev->hw_enc_features |= NETIF_F_IP_CSUM; netdev->hw_enc_features |= NETIF_F_IPV6_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; 
netdev->hw_enc_features |= NETIF_F_TSO6; - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_PARTIAL; + netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL; + } + + if (mlx5e_vxlan_allowed(mdev)) { + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; } + if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { + netdev->hw_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->gso_partial_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + } + mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled); if (fcs_supported) @@ -4013,10 +4117,12 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) mlx5e_set_netdev_dev_addr(netdev); -#ifdef CONFIG_NET_SWITCHDEV - if (MLX5_CAP_GEN(mdev, vport_group_manager)) +#if IS_ENABLED(CONFIG_NET_SWITCHDEV) && IS_ENABLED(CONFIG_MLX5_ESWITCH) + if (MLX5_VPORT_MANAGER(mdev)) netdev->switchdev_ops = &mlx5e_switchdev_ops; #endif + + mlx5e_ipsec_build_netdev(priv); } static void mlx5e_create_q_counter(struct mlx5e_priv *priv) @@ -4045,14 +4151,19 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev, void *ppriv) { struct mlx5e_priv *priv = netdev_priv(netdev); + int err; mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv); + err = mlx5e_ipsec_init(priv); + if (err) + mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err); mlx5e_build_nic_netdev(netdev); mlx5e_vxlan_init(priv); } static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { + mlx5e_ipsec_cleanup(priv); mlx5e_vxlan_cleanup(priv); if (priv->channels.params.xdp_prog) @@ -4139,17 +4250,21 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5e_init_l2_addr(priv); + /* Marking the link as currently not needed by the Driver */ + if (!netif_running(netdev)) + mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN); + /* MTU range: 68 - hw-specific max */ netdev->min_mtu = ETH_MIN_MTU; mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); - netdev->max_mtu = MLX5E_HW2SW_MTU(max_mtu); + netdev->max_mtu = MLX5E_HW2SW_MTU(priv, max_mtu); mlx5e_set_dev_port_mtu(priv); mlx5_lag_add(mdev, netdev); mlx5e_enable_async_events(priv); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) + if (MLX5_VPORT_MANAGER(priv->mdev)) mlx5e_register_vport_reps(priv); if (netdev->reg_state != NETREG_REGISTERED) @@ -4183,7 +4298,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->set_rx_mode_work); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) + if (MLX5_VPORT_MANAGER(priv->mdev)) mlx5e_unregister_vport_reps(priv); mlx5e_disable_async_events(priv); @@ -4199,8 +4314,9 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .cleanup_tx = mlx5e_cleanup_nic_tx, .enable = mlx5e_nic_enable, .disable = mlx5e_nic_disable, - .update_stats = mlx5e_update_stats, + .update_stats = mlx5e_update_ndo_stats, .max_nch = mlx5e_get_max_num_channels, + .update_carrier = mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe, .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, .max_tc = MLX5E_MAX_NUM_TC, @@ -4355,32 +4471,29 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) static void *mlx5e_add(struct mlx5_core_dev *mdev) { - struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - struct mlx5e_rep_priv 
*rpriv = NULL; + struct net_device *netdev; + void *rpriv = NULL; void *priv; - int vport; int err; - struct net_device *netdev; err = mlx5e_check_required_hca_cap(mdev); if (err) return NULL; - if (MLX5_CAP_GEN(mdev, vport_group_manager)) { - rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); +#ifdef CONFIG_MLX5_ESWITCH + if (MLX5_VPORT_MANAGER(mdev)) { + rpriv = mlx5e_alloc_nic_rep_priv(mdev); if (!rpriv) { - mlx5_core_warn(mdev, - "Not creating net device, Failed to alloc rep priv data\n"); + mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n"); return NULL; } - rpriv->rep = &esw->offloads.vport_reps[0]; } +#endif netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, rpriv); if (!netdev) { mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); - goto err_unregister_reps; + goto err_free_rpriv; } priv = netdev_priv(netdev); @@ -4401,14 +4514,9 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) err_detach: mlx5e_detach(mdev, priv); - err_destroy_netdev: mlx5e_destroy_netdev(priv); - -err_unregister_reps: - for (vport = 1; vport < total_vfs; vport++) - mlx5_eswitch_unregister_vport_rep(esw, vport); - +err_free_rpriv: kfree(rpriv); return NULL; } @@ -4443,6 +4551,7 @@ static struct mlx5_interface mlx5e_interface = { void mlx5e_init(void) { + mlx5e_ipsec_build_inverse_table(); mlx5e_build_ptys2ethtool_map(); mlx5_register_interface(&mlx5e_interface); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 46984a52a94b..45e03c427faf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -613,15 +613,18 @@ static int mlx5e_rep_open(struct net_device *dev) struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int err; - err = mlx5e_open(dev); + mutex_lock(&priv->state_lock); + err = mlx5e_open_locked(dev); if (err) - return err; + goto unlock; - err = mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_UP); - if (!err) + if (!mlx5_eswitch_set_vport_state(esw, rep->vport, + MLX5_ESW_VPORT_ADMIN_STATE_UP)) netif_carrier_on(dev); - return 0; +unlock: + mutex_unlock(&priv->state_lock); + return err; } static int mlx5e_rep_close(struct net_device *dev) @@ -630,10 +633,13 @@ static int mlx5e_rep_close(struct net_device *dev) struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int ret; + mutex_lock(&priv->state_lock); (void)mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN); - - return mlx5e_close(dev); + ret = mlx5e_close_locked(dev); + mutex_unlock(&priv->state_lock); + return ret; } static int mlx5e_rep_get_phys_port_name(struct net_device *dev, @@ -651,32 +657,42 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, return 0; } -static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle, - __be16 proto, struct tc_to_netdev *tc) +static int +mlx5e_rep_setup_tc_cls_flower(struct net_device *dev, + struct tc_cls_flower_offload *cls_flower) { struct mlx5e_priv *priv = netdev_priv(dev); - if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS)) + if (!is_classid_clsact_ingress(cls_flower->common.classid) || + cls_flower->common.chain_index) return -EOPNOTSUPP; - if (tc->egress_dev) { + if (cls_flower->egress_dev) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct net_device *uplink_dev = mlx5_eswitch_get_uplink_netdev(esw); - return uplink_dev->netdev_ops->ndo_setup_tc(uplink_dev, handle, - proto, tc); + dev = 
mlx5_eswitch_get_uplink_netdev(esw); + return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, + cls_flower); } - switch (tc->type) { + switch (cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return mlx5e_configure_flower(priv, cls_flower); + case TC_CLSFLOWER_DESTROY: + return mlx5e_delete_flower(priv, cls_flower); + case TC_CLSFLOWER_STATS: + return mlx5e_stats_flower(priv, cls_flower); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { case TC_SETUP_CLSFLOWER: - switch (tc->cls_flower->command) { - case TC_CLSFLOWER_REPLACE: - return mlx5e_configure_flower(priv, proto, tc->cls_flower); - case TC_CLSFLOWER_DESTROY: - return mlx5e_delete_flower(priv, tc->cls_flower); - case TC_CLSFLOWER_STATS: - return mlx5e_stats_flower(priv, tc->cls_flower); - } + return mlx5e_rep_setup_tc_cls_flower(dev, type_data); default: return -EOPNOTSUPP; } @@ -768,7 +784,7 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { .ndo_stop = mlx5e_rep_close, .ndo_start_xmit = mlx5e_xmit, .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, - .ndo_setup_tc = mlx5e_rep_ndo_setup_tc, + .ndo_setup_tc = mlx5e_rep_setup_tc, .ndo_get_stats64 = mlx5e_rep_get_stats, .ndo_has_offload_stats = mlx5e_has_offload_stats, .ndo_get_offload_stats = mlx5e_get_offload_stats, @@ -830,6 +846,9 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev, INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); priv->channels.params.num_channels = profile->max_nch(mdev); + + priv->hard_mtu = MLX5E_ETH_HARD_MTU; + mlx5e_build_rep_params(mdev, &priv->channels.params); mlx5e_build_rep_netdev(netdev); } @@ -905,7 +924,7 @@ static int mlx5e_get_rep_max_num_channels(struct mlx5_core_dev *mdev) return MLX5E_PORT_REPRESENTOR_NCH; } -static struct mlx5e_profile mlx5e_rep_profile = { +static const struct mlx5e_profile mlx5e_rep_profile = { .init = mlx5e_init_rep, .init_rx = mlx5e_init_rep_rx, .cleanup_rx = mlx5e_cleanup_rep_rx, @@ -913,6 +932,7 @@ static struct mlx5e_profile mlx5e_rep_profile = { .cleanup_tx = mlx5e_cleanup_nic_tx, .update_stats = mlx5e_rep_update_stats, .max_nch = mlx5e_get_rep_max_num_channels, + .update_carrier = NULL, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, .rx_handlers.handle_rx_cqe_mpwqe = NULL /* Not supported */, .max_tc = 1, @@ -1016,7 +1036,6 @@ err_destroy_netdev: mlx5e_destroy_netdev(netdev_priv(netdev)); kfree(rpriv); return err; - } static void @@ -1091,3 +1110,16 @@ void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */ mlx5_eswitch_unregister_vport_rep(esw, 0); /* UPLINK PF*/ } + +void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv; + + rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); + if (!rpriv) + return NULL; + + rpriv->rep = &esw->offloads.vport_reps[0]; + return rpriv; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index a0a1a7a1d6c0..5659ed9f51e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -38,6 +38,7 @@ #include "eswitch.h" #include "en.h" +#ifdef CONFIG_MLX5_ESWITCH struct mlx5e_neigh_update_table { struct rhashtable neigh_ht; /* Save the neigh hash entries in a list in addition to the hash table @@ -123,6 +124,7 @@ struct mlx5e_encap_entry { int encap_size; }; +void 
*mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev); void mlx5e_register_vport_reps(struct mlx5e_priv *priv); void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv); bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); @@ -141,5 +143,12 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); +#else /* CONFIG_MLX5_ESWITCH */ +static inline void mlx5e_register_vport_reps(struct mlx5e_priv *priv) {} +static inline void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) {} +static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; } +static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {} +#endif #endif /* __MLX5E_REP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 66b5fec15313..15a1687483cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -40,7 +40,8 @@ #include "en_tc.h" #include "eswitch.h" #include "en_rep.h" -#include "ipoib.h" +#include "ipoib/ipoib.h" +#include "en_accel/ipsec_rxtx.h" static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) { @@ -160,6 +161,11 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, #define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT) +static inline bool mlx5e_page_is_reserved(struct page *page) +{ + return page_is_pfmemalloc(page) || page_to_nid(page) != numa_mem_id(); +} + static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { @@ -171,8 +177,10 @@ static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, return false; } - if (unlikely(page_is_pfmemalloc(dma_info->page))) + if (unlikely(mlx5e_page_is_reserved(dma_info->page))) { + rq->stats.cache_waive++; return false; + } cache->page_cache[cache->tail] = *dma_info; cache->tail = tail_next; @@ -216,13 +224,13 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, if (unlikely(!page)) return -ENOMEM; - dma_info->page = page; dma_info->addr = dma_map_page(rq->pdev, page, 0, RQ_PAGE_SIZE(rq), rq->buff.map_dir); if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { put_page(page); return -ENOMEM; } + dma_info->page = page; return 0; } @@ -238,27 +246,58 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, put_page(dma_info->page); } -int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +static inline bool mlx5e_page_reuse(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *wi) { - struct mlx5e_dma_info *di = &rq->dma_info[ix]; + return rq->wqe.page_reuse && wi->di.page && + (wi->offset + rq->wqe.frag_sz <= RQ_PAGE_SIZE(rq)) && + !mlx5e_page_is_reserved(wi->di.page); +} - if (unlikely(mlx5e_page_alloc_mapped(rq, di))) - return -ENOMEM; +static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +{ + struct mlx5e_wqe_frag_info *wi = &rq->wqe.frag_info[ix]; - wqe->data.addr = cpu_to_be64(di->addr + rq->rx_headroom); + /* check if page exists, hence can be reused */ + if (!wi->di.page) { + if (unlikely(mlx5e_page_alloc_mapped(rq, &wi->di))) + return -ENOMEM; + wi->offset = 0; + } + + wqe->data.addr = cpu_to_be64(wi->di.addr + wi->offset + rq->buff.headroom); return 0; } +static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *wi) +{ + 
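The new legacy-RQ page reuse path in en_rx.c carves successive fragments out of one page and releases the page only when the next fragment would no longer fit, or the page is unsuitable (pfmemalloc or remote NUMA node). A minimal self-contained model of the reuse predicate and its effect, with simplified fields:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SZ 4096

struct frag_info {
	size_t offset;		/* next free byte within the page */
	bool   have_page;	/* models wi->di.page != NULL */
	bool   reserved;	/* models pfmemalloc / wrong-node pages */
};

/* models mlx5e_page_reuse(): keep the page if another fragment still fits */
static bool page_reuse_ok(const struct frag_info *wi, size_t frag_sz)
{
	return wi->have_page &&
	       wi->offset + frag_sz <= PAGE_SZ &&
	       !wi->reserved;
}

int main(void)
{
	struct frag_info wi = { .offset = 0, .have_page = true, .reserved = false };
	size_t frag_sz = 1536;
	int frags = 0;

	/* consume fragments until the page has to be released */
	while (page_reuse_ok(&wi, frag_sz)) {
		wi.offset += frag_sz;
		frags++;
	}
	printf("fragments carved from one page: %d\n", frags);
	return 0;
}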
mlx5e_page_release(rq, &wi->di, true); + wi->di.page = NULL; +} + +static inline void mlx5e_free_rx_wqe_reuse(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *wi) +{ + if (mlx5e_page_reuse(rq, wi)) { + rq->stats.page_reuse++; + return; + } + + mlx5e_free_rx_wqe(rq, wi); +} + void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) { - struct mlx5e_dma_info *di = &rq->dma_info[ix]; + struct mlx5e_wqe_frag_info *wi = &rq->wqe.frag_info[ix]; - mlx5e_page_release(rq, di, true); + if (wi->di.page) + mlx5e_free_rx_wqe(rq, wi); } static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) { - return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER; + return rq->mpwqe.num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER; } static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq, @@ -267,7 +306,7 @@ static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq, u32 page_idx, u32 frag_offset, u32 len) { - unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz); + unsigned int truesize = ALIGN(len, BIT(rq->mpwqe.log_stride_sz)); dma_sync_single_for_cpu(rq->pdev, wi->umr.dma_info[page_idx].addr + frag_offset, @@ -320,7 +359,6 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) /* fill sq edge with nops to avoid wqe wrap around */ while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; - sq->db.ico_wqe[pi].num_wqebbs = 1; mlx5e_post_nop(wq, sq->sqn, &sq->pc); } @@ -331,41 +369,35 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) MLX5_OPCODE_UMR); sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; - sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs; sq->pc += num_wqebbs; mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl); } static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_rx_wqe *wqe, u16 ix) { struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; - u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT; int pg_strides = mlx5e_mpwqe_strides_per_page(rq); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0]; int err; int i; - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; - + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) { err = mlx5e_page_alloc_mapped(rq, dma_info); if (unlikely(err)) goto err_unmap; wi->umr.mtt[i] = cpu_to_be64(dma_info->addr | MLX5_EN_WR); page_ref_add(dma_info->page, pg_strides); - wi->skbs_frags[i] = 0; } + memset(wi->skbs_frags, 0, sizeof(*wi->skbs_frags) * MLX5_MPWRQ_PAGES_PER_WQE); wi->consumed_strides = 0; - wqe->data.addr = cpu_to_be64(dma_offset); return 0; err_unmap: while (--i >= 0) { - struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; - + dma_info--; page_ref_sub(dma_info->page, pg_strides); mlx5e_page_release(rq, dma_info, true); } @@ -376,27 +408,21 @@ err_unmap: void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) { int pg_strides = mlx5e_mpwqe_strides_per_page(rq); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0]; int i; - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; - + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) { page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]); mlx5e_page_release(rq, dma_info, true); } } -void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) +static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); - clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); - - 
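mlx5e_alloc_rx_umr_mpwqe() above now walks the dma_info array with a running pointer and rolls back in reverse order when a page allocation fails part-way through. The allocate-all-or-unwind idiom, reduced to a hypothetical standalone sketch:

#include <stdio.h>
#include <stdlib.h>

/* stand-in for mlx5e_page_alloc_mapped(): may fail */
static int alloc_page_stub(void **slot)
{
	*slot = malloc(64);
	return *slot ? 0 : -1;
}

static int alloc_all(void **pages, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		if (alloc_page_stub(&pages[i]))
			goto err_unwind;
	}
	return 0;

err_unwind:
	/* release in reverse order, mirroring the driver's while (--i >= 0) loop */
	while (--i >= 0) {
		free(pages[i]);
		pages[i] = NULL;
	}
	return -1;
}

int main(void)
{
	void *pages[8] = { 0 };

	if (!alloc_all(pages, 8)) {
		puts("all pages mapped");
		for (int i = 0; i < 8; i++)
			free(pages[i]);
	}
	return 0;
}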
if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) { - mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); - return; - } + rq->mpwqe.umr_in_progress = false; mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); @@ -406,16 +432,18 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) mlx5_wq_ll_update_db_record(wq); } -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { int err; - err = mlx5e_alloc_rx_umr_mpwqe(rq, wqe, ix); - if (unlikely(err)) + err = mlx5e_alloc_rx_umr_mpwqe(rq, ix); + if (unlikely(err)) { + rq->stats.buff_alloc_err++; return err; - set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + } + rq->mpwqe.umr_in_progress = true; mlx5e_post_umr_wqe(rq, ix); - return -EBUSY; + return 0; } void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) @@ -425,94 +453,150 @@ void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) mlx5e_free_rx_mpwqe(rq, wi); } -#define RQ_CANNOT_POST(rq) \ - (!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state) || \ - test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) - bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; + int err; - if (unlikely(RQ_CANNOT_POST(rq))) + if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) return false; - while (!mlx5_wq_ll_is_full(wq)) { + if (mlx5_wq_ll_is_full(wq)) + return false; + + do { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); - int err; - err = rq->alloc_wqe(rq, wqe, wq->head); - if (err == -EBUSY) - return true; + err = mlx5e_alloc_rx_wqe(rq, wqe, wq->head); if (unlikely(err)) { rq->stats.buff_alloc_err++; break; } mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); - } + } while (!mlx5_wq_ll_is_full(wq)); /* ensure wqes are visible to device before updating doorbell record */ dma_wmb(); mlx5_wq_ll_update_db_record(wq); - return !mlx5_wq_ll_is_full(wq); + return !!err; +} + +static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, + struct mlx5e_icosq *sq, + struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; + struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; + + mlx5_cqwq_pop(&cq->wq); + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { + WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", + cqe->op_own); + return; + } + + if (likely(icowi->opcode == MLX5_OPCODE_UMR)) { + mlx5e_post_rx_mpwqe(rq); + return; + } + + if (unlikely(icowi->opcode != MLX5_OPCODE_NOP)) + WARN_ONCE(true, + "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", + icowi->opcode); +} + +static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) +{ + struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); + struct mlx5_cqe64 *cqe; + + if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED))) + return; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (likely(!cqe)) + return; + + /* by design, there's only a single cqe */ + mlx5e_poll_ico_single_cqe(cq, sq, rq, cqe); + + mlx5_cqwq_update_db_record(&cq->wq); +} + +bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = &rq->wq; + + if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) + return false; + + mlx5e_poll_ico_cq(&rq->channel->icosq.cq, rq); + + if (mlx5_wq_ll_is_full(wq)) + return false; + + if (!rq->mpwqe.umr_in_progress) + mlx5e_alloc_rx_mpwqe(rq, wq->head); + + return true; } static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 
*cqe, u32 cqe_bcnt) { struct ethhdr *eth = (struct ethhdr *)(skb->data); - struct iphdr *ipv4; - struct ipv6hdr *ipv6; struct tcphdr *tcp; int network_depth = 0; __be16 proto; u16 tot_len; + void *ip_p; u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); - int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) || - (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type)); + u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) || + (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA); skb->mac_len = ETH_HLEN; proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth); - ipv4 = (struct iphdr *)(skb->data + network_depth); - ipv6 = (struct ipv6hdr *)(skb->data + network_depth); tot_len = cqe_bcnt - network_depth; + ip_p = skb->data + network_depth; if (proto == htons(ETH_P_IP)) { - tcp = (struct tcphdr *)(skb->data + network_depth + - sizeof(struct iphdr)); - ipv6 = NULL; - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; - } else { - tcp = (struct tcphdr *)(skb->data + network_depth + - sizeof(struct ipv6hdr)); - ipv4 = NULL; - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; - } - - if (get_cqe_lro_tcppsh(cqe)) - tcp->psh = 1; + struct iphdr *ipv4 = ip_p; - if (tcp_ack) { - tcp->ack = 1; - tcp->ack_seq = cqe->lro_ack_seq_num; - tcp->window = cqe->lro_tcp_win; - } + tcp = ip_p + sizeof(struct iphdr); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; - if (ipv4) { ipv4->ttl = cqe->lro_min_ttl; ipv4->tot_len = cpu_to_be16(tot_len); ipv4->check = 0; ipv4->check = ip_fast_csum((unsigned char *)ipv4, ipv4->ihl); } else { + struct ipv6hdr *ipv6 = ip_p; + + tcp = ip_p + sizeof(struct ipv6hdr); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + ipv6->hop_limit = cqe->lro_min_ttl; ipv6->payload_len = cpu_to_be16(tot_len - sizeof(struct ipv6hdr)); } + + tcp->psh = get_cqe_lro_tcppsh(cqe); + + if (tcp_ack) { + tcp->ack = 1; + tcp->ack_seq = cqe->lro_ack_seq_num; + tcp->window = cqe->lro_tcp_win; + } } static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe, @@ -543,6 +627,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, if (lro) { skb->ip_summed = CHECKSUM_UNNECESSARY; + rq->stats.csum_unnecessary++; return; } @@ -560,7 +645,9 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, skb->csum_level = 1; skb->encapsulation = 1; rq->stats.csum_unnecessary_inner++; + return; } + rq->stats.csum_unnecessary++; return; } csum_none: @@ -648,9 +735,8 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, prefetchw(wqe); if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || - MLX5E_SW2HW_MTU(rq->netdev->mtu) < dma_len)) { + MLX5E_SW2HW_MTU(rq->channel->priv, rq->netdev->mtu) < dma_len)) { rq->stats.xdp_drop++; - mlx5e_page_release(rq, di, true); return false; } @@ -661,7 +747,6 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, sq->db.doorbell = false; } rq->stats.xdp_tx_full++; - mlx5e_page_release(rq, di, true); return false; } @@ -686,10 +771,15 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); + /* move page to reference to sq responsibility, + * and mark so it's not put back in page-cache. 
+ */ + rq->wqe.xdp_xmit = true; sq->db.di[pi] = *di; sq->pc++; sq->db.doorbell = true; + rq->stats.xdp_tx++; return true; } @@ -726,35 +816,34 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq, trace_xdp_exception(rq->netdev, prog, act); case XDP_DROP: rq->stats.xdp_drop++; - mlx5e_page_release(rq, di, true); return true; } } static inline struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, - u16 wqe_counter, u32 cqe_bcnt) + struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt) { - struct mlx5e_dma_info *di; + struct mlx5e_dma_info *di = &wi->di; + u16 rx_headroom = rq->buff.headroom; struct sk_buff *skb; void *va, *data; - u16 rx_headroom = rq->rx_headroom; bool consumed; + u32 frag_size; - di = &rq->dma_info[wqe_counter]; - va = page_address(di->page); + va = page_address(di->page) + wi->offset; data = va + rx_headroom; + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); dma_sync_single_range_for_cpu(rq->pdev, - di->addr, - rx_headroom, - rq->buff.wqe_sz, + di->addr + wi->offset, + 0, frag_size, DMA_FROM_DEVICE); prefetch(data); + wi->offset += frag_size; if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { rq->stats.wqe_err++; - mlx5e_page_release(rq, di, true); return NULL; } @@ -764,16 +853,14 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, if (consumed) return NULL; /* page/packet was consumed by XDP */ - skb = build_skb(va, RQ_PAGE_SIZE(rq)); + skb = build_skb(va, frag_size); if (unlikely(!skb)) { rq->stats.buff_alloc_err++; - mlx5e_page_release(rq, di, true); return NULL; } - /* queue up for recycling ..*/ + /* queue up for recycling/reuse */ page_ref_inc(di->page); - mlx5e_page_release(rq, di, true); skb_reserve(skb, rx_headroom); skb_put(skb, cqe_bcnt); @@ -783,6 +870,7 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { + struct mlx5e_wqe_frag_info *wi; struct mlx5e_rx_wqe *wqe; __be16 wqe_counter_be; struct sk_buff *skb; @@ -792,26 +880,40 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + wi = &rq->wqe.frag_info[wqe_counter]; cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); - if (!skb) + skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt); + if (!skb) { + /* probably for XDP */ + if (rq->wqe.xdp_xmit) { + wi->di.page = NULL; + rq->wqe.xdp_xmit = false; + /* do not return page to cache, it will be returned on XDP_TX completion */ + goto wq_ll_pop; + } + /* probably an XDP_DROP, save the page-reuse checks */ + mlx5e_free_rx_wqe(rq, wi); goto wq_ll_pop; + } mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); napi_gro_receive(rq->cq.napi, skb); + mlx5e_free_rx_wqe_reuse(rq, wi); wq_ll_pop: mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, &wqe->next.next_wqe_index); } +#ifdef CONFIG_MLX5_ESWITCH void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { struct net_device *netdev = rq->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5e_wqe_frag_info *wi; struct mlx5e_rx_wqe *wqe; struct sk_buff *skb; __be16 wqe_counter_be; @@ -821,11 +923,21 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); 
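After this change a NULL return from skb_from_cqe() has two meanings in mlx5e_handle_rx_cqe(): either the page was handed to the XDP TX queue (xdp_xmit set, so the page is released only on the XDP TX completion) or the packet was dropped and the WQE's page is freed immediately. A compact model of that three-way completion handling, with invented names:

#include <stdbool.h>
#include <stdio.h>

enum rx_verdict { RX_PASS, RX_XDP_TX, RX_XDP_DROP };

struct wqe_frag {
	bool has_page;		/* models wi->di.page != NULL */
};

/* models the tail of mlx5e_handle_rx_cqe() once the verdict is known */
static void complete_rx(struct wqe_frag *wi, enum rx_verdict v)
{
	switch (v) {
	case RX_XDP_TX:
		/* ownership moved to the XDP SQ; forget the page here,
		 * it is released on the XDP TX completion instead
		 */
		wi->has_page = false;
		break;
	case RX_XDP_DROP:
		wi->has_page = false;	/* models mlx5e_free_rx_wqe() */
		break;
	case RX_PASS:
		/* skb delivered; the page may be kept for reuse
		 * (models mlx5e_free_rx_wqe_reuse())
		 */
		break;
	}
}

int main(void)
{
	struct wqe_frag wi = { .has_page = true };

	complete_rx(&wi, RX_XDP_TX);
	printf("page still tracked after XDP_TX: %s\n", wi.has_page ? "yes" : "no");
	return 0;
}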
+ wi = &rq->wqe.frag_info[wqe_counter]; cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); - if (!skb) + skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt); + if (!skb) { + if (rq->wqe.xdp_xmit) { + wi->di.page = NULL; + rq->wqe.xdp_xmit = false; + /* do not return page to cache, it will be returned on XDP_TX completion */ + goto wq_ll_pop; + } + /* probably an XDP_DROP, save the page-reuse checks */ + mlx5e_free_rx_wqe(rq, wi); goto wq_ll_pop; + } mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); @@ -834,10 +946,12 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) napi_gro_receive(rq->cq.napi, skb); + mlx5e_free_rx_wqe_reuse(rq, wi); wq_ll_pop: mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, &wqe->next.next_wqe_index); } +#endif static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, @@ -846,7 +960,7 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb) { u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); - u32 wqe_offset = stride_ix * rq->mpwqe_stride_sz; + u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; u32 head_offset = wqe_offset & (PAGE_SIZE - 1); u32 page_idx = wqe_offset >> PAGE_SHIFT; u32 head_page_idx = page_idx; @@ -914,7 +1028,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) napi_gro_receive(rq->cq.napi, skb); mpwrq_cqe_out: - if (likely(wi->consumed_strides < rq->mpwqe_num_strides)) + if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) return; mlx5e_free_rx_mpwqe(rq, wi); @@ -924,21 +1038,23 @@ mpwrq_cqe_out: int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); - struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; + struct mlx5e_xdpsq *xdpsq; + struct mlx5_cqe64 *cqe; int work_done = 0; - if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) + if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) return 0; if (cq->decmprs_left) work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget); - for (; work_done < budget; work_done++) { - struct mlx5_cqe64 *cqe = mlx5e_get_cqe(cq); + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) + return 0; - if (!cqe) - break; + xdpsq = &rq->xdpsq; + do { if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) { work_done += mlx5e_decompress_cqes_start(rq, cq, @@ -949,7 +1065,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) mlx5_cqwq_pop(&cq->wq); rq->handle_rx_cqe(rq, cqe); - } + } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); if (xdpsq->db.doorbell) { mlx5e_xmit_xdp_doorbell(xdpsq); @@ -967,13 +1083,18 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) { struct mlx5e_xdpsq *sq; + struct mlx5_cqe64 *cqe; struct mlx5e_rq *rq; u16 sqcc; int i; sq = container_of(cq, struct mlx5e_xdpsq, cq); - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED))) + return false; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) return false; rq = container_of(sq, struct mlx5e_rq, xdpsq); @@ -983,15 +1104,11 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) */ sqcc = sq->cc; - for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { - struct mlx5_cqe64 *cqe; + i = 0; + do { u16 wqe_counter; bool last_wqe; - cqe = mlx5e_get_cqe(cq); - if (!cqe) - break; - mlx5_cqwq_pop(&cq->wq); wqe_counter = be16_to_cpu(cqe->wqe_counter); @@ -1009,7 +1126,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) /* Recycle RX 
page */ mlx5e_page_release(rq, di, true); } while (!last_wqe); - } + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); mlx5_cqwq_update_db_record(&cq->wq); @@ -1038,11 +1155,7 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) #ifdef CONFIG_MLX5_CORE_IPOIB #define MLX5_IB_GRH_DGID_OFFSET 24 -#define MLX5_IB_GRH_BYTES 40 -#define MLX5_IPOIB_ENCAP_LEN 4 #define MLX5_GID_SIZE 16 -#define MLX5_IPOIB_PSEUDO_LEN 20 -#define MLX5_IPOIB_HARD_LEN (MLX5_IPOIB_PSEUDO_LEN + MLX5_IPOIB_ENCAP_LEN) static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, @@ -1050,6 +1163,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, struct sk_buff *skb) { struct net_device *netdev = rq->netdev; + struct mlx5e_tstamp *tstamp = rq->tstamp; char *pseudo_header; u8 *dgid; u8 g; @@ -1074,6 +1188,9 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + if (unlikely(mlx5e_rx_hw_stamp(tstamp))) + mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb)); + skb_record_rx_queue(skb, rq->ix); if (likely(netdev->features & NETIF_F_RXHASH)) @@ -1094,6 +1211,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { + struct mlx5e_wqe_frag_info *wi; struct mlx5e_rx_wqe *wqe; __be16 wqe_counter_be; struct sk_buff *skb; @@ -1103,18 +1221,60 @@ void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + wi = &rq->wqe.frag_info[wqe_counter]; cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); + skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt); if (!skb) - goto wq_ll_pop; + goto wq_free_wqe; mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); napi_gro_receive(rq->cq.napi, skb); -wq_ll_pop: +wq_free_wqe: + mlx5e_free_rx_wqe_reuse(rq, wi); mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, &wqe->next.next_wqe_index); } #endif /* CONFIG_MLX5_CORE_IPOIB */ + +#ifdef CONFIG_MLX5_EN_IPSEC + +void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5e_wqe_frag_info *wi; + struct mlx5e_rx_wqe *wqe; + __be16 wqe_counter_be; + struct sk_buff *skb; + u16 wqe_counter; + u32 cqe_bcnt; + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + wi = &rq->wqe.frag_info[wqe_counter]; + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt); + if (unlikely(!skb)) { + /* a DROP, save the page-reuse checks */ + mlx5e_free_rx_wqe(rq, wi); + goto wq_ll_pop; + } + skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb); + if (unlikely(!skb)) { + mlx5e_free_rx_wqe(rq, wi); + goto wq_ll_pop; + } + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + napi_gro_receive(rq->cq.napi, skb); + + mlx5e_free_rx_wqe_reuse(rq, wi); +wq_ll_pop: + mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, + &wqe->next.next_wqe_index); +} + +#endif /* CONFIG_MLX5_EN_IPSEC */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 5225f2226a67..1f1f8af87d4d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -132,14 +132,14 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv 
*priv) skb_reserve(skb, NET_IP_ALIGN); /* Reserve for ethernet and IP header */ - ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN); + ethh = skb_push(skb, ETH_HLEN); skb_reset_mac_header(skb); skb_set_network_header(skb, skb->len); - iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)); + iph = skb_put(skb, sizeof(struct iphdr)); skb_set_transport_header(skb, skb->len); - udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr)); + udph = skb_put(skb, sizeof(struct udphdr)); /* Fill ETH header */ ether_addr_copy(ethh->h_dest, priv->netdev->dev_addr); @@ -167,12 +167,12 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv) ip_send_check(iph); /* Fill test header and data */ - mlxh = (struct mlx5ehdr *)skb_put(skb, sizeof(*mlxh)); + mlxh = skb_put(skb, sizeof(*mlxh)); mlxh->version = 0; mlxh->magic = cpu_to_be64(MLX5E_TEST_MAGIC); strlcpy(mlxh->text, mlx5e_test_text, sizeof(mlxh->text)); datalen -= sizeof(*mlxh); - memset(skb_put(skb, datalen), 0, datalen); + skb_put_zero(skb, datalen); skb->csum = 0; skb->ip_summed = CHECKSUM_PARTIAL; @@ -189,6 +189,7 @@ struct mlx5e_lbt_priv { struct packet_type pt; struct completion comp; bool loopback_ok; + bool local_lb; }; static int @@ -236,6 +237,13 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv, { int err = 0; + /* Temporarily enable local_lb */ + if (MLX5_CAP_GEN(priv->mdev, disable_local_lb)) { + mlx5_nic_vport_query_local_lb(priv->mdev, &lbtp->local_lb); + if (!lbtp->local_lb) + mlx5_nic_vport_update_local_lb(priv->mdev, true); + } + err = mlx5e_refresh_tirs(priv, true); if (err) return err; @@ -254,6 +262,11 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv, static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv, struct mlx5e_lbt_priv *lbtp) { + if (MLX5_CAP_GEN(priv->mdev, disable_local_lb)) { + if (!lbtp->local_lb) + mlx5_nic_vport_update_local_lb(priv->mdev, false); + } + dev_remove_pack(&lbtp->pt); mlx5e_refresh_tirs(priv, false); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index f81c3aa60b46..f8637213afc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -47,7 +47,7 @@ struct counter_desc { char format[ETH_GSTRING_LEN]; - int offset; /* Byte offset */ + size_t offset; /* Byte offset */ }; struct mlx5e_sw_stats { @@ -68,6 +68,7 @@ struct mlx5e_sw_stats { u64 rx_xdp_drop; u64 rx_xdp_tx; u64 rx_xdp_tx_full; + u64 tx_csum_none; u64 tx_csum_partial; u64 tx_csum_partial_inner; u64 tx_queue_stopped; @@ -79,10 +80,12 @@ struct mlx5e_sw_stats { u64 rx_buff_alloc_err; u64 rx_cqe_compress_blks; u64 rx_cqe_compress_pkts; + u64 rx_page_reuse; u64 rx_cache_reuse; u64 rx_cache_full; u64 rx_cache_empty; u64 rx_cache_busy; + u64 rx_cache_waive; /* Special handling counters */ u64 link_down_events_phy; @@ -106,6 +109,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, @@ -117,10 +121,12 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, { 
MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_page_reuse) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) }, }; @@ -214,6 +220,12 @@ static const struct counter_desc vport_stats_desc[] = { MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \ counter_set.eth_per_prio_grp_data_layout.c##_high) #define NUM_PPORT_PRIO 8 +#define PPORT_ETH_EXT_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_extended_cntrs_grp_data_layout.c##_high) +#define PPORT_ETH_EXT_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, (pstats)->eth_ext_counters, \ + counter_set.eth_extended_cntrs_grp_data_layout.c##_high) struct mlx5e_pport_stats { __be64 IEEE_802_3_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; @@ -222,6 +234,7 @@ struct mlx5e_pport_stats { __be64 per_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 phy_statistical_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; }; static const struct counter_desc pport_802_3_stats_desc[] = { @@ -268,7 +281,7 @@ static const struct counter_desc pport_2819_stats_desc[] = { }; static const struct counter_desc pport_phy_statistical_stats_desc[] = { - { "rx_symbol_errors_phy", PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) }, + { "rx_pcs_symbol_err_phy", PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) }, { "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) }, }; @@ -288,12 +301,22 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, }; +static const struct counter_desc pport_eth_ext_stats_desc[] = { + { "rx_buffer_passed_thres_phy", PPORT_ETH_EXT_OFF(rx_buffer_almost_full) }, +}; + #define PCIE_PERF_OFF(c) \ MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c) #define PCIE_PERF_GET(pcie_stats, c) \ MLX5_GET(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ counter_set.pcie_perf_cntrs_grp_data_layout.c) +#define PCIE_PERF_OFF64(c) \ + MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c##_high) +#define PCIE_PERF_GET64(pcie_stats, c) \ + MLX5_GET64(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ + counter_set.pcie_perf_cntrs_grp_data_layout.c##_high) + struct mlx5e_pcie_stats { __be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)]; }; @@ -303,10 +326,22 @@ static const struct counter_desc pcie_perf_stats_desc[] = { { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) }, }; +static const struct counter_desc pcie_perf_stats_desc64[] = { + { "outbound_pci_buffer_overflow", PCIE_PERF_OFF64(tx_overflow_buffer_pkt) }, +}; + +static const struct counter_desc pcie_perf_stall_stats_desc[] = { + { "outbound_pci_stalled_rd", PCIE_PERF_OFF(outbound_stalled_reads) }, + { "outbound_pci_stalled_wr", PCIE_PERF_OFF(outbound_stalled_writes) }, + { "outbound_pci_stalled_rd_events", PCIE_PERF_OFF(outbound_stalled_reads_events) }, + { "outbound_pci_stalled_wr_events", PCIE_PERF_OFF(outbound_stalled_writes_events) }, +}; + struct mlx5e_rq_stats { u64 packets; u64 bytes; u64 csum_complete; + u64 
csum_unnecessary; u64 csum_unnecessary_inner; u64 csum_none; u64 lro_packets; @@ -319,16 +354,19 @@ struct mlx5e_rq_stats { u64 buff_alloc_err; u64 cqe_compress_blks; u64 cqe_compress_pkts; + u64 page_reuse; u64 cache_reuse; u64 cache_full; u64 cache_empty; u64 cache_busy; + u64 cache_waive; }; static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, @@ -341,10 +379,12 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, page_reuse) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) }, }; struct mlx5e_sq_stats { @@ -356,6 +396,7 @@ struct mlx5e_sq_stats { u64 tso_bytes; u64 tso_inner_packets; u64 tso_inner_bytes; + u64 csum_partial; u64 csum_partial_inner; u64 nop; /* less likely accessed in data path */ @@ -372,6 +413,7 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, @@ -393,17 +435,29 @@ static const struct counter_desc sq_stats_desc[] = { #define NUM_PCIE_PERF_COUNTERS(priv) \ (ARRAY_SIZE(pcie_perf_stats_desc) * \ MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) +#define NUM_PCIE_PERF_COUNTERS64(priv) \ + (ARRAY_SIZE(pcie_perf_stats_desc64) * \ + MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) +#define NUM_PCIE_PERF_STALL_COUNTERS(priv) \ + (ARRAY_SIZE(pcie_perf_stall_stats_desc) * \ + MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) #define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \ ARRAY_SIZE(pport_per_prio_traffic_stats_desc) #define NUM_PPORT_PER_PRIO_PFC_COUNTERS \ ARRAY_SIZE(pport_per_prio_pfc_stats_desc) +#define NUM_PPORT_ETH_EXT_COUNTERS(priv) \ + (ARRAY_SIZE(pport_eth_ext_stats_desc) * \ + MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) #define NUM_PPORT_COUNTERS(priv) (NUM_PPORT_802_3_COUNTERS + \ NUM_PPORT_2863_COUNTERS + \ NUM_PPORT_2819_COUNTERS + \ NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) + \ NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ - NUM_PPORT_PRIO) -#define NUM_PCIE_COUNTERS(priv) NUM_PCIE_PERF_COUNTERS(priv) + NUM_PPORT_PRIO + \ + NUM_PPORT_ETH_EXT_COUNTERS(priv)) +#define NUM_PCIE_COUNTERS(priv) (NUM_PCIE_PERF_COUNTERS(priv) + \ + NUM_PCIE_PERF_COUNTERS64(priv) +\ + NUM_PCIE_PERF_STALL_COUNTERS(priv)) #define NUM_RQ_STATS 
ARRAY_SIZE(rq_stats_desc) #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9df9fc0d26f5..9ba1f72060aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -69,7 +69,8 @@ struct mlx5e_tc_flow { u64 cookie; u8 flags; struct mlx5_flow_handle *rule; - struct list_head encap; /* flows sharing the same encap */ + struct list_head encap; /* flows sharing the same encap ID */ + struct list_head mod_hdr; /* flows sharing the same mod hdr ID */ union { struct mlx5_esw_flow_attr esw_attr[0]; struct mlx5_nic_flow_attr nic_attr[0]; @@ -77,9 +78,11 @@ struct mlx5e_tc_flow { }; struct mlx5e_tc_flow_parse_attr { + struct ip_tunnel_info tun_info; struct mlx5_flow_spec spec; int num_mod_hdr_actions; void *mod_hdr_actions; + int mirred_ifindex; }; enum { @@ -90,6 +93,135 @@ enum { #define MLX5E_TC_TABLE_NUM_ENTRIES 1024 #define MLX5E_TC_TABLE_NUM_GROUPS 4 +struct mod_hdr_key { + int num_actions; + void *actions; +}; + +struct mlx5e_mod_hdr_entry { + /* a node of a hash table which keeps all the mod_hdr entries */ + struct hlist_node mod_hdr_hlist; + + /* flows sharing the same mod_hdr entry */ + struct list_head flows; + + struct mod_hdr_key key; + + u32 mod_hdr_id; +}; + +#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) + +static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key) +{ + return jhash(key->actions, + key->num_actions * MLX5_MH_ACT_SZ, 0); +} + +static inline int cmp_mod_hdr_info(struct mod_hdr_key *a, + struct mod_hdr_key *b) +{ + if (a->num_actions != b->num_actions) + return 1; + + return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ); +} + +static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5e_tc_flow_parse_attr *parse_attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int num_actions, actions_size, namespace, err; + struct mlx5e_mod_hdr_entry *mh; + struct mod_hdr_key key; + bool found = false; + u32 hash_key; + + num_actions = parse_attr->num_mod_hdr_actions; + actions_size = MLX5_MH_ACT_SZ * num_actions; + + key.actions = parse_attr->mod_hdr_actions; + key.num_actions = num_actions; + + hash_key = hash_mod_hdr_info(&key); + + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + namespace = MLX5_FLOW_NAMESPACE_FDB; + hash_for_each_possible(esw->offloads.mod_hdr_tbl, mh, + mod_hdr_hlist, hash_key) { + if (!cmp_mod_hdr_info(&mh->key, &key)) { + found = true; + break; + } + } + } else { + namespace = MLX5_FLOW_NAMESPACE_KERNEL; + hash_for_each_possible(priv->fs.tc.mod_hdr_tbl, mh, + mod_hdr_hlist, hash_key) { + if (!cmp_mod_hdr_info(&mh->key, &key)) { + found = true; + break; + } + } + } + + if (found) + goto attach_flow; + + mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL); + if (!mh) + return -ENOMEM; + + mh->key.actions = (void *)mh + sizeof(*mh); + memcpy(mh->key.actions, key.actions, actions_size); + mh->key.num_actions = num_actions; + INIT_LIST_HEAD(&mh->flows); + + err = mlx5_modify_header_alloc(priv->mdev, namespace, + mh->key.num_actions, + mh->key.actions, + &mh->mod_hdr_id); + if (err) + goto out_err; + + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + hash_add(esw->offloads.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key); + else + hash_add(priv->fs.tc.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key); + +attach_flow: + list_add(&flow->mod_hdr, &mh->flows); + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + 
flow->esw_attr->mod_hdr_id = mh->mod_hdr_id; + else + flow->nic_attr->mod_hdr_id = mh->mod_hdr_id; + + return 0; + +out_err: + kfree(mh); + return err; +} + +static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct list_head *next = flow->mod_hdr.next; + + list_del(&flow->mod_hdr); + + if (list_empty(next)) { + struct mlx5e_mod_hdr_entry *mh; + + mh = list_entry(next, struct mlx5e_mod_hdr_entry, flows); + + mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id); + hash_del(&mh->mod_hdr_hlist); + kfree(mh); + } +} + static struct mlx5_flow_handle * mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, @@ -121,10 +253,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { - err = mlx5_modify_header_alloc(dev, MLX5_FLOW_NAMESPACE_KERNEL, - parse_attr->num_mod_hdr_actions, - parse_attr->mod_hdr_actions, - &attr->mod_hdr_id); + err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); flow_act.modify_id = attr->mod_hdr_id; kfree(parse_attr->mod_hdr_actions); if (err) { @@ -166,8 +295,7 @@ err_add_rule: } err_create_ft: if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - mlx5_modify_header_dealloc(priv->mdev, - attr->mod_hdr_id); + mlx5e_detach_mod_hdr(priv, flow); err_create_mod_hdr_id: mlx5_fc_destroy(dev, counter); @@ -177,6 +305,7 @@ err_create_mod_hdr_id: static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { + struct mlx5_nic_flow_attr *attr = flow->nic_attr; struct mlx5_fc *counter = NULL; counter = mlx5_flow_rule_counter(flow->rule); @@ -188,14 +317,19 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, priv->fs.tc.t = NULL; } - if (flow->nic_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - mlx5_modify_header_dealloc(priv->mdev, - flow->nic_attr->mod_hdr_id); + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5e_detach_mod_hdr(priv, flow); } static void mlx5e_detach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow); +static int mlx5e_attach_encap(struct mlx5e_priv *priv, + struct ip_tunnel_info *tun_info, + struct net_device *mirred_dev, + struct net_device **encap_dev, + struct mlx5e_tc_flow *flow); + static struct mlx5_flow_handle * mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, @@ -203,9 +337,27 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; - struct mlx5_flow_handle *rule; + struct net_device *out_dev, *encap_dev = NULL; + struct mlx5_flow_handle *rule = NULL; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *out_priv; int err; + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { + out_dev = __dev_get_by_index(dev_net(priv->netdev), + attr->parse_attr->mirred_ifindex); + err = mlx5e_attach_encap(priv, &parse_attr->tun_info, + out_dev, &encap_dev, flow); + if (err) { + rule = ERR_PTR(err); + if (err != -EAGAIN) + goto err_attach_encap; + } + out_priv = netdev_priv(encap_dev); + rpriv = out_priv->ppriv; + attr->out_rep = rpriv->rep; + } + err = mlx5_eswitch_add_vlan_action(esw, attr); if (err) { rule = ERR_PTR(err); @@ -213,10 +365,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { - err = mlx5_modify_header_alloc(priv->mdev, MLX5_FLOW_NAMESPACE_FDB, - parse_attr->num_mod_hdr_actions, - parse_attr->mod_hdr_actions, - &attr->mod_hdr_id); + err = mlx5e_attach_mod_hdr(priv, flow, 
parse_attr); kfree(parse_attr->mod_hdr_actions); if (err) { rule = ERR_PTR(err); @@ -224,21 +373,25 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } } - rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr); - if (IS_ERR(rule)) - goto err_add_rule; - + /* we get here if (1) there's no error (rule being null) or when + * (2) there's an encap action and we're on -EAGAIN (no valid neigh) + */ + if (rule != ERR_PTR(-EAGAIN)) { + rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr); + if (IS_ERR(rule)) + goto err_add_rule; + } return rule; err_add_rule: - if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - mlx5_modify_header_dealloc(priv->mdev, - attr->mod_hdr_id); + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5e_detach_mod_hdr(priv, flow); err_mod_hdr: mlx5_eswitch_del_vlan_action(esw, attr); err_add_vlan: if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) mlx5e_detach_encap(priv, flow); +err_attach_encap: return rule; } @@ -250,24 +403,25 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; - mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr); + mlx5_eswitch_del_offloaded_rule(esw, flow->rule, attr); } - mlx5_eswitch_del_vlan_action(esw, flow->esw_attr); + mlx5_eswitch_del_vlan_action(esw, attr); - if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { mlx5e_detach_encap(priv, flow); - kvfree(flow->esw_attr->parse_attr); + kvfree(attr->parse_attr); } - if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - mlx5_modify_header_dealloc(priv->mdev, - attr->mod_hdr_id); + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5e_detach_mod_hdr(priv, flow); } void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *esw_attr; struct mlx5e_tc_flow *flow; int err; @@ -283,10 +437,9 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, mlx5e_rep_queue_neigh_stats_work(priv); list_for_each_entry(flow, &e->flows, encap) { - flow->esw_attr->encap_id = e->encap_id; - flow->rule = mlx5e_tc_add_fdb_flow(priv, - flow->esw_attr->parse_attr, - flow); + esw_attr = flow->esw_attr; + esw_attr->encap_id = e->encap_id; + flow->rule = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr); if (IS_ERR(flow->rule)) { err = PTR_ERR(flow->rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", @@ -300,15 +453,13 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow *flow; - struct mlx5_fc *counter; list_for_each_entry(flow, &e->flows, encap) { if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; - counter = mlx5_flow_rule_counter(flow->rule); - mlx5_del_flow_rules(flow->rule); - mlx5_fc_destroy(priv->mdev, counter); + mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr); } } @@ -581,7 +732,9 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | - BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL))) { + BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_TCP) | + BIT(FLOW_DISSECTOR_KEY_IP))) { 
netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", f->dissector->used_keys); return -EOPNOTSUPP; @@ -765,6 +918,34 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, *min_inline = MLX5_INLINE_MODE_IP; } + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) { + struct flow_dissector_key_ip *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IP, + f->key); + struct flow_dissector_key_ip *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IP, + f->mask); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl); + + if (mask->ttl && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_ipv4_ttl)) + return -EOPNOTSUPP; + + if (mask->tos || mask->ttl) + *min_inline = MLX5_INLINE_MODE_IP; + } + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { struct flow_dissector_key_ports *key = skb_flow_dissector_target(f->dissector, @@ -808,6 +989,25 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, *min_inline = MLX5_INLINE_MODE_TCP_UDP; } + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_dissector_key_tcp *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_TCP, + f->key); + struct flow_dissector_key_tcp *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_TCP, + f->mask); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, + ntohs(mask->flags)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, + ntohs(key->flags)); + + if (mask->flags) + *min_inline = MLX5_INLINE_MODE_TCP_UDP; + } + return 0; } @@ -888,32 +1088,37 @@ struct mlx5_fields { u32 offset; }; +#define OFFLOAD(fw_field, size, field, off) \ + {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, offsetof(struct pedit_headers, field) + (off)} + static struct mlx5_fields fields[] = { - {MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16, 4, offsetof(struct pedit_headers, eth.h_dest[0])}, - {MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0, 2, offsetof(struct pedit_headers, eth.h_dest[4])}, - {MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16, 4, offsetof(struct pedit_headers, eth.h_source[0])}, - {MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0, 2, offsetof(struct pedit_headers, eth.h_source[4])}, - {MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE, 2, offsetof(struct pedit_headers, eth.h_proto)}, - - {MLX5_ACTION_IN_FIELD_OUT_IP_TTL, 1, offsetof(struct pedit_headers, ip4.ttl)}, - {MLX5_ACTION_IN_FIELD_OUT_SIPV4, 4, offsetof(struct pedit_headers, ip4.saddr)}, - {MLX5_ACTION_IN_FIELD_OUT_DIPV4, 4, offsetof(struct pedit_headers, ip4.daddr)}, - - {MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[0])}, - {MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[1])}, - {MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[2])}, - {MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[3])}, - {MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[0])}, - {MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[1])}, - {MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32, 4, 
offsetof(struct pedit_headers, ip6.daddr.s6_addr32[2])}, - {MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[3])}, - - {MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT, 2, offsetof(struct pedit_headers, tcp.source)}, - {MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT, 2, offsetof(struct pedit_headers, tcp.dest)}, - {MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS, 1, offsetof(struct pedit_headers, tcp.ack_seq) + 5}, - - {MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT, 2, offsetof(struct pedit_headers, udp.source)}, - {MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT, 2, offsetof(struct pedit_headers, udp.dest)}, + OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0), + OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0), + OFFLOAD(DMAC_15_0, 2, eth.h_dest[4], 0), + OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0), + OFFLOAD(SMAC_15_0, 2, eth.h_source[4], 0), + OFFLOAD(ETHERTYPE, 2, eth.h_proto, 0), + + OFFLOAD(IP_TTL, 1, ip4.ttl, 0), + OFFLOAD(SIPV4, 4, ip4.saddr, 0), + OFFLOAD(DIPV4, 4, ip4.daddr, 0), + + OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0), + OFFLOAD(SIPV6_95_64, 4, ip6.saddr.s6_addr32[1], 0), + OFFLOAD(SIPV6_63_32, 4, ip6.saddr.s6_addr32[2], 0), + OFFLOAD(SIPV6_31_0, 4, ip6.saddr.s6_addr32[3], 0), + OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0), + OFFLOAD(DIPV6_95_64, 4, ip6.daddr.s6_addr32[1], 0), + OFFLOAD(DIPV6_63_32, 4, ip6.daddr.s6_addr32[2], 0), + OFFLOAD(DIPV6_31_0, 4, ip6.daddr.s6_addr32[3], 0), + OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0), + + OFFLOAD(TCP_SPORT, 2, tcp.source, 0), + OFFLOAD(TCP_DPORT, 2, tcp.dest, 0), + OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5), + + OFFLOAD(UDP_SPORT, 2, udp.source, 0), + OFFLOAD(UDP_DPORT, 2, udp.dest, 0), }; /* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at @@ -925,12 +1130,14 @@ static int offload_pedit_fields(struct pedit_headers *masks, struct mlx5e_tc_flow_parse_attr *parse_attr) { struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; - int i, action_size, nactions, max_actions, first, last, first_z; + int i, action_size, nactions, max_actions, first, last, next_z; void *s_masks_p, *a_masks_p, *vals_p; struct mlx5_fields *f; u8 cmd, field_bsize; u32 s_mask, a_mask; unsigned long mask; + __be32 mask_be32; + __be16 mask_be16; void *action; set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET]; @@ -984,11 +1191,19 @@ static int offload_pedit_fields(struct pedit_headers *masks, field_bsize = f->size * BITS_PER_BYTE; - first_z = find_first_zero_bit(&mask, field_bsize); + if (field_bsize == 32) { + mask_be32 = *(__be32 *)&mask; + mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32)); + } else if (field_bsize == 16) { + mask_be16 = *(__be16 *)&mask; + mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16)); + } + first = find_first_bit(&mask, field_bsize); + next_z = find_next_zero_bit(&mask, field_bsize, first); last = find_last_bit(&mask, field_bsize); - if (first > 0 || last != (field_bsize - 1) || first_z < last) { - printk(KERN_WARNING "mlx5: partial rewrite (mask %lx) is currently not offloaded\n", + if (first < next_z && next_z < last) { + printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n", mask); return -EOPNOTSUPP; } @@ -997,17 +1212,17 @@ static int offload_pedit_fields(struct pedit_headers *masks, MLX5_SET(set_action_in, action, field, f->field); if (cmd == MLX5_ACTION_TYPE_SET) { - MLX5_SET(set_action_in, action, offset, 0); + MLX5_SET(set_action_in, action, offset, first); /* length is num of bits to be written, zero means length of 32 */ - MLX5_SET(set_action_in, action, 
length, field_bsize); + MLX5_SET(set_action_in, action, length, (last - first + 1)); } if (field_bsize == 32) - MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p)); + MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first); else if (field_bsize == 16) - MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p)); + MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first); else if (field_bsize == 8) - MLX5_SET(set_action_in, action, data, *(u8 *)vals_p); + MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first); action += action_size; nactions++; @@ -1132,6 +1347,69 @@ static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 upda return true; } +static bool modify_header_match_supported(struct mlx5_flow_spec *spec, + struct tcf_exts *exts) +{ + const struct tc_action *a; + bool modify_ip_header; + LIST_HEAD(actions); + u8 htype, ip_proto; + void *headers_v; + u16 ethertype; + int nkeys, i; + + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); + + /* for non-IP we only re-write MACs, so we're okay */ + if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6) + goto out_ok; + + modify_ip_header = false; + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { + if (!is_tcf_pedit(a)) + continue; + + nkeys = tcf_pedit_nkeys(a); + for (i = 0; i < nkeys; i++) { + htype = tcf_pedit_htype(a, i); + if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 || + htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) { + modify_ip_header = true; + break; + } + } + } + + ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); + if (modify_ip_header && ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) { + pr_info("can't offload re-write of ip proto %d\n", ip_proto); + return false; + } + +out_ok: + return true; +} + +static bool actions_match_supported(struct mlx5e_priv *priv, + struct tcf_exts *exts, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) +{ + u32 actions; + + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + actions = flow->esw_attr->action; + else + actions = flow->nic_attr->action; + + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + return modify_header_match_supported(&parse_attr->spec, exts); + + return true; +} + static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow) @@ -1141,7 +1419,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, LIST_HEAD(actions); int err; - if (tc_no_actions(exts)) + if (!tcf_exts_has_actions(exts)) return -EINVAL; attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; @@ -1149,10 +1427,6 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { - /* Only support a single action per rule */ - if (attr->action) - return -EINVAL; - if (is_tcf_gact_shot(a)) { attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; if (MLX5_CAP_FLOWTABLE(priv->mdev, @@ -1197,6 +1471,9 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } + if (!actions_match_supported(priv, exts, parse_attr, flow)) + return -EOPNOTSUPP; + return 0; } @@ -1262,12 +1539,10 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int ret; - dst = ip6_route_output(dev_net(mirred_dev), NULL, fl6); - ret 
= dst->error; - if (ret) { - dst_release(dst); + ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst, + fl6); + if (ret < 0) return ret; - } *out_ttl = ip6_dst_hoplimit(dst); @@ -1385,7 +1660,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, break; default: err = -EOPNOTSUPP; - goto out; + goto free_encap; } fl4.flowi4_tos = tun_key->tos; fl4.daddr = tun_key->u.ipv4.dst; @@ -1394,7 +1669,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &fl4, &n, &ttl); if (err) - goto out; + goto free_encap; /* used by mlx5e_detach_encap to lookup a neigh hash table * entry in the neigh hash table when a user deletes a rule @@ -1411,7 +1686,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, */ err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); if (err) - goto out; + goto free_encap; read_lock_bh(&n->lock); nud_state = n->nud_state; @@ -1435,8 +1710,8 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, if (!(nud_state & NUD_VALID)) { neigh_event_send(n, NULL); - neigh_release(n); - return -EAGAIN; + err = -EAGAIN; + goto out; } err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, @@ -1451,8 +1726,9 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, destroy_neigh_entry: mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); -out: +free_encap: kfree(encap_header); +out: if (n) neigh_release(n); return err; @@ -1489,7 +1765,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, break; default: err = -EOPNOTSUPP; - goto out; + goto free_encap; } fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); @@ -1499,7 +1775,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &fl6, &n, &ttl); if (err) - goto out; + goto free_encap; /* used by mlx5e_detach_encap to lookup a neigh hash table * entry in the neigh hash table when a user deletes a rule @@ -1516,7 +1792,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, */ err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); if (err) - goto out; + goto free_encap; read_lock_bh(&n->lock); nud_state = n->nud_state; @@ -1541,8 +1817,8 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, if (!(nud_state & NUD_VALID)) { neigh_event_send(n, NULL); - neigh_release(n); - return -EAGAIN; + err = -EAGAIN; + goto out; } err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, @@ -1557,8 +1833,9 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, destroy_neigh_entry: mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); -out: +free_encap: kfree(encap_header); +out: if (n) neigh_release(n); return err; @@ -1612,6 +1889,7 @@ vxlan_encap_offload_err: } } + /* must verify if encap is valid or not */ if (found) goto attach_flow; @@ -1638,6 +1916,8 @@ attach_flow: *encap_dev = e->out_dev; if (e->flags & MLX5_ENCAP_ENTRY_VALID) attr->encap_id = e->encap_id; + else + err = -EAGAIN; return err; @@ -1658,7 +1938,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, bool encap = false; int err = 0; - if (tc_no_actions(exts)) + if (!tcf_exts_has_actions(exts)) return -EINVAL; memset(attr, 0, sizeof(*attr)); @@ -1692,7 +1972,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); - struct net_device *out_dev, 
*encap_dev = NULL; + struct net_device *out_dev; struct mlx5e_priv *out_priv; out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); @@ -1705,17 +1985,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, rpriv = out_priv->ppriv; attr->out_rep = rpriv->rep; } else if (encap) { - err = mlx5e_attach_encap(priv, info, - out_dev, &encap_dev, flow); - if (err && err != -EAGAIN) - return err; + parse_attr->mirred_ifindex = ifindex; + parse_attr->tun_info = *info; + attr->parse_attr = parse_attr; attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - out_priv = netdev_priv(encap_dev); - rpriv = out_priv->ppriv; - attr->out_rep = rpriv->rep; - attr->parse_attr = parse_attr; + /* attr->out_rep is resolved when we handle encap */ } else { pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", priv->netdev->name, out_dev->name); @@ -1755,10 +2031,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } + + if (!actions_match_supported(priv, exts, parse_attr, flow)) + return -EOPNOTSUPP; + return err; } -int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, +int mlx5e_configure_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -1777,7 +2057,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, } flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); - parse_attr = mlx5_vzalloc(sizeof(*parse_attr)); + parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); if (!parse_attr || !flow) { err = -ENOMEM; goto err_free; @@ -1793,7 +2073,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow); if (err < 0) - goto err_handle_encap_flow; + goto err_free; flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow); } else { err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow); @@ -1804,10 +2084,13 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (IS_ERR(flow->rule)) { err = PTR_ERR(flow->rule); - goto err_free; + if (err != -EAGAIN) + goto err_free; } - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + if (err != -EAGAIN) + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + err = rhashtable_insert_fast(&tc->ht, &flow->node, tc->ht_params); if (err) @@ -1821,16 +2104,6 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, err_del_rule: mlx5e_tc_del_flow(priv, flow); -err_handle_encap_flow: - if (err == -EAGAIN) { - err = rhashtable_insert_fast(&tc->ht, &flow->node, - tc->ht_params); - if (err) - mlx5e_tc_del_flow(priv, flow); - else - return 0; - } - err_free: kvfree(parse_attr); kfree(flow); @@ -1862,9 +2135,7 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, { struct mlx5e_tc_table *tc = &priv->fs.tc; struct mlx5e_tc_flow *flow; - struct tc_action *a; struct mlx5_fc *counter; - LIST_HEAD(actions); u64 bytes; u64 packets; u64 lastuse; @@ -1883,13 +2154,7 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); - preempt_disable(); - - tcf_exts_to_list(f->exts, &actions); - list_for_each_entry(a, &actions, list) - tcf_action_stats_update(a, bytes, packets, lastuse); - - preempt_enable(); + tcf_exts_stats_update(f->exts, bytes, packets, lastuse); return 0; } @@ -1905,6 +2170,8 @@ int mlx5e_tc_init(struct mlx5e_priv *priv) { struct 
mlx5e_tc_table *tc = &priv->fs.tc; + hash_init(tc->mod_hdr_tbl); + tc->ht_params = mlx5e_tc_flow_ht_params; return rhashtable_init(&tc->ht, &tc->ht_params); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index ecbe30d808ae..c14c263a739b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -33,12 +33,15 @@ #ifndef __MLX5_EN_TC_H__ #define __MLX5_EN_TC_H__ +#include <net/pkt_cls.h> + #define MLX5E_TC_FLOW_ID_MASK 0x0000ffff +#ifdef CONFIG_MLX5_ESWITCH int mlx5e_tc_init(struct mlx5e_priv *priv); void mlx5e_tc_cleanup(struct mlx5e_priv *priv); -int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, +int mlx5e_configure_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f); int mlx5e_delete_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f); @@ -60,4 +63,10 @@ static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) return atomic_read(&priv->fs.tc.ht.nelems); } +#else /* CONFIG_MLX5_ESWITCH */ +static inline int mlx5e_tc_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_tc_cleanup(struct mlx5e_priv *priv) {} +static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return 0; } +#endif + #endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index ab3bb026ff9e..1d6925d4369a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -33,7 +33,8 @@ #include <linux/tcp.h> #include <linux/if_vlan.h> #include "en.h" -#include "ipoib.h" +#include "ipoib/ipoib.h" +#include "en_accel/ipsec_rxtx.h" #define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ @@ -127,10 +128,10 @@ static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb) return mlx5e_skb_l2_header_offset(skb); } -static inline unsigned int mlx5e_calc_min_inline(enum mlx5_inline_modes mode, - struct sk_buff *skb) +static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode, + struct sk_buff *skb) { - int hlen; + u16 hlen; switch (mode) { case MLX5_INLINE_MODE_NONE: @@ -139,19 +140,22 @@ static inline unsigned int mlx5e_calc_min_inline(enum mlx5_inline_modes mode, hlen = eth_get_headlen(skb->data, skb_headlen(skb)); if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb)) hlen += VLAN_HLEN; - return hlen; + break; case MLX5_INLINE_MODE_IP: /* When transport header is set to zero, it means no transport * header. When transport header is set to 0xff's, it means * transport header wasn't set. 
*/ - if (skb_transport_offset(skb)) - return mlx5e_skb_l3_header_offset(skb); + if (skb_transport_offset(skb)) { + hlen = mlx5e_skb_l3_header_offset(skb); + break; + } /* fall through */ case MLX5_INLINE_MODE_L2: default: - return mlx5e_skb_l2_header_offset(skb); + hlen = mlx5e_skb_l2_header_offset(skb); } + return min_t(u16, hlen, skb->len); } static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data, @@ -189,6 +193,7 @@ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct sq->stats.csum_partial_inner++; } else { eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; + sq->stats.csum_partial++; } } else sq->stats.csum_none++; @@ -245,7 +250,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int fsz = skb_frag_size(frag); dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, - DMA_TO_DEVICE); + DMA_TO_DEVICE); if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) return -ENOMEM; @@ -299,12 +304,9 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, } } -static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb) +static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_tx_wqe *wqe, u16 pi) { - struct mlx5_wq_cyc *wq = &sq->wq; - - u16 pi = sq->pc & wq->sz_m1; - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; @@ -319,8 +321,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb) u16 ds_cnt; u16 ihs; - memset(wqe, 0, sizeof(*wqe)); - mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); if (skb_is_gso(skb)) { @@ -375,13 +375,27 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_txqsq *sq = priv->txq2sq[skb_get_queue_mapping(skb)]; + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = sq->pc & wq->sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - return mlx5e_sq_xmit(sq, skb); + memset(wqe, 0, sizeof(*wqe)); + +#ifdef CONFIG_MLX5_EN_IPSEC + if (sq->state & BIT(MLX5E_SQ_STATE_IPSEC)) { + skb = mlx5e_ipsec_handle_tx_skb(dev, wqe, skb); + if (unlikely(!skb)) + return NETDEV_TX_OK; + } +#endif + + return mlx5e_sq_xmit(sq, skb, wqe, pi); } bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) { struct mlx5e_txqsq *sq; + struct mlx5_cqe64 *cqe; u32 dma_fifo_cc; u32 nbytes; u16 npkts; @@ -390,7 +404,11 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) sq = container_of(cq, struct mlx5e_txqsq, cq); - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED))) + return false; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) return false; npkts = 0; @@ -404,15 +422,11 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) /* avoid dirtying sq cache line every cqe */ dma_fifo_cc = sq->dma_fifo_cc; - for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { - struct mlx5_cqe64 *cqe; + i = 0; + do { u16 wqe_counter; bool last_wqe; - cqe = mlx5e_get_cqe(cq); - if (!cqe) - break; - mlx5_cqwq_pop(&cq->wq); wqe_counter = be16_to_cpu(cqe->wqe_counter); @@ -455,7 +469,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) sqcc += wi->num_wqebbs; napi_consume_skb(skb, napi_budget); } while (!last_wqe); - } + + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); mlx5_cqwq_update_db_record(&cq->wq); @@ -557,11 +572,16 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, 
struct sk_buff *skb, if (skb_is_gso(skb)) { opcode = MLX5_OPCODE_LSO; ihs = mlx5e_txwqe_build_eseg_gso(sq, skb, eseg, &num_bytes); + sq->stats.packets += skb_shinfo(skb)->gso_segs; } else { ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); + sq->stats.packets++; } + sq->stats.bytes += num_bytes; + sq->stats.xmit_more += skb->xmit_more; + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; if (ihs) { memcpy(eseg->inline_hdr.start, skb_data, ihs); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 5ca6714e3e02..e906b754415c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -30,83 +30,18 @@ * SOFTWARE. */ +#include <linux/irq.h> #include "en.h" -struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq) +static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) { - struct mlx5_cqwq *wq = &cq->wq; - u32 ci = mlx5_cqwq_get_ci(wq); - struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); - u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK; - u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1; + int current_cpu = smp_processor_id(); + const struct cpumask *aff; + struct irq_data *idata; - if (cqe_ownership_bit != sw_ownership_val) - return NULL; - - /* ensure cqe content is read after cqe ownership bit */ - dma_rmb(); - - return cqe; -} - -static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, - struct mlx5e_icosq *sq, - struct mlx5_cqe64 *cqe, - u16 *sqcc) -{ - struct mlx5_wq_cyc *wq = &sq->wq; - u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; - struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; - struct mlx5e_rq *rq = &sq->channel->rq; - - prefetch(rq); - mlx5_cqwq_pop(&cq->wq); - *sqcc += icowi->num_wqebbs; - - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { - WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", - cqe->op_own); - return; - } - - if (likely(icowi->opcode == MLX5_OPCODE_UMR)) { - mlx5e_post_rx_mpwqe(rq); - return; - } - - if (unlikely(icowi->opcode != MLX5_OPCODE_NOP)) - WARN_ONCE(true, - "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", - icowi->opcode); -} - -static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) -{ - struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); - struct mlx5_cqe64 *cqe; - u16 sqcc; - - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) - return; - - cqe = mlx5e_get_cqe(cq); - if (likely(!cqe)) - return; - - /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), - * otherwise a cq overrun may occur - */ - sqcc = sq->cc; - - /* by design, there's only a single cqe */ - mlx5e_poll_ico_single_cqe(cq, sq, cqe, &sqcc); - - mlx5_cqwq_update_db_record(&cq->wq); - - /* ensure cq space is freed before enabling more cqes */ - wmb(); - - sq->cc = sqcc; + idata = irq_desc_get_irq_data(c->irq_desc); + aff = irq_data_get_affinity_mask(idata); + return cpumask_test_cpu(current_cpu, aff); } int mlx5e_napi_poll(struct napi_struct *napi, int budget) @@ -117,8 +52,6 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) int work_done; int i; - clear_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); - for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); @@ -128,25 +61,22 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); busy |= work_done == budget; - mlx5e_poll_ico_cq(&c->icosq.cq); - - busy |= mlx5e_post_rx_wqes(&c->rq); + busy 
|= c->rq.post_wqes(&c->rq); - if (busy) - return budget; - - napi_complete_done(napi, work_done); + if (busy) { + if (likely(mlx5e_channel_no_affinity_change(c))) + return budget; + if (work_done == budget) + work_done--; + } - /* avoid losing completion event during/after polling cqs */ - if (test_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags)) { - napi_schedule(napi); + if (unlikely(!napi_complete_done(napi, work_done))) return work_done; - } for (i = 0; i < c->num_tc; i++) mlx5e_cq_arm(&c->sq[i].cq); - if (test_bit(MLX5E_RQ_STATE_AM, &c->rq.state)) + if (MLX5E_TEST_BIT(c->rq.state, MLX5E_RQ_STATE_AM)) mlx5e_rx_am(&c->rq); mlx5e_cq_arm(&c->rq.cq); @@ -160,7 +90,6 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq) struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); cq->event_ctr++; - set_bit(MLX5E_CHANNEL_NAPI_SCHED, &cq->channel->flags); napi_schedule(cq->napi); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 33eae5ad2fb0..fc606bfd1d6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -35,9 +35,8 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/cmd.h> #include "mlx5_core.h" -#ifdef CONFIG_MLX5_CORE_EN +#include "fpga/core.h" #include "eswitch.h" -#endif enum { MLX5_EQE_SIZE = sizeof(struct mlx5_eqe), @@ -156,6 +155,12 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_PAGE_FAULT"; case MLX5_EVENT_TYPE_PPS_EVENT: return "MLX5_EVENT_TYPE_PPS_EVENT"; + case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: + return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; + case MLX5_EVENT_TYPE_FPGA_ERROR: + return "MLX5_EVENT_TYPE_FPGA_ERROR"; + case MLX5_EVENT_TYPE_GENERAL_EVENT: + return "MLX5_EVENT_TYPE_GENERAL_EVENT"; default: return "Unrecognized event"; } @@ -186,7 +191,8 @@ static void eq_update_ci(struct mlx5_eq *eq, int arm) { __be32 __iomem *addr = eq->doorbell + (arm ? 
0 : 2); u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); - __raw_writel((__force u32) cpu_to_be32(val), addr); + + __raw_writel((__force u32)cpu_to_be32(val), addr); /* We still want ordering, just not swabbing, so add a barrier */ mb(); } @@ -373,6 +379,20 @@ int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume); #endif +static void general_event_handler(struct mlx5_core_dev *dev, + struct mlx5_eqe *eqe) +{ + switch (eqe->sub_type) { + case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: + if (dev->event) + dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0); + break; + default: + mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n", + eqe->sub_type); + } +} + static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) { struct mlx5_eq *eq = eq_ptr; @@ -462,11 +482,9 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) } break; -#ifdef CONFIG_MLX5_CORE_EN case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: mlx5_eswitch_vport_event(dev->priv.eswitch, eqe); break; -#endif case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: mlx5_port_module_event(dev, eqe); @@ -476,6 +494,14 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) if (dev->event) dev->event(dev, MLX5_DEV_EVENT_PPS, (unsigned long)eqe); break; + + case MLX5_EVENT_TYPE_FPGA_ERROR: + mlx5_fpga_event(dev, eqe->type, &eqe->data.raw); + break; + + case MLX5_EVENT_TYPE_GENERAL_EVENT: + general_event_handler(dev, eqe); + break; default: mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", eqe->type, eq->eqn); @@ -548,7 +574,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, inlen = MLX5_ST_SZ_BYTES(create_eq_in) + MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages; - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; goto err_buf; @@ -575,7 +601,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, name, pci_name(dev->pdev)); eq->eqn = MLX5_GET(create_eq_out, out, eq_number); - eq->irqn = priv->msix_arr[vecidx].vector; + eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; err = request_irq(eq->irqn, handler, 0, @@ -610,7 +636,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, return 0; err_irq: - free_irq(priv->msix_arr[vecidx].vector, eq); + free_irq(eq->irqn, eq); err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -651,11 +677,6 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) } EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq); -u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx) -{ - return dev->priv.msix_arr[MLX5_EQ_VEC_ASYNC].vector; -} - int mlx5_eq_init(struct mlx5_core_dev *dev) { int err; @@ -667,7 +688,6 @@ int mlx5_eq_init(struct mlx5_core_dev *dev) return err; } - void mlx5_eq_cleanup(struct mlx5_core_dev *dev) { mlx5_eq_debugfs_cleanup(dev); @@ -679,20 +699,24 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; int err; + if (MLX5_VPORT_MANAGER(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH && - MLX5_CAP_GEN(dev, vport_group_manager) && - mlx5_core_is_pf(dev)) - async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); + MLX5_CAP_GEN(dev, general_notification_event)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT); if (MLX5_CAP_GEN(dev, port_module_event)) async_event_mask |= (1ull 
<< MLX5_EVENT_TYPE_PORT_MODULE_EVENT); else mlx5_core_dbg(dev, "port_module_event is not set\n"); - if (MLX5_CAP_GEN(dev, pps)) + if (MLX5_PPS_CAP(dev)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT); + if (MLX5_CAP_GEN(dev, fpga)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR); + err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 2e34d95ea776..c77f4c0c7769 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -46,19 +46,13 @@ enum { MLX5_ACTION_DEL = 2, }; -/* E-Switch UC L2 table hash node */ -struct esw_uc_addr { - struct l2addr_node node; - u32 table_index; - u32 vport; -}; - /* Vport UC/MC hash node */ struct vport_addr { struct l2addr_node node; u8 action; u32 vport; - struct mlx5_flow_handle *flow_rule; /* SRIOV only */ + struct mlx5_flow_handle *flow_rule; + bool mpfs; /* UC MAC was added to MPFs */ /* A flag indicating that mac was added due to mc promiscuous vport */ bool mc_promisc; }; @@ -154,81 +148,6 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in)); } -/* HW L2 Table (MPFS) management */ -static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index, - u8 *mac, u8 vlan_valid, u16 vlan) -{ - u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0}; - u8 *in_mac_addr; - - MLX5_SET(set_l2_table_entry_in, in, opcode, - MLX5_CMD_OP_SET_L2_TABLE_ENTRY); - MLX5_SET(set_l2_table_entry_in, in, table_index, index); - MLX5_SET(set_l2_table_entry_in, in, vlan_valid, vlan_valid); - MLX5_SET(set_l2_table_entry_in, in, vlan, vlan); - - in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); - ether_addr_copy(&in_mac_addr[2], mac); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index) -{ - u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0}; - - MLX5_SET(delete_l2_table_entry_in, in, opcode, - MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); - MLX5_SET(delete_l2_table_entry_in, in, table_index, index); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix) -{ - int err = 0; - - *ix = find_first_zero_bit(l2_table->bitmap, l2_table->size); - if (*ix >= l2_table->size) - err = -ENOSPC; - else - __set_bit(*ix, l2_table->bitmap); - - return err; -} - -static void free_l2_table_index(struct mlx5_l2_table *l2_table, u32 ix) -{ - __clear_bit(ix, l2_table->bitmap); -} - -static int set_l2_table_entry(struct mlx5_core_dev *dev, u8 *mac, - u8 vlan_valid, u16 vlan, - u32 *index) -{ - struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; - int err; - - err = alloc_l2_table_index(l2_table, index); - if (err) - return err; - - err = set_l2_table_entry_cmd(dev, *index, mac, vlan_valid, vlan); - if (err) - free_l2_table_index(l2_table, *index); - - return err; -} - -static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index) -{ - struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; - - del_l2_table_entry_cmd(dev, index); - free_l2_table_index(l2_table, index); -} - /* E-Switch FDB */ static struct 
mlx5_flow_handle * __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, @@ -248,11 +167,10 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, if (rx_rule) match_header |= MLX5_MATCH_MISC_PARAMETERS; - spec = mlx5_vzalloc(sizeof(*spec)); - if (!spec) { - esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n"); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) return NULL; - } + dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.dmac_47_16); dmac_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@ -350,10 +268,9 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports) return -EOPNOTSUPP; } - flow_group_in = mlx5_vzalloc(inlen); + flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; - memset(flow_group_in, 0, inlen); table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); @@ -457,65 +374,60 @@ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { - struct hlist_head *hash = esw->l2_table.l2_hash; - struct esw_uc_addr *esw_uc; u8 *mac = vaddr->node.addr; u32 vport = vaddr->vport; int err; - esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); - if (esw_uc) { + /* Skip mlx5_mpfs_add_mac for PFs, + * it is already done by the PF netdev in mlx5e_execute_l2_action + */ + if (!vport) + goto fdb_add; + + err = mlx5_mpfs_add_mac(esw->dev, mac); + if (err) { esw_warn(esw->dev, - "Failed to set L2 mac(%pM) for vport(%d), mac is already in use by vport(%d)\n", - mac, vport, esw_uc->vport); - return -EEXIST; + "Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n", + mac, vport, err); + return err; } + vaddr->mpfs = true; - esw_uc = l2addr_hash_add(hash, mac, struct esw_uc_addr, GFP_KERNEL); - if (!esw_uc) - return -ENOMEM; - esw_uc->vport = vport; - - err = set_l2_table_entry(esw->dev, mac, 0, 0, &esw_uc->table_index); - if (err) - goto abort; - +fdb_add: /* SRIOV is enabled: Forward UC MAC to vport */ if (esw->fdb_table.fdb && esw->mode == SRIOV_LEGACY) vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); - esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n", - vport, mac, esw_uc->table_index, vaddr->flow_rule); - return err; -abort: - l2addr_hash_del(esw_uc); - return err; + esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n", + vport, mac, vaddr->flow_rule); + + return 0; } static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { - struct hlist_head *hash = esw->l2_table.l2_hash; - struct esw_uc_addr *esw_uc; u8 *mac = vaddr->node.addr; u32 vport = vaddr->vport; + int err = 0; - esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); - if (!esw_uc || esw_uc->vport != vport) { - esw_debug(esw->dev, - "MAC(%pM) doesn't belong to vport (%d)\n", - mac, vport); - return -EINVAL; - } - esw_debug(esw->dev, "\tDELETE UC MAC: vport[%d] %pM index:%d fr(%p)\n", - vport, mac, esw_uc->table_index, vaddr->flow_rule); + /* Skip mlx5_mpfs_del_mac for PFs, + * it is already done by the PF netdev in mlx5e_execute_l2_action + */ + if (!vport || !vaddr->mpfs) + goto fdb_del; - del_l2_table_entry(esw->dev, esw_uc->table_index); + err = mlx5_mpfs_del_mac(esw->dev, mac); + if (err) + esw_warn(esw->dev, + "Failed to del L2 table mac(%pM) for vport(%d), err(%d)\n", + mac, vport, err); + vaddr->mpfs = false; +fdb_del: if (vaddr->flow_rule) mlx5_del_flow_rules(vaddr->flow_rule); vaddr->flow_rule = NULL; - 
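
The rewritten esw_add_uc_addr()/esw_del_uc_addr() above drop the driver-private L2 hash and bitmap and program the HW L2 table through the new MPFS helpers instead. A condensed sketch of the add-side control flow (illustrative only, not a drop-in replacement; error paths trimmed):

/* Condensed illustration: MPFS entry first (VF vports only), then the legacy FDB rule. */
static int example_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
{
        u8 *mac = vaddr->node.addr;
        int err;

        if (vaddr->vport) {             /* vport 0 (PF) is handled by the PF netdev */
                err = mlx5_mpfs_add_mac(esw->dev, mac);
                if (err)
                        return err;
                vaddr->mpfs = true;     /* remembered so esw_del_uc_addr() can undo it */
        }

        if (esw->fdb_table.fdb && esw->mode == SRIOV_LEGACY)
                vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vaddr->vport);

        return 0;
}
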
l2addr_hash_del(esw_uc); return 0; } @@ -961,7 +873,7 @@ static int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, return -EOPNOTSUPP; } - flow_group_in = mlx5_vzalloc(inlen); + flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; @@ -1078,7 +990,7 @@ static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, return -EOPNOTSUPP; } - flow_group_in = mlx5_vzalloc(inlen); + flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; @@ -1219,7 +1131,6 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n", vport->vport); return -EPERM; - } esw_vport_cleanup_ingress_rules(esw, vport); @@ -1241,11 +1152,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n", vport->vport, vport->info.vlan, vport->info.qos); - spec = mlx5_vzalloc(sizeof(*spec)); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { err = -ENOMEM; - esw_warn(esw->dev, "vport[%d] configure ingress rules failed, err(%d)\n", - vport->vport, err); goto out; } @@ -1322,11 +1231,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", vport->vport, vport->info.vlan, vport->info.qos); - spec = mlx5_vzalloc(sizeof(*spec)); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { err = -ENOMEM; - esw_warn(esw->dev, "vport[%d] configure egress rules failed, err(%d)\n", - vport->vport, err); goto out; } @@ -1592,7 +1499,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) /* Mark this vport as disabled to discard new events */ vport->enabled = false; - synchronize_irq(mlx5_get_msix_vec(esw->dev, MLX5_EQ_VEC_ASYNC)); + synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC)); /* Wait for current already scheduled events to complete */ flush_workqueue(esw->work_queue); /* Disable events from this vport */ @@ -1618,13 +1525,14 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) } /* Public E-Switch API */ +#define ESW_ALLOWED(esw) ((esw) && MLX5_VPORT_MANAGER((esw)->dev)) + int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { int err; int i, enabled_events; - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || - MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + if (!ESW_ALLOWED(esw)) return 0; if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) || @@ -1641,7 +1549,6 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); esw->mode = mode; - esw_disable_vport(esw, 0); if (mode == SRIOV_LEGACY) err = esw_create_legacy_fdb_table(esw, nvfs + 1); @@ -1654,7 +1561,11 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) if (err) esw_warn(esw->dev, "Failed to create eswitch TSAR"); - enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : UC_ADDR_CHANGE; + /* Don't enable vport events when in SRIOV_OFFLOADS mode, since: + * 1. L2 table (MPFS) is programmed by PF/VF representors netdevs set_rx_mode + * 2. FDB/Eswitch is programmed by user space tools + */ + enabled_events = (mode == SRIOV_LEGACY) ? 
SRIOV_VPORT_EVENTS : 0; for (i = 0; i <= nvfs; i++) esw_enable_vport(esw, i, enabled_events); @@ -1663,7 +1574,6 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) return 0; abort: - esw_enable_vport(esw, 0, UC_ADDR_CHANGE); esw->mode = SRIOV_NONE; return err; } @@ -1674,8 +1584,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) int nvports; int i; - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || - MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE) return; esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n", @@ -1698,44 +1607,21 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_offloads_cleanup(esw, nvports); esw->mode = SRIOV_NONE; - /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ - esw_enable_vport(esw, 0, UC_ADDR_CHANGE); -} - -void mlx5_eswitch_attach(struct mlx5_eswitch *esw) -{ - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || - MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) - return; - - esw_enable_vport(esw, 0, UC_ADDR_CHANGE); - /* VF Vports will be enabled when SRIOV is enabled */ -} - -void mlx5_eswitch_detach(struct mlx5_eswitch *esw) -{ - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || - MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) - return; - - esw_disable_vport(esw, 0); } int mlx5_eswitch_init(struct mlx5_core_dev *dev) { - int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); int total_vports = MLX5_TOTAL_VPORTS(dev); struct mlx5_eswitch *esw; int vport_num; int err; - if (!MLX5_CAP_GEN(dev, vport_group_manager) || - MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + if (!MLX5_VPORT_MANAGER(dev)) return 0; esw_info(dev, - "Total vports %d, l2 table size(%d), per vport: max uc(%d) max mc(%d)\n", - total_vports, l2_table_size, + "Total vports %d, per vport: max uc(%d) max mc(%d)\n", + total_vports, MLX5_MAX_UC_PER_VPORT(dev), MLX5_MAX_MC_PER_VPORT(dev)); @@ -1745,14 +1631,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->dev = dev; - esw->l2_table.bitmap = kcalloc(BITS_TO_LONGS(l2_table_size), - sizeof(uintptr_t), GFP_KERNEL); - if (!esw->l2_table.bitmap) { - err = -ENOMEM; - goto abort; - } - esw->l2_table.size = l2_table_size; - esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { err = -ENOMEM; @@ -1775,6 +1653,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) } hash_init(esw->offloads.encap_tbl); + hash_init(esw->offloads.mod_hdr_tbl); mutex_init(&esw->state_lock); for (vport_num = 0; vport_num < total_vports; vport_num++) { @@ -1802,7 +1681,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) abort: if (esw->work_queue) destroy_workqueue(esw->work_queue); - kfree(esw->l2_table.bitmap); kfree(esw->vports); kfree(esw->offloads.vport_reps); kfree(esw); @@ -1811,15 +1689,13 @@ abort: void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) { - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || - MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + if (!esw || !MLX5_VPORT_MANAGER(esw->dev)) return; esw_info(esw->dev, "cleanup\n"); esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); - kfree(esw->l2_table.bitmap); kfree(esw->offloads.vport_reps); kfree(esw->vports); kfree(esw); @@ -1843,8 +1719,6 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) } /* Vport Administration */ -#define ESW_ALLOWED(esw) \ - (esw && MLX5_CAP_GEN(esw->dev, 
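
The old ESW_ALLOWED() definition and the open-coded capability checks being removed here are consolidated behind MLX5_VPORT_MANAGER(). Based on the conditions this diff replaces, the gate amounts to roughly the following (an approximation for illustration; the real macro is defined elsewhere in the driver):

/* Approximation of the vport-manager gate: vport group manager, Ethernet port, and PF. */
#define EXAMPLE_VPORT_MANAGER(dev)                                      \
        (MLX5_CAP_GEN(dev, vport_group_manager) &&                      \
         MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&      \
         mlx5_core_is_pf(dev))
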
vport_group_manager) && mlx5_core_is_pf(esw->dev)) #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, @@ -2158,7 +2032,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, if (!LEGAL_VPORT(esw, vport)) return -EINVAL; - out = mlx5_vzalloc(outlen); + out = kvzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index b746f62c8c79..565c8b7a399a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -37,6 +37,15 @@ #include <linux/if_link.h> #include <net/devlink.h> #include <linux/mlx5/device.h> +#include "lib/mpfs.h" + +enum { + SRIOV_NONE, + SRIOV_LEGACY, + SRIOV_OFFLOADS +}; + +#ifdef CONFIG_MLX5_ESWITCH #define MLX5_MAX_UC_PER_VPORT(dev) \ (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list)) @@ -44,9 +53,6 @@ #define MLX5_MAX_MC_PER_VPORT(dev) \ (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) -#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) -#define MLX5_L2_ADDR_HASH(addr) (addr[5]) - #define FDB_UPLINK_VPORT 0xffff #define MLX5_MIN_BW_SHARE 1 @@ -54,48 +60,6 @@ #define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ min_t(u32, max_t(u32, (rate) / (divider), MLX5_MIN_BW_SHARE), limit) -/* L2 -mac address based- hash helpers */ -struct l2addr_node { - struct hlist_node hlist; - u8 addr[ETH_ALEN]; -}; - -#define for_each_l2hash_node(hn, tmp, hash, i) \ - for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \ - hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) - -#define l2addr_hash_find(hash, mac, type) ({ \ - int ix = MLX5_L2_ADDR_HASH(mac); \ - bool found = false; \ - type *ptr = NULL; \ - \ - hlist_for_each_entry(ptr, &hash[ix], node.hlist) \ - if (ether_addr_equal(ptr->node.addr, mac)) {\ - found = true; \ - break; \ - } \ - if (!found) \ - ptr = NULL; \ - ptr; \ -}) - -#define l2addr_hash_add(hash, mac, type, gfp) ({ \ - int ix = MLX5_L2_ADDR_HASH(mac); \ - type *ptr = NULL; \ - \ - ptr = kzalloc(sizeof(type), gfp); \ - if (ptr) { \ - ether_addr_copy(ptr->node.addr, mac); \ - hlist_add_head(&ptr->node.hlist, &hash[ix]);\ - } \ - ptr; \ -}) - -#define l2addr_hash_del(ptr) ({ \ - hlist_del(&ptr->node.hlist); \ - kfree(ptr); \ -}) - struct vport_ingress { struct mlx5_flow_table *acl; struct mlx5_flow_group *allow_untagged_spoofchk_grp; @@ -150,12 +114,6 @@ struct mlx5_vport { u16 enabled_events; }; -struct mlx5_l2_table { - struct hlist_head l2_hash[MLX5_L2_ADDR_HASH_SIZE]; - u32 size; - unsigned long *bitmap; -}; - struct mlx5_eswitch_fdb { void *fdb; union { @@ -175,12 +133,6 @@ struct mlx5_eswitch_fdb { }; }; -enum { - SRIOV_NONE, - SRIOV_LEGACY, - SRIOV_OFFLOADS -}; - struct mlx5_esw_sq { struct mlx5_flow_handle *send_to_vport_rule; struct list_head list; @@ -207,6 +159,7 @@ struct mlx5_esw_offload { struct mlx5_flow_group *vport_rx_group; struct mlx5_eswitch_rep *vport_reps; DECLARE_HASHTABLE(encap_tbl, 8); + DECLARE_HASHTABLE(mod_hdr_tbl, 8); u8 inline_mode; u64 num_flows; u8 encap; @@ -221,7 +174,6 @@ struct esw_mc_addr { /* SRIOV only */ struct mlx5_eswitch { struct mlx5_core_dev *dev; - struct mlx5_l2_table l2_table; struct mlx5_eswitch_fdb fdb_table; struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct workqueue_struct *work_queue; @@ -249,8 +201,6 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void 
mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); -void mlx5_eswitch_attach(struct mlx5_eswitch *esw); -void mlx5_eswitch_detach(struct mlx5_eswitch *esw); void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); @@ -344,4 +294,13 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, #define esw_debug(dev, format, ...) \ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) +#else /* CONFIG_MLX5_ESWITCH */ +/* eswitch API stubs */ +static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} +static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {} +static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } +static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} +#endif /* CONFIG_MLX5_ESWITCH */ + #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index a53e982a6863..d9fd8570b07c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -311,9 +311,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn struct mlx5_flow_spec *spec; void *misc; - spec = mlx5_vzalloc(sizeof(*spec)); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { - esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n"); flow_rule = ERR_PTR(-ENOMEM); goto out; } @@ -401,9 +400,8 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) struct mlx5_flow_spec *spec; int err = 0; - spec = mlx5_vzalloc(sizeof(*spec)); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { - esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n"); err = -ENOMEM; goto out; } @@ -435,6 +433,8 @@ static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw) struct mlx5_flow_table *fdb = NULL; int esw_size, err = 0; u32 flags = 0; + u32 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); if (!root_ns) { @@ -445,9 +445,9 @@ static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw) esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n", MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), - MLX5_CAP_GEN(dev, max_flow_counter), ESW_OFFLOADS_NUM_GROUPS); + max_flow_counter, ESW_OFFLOADS_NUM_GROUPS); - esw_size = min_t(int, MLX5_CAP_GEN(dev, max_flow_counter) * ESW_OFFLOADS_NUM_GROUPS, + esw_size = min_t(int, max_flow_counter * ESW_OFFLOADS_NUM_GROUPS, 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) @@ -488,7 +488,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) u32 *flow_group_in; esw_debug(esw->dev, "Create offloads FDB Tables\n"); - flow_group_in = mlx5_vzalloc(inlen); + flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; @@ -631,7 +631,7 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) int err = 0; int nvports = priv->sriov.num_vfs + 2; - flow_group_in = mlx5_vzalloc(inlen); + flow_group_in = 
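
The repeated mlx5_vzalloc() conversions in this file and the surrounding ones all follow the same idiom: the driver wrapper is dropped in favor of the generic kvzalloc()/kvfree() pair. A small sketch of the resulting allocation pattern for a variable-sized command buffer (function name and output struct chosen for illustration):

/* Sketch of the kvzalloc()/kvfree() idiom that replaces mlx5_vzalloc() throughout. */
static int example_exec_variable_cmd(struct mlx5_core_dev *dev, int inlen)
{
        u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0};
        u32 *in;
        int err;

        in = kvzalloc(inlen, GFP_KERNEL);       /* may fall back to vmalloc for large buffers */
        if (!in)
                return -ENOMEM;

        /* ... fill the command with MLX5_SET()/MLX5_ADDR_OF() ... */
        err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));

        kvfree(in);
        return err;
}
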
kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; @@ -675,9 +675,8 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) struct mlx5_flow_spec *spec; void *misc; - spec = mlx5_vzalloc(sizeof(*spec)); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { - esw_warn(esw->dev, "Failed to alloc match parameters\n"); flow_rule = ERR_PTR(-ENOMEM); goto out; } @@ -694,7 +693,7 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, - &flow_act, &dest, 1); + &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule)); goto out; @@ -818,7 +817,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) struct mlx5_eswitch_rep *rep; int vport; - for (vport = 0; vport < nvports; vport++) { + for (vport = nvports - 1; vport >= 0; vport--) { rep = &esw->offloads.vport_reps[vport]; if (!rep->valid) continue; @@ -1100,7 +1099,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap) if (err) { esw_warn(esw->dev, "Failed re-creating fast FDB table, err %d\n", err); esw->offloads.encap = !encap; - (void) esw_create_offloads_fast_fdb_table(esw); + (void)esw_create_offloads_fast_fdb_table(esw); } return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile new file mode 100644 index 000000000000..d8e17110f25d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c new file mode 100644 index 000000000000..c0fd2212e890 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/cmd.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/device.h> + +#include "mlx5_core.h" +#include "fpga/cmd.h" + +#define MLX5_FPGA_ACCESS_REG_SZ (MLX5_ST_SZ_DW(fpga_access_reg) + \ + MLX5_FPGA_ACCESS_REG_SIZE_MAX) + +int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr, + void *buf, bool write) +{ + u32 in[MLX5_FPGA_ACCESS_REG_SZ] = {0}; + u32 out[MLX5_FPGA_ACCESS_REG_SZ]; + int err; + + if (size & 3) + return -EINVAL; + if (addr & 3) + return -EINVAL; + if (size > MLX5_FPGA_ACCESS_REG_SIZE_MAX) + return -EINVAL; + + MLX5_SET(fpga_access_reg, in, size, size); + MLX5_SET64(fpga_access_reg, in, address, addr); + if (write) + memcpy(MLX5_ADDR_OF(fpga_access_reg, in, data), buf, size); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_FPGA_ACCESS_REG, 0, write); + if (err) + return err; + + if (!write) + memcpy(buf, MLX5_ADDR_OF(fpga_access_reg, out, data), size); + + return 0; +} + +int mlx5_fpga_caps(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(fpga_cap)] = {0}; + + return mlx5_core_access_reg(dev, in, sizeof(in), dev->caps.fpga, + MLX5_ST_SZ_BYTES(fpga_cap), + MLX5_REG_FPGA_CAP, 0, 0); +} + +int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op) +{ + u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_ctrl)]; + + MLX5_SET(fpga_ctrl, in, operation, op); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_FPGA_CTRL, 0, true); +} + +int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size) +{ + unsigned int cap_size = MLX5_CAP_FPGA(dev, sandbox_extended_caps_len); + u64 addr = MLX5_CAP64_FPGA(dev, sandbox_extended_caps_addr); + unsigned int read; + int ret = 0; + + if (cap_size > size) { + mlx5_core_warn(dev, "Not enough buffer %u for FPGA SBU caps %u", + size, cap_size); + return -EINVAL; + } + + while (cap_size > 0) { + read = min_t(unsigned int, cap_size, + MLX5_FPGA_ACCESS_REG_SIZE_MAX); + + ret = mlx5_fpga_access_reg(dev, read, addr, caps, false); + if (ret) { + mlx5_core_warn(dev, "Error reading FPGA SBU caps %u bytes at address 0x%llx: %d", + read, addr, ret); + return ret; + } + + cap_size -= read; + addr += read; + caps += read; + } + + return ret; +} + +int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query) +{ + u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_ctrl)]; + int err; + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_FPGA_CTRL, 0, false); + if (err) + return err; + + query->status = MLX5_GET(fpga_ctrl, out, status); + query->admin_image = MLX5_GET(fpga_ctrl, out, flash_select_admin); + query->oper_image = MLX5_GET(fpga_ctrl, out, flash_select_oper); + return 0; +} + +int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc, + u32 *fpga_qpn) +{ + u32 in[MLX5_ST_SZ_DW(fpga_create_qp_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_create_qp_out)]; + int ret; + + MLX5_SET(fpga_create_qp_in, in, opcode, MLX5_CMD_OP_FPGA_CREATE_QP); + memcpy(MLX5_ADDR_OF(fpga_create_qp_in, in, fpga_qpc), fpga_qpc, + MLX5_FLD_SZ_BYTES(fpga_create_qp_in, fpga_qpc)); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_create_qp_out, out, fpga_qpc), + MLX5_FLD_SZ_BYTES(fpga_create_qp_out, fpga_qpc)); + *fpga_qpn = MLX5_GET(fpga_create_qp_out, out, fpga_qpn); + return ret; +} + +int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, + enum 
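
mlx5_fpga_query() below fills a small status structure from the FPGA_CTRL register. A hypothetical caller that only logs the result could look like this (the function name and message are invented for illustration):

static void example_report_fpga_status(struct mlx5_core_dev *dev)
{
        struct mlx5_fpga_query query;

        if (mlx5_fpga_query(dev, &query))
                return;

        dev_info(&dev->pdev->dev, "FPGA status %u, admin image %u, oper image %u\n",
                 query.status, query.admin_image, query.oper_image);
}
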
mlx5_fpga_qpc_field_select fields, + void *fpga_qpc) +{ + u32 in[MLX5_ST_SZ_DW(fpga_modify_qp_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_modify_qp_out)]; + + MLX5_SET(fpga_modify_qp_in, in, opcode, MLX5_CMD_OP_FPGA_MODIFY_QP); + MLX5_SET(fpga_modify_qp_in, in, field_select, fields); + MLX5_SET(fpga_modify_qp_in, in, fpga_qpn, fpga_qpn); + memcpy(MLX5_ADDR_OF(fpga_modify_qp_in, in, fpga_qpc), fpga_qpc, + MLX5_FLD_SZ_BYTES(fpga_modify_qp_in, fpga_qpc)); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_fpga_query_qp(struct mlx5_core_dev *dev, + u32 fpga_qpn, void *fpga_qpc) +{ + u32 in[MLX5_ST_SZ_DW(fpga_query_qp_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_query_qp_out)]; + int ret; + + MLX5_SET(fpga_query_qp_in, in, opcode, MLX5_CMD_OP_FPGA_QUERY_QP); + MLX5_SET(fpga_query_qp_in, in, fpga_qpn, fpga_qpn); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_query_qp_out, out, fpga_qpc), + MLX5_FLD_SZ_BYTES(fpga_query_qp_out, fpga_qpc)); + return ret; +} + +int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn) +{ + u32 in[MLX5_ST_SZ_DW(fpga_destroy_qp_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_destroy_qp_out)]; + + MLX5_SET(fpga_destroy_qp_in, in, opcode, MLX5_CMD_OP_FPGA_DESTROY_QP); + MLX5_SET(fpga_destroy_qp_in, in, fpga_qpn, fpga_qpn); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn, + bool clear, struct mlx5_fpga_qp_counters *data) +{ + u32 in[MLX5_ST_SZ_DW(fpga_query_qp_counters_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_query_qp_counters_out)]; + int ret; + + MLX5_SET(fpga_query_qp_counters_in, in, opcode, + MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS); + MLX5_SET(fpga_query_qp_counters_in, in, clear, clear); + MLX5_SET(fpga_query_qp_counters_in, in, fpga_qpn, fpga_qpn); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + data->rx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + rx_ack_packets); + data->rx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + rx_send_packets); + data->tx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + tx_ack_packets); + data->tx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + tx_send_packets); + data->rx_total_drop = MLX5_GET64(fpga_query_qp_counters_out, out, + rx_total_drop); + + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h new file mode 100644 index 000000000000..d05233c9b4f6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_FPGA_H__ +#define __MLX5_FPGA_H__ + +#include <linux/mlx5/driver.h> + +enum mlx5_fpga_image { + MLX5_FPGA_IMAGE_USER = 0, + MLX5_FPGA_IMAGE_FACTORY, +}; + +enum mlx5_fpga_status { + MLX5_FPGA_STATUS_SUCCESS = 0, + MLX5_FPGA_STATUS_FAILURE = 1, + MLX5_FPGA_STATUS_IN_PROGRESS = 2, + MLX5_FPGA_STATUS_NONE = 0xFFFF, +}; + +struct mlx5_fpga_query { + enum mlx5_fpga_image admin_image; + enum mlx5_fpga_image oper_image; + enum mlx5_fpga_status status; +}; + +enum mlx5_fpga_qpc_field_select { + MLX5_FPGA_QPC_STATE = BIT(0), +}; + +struct mlx5_fpga_qp_counters { + u64 rx_ack_packets; + u64 rx_send_packets; + u64 tx_ack_packets; + u64 tx_send_packets; + u64 rx_total_drop; +}; + +int mlx5_fpga_caps(struct mlx5_core_dev *dev); +int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query); +int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op); +int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr, + void *buf, bool write); +int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size); + +int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc, + u32 *fpga_qpn); +int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, + enum mlx5_fpga_qpc_field_select fields, void *fpga_qpc); +int mlx5_fpga_query_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, void *fpga_qpc); +int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn, + bool clear, struct mlx5_fpga_qp_counters *data); +int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn); + +#endif /* __MLX5_FPGA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c new file mode 100644 index 000000000000..c4392f741c5f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -0,0 +1,1042 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <net/addrconf.h> +#include <linux/etherdevice.h> +#include <linux/mlx5/vport.h> + +#include "mlx5_core.h" +#include "lib/mlx5.h" +#include "fpga/conn.h" + +#define MLX5_FPGA_PKEY 0xFFFF +#define MLX5_FPGA_PKEY_INDEX 0 /* RoCE PKEY 0xFFFF is always at index 0 */ +#define MLX5_FPGA_RECV_SIZE 2048 +#define MLX5_FPGA_PORT_NUM 1 +#define MLX5_FPGA_CQ_BUDGET 64 + +static int mlx5_fpga_conn_map_buf(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct device *dma_device; + int err = 0; + + if (unlikely(!buf->sg[0].data)) + goto out; + + dma_device = &conn->fdev->mdev->pdev->dev; + buf->sg[0].dma_addr = dma_map_single(dma_device, buf->sg[0].data, + buf->sg[0].size, buf->dma_dir); + err = dma_mapping_error(dma_device, buf->sg[0].dma_addr); + if (unlikely(err)) { + mlx5_fpga_warn(conn->fdev, "DMA error on sg 0: %d\n", err); + err = -ENOMEM; + goto out; + } + + if (!buf->sg[1].data) + goto out; + + buf->sg[1].dma_addr = dma_map_single(dma_device, buf->sg[1].data, + buf->sg[1].size, buf->dma_dir); + err = dma_mapping_error(dma_device, buf->sg[1].dma_addr); + if (unlikely(err)) { + mlx5_fpga_warn(conn->fdev, "DMA error on sg 1: %d\n", err); + dma_unmap_single(dma_device, buf->sg[0].dma_addr, + buf->sg[0].size, buf->dma_dir); + err = -ENOMEM; + } + +out: + return err; +} + +static void mlx5_fpga_conn_unmap_buf(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct device *dma_device; + + dma_device = &conn->fdev->mdev->pdev->dev; + if (buf->sg[1].data) + dma_unmap_single(dma_device, buf->sg[1].dma_addr, + buf->sg[1].size, buf->dma_dir); + + if (likely(buf->sg[0].data)) + dma_unmap_single(dma_device, buf->sg[0].dma_addr, + buf->sg[0].size, buf->dma_dir); +} + +static int mlx5_fpga_conn_post_recv(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct mlx5_wqe_data_seg *data; + unsigned int ix; + int err = 0; + + err = mlx5_fpga_conn_map_buf(conn, buf); + if (unlikely(err)) + goto out; + + if (unlikely(conn->qp.rq.pc - conn->qp.rq.cc >= conn->qp.rq.size)) { + mlx5_fpga_conn_unmap_buf(conn, buf); + return -EBUSY; + } + + ix = conn->qp.rq.pc & (conn->qp.rq.size - 1); + data = mlx5_wq_cyc_get_wqe(&conn->qp.wq.rq, ix); + data->byte_count = cpu_to_be32(buf->sg[0].size); + data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key); + data->addr = cpu_to_be64(buf->sg[0].dma_addr); + + conn->qp.rq.pc++; + conn->qp.rq.bufs[ix] = buf; + + /* Make sure that descriptors are written before doorbell record. 
*/ + dma_wmb(); + *conn->qp.wq.rq.db = cpu_to_be32(conn->qp.rq.pc & 0xffff); +out: + return err; +} + +static void mlx5_fpga_conn_notify_hw(struct mlx5_fpga_conn *conn, void *wqe) +{ + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + *conn->qp.wq.sq.db = cpu_to_be32(conn->qp.sq.pc); + /* Make sure that doorbell record is visible before ringing */ + wmb(); + mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET, NULL); +} + +static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct mlx5_wqe_ctrl_seg *ctrl; + struct mlx5_wqe_data_seg *data; + unsigned int ix, sgi; + int size = 1; + + ix = conn->qp.sq.pc & (conn->qp.sq.size - 1); + + ctrl = mlx5_wq_cyc_get_wqe(&conn->qp.wq.sq, ix); + data = (void *)(ctrl + 1); + + for (sgi = 0; sgi < ARRAY_SIZE(buf->sg); sgi++) { + if (!buf->sg[sgi].data) + break; + data->byte_count = cpu_to_be32(buf->sg[sgi].size); + data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key); + data->addr = cpu_to_be64(buf->sg[sgi].dma_addr); + data++; + size++; + } + + ctrl->imm = 0; + ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + ctrl->opmod_idx_opcode = cpu_to_be32(((conn->qp.sq.pc & 0xffff) << 8) | + MLX5_OPCODE_SEND); + ctrl->qpn_ds = cpu_to_be32(size | (conn->qp.mqp.qpn << 8)); + + conn->qp.sq.pc++; + conn->qp.sq.bufs[ix] = buf; + mlx5_fpga_conn_notify_hw(conn, ctrl); +} + +int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + unsigned long flags; + int err; + + if (!conn->qp.active) + return -ENOTCONN; + + err = mlx5_fpga_conn_map_buf(conn, buf); + if (err) + return err; + + spin_lock_irqsave(&conn->qp.sq.lock, flags); + + if (conn->qp.sq.pc - conn->qp.sq.cc >= conn->qp.sq.size) { + list_add_tail(&buf->list, &conn->qp.sq.backlog); + goto out_unlock; + } + + mlx5_fpga_conn_post_send(conn, buf); + +out_unlock: + spin_unlock_irqrestore(&conn->qp.sq.lock, flags); + return err; +} + +static int mlx5_fpga_conn_post_recv_buf(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_dma_buf *buf; + int err; + + buf = kzalloc(sizeof(*buf) + MLX5_FPGA_RECV_SIZE, 0); + if (!buf) + return -ENOMEM; + + buf->sg[0].data = (void *)(buf + 1); + buf->sg[0].size = MLX5_FPGA_RECV_SIZE; + buf->dma_dir = DMA_FROM_DEVICE; + + err = mlx5_fpga_conn_post_recv(conn, buf); + if (err) + kfree(buf); + + return err; +} + +static int mlx5_fpga_conn_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, + struct mlx5_core_mkey *mkey) +{ + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + void *mkc; + u32 *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + err = mlx5_core_create_mkey(mdev, mkey, in, inlen); + + kvfree(in); + return err; +} + +static void mlx5_fpga_conn_rq_cqe(struct mlx5_fpga_conn *conn, + struct mlx5_cqe64 *cqe, u8 status) +{ + struct mlx5_fpga_dma_buf *buf; + int ix, err; + + ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.rq.size - 1); + buf = conn->qp.rq.bufs[ix]; + conn->qp.rq.bufs[ix] = NULL; + if (!status) + buf->sg[0].size = be32_to_cpu(cqe->byte_cnt); + conn->qp.rq.cc++; + + if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR))) + mlx5_fpga_warn(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, 
status); + else + mlx5_fpga_dbg(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + + mlx5_fpga_conn_unmap_buf(conn, buf); + + if (unlikely(status || !conn->qp.active)) { + conn->qp.active = false; + kfree(buf); + return; + } + + mlx5_fpga_dbg(conn->fdev, "Message with %u bytes received successfully\n", + buf->sg[0].size); + conn->recv_cb(conn->cb_arg, buf); + + buf->sg[0].size = MLX5_FPGA_RECV_SIZE; + err = mlx5_fpga_conn_post_recv(conn, buf); + if (unlikely(err)) { + mlx5_fpga_warn(conn->fdev, + "Failed to re-post recv buf: %d\n", err); + kfree(buf); + } +} + +static void mlx5_fpga_conn_sq_cqe(struct mlx5_fpga_conn *conn, + struct mlx5_cqe64 *cqe, u8 status) +{ + struct mlx5_fpga_dma_buf *buf, *nextbuf; + unsigned long flags; + int ix; + + spin_lock_irqsave(&conn->qp.sq.lock, flags); + + ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.sq.size - 1); + buf = conn->qp.sq.bufs[ix]; + conn->qp.sq.bufs[ix] = NULL; + conn->qp.sq.cc++; + + /* Handle backlog still under the spinlock to ensure message post order */ + if (unlikely(!list_empty(&conn->qp.sq.backlog))) { + if (likely(conn->qp.active)) { + nextbuf = list_first_entry(&conn->qp.sq.backlog, + struct mlx5_fpga_dma_buf, list); + list_del(&nextbuf->list); + mlx5_fpga_conn_post_send(conn, nextbuf); + } + } + + spin_unlock_irqrestore(&conn->qp.sq.lock, flags); + + if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR))) + mlx5_fpga_warn(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + else + mlx5_fpga_dbg(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + + mlx5_fpga_conn_unmap_buf(conn, buf); + + if (likely(buf->complete)) + buf->complete(conn, conn->fdev, buf, status); + + if (unlikely(status)) + conn->qp.active = false; +} + +static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn, + struct mlx5_cqe64 *cqe) +{ + u8 opcode, status = 0; + + opcode = cqe->op_own >> 4; + + switch (opcode) { + case MLX5_CQE_REQ_ERR: + status = ((struct mlx5_err_cqe *)cqe)->syndrome; + /* Fall through */ + case MLX5_CQE_REQ: + mlx5_fpga_conn_sq_cqe(conn, cqe, status); + break; + + case MLX5_CQE_RESP_ERR: + status = ((struct mlx5_err_cqe *)cqe)->syndrome; + /* Fall through */ + case MLX5_CQE_RESP_SEND: + mlx5_fpga_conn_rq_cqe(conn, cqe, status); + break; + default: + mlx5_fpga_warn(conn->fdev, "Unexpected cqe opcode %u\n", + opcode); + } +} + +static void mlx5_fpga_conn_arm_cq(struct mlx5_fpga_conn *conn) +{ + mlx5_cq_arm(&conn->cq.mcq, MLX5_CQ_DB_REQ_NOT, + conn->fdev->conn_res.uar->map, conn->cq.wq.cc); +} + +static void mlx5_fpga_conn_cq_event(struct mlx5_core_cq *mcq, + enum mlx5_event event) +{ + struct mlx5_fpga_conn *conn; + + conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq); + mlx5_fpga_warn(conn->fdev, "CQ event %u on CQ #%u\n", event, mcq->cqn); +} + +static void mlx5_fpga_conn_event(struct mlx5_core_qp *mqp, int event) +{ + struct mlx5_fpga_conn *conn; + + conn = container_of(mqp, struct mlx5_fpga_conn, qp.mqp); + mlx5_fpga_warn(conn->fdev, "QP event %u on QP #%u\n", event, mqp->qpn); +} + +static inline void mlx5_fpga_conn_cqes(struct mlx5_fpga_conn *conn, + unsigned int budget) +{ + struct mlx5_cqe64 *cqe; + + while (budget) { + cqe = mlx5_cqwq_get_cqe(&conn->cq.wq); + if (!cqe) + break; + + budget--; + mlx5_cqwq_pop(&conn->cq.wq); + mlx5_fpga_conn_handle_cqe(conn, cqe); + mlx5_cqwq_update_db_record(&conn->cq.wq); + } + if (!budget) { + tasklet_schedule(&conn->cq.tasklet); + return; + } + + 
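
mlx5_fpga_conn_cqes() in this hunk uses the usual bounded-polling pattern: drain at most MLX5_FPGA_CQ_BUDGET completions, reschedule the tasklet if work remains, and only re-arm the CQ once it is empty. Stripped of the mlx5 specifics, the shape is (illustrative pseudo-driver code, all example_* names invented):

static void example_poll_cq(struct example_conn *conn, unsigned int budget)
{
        while (budget && example_get_next_cqe(conn)) {  /* consume one completion */
                budget--;
                example_handle_cqe(conn);
        }

        if (!budget) {                                  /* more work: defer, do not re-arm */
                tasklet_schedule(&conn->tasklet);
                return;
        }

        wmb();                                          /* free CQ space before enabling new CQEs */
        example_arm_cq(conn);
}
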
mlx5_fpga_dbg(conn->fdev, "Re-arming CQ with cc# %u\n", conn->cq.wq.cc); + /* ensure cq space is freed before enabling more cqes */ + wmb(); + mlx5_fpga_conn_arm_cq(conn); +} + +static void mlx5_fpga_conn_cq_tasklet(unsigned long data) +{ + struct mlx5_fpga_conn *conn = (void *)data; + + if (unlikely(!conn->qp.active)) + return; + mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET); +} + +static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq) +{ + struct mlx5_fpga_conn *conn; + + conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq); + if (unlikely(!conn->qp.active)) + return; + mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET); +} + +static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {0}; + struct mlx5_wq_param wqp; + struct mlx5_cqe64 *cqe; + int inlen, err, eqn; + unsigned int irqn; + void *cqc, *in; + __be64 *pas; + u32 i; + + cq_size = roundup_pow_of_two(cq_size); + MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(cq_size)); + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &conn->cq.wq, + &conn->cq.wq_ctrl); + if (err) + return err; + + for (i = 0; i < mlx5_cqwq_get_size(&conn->cq.wq); i++) { + cqe = mlx5_cqwq_get_wqe(&conn->cq.wq, i); + cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK; + } + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * conn->cq.wq_ctrl.frag_buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_cqwq; + } + + err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn); + if (err) + goto err_cqwq; + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size)); + MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index); + MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.frag_buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, conn->cq.wq_ctrl.db.dma); + + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); + mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.frag_buf, pas); + + err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen); + kvfree(in); + + if (err) + goto err_cqwq; + + conn->cq.mcq.cqe_sz = 64; + conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db; + conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1; + *conn->cq.mcq.set_ci_db = 0; + *conn->cq.mcq.arm_db = 0; + conn->cq.mcq.vector = 0; + conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; + conn->cq.mcq.event = mlx5_fpga_conn_cq_event; + conn->cq.mcq.irqn = irqn; + conn->cq.mcq.uar = fdev->conn_res.uar; + tasklet_init(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet, + (unsigned long)conn); + + mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn); + + goto out; + +err_cqwq: + mlx5_cqwq_destroy(&conn->cq.wq_ctrl); +out: + return err; +} + +static void mlx5_fpga_conn_destroy_cq(struct mlx5_fpga_conn *conn) +{ + tasklet_disable(&conn->cq.tasklet); + tasklet_kill(&conn->cq.tasklet); + mlx5_core_destroy_cq(conn->fdev->mdev, &conn->cq.mcq); + mlx5_cqwq_destroy(&conn->cq.wq_ctrl); +} + +static int mlx5_fpga_conn_create_wq(struct mlx5_fpga_conn *conn, void *qpc) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + struct mlx5_wq_param wqp; + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + return mlx5_wq_qp_create(mdev, &wqp, qpc, &conn->qp.wq, + 
&conn->qp.wq_ctrl); +} + +static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn, + unsigned int tx_size, unsigned int rx_size) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {0}; + void *in = NULL, *qpc; + int err, inlen; + + conn->qp.rq.pc = 0; + conn->qp.rq.cc = 0; + conn->qp.rq.size = roundup_pow_of_two(rx_size); + conn->qp.sq.pc = 0; + conn->qp.sq.cc = 0; + conn->qp.sq.size = roundup_pow_of_two(tx_size); + + MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(conn->qp.rq.size)); + MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(conn->qp.sq.size)); + err = mlx5_fpga_conn_create_wq(conn, temp_qpc); + if (err) + goto out; + + conn->qp.rq.bufs = kvzalloc(sizeof(conn->qp.rq.bufs[0]) * + conn->qp.rq.size, GFP_KERNEL); + if (!conn->qp.rq.bufs) { + err = -ENOMEM; + goto err_wq; + } + + conn->qp.sq.bufs = kvzalloc(sizeof(conn->qp.sq.bufs[0]) * + conn->qp.sq.size, GFP_KERNEL); + if (!conn->qp.sq.bufs) { + err = -ENOMEM; + goto err_rq_bufs; + } + + inlen = MLX5_ST_SZ_BYTES(create_qp_in) + + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * + conn->qp.wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_sq_bufs; + } + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, uar_page, fdev->conn_res.uar->index); + MLX5_SET(qpc, qpc, log_page_size, + conn->qp.wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, fre, 1); + MLX5_SET(qpc, qpc, rlky, 1); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, fdev->conn_res.pdn); + MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(conn->qp.rq.size)); + MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); + MLX5_SET(qpc, qpc, log_sq_size, ilog2(conn->qp.sq.size)); + MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn); + MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn); + MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma); + if (MLX5_CAP_GEN(mdev, cqe_version) == 1) + MLX5_SET(qpc, qpc, user_index, 0xFFFFFF); + + mlx5_fill_page_array(&conn->qp.wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas)); + + err = mlx5_core_create_qp(mdev, &conn->qp.mqp, in, inlen); + if (err) + goto err_sq_bufs; + + conn->qp.mqp.event = mlx5_fpga_conn_event; + mlx5_fpga_dbg(fdev, "Created QP #0x%x\n", conn->qp.mqp.qpn); + + goto out; + +err_sq_bufs: + kvfree(conn->qp.sq.bufs); +err_rq_bufs: + kvfree(conn->qp.rq.bufs); +err_wq: + mlx5_wq_destroy(&conn->qp.wq_ctrl); +out: + kvfree(in); + return err; +} + +static void mlx5_fpga_conn_free_recv_bufs(struct mlx5_fpga_conn *conn) +{ + int ix; + + for (ix = 0; ix < conn->qp.rq.size; ix++) { + if (!conn->qp.rq.bufs[ix]) + continue; + mlx5_fpga_conn_unmap_buf(conn, conn->qp.rq.bufs[ix]); + kfree(conn->qp.rq.bufs[ix]); + conn->qp.rq.bufs[ix] = NULL; + } +} + +static void mlx5_fpga_conn_flush_send_bufs(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_dma_buf *buf, *temp; + int ix; + + for (ix = 0; ix < conn->qp.sq.size; ix++) { + buf = conn->qp.sq.bufs[ix]; + if (!buf) + continue; + conn->qp.sq.bufs[ix] = NULL; + mlx5_fpga_conn_unmap_buf(conn, buf); + if (!buf->complete) + continue; + buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR); + } + list_for_each_entry_safe(buf, temp, &conn->qp.sq.backlog, list) { + mlx5_fpga_conn_unmap_buf(conn, buf); + if (!buf->complete) + continue; + 
buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR); + } +} + +static void mlx5_fpga_conn_destroy_qp(struct mlx5_fpga_conn *conn) +{ + mlx5_core_destroy_qp(conn->fdev->mdev, &conn->qp.mqp); + mlx5_fpga_conn_free_recv_bufs(conn); + mlx5_fpga_conn_flush_send_bufs(conn); + kvfree(conn->qp.sq.bufs); + kvfree(conn->qp.rq.bufs); + mlx5_wq_destroy(&conn->qp.wq_ctrl); +} + +static inline int mlx5_fpga_conn_reset_qp(struct mlx5_fpga_conn *conn) +{ + struct mlx5_core_dev *mdev = conn->fdev->mdev; + + mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to RST\n", conn->qp.mqp.qpn); + + return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, NULL, + &conn->qp.mqp); +} + +static inline int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 *qpc = NULL; + int err; + + mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to INIT\n", conn->qp.mqp.qpn); + + qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL); + if (!qpc) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX); + MLX5_SET(qpc, qpc, primary_address_path.port, MLX5_FPGA_PORT_NUM); + MLX5_SET(qpc, qpc, pd, conn->fdev->conn_res.pdn); + MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn); + MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn); + MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma); + + err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc, + &conn->qp.mqp); + if (err) { + mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err); + goto out; + } + +out: + kfree(qpc); + return err; +} + +static inline int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 *qpc = NULL; + int err; + + mlx5_fpga_dbg(conn->fdev, "QP RTR\n"); + + qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL); + if (!qpc) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_1K_BYTES); + MLX5_SET(qpc, qpc, log_msg_max, (u8)MLX5_CAP_GEN(mdev, log_max_msg)); + MLX5_SET(qpc, qpc, remote_qpn, conn->fpga_qpn); + MLX5_SET(qpc, qpc, next_rcv_psn, + MLX5_GET(fpga_qpc, conn->fpga_qpc, next_send_psn)); + MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX); + MLX5_SET(qpc, qpc, primary_address_path.port, MLX5_FPGA_PORT_NUM); + ether_addr_copy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32), + MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_mac_47_32)); + MLX5_SET(qpc, qpc, primary_address_path.udp_sport, + MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port)); + MLX5_SET(qpc, qpc, primary_address_path.src_addr_index, + conn->qp.sgid_index); + MLX5_SET(qpc, qpc, primary_address_path.hop_limit, 0); + memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip), + MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_ip), + MLX5_FLD_SZ_BYTES(qpc, primary_address_path.rgid_rip)); + + err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc, + &conn->qp.mqp); + if (err) { + mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err); + goto out; + } + +out: + kfree(qpc); + return err; +} + +static inline int mlx5_fpga_conn_rts_qp(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 *qpc = NULL; + u32 opt_mask; + int err; + + mlx5_fpga_dbg(conn->fdev, "QP RTS\n"); + + qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL); + if (!qpc) { + 
err = -ENOMEM; + goto out; + } + + MLX5_SET(qpc, qpc, log_ack_req_freq, 8); + MLX5_SET(qpc, qpc, min_rnr_nak, 0x12); + MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x12); /* ~1.07s */ + MLX5_SET(qpc, qpc, next_send_psn, + MLX5_GET(fpga_qpc, conn->fpga_qpc, next_rcv_psn)); + MLX5_SET(qpc, qpc, retry_count, 7); + MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */ + + opt_mask = MLX5_QP_OPTPAR_RNR_TIMEOUT; + err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, opt_mask, qpc, + &conn->qp.mqp); + if (err) { + mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err); + goto out; + } + +out: + kfree(qpc); + return err; +} + +static int mlx5_fpga_conn_connect(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + int err; + + MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_ACTIVE); + err = mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn, + MLX5_FPGA_QPC_STATE, &conn->fpga_qpc); + if (err) { + mlx5_fpga_err(fdev, "Failed to activate FPGA RC QP: %d\n", err); + goto out; + } + + err = mlx5_fpga_conn_reset_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to change QP state to reset\n"); + goto err_fpga_qp; + } + + err = mlx5_fpga_conn_init_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to modify QP from RESET to INIT\n"); + goto err_fpga_qp; + } + conn->qp.active = true; + + while (!mlx5_fpga_conn_post_recv_buf(conn)) + ; + + err = mlx5_fpga_conn_rtr_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to change QP state from INIT to RTR\n"); + goto err_recv_bufs; + } + + err = mlx5_fpga_conn_rts_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to change QP state from RTR to RTS\n"); + goto err_recv_bufs; + } + goto out; + +err_recv_bufs: + mlx5_fpga_conn_free_recv_bufs(conn); +err_fpga_qp: + MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT); + if (mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn, + MLX5_FPGA_QPC_STATE, &conn->fpga_qpc)) + mlx5_fpga_err(fdev, "Failed to revert FPGA QP to INIT\n"); +out: + return err; +} + +struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr, + enum mlx5_ifc_fpga_qp_type qp_type) +{ + struct mlx5_fpga_conn *ret, *conn; + u8 *remote_mac, *remote_ip; + int err; + + if (!attr->recv_cb) + return ERR_PTR(-EINVAL); + + conn = kzalloc(sizeof(*conn), GFP_KERNEL); + if (!conn) + return ERR_PTR(-ENOMEM); + + conn->fdev = fdev; + INIT_LIST_HEAD(&conn->qp.sq.backlog); + + spin_lock_init(&conn->qp.sq.lock); + + conn->recv_cb = attr->recv_cb; + conn->cb_arg = attr->cb_arg; + + remote_mac = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_mac_47_32); + err = mlx5_query_nic_vport_mac_address(fdev->mdev, 0, remote_mac); + if (err) { + mlx5_fpga_err(fdev, "Failed to query local MAC: %d\n", err); + ret = ERR_PTR(err); + goto err; + } + + /* Build Modified EUI-64 IPv6 address from the MAC address */ + remote_ip = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_ip); + remote_ip[0] = 0xfe; + remote_ip[1] = 0x80; + addrconf_addr_eui48(&remote_ip[8], remote_mac); + + err = mlx5_core_reserved_gid_alloc(fdev->mdev, &conn->qp.sgid_index); + if (err) { + mlx5_fpga_err(fdev, "Failed to allocate SGID: %d\n", err); + ret = ERR_PTR(err); + goto err; + } + + err = mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, + MLX5_ROCE_VERSION_2, + MLX5_ROCE_L3_TYPE_IPV6, + remote_ip, remote_mac, true, 0); + if (err) { + mlx5_fpga_err(fdev, "Failed to set SGID: %d\n", err); + ret = ERR_PTR(err); + goto err_rsvd_gid; + } + mlx5_fpga_dbg(fdev, 
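
mlx5_fpga_conn_create() below derives the FPGA-facing IPv6 GID from the NIC's MAC address by building a Modified EUI-64 link-local address. The same construction in isolation, with an invented helper name (net/addrconf.h provides addrconf_addr_eui48()):

/* Build an fe80::/64 link-local address with an EUI-64 interface ID from a MAC. */
static void example_mac_to_link_local(const u8 *mac, u8 ip6[16])
{
        memset(ip6, 0, 16);
        ip6[0] = 0xfe;
        ip6[1] = 0x80;
        addrconf_addr_eui48(&ip6[8], mac);
}
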
"Reserved SGID index %u\n", conn->qp.sgid_index); + + /* Allow for one cqe per rx/tx wqe, plus one cqe for the next wqe, + * created during processing of the cqe + */ + err = mlx5_fpga_conn_create_cq(conn, + (attr->tx_size + attr->rx_size) * 2); + if (err) { + mlx5_fpga_err(fdev, "Failed to create CQ: %d\n", err); + ret = ERR_PTR(err); + goto err_gid; + } + + mlx5_fpga_conn_arm_cq(conn); + + err = mlx5_fpga_conn_create_qp(conn, attr->tx_size, attr->rx_size); + if (err) { + mlx5_fpga_err(fdev, "Failed to create QP: %d\n", err); + ret = ERR_PTR(err); + goto err_cq; + } + + MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT); + MLX5_SET(fpga_qpc, conn->fpga_qpc, qp_type, qp_type); + MLX5_SET(fpga_qpc, conn->fpga_qpc, st, MLX5_FPGA_QPC_ST_RC); + MLX5_SET(fpga_qpc, conn->fpga_qpc, ether_type, ETH_P_8021Q); + MLX5_SET(fpga_qpc, conn->fpga_qpc, vid, 0); + MLX5_SET(fpga_qpc, conn->fpga_qpc, next_rcv_psn, 1); + MLX5_SET(fpga_qpc, conn->fpga_qpc, next_send_psn, 0); + MLX5_SET(fpga_qpc, conn->fpga_qpc, pkey, MLX5_FPGA_PKEY); + MLX5_SET(fpga_qpc, conn->fpga_qpc, remote_qpn, conn->qp.mqp.qpn); + MLX5_SET(fpga_qpc, conn->fpga_qpc, rnr_retry, 7); + MLX5_SET(fpga_qpc, conn->fpga_qpc, retry_count, 7); + + err = mlx5_fpga_create_qp(fdev->mdev, &conn->fpga_qpc, + &conn->fpga_qpn); + if (err) { + mlx5_fpga_err(fdev, "Failed to create FPGA RC QP: %d\n", err); + ret = ERR_PTR(err); + goto err_qp; + } + + err = mlx5_fpga_conn_connect(conn); + if (err) { + ret = ERR_PTR(err); + goto err_conn; + } + + mlx5_fpga_dbg(fdev, "FPGA QPN is %u\n", conn->fpga_qpn); + ret = conn; + goto out; + +err_conn: + mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn); +err_qp: + mlx5_fpga_conn_destroy_qp(conn); +err_cq: + mlx5_fpga_conn_destroy_cq(conn); +err_gid: + mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, 0, 0, NULL, + NULL, false, 0); +err_rsvd_gid: + mlx5_core_reserved_gid_free(fdev->mdev, conn->qp.sgid_index); +err: + kfree(conn); +out: + return ret; +} + +void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + int err = 0; + + conn->qp.active = false; + tasklet_disable(&conn->cq.tasklet); + synchronize_irq(conn->cq.mcq.irqn); + + mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn); + err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, NULL, + &conn->qp.mqp); + if (err) + mlx5_fpga_warn(fdev, "qp_modify 2ERR failed: %d\n", err); + mlx5_fpga_conn_destroy_qp(conn); + mlx5_fpga_conn_destroy_cq(conn); + + mlx5_core_roce_gid_set(conn->fdev->mdev, conn->qp.sgid_index, 0, 0, + NULL, NULL, false, 0); + mlx5_core_reserved_gid_free(conn->fdev->mdev, conn->qp.sgid_index); + kfree(conn); +} + +int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev) +{ + int err; + + err = mlx5_nic_vport_enable_roce(fdev->mdev); + if (err) { + mlx5_fpga_err(fdev, "Failed to enable RoCE: %d\n", err); + goto out; + } + + fdev->conn_res.uar = mlx5_get_uars_page(fdev->mdev); + if (IS_ERR(fdev->conn_res.uar)) { + err = PTR_ERR(fdev->conn_res.uar); + mlx5_fpga_err(fdev, "get_uars_page failed, %d\n", err); + goto err_roce; + } + mlx5_fpga_dbg(fdev, "Allocated UAR index %u\n", + fdev->conn_res.uar->index); + + err = mlx5_core_alloc_pd(fdev->mdev, &fdev->conn_res.pdn); + if (err) { + mlx5_fpga_err(fdev, "alloc pd failed, %d\n", err); + goto err_uar; + } + mlx5_fpga_dbg(fdev, "Allocated PD %u\n", fdev->conn_res.pdn); + + err = mlx5_fpga_conn_create_mkey(fdev->mdev, fdev->conn_res.pdn, + &fdev->conn_res.mkey); + if (err) { + 
mlx5_fpga_err(fdev, "create mkey failed, %d\n", err); + goto err_dealloc_pd; + } + mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey.key); + + return 0; + +err_dealloc_pd: + mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn); +err_uar: + mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar); +err_roce: + mlx5_nic_vport_disable_roce(fdev->mdev); +out: + return err; +} + +void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev) +{ + mlx5_core_destroy_mkey(fdev->mdev, &fdev->conn_res.mkey); + mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn); + mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar); + mlx5_nic_vport_disable_roce(fdev->mdev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h new file mode 100644 index 000000000000..44bd9eccc711 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef __MLX5_FPGA_CONN_H__ +#define __MLX5_FPGA_CONN_H__ + +#include <linux/mlx5/cq.h> +#include <linux/mlx5/qp.h> + +#include "fpga/core.h" +#include "fpga/sdk.h" +#include "wq.h" + +struct mlx5_fpga_conn { + struct mlx5_fpga_device *fdev; + + void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf); + void *cb_arg; + + /* FPGA QP */ + u32 fpga_qpc[MLX5_ST_SZ_DW(fpga_qpc)]; + u32 fpga_qpn; + + /* CQ */ + struct { + struct mlx5_cqwq wq; + struct mlx5_frag_wq_ctrl wq_ctrl; + struct mlx5_core_cq mcq; + struct tasklet_struct tasklet; + } cq; + + /* QP */ + struct { + bool active; + int sgid_index; + struct mlx5_wq_qp wq; + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5_core_qp mqp; + struct { + spinlock_t lock; /* Protects all SQ state */ + unsigned int pc; + unsigned int cc; + unsigned int size; + struct mlx5_fpga_dma_buf **bufs; + struct list_head backlog; + } sq; + struct { + unsigned int pc; + unsigned int cc; + unsigned int size; + struct mlx5_fpga_dma_buf **bufs; + } rq; + } qp; +}; + +int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev); +void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev); +struct mlx5_fpga_conn * +mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr, + enum mlx5_ifc_fpga_qp_type qp_type); +void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn); +int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf); + +#endif /* __MLX5_FPGA_CONN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c new file mode 100644 index 000000000000..dc8970346521 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
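The struct mlx5_fpga_conn definition above tracks each work queue with free-running producer (pc) and consumer (cc) counters plus a queue size. A small stand-alone sketch of that indexing convention, which is my reading of the structure and assumes the size is a power of two as mlx5 work queues are, follows:

#include <assert.h>
#include <stdio.h>

/* Free-running counters: the slot index is taken with a cheap mask and
 * counter wraparound is harmless because only the difference matters.
 */
struct ring {
	unsigned int pc;	/* producer count, bumped on post */
	unsigned int cc;	/* consumer count, bumped on completion */
	unsigned int size;	/* number of slots, power of two */
};

static unsigned int ring_used(const struct ring *r)
{
	return r->pc - r->cc;		/* correct across wraparound */
}

static unsigned int ring_slot(const struct ring *r, unsigned int counter)
{
	return counter & (r->size - 1);
}

int main(void)
{
	struct ring r = { .pc = 0, .cc = 0, .size = 8 };

	for (int i = 0; i < 10; i++) {		/* post 10, complete half */
		assert(ring_used(&r) < r.size);	/* caller must check space */
		printf("post -> slot %u\n", ring_slot(&r, r.pc));
		r.pc++;
		if (i & 1)
			r.cc++;
	}
	printf("in flight: %u\n", ring_used(&r));
	return 0;
}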
+ */ + +#include <linux/module.h> +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> + +#include "mlx5_core.h" +#include "lib/mlx5.h" +#include "fpga/core.h" +#include "fpga/conn.h" + +static const char *const mlx5_fpga_error_strings[] = { + "Null Syndrome", + "Corrupted DDR", + "Flash Timeout", + "Internal Link Error", + "Watchdog HW Failure", + "I2C Failure", + "Image Changed", + "Temperature Critical", +}; + +static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void) +{ + struct mlx5_fpga_device *fdev = NULL; + + fdev = kzalloc(sizeof(*fdev), GFP_KERNEL); + if (!fdev) + return NULL; + + spin_lock_init(&fdev->state_lock); + fdev->state = MLX5_FPGA_STATUS_NONE; + return fdev; +} + +static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image) +{ + switch (image) { + case MLX5_FPGA_IMAGE_USER: + return "user"; + case MLX5_FPGA_IMAGE_FACTORY: + return "factory"; + default: + return "unknown"; + } +} + +static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev) +{ + struct mlx5_fpga_query query; + int err; + + err = mlx5_fpga_query(fdev->mdev, &query); + if (err) { + mlx5_fpga_err(fdev, "Failed to query status: %d\n", err); + return err; + } + + fdev->last_admin_image = query.admin_image; + fdev->last_oper_image = query.oper_image; + + mlx5_fpga_dbg(fdev, "Status %u; Admin image %u; Oper image %u\n", + query.status, query.admin_image, query.oper_image); + + if (query.status != MLX5_FPGA_STATUS_SUCCESS) { + mlx5_fpga_err(fdev, "%s image failed to load; status %u\n", + mlx5_fpga_image_name(fdev->last_oper_image), + query.status); + return -EIO; + } + + return 0; +} + +static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev) +{ + int err; + struct mlx5_core_dev *mdev = fdev->mdev; + + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON); + if (err) { + mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err); + return err; + } + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX); + if (err) { + mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err); + return err; + } + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF); + if (err) { + mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err); + return err; + } + return 0; +} + +int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + unsigned long flags; + unsigned int max_num_qps; + int err; + + if (!fdev) + return 0; + + err = mlx5_fpga_device_load_check(fdev); + if (err) + goto out; + + err = mlx5_fpga_caps(fdev->mdev); + if (err) + goto out; + + mlx5_fpga_info(fdev, "device %u; %s image, version %u\n", + MLX5_CAP_FPGA(fdev->mdev, fpga_device), + mlx5_fpga_image_name(fdev->last_oper_image), + MLX5_CAP_FPGA(fdev->mdev, image_version)); + + max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps); + err = mlx5_core_reserve_gids(mdev, max_num_qps); + if (err) + goto out; + + err = mlx5_fpga_conn_device_init(fdev); + if (err) + goto err_rsvd_gid; + + if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) { + err = mlx5_fpga_device_brb(fdev); + if (err) + goto err_conn_init; + } + + goto out; + +err_conn_init: + mlx5_fpga_conn_device_cleanup(fdev); + +err_rsvd_gid: + mlx5_core_unreserve_gids(mdev, max_num_qps); +out: + spin_lock_irqsave(&fdev->state_lock, flags); + fdev->state = err ? 
MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS; + spin_unlock_irqrestore(&fdev->state_lock, flags); + return err; +} + +int mlx5_fpga_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = NULL; + + if (!MLX5_CAP_GEN(mdev, fpga)) { + mlx5_core_dbg(mdev, "FPGA capability not present\n"); + return 0; + } + + mlx5_core_dbg(mdev, "Initializing FPGA\n"); + + fdev = mlx5_fpga_device_alloc(); + if (!fdev) + return -ENOMEM; + + fdev->mdev = mdev; + mdev->fpga = fdev; + + return 0; +} + +void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + unsigned int max_num_qps; + unsigned long flags; + int err; + + if (!fdev) + return; + + spin_lock_irqsave(&fdev->state_lock, flags); + if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) { + spin_unlock_irqrestore(&fdev->state_lock, flags); + return; + } + fdev->state = MLX5_FPGA_STATUS_NONE; + spin_unlock_irqrestore(&fdev->state_lock, flags); + + if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) { + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON); + if (err) + mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n", + err); + } + + mlx5_fpga_conn_device_cleanup(fdev); + max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps); + mlx5_core_unreserve_gids(mdev, max_num_qps); +} + +void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + + mlx5_fpga_device_stop(mdev); + kfree(fdev); + mdev->fpga = NULL; +} + +static const char *mlx5_fpga_syndrome_to_string(u8 syndrome) +{ + if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings)) + return mlx5_fpga_error_strings[syndrome]; + return "Unknown"; +} + +void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + const char *event_name; + bool teardown = false; + unsigned long flags; + u8 syndrome; + + if (event != MLX5_EVENT_TYPE_FPGA_ERROR) { + mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n", + event); + return; + } + + syndrome = MLX5_GET(fpga_error_event, data, syndrome); + event_name = mlx5_fpga_syndrome_to_string(syndrome); + + spin_lock_irqsave(&fdev->state_lock, flags); + switch (fdev->state) { + case MLX5_FPGA_STATUS_SUCCESS: + mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name); + teardown = true; + break; + default: + mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n", + syndrome, event_name); + } + spin_unlock_irqrestore(&fdev->state_lock, flags); + /* We tear-down the card's interfaces and functionality because + * the FPGA bump-on-the-wire is misbehaving and we lose ability + * to communicate with the network. User may still be able to + * recover by re-programming or debugging the FPGA + */ + if (teardown) + mlx5_trigger_health_work(fdev->mdev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h new file mode 100644 index 000000000000..82405ed84725 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
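mlx5_fpga_device_start() above acquires its resources in a fixed order (reserved GIDs, then the connection layer, then the SBU bypass sequence) and its error labels release them strictly in reverse, mirroring mlx5_fpga_device_stop(). A stripped-down, runnable sketch of that goto-unwind idiom, with hypothetical step names standing in for the real calls:

#include <stdio.h>

/* Each step either succeeds (returns 0) or fails; on failure everything
 * acquired so far is released in reverse order, and nothing else.
 */
static int step(const char *name, int fail)
{
	printf("%s %s\n", name, fail ? "failed" : "ok");
	return fail ? -1 : 0;
}

static int device_start(int fail_at)
{
	int err;

	err = step("reserve_gids", fail_at == 1);
	if (err)
		goto out;

	err = step("conn_init", fail_at == 2);
	if (err)
		goto err_unreserve_gids;

	err = step("sandbox_bypass", fail_at == 3);
	if (err)
		goto err_conn_cleanup;

	return 0;

err_conn_cleanup:
	step("conn_cleanup", 0);
err_unreserve_gids:
	step("unreserve_gids", 0);
out:
	return err;
}

int main(void)
{
	device_start(3);	/* watch the reverse-order unwind */
	return 0;
}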
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_FPGA_CORE_H__ +#define __MLX5_FPGA_CORE_H__ + +#ifdef CONFIG_MLX5_FPGA + +#include "fpga/cmd.h" + +/* Represents an Innova device */ +struct mlx5_fpga_device { + struct mlx5_core_dev *mdev; + spinlock_t state_lock; /* Protects state transitions */ + enum mlx5_fpga_status state; + enum mlx5_fpga_image last_admin_image; + enum mlx5_fpga_image last_oper_image; + + /* QP Connection resources */ + struct { + u32 pdn; + struct mlx5_core_mkey mkey; + struct mlx5_uars_page *uar; + } conn_res; + + struct mlx5_fpga_ipsec *ipsec; +}; + +#define mlx5_fpga_dbg(__adev, format, ...) \ + dev_dbg(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, ##__VA_ARGS__) + +#define mlx5_fpga_err(__adev, format, ...) \ + dev_err(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, ##__VA_ARGS__) + +#define mlx5_fpga_warn(__adev, format, ...) \ + dev_warn(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, ##__VA_ARGS__) + +#define mlx5_fpga_warn_ratelimited(__adev, format, ...) \ + dev_warn_ratelimited(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d: " \ + format, __func__, __LINE__, ##__VA_ARGS__) + +#define mlx5_fpga_notice(__adev, format, ...) \ + dev_notice(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__) + +#define mlx5_fpga_info(__adev, format, ...) 
\ + dev_info(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__) + +int mlx5_fpga_init(struct mlx5_core_dev *mdev); +void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev); +int mlx5_fpga_device_start(struct mlx5_core_dev *mdev); +void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev); +void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data); + +#else + +static inline int mlx5_fpga_init(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev) +{ +} + +static inline int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) +{ +} + +static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, + void *data) +{ +} + +#endif + +#endif /* __MLX5_FPGA_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c new file mode 100644 index 000000000000..35d0e33381ca --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -0,0 +1,376 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
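The #else branch of fpga/core.h above compiles the whole FPGA API down to static inline no-ops when CONFIG_MLX5_FPGA is off, so callers in the core driver never need their own #ifdefs. A minimal sketch of that header pattern, using a hypothetical feature and device type rather than the driver's own names:

/* foo.h -- hypothetical optional-feature header, same shape as fpga/core.h */
#ifndef FOO_H
#define FOO_H

struct foo_dev;		/* opaque handle; a forward declaration is enough */

#ifdef CONFIG_FOO

int foo_init(struct foo_dev *dev);	/* real implementation lives in foo.c */
void foo_cleanup(struct foo_dev *dev);

#else /* !CONFIG_FOO */

/* Stubs keep every call site buildable; "success" here means "nothing to do". */
static inline int foo_init(struct foo_dev *dev)
{
	return 0;
}

static inline void foo_cleanup(struct foo_dev *dev)
{
}

#endif /* CONFIG_FOO */

#endif /* FOO_H */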
+ * + */ + +#include <linux/mlx5/driver.h> + +#include "mlx5_core.h" +#include "fpga/ipsec.h" +#include "fpga/sdk.h" +#include "fpga/core.h" + +#define SBU_QP_QUEUE_SIZE 8 + +enum mlx5_ipsec_response_syndrome { + MLX5_IPSEC_RESPONSE_SUCCESS = 0, + MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1, + MLX5_IPSEC_RESPONSE_SADB_ISSUE = 2, + MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3, +}; + +enum mlx5_fpga_ipsec_sacmd_status { + MLX5_FPGA_IPSEC_SACMD_PENDING, + MLX5_FPGA_IPSEC_SACMD_SEND_FAIL, + MLX5_FPGA_IPSEC_SACMD_COMPLETE, +}; + +struct mlx5_ipsec_command_context { + struct mlx5_fpga_dma_buf buf; + struct mlx5_accel_ipsec_sa sa; + enum mlx5_fpga_ipsec_sacmd_status status; + int status_code; + struct completion complete; + struct mlx5_fpga_device *dev; + struct list_head list; /* Item in pending_cmds */ +}; + +struct mlx5_ipsec_sadb_resp { + __be32 syndrome; + __be32 sw_sa_handle; + u8 reserved[24]; +} __packed; + +struct mlx5_fpga_ipsec { + struct list_head pending_cmds; + spinlock_t pending_cmds_lock; /* Protects pending_cmds */ + u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)]; + struct mlx5_fpga_conn *conn; +}; + +static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) +{ + if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga)) + return false; + + if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) != + MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX) + return false; + + if (MLX5_CAP_FPGA(mdev, sandbox_product_id) != + MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC) + return false; + + return true; +} + +static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_device *fdev, + struct mlx5_fpga_dma_buf *buf, + u8 status) +{ + struct mlx5_ipsec_command_context *context; + + if (status) { + context = container_of(buf, struct mlx5_ipsec_command_context, + buf); + mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n", + status); + context->status = MLX5_FPGA_IPSEC_SACMD_SEND_FAIL; + complete(&context->complete); + } +} + +static inline int syndrome_to_errno(enum mlx5_ipsec_response_syndrome syndrome) +{ + switch (syndrome) { + case MLX5_IPSEC_RESPONSE_SUCCESS: + return 0; + case MLX5_IPSEC_RESPONSE_SADB_ISSUE: + return -EEXIST; + case MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST: + return -EINVAL; + case MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE: + return -EIO; + } + return -EIO; +} + +static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf) +{ + struct mlx5_ipsec_sadb_resp *resp = buf->sg[0].data; + struct mlx5_ipsec_command_context *context; + enum mlx5_ipsec_response_syndrome syndrome; + struct mlx5_fpga_device *fdev = cb_arg; + unsigned long flags; + + if (buf->sg[0].size < sizeof(*resp)) { + mlx5_fpga_warn(fdev, "Short receive from FPGA IPSec: %u < %zu bytes\n", + buf->sg[0].size, sizeof(*resp)); + return; + } + + mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x sa_id %x\n", + ntohl(resp->syndrome), ntohl(resp->sw_sa_handle)); + + spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); + context = list_first_entry_or_null(&fdev->ipsec->pending_cmds, + struct mlx5_ipsec_command_context, + list); + if (context) + list_del(&context->list); + spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); + + if (!context) { + mlx5_fpga_warn(fdev, "Received IPSec offload response without pending command request\n"); + return; + } + mlx5_fpga_dbg(fdev, "Handling response for %p\n", context); + + if (context->sa.sw_sa_handle != resp->sw_sa_handle) { + mlx5_fpga_err(fdev, "mismatch SA handle. 
cmd 0x%08x vs resp 0x%08x\n", + ntohl(context->sa.sw_sa_handle), + ntohl(resp->sw_sa_handle)); + return; + } + + syndrome = ntohl(resp->syndrome); + context->status_code = syndrome_to_errno(syndrome); + context->status = MLX5_FPGA_IPSEC_SACMD_COMPLETE; + + if (context->status_code) + mlx5_fpga_warn(fdev, "IPSec SADB command failed with syndrome %08x\n", + syndrome); + complete(&context->complete); +} + +void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev, + struct mlx5_accel_ipsec_sa *cmd) +{ + struct mlx5_ipsec_command_context *context; + struct mlx5_fpga_device *fdev = mdev->fpga; + unsigned long flags; + int res = 0; + + BUILD_BUG_ON((sizeof(struct mlx5_accel_ipsec_sa) & 3) != 0); + if (!fdev || !fdev->ipsec) + return ERR_PTR(-EOPNOTSUPP); + + context = kzalloc(sizeof(*context), GFP_ATOMIC); + if (!context) + return ERR_PTR(-ENOMEM); + + memcpy(&context->sa, cmd, sizeof(*cmd)); + context->buf.complete = mlx5_fpga_ipsec_send_complete; + context->buf.sg[0].size = sizeof(context->sa); + context->buf.sg[0].data = &context->sa; + init_completion(&context->complete); + context->dev = fdev; + spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); + list_add_tail(&context->list, &fdev->ipsec->pending_cmds); + spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); + + context->status = MLX5_FPGA_IPSEC_SACMD_PENDING; + + res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf); + if (res) { + mlx5_fpga_warn(fdev, "Failure sending IPSec command: %d\n", + res); + spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); + list_del(&context->list); + spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); + kfree(context); + return ERR_PTR(res); + } + /* Context will be freed by wait func after completion */ + return context; +} + +int mlx5_fpga_ipsec_sa_cmd_wait(void *ctx) +{ + struct mlx5_ipsec_command_context *context = ctx; + int res; + + res = wait_for_completion_killable(&context->complete); + if (res) { + mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n"); + return -EINTR; + } + + if (context->status == MLX5_FPGA_IPSEC_SACMD_COMPLETE) + res = context->status_code; + else + res = -EIO; + + kfree(context); + return res; +} + +u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + u32 ret = 0; + + if (mlx5_fpga_is_ipsec_device(mdev)) + ret |= MLX5_ACCEL_IPSEC_DEVICE; + else + return ret; + + if (!fdev->ipsec) + return ret; + + if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp)) + ret |= MLX5_ACCEL_IPSEC_ESP; + + if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6)) + ret |= MLX5_ACCEL_IPSEC_IPV6; + + if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso)) + ret |= MLX5_ACCEL_IPSEC_LSO; + + return ret; +} + +unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + + if (!fdev || !fdev->ipsec) + return 0; + + return MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, + number_of_ipsec_counters); +} + +int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, + unsigned int counters_count) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + unsigned int i; + __be32 *data; + u32 count; + u64 addr; + int ret; + + if (!fdev || !fdev->ipsec) + return 0; + + addr = (u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, + ipsec_counters_addr_low) + + ((u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, + ipsec_counters_addr_high) << 32); + + count = mlx5_fpga_ipsec_counters_count(mdev); + + 
data = kzalloc(sizeof(*data) * count * 2, GFP_KERNEL); + if (!data) { + ret = -ENOMEM; + goto out; + } + + ret = mlx5_fpga_mem_read(fdev, count * sizeof(u64), addr, data, + MLX5_FPGA_ACCESS_TYPE_DONTCARE); + if (ret < 0) { + mlx5_fpga_err(fdev, "Failed to read IPSec counters from HW: %d\n", + ret); + goto out; + } + ret = 0; + + if (count > counters_count) + count = counters_count; + + /* Each counter is low word, then high. But each word is big-endian */ + for (i = 0; i < count; i++) + counters[i] = (u64)ntohl(data[i * 2]) | + ((u64)ntohl(data[i * 2 + 1]) << 32); + +out: + kfree(data); + return ret; +} + +int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_conn_attr init_attr = {0}; + struct mlx5_fpga_device *fdev = mdev->fpga; + struct mlx5_fpga_conn *conn; + int err; + + if (!mlx5_fpga_is_ipsec_device(mdev)) + return 0; + + fdev->ipsec = kzalloc(sizeof(*fdev->ipsec), GFP_KERNEL); + if (!fdev->ipsec) + return -ENOMEM; + + err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps), + fdev->ipsec->caps); + if (err) { + mlx5_fpga_err(fdev, "Failed to retrieve IPSec extended capabilities: %d\n", + err); + goto error; + } + + INIT_LIST_HEAD(&fdev->ipsec->pending_cmds); + spin_lock_init(&fdev->ipsec->pending_cmds_lock); + + init_attr.rx_size = SBU_QP_QUEUE_SIZE; + init_attr.tx_size = SBU_QP_QUEUE_SIZE; + init_attr.recv_cb = mlx5_fpga_ipsec_recv; + init_attr.cb_arg = fdev; + conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr); + if (IS_ERR(conn)) { + err = PTR_ERR(conn); + mlx5_fpga_err(fdev, "Error creating IPSec command connection %d\n", + err); + goto error; + } + fdev->ipsec->conn = conn; + return 0; + +error: + kfree(fdev->ipsec); + fdev->ipsec = NULL; + return err; +} + +void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + + if (!mlx5_fpga_is_ipsec_device(mdev)) + return; + + mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn); + kfree(fdev->ipsec); + fdev->ipsec = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h new file mode 100644 index 000000000000..26a3e4b56972 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
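The counter read above copies raw words out of FPGA memory in which each 64-bit counter is stored low 32-bit word first, then high word, with each word big-endian. A stand-alone round-trip of that layout, with an arbitrarily chosen test value, could look like:

#include <arpa/inet.h>	/* htonl(), ntohl() */
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	uint64_t value = 0x0000000100000002ULL;	/* arbitrary test counter */

	/* Encode the way the FPGA lays it out: low word first, each word
	 * big-endian.
	 */
	uint32_t raw[2] = {
		htonl((uint32_t)(value & 0xffffffff)),	/* low 32 bits  */
		htonl((uint32_t)(value >> 32)),		/* high 32 bits */
	};

	/* Decode exactly as the driver's loop does */
	uint64_t decoded = (uint64_t)ntohl(raw[0]) |
			   ((uint64_t)ntohl(raw[1]) << 32);

	printf("decoded %s: 0x%016" PRIx64 "\n",
	       decoded == value ? "ok" : "BAD", decoded);
	return 0;
}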
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5_FPGA_IPSEC_H__ +#define __MLX5_FPGA_IPSEC_H__ + +#include "accel/ipsec.h" + +#ifdef CONFIG_MLX5_FPGA + +void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev, + struct mlx5_accel_ipsec_sa *cmd); +int mlx5_fpga_ipsec_sa_cmd_wait(void *context); + +u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev); +unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev); +int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, + unsigned int counters_count); + +int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev); +void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev); + +#else + +static inline void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev, + struct mlx5_accel_ipsec_sa *cmd) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline int mlx5_fpga_ipsec_sa_cmd_wait(void *context) +{ + return -EOPNOTSUPP; +} + +static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline unsigned int +mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, + u64 *counters) +{ + return 0; +} + +static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev) +{ +} + +#endif /* CONFIG_MLX5_FPGA */ + +#endif /* __MLX5_FPGA_SADB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c new file mode 100644 index 000000000000..3c11d6e2160a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
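Because the SBU QP delivers responses in order, the receive path in fpga/ipsec.c above simply pops the oldest entry from pending_cmds and sanity-checks the echoed SA handle. A simplified user-space model of that matching, using hypothetical handles and a fixed array instead of the driver's list and spinlock:

#include <stdio.h>

struct cmd {
	unsigned int sw_sa_handle;	/* echoed back by the device */
	int done;
};

#define QUEUE_LEN 4	/* toy queue; no overflow handling in this sketch */

static struct cmd pending[QUEUE_LEN];
static unsigned int head, tail;	/* head = oldest outstanding command */

static void send_cmd(unsigned int handle)
{
	pending[tail % QUEUE_LEN] = (struct cmd){ .sw_sa_handle = handle };
	tail++;
}

/* Called per response; responses arrive in the same order commands left. */
static void recv_resp(unsigned int handle)
{
	struct cmd *c;

	if (head == tail) {
		printf("response 0x%x without pending command\n", handle);
		return;
	}
	c = &pending[head % QUEUE_LEN];
	head++;
	if (c->sw_sa_handle != handle) {	/* mismatch: drop, like the driver */
		printf("handle mismatch: cmd 0x%x vs resp 0x%x\n",
		       c->sw_sa_handle, handle);
		return;
	}
	c->done = 1;
	printf("command 0x%x completed\n", handle);
}

int main(void)
{
	send_cmd(0x10);
	send_cmd(0x11);
	recv_resp(0x10);
	recv_resp(0x11);
	recv_resp(0x12);	/* nothing pending: reported and ignored */
	return 0;
}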
+ * + */ + +#include <linux/mlx5/device.h> + +#include "fpga/core.h" +#include "fpga/conn.h" +#include "fpga/sdk.h" + +struct mlx5_fpga_conn * +mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr) +{ + return mlx5_fpga_conn_create(fdev, attr, MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP); +} +EXPORT_SYMBOL(mlx5_fpga_sbu_conn_create); + +void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn) +{ + mlx5_fpga_conn_destroy(conn); +} +EXPORT_SYMBOL(mlx5_fpga_sbu_conn_destroy); + +int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + return mlx5_fpga_conn_send(conn, buf); +} +EXPORT_SYMBOL(mlx5_fpga_sbu_conn_sendmsg); + +static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size, + u64 addr, u8 *buf) +{ + size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX; + size_t bytes_done = 0; + u8 actual_size; + int err; + + if (!fdev->mdev) + return -ENOTCONN; + + while (bytes_done < size) { + actual_size = min(max_size, (size - bytes_done)); + + err = mlx5_fpga_access_reg(fdev->mdev, actual_size, + addr + bytes_done, + buf + bytes_done, false); + if (err) { + mlx5_fpga_err(fdev, "Failed to read over I2C: %d\n", + err); + break; + } + + bytes_done += actual_size; + } + + return err; +} + +static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size, + u64 addr, u8 *buf) +{ + size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX; + size_t bytes_done = 0; + u8 actual_size; + int err; + + if (!fdev->mdev) + return -ENOTCONN; + + while (bytes_done < size) { + actual_size = min(max_size, (size - bytes_done)); + + err = mlx5_fpga_access_reg(fdev->mdev, actual_size, + addr + bytes_done, + buf + bytes_done, true); + if (err) { + mlx5_fpga_err(fdev, "Failed to write FPGA crspace\n"); + break; + } + + bytes_done += actual_size; + } + + return err; +} + +int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type) +{ + int ret; + + switch (access_type) { + case MLX5_FPGA_ACCESS_TYPE_I2C: + ret = mlx5_fpga_mem_read_i2c(fdev, size, addr, buf); + if (ret) + return ret; + break; + default: + mlx5_fpga_warn(fdev, "Unexpected read access_type %u\n", + access_type); + return -EACCES; + } + + return size; +} +EXPORT_SYMBOL(mlx5_fpga_mem_read); + +int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type) +{ + int ret; + + switch (access_type) { + case MLX5_FPGA_ACCESS_TYPE_I2C: + ret = mlx5_fpga_mem_write_i2c(fdev, size, addr, buf); + if (ret) + return ret; + break; + default: + mlx5_fpga_warn(fdev, "Unexpected write access_type %u\n", + access_type); + return -EACCES; + } + + return size; +} +EXPORT_SYMBOL(mlx5_fpga_mem_write); + +int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf) +{ + return mlx5_fpga_sbu_caps(fdev->mdev, buf, size); +} +EXPORT_SYMBOL(mlx5_fpga_get_sbu_caps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h new file mode 100644 index 000000000000..baa537e54a49 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
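The I2C helpers in fpga/sdk.c above move an arbitrarily sized buffer by slicing it into register accesses of at most MLX5_FPGA_ACCESS_REG_SIZE_MAX bytes, advancing the FPGA address and the buffer offset together and stopping at the first failure. A user-space model of that slicing, where the chunk size and the transfer routine are purely illustrative:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define CHUNK_MAX 32	/* stand-in for MLX5_FPGA_ACCESS_REG_SIZE_MAX */

/* Pretend register access: here it only reports what would be transferred. */
static int access_reg(uint64_t addr, const uint8_t *buf, size_t len)
{
	printf("xfer %zu bytes at 0x%llx\n", len, (unsigned long long)addr);
	return 0;
}

static int mem_write_chunked(uint64_t addr, const uint8_t *buf, size_t size)
{
	size_t done = 0;
	int err = 0;

	while (done < size) {
		size_t chunk = size - done;

		if (chunk > CHUNK_MAX)
			chunk = CHUNK_MAX;

		err = access_reg(addr + done, buf + done, chunk);
		if (err)
			break;		/* stop on partial transfer, like the driver */

		done += chunk;
	}
	return err;
}

int main(void)
{
	uint8_t payload[100];

	memset(payload, 0xab, sizeof(payload));
	return mem_write_chunked(0x1000, payload, sizeof(payload));
}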
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef MLX5_FPGA_SDK_H +#define MLX5_FPGA_SDK_H + +#include <linux/types.h> +#include <linux/dma-direction.h> + +/** + * DOC: Innova SDK + * This header defines the in-kernel API for Innova FPGA client drivers. + */ + +enum mlx5_fpga_access_type { + MLX5_FPGA_ACCESS_TYPE_I2C = 0x0, + MLX5_FPGA_ACCESS_TYPE_DONTCARE = 0x0, +}; + +struct mlx5_fpga_conn; +struct mlx5_fpga_device; + +/** + * struct mlx5_fpga_dma_entry - A scatter-gather DMA entry + */ +struct mlx5_fpga_dma_entry { + /** @data: Virtual address pointer to the data */ + void *data; + /** @size: Size in bytes of the data */ + unsigned int size; + /** @dma_addr: Private member. Physical DMA-mapped address of the data */ + dma_addr_t dma_addr; +}; + +/** + * struct mlx5_fpga_dma_buf - A packet buffer + * May contain up to 2 scatter-gather data entries + */ +struct mlx5_fpga_dma_buf { + /** @dma_dir: DMA direction */ + enum dma_data_direction dma_dir; + /** @sg: Scatter-gather entries pointing to the data in memory */ + struct mlx5_fpga_dma_entry sg[2]; + /** @list: Item in SQ backlog, for TX packets */ + struct list_head list; + /** + * @complete: Completion routine, for TX packets + * @conn: FPGA Connection this packet was sent to + * @fdev: FPGA device this packet was sent to + * @buf: The packet buffer + * @status: 0 if successful, or an error code otherwise + */ + void (*complete)(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_device *fdev, + struct mlx5_fpga_dma_buf *buf, u8 status); +}; + +/** + * struct mlx5_fpga_conn_attr - FPGA connection attributes + * Describes the attributes of a connection + */ +struct mlx5_fpga_conn_attr { + /** @tx_size: Size of connection TX queue, in packets */ + unsigned int tx_size; + /** @rx_size: Size of connection RX queue, in packets */ + unsigned int rx_size; + /** + * @recv_cb: Callback function which is called for received packets + * @cb_arg: The value provided in mlx5_fpga_conn_attr.cb_arg + * @buf: A buffer containing a received packet + * + * buf is guaranteed to only contain a single scatter-gather entry. + * The size of the actual packet received is specified in buf.sg[0].size + * When this callback returns, the packet buffer may be re-used for + * subsequent receives. 
+ */ + void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf); + void *cb_arg; +}; + +/** + * mlx5_fpga_sbu_conn_create() - Initialize a new FPGA SBU connection + * @fdev: The FPGA device + * @attr: Attributes of the new connection + * + * Sets up a new FPGA SBU connection with the specified attributes. + * The receive callback function may be called for incoming messages even + * before this function returns. + * + * The caller must eventually destroy the connection by calling + * mlx5_fpga_sbu_conn_destroy. + * + * Return: A new connection, or ERR_PTR() error value otherwise. + */ +struct mlx5_fpga_conn * +mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr); + +/** + * mlx5_fpga_sbu_conn_destroy() - Destroy an FPGA SBU connection + * @conn: The FPGA SBU connection to destroy + * + * Cleans up an FPGA SBU connection which was previously created with + * mlx5_fpga_sbu_conn_create. + */ +void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn); + +/** + * mlx5_fpga_sbu_conn_sendmsg() - Queue the transmission of a packet + * @fdev: An FPGA SBU connection + * @buf: The packet buffer + * + * Queues a packet for transmission over an FPGA SBU connection. + * The buffer should not be modified or freed until completion. + * Upon completion, the buf's complete() callback is invoked, indicating the + * success or error status of the transmission. + * + * Return: 0 if successful, or an error value otherwise. + */ +int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf); + +/** + * mlx5_fpga_mem_read() - Read from FPGA memory address space + * @fdev: The FPGA device + * @size: Size of chunk to read, in bytes + * @addr: Starting address to read from, in FPGA address space + * @buf: Buffer to read into + * @access_type: Method for reading + * + * Reads from the specified address into the specified buffer. + * The address may point to configuration space or to DDR. + * Large reads may be performed internally as several non-atomic operations. + * This function may sleep, so should not be called from atomic contexts. + * + * Return: 0 if successful, or an error value otherwise. + */ +int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type); + +/** + * mlx5_fpga_mem_write() - Write to FPGA memory address space + * @fdev: The FPGA device + * @size: Size of chunk to write, in bytes + * @addr: Starting address to write to, in FPGA address space + * @buf: Buffer which contains data to write + * @access_type: Method for writing + * + * Writes the specified buffer data to FPGA memory at the specified address. + * The address may point to configuration space or to DDR. + * Large writes may be performed internally as several non-atomic operations. + * This function may sleep, so should not be called from atomic contexts. + * + * Return: 0 if successful, or an error value otherwise. + */ +int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type); + +/** + * mlx5_fpga_get_sbu_caps() - Read the SBU capabilities + * @fdev: The FPGA device + * @size: Size of the buffer to read into + * @buf: Buffer to read the capabilities into + * + * Reads the FPGA SBU capabilities into the specified buffer. + * The format of the capabilities buffer is SBU-dependent. + * + * Return: 0 if successful + * -EINVAL if the buffer is not large enough to contain SBU caps + * or any other error value otherwise. 
+ */ +int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf); + +#endif /* MLX5_FPGA_SDK_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index fcec7bedd3cd..36ecc2b2e187 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -78,28 +78,33 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, MLX5_CMD_OP_CREATE_FLOW_TABLE); MLX5_SET(create_flow_table_in, in, table_type, type); - MLX5_SET(create_flow_table_in, in, level, level); - MLX5_SET(create_flow_table_in, in, log_size, log_size); + MLX5_SET(create_flow_table_in, in, flow_table_context.level, level); + MLX5_SET(create_flow_table_in, in, flow_table_context.log_size, log_size); if (vport) { MLX5_SET(create_flow_table_in, in, vport_number, vport); MLX5_SET(create_flow_table_in, in, other_vport, 1); } - MLX5_SET(create_flow_table_in, in, decap_en, en_encap_decap); - MLX5_SET(create_flow_table_in, in, encap_en, en_encap_decap); + MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en, + en_encap_decap); + MLX5_SET(create_flow_table_in, in, flow_table_context.encap_en, + en_encap_decap); switch (op_mod) { case FS_FT_OP_MOD_NORMAL: if (next_ft) { - MLX5_SET(create_flow_table_in, in, table_miss_mode, 1); - MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id); + MLX5_SET(create_flow_table_in, in, + flow_table_context.table_miss_action, 1); + MLX5_SET(create_flow_table_in, in, + flow_table_context.table_miss_id, next_ft->id); } break; case FS_FT_OP_MOD_LAG_DEMUX: MLX5_SET(create_flow_table_in, in, op_mod, 0x1); if (next_ft) - MLX5_SET(create_flow_table_in, in, lag_master_next_table_id, + MLX5_SET(create_flow_table_in, in, + flow_table_context.lag_master_next_table_id, next_ft->id); break; } @@ -146,10 +151,10 @@ int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID); if (next_ft) { MLX5_SET(modify_flow_table_in, in, - lag_master_next_table_id, next_ft->id); + flow_table_context.lag_master_next_table_id, next_ft->id); } else { MLX5_SET(modify_flow_table_in, in, - lag_master_next_table_id, 0); + flow_table_context.lag_master_next_table_id, 0); } } else { if (ft->vport) { @@ -160,11 +165,14 @@ int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, MLX5_SET(modify_flow_table_in, in, modify_field_select, MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID); if (next_ft) { - MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1); - MLX5_SET(modify_flow_table_in, in, table_miss_id, + MLX5_SET(modify_flow_table_in, in, + flow_table_context.table_miss_action, 1); + MLX5_SET(modify_flow_table_in, in, + flow_table_context.table_miss_id, next_ft->id); } else { - MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0); + MLX5_SET(modify_flow_table_in, in, + flow_table_context.table_miss_action, 0); } } @@ -232,11 +240,9 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, u32 *in; int err; - in = mlx5_vzalloc(inlen); - if (!in) { - mlx5_core_warn(dev, "failed to allocate inbox\n"); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) return -ENOMEM; - } MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); MLX5_SET(set_fte_in, in, op_mod, opmod); @@ -257,7 +263,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->modify_id); in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, match_value); - memcpy(in_match_value, &fte->val, 
MLX5_ST_SZ_BYTES(fte_match_param)); + memcpy(in_match_value, &fte->val, sizeof(fte->val)); in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { @@ -287,6 +293,9 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, } if (fte->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev, + log_max_flow_counter, + ft->type)); int list_size = 0; list_for_each_entry(dst, &fte->node.children, node.list) { @@ -299,12 +308,17 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); list_size++; } + if (list_size > max_list_size) { + err = -EINVAL; + goto err_out; + } MLX5_SET(flow_context, in_flow_context, flow_counter_list_size, list_size); } err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +err_out: kvfree(in); return err; } @@ -353,7 +367,7 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id) +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) { u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {0}; u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0}; @@ -368,7 +382,7 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id) return err; } -int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id) +int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id) { u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {0}; u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)] = {0}; @@ -379,7 +393,7 @@ int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id) return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, +int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, u64 *packets, u64 *bytes) { u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) + @@ -403,14 +417,14 @@ int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, } struct mlx5_cmd_fc_bulk { - u16 id; + u32 id; int num; int outlen; u32 out[0]; }; struct mlx5_cmd_fc_bulk * -mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num) +mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num) { struct mlx5_cmd_fc_bulk *b; int outlen = @@ -447,7 +461,7 @@ mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b) } void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, - struct mlx5_cmd_fc_bulk *b, u16 id, + struct mlx5_cmd_fc_bulk *b, u32 id, u64 *packets, u64 *bytes) { int index = id - b->id; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 0f98a7cf4877..c6d7bdf255b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -74,20 +74,20 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, u32 underlay_qpn); -int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id); -int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id); -int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); +int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id); +int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, u64 *packets, u64 *bytes); struct mlx5_cmd_fc_bulk; struct mlx5_cmd_fc_bulk * -mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num); +mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num); void mlx5_cmd_fc_bulk_free(struct 
mlx5_cmd_fc_bulk *b); int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b); void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, - struct mlx5_cmd_fc_bulk *b, u16 id, + struct mlx5_cmd_fc_bulk *b, u32 id, u64 *packets, u64 *bytes); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 8f5125ccd8d4..5a7bea688ec8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -36,6 +36,7 @@ #include "mlx5_core.h" #include "fs_core.h" #include "fs_cmd.h" +#include "diag/fs_tracepoint.h" #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) @@ -82,8 +83,8 @@ #define ETHTOOL_PRIO_NUM_LEVELS 1 #define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) -/* Vlan, mac, ttc, aRFS */ -#define KERNEL_NIC_PRIO_NUM_LEVELS 4 +/* Vlan, mac, ttc, inner ttc, aRFS */ +#define KERNEL_NIC_PRIO_NUM_LEVELS 5 #define KERNEL_NIC_NUM_PRIOS 1 /* One more level for tc */ #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) @@ -104,6 +105,7 @@ struct node_caps { size_t arr_sz; long *caps; }; + static struct init_tree_node { enum fs_node_type type; struct init_tree_node *children; @@ -149,6 +151,23 @@ enum fs_i_mutex_lock_class { FS_MUTEX_CHILD }; +static const struct rhashtable_params rhash_fte = { + .key_len = FIELD_SIZEOF(struct fs_fte, val), + .key_offset = offsetof(struct fs_fte, val), + .head_offset = offsetof(struct fs_fte, hash), + .automatic_shrinking = true, + .min_size = 1, +}; + +static const struct rhashtable_params rhash_fg = { + .key_len = FIELD_SIZEOF(struct mlx5_flow_group, mask), + .key_offset = offsetof(struct mlx5_flow_group, mask), + .head_offset = offsetof(struct mlx5_flow_group, hash), + .automatic_shrinking = true, + .min_size = 1, + +}; + static void del_rule(struct fs_node *node); static void del_flow_table(struct fs_node *node); static void del_flow_group(struct fs_node *node); @@ -254,71 +273,77 @@ static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns, return NULL; } -static bool masked_memcmp(void *mask, void *val1, void *val2, size_t size) +static bool check_last_reserved(const u32 *match_criteria) { - unsigned int i; - - for (i = 0; i < size; i++, mask++, val1++, val2++) - if ((*((u8 *)val1) & (*(u8 *)mask)) != - ((*(u8 *)val2) & (*(u8 *)mask))) - return false; + char *match_criteria_reserved = + MLX5_ADDR_OF(fte_match_param, match_criteria, MLX5_FTE_MATCH_PARAM_RESERVED); - return true; + return !match_criteria_reserved[0] && + !memcmp(match_criteria_reserved, match_criteria_reserved + 1, + MLX5_FLD_SZ_BYTES(fte_match_param, + MLX5_FTE_MATCH_PARAM_RESERVED) - 1); } -static bool compare_match_value(struct mlx5_flow_group_mask *mask, - void *fte_param1, void *fte_param2) +static bool check_valid_mask(u8 match_criteria_enable, const u32 *match_criteria) { - if (mask->match_criteria_enable & - 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) { - void *fte_match1 = MLX5_ADDR_OF(fte_match_param, - fte_param1, outer_headers); - void *fte_match2 = MLX5_ADDR_OF(fte_match_param, - fte_param2, outer_headers); - void *fte_mask = MLX5_ADDR_OF(fte_match_param, - mask->match_criteria, outer_headers); + if (match_criteria_enable & ~( + (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) | + (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) | + (1 << 
MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS))) + return false; + + if (!(match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS)) { + char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, + match_criteria, outer_headers); - if (!masked_memcmp(fte_mask, fte_match1, fte_match2, - MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4))) + if (fg_type_mask[0] || + memcmp(fg_type_mask, fg_type_mask + 1, + MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1)) return false; } - if (mask->match_criteria_enable & - 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) { - void *fte_match1 = MLX5_ADDR_OF(fte_match_param, - fte_param1, misc_parameters); - void *fte_match2 = MLX5_ADDR_OF(fte_match_param, - fte_param2, misc_parameters); - void *fte_mask = MLX5_ADDR_OF(fte_match_param, - mask->match_criteria, misc_parameters); + if (!(match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS)) { + char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, + match_criteria, misc_parameters); - if (!masked_memcmp(fte_mask, fte_match1, fte_match2, - MLX5_ST_SZ_BYTES(fte_match_set_misc))) + if (fg_type_mask[0] || + memcmp(fg_type_mask, fg_type_mask + 1, + MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1)) return false; } - if (mask->match_criteria_enable & - 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) { - void *fte_match1 = MLX5_ADDR_OF(fte_match_param, - fte_param1, inner_headers); - void *fte_match2 = MLX5_ADDR_OF(fte_match_param, - fte_param2, inner_headers); - void *fte_mask = MLX5_ADDR_OF(fte_match_param, - mask->match_criteria, inner_headers); + if (!(match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS)) { + char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, + match_criteria, inner_headers); - if (!masked_memcmp(fte_mask, fte_match1, fte_match2, - MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4))) + if (fg_type_mask[0] || + memcmp(fg_type_mask, fg_type_mask + 1, + MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1)) return false; } - return true; + + return check_last_reserved(match_criteria); } -static bool compare_match_criteria(u8 match_criteria_enable1, - u8 match_criteria_enable2, - void *mask1, void *mask2) +static bool check_valid_spec(const struct mlx5_flow_spec *spec) { - return match_criteria_enable1 == match_criteria_enable2 && - !memcmp(mask1, mask2, MLX5_ST_SZ_BYTES(fte_match_param)); + int i; + + if (!check_valid_mask(spec->match_criteria_enable, spec->match_criteria)) { + pr_warn("mlx5_core: Match criteria given mismatches match_criteria_enable\n"); + return false; + } + + for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++) + if (spec->match_value[i] & ~spec->match_criteria[i]) { + pr_warn("mlx5_core: match_value differs from match_criteria\n"); + return false; + } + + return check_last_reserved(spec->match_value); } static struct mlx5_flow_root_namespace *find_root(struct fs_node *node) @@ -359,6 +384,8 @@ static void del_flow_table(struct fs_node *node) err = mlx5_cmd_destroy_flow_table(dev, ft); if (err) mlx5_core_warn(dev, "flow steering can't destroy ft\n"); + ida_destroy(&ft->fte_allocator); + rhltable_destroy(&ft->fgs_hash); fs_get_obj(prio, ft->node.parent); prio->num_ft--; } @@ -369,24 +396,16 @@ static void del_rule(struct fs_node *node) struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; struct fs_fte *fte; - u32 *match_value; int modify_mask; struct mlx5_core_dev *dev = get_dev(node); - int match_len = MLX5_ST_SZ_BYTES(fte_match_param); int err; bool update_fte 
= false; - match_value = mlx5_vzalloc(match_len); - if (!match_value) { - mlx5_core_warn(dev, "failed to allocate inbox\n"); - return; - } - fs_get_obj(rule, node); fs_get_obj(fte, rule->node.parent); fs_get_obj(fg, fte->node.parent); - memcpy(match_value, fte->val, sizeof(fte->val)); fs_get_obj(ft, fg->node.parent); + trace_mlx5_fs_del_rule(rule); list_del(&rule->node.list); if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { mutex_lock(&rule->dest_attr.ft->lock); @@ -415,7 +434,18 @@ out: "%s can't del rule fg id=%d fte_index=%d\n", __func__, fg->id, fte->index); } - kvfree(match_value); +} + +static void destroy_fte(struct fs_fte *fte, struct mlx5_flow_group *fg) +{ + struct mlx5_flow_table *ft; + int ret; + + ret = rhashtable_remove_fast(&fg->ftes_hash, &fte->hash, rhash_fte); + WARN_ON(ret); + fte->status = 0; + fs_get_obj(ft, fg->node.parent); + ida_simple_remove(&ft->fte_allocator, fte->index); } static void del_fte(struct fs_node *node) @@ -429,6 +459,7 @@ static void del_fte(struct fs_node *node) fs_get_obj(fte, node); fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); + trace_mlx5_fs_del_fte(fte); dev = get_dev(&ft->node); err = mlx5_cmd_delete_fte(dev, ft, @@ -438,8 +469,7 @@ static void del_fte(struct fs_node *node) "flow steering can't delete fte in index %d of flow group id %d\n", fte->index, fg->id); - fte->status = 0; - fg->num_ftes--; + destroy_fte(fte, fg); } static void del_flow_group(struct fs_node *node) @@ -447,14 +477,21 @@ static void del_flow_group(struct fs_node *node) struct mlx5_flow_group *fg; struct mlx5_flow_table *ft; struct mlx5_core_dev *dev; + int err; fs_get_obj(fg, node); fs_get_obj(ft, fg->node.parent); dev = get_dev(&ft->node); + trace_mlx5_fs_del_fg(fg); if (ft->autogroup.active) ft->autogroup.num_groups--; + rhashtable_destroy(&fg->ftes_hash); + err = rhltable_remove(&ft->fgs_hash, + &fg->hash, + rhash_fg); + WARN_ON(err); if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", fg->id, ft->id); @@ -489,10 +526,17 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) u8 match_criteria_enable = MLX5_GET(create_flow_group_in, create_fg_in, match_criteria_enable); + int ret; + fg = kzalloc(sizeof(*fg), GFP_KERNEL); if (!fg) return ERR_PTR(-ENOMEM); + ret = rhashtable_init(&fg->ftes_hash, &rhash_fte); + if (ret) { + kfree(fg); + return ERR_PTR(ret); + } fg->mask.match_criteria_enable = match_criteria_enable; memcpy(&fg->mask.match_criteria, match_criteria, sizeof(fg->mask.match_criteria)); @@ -510,10 +554,17 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft u32 flags) { struct mlx5_flow_table *ft; + int ret; ft = kzalloc(sizeof(*ft), GFP_KERNEL); if (!ft) - return NULL; + return ERR_PTR(-ENOMEM); + + ret = rhltable_init(&ft->fgs_hash, &rhash_fg); + if (ret) { + kfree(ft); + return ERR_PTR(ret); + } ft->level = level; ft->node.type = FS_TYPE_FLOW_TABLE; @@ -524,6 +575,7 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft ft->flags = flags; INIT_LIST_HEAD(&ft->fwd_rules); mutex_init(&ft->lock); + ida_init(&ft->fte_allocator); return ft; } @@ -813,8 +865,8 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa ft_attr->max_fte ? 
roundup_pow_of_two(ft_attr->max_fte) : 0, root->table_type, op_mod, ft_attr->flags); - if (!ft) { - err = -ENOMEM; + if (IS_ERR(ft)) { + err = PTR_ERR(ft); goto unlock_root; } @@ -840,6 +892,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa destroy_ft: mlx5_cmd_destroy_flow_table(root->dev, ft); free_ft: + ida_destroy(&ft->fte_allocator); kfree(ft); unlock_root: mutex_unlock(&root->chain_lock); @@ -925,11 +978,13 @@ static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table * if (IS_ERR(fg)) return fg; + err = rhltable_insert(&ft->fgs_hash, &fg->hash, rhash_fg); + if (err) + goto err_free_fg; + err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); - if (err) { - kfree(fg); - return ERR_PTR(err); - } + if (err) + goto err_remove_fg; if (ft->autogroup.active) ft->autogroup.num_groups++; @@ -939,14 +994,33 @@ static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table * /* Add node to group list */ list_add(&fg->node.list, prev_fg); + trace_mlx5_fs_add_fg(fg); return fg; + +err_remove_fg: + WARN_ON(rhltable_remove(&ft->fgs_hash, + &fg->hash, + rhash_fg)); +err_free_fg: + rhashtable_destroy(&fg->ftes_hash); + kfree(fg); + + return ERR_PTR(err); } struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *fg_in) { + void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, + fg_in, match_criteria); + u8 match_criteria_enable = MLX5_GET(create_flow_group_in, + fg_in, + match_criteria_enable); struct mlx5_flow_group *fg; + if (!check_valid_mask(match_criteria_enable, match_criteria)) + return ERR_PTR(-EINVAL); + if (ft->autogroup.active) return ERR_PTR(-EPERM); @@ -1103,43 +1177,38 @@ free_handle: return ERR_PTR(err); } -/* Assumed fg is locked */ -static unsigned int get_free_fte_index(struct mlx5_flow_group *fg, - struct list_head **prev) -{ - struct fs_fte *fte; - unsigned int start = fg->start_index; - - if (prev) - *prev = &fg->node.children; - - /* assumed list is sorted by index */ - fs_for_each_fte(fte, fg) { - if (fte->index != start) - return start; - start++; - if (prev) - *prev = &fte->node.list; - } - - return start; -} - -/* prev is output, prev->next = new_fte */ static struct fs_fte *create_fte(struct mlx5_flow_group *fg, u32 *match_value, - struct mlx5_flow_act *flow_act, - struct list_head **prev) + struct mlx5_flow_act *flow_act) { + struct mlx5_flow_table *ft; struct fs_fte *fte; int index; + int ret; + + fs_get_obj(ft, fg->node.parent); + index = ida_simple_get(&ft->fte_allocator, fg->start_index, + fg->start_index + fg->max_ftes, + GFP_KERNEL); + if (index < 0) + return ERR_PTR(index); - index = get_free_fte_index(fg, prev); fte = alloc_fte(flow_act, match_value, index); - if (IS_ERR(fte)) - return fte; + if (IS_ERR(fte)) { + ret = PTR_ERR(fte); + goto err_alloc; + } + ret = rhashtable_insert_fast(&fg->ftes_hash, &fte->hash, rhash_fte); + if (ret) + goto err_hash; return fte; + +err_hash: + kfree(fte); +err_alloc: + ida_simple_remove(&ft->fte_allocator, index); + return ERR_PTR(ret); } static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, @@ -1157,7 +1226,7 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, if (!ft->autogroup.active) return ERR_PTR(-ENOENT); - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return ERR_PTR(-ENOMEM); @@ -1227,79 +1296,104 @@ static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, return NULL; } +static bool check_conflicting_actions(u32 action1, u32 action2) +{ + 
u32 xored_actions = action1 ^ action2; + + /* if one rule only wants to count, it's ok */ + if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT || + action2 == MLX5_FLOW_CONTEXT_ACTION_COUNT) + return false; + + if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_ENCAP | + MLX5_FLOW_CONTEXT_ACTION_DECAP)) + return true; + + return false; +} + +static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act) +{ + if (check_conflicting_actions(flow_act->action, fte->action)) { + mlx5_core_warn(get_dev(&fte->node), + "Found two FTEs with conflicting actions\n"); + return -EEXIST; + } + + if (fte->flow_tag != flow_act->flow_tag) { + mlx5_core_warn(get_dev(&fte->node), + "FTE flow tag %u already exists with different flow tag %u\n", + fte->flow_tag, + flow_act->flow_tag); + return -EEXIST; + } + + return 0; +} + static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, u32 *match_value, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, - int dest_num) + int dest_num, + struct fs_fte *fte) { struct mlx5_flow_handle *handle; struct mlx5_flow_table *ft; - struct list_head *prev; - struct fs_fte *fte; int i; - nested_lock_ref_node(&fg->node, FS_MUTEX_PARENT); - fs_for_each_fte(fte, fg) { + if (fte) { + int old_action; + int ret; + nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); - if (compare_match_value(&fg->mask, match_value, &fte->val) && - (flow_act->action & fte->action)) { - int old_action = fte->action; - - if (fte->flow_tag != flow_act->flow_tag) { - mlx5_core_warn(get_dev(&fte->node), - "FTE flow tag %u already exists with different flow tag %u\n", - fte->flow_tag, - flow_act->flow_tag); - handle = ERR_PTR(-EEXIST); - goto unlock_fte; - } + ret = check_conflicting_ftes(fte, flow_act); + if (ret) { + handle = ERR_PTR(ret); + goto unlock_fte; + } - fte->action |= flow_act->action; - handle = add_rule_fte(fte, fg, dest, dest_num, - old_action != flow_act->action); - if (IS_ERR(handle)) { - fte->action = old_action; - goto unlock_fte; - } else { - goto add_rules; - } + old_action = fte->action; + fte->action |= flow_act->action; + handle = add_rule_fte(fte, fg, dest, dest_num, + old_action != flow_act->action); + if (IS_ERR(handle)) { + fte->action = old_action; + goto unlock_fte; + } else { + trace_mlx5_fs_set_fte(fte, false); + goto add_rules; } - unlock_ref_node(&fte->node); } fs_get_obj(ft, fg->node.parent); - if (fg->num_ftes >= fg->max_ftes) { - handle = ERR_PTR(-ENOSPC); - goto unlock_fg; - } - fte = create_fte(fg, match_value, flow_act, &prev); - if (IS_ERR(fte)) { - handle = (void *)fte; - goto unlock_fg; - } + fte = create_fte(fg, match_value, flow_act); + if (IS_ERR(fte)) + return (void *)fte; tree_init_node(&fte->node, 0, del_fte); nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); handle = add_rule_fte(fte, fg, dest, dest_num, false); if (IS_ERR(handle)) { unlock_ref_node(&fte->node); + destroy_fte(fte, fg); kfree(fte); - goto unlock_fg; + return handle; } - fg->num_ftes++; - tree_add_node(&fte->node, &fg->node); - list_add(&fte->node.list, prev); + /* fte list isn't sorted */ + list_add_tail(&fte->node.list, &fg->node.children); + trace_mlx5_fs_set_fte(fte, true); add_rules: for (i = 0; i < handle->num_rules; i++) { - if (atomic_read(&handle->rule[i]->node.refcount) == 1) + if (atomic_read(&handle->rule[i]->node.refcount) == 1) { tree_add_node(&handle->rule[i]->node, &fte->node); + trace_mlx5_fs_add_rule(handle->rule[i]); + } } unlock_fte: unlock_ref_node(&fte->node); -unlock_fg: - 
unlock_ref_node(&fg->node); return handle; } @@ -1348,6 +1442,96 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, } static struct mlx5_flow_handle * +try_add_to_existing_fg(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num) +{ + struct mlx5_flow_group *g; + struct mlx5_flow_handle *rule = ERR_PTR(-ENOENT); + struct rhlist_head *tmp, *list; + struct match_list { + struct list_head list; + struct mlx5_flow_group *g; + } match_list, *iter; + LIST_HEAD(match_head); + + rcu_read_lock(); + /* Collect all fgs which has a matching match_criteria */ + list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg); + rhl_for_each_entry_rcu(g, tmp, list, hash) { + struct match_list *curr_match; + + if (likely(list_empty(&match_head))) { + match_list.g = g; + list_add_tail(&match_list.list, &match_head); + continue; + } + curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); + + if (!curr_match) { + rcu_read_unlock(); + rule = ERR_PTR(-ENOMEM); + goto free_list; + } + curr_match->g = g; + list_add_tail(&curr_match->list, &match_head); + } + rcu_read_unlock(); + + /* Try to find a fg that already contains a matching fte */ + list_for_each_entry(iter, &match_head, list) { + struct fs_fte *fte; + + g = iter->g; + nested_lock_ref_node(&g->node, FS_MUTEX_PARENT); + fte = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value, + rhash_fte); + if (fte) { + rule = add_rule_fg(g, spec->match_value, + flow_act, dest, dest_num, fte); + unlock_ref_node(&g->node); + goto free_list; + } + unlock_ref_node(&g->node); + } + + /* No group with matching fte found. Try to add a new fte to any + * matching fg. + */ + list_for_each_entry(iter, &match_head, list) { + g = iter->g; + + nested_lock_ref_node(&g->node, FS_MUTEX_PARENT); + rule = add_rule_fg(g, spec->match_value, + flow_act, dest, dest_num, NULL); + if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) { + unlock_ref_node(&g->node); + goto free_list; + } + unlock_ref_node(&g->node); + } + +free_list: + if (!list_empty(&match_head)) { + struct match_list *match_tmp; + + /* The most common case is having one FG. Since we want to + * optimize this case, we save the first on the stack. + * Therefore, no need to free it. 
+ */ + list_del(&list_first_entry(&match_head, typeof(*iter), list)->list); + list_for_each_entry_safe(iter, match_tmp, &match_head, list) { + list_del(&iter->list); + kfree(iter); + } + } + + return rule; +} + +static struct mlx5_flow_handle * _mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, @@ -1359,22 +1543,18 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_handle *rule; int i; + if (!check_valid_spec(spec)) + return ERR_PTR(-EINVAL); + for (i = 0; i < dest_num; i++) { if (!dest_is_valid(&dest[i], flow_act->action, ft)) return ERR_PTR(-EINVAL); } nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); - fs_for_each_fg(g, ft) - if (compare_match_criteria(g->mask.match_criteria_enable, - spec->match_criteria_enable, - g->mask.match_criteria, - spec->match_criteria)) { - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num); - if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) - goto unlock; - } + rule = try_add_to_existing_fg(ft, spec, flow_act, dest, dest_num); + if (!IS_ERR(rule)) + goto unlock; g = create_autogroup(ft, spec->match_criteria_enable, spec->match_criteria); @@ -1383,7 +1563,8 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, goto unlock; } - rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num); + rule = add_rule_fg(g, spec->match_value, flow_act, dest, + dest_num, NULL); if (IS_ERR(rule)) { /* Remove assumes refcount > 0 and autogroup creates a group * with a refcount = 0. @@ -1777,7 +1958,7 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering struct mlx5_flow_namespace *ns; /* Create the root namespace */ - root_ns = mlx5_vzalloc(sizeof(*root_ns)); + root_ns = kvzalloc(sizeof(*root_ns), GFP_KERNEL); if (!root_ns) return NULL; @@ -1860,7 +2041,6 @@ static int create_anchor_flow_table(struct mlx5_flow_steering *steering) static int init_root_ns(struct mlx5_flow_steering *steering) { - steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX); if (!steering->root_ns) goto cleanup; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 990acee6fb09..48dd78975062 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -34,6 +34,7 @@ #define _MLX5_FS_CORE_ #include <linux/mlx5/fs.h> +#include <linux/rhashtable.h> enum fs_node_type { FS_TYPE_NAMESPACE, @@ -51,6 +52,7 @@ enum fs_flow_table_type { FS_FT_FDB = 0X4, FS_FT_SNIFFER_RX = 0X5, FS_FT_SNIFFER_TX = 0X6, + FS_FT_MAX_TYPE = FS_FT_SNIFFER_TX, }; enum fs_flow_table_op_mod { @@ -118,6 +120,8 @@ struct mlx5_flow_table { /* FWD rules that point on this flow table */ struct list_head fwd_rules; u32 flags; + struct ida fte_allocator; + struct rhltable fgs_hash; }; struct mlx5_fc_cache { @@ -136,17 +140,29 @@ struct mlx5_fc { u64 lastpackets; u64 lastbytes; - u16 id; + u32 id; bool deleted; bool aging; struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; }; +#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_600 +/* Calculate the fte_match_param length and without the reserved length. + * Make sure the reserved field is the last. 
+ */ +#define MLX5_ST_SZ_DW_MATCH_PARAM \ + ((MLX5_BYTE_OFF(fte_match_param, MLX5_FTE_MATCH_PARAM_RESERVED) / sizeof(u32)) + \ + BUILD_BUG_ON_ZERO(MLX5_ST_SZ_BYTES(fte_match_param) != \ + MLX5_FLD_SZ_BYTES(fte_match_param, \ + MLX5_FTE_MATCH_PARAM_RESERVED) +\ + MLX5_BYTE_OFF(fte_match_param, \ + MLX5_FTE_MATCH_PARAM_RESERVED))) + /* Type of children is mlx5_flow_rule */ struct fs_fte { struct fs_node node; - u32 val[MLX5_ST_SZ_DW(fte_match_param)]; + u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; u32 dests_size; u32 flow_tag; u32 index; @@ -155,6 +171,7 @@ struct fs_fte { u32 modify_id; enum fs_fte_status status; struct mlx5_fc *counter; + struct rhash_head hash; }; /* Type of children is mlx5_flow_table/namespace */ @@ -174,7 +191,7 @@ struct mlx5_flow_namespace { struct mlx5_flow_group_mask { u8 match_criteria_enable; - u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; + u32 match_criteria[MLX5_ST_SZ_DW_MATCH_PARAM]; }; /* Type of children is fs_fte */ @@ -183,8 +200,9 @@ struct mlx5_flow_group { struct mlx5_flow_group_mask mask; u32 start_index; u32 max_ftes; - u32 num_ftes; u32 id; + struct rhashtable ftes_hash; + struct rhlist_head hash; }; struct mlx5_flow_root_namespace { @@ -243,4 +261,14 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev); #define fs_for_each_dst(pos, fte) \ fs_list_for_each_entry(pos, &(fte)->node.children) +#define MLX5_CAP_FLOWTABLE_TYPE(mdev, cap, type) ( \ + (type == FS_FT_NIC_RX) ? MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) : \ + (type == FS_FT_ESW_EGRESS_ACL) ? MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) : \ + (type == FS_FT_ESW_INGRESS_ACL) ? MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) : \ + (type == FS_FT_FDB) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) : \ + (type == FS_FT_SNIFFER_RX) ? MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) : \ + (type == FS_FT_SNIFFER_TX) ? MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) : \ + (BUILD_BUG_ON_ZERO(FS_FT_SNIFFER_TX != FS_FT_MAX_TYPE))\ + ) + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 6507d8acc54d..89d1f8650033 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -38,6 +38,8 @@ #include "fs_cmd.h" #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) +/* Max number of counters to query in bulk read is 32K */ +#define MLX5_SW_MAX_COUNTERS_BULK BIT(15) /* locking scheme: * @@ -90,16 +92,21 @@ static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter) rb_insert_color(&counter->node, root); } +/* The function returns the last node that was queried so the caller + * function can continue calling it till all counters are queried. 
+ */ static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev, struct mlx5_fc *first, - u16 last_id) + u32 last_id) { struct mlx5_cmd_fc_bulk *b; struct rb_node *node = NULL; - u16 afirst_id; + u32 afirst_id; int num; int err; - int max_bulk = 1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk); + + int max_bulk = min_t(int, MLX5_SW_MAX_COUNTERS_BULK, + (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); /* first id must be aligned to 4 when using bulk query */ afirst_id = first->id & ~0x3; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 1bc14d0fded8..2c71557d1cee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -34,6 +34,7 @@ #include <linux/mlx5/cmd.h> #include <linux/module.h> #include "mlx5_core.h" +#include "../../mlxfw/mlxfw.h" static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out, int outlen) @@ -119,6 +120,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_IPOIB_ENHANCED_OFFLOADS); + if (err) + return err; + } + if (MLX5_CAP_GEN(dev, pg)) { err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); if (err) @@ -195,3 +202,298 @@ int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev) MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } + +int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0}; + int force_state; + int ret; + + if (!MLX5_CAP_GEN(dev, force_teardown)) { + mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA); + MLX5_SET(teardown_hca_in, in, profile, MLX5_TEARDOWN_HCA_IN_PROFILE_FORCE_CLOSE); + + ret = mlx5_cmd_exec_polling(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + force_state = MLX5_GET(teardown_hca_out, out, force_state); + if (force_state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) { + mlx5_core_err(dev, "teardown with force mode failed\n"); + return -EIO; + } + + return 0; +} + +enum mlxsw_reg_mcc_instruction { + MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE = 0x01, + MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE = 0x02, + MLX5_REG_MCC_INSTRUCTION_UPDATE_COMPONENT = 0x03, + MLX5_REG_MCC_INSTRUCTION_VERIFY_COMPONENT = 0x04, + MLX5_REG_MCC_INSTRUCTION_ACTIVATE = 0x06, + MLX5_REG_MCC_INSTRUCTION_CANCEL = 0x08, +}; + +static int mlx5_reg_mcc_set(struct mlx5_core_dev *dev, + enum mlxsw_reg_mcc_instruction instr, + u16 component_index, u32 update_handle, + u32 component_size) +{ + u32 out[MLX5_ST_SZ_DW(mcc_reg)]; + u32 in[MLX5_ST_SZ_DW(mcc_reg)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(mcc_reg, in, instruction, instr); + MLX5_SET(mcc_reg, in, component_index, component_index); + MLX5_SET(mcc_reg, in, update_handle, update_handle); + MLX5_SET(mcc_reg, in, component_size, component_size); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCC, 0, 1); +} + +static int mlx5_reg_mcc_query(struct mlx5_core_dev *dev, + u32 *update_handle, u8 *error_code, + u8 *control_state) +{ + u32 out[MLX5_ST_SZ_DW(mcc_reg)]; + u32 in[MLX5_ST_SZ_DW(mcc_reg)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + MLX5_SET(mcc_reg, in, update_handle, *update_handle); + + err = 
mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCC, 0, 0); + if (err) + goto out; + + *update_handle = MLX5_GET(mcc_reg, out, update_handle); + *error_code = MLX5_GET(mcc_reg, out, error_code); + *control_state = MLX5_GET(mcc_reg, out, control_state); + +out: + return err; +} + +static int mlx5_reg_mcda_set(struct mlx5_core_dev *dev, + u32 update_handle, + u32 offset, u16 size, + u8 *data) +{ + int err, in_size = MLX5_ST_SZ_BYTES(mcda_reg) + size; + u32 out[MLX5_ST_SZ_DW(mcda_reg)]; + int i, j, dw_size = size >> 2; + __be32 data_element; + u32 *in; + + in = kzalloc(in_size, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(mcda_reg, in, update_handle, update_handle); + MLX5_SET(mcda_reg, in, offset, offset); + MLX5_SET(mcda_reg, in, size, size); + + for (i = 0; i < dw_size; i++) { + j = i * 4; + data_element = htonl(*(u32 *)&data[j]); + memcpy(MLX5_ADDR_OF(mcda_reg, in, data) + j, &data_element, 4); + } + + err = mlx5_core_access_reg(dev, in, in_size, out, + sizeof(out), MLX5_REG_MCDA, 0, 1); + kfree(in); + return err; +} + +static int mlx5_reg_mcqi_query(struct mlx5_core_dev *dev, + u16 component_index, + u32 *max_component_size, + u8 *log_mcda_word_size, + u16 *mcda_max_write_size) +{ + u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_ST_SZ_DW(mcqi_cap)]; + int offset = MLX5_ST_SZ_DW(mcqi_reg); + u32 in[MLX5_ST_SZ_DW(mcqi_reg)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(mcqi_reg, in, component_index, component_index); + MLX5_SET(mcqi_reg, in, data_size, MLX5_ST_SZ_BYTES(mcqi_cap)); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCQI, 0, 0); + if (err) + goto out; + + *max_component_size = MLX5_GET(mcqi_cap, out + offset, max_component_size); + *log_mcda_word_size = MLX5_GET(mcqi_cap, out + offset, log_mcda_word_size); + *mcda_max_write_size = MLX5_GET(mcqi_cap, out + offset, mcda_max_write_size); + +out: + return err; +} + +struct mlx5_mlxfw_dev { + struct mlxfw_dev mlxfw_dev; + struct mlx5_core_dev *mlx5_core_dev; +}; + +static int mlx5_component_query(struct mlxfw_dev *mlxfw_dev, + u16 component_index, u32 *p_max_size, + u8 *p_align_bits, u16 *p_max_write_size) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcqi_query(dev, component_index, p_max_size, + p_align_bits, p_max_write_size); +} + +static int mlx5_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + u8 control_state, error_code; + int err; + + *fwhandle = 0; + err = mlx5_reg_mcc_query(dev, fwhandle, &error_code, &control_state); + if (err) + return err; + + if (control_state != MLXFW_FSM_STATE_IDLE) + return -EBUSY; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE, + 0, *fwhandle, 0); +} + +static int mlx5_fsm_component_update(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index, u32 component_size) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_UPDATE_COMPONENT, + component_index, fwhandle, component_size); +} + +static int mlx5_fsm_block_download(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u8 *data, u16 size, 
u32 offset) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcda_set(dev, fwhandle, offset, size, data); +} + +static int mlx5_fsm_component_verify(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_VERIFY_COMPONENT, + component_index, fwhandle, 0); +} + +static int mlx5_fsm_activate(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_ACTIVATE, 0, + fwhandle, 0); +} + +static int mlx5_fsm_query_state(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + enum mlxfw_fsm_state *fsm_state, + enum mlxfw_fsm_state_err *fsm_state_err) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + u8 control_state, error_code; + int err; + + err = mlx5_reg_mcc_query(dev, &fwhandle, &error_code, &control_state); + if (err) + return err; + + *fsm_state = control_state; + *fsm_state_err = min_t(enum mlxfw_fsm_state_err, error_code, + MLXFW_FSM_STATE_ERR_MAX); + return 0; +} + +static void mlx5_fsm_cancel(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_CANCEL, 0, fwhandle, 0); +} + +static void mlx5_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE, 0, + fwhandle, 0); +} + +static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = { + .component_query = mlx5_component_query, + .fsm_lock = mlx5_fsm_lock, + .fsm_component_update = mlx5_fsm_component_update, + .fsm_block_download = mlx5_fsm_block_download, + .fsm_component_verify = mlx5_fsm_component_verify, + .fsm_activate = mlx5_fsm_activate, + .fsm_query_state = mlx5_fsm_query_state, + .fsm_cancel = mlx5_fsm_cancel, + .fsm_release = mlx5_fsm_release +}; + +int mlx5_firmware_flash(struct mlx5_core_dev *dev, + const struct firmware *firmware) +{ + struct mlx5_mlxfw_dev mlx5_mlxfw_dev = { + .mlxfw_dev = { + .ops = &mlx5_mlxfw_dev_ops, + .psid = dev->board_id, + .psid_size = strlen(dev->board_id), + }, + .mlx5_core_dev = dev + }; + + if (!MLX5_CAP_GEN(dev, mcam_reg) || + !MLX5_CAP_MCAM_REG(dev, mcqi) || + !MLX5_CAP_MCAM_REG(dev, mcc) || + !MLX5_CAP_MCAM_REG(dev, mcda)) { + pr_info("%s flashing isn't supported by the running FW\n", __func__); + return -EOPNOTSUPP; + } + + return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev, firmware); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index f27f84ffbc85..db86e1506c8b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -67,6 +67,7 @@ enum { enum { 
MLX5_DROP_NEW_HEALTH_WORK, + MLX5_DROP_NEW_RECOVERY_WORK, }; static u8 get_nic_state(struct mlx5_core_dev *dev) @@ -80,7 +81,7 @@ static void trigger_cmd_completions(struct mlx5_core_dev *dev) u64 vector; /* wait for pending handlers to complete */ - synchronize_irq(dev->priv.msix_arr[MLX5_EQ_VEC_CMD].vector); + synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD)); spin_lock_irqsave(&dev->cmd.alloc_lock, flags); vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); if (!vector) @@ -111,14 +112,14 @@ static int in_fatal(struct mlx5_core_dev *dev) return 0; } -void mlx5_enter_error_state(struct mlx5_core_dev *dev) +void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) { mutex_lock(&dev->intf_state_mutex); if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) goto unlock; mlx5_core_err(dev, "start\n"); - if (pci_channel_offline(dev->pdev) || in_fatal(dev)) { + if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; trigger_cmd_completions(dev); } @@ -185,6 +186,7 @@ static void health_care(struct work_struct *work) struct mlx5_core_health *health; struct mlx5_core_dev *dev; struct mlx5_priv *priv; + unsigned long flags; health = container_of(work, struct mlx5_core_health, work); priv = container_of(health, struct mlx5_priv, health); @@ -192,13 +194,13 @@ static void health_care(struct work_struct *work) mlx5_core_warn(dev, "handling bad device here\n"); mlx5_handle_bad_state(dev); - spin_lock(&health->wq_lock); - if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) + spin_lock_irqsave(&health->wq_lock, flags); + if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags)) schedule_delayed_work(&health->recover_work, recover_delay); else dev_err(&dev->pdev->dev, "new health works are not permitted at this stage\n"); - spin_unlock(&health->wq_lock); + spin_unlock_irqrestore(&health->wq_lock, flags); } static const char *hsynd_str(u8 synd) @@ -269,6 +271,20 @@ static unsigned long get_next_poll_jiffies(void) return next; } +void mlx5_trigger_health_work(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + unsigned long flags; + + spin_lock_irqsave(&health->wq_lock, flags); + if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) + queue_work(health->wq, &health->work); + else + dev_err(&dev->pdev->dev, + "new health works are not permitted at this stage\n"); + spin_unlock_irqrestore(&health->wq_lock, flags); +} + static void poll_health(unsigned long data) { struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data; @@ -293,13 +309,7 @@ static void poll_health(unsigned long data) if (in_fatal(dev) && !health->sick) { health->sick = true; print_health_info(dev); - spin_lock(&health->wq_lock); - if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) - queue_work(health->wq, &health->work); - else - dev_err(&dev->pdev->dev, - "new health works are not permitted at this stage\n"); - spin_unlock(&health->wq_lock); + mlx5_trigger_health_work(dev); } out: @@ -313,6 +323,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) init_timer(&health->timer); health->sick = 0; clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); + clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; @@ -332,14 +343,27 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev) void mlx5_drain_health_wq(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; + unsigned long flags; 
- spin_lock(&health->wq_lock); + spin_lock_irqsave(&health->wq_lock, flags); set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); - spin_unlock(&health->wq_lock); + set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); + spin_unlock_irqrestore(&health->wq_lock, flags); cancel_delayed_work_sync(&health->recover_work); cancel_work_sync(&health->work); } +void mlx5_drain_health_recovery(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + unsigned long flags; + + spin_lock_irqsave(&health->wq_lock, flags); + set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); + spin_unlock_irqrestore(&health->wq_lock, flags); + cancel_delayed_work_sync(&dev->priv.health.recover_work); +} + void mlx5_health_cleanup(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile new file mode 100644 index 000000000000..d8e17110f25d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c new file mode 100644 index 000000000000..43c126c63955 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "en.h" +#include "ipoib.h" + +static void mlx5i_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_drvinfo(priv, drvinfo); + strlcpy(drvinfo->driver, DRIVER_NAME "[ib_ipoib]", + sizeof(drvinfo->driver)); +} + +static void mlx5i_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_strings(priv, stringset, data); +} + +static int mlx5i_get_sset_count(struct net_device *dev, int sset) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_get_sset_count(priv, sset); +} + +static void mlx5i_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + u64 *data) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_ethtool_stats(priv, stats, data); +} + +static int mlx5i_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_set_ringparam(priv, param); +} + +static void mlx5i_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_ringparam(priv, param); +} + +static int mlx5i_set_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_set_channels(priv, ch); +} + +static void mlx5i_get_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_channels(priv, ch); +} + +static int mlx5i_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_set_coalesce(priv, coal); +} + +static int mlx5i_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_get_coalesce(priv, coal); +} + +static int mlx5i_get_ts_info(struct net_device *netdev, + struct ethtool_ts_info *info) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_get_ts_info(priv, info); +} + +static int mlx5i_flash_device(struct net_device *netdev, + struct ethtool_flash *flash) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_flash_device(priv, flash); +} + +enum mlx5_ptys_width { + MLX5_PTYS_WIDTH_1X = 1 << 0, + MLX5_PTYS_WIDTH_2X = 1 << 1, + MLX5_PTYS_WIDTH_4X = 1 << 2, + MLX5_PTYS_WIDTH_8X = 1 << 3, + MLX5_PTYS_WIDTH_12X = 1 << 4, +}; + +static inline int mlx5_ptys_width_enum_to_int(enum mlx5_ptys_width width) +{ + switch (width) { + case MLX5_PTYS_WIDTH_1X: return 1; + case MLX5_PTYS_WIDTH_2X: return 2; + case MLX5_PTYS_WIDTH_4X: return 4; + case MLX5_PTYS_WIDTH_8X: return 8; + case MLX5_PTYS_WIDTH_12X: return 12; + default: return -1; + } +} + +enum mlx5_ptys_rate { + MLX5_PTYS_RATE_SDR = 1 << 0, + MLX5_PTYS_RATE_DDR = 1 << 1, + MLX5_PTYS_RATE_QDR = 1 << 2, + MLX5_PTYS_RATE_FDR10 = 1 << 3, + MLX5_PTYS_RATE_FDR = 1 << 4, + MLX5_PTYS_RATE_EDR = 1 << 5, + MLX5_PTYS_RATE_HDR = 1 << 6, +}; + +static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate) +{ + switch (rate) { + case MLX5_PTYS_RATE_SDR: return 2500; + case MLX5_PTYS_RATE_DDR: return 5000; + case MLX5_PTYS_RATE_QDR: + case MLX5_PTYS_RATE_FDR10: return 10000; + case MLX5_PTYS_RATE_FDR: return 14000; + case MLX5_PTYS_RATE_EDR: return 25000; + case MLX5_PTYS_RATE_HDR: 
return 50000; + default: return -1; + } +} + +static int mlx5i_get_port_settings(struct net_device *netdev, + u16 *ib_link_width_oper, u16 *ib_proto_oper) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; + int ret; + + ret = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_IB, 1); + if (ret) + return ret; + + *ib_link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper); + *ib_proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper); + + return 0; +} + +static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper) +{ + int rate, width; + + rate = mlx5_ptys_rate_enum_to_int(ib_proto_oper); + if (rate < 0) + return -EINVAL; + width = mlx5_ptys_width_enum_to_int(ib_link_width_oper); + if (width < 0) + return -EINVAL; + + return rate * width; +} + +static int mlx5i_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + u16 ib_link_width_oper; + u16 ib_proto_oper; + int speed, ret; + + ret = mlx5i_get_port_settings(netdev, &ib_link_width_oper, &ib_proto_oper); + if (ret) + return ret; + + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); + + speed = mlx5i_get_speed_settings(ib_link_width_oper, ib_proto_oper); + if (speed < 0) + return -EINVAL; + + link_ksettings->base.duplex = DUPLEX_FULL; + link_ksettings->base.port = PORT_OTHER; + + link_ksettings->base.autoneg = AUTONEG_DISABLE; + + link_ksettings->base.speed = speed; + + return 0; +} + +const struct ethtool_ops mlx5i_ethtool_ops = { + .get_drvinfo = mlx5i_get_drvinfo, + .get_strings = mlx5i_get_strings, + .get_sset_count = mlx5i_get_sset_count, + .get_ethtool_stats = mlx5i_get_ethtool_stats, + .get_ringparam = mlx5i_get_ringparam, + .set_ringparam = mlx5i_set_ringparam, + .flash_device = mlx5i_flash_device, + .get_channels = mlx5i_get_channels, + .set_channels = mlx5i_set_channels, + .get_coalesce = mlx5i_get_coalesce, + .set_coalesce = mlx5i_set_coalesce, + .get_ts_info = mlx5i_get_ts_info, + .get_link_ksettings = mlx5i_get_link_ksettings, + .get_link = ethtool_op_get_link, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index cc1858752e70..145e392ab849 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -36,20 +36,38 @@ #include "ipoib.h" #define IB_DEFAULT_Q_KEY 0xb1b +#define MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE 9 static int mlx5i_open(struct net_device *netdev); static int mlx5i_close(struct net_device *netdev); static int mlx5i_dev_init(struct net_device *dev); static void mlx5i_dev_cleanup(struct net_device *dev); +static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu); +static int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); static const struct net_device_ops mlx5i_netdev_ops = { .ndo_open = mlx5i_open, .ndo_stop = mlx5i_close, .ndo_init = mlx5i_dev_init, .ndo_uninit = mlx5i_dev_cleanup, + .ndo_change_mtu = mlx5i_change_mtu, + .ndo_do_ioctl = mlx5i_ioctl, }; /* IPoIB mlx5 netdev profile */ +static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */ + mlx5e_set_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST); + + /* RQ size in ipoib by default is 512 */ + params->log_rq_size = is_kdump_kernel() ? 
+ MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : + MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE; + + params->lro_en = false; +} /* Called directly after IPoIB netdevice was created to initialize SW structs */ static void mlx5i_init(struct mlx5_core_dev *mdev, @@ -59,19 +77,18 @@ static void mlx5i_init(struct mlx5_core_dev *mdev, { struct mlx5e_priv *priv = mlx5i_epriv(netdev); + /* priv init */ priv->mdev = mdev; priv->netdev = netdev; priv->profile = profile; priv->ppriv = ppriv; + priv->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN; + mutex_init(&priv->state_lock); mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev)); + mlx5i_build_nic_params(mdev, &priv->channels.params); - /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */ - mlx5e_set_rq_type_params(mdev, &priv->channels.params, MLX5_WQ_TYPE_LINKED_LIST); - priv->channels.params.lro_en = false; - - mutex_init(&priv->state_lock); - + /* netdev init */ netdev->hw_features |= NETIF_F_SG; netdev->hw_features |= NETIF_F_IP_CSUM; netdev->hw_features |= NETIF_F_IPV6_CSUM; @@ -82,6 +99,7 @@ static void mlx5i_init(struct mlx5_core_dev *mdev, netdev->hw_features |= NETIF_F_RXHASH; netdev->netdev_ops = &mlx5i_netdev_ops; + netdev->ethtool_ops = &mlx5i_ethtool_ops; } /* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */ @@ -102,7 +120,7 @@ static int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core void *qpc; inlen = MLX5_ST_SZ_BYTES(create_qp_in); - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -160,8 +178,6 @@ out: static void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) { - mlx5_fs_remove_rx_underlay_qpn(mdev, qp->qpn); - mlx5_core_destroy_qp(mdev, qp); } @@ -176,8 +192,6 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv) return err; } - mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); - err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]); if (err) { mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); @@ -235,6 +249,7 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) static int mlx5i_init_rx(struct mlx5e_priv *priv) { + struct mlx5i_priv *ipriv = priv->ppriv; int err; err = mlx5e_create_indirect_rqt(priv); @@ -253,12 +268,18 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_indirect_tirs; - err = mlx5i_create_flow_steering(priv); + err = mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); if (err) goto err_destroy_direct_tirs; + err = mlx5i_create_flow_steering(priv); + if (err) + goto err_remove_rx_underlay_qpn; + return 0; +err_remove_rx_underlay_qpn: + mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); err_destroy_direct_tirs: mlx5e_destroy_direct_tirs(priv); err_destroy_indirect_tirs: @@ -272,6 +293,9 @@ err_destroy_indirect_rqts: static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) { + struct mlx5i_priv *ipriv = priv->ppriv; + + mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); mlx5i_destroy_flow_steering(priv); mlx5e_destroy_direct_tirs(priv); mlx5e_destroy_indirect_tirs(priv); @@ -290,6 +314,7 @@ static const struct mlx5e_profile mlx5i_nic_profile = { .disable = NULL, /* mlx5i_disable */ .update_stats = NULL, /* mlx5i_update_stats */ .max_nch = mlx5e_get_max_num_channels, + .update_carrier = NULL, /* no HW update in IB link */ .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ .max_tc = MLX5I_MAX_NUM_TC, @@ -297,6 +322,35 
@@ static const struct mlx5e_profile mlx5i_nic_profile = { /* mlx5i netdev NDos */ +static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5e_channels new_channels = {}; + int curr_mtu; + int err = 0; + + mutex_lock(&priv->state_lock); + + curr_mtu = netdev->mtu; + netdev->mtu = new_mtu; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto out; + + new_channels.params = priv->channels.params; + err = mlx5e_open_channels(priv, &new_channels); + if (err) { + netdev->mtu = curr_mtu; + goto out; + } + + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + +out: + mutex_unlock(&priv->state_lock); + return err; +} + static int mlx5i_dev_init(struct net_device *dev) { struct mlx5e_priv *priv = mlx5i_epriv(dev); @@ -310,6 +364,20 @@ static int mlx5i_dev_init(struct net_device *dev) return 0; } +static int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + switch (cmd) { + case SIOCSHWTSTAMP: + return mlx5e_hwstamp_set(priv, ifr); + case SIOCGHWTSTAMP: + return mlx5e_hwstamp_get(priv, ifr); + default: + return -EOPNOTSUPP; + } +} + static void mlx5i_dev_cleanup(struct net_device *dev) { struct mlx5e_priv *priv = mlx5i_epriv(dev); @@ -336,6 +404,8 @@ static int mlx5i_open(struct net_device *netdev) mlx5e_refresh_tirs(priv, false); mlx5e_activate_priv_channels(priv); + mlx5e_timestamp_init(priv); + mutex_unlock(&priv->state_lock); return 0; @@ -359,6 +429,7 @@ static int mlx5i_close(struct net_device *netdev) clear_bit(MLX5E_STATE_OPENED, &priv->state); + mlx5e_timestamp_cleanup(priv); netif_carrier_off(priv->netdev); mlx5e_deactivate_priv_channels(priv); mlx5e_close_channels(&priv->channels); @@ -501,13 +572,13 @@ void mlx5_rdma_netdev_free(struct net_device *netdev) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); const struct mlx5e_profile *profile = priv->profile; + struct mlx5_core_dev *mdev = priv->mdev; mlx5e_detach_netdev(priv); profile->cleanup(priv); destroy_workqueue(priv->wq); free_netdev(netdev); - mlx5e_destroy_mdev_resources(priv->mdev); + mlx5e_destroy_mdev_resources(mdev); } EXPORT_SYMBOL(mlx5_rdma_netdev_free); - diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index 213191a78464..a0f405f520f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -38,6 +38,13 @@ #define MLX5I_MAX_NUM_TC 1 +extern const struct ethtool_ops mlx5i_ethtool_ops; + +#define MLX5_IB_GRH_BYTES 40 +#define MLX5_IPOIB_ENCAP_LEN 4 +#define MLX5_IPOIB_PSEUDO_LEN 20 +#define MLX5_IPOIB_HARD_LEN (MLX5_IPOIB_PSEUDO_LEN + MLX5_IPOIB_ENCAP_LEN) + /* ipoib rdma netdev's private data structure */ struct mlx5i_priv { struct rdma_netdev rn; /* keep this first */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index b5d5519542e8..f26f97fe4666 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -61,6 +61,11 @@ struct mlx5_lag { struct lag_tracker tracker; struct delayed_work bond_work; struct notifier_block nb; + + /* Admin state. Allow lag only if allowed is true + * even if network conditions for lag were met + */ + bool allowed; }; /* General purpose, use for short periods of time. 
@@ -157,22 +162,17 @@ static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev) static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, u8 *port1, u8 *port2) { - if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { - if (tracker->netdev_state[0].tx_enabled) { - *port1 = 1; - *port2 = 1; - } else { - *port1 = 2; - *port2 = 2; - } - } else { - *port1 = 1; - *port2 = 2; - if (!tracker->netdev_state[0].link_up) - *port1 = 2; - else if (!tracker->netdev_state[1].link_up) - *port2 = 1; + *port1 = 1; + *port2 = 2; + if (!tracker->netdev_state[0].tx_enabled || + !tracker->netdev_state[0].link_up) { + *port1 = 2; + return; } + + if (!tracker->netdev_state[1].tx_enabled || + !tracker->netdev_state[1].link_up) + *port2 = 1; } static void mlx5_activate_lag(struct mlx5_lag *ldev, @@ -214,6 +214,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) struct lag_tracker tracker; u8 v2p_port1, v2p_port2; int i, err; + bool do_bond; if (!dev0 || !dev1) return; @@ -222,13 +223,9 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) tracker = ldev->tracker; mutex_unlock(&lag_mutex); - if (tracker.is_bonded && !mlx5_lag_is_bonded(ldev)) { - if (mlx5_sriov_is_enabled(dev0) || - mlx5_sriov_is_enabled(dev1)) { - mlx5_core_warn(dev0, "LAG is not supported with SRIOV"); - return; - } + do_bond = tracker.is_bonded && ldev->allowed; + if (do_bond && !mlx5_lag_is_bonded(ldev)) { for (i = 0; i < MLX5_MAX_PORTS; i++) mlx5_remove_dev_by_protocol(ldev->pf[i].dev, MLX5_INTERFACE_PROTOCOL_IB); @@ -237,7 +234,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); mlx5_nic_vport_enable_roce(dev1); - } else if (tracker.is_bonded && mlx5_lag_is_bonded(ldev)) { + } else if (do_bond && mlx5_lag_is_bonded(ldev)) { mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1, &v2p_port2); @@ -252,7 +249,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) "Failed to modify LAG (%d)\n", err); } - } else if (!tracker.is_bonded && mlx5_lag_is_bonded(ldev)) { + } else if (!do_bond && mlx5_lag_is_bonded(ldev)) { mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); mlx5_nic_vport_disable_roce(dev1); @@ -411,6 +408,15 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, return NOTIFY_DONE; } +static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) +{ + if ((ldev->pf[0].dev && mlx5_sriov_is_enabled(ldev->pf[0].dev)) || + (ldev->pf[1].dev && mlx5_sriov_is_enabled(ldev->pf[1].dev))) + return false; + else + return true; +} + static struct mlx5_lag *mlx5_lag_dev_alloc(void) { struct mlx5_lag *ldev; @@ -420,6 +426,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(void) return NULL; INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); + ldev->allowed = mlx5_lag_check_prereq(ldev); return ldev; } @@ -444,7 +451,9 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, ldev->tracker.netdev_state[fn].link_up = 0; ldev->tracker.netdev_state[fn].tx_enabled = 0; + ldev->allowed = mlx5_lag_check_prereq(ldev); dev->priv.lag = ldev; + mutex_unlock(&lag_mutex); } @@ -464,10 +473,10 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, memset(&ldev->pf[i], 0, sizeof(*ldev->pf)); dev->priv.lag = NULL; + ldev->allowed = mlx5_lag_check_prereq(ldev); mutex_unlock(&lag_mutex); } - /* Must be called with intf_mutex held */ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) { @@ -543,6 +552,44 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_lag_is_active); +static int mlx5_lag_set_state(struct mlx5_core_dev *dev, bool 
allow) +{ + struct mlx5_lag *ldev; + int ret = 0; + bool lag_active; + + mlx5_dev_list_lock(); + + ldev = mlx5_lag_dev_get(dev); + if (!ldev) { + ret = -ENODEV; + goto unlock; + } + lag_active = mlx5_lag_is_bonded(ldev); + if (!mlx5_lag_check_prereq(ldev) && allow) { + ret = -EINVAL; + goto unlock; + } + if (ldev->allowed == allow) + goto unlock; + ldev->allowed = allow; + if ((lag_active && !allow) || allow) + mlx5_do_bond(ldev); +unlock: + mlx5_dev_list_unlock(); + return ret; +} + +int mlx5_lag_forbid(struct mlx5_core_dev *dev) +{ + return mlx5_lag_set_state(dev, false); +} + +int mlx5_lag_allow(struct mlx5_core_dev *dev) +{ + return mlx5_lag_set_state(dev, true); +} + struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) { struct net_device *ndev = NULL; @@ -586,4 +633,3 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv) /* If bonded, we do not add an IB device for PF1. */ return false; } - diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile new file mode 100644 index 000000000000..d8e17110f25d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c new file mode 100644 index 000000000000..573f59f46d41 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/mlx5/driver.h> +#include <linux/etherdevice.h> +#include <linux/idr.h> +#include "mlx5_core.h" +#include "lib/mlx5.h" + +void mlx5_init_reserved_gids(struct mlx5_core_dev *dev) +{ + unsigned int tblsz = MLX5_CAP_ROCE(dev, roce_address_table_size); + + ida_init(&dev->roce.reserved_gids.ida); + dev->roce.reserved_gids.start = tblsz; + dev->roce.reserved_gids.count = 0; +} + +void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev) +{ + WARN_ON(!ida_is_empty(&dev->roce.reserved_gids.ida)); + dev->roce.reserved_gids.start = 0; + dev->roce.reserved_gids.count = 0; + ida_destroy(&dev->roce.reserved_gids.ida); +} + +int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count) +{ + if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { + mlx5_core_err(dev, "Cannot reserve GIDs when interfaces are up\n"); + return -EPERM; + } + if (dev->roce.reserved_gids.start < count) { + mlx5_core_warn(dev, "GID table exhausted attempting to reserve %d more GIDs\n", + count); + return -ENOMEM; + } + if (dev->roce.reserved_gids.count + count > MLX5_MAX_RESERVED_GIDS) { + mlx5_core_warn(dev, "Unable to reserve %d more GIDs\n", count); + return -ENOMEM; + } + + dev->roce.reserved_gids.start -= count; + dev->roce.reserved_gids.count += count; + mlx5_core_dbg(dev, "Reserved %u GIDs starting at %u\n", + dev->roce.reserved_gids.count, + dev->roce.reserved_gids.start); + return 0; +} + +void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count) +{ + WARN(test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state), "Unreserving GIDs when interfaces are up"); + WARN(count > dev->roce.reserved_gids.count, "Unreserving %u GIDs when only %u reserved", + count, dev->roce.reserved_gids.count); + + dev->roce.reserved_gids.start += count; + dev->roce.reserved_gids.count -= count; + mlx5_core_dbg(dev, "%u GIDs starting at %u left reserved\n", + dev->roce.reserved_gids.count, + dev->roce.reserved_gids.start); +} + +int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index) +{ + int end = dev->roce.reserved_gids.start + + dev->roce.reserved_gids.count; + int index = 0; + + index = ida_simple_get(&dev->roce.reserved_gids.ida, + dev->roce.reserved_gids.start, end, + GFP_KERNEL); + if (index < 0) + return index; + + mlx5_core_dbg(dev, "Allocating reserved GID %u\n", index); + *gid_index = index; + return 0; +} + +void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index) +{ + mlx5_core_dbg(dev, "Freeing reserved GID %u\n", gid_index); + ida_simple_remove(&dev->roce.reserved_gids.ida, gid_index); +} + +unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev) +{ + return dev->roce.reserved_gids.count; +} +EXPORT_SYMBOL_GPL(mlx5_core_reserved_gids_count); + +int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, + u8 roce_version, u8 roce_l3_type, const u8 *gid, + const u8 *mac, bool vlan, u16 vlan_id) +{ +#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v) + u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0}; + void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address); + char *addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, in_addr, + source_l3_address); + void *addr_mac = MLX5_ADDR_OF(roce_addr_layout, in_addr, + source_mac_47_32); + int gidsz = MLX5_FLD_SZ_BYTES(roce_addr_layout, source_l3_address); + + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return -EINVAL; + + if (gid) { + if (vlan) { + MLX5_SET_RA(in_addr, vlan_valid, 1); 
+ MLX5_SET_RA(in_addr, vlan_id, vlan_id); + } + + ether_addr_copy(addr_mac, mac); + MLX5_SET_RA(in_addr, roce_version, roce_version); + MLX5_SET_RA(in_addr, roce_l3_type, roce_l3_type); + memcpy(addr_l3_addr, gid, gidsz); + } + + MLX5_SET(set_roce_address_in, in, roce_address_index, index); + MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_roce_gid_set); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h new file mode 100644 index 000000000000..7550b1cc8c6a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __LIB_MLX5_H__ +#define __LIB_MLX5_H__ + +void mlx5_init_reserved_gids(struct mlx5_core_dev *dev); +void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev); +int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count); +void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count); +int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index); +void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c new file mode 100644 index 000000000000..7cb67122e8b5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include "mlx5_core.h" +#include "lib/mpfs.h" + +/* HW L2 Table (MPFS) management */ +static int set_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index, u8 *mac) +{ + u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0}; + u8 *in_mac_addr; + + MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY); + MLX5_SET(set_l2_table_entry_in, in, table_index, index); + + in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); + ether_addr_copy(&in_mac_addr[2], mac); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index) +{ + u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0}; + + MLX5_SET(delete_l2_table_entry_in, in, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + MLX5_SET(delete_l2_table_entry_in, in, table_index, index); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +/* UC L2 table hash node */ +struct l2table_node { + struct l2addr_node node; + u32 index; /* index in HW l2 table */ +}; + +struct mlx5_mpfs { + struct hlist_head hash[MLX5_L2_ADDR_HASH_SIZE]; + struct mutex lock; /* Synchronize l2 table access */ + u32 size; + unsigned long *bitmap; +}; + +static int alloc_l2table_index(struct mlx5_mpfs *l2table, u32 *ix) +{ + int err = 0; + + *ix = find_first_zero_bit(l2table->bitmap, l2table->size); + if (*ix >= l2table->size) + err = -ENOSPC; + else + __set_bit(*ix, l2table->bitmap); + + return err; +} + +static void free_l2table_index(struct mlx5_mpfs *l2table, u32 ix) +{ + __clear_bit(ix, l2table->bitmap); +} + +int mlx5_mpfs_init(struct mlx5_core_dev *dev) +{ + int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); + struct mlx5_mpfs *mpfs; + + if (!MLX5_VPORT_MANAGER(dev)) + return 0; + + mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL); + if (!mpfs) + return -ENOMEM; + + mutex_init(&mpfs->lock); + mpfs->size = l2table_size; + mpfs->bitmap = kcalloc(BITS_TO_LONGS(l2table_size), + sizeof(uintptr_t), GFP_KERNEL); + if (!mpfs->bitmap) { + kfree(mpfs); + return -ENOMEM; + } + + dev->priv.mpfs = mpfs; + return 0; +} + 
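Illustrative sketch, not part of the patch above: the MPFS entry points introduced in this file, mlx5_mpfs_add_mac() and mlx5_mpfs_del_mac(), are intended to be driven by code that mirrors netdev unicast addresses into the HW L2 table. The wrapper below is a hypothetical caller only; its name, the add/remove flag, and the choice to treat -EEXIST as success are assumptions, and it relies on the includes already present at the top of mpfs.c.

/* Hypothetical caller: mirror one unicast MAC into, or out of, the MPFS table. */
static int example_uc_addr_sync(struct mlx5_core_dev *dev, u8 *mac, bool add)
{
	int err;

	if (!add)
		return mlx5_mpfs_del_mac(dev, mac);

	err = mlx5_mpfs_add_mac(dev, mac);
	/* Another consumer may already have programmed this address. */
	if (err == -EEXIST)
		err = 0;
	return err;
}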
+void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + + if (!MLX5_VPORT_MANAGER(dev)) + return; + + WARN_ON(!hlist_empty(mpfs->hash)); + kfree(mpfs->bitmap); + kfree(mpfs); +} + +int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + struct l2table_node *l2addr; + u32 index; + int err; + + if (!MLX5_VPORT_MANAGER(dev)) + return 0; + + mutex_lock(&mpfs->lock); + + l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node); + if (l2addr) { + err = -EEXIST; + goto abort; + } + + err = alloc_l2table_index(mpfs, &index); + if (err) + goto abort; + + l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL); + if (!l2addr) { + free_l2table_index(mpfs, index); + err = -ENOMEM; + goto abort; + } + + l2addr->index = index; + err = set_l2table_entry_cmd(dev, index, mac); + if (err) { + l2addr_hash_del(l2addr); + free_l2table_index(mpfs, index); + } + + mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index); +abort: + mutex_unlock(&mpfs->lock); + return err; +} + +int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + struct l2table_node *l2addr; + int err = 0; + u32 index; + + if (!MLX5_VPORT_MANAGER(dev)) + return 0; + + mutex_lock(&mpfs->lock); + + l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node); + if (!l2addr) { + err = -ENOENT; + goto unlock; + } + + index = l2addr->index; + del_l2table_entry_cmd(dev, index); + l2addr_hash_del(l2addr); + free_l2table_index(mpfs, index); + mlx5_core_dbg(dev, "MPFS mac deleted %pM, index (%d)\n", mac, index); +unlock: + mutex_unlock(&mpfs->lock); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h new file mode 100644 index 000000000000..4a7b2c3203a7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __MLX5_MPFS_H__ +#define __MLX5_MPFS_H__ + +#include <linux/if_ether.h> +#include <linux/mlx5/device.h> + +/* L2 -mac address based- hash helpers */ +#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) +#define MLX5_L2_ADDR_HASH(addr) (addr[5]) + +struct l2addr_node { + struct hlist_node hlist; + u8 addr[ETH_ALEN]; +}; + +#define for_each_l2hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &(hash)[i], hlist) + +#define l2addr_hash_find(hash, mac, type) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + bool found = false; \ + type *ptr = NULL; \ + \ + hlist_for_each_entry(ptr, &(hash)[ix], node.hlist) \ + if (ether_addr_equal(ptr->node.addr, mac)) {\ + found = true; \ + break; \ + } \ + if (!found) \ + ptr = NULL; \ + ptr; \ +}) + +#define l2addr_hash_add(hash, mac, type, gfp) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + type *ptr = NULL; \ + \ + ptr = kzalloc(sizeof(type), gfp); \ + if (ptr) { \ + ether_addr_copy(ptr->node.addr, mac); \ + hlist_add_head(&ptr->node.hlist, &(hash)[ix]);\ + } \ + ptr; \ +}) + +#define l2addr_hash_del(ptr) ({ \ + hlist_del(&(ptr)->node.hlist); \ + kfree(ptr); \ +}) + +#ifdef CONFIG_MLX5_MPFS +int mlx5_mpfs_init(struct mlx5_core_dev *dev); +void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac); +int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac); +#else /* #ifndef CONFIG_MLX5_MPFS */ +static inline int mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {} +static inline int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } +static inline int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } +#endif +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 13be264587f1..0d2c8dcd6eae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -47,15 +47,18 @@ #include <linux/debugfs.h> #include <linux/kmod.h> #include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> #ifdef CONFIG_RFS_ACCEL #include <linux/cpu_rmap.h> #endif #include <net/devlink.h> #include "mlx5_core.h" #include "fs_core.h" -#ifdef CONFIG_MLX5_CORE_EN +#include "lib/mpfs.h" #include "eswitch.h" -#endif +#include "lib/mlx5.h" +#include "fpga/core.h" +#include "accel/ipsec.h" MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver"); @@ -309,13 +312,15 @@ static void release_bar(struct pci_dev *pdev) pci_release_regions(pdev); } -static int mlx5_enable_msix(struct mlx5_core_dev *dev) +static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; struct mlx5_eq_table *table = &priv->eq_table; + struct irq_affinity irqdesc = { + .pre_vectors = MLX5_EQ_VEC_COMP_BASE, + }; int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq); int nvec; - int i; nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + MLX5_EQ_VEC_COMP_BASE; @@ -323,17 +328,14 @@ static int mlx5_enable_msix(struct mlx5_core_dev *dev) if (nvec <= MLX5_EQ_VEC_COMP_BASE) return -ENOMEM; - priv->msix_arr = kcalloc(nvec, sizeof(*priv->msix_arr), GFP_KERNEL); - priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL); - if (!priv->msix_arr || !priv->irq_info) + if (!priv->irq_info) goto err_free_msix; - for (i = 0; i < nvec; i++) - priv->msix_arr[i].entry = i; - - nvec = 
pci_enable_msix_range(dev->pdev, priv->msix_arr, - MLX5_EQ_VEC_COMP_BASE + 1, nvec); + nvec = pci_alloc_irq_vectors_affinity(dev->pdev, + MLX5_EQ_VEC_COMP_BASE + 1, nvec, + PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, + &irqdesc); if (nvec < 0) return nvec; @@ -343,25 +345,22 @@ static int mlx5_enable_msix(struct mlx5_core_dev *dev) err_free_msix: kfree(priv->irq_info); - kfree(priv->msix_arr); return -ENOMEM; } -static void mlx5_disable_msix(struct mlx5_core_dev *dev) +static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; - pci_disable_msix(dev->pdev); + pci_free_irq_vectors(dev->pdev); kfree(priv->irq_info); - kfree(priv->msix_arr); } -struct mlx5_reg_host_endianess { +struct mlx5_reg_host_endianness { u8 he; u8 rsvd[15]; }; - #define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos)) enum { @@ -475,7 +474,7 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) req_endianness = MLX5_CAP_ATOMIC(dev, - supported_atomic_req_8B_endianess_mode_1); + supported_atomic_req_8B_endianness_mode_1); if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS) return 0; @@ -487,7 +486,7 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); /* Set requestor to host endianness */ - MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode, + MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianness_mode, MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS); err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC); @@ -562,8 +561,8 @@ query_ex: static int set_hca_ctrl(struct mlx5_core_dev *dev) { - struct mlx5_reg_host_endianess he_in; - struct mlx5_reg_host_endianess he_out; + struct mlx5_reg_host_endianness he_in; + struct mlx5_reg_host_endianness he_out; int err; if (!mlx5_core_is_pf(dev)) @@ -577,6 +576,18 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev) return err; } +static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev) +{ + int ret = 0; + + /* Disable local_lb by default */ + if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && + MLX5_CAP_GEN(dev, disable_local_lb)) + ret = mlx5_nic_vport_update_local_lb(dev, false); + + return ret; +} + int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) { u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0}; @@ -610,65 +621,6 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev) return (u64)timer_l | (u64)timer_h1 << 32; } -static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - struct mlx5_priv *priv = &mdev->priv; - struct msix_entry *msix = priv->msix_arr; - int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; - - if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { - mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); - return -ENOMEM; - } - - cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - priv->irq_info[i].mask); - - if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, priv->irq_info[i].mask)) - mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); - - return 0; -} - -static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - struct mlx5_priv *priv = &mdev->priv; - struct msix_entry *msix = priv->msix_arr; - int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; - - irq_set_affinity_hint(irq, NULL); - free_cpumask_var(priv->irq_info[i].mask); -} - -static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) -{ - int err; - int i; - - for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) { - err = 
mlx5_irq_set_affinity_hint(mdev, i); - if (err) - goto err_out; - } - - return 0; - -err_out: - for (i--; i >= 0; i--) - mlx5_irq_clear_affinity_hint(mdev, i); - - return err; -} - -static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev) -{ - int i; - - for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) - mlx5_irq_clear_affinity_hint(mdev, i); -} - int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn) { @@ -758,8 +710,8 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) } #ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(dev->rmap, - dev->priv.msix_arr[i + MLX5_EQ_VEC_COMP_BASE].vector); + irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev, + MLX5_EQ_VEC_COMP_BASE + i)); #endif snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); err = mlx5_create_map_eq(dev, eq, @@ -835,7 +787,6 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) return -EOPNOTSUPP; } - static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { struct pci_dev *pdev = dev->pdev; @@ -936,19 +887,25 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_mkey_table(dev); + mlx5_init_reserved_gids(dev); + err = mlx5_init_rl_table(dev); if (err) { dev_err(&pdev->dev, "Failed to init rate limiting\n"); goto err_tables_cleanup; } -#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_mpfs_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init l2 table %d\n", err); + goto err_rl_cleanup; + } + err = mlx5_eswitch_init(dev); if (err) { dev_err(&pdev->dev, "Failed to init eswitch %d\n", err); - goto err_rl_cleanup; + goto err_mpfs_cleanup; } -#endif err = mlx5_sriov_init(dev); if (err) { @@ -956,16 +913,22 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_eswitch_cleanup; } + err = mlx5_fpga_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init fpga device %d\n", err); + goto err_sriov_cleanup; + } + return 0; +err_sriov_cleanup: + mlx5_sriov_cleanup(dev); err_eswitch_cleanup: -#ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); - +err_mpfs_cleanup: + mlx5_mpfs_cleanup(dev); err_rl_cleanup: -#endif mlx5_cleanup_rl_table(dev); - err_tables_cleanup: mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); @@ -981,11 +944,12 @@ out: static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { + mlx5_fpga_cleanup(dev); mlx5_sriov_cleanup(dev); -#ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); -#endif + mlx5_mpfs_cleanup(dev); mlx5_cleanup_rl_table(dev); + mlx5_cleanup_reserved_gids(dev); mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); @@ -1020,7 +984,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, if (err) { dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n", FW_PRE_INIT_TIMEOUT_MILI); - goto out; + goto out_err; } err = mlx5_cmd_init(dev); @@ -1105,9 +1069,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_stop_poll; } - err = mlx5_enable_msix(dev); + err = mlx5_alloc_irq_vectors(dev); if (err) { - dev_err(&pdev->dev, "enable msix failed\n"); + dev_err(&pdev->dev, "alloc irq vectors failed\n"); goto err_cleanup_once; } @@ -1129,21 +1093,17 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_stop_eqs; } - err = mlx5_irq_set_affinity_hints(dev); - if (err) { - dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); - goto err_affinity_hints; - } - err = 
mlx5_init_fs(dev); if (err) { dev_err(&pdev->dev, "Failed to init flow steering\n"); goto err_fs; } -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_attach(dev->priv.eswitch); -#endif + err = mlx5_core_set_hca_defaults(dev); + if (err) { + dev_err(&pdev->dev, "Failed to set hca defaults\n"); + goto err_fs; + } err = mlx5_sriov_attach(dev); if (err) { @@ -1151,6 +1111,17 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_sriov; } + err = mlx5_fpga_device_start(dev); + if (err) { + dev_err(&pdev->dev, "fpga device start failed %d\n", err); + goto err_fpga_start; + } + err = mlx5_accel_ipsec_init(dev); + if (err) { + dev_err(&pdev->dev, "IPSec device start failed %d\n", err); + goto err_ipsec_start; + } + if (mlx5_device_registered(dev)) { mlx5_attach_device(dev); } else { @@ -1161,7 +1132,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, } } - clear_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state); set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); out: mutex_unlock(&dev->intf_state_mutex); @@ -1169,18 +1139,17 @@ out: return 0; err_reg_dev: + mlx5_accel_ipsec_cleanup(dev); +err_ipsec_start: + mlx5_fpga_device_stop(dev); + +err_fpga_start: mlx5_sriov_detach(dev); err_sriov: -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_detach(dev->priv.eswitch); -#endif mlx5_cleanup_fs(dev); err_fs: - mlx5_irq_clear_affinity_hints(dev); - -err_affinity_hints: free_comp_eqs(dev); err_stop_eqs: @@ -1190,7 +1159,7 @@ err_put_uars: mlx5_put_uars_page(dev, priv->uar); err_disable_msix: - mlx5_disable_msix(dev); + mlx5_free_irq_vectors(dev); err_cleanup_once: if (boot) @@ -1228,10 +1197,10 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, int err = 0; if (cleanup) - mlx5_drain_health_wq(dev); + mlx5_drain_health_recovery(dev); mutex_lock(&dev->intf_state_mutex); - if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { + if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", __func__); if (cleanup) @@ -1239,19 +1208,20 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto out; } + clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + if (mlx5_device_registered(dev)) mlx5_detach_device(dev); + mlx5_accel_ipsec_cleanup(dev); + mlx5_fpga_device_stop(dev); + mlx5_sriov_detach(dev); -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_detach(dev->priv.eswitch); -#endif mlx5_cleanup_fs(dev); - mlx5_irq_clear_affinity_hints(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); mlx5_put_uars_page(dev, priv->uar); - mlx5_disable_msix(dev); + mlx5_free_irq_vectors(dev); if (cleanup) mlx5_cleanup_once(dev); mlx5_stop_health_poll(dev); @@ -1266,8 +1236,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_cmd_cleanup(dev); out: - clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); - set_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state); mutex_unlock(&dev->intf_state_mutex); return err; } @@ -1279,7 +1247,7 @@ struct mlx5_core_event_handler { }; static const struct devlink_ops mlx5_devlink_ops = { -#ifdef CONFIG_MLX5_CORE_EN +#ifdef CONFIG_MLX5_ESWITCH .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set, @@ -1319,6 +1287,9 @@ static int init_one(struct pci_dev *pdev, mutex_init(&dev->pci_status_mutex); mutex_init(&dev->intf_state_mutex); + INIT_LIST_HEAD(&priv->waiting_events_list); + priv->is_accum_events = 
false; + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING err = init_srcu_struct(&priv->pfault_srcu); if (err) { @@ -1373,7 +1344,6 @@ clean_srcu: cleanup_srcu_struct(&priv->pfault_srcu); clean_dev: #endif - pci_set_drvdata(pdev, NULL); devlink_free(devlink); return err; @@ -1400,7 +1370,6 @@ static void remove_one(struct pci_dev *pdev) #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING cleanup_srcu_struct(&priv->pfault_srcu); #endif - pci_set_drvdata(pdev, NULL); devlink_free(devlink); } @@ -1412,7 +1381,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, dev_info(&pdev->dev, "%s was called\n", __func__); - mlx5_enter_error_state(dev); + mlx5_enter_error_state(dev, false); mlx5_unload_one(dev, priv, false); /* In case of kernel call drain the health wq */ if (state) { @@ -1499,24 +1468,50 @@ static const struct pci_error_handlers mlx5_err_handler = { .resume = mlx5_pci_resume }; +static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) +{ + int ret; + + if (!MLX5_CAP_GEN(dev, force_teardown)) { + mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n"); + return -EOPNOTSUPP; + } + + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5_core_dbg(dev, "Device in internal error state, giving up\n"); + return -EAGAIN; + } + + ret = mlx5_cmd_force_teardown_hca(dev); + if (ret) { + mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret); + return ret; + } + + mlx5_enter_error_state(dev, true); + + return 0; +} + static void shutdown(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_priv *priv = &dev->priv; + int err; dev_info(&pdev->dev, "Shutdown was called\n"); - /* Notify mlx5 clients that the kernel is being shut down */ - set_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &dev->intf_state); - mlx5_unload_one(dev, priv, false); + err = mlx5_try_fast_unload(dev); + if (err) + mlx5_unload_one(dev, priv, false); mlx5_pci_disable_device(dev); } static const struct pci_device_id mlx5_core_pci_table[] = { - { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */ + { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTIB) }, { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF}, /* Connect-IB VF */ - { PCI_VDEVICE(MELLANOX, 0x1013) }, /* ConnectX-4 */ + { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4) }, { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */ - { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ + { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX) }, { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5, PCIe 3.0 */ { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 VF */ @@ -1524,6 +1519,8 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 Ex VF */ { PCI_VDEVICE(MELLANOX, 0x101b) }, /* ConnectX-6 */ { PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF}, /* ConnectX-6 VF */ + { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ + { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ { 0, } }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index fbc6e9e9e305..b7c2900b75f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -37,12 +37,16 @@ #include <linux/kernel.h> #include 
<linux/sched.h> #include <linux/if_link.h> +#include <linux/firmware.h> #define DRIVER_NAME "mlx5_core" -#define DRIVER_VERSION "3.0-1" -#define DRIVER_RELDATE "January 2015" +#define DRIVER_VERSION "5.0-0" #define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev)) +#define MLX5_VPORT_MANAGER(mdev) \ + (MLX5_CAP_GEN(mdev, vport_group_manager) && \ + (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ + mlx5_core_is_pf(mdev)) extern uint mlx5_core_debug_mask; @@ -84,12 +88,13 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); +int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_core_page_fault(struct mlx5_core_dev *dev, struct mlx5_pagefault *pfault); void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); -void mlx5_enter_error_state(struct mlx5_core_dev *dev); +void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); void mlx5_disable_device(struct mlx5_core_dev *dev); void mlx5_recover_device(struct mlx5_core_dev *dev); int mlx5_sriov_init(struct mlx5_core_dev *dev); @@ -109,7 +114,6 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 element_id); int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev); -u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx); struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); void mlx5_cq_tasklet_cb(unsigned long data); @@ -153,6 +157,13 @@ int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size); int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode); int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); +#define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \ + MLX5_CAP_GEN((mdev), pps_modify) && \ + MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \ + MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj)) + +int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw); + void mlx5e_init(void); void mlx5e_cleanup(void); @@ -168,4 +179,7 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) MLX5_CAP_GEN(dev, lag_master); } +int mlx5_lag_allow(struct mlx5_core_dev *dev); +int mlx5_lag_forbid(struct mlx5_core_dev *dev); + #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index a57d5a81eb05..e36d3e3675f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -279,7 +279,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, int i; inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]); - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; mlx5_core_warn(dev, "vzalloc failed %d\n", inlen); @@ -376,7 +376,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, *nclaimed = 0; outlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]); - out = mlx5_vzalloc(outlen); + out = kvzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; @@ -403,7 +403,6 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, for (i = 0; i < num_claimed; i++) free_4k(dev, 
MLX5_GET64(manage_pages_out, out, pas[i])); - if (nclaimed) *nclaimed = num_claimed; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 141583daf5a2..e07061f565d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -47,8 +47,8 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, u32 *in = NULL; void *data; - in = mlx5_vzalloc(inlen); - out = mlx5_vzalloc(outlen); + in = kvzalloc(inlen, GFP_KERNEL); + out = kvzalloc(outlen, GFP_KERNEL); if (!in || !out) goto out; @@ -454,7 +454,7 @@ int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, u32 *in; int err; - in = mlx5_vzalloc(sz); + in = kvzalloc(sz, GFP_KERNEL); if (!in) { err = -ENOMEM; return err; @@ -677,6 +677,27 @@ int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group) } EXPORT_SYMBOL_GPL(mlx5_set_port_tc_group); +int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev, + u8 tc, u8 *tc_group) +{ + u32 out[MLX5_ST_SZ_DW(qetc_reg)]; + void *ets_tcn_conf; + int err; + + err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out)); + if (err) + return err; + + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, + tc_configuration[tc]); + + *tc_group = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + group); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_tc_group); + int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw) { u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index cbbcef2884be..db9e665ab104 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -30,7 +30,6 @@ * SOFTWARE. */ - #include <linux/gfp.h> #include <linux/export.h> #include <linux/mlx5/cmd.h> @@ -243,6 +242,20 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp); +int mlx5_core_set_delay_drop(struct mlx5_core_dev *dev, + u32 timeout_usec) +{ + u32 out[MLX5_ST_SZ_DW(set_delay_drop_params_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(set_delay_drop_params_in)] = {0}; + + MLX5_SET(set_delay_drop_params_in, in, opcode, + MLX5_CMD_OP_SET_DELAY_DROP_PARAMS); + MLX5_SET(set_delay_drop_params_in, in, delay_drop_timeout, + timeout_usec / 100); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL_GPL(mlx5_core_set_delay_drop); + struct mbox_info { u32 *in; u32 *out; @@ -519,23 +532,3 @@ int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); } EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter); - -int mlx5_core_query_out_of_buffer(struct mlx5_core_dev *dev, u16 counter_id, - u32 *out_of_buffer) -{ - int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); - void *out; - int err; - - out = mlx5_vzalloc(outlen); - if (!out) - return -ENOMEM; - - err = mlx5_core_query_q_counter(dev, counter_id, 0, out, outlen); - if (!err) - *out_of_buffer = MLX5_GET(query_q_counter_out, out, - out_of_buffer); - - kfree(out); - return err; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index e08627785590..2a8b529ce6dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -32,10 +32,9 @@ #include <linux/pci.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> #include "mlx5_core.h" -#ifdef CONFIG_MLX5_CORE_EN #include "eswitch.h" 
-#endif bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) { @@ -44,6 +43,38 @@ bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) return !!sriov->num_vfs; } +static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + struct mlx5_hca_vport_context *in; + int err = 0; + + /* Restore sriov guid and policy settings */ + if (sriov->vfs_ctx[vf].node_guid || + sriov->vfs_ctx[vf].port_guid || + sriov->vfs_ctx[vf].policy != MLX5_POLICY_INVALID) { + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + in->node_guid = sriov->vfs_ctx[vf].node_guid; + in->port_guid = sriov->vfs_ctx[vf].port_guid; + in->policy = sriov->vfs_ctx[vf].policy; + in->field_select = + !!(in->port_guid) * MLX5_HCA_VPORT_SEL_PORT_GUID | + !!(in->node_guid) * MLX5_HCA_VPORT_SEL_NODE_GUID | + !!(in->policy) * MLX5_HCA_VPORT_SEL_STATE_POLICY; + + err = mlx5_core_modify_hca_vport_context(dev, 1, 1, vf + 1, in); + if (err) + mlx5_core_warn(dev, "modify vport context failed, unable to restore VF %d settings\n", vf); + + kfree(in); + } + + return err; +} + static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; @@ -57,14 +88,12 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) return -EBUSY; } -#ifdef CONFIG_MLX5_CORE_EN err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); if (err) { mlx5_core_warn(dev, "failed to enable eswitch SRIOV (%d)\n", err); return err; } -#endif for (vf = 0; vf < num_vfs; vf++) { err = mlx5_core_enable_hca(dev, vf + 1); @@ -74,8 +103,16 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) } sriov->vfs_ctx[vf].enabled = 1; sriov->enabled_vfs++; + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) { + err = sriov_restore_guids(dev, vf); + if (err) { + mlx5_core_warn(dev, + "failed to restore VF %d settings, err %d\n", + vf, err); + continue; + } + } mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf); - } return 0; @@ -88,7 +125,7 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) int vf; if (!sriov->enabled_vfs) - return; + goto out; for (vf = 0; vf < sriov->num_vfs; vf++) { if (!sriov->vfs_ctx[vf].enabled) @@ -102,9 +139,8 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) sriov->enabled_vfs--; } -#ifdef CONFIG_MLX5_CORE_EN +out: mlx5_eswitch_disable_sriov(dev->priv.eswitch); -#endif if (mlx5_wait_for_vf_pages(dev)) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); @@ -175,15 +211,20 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!mlx5_core_is_pf(dev)) return -EPERM; - if (num_vfs && mlx5_lag_is_active(dev)) { - mlx5_core_warn(dev, "can't turn sriov on while LAG is active"); - return -EINVAL; + if (num_vfs) { + int ret; + + ret = mlx5_lag_forbid(dev); + if (ret && (ret != -ENODEV)) + return ret; } - if (num_vfs) + if (num_vfs) { err = mlx5_sriov_enable(pdev, num_vfs); - else + } else { mlx5_sriov_disable(pdev); + mlx5_lag_allow(dev); + } return err ? 
err : num_vfs; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 3099630015d7..23cc337a96c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -162,7 +162,7 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; - create_in = mlx5_vzalloc(inlen); + create_in = kvzalloc(inlen, GFP_KERNEL); if (!create_in) return -ENOMEM; @@ -201,13 +201,13 @@ static int destroy_srq_cmd(struct mlx5_core_dev *dev, static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq) { - /* arm_srq structs missing using identical xrc ones */ - u32 srq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; - u32 srq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; + u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; - MLX5_SET(arm_xrc_srq_in, srq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, srq_in, xrc_srqn, srq->srqn); - MLX5_SET(arm_xrc_srq_in, srq_in, lwm, lwm); + MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); + MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn); + MLX5_SET(arm_rq_in, srq_in, lwm, lwm); return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), srq_out, sizeof(srq_out)); @@ -221,7 +221,7 @@ static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, void *srqc; int err; - srq_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_srq_out)); + srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL); if (!srq_out) return -ENOMEM; @@ -256,7 +256,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; - create_in = mlx5_vzalloc(inlen); + create_in = kvzalloc(inlen, GFP_KERNEL); if (!create_in) return -ENOMEM; @@ -320,7 +320,7 @@ static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, void *xrc_srqc; int err; - xrcsrq_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL); if (!xrcsrq_out) return -ENOMEM; memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); @@ -357,7 +357,7 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; - create_in = mlx5_vzalloc(inlen); + create_in = kvzalloc(inlen, GFP_KERNEL); if (!create_in) return -ENOMEM; @@ -390,7 +390,7 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev, void *bitmask; int err; - in = mlx5_vzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in)); + in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL); if (!in) return -ENOMEM; @@ -417,7 +417,7 @@ static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, void *rmpc; int err; - rmp_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_rmp_out)); + rmp_out = kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL); if (!rmp_out) return -ENOMEM; @@ -435,16 +435,128 @@ out: return err; } +static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0}; + void *create_in; + void *xrqc; + void *wq; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if 
(!create_in) + return -ENOMEM; + + xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); + wq = MLX5_ADDR_OF(xrqc, xrqc, wq); + + set_wq(wq, in); + memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); + + if (in->type == IB_SRQT_TM) { + MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); + if (in->flags & MLX5_SRQ_FLAG_RNDV) + MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV); + MLX5_SET(xrqc, xrqc, + tag_matching_topology_context.log_matching_list_sz, + in->tm_log_list_size); + } + MLX5_SET(xrqc, xrqc, user_index, in->user_index); + MLX5_SET(xrqc, xrqc, cqn, in->cqn); + MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ); + err = mlx5_cmd_exec(dev, create_in, inlen, create_out, + sizeof(create_out)); + kvfree(create_in); + if (!err) + srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn); + + return err; +} + +static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0}; + + MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); + MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int arm_xrq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + u16 lwm) +{ + u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; + + MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); + MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); + MLX5_SET(arm_rq_in, in, lwm, lwm); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0}; + u32 *xrq_out; + int outlen = MLX5_ST_SZ_BYTES(query_xrq_out); + void *xrqc; + int err; + + xrq_out = kvzalloc(outlen, GFP_KERNEL); + if (!xrq_out) + return -ENOMEM; + + MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ); + MLX5_SET(query_xrq_in, in, xrqn, srq->srqn); + + err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen); + if (err) + goto out; + + xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context); + get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out); + if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; + out->tm_next_tag = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.append_next_index); + out->tm_hw_phase_cnt = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.hw_phase_cnt); + out->tm_sw_phase_cnt = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.sw_phase_cnt); + +out: + kvfree(xrq_out); + return err; +} + static int create_srq_split(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in) { if (!dev->issi) return create_srq_cmd(dev, srq, in); - else if (srq->common.res == MLX5_RES_XSRQ) + switch (srq->common.res) { + case MLX5_RES_XSRQ: return create_xrc_srq_cmd(dev, srq, in); - else + case MLX5_RES_XRQ: + return create_xrq_cmd(dev, srq, in); + default: return create_rmp_cmd(dev, srq, in); + } } static int destroy_srq_split(struct mlx5_core_dev *dev, @@ -452,10 +564,14 @@ static int destroy_srq_split(struct mlx5_core_dev *dev, { if (!dev->issi) return destroy_srq_cmd(dev, srq); - else if (srq->common.res == MLX5_RES_XSRQ) + switch (srq->common.res) { + case MLX5_RES_XSRQ: return destroy_xrc_srq_cmd(dev, srq); - else + case MLX5_RES_XRQ: + return destroy_xrq_cmd(dev, srq); + 
default: return destroy_rmp_cmd(dev, srq); + } } int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, @@ -464,10 +580,16 @@ int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, int err; struct mlx5_srq_table *table = &dev->priv.srq_table; - if (in->type == IB_SRQT_XRC) + switch (in->type) { + case IB_SRQT_XRC: srq->common.res = MLX5_RES_XSRQ; - else + break; + case IB_SRQT_TM: + srq->common.res = MLX5_RES_XRQ; + break; + default: srq->common.res = MLX5_RES_SRQ; + } err = create_srq_split(dev, srq, in); if (err) @@ -528,10 +650,14 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, { if (!dev->issi) return query_srq_cmd(dev, srq, out); - else if (srq->common.res == MLX5_RES_XSRQ) + switch (srq->common.res) { + case MLX5_RES_XSRQ: return query_xrc_srq_cmd(dev, srq, out); - else + case MLX5_RES_XRQ: + return query_xrq_cmd(dev, srq, out); + default: return query_rmp_cmd(dev, srq, out); + } } EXPORT_SYMBOL(mlx5_core_query_srq); @@ -540,10 +666,14 @@ int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, { if (!dev->issi) return arm_srq_cmd(dev, srq, lwm, is_srq); - else if (srq->common.res == MLX5_RES_XSRQ) + switch (srq->common.res) { + case MLX5_RES_XSRQ: return arm_xrc_srq_cmd(dev, srq, lwm); - else + case MLX5_RES_XRQ: + return arm_xrq_cmd(dev, srq, lwm); + default: return arm_rmp_cmd(dev, srq, lwm); + } } EXPORT_SYMBOL(mlx5_core_arm_srq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index a00ff49eec18..5e128d7a9ffd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -284,7 +284,7 @@ int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm) void *bitmask; int err; - in = mlx5_vzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in)); + in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL); if (!in) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 15c2294dd2b4..d653b0025b13 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -172,7 +172,7 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *out_addr; int err; - out = mlx5_vzalloc(outlen); + out = kvzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; @@ -197,11 +197,9 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, void *nic_vport_ctx; u8 *perm_mac; - in = mlx5_vzalloc(inlen); - if (!in) { - mlx5_core_warn(mdev, "failed to allocate inbox\n"); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) return -ENOMEM; - } MLX5_SET(modify_nic_vport_context_in, in, field_select.permanent_address, 1); @@ -231,7 +229,7 @@ int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu) u32 *out; int err; - out = mlx5_vzalloc(outlen); + out = kvzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; @@ -251,7 +249,7 @@ int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu) void *in; int err; - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -501,7 +499,7 @@ int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); - out = mlx5_vzalloc(outlen); + out = kvzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; @@ -521,7 +519,7 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 
*node_guid) u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); - out = mlx5_vzalloc(outlen); + out = kvzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; @@ -551,7 +549,7 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, if (!MLX5_CAP_ESW(mdev, nic_vport_node_guid_modify)) return -EOPNOTSUPP; - in = mlx5_vzalloc(inlen); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -577,7 +575,7 @@ int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); - out = mlx5_vzalloc(outlen); + out = kvzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; @@ -879,11 +877,9 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); int err; - in = mlx5_vzalloc(inlen); - if (!in) { - mlx5_core_err(mdev, "failed to allocate inbox\n"); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) return -ENOMEM; - } MLX5_SET(modify_nic_vport_context_in, in, field_select.promisc, 1); MLX5_SET(modify_nic_vport_context_in, in, @@ -901,6 +897,68 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, } EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc); +enum { + UC_LOCAL_LB, + MC_LOCAL_LB +}; + +int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + mlx5_core_dbg(mdev, "%s local_lb\n", enable ? "enable" : "disable"); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.disable_mc_local_lb, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.disable_mc_local_lb, !enable); + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.disable_uc_local_lb, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.disable_uc_local_lb, !enable); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_update_local_lb); + +int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status) +{ + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u32 *out; + int value; + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, 0, out, outlen); + if (err) + goto out; + + value = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.disable_mc_local_lb) << MC_LOCAL_LB; + + value |= MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.disable_uc_local_lb) << UC_LOCAL_LB; + + *status = !value; + +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_query_local_lb); + enum mlx5_vport_roce_state { MLX5_VPORT_ROCE_DISABLED = 0, MLX5_VPORT_ROCE_ENABLED = 1, @@ -913,11 +971,9 @@ static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev, int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); int err; - in = mlx5_vzalloc(inlen); - if (!in) { - mlx5_core_warn(mdev, "failed to allocate inbox\n"); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) return -ENOMEM; - } MLX5_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1); MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en, @@ -932,12 +988,16 @@ static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev, int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev) { + if (atomic_inc_return(&mdev->roce.roce_en) != 1) + return 0; return 
mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED); } EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce); int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev) { + if (atomic_dec_return(&mdev->roce.roce_en) != 0) + return 0; return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED); } EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce); @@ -952,7 +1012,7 @@ int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, int err; is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); - in = mlx5_vzalloc(in_sz); + in = kvzalloc(in_sz, GFP_KERNEL); if (!in) { err = -ENOMEM; return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 921673c42bc9..6bcfc25350f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -54,6 +54,12 @@ static u32 mlx5_wq_cyc_get_byte_size(struct mlx5_wq_cyc *wq) return mlx5_wq_cyc_get_size(wq) << wq->log_stride; } +static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq) +{ + return mlx5_wq_cyc_get_byte_size(&wq->rq) + + mlx5_wq_cyc_get_byte_size(&wq->sq); +} + static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq) { return mlx5_cqwq_get_size(wq) << wq->log_stride; @@ -99,6 +105,46 @@ err_db_free: return err; } +int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *qpc, struct mlx5_wq_qp *wq, + struct mlx5_wq_ctrl *wq_ctrl) +{ + int err; + + wq->rq.log_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4; + wq->rq.sz_m1 = (1 << MLX5_GET(qpc, qpc, log_rq_size)) - 1; + + wq->sq.log_stride = ilog2(MLX5_SEND_WQE_BB); + wq->sq.sz_m1 = (1 << MLX5_GET(qpc, qpc, log_sq_size)) - 1; + + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); + return err; + } + + err = mlx5_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq), + &wq_ctrl->buf, param->buf_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err); + goto err_db_free; + } + + wq->rq.buf = wq_ctrl->buf.direct.buf; + wq->sq.buf = wq->rq.buf + mlx5_wq_cyc_get_byte_size(&wq->rq); + wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR]; + wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR]; + + wq_ctrl->mdev = mdev; + + return 0; + +err_db_free: + mlx5_db_free(mdev, &wq_ctrl->db); + + return err; +} + int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *cqc, struct mlx5_cqwq *wq, struct mlx5_frag_wq_ctrl *wq_ctrl) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index d8afed898c31..718589d0cec2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -34,6 +34,8 @@ #define __MLX5_WQ_H__ #include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/cq.h> +#include <linux/mlx5/qp.h> struct mlx5_wq_param { int linear; @@ -60,6 +62,11 @@ struct mlx5_wq_cyc { u8 log_stride; }; +struct mlx5_wq_qp { + struct mlx5_wq_cyc rq; + struct mlx5_wq_cyc sq; +}; + struct mlx5_cqwq { struct mlx5_frag_buf frag_buf; __be32 *db; @@ -87,6 +94,10 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, struct mlx5_wq_ctrl *wq_ctrl); u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq); +int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *qpc, struct mlx5_wq_qp *wq, + struct mlx5_wq_ctrl *wq_ctrl); + int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param 
*param, void *cqc, struct mlx5_cqwq *wq, struct mlx5_frag_wq_ctrl *wq_ctrl); @@ -146,6 +157,22 @@ static inline void mlx5_cqwq_update_db_record(struct mlx5_cqwq *wq) *wq->db = cpu_to_be32(wq->cc & 0xffffff); } +static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq) +{ + u32 ci = mlx5_cqwq_get_ci(wq); + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); + u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK; + u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1; + + if (cqe_ownership_bit != sw_ownership_val) + return NULL; + + /* ensure cqe content is read after cqe ownership bit */ + dma_rmb(); + + return cqe; +} + static inline int mlx5_wq_ll_is_full(struct mlx5_wq_ll *wq) { return wq->cur_sz == wq->sz_m1; diff --git a/drivers/net/ethernet/mellanox/mlxfw/Kconfig b/drivers/net/ethernet/mellanox/mlxfw/Kconfig new file mode 100644 index 000000000000..186ebe783f97 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/Kconfig @@ -0,0 +1,13 @@ +# +# Mellanox firmware flash library configuration +# + +config MLXFW + tristate "Mellanox Technologies firmware flash module" + ---help--- + This driver supports Mellanox Technologies Firmware + flashing common logic. + + To compile this driver as a module, choose M here: the + module will be called mlxfw. + select XZ_DEC diff --git a/drivers/net/ethernet/mellanox/mlxfw/Makefile b/drivers/net/ethernet/mellanox/mlxfw/Makefile new file mode 100644 index 000000000000..7448b301104c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_MLXFW) += mlxfw.o +mlxfw-objs := mlxfw_fsm.o mlxfw_mfa2_tlv_multi.o mlxfw_mfa2.o diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h new file mode 100644 index 000000000000..7a712b6b09ec --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h @@ -0,0 +1,111 @@ +/* + * drivers/net/ethernet/mellanox/mlxfw/mlxfw.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXFW_H +#define _MLXFW_H + +#include <linux/firmware.h> + +enum mlxfw_fsm_state { + MLXFW_FSM_STATE_IDLE, + MLXFW_FSM_STATE_LOCKED, + MLXFW_FSM_STATE_INITIALIZE, + MLXFW_FSM_STATE_DOWNLOAD, + MLXFW_FSM_STATE_VERIFY, + MLXFW_FSM_STATE_APPLY, + MLXFW_FSM_STATE_ACTIVATE, +}; + +enum mlxfw_fsm_state_err { + MLXFW_FSM_STATE_ERR_OK, + MLXFW_FSM_STATE_ERR_ERROR, + MLXFW_FSM_STATE_ERR_REJECTED_DIGEST_ERR, + MLXFW_FSM_STATE_ERR_REJECTED_NOT_APPLICABLE, + MLXFW_FSM_STATE_ERR_REJECTED_UNKNOWN_KEY, + MLXFW_FSM_STATE_ERR_REJECTED_AUTH_FAILED, + MLXFW_FSM_STATE_ERR_REJECTED_UNSIGNED, + MLXFW_FSM_STATE_ERR_REJECTED_KEY_NOT_APPLICABLE, + MLXFW_FSM_STATE_ERR_REJECTED_BAD_FORMAT, + MLXFW_FSM_STATE_ERR_BLOCKED_PENDING_RESET, + MLXFW_FSM_STATE_ERR_MAX, +}; + +struct mlxfw_dev; + +struct mlxfw_dev_ops { + int (*component_query)(struct mlxfw_dev *mlxfw_dev, u16 component_index, + u32 *p_max_size, u8 *p_align_bits, + u16 *p_max_write_size); + + int (*fsm_lock)(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle); + + int (*fsm_component_update)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index, u32 component_size); + + int (*fsm_block_download)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u8 *data, u16 size, u32 offset); + + int (*fsm_component_verify)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index); + + int (*fsm_activate)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle); + + int (*fsm_query_state)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + enum mlxfw_fsm_state *fsm_state, + enum mlxfw_fsm_state_err *fsm_state_err); + + void (*fsm_cancel)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle); + + void (*fsm_release)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle); +}; + +struct mlxfw_dev { + const struct mlxfw_dev_ops *ops; + const char *psid; + u16 psid_size; +}; + +#if IS_REACHABLE(CONFIG_MLXFW) +int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, + const struct firmware *firmware); +#else +static inline +int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, + const struct firmware *firmware) +{ + return -EOPNOTSUPP; +} +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c new file mode 100644 index 000000000000..2cf89126fb23 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c @@ -0,0 +1,273 @@ +/* + * drivers/net/ethernet/mellanox/mlxfw/mlxfw.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#define pr_fmt(fmt) "mlxfw: " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/delay.h> + +#include "mlxfw.h" +#include "mlxfw_mfa2.h" + +#define MLXFW_FSM_STATE_WAIT_CYCLE_MS 200 +#define MLXFW_FSM_STATE_WAIT_TIMEOUT_MS 30000 +#define MLXFW_FSM_STATE_WAIT_ROUNDS \ + (MLXFW_FSM_STATE_WAIT_TIMEOUT_MS / MLXFW_FSM_STATE_WAIT_CYCLE_MS) +#define MLXFW_FSM_MAX_COMPONENT_SIZE (10 * (1 << 20)) + +static const char * const mlxfw_fsm_state_err_str[] = { + [MLXFW_FSM_STATE_ERR_ERROR] = + "general error", + [MLXFW_FSM_STATE_ERR_REJECTED_DIGEST_ERR] = + "component hash mismatch", + [MLXFW_FSM_STATE_ERR_REJECTED_NOT_APPLICABLE] = + "component not applicable", + [MLXFW_FSM_STATE_ERR_REJECTED_UNKNOWN_KEY] = + "unknown key", + [MLXFW_FSM_STATE_ERR_REJECTED_AUTH_FAILED] = + "authentication failed", + [MLXFW_FSM_STATE_ERR_REJECTED_UNSIGNED] = + "component was not signed", + [MLXFW_FSM_STATE_ERR_REJECTED_KEY_NOT_APPLICABLE] = + "key not applicable", + [MLXFW_FSM_STATE_ERR_REJECTED_BAD_FORMAT] = + "bad format", + [MLXFW_FSM_STATE_ERR_BLOCKED_PENDING_RESET] = + "pending reset", + [MLXFW_FSM_STATE_ERR_MAX] = + "unknown error" +}; + +static int mlxfw_fsm_state_wait(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + enum mlxfw_fsm_state fsm_state) +{ + enum mlxfw_fsm_state_err fsm_state_err; + enum mlxfw_fsm_state curr_fsm_state; + int times; + int err; + + times = MLXFW_FSM_STATE_WAIT_ROUNDS; +retry: + err = mlxfw_dev->ops->fsm_query_state(mlxfw_dev, fwhandle, + &curr_fsm_state, &fsm_state_err); + if (err) + return err; + + if (fsm_state_err != MLXFW_FSM_STATE_ERR_OK) { + pr_err("Firmware flash failed: %s\n", + mlxfw_fsm_state_err_str[fsm_state_err]); + return -EINVAL; + } + if (curr_fsm_state != fsm_state) { + if (--times == 0) { + pr_err("Timeout reached on FSM state change"); + return -ETIMEDOUT; + } + msleep(MLXFW_FSM_STATE_WAIT_CYCLE_MS); + goto retry; + } + return 0; +} + +#define MLXFW_ALIGN_DOWN(x, align_bits) ((x) & ~((1 << (align_bits)) - 1)) +#define MLXFW_ALIGN_UP(x, align_bits) \ + MLXFW_ALIGN_DOWN((x) + ((1 << (align_bits)) - 1), (align_bits)) + +static int mlxfw_flash_component(struct mlxfw_dev 
*mlxfw_dev, + u32 fwhandle, + struct mlxfw_mfa2_component *comp) +{ + u16 comp_max_write_size; + u8 comp_align_bits; + u32 comp_max_size; + u16 block_size; + u8 *block_ptr; + u32 offset; + int err; + + err = mlxfw_dev->ops->component_query(mlxfw_dev, comp->index, + &comp_max_size, &comp_align_bits, + &comp_max_write_size); + if (err) + return err; + + comp_max_size = min_t(u32, comp_max_size, MLXFW_FSM_MAX_COMPONENT_SIZE); + if (comp->data_size > comp_max_size) { + pr_err("Component %d is of size %d which is bigger than limit %d\n", + comp->index, comp->data_size, comp_max_size); + return -EINVAL; + } + + comp_max_write_size = MLXFW_ALIGN_DOWN(comp_max_write_size, + comp_align_bits); + + pr_debug("Component update\n"); + err = mlxfw_dev->ops->fsm_component_update(mlxfw_dev, fwhandle, + comp->index, + comp->data_size); + if (err) + return err; + + err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, + MLXFW_FSM_STATE_DOWNLOAD); + if (err) + goto err_out; + + pr_debug("Component download\n"); + for (offset = 0; + offset < MLXFW_ALIGN_UP(comp->data_size, comp_align_bits); + offset += comp_max_write_size) { + block_ptr = comp->data + offset; + block_size = (u16) min_t(u32, comp->data_size - offset, + comp_max_write_size); + err = mlxfw_dev->ops->fsm_block_download(mlxfw_dev, fwhandle, + block_ptr, block_size, + offset); + if (err) + goto err_out; + } + + pr_debug("Component verify\n"); + err = mlxfw_dev->ops->fsm_component_verify(mlxfw_dev, fwhandle, + comp->index); + if (err) + goto err_out; + + err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, MLXFW_FSM_STATE_LOCKED); + if (err) + goto err_out; + return 0; + +err_out: + mlxfw_dev->ops->fsm_cancel(mlxfw_dev, fwhandle); + return err; +} + +static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + struct mlxfw_mfa2_file *mfa2_file) +{ + u32 component_count; + int err; + int i; + + err = mlxfw_mfa2_file_component_count(mfa2_file, mlxfw_dev->psid, + mlxfw_dev->psid_size, + &component_count); + if (err) { + pr_err("Could not find device PSID in MFA2 file\n"); + return err; + } + + for (i = 0; i < component_count; i++) { + struct mlxfw_mfa2_component *comp; + + comp = mlxfw_mfa2_file_component_get(mfa2_file, mlxfw_dev->psid, + mlxfw_dev->psid_size, i); + if (IS_ERR(comp)) + return PTR_ERR(comp); + + pr_info("Flashing component type %d\n", comp->index); + err = mlxfw_flash_component(mlxfw_dev, fwhandle, comp); + mlxfw_mfa2_file_component_put(comp); + if (err) + return err; + } + return 0; +} + +int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, + const struct firmware *firmware) +{ + struct mlxfw_mfa2_file *mfa2_file; + u32 fwhandle; + int err; + + if (!mlxfw_mfa2_check(firmware)) { + pr_err("Firmware file is not MFA2\n"); + return -EINVAL; + } + + mfa2_file = mlxfw_mfa2_file_init(firmware); + if (IS_ERR(mfa2_file)) + return PTR_ERR(mfa2_file); + + pr_info("Initialize firmware flash process\n"); + err = mlxfw_dev->ops->fsm_lock(mlxfw_dev, &fwhandle); + if (err) { + pr_err("Could not lock the firmware FSM\n"); + goto err_fsm_lock; + } + + err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, + MLXFW_FSM_STATE_LOCKED); + if (err) + goto err_state_wait_idle_to_locked; + + err = mlxfw_flash_components(mlxfw_dev, fwhandle, mfa2_file); + if (err) + goto err_flash_components; + + pr_debug("Activate image\n"); + err = mlxfw_dev->ops->fsm_activate(mlxfw_dev, fwhandle); + if (err) { + pr_err("Could not activate the downloaded image\n"); + goto err_fsm_activate; + } + + err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, MLXFW_FSM_STATE_LOCKED); + if 
(err) + goto err_state_wait_activate_to_locked; + + pr_debug("Handle release\n"); + mlxfw_dev->ops->fsm_release(mlxfw_dev, fwhandle); + + pr_info("Firmware flash done.\n"); + mlxfw_mfa2_file_fini(mfa2_file); + return 0; + +err_state_wait_activate_to_locked: +err_fsm_activate: +err_flash_components: +err_state_wait_idle_to_locked: + mlxfw_dev->ops->fsm_release(mlxfw_dev, fwhandle); +err_fsm_lock: + mlxfw_mfa2_file_fini(mfa2_file); + return err; +} +EXPORT_SYMBOL(mlxfw_firmware_flash); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox firmware flash lib"); diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c new file mode 100644 index 000000000000..993cb5ba934e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c @@ -0,0 +1,619 @@ +/* + * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#define pr_fmt(fmt) "mlxfw_mfa2: " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/xz.h> +#include "mlxfw_mfa2.h" +#include "mlxfw_mfa2_file.h" +#include "mlxfw_mfa2_tlv.h" +#include "mlxfw_mfa2_format.h" +#include "mlxfw_mfa2_tlv_multi.h" + +/* MFA2 FILE + * +----------------------------------+ + * | MFA2 finger print | + * +----------------------------------+ + * | package descriptor multi_tlv | + * | +------------------------------+ | +-----------------+ + * | | package descriptor tlv +-----> |num_devices=n | + * | +------------------------------+ | |num_components=m | + * +----------------------------------+ |CB offset | + * | device descriptor multi_tlv | |... 
| + * | +------------------------------+ | | | + * | | PSID tlv | | +-----------------+ + * | +------------------------------+ | + * | | component index tlv | | + * | +------------------------------+ | + * +----------------------------------+ + * | component descriptor multi_tlv | + * | +------------------------------+ | +-----------------+ + * | | component descriptor tlv +-----> |Among others: | + * | +------------------------------+ | |CB offset=o | + * +----------------------------------+ |comp index=i | + * | | |... | + * | | | | + * | | +-----------------+ + * | COMPONENT BLOCK (CB) | + * | | + * | | + * | | + * +----------------------------------+ + * + * On the top level, an MFA2 file contains: + * - Fingerprint + * - Several multi_tlvs (TLVs of type MLXFW_MFA2_TLV_MULTI, as defined in + * mlxfw_mfa2_format.h) + * - Compresses content block + * + * The first multi_tlv + * ------------------- + * The first multi TLV is treated as package descriptor, and expected to have a + * first TLV child of type MLXFW_MFA2_TLV_PACKAGE_DESCRIPTOR which contains all + * the global information needed to parse the file. Among others, it contains + * the number of device descriptors and component descriptor following this + * multi TLV. + * + * The device descriptor multi_tlv + * ------------------------------- + * The multi TLVs following the package descriptor are treated as device + * descriptor, and are expected to have the following children: + * - PSID TLV child of type MLXFW_MFA2_TLV_PSID containing that device PSID. + * - Component index of type MLXFW_MFA2_TLV_COMPONENT_PTR that contains that + * device component index. + * + * The component descriptor multi_tlv + * ---------------------------------- + * The multi TLVs following the device descriptor multi TLVs are treated as + * component descriptor, and are expected to have a first child of type + * MLXFW_MFA2_TLV_COMPONENT_DESCRIPTOR that contains mostly the component index, + * needed for the flash process and the offset to the binary within the + * component block. 
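A minimal standalone sketch of walking this top-level layout (fingerprint followed by a 4-byte-aligned TLV stream) is given below for illustration; it is plain userspace C rather than the driver code, and simply mirrors the header layout from mlxfw_mfa2_tlv.h, the "MLNX.MFA2.XZ.00!" fingerprint checked by mlxfw_mfa2_check(), and the NLA_ALIGN() rounding used throughout. A multi TLV is skipped as a whole by additionally adding its total_len, as mlxfw_mfa2_tlv_next() does further down.

#include <stdint.h>
#include <stddef.h>
#include <string.h>

#define MFA2_ALIGN(x)	(((x) + 3) & ~(size_t)3)   /* same rounding as NLA_ALIGN() */

struct mfa2_tlv_hdr {		/* layout of struct mlxfw_mfa2_tlv */
	uint8_t version;
	uint8_t type;
	uint8_t len_be[2];	/* __be16 payload length */
};

static const char mfa2_fingerprint[] = "MLNX.MFA2.XZ.00!";

static uint16_t mfa2_tlv_len(const uint8_t *tlv)
{
	return (uint16_t)(tlv[2] << 8 | tlv[3]);
}

/* Offset of the first (package descriptor) multi TLV, or 0 if not MFA2. */
static size_t mfa2_first_tlv_offset(const uint8_t *buf, size_t size)
{
	size_t off = MFA2_ALIGN(sizeof(mfa2_fingerprint) - 1);

	if (size < off + sizeof(struct mfa2_tlv_hdr) ||
	    memcmp(buf, mfa2_fingerprint, sizeof(mfa2_fingerprint) - 1) != 0)
		return 0;
	return off;
}

/* Offset of the TLV following the (non-multi) TLV at "off". */
static size_t mfa2_next_tlv_offset(const uint8_t *buf, size_t off)
{
	return off + MFA2_ALIGN(sizeof(struct mfa2_tlv_hdr) +
				mfa2_tlv_len(buf + off));
}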
+ */ + +static const u8 mlxfw_mfa2_fingerprint[] = "MLNX.MFA2.XZ.00!"; +static const int mlxfw_mfa2_fingerprint_len = + sizeof(mlxfw_mfa2_fingerprint) - 1; + +static const u8 mlxfw_mfa2_comp_magic[] = "#BIN.COMPONENT!#"; +static const int mlxfw_mfa2_comp_magic_len = sizeof(mlxfw_mfa2_comp_magic) - 1; + +bool mlxfw_mfa2_check(const struct firmware *fw) +{ + if (fw->size < sizeof(mlxfw_mfa2_fingerprint)) + return false; + + return memcmp(fw->data, mlxfw_mfa2_fingerprint, + mlxfw_mfa2_fingerprint_len) == 0; +} + +static bool +mlxfw_mfa2_tlv_multi_validate(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv_multi *multi) +{ + const struct mlxfw_mfa2_tlv *tlv; + u16 idx; + + /* Check that all children are valid */ + mlxfw_mfa2_tlv_multi_foreach(mfa2_file, tlv, idx, multi) { + if (!tlv) { + pr_err("Multi has invalid child"); + return false; + } + } + return true; +} + +static bool +mlxfw_mfa2_file_dev_validate(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv *dev_tlv, + u16 dev_idx) +{ + const struct mlxfw_mfa2_tlv_component_ptr *cptr; + const struct mlxfw_mfa2_tlv_multi *multi; + const struct mlxfw_mfa2_tlv_psid *psid; + const struct mlxfw_mfa2_tlv *tlv; + u16 cptr_count; + u16 cptr_idx; + int err; + + pr_debug("Device %d\n", dev_idx); + + multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, dev_tlv); + if (!multi) { + pr_err("Device %d is not a valid TLV error\n", dev_idx); + return false; + } + + if (!mlxfw_mfa2_tlv_multi_validate(mfa2_file, multi)) + return false; + + /* Validate the device has PSID tlv */ + tlv = mlxfw_mfa2_tlv_multi_child_find(mfa2_file, multi, + MLXFW_MFA2_TLV_PSID, 0); + if (!tlv) { + pr_err("Device %d does not have PSID\n", dev_idx); + return false; + } + + psid = mlxfw_mfa2_tlv_psid_get(mfa2_file, tlv); + if (!psid) { + pr_err("Device %d PSID TLV is not valid\n", dev_idx); + return false; + } + + print_hex_dump_debug(" -- Device PSID ", DUMP_PREFIX_NONE, 16, 16, + psid->psid, be16_to_cpu(tlv->len), true); + + /* Validate the device has COMPONENT_PTR */ + err = mlxfw_mfa2_tlv_multi_child_count(mfa2_file, multi, + MLXFW_MFA2_TLV_COMPONENT_PTR, + &cptr_count); + if (err) + return false; + + if (cptr_count == 0) { + pr_err("Device %d has no components\n", dev_idx); + return false; + } + + for (cptr_idx = 0; cptr_idx < cptr_count; cptr_idx++) { + tlv = mlxfw_mfa2_tlv_multi_child_find(mfa2_file, multi, + MLXFW_MFA2_TLV_COMPONENT_PTR, + cptr_idx); + if (!tlv) + return false; + + cptr = mlxfw_mfa2_tlv_component_ptr_get(mfa2_file, tlv); + if (!cptr) { + pr_err("Device %d COMPONENT_PTR TLV is not valid\n", + dev_idx); + return false; + } + + pr_debug(" -- Component index %d\n", + be16_to_cpu(cptr->component_index)); + } + return true; +} + +static bool +mlxfw_mfa2_file_comp_validate(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv *comp_tlv, + u16 comp_idx) +{ + const struct mlxfw_mfa2_tlv_component_descriptor *cdesc; + const struct mlxfw_mfa2_tlv_multi *multi; + const struct mlxfw_mfa2_tlv *tlv; + + pr_debug("Component %d\n", comp_idx); + + multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, comp_tlv); + if (!multi) { + pr_err("Component %d is not a valid TLV error\n", comp_idx); + return false; + } + + if (!mlxfw_mfa2_tlv_multi_validate(mfa2_file, multi)) + return false; + + /* Check that component have COMPONENT_DESCRIPTOR as first child */ + tlv = mlxfw_mfa2_tlv_multi_child(mfa2_file, multi); + if (!tlv) { + pr_err("Component descriptor %d multi TLV error\n", comp_idx); + return false; + } + + cdesc = 
mlxfw_mfa2_tlv_component_descriptor_get(mfa2_file, tlv); + if (!cdesc) { + pr_err("Component %d does not have a valid descriptor\n", + comp_idx); + return false; + } + pr_debug(" -- Component type %d\n", be16_to_cpu(cdesc->identifier)); + pr_debug(" -- Offset 0x%llx and size %d\n", + ((u64) be32_to_cpu(cdesc->cb_offset_h) << 32) + | be32_to_cpu(cdesc->cb_offset_l), be32_to_cpu(cdesc->size)); + + return true; +} + +static bool mlxfw_mfa2_file_validate(const struct mlxfw_mfa2_file *mfa2_file) +{ + const struct mlxfw_mfa2_tlv *tlv; + u16 idx; + + pr_debug("Validating file\n"); + + /* check that all the devices exist */ + mlxfw_mfa2_tlv_foreach(mfa2_file, tlv, idx, mfa2_file->first_dev, + mfa2_file->dev_count) { + if (!tlv) { + pr_err("Device TLV error\n"); + return false; + } + + /* Check each device */ + if (!mlxfw_mfa2_file_dev_validate(mfa2_file, tlv, idx)) + return false; + } + + /* check that all the components exist */ + mlxfw_mfa2_tlv_foreach(mfa2_file, tlv, idx, mfa2_file->first_component, + mfa2_file->component_count) { + if (!tlv) { + pr_err("Device TLV error\n"); + return false; + } + + /* Check each component */ + if (!mlxfw_mfa2_file_comp_validate(mfa2_file, tlv, idx)) + return false; + } + return true; +} + +struct mlxfw_mfa2_file *mlxfw_mfa2_file_init(const struct firmware *fw) +{ + const struct mlxfw_mfa2_tlv_package_descriptor *pd; + const struct mlxfw_mfa2_tlv_multi *multi; + const struct mlxfw_mfa2_tlv *multi_child; + const struct mlxfw_mfa2_tlv *first_tlv; + struct mlxfw_mfa2_file *mfa2_file; + const void *first_tlv_ptr; + const void *cb_top_ptr; + + mfa2_file = kcalloc(1, sizeof(*mfa2_file), GFP_KERNEL); + if (!mfa2_file) + return ERR_PTR(-ENOMEM); + + mfa2_file->fw = fw; + first_tlv_ptr = fw->data + NLA_ALIGN(mlxfw_mfa2_fingerprint_len); + first_tlv = mlxfw_mfa2_tlv_get(mfa2_file, first_tlv_ptr); + if (!first_tlv) { + pr_err("Could not parse package descriptor TLV\n"); + goto err_out; + } + + multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, first_tlv); + if (!multi) { + pr_err("First TLV is not of valid multi type\n"); + goto err_out; + } + + multi_child = mlxfw_mfa2_tlv_multi_child(mfa2_file, multi); + if (!multi_child) + goto err_out; + + pd = mlxfw_mfa2_tlv_package_descriptor_get(mfa2_file, multi_child); + if (!pd) { + pr_err("Could not parse package descriptor TLV\n"); + goto err_out; + } + + mfa2_file->first_dev = mlxfw_mfa2_tlv_next(mfa2_file, first_tlv); + if (!mfa2_file->first_dev) { + pr_err("First device TLV is not valid\n"); + goto err_out; + } + + mfa2_file->dev_count = be16_to_cpu(pd->num_devices); + mfa2_file->first_component = mlxfw_mfa2_tlv_advance(mfa2_file, + mfa2_file->first_dev, + mfa2_file->dev_count); + mfa2_file->component_count = be16_to_cpu(pd->num_components); + mfa2_file->cb = fw->data + NLA_ALIGN(be32_to_cpu(pd->cb_offset)); + if (!mlxfw_mfa2_valid_ptr(mfa2_file, mfa2_file->cb)) { + pr_err("Component block is out side the file\n"); + goto err_out; + } + mfa2_file->cb_archive_size = be32_to_cpu(pd->cb_archive_size); + cb_top_ptr = mfa2_file->cb + mfa2_file->cb_archive_size - 1; + if (!mlxfw_mfa2_valid_ptr(mfa2_file, cb_top_ptr)) { + pr_err("Component block size is too big\n"); + goto err_out; + } + + if (!mlxfw_mfa2_file_validate(mfa2_file)) + goto err_out; + return mfa2_file; +err_out: + kfree(mfa2_file); + return ERR_PTR(-EINVAL); +} + +static const struct mlxfw_mfa2_tlv_multi * +mlxfw_mfa2_tlv_dev_get(const struct mlxfw_mfa2_file *mfa2_file, + const char *psid, u16 psid_size) +{ + const struct mlxfw_mfa2_tlv_psid *tlv_psid; + const struct 
mlxfw_mfa2_tlv_multi *dev_multi; + const struct mlxfw_mfa2_tlv *dev_tlv; + const struct mlxfw_mfa2_tlv *tlv; + u32 idx; + + /* for each device tlv */ + mlxfw_mfa2_tlv_foreach(mfa2_file, dev_tlv, idx, mfa2_file->first_dev, + mfa2_file->dev_count) { + if (!dev_tlv) + return NULL; + + dev_multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, dev_tlv); + if (!dev_multi) + return NULL; + + /* find psid child and compare */ + tlv = mlxfw_mfa2_tlv_multi_child_find(mfa2_file, dev_multi, + MLXFW_MFA2_TLV_PSID, 0); + if (!tlv) + return NULL; + if (be16_to_cpu(tlv->len) != psid_size) + continue; + + tlv_psid = mlxfw_mfa2_tlv_psid_get(mfa2_file, tlv); + if (!tlv_psid) + return NULL; + + if (memcmp(psid, tlv_psid->psid, psid_size) == 0) + return dev_multi; + } + + return NULL; +} + +int mlxfw_mfa2_file_component_count(const struct mlxfw_mfa2_file *mfa2_file, + const char *psid, u32 psid_size, + u32 *p_count) +{ + const struct mlxfw_mfa2_tlv_multi *dev_multi; + u16 count; + int err; + + dev_multi = mlxfw_mfa2_tlv_dev_get(mfa2_file, psid, psid_size); + if (!dev_multi) + return -EINVAL; + + err = mlxfw_mfa2_tlv_multi_child_count(mfa2_file, dev_multi, + MLXFW_MFA2_TLV_COMPONENT_PTR, + &count); + if (err) + return err; + + *p_count = count; + return 0; +} + +static int mlxfw_mfa2_xz_dec_run(struct xz_dec *xz_dec, struct xz_buf *xz_buf, + bool *finished) +{ + enum xz_ret xz_ret; + + xz_ret = xz_dec_run(xz_dec, xz_buf); + + switch (xz_ret) { + case XZ_STREAM_END: + *finished = true; + return 0; + case XZ_OK: + *finished = false; + return 0; + case XZ_MEM_ERROR: + pr_err("xz no memory\n"); + return -ENOMEM; + case XZ_DATA_ERROR: + pr_err("xz file corrupted\n"); + return -EINVAL; + case XZ_FORMAT_ERROR: + pr_err("xz format not found\n"); + return -EINVAL; + case XZ_OPTIONS_ERROR: + pr_err("unsupported xz option\n"); + return -EINVAL; + case XZ_MEMLIMIT_ERROR: + pr_err("xz dictionary too small\n"); + return -EINVAL; + default: + pr_err("xz error %d\n", xz_ret); + return -EINVAL; + } +} + +static int mlxfw_mfa2_file_cb_offset_xz(const struct mlxfw_mfa2_file *mfa2_file, + off_t off, size_t size, u8 *buf) +{ + struct xz_dec *xz_dec; + struct xz_buf dec_buf; + off_t curr_off = 0; + bool finished; + int err; + + xz_dec = xz_dec_init(XZ_DYNALLOC, (u32) -1); + if (!xz_dec) + return -EINVAL; + + dec_buf.in_size = mfa2_file->cb_archive_size; + dec_buf.in = mfa2_file->cb; + dec_buf.in_pos = 0; + dec_buf.out = buf; + + /* decode up to the offset */ + do { + dec_buf.out_pos = 0; + dec_buf.out_size = min_t(size_t, size, off - curr_off); + if (dec_buf.out_size == 0) + break; + + err = mlxfw_mfa2_xz_dec_run(xz_dec, &dec_buf, &finished); + if (err) + goto out; + if (finished) { + pr_err("xz section too short\n"); + err = -EINVAL; + goto out; + } + curr_off += dec_buf.out_pos; + } while (curr_off != off); + + /* decode the needed section */ + dec_buf.out_pos = 0; + dec_buf.out_size = size; + err = mlxfw_mfa2_xz_dec_run(xz_dec, &dec_buf, &finished); +out: + xz_dec_end(xz_dec); + return err; +} + +static const struct mlxfw_mfa2_tlv_component_descriptor * +mlxfw_mfa2_file_component_tlv_get(const struct mlxfw_mfa2_file *mfa2_file, + u16 comp_index) +{ + const struct mlxfw_mfa2_tlv_multi *multi; + const struct mlxfw_mfa2_tlv *multi_child; + const struct mlxfw_mfa2_tlv *comp_tlv; + + if (comp_index > mfa2_file->component_count) + return NULL; + + comp_tlv = mlxfw_mfa2_tlv_advance(mfa2_file, mfa2_file->first_component, + comp_index); + if (!comp_tlv) + return NULL; + + multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, comp_tlv); + if (!multi) + return 
NULL; + + multi_child = mlxfw_mfa2_tlv_multi_child(mfa2_file, multi); + if (!multi_child) + return NULL; + + return mlxfw_mfa2_tlv_component_descriptor_get(mfa2_file, multi_child); +} + +struct mlxfw_mfa2_comp_data { + struct mlxfw_mfa2_component comp; + u8 buff[0]; +}; + +static const struct mlxfw_mfa2_tlv_component_descriptor * +mlxfw_mfa2_file_component_find(const struct mlxfw_mfa2_file *mfa2_file, + const char *psid, int psid_size, + int component_index) +{ + const struct mlxfw_mfa2_tlv_component_ptr *cptr; + const struct mlxfw_mfa2_tlv_multi *dev_multi; + const struct mlxfw_mfa2_tlv *cptr_tlv; + u16 comp_idx; + + dev_multi = mlxfw_mfa2_tlv_dev_get(mfa2_file, psid, psid_size); + if (!dev_multi) + return NULL; + + cptr_tlv = mlxfw_mfa2_tlv_multi_child_find(mfa2_file, dev_multi, + MLXFW_MFA2_TLV_COMPONENT_PTR, + component_index); + if (!cptr_tlv) + return NULL; + + cptr = mlxfw_mfa2_tlv_component_ptr_get(mfa2_file, cptr_tlv); + if (!cptr) + return NULL; + + comp_idx = be16_to_cpu(cptr->component_index); + return mlxfw_mfa2_file_component_tlv_get(mfa2_file, comp_idx); +} + +struct mlxfw_mfa2_component * +mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file, + const char *psid, int psid_size, + int component_index) +{ + const struct mlxfw_mfa2_tlv_component_descriptor *comp; + struct mlxfw_mfa2_comp_data *comp_data; + u32 comp_buf_size; + off_t cb_offset; + u32 comp_size; + int err; + + comp = mlxfw_mfa2_file_component_find(mfa2_file, psid, psid_size, + component_index); + if (!comp) + return ERR_PTR(-EINVAL); + + cb_offset = (u64) be32_to_cpu(comp->cb_offset_h) << 32 | + be32_to_cpu(comp->cb_offset_l); + comp_size = be32_to_cpu(comp->size); + comp_buf_size = comp_size + mlxfw_mfa2_comp_magic_len; + + comp_data = kmalloc(sizeof(*comp_data) + comp_buf_size, GFP_KERNEL); + if (!comp_data) + return ERR_PTR(-ENOMEM); + comp_data->comp.data_size = comp_size; + comp_data->comp.index = be16_to_cpu(comp->identifier); + err = mlxfw_mfa2_file_cb_offset_xz(mfa2_file, cb_offset, comp_buf_size, + comp_data->buff); + if (err) { + pr_err("Component could not be reached in CB\n"); + goto err_out; + } + + if (memcmp(comp_data->buff, mlxfw_mfa2_comp_magic, + mlxfw_mfa2_comp_magic_len) != 0) { + pr_err("Component has wrong magic\n"); + err = -EINVAL; + goto err_out; + } + + comp_data->comp.data = comp_data->buff + mlxfw_mfa2_comp_magic_len; + return &comp_data->comp; +err_out: + kfree(comp_data); + return ERR_PTR(err); +} + +void mlxfw_mfa2_file_component_put(struct mlxfw_mfa2_component *comp) +{ + const struct mlxfw_mfa2_comp_data *comp_data; + + comp_data = container_of(comp, struct mlxfw_mfa2_comp_data, comp); + kfree(comp_data); +} + +void mlxfw_mfa2_file_fini(struct mlxfw_mfa2_file *mfa2_file) +{ + kfree(mfa2_file); +} diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h new file mode 100644 index 000000000000..20472aa139cd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h @@ -0,0 +1,66 @@ +/* + * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXFW_MFA2_H +#define _MLXFW_MFA2_H + +#include <linux/firmware.h> +#include "mlxfw.h" + +struct mlxfw_mfa2_component { + u16 index; + u32 data_size; + u8 *data; +}; + +struct mlxfw_mfa2_file; + +bool mlxfw_mfa2_check(const struct firmware *fw); + +struct mlxfw_mfa2_file *mlxfw_mfa2_file_init(const struct firmware *fw); + +int mlxfw_mfa2_file_component_count(const struct mlxfw_mfa2_file *mfa2_file, + const char *psid, u32 psid_size, + u32 *p_count); + +struct mlxfw_mfa2_component * +mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file, + const char *psid, int psid_size, + int component_index); + +void mlxfw_mfa2_file_component_put(struct mlxfw_mfa2_component *component); + +void mlxfw_mfa2_file_fini(struct mlxfw_mfa2_file *mfa2_file); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h new file mode 100644 index 000000000000..f667942b1ea3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h @@ -0,0 +1,60 @@ +/* + * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXFW_MFA2_FILE_H +#define _MLXFW_MFA2_FILE_H + +#include <linux/firmware.h> +#include <linux/kernel.h> + +struct mlxfw_mfa2_file { + const struct firmware *fw; + const struct mlxfw_mfa2_tlv *first_dev; + u16 dev_count; + const struct mlxfw_mfa2_tlv *first_component; + u16 component_count; + const void *cb; /* components block */ + u32 cb_archive_size; /* size of compressed components block */ +}; + +static inline bool mlxfw_mfa2_valid_ptr(const struct mlxfw_mfa2_file *mfa2_file, + const void *ptr) +{ + const void *valid_to = mfa2_file->fw->data + mfa2_file->fw->size; + const void *valid_from = mfa2_file->fw->data; + + return ptr > valid_from && ptr < valid_to; +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h new file mode 100644 index 000000000000..dd66737c033d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h @@ -0,0 +1,103 @@ +/* + * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _MLXFW_MFA2_FORMAT_H +#define _MLXFW_MFA2_FORMAT_H + +#include "mlxfw_mfa2_file.h" +#include "mlxfw_mfa2_tlv.h" + +enum mlxfw_mfa2_tlv_type { + MLXFW_MFA2_TLV_MULTI_PART = 0x01, + MLXFW_MFA2_TLV_PACKAGE_DESCRIPTOR = 0x02, + MLXFW_MFA2_TLV_COMPONENT_DESCRIPTOR = 0x04, + MLXFW_MFA2_TLV_COMPONENT_PTR = 0x22, + MLXFW_MFA2_TLV_PSID = 0x2A, +}; + +enum mlxfw_mfa2_compression_type { + MLXFW_MFA2_COMPRESSION_TYPE_NONE, + MLXFW_MFA2_COMPRESSION_TYPE_XZ, +}; + +struct mlxfw_mfa2_tlv_package_descriptor { + __be16 num_components; + __be16 num_devices; + __be32 cb_offset; + __be32 cb_archive_size; + __be32 cb_size_h; + __be32 cb_size_l; + u8 padding[3]; + u8 cv_compression; + __be32 user_data_offset; +} __packed; + +MLXFW_MFA2_TLV(package_descriptor, struct mlxfw_mfa2_tlv_package_descriptor, + MLXFW_MFA2_TLV_PACKAGE_DESCRIPTOR); + +struct mlxfw_mfa2_tlv_multi { + __be16 num_extensions; + __be16 total_len; +} __packed; + +MLXFW_MFA2_TLV(multi, struct mlxfw_mfa2_tlv_multi, + MLXFW_MFA2_TLV_MULTI_PART); + +struct mlxfw_mfa2_tlv_psid { + u8 psid[0]; +} __packed; + +MLXFW_MFA2_TLV_VARSIZE(psid, struct mlxfw_mfa2_tlv_psid, + MLXFW_MFA2_TLV_PSID); + +struct mlxfw_mfa2_tlv_component_ptr { + __be16 storage_id; + __be16 component_index; + __be32 storage_address; +} __packed; + +MLXFW_MFA2_TLV(component_ptr, struct mlxfw_mfa2_tlv_component_ptr, + MLXFW_MFA2_TLV_COMPONENT_PTR); + +struct mlxfw_mfa2_tlv_component_descriptor { + __be16 pldm_classification; + __be16 identifier; + __be32 cb_offset_h; + __be32 cb_offset_l; + __be32 size; +} __packed; + +MLXFW_MFA2_TLV(component_descriptor, struct mlxfw_mfa2_tlv_component_descriptor, + MLXFW_MFA2_TLV_COMPONENT_DESCRIPTOR); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h new file mode 100644 index 000000000000..cc013e77b326 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h @@ -0,0 +1,98 @@ +/* + * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXFW_MFA2_TLV_H +#define _MLXFW_MFA2_TLV_H + +#include <linux/kernel.h> +#include "mlxfw_mfa2_file.h" + +struct mlxfw_mfa2_tlv { + u8 version; + u8 type; + __be16 len; + u8 data[0]; +} __packed; + +static inline const struct mlxfw_mfa2_tlv * +mlxfw_mfa2_tlv_get(const struct mlxfw_mfa2_file *mfa2_file, const void *ptr) +{ + if (!mlxfw_mfa2_valid_ptr(mfa2_file, ptr) || + !mlxfw_mfa2_valid_ptr(mfa2_file, ptr + sizeof(struct mlxfw_mfa2_tlv))) + return NULL; + return ptr; +} + +static inline const void * +mlxfw_mfa2_tlv_payload_get(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv *tlv, u8 payload_type, + size_t payload_size, bool varsize) +{ + void *tlv_top; + + tlv_top = (void *) tlv + be16_to_cpu(tlv->len) - 1; + if (!mlxfw_mfa2_valid_ptr(mfa2_file, tlv) || + !mlxfw_mfa2_valid_ptr(mfa2_file, tlv_top)) + return NULL; + if (tlv->type != payload_type) + return NULL; + if (varsize && (be16_to_cpu(tlv->len) < payload_size)) + return NULL; + if (!varsize && (be16_to_cpu(tlv->len) != payload_size)) + return NULL; + + return tlv->data; +} + +#define MLXFW_MFA2_TLV(name, payload_type, tlv_type) \ +static inline const payload_type * \ +mlxfw_mfa2_tlv_ ## name ## _get(const struct mlxfw_mfa2_file *mfa2_file, \ + const struct mlxfw_mfa2_tlv *tlv) \ +{ \ + return mlxfw_mfa2_tlv_payload_get(mfa2_file, tlv, \ + tlv_type, sizeof(payload_type), \ + false); \ +} + +#define MLXFW_MFA2_TLV_VARSIZE(name, payload_type, tlv_type) \ +static inline const payload_type * \ +mlxfw_mfa2_tlv_ ## name ## _get(const struct mlxfw_mfa2_file *mfa2_file, \ + const struct mlxfw_mfa2_tlv *tlv) \ +{ \ + return mlxfw_mfa2_tlv_payload_get(mfa2_file, tlv, \ + tlv_type, sizeof(payload_type), \ + true); \ +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c new file mode 100644 index 000000000000..0094b92a233b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c @@ -0,0 +1,126 @@ +/* + * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#define pr_fmt(fmt) "MFA2: " fmt + +#include "mlxfw_mfa2_tlv_multi.h" +#include <uapi/linux/netlink.h> + +#define MLXFW_MFA2_TLV_TOTAL_SIZE(tlv) \ + NLA_ALIGN(sizeof(*(tlv)) + be16_to_cpu((tlv)->len)) + +const struct mlxfw_mfa2_tlv * +mlxfw_mfa2_tlv_multi_child(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv_multi *multi) +{ + size_t multi_len; + + multi_len = NLA_ALIGN(sizeof(struct mlxfw_mfa2_tlv_multi)); + return mlxfw_mfa2_tlv_get(mfa2_file, (void *) multi + multi_len); +} + +const struct mlxfw_mfa2_tlv * +mlxfw_mfa2_tlv_next(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv *tlv) +{ + const struct mlxfw_mfa2_tlv_multi *multi; + u16 tlv_len; + void *next; + + tlv_len = MLXFW_MFA2_TLV_TOTAL_SIZE(tlv); + + if (tlv->type == MLXFW_MFA2_TLV_MULTI_PART) { + multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, tlv); + tlv_len = NLA_ALIGN(tlv_len + be16_to_cpu(multi->total_len)); + } + + next = (void *) tlv + tlv_len; + return mlxfw_mfa2_tlv_get(mfa2_file, next); +} + +const struct mlxfw_mfa2_tlv * +mlxfw_mfa2_tlv_advance(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv *from_tlv, u16 count) +{ + const struct mlxfw_mfa2_tlv *tlv; + u16 idx; + + mlxfw_mfa2_tlv_foreach(mfa2_file, tlv, idx, from_tlv, count) + if (!tlv) + return NULL; + return tlv; +} + +const struct mlxfw_mfa2_tlv * +mlxfw_mfa2_tlv_multi_child_find(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv_multi *multi, + enum mlxfw_mfa2_tlv_type type, u16 index) +{ + const struct mlxfw_mfa2_tlv *tlv; + u16 skip = 0; + u16 idx; + + mlxfw_mfa2_tlv_multi_foreach(mfa2_file, tlv, idx, multi) { + if (!tlv) { + pr_err("TLV parsing error\n"); + return NULL; + } + if (tlv->type == type) + if (skip++ == index) + return tlv; + } + return NULL; +} + +int mlxfw_mfa2_tlv_multi_child_count(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv_multi *multi, + enum mlxfw_mfa2_tlv_type type, + u16 *p_count) +{ + const struct mlxfw_mfa2_tlv *tlv; + u16 count = 0; + u16 idx; + + mlxfw_mfa2_tlv_multi_foreach(mfa2_file, tlv, idx, multi) { + if (!tlv) { + pr_err("TLV parsing error\n"); + 
return -EINVAL; + } + + if (tlv->type == type) + count++; + } + *p_count = count; + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h new file mode 100644 index 000000000000..2c667894f3a2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h @@ -0,0 +1,71 @@ +/* + * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
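As a worked example of the size arithmetic in mlxfw_mfa2_tlv_multi.c above: a PSID TLV carrying a 16-byte PSID has len = 16, so it occupies NLA_ALIGN(sizeof(struct mlxfw_mfa2_tlv) + 16) = NLA_ALIGN(4 + 16) = 20 bytes, while a multi TLV (whose own len covers just its 4-byte header, as the fixed-size getter requires) with total_len = 64 is advanced past by mlxfw_mfa2_tlv_next() as NLA_ALIGN(NLA_ALIGN(4 + 4) + 64) = 72 bytes.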
+ */ +#ifndef _MLXFW_MFA2_TLV_MULTI_H +#define _MLXFW_MFA2_TLV_MULTI_H + +#include "mlxfw_mfa2_tlv.h" +#include "mlxfw_mfa2_format.h" +#include "mlxfw_mfa2_file.h" + +const struct mlxfw_mfa2_tlv * +mlxfw_mfa2_tlv_multi_child(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv_multi *multi); + +const struct mlxfw_mfa2_tlv * +mlxfw_mfa2_tlv_next(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv *tlv); + +const struct mlxfw_mfa2_tlv * +mlxfw_mfa2_tlv_advance(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv *from_tlv, u16 count); + +const struct mlxfw_mfa2_tlv * +mlxfw_mfa2_tlv_multi_child_find(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv_multi *multi, + enum mlxfw_mfa2_tlv_type type, u16 index); + +int mlxfw_mfa2_tlv_multi_child_count(const struct mlxfw_mfa2_file *mfa2_file, + const struct mlxfw_mfa2_tlv_multi *multi, + enum mlxfw_mfa2_tlv_type type, + u16 *p_count); + +#define mlxfw_mfa2_tlv_foreach(mfa2_file, tlv, idx, from_tlv, count) \ + for (idx = 0, tlv = from_tlv; idx < (count); \ + idx++, tlv = mlxfw_mfa2_tlv_next(mfa2_file, tlv)) + +#define mlxfw_mfa2_tlv_multi_foreach(mfa2_file, tlv, idx, multi) \ + mlxfw_mfa2_tlv_foreach(mfa2_file, tlv, idx, \ + mlxfw_mfa2_tlv_multi_child(mfa2_file, multi), \ + be16_to_cpu(multi->num_extensions) + 1) +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index ef23eaedc2ff..d56eea310509 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -74,7 +74,10 @@ config MLXSW_SPECTRUM tristate "Mellanox Technologies Spectrum support" depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q depends on PSAMPLE || PSAMPLE=n + depends on BRIDGE || BRIDGE=n + depends on IPV6 || IPV6=n select PARMAN + select MLXFW default m ---help--- This driver supports Mellanox Technologies Spectrum Ethernet diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 2fb8c6585ac7..9a5a1cc877b6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_MLXSW_CORE) += mlxsw_core.o mlxsw_core-objs := core.o core_acl_flex_keys.o \ core_acl_flex_actions.o @@ -16,7 +17,9 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_switchdev.o spectrum_router.o \ spectrum_kvdl.o spectrum_acl_tcam.o \ spectrum_acl.o spectrum_flower.o \ - spectrum_cnt.o spectrum_dpipe.o + spectrum_cnt.o spectrum_fid.o \ + spectrum_ipip.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o +mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o mlxsw_minimal-objs := minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index affe84eb4bff..f3315bc874ad 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -96,6 +96,7 @@ struct mlxsw_core { const struct mlxsw_bus *bus; void *bus_priv; const struct mlxsw_bus_info *bus_info; + struct workqueue_struct *emad_wq; struct list_head rx_listener_list; struct list_head event_listener_list; struct { @@ -465,7 +466,7 @@ static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans) { unsigned long timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS); - mlxsw_core_schedule_dw(&trans->timeout_dw, timeout); + 
queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw, timeout); } static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, @@ -587,12 +588,18 @@ static const struct mlxsw_listener mlxsw_emad_rx_listener = static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) { + struct workqueue_struct *emad_wq; u64 tid; int err; if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) return 0; + emad_wq = alloc_workqueue("mlxsw_core_emad", WQ_MEM_RECLAIM, 0); + if (!emad_wq) + return -ENOMEM; + mlxsw_core->emad_wq = emad_wq; + /* Set the upper 32 bits of the transaction ID field to a random * number. This allows us to discard EMADs addressed to other * devices. @@ -619,6 +626,7 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) err_emad_trap_set: mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener, mlxsw_core); + destroy_workqueue(mlxsw_core->emad_wq); return err; } @@ -631,6 +639,7 @@ static void mlxsw_emad_fini(struct mlxsw_core *mlxsw_core) mlxsw_core->emad.use_emad = false; mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener, mlxsw_core); + destroy_workqueue(mlxsw_core->emad_wq); } static struct sk_buff *mlxsw_emad_alloc(const struct mlxsw_core *mlxsw_core, @@ -667,7 +676,7 @@ static int mlxsw_emad_reg_access(struct mlxsw_core *mlxsw_core, int err; dev_dbg(mlxsw_core->bus_info->dev, "EMAD reg access (tid=%llx,reg_id=%x(%s),type=%s)\n", - trans->tid, reg->id, mlxsw_reg_id_str(reg->id), + tid, reg->id, mlxsw_reg_id_str(reg->id), mlxsw_core_reg_access_type_str(type)); skb = mlxsw_emad_alloc(mlxsw_core, reg->len); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 7fb35395adf5..6e966af72fc4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -344,15 +344,17 @@ struct mlxsw_bus { u8 features; }; +struct mlxsw_fw_rev { + u16 major; + u16 minor; + u16 subminor; +}; + struct mlxsw_bus_info { const char *device_kind; const char *device_name; struct device *dev; - struct { - u16 major; - u16 minor; - u16 subminor; - } fw_rev; + struct mlxsw_fw_rev fw_rev; u8 vsd[MLXSW_CMD_BOARDINFO_VSD_LEN]; u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN]; }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 46304ffb9449..5ae110172c22 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -40,6 +40,7 @@ #include <linux/list.h> #include "item.h" +#include "trap.h" #include "core_acl_flex_actions.h" enum mlxsw_afa_set_type { @@ -662,6 +663,16 @@ EXPORT_SYMBOL(mlxsw_afa_block_append_vlan_modify); #define MLXSW_AFA_TRAPDISC_CODE 0x03 #define MLXSW_AFA_TRAPDISC_SIZE 1 +enum mlxsw_afa_trapdisc_trap_action { + MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP = 0, + MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP = 2, +}; + +/* afa_trapdisc_trap_action + * Trap Action. + */ +MLXSW_ITEM32(afa, trapdisc, trap_action, 0x00, 24, 4); + enum mlxsw_afa_trapdisc_forward_action { MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD = 3, }; @@ -671,11 +682,20 @@ enum mlxsw_afa_trapdisc_forward_action { */ MLXSW_ITEM32(afa, trapdisc, forward_action, 0x00, 0, 4); +/* afa_trapdisc_trap_id + * Trap ID to configure. 
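The MLXSW_ITEM32() declarations used here and throughout reg.h generate get/set accessors for a field identified by (byte offset, LSB, width) within the big-endian register payload; for instance the trap_action just defined occupies bits 27:24 of the word at offset 0x00. A simplified standalone analogue of the generated getter (illustrative only, not the kernel macro) is:

#include <stdint.h>
#include <stddef.h>

/*
 * Read 'width' bits starting at bit 'lsb' of the big-endian 32-bit word that
 * sits 'off' bytes into a register payload.
 */
static uint32_t item32_get(const uint8_t *payload, size_t off,
			   unsigned int lsb, unsigned int width)
{
	uint32_t word = (uint32_t)payload[off] << 24 |
			(uint32_t)payload[off + 1] << 16 |
			(uint32_t)payload[off + 2] << 8 |
			(uint32_t)payload[off + 3];
	uint32_t mask = width < 32 ? (1u << width) - 1 : 0xffffffffu;

	return (word >> lsb) & mask;
}

With the trapdisc definitions above, item32_get(act, 0x00, 24, 4) would read back the trap action and item32_get(act, 0x04, 0, 9) the trap ID.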
+ */ +MLXSW_ITEM32(afa, trapdisc, trap_id, 0x04, 0, 9); + static inline void mlxsw_afa_trapdisc_pack(char *payload, - enum mlxsw_afa_trapdisc_forward_action forward_action) + enum mlxsw_afa_trapdisc_trap_action trap_action, + enum mlxsw_afa_trapdisc_forward_action forward_action, + u16 trap_id) { + mlxsw_afa_trapdisc_trap_action_set(payload, trap_action); mlxsw_afa_trapdisc_forward_action_set(payload, forward_action); + mlxsw_afa_trapdisc_trap_id_set(payload, trap_id); } int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block) @@ -686,11 +706,27 @@ int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block) if (!act) return -ENOBUFS; - mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD); + mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP, + MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, 0); return 0; } EXPORT_SYMBOL(mlxsw_afa_block_append_drop); +int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_TRAPDISC_CODE, + MLXSW_AFA_TRAPDISC_SIZE); + + if (!act) + return -ENOBUFS; + mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, + MLXSW_TRAP_ID_ACL0); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_trap); + /* Forwarding Action * ----------------- * Forwarding Action can be used to implement Policy Based Switching (PBS) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index bd8b91d02880..f99c341b2497 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -60,6 +60,7 @@ u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block); void mlxsw_afa_block_continue(struct mlxsw_afa_block *block); void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block); +int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block); int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, u8 local_port, bool in_port); int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h index c75e9141e3ec..f6963b0b4a55 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -56,6 +56,10 @@ enum mlxsw_afk_element { MLXSW_AFK_ELEMENT_SRC_L4_PORT, MLXSW_AFK_ELEMENT_VID, MLXSW_AFK_ELEMENT_PCP, + MLXSW_AFK_ELEMENT_TCP_FLAGS, + MLXSW_AFK_ELEMENT_IP_TTL_, + MLXSW_AFK_ELEMENT_IP_ECN, + MLXSW_AFK_ELEMENT_IP_DSCP, MLXSW_AFK_ELEMENT_MAX, }; @@ -102,6 +106,10 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { MLXSW_AFK_ELEMENT_INFO_U32(IP_PROTO, 0x10, 0, 8), MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12), MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3), + MLXSW_AFK_ELEMENT_INFO_U32(TCP_FLAGS, 0x10, 23, 9), + MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x14, 0, 8), + MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x14, 9, 2), + MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x14, 11, 6), MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x18, 0, 32), MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x1C, 0, 32), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x18, 8), diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 12c3a4449120..c0dcfa05b077 100644 --- 
a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -294,7 +294,7 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num, write_tran.len = MLXSW_I2C_ADDR_WIDTH + chunk_size; mlxsw_i2c_set_slave_addr(tran_buf, off); memcpy(&tran_buf[MLXSW_I2C_ADDR_BUF_SIZE], in_mbox + - chunk_size * i, chunk_size); + MLXSW_I2C_BLK_MAX * i, chunk_size); j = 0; end = jiffies + timeout; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index 0af3338bfcb4..a6441208e9d9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -155,7 +155,7 @@ MLXSW_ITEM32(pci, cqe, byte_count, 0x04, 0, 14); /* pci_cqe_trap_id * Trap ID that captured the packet. */ -MLXSW_ITEM32(pci, cqe, trap_id, 0x08, 0, 8); +MLXSW_ITEM32(pci, cqe, trap_id, 0x08, 0, 9); /* pci_cqe_crc * Length include CRC. Indicates the length field includes diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 83b277c8090e..5acfbe5b8b9d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5,6 +5,7 @@ * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> * Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com> * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com> + * Copyright (c) 2017 Petr Machata <petrm@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -958,7 +959,7 @@ enum mlxsw_flood_table_type { MLXSW_REG_SFGC_TABLE_TYPE_VID = 1, MLXSW_REG_SFGC_TABLE_TYPE_SINGLE = 2, MLXSW_REG_SFGC_TABLE_TYPE_ANY = 0, - MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST = 3, + MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET = 3, MLXSW_REG_SFGC_TABLE_TYPE_FID = 4, }; @@ -3679,15 +3680,17 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP, MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP, MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP, - MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4, + MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP, MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF, MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP, - MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS, + MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS, MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP, MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE, MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME, MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP, MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT, + MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD, + MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND, }; /* reg_htgt_trap_group @@ -3952,10 +3955,12 @@ MLXSW_ITEM32(reg, rgcr, pcp_rw, 0x18, 16, 2); */ MLXSW_ITEM32(reg, rgcr, activity_dis, 0x20, 0, 8); -static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en) +static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en, + bool ipv6_en) { MLXSW_REG_ZERO(rgcr, payload); mlxsw_reg_rgcr_ipv4_en_set(payload, ipv4_en); + mlxsw_reg_rgcr_ipv6_en_set(payload, ipv6_en); } /* RITR - Router Interface Table Register @@ -3988,16 +3993,18 @@ MLXSW_ITEM32(reg, ritr, ipv4, 0x00, 29, 1); MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1); enum mlxsw_reg_ritr_if_type { + /* VLAN interface. */ MLXSW_REG_RITR_VLAN_IF, + /* FID interface. */ MLXSW_REG_RITR_FID_IF, + /* Sub-port interface. */ MLXSW_REG_RITR_SP_IF, + /* Loopback Interface. */ + MLXSW_REG_RITR_LOOPBACK_IF, }; /* reg_ritr_type - * Router interface type. - * 0 - VLAN interface. - * 1 - FID interface. - * 2 - Sub-port interface. + * Router interface type as per enum mlxsw_reg_ritr_if_type. 
* Access: RW */ MLXSW_ITEM32(reg, ritr, type, 0x00, 23, 3); @@ -4125,6 +4132,67 @@ MLXSW_ITEM32(reg, ritr, sp_if_system_port, 0x08, 0, 16); */ MLXSW_ITEM32(reg, ritr, sp_if_vid, 0x18, 0, 12); +/* Loopback Interface */ + +enum mlxsw_reg_ritr_loopback_protocol { + /* IPinIP IPv4 underlay Unicast */ + MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4, + /* IPinIP IPv6 underlay Unicast */ + MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV6, +}; + +/* reg_ritr_loopback_protocol + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_protocol, 0x08, 28, 4); + +enum mlxsw_reg_ritr_loopback_ipip_type { + /* Tunnel is IPinIP. */ + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_IP, + /* Tunnel is GRE, no key. */ + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP, + /* Tunnel is GRE, with a key. */ + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP, +}; + +/* reg_ritr_loopback_ipip_type + * Encapsulation type. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_type, 0x10, 24, 4); + +enum mlxsw_reg_ritr_loopback_ipip_options { + /* The key is defined by gre_key. */ + MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET, +}; + +/* reg_ritr_loopback_ipip_options + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_options, 0x10, 20, 4); + +/* reg_ritr_loopback_ipip_uvr + * Underlay Virtual Router ID. + * Range is 0..cap_max_virtual_routers-1. + * Reserved for Spectrum-2. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_uvr, 0x10, 0, 16); + +/* reg_ritr_loopback_ipip_usip* + * Encapsulation Underlay source IP. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ritr, loopback_ipip_usip6, 0x18, 16); +MLXSW_ITEM32(reg, ritr, loopback_ipip_usip4, 0x24, 0, 32); + +/* reg_ritr_loopback_ipip_gre_key + * GRE Key. + * Reserved when ipip_type is not IP_IN_GRE_KEY_IN_IP. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_gre_key, 0x28, 0, 32); + /* Shared between ingress/egress */ enum mlxsw_reg_ritr_counter_set_type { /* No Count. */ @@ -4195,24 +4263,54 @@ static inline void mlxsw_reg_ritr_sp_if_pack(char *payload, bool lag, static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, enum mlxsw_reg_ritr_if_type type, - u16 rif, u16 vr_id, u16 mtu, - const char *mac) + u16 rif, u16 vr_id, u16 mtu) { bool op = enable ? 
MLXSW_REG_RITR_RIF_CREATE : MLXSW_REG_RITR_RIF_DEL; MLXSW_REG_ZERO(ritr, payload); mlxsw_reg_ritr_enable_set(payload, enable); mlxsw_reg_ritr_ipv4_set(payload, 1); + mlxsw_reg_ritr_ipv6_set(payload, 1); mlxsw_reg_ritr_type_set(payload, type); mlxsw_reg_ritr_op_set(payload, op); mlxsw_reg_ritr_rif_set(payload, rif); mlxsw_reg_ritr_ipv4_fe_set(payload, 1); + mlxsw_reg_ritr_ipv6_fe_set(payload, 1); mlxsw_reg_ritr_lb_en_set(payload, 1); mlxsw_reg_ritr_virtual_router_set(payload, vr_id); mlxsw_reg_ritr_mtu_set(payload, mtu); +} + +static inline void mlxsw_reg_ritr_mac_pack(char *payload, const char *mac) +{ mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac); } +static inline void +mlxsw_reg_ritr_loopback_ipip_common_pack(char *payload, + enum mlxsw_reg_ritr_loopback_ipip_type ipip_type, + enum mlxsw_reg_ritr_loopback_ipip_options options, + u16 uvr_id, u32 gre_key) +{ + mlxsw_reg_ritr_loopback_ipip_type_set(payload, ipip_type); + mlxsw_reg_ritr_loopback_ipip_options_set(payload, options); + mlxsw_reg_ritr_loopback_ipip_uvr_set(payload, uvr_id); + mlxsw_reg_ritr_loopback_ipip_gre_key_set(payload, gre_key); +} + +static inline void +mlxsw_reg_ritr_loopback_ipip4_pack(char *payload, + enum mlxsw_reg_ritr_loopback_ipip_type ipip_type, + enum mlxsw_reg_ritr_loopback_ipip_options options, + u16 uvr_id, u32 usip, u32 gre_key) +{ + mlxsw_reg_ritr_loopback_protocol_set(payload, + MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4); + mlxsw_reg_ritr_loopback_ipip_common_pack(payload, ipip_type, options, + uvr_id, gre_key); + mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip); +} + /* RATR - Router Adjacency Table Register * -------------------------------------- * The RATR register is used to configure the Router Adjacency (next-hop) @@ -4268,6 +4366,38 @@ MLXSW_ITEM32(reg, ratr, v, 0x00, 24, 1); */ MLXSW_ITEM32(reg, ratr, a, 0x00, 16, 1); +enum mlxsw_reg_ratr_type { + /* Ethernet */ + MLXSW_REG_RATR_TYPE_ETHERNET, + /* IPoIB Unicast without GRH. + * Reserved for Spectrum. + */ + MLXSW_REG_RATR_TYPE_IPOIB_UC, + /* IPoIB Unicast with GRH. Supported only in table 0 (Ethernet unicast + * adjacency). + * Reserved for Spectrum. + */ + MLXSW_REG_RATR_TYPE_IPOIB_UC_W_GRH, + /* IPoIB Multicast. + * Reserved for Spectrum. + */ + MLXSW_REG_RATR_TYPE_IPOIB_MC, + /* MPLS. + * Reserved for SwitchX/-2. + */ + MLXSW_REG_RATR_TYPE_MPLS, + /* IPinIP Encap. + * Reserved for SwitchX/-2. + */ + MLXSW_REG_RATR_TYPE_IPIP, +}; + +/* reg_ratr_type + * Adjacency entry type. + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, type, 0x04, 28, 4); + /* reg_ratr_adjacency_index_low * Bits 15:0 of index into the adjacency table. * For SwitchX and SwitchX-2, the adjacency table is linear and @@ -4297,17 +4427,17 @@ enum mlxsw_reg_ratr_trap_action { */ MLXSW_ITEM32(reg, ratr, trap_action, 0x0C, 28, 4); -enum mlxsw_reg_ratr_trap_id { - MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0 = 0, - MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1 = 1, -}; - /* reg_ratr_adjacency_index_high * Bits 23:16 of the adjacency_index. * Access: Index */ MLXSW_ITEM32(reg, ratr, adjacency_index_high, 0x0C, 16, 8); +enum mlxsw_reg_ratr_trap_id { + MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0, + MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1, +}; + /* reg_ratr_trap_id * Trap ID to be reported to CPU. * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1. @@ -4322,14 +4452,44 @@ MLXSW_ITEM32(reg, ratr, trap_id, 0x0C, 0, 8); */ MLXSW_ITEM_BUF(reg, ratr, eth_destination_mac, 0x12, 6); +enum mlxsw_reg_ratr_ipip_type { + /* IPv4, address set by mlxsw_reg_ratr_ipip_ipv4_udip. 
*/ + MLXSW_REG_RATR_IPIP_TYPE_IPV4, + /* IPv6, address set by mlxsw_reg_ratr_ipip_ipv6_ptr. */ + MLXSW_REG_RATR_IPIP_TYPE_IPV6, +}; + +/* reg_ratr_ipip_type + * Underlay destination ip type. + * Note: the type field must match the protocol of the router interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, ipip_type, 0x10, 16, 4); + +/* reg_ratr_ipip_ipv4_udip + * Underlay ipv4 dip. + * Reserved when ipip_type is IPv6. + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, ipip_ipv4_udip, 0x18, 0, 32); + +/* reg_ratr_ipip_ipv6_ptr + * Pointer to IPv6 underlay destination ip address. + * For Spectrum: Pointer to KVD linear space. + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, ipip_ipv6_ptr, 0x1C, 0, 24); + static inline void mlxsw_reg_ratr_pack(char *payload, enum mlxsw_reg_ratr_op op, bool valid, + enum mlxsw_reg_ratr_type type, u32 adjacency_index, u16 egress_rif) { MLXSW_REG_ZERO(ratr, payload); mlxsw_reg_ratr_op_set(payload, op); mlxsw_reg_ratr_v_set(payload, valid); + mlxsw_reg_ratr_type_set(payload, type); mlxsw_reg_ratr_adjacency_index_low_set(payload, adjacency_index); mlxsw_reg_ratr_adjacency_index_high_set(payload, adjacency_index >> 16); mlxsw_reg_ratr_egress_router_interface_set(payload, egress_rif); @@ -4341,6 +4501,12 @@ static inline void mlxsw_reg_ratr_eth_entry_pack(char *payload, mlxsw_reg_ratr_eth_destination_mac_memcpy_to(payload, dest_mac); } +static inline void mlxsw_reg_ratr_ipip4_entry_pack(char *payload, u32 ipv4_udip) +{ + mlxsw_reg_ratr_ipip_type_set(payload, MLXSW_REG_RATR_IPIP_TYPE_IPV4); + mlxsw_reg_ratr_ipip_ipv4_udip_set(payload, ipv4_udip); +} + /* RICNT - Router Interface Counter Register * ----------------------------------------- * The RICNT register retrieves per port performance counters @@ -4712,12 +4878,13 @@ MLXSW_ITEM32(reg, ralue, prefix_len, 0x08, 0, 8); /* reg_ralue_dip* * The prefix of the route or of the marker that the object of the LPM * is compared with. The most significant bits of the dip are the prefix. - * The list significant bits must be '0' if the prefix_len is smaller + * The least significant bits must be '0' if the prefix_len is smaller * than 128 for IPv6 or smaller than 32 for IPv4. * IPv4 address uses bits dip[31:0] and bits dip[127:32] are reserved. * Access: Index */ MLXSW_ITEM32(reg, ralue, dip4, 0x18, 0, 32); +MLXSW_ITEM_BUF(reg, ralue, dip6, 0x0C, 16); enum mlxsw_reg_ralue_entry_type { MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_ENTRY = 1, @@ -4806,7 +4973,7 @@ MLXSW_ITEM32(reg, ralue, ecmp_size, 0x28, 0, 13); */ MLXSW_ITEM32(reg, ralue, local_erif, 0x24, 0, 16); -/* reg_ralue_v +/* reg_ralue_ip2me_v * Valid bit for the tunnel_ptr field. * If valid = 0 then trap to CPU as IP2ME trap ID. * If valid = 1 and the packet format allows NVE or IPinIP tunnel @@ -4816,15 +4983,15 @@ MLXSW_ITEM32(reg, ralue, local_erif, 0x24, 0, 16); * Only relevant in case of IP2ME action. * Access: RW */ -MLXSW_ITEM32(reg, ralue, v, 0x24, 31, 1); +MLXSW_ITEM32(reg, ralue, ip2me_v, 0x24, 31, 1); -/* reg_ralue_tunnel_ptr +/* reg_ralue_ip2me_tunnel_ptr * Tunnel Pointer for NVE or IPinIP tunnel decapsulation. * For Spectrum, pointer to KVD Linear. * Only relevant in case of IP2ME action. 
* Access: RW */ -MLXSW_ITEM32(reg, ralue, tunnel_ptr, 0x24, 0, 24); +MLXSW_ITEM32(reg, ralue, ip2me_tunnel_ptr, 0x24, 0, 24); static inline void mlxsw_reg_ralue_pack(char *payload, enum mlxsw_reg_ralxx_protocol protocol, @@ -4851,6 +5018,16 @@ static inline void mlxsw_reg_ralue_pack4(char *payload, mlxsw_reg_ralue_dip4_set(payload, dip); } +static inline void mlxsw_reg_ralue_pack6(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + enum mlxsw_reg_ralue_op op, + u16 virtual_router, u8 prefix_len, + const void *dip) +{ + mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len); + mlxsw_reg_ralue_dip6_memcpy_to(payload, dip); +} + static inline void mlxsw_reg_ralue_act_remote_pack(char *payload, enum mlxsw_reg_ralue_trap_action trap_action, @@ -4883,6 +5060,15 @@ mlxsw_reg_ralue_act_ip2me_pack(char *payload) MLXSW_REG_RALUE_ACTION_TYPE_IP2ME); } +static inline void +mlxsw_reg_ralue_act_ip2me_tun_pack(char *payload, u32 tunnel_ptr) +{ + mlxsw_reg_ralue_action_type_set(payload, + MLXSW_REG_RALUE_ACTION_TYPE_IP2ME); + mlxsw_reg_ralue_ip2me_v_set(payload, 1); + mlxsw_reg_ralue_ip2me_tunnel_ptr_set(payload, tunnel_ptr); +} + /* RAUHT - Router Algorithmic LPM Unicast Host Table Register * ---------------------------------------------------------- * The RAUHT register is used to configure and query the Unicast Host table in @@ -4954,6 +5140,7 @@ MLXSW_ITEM32(reg, rauht, rif, 0x00, 0, 16); * Access: Index */ MLXSW_ITEM32(reg, rauht, dip4, 0x1C, 0x0, 32); +MLXSW_ITEM_BUF(reg, rauht, dip6, 0x10, 16); enum mlxsw_reg_rauht_trap_action { MLXSW_REG_RAUHT_TRAP_ACTION_NOP, @@ -4982,6 +5169,15 @@ enum mlxsw_reg_rauht_trap_id { */ MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9); +enum mlxsw_reg_flow_counter_set_type { + /* No count */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT = 0x00, + /* Count packets and bytes */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, + /* Count only packets */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS = 0x05, +}; + /* reg_rauht_counter_set_type * Counter set type for flow counters * Access: RW @@ -5018,6 +5214,23 @@ static inline void mlxsw_reg_rauht_pack4(char *payload, mlxsw_reg_rauht_dip4_set(payload, dip); } +static inline void mlxsw_reg_rauht_pack6(char *payload, + enum mlxsw_reg_rauht_op op, u16 rif, + const char *mac, const char *dip) +{ + mlxsw_reg_rauht_pack(payload, op, rif, mac); + mlxsw_reg_rauht_type_set(payload, MLXSW_REG_RAUHT_TYPE_IPV6); + mlxsw_reg_rauht_dip6_memcpy_to(payload, dip); +} + +static inline void mlxsw_reg_rauht_pack_counter(char *payload, + u64 counter_index) +{ + mlxsw_reg_rauht_counter_index_set(payload, counter_index); + mlxsw_reg_rauht_counter_set_type_set(payload, + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES); +} + /* RALEU - Router Algorithmic LPM ECMP Update Register * --------------------------------------------------- * The register enables updating the ECMP section in the action for multiple @@ -5216,6 +5429,30 @@ MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0, MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, 0, 32, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x04, false); +#define MLXSW_REG_RAUHTD_IPV6_ENT_LEN 0x20 + +/* reg_rauhtd_ipv6_ent_a + * Activity. Set for new entries. Set if a packet lookup has hit on the + * specific entry. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1, + MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv6_ent_rif + * Router interface. 
+ * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0, + 16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv6_ent_dip + * Destination IPv6 address. + * Access: RO + */ +MLXSW_ITEM_BUF_INDEXED(reg, rauhtd, ipv6_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, + 16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x10); + static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload, int ent_index, u16 *p_rif, u32 *p_dip) @@ -5224,6 +5461,141 @@ static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload, *p_dip = mlxsw_reg_rauhtd_ipv4_ent_dip_get(payload, ent_index); } +static inline void mlxsw_reg_rauhtd_ent_ipv6_unpack(char *payload, + int rec_index, u16 *p_rif, + char *p_dip) +{ + *p_rif = mlxsw_reg_rauhtd_ipv6_ent_rif_get(payload, rec_index); + mlxsw_reg_rauhtd_ipv6_ent_dip_memcpy_from(payload, rec_index, p_dip); +} + +/* RTDP - Routing Tunnel Decap Properties Register + * ----------------------------------------------- + * The RTDP register is used for configuring the tunnel decap properties of NVE + * and IPinIP. + */ +#define MLXSW_REG_RTDP_ID 0x8020 +#define MLXSW_REG_RTDP_LEN 0x44 + +MLXSW_REG_DEFINE(rtdp, MLXSW_REG_RTDP_ID, MLXSW_REG_RTDP_LEN); + +enum mlxsw_reg_rtdp_type { + MLXSW_REG_RTDP_TYPE_NVE, + MLXSW_REG_RTDP_TYPE_IPIP, +}; + +/* reg_rtdp_type + * Type of the RTDP entry as per enum mlxsw_reg_rtdp_type. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, type, 0x00, 28, 4); + +/* reg_rtdp_tunnel_index + * Index to the Decap entry. + * For Spectrum, Index to KVD Linear. + * Access: Index + */ +MLXSW_ITEM32(reg, rtdp, tunnel_index, 0x00, 0, 24); + +/* IPinIP */ + +/* reg_rtdp_ipip_irif + * Ingress Router Interface for the overlay router + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_irif, 0x04, 16, 16); + +enum mlxsw_reg_rtdp_ipip_sip_check { + /* No sip checks. */ + MLXSW_REG_RTDP_IPIP_SIP_CHECK_NO, + /* Filter packet if underlay is not IPv4 or if underlay SIP does not + * equal ipv4_usip. + */ + MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4, + /* Filter packet if underlay is not IPv6 or if underlay SIP does not + * equal ipv6_usip. + */ + MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6 = 3, +}; + +/* reg_rtdp_ipip_sip_check + * SIP check to perform. If decapsulation failed due to these configurations + * then trap_id is IPIP_DECAP_ERROR. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_sip_check, 0x04, 0, 3); + +/* If set, allow decapsulation of IPinIP (without GRE). */ +#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_IPIP BIT(0) +/* If set, allow decapsulation of IPinGREinIP without a key. */ +#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE BIT(1) +/* If set, allow decapsulation of IPinGREinIP with a key. */ +#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY BIT(2) + +/* reg_rtdp_ipip_type_check + * Flags as per MLXSW_REG_RTDP_IPIP_TYPE_CHECK_*. If decapsulation failed due to + * these configurations then trap_id is IPIP_DECAP_ERROR. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_type_check, 0x08, 24, 3); + +/* reg_rtdp_ipip_gre_key_check + * Whether GRE key should be checked. When check is enabled: + * - A packet received as IPinIP (without GRE) will always pass. + * - A packet received as IPinGREinIP without a key will not pass the check. + * - A packet received as IPinGREinIP with a key will pass the check only if the + * key in the packet is equal to expected_gre_key. + * If decapsulation failed due to GRE key then trap_id is IPIP_DECAP_ERROR. 
+ * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_gre_key_check, 0x08, 23, 1); + +/* reg_rtdp_ipip_ipv4_usip + * Underlay IPv4 address for ipv4 source address check. + * Reserved when sip_check is not '1'. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_ipv4_usip, 0x0C, 0, 32); + +/* reg_rtdp_ipip_ipv6_usip_ptr + * This field is valid when sip_check is "sipv6 check explicitly". This is a + * pointer to the IPv6 DIP which is configured by RIPS. For Spectrum, the index + * is to the KVD linear. + * Reserved when sip_check is not MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_ipv6_usip_ptr, 0x10, 0, 24); + +/* reg_rtdp_ipip_expected_gre_key + * GRE key for checking. + * Reserved when gre_key_check is '0'. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_expected_gre_key, 0x14, 0, 32); + +static inline void mlxsw_reg_rtdp_pack(char *payload, + enum mlxsw_reg_rtdp_type type, + u32 tunnel_index) +{ + MLXSW_REG_ZERO(rtdp, payload); + mlxsw_reg_rtdp_type_set(payload, type); + mlxsw_reg_rtdp_tunnel_index_set(payload, tunnel_index); +} + +static inline void +mlxsw_reg_rtdp_ipip4_pack(char *payload, u16 irif, + enum mlxsw_reg_rtdp_ipip_sip_check sip_check, + unsigned int type_check, bool gre_key_check, + u32 ipv4_usip, u32 expected_gre_key) +{ + mlxsw_reg_rtdp_ipip_irif_set(payload, irif); + mlxsw_reg_rtdp_ipip_sip_check_set(payload, sip_check); + mlxsw_reg_rtdp_ipip_type_check_set(payload, type_check); + mlxsw_reg_rtdp_ipip_gre_key_check_set(payload, gre_key_check); + mlxsw_reg_rtdp_ipip_ipv4_usip_set(payload, ipv4_usip); + mlxsw_reg_rtdp_ipip_expected_gre_key_set(payload, expected_gre_key); +} + /* MFCR - Management Fan Control Register * -------------------------------------- * This register controls the settings of the Fan Speed PWM mechanism. @@ -5455,6 +5827,29 @@ MLXSW_ITEM32(reg, mtmp, mtr, 0x08, 30, 1); */ MLXSW_ITEM32(reg, mtmp, max_temperature, 0x08, 0, 16); +/* reg_mtmp_tee + * Temperature Event Enable. + * 0 - Do not generate event + * 1 - Generate event + * 2 - Generate single event + * Access: RW + */ +MLXSW_ITEM32(reg, mtmp, tee, 0x0C, 30, 2); + +#define MLXSW_REG_MTMP_THRESH_HI 0x348 /* 105 Celsius */ + +/* reg_mtmp_temperature_threshold_hi + * High threshold for Temperature Warning Event. In 0.125 Celsius. + * Access: RW + */ +MLXSW_ITEM32(reg, mtmp, temperature_threshold_hi, 0x0C, 0, 16); + +/* reg_mtmp_temperature_threshold_lo + * Low threshold for Temperature Warning Event. In 0.125 Celsius. + * Access: RW + */ +MLXSW_ITEM32(reg, mtmp, temperature_threshold_lo, 0x10, 0, 16); + #define MLXSW_REG_MTMP_SENSOR_NAME_SIZE 8 /* reg_mtmp_sensor_name @@ -5471,6 +5866,8 @@ static inline void mlxsw_reg_mtmp_pack(char *payload, u8 sensor_index, mlxsw_reg_mtmp_sensor_index_set(payload, sensor_index); mlxsw_reg_mtmp_mte_set(payload, max_temp_enable); mlxsw_reg_mtmp_mtr_set(payload, max_temp_reset); + mlxsw_reg_mtmp_temperature_threshold_hi_set(payload, + MLXSW_REG_MTMP_THRESH_HI); } static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp, @@ -5491,6 +5888,81 @@ static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp, mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name); } +/* MCIA - Management Cable Info Access + * ----------------------------------- + * MCIA register is used to access the SFP+ and QSFP connector's EPROM. + */ + +#define MLXSW_REG_MCIA_ID 0x9014 +#define MLXSW_REG_MCIA_LEN 0x40 + +MLXSW_REG_DEFINE(mcia, MLXSW_REG_MCIA_ID, MLXSW_REG_MCIA_LEN); + +/* reg_mcia_l + * Lock bit. 
Setting this bit will lock the access to the specific + * cable. Used for updating a full page in a cable EPROM. Any access + * other than subsequent writes will fail while the port is locked. + * Access: RW + */ +MLXSW_ITEM32(reg, mcia, l, 0x00, 31, 1); + +/* reg_mcia_module + * Module number. + * Access: Index + */ +MLXSW_ITEM32(reg, mcia, module, 0x00, 16, 8); + +/* reg_mcia_status + * Module status. + * Access: RO + */ +MLXSW_ITEM32(reg, mcia, status, 0x00, 0, 8); + +/* reg_mcia_i2c_device_address + * I2C device address. + * Access: RW + */ +MLXSW_ITEM32(reg, mcia, i2c_device_address, 0x04, 24, 8); + +/* reg_mcia_page_number + * Page number. + * Access: RW + */ +MLXSW_ITEM32(reg, mcia, page_number, 0x04, 16, 8); + +/* reg_mcia_device_address + * Device address. + * Access: RW + */ +MLXSW_ITEM32(reg, mcia, device_address, 0x04, 0, 16); + +/* reg_mcia_size + * Number of bytes to read/write (up to 48 bytes). + * Access: RW + */ +MLXSW_ITEM32(reg, mcia, size, 0x08, 0, 16); + +#define MLXSW_SP_REG_MCIA_EEPROM_SIZE 48 + +/* reg_mcia_eeprom + * Bytes to read/write. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_SP_REG_MCIA_EEPROM_SIZE); + +static inline void mlxsw_reg_mcia_pack(char *payload, u8 module, u8 lock, + u8 page_number, u16 device_addr, + u8 size, u8 i2c_device_addr) +{ + MLXSW_REG_ZERO(mcia, payload); + mlxsw_reg_mcia_module_set(payload, module); + mlxsw_reg_mcia_l_set(payload, lock); + mlxsw_reg_mcia_page_number_set(payload, page_number); + mlxsw_reg_mcia_device_address_set(payload, device_addr); + mlxsw_reg_mcia_size_set(payload, size); + mlxsw_reg_mcia_i2c_device_address_set(payload, i2c_device_addr); +} + /* MPAT - Monitoring Port Analyzer Table * ------------------------------------- * MPAT Register is used to query and configure the Switch PortAnalyzer Table. @@ -5643,6 +6115,222 @@ static inline void mlxsw_reg_mlcr_pack(char *payload, u8 local_port, MLXSW_REG_MLCR_DURATION_MAX : 0); } +/* MCQI - Management Component Query Information + * --------------------------------------------- + * This register allows querying information about firmware components. + */ +#define MLXSW_REG_MCQI_ID 0x9061 +#define MLXSW_REG_MCQI_BASE_LEN 0x18 +#define MLXSW_REG_MCQI_CAP_LEN 0x14 +#define MLXSW_REG_MCQI_LEN (MLXSW_REG_MCQI_BASE_LEN + MLXSW_REG_MCQI_CAP_LEN) + +MLXSW_REG_DEFINE(mcqi, MLXSW_REG_MCQI_ID, MLXSW_REG_MCQI_LEN); + +/* reg_mcqi_component_index + * Index of the accessed component. + * Access: Index + */ +MLXSW_ITEM32(reg, mcqi, component_index, 0x00, 0, 16); + +enum mlxfw_reg_mcqi_info_type { + MLXSW_REG_MCQI_INFO_TYPE_CAPABILITIES, +}; + +/* reg_mcqi_info_type + * Component properties set. + * Access: RW + */ +MLXSW_ITEM32(reg, mcqi, info_type, 0x08, 0, 5); + +/* reg_mcqi_offset + * The requested/returned data offset from the section start, given in bytes. + * Must be DWORD aligned. + * Access: RW + */ +MLXSW_ITEM32(reg, mcqi, offset, 0x10, 0, 32); + +/* reg_mcqi_data_size + * The requested/returned data size, given in bytes. If data_size is not DWORD + * aligned, the last bytes are zero padded. + * Access: RW + */ +MLXSW_ITEM32(reg, mcqi, data_size, 0x14, 0, 16); + +/* reg_mcqi_cap_max_component_size + * Maximum size for this component, given in bytes. + * Access: RO + */ +MLXSW_ITEM32(reg, mcqi, cap_max_component_size, 0x20, 0, 32); + +/* reg_mcqi_cap_log_mcda_word_size + * Log 2 of the access word size in bytes. Read and write access must be aligned + * to the word size. Write access must be done for an integer number of words. 
+ * Access: RO + */ +MLXSW_ITEM32(reg, mcqi, cap_log_mcda_word_size, 0x24, 28, 4); + +/* reg_mcqi_cap_mcda_max_write_size + * Maximal write size for MCDA register + * Access: RO + */ +MLXSW_ITEM32(reg, mcqi, cap_mcda_max_write_size, 0x24, 0, 16); + +static inline void mlxsw_reg_mcqi_pack(char *payload, u16 component_index) +{ + MLXSW_REG_ZERO(mcqi, payload); + mlxsw_reg_mcqi_component_index_set(payload, component_index); + mlxsw_reg_mcqi_info_type_set(payload, + MLXSW_REG_MCQI_INFO_TYPE_CAPABILITIES); + mlxsw_reg_mcqi_offset_set(payload, 0); + mlxsw_reg_mcqi_data_size_set(payload, MLXSW_REG_MCQI_CAP_LEN); +} + +static inline void mlxsw_reg_mcqi_unpack(char *payload, + u32 *p_cap_max_component_size, + u8 *p_cap_log_mcda_word_size, + u16 *p_cap_mcda_max_write_size) +{ + *p_cap_max_component_size = + mlxsw_reg_mcqi_cap_max_component_size_get(payload); + *p_cap_log_mcda_word_size = + mlxsw_reg_mcqi_cap_log_mcda_word_size_get(payload); + *p_cap_mcda_max_write_size = + mlxsw_reg_mcqi_cap_mcda_max_write_size_get(payload); +} + +/* MCC - Management Component Control + * ---------------------------------- + * Controls the firmware component and updates the FSM. + */ +#define MLXSW_REG_MCC_ID 0x9062 +#define MLXSW_REG_MCC_LEN 0x1C + +MLXSW_REG_DEFINE(mcc, MLXSW_REG_MCC_ID, MLXSW_REG_MCC_LEN); + +enum mlxsw_reg_mcc_instruction { + MLXSW_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE = 0x01, + MLXSW_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE = 0x02, + MLXSW_REG_MCC_INSTRUCTION_UPDATE_COMPONENT = 0x03, + MLXSW_REG_MCC_INSTRUCTION_VERIFY_COMPONENT = 0x04, + MLXSW_REG_MCC_INSTRUCTION_ACTIVATE = 0x06, + MLXSW_REG_MCC_INSTRUCTION_CANCEL = 0x08, +}; + +/* reg_mcc_instruction + * Command to be executed by the FSM. + * Applicable for write operation only. + * Access: RW + */ +MLXSW_ITEM32(reg, mcc, instruction, 0x00, 0, 8); + +/* reg_mcc_component_index + * Index of the accessed component. Applicable only for commands that + * refer to components. Otherwise, this field is reserved. + * Access: Index + */ +MLXSW_ITEM32(reg, mcc, component_index, 0x04, 0, 16); + +/* reg_mcc_update_handle + * Token representing the current flow executed by the FSM. + * Access: WO + */ +MLXSW_ITEM32(reg, mcc, update_handle, 0x08, 0, 24); + +/* reg_mcc_error_code + * Indicates the successful completion of the instruction, or the reason it + * failed + * Access: RO + */ +MLXSW_ITEM32(reg, mcc, error_code, 0x0C, 8, 8); + +/* reg_mcc_control_state + * Current FSM state + * Access: RO + */ +MLXSW_ITEM32(reg, mcc, control_state, 0x0C, 0, 4); + +/* reg_mcc_component_size + * Component size in bytes. Valid for UPDATE_COMPONENT instruction. Specifying + * the size may shorten the update time. Value 0x0 means that size is + * unspecified. 
+ * Access: WO + */ +MLXSW_ITEM32(reg, mcc, component_size, 0x10, 0, 32); + +static inline void mlxsw_reg_mcc_pack(char *payload, + enum mlxsw_reg_mcc_instruction instr, + u16 component_index, u32 update_handle, + u32 component_size) +{ + MLXSW_REG_ZERO(mcc, payload); + mlxsw_reg_mcc_instruction_set(payload, instr); + mlxsw_reg_mcc_component_index_set(payload, component_index); + mlxsw_reg_mcc_update_handle_set(payload, update_handle); + mlxsw_reg_mcc_component_size_set(payload, component_size); +} + +static inline void mlxsw_reg_mcc_unpack(char *payload, u32 *p_update_handle, + u8 *p_error_code, u8 *p_control_state) +{ + if (p_update_handle) + *p_update_handle = mlxsw_reg_mcc_update_handle_get(payload); + if (p_error_code) + *p_error_code = mlxsw_reg_mcc_error_code_get(payload); + if (p_control_state) + *p_control_state = mlxsw_reg_mcc_control_state_get(payload); +} + +/* MCDA - Management Component Data Access + * --------------------------------------- + * This register allows reading and writing a firmware component. + */ +#define MLXSW_REG_MCDA_ID 0x9063 +#define MLXSW_REG_MCDA_BASE_LEN 0x10 +#define MLXSW_REG_MCDA_MAX_DATA_LEN 0x80 +#define MLXSW_REG_MCDA_LEN \ + (MLXSW_REG_MCDA_BASE_LEN + MLXSW_REG_MCDA_MAX_DATA_LEN) + +MLXSW_REG_DEFINE(mcda, MLXSW_REG_MCDA_ID, MLXSW_REG_MCDA_LEN); + +/* reg_mcda_update_handle + * Token representing the current flow executed by the FSM. + * Access: RW + */ +MLXSW_ITEM32(reg, mcda, update_handle, 0x00, 0, 24); + +/* reg_mcda_offset + * Offset of accessed address relative to component start. Accesses must be in + * accordance to log_mcda_word_size in MCQI reg. + * Access: RW + */ +MLXSW_ITEM32(reg, mcda, offset, 0x04, 0, 32); + +/* reg_mcda_size + * Size of the data accessed, given in bytes. + * Access: RW + */ +MLXSW_ITEM32(reg, mcda, size, 0x08, 0, 16); + +/* reg_mcda_data + * Data block accessed. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, mcda, data, 0x10, 0, 32, 4, 0, false); + +static inline void mlxsw_reg_mcda_pack(char *payload, u32 update_handle, + u32 offset, u16 size, u8 *data) +{ + int i; + + MLXSW_REG_ZERO(mcda, payload); + mlxsw_reg_mcda_update_handle_set(payload, update_handle); + mlxsw_reg_mcda_offset_set(payload, offset); + mlxsw_reg_mcda_size_set(payload, size); + + for (i = 0; i < size / 4; i++) + mlxsw_reg_mcda_data_set(payload, i, *(u32 *) &data[i * 4]); +} + /* MPSC - Monitoring Packet Sampling Configuration Register * -------------------------------------------------------- * MPSC Register is used to configure the Packet Sampling mechanism. @@ -5691,15 +6379,6 @@ static inline void mlxsw_reg_mpsc_pack(char *payload, u8 local_port, bool e, MLXSW_REG_DEFINE(mgpc, MLXSW_REG_MGPC_ID, MLXSW_REG_MGPC_LEN); -enum mlxsw_reg_mgpc_counter_set_type { - /* No count */ - MLXSW_REG_MGPC_COUNTER_SET_TYPE_NO_COUT = 0x00, - /* Count packets and bytes */ - MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, - /* Count only packets */ - MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS = 0x05, -}; - /* reg_mgpc_counter_set_type * Counter set type. 
* Access: OP @@ -5739,7 +6418,7 @@ MLXSW_ITEM64(reg, mgpc, packet_counter, 0x10, 0, 64); static inline void mlxsw_reg_mgpc_pack(char *payload, u32 counter_index, enum mlxsw_reg_mgpc_opcode opcode, - enum mlxsw_reg_mgpc_counter_set_type set_type) + enum mlxsw_reg_flow_counter_set_type set_type) { MLXSW_REG_ZERO(mgpc, payload); mlxsw_reg_mgpc_counter_index_set(payload, counter_index); @@ -5747,6 +6426,36 @@ static inline void mlxsw_reg_mgpc_pack(char *payload, u32 counter_index, mlxsw_reg_mgpc_opcode_set(payload, opcode); } +/* TIGCR - Tunneling IPinIP General Configuration Register + * ------------------------------------------------------- + * The TIGCR register is used for setting up the IPinIP Tunnel configuration. + */ +#define MLXSW_REG_TIGCR_ID 0xA801 +#define MLXSW_REG_TIGCR_LEN 0x10 + +MLXSW_REG_DEFINE(tigcr, MLXSW_REG_TIGCR_ID, MLXSW_REG_TIGCR_LEN); + +/* reg_tigcr_ipip_ttlc + * For IPinIP Tunnel encapsulation: whether to copy the ttl from the packet + * header. + * Access: RW + */ +MLXSW_ITEM32(reg, tigcr, ttlc, 0x04, 8, 1); + +/* reg_tigcr_ipip_ttl_uc + * The TTL for IPinIP Tunnel encapsulation of unicast packets if + * reg_tigcr_ipip_ttlc is unset. + * Access: RW + */ +MLXSW_ITEM32(reg, tigcr, ttl_uc, 0x04, 0, 8); + +static inline void mlxsw_reg_tigcr_pack(char *payload, bool ttlc, u8 ttl_uc) +{ + MLXSW_REG_ZERO(tigcr, payload); + mlxsw_reg_tigcr_ttlc_set(payload, ttlc); + mlxsw_reg_tigcr_ttl_uc_set(payload, ttl_uc); +} + /* SBPR - Shared Buffer Pools Register * ----------------------------------- * The SBPR configures and retrieves the shared buffer pools and configuration. @@ -6203,6 +6912,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(rgcr), MLXSW_REG(ritr), MLXSW_REG(ratr), + MLXSW_REG(rtdp), MLXSW_REG(ricnt), MLXSW_REG(ralta), MLXSW_REG(ralst), @@ -6217,11 +6927,16 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mfsl), MLXSW_REG(mtcap), MLXSW_REG(mtmp), + MLXSW_REG(mcia), MLXSW_REG(mpat), MLXSW_REG(mpar), MLXSW_REG(mlcr), MLXSW_REG(mpsc), + MLXSW_REG(mcqi), + MLXSW_REG(mcc), + MLXSW_REG(mcda), MLXSW_REG(mgpc), + MLXSW_REG(tigcr), MLXSW_REG(sbpr), MLXSW_REG(sbcm), MLXSW_REG(sbpm), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 88357cee7679..696b99e65a5a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -58,6 +58,7 @@ #include <net/tc_act/tc_mirred.h> #include <net/netevent.h> #include <net/tc_act/tc_sample.h> +#include <net/addrconf.h> #include "spectrum.h" #include "pci.h" @@ -68,6 +69,22 @@ #include "txheader.h" #include "spectrum_cnt.h" #include "spectrum_dpipe.h" +#include "../mlxfw/mlxfw.h" + +#define MLXSW_FWREV_MAJOR 13 +#define MLXSW_FWREV_MINOR 1420 +#define MLXSW_FWREV_SUBMINOR 122 + +static const struct mlxsw_fw_rev mlxsw_sp_supported_fw_rev = { + .major = MLXSW_FWREV_MAJOR, + .minor = MLXSW_FWREV_MINOR, + .subminor = MLXSW_FWREV_SUBMINOR +}; + +#define MLXSW_SP_FW_FILENAME \ + "mellanox/mlxsw_spectrum-" __stringify(MLXSW_FWREV_MAJOR) \ + "." __stringify(MLXSW_FWREV_MINOR) \ + "." 
__stringify(MLXSW_FWREV_SUBMINOR) ".mfa2" static const char mlxsw_sp_driver_name[] = "mlxsw_spectrum"; static const char mlxsw_sp_driver_version[] = "1.0"; @@ -140,6 +157,223 @@ MLXSW_ITEM32(tx, hdr, fid, 0x08, 0, 16); */ MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4); +struct mlxsw_sp_mlxfw_dev { + struct mlxfw_dev mlxfw_dev; + struct mlxsw_sp *mlxsw_sp; +}; + +static int mlxsw_sp_component_query(struct mlxfw_dev *mlxfw_dev, + u16 component_index, u32 *p_max_size, + u8 *p_align_bits, u16 *p_max_write_size) +{ + struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = + container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; + char mcqi_pl[MLXSW_REG_MCQI_LEN]; + int err; + + mlxsw_reg_mcqi_pack(mcqi_pl, component_index); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcqi), mcqi_pl); + if (err) + return err; + mlxsw_reg_mcqi_unpack(mcqi_pl, p_max_size, p_align_bits, + p_max_write_size); + + *p_align_bits = max_t(u8, *p_align_bits, 2); + *p_max_write_size = min_t(u16, *p_max_write_size, + MLXSW_REG_MCDA_MAX_DATA_LEN); + return 0; +} + +static int mlxsw_sp_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle) +{ + struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = + container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; + char mcc_pl[MLXSW_REG_MCC_LEN]; + u8 control_state; + int err; + + mlxsw_reg_mcc_pack(mcc_pl, 0, 0, 0, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); + if (err) + return err; + + mlxsw_reg_mcc_unpack(mcc_pl, fwhandle, NULL, &control_state); + if (control_state != MLXFW_FSM_STATE_IDLE) + return -EBUSY; + + mlxsw_reg_mcc_pack(mcc_pl, + MLXSW_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE, + 0, *fwhandle, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); +} + +static int mlxsw_sp_fsm_component_update(struct mlxfw_dev *mlxfw_dev, + u32 fwhandle, u16 component_index, + u32 component_size) +{ + struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = + container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_UPDATE_COMPONENT, + component_index, fwhandle, component_size); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); +} + +static int mlxsw_sp_fsm_block_download(struct mlxfw_dev *mlxfw_dev, + u32 fwhandle, u8 *data, u16 size, + u32 offset) +{ + struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = + container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; + char mcda_pl[MLXSW_REG_MCDA_LEN]; + + mlxsw_reg_mcda_pack(mcda_pl, fwhandle, offset, size, data); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcda), mcda_pl); +} + +static int mlxsw_sp_fsm_component_verify(struct mlxfw_dev *mlxfw_dev, + u32 fwhandle, u16 component_index) +{ + struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = + container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_VERIFY_COMPONENT, + component_index, fwhandle, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); +} + +static int mlxsw_sp_fsm_activate(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = + container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); + 
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_ACTIVATE, 0, + fwhandle, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); +} + +static int mlxsw_sp_fsm_query_state(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + enum mlxfw_fsm_state *fsm_state, + enum mlxfw_fsm_state_err *fsm_state_err) +{ + struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = + container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; + char mcc_pl[MLXSW_REG_MCC_LEN]; + u8 control_state; + u8 error_code; + int err; + + mlxsw_reg_mcc_pack(mcc_pl, 0, 0, fwhandle, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); + if (err) + return err; + + mlxsw_reg_mcc_unpack(mcc_pl, NULL, &error_code, &control_state); + *fsm_state = control_state; + *fsm_state_err = min_t(enum mlxfw_fsm_state_err, error_code, + MLXFW_FSM_STATE_ERR_MAX); + return 0; +} + +static void mlxsw_sp_fsm_cancel(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = + container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_CANCEL, 0, + fwhandle, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); +} + +static void mlxsw_sp_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = + container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; + char mcc_pl[MLXSW_REG_MCC_LEN]; + + mlxsw_reg_mcc_pack(mcc_pl, + MLXSW_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE, 0, + fwhandle, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); +} + +static const struct mlxfw_dev_ops mlxsw_sp_mlxfw_dev_ops = { + .component_query = mlxsw_sp_component_query, + .fsm_lock = mlxsw_sp_fsm_lock, + .fsm_component_update = mlxsw_sp_fsm_component_update, + .fsm_block_download = mlxsw_sp_fsm_block_download, + .fsm_component_verify = mlxsw_sp_fsm_component_verify, + .fsm_activate = mlxsw_sp_fsm_activate, + .fsm_query_state = mlxsw_sp_fsm_query_state, + .fsm_cancel = mlxsw_sp_fsm_cancel, + .fsm_release = mlxsw_sp_fsm_release +}; + +static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp, + const struct firmware *firmware) +{ + struct mlxsw_sp_mlxfw_dev mlxsw_sp_mlxfw_dev = { + .mlxfw_dev = { + .ops = &mlxsw_sp_mlxfw_dev_ops, + .psid = mlxsw_sp->bus_info->psid, + .psid_size = strlen(mlxsw_sp->bus_info->psid), + }, + .mlxsw_sp = mlxsw_sp + }; + + return mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, firmware); +} + +static bool mlxsw_sp_fw_rev_ge(const struct mlxsw_fw_rev *a, + const struct mlxsw_fw_rev *b) +{ + if (a->major != b->major) + return a->major > b->major; + if (a->minor != b->minor) + return a->minor > b->minor; + return a->subminor >= b->subminor; +} + +static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) +{ + const struct mlxsw_fw_rev *rev = &mlxsw_sp->bus_info->fw_rev; + const struct firmware *firmware; + int err; + + if (mlxsw_sp_fw_rev_ge(rev, &mlxsw_sp_supported_fw_rev)) + return 0; + + dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is out of date\n", + rev->major, rev->minor, rev->subminor); + dev_info(mlxsw_sp->bus_info->dev, "Upgrading firmware using file %s\n", + MLXSW_SP_FW_FILENAME); + + err = 
request_firmware_direct(&firmware, MLXSW_SP_FW_FILENAME, + mlxsw_sp->bus_info->dev); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Could not request firmware file %s\n", + MLXSW_SP_FW_FILENAME); + return err; + } + + err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware); + release_firmware(firmware); + return err; +} + int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, unsigned int counter_index, u64 *packets, u64 *bytes) @@ -148,12 +382,14 @@ int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, int err; mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_NOP, - MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES); + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl); if (err) return err; - *packets = mlxsw_reg_mgpc_packet_counter_get(mgpc_pl); - *bytes = mlxsw_reg_mgpc_byte_counter_get(mgpc_pl); + if (packets) + *packets = mlxsw_reg_mgpc_packet_counter_get(mgpc_pl); + if (bytes) + *bytes = mlxsw_reg_mgpc_byte_counter_get(mgpc_pl); return 0; } @@ -163,7 +399,7 @@ static int mlxsw_sp_flow_counter_clear(struct mlxsw_sp *mlxsw_sp, char mgpc_pl[MLXSW_REG_MGPC_LEN]; mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_CLEAR, - MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES); + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl); } @@ -210,6 +446,41 @@ static void mlxsw_sp_txhdr_construct(struct sk_buff *skb, mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL); } +int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, + u8 state) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + enum mlxsw_reg_spms_state spms_state; + char *spms_pl; + int err; + + switch (state) { + case BR_STATE_FORWARDING: + spms_state = MLXSW_REG_SPMS_STATE_FORWARDING; + break; + case BR_STATE_LEARNING: + spms_state = MLXSW_REG_SPMS_STATE_LEARNING; + break; + case BR_STATE_LISTENING: /* fall-through */ + case BR_STATE_DISABLED: /* fall-through */ + case BR_STATE_BLOCKING: + spms_state = MLXSW_REG_SPMS_STATE_DISCARDING; + break; + default: + BUG(); + } + + spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL); + if (!spms_pl) + return -ENOMEM; + mlxsw_reg_spms_pack(spms_pl, mlxsw_sp_port->local_port); + mlxsw_reg_spms_vid_pack(spms_pl, vid, spms_state); + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spms), spms_pl); + kfree(spms_pl); + return err; +} + static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp) { char spad_pl[MLXSW_REG_SPAD_LEN] = {0}; @@ -304,15 +575,14 @@ static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp, } static struct mlxsw_sp_span_entry * -mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port) +mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port) { - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; int i; for (i = 0; i < mlxsw_sp->span.entries_count; i++) { struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; - if (curr->used && curr->local_port == port->local_port) + if (curr->used && curr->local_port == local_port) return curr; } return NULL; @@ -323,7 +593,8 @@ static struct mlxsw_sp_span_entry { struct mlxsw_sp_span_entry *span_entry; - span_entry = mlxsw_sp_span_entry_find(port); + span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp, + port->local_port); if (span_entry) { /* Already exists, just take a reference */ span_entry->ref_count++; @@ -512,12 +783,13 @@ err_port_bind: } static void mlxsw_sp_span_mirror_remove(struct mlxsw_sp_port *from, - struct mlxsw_sp_port *to, + u8 
destination_port, enum mlxsw_sp_span_type type) { struct mlxsw_sp_span_entry *span_entry; - span_entry = mlxsw_sp_span_entry_find(to); + span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp, + destination_port); if (!span_entry) { netdev_err(from->dev, "no span entry found\n"); return; @@ -592,25 +864,16 @@ static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl); } -static int __mlxsw_sp_port_swid_set(struct mlxsw_sp *mlxsw_sp, u8 local_port, - u8 swid) -{ - char pspa_pl[MLXSW_REG_PSPA_LEN]; - - mlxsw_reg_pspa_pack(pspa_pl, swid, local_port); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pspa), pspa_pl); -} - static int mlxsw_sp_port_swid_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 swid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char pspa_pl[MLXSW_REG_PSPA_LEN]; - return __mlxsw_sp_port_swid_set(mlxsw_sp, mlxsw_sp_port->local_port, - swid); + mlxsw_reg_pspa_pack(pspa_pl, swid, mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pspa), pspa_pl); } -static int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, - bool enable) +int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char svpe_pl[MLXSW_REG_SVPE_LEN]; @@ -619,21 +882,8 @@ static int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svpe), svpe_pl); } -int mlxsw_sp_port_vid_to_fid_set(struct mlxsw_sp_port *mlxsw_sp_port, - enum mlxsw_reg_svfa_mt mt, bool valid, u16 fid, - u16 vid) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char svfa_pl[MLXSW_REG_SVFA_LEN]; - - mlxsw_reg_svfa_pack(svfa_pl, mlxsw_sp_port->local_port, mt, valid, - fid, vid); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl); -} - -int __mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid_begin, u16 vid_end, - bool learn_enable) +int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, + bool learn_enable) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char *spvmlr_pl; @@ -642,18 +892,56 @@ int __mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, spvmlr_pl = kmalloc(MLXSW_REG_SPVMLR_LEN, GFP_KERNEL); if (!spvmlr_pl) return -ENOMEM; - mlxsw_reg_spvmlr_pack(spvmlr_pl, mlxsw_sp_port->local_port, vid_begin, - vid_end, learn_enable); + mlxsw_reg_spvmlr_pack(spvmlr_pl, mlxsw_sp_port->local_port, vid, vid, + learn_enable); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvmlr), spvmlr_pl); kfree(spvmlr_pl); return err; } -static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid, bool learn_enable) +static int __mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char spvid_pl[MLXSW_REG_SPVID_LEN]; + + mlxsw_reg_spvid_pack(spvid_pl, mlxsw_sp_port->local_port, vid); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvid), spvid_pl); +} + +static int mlxsw_sp_port_allow_untagged_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool allow) { - return __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid, - learn_enable); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char spaft_pl[MLXSW_REG_SPAFT_LEN]; + + mlxsw_reg_spaft_pack(spaft_pl, mlxsw_sp_port->local_port, allow); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spaft), spaft_pl); +} + +int 
mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +{ + int err; + + if (!vid) { + err = mlxsw_sp_port_allow_untagged_set(mlxsw_sp_port, false); + if (err) + return err; + } else { + err = __mlxsw_sp_port_pvid_set(mlxsw_sp_port, vid); + if (err) + return err; + err = mlxsw_sp_port_allow_untagged_set(mlxsw_sp_port, true); + if (err) + goto err_port_allow_untagged_set; + } + + mlxsw_sp_port->pvid = vid; + return 0; + +err_port_allow_untagged_set: + __mlxsw_sp_port_pvid_set(mlxsw_sp_port, mlxsw_sp_port->pvid); + return err; } static int @@ -683,13 +971,14 @@ static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, return 0; } -static int mlxsw_sp_port_module_map(struct mlxsw_sp *mlxsw_sp, u8 local_port, +static int mlxsw_sp_port_module_map(struct mlxsw_sp_port *mlxsw_sp_port, u8 module, u8 width, u8 lane) { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char pmlp_pl[MLXSW_REG_PMLP_LEN]; int i; - mlxsw_reg_pmlp_pack(pmlp_pl, local_port); + mlxsw_reg_pmlp_pack(pmlp_pl, mlxsw_sp_port->local_port); mlxsw_reg_pmlp_width_set(pmlp_pl, width); for (i = 0; i < width; i++) { mlxsw_reg_pmlp_module_set(pmlp_pl, i, module); @@ -699,11 +988,12 @@ static int mlxsw_sp_port_module_map(struct mlxsw_sp *mlxsw_sp, u8 local_port, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmlp), pmlp_pl); } -static int mlxsw_sp_port_module_unmap(struct mlxsw_sp *mlxsw_sp, u8 local_port) +static int mlxsw_sp_port_module_unmap(struct mlxsw_sp_port *mlxsw_sp_port) { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char pmlp_pl[MLXSW_REG_PMLP_LEN]; - mlxsw_reg_pmlp_pack(pmlp_pl, local_port); + mlxsw_reg_pmlp_pack(pmlp_pl, mlxsw_sp_port->local_port); mlxsw_reg_pmlp_width_set(pmlp_pl, 0); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmlp), pmlp_pl); } @@ -1100,95 +1390,82 @@ int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, return 0; } -static int mlxsw_sp_port_vp_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) +static void mlxsw_sp_port_vlan_flush(struct mlxsw_sp_port *mlxsw_sp_port) { - enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; - u16 vid, last_visited_vid; - int err; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, *tmp; - for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) { - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, mt, true, vid, - vid); - if (err) { - last_visited_vid = vid; - goto err_port_vid_to_fid_set; - } - } - - err = mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, true); - if (err) { - last_visited_vid = VLAN_N_VID; - goto err_port_vid_to_fid_set; - } - - return 0; - -err_port_vid_to_fid_set: - for_each_set_bit(vid, mlxsw_sp_port->active_vlans, last_visited_vid) - mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, mt, false, vid, - vid); - return err; + list_for_each_entry_safe(mlxsw_sp_port_vlan, tmp, + &mlxsw_sp_port->vlans_list, list) + mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan); } -static int mlxsw_sp_port_vlan_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) +static struct mlxsw_sp_port_vlan * +mlxsw_sp_port_vlan_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) { - enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; - u16 vid; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + bool untagged = vid == 1; int err; - err = mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false); + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, true, untagged); if (err) - return err; + return ERR_PTR(err); - for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) { - err = 
mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, mt, false, - vid, vid); - if (err) - return err; + mlxsw_sp_port_vlan = kzalloc(sizeof(*mlxsw_sp_port_vlan), GFP_KERNEL); + if (!mlxsw_sp_port_vlan) { + err = -ENOMEM; + goto err_port_vlan_alloc; } - return 0; + mlxsw_sp_port_vlan->mlxsw_sp_port = mlxsw_sp_port; + mlxsw_sp_port_vlan->vid = vid; + list_add(&mlxsw_sp_port_vlan->list, &mlxsw_sp_port->vlans_list); + + return mlxsw_sp_port_vlan; + +err_port_vlan_alloc: + mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false); + return ERR_PTR(err); } -static struct mlxsw_sp_port * -mlxsw_sp_port_vport_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +static void +mlxsw_sp_port_vlan_destroy(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) { - struct mlxsw_sp_port *mlxsw_sp_vport; + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; + u16 vid = mlxsw_sp_port_vlan->vid; - mlxsw_sp_vport = kzalloc(sizeof(*mlxsw_sp_vport), GFP_KERNEL); - if (!mlxsw_sp_vport) - return NULL; + list_del(&mlxsw_sp_port_vlan->list); + kfree(mlxsw_sp_port_vlan); + mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false); +} - /* dev will be set correctly after the VLAN device is linked - * with the real device. In case of bridge SELF invocation, dev - * will remain as is. - */ - mlxsw_sp_vport->dev = mlxsw_sp_port->dev; - mlxsw_sp_vport->mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - mlxsw_sp_vport->local_port = mlxsw_sp_port->local_port; - mlxsw_sp_vport->stp_state = BR_STATE_FORWARDING; - mlxsw_sp_vport->lagged = mlxsw_sp_port->lagged; - mlxsw_sp_vport->lag_id = mlxsw_sp_port->lag_id; - mlxsw_sp_vport->vport.vid = vid; +struct mlxsw_sp_port_vlan * +mlxsw_sp_port_vlan_get(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; - list_add(&mlxsw_sp_vport->vport.list, &mlxsw_sp_port->vports_list); + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); + if (mlxsw_sp_port_vlan) + return mlxsw_sp_port_vlan; - return mlxsw_sp_vport; + return mlxsw_sp_port_vlan_create(mlxsw_sp_port, vid); } -static void mlxsw_sp_port_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_vport) +void mlxsw_sp_port_vlan_put(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) { - list_del(&mlxsw_sp_vport->vport.list); - kfree(mlxsw_sp_vport); + struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; + + if (mlxsw_sp_port_vlan->bridge_port) + mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan); + else if (fid) + mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); + + mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan); } static int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp_port *mlxsw_sp_vport; - bool untagged = vid == 1; - int err; /* VLAN 0 is added to HW filter when device goes up, but it is * reserved in our case, so simply return. @@ -1196,43 +1473,14 @@ static int mlxsw_sp_port_add_vid(struct net_device *dev, if (!vid) return 0; - if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid)) - return 0; - - mlxsw_sp_vport = mlxsw_sp_port_vport_create(mlxsw_sp_port, vid); - if (!mlxsw_sp_vport) - return -ENOMEM; - - /* When adding the first VLAN interface on a bridged port we need to - * transition all the active 802.1Q bridge VLANs to use explicit - * {Port, VID} to FID mappings and set the port's mode to Virtual mode. 
- */ - if (list_is_singular(&mlxsw_sp_port->vports_list)) { - err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port); - if (err) - goto err_port_vp_mode_trans; - } - - err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, untagged); - if (err) - goto err_port_add_vid; - - return 0; - -err_port_add_vid: - if (list_is_singular(&mlxsw_sp_port->vports_list)) - mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); -err_port_vp_mode_trans: - mlxsw_sp_port_vport_destroy(mlxsw_sp_vport); - return err; + return PTR_ERR_OR_ZERO(mlxsw_sp_port_vlan_get(mlxsw_sp_port, vid)); } static int mlxsw_sp_port_kill_vid(struct net_device *dev, __be16 __always_unused proto, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp_port *mlxsw_sp_vport; - struct mlxsw_sp_fid *f; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; /* VLAN 0 is removed from HW filter when device goes down, but * it is reserved in our case, so simply return. @@ -1240,27 +1488,10 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev, if (!vid) return 0; - mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); - if (WARN_ON(!mlxsw_sp_vport)) + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); + if (!mlxsw_sp_port_vlan) return 0; - - mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); - - /* Drop FID reference. If this was the last reference the - * resources will be freed. - */ - f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); - if (f && !WARN_ON(!f->leave)) - f->leave(mlxsw_sp_vport); - - /* When removing the last VLAN interface on a bridged port we need to - * transition all active 802.1Q bridge VLANs to use VID to FID - * mappings and set port's mode to VLAN mode. - */ - if (list_is_singular(&mlxsw_sp_port->vports_list)) - mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); - - mlxsw_sp_port_vport_destroy(mlxsw_sp_vport); + mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan); return 0; } @@ -1333,14 +1564,12 @@ static void mlxsw_sp_port_del_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_port_mall_mirror_tc_entry *mirror) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; enum mlxsw_sp_span_type span_type; - struct mlxsw_sp_port *to_port; - to_port = mlxsw_sp->ports[mirror->to_local_port]; span_type = mirror->ingress ? 
MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; - mlxsw_sp_span_mirror_remove(mlxsw_sp_port, to_port, span_type); + mlxsw_sp_span_mirror_remove(mlxsw_sp_port, mirror->to_local_port, + span_type); } static int @@ -1389,16 +1618,16 @@ mlxsw_sp_port_del_cls_matchall_sample(struct mlxsw_sp_port *mlxsw_sp_port) } static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, - __be16 protocol, - struct tc_cls_matchall_offload *cls, + struct tc_cls_matchall_offload *f, bool ingress) { struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry; + __be16 protocol = f->common.protocol; const struct tc_action *a; LIST_HEAD(actions); int err; - if (!tc_single_action(cls->exts)) { + if (!tcf_exts_has_one_action(f->exts)) { netdev_err(mlxsw_sp_port->dev, "only singular actions are supported\n"); return -EOPNOTSUPP; } @@ -1406,9 +1635,9 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL); if (!mall_tc_entry) return -ENOMEM; - mall_tc_entry->cookie = cls->cookie; + mall_tc_entry->cookie = f->cookie; - tcf_exts_to_list(cls->exts, &actions); + tcf_exts_to_list(f->exts, &actions); a = list_first_entry(&actions, struct tc_action, list); if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) { @@ -1420,7 +1649,7 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, mirror, a, ingress); } else if (is_tcf_sample(a) && protocol == htons(ETH_P_ALL)) { mall_tc_entry->type = MLXSW_SP_PORT_MALL_SAMPLE; - err = mlxsw_sp_port_add_cls_matchall_sample(mlxsw_sp_port, cls, + err = mlxsw_sp_port_add_cls_matchall_sample(mlxsw_sp_port, f, a, ingress); } else { err = -EOPNOTSUPP; @@ -1438,12 +1667,12 @@ err_add_action: } static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_cls_matchall_offload *cls) + struct tc_cls_matchall_offload *f) { struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry; mall_tc_entry = mlxsw_sp_port_mall_tc_entry_find(mlxsw_sp_port, - cls->cookie); + f->cookie); if (!mall_tc_entry) { netdev_dbg(mlxsw_sp_port->dev, "tc entry not found on port\n"); return; @@ -1465,45 +1694,72 @@ static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, kfree(mall_tc_entry); } -static int mlxsw_sp_setup_tc(struct net_device *dev, u32 handle, - __be16 proto, struct tc_to_netdev *tc) +static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_cls_matchall_offload *f) +{ + bool ingress; + + if (is_classid_clsact_ingress(f->common.classid)) + ingress = true; + else if (is_classid_clsact_egress(f->common.classid)) + ingress = false; + else + return -EOPNOTSUPP; + + if (f->common.chain_index) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_CLSMATCHALL_REPLACE: + return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port, f, + ingress); + case TC_CLSMATCHALL_DESTROY: + mlxsw_sp_port_del_cls_matchall(mlxsw_sp_port, f); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int +mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_cls_flower_offload *f) +{ + bool ingress; + + if (is_classid_clsact_ingress(f->common.classid)) + ingress = true; + else if (is_classid_clsact_egress(f->common.classid)) + ingress = false; + else + return -EOPNOTSUPP; + + switch (f->command) { + case TC_CLSFLOWER_REPLACE: + return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress, f); + case TC_CLSFLOWER_DESTROY: + mlxsw_sp_flower_destroy(mlxsw_sp_port, ingress, f); + return 0; + case 
TC_CLSFLOWER_STATS: + return mlxsw_sp_flower_stats(mlxsw_sp_port, ingress, f); + default: + return -EOPNOTSUPP; + } +} + +static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS); - - switch (tc->type) { - case TC_SETUP_MATCHALL: - switch (tc->cls_mall->command) { - case TC_CLSMATCHALL_REPLACE: - return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port, - proto, - tc->cls_mall, - ingress); - case TC_CLSMATCHALL_DESTROY: - mlxsw_sp_port_del_cls_matchall(mlxsw_sp_port, - tc->cls_mall); - return 0; - default: - return -EOPNOTSUPP; - } + + switch (type) { + case TC_SETUP_CLSMATCHALL: + return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data); case TC_SETUP_CLSFLOWER: - switch (tc->cls_flower->command) { - case TC_CLSFLOWER_REPLACE: - return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress, - proto, tc->cls_flower); - case TC_CLSFLOWER_DESTROY: - mlxsw_sp_flower_destroy(mlxsw_sp_port, ingress, - tc->cls_flower); - return 0; - case TC_CLSFLOWER_STATS: - return mlxsw_sp_flower_stats(mlxsw_sp_port, ingress, - tc->cls_flower); - default: - return -EOPNOTSUPP; - } + return mlxsw_sp_setup_tc_cls_flower(mlxsw_sp_port, type_data); + default: + return -EOPNOTSUPP; } - - return -EOPNOTSUPP; } static const struct net_device_ops mlxsw_sp_port_netdev_ops = { @@ -1519,12 +1775,6 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_get_offload_stats = mlxsw_sp_port_get_offload_stats, .ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid, .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, - .ndo_fdb_add = switchdev_port_fdb_add, - .ndo_fdb_del = switchdev_port_fdb_del, - .ndo_fdb_dump = switchdev_port_fdb_dump, - .ndo_bridge_setlink = switchdev_port_bridge_setlink, - .ndo_bridge_getlink = switchdev_port_bridge_getlink, - .ndo_bridge_dellink = switchdev_port_bridge_dellink, .ndo_get_phys_port_name = mlxsw_sp_port_get_phys_port_name, }; @@ -2269,6 +2519,175 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev, return 0; } +static int mlxsw_sp_flash_device(struct net_device *dev, + struct ethtool_flash *flash) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + const struct firmware *firmware; + int err; + + if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) + return -EOPNOTSUPP; + + dev_hold(dev); + rtnl_unlock(); + + err = request_firmware_direct(&firmware, flash->data, &dev->dev); + if (err) + goto out; + err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware); + release_firmware(firmware); +out: + rtnl_lock(); + dev_put(dev); + return err; +} + +#define MLXSW_SP_I2C_ADDR_LOW 0x50 +#define MLXSW_SP_I2C_ADDR_HIGH 0x51 +#define MLXSW_SP_EEPROM_PAGE_LENGTH 256 + +static int mlxsw_sp_query_module_eeprom(struct mlxsw_sp_port *mlxsw_sp_port, + u16 offset, u16 size, void *data, + unsigned int *p_read_size) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char eeprom_tmp[MLXSW_SP_REG_MCIA_EEPROM_SIZE]; + char mcia_pl[MLXSW_REG_MCIA_LEN]; + u16 i2c_addr; + int status; + int err; + + size = min_t(u16, size, MLXSW_SP_REG_MCIA_EEPROM_SIZE); + + if (offset < MLXSW_SP_EEPROM_PAGE_LENGTH && + offset + size > MLXSW_SP_EEPROM_PAGE_LENGTH) + /* Cross pages read, read until offset 256 in low page */ + size = MLXSW_SP_EEPROM_PAGE_LENGTH - offset; + + i2c_addr = MLXSW_SP_I2C_ADDR_LOW; + if (offset >= MLXSW_SP_EEPROM_PAGE_LENGTH) { + i2c_addr = MLXSW_SP_I2C_ADDR_HIGH; + offset -= 
MLXSW_SP_EEPROM_PAGE_LENGTH; + } + + mlxsw_reg_mcia_pack(mcia_pl, mlxsw_sp_port->mapping.module, + 0, 0, offset, size, i2c_addr); + + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcia), mcia_pl); + if (err) + return err; + + status = mlxsw_reg_mcia_status_get(mcia_pl); + if (status) + return -EIO; + + mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp); + memcpy(data, eeprom_tmp, size); + *p_read_size = size; + + return 0; +} + +enum mlxsw_sp_eeprom_module_info_rev_id { + MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_UNSPC = 0x00, + MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8436 = 0x01, + MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8636 = 0x03, +}; + +enum mlxsw_sp_eeprom_module_info_id { + MLXSW_SP_EEPROM_MODULE_INFO_ID_SFP = 0x03, + MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP = 0x0C, + MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP_PLUS = 0x0D, + MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28 = 0x11, +}; + +enum mlxsw_sp_eeprom_module_info { + MLXSW_SP_EEPROM_MODULE_INFO_ID, + MLXSW_SP_EEPROM_MODULE_INFO_REV_ID, + MLXSW_SP_EEPROM_MODULE_INFO_SIZE, +}; + +static int mlxsw_sp_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); + u8 module_info[MLXSW_SP_EEPROM_MODULE_INFO_SIZE]; + u8 module_rev_id, module_id; + unsigned int read_size; + int err; + + err = mlxsw_sp_query_module_eeprom(mlxsw_sp_port, 0, + MLXSW_SP_EEPROM_MODULE_INFO_SIZE, + module_info, &read_size); + if (err) + return err; + + if (read_size < MLXSW_SP_EEPROM_MODULE_INFO_SIZE) + return -EIO; + + module_rev_id = module_info[MLXSW_SP_EEPROM_MODULE_INFO_REV_ID]; + module_id = module_info[MLXSW_SP_EEPROM_MODULE_INFO_ID]; + + switch (module_id) { + case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP: + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + break; + case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP_PLUS: + case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28: + if (module_id == MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28 || + module_rev_id >= MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8636) { + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + } + break; + case MLXSW_SP_EEPROM_MODULE_INFO_ID_SFP: + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int mlxsw_sp_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *ee, + u8 *data) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); + int offset = ee->offset; + unsigned int read_size; + int i = 0; + int err; + + if (!ee->len) + return -EINVAL; + + memset(data, 0, ee->len); + + while (i < ee->len) { + err = mlxsw_sp_query_module_eeprom(mlxsw_sp_port, offset, + ee->len - i, data + i, + &read_size); + if (err) { + netdev_err(mlxsw_sp_port->dev, "Eeprom query failed\n"); + return err; + } + + i += read_size; + offset += read_size; + } + + return 0; +} + static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_drvinfo = mlxsw_sp_port_get_drvinfo, .get_link = ethtool_op_get_link, @@ -2280,6 +2699,9 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_sset_count = mlxsw_sp_port_get_sset_count, .get_link_ksettings = mlxsw_sp_port_get_link_ksettings, .set_link_ksettings = mlxsw_sp_port_set_link_ksettings, + .flash_device = mlxsw_sp_flash_device, + .get_module_info = mlxsw_sp_get_module_info, + .get_module_eeprom = mlxsw_sp_get_module_eeprom, }; static int 
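mlxsw_sp_get_module_eeprom() above loops over MCIA queries: each query is capped at the register payload size, clamped so it never straddles the 256-byte low page, and reads past that page switch to the upper I2C address. A standalone sketch of just that offset arithmetic and the accumulation loop (48 stands in for the driver's per-query MCIA cap; the data itself is not modelled):

#include <stdio.h>

#define EEPROM_PAGE_LENGTH      256
#define I2C_ADDR_LOW            0x50
#define I2C_ADDR_HIGH           0x51
#define MCIA_EEPROM_SIZE        48      /* max bytes returned per query */

/* Clamp one query to a single page and pick the I2C address for it. */
static unsigned int query_chunk(unsigned int offset, unsigned int size,
                                unsigned int *i2c_addr, unsigned int *dev_offset)
{
        if (size > MCIA_EEPROM_SIZE)
                size = MCIA_EEPROM_SIZE;

        if (offset < EEPROM_PAGE_LENGTH && offset + size > EEPROM_PAGE_LENGTH)
                size = EEPROM_PAGE_LENGTH - offset;     /* stop at the page boundary */

        *i2c_addr = I2C_ADDR_LOW;
        *dev_offset = offset;
        if (offset >= EEPROM_PAGE_LENGTH) {
                *i2c_addr = I2C_ADDR_HIGH;              /* upper page */
                *dev_offset = offset - EEPROM_PAGE_LENGTH;
        }
        return size;
}

int main(void)
{
        unsigned int len = 300, offset = 200, done = 0;

        while (done < len) {
                unsigned int i2c_addr, dev_offset;
                unsigned int chunk = query_chunk(offset, len - done,
                                                 &i2c_addr, &dev_offset);

                printf("read %3u bytes at addr 0x%02x offset %3u\n",
                       chunk, i2c_addr, dev_offset);
                done += chunk;
                offset += chunk;
        }
        return 0;
}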
@@ -2398,51 +2820,38 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) return 0; } -static int mlxsw_sp_port_pvid_vport_create(struct mlxsw_sp_port *mlxsw_sp_port) -{ - mlxsw_sp_port->pvid = 1; - - return mlxsw_sp_port_add_vid(mlxsw_sp_port->dev, 0, 1); -} - -static int mlxsw_sp_port_pvid_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_port) -{ - return mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1); -} - -static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, - bool split, u8 module, u8 width, u8 lane) +static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, + bool split, u8 module, u8 width, u8 lane) { + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *dev; - size_t bytes; int err; + err = mlxsw_core_port_init(mlxsw_sp->core, local_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to init core port\n", + local_port); + return err; + } + dev = alloc_etherdev(sizeof(struct mlxsw_sp_port)); - if (!dev) - return -ENOMEM; + if (!dev) { + err = -ENOMEM; + goto err_alloc_etherdev; + } SET_NETDEV_DEV(dev, mlxsw_sp->bus_info->dev); mlxsw_sp_port = netdev_priv(dev); mlxsw_sp_port->dev = dev; mlxsw_sp_port->mlxsw_sp = mlxsw_sp; mlxsw_sp_port->local_port = local_port; + mlxsw_sp_port->pvid = 1; mlxsw_sp_port->split = split; mlxsw_sp_port->mapping.module = module; mlxsw_sp_port->mapping.width = width; mlxsw_sp_port->mapping.lane = lane; mlxsw_sp_port->link.autoneg = 1; - bytes = DIV_ROUND_UP(VLAN_N_VID, BITS_PER_BYTE); - mlxsw_sp_port->active_vlans = kzalloc(bytes, GFP_KERNEL); - if (!mlxsw_sp_port->active_vlans) { - err = -ENOMEM; - goto err_port_active_vlans_alloc; - } - mlxsw_sp_port->untagged_vlans = kzalloc(bytes, GFP_KERNEL); - if (!mlxsw_sp_port->untagged_vlans) { - err = -ENOMEM; - goto err_port_untagged_vlans_alloc; - } - INIT_LIST_HEAD(&mlxsw_sp_port->vports_list); + INIT_LIST_HEAD(&mlxsw_sp_port->vlans_list); INIT_LIST_HEAD(&mlxsw_sp_port->mall_tc_list); mlxsw_sp_port->pcpu_stats = @@ -2472,6 +2881,13 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, dev->netdev_ops = &mlxsw_sp_port_netdev_ops; dev->ethtool_ops = &mlxsw_sp_port_ethtool_ops; + err = mlxsw_sp_port_module_map(mlxsw_sp_port, module, width, lane); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to map module\n", + mlxsw_sp_port->local_port); + goto err_port_module_map; + } + err = mlxsw_sp_port_swid_set(mlxsw_sp_port, 0); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set SWID\n", @@ -2547,11 +2963,18 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_dcb_init; } - err = mlxsw_sp_port_pvid_vport_create(mlxsw_sp_port); + err = mlxsw_sp_port_fids_init(mlxsw_sp_port); if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create PVID vPort\n", + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize FIDs\n", mlxsw_sp_port->local_port); - goto err_port_pvid_vport_create; + goto err_port_fids_init; + } + + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1); + if (IS_ERR(mlxsw_sp_port_vlan)) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create VID 1\n", + mlxsw_sp_port->local_port); + goto err_port_vlan_get; } mlxsw_sp_port_switchdev_init(mlxsw_sp_port); @@ -2572,8 +2995,10 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, err_register_netdev: mlxsw_sp->ports[local_port] = NULL; mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); - 
mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port); -err_port_pvid_vport_create: + mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan); +err_port_vlan_get: + mlxsw_sp_port_fids_fini(mlxsw_sp_port); +err_port_fids_init: mlxsw_sp_port_dcb_fini(mlxsw_sp_port); err_port_dcb_init: err_port_ets_init: @@ -2585,43 +3010,21 @@ err_port_system_port_mapping_set: err_dev_addr_init: mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); err_port_swid_set: + mlxsw_sp_port_module_unmap(mlxsw_sp_port); +err_port_module_map: kfree(mlxsw_sp_port->hw_stats.cache); err_alloc_hw_stats: kfree(mlxsw_sp_port->sample); err_alloc_sample: free_percpu(mlxsw_sp_port->pcpu_stats); err_alloc_stats: - kfree(mlxsw_sp_port->untagged_vlans); -err_port_untagged_vlans_alloc: - kfree(mlxsw_sp_port->active_vlans); -err_port_active_vlans_alloc: free_netdev(dev); - return err; -} - -static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, - bool split, u8 module, u8 width, u8 lane) -{ - int err; - - err = mlxsw_core_port_init(mlxsw_sp->core, local_port); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to init core port\n", - local_port); - return err; - } - err = __mlxsw_sp_port_create(mlxsw_sp, local_port, split, - module, width, lane); - if (err) - goto err_port_create; - return 0; - -err_port_create: +err_alloc_etherdev: mlxsw_core_port_fini(mlxsw_sp->core, local_port); return err; } -static void __mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) +static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; @@ -2630,22 +3033,16 @@ static void __mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ mlxsw_sp->ports[local_port] = NULL; mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); - mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port); + mlxsw_sp_port_vlan_flush(mlxsw_sp_port); + mlxsw_sp_port_fids_fini(mlxsw_sp_port); mlxsw_sp_port_dcb_fini(mlxsw_sp_port); mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); - mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port); + mlxsw_sp_port_module_unmap(mlxsw_sp_port); kfree(mlxsw_sp_port->hw_stats.cache); kfree(mlxsw_sp_port->sample); free_percpu(mlxsw_sp_port->pcpu_stats); - kfree(mlxsw_sp_port->untagged_vlans); - kfree(mlxsw_sp_port->active_vlans); - WARN_ON_ONCE(!list_empty(&mlxsw_sp_port->vports_list)); + WARN_ON_ONCE(!list_empty(&mlxsw_sp_port->vlans_list)); free_netdev(mlxsw_sp_port->dev); -} - -static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) -{ - __mlxsw_sp_port_remove(mlxsw_sp, local_port); mlxsw_core_port_fini(mlxsw_sp->core, local_port); } @@ -2724,19 +3121,6 @@ static int mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port, int err, i; for (i = 0; i < count; i++) { - err = mlxsw_sp_port_module_map(mlxsw_sp, base_port + i, module, - width, i * width); - if (err) - goto err_port_module_map; - } - - for (i = 0; i < count; i++) { - err = __mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i, 0); - if (err) - goto err_port_swid_set; - } - - for (i = 0; i < count; i++) { err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true, module, width, i * width); if (err) @@ -2749,15 +3133,6 @@ err_port_create: for (i--; i >= 0; i--) if (mlxsw_sp_port_created(mlxsw_sp, base_port + i)) mlxsw_sp_port_remove(mlxsw_sp, base_port + i); - i = count; -err_port_swid_set: - for (i--; i >= 0; i--) - __mlxsw_sp_port_swid_set(mlxsw_sp, 
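The reshuffled mlxsw_sp_port_create() above keeps the usual goto-unwind shape: mlxsw_core_port_init() first, then module map, FIDs, VLAN 1, with each error label tearing down exactly the steps that already succeeded, in reverse. A toy sketch of that idiom with stand-in step names:

#include <stdio.h>

static int step(const char *name, int fail)
{
        printf("%s\n", name);
        return fail ? -1 : 0;
}

static void undo(const char *name)
{
        printf("undo %s\n", name);
}

/* Initialize in order; on failure, tear down in reverse order. */
static int port_create(int fail_at)
{
        int err;

        err = step("core_port_init", fail_at == 1);
        if (err)
                return err;
        err = step("module_map", fail_at == 2);
        if (err)
                goto err_module_map;
        err = step("fids_init", fail_at == 3);
        if (err)
                goto err_fids_init;
        err = step("vlan_get(1)", fail_at == 4);
        if (err)
                goto err_vlan_get;
        return 0;

err_vlan_get:
        undo("fids_init");
err_fids_init:
        undo("module_map");
err_module_map:
        undo("core_port_init");
        return err;
}

int main(void)
{
        return port_create(3) ? 1 : 0;  /* fail at fids_init to show the unwind */
}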
base_port + i, - MLXSW_PORT_SWID_DISABLED_PORT); - i = count; -err_port_module_map: - for (i--; i >= 0; i--) - mlxsw_sp_port_module_unmap(mlxsw_sp, base_port + i); return err; } @@ -2776,17 +3151,6 @@ static void mlxsw_sp_port_unsplit_create(struct mlxsw_sp *mlxsw_sp, local_port = base_port + i * 2; module = mlxsw_sp->port_to_module[local_port]; - mlxsw_sp_port_module_map(mlxsw_sp, local_port, module, width, - 0); - } - - for (i = 0; i < count; i++) - __mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i * 2, 0); - - for (i = 0; i < count; i++) { - local_port = base_port + i * 2; - module = mlxsw_sp->port_to_module[local_port]; - mlxsw_sp_port_create(mlxsw_sp, local_port, false, module, width, 0); } @@ -3009,18 +3373,53 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, ARP, false), MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, ARP, false), MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, IP2ME, false), + MLXSW_SP_RXL_MARK(IPV6_MLDV12_LISTENER_QUERY, MIRROR_TO_CPU, IPV6_MLD, + false), + MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD, + false), + MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_DONE, TRAP_TO_CPU, IPV6_MLD, + false), + MLXSW_SP_RXL_NO_MARK(IPV6_MLDV2_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD, + false), /* L3 traps */ - MLXSW_SP_RXL_NO_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_NO_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_NO_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(OSPF, TRAP_TO_CPU, OSPF, false), - MLXSW_SP_RXL_NO_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false), - MLXSW_SP_RXL_NO_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false), - MLXSW_SP_RXL_NO_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, ARP_MISS, false), - MLXSW_SP_RXL_NO_MARK(BGP_IPV4, TRAP_TO_CPU, BGP_IPV4, false), + MLXSW_SP_RXL_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false), + MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP, + false), + MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_SRC, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPV6_ALL_NODES_LINK, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPV6_ALL_ROUTERS_LINK, TRAP_TO_CPU, ROUTER_EXP, + false), + MLXSW_SP_RXL_MARK(IPV4_OSPF, TRAP_TO_CPU, OSPF, false), + MLXSW_SP_RXL_MARK(IPV6_OSPF, TRAP_TO_CPU, OSPF, false), + MLXSW_SP_RXL_MARK(IPV6_DHCP, TRAP_TO_CPU, DHCP, false), + MLXSW_SP_RXL_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false), + MLXSW_SP_RXL_MARK(IPV4_BGP, TRAP_TO_CPU, BGP, false), + MLXSW_SP_RXL_MARK(IPV6_BGP, TRAP_TO_CPU, BGP, false), + MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_SOLICITATION, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_SOLICITATION, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, TRAP_TO_CPU, IPV6_ND, false), + MLXSW_SP_RXL_MARK(IPV6_MC_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, + false), + MLXSW_SP_RXL_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, HOST_MISS, false), + MLXSW_SP_RXL_MARK(HOST_MISS_IPV6, TRAP_TO_CPU, HOST_MISS, false), + MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false), + 
MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false), /* PKT Sample trap */ MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU, - false, SP_IP2ME, DISCARD) + false, SP_IP2ME, DISCARD), + /* ACL trap */ + MLXSW_SP_RXL_NO_MARK(ACL0, TRAP_TO_CPU, IP2ME, false), }; static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) @@ -3050,15 +3449,17 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) burst_size = 7; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD: rate = 16 * 1024; burst_size = 10; break; - case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP: - case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: rate = 1024; burst_size = 7; break; @@ -3107,21 +3508,23 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) priority = 5; tc = 5; break; - case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP: priority = 4; tc = 4; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD: priority = 3; tc = 3; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: priority = 2; tc = 2; break; - case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: priority = 1; @@ -3192,57 +3595,6 @@ static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) } } -static int __mlxsw_sp_flood_init(struct mlxsw_core *mlxsw_core, - enum mlxsw_reg_sfgc_type type, - enum mlxsw_reg_sfgc_bridge_type bridge_type) -{ - enum mlxsw_flood_table_type table_type; - enum mlxsw_sp_flood_table flood_table; - char sfgc_pl[MLXSW_REG_SFGC_LEN]; - - if (bridge_type == MLXSW_REG_SFGC_BRIDGE_TYPE_VFID) - table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID; - else - table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; - - switch (type) { - case MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST: - flood_table = MLXSW_SP_FLOOD_TABLE_UC; - break; - case MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4: - flood_table = MLXSW_SP_FLOOD_TABLE_MC; - break; - default: - flood_table = MLXSW_SP_FLOOD_TABLE_BC; - } - - mlxsw_reg_sfgc_pack(sfgc_pl, type, bridge_type, table_type, - flood_table); - return mlxsw_reg_write(mlxsw_core, MLXSW_REG(sfgc), sfgc_pl); -} - -static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp) -{ - int type, err; - - for (type = 0; type < MLXSW_REG_SFGC_TYPE_MAX; type++) { - if (type == MLXSW_REG_SFGC_TYPE_RESERVED) - continue; - - err = __mlxsw_sp_flood_init(mlxsw_sp->core, type, - MLXSW_REG_SFGC_BRIDGE_TYPE_VFID); - if (err) - return err; - - err = __mlxsw_sp_flood_init(mlxsw_sp->core, type, - MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID); - if (err) - return err; - } - - return 0; -} - static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) { char slcr_pl[MLXSW_REG_SLCR_LEN]; @@ -3290,18 +3642,6 @@ static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); } -static int mlxsw_sp_vfid_op(struct mlxsw_sp *mlxsw_sp, u16 fid, bool create); - -static int mlxsw_sp_dummy_fid_init(struct 
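The two switch statements above assign each trap group a policer rate/burst and a priority/traffic class. The same information, laid out as a small table for a few of the groups touched by this hunk (values copied from the code above; how burst_size is interpreted by the policer register is not shown here):

#include <stdio.h>

enum trap_group {
        TRAP_GROUP_IGMP,
        TRAP_GROUP_IPV6_MLD,
        TRAP_GROUP_BGP,
        TRAP_GROUP_ARP,
        TRAP_GROUP_IPV6_ND,
};

struct trap_group_policy {
        enum trap_group group;
        unsigned int rate;              /* policer rate, as programmed above */
        unsigned int burst_size;
        unsigned int priority;
        unsigned int tc;
};

/* Values lifted from the switch statements in the hunk above. */
static const struct trap_group_policy policies[] = {
        { TRAP_GROUP_IGMP,      16 * 1024, 10, 3, 3 },
        { TRAP_GROUP_IPV6_MLD,  16 * 1024, 10, 3, 3 },
        { TRAP_GROUP_BGP,       1024,       7, 4, 4 },
        { TRAP_GROUP_ARP,       1024,       7, 2, 2 },
        { TRAP_GROUP_IPV6_ND,   1024,       7, 2, 2 },
};

int main(void)
{
        size_t i;

        for (i = 0; i < sizeof(policies) / sizeof(policies[0]); i++)
                printf("group %d: rate %u burst %u prio %u tc %u\n",
                       policies[i].group, policies[i].rate,
                       policies[i].burst_size, policies[i].priority,
                       policies[i].tc);
        return 0;
}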
mlxsw_sp *mlxsw_sp) -{ - return mlxsw_sp_vfid_op(mlxsw_sp, MLXSW_SP_DUMMY_FID, true); -} - -static void mlxsw_sp_dummy_fid_fini(struct mlxsw_sp *mlxsw_sp) -{ - mlxsw_sp_vfid_op(mlxsw_sp, MLXSW_SP_DUMMY_FID, false); -} - static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info) { @@ -3310,9 +3650,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->core = mlxsw_core; mlxsw_sp->bus_info = mlxsw_bus_info; - INIT_LIST_HEAD(&mlxsw_sp->fids); - INIT_LIST_HEAD(&mlxsw_sp->vfids.list); - INIT_LIST_HEAD(&mlxsw_sp->br_mids.list); + + err = mlxsw_sp_fw_rev_validate(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Could not upgrade firmware\n"); + return err; + } err = mlxsw_sp_base_mac_get(mlxsw_sp); if (err) { @@ -3320,16 +3663,16 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, return err; } - err = mlxsw_sp_traps_init(mlxsw_sp); + err = mlxsw_sp_fids_init(mlxsw_sp); if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to set traps\n"); + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize FIDs\n"); return err; } - err = mlxsw_sp_flood_init(mlxsw_sp); + err = mlxsw_sp_traps_init(mlxsw_sp); if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize flood tables\n"); - goto err_flood_init; + dev_err(mlxsw_sp->bus_info->dev, "Failed to set traps\n"); + goto err_traps_init; } err = mlxsw_sp_buffers_init(mlxsw_sp); @@ -3380,12 +3723,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_dpipe_init; } - err = mlxsw_sp_dummy_fid_init(mlxsw_sp); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to init dummy FID\n"); - goto err_dummy_fid_init; - } - err = mlxsw_sp_ports_create(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n"); @@ -3395,8 +3732,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, return 0; err_ports_create: - mlxsw_sp_dummy_fid_fini(mlxsw_sp); -err_dummy_fid_init: mlxsw_sp_dpipe_fini(mlxsw_sp); err_dpipe_init: mlxsw_sp_counter_pool_fini(mlxsw_sp); @@ -3413,8 +3748,9 @@ err_switchdev_init: err_lag_init: mlxsw_sp_buffers_fini(mlxsw_sp); err_buffers_init: -err_flood_init: mlxsw_sp_traps_fini(mlxsw_sp); +err_traps_init: + mlxsw_sp_fids_fini(mlxsw_sp); return err; } @@ -3423,7 +3759,6 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); mlxsw_sp_ports_remove(mlxsw_sp); - mlxsw_sp_dummy_fid_fini(mlxsw_sp); mlxsw_sp_dpipe_fini(mlxsw_sp); mlxsw_sp_counter_pool_fini(mlxsw_sp); mlxsw_sp_acl_fini(mlxsw_sp); @@ -3433,11 +3768,10 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_lag_fini(mlxsw_sp); mlxsw_sp_buffers_fini(mlxsw_sp); mlxsw_sp_traps_fini(mlxsw_sp); - WARN_ON(!list_empty(&mlxsw_sp->vfids.list)); - WARN_ON(!list_empty(&mlxsw_sp->fids)); + mlxsw_sp_fids_fini(mlxsw_sp); } -static struct mlxsw_config_profile mlxsw_sp_config_profile = { +static const struct mlxsw_config_profile mlxsw_sp_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, .used_max_mid = 1, @@ -3450,7 +3784,7 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .max_fid_offset_flood_tables = 3, .fid_offset_flood_table_size = VLAN_N_VID - 1, .max_fid_flood_tables = 3, - .fid_flood_table_size = MLXSW_SP_VFID_MAX, + .fid_flood_table_size = MLXSW_SP_FID_8021D_MAX, .used_max_ib_mc = 1, .max_ib_mc = 0, .used_max_pkey = 1, @@ -3510,7 +3844,7 @@ static int mlxsw_sp_lower_dev_walk(struct net_device *lower_dev, void *data) return ret; } -static struct mlxsw_sp_port 
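mlxsw_sp_init() now refuses to come up if mlxsw_sp_fw_rev_validate() fails, and the error text plus the MODULE_FIRMWARE() line further down suggest it can flash MLXSW_SP_FW_FILENAME when the running revision is too old. The helper itself is not part of this hunk, so the acceptance rule below is only a guess (same major, minor not older), with placeholder revision numbers:

#include <stdio.h>

struct fw_rev {
        unsigned int major;
        unsigned int minor;
        unsigned int subminor;
};

/*
 * Hypothetical policy: the running firmware must have the same major
 * revision the driver expects and a minor revision that is not older.
 * The real mlxsw_sp_fw_rev_validate() is not shown in this diff.
 */
static int fw_rev_ok(const struct fw_rev *running, const struct fw_rev *required)
{
        if (running->major != required->major)
                return 0;
        return running->minor >= required->minor;
}

int main(void)
{
        const struct fw_rev required = { 13, 1420, 122 };       /* illustrative */
        const struct fw_rev running  = { 13, 1530, 152 };       /* illustrative */

        printf("firmware %s\n", fw_rev_ok(&running, &required) ?
               "acceptable" : "needs flashing");
        return 0;
}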
*mlxsw_sp_port_dev_lower_find(struct net_device *dev) +struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev) { struct mlxsw_sp_port *mlxsw_sp_port; @@ -3531,7 +3865,7 @@ struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev) return mlxsw_sp_port ? mlxsw_sp_port->mlxsw_sp : NULL; } -static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev) +struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev) { struct mlxsw_sp_port *mlxsw_sp_port; @@ -3562,176 +3896,6 @@ void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port) dev_put(mlxsw_sp_port->dev); } -static bool mlxsw_sp_lag_port_fid_member(struct mlxsw_sp_port *lag_port, - u16 fid) -{ - if (mlxsw_sp_fid_is_vfid(fid)) - return mlxsw_sp_port_vport_find_by_fid(lag_port, fid); - else - return test_bit(fid, lag_port->active_vlans); -} - -static bool mlxsw_sp_port_fdb_should_flush(struct mlxsw_sp_port *mlxsw_sp_port, - u16 fid) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - u8 local_port = mlxsw_sp_port->local_port; - u16 lag_id = mlxsw_sp_port->lag_id; - u64 max_lag_members; - int i, count = 0; - - if (!mlxsw_sp_port->lagged) - return true; - - max_lag_members = MLXSW_CORE_RES_GET(mlxsw_sp->core, - MAX_LAG_MEMBERS); - for (i = 0; i < max_lag_members; i++) { - struct mlxsw_sp_port *lag_port; - - lag_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); - if (!lag_port || lag_port->local_port == local_port) - continue; - if (mlxsw_sp_lag_port_fid_member(lag_port, fid)) - count++; - } - - return !count; -} - -static int -mlxsw_sp_port_fdb_flush_by_port_fid(const struct mlxsw_sp_port *mlxsw_sp_port, - u16 fid) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char sfdf_pl[MLXSW_REG_SFDF_LEN]; - - mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID); - mlxsw_reg_sfdf_fid_set(sfdf_pl, fid); - mlxsw_reg_sfdf_port_fid_system_port_set(sfdf_pl, - mlxsw_sp_port->local_port); - - netdev_dbg(mlxsw_sp_port->dev, "FDB flushed using Port=%d, FID=%d\n", - mlxsw_sp_port->local_port, fid); - - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); -} - -static int -mlxsw_sp_port_fdb_flush_by_lag_id_fid(const struct mlxsw_sp_port *mlxsw_sp_port, - u16 fid) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char sfdf_pl[MLXSW_REG_SFDF_LEN]; - - mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID); - mlxsw_reg_sfdf_fid_set(sfdf_pl, fid); - mlxsw_reg_sfdf_lag_fid_lag_id_set(sfdf_pl, mlxsw_sp_port->lag_id); - - netdev_dbg(mlxsw_sp_port->dev, "FDB flushed using LAG ID=%d, FID=%d\n", - mlxsw_sp_port->lag_id, fid); - - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); -} - -int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid) -{ - if (!mlxsw_sp_port_fdb_should_flush(mlxsw_sp_port, fid)) - return 0; - - if (mlxsw_sp_port->lagged) - return mlxsw_sp_port_fdb_flush_by_lag_id_fid(mlxsw_sp_port, - fid); - else - return mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_port, fid); -} - -static void mlxsw_sp_master_bridge_gone_sync(struct mlxsw_sp *mlxsw_sp) -{ - struct mlxsw_sp_fid *f, *tmp; - - list_for_each_entry_safe(f, tmp, &mlxsw_sp->fids, list) - if (--f->ref_count == 0) - mlxsw_sp_fid_destroy(mlxsw_sp, f); - else - WARN_ON_ONCE(1); -} - -static bool mlxsw_sp_master_bridge_check(struct mlxsw_sp *mlxsw_sp, - struct net_device *br_dev) -{ - return !mlxsw_sp->master_bridge.dev || - mlxsw_sp->master_bridge.dev == br_dev; -} - -static void mlxsw_sp_master_bridge_inc(struct mlxsw_sp *mlxsw_sp, - 
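mlxsw_sp_port_dev_lower_find(), now exported above, answers "which switch port sits underneath this bridge/LAG/VLAN device?" by walking the lower-device graph. A recursive userspace sketch of that walk (the kernel version uses netdev_walk_all_lower_dev() with a callback rather than recursion):

#include <stdio.h>

struct device {
        const char *name;
        int is_switch_port;
        const struct device *lowers[4]; /* NULL-terminated list of lower devices */
};

/* Depth-first walk of the lower-device graph, returning the first switch port. */
static const struct device *port_lower_find(const struct device *dev)
{
        int i;

        if (dev->is_switch_port)
                return dev;
        for (i = 0; dev->lowers[i]; i++) {
                const struct device *found = port_lower_find(dev->lowers[i]);

                if (found)
                        return found;
        }
        return NULL;
}

int main(void)
{
        const struct device sw1p1 = { "sw1p1", 1, { NULL } };
        const struct device bond0 = { "bond0", 0, { &sw1p1, NULL } };
        const struct device br0   = { "br0",   0, { &bond0, NULL } };
        const struct device *port = port_lower_find(&br0);

        printf("lower port of %s: %s\n", br0.name, port ? port->name : "none");
        return 0;
}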
struct net_device *br_dev) -{ - mlxsw_sp->master_bridge.dev = br_dev; - mlxsw_sp->master_bridge.ref_count++; -} - -static void mlxsw_sp_master_bridge_dec(struct mlxsw_sp *mlxsw_sp) -{ - if (--mlxsw_sp->master_bridge.ref_count == 0) { - mlxsw_sp->master_bridge.dev = NULL; - /* It's possible upper VLAN devices are still holding - * references to underlying FIDs. Drop the reference - * and release the resources if it was the last one. - * If it wasn't, then something bad happened. - */ - mlxsw_sp_master_bridge_gone_sync(mlxsw_sp); - } -} - -static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, - struct net_device *br_dev) -{ - struct net_device *dev = mlxsw_sp_port->dev; - int err; - - /* When port is not bridged untagged packets are tagged with - * PVID=VID=1, thereby creating an implicit VLAN interface in - * the device. Remove it and let bridge code take care of its - * own VLANs. - */ - err = mlxsw_sp_port_kill_vid(dev, 0, 1); - if (err) - return err; - - mlxsw_sp_master_bridge_inc(mlxsw_sp_port->mlxsw_sp, br_dev); - - mlxsw_sp_port->learning = 1; - mlxsw_sp_port->learning_sync = 1; - mlxsw_sp_port->uc_flood = 1; - mlxsw_sp_port->mc_flood = 1; - mlxsw_sp_port->mc_router = 0; - mlxsw_sp_port->mc_disabled = 1; - mlxsw_sp_port->bridged = 1; - - return 0; -} - -static void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port) -{ - struct net_device *dev = mlxsw_sp_port->dev; - - mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1); - - mlxsw_sp_master_bridge_dec(mlxsw_sp_port->mlxsw_sp); - - mlxsw_sp_port->learning = 0; - mlxsw_sp_port->learning_sync = 0; - mlxsw_sp_port->uc_flood = 0; - mlxsw_sp_port->mc_flood = 0; - mlxsw_sp_port->mc_router = 0; - mlxsw_sp_port->bridged = 0; - - /* Add implicit VLAN interface in the device, so that untagged - * packets will be classified to the default vFID. - */ - mlxsw_sp_port_add_vid(dev, 0, 1); -} - static int mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, u16 lag_id) { char sldr_pl[MLXSW_REG_SLDR_LEN]; @@ -3850,51 +4014,11 @@ static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp, return -EBUSY; } -static void -mlxsw_sp_port_pvid_vport_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, - struct net_device *lag_dev, u16 lag_id) -{ - struct mlxsw_sp_port *mlxsw_sp_vport; - struct mlxsw_sp_fid *f; - - mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, 1); - if (WARN_ON(!mlxsw_sp_vport)) - return; - - /* If vPort is assigned a RIF, then leave it since it's no - * longer valid. 
- */ - f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); - if (f) - f->leave(mlxsw_sp_vport); - - mlxsw_sp_vport->lag_id = lag_id; - mlxsw_sp_vport->lagged = 1; - mlxsw_sp_vport->dev = lag_dev; -} - -static void -mlxsw_sp_port_pvid_vport_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port) -{ - struct mlxsw_sp_port *mlxsw_sp_vport; - struct mlxsw_sp_fid *f; - - mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, 1); - if (WARN_ON(!mlxsw_sp_vport)) - return; - - f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); - if (f) - f->leave(mlxsw_sp_vport); - - mlxsw_sp_vport->dev = mlxsw_sp_port->dev; - mlxsw_sp_vport->lagged = 0; -} - static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *lag_dev) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; struct mlxsw_sp_upper *lag; u16 lag_id; u8 port_index; @@ -3927,7 +4051,10 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port->lagged = 1; lag->ref_count++; - mlxsw_sp_port_pvid_vport_lag_join(mlxsw_sp_port, lag_dev, lag_id); + /* Port is no longer usable as a router interface */ + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, 1); + if (mlxsw_sp_port_vlan->fid) + mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); return 0; @@ -3954,10 +4081,8 @@ static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id); mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); - if (mlxsw_sp_port->bridged) { - mlxsw_sp_port_active_vlans_del(mlxsw_sp_port); - mlxsw_sp_port_bridge_leave(mlxsw_sp_port); - } + /* Any VLANs configured on the port are no longer valid */ + mlxsw_sp_port_vlan_flush(mlxsw_sp_port); if (lag->ref_count == 1) mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); @@ -3967,7 +4092,9 @@ static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port->lagged = 0; lag->ref_count--; - mlxsw_sp_port_pvid_vport_lag_leave(mlxsw_sp_port); + mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1); + /* Make sure untagged frames are allowed to ingress */ + mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1); } static int mlxsw_sp_lag_dist_port_add(struct mlxsw_sp_port *mlxsw_sp_port, @@ -4009,34 +4136,6 @@ static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled); } -static int mlxsw_sp_port_vlan_link(struct mlxsw_sp_port *mlxsw_sp_port, - struct net_device *vlan_dev) -{ - struct mlxsw_sp_port *mlxsw_sp_vport; - u16 vid = vlan_dev_vlan_id(vlan_dev); - - mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); - if (WARN_ON(!mlxsw_sp_vport)) - return -EINVAL; - - mlxsw_sp_vport->dev = vlan_dev; - - return 0; -} - -static void mlxsw_sp_port_vlan_unlink(struct mlxsw_sp_port *mlxsw_sp_port, - struct net_device *vlan_dev) -{ - struct mlxsw_sp_port *mlxsw_sp_vport; - u16 vid = vlan_dev_vlan_id(vlan_dev); - - mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); - if (WARN_ON(!mlxsw_sp_vport)) - return; - - mlxsw_sp_vport->dev = mlxsw_sp_port->dev; -} - static int mlxsw_sp_port_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable) { @@ -4066,9 +4165,12 @@ static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port) { int err; - err = mlxsw_sp_port_stp_set(mlxsw_sp_port, true); + err = mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, true); if (err) return err; + err = mlxsw_sp_port_stp_set(mlxsw_sp_port, true); + if (err) + goto err_port_stp_set; err = 
mlxsw_sp_port_vlan_set(mlxsw_sp_port, 2, VLAN_N_VID - 1, true, false); if (err) @@ -4077,6 +4179,8 @@ static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port) err_port_vlan_set: mlxsw_sp_port_stp_set(mlxsw_sp_port, false); +err_port_stp_set: + mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false); return err; } @@ -4085,9 +4189,11 @@ static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port) mlxsw_sp_port_vlan_set(mlxsw_sp_port, 2, VLAN_N_VID - 1, false, false); mlxsw_sp_port_stp_set(mlxsw_sp_port, false); + mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false); } -static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, +static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, + struct net_device *dev, unsigned long event, void *ptr) { struct netdev_notifier_changeupper_info *info; @@ -4110,9 +4216,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, return -EINVAL; if (!info->linking) break; - /* HW limitation forbids to put ports to multiple bridges. */ - if (netif_is_bridge_master(upper_dev) && - !mlxsw_sp_master_bridge_check(mlxsw_sp, upper_dev)) + if (netdev_has_any_upper_dev(upper_dev)) return -EINVAL; if (netif_is_lag_master(upper_dev) && !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev, @@ -4130,19 +4234,15 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; - if (is_vlan_dev(upper_dev)) { - if (info->linking) - err = mlxsw_sp_port_vlan_link(mlxsw_sp_port, - upper_dev); - else - mlxsw_sp_port_vlan_unlink(mlxsw_sp_port, - upper_dev); - } else if (netif_is_bridge_master(upper_dev)) { + if (netif_is_bridge_master(upper_dev)) { if (info->linking) err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, + lower_dev, upper_dev); else - mlxsw_sp_port_bridge_leave(mlxsw_sp_port); + mlxsw_sp_port_bridge_leave(mlxsw_sp_port, + lower_dev, + upper_dev); } else if (netif_is_lag_master(upper_dev)) { if (info->linking) err = mlxsw_sp_port_lag_join(mlxsw_sp_port, @@ -4155,9 +4255,6 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, err = mlxsw_sp_port_ovs_join(mlxsw_sp_port); else mlxsw_sp_port_ovs_leave(mlxsw_sp_port); - } else { - err = -EINVAL; - WARN_ON(1); } break; } @@ -4189,15 +4286,18 @@ static int mlxsw_sp_netdevice_port_lower_event(struct net_device *dev, return 0; } -static int mlxsw_sp_netdevice_port_event(struct net_device *dev, +static int mlxsw_sp_netdevice_port_event(struct net_device *lower_dev, + struct net_device *port_dev, unsigned long event, void *ptr) { switch (event) { case NETDEV_PRECHANGEUPPER: case NETDEV_CHANGEUPPER: - return mlxsw_sp_netdevice_port_upper_event(dev, event, ptr); + return mlxsw_sp_netdevice_port_upper_event(lower_dev, port_dev, + event, ptr); case NETDEV_CHANGELOWERSTATE: - return mlxsw_sp_netdevice_port_lower_event(dev, event, ptr); + return mlxsw_sp_netdevice_port_lower_event(port_dev, event, + ptr); } return 0; @@ -4212,7 +4312,8 @@ static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev, netdev_for_each_lower_dev(lag_dev, dev, iter) { if (mlxsw_sp_port_dev_check(dev)) { - ret = mlxsw_sp_netdevice_port_event(dev, event, ptr); + ret = mlxsw_sp_netdevice_port_event(lag_dev, dev, event, + ptr); if (ret) return ret; } @@ -4221,299 +4322,16 @@ static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev, return 0; } -static int mlxsw_sp_master_bridge_vlan_link(struct mlxsw_sp *mlxsw_sp, - struct net_device *vlan_dev) -{ - u16 fid = vlan_dev_vlan_id(vlan_dev); - struct 
mlxsw_sp_fid *f; - - f = mlxsw_sp_fid_find(mlxsw_sp, fid); - if (!f) { - f = mlxsw_sp_fid_create(mlxsw_sp, fid); - if (IS_ERR(f)) - return PTR_ERR(f); - } - - f->ref_count++; - - return 0; -} - -static void mlxsw_sp_master_bridge_vlan_unlink(struct mlxsw_sp *mlxsw_sp, - struct net_device *vlan_dev) -{ - u16 fid = vlan_dev_vlan_id(vlan_dev); - struct mlxsw_sp_fid *f; - - f = mlxsw_sp_fid_find(mlxsw_sp, fid); - if (f && f->rif) - mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif); - if (f && --f->ref_count == 0) - mlxsw_sp_fid_destroy(mlxsw_sp, f); -} - -static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev, - unsigned long event, void *ptr) -{ - struct netdev_notifier_changeupper_info *info; - struct net_device *upper_dev; - struct mlxsw_sp *mlxsw_sp; - int err = 0; - - mlxsw_sp = mlxsw_sp_lower_get(br_dev); - if (!mlxsw_sp) - return 0; - - info = ptr; - - switch (event) { - case NETDEV_PRECHANGEUPPER: - upper_dev = info->upper_dev; - if (!is_vlan_dev(upper_dev)) - return -EINVAL; - if (is_vlan_dev(upper_dev) && - br_dev != mlxsw_sp->master_bridge.dev) - return -EINVAL; - break; - case NETDEV_CHANGEUPPER: - upper_dev = info->upper_dev; - if (is_vlan_dev(upper_dev)) { - if (info->linking) - err = mlxsw_sp_master_bridge_vlan_link(mlxsw_sp, - upper_dev); - else - mlxsw_sp_master_bridge_vlan_unlink(mlxsw_sp, - upper_dev); - } else { - err = -EINVAL; - WARN_ON(1); - } - break; - } - - return err; -} - -static u16 mlxsw_sp_avail_vfid_get(const struct mlxsw_sp *mlxsw_sp) -{ - return find_first_zero_bit(mlxsw_sp->vfids.mapped, - MLXSW_SP_VFID_MAX); -} - -static int mlxsw_sp_vfid_op(struct mlxsw_sp *mlxsw_sp, u16 fid, bool create) -{ - char sfmr_pl[MLXSW_REG_SFMR_LEN]; - - mlxsw_reg_sfmr_pack(sfmr_pl, !create, fid, 0); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); -} - -static void mlxsw_sp_vport_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport); - -static struct mlxsw_sp_fid *mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp, - struct net_device *br_dev) -{ - struct device *dev = mlxsw_sp->bus_info->dev; - struct mlxsw_sp_fid *f; - u16 vfid, fid; - int err; - - vfid = mlxsw_sp_avail_vfid_get(mlxsw_sp); - if (vfid == MLXSW_SP_VFID_MAX) { - dev_err(dev, "No available vFIDs\n"); - return ERR_PTR(-ERANGE); - } - - fid = mlxsw_sp_vfid_to_fid(vfid); - err = mlxsw_sp_vfid_op(mlxsw_sp, fid, true); - if (err) { - dev_err(dev, "Failed to create FID=%d\n", fid); - return ERR_PTR(err); - } - - f = kzalloc(sizeof(*f), GFP_KERNEL); - if (!f) - goto err_allocate_vfid; - - f->leave = mlxsw_sp_vport_vfid_leave; - f->fid = fid; - f->dev = br_dev; - - list_add(&f->list, &mlxsw_sp->vfids.list); - set_bit(vfid, mlxsw_sp->vfids.mapped); - - return f; - -err_allocate_vfid: - mlxsw_sp_vfid_op(mlxsw_sp, fid, false); - return ERR_PTR(-ENOMEM); -} - -static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fid *f) -{ - u16 vfid = mlxsw_sp_fid_to_vfid(f->fid); - u16 fid = f->fid; - - clear_bit(vfid, mlxsw_sp->vfids.mapped); - list_del(&f->list); - - if (f->rif) - mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif); - - kfree(f); - - mlxsw_sp_vfid_op(mlxsw_sp, fid, false); -} - -static int mlxsw_sp_vport_fid_map(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, - bool valid) -{ - enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; - u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); - - return mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, mt, valid, fid, - vid); -} - -static int mlxsw_sp_vport_vfid_join(struct mlxsw_sp_port *mlxsw_sp_vport, - struct net_device *br_dev) -{ - 
struct mlxsw_sp_fid *f; - int err; - - f = mlxsw_sp_vfid_find(mlxsw_sp_vport->mlxsw_sp, br_dev); - if (!f) { - f = mlxsw_sp_vfid_create(mlxsw_sp_vport->mlxsw_sp, br_dev); - if (IS_ERR(f)) - return PTR_ERR(f); - } - - err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, f->fid, true); - if (err) - goto err_vport_flood_set; - - err = mlxsw_sp_vport_fid_map(mlxsw_sp_vport, f->fid, true); - if (err) - goto err_vport_fid_map; - - mlxsw_sp_vport_fid_set(mlxsw_sp_vport, f); - f->ref_count++; - - netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", f->fid); - - return 0; - -err_vport_fid_map: - mlxsw_sp_vport_flood_set(mlxsw_sp_vport, f->fid, false); -err_vport_flood_set: - if (!f->ref_count) - mlxsw_sp_vfid_destroy(mlxsw_sp_vport->mlxsw_sp, f); - return err; -} - -static void mlxsw_sp_vport_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport) -{ - struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); - - netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid); - - mlxsw_sp_vport_fid_map(mlxsw_sp_vport, f->fid, false); - - mlxsw_sp_vport_flood_set(mlxsw_sp_vport, f->fid, false); - - mlxsw_sp_port_fdb_flush(mlxsw_sp_vport, f->fid); - - mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL); - if (--f->ref_count == 0) - mlxsw_sp_vfid_destroy(mlxsw_sp_vport->mlxsw_sp, f); -} - -static int mlxsw_sp_vport_bridge_join(struct mlxsw_sp_port *mlxsw_sp_vport, - struct net_device *br_dev) -{ - struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); - u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); - struct net_device *dev = mlxsw_sp_vport->dev; - int err; - - if (f && !WARN_ON(!f->leave)) - f->leave(mlxsw_sp_vport); - - err = mlxsw_sp_vport_vfid_join(mlxsw_sp_vport, br_dev); - if (err) { - netdev_err(dev, "Failed to join vFID\n"); - return err; - } - - err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); - if (err) { - netdev_err(dev, "Failed to enable learning\n"); - goto err_port_vid_learning_set; - } - - mlxsw_sp_vport->learning = 1; - mlxsw_sp_vport->learning_sync = 1; - mlxsw_sp_vport->uc_flood = 1; - mlxsw_sp_vport->mc_flood = 1; - mlxsw_sp_vport->mc_router = 0; - mlxsw_sp_vport->mc_disabled = 1; - mlxsw_sp_vport->bridged = 1; - - return 0; - -err_port_vid_learning_set: - mlxsw_sp_vport_vfid_leave(mlxsw_sp_vport); - return err; -} - -static void mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport) -{ - u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); - - mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); - - mlxsw_sp_vport_vfid_leave(mlxsw_sp_vport); - - mlxsw_sp_vport->learning = 0; - mlxsw_sp_vport->learning_sync = 0; - mlxsw_sp_vport->uc_flood = 0; - mlxsw_sp_vport->mc_flood = 0; - mlxsw_sp_vport->mc_router = 0; - mlxsw_sp_vport->bridged = 0; -} - -static bool -mlxsw_sp_port_master_bridge_check(const struct mlxsw_sp_port *mlxsw_sp_port, - const struct net_device *br_dev) -{ - struct mlxsw_sp_port *mlxsw_sp_vport; - - list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list, - vport.list) { - struct net_device *dev = mlxsw_sp_vport_dev_get(mlxsw_sp_vport); - - if (dev && dev == br_dev) - return false; - } - - return true; -} - -static int mlxsw_sp_netdevice_vport_event(struct net_device *dev, - unsigned long event, void *ptr, - u16 vid) +static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, + struct net_device *dev, + unsigned long event, void *ptr, + u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct netdev_notifier_changeupper_info *info = ptr; - struct mlxsw_sp_port *mlxsw_sp_vport; struct net_device *upper_dev; 
int err = 0; - mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); - if (!mlxsw_sp_vport) - return 0; - switch (event) { case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; @@ -4521,22 +4339,20 @@ static int mlxsw_sp_netdevice_vport_event(struct net_device *dev, return -EINVAL; if (!info->linking) break; - /* We can't have multiple VLAN interfaces configured on - * the same port and being members in the same bridge. - */ - if (netif_is_bridge_master(upper_dev) && - !mlxsw_sp_port_master_bridge_check(mlxsw_sp_port, - upper_dev)) + if (netdev_has_any_upper_dev(upper_dev)) return -EINVAL; break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; if (netif_is_bridge_master(upper_dev)) { if (info->linking) - err = mlxsw_sp_vport_bridge_join(mlxsw_sp_vport, - upper_dev); + err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, + vlan_dev, + upper_dev); else - mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport); + mlxsw_sp_port_bridge_leave(mlxsw_sp_port, + vlan_dev, + upper_dev); } else { err = -EINVAL; WARN_ON(1); @@ -4547,9 +4363,10 @@ static int mlxsw_sp_netdevice_vport_event(struct net_device *dev, return err; } -static int mlxsw_sp_netdevice_lag_vport_event(struct net_device *lag_dev, - unsigned long event, void *ptr, - u16 vid) +static int mlxsw_sp_netdevice_lag_port_vlan_event(struct net_device *vlan_dev, + struct net_device *lag_dev, + unsigned long event, + void *ptr, u16 vid) { struct net_device *dev; struct list_head *iter; @@ -4557,8 +4374,9 @@ static int mlxsw_sp_netdevice_lag_vport_event(struct net_device *lag_dev, netdev_for_each_lower_dev(lag_dev, dev, iter) { if (mlxsw_sp_port_dev_check(dev)) { - ret = mlxsw_sp_netdevice_vport_event(dev, event, ptr, - vid); + ret = mlxsw_sp_netdevice_port_vlan_event(vlan_dev, dev, + event, ptr, + vid); if (ret) return ret; } @@ -4574,11 +4392,12 @@ static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev, u16 vid = vlan_dev_vlan_id(vlan_dev); if (mlxsw_sp_port_dev_check(real_dev)) - return mlxsw_sp_netdevice_vport_event(real_dev, event, ptr, - vid); + return mlxsw_sp_netdevice_port_vlan_event(vlan_dev, real_dev, + event, ptr, vid); else if (netif_is_lag_master(real_dev)) - return mlxsw_sp_netdevice_lag_vport_event(real_dev, event, ptr, - vid); + return mlxsw_sp_netdevice_lag_port_vlan_event(vlan_dev, + real_dev, event, + ptr, vid); return 0; } @@ -4603,11 +4422,9 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, else if (mlxsw_sp_is_vrf_event(event, ptr)) err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); else if (mlxsw_sp_port_dev_check(dev)) - err = mlxsw_sp_netdevice_port_event(dev, event, ptr); + err = mlxsw_sp_netdevice_port_event(dev, dev, event, ptr); else if (netif_is_lag_master(dev)) err = mlxsw_sp_netdevice_lag_event(dev, event, ptr); - else if (netif_is_bridge_master(dev)) - err = mlxsw_sp_netdevice_bridge_event(dev, event, ptr); else if (is_vlan_dev(dev)) err = mlxsw_sp_netdevice_vlan_event(dev, event, ptr); @@ -4623,6 +4440,10 @@ static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = { .priority = 10, /* Must be called before FIB notifier block */ }; +static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = { + .notifier_call = mlxsw_sp_inet6addr_event, +}; + static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = { .notifier_call = mlxsw_sp_router_netevent_event, }; @@ -4643,6 +4464,7 @@ static int __init mlxsw_sp_module_init(void) register_netdevice_notifier(&mlxsw_sp_netdevice_nb); register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); + 
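mlxsw_sp_netdevice_event() above stays a single entry point that fans out by what kind of netdev the event hit: physical port, LAG, or 802.1Q upper, which is first resolved back to its real device and VID. A compressed sketch of that fan-out (device kinds and handlers are stand-ins):

#include <stdio.h>

enum dev_kind { DEV_PORT, DEV_LAG, DEV_VLAN, DEV_OTHER };

struct net_dev {
        const char *name;
        enum dev_kind kind;
        struct net_dev *real_dev;       /* for VLAN uppers */
        unsigned short vid;             /* for VLAN uppers */
};

static int port_event(struct net_dev *dev)
{
        printf("port event on %s\n", dev->name);
        return 0;
}

static int lag_event(struct net_dev *dev)
{
        printf("lag event on %s\n", dev->name);
        return 0;
}

static int port_vlan_event(struct net_dev *vlan_dev, struct net_dev *dev,
                           unsigned short vid)
{
        printf("vlan %u event on %s (via %s)\n", vid, dev->name, vlan_dev->name);
        return 0;
}

/* Route one netdevice notification to the handler for the device's kind. */
static int netdevice_event(struct net_dev *dev)
{
        switch (dev->kind) {
        case DEV_PORT:
                return port_event(dev);
        case DEV_LAG:
                return lag_event(dev);
        case DEV_VLAN:
                /* resolve the VLAN upper back to its real device first */
                if (dev->real_dev && dev->real_dev->kind == DEV_PORT)
                        return port_vlan_event(dev, dev->real_dev, dev->vid);
                return 0;
        default:
                return 0;
        }
}

int main(void)
{
        struct net_dev sw1p1 = { "sw1p1", DEV_PORT, NULL, 0 };
        struct net_dev vlan10 = { "sw1p1.10", DEV_VLAN, &sw1p1, 10 };

        netdevice_event(&sw1p1);
        netdevice_event(&vlan10);
        return 0;
}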
register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); register_netevent_notifier(&mlxsw_sp_router_netevent_nb); err = mlxsw_core_driver_register(&mlxsw_sp_driver); @@ -4659,6 +4481,7 @@ err_pci_driver_register: mlxsw_core_driver_unregister(&mlxsw_sp_driver); err_core_driver_register: unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); + unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); return err; @@ -4669,6 +4492,7 @@ static void __exit mlxsw_sp_module_exit(void) mlxsw_pci_driver_unregister(&mlxsw_sp_pci_driver); mlxsw_core_driver_unregister(&mlxsw_sp_driver); unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); + unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); } @@ -4680,3 +4504,4 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>"); MODULE_DESCRIPTION("Mellanox Spectrum driver"); MODULE_DEVICE_TABLE(pci, mlxsw_sp_pci_id_table); +MODULE_FIRMWARE(MLXSW_SP_FW_FILENAME); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 0c23bc1e946d..84ce83acdc19 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -54,12 +54,7 @@ #include "core_acl_flex_keys.h" #include "core_acl_flex_actions.h" -#define MLXSW_SP_VFID_BASE VLAN_N_VID -#define MLXSW_SP_VFID_MAX 1024 /* Bridged VLAN interfaces */ - -#define MLXSW_SP_DUMMY_FID 15359 - -#define MLXSW_SP_RFID_BASE 15360 +#define MLXSW_SP_FID_8021D_MAX 1024 #define MLXSW_SP_MID_MAX 7000 @@ -78,13 +73,20 @@ struct mlxsw_sp_upper { unsigned int ref_count; }; -struct mlxsw_sp_fid { - void (*leave)(struct mlxsw_sp_port *mlxsw_sp_vport); - struct list_head list; - unsigned int ref_count; - struct net_device *dev; - struct mlxsw_sp_rif *rif; - u16 fid; +enum mlxsw_sp_rif_type { + MLXSW_SP_RIF_TYPE_SUBPORT, + MLXSW_SP_RIF_TYPE_VLAN, + MLXSW_SP_RIF_TYPE_FID, + MLXSW_SP_RIF_TYPE_IPIP_LB, /* IP-in-IP loopback. 
*/ + MLXSW_SP_RIF_TYPE_MAX, +}; + +enum mlxsw_sp_fid_type { + MLXSW_SP_FID_TYPE_8021Q, + MLXSW_SP_FID_TYPE_8021D, + MLXSW_SP_FID_TYPE_RFID, + MLXSW_SP_FID_TYPE_DUMMY, + MLXSW_SP_FID_TYPE_MAX, }; struct mlxsw_sp_mid { @@ -95,85 +97,6 @@ struct mlxsw_sp_mid { unsigned int ref_count; }; -static inline u16 mlxsw_sp_vfid_to_fid(u16 vfid) -{ - return MLXSW_SP_VFID_BASE + vfid; -} - -static inline u16 mlxsw_sp_fid_to_vfid(u16 fid) -{ - return fid - MLXSW_SP_VFID_BASE; -} - -static inline bool mlxsw_sp_fid_is_vfid(u16 fid) -{ - return fid >= MLXSW_SP_VFID_BASE && fid < MLXSW_SP_DUMMY_FID; -} - -struct mlxsw_sp_sb_pr { - enum mlxsw_reg_sbpr_mode mode; - u32 size; -}; - -struct mlxsw_cp_sb_occ { - u32 cur; - u32 max; -}; - -struct mlxsw_sp_sb_cm { - u32 min_buff; - u32 max_buff; - u8 pool; - struct mlxsw_cp_sb_occ occ; -}; - -struct mlxsw_sp_sb_pm { - u32 min_buff; - u32 max_buff; - struct mlxsw_cp_sb_occ occ; -}; - -#define MLXSW_SP_SB_POOL_COUNT 4 -#define MLXSW_SP_SB_TC_COUNT 8 - -struct mlxsw_sp_sb_port { - struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT]; - struct mlxsw_sp_sb_pm pms[2][MLXSW_SP_SB_POOL_COUNT]; -}; - -struct mlxsw_sp_sb { - struct mlxsw_sp_sb_pr prs[2][MLXSW_SP_SB_POOL_COUNT]; - struct mlxsw_sp_sb_port *ports; - u32 cell_size; -}; - -#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE) - -struct mlxsw_sp_prefix_usage { - DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT); -}; - -enum mlxsw_sp_l3proto { - MLXSW_SP_L3_PROTO_IPV4, - MLXSW_SP_L3_PROTO_IPV6, -}; - -struct mlxsw_sp_lpm_tree { - u8 id; /* tree ID */ - unsigned int ref_count; - enum mlxsw_sp_l3proto proto; - struct mlxsw_sp_prefix_usage prefix_usage; -}; - -struct mlxsw_sp_fib; - -struct mlxsw_sp_vr { - u16 id; /* virtual router ID */ - u32 tb_id; /* kernel fib table id */ - unsigned int rif_count; - struct mlxsw_sp_fib *fib4; -}; - enum mlxsw_sp_span_type { MLXSW_SP_SPAN_EGRESS, MLXSW_SP_SPAN_INGRESS @@ -212,58 +135,25 @@ struct mlxsw_sp_port_mall_tc_entry { }; }; -struct mlxsw_sp_router { - struct mlxsw_sp_vr *vrs; - struct rhashtable neigh_ht; - struct rhashtable nexthop_group_ht; - struct rhashtable nexthop_ht; - struct { - struct mlxsw_sp_lpm_tree *trees; - unsigned int tree_count; - } lpm; - struct { - struct delayed_work dw; - unsigned long interval; /* ms */ - } neighs_update; - struct delayed_work nexthop_probe_dw; -#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ - struct list_head nexthop_neighs_list; - bool aborted; -}; - +struct mlxsw_sp_sb; +struct mlxsw_sp_bridge; +struct mlxsw_sp_router; struct mlxsw_sp_acl; struct mlxsw_sp_counter_pool; +struct mlxsw_sp_fid_core; struct mlxsw_sp { - struct { - struct list_head list; - DECLARE_BITMAP(mapped, MLXSW_SP_VFID_MAX); - } vfids; - struct { - struct list_head list; - DECLARE_BITMAP(mapped, MLXSW_SP_MID_MAX); - } br_mids; - struct list_head fids; /* VLAN-aware bridge FIDs */ - struct mlxsw_sp_rif **rifs; struct mlxsw_sp_port **ports; struct mlxsw_core *core; const struct mlxsw_bus_info *bus_info; unsigned char base_mac[ETH_ALEN]; - struct { - struct delayed_work dw; -#define MLXSW_SP_DEFAULT_LEARNING_INTERVAL 100 - unsigned int interval; /* ms */ - } fdb_notify; -#define MLXSW_SP_MIN_AGEING_TIME 10 -#define MLXSW_SP_MAX_AGEING_TIME 1000000 -#define MLXSW_SP_DEFAULT_AGEING_TIME 300 - u32 ageing_time; - struct mlxsw_sp_upper master_bridge; struct mlxsw_sp_upper *lags; u8 *port_to_module; - struct mlxsw_sp_sb sb; - struct mlxsw_sp_router router; + struct mlxsw_sp_sb *sb; + struct mlxsw_sp_bridge *bridge; + struct mlxsw_sp_router *router; struct 
mlxsw_sp_acl *acl; + struct mlxsw_sp_fid_core *fid_core; struct { DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE); } kvdl; @@ -273,7 +163,6 @@ struct mlxsw_sp { struct mlxsw_sp_span_entry *entries; int entries_count; } span; - struct notifier_block fib_nb; }; static inline struct mlxsw_sp_upper * @@ -282,18 +171,6 @@ mlxsw_sp_lag_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id) return &mlxsw_sp->lags[lag_id]; } -static inline u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, - u32 cells) -{ - return mlxsw_sp->sb.cell_size * cells; -} - -static inline u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, - u32 bytes) -{ - return DIV_ROUND_UP(bytes, mlxsw_sp->sb.cell_size); -} - struct mlxsw_sp_port_pcpu_stats { u64 rx_packets; u64 rx_bytes; @@ -310,29 +187,28 @@ struct mlxsw_sp_port_sample { bool truncate; }; +struct mlxsw_sp_bridge_port; +struct mlxsw_sp_fid; + +struct mlxsw_sp_port_vlan { + struct list_head list; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp_fid *fid; + u16 vid; + struct mlxsw_sp_bridge_port *bridge_port; + struct list_head bridge_vlan_node; +}; + struct mlxsw_sp_port { struct net_device *dev; struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats; struct mlxsw_sp *mlxsw_sp; u8 local_port; - u8 stp_state; - u16 learning:1, - learning_sync:1, - uc_flood:1, - mc_flood:1, - mc_router:1, - mc_disabled:1, - bridged:1, - lagged:1, + u8 lagged:1, split:1; u16 pvid; u16 lag_id; struct { - struct list_head list; - struct mlxsw_sp_fid *f; - u16 vid; - } vport; - struct { u8 tx_pause:1, rx_pause:1, autoneg:1; @@ -347,11 +223,6 @@ struct mlxsw_sp_port { u8 width; u8 lane; } mapping; - /* 802.1Q bridge VLANs */ - unsigned long *active_vlans; - unsigned long *untagged_vlans; - /* VLAN interfaces */ - struct list_head vports_list; /* TC handles */ struct list_head mall_tc_list; struct { @@ -360,13 +231,9 @@ struct mlxsw_sp_port { struct delayed_work update_dw; } hw_stats; struct mlxsw_sp_port_sample *sample; + struct list_head vlans_list; }; -bool mlxsw_sp_port_dev_check(const struct net_device *dev); -struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev); -struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev); -void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port); - static inline bool mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port) { @@ -385,102 +252,28 @@ mlxsw_sp_port_lagged_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 port_index) return mlxsw_sp_port && mlxsw_sp_port->lagged ? mlxsw_sp_port : NULL; } -static inline u16 -mlxsw_sp_vport_vid_get(const struct mlxsw_sp_port *mlxsw_sp_vport) -{ - return mlxsw_sp_vport->vport.vid; -} - -static inline bool -mlxsw_sp_port_is_vport(const struct mlxsw_sp_port *mlxsw_sp_port) -{ - u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port); - - return vid != 0; -} - -static inline void mlxsw_sp_vport_fid_set(struct mlxsw_sp_port *mlxsw_sp_vport, - struct mlxsw_sp_fid *f) -{ - mlxsw_sp_vport->vport.f = f; -} - -static inline struct mlxsw_sp_fid * -mlxsw_sp_vport_fid_get(const struct mlxsw_sp_port *mlxsw_sp_vport) -{ - return mlxsw_sp_vport->vport.f; -} - -static inline struct net_device * -mlxsw_sp_vport_dev_get(const struct mlxsw_sp_port *mlxsw_sp_vport) +static inline struct mlxsw_sp_port_vlan * +mlxsw_sp_port_vlan_find_by_vid(const struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid) { - struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; - return f ? 
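struct mlxsw_sp above now holds only pointers to forward-declared per-subsystem state (sb, bridge, router, fid_core), so each subsystem's layout and helpers such as mlxsw_sp_cells_bytes() move behind its own .c file. A one-file sketch of that opaque-pointer split (names and the cell size below are made up):

#include <stdio.h>
#include <stdlib.h>

/* "header" side: users only see a forward declaration and a pointer */
struct subsystem;                       /* layout hidden from users */

struct switch_core {
        struct subsystem *sb;           /* owned by the buffers code */
};

/* "implementation" side: only this part knows the real layout */
struct subsystem {
        unsigned int cell_size;
};

static int subsystem_init(struct switch_core *core)
{
        core->sb = calloc(1, sizeof(*core->sb));
        if (!core->sb)
                return -1;
        core->sb->cell_size = 96;       /* illustrative value */
        return 0;
}

/* accessor instead of letting callers poke at the fields directly */
static unsigned int subsystem_cells_bytes(const struct switch_core *core,
                                          unsigned int cells)
{
        return core->sb->cell_size * cells;
}

static void subsystem_fini(struct switch_core *core)
{
        free(core->sb);
}

int main(void)
{
        struct switch_core core;

        if (subsystem_init(&core))
                return 1;
        printf("10 cells = %u bytes\n", subsystem_cells_bytes(&core, 10));
        subsystem_fini(&core);
        return 0;
}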
f->dev : NULL; -} - -static inline struct mlxsw_sp_port * -mlxsw_sp_port_vport_find(const struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) -{ - struct mlxsw_sp_port *mlxsw_sp_vport; - - list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list, - vport.list) { - if (mlxsw_sp_vport_vid_get(mlxsw_sp_vport) == vid) - return mlxsw_sp_vport; + list_for_each_entry(mlxsw_sp_port_vlan, &mlxsw_sp_port->vlans_list, + list) { + if (mlxsw_sp_port_vlan->vid == vid) + return mlxsw_sp_port_vlan; } return NULL; } -static inline struct mlxsw_sp_port * -mlxsw_sp_port_vport_find_by_fid(const struct mlxsw_sp_port *mlxsw_sp_port, - u16 fid) -{ - struct mlxsw_sp_port *mlxsw_sp_vport; - - list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list, - vport.list) { - struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); - - if (f && f->fid == fid) - return mlxsw_sp_vport; - } - - return NULL; -} - -static inline struct mlxsw_sp_fid *mlxsw_sp_fid_find(struct mlxsw_sp *mlxsw_sp, - u16 fid) -{ - struct mlxsw_sp_fid *f; - - list_for_each_entry(f, &mlxsw_sp->fids, list) - if (f->fid == fid) - return f; - - return NULL; -} - -static inline struct mlxsw_sp_fid * -mlxsw_sp_vfid_find(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *br_dev) -{ - struct mlxsw_sp_fid *f; - - list_for_each_entry(f, &mlxsw_sp->vfids.list, list) - if (f->dev == br_dev) - return f; - - return NULL; -} - -enum mlxsw_sp_flood_table { - MLXSW_SP_FLOOD_TABLE_UC, - MLXSW_SP_FLOOD_TABLE_BC, - MLXSW_SP_FLOOD_TABLE_MC, +enum mlxsw_sp_flood_type { + MLXSW_SP_FLOOD_TYPE_UC, + MLXSW_SP_FLOOD_TYPE_BC, + MLXSW_SP_FLOOD_TYPE_MC, }; +/* spectrum_buffers.c */ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port); @@ -515,26 +308,26 @@ int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, unsigned int sb_index, u16 tc_index, enum devlink_sb_pool_type pool_type, u32 *p_cur, u32 *p_max); +u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells); +u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes); +/* spectrum_switchdev.c */ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp); -int mlxsw_sp_port_vlan_init(struct mlxsw_sp_port *mlxsw_sp_port); void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port); void mlxsw_sp_port_switchdev_fini(struct mlxsw_sp_port *mlxsw_sp_port); -int mlxsw_sp_port_vid_to_fid_set(struct mlxsw_sp_port *mlxsw_sp_port, - enum mlxsw_reg_svfa_mt mt, bool valid, u16 fid, - u16 vid); -int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, - u16 vid_end, bool is_member, bool untagged); -int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, - bool set); -void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port); -int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); -int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid); int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, bool adding); -struct mlxsw_sp_fid *mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid); -void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f); +void +mlxsw_sp_port_vlan_bridge_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); +int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *brport_dev, + struct net_device 
*br_dev); +void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *brport_dev, + struct net_device *br_dev); + +/* spectrum.c */ int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, bool dwrr, u8 dwrr_weight); @@ -546,27 +339,45 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, u32 maxrate); -int __mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid_begin, u16 vid_end, - bool learn_enable); +int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, + u8 state); +int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable); +int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, + bool learn_enable); +int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); +struct mlxsw_sp_port_vlan * +mlxsw_sp_port_vlan_get(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); +void mlxsw_sp_port_vlan_put(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); +int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, + u16 vid_end, bool is_member, bool untagged); +int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index, u64 *packets, + u64 *bytes); +int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp, + unsigned int *p_counter_index); +void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index); +bool mlxsw_sp_port_dev_check(const struct net_device *dev); +struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev); +struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev); +struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev); +void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port); +struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev); +/* spectrum_dcb.c */ #ifdef CONFIG_MLXSW_SPECTRUM_DCB - int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port); void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port); - #else - static inline int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port) { return 0; } - static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) {} - #endif +/* spectrum_router.c */ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_router_netevent_event(struct notifier_block *unused, @@ -574,17 +385,19 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, int mlxsw_sp_netdevice_router_port_event(struct net_device *dev); int mlxsw_sp_inetaddr_event(struct notifier_block *unused, unsigned long event, void *ptr); -void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif); +int mlxsw_sp_inet6addr_event(struct notifier_block *unused, + unsigned long event, void *ptr); int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, struct netdev_notifier_changeupper_info *info); +void +mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); +void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); +/* spectrum_kvdl.c */ int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count, u32 *p_entry_index); void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index); -struct mlxsw_afk 
*mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl); - struct mlxsw_sp_acl_rule_info { unsigned int priority; struct mlxsw_afk_element_values values; @@ -605,6 +418,7 @@ struct mlxsw_sp_acl_profile_ops { int (*ruleset_bind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, struct net_device *dev, bool ingress); void (*ruleset_unbind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv); + u16 (*ruleset_group_id)(void *ruleset_priv); size_t rule_priv_size; int (*rule_add)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, void *rule_priv, @@ -625,12 +439,19 @@ struct mlxsw_sp_acl_ops { struct mlxsw_sp_acl_ruleset; +/* spectrum_acl.c */ +struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl); struct mlxsw_sp_acl_ruleset * -mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, - struct net_device *dev, bool ingress, +mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp, struct net_device *dev, + bool ingress, u32 chain_index, + enum mlxsw_sp_acl_profile profile); +struct mlxsw_sp_acl_ruleset * +mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, struct net_device *dev, + bool ingress, u32 chain_index, enum mlxsw_sp_acl_profile profile); void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ruleset *ruleset); +u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset); struct mlxsw_sp_acl_rule_info * mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl); @@ -649,6 +470,7 @@ void mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei); void mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, u16 group_id); int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei); +int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct net_device *out_dev); @@ -683,23 +505,48 @@ int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule *rule, u64 *packets, u64 *bytes, u64 *last_use); +struct mlxsw_sp_fid *mlxsw_sp_acl_dummy_fid(struct mlxsw_sp *mlxsw_sp); + int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp); +/* spectrum_acl_tcam.c */ extern const struct mlxsw_sp_acl_ops mlxsw_sp_acl_tcam_ops; +/* spectrum_flower.c */ int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, - __be16 protocol, struct tc_cls_flower_offload *f); + struct tc_cls_flower_offload *f); void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, struct tc_cls_flower_offload *f); int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, struct tc_cls_flower_offload *f); -int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, - unsigned int counter_index, u64 *packets, - u64 *bytes); -int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp, - unsigned int *p_counter_index); -void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp, - unsigned int counter_index); + +/* spectrum_fid.c */ +int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid, + enum mlxsw_sp_flood_type packet_type, u8 local_port, + bool member); +int mlxsw_sp_fid_port_vid_map(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); +void mlxsw_sp_fid_port_vid_unmap(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); +enum mlxsw_sp_rif_type mlxsw_sp_fid_rif_type(const struct mlxsw_sp_fid *fid); +u16 mlxsw_sp_fid_index(const struct mlxsw_sp_fid *fid); +enum mlxsw_sp_fid_type mlxsw_sp_fid_type(const struct mlxsw_sp_fid *fid); 
+void mlxsw_sp_fid_rif_set(struct mlxsw_sp_fid *fid, struct mlxsw_sp_rif *rif); +enum mlxsw_sp_rif_type +mlxsw_sp_fid_type_rif_type(const struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_fid_type type); +u16 mlxsw_sp_fid_8021q_vid(const struct mlxsw_sp_fid *fid); +struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_get(struct mlxsw_sp *mlxsw_sp, u16 vid); +struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp, + int br_ifindex); +struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp, + u16 rif_index); +struct mlxsw_sp_fid *mlxsw_sp_fid_dummy_get(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_fid_put(struct mlxsw_sp_fid *fid); +int mlxsw_sp_port_fids_init(struct mlxsw_sp_port *mlxsw_sp_port); +void mlxsw_sp_port_fids_fini(struct mlxsw_sp_port *mlxsw_sp_port); +int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 317f7b14627f..4b2455e3e079 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -53,6 +53,7 @@ struct mlxsw_sp_acl { struct mlxsw_sp *mlxsw_sp; struct mlxsw_afk *afk; struct mlxsw_afa *afa; + struct mlxsw_sp_fid *dummy_fid; const struct mlxsw_sp_acl_ops *ops; struct rhashtable ruleset_ht; struct list_head rules; @@ -73,6 +74,7 @@ struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl) struct mlxsw_sp_acl_ruleset_ht_key { struct net_device *dev; /* dev this ruleset is bound to */ bool ingress; + u32 chain_index; const struct mlxsw_sp_acl_profile_ops *ops; }; @@ -112,6 +114,11 @@ static const struct rhashtable_params mlxsw_sp_acl_rule_ht_params = { .automatic_shrinking = true, }; +struct mlxsw_sp_fid *mlxsw_sp_acl_dummy_fid(struct mlxsw_sp *mlxsw_sp) +{ + return mlxsw_sp->acl->dummy_fid; +} + static struct mlxsw_sp_acl_ruleset * mlxsw_sp_acl_ruleset_create(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_acl_profile_ops *ops) @@ -157,7 +164,8 @@ static void mlxsw_sp_acl_ruleset_destroy(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ruleset *ruleset, - struct net_device *dev, bool ingress) + struct net_device *dev, bool ingress, + u32 chain_index) { const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; struct mlxsw_sp_acl *acl = mlxsw_sp->acl; @@ -165,13 +173,20 @@ static int mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp, ruleset->ht_key.dev = dev; ruleset->ht_key.ingress = ingress; + ruleset->ht_key.chain_index = chain_index; err = rhashtable_insert_fast(&acl->ruleset_ht, &ruleset->ht_node, mlxsw_sp_acl_ruleset_ht_params); if (err) return err; - err = ops->ruleset_bind(mlxsw_sp, ruleset->priv, dev, ingress); - if (err) - goto err_ops_ruleset_bind; + if (!ruleset->ht_key.chain_index) { + /* We only need ruleset with chain index 0, the implicit one, + * to be directly bound to device. The rest of the rulesets + * are bound by "Goto action set". 
+ */ + err = ops->ruleset_bind(mlxsw_sp, ruleset->priv, dev, ingress); + if (err) + goto err_ops_ruleset_bind; + } return 0; err_ops_ruleset_bind: @@ -186,7 +201,8 @@ static void mlxsw_sp_acl_ruleset_unbind(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; struct mlxsw_sp_acl *acl = mlxsw_sp->acl; - ops->ruleset_unbind(mlxsw_sp, ruleset->priv); + if (!ruleset->ht_key.chain_index) + ops->ruleset_unbind(mlxsw_sp, ruleset->priv); rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node, mlxsw_sp_acl_ruleset_ht_params); } @@ -205,14 +221,48 @@ static void mlxsw_sp_acl_ruleset_ref_dec(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_ruleset_destroy(mlxsw_sp, ruleset); } +static struct mlxsw_sp_acl_ruleset * +__mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp_acl *acl, struct net_device *dev, + bool ingress, u32 chain_index, + const struct mlxsw_sp_acl_profile_ops *ops) +{ + struct mlxsw_sp_acl_ruleset_ht_key ht_key; + + memset(&ht_key, 0, sizeof(ht_key)); + ht_key.dev = dev; + ht_key.ingress = ingress; + ht_key.chain_index = chain_index; + ht_key.ops = ops; + return rhashtable_lookup_fast(&acl->ruleset_ht, &ht_key, + mlxsw_sp_acl_ruleset_ht_params); +} + struct mlxsw_sp_acl_ruleset * -mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, - struct net_device *dev, bool ingress, +mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp, struct net_device *dev, + bool ingress, u32 chain_index, + enum mlxsw_sp_acl_profile profile) +{ + const struct mlxsw_sp_acl_profile_ops *ops; + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + struct mlxsw_sp_acl_ruleset *ruleset; + + ops = acl->ops->profile_ops(mlxsw_sp, profile); + if (!ops) + return ERR_PTR(-EINVAL); + ruleset = __mlxsw_sp_acl_ruleset_lookup(acl, dev, ingress, + chain_index, ops); + if (!ruleset) + return ERR_PTR(-ENOENT); + return ruleset; +} + +struct mlxsw_sp_acl_ruleset * +mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, struct net_device *dev, + bool ingress, u32 chain_index, enum mlxsw_sp_acl_profile profile) { const struct mlxsw_sp_acl_profile_ops *ops; struct mlxsw_sp_acl *acl = mlxsw_sp->acl; - struct mlxsw_sp_acl_ruleset_ht_key ht_key; struct mlxsw_sp_acl_ruleset *ruleset; int err; @@ -220,12 +270,8 @@ mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, if (!ops) return ERR_PTR(-EINVAL); - memset(&ht_key, 0, sizeof(ht_key)); - ht_key.dev = dev; - ht_key.ingress = ingress; - ht_key.ops = ops; - ruleset = rhashtable_lookup_fast(&acl->ruleset_ht, &ht_key, - mlxsw_sp_acl_ruleset_ht_params); + ruleset = __mlxsw_sp_acl_ruleset_lookup(acl, dev, ingress, + chain_index, ops); if (ruleset) { mlxsw_sp_acl_ruleset_ref_inc(ruleset); return ruleset; @@ -233,7 +279,8 @@ mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, ruleset = mlxsw_sp_acl_ruleset_create(mlxsw_sp, ops); if (IS_ERR(ruleset)) return ruleset; - err = mlxsw_sp_acl_ruleset_bind(mlxsw_sp, ruleset, dev, ingress); + err = mlxsw_sp_acl_ruleset_bind(mlxsw_sp, ruleset, dev, + ingress, chain_index); if (err) goto err_ruleset_bind; return ruleset; @@ -249,6 +296,13 @@ void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset); } +u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset) +{ + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + + return ops->ruleset_group_id(ruleset->priv); +} + static int mlxsw_sp_acl_rulei_counter_alloc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei) @@ -341,6 +395,11 @@ int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei) 
return mlxsw_afa_block_append_drop(rulei->act_block); } +int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei) +{ + return mlxsw_afa_block_append_trap(rulei->act_block); +} + int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct net_device *out_dev) @@ -358,7 +417,7 @@ int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, local_port = mlxsw_sp_port->local_port; in_port = false; } else { - /* If out_dev is NULL, the called wants to + /* If out_dev is NULL, the caller wants to * set forward to ingress port. */ local_port = 0; @@ -676,6 +735,7 @@ static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = { int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) { const struct mlxsw_sp_acl_ops *acl_ops = &mlxsw_sp_acl_tcam_ops; + struct mlxsw_sp_fid *fid; struct mlxsw_sp_acl *acl; int err; @@ -706,6 +766,13 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_rhashtable_init; + fid = mlxsw_sp_fid_dummy_get(mlxsw_sp); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + goto err_fid_get; + } + acl->dummy_fid = fid; + INIT_LIST_HEAD(&acl->rules); err = acl_ops->init(mlxsw_sp, acl->priv); if (err) @@ -721,6 +788,8 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) return 0; err_acl_ops_init: + mlxsw_sp_fid_put(fid); +err_fid_get: rhashtable_destroy(&acl->ruleset_ht); err_rhashtable_init: mlxsw_afa_destroy(acl->afa); @@ -739,6 +808,7 @@ void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp) cancel_delayed_work_sync(&mlxsw_sp->acl->rule_activity_update.dw); acl_ops->fini(mlxsw_sp, acl->priv); WARN_ON(!list_empty(&acl->rules)); + mlxsw_sp_fid_put(acl->dummy_fid); rhashtable_destroy(&acl->ruleset_ht); mlxsw_afa_destroy(acl->afa); mlxsw_afk_destroy(acl->afk); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h index af7b7bad48df..fb8031828454 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h @@ -68,6 +68,14 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = { MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = { + MLXSW_AFK_ELEMENT_INST_U32(SRC_IP4, 0x00, 0, 32), + MLXSW_AFK_ELEMENT_INST_U32(IP_ECN, 0x04, 4, 2), + MLXSW_AFK_ELEMENT_INST_U32(IP_TTL_, 0x04, 24, 8), + MLXSW_AFK_ELEMENT_INST_U32(IP_DSCP, 0x08, 0, 6), + MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x08, 8, 9), /* TCP_CONTROL+TCP_ECN */ +}; + static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_ex[] = { MLXSW_AFK_ELEMENT_INST_U32(VID, 0x00, 0, 12), MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 29, 3), @@ -102,6 +110,7 @@ static const struct mlxsw_afk_block mlxsw_sp_afk_blocks[] = { MLXSW_AFK_BLOCK(0x12, mlxsw_sp_afk_element_info_l2_smac_ex), MLXSW_AFK_BLOCK(0x30, mlxsw_sp_afk_element_info_ipv4_sip), MLXSW_AFK_BLOCK(0x31, mlxsw_sp_afk_element_info_ipv4_dip), + MLXSW_AFK_BLOCK(0x32, mlxsw_sp_afk_element_info_ipv4), MLXSW_AFK_BLOCK(0x33, mlxsw_sp_afk_element_info_ipv4_ex), MLXSW_AFK_BLOCK(0x60, mlxsw_sp_afk_element_info_ipv6_dip), MLXSW_AFK_BLOCK(0x65, mlxsw_sp_afk_element_info_ipv6_ex1), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 3a24289979d9..50b40de1fb91 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -295,6 +295,12 @@ 
mlxsw_sp_acl_tcam_group_unbind(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbt), ppbt_pl); } +static u16 +mlxsw_sp_acl_tcam_group_id(struct mlxsw_sp_acl_tcam_group *group) +{ + return group->id; +} + static unsigned int mlxsw_sp_acl_tcam_region_prio(struct mlxsw_sp_acl_tcam_region *region) { @@ -983,6 +989,10 @@ static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = { MLXSW_AFK_ELEMENT_SRC_L4_PORT, MLXSW_AFK_ELEMENT_VID, MLXSW_AFK_ELEMENT_PCP, + MLXSW_AFK_ELEMENT_TCP_FLAGS, + MLXSW_AFK_ELEMENT_IP_TTL_, + MLXSW_AFK_ELEMENT_IP_ECN, + MLXSW_AFK_ELEMENT_IP_DSCP, }; static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv6[] = { @@ -1059,6 +1069,14 @@ mlxsw_sp_acl_tcam_flower_ruleset_unbind(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_tcam_group_unbind(mlxsw_sp, &ruleset->group); } +static u16 +mlxsw_sp_acl_tcam_flower_ruleset_group_id(void *ruleset_priv) +{ + struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv; + + return mlxsw_sp_acl_tcam_group_id(&ruleset->group); +} + static int mlxsw_sp_acl_tcam_flower_rule_add(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, void *rule_priv, @@ -1095,6 +1113,7 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { .ruleset_del = mlxsw_sp_acl_tcam_flower_ruleset_del, .ruleset_bind = mlxsw_sp_acl_tcam_flower_ruleset_bind, .ruleset_unbind = mlxsw_sp_acl_tcam_flower_ruleset_unbind, + .ruleset_group_id = mlxsw_sp_acl_tcam_flower_ruleset_group_id, .rule_priv_size = sizeof(struct mlxsw_sp_acl_tcam_flower_rule), .rule_add = mlxsw_sp_acl_tcam_flower_rule_add, .rule_del = mlxsw_sp_acl_tcam_flower_rule_del, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 997189cfe7fd..93728c694e6d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -43,25 +43,72 @@ #include "port.h" #include "reg.h" +struct mlxsw_sp_sb_pr { + enum mlxsw_reg_sbpr_mode mode; + u32 size; +}; + +struct mlxsw_cp_sb_occ { + u32 cur; + u32 max; +}; + +struct mlxsw_sp_sb_cm { + u32 min_buff; + u32 max_buff; + u8 pool; + struct mlxsw_cp_sb_occ occ; +}; + +struct mlxsw_sp_sb_pm { + u32 min_buff; + u32 max_buff; + struct mlxsw_cp_sb_occ occ; +}; + +#define MLXSW_SP_SB_POOL_COUNT 4 +#define MLXSW_SP_SB_TC_COUNT 8 + +struct mlxsw_sp_sb_port { + struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT]; + struct mlxsw_sp_sb_pm pms[2][MLXSW_SP_SB_POOL_COUNT]; +}; + +struct mlxsw_sp_sb { + struct mlxsw_sp_sb_pr prs[2][MLXSW_SP_SB_POOL_COUNT]; + struct mlxsw_sp_sb_port *ports; + u32 cell_size; +}; + +u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells) +{ + return mlxsw_sp->sb->cell_size * cells; +} + +u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes) +{ + return DIV_ROUND_UP(bytes, mlxsw_sp->sb->cell_size); +} + static struct mlxsw_sp_sb_pr *mlxsw_sp_sb_pr_get(struct mlxsw_sp *mlxsw_sp, u8 pool, enum mlxsw_reg_sbxx_dir dir) { - return &mlxsw_sp->sb.prs[dir][pool]; + return &mlxsw_sp->sb->prs[dir][pool]; } static struct mlxsw_sp_sb_cm *mlxsw_sp_sb_cm_get(struct mlxsw_sp *mlxsw_sp, u8 local_port, u8 pg_buff, enum mlxsw_reg_sbxx_dir dir) { - return &mlxsw_sp->sb.ports[local_port].cms[dir][pg_buff]; + return &mlxsw_sp->sb->ports[local_port].cms[dir][pg_buff]; } static struct mlxsw_sp_sb_pm *mlxsw_sp_sb_pm_get(struct mlxsw_sp *mlxsw_sp, u8 local_port, u8 pool, enum mlxsw_reg_sbxx_dir dir) { - return &mlxsw_sp->sb.ports[local_port].pms[dir][pool]; + 
return &mlxsw_sp->sb->ports[local_port].pms[dir][pool]; } static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u8 pool, @@ -215,16 +262,17 @@ static int mlxsw_sp_sb_ports_init(struct mlxsw_sp *mlxsw_sp) { unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); - mlxsw_sp->sb.ports = kcalloc(max_ports, sizeof(struct mlxsw_sp_sb_port), - GFP_KERNEL); - if (!mlxsw_sp->sb.ports) + mlxsw_sp->sb->ports = kcalloc(max_ports, + sizeof(struct mlxsw_sp_sb_port), + GFP_KERNEL); + if (!mlxsw_sp->sb->ports) return -ENOMEM; return 0; } static void mlxsw_sp_sb_ports_fini(struct mlxsw_sp *mlxsw_sp) { - kfree(mlxsw_sp->sb.ports); + kfree(mlxsw_sp->sb->ports); } #define MLXSW_SP_SB_PR_INGRESS_SIZE 12440000 @@ -551,15 +599,19 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, CELL_SIZE)) return -EIO; - mlxsw_sp->sb.cell_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, CELL_SIZE); if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_BUFFER_SIZE)) return -EIO; sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE); + mlxsw_sp->sb = kzalloc(sizeof(*mlxsw_sp->sb), GFP_KERNEL); + if (!mlxsw_sp->sb) + return -ENOMEM; + mlxsw_sp->sb->cell_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, CELL_SIZE); + err = mlxsw_sp_sb_ports_init(mlxsw_sp); if (err) - return err; + goto err_sb_ports_init; err = mlxsw_sp_sb_prs_init(mlxsw_sp); if (err) goto err_sb_prs_init; @@ -584,6 +636,8 @@ err_sb_mms_init: err_sb_cpu_port_sb_cms_init: err_sb_prs_init: mlxsw_sp_sb_ports_fini(mlxsw_sp); +err_sb_ports_init: + kfree(mlxsw_sp->sb); return err; } @@ -591,6 +645,7 @@ void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp) { devlink_sb_unregister(priv_to_devlink(mlxsw_sp->core), 0); mlxsw_sp_sb_ports_fini(mlxsw_sp); + kfree(mlxsw_sp->sb); } int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 5f0a7bc692a4..51e6846da72b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -74,6 +74,9 @@ static struct devlink_dpipe_header mlxsw_sp_dpipe_header_metadata = { static struct devlink_dpipe_header *mlxsw_dpipe_headers[] = { &mlxsw_sp_dpipe_header_metadata, + &devlink_dpipe_header_ethernet, + &devlink_dpipe_header_ipv4, + &devlink_dpipe_header_ipv6, }; static struct devlink_dpipe_headers mlxsw_sp_dpipe_headers = { @@ -114,26 +117,6 @@ static int mlxsw_sp_dpipe_table_erif_matches_dump(void *priv, return devlink_dpipe_match_put(skb, &match); } -static void mlxsw_sp_erif_entry_clear(struct devlink_dpipe_entry *entry) -{ - unsigned int value_count, value_index; - struct devlink_dpipe_value *value; - - value = entry->action_values; - value_count = entry->action_values_count; - for (value_index = 0; value_index < value_count; value_index++) { - kfree(value[value_index].value); - kfree(value[value_index].mask); - } - - value = entry->match_values; - value_count = entry->match_values_count; - for (value_index = 0; value_index < value_count; value_index++) { - kfree(value[value_index].value); - kfree(value[value_index].mask); - } -} - static void mlxsw_sp_erif_match_action_prepare(struct devlink_dpipe_match *match, struct devlink_dpipe_action *action) @@ -215,10 +198,10 @@ static int mlxsw_sp_erif_entry_get(struct mlxsw_sp *mlxsw_sp, } static int -mlxsw_sp_table_erif_entries_dump(void *priv, bool counters_enabled, - struct devlink_dpipe_dump_ctx *dump_ctx) +mlxsw_sp_dpipe_table_erif_entries_dump(void 
*priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) { - struct devlink_dpipe_value match_value = {{0}}, action_value = {{0}}; + struct devlink_dpipe_value match_value, action_value; struct devlink_dpipe_action action = {0}; struct devlink_dpipe_match match = {0}; struct devlink_dpipe_entry entry = {0}; @@ -227,6 +210,9 @@ mlxsw_sp_table_erif_entries_dump(void *priv, bool counters_enabled, int i, j; int err; + memset(&match_value, 0, sizeof(match_value)); + memset(&action_value, 0, sizeof(action_value)); + mlxsw_sp_erif_match_action_prepare(&match, &action); err = mlxsw_sp_erif_entry_prepare(&entry, &match_value, &match, &action_value, &action); @@ -242,10 +228,11 @@ start_again: return err; j = 0; for (; i < rif_count; i++) { - if (!mlxsw_sp->rifs[i]) + struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); + + if (!rif) continue; - err = mlxsw_sp_erif_entry_get(mlxsw_sp, &entry, - mlxsw_sp->rifs[i], + err = mlxsw_sp_erif_entry_get(mlxsw_sp, &entry, rif, counters_enabled); if (err) goto err_entry_get; @@ -266,55 +253,60 @@ start_again: goto start_again; rtnl_unlock(); - mlxsw_sp_erif_entry_clear(&entry); + devlink_dpipe_entry_clear(&entry); return 0; err_entry_append: err_entry_get: rtnl_unlock(); - mlxsw_sp_erif_entry_clear(&entry); + devlink_dpipe_entry_clear(&entry); return err; } -static int mlxsw_sp_table_erif_counters_update(void *priv, bool enable) +static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable) { struct mlxsw_sp *mlxsw_sp = priv; int i; rtnl_lock(); for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { - if (!mlxsw_sp->rifs[i]) + struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); + + if (!rif) continue; if (enable) - mlxsw_sp_rif_counter_alloc(mlxsw_sp, - mlxsw_sp->rifs[i], + mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS); else - mlxsw_sp_rif_counter_free(mlxsw_sp, - mlxsw_sp->rifs[i], + mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS); } rtnl_unlock(); return 0; } +static u64 mlxsw_sp_dpipe_table_erif_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); +} + static struct devlink_dpipe_table_ops mlxsw_sp_erif_ops = { .matches_dump = mlxsw_sp_dpipe_table_erif_matches_dump, .actions_dump = mlxsw_sp_dpipe_table_erif_actions_dump, - .entries_dump = mlxsw_sp_table_erif_entries_dump, - .counters_set_update = mlxsw_sp_table_erif_counters_update, + .entries_dump = mlxsw_sp_dpipe_table_erif_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_erif_counters_update, + .size_get = mlxsw_sp_dpipe_table_erif_size_get, }; static int mlxsw_sp_dpipe_erif_table_init(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); - u64 table_size; - table_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); return devlink_dpipe_table_register(devlink, MLXSW_SP_DPIPE_TABLE_NAME_ERIF, &mlxsw_sp_erif_ops, - mlxsw_sp, table_size, - false); + mlxsw_sp, false); } static void mlxsw_sp_dpipe_erif_table_fini(struct mlxsw_sp *mlxsw_sp) @@ -324,6 +316,516 @@ static void mlxsw_sp_dpipe_erif_table_fini(struct mlxsw_sp *mlxsw_sp) devlink_dpipe_table_unregister(devlink, MLXSW_SP_DPIPE_TABLE_NAME_ERIF); } +static int mlxsw_sp_dpipe_table_host_matches_dump(struct sk_buff *skb, int type) +{ + struct devlink_dpipe_match match = {0}; + int err; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = 
MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; + + err = devlink_dpipe_match_put(skb, &match); + if (err) + return err; + + switch (type) { + case AF_INET: + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &devlink_dpipe_header_ipv4; + match.field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP; + break; + case AF_INET6: + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &devlink_dpipe_header_ipv6; + match.field_id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP; + break; + default: + WARN_ON(1); + return -EINVAL; + } + + return devlink_dpipe_match_put(skb, &match); +} + +static int +mlxsw_sp_dpipe_table_host4_matches_dump(void *priv, struct sk_buff *skb) +{ + return mlxsw_sp_dpipe_table_host_matches_dump(skb, AF_INET); +} + +static int +mlxsw_sp_dpipe_table_host_actions_dump(void *priv, struct sk_buff *skb) +{ + struct devlink_dpipe_action action = {0}; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &devlink_dpipe_header_ethernet; + action.field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC; + + return devlink_dpipe_action_put(skb, &action); +} + +enum mlxsw_sp_dpipe_table_host_match { + MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF, + MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP, + MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT, +}; + +static void +mlxsw_sp_dpipe_table_host_match_action_prepare(struct devlink_dpipe_match *matches, + struct devlink_dpipe_action *action, + int type) +{ + struct devlink_dpipe_match *match; + + match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; + + match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + switch (type) { + case AF_INET: + match->header = &devlink_dpipe_header_ipv4; + match->field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP; + break; + case AF_INET6: + match->header = &devlink_dpipe_header_ipv6; + match->field_id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP; + break; + default: + WARN_ON(1); + return; + } + + action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action->header = &devlink_dpipe_header_ethernet; + action->field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC; +} + +static int +mlxsw_sp_dpipe_table_host_entry_prepare(struct devlink_dpipe_entry *entry, + struct devlink_dpipe_value *match_values, + struct devlink_dpipe_match *matches, + struct devlink_dpipe_value *action_value, + struct devlink_dpipe_action *action, + int type) +{ + struct devlink_dpipe_value *match_value; + struct devlink_dpipe_match *match; + + entry->match_values = match_values; + entry->match_values_count = MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT; + + entry->action_values = action_value; + entry->action_values_count = 1; + + match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF]; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; + + match_value->match = match; + switch (type) { + case AF_INET: + match_value->value_size = sizeof(u32); + break; + case AF_INET6: + match_value->value_size = sizeof(struct in6_addr); + break; + default: + WARN_ON(1); + return -EINVAL; + } + + match_value->value = kmalloc(match_value->value_size, 
GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + action_value->action = action; + action_value->value_size = sizeof(u64); + action_value->value = kmalloc(action_value->value_size, GFP_KERNEL); + if (!action_value->value) + return -ENOMEM; + + return 0; +} + +static void +__mlxsw_sp_dpipe_table_host_entry_fill(struct devlink_dpipe_entry *entry, + struct mlxsw_sp_rif *rif, + unsigned char *ha, void *dip) +{ + struct devlink_dpipe_value *value; + u32 *rif_value; + u8 *ha_value; + + /* Set Match RIF index */ + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF]; + + rif_value = value->value; + *rif_value = mlxsw_sp_rif_index(rif); + value->mapping_value = mlxsw_sp_rif_dev_ifindex(rif); + value->mapping_valid = true; + + /* Set Match DIP */ + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; + memcpy(value->value, dip, value->value_size); + + /* Set Action DMAC */ + value = entry->action_values; + ha_value = value->value; + ether_addr_copy(ha_value, ha); +} + +static void +mlxsw_sp_dpipe_table_host4_entry_fill(struct devlink_dpipe_entry *entry, + struct mlxsw_sp_neigh_entry *neigh_entry, + struct mlxsw_sp_rif *rif) +{ + unsigned char *ha; + u32 dip; + + ha = mlxsw_sp_neigh_entry_ha(neigh_entry); + dip = mlxsw_sp_neigh4_entry_dip(neigh_entry); + __mlxsw_sp_dpipe_table_host_entry_fill(entry, rif, ha, &dip); +} + +static void +mlxsw_sp_dpipe_table_host6_entry_fill(struct devlink_dpipe_entry *entry, + struct mlxsw_sp_neigh_entry *neigh_entry, + struct mlxsw_sp_rif *rif) +{ + struct in6_addr *dip; + unsigned char *ha; + + ha = mlxsw_sp_neigh_entry_ha(neigh_entry); + dip = mlxsw_sp_neigh6_entry_dip(neigh_entry); + + __mlxsw_sp_dpipe_table_host_entry_fill(entry, rif, ha, dip); +} + +static void +mlxsw_sp_dpipe_table_host_entry_fill(struct mlxsw_sp *mlxsw_sp, + struct devlink_dpipe_entry *entry, + struct mlxsw_sp_neigh_entry *neigh_entry, + struct mlxsw_sp_rif *rif, + int type) +{ + int err; + + switch (type) { + case AF_INET: + mlxsw_sp_dpipe_table_host4_entry_fill(entry, neigh_entry, rif); + break; + case AF_INET6: + mlxsw_sp_dpipe_table_host6_entry_fill(entry, neigh_entry, rif); + break; + default: + WARN_ON(1); + return; + } + + err = mlxsw_sp_neigh_counter_get(mlxsw_sp, neigh_entry, + &entry->counter); + if (!err) + entry->counter_valid = true; +} + +static int +mlxsw_sp_dpipe_table_host_entries_get(struct mlxsw_sp *mlxsw_sp, + struct devlink_dpipe_entry *entry, + bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx, + int type) +{ + int rif_neigh_count = 0; + int rif_neigh_skip = 0; + int neigh_count = 0; + int rif_count; + int i, j; + int err; + + rtnl_lock(); + i = 0; + rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); +start_again: + err = devlink_dpipe_entry_ctx_prepare(dump_ctx); + if (err) + goto err_ctx_prepare; + j = 0; + rif_neigh_skip = rif_neigh_count; + for (; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { + struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); + struct mlxsw_sp_neigh_entry *neigh_entry; + + if (!rif) + continue; + + rif_neigh_count = 0; + mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) { + int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry); + + if (neigh_type != type) + continue; + + if (neigh_type == AF_INET6 && + mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + continue; + + if (rif_neigh_count < rif_neigh_skip) + goto skip; + + mlxsw_sp_dpipe_table_host_entry_fill(mlxsw_sp, entry, + neigh_entry, rif, + type); + entry->index = neigh_count; + err = 
devlink_dpipe_entry_ctx_append(dump_ctx, entry); + if (err) { + if (err == -EMSGSIZE) { + if (!j) + goto err_entry_append; + else + goto out; + } + goto err_entry_append; + } + neigh_count++; + j++; +skip: + rif_neigh_count++; + } + rif_neigh_skip = 0; + } +out: + devlink_dpipe_entry_ctx_close(dump_ctx); + if (i != rif_count) + goto start_again; + + rtnl_unlock(); + return 0; + +err_ctx_prepare: +err_entry_append: + rtnl_unlock(); + return err; +} + +static int +mlxsw_sp_dpipe_table_host_entries_dump(struct mlxsw_sp *mlxsw_sp, + bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx, + int type) +{ + struct devlink_dpipe_value match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT]; + struct devlink_dpipe_match matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT]; + struct devlink_dpipe_value action_value; + struct devlink_dpipe_action action = {0}; + struct devlink_dpipe_entry entry = {0}; + int err; + + memset(matches, 0, MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT * + sizeof(matches[0])); + memset(match_values, 0, MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT * + sizeof(match_values[0])); + memset(&action_value, 0, sizeof(action_value)); + + mlxsw_sp_dpipe_table_host_match_action_prepare(matches, &action, type); + err = mlxsw_sp_dpipe_table_host_entry_prepare(&entry, match_values, + matches, &action_value, + &action, type); + if (err) + goto out; + + err = mlxsw_sp_dpipe_table_host_entries_get(mlxsw_sp, &entry, + counters_enabled, dump_ctx, + type); +out: + devlink_dpipe_entry_clear(&entry); + return err; +} + +static int +mlxsw_sp_dpipe_table_host4_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_entries_dump(mlxsw_sp, + counters_enabled, + dump_ctx, AF_INET); +} + +static void +mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp, + bool enable, int type) +{ + int i; + + rtnl_lock(); + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { + struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); + struct mlxsw_sp_neigh_entry *neigh_entry; + + if (!rif) + continue; + mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) { + int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry); + + if (neigh_type != type) + continue; + + if (neigh_type == AF_INET6 && + mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + continue; + + mlxsw_sp_neigh_entry_counter_update(mlxsw_sp, + neigh_entry, + enable); + } + } + rtnl_unlock(); +} + +static int mlxsw_sp_dpipe_table_host4_counters_update(void *priv, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_dpipe_table_host_counters_update(mlxsw_sp, enable, AF_INET); + return 0; +} + +static u64 +mlxsw_sp_dpipe_table_host_size_get(struct mlxsw_sp *mlxsw_sp, int type) +{ + u64 size = 0; + int i; + + rtnl_lock(); + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { + struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); + struct mlxsw_sp_neigh_entry *neigh_entry; + + if (!rif) + continue; + mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) { + int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry); + + if (neigh_type != type) + continue; + + if (neigh_type == AF_INET6 && + mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + continue; + + size++; + } + } + rtnl_unlock(); + + return size; +} + +static u64 mlxsw_sp_dpipe_table_host4_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_size_get(mlxsw_sp, AF_INET); +} + +static struct devlink_dpipe_table_ops 
mlxsw_sp_host4_ops = { + .matches_dump = mlxsw_sp_dpipe_table_host4_matches_dump, + .actions_dump = mlxsw_sp_dpipe_table_host_actions_dump, + .entries_dump = mlxsw_sp_dpipe_table_host4_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_host4_counters_update, + .size_get = mlxsw_sp_dpipe_table_host4_size_get, +}; + +static int mlxsw_sp_dpipe_host4_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + return devlink_dpipe_table_register(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST4, + &mlxsw_sp_host4_ops, + mlxsw_sp, false); +} + +static void mlxsw_sp_dpipe_host4_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devlink_dpipe_table_unregister(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST4); +} + +static int +mlxsw_sp_dpipe_table_host6_matches_dump(void *priv, struct sk_buff *skb) +{ + return mlxsw_sp_dpipe_table_host_matches_dump(skb, AF_INET6); +} + +static int +mlxsw_sp_dpipe_table_host6_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_entries_dump(mlxsw_sp, + counters_enabled, + dump_ctx, AF_INET6); +} + +static int mlxsw_sp_dpipe_table_host6_counters_update(void *priv, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_dpipe_table_host_counters_update(mlxsw_sp, enable, AF_INET6); + return 0; +} + +static u64 mlxsw_sp_dpipe_table_host6_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_size_get(mlxsw_sp, AF_INET6); +} + +static struct devlink_dpipe_table_ops mlxsw_sp_host6_ops = { + .matches_dump = mlxsw_sp_dpipe_table_host6_matches_dump, + .actions_dump = mlxsw_sp_dpipe_table_host_actions_dump, + .entries_dump = mlxsw_sp_dpipe_table_host6_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_host6_counters_update, + .size_get = mlxsw_sp_dpipe_table_host6_size_get, +}; + +static int mlxsw_sp_dpipe_host6_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + return devlink_dpipe_table_register(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST6, + &mlxsw_sp_host6_ops, + mlxsw_sp, false); +} + +static void mlxsw_sp_dpipe_host6_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devlink_dpipe_table_unregister(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST6); +} + int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); @@ -335,10 +837,22 @@ int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) return err; err = mlxsw_sp_dpipe_erif_table_init(mlxsw_sp); if (err) - goto err_erif_register; + goto err_erif_table_init; + + err = mlxsw_sp_dpipe_host4_table_init(mlxsw_sp); + if (err) + goto err_host4_table_init; + + err = mlxsw_sp_dpipe_host6_table_init(mlxsw_sp); + if (err) + goto err_host6_table_init; return 0; -err_erif_register: +err_host6_table_init: + mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); +err_host4_table_init: + mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp); +err_erif_table_init: devlink_dpipe_headers_unregister(priv_to_devlink(mlxsw_sp->core)); return err; } @@ -347,6 +861,8 @@ void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp); + mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp); devlink_dpipe_headers_unregister(devlink); } 
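[Editor's note — illustrative sketch, not part of the patch above.] The new mlxsw_host4/mlxsw_host6 entries_dump callbacks in spectrum_dpipe.c all follow the same devlink dpipe dump-context pattern: prepare a context, append entries until the underlying skb fills up (-EMSGSIZE), close the context to flush, and restart the walk from the entry that did not fit. The sketch below shows only that control flow, under stated assumptions; devlink_dpipe_entry_ctx_prepare/append/close and entry->index are the real devlink helpers used in the patch, while EXAMPLE_TABLE_SIZE and example_entry_fill are placeholders invented here for illustration.

#include <net/devlink.h>

#define EXAMPLE_TABLE_SIZE 16	/* placeholder table size, not from the patch */

static void example_entry_fill(struct devlink_dpipe_entry *entry, int index)
{
	/* Placeholder: a real callback fills match/action values and the
	 * per-entry counter here (see mlxsw_sp_dpipe_table_host_entry_fill).
	 */
	entry->index = index;
}

static int example_entries_dump(struct devlink_dpipe_entry *entry,
				struct devlink_dpipe_dump_ctx *dump_ctx)
{
	int i = 0;	/* walk position survives a restart */
	int appended;
	int err;

start_again:
	err = devlink_dpipe_entry_ctx_prepare(dump_ctx);
	if (err)
		return err;
	appended = 0;
	for (; i < EXAMPLE_TABLE_SIZE; i++) {
		example_entry_fill(entry, i);
		err = devlink_dpipe_entry_ctx_append(dump_ctx, entry);
		if (err == -EMSGSIZE && appended)
			break;	/* skb full: flush and retry this index */
		if (err)
			return err;
		appended++;
	}
	devlink_dpipe_entry_ctx_close(dump_ctx);
	if (i != EXAMPLE_TABLE_SIZE)
		goto start_again;	/* more entries left after the flush */
	return 0;
}

In the driver itself the same restart logic is wrapped around the per-RIF neighbour walk (rif_neigh_skip/rif_neigh_count in mlxsw_sp_dpipe_table_host_entries_get) and the loop runs under rtnl_lock; this sketch omits both for brevity.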
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h index d2089298cba3..283fde4e6783 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h @@ -35,9 +35,26 @@ #ifndef _MLXSW_PIPELINE_H_ #define _MLXSW_PIPELINE_H_ +#if IS_ENABLED(CONFIG_NET_DEVLINK) + int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp); +#else + +static inline int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) +{ + return 0; +} + +static inline void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) +{ +} + +#endif + #define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif" +#define MLXSW_SP_DPIPE_TABLE_NAME_HOST4 "mlxsw_host4" +#define MLXSW_SP_DPIPE_TABLE_NAME_HOST6 "mlxsw_host6" #endif /* _MLXSW_PIPELINE_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c new file mode 100644 index 000000000000..bbd238e50f05 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -0,0 +1,992 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Ido Schimmel <idosch@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <linux/kernel.h> +#include <linux/bitops.h> +#include <linux/if_vlan.h> +#include <linux/if_bridge.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> + +#include "spectrum.h" +#include "reg.h" + +struct mlxsw_sp_fid_family; + +struct mlxsw_sp_fid_core { + struct mlxsw_sp_fid_family *fid_family_arr[MLXSW_SP_FID_TYPE_MAX]; + unsigned int *port_fid_mappings; +}; + +struct mlxsw_sp_fid { + struct list_head list; + struct mlxsw_sp_rif *rif; + unsigned int ref_count; + u16 fid_index; + struct mlxsw_sp_fid_family *fid_family; +}; + +struct mlxsw_sp_fid_8021q { + struct mlxsw_sp_fid common; + u16 vid; +}; + +struct mlxsw_sp_fid_8021d { + struct mlxsw_sp_fid common; + int br_ifindex; +}; + +struct mlxsw_sp_flood_table { + enum mlxsw_sp_flood_type packet_type; + enum mlxsw_reg_sfgc_bridge_type bridge_type; + enum mlxsw_flood_table_type table_type; + int table_index; +}; + +struct mlxsw_sp_fid_ops { + void (*setup)(struct mlxsw_sp_fid *fid, const void *arg); + int (*configure)(struct mlxsw_sp_fid *fid); + void (*deconfigure)(struct mlxsw_sp_fid *fid); + int (*index_alloc)(struct mlxsw_sp_fid *fid, const void *arg, + u16 *p_fid_index); + bool (*compare)(const struct mlxsw_sp_fid *fid, + const void *arg); + u16 (*flood_index)(const struct mlxsw_sp_fid *fid); + int (*port_vid_map)(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *port, u16 vid); + void (*port_vid_unmap)(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *port, u16 vid); +}; + +struct mlxsw_sp_fid_family { + enum mlxsw_sp_fid_type type; + size_t fid_size; + u16 start_index; + u16 end_index; + struct list_head fids_list; + unsigned long *fids_bitmap; + const struct mlxsw_sp_flood_table *flood_tables; + int nr_flood_tables; + enum mlxsw_sp_rif_type rif_type; + const struct mlxsw_sp_fid_ops *ops; + struct mlxsw_sp *mlxsw_sp; +}; + +static const int mlxsw_sp_sfgc_uc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { + [MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST] = 1, +}; + +static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { + [MLXSW_REG_SFGC_TYPE_BROADCAST] = 1, + [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP] = 1, + [MLXSW_REG_SFGC_TYPE_IPV4_LINK_LOCAL] = 1, + [MLXSW_REG_SFGC_TYPE_IPV6_ALL_HOST] = 1, +}; + +static const int mlxsw_sp_sfgc_mc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { + [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4] = 1, + [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6] = 1, +}; + +static const int *mlxsw_sp_packet_type_sfgc_types[] = { + [MLXSW_SP_FLOOD_TYPE_UC] = mlxsw_sp_sfgc_uc_packet_types, + [MLXSW_SP_FLOOD_TYPE_BC] = mlxsw_sp_sfgc_bc_packet_types, + [MLXSW_SP_FLOOD_TYPE_MC] = mlxsw_sp_sfgc_mc_packet_types, +}; + +static const struct mlxsw_sp_flood_table * +mlxsw_sp_fid_flood_table_lookup(const struct mlxsw_sp_fid *fid, + enum mlxsw_sp_flood_type packet_type) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + int i; + + for (i = 0; i < fid_family->nr_flood_tables; i++) { + if (fid_family->flood_tables[i].packet_type != packet_type) + continue; + return &fid_family->flood_tables[i]; + } + + return NULL; +} + +int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid, + enum mlxsw_sp_flood_type packet_type, u8 local_port, + bool member) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + const struct mlxsw_sp_fid_ops *ops = fid_family->ops; + const struct mlxsw_sp_flood_table *flood_table; + char *sftr_pl; + int err; + + if (WARN_ON(!fid_family->flood_tables || !ops->flood_index)) + return -EINVAL; + + flood_table = mlxsw_sp_fid_flood_table_lookup(fid, 
packet_type); + if (!flood_table) + return -ESRCH; + + sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); + if (!sftr_pl) + return -ENOMEM; + + mlxsw_reg_sftr_pack(sftr_pl, flood_table->table_index, + ops->flood_index(fid), flood_table->table_type, 1, + local_port, member); + err = mlxsw_reg_write(fid_family->mlxsw_sp->core, MLXSW_REG(sftr), + sftr_pl); + kfree(sftr_pl); + return err; +} + +int mlxsw_sp_fid_port_vid_map(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +{ + if (WARN_ON(!fid->fid_family->ops->port_vid_map)) + return -EINVAL; + return fid->fid_family->ops->port_vid_map(fid, mlxsw_sp_port, vid); +} + +void mlxsw_sp_fid_port_vid_unmap(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +{ + fid->fid_family->ops->port_vid_unmap(fid, mlxsw_sp_port, vid); +} + +enum mlxsw_sp_rif_type mlxsw_sp_fid_rif_type(const struct mlxsw_sp_fid *fid) +{ + return fid->fid_family->rif_type; +} + +u16 mlxsw_sp_fid_index(const struct mlxsw_sp_fid *fid) +{ + return fid->fid_index; +} + +enum mlxsw_sp_fid_type mlxsw_sp_fid_type(const struct mlxsw_sp_fid *fid) +{ + return fid->fid_family->type; +} + +void mlxsw_sp_fid_rif_set(struct mlxsw_sp_fid *fid, struct mlxsw_sp_rif *rif) +{ + fid->rif = rif; +} + +enum mlxsw_sp_rif_type +mlxsw_sp_fid_type_rif_type(const struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_fid_type type) +{ + struct mlxsw_sp_fid_core *fid_core = mlxsw_sp->fid_core; + + return fid_core->fid_family_arr[type]->rif_type; +} + +static struct mlxsw_sp_fid_8021q * +mlxsw_sp_fid_8021q_fid(const struct mlxsw_sp_fid *fid) +{ + return container_of(fid, struct mlxsw_sp_fid_8021q, common); +} + +u16 mlxsw_sp_fid_8021q_vid(const struct mlxsw_sp_fid *fid) +{ + return mlxsw_sp_fid_8021q_fid(fid)->vid; +} + +static void mlxsw_sp_fid_8021q_setup(struct mlxsw_sp_fid *fid, const void *arg) +{ + u16 vid = *(u16 *) arg; + + mlxsw_sp_fid_8021q_fid(fid)->vid = vid; +} + +static enum mlxsw_reg_sfmr_op mlxsw_sp_sfmr_op(bool valid) +{ + return valid ? 
MLXSW_REG_SFMR_OP_CREATE_FID : + MLXSW_REG_SFMR_OP_DESTROY_FID; +} + +static int mlxsw_sp_fid_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index, + u16 fid_offset, bool valid) +{ + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + + mlxsw_reg_sfmr_pack(sfmr_pl, mlxsw_sp_sfmr_op(valid), fid_index, + fid_offset); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); +} + +static int mlxsw_sp_fid_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index, + u16 vid, bool valid) +{ + enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_VID_TO_FID; + char svfa_pl[MLXSW_REG_SVFA_LEN]; + + mlxsw_reg_svfa_pack(svfa_pl, 0, mt, valid, fid_index, vid); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl); +} + +static int __mlxsw_sp_fid_port_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index, + u8 local_port, u16 vid, bool valid) +{ + enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; + char svfa_pl[MLXSW_REG_SVFA_LEN]; + + mlxsw_reg_svfa_pack(svfa_pl, local_port, mt, valid, fid_index, vid); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl); +} + +static int mlxsw_sp_fid_8021q_configure(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; + struct mlxsw_sp_fid_8021q *fid_8021q; + int err; + + err = mlxsw_sp_fid_op(mlxsw_sp, fid->fid_index, fid->fid_index, true); + if (err) + return err; + + fid_8021q = mlxsw_sp_fid_8021q_fid(fid); + err = mlxsw_sp_fid_vid_map(mlxsw_sp, fid->fid_index, fid_8021q->vid, + true); + if (err) + goto err_fid_map; + + return 0; + +err_fid_map: + mlxsw_sp_fid_op(mlxsw_sp, fid->fid_index, 0, false); + return err; +} + +static void mlxsw_sp_fid_8021q_deconfigure(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; + struct mlxsw_sp_fid_8021q *fid_8021q; + + fid_8021q = mlxsw_sp_fid_8021q_fid(fid); + mlxsw_sp_fid_vid_map(mlxsw_sp, fid->fid_index, fid_8021q->vid, false); + mlxsw_sp_fid_op(mlxsw_sp, fid->fid_index, 0, false); +} + +static int mlxsw_sp_fid_8021q_index_alloc(struct mlxsw_sp_fid *fid, + const void *arg, u16 *p_fid_index) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + u16 vid = *(u16 *) arg; + + /* Use 1:1 mapping for simplicity although not a must */ + if (vid < fid_family->start_index || vid > fid_family->end_index) + return -EINVAL; + *p_fid_index = vid; + + return 0; +} + +static bool +mlxsw_sp_fid_8021q_compare(const struct mlxsw_sp_fid *fid, const void *arg) +{ + u16 vid = *(u16 *) arg; + + return mlxsw_sp_fid_8021q_fid(fid)->vid == vid; +} + +static u16 mlxsw_sp_fid_8021q_flood_index(const struct mlxsw_sp_fid *fid) +{ + return fid->fid_index; +} + +static int mlxsw_sp_fid_8021q_port_vid_map(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + + /* In case there are no {Port, VID} => FID mappings on the port, + * we can use the global VID => FID mapping we created when the + * FID was configured. 
+ */ + if (mlxsw_sp->fid_core->port_fid_mappings[local_port] == 0) + return 0; + return __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, local_port, + vid, true); +} + +static void +mlxsw_sp_fid_8021q_port_vid_unmap(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + + if (mlxsw_sp->fid_core->port_fid_mappings[local_port] == 0) + return; + __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, local_port, vid, + false); +} + +static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021q_ops = { + .setup = mlxsw_sp_fid_8021q_setup, + .configure = mlxsw_sp_fid_8021q_configure, + .deconfigure = mlxsw_sp_fid_8021q_deconfigure, + .index_alloc = mlxsw_sp_fid_8021q_index_alloc, + .compare = mlxsw_sp_fid_8021q_compare, + .flood_index = mlxsw_sp_fid_8021q_flood_index, + .port_vid_map = mlxsw_sp_fid_8021q_port_vid_map, + .port_vid_unmap = mlxsw_sp_fid_8021q_port_vid_unmap, +}; + +static const struct mlxsw_sp_flood_table mlxsw_sp_fid_8021q_flood_tables[] = { + { + .packet_type = MLXSW_SP_FLOOD_TYPE_UC, + .bridge_type = MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID, + .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET, + .table_index = 0, + }, + { + .packet_type = MLXSW_SP_FLOOD_TYPE_MC, + .bridge_type = MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID, + .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET, + .table_index = 1, + }, + { + .packet_type = MLXSW_SP_FLOOD_TYPE_BC, + .bridge_type = MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID, + .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET, + .table_index = 2, + }, +}; + +/* Range and flood configuration must match mlxsw_config_profile */ +static const struct mlxsw_sp_fid_family mlxsw_sp_fid_8021q_family = { + .type = MLXSW_SP_FID_TYPE_8021Q, + .fid_size = sizeof(struct mlxsw_sp_fid_8021q), + .start_index = 1, + .end_index = VLAN_VID_MASK, + .flood_tables = mlxsw_sp_fid_8021q_flood_tables, + .nr_flood_tables = ARRAY_SIZE(mlxsw_sp_fid_8021q_flood_tables), + .rif_type = MLXSW_SP_RIF_TYPE_VLAN, + .ops = &mlxsw_sp_fid_8021q_ops, +}; + +static struct mlxsw_sp_fid_8021d * +mlxsw_sp_fid_8021d_fid(const struct mlxsw_sp_fid *fid) +{ + return container_of(fid, struct mlxsw_sp_fid_8021d, common); +} + +static void mlxsw_sp_fid_8021d_setup(struct mlxsw_sp_fid *fid, const void *arg) +{ + int br_ifindex = *(int *) arg; + + mlxsw_sp_fid_8021d_fid(fid)->br_ifindex = br_ifindex; +} + +static int mlxsw_sp_fid_8021d_configure(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + + return mlxsw_sp_fid_op(fid_family->mlxsw_sp, fid->fid_index, 0, true); +} + +static void mlxsw_sp_fid_8021d_deconfigure(struct mlxsw_sp_fid *fid) +{ + mlxsw_sp_fid_op(fid->fid_family->mlxsw_sp, fid->fid_index, 0, false); +} + +static int mlxsw_sp_fid_8021d_index_alloc(struct mlxsw_sp_fid *fid, + const void *arg, u16 *p_fid_index) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + u16 nr_fids, fid_index; + + nr_fids = fid_family->end_index - fid_family->start_index + 1; + fid_index = find_first_zero_bit(fid_family->fids_bitmap, nr_fids); + if (fid_index == nr_fids) + return -ENOBUFS; + *p_fid_index = fid_family->start_index + fid_index; + + return 0; +} + +static bool +mlxsw_sp_fid_8021d_compare(const struct mlxsw_sp_fid *fid, const void *arg) +{ + int br_ifindex = *(int *) arg; + + return mlxsw_sp_fid_8021d_fid(fid)->br_ifindex == br_ifindex; +} + +static u16 mlxsw_sp_fid_8021d_flood_index(const struct mlxsw_sp_fid *fid) +{ + return fid->fid_index - 
fid->fid_family->start_index; +} + +static int mlxsw_sp_port_vp_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + int err; + + list_for_each_entry(mlxsw_sp_port_vlan, &mlxsw_sp_port->vlans_list, + list) { + struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; + u16 vid = mlxsw_sp_port_vlan->vid; + + if (!fid) + continue; + + err = __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, + mlxsw_sp_port->local_port, + vid, true); + if (err) + goto err_fid_port_vid_map; + } + + err = mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, true); + if (err) + goto err_port_vp_mode_set; + + return 0; + +err_port_vp_mode_set: +err_fid_port_vid_map: + list_for_each_entry_continue_reverse(mlxsw_sp_port_vlan, + &mlxsw_sp_port->vlans_list, list) { + struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; + u16 vid = mlxsw_sp_port_vlan->vid; + + if (!fid) + continue; + + __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, + mlxsw_sp_port->local_port, vid, + false); + } + return err; +} + +static void mlxsw_sp_port_vlan_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + + mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false); + + list_for_each_entry_reverse(mlxsw_sp_port_vlan, + &mlxsw_sp_port->vlans_list, list) { + struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; + u16 vid = mlxsw_sp_port_vlan->vid; + + if (!fid) + continue; + + __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, + mlxsw_sp_port->local_port, vid, + false); + } +} + +static int mlxsw_sp_fid_8021d_port_vid_map(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + int err; + + err = __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, + mlxsw_sp_port->local_port, vid, true); + if (err) + return err; + + if (mlxsw_sp->fid_core->port_fid_mappings[local_port]++ == 0) { + err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port); + if (err) + goto err_port_vp_mode_trans; + } + + return 0; + +err_port_vp_mode_trans: + mlxsw_sp->fid_core->port_fid_mappings[local_port]--; + __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, + mlxsw_sp_port->local_port, vid, false); + return err; +} + +static void +mlxsw_sp_fid_8021d_port_vid_unmap(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + + if (mlxsw_sp->fid_core->port_fid_mappings[local_port] == 1) + mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); + mlxsw_sp->fid_core->port_fid_mappings[local_port]--; + __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, + mlxsw_sp_port->local_port, vid, false); +} + +static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = { + .setup = mlxsw_sp_fid_8021d_setup, + .configure = mlxsw_sp_fid_8021d_configure, + .deconfigure = mlxsw_sp_fid_8021d_deconfigure, + .index_alloc = mlxsw_sp_fid_8021d_index_alloc, + .compare = mlxsw_sp_fid_8021d_compare, + .flood_index = mlxsw_sp_fid_8021d_flood_index, + .port_vid_map = mlxsw_sp_fid_8021d_port_vid_map, + .port_vid_unmap = mlxsw_sp_fid_8021d_port_vid_unmap, +}; + +static const struct mlxsw_sp_flood_table mlxsw_sp_fid_8021d_flood_tables[] = { + { + .packet_type = MLXSW_SP_FLOOD_TYPE_UC, + .bridge_type = MLXSW_REG_SFGC_BRIDGE_TYPE_VFID, + .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID, + 
.table_index = 0, + }, + { + .packet_type = MLXSW_SP_FLOOD_TYPE_MC, + .bridge_type = MLXSW_REG_SFGC_BRIDGE_TYPE_VFID, + .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID, + .table_index = 1, + }, + { + .packet_type = MLXSW_SP_FLOOD_TYPE_BC, + .bridge_type = MLXSW_REG_SFGC_BRIDGE_TYPE_VFID, + .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID, + .table_index = 2, + }, +}; + +/* Range and flood configuration must match mlxsw_config_profile */ +static const struct mlxsw_sp_fid_family mlxsw_sp_fid_8021d_family = { + .type = MLXSW_SP_FID_TYPE_8021D, + .fid_size = sizeof(struct mlxsw_sp_fid_8021d), + .start_index = VLAN_N_VID, + .end_index = VLAN_N_VID + MLXSW_SP_FID_8021D_MAX - 1, + .flood_tables = mlxsw_sp_fid_8021d_flood_tables, + .nr_flood_tables = ARRAY_SIZE(mlxsw_sp_fid_8021d_flood_tables), + .rif_type = MLXSW_SP_RIF_TYPE_FID, + .ops = &mlxsw_sp_fid_8021d_ops, +}; + +static int mlxsw_sp_fid_rfid_configure(struct mlxsw_sp_fid *fid) +{ + /* rFIDs are allocated by the device during init */ + return 0; +} + +static void mlxsw_sp_fid_rfid_deconfigure(struct mlxsw_sp_fid *fid) +{ +} + +static int mlxsw_sp_fid_rfid_index_alloc(struct mlxsw_sp_fid *fid, + const void *arg, u16 *p_fid_index) +{ + u16 rif_index = *(u16 *) arg; + + *p_fid_index = fid->fid_family->start_index + rif_index; + + return 0; +} + +static bool mlxsw_sp_fid_rfid_compare(const struct mlxsw_sp_fid *fid, + const void *arg) +{ + u16 rif_index = *(u16 *) arg; + + return fid->fid_index == rif_index + fid->fid_family->start_index; +} + +static int mlxsw_sp_fid_rfid_port_vid_map(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + int err; + + /* We only need to transition the port to virtual mode since + * {Port, VID} => FID is done by the firmware upon RIF creation. 
+ */ + if (mlxsw_sp->fid_core->port_fid_mappings[local_port]++ == 0) { + err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port); + if (err) + goto err_port_vp_mode_trans; + } + + return 0; + +err_port_vp_mode_trans: + mlxsw_sp->fid_core->port_fid_mappings[local_port]--; + return err; +} + +static void +mlxsw_sp_fid_rfid_port_vid_unmap(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + + if (mlxsw_sp->fid_core->port_fid_mappings[local_port] == 1) + mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); + mlxsw_sp->fid_core->port_fid_mappings[local_port]--; +} + +static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_rfid_ops = { + .configure = mlxsw_sp_fid_rfid_configure, + .deconfigure = mlxsw_sp_fid_rfid_deconfigure, + .index_alloc = mlxsw_sp_fid_rfid_index_alloc, + .compare = mlxsw_sp_fid_rfid_compare, + .port_vid_map = mlxsw_sp_fid_rfid_port_vid_map, + .port_vid_unmap = mlxsw_sp_fid_rfid_port_vid_unmap, +}; + +#define MLXSW_SP_RFID_BASE (15 * 1024) +#define MLXSW_SP_RFID_MAX 1024 + +static const struct mlxsw_sp_fid_family mlxsw_sp_fid_rfid_family = { + .type = MLXSW_SP_FID_TYPE_RFID, + .fid_size = sizeof(struct mlxsw_sp_fid), + .start_index = MLXSW_SP_RFID_BASE, + .end_index = MLXSW_SP_RFID_BASE + MLXSW_SP_RFID_MAX - 1, + .rif_type = MLXSW_SP_RIF_TYPE_SUBPORT, + .ops = &mlxsw_sp_fid_rfid_ops, +}; + +static int mlxsw_sp_fid_dummy_configure(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; + + return mlxsw_sp_fid_op(mlxsw_sp, fid->fid_index, 0, true); +} + +static void mlxsw_sp_fid_dummy_deconfigure(struct mlxsw_sp_fid *fid) +{ + mlxsw_sp_fid_op(fid->fid_family->mlxsw_sp, fid->fid_index, 0, false); +} + +static int mlxsw_sp_fid_dummy_index_alloc(struct mlxsw_sp_fid *fid, + const void *arg, u16 *p_fid_index) +{ + *p_fid_index = fid->fid_family->start_index; + + return 0; +} + +static bool mlxsw_sp_fid_dummy_compare(const struct mlxsw_sp_fid *fid, + const void *arg) +{ + return true; +} + +static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_dummy_ops = { + .configure = mlxsw_sp_fid_dummy_configure, + .deconfigure = mlxsw_sp_fid_dummy_deconfigure, + .index_alloc = mlxsw_sp_fid_dummy_index_alloc, + .compare = mlxsw_sp_fid_dummy_compare, +}; + +static const struct mlxsw_sp_fid_family mlxsw_sp_fid_dummy_family = { + .type = MLXSW_SP_FID_TYPE_DUMMY, + .fid_size = sizeof(struct mlxsw_sp_fid), + .start_index = MLXSW_SP_RFID_BASE - 1, + .end_index = MLXSW_SP_RFID_BASE - 1, + .ops = &mlxsw_sp_fid_dummy_ops, +}; + +static const struct mlxsw_sp_fid_family *mlxsw_sp_fid_family_arr[] = { + [MLXSW_SP_FID_TYPE_8021Q] = &mlxsw_sp_fid_8021q_family, + [MLXSW_SP_FID_TYPE_8021D] = &mlxsw_sp_fid_8021d_family, + [MLXSW_SP_FID_TYPE_RFID] = &mlxsw_sp_fid_rfid_family, + [MLXSW_SP_FID_TYPE_DUMMY] = &mlxsw_sp_fid_dummy_family, +}; + +static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_fid_type type, + const void *arg) +{ + struct mlxsw_sp_fid_family *fid_family; + struct mlxsw_sp_fid *fid; + u16 fid_index; + int err; + + fid_family = mlxsw_sp->fid_core->fid_family_arr[type]; + list_for_each_entry(fid, &fid_family->fids_list, list) { + if (!fid->fid_family->ops->compare(fid, arg)) + continue; + fid->ref_count++; + return fid; + } + + fid = kzalloc(fid_family->fid_size, GFP_KERNEL); + if (!fid) + return ERR_PTR(-ENOMEM); + fid->fid_family = fid_family; + + err = fid->fid_family->ops->index_alloc(fid, arg, &fid_index); + if (err) + goto 
err_index_alloc; + fid->fid_index = fid_index; + __set_bit(fid_index - fid_family->start_index, fid_family->fids_bitmap); + + if (fid->fid_family->ops->setup) + fid->fid_family->ops->setup(fid, arg); + + err = fid->fid_family->ops->configure(fid); + if (err) + goto err_configure; + + list_add(&fid->list, &fid_family->fids_list); + fid->ref_count++; + return fid; + +err_configure: + __clear_bit(fid_index - fid_family->start_index, + fid_family->fids_bitmap); +err_index_alloc: + kfree(fid); + return ERR_PTR(err); +} + +void mlxsw_sp_fid_put(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + + if (--fid->ref_count == 1 && fid->rif) { + /* Destroy the associated RIF and let it drop the last + * reference on the FID. + */ + return mlxsw_sp_rif_destroy(fid->rif); + } else if (fid->ref_count == 0) { + list_del(&fid->list); + fid->fid_family->ops->deconfigure(fid); + __clear_bit(fid->fid_index - fid_family->start_index, + fid_family->fids_bitmap); + kfree(fid); + } +} + +struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_get(struct mlxsw_sp *mlxsw_sp, u16 vid) +{ + return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_8021Q, &vid); +} + +struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp, + int br_ifindex) +{ + return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D, &br_ifindex); +} + +struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp, + u16 rif_index) +{ + return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_RFID, &rif_index); +} + +struct mlxsw_sp_fid *mlxsw_sp_fid_dummy_get(struct mlxsw_sp *mlxsw_sp) +{ + return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_DUMMY, NULL); +} + +static int +mlxsw_sp_fid_flood_table_init(struct mlxsw_sp_fid_family *fid_family, + const struct mlxsw_sp_flood_table *flood_table) +{ + enum mlxsw_sp_flood_type packet_type = flood_table->packet_type; + const int *sfgc_packet_types; + int i; + + sfgc_packet_types = mlxsw_sp_packet_type_sfgc_types[packet_type]; + for (i = 0; i < MLXSW_REG_SFGC_TYPE_MAX; i++) { + struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; + char sfgc_pl[MLXSW_REG_SFGC_LEN]; + int err; + + if (!sfgc_packet_types[i]) + continue; + mlxsw_reg_sfgc_pack(sfgc_pl, i, flood_table->bridge_type, + flood_table->table_type, + flood_table->table_index); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfgc), sfgc_pl); + if (err) + return err; + } + + return 0; +} + +static int +mlxsw_sp_fid_flood_tables_init(struct mlxsw_sp_fid_family *fid_family) +{ + int i; + + for (i = 0; i < fid_family->nr_flood_tables; i++) { + const struct mlxsw_sp_flood_table *flood_table; + int err; + + flood_table = &fid_family->flood_tables[i]; + err = mlxsw_sp_fid_flood_table_init(fid_family, flood_table); + if (err) + return err; + } + + return 0; +} + +static int mlxsw_sp_fid_family_register(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_fid_family *tmpl) +{ + u16 nr_fids = tmpl->end_index - tmpl->start_index + 1; + struct mlxsw_sp_fid_family *fid_family; + int err; + + fid_family = kmemdup(tmpl, sizeof(*fid_family), GFP_KERNEL); + if (!fid_family) + return -ENOMEM; + + fid_family->mlxsw_sp = mlxsw_sp; + INIT_LIST_HEAD(&fid_family->fids_list); + fid_family->fids_bitmap = kcalloc(BITS_TO_LONGS(nr_fids), + sizeof(unsigned long), GFP_KERNEL); + if (!fid_family->fids_bitmap) { + err = -ENOMEM; + goto err_alloc_fids_bitmap; + } + + if (fid_family->flood_tables) { + err = mlxsw_sp_fid_flood_tables_init(fid_family); + if (err) + goto err_fid_flood_tables_init; + } + + mlxsw_sp->fid_core->fid_family_arr[tmpl->type] = 
fid_family; + + return 0; + +err_fid_flood_tables_init: + kfree(fid_family->fids_bitmap); +err_alloc_fids_bitmap: + kfree(fid_family); + return err; +} + +static void +mlxsw_sp_fid_family_unregister(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid_family *fid_family) +{ + mlxsw_sp->fid_core->fid_family_arr[fid_family->type] = NULL; + kfree(fid_family->fids_bitmap); + WARN_ON_ONCE(!list_empty(&fid_family->fids_list)); + kfree(fid_family); +} + +int mlxsw_sp_port_fids_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + /* Track number of FIDs configured on the port with mapping type + * PORT_VID_TO_FID, so that we know when to transition the port + * back to non-virtual (VLAN) mode. + */ + mlxsw_sp->fid_core->port_fid_mappings[mlxsw_sp_port->local_port] = 0; + + return mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false); +} + +void mlxsw_sp_port_fids_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + mlxsw_sp->fid_core->port_fid_mappings[mlxsw_sp_port->local_port] = 0; +} + +int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); + struct mlxsw_sp_fid_core *fid_core; + int err, i; + + fid_core = kzalloc(sizeof(*mlxsw_sp->fid_core), GFP_KERNEL); + if (!fid_core) + return -ENOMEM; + mlxsw_sp->fid_core = fid_core; + + fid_core->port_fid_mappings = kcalloc(max_ports, sizeof(unsigned int), + GFP_KERNEL); + if (!fid_core->port_fid_mappings) { + err = -ENOMEM; + goto err_alloc_port_fid_mappings; + } + + for (i = 0; i < MLXSW_SP_FID_TYPE_MAX; i++) { + err = mlxsw_sp_fid_family_register(mlxsw_sp, + mlxsw_sp_fid_family_arr[i]); + + if (err) + goto err_fid_ops_register; + } + + return 0; + +err_fid_ops_register: + for (i--; i >= 0; i--) { + struct mlxsw_sp_fid_family *fid_family; + + fid_family = fid_core->fid_family_arr[i]; + mlxsw_sp_fid_family_unregister(mlxsw_sp, fid_family); + } + kfree(fid_core->port_fid_mappings); +err_alloc_port_fid_mappings: + kfree(fid_core); + return err; +} + +void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_fid_core *fid_core = mlxsw_sp->fid_core; + int i; + + for (i = 0; i < MLXSW_SP_FID_TYPE_MAX; i++) + mlxsw_sp_fid_family_unregister(mlxsw_sp, + fid_core->fid_family_arr[i]); + kfree(fid_core->port_fid_mappings); + kfree(fid_core); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 7d87e23578a3..8aace9a06a5d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -45,7 +45,7 @@ #include "core_acl_flex_keys.h" static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, - struct net_device *dev, + struct net_device *dev, bool ingress, struct mlxsw_sp_acl_rule_info *rulei, struct tcf_exts *exts) { @@ -53,7 +53,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, LIST_HEAD(actions); int err; - if (tc_no_actions(exts)) + if (!tcf_exts_has_actions(exts)) return 0; /* Count action is inserted first */ @@ -67,12 +67,34 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_acl_rulei_act_drop(rulei); if (err) return err; + } else if (is_tcf_gact_trap(a)) { + err = mlxsw_sp_acl_rulei_act_trap(rulei); + if (err) + return err; + } else if (is_tcf_gact_goto_chain(a)) { + u32 chain_index = tcf_gact_goto_chain_index(a); + struct mlxsw_sp_acl_ruleset *ruleset; + u16 group_id; + + ruleset = 
mlxsw_sp_acl_ruleset_lookup(mlxsw_sp, dev, + ingress, + chain_index, + MLXSW_SP_ACL_PROFILE_FLOWER); + if (IS_ERR(ruleset)) + return PTR_ERR(ruleset); + + group_id = mlxsw_sp_acl_ruleset_group_id(ruleset); + mlxsw_sp_acl_rulei_act_jump(rulei, group_id); } else if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); struct net_device *out_dev; + struct mlxsw_sp_fid *fid; + u16 fid_index; + fid = mlxsw_sp_acl_dummy_fid(mlxsw_sp); + fid_index = mlxsw_sp_fid_index(fid); err = mlxsw_sp_acl_rulei_act_fid_set(mlxsw_sp, rulei, - MLXSW_SP_DUMMY_FID); + fid_index); if (err) return err; @@ -178,11 +200,72 @@ static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp, return 0; } +static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct tc_cls_flower_offload *f, + u8 ip_proto) +{ + struct flow_dissector_key_tcp *key, *mask; + + if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) + return 0; + + if (ip_proto != IPPROTO_TCP) { + dev_err(mlxsw_sp->bus_info->dev, "TCP keys supported only for TCP\n"); + return -EINVAL; + } + + key = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_TCP, + f->key); + mask = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_TCP, + f->mask); + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_TCP_FLAGS, + ntohs(key->flags), ntohs(mask->flags)); + return 0; +} + +static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct tc_cls_flower_offload *f, + u16 n_proto) +{ + struct flow_dissector_key_ip *key, *mask; + + if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) + return 0; + + if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) { + dev_err(mlxsw_sp->bus_info->dev, "IP keys supported only for IPv4/6\n"); + return -EINVAL; + } + + key = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IP, + f->key); + mask = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IP, + f->mask); + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_TTL_, + key->ttl, mask->ttl); + + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_ECN, + key->tos & 0x3, mask->tos & 0x3); + + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_DSCP, + key->tos >> 6, mask->tos >> 6); + + return 0; +} + static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, - struct net_device *dev, + struct net_device *dev, bool ingress, struct mlxsw_sp_acl_rule_info *rulei, struct tc_cls_flower_offload *f) { + u16 n_proto_mask = 0; + u16 n_proto_key = 0; u16 addr_type = 0; u8 ip_proto = 0; int err; @@ -194,12 +277,14 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_TCP) | + BIT(FLOW_DISSECTOR_KEY_IP) | BIT(FLOW_DISSECTOR_KEY_VLAN))) { dev_err(mlxsw_sp->bus_info->dev, "Unsupported key\n"); return -EOPNOTSUPP; } - mlxsw_sp_acl_rulei_priority(rulei, f->prio); + mlxsw_sp_acl_rulei_priority(rulei, f->common.prio); if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { struct flow_dissector_key_control *key = @@ -218,8 +303,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, skb_flow_dissector_target(f->dissector, FLOW_DISSECTOR_KEY_BASIC, f->mask); - u16 n_proto_key = ntohs(key->n_proto); - u16 n_proto_mask = ntohs(mask->n_proto); + n_proto_key = ntohs(key->n_proto); + n_proto_mask = ntohs(mask->n_proto); if (n_proto_key == ETH_P_ALL) { 
n_proto_key = 0; @@ -285,12 +370,20 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_flower_parse_ports(mlxsw_sp, rulei, f, ip_proto); if (err) return err; + err = mlxsw_sp_flower_parse_tcp(mlxsw_sp, rulei, f, ip_proto); + if (err) + return err; + + err = mlxsw_sp_flower_parse_ip(mlxsw_sp, rulei, f, n_proto_key & n_proto_mask); + if (err) + return err; - return mlxsw_sp_flower_parse_actions(mlxsw_sp, dev, rulei, f->exts); + return mlxsw_sp_flower_parse_actions(mlxsw_sp, dev, ingress, + rulei, f->exts); } int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, - __be16 protocol, struct tc_cls_flower_offload *f) + struct tc_cls_flower_offload *f) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct net_device *dev = mlxsw_sp_port->dev; @@ -300,6 +393,7 @@ int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, int err; ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, dev, ingress, + f->common.chain_index, MLXSW_SP_ACL_PROFILE_FLOWER); if (IS_ERR(ruleset)) return PTR_ERR(ruleset); @@ -311,7 +405,7 @@ int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, } rulei = mlxsw_sp_acl_rule_rulei(rule); - err = mlxsw_sp_flower_parse(mlxsw_sp, dev, rulei, f); + err = mlxsw_sp_flower_parse(mlxsw_sp, dev, ingress, rulei, f); if (err) goto err_flower_parse; @@ -343,7 +437,7 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, struct mlxsw_sp_acl_rule *rule; ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev, - ingress, + ingress, f->common.chain_index, MLXSW_SP_ACL_PROFILE_FLOWER); if (IS_ERR(ruleset)) return; @@ -363,15 +457,13 @@ int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_acl_ruleset *ruleset; struct mlxsw_sp_acl_rule *rule; - struct tc_action *a; - LIST_HEAD(actions); u64 packets; u64 lastuse; u64 bytes; int err; ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev, - ingress, + ingress, f->common.chain_index, MLXSW_SP_ACL_PROFILE_FLOWER); if (WARN_ON(IS_ERR(ruleset))) return -EINVAL; @@ -385,13 +477,7 @@ int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, if (err) goto err_rule_get_stats; - preempt_disable(); - - tcf_exts_to_list(f->exts, &actions); - list_for_each_entry(a, &actions, list) - tcf_action_stats_update(a, bytes, packets, lastuse); - - preempt_enable(); + tcf_exts_stats_update(f->exts, bytes, packets, lastuse); mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c new file mode 100644 index 000000000000..702fe945227c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -0,0 +1,214 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Petr Machata <petrm@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <net/ip_tunnels.h> + +#include "spectrum_ipip.h" + +static bool +mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return !!(tun->parms.i_flags & TUNNEL_KEY); +} + +static bool +mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return !!(tun->parms.o_flags & TUNNEL_KEY); +} + +static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return mlxsw_sp_ipip_netdev_has_ikey(ol_dev) ? + be32_to_cpu(tun->parms.i_key) : 0; +} + +static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return mlxsw_sp_ipip_netdev_has_okey(ol_dev) ? + be32_to_cpu(tun->parms.o_key) : 0; +} + +static int +mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); + __be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev); + char ratr_pl[MLXSW_REG_RATR_LEN]; + + mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, + true, MLXSW_REG_RATR_TYPE_IPIP, + adj_index, rif_index); + mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4)); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); +} + +static int +mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp, + u32 tunnel_index, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + bool has_ikey = mlxsw_sp_ipip_netdev_has_ikey(ipip_entry->ol_dev); + u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); + u32 ikey = mlxsw_sp_ipip_netdev_ikey(ipip_entry->ol_dev); + char rtdp_pl[MLXSW_REG_RTDP_LEN]; + unsigned int type_check; + u32 daddr4; + + mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index); + + type_check = has_ikey ? + MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY : + MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE; + + /* Linux demuxes tunnels based on packet SIP (which must match tunnel + * remote IP). Thus configure decap so that it filters out packets that + * are not IPv4 or have the wrong SIP. IPIP_DECAP_ERROR trap is + * generated for packets that fail this criterion. 
Linux then handles + * such packets in slow path and generates ICMP destination unreachable. + */ + daddr4 = be32_to_cpu(mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev)); + mlxsw_reg_rtdp_ipip4_pack(rtdp_pl, rif_index, + MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4, + type_check, has_ikey, daddr4, ikey); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl); +} + +static int +mlxsw_sp_ipip_fib_entry_op_gre4_ralue(struct mlxsw_sp *mlxsw_sp, + u32 dip, u8 prefix_len, u16 ul_vr_id, + enum mlxsw_reg_ralue_op op, + u32 tunnel_index) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + + mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_REG_RALXX_PROTOCOL_IPV4, op, + ul_vr_id, prefix_len, dip); + mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, tunnel_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int mlxsw_sp_ipip_fib_entry_op_gre4(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + enum mlxsw_reg_ralue_op op, + u32 tunnel_index) +{ + u16 ul_vr_id = mlxsw_sp_ipip_lb_ul_vr_id(ipip_entry->ol_lb); + __be32 dip; + int err; + + err = mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(mlxsw_sp, tunnel_index, + ipip_entry); + if (err) + return err; + + dip = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4, + ipip_entry->ol_dev).addr4; + return mlxsw_sp_ipip_fib_entry_op_gre4_ralue(mlxsw_sp, be32_to_cpu(dip), + 32, ul_vr_id, op, + tunnel_index); +} + +static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev) +{ + union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev); + union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev); + union mlxsw_sp_l3addr naddr = {0}; + + /* Tunnels with unset local or remote address are valid in Linux and + * used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access + * (NBMA) tunnels. In principle these can be offloaded, but the driver + * currently doesn't support this. So punt. + */ + return memcmp(&saddr, &naddr, sizeof(naddr)) && + memcmp(&daddr, &naddr, sizeof(naddr)); +} + +static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev, + enum mlxsw_sp_l3proto ol_proto) +{ + struct ip_tunnel *tunnel = netdev_priv(ol_dev); + __be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */ + bool inherit_ttl = tunnel->parms.iph.ttl == 0; + bool inherit_tos = tunnel->parms.iph.tos & 0x1; + + return (tunnel->parms.i_flags & ~okflags) == 0 && + (tunnel->parms.o_flags & ~okflags) == 0 && + inherit_ttl && inherit_tos && + mlxsw_sp_ipip_tunnel_complete(MLXSW_SP_L3_PROTO_IPV4, ol_dev); +} + +static struct mlxsw_sp_rif_ipip_lb_config +mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev) +{ + enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; + + lb_ipipt = mlxsw_sp_ipip_netdev_has_okey(ol_dev) ? 
+ MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP : + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP; + return (struct mlxsw_sp_rif_ipip_lb_config){ + .lb_ipipt = lb_ipipt, + .okey = mlxsw_sp_ipip_netdev_okey(ol_dev), + .ul_protocol = MLXSW_SP_L3_PROTO_IPV4, + .saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4, + ol_dev), + }; +} + +static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = { + .dev_type = ARPHRD_IPGRE, + .ul_proto = MLXSW_SP_L3_PROTO_IPV4, + .nexthop_update = mlxsw_sp_ipip_nexthop_update_gre4, + .fib_entry_op = mlxsw_sp_ipip_fib_entry_op_gre4, + .can_offload = mlxsw_sp_ipip_can_offload_gre4, + .ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre4, +}; + +const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[] = { + [MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h new file mode 100644 index 000000000000..1c2db831d83b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -0,0 +1,79 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Petr Machata <petrm@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_IPIP_H_ +#define _MLXSW_IPIP_H_ + +#include "spectrum_router.h" +#include <net/ip_fib.h> + +enum mlxsw_sp_ipip_type { + MLXSW_SP_IPIP_TYPE_GRE4, + MLXSW_SP_IPIP_TYPE_MAX, +}; + +struct mlxsw_sp_ipip_entry { + enum mlxsw_sp_ipip_type ipipt; + struct net_device *ol_dev; /* Overlay. */ + struct mlxsw_sp_rif_ipip_lb *ol_lb; + unsigned int ref_count; /* Number of next hops using the tunnel. 
*/ + struct mlxsw_sp_fib_entry *decap_fib_entry; + struct list_head ipip_list_node; +}; + +struct mlxsw_sp_ipip_ops { + int dev_type; + enum mlxsw_sp_l3proto ul_proto; /* Underlay. */ + + int (*nexthop_update)(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_ipip_entry *ipip_entry); + + bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev, + enum mlxsw_sp_l3proto ol_proto); + + /* Return a configuration for creating an overlay loopback RIF. */ + struct mlxsw_sp_rif_ipip_lb_config + (*ol_loopback_config)(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev); + + int (*fib_entry_op)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + enum mlxsw_reg_ralue_op op, + u32 tunnel_index); +}; + +extern const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[]; + +#endif /* _MLXSW_IPIP_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 0744452a0b18..5189022a1c8c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1,9 +1,10 @@ /* * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c - * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com> * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com> + * Copyright (c) 2017 Petr Machata <petrm@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,35 +43,119 @@ #include <linux/notifier.h> #include <linux/inetdevice.h> #include <linux/netdevice.h> +#include <linux/if_bridge.h> +#include <linux/socket.h> +#include <linux/route.h> #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> #include <net/ip_fib.h> +#include <net/ip6_fib.h> #include <net/fib_rules.h> +#include <net/ip_tunnels.h> #include <net/l3mdev.h> +#include <net/addrconf.h> +#include <net/ndisc.h> +#include <net/ipv6.h> +#include <net/fib_notifier.h> #include "spectrum.h" #include "core.h" #include "reg.h" #include "spectrum_cnt.h" #include "spectrum_dpipe.h" +#include "spectrum_ipip.h" #include "spectrum_router.h" +struct mlxsw_sp_vr; +struct mlxsw_sp_lpm_tree; +struct mlxsw_sp_rif_ops; + +struct mlxsw_sp_router { + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif **rifs; + struct mlxsw_sp_vr *vrs; + struct rhashtable neigh_ht; + struct rhashtable nexthop_group_ht; + struct rhashtable nexthop_ht; + struct { + struct mlxsw_sp_lpm_tree *trees; + unsigned int tree_count; + } lpm; + struct { + struct delayed_work dw; + unsigned long interval; /* ms */ + } neighs_update; + struct delayed_work nexthop_probe_dw; +#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ + struct list_head nexthop_neighs_list; + struct list_head ipip_list; + bool aborted; + struct notifier_block fib_nb; + const struct mlxsw_sp_rif_ops **rif_ops_arr; + const struct mlxsw_sp_ipip_ops **ipip_ops_arr; +}; + struct mlxsw_sp_rif { struct list_head nexthop_list; struct list_head neigh_list; struct net_device *dev; - struct mlxsw_sp_fid *f; + struct mlxsw_sp_fid *fid; unsigned char addr[ETH_ALEN]; int mtu; u16 rif_index; u16 vr_id; + const struct mlxsw_sp_rif_ops *ops; + struct mlxsw_sp *mlxsw_sp; + unsigned int counter_ingress; bool counter_ingress_valid; unsigned int 
counter_egress; bool counter_egress_valid; }; +struct mlxsw_sp_rif_params { + struct net_device *dev; + union { + u16 system_port; + u16 lag_id; + }; + u16 vid; + bool lag; +}; + +struct mlxsw_sp_rif_subport { + struct mlxsw_sp_rif common; + union { + u16 system_port; + u16 lag_id; + }; + u16 vid; + bool lag; +}; + +struct mlxsw_sp_rif_ipip_lb { + struct mlxsw_sp_rif common; + struct mlxsw_sp_rif_ipip_lb_config lb_config; + u16 ul_vr_id; /* Reserved for Spectrum-2. */ +}; + +struct mlxsw_sp_rif_params_ipip_lb { + struct mlxsw_sp_rif_params common; + struct mlxsw_sp_rif_ipip_lb_config lb_config; +}; + +struct mlxsw_sp_rif_ops { + enum mlxsw_sp_rif_type type; + size_t rif_size; + + void (*setup)(struct mlxsw_sp_rif *rif, + const struct mlxsw_sp_rif_params *params); + int (*configure)(struct mlxsw_sp_rif *rif); + void (*deconfigure)(struct mlxsw_sp_rif *rif); + struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif); +}; + static unsigned int * mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir) @@ -219,25 +304,37 @@ void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_rif_counter_valid_set(rif, dir, false); } +static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct devlink *devlink; + + devlink = priv_to_devlink(mlxsw_sp->core); + if (!devlink_dpipe_table_counter_enabled(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ERIF)) + return; + mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS); +} + +static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + + mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS); +} + static struct mlxsw_sp_rif * mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); -#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \ - for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT) +#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1) -static bool -mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1, - struct mlxsw_sp_prefix_usage *prefix_usage2) -{ - unsigned char prefix; +struct mlxsw_sp_prefix_usage { + DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT); +}; - mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) { - if (!test_bit(prefix, prefix_usage2->b)) - return false; - } - return true; -} +#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \ + for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT) static bool mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1, @@ -284,9 +381,18 @@ enum mlxsw_sp_fib_entry_type { MLXSW_SP_FIB_ENTRY_TYPE_REMOTE, MLXSW_SP_FIB_ENTRY_TYPE_LOCAL, MLXSW_SP_FIB_ENTRY_TYPE_TRAP, + + /* This is a special case of local delivery, where a packet should be + * decapsulated on reception. Note that there is no corresponding ENCAP, + * because that's a type of next hop, not of FIB entry. (There can be + * several next hops in a REMOTE entry, and some of them may be + * encapsulating entries.) 
+ */ + MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP, }; struct mlxsw_sp_nexthop_group; +struct mlxsw_sp_fib; struct mlxsw_sp_fib_node { struct list_head entry_list; @@ -296,11 +402,9 @@ struct mlxsw_sp_fib_node { struct mlxsw_sp_fib_key key; }; -struct mlxsw_sp_fib_entry_params { - u32 tb_id; - u32 prio; - u8 tos; - u8 type; +struct mlxsw_sp_fib_entry_decap { + struct mlxsw_sp_ipip_entry *ipip_entry; + u32 tunnel_index; }; struct mlxsw_sp_fib_entry { @@ -309,8 +413,33 @@ struct mlxsw_sp_fib_entry { enum mlxsw_sp_fib_entry_type type; struct list_head nexthop_group_node; struct mlxsw_sp_nexthop_group *nh_group; - struct mlxsw_sp_fib_entry_params params; - bool offloaded; + struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */ +}; + +struct mlxsw_sp_fib4_entry { + struct mlxsw_sp_fib_entry common; + u32 tb_id; + u32 prio; + u8 tos; + u8 type; +}; + +struct mlxsw_sp_fib6_entry { + struct mlxsw_sp_fib_entry common; + struct list_head rt6_list; + unsigned int nrt6; +}; + +struct mlxsw_sp_rt6 { + struct list_head list; + struct rt6_info *rt; +}; + +struct mlxsw_sp_lpm_tree { + u8 id; /* tree ID */ + unsigned int ref_count; + enum mlxsw_sp_l3proto proto; + struct mlxsw_sp_prefix_usage prefix_usage; }; struct mlxsw_sp_fib { @@ -323,6 +452,14 @@ struct mlxsw_sp_fib { enum mlxsw_sp_l3proto proto; }; +struct mlxsw_sp_vr { + u16 id; /* virtual router ID */ + u32 tb_id; /* kernel fib table id */ + unsigned int rif_count; + struct mlxsw_sp_fib *fib4; + struct mlxsw_sp_fib *fib6; +}; + static const struct rhashtable_params mlxsw_sp_fib_ht_params; static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr, @@ -361,8 +498,8 @@ mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp) static struct mlxsw_sp_lpm_tree *lpm_tree; int i; - for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) { - lpm_tree = &mlxsw_sp->router.lpm.trees[i]; + for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) { + lpm_tree = &mlxsw_sp->router->lpm.trees[i]; if (lpm_tree->ref_count == 0) return lpm_tree; } @@ -380,15 +517,15 @@ static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); } -static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_lpm_tree *lpm_tree) +static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) { char ralta_pl[MLXSW_REG_RALTA_LEN]; mlxsw_reg_ralta_pack(ralta_pl, false, (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, lpm_tree->id); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); } static int @@ -444,10 +581,10 @@ err_left_struct_set: return ERR_PTR(err); } -static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_lpm_tree *lpm_tree) +static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) { - return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree); + mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree); } static struct mlxsw_sp_lpm_tree * @@ -458,33 +595,30 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_lpm_tree *lpm_tree; int i; - for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) { - lpm_tree = &mlxsw_sp->router.lpm.trees[i]; + for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) { + lpm_tree = &mlxsw_sp->router->lpm.trees[i]; if (lpm_tree->ref_count != 0 && lpm_tree->proto == proto && mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage, prefix_usage)) - goto inc_ref_count; + return 
lpm_tree; } - lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, - proto); - if (IS_ERR(lpm_tree)) - return lpm_tree; + return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto); +} -inc_ref_count: +static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree) +{ lpm_tree->ref_count++; - return lpm_tree; } -static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_lpm_tree *lpm_tree) +static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) { if (--lpm_tree->ref_count == 0) - return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree); - return 0; + mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree); } -#define MLXSW_SP_LPM_TREE_MIN 2 /* trees 0 and 1 are reserved */ +#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */ static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp) { @@ -496,15 +630,15 @@ static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp) return -EIO; max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES); - mlxsw_sp->router.lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN; - mlxsw_sp->router.lpm.trees = kcalloc(mlxsw_sp->router.lpm.tree_count, + mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN; + mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count, sizeof(struct mlxsw_sp_lpm_tree), GFP_KERNEL); - if (!mlxsw_sp->router.lpm.trees) + if (!mlxsw_sp->router->lpm.trees) return -ENOMEM; - for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) { - lpm_tree = &mlxsw_sp->router.lpm.trees[i]; + for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) { + lpm_tree = &mlxsw_sp->router->lpm.trees[i]; lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN; } @@ -513,12 +647,12 @@ static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp) static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp) { - kfree(mlxsw_sp->router.lpm.trees); + kfree(mlxsw_sp->router->lpm.trees); } static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr) { - return !!vr->fib4; + return !!vr->fib4 || !!vr->fib6; } static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) @@ -527,7 +661,7 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) int i; for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { - vr = &mlxsw_sp->router.vrs[i]; + vr = &mlxsw_sp->router->vrs[i]; if (!mlxsw_sp_vr_is_used(vr)) return vr; } @@ -535,13 +669,13 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) } static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_fib *fib) + const struct mlxsw_sp_fib *fib, u8 tree_id) { char raltb_pl[MLXSW_REG_RALTB_LEN]; mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id, (enum mlxsw_reg_ralxx_protocol) fib->proto, - fib->lpm_tree->id); + tree_id); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); } @@ -573,7 +707,7 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, tb_id = mlxsw_sp_fix_tb_id(tb_id); for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { - vr = &mlxsw_sp->router.vrs[i]; + vr = &mlxsw_sp->router->vrs[i]; if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id) return vr; } @@ -587,7 +721,7 @@ static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr, case MLXSW_SP_L3_PROTO_IPV4: return vr->fib4; case MLXSW_SP_L3_PROTO_IPV6: - BUG_ON(1); + return vr->fib6; } return NULL; } @@ -596,6 +730,7 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, u32 tb_id) { struct mlxsw_sp_vr *vr; + int err; vr 
= mlxsw_sp_vr_find_unused(mlxsw_sp); if (!vr) @@ -603,54 +738,26 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4); if (IS_ERR(vr->fib4)) return ERR_CAST(vr->fib4); + vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(vr->fib6)) { + err = PTR_ERR(vr->fib6); + goto err_fib6_create; + } vr->tb_id = tb_id; return vr; -} -static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr) -{ +err_fib6_create: mlxsw_sp_fib_destroy(vr->fib4); vr->fib4 = NULL; + return ERR_PTR(err); } -static int -mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib *fib, - struct mlxsw_sp_prefix_usage *req_prefix_usage) +static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr) { - struct mlxsw_sp_lpm_tree *lpm_tree = fib->lpm_tree; - struct mlxsw_sp_lpm_tree *new_tree; - int err; - - if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, &lpm_tree->prefix_usage)) - return 0; - - new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage, - fib->proto); - if (IS_ERR(new_tree)) { - /* We failed to get a tree according to the required - * prefix usage. However, the current tree might be still good - * for us if our requirement is subset of the prefixes used - * in the tree. - */ - if (mlxsw_sp_prefix_usage_subset(req_prefix_usage, - &lpm_tree->prefix_usage)) - return 0; - return PTR_ERR(new_tree); - } - - /* Prevent packet loss by overwriting existing binding */ - fib->lpm_tree = new_tree; - err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib); - if (err) - goto err_tree_bind; - mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); - - return 0; - -err_tree_bind: - fib->lpm_tree = lpm_tree; - mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree); - return err; + mlxsw_sp_fib_destroy(vr->fib6); + vr->fib6 = NULL; + mlxsw_sp_fib_destroy(vr->fib4); + vr->fib4 = NULL; } static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id) @@ -666,10 +773,105 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id) static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr) { - if (!vr->rif_count && list_empty(&vr->fib4->node_list)) + if (!vr->rif_count && list_empty(&vr->fib4->node_list) && + list_empty(&vr->fib6->node_list)) mlxsw_sp_vr_destroy(vr); } +static bool +mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr, + enum mlxsw_sp_l3proto proto, u8 tree_id) +{ + struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto); + + if (!mlxsw_sp_vr_is_used(vr)) + return false; + if (fib->lpm_tree && fib->lpm_tree->id == tree_id) + return true; + return false; +} + +static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib *fib, + struct mlxsw_sp_lpm_tree *new_tree) +{ + struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree; + int err; + + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id); + if (err) + return err; + fib->lpm_tree = new_tree; + mlxsw_sp_lpm_tree_hold(new_tree); + mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree); + return 0; +} + +static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib *fib, + struct mlxsw_sp_lpm_tree *new_tree) +{ + struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree; + enum mlxsw_sp_l3proto proto = fib->proto; + u8 old_id, new_id = new_tree->id; + struct mlxsw_sp_vr *vr; + int i, err; + + if (!old_tree) + goto no_replace; + old_id = old_tree->id; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + vr = &mlxsw_sp->router->vrs[i]; + if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id)) + 
continue; + err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp, + mlxsw_sp_vr_fib(vr, proto), + new_tree); + if (err) + goto err_tree_replace; + } + + return 0; + +err_tree_replace: + for (i--; i >= 0; i--) { + if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id)) + continue; + mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp, + mlxsw_sp_vr_fib(vr, proto), + old_tree); + } + return err; + +no_replace: + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id); + if (err) + return err; + fib->lpm_tree = new_tree; + mlxsw_sp_lpm_tree_hold(new_tree); + return 0; +} + +static void +mlxsw_sp_vrs_prefixes(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_l3proto proto, + struct mlxsw_sp_prefix_usage *req_prefix_usage) +{ + int i; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i]; + struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto); + unsigned char prefix; + + if (!mlxsw_sp_vr_is_used(vr)) + continue; + mlxsw_sp_prefix_usage_for_each(prefix, &fib->prefix_usage) + mlxsw_sp_prefix_usage_set(req_prefix_usage, prefix); + } +} + static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_vr *vr; @@ -680,13 +882,13 @@ static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) return -EIO; max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); - mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr), - GFP_KERNEL); - if (!mlxsw_sp->router.vrs) + mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr), + GFP_KERNEL); + if (!mlxsw_sp->router->vrs) return -ENOMEM; for (i = 0; i < max_vrs; i++) { - vr = &mlxsw_sp->router.vrs[i]; + vr = &mlxsw_sp->router->vrs[i]; vr->id = i; } @@ -706,7 +908,375 @@ static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp) */ mlxsw_core_flush_owq(); mlxsw_sp_router_fib_flush(mlxsw_sp); - kfree(mlxsw_sp->router.vrs); + kfree(mlxsw_sp->router->vrs); +} + +static struct net_device * +__mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + struct net *net = dev_net(ol_dev); + + return __dev_get_by_index(net, tun->parms.link); +} + +static u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev) +{ + struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + + if (d) + return l3mdev_fib_table(d) ? : RT_TABLE_MAIN; + else + return l3mdev_fib_table(ol_dev) ? 
: RT_TABLE_MAIN; +} + +static struct mlxsw_sp_rif * +mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_rif_params *params); + +static struct mlxsw_sp_rif_ipip_lb * +mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_ipip_type ipipt, + struct net_device *ol_dev) +{ + struct mlxsw_sp_rif_params_ipip_lb lb_params; + const struct mlxsw_sp_ipip_ops *ipip_ops; + struct mlxsw_sp_rif *rif; + + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt]; + lb_params = (struct mlxsw_sp_rif_params_ipip_lb) { + .common.dev = ol_dev, + .common.lag = false, + .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev), + }; + + rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common); + if (IS_ERR(rif)) + return ERR_CAST(rif); + return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common); +} + +static struct mlxsw_sp_ipip_entry * +mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_ipip_type ipipt, + struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + struct mlxsw_sp_ipip_entry *ret = NULL; + + ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL); + if (!ipip_entry) + return ERR_PTR(-ENOMEM); + + ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt, + ol_dev); + if (IS_ERR(ipip_entry->ol_lb)) { + ret = ERR_CAST(ipip_entry->ol_lb); + goto err_ol_ipip_lb_create; + } + + ipip_entry->ipipt = ipipt; + ipip_entry->ol_dev = ol_dev; + + return ipip_entry; + +err_ol_ipip_lb_create: + kfree(ipip_entry); + return ret; +} + +static void +mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp_ipip_entry *ipip_entry) +{ + WARN_ON(ipip_entry->ref_count > 0); + mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common); + kfree(ipip_entry); +} + +static __be32 +mlxsw_sp_ipip_netdev_saddr4(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return tun->parms.iph.saddr; +} + +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return (union mlxsw_sp_l3addr) { + .addr4 = mlxsw_sp_ipip_netdev_saddr4(ol_dev), + }; + case MLXSW_SP_L3_PROTO_IPV6: + break; + }; + + WARN_ON(1); + return (union mlxsw_sp_l3addr) { + .addr4 = 0, + }; +} + +__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return tun->parms.iph.daddr; +} + +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return (union mlxsw_sp_l3addr) { + .addr4 = mlxsw_sp_ipip_netdev_daddr4(ol_dev), + }; + case MLXSW_SP_L3_PROTO_IPV6: + break; + }; + + WARN_ON(1); + return (union mlxsw_sp_l3addr) { + .addr4 = 0, + }; +} + +static bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1, + const union mlxsw_sp_l3addr *addr2) +{ + return !memcmp(addr1, addr2, sizeof(*addr1)); +} + +static bool +mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp, + const enum mlxsw_sp_l3proto ul_proto, + union mlxsw_sp_l3addr saddr, + u32 ul_tb_id, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev); + enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt; + union mlxsw_sp_l3addr tun_saddr; + + if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto) + return false; + + tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev); + return tun_ul_tb_id == ul_tb_id && + mlxsw_sp_l3addr_eq(&tun_saddr, &saddr); +} + 
+static int +mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + u32 tunnel_index; + int err; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index); + if (err) + return err; + + ipip_entry->decap_fib_entry = fib_entry; + fib_entry->decap.ipip_entry = ipip_entry; + fib_entry->decap.tunnel_index = tunnel_index; + return 0; +} + +static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + /* Unlink this node from the IPIP entry that it's the decap entry of. */ + fib_entry->decap.ipip_entry->decap_fib_entry = NULL; + fib_entry->decap.ipip_entry = NULL; + mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index); +} + +static struct mlxsw_sp_fib_node * +mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr, + size_t addr_len, unsigned char prefix_len); +static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry); + +static void +mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry; + + mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry); + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + + mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); +} + +static void +mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct mlxsw_sp_fib_entry *decap_fib_entry) +{ + if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry, + ipip_entry)) + return; + decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP; + + if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry)) + mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); +} + +/* Given an IPIP entry, find the corresponding decap route. 
*/ +static struct mlxsw_sp_fib_entry * +mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + static struct mlxsw_sp_fib_node *fib_node; + const struct mlxsw_sp_ipip_ops *ipip_ops; + struct mlxsw_sp_fib_entry *fib_entry; + unsigned char saddr_prefix_len; + union mlxsw_sp_l3addr saddr; + struct mlxsw_sp_fib *ul_fib; + struct mlxsw_sp_vr *ul_vr; + const void *saddrp; + size_t saddr_len; + u32 ul_tb_id; + u32 saddr4; + + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; + + ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev); + ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id); + if (!ul_vr) + return NULL; + + ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto); + saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto, + ipip_entry->ol_dev); + + switch (ipip_ops->ul_proto) { + case MLXSW_SP_L3_PROTO_IPV4: + saddr4 = be32_to_cpu(saddr.addr4); + saddrp = &saddr4; + saddr_len = 4; + saddr_prefix_len = 32; + break; + case MLXSW_SP_L3_PROTO_IPV6: + WARN_ON(1); + return NULL; + } + + fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len, + saddr_prefix_len); + if (!fib_node || list_empty(&fib_node->entry_list)) + return NULL; + + fib_entry = list_first_entry(&fib_node->entry_list, + struct mlxsw_sp_fib_entry, list); + if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP) + return NULL; + + return fib_entry; +} + +static struct mlxsw_sp_ipip_entry * +mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_ipip_type ipipt, + struct net_device *ol_dev) +{ + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev); + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct mlxsw_sp_fib_entry *decap_fib_entry; + struct mlxsw_sp_ipip_entry *ipip_entry; + enum mlxsw_sp_l3proto ul_proto; + union mlxsw_sp_l3addr saddr; + + list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, + ipip_list_node) { + if (ipip_entry->ol_dev == ol_dev) + goto inc_ref_count; + + /* The configuration where several tunnels have the same local + * address in the same underlay table needs special treatment in + * the HW. That is currently not implemented in the driver. + */ + ul_proto = router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto; + saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev); + if (mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr, + ul_tb_id, ipip_entry)) + return ERR_PTR(-EEXIST); + } + + ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev); + if (IS_ERR(ipip_entry)) + return ipip_entry; + + decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry); + if (decap_fib_entry) + mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, + decap_fib_entry); + + list_add_tail(&ipip_entry->ipip_list_node, + &mlxsw_sp->router->ipip_list); + +inc_ref_count: + ++ipip_entry->ref_count; + return ipip_entry; +} + +static void +mlxsw_sp_ipip_entry_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + if (--ipip_entry->ref_count == 0) { + list_del(&ipip_entry->ipip_list_node); + if (ipip_entry->decap_fib_entry) + mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); + mlxsw_sp_ipip_entry_destroy(ipip_entry); + } +} + +static bool +mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ul_dev, + enum mlxsw_sp_l3proto ul_proto, + union mlxsw_sp_l3addr ul_dip, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? 
: RT_TABLE_MAIN; + enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt; + struct net_device *ipip_ul_dev; + + if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto) + return false; + + ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev); + return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip, + ul_tb_id, ipip_entry) && + (!ipip_ul_dev || ipip_ul_dev == ul_dev); +} + +/* Given decap parameters, find the corresponding IPIP entry. */ +static struct mlxsw_sp_ipip_entry * +mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ul_dev, + enum mlxsw_sp_l3proto ul_proto, + union mlxsw_sp_l3addr ul_dip) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, + ipip_list_node) + if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev, + ul_proto, ul_dip, + ipip_entry)) + return ipip_entry; + + return NULL; } struct mlxsw_sp_neigh_key { @@ -724,6 +1294,8 @@ struct mlxsw_sp_neigh_entry { * this neigh entry */ struct list_head nexthop_neighs_list_node; + unsigned int counter_index; + bool counter_valid; }; static const struct rhashtable_params mlxsw_sp_neigh_ht_params = { @@ -732,6 +1304,62 @@ static const struct rhashtable_params mlxsw_sp_neigh_ht_params = { .key_len = sizeof(struct mlxsw_sp_neigh_key), }; +struct mlxsw_sp_neigh_entry * +mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + if (!neigh_entry) { + if (list_empty(&rif->neigh_list)) + return NULL; + else + return list_first_entry(&rif->neigh_list, + typeof(*neigh_entry), + rif_list_node); + } + if (neigh_entry->rif_list_node.next == &rif->neigh_list) + return NULL; + return list_next_entry(neigh_entry, rif_list_node); +} + +int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + return neigh_entry->key.n->tbl->family; +} + +unsigned char * +mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + return neigh_entry->ha; +} + +u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n; + + n = neigh_entry->key.n; + return ntohl(*((__be32 *) n->primary_key)); +} + +struct in6_addr * +mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n; + + n = neigh_entry->key.n; + return (struct in6_addr *) &n->primary_key; +} + +int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + u64 *p_counter) +{ + if (!neigh_entry->counter_valid) + return -EINVAL; + + return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index, + p_counter, NULL); +} + static struct mlxsw_sp_neigh_entry * mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n, u16 rif) @@ -758,7 +1386,7 @@ static int mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry) { - return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht, + return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht, &neigh_entry->ht_node, mlxsw_sp_neigh_ht_params); } @@ -767,11 +1395,58 @@ static void mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry) { - rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht, + rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht, &neigh_entry->ht_node, mlxsw_sp_neigh_ht_params); } +static bool +mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct devlink *devlink; + const char 
*table_name; + + switch (mlxsw_sp_neigh_entry_type(neigh_entry)) { + case AF_INET: + table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4; + break; + case AF_INET6: + table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6; + break; + default: + WARN_ON(1); + return false; + } + + devlink = priv_to_devlink(mlxsw_sp->core); + return devlink_dpipe_table_counter_enabled(devlink, table_name); +} + +static void +mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry)) + return; + + if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index)) + return; + + neigh_entry->counter_valid = true; +} + +static void +mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + if (!neigh_entry->counter_valid) + return; + mlxsw_sp_flow_counter_free(mlxsw_sp, + neigh_entry->counter_index); + neigh_entry->counter_valid = false; +} + static struct mlxsw_sp_neigh_entry * mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) { @@ -791,6 +1466,7 @@ mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) if (err) goto err_neigh_entry_insert; + mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry); list_add(&neigh_entry->rif_list_node, &rif->neigh_list); return neigh_entry; @@ -805,6 +1481,7 @@ mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry) { list_del(&neigh_entry->rif_list_node); + mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry); mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); mlxsw_sp_neigh_entry_free(neigh_entry); } @@ -815,16 +1492,23 @@ mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) struct mlxsw_sp_neigh_key key; key.n = n; - return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht, + return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht, &key, mlxsw_sp_neigh_ht_params); } static void mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp) { - unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); + unsigned long interval; - mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval); +#if IS_ENABLED(CONFIG_IPV6) + interval = min_t(unsigned long, + NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME), + NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME)); +#else + interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); +#endif + mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval); } static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, @@ -839,13 +1523,13 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip); - if (!mlxsw_sp->rifs[rif]) { + if (!mlxsw_sp->router->rifs[rif]) { dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n"); return; } dipn = htonl(dip); - dev = mlxsw_sp->rifs[rif]->dev; + dev = mlxsw_sp->router->rifs[rif]->dev; n = neigh_lookup(&arp_tbl, &dipn, dev); if (!n) { netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n", @@ -858,6 +1542,44 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, neigh_release(n); } +#if IS_ENABLED(CONFIG_IPV6) +static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ + struct net_device *dev; + struct neighbour *n; + struct in6_addr dip; + u16 rif; + + mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, 
&rif, + (char *) &dip); + + if (!mlxsw_sp->router->rifs[rif]) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n"); + return; + } + + dev = mlxsw_sp->router->rifs[rif]->dev; + n = neigh_lookup(&nd_tbl, &dip, dev); + if (!n) { + netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n", + &dip); + return; + } + + netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip); + neigh_event_send(n, NULL); + neigh_release(n); +} +#else +static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ +} +#endif + static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp, char *rauhtd_pl, int rec_index) @@ -881,6 +1603,15 @@ static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp, } +static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ + /* One record contains one entry. */ + mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, + rec_index); +} + static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, char *rauhtd_pl, int rec_index) { @@ -890,7 +1621,8 @@ static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, rec_index); break; case MLXSW_REG_RAUHTD_TYPE_IPV6: - WARN_ON_ONCE(1); + mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl, + rec_index); break; } } @@ -915,22 +1647,20 @@ static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl) return false; } -static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) +static int +__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + enum mlxsw_reg_rauhtd_type type) { - char *rauhtd_pl; - u8 num_rec; - int i, err; - - rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL); - if (!rauhtd_pl) - return -ENOMEM; + int i, num_rec; + int err; /* Make sure the neighbour's netdev isn't removed in the * process. */ rtnl_lock(); do { - mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4); + mlxsw_reg_rauhtd_pack(rauhtd_pl, type); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd), rauhtd_pl); if (err) { @@ -944,6 +1674,27 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl)); rtnl_unlock(); + return err; +} + +static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) +{ + enum mlxsw_reg_rauhtd_type type; + char *rauhtd_pl; + int err; + + rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL); + if (!rauhtd_pl) + return -ENOMEM; + + type = MLXSW_REG_RAUHTD_TYPE_IPV4; + err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type); + if (err) + goto out; + + type = MLXSW_REG_RAUHTD_TYPE_IPV6; + err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type); +out: kfree(rauhtd_pl); return err; } @@ -954,7 +1705,7 @@ static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp) /* Take RTNL mutex here to prevent lists from changes */ rtnl_lock(); - list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list, + list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list, nexthop_neighs_list_node) /* If this neigh have nexthops, make the kernel think this neigh * is active regardless of the traffic. 
@@ -966,33 +1717,35 @@ static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp) static void mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp) { - unsigned long interval = mlxsw_sp->router.neighs_update.interval; + unsigned long interval = mlxsw_sp->router->neighs_update.interval; - mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, + mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, msecs_to_jiffies(interval)); } static void mlxsw_sp_router_neighs_update_work(struct work_struct *work) { - struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp, - router.neighs_update.dw.work); + struct mlxsw_sp_router *router; int err; - err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp); + router = container_of(work, struct mlxsw_sp_router, + neighs_update.dw.work); + err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp); if (err) - dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity"); + dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity"); - mlxsw_sp_router_neighs_update_nh(mlxsw_sp); + mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp); - mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp); + mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp); } static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) { struct mlxsw_sp_neigh_entry *neigh_entry; - struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp, - router.nexthop_probe_dw.work); + struct mlxsw_sp_router *router; + router = container_of(work, struct mlxsw_sp_router, + nexthop_probe_dw.work); /* Iterate over nexthop neighbours, find those who are unresolved and * send arp on them. This solves the chicken-egg problem when * the nexthop wouldn't get offloaded until the neighbor is resolved @@ -1002,13 +1755,13 @@ static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) * Take RTNL mutex here to prevent lists from changes. 
*/ rtnl_lock(); - list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list, + list_for_each_entry(neigh_entry, &router->nexthop_neighs_list, nexthop_neighs_list_node) if (!neigh_entry->connected) neigh_event_send(neigh_entry->key.n, NULL); rtnl_unlock(); - mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, + mlxsw_core_schedule_dw(&router->nexthop_probe_dw, MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL); } @@ -1034,10 +1787,44 @@ mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha, dip); + if (neigh_entry->counter_valid) + mlxsw_reg_rauht_pack_counter(rauht_pl, + neigh_entry->counter_index); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); } static void +mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + enum mlxsw_reg_rauht_op op) +{ + struct neighbour *n = neigh_entry->key.n; + char rauht_pl[MLXSW_REG_RAUHT_LEN]; + const char *dip = n->primary_key; + + mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha, + dip); + if (neigh_entry->counter_valid) + mlxsw_reg_rauht_pack_counter(rauht_pl, + neigh_entry->counter_index); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); +} + +bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n = neigh_entry->key.n; + + /* Packets with a link-local destination address are trapped + * after LPM lookup and never reach the neighbour table, so + * there is no need to program such neighbours to the device. + */ + if (ipv6_addr_type((struct in6_addr *) &n->primary_key) & + IPV6_ADDR_LINKLOCAL) + return true; + return false; +} + +static void mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, bool adding) @@ -1045,11 +1832,29 @@ mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp, if (!adding && !neigh_entry->connected) return; neigh_entry->connected = adding; - if (neigh_entry->key.n->tbl == &arp_tbl) + if (neigh_entry->key.n->tbl->family == AF_INET) { mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry, mlxsw_sp_rauht_op(adding)); - else + } else if (neigh_entry->key.n->tbl->family == AF_INET6) { + if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + return; + mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry, + mlxsw_sp_rauht_op(adding)); + } else { WARN_ON_ONCE(1); + } +} + +void +mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool adding) +{ + if (adding) + mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry); + else + mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry); + mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true); } struct mlxsw_sp_neigh_event_work { @@ -1118,7 +1923,8 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, p = ptr; /* We don't care about changes in the default table. 
*/ - if (!p->dev || p->tbl != &arp_tbl) + if (!p->dev || (p->tbl->family != AF_INET && + p->tbl->family != AF_INET6)) return NOTIFY_DONE; /* We are in atomic context and can't take RTNL mutex, @@ -1130,14 +1936,14 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, mlxsw_sp = mlxsw_sp_port->mlxsw_sp; interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME)); - mlxsw_sp->router.neighs_update.interval = interval; + mlxsw_sp->router->neighs_update.interval = interval; mlxsw_sp_port_dev_put(mlxsw_sp_port); break; case NETEVENT_NEIGH_UPDATE: n = ptr; - if (n->tbl != &arp_tbl) + if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6) return NOTIFY_DONE; mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev); @@ -1171,7 +1977,7 @@ static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp) { int err; - err = rhashtable_init(&mlxsw_sp->router.neigh_ht, + err = rhashtable_init(&mlxsw_sp->router->neigh_ht, &mlxsw_sp_neigh_ht_params); if (err) return err; @@ -1182,30 +1988,20 @@ static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp); /* Create the delayed works for the activity_update */ - INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw, + INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw, mlxsw_sp_router_neighs_update_work); - INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw, + INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw, mlxsw_sp_router_probe_unresolved_nexthops); - mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0); - mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0); + mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0); + mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0); return 0; } static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp) { - cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw); - cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw); - rhashtable_destroy(&mlxsw_sp->router.neigh_ht); -} - -static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_rif *rif) -{ - char rauht_pl[MLXSW_REG_RAUHT_LEN]; - - mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL, - rif->rif_index, rif->addr); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); + cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw); + cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw); + rhashtable_destroy(&mlxsw_sp->router->neigh_ht); } static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, @@ -1213,12 +2009,18 @@ static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_neigh_entry *neigh_entry, *tmp; - mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif); list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list, - rif_list_node) + rif_list_node) { + mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false); mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); + } } +enum mlxsw_sp_nexthop_type { + MLXSW_SP_NEXTHOP_TYPE_ETH, + MLXSW_SP_NEXTHOP_TYPE_IPIP, +}; + struct mlxsw_sp_nexthop_key { struct fib_nh *fib_nh; }; @@ -1231,6 +2033,8 @@ struct mlxsw_sp_nexthop { */ struct rhash_head ht_node; struct mlxsw_sp_nexthop_key key; + unsigned char gw_addr[sizeof(struct in6_addr)]; + int ifindex; struct mlxsw_sp_rif *rif; u8 should_offload:1, /* set indicates this neigh is connected and * should be put to KVD linear area of this group. 
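One detail worth calling out from the IPv6 neighbour handling a few hunks above: mlxsw_sp_neigh_ipv6_ignore() skips link-local destinations entirely, because such packets are trapped after the LPM lookup and never consult the device's neighbour table. As a rough user-space analogue (using glibc's IN6_IS_ADDR_LINKLOCAL rather than the kernel's ipv6_addr_type(), so purely illustrative), the test reduces to:

/* Hypothetical stand-alone illustration of the "skip link-local
 * neighbours" check; the driver itself tests
 * ipv6_addr_type(n->primary_key) & IPV6_ADDR_LINKLOCAL.
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdbool.h>
#include <stdio.h>

static bool neigh6_should_skip(const struct in6_addr *dip)
{
	/* fe80::/10 destinations are trapped to the CPU, not offloaded. */
	return IN6_IS_ADDR_LINKLOCAL(dip);
}

int main(void)
{
	struct in6_addr a;

	inet_pton(AF_INET6, "fe80::1", &a);
	printf("fe80::1     -> skip=%d\n", neigh6_should_skip(&a));
	inet_pton(AF_INET6, "2001:db8::1", &a);
	printf("2001:db8::1 -> skip=%d\n", neigh6_should_skip(&a));
	return 0;
}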
@@ -1241,17 +2045,18 @@ struct mlxsw_sp_nexthop { update:1; /* set indicates that MAC of this neigh should be * updated in HW */ - struct mlxsw_sp_neigh_entry *neigh_entry; -}; - -struct mlxsw_sp_nexthop_group_key { - struct fib_info *fi; + enum mlxsw_sp_nexthop_type type; + union { + struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp_ipip_entry *ipip_entry; + }; }; struct mlxsw_sp_nexthop_group { + void *priv; struct rhash_head ht_node; struct list_head fib_list; /* list of fib entries that use this group */ - struct mlxsw_sp_nexthop_group_key key; + struct neigh_table *neigh_tbl; u8 adj_index_valid:1, gateway:1; /* routes using the group use a gateway */ u32 adj_index; @@ -1261,16 +2066,155 @@ struct mlxsw_sp_nexthop_group { #define nh_rif nexthops[0].rif }; +static struct fib_info * +mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp) +{ + return nh_grp->priv; +} + +struct mlxsw_sp_nexthop_group_cmp_arg { + enum mlxsw_sp_l3proto proto; + union { + struct fib_info *fi; + struct mlxsw_sp_fib6_entry *fib6_entry; + }; +}; + +static bool +mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp, + const struct in6_addr *gw, int ifindex) +{ + int i; + + for (i = 0; i < nh_grp->count; i++) { + const struct mlxsw_sp_nexthop *nh; + + nh = &nh_grp->nexthops[i]; + if (nh->ifindex == ifindex && + ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr)) + return true; + } + + return false; +} + +static bool +mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + if (nh_grp->count != fib6_entry->nrt6) + return false; + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct in6_addr *gw; + int ifindex; + + ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex; + gw = &mlxsw_sp_rt6->rt->rt6i_gateway; + if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex)) + return false; + } + + return true; +} + +static int +mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr) +{ + const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key; + const struct mlxsw_sp_nexthop_group *nh_grp = ptr; + + switch (cmp_arg->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp); + case MLXSW_SP_L3_PROTO_IPV6: + return !mlxsw_sp_nexthop6_group_cmp(nh_grp, + cmp_arg->fib6_entry); + default: + WARN_ON(1); + return 1; + } +} + +static int +mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp) +{ + return nh_grp->neigh_tbl->family; +} + +static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed) +{ + const struct mlxsw_sp_nexthop_group *nh_grp = data; + const struct mlxsw_sp_nexthop *nh; + struct fib_info *fi; + unsigned int val; + int i; + + switch (mlxsw_sp_nexthop_group_type(nh_grp)) { + case AF_INET: + fi = mlxsw_sp_nexthop4_group_fi(nh_grp); + return jhash(&fi, sizeof(fi), seed); + case AF_INET6: + val = nh_grp->count; + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + val ^= nh->ifindex; + } + return jhash(&val, sizeof(val), seed); + default: + WARN_ON(1); + return 0; + } +} + +static u32 +mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed) +{ + unsigned int val = fib6_entry->nrt6; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + struct net_device *dev; + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + dev = mlxsw_sp_rt6->rt->dst.dev; + val ^= dev->ifindex; + } + + return 
jhash(&val, sizeof(val), seed); +} + +static u32 +mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed) +{ + const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data; + + switch (cmp_arg->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed); + case MLXSW_SP_L3_PROTO_IPV6: + return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed); + default: + WARN_ON(1); + return 0; + } +} + static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = { - .key_offset = offsetof(struct mlxsw_sp_nexthop_group, key), .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node), - .key_len = sizeof(struct mlxsw_sp_nexthop_group_key), + .hashfn = mlxsw_sp_nexthop_group_hash, + .obj_hashfn = mlxsw_sp_nexthop_group_hash_obj, + .obj_cmpfn = mlxsw_sp_nexthop_group_cmp, }; static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { - return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_group_ht, + if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 && + !nh_grp->gateway) + return 0; + + return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht, &nh_grp->ht_node, mlxsw_sp_nexthop_group_ht_params); } @@ -1278,16 +2222,38 @@ static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { - rhashtable_remove_fast(&mlxsw_sp->router.nexthop_group_ht, + if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 && + !nh_grp->gateway) + return; + + rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht, &nh_grp->ht_node, mlxsw_sp_nexthop_group_ht_params); } static struct mlxsw_sp_nexthop_group * -mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group_key key) +mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp, + struct fib_info *fi) { - return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_group_ht, &key, + struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg; + + cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4; + cmp_arg.fi = fi; + return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht, + &cmp_arg, + mlxsw_sp_nexthop_group_ht_params); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg; + + cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6; + cmp_arg.fib6_entry = fib6_entry; + return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht, + &cmp_arg, mlxsw_sp_nexthop_group_ht_params); } @@ -1300,14 +2266,14 @@ static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = { static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { - return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_ht, + return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node, mlxsw_sp_nexthop_ht_params); } static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { - rhashtable_remove_fast(&mlxsw_sp->router.nexthop_ht, &nh->ht_node, + rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node, mlxsw_sp_nexthop_ht_params); } @@ -1315,7 +2281,7 @@ static struct mlxsw_sp_nexthop * mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_key key) { - return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_ht, &key, + return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key, mlxsw_sp_nexthop_ht_params); } @@ 
-1364,15 +2330,26 @@ static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, char ratr_pl[MLXSW_REG_RATR_LEN]; mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, - true, adj_index, neigh_entry->rif); + true, MLXSW_REG_RATR_TYPE_ETHERNET, + adj_index, neigh_entry->rif); mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); } +static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, + u32 adj_index, + struct mlxsw_sp_nexthop *nh) +{ + const struct mlxsw_sp_ipip_ops *ipip_ops; + + ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt]; + return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry); +} + static int -mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp, - bool reallocate) +mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + bool reallocate) { u32 adj_index = nh_grp->adj_index; /* base */ struct mlxsw_sp_nexthop *nh; @@ -1388,8 +2365,16 @@ mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp, } if (nh->update || reallocate) { - err = mlxsw_sp_nexthop_mac_update(mlxsw_sp, - adj_index, nh); + switch (nh->type) { + case MLXSW_SP_NEXTHOP_TYPE_ETH: + err = mlxsw_sp_nexthop_mac_update + (mlxsw_sp, adj_index, nh); + break; + case MLXSW_SP_NEXTHOP_TYPE_IPIP: + err = mlxsw_sp_nexthop_ipip_update + (mlxsw_sp, adj_index, nh); + break; + } if (err) return err; nh->update = 0; @@ -1400,8 +2385,9 @@ mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry); +static bool +mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node, + const struct mlxsw_sp_fib_entry *fib_entry); static int mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, @@ -1411,6 +2397,9 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, int err; list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { + if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node, + fib_entry)) + continue; err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); if (err) return err; @@ -1419,6 +2408,24 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, } static void +mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op, int err); + +static void +mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp) +{ + enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE; + struct mlxsw_sp_fib_entry *fib_entry; + + list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { + if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node, + fib_entry)) + continue; + mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0); + } +} + +static void mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { @@ -1440,7 +2447,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; - if (nh->should_offload ^ nh->offloaded) { + if (nh->should_offload != nh->offloaded) { offload_change = true; if (nh->should_offload) nh->update = 1; @@ -1452,8 +2459,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, /* Nothing was added or removed, so no need to reallocate. Just * update MAC on existing adjacency indexes. 
*/ - err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, - false); + err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); goto set_trap; @@ -1480,7 +2486,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, nh_grp->adj_index_valid = 1; nh_grp->adj_index = adj_index; nh_grp->ecmp_size = ecmp_size; - err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true); + err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); goto set_trap; @@ -1505,6 +2511,10 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n"); goto set_trap; } + + /* Offload state within the group changed, so update the flags. */ + mlxsw_sp_nexthop_fib_entries_refresh(nh_grp); + return; set_trap: @@ -1524,9 +2534,9 @@ set_trap: static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, bool removing) { - if (!removing && !nh->should_offload) + if (!removing) nh->should_offload = 1; - else if (removing && nh->offloaded) + else if (nh->offloaded) nh->should_offload = 0; nh->update = 1; } @@ -1568,7 +2578,6 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry; - struct fib_nh *fib_nh = nh->key.fib_nh; struct neighbour *n; u8 nud_state, dead; int err; @@ -1577,13 +2586,14 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, return 0; /* Take a reference of neigh here ensuring that neigh would - * not be detructed before the nexthop entry is finished. + * not be destructed before the nexthop entry is finished. * The reference is taken either in neigh_lookup() or * in neigh_create() in case n is not found. 
*/ - n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev); + n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev); if (!n) { - n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev); + n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr, + nh->rif->dev); if (IS_ERR(n)) return PTR_ERR(n); neigh_event_send(n, NULL); @@ -1602,7 +2612,7 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, */ if (list_empty(&neigh_entry->nexthop_list)) list_add_tail(&neigh_entry->nexthop_neighs_list_node, - &mlxsw_sp->router.nexthop_neighs_list); + &mlxsw_sp->router->nexthop_neighs_list); nh->neigh_entry = neigh_entry; list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list); @@ -1645,18 +2655,136 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, neigh_release(n); } -static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp, - struct mlxsw_sp_nexthop *nh, - struct fib_nh *fib_nh) +static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev, + enum mlxsw_sp_ipip_type *p_type) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + const struct mlxsw_sp_ipip_ops *ipip_ops; + enum mlxsw_sp_ipip_type ipipt; + + for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) { + ipip_ops = router->ipip_ops_arr[ipipt]; + if (dev->type == ipip_ops->dev_type) { + if (p_type) + *p_type = ipipt; + return true; + } + } + return false; +} + +static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_ipip_type ipipt, + struct mlxsw_sp_nexthop *nh, + struct net_device *ol_dev) +{ + if (!nh->nh_grp->gateway || nh->ipip_entry) + return 0; + + nh->ipip_entry = mlxsw_sp_ipip_entry_get(mlxsw_sp, ipipt, ol_dev); + if (IS_ERR(nh->ipip_entry)) + return PTR_ERR(nh->ipip_entry); + + __mlxsw_sp_nexthop_neigh_update(nh, false); + return 0; +} + +static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry; + + if (!ipip_entry) + return; + + __mlxsw_sp_nexthop_neigh_update(nh, true); + mlxsw_sp_ipip_entry_put(mlxsw_sp, ipip_entry); + nh->ipip_entry = NULL; +} + +static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp, + const struct fib_nh *fib_nh, + enum mlxsw_sp_ipip_type *p_ipipt) { struct net_device *dev = fib_nh->nh_dev; - struct in_device *in_dev; + + return dev && + fib_nh->nh_parent->fib_type == RTN_UNICAST && + mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt); +} + +static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + switch (nh->type) { + case MLXSW_SP_NEXTHOP_TYPE_ETH: + mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_rif_fini(nh); + break; + case MLXSW_SP_NEXTHOP_TYPE_IPIP: + mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh); + break; + } +} + +static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + struct fib_nh *fib_nh) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct net_device *dev = fib_nh->nh_dev; + enum mlxsw_sp_ipip_type ipipt; struct mlxsw_sp_rif *rif; int err; + if (mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fib_nh, &ipipt) && + router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, + MLXSW_SP_L3_PROTO_IPV4)) { + nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; + err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev); + if (err) + return err; + mlxsw_sp_nexthop_rif_init(nh, 
&nh->ipip_entry->ol_lb->common); + return 0; + } + + nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + return 0; + + mlxsw_sp_nexthop_rif_init(nh, rif); + err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); + if (err) + goto err_neigh_init; + + return 0; + +err_neigh_init: + mlxsw_sp_nexthop_rif_fini(nh); + return err; +} + +static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); +} + +static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + struct fib_nh *fib_nh) +{ + struct net_device *dev = fib_nh->nh_dev; + struct in_device *in_dev; + int err; + nh->nh_grp = nh_grp; nh->key.fib_nh = fib_nh; + memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw)); err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh); if (err) return err; @@ -1669,12 +2797,7 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, fib_nh->nh_flags & RTNH_F_LINKDOWN) return 0; - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!rif) - return 0; - mlxsw_sp_nexthop_rif_init(nh, rif); - - err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); + err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh); if (err) goto err_nexthop_neigh_init; @@ -1685,22 +2808,20 @@ err_nexthop_neigh_init: return err; } -static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) +static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) { - mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); - mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh); mlxsw_sp_nexthop_remove(mlxsw_sp, nh); } -static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp, - unsigned long event, struct fib_nh *fib_nh) +static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp, + unsigned long event, struct fib_nh *fib_nh) { struct mlxsw_sp_nexthop_key key; struct mlxsw_sp_nexthop *nh; - struct mlxsw_sp_rif *rif; - if (mlxsw_sp->router.aborted) + if (mlxsw_sp->router->aborted) return; key.fib_nh = fib_nh; @@ -1708,18 +2829,12 @@ static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp, if (WARN_ON_ONCE(!nh)) return; - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev); - if (!rif) - return; - switch (event) { case FIB_EVENT_NH_ADD: - mlxsw_sp_nexthop_rif_init(nh, rif); - mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); + mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh); break; case FIB_EVENT_NH_DEL: - mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); - mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh); break; } @@ -1732,14 +2847,20 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, *tmp; list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) { - mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); - mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); } } +static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp, + const struct fib_info *fi) +{ + return fi->fib_nh->nh_scope == RT_SCOPE_LINK || + mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL); +} + static struct mlxsw_sp_nexthop_group * -mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) +mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) { struct mlxsw_sp_nexthop_group *nh_grp; struct mlxsw_sp_nexthop 
*nh; @@ -1753,16 +2874,19 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) nh_grp = kzalloc(alloc_size, GFP_KERNEL); if (!nh_grp) return ERR_PTR(-ENOMEM); + nh_grp->priv = fi; INIT_LIST_HEAD(&nh_grp->fib_list); - nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK; + nh_grp->neigh_tbl = &arp_tbl; + + nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi); nh_grp->count = fi->fib_nhs; - nh_grp->key.fi = fi; + fib_info_hold(fi); for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; fib_nh = &fi->fib_nh[i]; - err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh); + err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh); if (err) - goto err_nexthop_init; + goto err_nexthop4_init; } err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); if (err) @@ -1771,18 +2895,19 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) return nh_grp; err_nexthop_group_insert: -err_nexthop_init: +err_nexthop4_init: for (i--; i >= 0; i--) { nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); } + fib_info_put(fi); kfree(nh_grp); return ERR_PTR(err); } static void -mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp) +mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) { struct mlxsw_sp_nexthop *nh; int i; @@ -1790,24 +2915,23 @@ mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); } mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); WARN_ON_ONCE(nh_grp->adj_index_valid); + fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp)); kfree(nh_grp); } -static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - struct fib_info *fi) +static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + struct fib_info *fi) { - struct mlxsw_sp_nexthop_group_key key; struct mlxsw_sp_nexthop_group *nh_grp; - key.fi = fi; - nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key); + nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi); if (!nh_grp) { - nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi); + nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi); if (IS_ERR(nh_grp)) return PTR_ERR(nh_grp); } @@ -1816,15 +2940,25 @@ static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp, return 0; } -static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) +static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; list_del(&fib_entry->nexthop_group_node); if (!list_empty(&nh_grp->fib_list)) return; - mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp); + mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp); +} + +static bool +mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib4_entry *fib4_entry; + + fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, + common); + return !fib4_entry->tos; } static bool @@ -1832,29 +2966,133 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group; - if (fib_entry->params.tos) - return 
false; + switch (fib_entry->fib_node->fib->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + if (!mlxsw_sp_fib4_entry_should_offload(fib_entry)) + return false; + break; + case MLXSW_SP_L3_PROTO_IPV6: + break; + } switch (fib_entry->type) { case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: return !!nh_group->adj_index_valid; case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: return !!nh_group->nh_rif; + case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: + return true; default: return false; } } -static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +static struct mlxsw_sp_nexthop * +mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_rt6 *mlxsw_sp_rt6) { - fib_entry->offloaded = true; + int i; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + struct rt6_info *rt = mlxsw_sp_rt6->rt; + + if (nh->rif && nh->rif->dev == rt->dst.dev && + ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr, + &rt->rt6i_gateway)) + return nh; + continue; + } + + return NULL; +} +static void +mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + int i; + + if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || + fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) { + nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD; + return; + } + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + + if (nh->offloaded) + nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD; + else + nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + int i; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + + nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, + common); + + if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) { + list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, + list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD; + return; + } + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + struct mlxsw_sp_nexthop *nh; + + nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); + if (nh && nh->offloaded) + mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD; + else + mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, + common); + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct rt6_info *rt = mlxsw_sp_rt6->rt; + + rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +{ switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: - fib_info_offload_inc(fib_entry->nh_group->key.fi); + mlxsw_sp_fib4_entry_offload_set(fib_entry); break; case MLXSW_SP_L3_PROTO_IPV6: - WARN_ON_ONCE(1); + mlxsw_sp_fib6_entry_offload_set(fib_entry); + break; } } @@ -1863,13 +3101,12 @@ 
mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) { switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: - fib_info_offload_dec(fib_entry->nh_group->key.fi); + mlxsw_sp_fib4_entry_offload_unset(fib_entry); break; case MLXSW_SP_L3_PROTO_IPV6: - WARN_ON_ONCE(1); + mlxsw_sp_fib6_entry_offload_unset(fib_entry); + break; } - - fib_entry->offloaded = false; } static void @@ -1878,17 +3115,13 @@ mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, { switch (op) { case MLXSW_REG_RALUE_OP_WRITE_DELETE: - if (!fib_entry->offloaded) - return; return mlxsw_sp_fib_entry_offload_unset(fib_entry); case MLXSW_REG_RALUE_OP_WRITE_WRITE: if (err) return; - if (mlxsw_sp_fib_entry_should_offload(fib_entry) && - !fib_entry->offloaded) + if (mlxsw_sp_fib_entry_should_offload(fib_entry)) mlxsw_sp_fib_entry_offload_set(fib_entry); - else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) && - fib_entry->offloaded) + else if (!mlxsw_sp_fib_entry_should_offload(fib_entry)) mlxsw_sp_fib_entry_offload_unset(fib_entry); return; default: @@ -1896,13 +3129,37 @@ mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, } } -static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static void +mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl, + const struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { - char ralue_pl[MLXSW_REG_RALUE_LEN]; struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; - u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; + enum mlxsw_reg_ralxx_protocol proto; + u32 *p_dip; + + proto = (enum mlxsw_reg_ralxx_protocol) fib->proto; + + switch (fib->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + p_dip = (u32 *) fib_entry->fib_node->key.addr; + mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id, + fib_entry->fib_node->key.prefix_len, + *p_dip); + break; + case MLXSW_SP_L3_PROTO_IPV6: + mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id, + fib_entry->fib_node->key.prefix_len, + fib_entry->fib_node->key.addr); + break; + } +} + +static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; enum mlxsw_reg_ralue_trap_action trap_action; u16 trap_id = 0; u32 adjacency_index = 0; @@ -1921,24 +3178,19 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) fib->proto, op, - fib->vr->id, fib_entry->fib_node->key.prefix_len, - *p_dip); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, adjacency_index, ecmp_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } -static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif; - struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; enum mlxsw_reg_ralue_trap_action trap_action; char ralue_pl[MLXSW_REG_RALUE_LEN]; - u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; u16 trap_id = 0; u16 rif_index = 0; @@ -1950,42 +3202,53 @@ static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, trap_id = 
MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) fib->proto, op, - fib->vr->id, fib_entry->fib_node->key.prefix_len, - *p_dip); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, rif_index); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } -static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { - struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; char ralue_pl[MLXSW_REG_RALUE_LEN]; - u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; - mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) fib->proto, op, - fib->vr->id, fib_entry->fib_node->key.prefix_len, - *p_dip); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } -static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static int +mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry; + const struct mlxsw_sp_ipip_ops *ipip_ops; + + if (WARN_ON(!ipip_entry)) + return -EINVAL; + + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; + return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op, + fib_entry->decap.tunnel_index); +} + +static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { switch (fib_entry->type) { case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: - return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: - return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_TRAP: - return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: + return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, + fib_entry, op); } return -EINVAL; } @@ -1994,16 +3257,10 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) { - int err = -EINVAL; + int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op); - switch (fib_entry->fib_node->fib->proto) { - case MLXSW_SP_L3_PROTO_IPV4: - err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op); - break; - case MLXSW_SP_L3_PROTO_IPV6: - return err; - } mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err); + return err; } @@ -2026,11 +3283,23 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info, struct mlxsw_sp_fib_entry *fib_entry) { + union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) }; + struct net_device *dev = fen_info->fi->fib_dev; + struct mlxsw_sp_ipip_entry *ipip_entry; struct fib_info *fi = fen_info->fi; switch (fen_info->type) { - case RTN_BROADCAST: /* fall through */ case RTN_LOCAL: + ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev, + MLXSW_SP_L3_PROTO_IPV4, dip); + if 
(ipip_entry) { + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP; + return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, + fib_entry, + ipip_entry); + } + /* fall through */ + case RTN_BROADCAST: fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; return 0; case RTN_UNREACHABLE: /* fall through */ @@ -2043,82 +3312,87 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; return 0; case RTN_UNICAST: - if (fi->fib_nh->nh_scope != RT_SCOPE_LINK) - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; - else + if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi)) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + else + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; return 0; default: return -EINVAL; } } -static struct mlxsw_sp_fib_entry * +static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, const struct fib_entry_notifier_info *fen_info) { + struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_entry *fib_entry; int err; - fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL); - if (!fib_entry) { - err = -ENOMEM; - goto err_fib_entry_alloc; - } + fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL); + if (!fib4_entry) + return ERR_PTR(-ENOMEM); + fib_entry = &fib4_entry->common; err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry); if (err) goto err_fib4_entry_type_set; - err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi); + err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi); if (err) - goto err_nexthop_group_get; + goto err_nexthop4_group_get; - fib_entry->params.prio = fen_info->fi->fib_priority; - fib_entry->params.tb_id = fen_info->tb_id; - fib_entry->params.type = fen_info->type; - fib_entry->params.tos = fen_info->tos; + fib4_entry->prio = fen_info->fi->fib_priority; + fib4_entry->tb_id = fen_info->tb_id; + fib4_entry->type = fen_info->type; + fib4_entry->tos = fen_info->tos; fib_entry->fib_node = fib_node; - return fib_entry; + return fib4_entry; -err_nexthop_group_get: +err_nexthop4_group_get: err_fib4_entry_type_set: - kfree(fib_entry); -err_fib_entry_alloc: + kfree(fib4_entry); return ERR_PTR(err); } static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) + struct mlxsw_sp_fib4_entry *fib4_entry) { - mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); - kfree(fib_entry); + mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); + kfree(fib4_entry); } -static struct mlxsw_sp_fib_node * -mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info); - -static struct mlxsw_sp_fib_entry * +static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_node *fib_node; + struct mlxsw_sp_fib *fib; + struct mlxsw_sp_vr *vr; - fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info); - if (IS_ERR(fib_node)) + vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id); + if (!vr) return NULL; + fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4); - list_for_each_entry(fib_entry, &fib_node->entry_list, list) { - if (fib_entry->params.tb_id == fen_info->tb_id && - fib_entry->params.tos == fen_info->tos && - fib_entry->params.type == fen_info->type && - fib_entry->nh_group->key.fi == fen_info->fi) { - return fib_entry; + fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst, + 
sizeof(fen_info->dst), + fen_info->dst_len); + if (!fib_node) + return NULL; + + list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { + if (fib4_entry->tb_id == fen_info->tb_id && + fib4_entry->tos == fen_info->tos && + fib4_entry->type == fen_info->type && + mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) == + fen_info->fi) { + return fib4_entry; } } @@ -2191,6 +3465,53 @@ mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node, struct mlxsw_sp_fib_entry, list) == fib_entry; } +static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib *fib, + struct mlxsw_sp_fib_node *fib_node) +{ + struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } }; + struct mlxsw_sp_lpm_tree *lpm_tree; + int err; + + /* Since the tree is shared between all virtual routers we must + * make sure it contains all the required prefix lengths. This + * can be computed by either adding the new prefix length to the + * existing prefix usage of a bound tree, or by aggregating the + * prefix lengths across all virtual routers and adding the new + * one as well. + */ + if (fib->lpm_tree) + mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, + &fib->lpm_tree->prefix_usage); + else + mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage); + mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len); + + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, + fib->proto); + if (IS_ERR(lpm_tree)) + return PTR_ERR(lpm_tree); + + if (fib->lpm_tree && fib->lpm_tree->id == lpm_tree->id) + return 0; + + err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree); + if (err) + return err; + + return 0; +} + +static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib *fib) +{ + if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) + return; + mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib); + mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree); + fib->lpm_tree = NULL; +} + static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node) { unsigned char prefix_len = fib_node->key.prefix_len; @@ -2213,8 +3534,6 @@ static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, struct mlxsw_sp_fib *fib) { - struct mlxsw_sp_prefix_usage req_prefix_usage; - struct mlxsw_sp_lpm_tree *lpm_tree; int err; err = mlxsw_sp_fib_node_insert(fib, fib_node); @@ -2222,33 +3541,15 @@ static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp, return err; fib_node->fib = fib; - mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &fib->prefix_usage); - mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len); - - if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) { - err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib, - &req_prefix_usage); - if (err) - goto err_tree_check; - } else { - lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, - fib->proto); - if (IS_ERR(lpm_tree)) - return PTR_ERR(lpm_tree); - fib->lpm_tree = lpm_tree; - err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib); - if (err) - goto err_tree_bind; - } + err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib, fib_node); + if (err) + goto err_fib_lpm_tree_link; mlxsw_sp_fib_node_prefix_inc(fib_node); return 0; -err_tree_bind: - fib->lpm_tree = NULL; - mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); -err_tree_check: +err_fib_lpm_tree_link: fib_node->fib = NULL; mlxsw_sp_fib_node_remove(fib, fib_node); return err; @@ -2257,46 +3558,34 @@ err_tree_check: static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp, struct 
mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree; struct mlxsw_sp_fib *fib = fib_node->fib; mlxsw_sp_fib_node_prefix_dec(fib_node); - - if (mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) { - mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib); - fib->lpm_tree = NULL; - mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); - } else { - mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib, &fib->prefix_usage); - } - + mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib); fib_node->fib = NULL; mlxsw_sp_fib_node_remove(fib, fib_node); } static struct mlxsw_sp_fib_node * -mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info) +mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr, + size_t addr_len, unsigned char prefix_len, + enum mlxsw_sp_l3proto proto) { struct mlxsw_sp_fib_node *fib_node; struct mlxsw_sp_fib *fib; struct mlxsw_sp_vr *vr; int err; - vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->tb_id); + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id); if (IS_ERR(vr)) return ERR_CAST(vr); - fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4); + fib = mlxsw_sp_vr_fib(vr, proto); - fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst, - sizeof(fen_info->dst), - fen_info->dst_len); + fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len); if (fib_node) return fib_node; - fib_node = mlxsw_sp_fib_node_create(fib, &fen_info->dst, - sizeof(fen_info->dst), - fen_info->dst_len); + fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len); if (!fib_node) { err = -ENOMEM; goto err_fib_node_create; @@ -2315,8 +3604,8 @@ err_fib_node_create: return ERR_PTR(err); } -static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_node *fib_node) +static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) { struct mlxsw_sp_vr *vr = fib_node->fib->vr; @@ -2327,95 +3616,100 @@ static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_vr_put(vr); } -static struct mlxsw_sp_fib_entry * +static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct mlxsw_sp_fib_entry_params *params) + const struct mlxsw_sp_fib4_entry *new4_entry) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib4_entry *fib4_entry; - list_for_each_entry(fib_entry, &fib_node->entry_list, list) { - if (fib_entry->params.tb_id > params->tb_id) + list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { + if (fib4_entry->tb_id > new4_entry->tb_id) continue; - if (fib_entry->params.tb_id != params->tb_id) + if (fib4_entry->tb_id != new4_entry->tb_id) break; - if (fib_entry->params.tos > params->tos) + if (fib4_entry->tos > new4_entry->tos) continue; - if (fib_entry->params.prio >= params->prio || - fib_entry->params.tos < params->tos) - return fib_entry; + if (fib4_entry->prio >= new4_entry->prio || + fib4_entry->tos < new4_entry->tos) + return fib4_entry; } return NULL; } -static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry, - struct mlxsw_sp_fib_entry *new_entry) +static int +mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry, + struct mlxsw_sp_fib4_entry *new4_entry) { struct mlxsw_sp_fib_node *fib_node; - if (WARN_ON(!fib_entry)) + if (WARN_ON(!fib4_entry)) return -EINVAL; - fib_node = fib_entry->fib_node; - list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) { - if (fib_entry->params.tb_id != new_entry->params.tb_id || - fib_entry->params.tos != 
new_entry->params.tos || - fib_entry->params.prio != new_entry->params.prio) + fib_node = fib4_entry->common.fib_node; + list_for_each_entry_from(fib4_entry, &fib_node->entry_list, + common.list) { + if (fib4_entry->tb_id != new4_entry->tb_id || + fib4_entry->tos != new4_entry->tos || + fib4_entry->prio != new4_entry->prio) break; } - list_add_tail(&new_entry->list, &fib_entry->list); + list_add_tail(&new4_entry->common.list, &fib4_entry->common.list); return 0; } static int -mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node, - struct mlxsw_sp_fib_entry *new_entry, +mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry, bool replace, bool append) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node; + struct mlxsw_sp_fib4_entry *fib4_entry; - fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params); + fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry); if (append) - return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry); - if (replace && WARN_ON(!fib_entry)) + return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry); + if (replace && WARN_ON(!fib4_entry)) return -EINVAL; /* Insert new entry before replaced one, so that we can later * remove the second. */ - if (fib_entry) { - list_add_tail(&new_entry->list, &fib_entry->list); + if (fib4_entry) { + list_add_tail(&new4_entry->common.list, + &fib4_entry->common.list); } else { - struct mlxsw_sp_fib_entry *last; + struct mlxsw_sp_fib4_entry *last; - list_for_each_entry(last, &fib_node->entry_list, list) { - if (new_entry->params.tb_id > last->params.tb_id) + list_for_each_entry(last, &fib_node->entry_list, common.list) { + if (new4_entry->tb_id > last->tb_id) break; - fib_entry = last; + fib4_entry = last; } - if (fib_entry) - list_add(&new_entry->list, &fib_entry->list); + if (fib4_entry) + list_add(&new4_entry->common.list, + &fib4_entry->common.list); else - list_add(&new_entry->list, &fib_node->entry_list); + list_add(&new4_entry->common.list, + &fib_node->entry_list); } return 0; } static void -mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry) { - list_del(&fib_entry->list); + list_del(&fib4_entry->common.list); } -static int -mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_fib_node *fib_node, - struct mlxsw_sp_fib_entry *fib_entry) +static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { + struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) return 0; @@ -2432,11 +3726,11 @@ mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); } -static void -mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_fib_node *fib_node, - struct mlxsw_sp_fib_entry *fib_entry) +static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { + struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) return; @@ -2454,54 +3748,53 @@ mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, + struct mlxsw_sp_fib4_entry *fib4_entry, bool replace, bool append) { - struct mlxsw_sp_fib_node *fib_node = 
fib_entry->fib_node; int err; - err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace, - append); + err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append); if (err) return err; - err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry); + err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common); if (err) - goto err_fib4_node_entry_add; + goto err_fib_node_entry_add; return 0; -err_fib4_node_entry_add: - mlxsw_sp_fib4_node_list_remove(fib_entry); +err_fib_node_entry_add: + mlxsw_sp_fib4_node_list_remove(fib4_entry); return err; } static void mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) + struct mlxsw_sp_fib4_entry *fib4_entry) { - struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common); + mlxsw_sp_fib4_node_list_remove(fib4_entry); - mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry); - mlxsw_sp_fib4_node_list_remove(fib_entry); + if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) + mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common); } static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, + struct mlxsw_sp_fib4_entry *fib4_entry, bool replace) { - struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - struct mlxsw_sp_fib_entry *replaced; + struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node; + struct mlxsw_sp_fib4_entry *replaced; if (!replace) return; /* We inserted the new entry before replaced one */ - replaced = list_next_entry(fib_entry, list); + replaced = list_next_entry(fib4_entry, common.list); mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced); mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced); - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } static int @@ -2509,99 +3802,795 @@ mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info, bool replace, bool append) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_node *fib_node; int err; - if (mlxsw_sp->router.aborted) + if (mlxsw_sp->router->aborted) return 0; - fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info); + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id, + &fen_info->dst, sizeof(fen_info->dst), + fen_info->dst_len, + MLXSW_SP_L3_PROTO_IPV4); if (IS_ERR(fib_node)) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n"); return PTR_ERR(fib_node); } - fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info); - if (IS_ERR(fib_entry)) { + fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info); + if (IS_ERR(fib4_entry)) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n"); - err = PTR_ERR(fib_entry); + err = PTR_ERR(fib4_entry); goto err_fib4_entry_create; } - err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace, + err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace, append); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n"); goto err_fib4_node_entry_link; } - mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace); + mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace); return 0; err_fib4_node_entry_link: - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); err_fib4_entry_create: - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + 
mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); return err; } static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, struct fib_entry_notifier_info *fen_info) { + struct mlxsw_sp_fib4_entry *fib4_entry; + struct mlxsw_sp_fib_node *fib_node; + + if (mlxsw_sp->router->aborted) + return; + + fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info); + if (WARN_ON(!fib4_entry)) + return; + fib_node = fib4_entry->common.fib_node; + + mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); +} + +static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt) +{ + /* Packets with link-local destination IP arriving to the router + * are trapped to the CPU, so no need to program specific routes + * for them. + */ + if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL) + return true; + + /* Multicast routes aren't supported, so ignore them. Neighbour + * Discovery packets are specifically trapped. + */ + if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST) + return true; + + /* Cloned routes are irrelevant in the forwarding path. */ + if (rt->rt6i_flags & RTF_CACHE) + return true; + + return false; +} + +static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL); + if (!mlxsw_sp_rt6) + return ERR_PTR(-ENOMEM); + + /* In case of route replace, replaced route is deleted with + * no notification. Take reference to prevent accessing freed + * memory. + */ + mlxsw_sp_rt6->rt = rt; + rt6_hold(rt); + + return mlxsw_sp_rt6; +} + +#if IS_ENABLED(CONFIG_IPV6) +static void mlxsw_sp_rt6_release(struct rt6_info *rt) +{ + rt6_release(rt); +} +#else +static void mlxsw_sp_rt6_release(struct rt6_info *rt) +{ +} +#endif + +static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) +{ + mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt); + kfree(mlxsw_sp_rt6); +} + +static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt) +{ + /* RTF_CACHE routes are ignored */ + return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY; +} + +static struct rt6_info * +mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) +{ + return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, + list)->rt; +} + +static struct mlxsw_sp_fib6_entry * +mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node, + const struct rt6_info *nrt, bool replace) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + + if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace) + return NULL; + + list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { + struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + + /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same + * virtual router. 
+ */ + if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id) + continue; + if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id) + break; + if (rt->rt6i_metric < nrt->rt6i_metric) + continue; + if (rt->rt6i_metric == nrt->rt6i_metric && + mlxsw_sp_fib6_rt_can_mp(rt)) + return fib6_entry; + if (rt->rt6i_metric > nrt->rt6i_metric) + break; + } + + return NULL; +} + +static struct mlxsw_sp_rt6 * +mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry, + const struct rt6_info *rt) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + if (mlxsw_sp_rt6->rt == rt) + return mlxsw_sp_rt6; + } + + return NULL; +} + +static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp, + const struct rt6_info *rt, + enum mlxsw_sp_ipip_type *ret) +{ + return rt->dst.dev && + mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret); +} + +static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + const struct rt6_info *rt) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct net_device *dev = rt->dst.dev; + enum mlxsw_sp_ipip_type ipipt; + struct mlxsw_sp_rif *rif; + int err; + + if (mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, &ipipt) && + router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, + MLXSW_SP_L3_PROTO_IPV6)) { + nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; + err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev); + if (err) + return err; + mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common); + return 0; + } + + nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + return 0; + mlxsw_sp_nexthop_rif_init(nh, rif); + + err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); + if (err) + goto err_nexthop_neigh_init; + + return 0; + +err_nexthop_neigh_init: + mlxsw_sp_nexthop_rif_fini(nh); + return err; +} + +static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); +} + +static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + const struct rt6_info *rt) +{ + struct net_device *dev = rt->dst.dev; + + nh->nh_grp = nh_grp; + memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr)); + + if (!dev) + return 0; + nh->ifindex = dev->ifindex; + + return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt); +} + +static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh); +} + +static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, + const struct rt6_info *rt) +{ + return rt->rt6i_flags & RTF_GATEWAY || + mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + struct mlxsw_sp_nexthop *nh; + size_t alloc_size; + int i = 0; + int err; + + alloc_size = sizeof(*nh_grp) + + fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop); + nh_grp = kzalloc(alloc_size, GFP_KERNEL); + if (!nh_grp) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&nh_grp->fib_list); +#if IS_ENABLED(CONFIG_IPV6) + nh_grp->neigh_tbl = &nd_tbl; +#endif + mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); + nh_grp->gateway = 
mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt); + nh_grp->count = fib6_entry->nrt6; + for (i = 0; i < nh_grp->count; i++) { + struct rt6_info *rt = mlxsw_sp_rt6->rt; + + nh = &nh_grp->nexthops[i]; + err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt); + if (err) + goto err_nexthop6_init; + mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list); + } + + err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); + if (err) + goto err_nexthop_group_insert; + + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + return nh_grp; + +err_nexthop_group_insert: +err_nexthop6_init: + for (i--; i >= 0; i--) { + nh = &nh_grp->nexthops[i]; + mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); + } + kfree(nh_grp); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop *nh; + int i = nh_grp->count; + + mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); + for (i--; i >= 0; i--) { + nh = &nh_grp->nexthops[i]; + mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); + } + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + WARN_ON(nh_grp->adj_index_valid); + kfree(nh_grp); +} + +static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + + nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry); + if (!nh_grp) { + nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry); + if (IS_ERR(nh_grp)) + return PTR_ERR(nh_grp); + } + + list_add_tail(&fib6_entry->common.nexthop_group_node, + &nh_grp->fib_list); + fib6_entry->common.nh_group = nh_grp; + + return 0; +} + +static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + + list_del(&fib_entry->nexthop_group_node); + if (!list_empty(&nh_grp->fib_list)) + return; + mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp); +} + +static int +mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group; + int err; + + fib6_entry->common.nh_group = NULL; + list_del(&fib6_entry->common.nexthop_group_node); + + err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); + if (err) + goto err_nexthop6_group_get; + + /* In case this entry is offloaded, then the adjacency index + * currently associated with it in the device's table is that + * of the old group. Start using the new one instead. 
+ */ + err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common); + if (err) + goto err_fib_node_entry_add; + + if (list_empty(&old_nh_grp->fib_list)) + mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp); + + return 0; + +err_fib_node_entry_add: + mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common); +err_nexthop6_group_get: + list_add_tail(&fib6_entry->common.nexthop_group_node, + &old_nh_grp->fib_list); + fib6_entry->common.nh_group = old_nh_grp; + return err; +} + +static int +mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry, + struct rt6_info *rt) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + int err; + + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); + if (IS_ERR(mlxsw_sp_rt6)) + return PTR_ERR(mlxsw_sp_rt6); + + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6++; + + err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); + if (err) + goto err_nexthop6_group_update; + + return 0; + +err_nexthop6_group_update: + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + return err; +} + +static void +mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry, + struct rt6_info *rt) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt); + if (WARN_ON(!mlxsw_sp_rt6)) + return; + + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); +} + +static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + const struct rt6_info *rt) +{ + /* Packets hitting RTF_REJECT routes need to be discarded by the + * stack. We can rely on their destination device not having a + * RIF (it's the loopback device) and can thus use action type + * local, which will cause them to be trapped with a lower + * priority than packets that need to be locally received. 
+ */ + if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + else if (rt->rt6i_flags & RTF_REJECT) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt)) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + else + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; +} + +static void +mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp; + + list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list, + list) { + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } +} + +static struct mlxsw_sp_fib6_entry * +mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node, + struct rt6_info *rt) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + int err; + + fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL); + if (!fib6_entry) + return ERR_PTR(-ENOMEM); + fib_entry = &fib6_entry->common; + + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); + if (IS_ERR(mlxsw_sp_rt6)) { + err = PTR_ERR(mlxsw_sp_rt6); + goto err_rt6_create; + } + + mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt); + + INIT_LIST_HEAD(&fib6_entry->rt6_list); + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6 = 1; + err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); + if (err) + goto err_nexthop6_group_get; + + fib_entry->fib_node = fib_node; + + return fib6_entry; + +err_nexthop6_group_get: + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); +err_rt6_create: + kfree(fib6_entry); + return ERR_PTR(err); +} + +static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common); + mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry); + WARN_ON(fib6_entry->nrt6); + kfree(fib6_entry); +} + +static struct mlxsw_sp_fib6_entry * +mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, + const struct rt6_info *nrt, bool replace) +{ + struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL; + + list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { + struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + + if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id) + continue; + if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id) + break; + if (replace && rt->rt6i_metric == nrt->rt6i_metric) { + if (mlxsw_sp_fib6_rt_can_mp(rt) == + mlxsw_sp_fib6_rt_can_mp(nrt)) + return fib6_entry; + if (mlxsw_sp_fib6_rt_can_mp(nrt)) + fallback = fallback ?: fib6_entry; + } + if (rt->rt6i_metric > nrt->rt6i_metric) + return fallback ?: fib6_entry; + } + + return fallback; +} + +static int +mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, + bool replace) +{ + struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node; + struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); + struct mlxsw_sp_fib6_entry *fib6_entry; + + fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace); + + if (replace && WARN_ON(!fib6_entry)) + return -EINVAL; + + if (fib6_entry) { + list_add_tail(&new6_entry->common.list, + &fib6_entry->common.list); + } else { + struct mlxsw_sp_fib6_entry *last; + + list_for_each_entry(last, &fib_node->entry_list, common.list) { + struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last); + + if (nrt->rt6i_table->tb6_id 
> rt->rt6i_table->tb6_id) + break; + fib6_entry = last; + } + + if (fib6_entry) + list_add(&new6_entry->common.list, + &fib6_entry->common.list); + else + list_add(&new6_entry->common.list, + &fib_node->entry_list); + } + + return 0; +} + +static void +mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry) +{ + list_del(&fib6_entry->common.list); +} + +static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry, + bool replace) +{ + int err; + + err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace); + if (err) + return err; + + err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common); + if (err) + goto err_fib_node_entry_add; + + return 0; + +err_fib_node_entry_add: + mlxsw_sp_fib6_node_list_remove(fib6_entry); + return err; +} + +static void +mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common); + mlxsw_sp_fib6_node_list_remove(fib6_entry); +} + +static struct mlxsw_sp_fib6_entry * +mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp, + const struct rt6_info *rt) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib_node *fib_node; + struct mlxsw_sp_fib *fib; + struct mlxsw_sp_vr *vr; + + vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id); + if (!vr) + return NULL; + fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6); + + fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr, + sizeof(rt->rt6i_dst.addr), + rt->rt6i_dst.plen); + if (!fib_node) + return NULL; + + list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { + struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + + if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id && + rt->rt6i_metric == iter_rt->rt6i_metric && + mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt)) + return fib6_entry; + } + + return NULL; +} + +static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry, + bool replace) +{ + struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node; + struct mlxsw_sp_fib6_entry *replaced; + + if (!replace) + return; + + replaced = list_next_entry(fib6_entry, common.list); + + mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); +} + +static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, + struct rt6_info *rt, bool replace) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib_node *fib_node; + int err; + + if (mlxsw_sp->router->aborted) + return 0; + + if (rt->rt6i_src.plen) + return -EINVAL; + + if (mlxsw_sp_fib6_rt_should_ignore(rt)) + return 0; + + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id, + &rt->rt6i_dst.addr, + sizeof(rt->rt6i_dst.addr), + rt->rt6i_dst.plen, + MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(fib_node)) + return PTR_ERR(fib_node); + + /* Before creating a new entry, try to append route to an existing + * multipath entry. 
+ */ + fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace); + if (fib6_entry) { + err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt); + if (err) + goto err_fib6_entry_nexthop_add; + return 0; + } + + fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt); + if (IS_ERR(fib6_entry)) { + err = PTR_ERR(fib6_entry); + goto err_fib6_entry_create; + } + + err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace); + if (err) + goto err_fib6_node_entry_link; + + mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace); + + return 0; + +err_fib6_node_entry_link: + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); +err_fib6_entry_create: +err_fib6_entry_nexthop_add: + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return err; +} + +static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, + struct rt6_info *rt) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; - if (mlxsw_sp->router.aborted) + if (mlxsw_sp->router->aborted) + return; + + if (mlxsw_sp_fib6_rt_should_ignore(rt)) return; - fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info); - if (WARN_ON(!fib_entry)) + fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt); + if (WARN_ON(!fib6_entry)) return; - fib_node = fib_entry->fib_node; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + /* If route is part of a multipath entry, but not the last one + * removed, then only reduce its nexthop group. + */ + if (!list_is_singular(&fib6_entry->rt6_list)) { + mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt); + return; + } + + fib_node = fib6_entry->common.fib_node; + + mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } -static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) +static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_reg_ralxx_protocol proto, + u8 tree_id) { char ralta_pl[MLXSW_REG_RALTA_LEN]; char ralst_pl[MLXSW_REG_RALST_LEN]; int i, err; - mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4, - MLXSW_SP_LPM_TREE_MIN); + mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); if (err) return err; - mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN); + mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl); if (err) return err; for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { - struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[i]; + struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i]; char raltb_pl[MLXSW_REG_RALTB_LEN]; char ralue_pl[MLXSW_REG_RALUE_LEN]; - if (!mlxsw_sp_vr_is_used(vr)) - continue; - - mlxsw_reg_raltb_pack(raltb_pl, vr->id, - MLXSW_REG_RALXX_PROTOCOL_IPV4, - MLXSW_SP_LPM_TREE_MIN); + mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); if (err) return err; - mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4, - MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0, - 0); + mlxsw_reg_ralue_pack(ralue_pl, proto, + MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0); mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); @@ -2612,17 +4601,33 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) 
return 0; } +static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) +{ + enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4; + int err; + + err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, + MLXSW_SP_LPM_TREE_MIN); + if (err) + return err; + + proto = MLXSW_REG_RALXX_PROTOCOL_IPV6; + return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, + MLXSW_SP_LPM_TREE_MIN + 1); +} + static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_fib_entry *fib_entry, *tmp; + struct mlxsw_sp_fib4_entry *fib4_entry, *tmp; - list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) { - bool do_break = &tmp->list == &fib_node->entry_list; + list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list, + common.list) { + bool do_break = &tmp->common.list == &fib_node->entry_list; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); /* Break when entry list is empty and node was freed. * Otherwise, we'll access freed memory in the next * iteration. @@ -2632,6 +4637,23 @@ static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, } } +static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) +{ + struct mlxsw_sp_fib6_entry *fib6_entry, *tmp; + + list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list, + common.list) { + bool do_break = &tmp->common.list == &fib_node->entry_list; + + mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + if (do_break) + break; + } +} + static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { @@ -2640,7 +4662,7 @@ static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node); break; case MLXSW_SP_L3_PROTO_IPV6: - WARN_ON_ONCE(1); + mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node); break; } } @@ -2666,23 +4688,30 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) int i; for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { - struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[i]; + struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i]; if (!mlxsw_sp_vr_is_used(vr)) continue; mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); + + /* If virtual router was only used for IPv4, then it's no + * longer used. + */ + if (!mlxsw_sp_vr_is_used(vr)) + continue; + mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6); } } -static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp) { int err; - if (mlxsw_sp->router.aborted) + if (mlxsw_sp->router->aborted) return; dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. 
Note that FIB entries are no longer being offloaded to this device.\n"); mlxsw_sp_router_fib_flush(mlxsw_sp); - mlxsw_sp->router.aborted = true; + mlxsw_sp->router->aborted = true; err = mlxsw_sp_router_set_abort_trap(mlxsw_sp); if (err) dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n"); @@ -2691,6 +4720,7 @@ static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) struct mlxsw_sp_fib_event_work { struct work_struct work; union { + struct fib6_entry_notifier_info fen6_info; struct fib_entry_notifier_info fen_info; struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; @@ -2699,7 +4729,7 @@ struct mlxsw_sp_fib_event_work { unsigned long event; }; -static void mlxsw_sp_router_fib_event_work(struct work_struct *work) +static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) { struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); @@ -2719,7 +4749,7 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info, replace, append); if (err) - mlxsw_sp_router_fib4_abort(mlxsw_sp); + mlxsw_sp_router_fib_abort(mlxsw_sp); fib_info_put(fib_work->fen_info.fi); break; case FIB_EVENT_ENTRY_DEL: @@ -2730,13 +4760,13 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) case FIB_EVENT_RULE_DEL: rule = fib_work->fr_info.rule; if (!fib4_rule_default(rule) && !rule->l3mdev) - mlxsw_sp_router_fib4_abort(mlxsw_sp); + mlxsw_sp_router_fib_abort(mlxsw_sp); fib_rule_put(rule); break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: - mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event, - fib_work->fnh_info.fib_nh); + mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event, + fib_work->fnh_info.fib_nh); fib_info_put(fib_work->fnh_info.fib_nh->nh_parent); break; } @@ -2744,31 +4774,51 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) kfree(fib_work); } -/* Called with rcu_read_lock() */ -static int mlxsw_sp_router_fib_event(struct notifier_block *nb, - unsigned long event, void *ptr) +static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) { - struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb); - struct mlxsw_sp_fib_event_work *fib_work; - struct fib_notifier_info *info = ptr; - - if (!net_eq(info->net, &init_net)) - return NOTIFY_DONE; - - fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); - if (WARN_ON(!fib_work)) - return NOTIFY_BAD; + struct mlxsw_sp_fib_event_work *fib_work = + container_of(work, struct mlxsw_sp_fib_event_work, work); + struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; + struct fib_rule *rule; + bool replace; + int err; - INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work); - fib_work->mlxsw_sp = mlxsw_sp; - fib_work->event = event; + rtnl_lock(); + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_ADD: + replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; + err = mlxsw_sp_router_fib6_add(mlxsw_sp, + fib_work->fen6_info.rt, replace); + if (err) + mlxsw_sp_router_fib_abort(mlxsw_sp); + mlxsw_sp_rt6_release(fib_work->fen6_info.rt); + break; + case FIB_EVENT_ENTRY_DEL: + mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt); + mlxsw_sp_rt6_release(fib_work->fen6_info.rt); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + rule = fib_work->fr_info.rule; + if (!fib6_rule_default(rule) && !rule->l3mdev) + mlxsw_sp_router_fib_abort(mlxsw_sp); + 
fib_rule_put(rule); + break; + } + rtnl_unlock(); + kfree(fib_work); +} - switch (event) { +static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) +{ + switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_APPEND: /* fall through */ case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: - memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info)); + memcpy(&fib_work->fen_info, info, sizeof(fib_work->fen_info)); /* Take referece on fib_info to prevent it from being * freed while work is queued. Release it afterwards. */ @@ -2776,15 +4826,65 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, break; case FIB_EVENT_RULE_ADD: /* fall through */ case FIB_EVENT_RULE_DEL: - memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info)); + memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info)); fib_rule_get(fib_work->fr_info.rule); break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: - memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info)); + memcpy(&fib_work->fnh_info, info, sizeof(fib_work->fnh_info)); fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); break; } +} + +static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) +{ + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_DEL: + memcpy(&fib_work->fen6_info, info, sizeof(fib_work->fen6_info)); + rt6_hold(fib_work->fen6_info.rt); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info)); + fib_rule_get(fib_work->fr_info.rule); + break; + } +} + +/* Called with rcu_read_lock() */ +static int mlxsw_sp_router_fib_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlxsw_sp_fib_event_work *fib_work; + struct fib_notifier_info *info = ptr; + struct mlxsw_sp_router *router; + + if (!net_eq(info->net, &init_net) || + (info->family != AF_INET && info->family != AF_INET6)) + return NOTIFY_DONE; + + fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); + if (WARN_ON(!fib_work)) + return NOTIFY_BAD; + + router = container_of(nb, struct mlxsw_sp_router, fib_nb); + fib_work->mlxsw_sp = router->mlxsw_sp; + fib_work->event = event; + + switch (info->family) { + case AF_INET: + INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work); + mlxsw_sp_router_fib4_event(fib_work, info); + break; + case AF_INET6: + INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work); + mlxsw_sp_router_fib6_event(fib_work, info); + break; + } mlxsw_core_schedule_work(&fib_work->work); @@ -2798,8 +4898,9 @@ mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, int i; for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) - if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev) - return mlxsw_sp->rifs[i]; + if (mlxsw_sp->router->rifs[i] && + mlxsw_sp->router->rifs[i]->dev == dev) + return mlxsw_sp->router->rifs[i]; return NULL; } @@ -2826,17 +4927,28 @@ static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); } -static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, - const struct in_device *in_dev, - unsigned long event) +static bool +mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev, + unsigned long event) { + struct inet6_dev *inet6_dev; + 
bool addr_list_empty = true; + struct in_device *idev; + switch (event) { case NETDEV_UP: - if (!rif) - return true; - return false; + return rif == NULL; case NETDEV_DOWN: - if (rif && !in_dev->ifa_list && + idev = __in_dev_get_rtnl(dev); + if (idev && idev->ifa_list) + addr_list_empty = false; + + inet6_dev = __in6_dev_get(dev); + if (addr_list_empty && inet6_dev && + !list_empty(&inet6_dev->addr_list)) + addr_list_empty = false; + + if (rif && addr_list_empty && !netif_is_l3_slave(rif->dev)) return true; /* It is possible we already removed the RIF ourselves @@ -2849,77 +4961,49 @@ static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, return false; } -#define MLXSW_SP_INVALID_INDEX_RIF 0xffff -static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp) +static enum mlxsw_sp_rif_type +mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) { - int i; + enum mlxsw_sp_fid_type type; - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) - if (!mlxsw_sp->rifs[i]) - return i; + if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL)) + return MLXSW_SP_RIF_TYPE_IPIP_LB; - return MLXSW_SP_INVALID_INDEX_RIF; -} - -static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport, - bool *p_lagged, u16 *p_system_port) -{ - u8 local_port = mlxsw_sp_vport->local_port; + /* Otherwise RIF type is derived from the type of the underlying FID. */ + if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev))) + type = MLXSW_SP_FID_TYPE_8021Q; + else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev)) + type = MLXSW_SP_FID_TYPE_8021Q; + else if (netif_is_bridge_master(dev)) + type = MLXSW_SP_FID_TYPE_8021D; + else + type = MLXSW_SP_FID_TYPE_RFID; - *p_lagged = mlxsw_sp_vport->lagged; - *p_system_port = *p_lagged ? 
mlxsw_sp_vport->lag_id : local_port; + return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type); } -static int mlxsw_sp_vport_rif_sp_op(struct mlxsw_sp_port *mlxsw_sp_vport, - u16 vr_id, struct net_device *l3_dev, - u16 rif_index, bool create) +static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; - bool lagged = mlxsw_sp_vport->lagged; - char ritr_pl[MLXSW_REG_RITR_LEN]; - u16 system_port; - - mlxsw_reg_ritr_pack(ritr_pl, create, MLXSW_REG_RITR_SP_IF, rif_index, - vr_id, l3_dev->mtu, l3_dev->dev_addr); - - mlxsw_sp_vport_rif_sp_attr_get(mlxsw_sp_vport, &lagged, &system_port); - mlxsw_reg_ritr_sp_if_pack(ritr_pl, lagged, system_port, - mlxsw_sp_vport_vid_get(mlxsw_sp_vport)); - - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); -} + int i; -static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport); + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { + if (!mlxsw_sp->router->rifs[i]) { + *p_rif_index = i; + return 0; + } + } -static u16 mlxsw_sp_rif_sp_to_fid(u16 rif_index) -{ - return MLXSW_SP_RFID_BASE + rif_index; + return -ENOBUFS; } -static struct mlxsw_sp_fid * -mlxsw_sp_rfid_alloc(u16 fid, struct net_device *l3_dev) -{ - struct mlxsw_sp_fid *f; - - f = kzalloc(sizeof(*f), GFP_KERNEL); - if (!f) - return NULL; - - f->leave = mlxsw_sp_vport_rif_sp_leave; - f->ref_count = 0; - f->dev = l3_dev; - f->fid = fid; - - return f; -} - -static struct mlxsw_sp_rif * -mlxsw_sp_rif_alloc(u16 rif_index, u16 vr_id, struct net_device *l3_dev, - struct mlxsw_sp_fid *f) +static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, + u16 vr_id, + struct net_device *l3_dev) { struct mlxsw_sp_rif *rif; - rif = kzalloc(sizeof(*rif), GFP_KERNEL); + rif = kzalloc(rif_size, GFP_KERNEL); if (!rif) return NULL; @@ -2930,168 +5014,225 @@ mlxsw_sp_rif_alloc(u16 rif_index, u16 vr_id, struct net_device *l3_dev, rif->vr_id = vr_id; rif->dev = l3_dev; rif->rif_index = rif_index; - rif->f = f; return rif; } +struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, + u16 rif_index) +{ + return mlxsw_sp->router->rifs[rif_index]; +} + u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif) { return rif->rif_index; } +u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif) +{ + return lb_rif->common.rif_index; +} + +u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) +{ + return lb_rif->ul_vr_id; +} + int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif) { return rif->dev->ifindex; } static struct mlxsw_sp_rif * -mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport, - struct net_device *l3_dev) +mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_rif_params *params) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; - u32 tb_id = l3mdev_fib_table(l3_dev); - struct mlxsw_sp_vr *vr; - struct mlxsw_sp_fid *f; + u32 tb_id = l3mdev_fib_table(params->dev); + const struct mlxsw_sp_rif_ops *ops; + struct mlxsw_sp_fid *fid = NULL; + enum mlxsw_sp_rif_type type; struct mlxsw_sp_rif *rif; - u16 fid, rif_index; + struct mlxsw_sp_vr *vr; + u16 rif_index; int err; - rif_index = mlxsw_sp_avail_rif_get(mlxsw_sp); - if (rif_index == MLXSW_SP_INVALID_INDEX_RIF) - return ERR_PTR(-ERANGE); + type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev); + ops = mlxsw_sp->router->rif_ops_arr[type]; vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? 
: RT_TABLE_MAIN); if (IS_ERR(vr)) return ERR_CAST(vr); + vr->rif_count++; - err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, - rif_index, true); - if (err) - goto err_vport_rif_sp_op; - - fid = mlxsw_sp_rif_sp_to_fid(rif_index); - err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, true); + err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); if (err) - goto err_rif_fdb_op; + goto err_rif_index_alloc; - f = mlxsw_sp_rfid_alloc(fid, l3_dev); - if (!f) { - err = -ENOMEM; - goto err_rfid_alloc; - } - - rif = mlxsw_sp_rif_alloc(rif_index, vr->id, l3_dev, f); + rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev); if (!rif) { err = -ENOMEM; goto err_rif_alloc; } - - if (devlink_dpipe_table_counter_enabled(priv_to_devlink(mlxsw_sp->core), - MLXSW_SP_DPIPE_TABLE_NAME_ERIF)) { - err = mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, - MLXSW_SP_RIF_COUNTER_EGRESS); - if (err) - netdev_dbg(mlxsw_sp_vport->dev, - "Counter alloc Failed err=%d\n", err); + rif->mlxsw_sp = mlxsw_sp; + rif->ops = ops; + + if (ops->fid_get) { + fid = ops->fid_get(rif); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + goto err_fid_get; + } + rif->fid = fid; } - f->rif = rif; - mlxsw_sp->rifs[rif_index] = rif; - vr->rif_count++; + if (ops->setup) + ops->setup(rif, params); + + err = ops->configure(rif); + if (err) + goto err_configure; + + mlxsw_sp_rif_counters_alloc(rif); + mlxsw_sp->router->rifs[rif_index] = rif; return rif; +err_configure: + if (fid) + mlxsw_sp_fid_put(fid); +err_fid_get: + kfree(rif); err_rif_alloc: - kfree(f); -err_rfid_alloc: - mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false); -err_rif_fdb_op: - mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, rif_index, - false); -err_vport_rif_sp_op: +err_rif_index_alloc: + vr->rif_count--; mlxsw_sp_vr_put(vr); return ERR_PTR(err); } -static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport, - struct mlxsw_sp_rif *rif) +void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; - struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[rif->vr_id]; - struct net_device *l3_dev = rif->dev; - struct mlxsw_sp_fid *f = rif->f; - u16 rif_index = rif->rif_index; - u16 fid = f->fid; + const struct mlxsw_sp_rif_ops *ops = rif->ops; + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_fid *fid = rif->fid; + struct mlxsw_sp_vr *vr; mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); - - mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS); - mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_INGRESS); - - vr->rif_count--; - mlxsw_sp->rifs[rif_index] = NULL; - f->rif = NULL; - + vr = &mlxsw_sp->router->vrs[rif->vr_id]; + + mlxsw_sp->router->rifs[rif->rif_index] = NULL; + mlxsw_sp_rif_counters_free(rif); + ops->deconfigure(rif); + if (fid) + /* Loopback RIFs are not associated with a FID. 
*/ + mlxsw_sp_fid_put(fid); kfree(rif); + vr->rif_count--; + mlxsw_sp_vr_put(vr); +} - kfree(f); - - mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false); +static void +mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params, + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) +{ + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; - mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, rif_index, - false); - mlxsw_sp_vr_put(vr); + params->vid = mlxsw_sp_port_vlan->vid; + params->lag = mlxsw_sp_port->lagged; + if (params->lag) + params->lag_id = mlxsw_sp_port->lag_id; + else + params->system_port = mlxsw_sp_port->local_port; } -static int mlxsw_sp_vport_rif_sp_join(struct mlxsw_sp_port *mlxsw_sp_vport, - struct net_device *l3_dev) +static int +mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, + struct net_device *l3_dev) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 vid = mlxsw_sp_port_vlan->vid; struct mlxsw_sp_rif *rif; + struct mlxsw_sp_fid *fid; + int err; rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); if (!rif) { + struct mlxsw_sp_rif_params params = { + .dev = l3_dev, + }; + + mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan); - rif = mlxsw_sp_vport_rif_sp_create(mlxsw_sp_vport, l3_dev); + rif = mlxsw_sp_rif_create(mlxsw_sp, &params); if (IS_ERR(rif)) return PTR_ERR(rif); } - mlxsw_sp_vport_fid_set(mlxsw_sp_vport, rif->f); - rif->f->ref_count++; + /* FID was already created, just take a reference */ + fid = rif->ops->fid_get(rif); + err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid); + if (err) + goto err_fid_port_vid_map; + + err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false); + if (err) + goto err_port_vid_learning_set; - netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", rif->f->fid); + err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, + BR_STATE_FORWARDING); + if (err) + goto err_port_vid_stp_set; + + mlxsw_sp_port_vlan->fid = fid; return 0; + +err_port_vid_stp_set: + mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true); +err_port_vid_learning_set: + mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid); +err_fid_port_vid_map: + mlxsw_sp_fid_put(fid); + return err; } -static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport) +void +mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) { - struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; + struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; + u16 vid = mlxsw_sp_port_vlan->vid; - netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid); + if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID)) + return; - mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL); - if (--f->ref_count == 0) - mlxsw_sp_vport_rif_sp_destroy(mlxsw_sp_vport, f->rif); + mlxsw_sp_port_vlan->fid = NULL; + mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING); + mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true); + mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid); + /* If router port holds the last reference on the rFID, then the + * associated Sub-port RIF will be destroyed.
+ */ + mlxsw_sp_fid_put(fid); } -static int mlxsw_sp_inetaddr_vport_event(struct net_device *l3_dev, - struct net_device *port_dev, - unsigned long event, u16 vid) +static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev, + struct net_device *port_dev, + unsigned long event, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev); - struct mlxsw_sp_port *mlxsw_sp_vport; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; - mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); - if (WARN_ON(!mlxsw_sp_vport)) + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); + if (WARN_ON(!mlxsw_sp_port_vlan)) return -EINVAL; switch (event) { case NETDEV_UP: - return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, l3_dev); + return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, + l3_dev); case NETDEV_DOWN: - mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport); + mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); break; } @@ -3106,7 +5247,7 @@ static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev, netif_is_ovs_port(port_dev)) return 0; - return mlxsw_sp_inetaddr_vport_event(port_dev, port_dev, event, 1); + return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1); } static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, @@ -3119,8 +5260,9 @@ static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, netdev_for_each_lower_dev(lag_dev, port_dev, iter) { if (mlxsw_sp_port_dev_check(port_dev)) { - err = mlxsw_sp_inetaddr_vport_event(l3_dev, port_dev, - event, vid); + err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev, + port_dev, + event, vid); if (err) return err; } @@ -3138,189 +5280,24 @@ static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev, return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1); } -static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, - struct net_device *l3_dev) -{ - u16 fid; - - if (is_vlan_dev(l3_dev)) - fid = vlan_dev_vlan_id(l3_dev); - else if (mlxsw_sp->master_bridge.dev == l3_dev) - fid = 1; - else - return mlxsw_sp_vfid_find(mlxsw_sp, l3_dev); - - return mlxsw_sp_fid_find(mlxsw_sp, fid); -} - -static u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp) -{ - return mlxsw_core_max_ports(mlxsw_sp->core) + 1; -} - -static enum mlxsw_flood_table_type mlxsw_sp_flood_table_type_get(u16 fid) -{ - return mlxsw_sp_fid_is_vfid(fid) ? MLXSW_REG_SFGC_TABLE_TYPE_FID : - MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; -} - -static u16 mlxsw_sp_flood_table_index_get(u16 fid) -{ - return mlxsw_sp_fid_is_vfid(fid) ? 
mlxsw_sp_fid_to_vfid(fid) : fid; -} - -static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid, - bool set) -{ - u8 router_port = mlxsw_sp_router_port(mlxsw_sp); - enum mlxsw_flood_table_type table_type; - char *sftr_pl; - u16 index; - int err; - - sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); - if (!sftr_pl) - return -ENOMEM; - - table_type = mlxsw_sp_flood_table_type_get(fid); - index = mlxsw_sp_flood_table_index_get(fid); - mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BC, index, table_type, - 1, router_port, set); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); - - kfree(sftr_pl); - return err; -} - -static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid) -{ - if (mlxsw_sp_fid_is_vfid(fid)) - return MLXSW_REG_RITR_FID_IF; - else - return MLXSW_REG_RITR_VLAN_IF; -} - -static int mlxsw_sp_rif_bridge_op(struct mlxsw_sp *mlxsw_sp, u16 vr_id, - struct net_device *l3_dev, - u16 fid, u16 rif, - bool create) -{ - enum mlxsw_reg_ritr_if_type rif_type; - char ritr_pl[MLXSW_REG_RITR_LEN]; - - rif_type = mlxsw_sp_rif_type_get(fid); - mlxsw_reg_ritr_pack(ritr_pl, create, rif_type, rif, vr_id, l3_dev->mtu, - l3_dev->dev_addr); - mlxsw_reg_ritr_fid_set(ritr_pl, rif_type, fid); - - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); -} - -static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp, - struct net_device *l3_dev, - struct mlxsw_sp_fid *f) -{ - u32 tb_id = l3mdev_fib_table(l3_dev); - struct mlxsw_sp_rif *rif; - struct mlxsw_sp_vr *vr; - u16 rif_index; - int err; - - rif_index = mlxsw_sp_avail_rif_get(mlxsw_sp); - if (rif_index == MLXSW_SP_INVALID_INDEX_RIF) - return -ERANGE; - - vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN); - if (IS_ERR(vr)) - return PTR_ERR(vr); - - err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true); - if (err) - goto err_port_flood_set; - - err = mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, - rif_index, true); - if (err) - goto err_rif_bridge_op; - - err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true); - if (err) - goto err_rif_fdb_op; - - rif = mlxsw_sp_rif_alloc(rif_index, vr->id, l3_dev, f); - if (!rif) { - err = -ENOMEM; - goto err_rif_alloc; - } - - f->rif = rif; - mlxsw_sp->rifs[rif_index] = rif; - vr->rif_count++; - - netdev_dbg(l3_dev, "RIF=%d created\n", rif_index); - - return 0; - -err_rif_alloc: - mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false); -err_rif_fdb_op: - mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, rif_index, - false); -err_rif_bridge_op: - mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false); -err_port_flood_set: - mlxsw_sp_vr_put(vr); - return err; -} - -void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif) -{ - struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[rif->vr_id]; - struct net_device *l3_dev = rif->dev; - struct mlxsw_sp_fid *f = rif->f; - u16 rif_index = rif->rif_index; - - mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); - - vr->rif_count--; - mlxsw_sp->rifs[rif_index] = NULL; - f->rif = NULL; - - kfree(rif); - - mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false); - - mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, rif_index, - false); - - mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false); - - mlxsw_sp_vr_put(vr); - - netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif_index); -} - static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev, - struct net_device *br_dev, unsigned long event) { struct mlxsw_sp *mlxsw_sp = 
mlxsw_sp_lower_get(l3_dev); - struct mlxsw_sp_fid *f; - - /* FID can either be an actual FID if the L3 device is the - * VLAN-aware bridge or a VLAN device on top. Otherwise, the - * L3 device is a VLAN-unaware bridge and we get a vFID. - */ - f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev); - if (WARN_ON(!f)) - return -EINVAL; + struct mlxsw_sp_rif_params params = { + .dev = l3_dev, + }; + struct mlxsw_sp_rif *rif; switch (event) { case NETDEV_UP: - return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f); + rif = mlxsw_sp_rif_create(mlxsw_sp, &params); + if (IS_ERR(rif)) + return PTR_ERR(rif); + break; case NETDEV_DOWN: - mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif); + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); + mlxsw_sp_rif_destroy(rif); break; } @@ -3331,22 +5308,19 @@ static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev, unsigned long event) { struct net_device *real_dev = vlan_dev_real_dev(vlan_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev); u16 vid = vlan_dev_vlan_id(vlan_dev); if (netif_is_bridge_port(vlan_dev)) return 0; if (mlxsw_sp_port_dev_check(real_dev)) - return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event, - vid); + return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev, + event, vid); else if (netif_is_lag_master(real_dev)) return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event, vid); - else if (netif_is_bridge_master(real_dev) && - mlxsw_sp->master_bridge.dev == real_dev) - return mlxsw_sp_inetaddr_bridge_event(vlan_dev, real_dev, - event); + else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev)) + return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event); return 0; } @@ -3359,7 +5333,7 @@ static int __mlxsw_sp_inetaddr_event(struct net_device *dev, else if (netif_is_lag_master(dev)) return mlxsw_sp_inetaddr_lag_event(dev, event); else if (netif_is_bridge_master(dev)) - return mlxsw_sp_inetaddr_bridge_event(dev, dev, event); + return mlxsw_sp_inetaddr_bridge_event(dev, event); else if (is_vlan_dev(dev)) return mlxsw_sp_inetaddr_vlan_event(dev, event); else @@ -3380,7 +5354,7 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused, goto out; rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!mlxsw_sp_rif_should_config(rif, ifa->ifa_dev, event)) + if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; err = __mlxsw_sp_inetaddr_event(dev, event); @@ -3388,6 +5362,61 @@ out: return notifier_from_errno(err); } +struct mlxsw_sp_inet6addr_event_work { + struct work_struct work; + struct net_device *dev; + unsigned long event; +}; + +static void mlxsw_sp_inet6addr_event_work(struct work_struct *work) +{ + struct mlxsw_sp_inet6addr_event_work *inet6addr_work = + container_of(work, struct mlxsw_sp_inet6addr_event_work, work); + struct net_device *dev = inet6addr_work->dev; + unsigned long event = inet6addr_work->event; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + + rtnl_lock(); + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + goto out; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!mlxsw_sp_rif_should_config(rif, dev, event)) + goto out; + + __mlxsw_sp_inetaddr_event(dev, event); +out: + rtnl_unlock(); + dev_put(dev); + kfree(inet6addr_work); +} + +/* Called with rcu_read_lock() */ +int mlxsw_sp_inet6addr_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr; + struct mlxsw_sp_inet6addr_event_work *inet6addr_work; + struct net_device *dev = if6->idev->dev; + + if
(!mlxsw_sp_port_dev_lower_find_rcu(dev)) + return NOTIFY_DONE; + + inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC); + if (!inet6addr_work) + return NOTIFY_BAD; + + INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work); + inet6addr_work->dev = dev; + inet6addr_work->event = event; + dev_hold(dev); + mlxsw_core_schedule_work(&inet6addr_work->work); + + return NOTIFY_DONE; +} + static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index, const char *mac, int mtu) { @@ -3409,6 +5438,7 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev) { struct mlxsw_sp *mlxsw_sp; struct mlxsw_sp_rif *rif; + u16 fid_index; int err; mlxsw_sp = mlxsw_sp_lower_get(dev); @@ -3418,8 +5448,9 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev) rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); if (!rif) return 0; + fid_index = mlxsw_sp_fid_index(rif->fid); - err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, rif->f->fid, false); + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false); if (err) return err; @@ -3428,7 +5459,7 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev) if (err) goto err_rif_edit; - err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, rif->f->fid, true); + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true); if (err) goto err_rif_fdb_op; @@ -3442,7 +5473,7 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev) err_rif_fdb_op: mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu); err_rif_edit: - mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, rif->f->fid, true); + mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true); return err; } @@ -3495,16 +5526,408 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, return err; } +static struct mlxsw_sp_rif_subport * +mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif) +{ + return container_of(rif, struct mlxsw_sp_rif_subport, common); +} + +static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif, + const struct mlxsw_sp_rif_params *params) +{ + struct mlxsw_sp_rif_subport *rif_subport; + + rif_subport = mlxsw_sp_rif_subport_rif(rif); + rif_subport->vid = params->vid; + rif_subport->lag = params->lag; + if (params->lag) + rif_subport->lag_id = params->lag_id; + else + rif_subport->system_port = params->system_port; +} + +static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_rif_subport *rif_subport; + char ritr_pl[MLXSW_REG_RITR_LEN]; + + rif_subport = mlxsw_sp_rif_subport_rif(rif); + mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF, + rif->rif_index, rif->vr_id, rif->dev->mtu); + mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr); + mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag, + rif_subport->lag ? 
rif_subport->lag_id : + rif_subport->system_port, + rif_subport->vid); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif) +{ + int err; + + err = mlxsw_sp_rif_subport_op(rif, true); + if (err) + return err; + + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), true); + if (err) + goto err_rif_fdb_op; + + mlxsw_sp_fid_rif_set(rif->fid, rif); + return 0; + +err_rif_fdb_op: + mlxsw_sp_rif_subport_op(rif, false); + return err; +} + +static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_fid *fid = rif->fid; + + mlxsw_sp_fid_rif_set(fid, NULL); + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(fid), false); + mlxsw_sp_rif_subport_op(rif, false); +} + +static struct mlxsw_sp_fid * +mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif) +{ + return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index); +} + +static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = { + .type = MLXSW_SP_RIF_TYPE_SUBPORT, + .rif_size = sizeof(struct mlxsw_sp_rif_subport), + .setup = mlxsw_sp_rif_subport_setup, + .configure = mlxsw_sp_rif_subport_configure, + .deconfigure = mlxsw_sp_rif_subport_deconfigure, + .fid_get = mlxsw_sp_rif_subport_fid_get, +}; + +static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif, + enum mlxsw_reg_ritr_if_type type, + u16 vid_fid, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + char ritr_pl[MLXSW_REG_RITR_LEN]; + + mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id, + rif->dev->mtu); + mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr); + mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp) +{ + return mlxsw_core_max_ports(mlxsw_sp->core) + 1; +} + +static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); + int err; + + err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true); + if (err) + return err; + + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), true); + if (err) + goto err_fid_mc_flood_set; + + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, + mlxsw_sp_router_port(mlxsw_sp), true); + if (err) + goto err_fid_bc_flood_set; + + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), true); + if (err) + goto err_rif_fdb_op; + + mlxsw_sp_fid_rif_set(rif->fid, rif); + return 0; + +err_rif_fdb_op: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, + mlxsw_sp_router_port(mlxsw_sp), false); +err_fid_bc_flood_set: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); +err_fid_mc_flood_set: + mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false); + return err; +} + +static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif) +{ + u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_fid *fid = rif->fid; + + mlxsw_sp_fid_rif_set(fid, NULL); + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(fid), false); + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, + mlxsw_sp_router_port(mlxsw_sp), false); + mlxsw_sp_fid_flood_set(rif->fid, 
MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); + mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false); +} + +static struct mlxsw_sp_fid * +mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif) +{ + u16 vid = is_vlan_dev(rif->dev) ? vlan_dev_vlan_id(rif->dev) : 1; + + return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid); +} + +static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = { + .type = MLXSW_SP_RIF_TYPE_VLAN, + .rif_size = sizeof(struct mlxsw_sp_rif), + .configure = mlxsw_sp_rif_vlan_configure, + .deconfigure = mlxsw_sp_rif_vlan_deconfigure, + .fid_get = mlxsw_sp_rif_vlan_fid_get, +}; + +static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + u16 fid_index = mlxsw_sp_fid_index(rif->fid); + int err; + + err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, + true); + if (err) + return err; + + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), true); + if (err) + goto err_fid_mc_flood_set; + + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, + mlxsw_sp_router_port(mlxsw_sp), true); + if (err) + goto err_fid_bc_flood_set; + + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), true); + if (err) + goto err_rif_fdb_op; + + mlxsw_sp_fid_rif_set(rif->fid, rif); + return 0; + +err_rif_fdb_op: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, + mlxsw_sp_router_port(mlxsw_sp), false); +err_fid_bc_flood_set: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); +err_fid_mc_flood_set: + mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); + return err; +} + +static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif) +{ + u16 fid_index = mlxsw_sp_fid_index(rif->fid); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_fid *fid = rif->fid; + + mlxsw_sp_fid_rif_set(fid, NULL); + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(fid), false); + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, + mlxsw_sp_router_port(mlxsw_sp), false); + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); + mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); +} + +static struct mlxsw_sp_fid * +mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif) +{ + return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex); +} + +static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = { + .type = MLXSW_SP_RIF_TYPE_FID, + .rif_size = sizeof(struct mlxsw_sp_rif), + .configure = mlxsw_sp_rif_fid_configure, + .deconfigure = mlxsw_sp_rif_fid_deconfigure, + .fid_get = mlxsw_sp_rif_fid_fid_get, +}; + +static struct mlxsw_sp_rif_ipip_lb * +mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif) +{ + return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common); +} + +static void +mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif, + const struct mlxsw_sp_rif_params *params) +{ + struct mlxsw_sp_rif_params_ipip_lb *params_lb; + struct mlxsw_sp_rif_ipip_lb *rif_lb; + + params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb, + common); + rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif); + rif_lb->lb_config = params_lb->lb_config; +} + +static int +mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, + struct mlxsw_sp_vr *ul_vr, bool enable) +{ + struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config; + struct mlxsw_sp_rif *rif 
= &lb_rif->common; + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + char ritr_pl[MLXSW_REG_RITR_LEN]; + u32 saddr4; + + switch (lb_cf.ul_protocol) { + case MLXSW_SP_L3_PROTO_IPV4: + saddr4 = be32_to_cpu(lb_cf.saddr.addr4); + mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF, + rif->rif_index, rif->vr_id, rif->dev->mtu); + mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt, + MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET, + ul_vr->id, saddr4, lb_cf.okey); + break; + + case MLXSW_SP_L3_PROTO_IPV6: + return -EAFNOSUPPORT; + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static int +mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_vr *ul_vr; + int err; + + ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id); + if (IS_ERR(ul_vr)) + return PTR_ERR(ul_vr); + + err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true); + if (err) + goto err_loopback_op; + + lb_rif->ul_vr_id = ul_vr->id; + ++ul_vr->rif_count; + return 0; + +err_loopback_op: + mlxsw_sp_vr_put(ul_vr); + return err; +} + +static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_vr *ul_vr; + + ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id]; + mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false); + + --ul_vr->rif_count; + mlxsw_sp_vr_put(ul_vr); +} + +static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = { + .type = MLXSW_SP_RIF_TYPE_IPIP_LB, + .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb), + .setup = mlxsw_sp_rif_ipip_lb_setup, + .configure = mlxsw_sp_rif_ipip_lb_configure, + .deconfigure = mlxsw_sp_rif_ipip_lb_deconfigure, +}; + +static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = { + [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops, + [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_ops, + [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops, + [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp_rif_ipip_lb_ops, +}; + +static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) +{ + u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + + mlxsw_sp->router->rifs = kcalloc(max_rifs, + sizeof(struct mlxsw_sp_rif *), + GFP_KERNEL); + if (!mlxsw_sp->router->rifs) + return -ENOMEM; + + mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr; + + return 0; +} + +static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) + WARN_ON_ONCE(mlxsw_sp->router->rifs[i]); + + kfree(mlxsw_sp->router->rifs); +} + +static int +mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp) +{ + char tigcr_pl[MLXSW_REG_TIGCR_LEN]; + + mlxsw_reg_tigcr_pack(tigcr_pl, true, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl); +} + +static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr; + INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list); + return mlxsw_sp_ipip_config_tigcr(mlxsw_sp); +} + +static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp) +{ + WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list)); +} + static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb) { - struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb); + struct mlxsw_sp_router *router; /* Flush pending FIB 
notifications and then flush the device's * table before requesting another dump. The FIB notification * block is unregistered, so no need to take RTNL. */ mlxsw_core_flush_owq(); - mlxsw_sp_router_fib_flush(mlxsw_sp); + router = container_of(nb, struct mlxsw_sp_router, fib_nb); + mlxsw_sp_router_fib_flush(router->mlxsw_sp); } static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) @@ -3515,55 +5938,54 @@ static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS)) return -EIO; - max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); - mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *), - GFP_KERNEL); - if (!mlxsw_sp->rifs) - return -ENOMEM; - mlxsw_reg_rgcr_pack(rgcr_pl, true); + mlxsw_reg_rgcr_pack(rgcr_pl, true, true); mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); if (err) - goto err_rgcr_fail; - + return err; return 0; - -err_rgcr_fail: - kfree(mlxsw_sp->rifs); - return err; } static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { char rgcr_pl[MLXSW_REG_RGCR_LEN]; - int i; - mlxsw_reg_rgcr_pack(rgcr_pl, false); + mlxsw_reg_rgcr_pack(rgcr_pl, false, false); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); - - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) - WARN_ON_ONCE(mlxsw_sp->rifs[i]); - - kfree(mlxsw_sp->rifs); } int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_sp_router *router; int err; - INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list); + router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL); + if (!router) + return -ENOMEM; + mlxsw_sp->router = router; + router->mlxsw_sp = mlxsw_sp; + + INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list); err = __mlxsw_sp_router_init(mlxsw_sp); if (err) - return err; + goto err_router_init; + + err = mlxsw_sp_rifs_init(mlxsw_sp); + if (err) + goto err_rifs_init; + + err = mlxsw_sp_ipips_init(mlxsw_sp); + if (err) + goto err_ipips_init; - err = rhashtable_init(&mlxsw_sp->router.nexthop_ht, + err = rhashtable_init(&mlxsw_sp->router->nexthop_ht, &mlxsw_sp_nexthop_ht_params); if (err) goto err_nexthop_ht_init; - err = rhashtable_init(&mlxsw_sp->router.nexthop_group_ht, + err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht, &mlxsw_sp_nexthop_group_ht_params); if (err) goto err_nexthop_group_ht_init; @@ -3580,8 +6002,8 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_neigh_init; - mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event; - err = register_fib_notifier(&mlxsw_sp->fib_nb, + mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event; + err = register_fib_notifier(&mlxsw_sp->router->fib_nb, mlxsw_sp_router_fib_dump_flush); if (err) goto err_register_fib_notifier; @@ -3595,21 +6017,30 @@ err_neigh_init: err_vrs_init: mlxsw_sp_lpm_fini(mlxsw_sp); err_lpm_init: - rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht); + rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht); err_nexthop_group_ht_init: - rhashtable_destroy(&mlxsw_sp->router.nexthop_ht); + rhashtable_destroy(&mlxsw_sp->router->nexthop_ht); err_nexthop_ht_init: + mlxsw_sp_ipips_fini(mlxsw_sp); +err_ipips_init: + mlxsw_sp_rifs_fini(mlxsw_sp); +err_rifs_init: __mlxsw_sp_router_fini(mlxsw_sp); +err_router_init: + kfree(mlxsw_sp->router); return err; } void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { - unregister_fib_notifier(&mlxsw_sp->fib_nb); + unregister_fib_notifier(&mlxsw_sp->router->fib_nb); 
mlxsw_sp_neigh_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); mlxsw_sp_lpm_fini(mlxsw_sp); - rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht); - rhashtable_destroy(&mlxsw_sp->router.nexthop_ht); + rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht); + rhashtable_destroy(&mlxsw_sp->router->nexthop_ht); + mlxsw_sp_ipips_fini(mlxsw_sp); + mlxsw_sp_rifs_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); + kfree(mlxsw_sp->router); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index c3095fef6697..345fcc4f38e9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -36,13 +36,38 @@ #define _MLXSW_ROUTER_H_ #include "spectrum.h" +#include "reg.h" + +enum mlxsw_sp_l3proto { + MLXSW_SP_L3_PROTO_IPV4, + MLXSW_SP_L3_PROTO_IPV6, +}; + +union mlxsw_sp_l3addr { + __be32 addr4; + struct in6_addr addr6; +}; + +struct mlxsw_sp_rif_ipip_lb; +struct mlxsw_sp_rif_ipip_lb_config { + enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; + u32 okey; + enum mlxsw_sp_l3proto ul_protocol; /* Underlay. */ + union mlxsw_sp_l3addr saddr; +}; enum mlxsw_sp_rif_counter_dir { MLXSW_SP_RIF_COUNTER_INGRESS, MLXSW_SP_RIF_COUNTER_EGRESS, }; +struct mlxsw_sp_neigh_entry; + +struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, + u16 rif_index); u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); +u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); +u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, @@ -54,5 +79,33 @@ void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir); +struct mlxsw_sp_neigh_entry * +mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif, + struct mlxsw_sp_neigh_entry *neigh_entry); +int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry); +unsigned char * +mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry); +u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry); +struct in6_addr * +mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry); + +#define mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) \ + for (neigh_entry = mlxsw_sp_rif_neigh_next(rif, NULL); neigh_entry; \ + neigh_entry = mlxsw_sp_rif_neigh_next(rif, neigh_entry)) +int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + u64 *p_counter); +void +mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool adding); +bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry); +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev); +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev); +__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev); #endif /* _MLXSW_ROUTER_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index f4bb0c0b7c1d..d39ffbfcc436 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ 
-52,340 +52,597 @@ #include "core.h" #include "reg.h" -static u16 mlxsw_sp_port_vid_to_fid_get(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid) -{ - struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_port); - u16 fid = vid; +struct mlxsw_sp_bridge_ops; + +struct mlxsw_sp_bridge { + struct mlxsw_sp *mlxsw_sp; + struct { + struct delayed_work dw; +#define MLXSW_SP_DEFAULT_LEARNING_INTERVAL 100 + unsigned int interval; /* ms */ + } fdb_notify; +#define MLXSW_SP_MIN_AGEING_TIME 10 +#define MLXSW_SP_MAX_AGEING_TIME 1000000 +#define MLXSW_SP_DEFAULT_AGEING_TIME 300 + u32 ageing_time; + bool vlan_enabled_exists; + struct list_head bridges_list; + struct list_head mids_list; + DECLARE_BITMAP(mids_bitmap, MLXSW_SP_MID_MAX); + const struct mlxsw_sp_bridge_ops *bridge_8021q_ops; + const struct mlxsw_sp_bridge_ops *bridge_8021d_ops; +}; + +struct mlxsw_sp_bridge_device { + struct net_device *dev; + struct list_head list; + struct list_head ports_list; + u8 vlan_enabled:1, + multicast_enabled:1; + const struct mlxsw_sp_bridge_ops *ops; +}; + +struct mlxsw_sp_bridge_port { + struct net_device *dev; + struct mlxsw_sp_bridge_device *bridge_device; + struct list_head list; + struct list_head vlans_list; + unsigned int ref_count; + u8 stp_state; + unsigned long flags; + bool mrouter; + bool lagged; + union { + u16 lag_id; + u16 system_port; + }; +}; - fid = f ? f->fid : fid; +struct mlxsw_sp_bridge_vlan { + struct list_head list; + struct list_head port_vlan_list; + u16 vid; +}; + +struct mlxsw_sp_bridge_ops { + int (*port_join)(struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port); + void (*port_leave)(struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port); + struct mlxsw_sp_fid * + (*fid_get)(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid); +}; + +static int +mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_port *bridge_port, + u16 fid_index); + +static struct mlxsw_sp_bridge_device * +mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge, + const struct net_device *br_dev) +{ + struct mlxsw_sp_bridge_device *bridge_device; - if (!fid) - fid = mlxsw_sp_port->pvid; + list_for_each_entry(bridge_device, &bridge->bridges_list, list) + if (bridge_device->dev == br_dev) + return bridge_device; - return fid; + return NULL; } -static struct mlxsw_sp_port * -mlxsw_sp_port_orig_get(struct net_device *dev, - struct mlxsw_sp_port *mlxsw_sp_port) +static struct mlxsw_sp_bridge_device * +mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, + struct net_device *br_dev) { - struct mlxsw_sp_port *mlxsw_sp_vport; - struct mlxsw_sp_fid *fid; - u16 vid; + struct device *dev = bridge->mlxsw_sp->bus_info->dev; + struct mlxsw_sp_bridge_device *bridge_device; + bool vlan_enabled = br_vlan_enabled(br_dev); - if (netif_is_bridge_master(dev)) { - fid = mlxsw_sp_vfid_find(mlxsw_sp_port->mlxsw_sp, - dev); - if (fid) { - mlxsw_sp_vport = - mlxsw_sp_port_vport_find_by_fid(mlxsw_sp_port, - fid->fid); - WARN_ON(!mlxsw_sp_vport); - return mlxsw_sp_vport; - } + if (vlan_enabled && bridge->vlan_enabled_exists) { + dev_err(dev, "Only one VLAN-aware bridge is supported\n"); + return ERR_PTR(-EINVAL); } - if (!is_vlan_dev(dev)) - return mlxsw_sp_port; + bridge_device = kzalloc(sizeof(*bridge_device), GFP_KERNEL); + if (!bridge_device) + return ERR_PTR(-ENOMEM); + + bridge_device->dev = br_dev; + bridge_device->vlan_enabled = 
vlan_enabled; + bridge_device->multicast_enabled = br_multicast_enabled(br_dev); + INIT_LIST_HEAD(&bridge_device->ports_list); + if (vlan_enabled) { + bridge->vlan_enabled_exists = true; + bridge_device->ops = bridge->bridge_8021q_ops; + } else { + bridge_device->ops = bridge->bridge_8021d_ops; + } + list_add(&bridge_device->list, &bridge->bridges_list); - vid = vlan_dev_vlan_id(dev); - mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); - WARN_ON(!mlxsw_sp_vport); + return bridge_device; +} - return mlxsw_sp_vport; +static void +mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge, + struct mlxsw_sp_bridge_device *bridge_device) +{ + list_del(&bridge_device->list); + if (bridge_device->vlan_enabled) + bridge->vlan_enabled_exists = false; + WARN_ON(!list_empty(&bridge_device->ports_list)); + kfree(bridge_device); } -static int mlxsw_sp_port_attr_get(struct net_device *dev, - struct switchdev_attr *attr) +static struct mlxsw_sp_bridge_device * +mlxsw_sp_bridge_device_get(struct mlxsw_sp_bridge *bridge, + struct net_device *br_dev) { - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_bridge_device *bridge_device; - mlxsw_sp_port = mlxsw_sp_port_orig_get(attr->orig_dev, mlxsw_sp_port); - if (!mlxsw_sp_port) - return -EINVAL; + bridge_device = mlxsw_sp_bridge_device_find(bridge, br_dev); + if (bridge_device) + return bridge_device; - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(mlxsw_sp->base_mac); - memcpy(&attr->u.ppid.id, &mlxsw_sp->base_mac, - attr->u.ppid.id_len); - break; - case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - attr->u.brport_flags = - (mlxsw_sp_port->learning ? BR_LEARNING : 0) | - (mlxsw_sp_port->learning_sync ? BR_LEARNING_SYNC : 0) | - (mlxsw_sp_port->uc_flood ? 
BR_FLOOD : 0); - break; - default: - return -EOPNOTSUPP; + return mlxsw_sp_bridge_device_create(bridge, br_dev); +} + +static void +mlxsw_sp_bridge_device_put(struct mlxsw_sp_bridge *bridge, + struct mlxsw_sp_bridge_device *bridge_device) +{ + if (list_empty(&bridge_device->ports_list)) + mlxsw_sp_bridge_device_destroy(bridge, bridge_device); +} + +static struct mlxsw_sp_bridge_port * +__mlxsw_sp_bridge_port_find(const struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *brport_dev) +{ + struct mlxsw_sp_bridge_port *bridge_port; + + list_for_each_entry(bridge_port, &bridge_device->ports_list, list) { + if (bridge_port->dev == brport_dev) + return bridge_port; } - return 0; + return NULL; } -static int mlxsw_sp_port_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, - u8 state) +static struct mlxsw_sp_bridge_port * +mlxsw_sp_bridge_port_find(struct mlxsw_sp_bridge *bridge, + struct net_device *brport_dev) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - enum mlxsw_reg_spms_state spms_state; - char *spms_pl; - u16 vid; + struct net_device *br_dev = netdev_master_upper_dev_get(brport_dev); + struct mlxsw_sp_bridge_device *bridge_device; + + if (!br_dev) + return NULL; + + bridge_device = mlxsw_sp_bridge_device_find(bridge, br_dev); + if (!bridge_device) + return NULL; + + return __mlxsw_sp_bridge_port_find(bridge_device, brport_dev); +} + +static struct mlxsw_sp_bridge_port * +mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device, + struct net_device *brport_dev) +{ + struct mlxsw_sp_bridge_port *bridge_port; + struct mlxsw_sp_port *mlxsw_sp_port; + + bridge_port = kzalloc(sizeof(*bridge_port), GFP_KERNEL); + if (!bridge_port) + return NULL; + + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(brport_dev); + bridge_port->lagged = mlxsw_sp_port->lagged; + if (bridge_port->lagged) + bridge_port->lag_id = mlxsw_sp_port->lag_id; + else + bridge_port->system_port = mlxsw_sp_port->local_port; + bridge_port->dev = brport_dev; + bridge_port->bridge_device = bridge_device; + bridge_port->stp_state = BR_STATE_DISABLED; + bridge_port->flags = BR_LEARNING | BR_FLOOD | BR_LEARNING_SYNC; + INIT_LIST_HEAD(&bridge_port->vlans_list); + list_add(&bridge_port->list, &bridge_device->ports_list); + bridge_port->ref_count = 1; + + return bridge_port; +} + +static void +mlxsw_sp_bridge_port_destroy(struct mlxsw_sp_bridge_port *bridge_port) +{ + list_del(&bridge_port->list); + WARN_ON(!list_empty(&bridge_port->vlans_list)); + kfree(bridge_port); +} + +static bool +mlxsw_sp_bridge_port_should_destroy(const struct mlxsw_sp_bridge_port * + bridge_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_port->dev); + + /* In case ports were pulled from out of a bridged LAG, then + * it's possible the reference count isn't zero, yet the bridge + * port should be destroyed, as it's no longer an upper of ours. 
+ */ + if (!mlxsw_sp && list_empty(&bridge_port->vlans_list)) + return true; + else if (bridge_port->ref_count == 0) + return true; + else + return false; +} + +static struct mlxsw_sp_bridge_port * +mlxsw_sp_bridge_port_get(struct mlxsw_sp_bridge *bridge, + struct net_device *brport_dev) +{ + struct net_device *br_dev = netdev_master_upper_dev_get(brport_dev); + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_bridge_port *bridge_port; int err; - switch (state) { - case BR_STATE_FORWARDING: - spms_state = MLXSW_REG_SPMS_STATE_FORWARDING; - break; - case BR_STATE_LEARNING: - spms_state = MLXSW_REG_SPMS_STATE_LEARNING; - break; - case BR_STATE_LISTENING: /* fall-through */ - case BR_STATE_DISABLED: /* fall-through */ - case BR_STATE_BLOCKING: - spms_state = MLXSW_REG_SPMS_STATE_DISCARDING; - break; - default: - BUG(); + bridge_port = mlxsw_sp_bridge_port_find(bridge, brport_dev); + if (bridge_port) { + bridge_port->ref_count++; + return bridge_port; } - spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL); - if (!spms_pl) - return -ENOMEM; - mlxsw_reg_spms_pack(spms_pl, mlxsw_sp_port->local_port); + bridge_device = mlxsw_sp_bridge_device_get(bridge, br_dev); + if (IS_ERR(bridge_device)) + return ERR_CAST(bridge_device); - if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { - vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port); - mlxsw_reg_spms_vid_pack(spms_pl, vid, spms_state); - } else { - for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) - mlxsw_reg_spms_vid_pack(spms_pl, vid, spms_state); + bridge_port = mlxsw_sp_bridge_port_create(bridge_device, brport_dev); + if (!bridge_port) { + err = -ENOMEM; + goto err_bridge_port_create; } - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spms), spms_pl); - kfree(spms_pl); - return err; + return bridge_port; + +err_bridge_port_create: + mlxsw_sp_bridge_device_put(bridge, bridge_device); + return ERR_PTR(err); } -static int mlxsw_sp_port_attr_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct switchdev_trans *trans, - u8 state) +static void mlxsw_sp_bridge_port_put(struct mlxsw_sp_bridge *bridge, + struct mlxsw_sp_bridge_port *bridge_port) { - if (switchdev_trans_ph_prepare(trans)) - return 0; + struct mlxsw_sp_bridge_device *bridge_device; - mlxsw_sp_port->stp_state = state; - return mlxsw_sp_port_stp_state_set(mlxsw_sp_port, state); + bridge_port->ref_count--; + if (!mlxsw_sp_bridge_port_should_destroy(bridge_port)) + return; + bridge_device = bridge_port->bridge_device; + mlxsw_sp_bridge_port_destroy(bridge_port); + mlxsw_sp_bridge_device_put(bridge, bridge_device); } -static int __mlxsw_sp_port_flood_table_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 idx_begin, u16 idx_end, - enum mlxsw_sp_flood_table table, - bool set) +static struct mlxsw_sp_port_vlan * +mlxsw_sp_port_vlan_find_by_bridge(struct mlxsw_sp_port *mlxsw_sp_port, + const struct mlxsw_sp_bridge_device * + bridge_device, + u16 vid) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - u16 local_port = mlxsw_sp_port->local_port; - enum mlxsw_flood_table_type table_type; - u16 range = idx_end - idx_begin + 1; - char *sftr_pl; - int err; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; - if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) - table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID; - else - table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; + list_for_each_entry(mlxsw_sp_port_vlan, &mlxsw_sp_port->vlans_list, + list) { + if (!mlxsw_sp_port_vlan->bridge_port) + continue; + if (mlxsw_sp_port_vlan->bridge_port->bridge_device != + bridge_device) + continue; + if 
(bridge_device->vlan_enabled && + mlxsw_sp_port_vlan->vid != vid) + continue; + return mlxsw_sp_port_vlan; + } - sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); - if (!sftr_pl) - return -ENOMEM; + return NULL; +} - mlxsw_reg_sftr_pack(sftr_pl, table, idx_begin, - table_type, range, local_port, set); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); +static struct mlxsw_sp_port_vlan* +mlxsw_sp_port_vlan_find_by_fid(struct mlxsw_sp_port *mlxsw_sp_port, + u16 fid_index) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; - kfree(sftr_pl); - return err; + list_for_each_entry(mlxsw_sp_port_vlan, &mlxsw_sp_port->vlans_list, + list) { + struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; + + if (fid && mlxsw_sp_fid_index(fid) == fid_index) + return mlxsw_sp_port_vlan; + } + + return NULL; } -static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 idx_begin, u16 idx_end, bool uc_set, - bool bc_set, bool mc_set) +static struct mlxsw_sp_bridge_vlan * +mlxsw_sp_bridge_vlan_find(const struct mlxsw_sp_bridge_port *bridge_port, + u16 vid) { - int err; + struct mlxsw_sp_bridge_vlan *bridge_vlan; - err = __mlxsw_sp_port_flood_table_set(mlxsw_sp_port, idx_begin, idx_end, - MLXSW_SP_FLOOD_TABLE_UC, uc_set); - if (err) - return err; + list_for_each_entry(bridge_vlan, &bridge_port->vlans_list, list) { + if (bridge_vlan->vid == vid) + return bridge_vlan; + } - err = __mlxsw_sp_port_flood_table_set(mlxsw_sp_port, idx_begin, idx_end, - MLXSW_SP_FLOOD_TABLE_BC, bc_set); - if (err) - goto err_flood_bm_set; + return NULL; +} - err = __mlxsw_sp_port_flood_table_set(mlxsw_sp_port, idx_begin, idx_end, - MLXSW_SP_FLOOD_TABLE_MC, mc_set); - if (err) - goto err_flood_mc_set; - return 0; +static struct mlxsw_sp_bridge_vlan * +mlxsw_sp_bridge_vlan_create(struct mlxsw_sp_bridge_port *bridge_port, u16 vid) +{ + struct mlxsw_sp_bridge_vlan *bridge_vlan; -err_flood_mc_set: - __mlxsw_sp_port_flood_table_set(mlxsw_sp_port, idx_begin, idx_end, - MLXSW_SP_FLOOD_TABLE_BC, !bc_set); -err_flood_bm_set: - __mlxsw_sp_port_flood_table_set(mlxsw_sp_port, idx_begin, idx_end, - MLXSW_SP_FLOOD_TABLE_UC, !uc_set); - return err; + bridge_vlan = kzalloc(sizeof(*bridge_vlan), GFP_KERNEL); + if (!bridge_vlan) + return NULL; + + INIT_LIST_HEAD(&bridge_vlan->port_vlan_list); + bridge_vlan->vid = vid; + list_add(&bridge_vlan->list, &bridge_port->vlans_list); + + return bridge_vlan; } -static int mlxsw_sp_port_flood_table_set(struct mlxsw_sp_port *mlxsw_sp_port, - enum mlxsw_sp_flood_table table, - bool set) +static void +mlxsw_sp_bridge_vlan_destroy(struct mlxsw_sp_bridge_vlan *bridge_vlan) { - struct net_device *dev = mlxsw_sp_port->dev; - u16 vid, last_visited_vid; - int err; + list_del(&bridge_vlan->list); + WARN_ON(!list_empty(&bridge_vlan->port_vlan_list)); + kfree(bridge_vlan); +} - if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { - u16 fid = mlxsw_sp_vport_fid_get(mlxsw_sp_port)->fid; - u16 vfid = mlxsw_sp_fid_to_vfid(fid); +static struct mlxsw_sp_bridge_vlan * +mlxsw_sp_bridge_vlan_get(struct mlxsw_sp_bridge_port *bridge_port, u16 vid) +{ + struct mlxsw_sp_bridge_vlan *bridge_vlan; - return __mlxsw_sp_port_flood_table_set(mlxsw_sp_port, vfid, - vfid, table, set); - } + bridge_vlan = mlxsw_sp_bridge_vlan_find(bridge_port, vid); + if (bridge_vlan) + return bridge_vlan; - for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) { - err = __mlxsw_sp_port_flood_table_set(mlxsw_sp_port, vid, vid, - table, set); - if (err) { - last_visited_vid = vid; - goto err_port_flood_set; - } + return 
mlxsw_sp_bridge_vlan_create(bridge_port, vid); +} + +static void mlxsw_sp_bridge_vlan_put(struct mlxsw_sp_bridge_vlan *bridge_vlan) +{ + if (list_empty(&bridge_vlan->port_vlan_list)) + mlxsw_sp_bridge_vlan_destroy(bridge_vlan); +} + +static void mlxsw_sp_port_bridge_flags_get(struct mlxsw_sp_bridge *bridge, + struct net_device *dev, + unsigned long *brport_flags) +{ + struct mlxsw_sp_bridge_port *bridge_port; + + bridge_port = mlxsw_sp_bridge_port_find(bridge, dev); + if (WARN_ON(!bridge_port)) + return; + + memcpy(brport_flags, &bridge_port->flags, sizeof(*brport_flags)); +} + +static int mlxsw_sp_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: + attr->u.ppid.id_len = sizeof(mlxsw_sp->base_mac); + memcpy(&attr->u.ppid.id, &mlxsw_sp->base_mac, + attr->u.ppid.id_len); + break; + case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: + mlxsw_sp_port_bridge_flags_get(mlxsw_sp->bridge, attr->orig_dev, + &attr->u.brport_flags); + break; + case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: + attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD; + break; + default: + return -EOPNOTSUPP; } return 0; +} -err_port_flood_set: - for_each_set_bit(vid, mlxsw_sp_port->active_vlans, last_visited_vid) - __mlxsw_sp_port_flood_table_set(mlxsw_sp_port, vid, vid, table, - !set); - netdev_err(dev, "Failed to configure unicast flooding\n"); - return err; +static int +mlxsw_sp_port_bridge_vlan_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_vlan *bridge_vlan, + u8 state) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + + list_for_each_entry(mlxsw_sp_port_vlan, &bridge_vlan->port_vlan_list, + bridge_vlan_node) { + if (mlxsw_sp_port_vlan->mlxsw_sp_port != mlxsw_sp_port) + continue; + return mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, + bridge_vlan->vid, state); + } + + return 0; } -static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct switchdev_trans *trans, - bool mc_disabled) +static int mlxsw_sp_port_attr_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_trans *trans, + struct net_device *orig_dev, + u8 state) { - int set; - int err = 0; + struct mlxsw_sp_bridge_port *bridge_port; + struct mlxsw_sp_bridge_vlan *bridge_vlan; + int err; if (switchdev_trans_ph_prepare(trans)) return 0; - if (mlxsw_sp_port->mc_router != mlxsw_sp_port->mc_flood) { - set = mc_disabled ? - mlxsw_sp_port->mc_flood : mlxsw_sp_port->mc_router; - err = mlxsw_sp_port_flood_table_set(mlxsw_sp_port, - MLXSW_SP_FLOOD_TABLE_MC, - set); + /* It's possible we failed to enslave the port, yet this + * operation is executed due to it being deferred. 
+ */ + bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp_port->mlxsw_sp->bridge, + orig_dev); + if (!bridge_port) + return 0; + + list_for_each_entry(bridge_vlan, &bridge_port->vlans_list, list) { + err = mlxsw_sp_port_bridge_vlan_stp_set(mlxsw_sp_port, + bridge_vlan, state); + if (err) + goto err_port_bridge_vlan_stp_set; } - if (!err) - mlxsw_sp_port->mc_disabled = mc_disabled; + bridge_port->stp_state = state; + return 0; + +err_port_bridge_vlan_stp_set: + list_for_each_entry_continue_reverse(bridge_vlan, + &bridge_port->vlans_list, list) + mlxsw_sp_port_bridge_vlan_stp_set(mlxsw_sp_port, bridge_vlan, + bridge_port->stp_state); return err; } -int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, - bool set) +static int +mlxsw_sp_port_bridge_vlan_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_vlan *bridge_vlan, + enum mlxsw_sp_flood_type packet_type, + bool member) { - bool mc_set = set; - u16 vfid; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; - /* In case of vFIDs, index into the flooding table is relative to - * the start of the vFIDs range. - */ - vfid = mlxsw_sp_fid_to_vfid(fid); - - if (set) - mc_set = mlxsw_sp_vport->mc_disabled ? - mlxsw_sp_vport->mc_flood : mlxsw_sp_vport->mc_router; + list_for_each_entry(mlxsw_sp_port_vlan, &bridge_vlan->port_vlan_list, + bridge_vlan_node) { + if (mlxsw_sp_port_vlan->mlxsw_sp_port != mlxsw_sp_port) + continue; + return mlxsw_sp_fid_flood_set(mlxsw_sp_port_vlan->fid, + packet_type, + mlxsw_sp_port->local_port, + member); + } - return __mlxsw_sp_port_flood_set(mlxsw_sp_vport, vfid, vfid, set, set, - mc_set); + return 0; } -static int mlxsw_sp_port_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, - bool set) +static int +mlxsw_sp_bridge_port_flood_table_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + enum mlxsw_sp_flood_type packet_type, + bool member) { - u16 vid; + struct mlxsw_sp_bridge_vlan *bridge_vlan; int err; - if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { - vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port); + list_for_each_entry(bridge_vlan, &bridge_port->vlans_list, list) { + err = mlxsw_sp_port_bridge_vlan_flood_set(mlxsw_sp_port, + bridge_vlan, + packet_type, + member); + if (err) + goto err_port_bridge_vlan_flood_set; + } - return __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid, - set); + return 0; + +err_port_bridge_vlan_flood_set: + list_for_each_entry_continue_reverse(bridge_vlan, + &bridge_port->vlans_list, list) + mlxsw_sp_port_bridge_vlan_flood_set(mlxsw_sp_port, bridge_vlan, + packet_type, !member); + return err; +} + +static int +mlxsw_sp_port_bridge_vlan_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_vlan *bridge_vlan, + bool set) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + u16 vid = bridge_vlan->vid; + + list_for_each_entry(mlxsw_sp_port_vlan, &bridge_vlan->port_vlan_list, + bridge_vlan_node) { + if (mlxsw_sp_port_vlan->mlxsw_sp_port != mlxsw_sp_port) + continue; + return mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, set); } - for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) { - err = __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid, - set); + return 0; +} + +static int +mlxsw_sp_bridge_port_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + bool set) +{ + struct mlxsw_sp_bridge_vlan *bridge_vlan; + int err; + + list_for_each_entry(bridge_vlan, &bridge_port->vlans_list, list) { + err = 
mlxsw_sp_port_bridge_vlan_learning_set(mlxsw_sp_port, + bridge_vlan, set); if (err) - goto err_port_vid_learning_set; + goto err_port_bridge_vlan_learning_set; } return 0; -err_port_vid_learning_set: - for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) - __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid, !set); +err_port_bridge_vlan_learning_set: + list_for_each_entry_continue_reverse(bridge_vlan, + &bridge_port->vlans_list, list) + mlxsw_sp_port_bridge_vlan_learning_set(mlxsw_sp_port, + bridge_vlan, !set); return err; } static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_trans *trans, + struct net_device *orig_dev, unsigned long brport_flags) { - unsigned long learning = mlxsw_sp_port->learning ? BR_LEARNING : 0; - unsigned long uc_flood = mlxsw_sp_port->uc_flood ? BR_FLOOD : 0; + struct mlxsw_sp_bridge_port *bridge_port; int err; - if (!mlxsw_sp_port->bridged) - return -EINVAL; - if (switchdev_trans_ph_prepare(trans)) return 0; - if ((uc_flood ^ brport_flags) & BR_FLOOD) { - err = mlxsw_sp_port_flood_table_set(mlxsw_sp_port, - MLXSW_SP_FLOOD_TABLE_UC, - !mlxsw_sp_port->uc_flood); - if (err) - return err; - } + bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp_port->mlxsw_sp->bridge, + orig_dev); + if (!bridge_port) + return 0; - if ((learning ^ brport_flags) & BR_LEARNING) { - err = mlxsw_sp_port_learning_set(mlxsw_sp_port, - !mlxsw_sp_port->learning); - if (err) - goto err_port_learning_set; - } + err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port, + MLXSW_SP_FLOOD_TYPE_UC, + brport_flags & BR_FLOOD); + if (err) + return err; - mlxsw_sp_port->uc_flood = brport_flags & BR_FLOOD ? 1 : 0; - mlxsw_sp_port->learning = brport_flags & BR_LEARNING ? 1 : 0; - mlxsw_sp_port->learning_sync = brport_flags & BR_LEARNING_SYNC ? 
1 : 0; + err = mlxsw_sp_bridge_port_learning_set(mlxsw_sp_port, bridge_port, + brport_flags & BR_LEARNING); + if (err) + return err; - return 0; + memcpy(&bridge_port->flags, &brport_flags, sizeof(brport_flags)); -err_port_learning_set: - if ((uc_flood ^ brport_flags) & BR_FLOOD) - mlxsw_sp_port_flood_table_set(mlxsw_sp_port, - MLXSW_SP_FLOOD_TABLE_UC, - mlxsw_sp_port->uc_flood); - return err; + return 0; } static int mlxsw_sp_ageing_set(struct mlxsw_sp *mlxsw_sp, u32 ageing_time) @@ -397,7 +654,7 @@ static int mlxsw_sp_ageing_set(struct mlxsw_sp *mlxsw_sp, u32 ageing_time) err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdat), sfdat_pl); if (err) return err; - mlxsw_sp->ageing_time = ageing_time; + mlxsw_sp->bridge->ageing_time = ageing_time; return 0; } @@ -426,28 +683,84 @@ static int mlxsw_sp_port_attr_br_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, bool vlan_enabled) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_bridge_device *bridge_device; + + if (!switchdev_trans_ph_prepare(trans)) + return 0; - /* SWITCHDEV_TRANS_PREPARE phase */ - if ((!vlan_enabled) && (mlxsw_sp->master_bridge.dev == orig_dev)) { - netdev_err(mlxsw_sp_port->dev, "Bridge must be vlan-aware\n"); + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, orig_dev); + if (WARN_ON(!bridge_device)) return -EINVAL; - } - return 0; + if (bridge_device->vlan_enabled == vlan_enabled) + return 0; + + netdev_err(bridge_device->dev, "VLAN filtering can't be changed for existing bridge\n"); + return -EINVAL; } static int mlxsw_sp_port_attr_mc_router_set(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_trans *trans, + struct net_device *orig_dev, bool is_port_mc_router) { + struct mlxsw_sp_bridge_port *bridge_port; + int err; + + if (switchdev_trans_ph_prepare(trans)) + return 0; + + bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp_port->mlxsw_sp->bridge, + orig_dev); + if (!bridge_port) + return 0; + + if (!bridge_port->bridge_device->multicast_enabled) + goto out; + + err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port, + MLXSW_SP_FLOOD_TYPE_MC, + is_port_mc_router); + if (err) + return err; + +out: + bridge_port->mrouter = is_port_mc_router; + return 0; +} + +static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_trans *trans, + struct net_device *orig_dev, + bool mc_disabled) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_bridge_port *bridge_port; + int err; + if (switchdev_trans_ph_prepare(trans)) return 0; - mlxsw_sp_port->mc_router = is_port_mc_router; - if (!mlxsw_sp_port->mc_disabled) - return mlxsw_sp_port_flood_table_set(mlxsw_sp_port, - MLXSW_SP_FLOOD_TABLE_MC, - is_port_mc_router); + /* It's possible we failed to enslave the port, yet this + * operation is executed due to it being deferred. + */ + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, orig_dev); + if (!bridge_device) + return 0; + + list_for_each_entry(bridge_port, &bridge_device->ports_list, list) { + enum mlxsw_sp_flood_type packet_type = MLXSW_SP_FLOOD_TYPE_MC; + bool member = mc_disabled ? 
true : bridge_port->mrouter; + + err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, + bridge_port, + packet_type, member); + if (err) + return err; + } + + bridge_device->multicast_enabled = !mc_disabled; return 0; } @@ -457,19 +770,17 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, struct switchdev_trans *trans) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - int err = 0; - - mlxsw_sp_port = mlxsw_sp_port_orig_get(attr->orig_dev, mlxsw_sp_port); - if (!mlxsw_sp_port) - return -EINVAL; + int err; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: err = mlxsw_sp_port_attr_stp_state_set(mlxsw_sp_port, trans, + attr->orig_dev, attr->u.stp_state); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: err = mlxsw_sp_port_attr_br_flags_set(mlxsw_sp_port, trans, + attr->orig_dev, attr->u.brport_flags); break; case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: @@ -483,10 +794,12 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, break; case SWITCHDEV_ATTR_ID_PORT_MROUTER: err = mlxsw_sp_port_attr_mc_router_set(mlxsw_sp_port, trans, + attr->orig_dev, attr->u.mrouter); break; case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED: err = mlxsw_sp_port_mc_disabled_set(mlxsw_sp_port, trans, + attr->orig_dev, attr->u.mc_disabled); break; default: @@ -497,371 +810,268 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, return err; } -static int mlxsw_sp_fid_op(struct mlxsw_sp *mlxsw_sp, u16 fid, bool create) -{ - char sfmr_pl[MLXSW_REG_SFMR_LEN]; - - mlxsw_reg_sfmr_pack(sfmr_pl, !create, fid, fid); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); -} - -static int mlxsw_sp_fid_map(struct mlxsw_sp *mlxsw_sp, u16 fid, bool valid) -{ - enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_VID_TO_FID; - char svfa_pl[MLXSW_REG_SVFA_LEN]; - - mlxsw_reg_svfa_pack(svfa_pl, 0, mt, valid, fid, fid); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl); -} - -static struct mlxsw_sp_fid *mlxsw_sp_fid_alloc(u16 fid) +static bool mlxsw_sp_mc_flood(const struct mlxsw_sp_bridge_port *bridge_port) { - struct mlxsw_sp_fid *f; - - f = kzalloc(sizeof(*f), GFP_KERNEL); - if (!f) - return NULL; + const struct mlxsw_sp_bridge_device *bridge_device; - f->fid = fid; - - return f; + bridge_device = bridge_port->bridge_device; + return !bridge_device->multicast_enabled ? true : bridge_port->mrouter; } -struct mlxsw_sp_fid *mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid) +static int +mlxsw_sp_port_vlan_fid_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, + struct mlxsw_sp_bridge_port *bridge_port) { - struct mlxsw_sp_fid *f; + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; + struct mlxsw_sp_bridge_device *bridge_device; + u8 local_port = mlxsw_sp_port->local_port; + u16 vid = mlxsw_sp_port_vlan->vid; + struct mlxsw_sp_fid *fid; int err; - err = mlxsw_sp_fid_op(mlxsw_sp, fid, true); - if (err) - return ERR_PTR(err); + bridge_device = bridge_port->bridge_device; + fid = bridge_device->ops->fid_get(bridge_device, vid); + if (IS_ERR(fid)) + return PTR_ERR(fid); - /* Although all the ports member in the FID might be using a - * {Port, VID} to FID mapping, we create a global VID-to-FID - * mapping. This allows a port to transition to VLAN mode, - * knowing the global mapping exists. 
- */ - err = mlxsw_sp_fid_map(mlxsw_sp, fid, true); + err = mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_UC, local_port, + bridge_port->flags & BR_FLOOD); if (err) - goto err_fid_map; - - f = mlxsw_sp_fid_alloc(fid); - if (!f) { - err = -ENOMEM; - goto err_allocate_fid; - } - - list_add(&f->list, &mlxsw_sp->fids); - - return f; - -err_allocate_fid: - mlxsw_sp_fid_map(mlxsw_sp, fid, false); -err_fid_map: - mlxsw_sp_fid_op(mlxsw_sp, fid, false); - return ERR_PTR(err); -} + goto err_fid_uc_flood_set; -void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f) -{ - u16 fid = f->fid; + err = mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_MC, local_port, + mlxsw_sp_mc_flood(bridge_port)); + if (err) + goto err_fid_mc_flood_set; - list_del(&f->list); + err = mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_BC, local_port, + true); + if (err) + goto err_fid_bc_flood_set; - if (f->rif) - mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif); + err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid); + if (err) + goto err_fid_port_vid_map; - kfree(f); + mlxsw_sp_port_vlan->fid = fid; - mlxsw_sp_fid_map(mlxsw_sp, fid, false); + return 0; - mlxsw_sp_fid_op(mlxsw_sp, fid, false); +err_fid_port_vid_map: + mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_BC, local_port, false); +err_fid_bc_flood_set: + mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_MC, local_port, false); +err_fid_mc_flood_set: + mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_UC, local_port, false); +err_fid_uc_flood_set: + mlxsw_sp_fid_put(fid); + return err; } -static int __mlxsw_sp_port_fid_join(struct mlxsw_sp_port *mlxsw_sp_port, - u16 fid) +static void +mlxsw_sp_port_vlan_fid_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) { - struct mlxsw_sp_fid *f; - - if (test_bit(fid, mlxsw_sp_port->active_vlans)) - return 0; + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; + struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; + u8 local_port = mlxsw_sp_port->local_port; + u16 vid = mlxsw_sp_port_vlan->vid; - f = mlxsw_sp_fid_find(mlxsw_sp_port->mlxsw_sp, fid); - if (!f) { - f = mlxsw_sp_fid_create(mlxsw_sp_port->mlxsw_sp, fid); - if (IS_ERR(f)) - return PTR_ERR(f); - } - - f->ref_count++; - - netdev_dbg(mlxsw_sp_port->dev, "Joined FID=%d\n", fid); - - return 0; + mlxsw_sp_port_vlan->fid = NULL; + mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid); + mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_BC, local_port, false); + mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_MC, local_port, false); + mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_UC, local_port, false); + mlxsw_sp_fid_put(fid); } -static void __mlxsw_sp_port_fid_leave(struct mlxsw_sp_port *mlxsw_sp_port, - u16 fid) +static u16 +mlxsw_sp_port_pvid_determine(const struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid, bool is_pvid) { - struct mlxsw_sp_fid *f; - - f = mlxsw_sp_fid_find(mlxsw_sp_port->mlxsw_sp, fid); - if (WARN_ON(!f)) - return; - - netdev_dbg(mlxsw_sp_port->dev, "Left FID=%d\n", fid); - - mlxsw_sp_port_fdb_flush(mlxsw_sp_port, fid); - - if (--f->ref_count == 0) - mlxsw_sp_fid_destroy(mlxsw_sp_port->mlxsw_sp, f); + if (is_pvid) + return vid; + else if (mlxsw_sp_port->pvid == vid) + return 0; /* Dis-allow untagged packets */ + else + return mlxsw_sp_port->pvid; } -static int mlxsw_sp_port_fid_map(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid, - bool valid) +static int +mlxsw_sp_port_vlan_bridge_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, + struct mlxsw_sp_bridge_port *bridge_port) { - enum mlxsw_reg_svfa_mt mt = 
MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; + struct mlxsw_sp_bridge_vlan *bridge_vlan; + u16 vid = mlxsw_sp_port_vlan->vid; + int err; - /* If port doesn't have vPorts, then it can use the global - * VID-to-FID mapping. - */ - if (list_empty(&mlxsw_sp_port->vports_list)) + /* No need to continue if only VLAN flags were changed */ + if (mlxsw_sp_port_vlan->bridge_port) return 0; - return mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, mt, valid, fid, fid); -} - -static int mlxsw_sp_port_fid_join(struct mlxsw_sp_port *mlxsw_sp_port, - u16 fid_begin, u16 fid_end) -{ - bool mc_flood; - int fid, err; - - for (fid = fid_begin; fid <= fid_end; fid++) { - err = __mlxsw_sp_port_fid_join(mlxsw_sp_port, fid); - if (err) - goto err_port_fid_join; - } + err = mlxsw_sp_port_vlan_fid_join(mlxsw_sp_port_vlan, bridge_port); + if (err) + return err; - mc_flood = mlxsw_sp_port->mc_disabled ? - mlxsw_sp_port->mc_flood : mlxsw_sp_port->mc_router; + err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, + bridge_port->flags & BR_LEARNING); + if (err) + goto err_port_vid_learning_set; - err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, fid_begin, fid_end, - mlxsw_sp_port->uc_flood, true, - mc_flood); + err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, + bridge_port->stp_state); if (err) - goto err_port_flood_set; + goto err_port_vid_stp_set; - for (fid = fid_begin; fid <= fid_end; fid++) { - err = mlxsw_sp_port_fid_map(mlxsw_sp_port, fid, true); - if (err) - goto err_port_fid_map; + bridge_vlan = mlxsw_sp_bridge_vlan_get(bridge_port, vid); + if (!bridge_vlan) { + err = -ENOMEM; + goto err_bridge_vlan_get; } + list_add(&mlxsw_sp_port_vlan->bridge_vlan_node, + &bridge_vlan->port_vlan_list); + + mlxsw_sp_bridge_port_get(mlxsw_sp_port->mlxsw_sp->bridge, + bridge_port->dev); + mlxsw_sp_port_vlan->bridge_port = bridge_port; + return 0; -err_port_fid_map: - for (fid--; fid >= fid_begin; fid--) - mlxsw_sp_port_fid_map(mlxsw_sp_port, fid, false); - __mlxsw_sp_port_flood_set(mlxsw_sp_port, fid_begin, fid_end, false, - false, false); -err_port_flood_set: - fid = fid_end; -err_port_fid_join: - for (fid--; fid >= fid_begin; fid--) - __mlxsw_sp_port_fid_leave(mlxsw_sp_port, fid); +err_bridge_vlan_get: + mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_DISABLED); +err_port_vid_stp_set: + mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false); +err_port_vid_learning_set: + mlxsw_sp_port_vlan_fid_leave(mlxsw_sp_port_vlan); return err; } -static void mlxsw_sp_port_fid_leave(struct mlxsw_sp_port *mlxsw_sp_port, - u16 fid_begin, u16 fid_end) +void +mlxsw_sp_port_vlan_bridge_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) { - int fid; + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; + struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; + struct mlxsw_sp_bridge_vlan *bridge_vlan; + struct mlxsw_sp_bridge_port *bridge_port; + u16 vid = mlxsw_sp_port_vlan->vid; + bool last; - for (fid = fid_begin; fid <= fid_end; fid++) - mlxsw_sp_port_fid_map(mlxsw_sp_port, fid, false); - - __mlxsw_sp_port_flood_set(mlxsw_sp_port, fid_begin, fid_end, false, - false, false); + if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_8021Q && + mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_8021D)) + return; - for (fid = fid_begin; fid <= fid_end; fid++) - __mlxsw_sp_port_fid_leave(mlxsw_sp_port, fid); -} + bridge_port = mlxsw_sp_port_vlan->bridge_port; + bridge_vlan = mlxsw_sp_bridge_vlan_find(bridge_port, vid); + last = 
list_is_singular(&bridge_vlan->port_vlan_list); -static int __mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char spvid_pl[MLXSW_REG_SPVID_LEN]; + list_del(&mlxsw_sp_port_vlan->bridge_vlan_node); + mlxsw_sp_bridge_vlan_put(bridge_vlan); + mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_DISABLED); + mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false); + if (last) + mlxsw_sp_bridge_port_fdb_flush(mlxsw_sp_port->mlxsw_sp, + bridge_port, + mlxsw_sp_fid_index(fid)); + mlxsw_sp_port_vlan_fid_leave(mlxsw_sp_port_vlan); - mlxsw_reg_spvid_pack(spvid_pl, mlxsw_sp_port->local_port, vid); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvid), spvid_pl); + mlxsw_sp_bridge_port_put(mlxsw_sp_port->mlxsw_sp->bridge, bridge_port); + mlxsw_sp_port_vlan->bridge_port = NULL; } -static int mlxsw_sp_port_allow_untagged_set(struct mlxsw_sp_port *mlxsw_sp_port, - bool allow) +static int +mlxsw_sp_bridge_port_vlan_add(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + u16 vid, bool is_untagged, bool is_pvid) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char spaft_pl[MLXSW_REG_SPAFT_LEN]; + u16 pvid = mlxsw_sp_port_pvid_determine(mlxsw_sp_port, vid, is_pvid); + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + u16 old_pvid = mlxsw_sp_port->pvid; + int err; - mlxsw_reg_spaft_pack(spaft_pl, mlxsw_sp_port->local_port, allow); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spaft), spaft_pl); -} + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_get(mlxsw_sp_port, vid); + if (IS_ERR(mlxsw_sp_port_vlan)) + return PTR_ERR(mlxsw_sp_port_vlan); -int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) -{ - struct net_device *dev = mlxsw_sp_port->dev; - int err; + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, true, + is_untagged); + if (err) + goto err_port_vlan_set; - if (!vid) { - err = mlxsw_sp_port_allow_untagged_set(mlxsw_sp_port, false); - if (err) { - netdev_err(dev, "Failed to disallow untagged traffic\n"); - return err; - } - } else { - err = __mlxsw_sp_port_pvid_set(mlxsw_sp_port, vid); - if (err) { - netdev_err(dev, "Failed to set PVID\n"); - return err; - } + err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, pvid); + if (err) + goto err_port_pvid_set; - /* Only allow if not already allowed. 
*/ - if (!mlxsw_sp_port->pvid) { - err = mlxsw_sp_port_allow_untagged_set(mlxsw_sp_port, - true); - if (err) { - netdev_err(dev, "Failed to allow untagged traffic\n"); - goto err_port_allow_untagged_set; - } - } - } + err = mlxsw_sp_port_vlan_bridge_join(mlxsw_sp_port_vlan, bridge_port); + if (err) + goto err_port_vlan_bridge_join; - mlxsw_sp_port->pvid = vid; return 0; -err_port_allow_untagged_set: - __mlxsw_sp_port_pvid_set(mlxsw_sp_port, mlxsw_sp_port->pvid); +err_port_vlan_bridge_join: + mlxsw_sp_port_pvid_set(mlxsw_sp_port, old_pvid); +err_port_pvid_set: + mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false); +err_port_vlan_set: + mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan); return err; } -static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid_begin, u16 vid_end, - bool learn_enable) +static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_port_vlan *vlan, + struct switchdev_trans *trans) { - u16 vid, vid_e; - int err; - - for (vid = vid_begin; vid <= vid_end; - vid += MLXSW_REG_SPVMLR_REC_MAX_COUNT) { - vid_e = min((u16) (vid + MLXSW_REG_SPVMLR_REC_MAX_COUNT - 1), - vid_end); - - err = __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, - vid_e, learn_enable); - if (err) - return err; - } - - return 0; -} + bool flag_untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; + bool flag_pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct net_device *orig_dev = vlan->obj.orig_dev; + struct mlxsw_sp_bridge_port *bridge_port; + u16 vid; -static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid_begin, u16 vid_end, - bool flag_untagged, bool flag_pvid) -{ - struct net_device *dev = mlxsw_sp_port->dev; - u16 vid, old_pvid; - int err; + if (switchdev_trans_ph_prepare(trans)) + return 0; - if (!mlxsw_sp_port->bridged) + bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); + if (WARN_ON(!bridge_port)) return -EINVAL; - err = mlxsw_sp_port_fid_join(mlxsw_sp_port, vid_begin, vid_end); - if (err) { - netdev_err(dev, "Failed to join FIDs\n"); - return err; - } - - err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid_begin, vid_end, - true, flag_untagged); - if (err) { - netdev_err(dev, "Unable to add VIDs %d-%d\n", vid_begin, - vid_end); - goto err_port_vlans_set; - } - - old_pvid = mlxsw_sp_port->pvid; - if (flag_pvid && old_pvid != vid_begin) { - err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, vid_begin); - if (err) { - netdev_err(dev, "Unable to add PVID %d\n", vid_begin); - goto err_port_pvid_set; - } - } else if (!flag_pvid && old_pvid >= vid_begin && old_pvid <= vid_end) { - err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0); - if (err) { - netdev_err(dev, "Unable to del PVID\n"); - goto err_port_pvid_set; - } - } - - err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid_begin, vid_end, - mlxsw_sp_port->learning); - if (err) { - netdev_err(dev, "Failed to set learning for VIDs %d-%d\n", - vid_begin, vid_end); - goto err_port_vid_learning_set; - } + if (!bridge_port->bridge_device->vlan_enabled) + return 0; - /* Changing activity bits only if HW operation succeded */ - for (vid = vid_begin; vid <= vid_end; vid++) { - set_bit(vid, mlxsw_sp_port->active_vlans); - if (flag_untagged) - set_bit(vid, mlxsw_sp_port->untagged_vlans); - else - clear_bit(vid, mlxsw_sp_port->untagged_vlans); - } + for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { + int err; - /* STP state change must be done after we set active VLANs */ - err = 
mlxsw_sp_port_stp_state_set(mlxsw_sp_port, - mlxsw_sp_port->stp_state); - if (err) { - netdev_err(dev, "Failed to set STP state\n"); - goto err_port_stp_state_set; + err = mlxsw_sp_bridge_port_vlan_add(mlxsw_sp_port, bridge_port, + vid, flag_untagged, + flag_pvid); + if (err) + return err; } return 0; +} -err_port_stp_state_set: - for (vid = vid_begin; vid <= vid_end; vid++) - clear_bit(vid, mlxsw_sp_port->active_vlans); - mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid_begin, vid_end, - false); -err_port_vid_learning_set: - if (old_pvid != mlxsw_sp_port->pvid) - mlxsw_sp_port_pvid_set(mlxsw_sp_port, old_pvid); -err_port_pvid_set: - mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid_begin, vid_end, - false, false); -err_port_vlans_set: - mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end); - return err; +static enum mlxsw_reg_sfdf_flush_type mlxsw_sp_fdb_flush_type(bool lagged) +{ + return lagged ? MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID : + MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID; } -static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) +static int +mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_port *bridge_port, + u16 fid_index) { - bool flag_untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; - bool flag_pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; + bool lagged = bridge_port->lagged; + char sfdf_pl[MLXSW_REG_SFDF_LEN]; + u16 system_port; - if (switchdev_trans_ph_prepare(trans)) - return 0; + system_port = lagged ? bridge_port->lag_id : bridge_port->system_port; + mlxsw_reg_sfdf_pack(sfdf_pl, mlxsw_sp_fdb_flush_type(lagged)); + mlxsw_reg_sfdf_fid_set(sfdf_pl, fid_index); + mlxsw_reg_sfdf_port_fid_system_port_set(sfdf_pl, system_port); - return __mlxsw_sp_port_vlans_add(mlxsw_sp_port, - vlan->vid_begin, vlan->vid_end, - flag_untagged, flag_pvid); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); } static enum mlxsw_reg_sfd_rec_policy mlxsw_sp_sfd_rec_policy(bool dynamic) @@ -935,29 +1145,40 @@ static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id, } static int -mlxsw_sp_port_fdb_static_add(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans) +mlxsw_sp_port_fdb_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_notifier_fdb_info *fdb_info, bool adding) { - u16 fid = mlxsw_sp_port_vid_to_fid_get(mlxsw_sp_port, fdb->vid); - u16 lag_vid = 0; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct net_device *orig_dev = fdb_info->info.dev; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_bridge_port *bridge_port; + u16 fid_index, vid; + + bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); + if (!bridge_port) + return -EINVAL; - if (switchdev_trans_ph_prepare(trans)) + bridge_device = bridge_port->bridge_device; + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_bridge(mlxsw_sp_port, + bridge_device, + fdb_info->vid); + if (!mlxsw_sp_port_vlan) return 0; - if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { - lag_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port); - } + fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid); + vid = mlxsw_sp_port_vlan->vid; - if (!mlxsw_sp_port->lagged) - return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_port->local_port, - fdb->addr, fid, true, false); + if (!bridge_port->lagged) + return mlxsw_sp_port_fdb_uc_op(mlxsw_sp, + 
bridge_port->system_port, + fdb_info->addr, fid_index, + adding, false); else - return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_port->lag_id, - fdb->addr, fid, lag_vid, - true, false); + return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, + bridge_port->lag_id, + fdb_info->addr, fid_index, + vid, adding, false); } static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr, @@ -1006,7 +1227,7 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_mid *mid; - list_for_each_entry(mid, &mlxsw_sp->br_mids.list, list) { + list_for_each_entry(mid, &mlxsw_sp->bridge->mids_list, list) { if (ether_addr_equal(mid->addr, addr) && mid->fid == fid) return mid; } @@ -1020,7 +1241,7 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_mid *mid; u16 mid_idx; - mid_idx = find_first_zero_bit(mlxsw_sp->br_mids.mapped, + mid_idx = find_first_zero_bit(mlxsw_sp->bridge->mids_bitmap, MLXSW_SP_MID_MAX); if (mid_idx == MLXSW_SP_MID_MAX) return NULL; @@ -1029,12 +1250,12 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, if (!mid) return NULL; - set_bit(mid_idx, mlxsw_sp->br_mids.mapped); + set_bit(mid_idx, mlxsw_sp->bridge->mids_bitmap); ether_addr_copy(mid->addr, addr); mid->fid = fid; mid->mid = mid_idx; mid->ref_count = 0; - list_add_tail(&mid->list, &mlxsw_sp->br_mids.list); + list_add_tail(&mid->list, &mlxsw_sp->bridge->mids_list); return mid; } @@ -1044,7 +1265,7 @@ static int __mlxsw_sp_mc_dec_ref(struct mlxsw_sp *mlxsw_sp, { if (--mid->ref_count == 0) { list_del(&mid->list); - clear_bit(mid->mid, mlxsw_sp->br_mids.mapped); + clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap); kfree(mid); return 1; } @@ -1056,17 +1277,34 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_trans *trans) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct net_device *orig_dev = mdb->obj.orig_dev; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; struct net_device *dev = mlxsw_sp_port->dev; + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_bridge_port *bridge_port; struct mlxsw_sp_mid *mid; - u16 fid = mlxsw_sp_port_vid_to_fid_get(mlxsw_sp_port, mdb->vid); + u16 fid_index; int err = 0; if (switchdev_trans_ph_prepare(trans)) return 0; - mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid); + bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); + if (!bridge_port) + return 0; + + bridge_device = bridge_port->bridge_device; + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_bridge(mlxsw_sp_port, + bridge_device, + mdb->vid); + if (!mlxsw_sp_port_vlan) + return 0; + + fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid); + + mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid_index); if (!mid) { - mid = __mlxsw_sp_mc_alloc(mlxsw_sp, mdb->addr, fid); + mid = __mlxsw_sp_mc_alloc(mlxsw_sp, mdb->addr, fid_index); if (!mid) { netdev_err(dev, "Unable to allocate MC group\n"); return -ENOMEM; @@ -1082,8 +1320,8 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, } if (mid->ref_count == 1) { - err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid, mid->mid, - true); + err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid_index, + mid->mid, true); if (err) { netdev_err(dev, "Unable to set MC SFD\n"); goto err_out; @@ -1104,24 +1342,12 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); int err = 0; - mlxsw_sp_port = 
mlxsw_sp_port_orig_get(obj->orig_dev, mlxsw_sp_port); - if (!mlxsw_sp_port) - return -EINVAL; - switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: - if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) - return 0; - err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_VLAN(obj), trans); break; - case SWITCHDEV_OBJ_ID_PORT_FDB: - err = mlxsw_sp_port_fdb_static_add(mlxsw_sp_port, - SWITCHDEV_OBJ_PORT_FDB(obj), - trans); - break; case SWITCHDEV_OBJ_ID_PORT_MDB: err = mlxsw_sp_port_mdb_add(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_MDB(obj), @@ -1135,82 +1361,72 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, return err; } -static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid_begin, u16 vid_end) +static void +mlxsw_sp_bridge_port_vlan_del(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, u16 vid) { - u16 vid, pvid; - - if (!mlxsw_sp_port->bridged) - return -EINVAL; - - mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid_begin, vid_end, - false); - - pvid = mlxsw_sp_port->pvid; - if (pvid >= vid_begin && pvid <= vid_end) - mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0); + u16 pvid = mlxsw_sp_port->pvid == vid ? 0 : vid; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; - mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid_begin, vid_end, - false, false); - - mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end); - - /* Changing activity bits only if HW operation succeded */ - for (vid = vid_begin; vid <= vid_end; vid++) - clear_bit(vid, mlxsw_sp_port->active_vlans); + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); + if (WARN_ON(!mlxsw_sp_port_vlan)) + return; - return 0; + mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan); + mlxsw_sp_port_pvid_set(mlxsw_sp_port, pvid); + mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false); + mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan); } static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_vlan *vlan) { - return __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vlan->vid_begin, - vlan->vid_end); -} - -void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port) -{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct net_device *orig_dev = vlan->obj.orig_dev; + struct mlxsw_sp_bridge_port *bridge_port; u16 vid; - for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) - __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid); -} + bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); + if (WARN_ON(!bridge_port)) + return -EINVAL; -static int -mlxsw_sp_port_fdb_static_del(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_port_fdb *fdb) -{ - u16 fid = mlxsw_sp_port_vid_to_fid_get(mlxsw_sp_port, fdb->vid); - u16 lag_vid = 0; + if (!bridge_port->bridge_device->vlan_enabled) + return 0; - if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { - lag_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port); - } + for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) + mlxsw_sp_bridge_port_vlan_del(mlxsw_sp_port, bridge_port, vid); - if (!mlxsw_sp_port->lagged) - return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_port->local_port, - fdb->addr, fid, - false, false); - else - return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_port->lag_id, - fdb->addr, fid, lag_vid, - false, false); + return 0; } static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_mdb *mdb) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct 
net_device *orig_dev = mdb->obj.orig_dev; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + struct mlxsw_sp_bridge_device *bridge_device; struct net_device *dev = mlxsw_sp_port->dev; + struct mlxsw_sp_bridge_port *bridge_port; struct mlxsw_sp_mid *mid; - u16 fid = mlxsw_sp_port_vid_to_fid_get(mlxsw_sp_port, mdb->vid); + u16 fid_index; u16 mid_idx; int err = 0; - mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid); + bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); + if (!bridge_port) + return 0; + + bridge_device = bridge_port->bridge_device; + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_bridge(mlxsw_sp_port, + bridge_device, + mdb->vid); + if (!mlxsw_sp_port_vlan) + return 0; + + fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid); + + mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid_index); if (!mid) { netdev_err(dev, "Unable to remove port from MC DB\n"); return -EINVAL; @@ -1222,8 +1438,8 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, mid_idx = mid->mid; if (__mlxsw_sp_mc_dec_ref(mlxsw_sp, mid)) { - err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid, mid_idx, - false); + err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid_index, + mid_idx, false); if (err) netdev_err(dev, "Unable to remove MC SFD\n"); } @@ -1237,22 +1453,11 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); int err = 0; - mlxsw_sp_port = mlxsw_sp_port_orig_get(obj->orig_dev, mlxsw_sp_port); - if (!mlxsw_sp_port) - return -EINVAL; - switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: - if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) - return 0; - err = mlxsw_sp_port_vlans_del(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_VLAN(obj)); break; - case SWITCHDEV_OBJ_ID_PORT_FDB: - err = mlxsw_sp_port_fdb_static_del(mlxsw_sp_port, - SWITCHDEV_OBJ_PORT_FDB(obj)); - break; case SWITCHDEV_OBJ_ID_PORT_MDB: err = mlxsw_sp_port_mdb_del(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_MDB(obj)); @@ -1282,188 +1487,200 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp, return NULL; } -static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, - struct switchdev_obj_port_fdb *fdb, - switchdev_obj_dump_cb_t *cb, - struct net_device *orig_dev) +static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = { + .switchdev_port_attr_get = mlxsw_sp_port_attr_get, + .switchdev_port_attr_set = mlxsw_sp_port_attr_set, + .switchdev_port_obj_add = mlxsw_sp_port_obj_add, + .switchdev_port_obj_del = mlxsw_sp_port_obj_del, +}; + +static int +mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - struct mlxsw_sp_port *tmp; - struct mlxsw_sp_fid *f; - u16 vport_fid; - char *sfd_pl; - char mac[ETH_ALEN]; - u16 fid; - u8 local_port; - u16 lag_id; - u8 num_rec; - int stored_err = 0; - int i; - int err; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; - sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); - if (!sfd_pl) - return -ENOMEM; + if (is_vlan_dev(bridge_port->dev)) + return -EINVAL; - f = mlxsw_sp_vport_fid_get(mlxsw_sp_port); - vport_fid = f ? 
f->fid : 0; + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, 1); + if (WARN_ON(!mlxsw_sp_port_vlan)) + return -EINVAL; - mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_QUERY_DUMP, 0); - do { - mlxsw_reg_sfd_num_rec_set(sfd_pl, MLXSW_REG_SFD_REC_MAX_COUNT); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); - if (err) - goto out; + /* Let VLAN-aware bridge take care of its own VLANs */ + mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan); - num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl); + return 0; +} - /* Even in case of error, we have to run the dump to the end - * so the session in firmware is finished. - */ - if (stored_err) - continue; +static void +mlxsw_sp_bridge_8021q_port_leave(struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1); + /* Make sure untagged frames are allowed to ingress */ + mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1); +} - for (i = 0; i < num_rec; i++) { - switch (mlxsw_reg_sfd_rec_type_get(sfd_pl, i)) { - case MLXSW_REG_SFD_REC_TYPE_UNICAST: - mlxsw_reg_sfd_uc_unpack(sfd_pl, i, mac, &fid, - &local_port); - if (local_port == mlxsw_sp_port->local_port) { - if (vport_fid && vport_fid == fid) - fdb->vid = 0; - else if (!vport_fid && - !mlxsw_sp_fid_is_vfid(fid)) - fdb->vid = fid; - else - continue; - ether_addr_copy(fdb->addr, mac); - fdb->ndm_state = NUD_REACHABLE; - err = cb(&fdb->obj); - if (err) - stored_err = err; - } - break; - case MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG: - mlxsw_reg_sfd_uc_lag_unpack(sfd_pl, i, - mac, &fid, &lag_id); - tmp = mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id); - if (tmp && tmp->local_port == - mlxsw_sp_port->local_port) { - /* LAG records can only point to LAG - * devices or VLAN devices on top. - */ - if (!netif_is_lag_master(orig_dev) && - !is_vlan_dev(orig_dev)) - continue; - if (vport_fid && vport_fid == fid) - fdb->vid = 0; - else if (!vport_fid && - !mlxsw_sp_fid_is_vfid(fid)) - fdb->vid = fid; - else - continue; - ether_addr_copy(fdb->addr, mac); - fdb->ndm_state = NUD_REACHABLE; - err = cb(&fdb->obj); - if (err) - stored_err = err; - } - break; - } - } - } while (num_rec == MLXSW_REG_SFD_REC_MAX_COUNT); +static struct mlxsw_sp_fid * +mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); -out: - kfree(sfd_pl); - return stored_err ? 
stored_err : err; + return mlxsw_sp_fid_8021q_get(mlxsw_sp, vid); } -static int mlxsw_sp_port_vlan_dump(struct mlxsw_sp_port *mlxsw_sp_port, - struct switchdev_obj_port_vlan *vlan, - switchdev_obj_dump_cb_t *cb) +static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021q_ops = { + .port_join = mlxsw_sp_bridge_8021q_port_join, + .port_leave = mlxsw_sp_bridge_8021q_port_leave, + .fid_get = mlxsw_sp_bridge_8021q_fid_get, +}; + +static bool +mlxsw_sp_port_is_br_member(const struct mlxsw_sp_port *mlxsw_sp_port, + const struct net_device *br_dev) { - u16 vid; - int err = 0; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; - if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { - vlan->flags = 0; - vlan->vid_begin = mlxsw_sp_vport_vid_get(mlxsw_sp_port); - vlan->vid_end = mlxsw_sp_vport_vid_get(mlxsw_sp_port); - return cb(&vlan->obj); + list_for_each_entry(mlxsw_sp_port_vlan, &mlxsw_sp_port->vlans_list, + list) { + if (mlxsw_sp_port_vlan->bridge_port && + mlxsw_sp_port_vlan->bridge_port->bridge_device->dev == + br_dev) + return true; } - for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) { - vlan->flags = 0; - if (vid == mlxsw_sp_port->pvid) - vlan->flags |= BRIDGE_VLAN_INFO_PVID; - if (test_bit(vid, mlxsw_sp_port->untagged_vlans)) - vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED; - vlan->vid_begin = vid; - vlan->vid_end = vid; - err = cb(&vlan->obj); - if (err) - break; - } - return err; + return false; } -static int mlxsw_sp_port_obj_dump(struct net_device *dev, - struct switchdev_obj *obj, - switchdev_obj_dump_cb_t *cb) +static int +mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port) { - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - int err = 0; + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + u16 vid; - mlxsw_sp_port = mlxsw_sp_port_orig_get(obj->orig_dev, mlxsw_sp_port); - if (!mlxsw_sp_port) + if (!is_vlan_dev(bridge_port->dev)) return -EINVAL; + vid = vlan_dev_vlan_id(bridge_port->dev); - switch (obj->id) { - case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = mlxsw_sp_port_vlan_dump(mlxsw_sp_port, - SWITCHDEV_OBJ_PORT_VLAN(obj), cb); - break; - case SWITCHDEV_OBJ_ID_PORT_FDB: - err = mlxsw_sp_port_fdb_dump(mlxsw_sp_port, - SWITCHDEV_OBJ_PORT_FDB(obj), cb, - obj->orig_dev); - break; - default: - err = -EOPNOTSUPP; - break; + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); + if (WARN_ON(!mlxsw_sp_port_vlan)) + return -EINVAL; + + if (mlxsw_sp_port_is_br_member(mlxsw_sp_port, bridge_device->dev)) { + netdev_err(mlxsw_sp_port->dev, "Can't bridge VLAN uppers of the same port\n"); + return -EINVAL; } - return err; + /* Port is no longer usable as a router interface */ + if (mlxsw_sp_port_vlan->fid) + mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); + + return mlxsw_sp_port_vlan_bridge_join(mlxsw_sp_port_vlan, bridge_port); } -static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = { - .switchdev_port_attr_get = mlxsw_sp_port_attr_get, - .switchdev_port_attr_set = mlxsw_sp_port_attr_set, - .switchdev_port_obj_add = mlxsw_sp_port_obj_add, - .switchdev_port_obj_del = mlxsw_sp_port_obj_del, - .switchdev_port_obj_dump = mlxsw_sp_port_obj_dump, +static void +mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + u16 vid = vlan_dev_vlan_id(bridge_port->dev); + + mlxsw_sp_port_vlan = 
mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); + if (WARN_ON(!mlxsw_sp_port_vlan)) + return; + + mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan); +} + +static struct mlxsw_sp_fid * +mlxsw_sp_bridge_8021d_fid_get(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + + return mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex); +} + +static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021d_ops = { + .port_join = mlxsw_sp_bridge_8021d_port_join, + .port_leave = mlxsw_sp_bridge_8021d_port_leave, + .fid_get = mlxsw_sp_bridge_8021d_fid_get, }; -static void mlxsw_sp_fdb_call_notifiers(bool learning_sync, bool adding, - char *mac, u16 vid, - struct net_device *dev) +int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *brport_dev, + struct net_device *br_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_bridge_port *bridge_port; + int err; + + bridge_port = mlxsw_sp_bridge_port_get(mlxsw_sp->bridge, brport_dev); + if (IS_ERR(bridge_port)) + return PTR_ERR(bridge_port); + bridge_device = bridge_port->bridge_device; + + err = bridge_device->ops->port_join(bridge_device, bridge_port, + mlxsw_sp_port); + if (err) + goto err_port_join; + + return 0; + +err_port_join: + mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port); + return err; +} + +void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *brport_dev, + struct net_device *br_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_bridge_port *bridge_port; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return; + bridge_port = __mlxsw_sp_bridge_port_find(bridge_device, brport_dev); + if (!bridge_port) + return; + + bridge_device->ops->port_leave(bridge_device, bridge_port, + mlxsw_sp_port); + mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port); +} + +static void +mlxsw_sp_fdb_call_notifiers(enum switchdev_notifier_type type, + const char *mac, u16 vid, + struct net_device *dev) { struct switchdev_notifier_fdb_info info; - unsigned long notifier_type; - if (learning_sync) { - info.addr = mac; - info.vid = vid; - notifier_type = adding ? 
SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL; - call_switchdev_notifiers(notifier_type, dev, &info.info); - } + info.addr = mac; + info.vid = vid; + call_switchdev_notifiers(type, dev, &info.info); } static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, char *sfn_pl, int rec_index, bool adding) { + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_bridge_port *bridge_port; struct mlxsw_sp_port *mlxsw_sp_port; + enum switchdev_notifier_type type; char mac[ETH_ALEN]; u8 local_port; u16 vid, fid; @@ -1477,22 +1694,21 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, goto just_remove; } - if (mlxsw_sp_fid_is_vfid(fid)) { - struct mlxsw_sp_port *mlxsw_sp_vport; + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_fid(mlxsw_sp_port, fid); + if (!mlxsw_sp_port_vlan) { + netdev_err(mlxsw_sp_port->dev, "Failed to find a matching {Port, VID} following FDB notification\n"); + goto just_remove; + } - mlxsw_sp_vport = mlxsw_sp_port_vport_find_by_fid(mlxsw_sp_port, - fid); - if (!mlxsw_sp_vport) { - netdev_err(mlxsw_sp_port->dev, "Failed to find a matching vPort following FDB notification\n"); - goto just_remove; - } - vid = 0; - /* Override the physical port with the vPort. */ - mlxsw_sp_port = mlxsw_sp_vport; - } else { - vid = fid; + bridge_port = mlxsw_sp_port_vlan->bridge_port; + if (!bridge_port) { + netdev_err(mlxsw_sp_port->dev, "{Port, VID} not associated with a bridge\n"); + goto just_remove; } + bridge_device = bridge_port->bridge_device; + vid = bridge_device->vlan_enabled ? mlxsw_sp_port_vlan->vid : 0; + do_fdb_op: err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid, adding, true); @@ -1503,8 +1719,9 @@ do_fdb_op: if (!do_notification) return; - mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning_sync, - adding, mac, vid, mlxsw_sp_port->dev); + type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE; + mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev); + return; just_remove: @@ -1517,8 +1734,11 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp, char *sfn_pl, int rec_index, bool adding) { + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_bridge_port *bridge_port; struct mlxsw_sp_port *mlxsw_sp_port; - struct net_device *dev; + enum switchdev_notifier_type type; char mac[ETH_ALEN]; u16 lag_vid = 0; u16 lag_id; @@ -1533,26 +1753,22 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp, goto just_remove; } - if (mlxsw_sp_fid_is_vfid(fid)) { - struct mlxsw_sp_port *mlxsw_sp_vport; - - mlxsw_sp_vport = mlxsw_sp_port_vport_find_by_fid(mlxsw_sp_port, - fid); - if (!mlxsw_sp_vport) { - netdev_err(mlxsw_sp_port->dev, "Failed to find a matching vPort following FDB notification\n"); - goto just_remove; - } + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_fid(mlxsw_sp_port, fid); + if (!mlxsw_sp_port_vlan) { + netdev_err(mlxsw_sp_port->dev, "Failed to find a matching {Port, VID} following FDB notification\n"); + goto just_remove; + } - lag_vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); - dev = mlxsw_sp_vport->dev; - vid = 0; - /* Override the physical port with the vPort. 
*/ - mlxsw_sp_port = mlxsw_sp_vport; - } else { - dev = mlxsw_sp_lag_get(mlxsw_sp, lag_id)->dev; - vid = fid; + bridge_port = mlxsw_sp_port_vlan->bridge_port; + if (!bridge_port) { + netdev_err(mlxsw_sp_port->dev, "{Port, VID} not associated with a bridge\n"); + goto just_remove; } + bridge_device = bridge_port->bridge_device; + vid = bridge_device->vlan_enabled ? mlxsw_sp_port_vlan->vid : 0; + lag_vid = mlxsw_sp_port_vlan->vid; + do_fdb_op: err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, fid, lag_vid, adding, true); @@ -1563,8 +1779,9 @@ do_fdb_op: if (!do_notification) return; - mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning_sync, adding, mac, - vid, dev); + type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE; + mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev); + return; just_remove: @@ -1598,12 +1815,15 @@ static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_fdb_notify_work_schedule(struct mlxsw_sp *mlxsw_sp) { - mlxsw_core_schedule_dw(&mlxsw_sp->fdb_notify.dw, - msecs_to_jiffies(mlxsw_sp->fdb_notify.interval)); + struct mlxsw_sp_bridge *bridge = mlxsw_sp->bridge; + + mlxsw_core_schedule_dw(&bridge->fdb_notify.dw, + msecs_to_jiffies(bridge->fdb_notify.interval)); } static void mlxsw_sp_fdb_notify_work(struct work_struct *work) { + struct mlxsw_sp_bridge *bridge; struct mlxsw_sp *mlxsw_sp; char *sfn_pl; u8 num_rec; @@ -1614,7 +1834,8 @@ static void mlxsw_sp_fdb_notify_work(struct work_struct *work) if (!sfn_pl) return; - mlxsw_sp = container_of(work, struct mlxsw_sp, fdb_notify.dw.work); + bridge = container_of(work, struct mlxsw_sp_bridge, fdb_notify.dw.work); + mlxsw_sp = bridge->mlxsw_sp; rtnl_lock(); mlxsw_reg_sfn_pack(sfn_pl); @@ -1633,8 +1854,106 @@ out: mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp); } +struct mlxsw_sp_switchdev_event_work { + struct work_struct work; + struct switchdev_notifier_fdb_info fdb_info; + struct net_device *dev; + unsigned long event; +}; + +static void mlxsw_sp_switchdev_event_work(struct work_struct *work) +{ + struct mlxsw_sp_switchdev_event_work *switchdev_work = + container_of(work, struct mlxsw_sp_switchdev_event_work, work); + struct net_device *dev = switchdev_work->dev; + struct switchdev_notifier_fdb_info *fdb_info; + struct mlxsw_sp_port *mlxsw_sp_port; + int err; + + rtnl_lock(); + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev); + if (!mlxsw_sp_port) + goto out; + + switch (switchdev_work->event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + fdb_info = &switchdev_work->fdb_info; + err = mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, true); + if (err) + break; + mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, + fdb_info->addr, + fdb_info->vid, dev); + break; + case SWITCHDEV_FDB_DEL_TO_DEVICE: + fdb_info = &switchdev_work->fdb_info; + mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, false); + break; + } + +out: + rtnl_unlock(); + kfree(switchdev_work->fdb_info.addr); + kfree(switchdev_work); + dev_put(dev); +} + +/* Called under rcu_read_lock() */ +static int mlxsw_sp_switchdev_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + struct mlxsw_sp_switchdev_event_work *switchdev_work; + struct switchdev_notifier_fdb_info *fdb_info = ptr; + + if (!mlxsw_sp_port_dev_lower_find_rcu(dev)) + return NOTIFY_DONE; + + switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); + if (!switchdev_work) + return NOTIFY_BAD; + + INIT_WORK(&switchdev_work->work, 
mlxsw_sp_switchdev_event_work); + switchdev_work->dev = dev; + switchdev_work->event = event; + + switch (event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */ + case SWITCHDEV_FDB_DEL_TO_DEVICE: + memcpy(&switchdev_work->fdb_info, ptr, + sizeof(switchdev_work->fdb_info)); + switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC); + if (!switchdev_work->fdb_info.addr) + goto err_addr_alloc; + ether_addr_copy((u8 *)switchdev_work->fdb_info.addr, + fdb_info->addr); + /* Take a reference on the device. This can be either + * upper device containig mlxsw_sp_port or just a + * mlxsw_sp_port + */ + dev_hold(dev); + break; + default: + kfree(switchdev_work); + return NOTIFY_DONE; + } + + mlxsw_core_schedule_work(&switchdev_work->work); + + return NOTIFY_DONE; + +err_addr_alloc: + kfree(switchdev_work); + return NOTIFY_BAD; +} + +static struct notifier_block mlxsw_sp_switchdev_notifier = { + .notifier_call = mlxsw_sp_switchdev_event, +}; + static int mlxsw_sp_fdb_init(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_sp_bridge *bridge = mlxsw_sp->bridge; int err; err = mlxsw_sp_ageing_set(mlxsw_sp, MLXSW_SP_DEFAULT_AGEING_TIME); @@ -1642,25 +1961,62 @@ static int mlxsw_sp_fdb_init(struct mlxsw_sp *mlxsw_sp) dev_err(mlxsw_sp->bus_info->dev, "Failed to set default ageing time\n"); return err; } - INIT_DELAYED_WORK(&mlxsw_sp->fdb_notify.dw, mlxsw_sp_fdb_notify_work); - mlxsw_sp->fdb_notify.interval = MLXSW_SP_DEFAULT_LEARNING_INTERVAL; + + err = register_switchdev_notifier(&mlxsw_sp_switchdev_notifier); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to register switchdev notifier\n"); + return err; + } + + INIT_DELAYED_WORK(&bridge->fdb_notify.dw, mlxsw_sp_fdb_notify_work); + bridge->fdb_notify.interval = MLXSW_SP_DEFAULT_LEARNING_INTERVAL; mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp); return 0; } static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp) { - cancel_delayed_work_sync(&mlxsw_sp->fdb_notify.dw); + cancel_delayed_work_sync(&mlxsw_sp->bridge->fdb_notify.dw); + unregister_switchdev_notifier(&mlxsw_sp_switchdev_notifier); + +} + +static void mlxsw_sp_mids_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_mid *mid, *tmp; + + list_for_each_entry_safe(mid, tmp, &mlxsw_sp->bridge->mids_list, list) { + list_del(&mid->list); + clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap); + kfree(mid); + } } int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_sp_bridge *bridge; + + bridge = kzalloc(sizeof(*mlxsw_sp->bridge), GFP_KERNEL); + if (!bridge) + return -ENOMEM; + mlxsw_sp->bridge = bridge; + bridge->mlxsw_sp = mlxsw_sp; + + INIT_LIST_HEAD(&mlxsw_sp->bridge->bridges_list); + INIT_LIST_HEAD(&mlxsw_sp->bridge->mids_list); + + bridge->bridge_8021q_ops = &mlxsw_sp_bridge_8021q_ops; + bridge->bridge_8021d_ops = &mlxsw_sp_bridge_8021d_ops; + return mlxsw_sp_fdb_init(mlxsw_sp); } void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp) { mlxsw_sp_fdb_fini(mlxsw_sp); + mlxsw_sp_mids_fini(mlxsw_sp); + WARN_ON(!list_empty(&mlxsw_sp->bridge->bridges_list)); + kfree(mlxsw_sp->bridge); } void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port) diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchib.c b/drivers/net/ethernet/mellanox/mlxsw/switchib.c index 74341fe0eb25..ab7a29846bfa 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchib.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchib.c @@ -497,7 +497,7 @@ static void mlxsw_sib_fini(struct mlxsw_core *mlxsw_core) mlxsw_sib_ports_remove(mlxsw_sib); } -static struct mlxsw_config_profile 
mlxsw_sib_config_profile = { +static const struct mlxsw_config_profile mlxsw_sib_config_profile = { .used_max_system_port = 1, .max_system_port = 48000, .used_max_ib_mc = 1, diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 3b0f72455681..f3c29bbf07e2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -1674,7 +1674,7 @@ static void mlxsw_sx_fini(struct mlxsw_core *mlxsw_core) mlxsw_sx_ports_remove(mlxsw_sx); } -static struct mlxsw_config_profile mlxsw_sx_config_profile = { +static const struct mlxsw_config_profile mlxsw_sx_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, .used_max_mid = 1, diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index e008fdbed20f..f396a1fef633 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -61,11 +61,34 @@ enum { MLXSW_TRAP_ID_MTUERROR = 0x52, MLXSW_TRAP_ID_TTLERROR = 0x53, MLXSW_TRAP_ID_LBERROR = 0x54, - MLXSW_TRAP_ID_OSPF = 0x55, + MLXSW_TRAP_ID_IPV4_OSPF = 0x55, MLXSW_TRAP_ID_IP2ME = 0x5F, + MLXSW_TRAP_ID_IPV6_UNSPECIFIED_ADDRESS = 0x60, + MLXSW_TRAP_ID_IPV6_LINK_LOCAL_DEST = 0x61, + MLXSW_TRAP_ID_IPV6_LINK_LOCAL_SRC = 0x62, + MLXSW_TRAP_ID_IPV6_ALL_NODES_LINK = 0x63, + MLXSW_TRAP_ID_IPV6_OSPF = 0x64, + MLXSW_TRAP_ID_IPV6_MLDV12_LISTENER_QUERY = 0x65, + MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_REPORT = 0x66, + MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_DONE = 0x67, + MLXSW_TRAP_ID_IPV6_MLDV2_LISTENER_REPORT = 0x68, + MLXSW_TRAP_ID_IPV6_DHCP = 0x69, + MLXSW_TRAP_ID_IPV6_ALL_ROUTERS_LINK = 0x6F, MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70, - MLXSW_TRAP_ID_BGP_IPV4 = 0x88, + MLXSW_TRAP_ID_IPV4_BGP = 0x88, + MLXSW_TRAP_ID_IPV6_BGP = 0x89, + MLXSW_TRAP_ID_L3_IPV6_ROUTER_SOLICITATION = 0x8A, + MLXSW_TRAP_ID_L3_IPV6_ROUTER_ADVERTISMENT = 0x8B, + MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_SOLICITATION = 0x8C, + MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_ADVERTISMENT = 0x8D, + MLXSW_TRAP_ID_L3_IPV6_REDIRECTION = 0x8E, MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90, + MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91, + MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92, + MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1, + MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6, + MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7, + MLXSW_TRAP_ID_ACL0 = 0x1C0, MLXSW_TRAP_ID_MAX = 0x1FF }; |
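
Note on the FDB handling introduced in spectrum_switchdev.c above: the driver drops the SWITCHDEV_OBJ_ID_PORT_FDB object add/del paths and the .switchdev_port_obj_dump callback, and instead registers a switchdev notifier. That notifier runs under rcu_read_lock(), so it only copies the FDB entry, takes a reference on the netdevice, and defers the actual hardware programming to a work item that runs under rtnl_lock() and echoes successful adds back to the bridge as SWITCHDEV_FDB_OFFLOADED. The following is a minimal, self-contained sketch of that pattern for a hypothetical driver; the foo_* names are illustrative only (not part of mlxsw), and only the generic switchdev/netdev APIs used here are real, as of the kernel this series targets:

#include <linux/etherdevice.h>
#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <net/switchdev.h>

struct foo_switchdev_event_work {
	struct work_struct work;
	struct switchdev_notifier_fdb_info fdb_info;
	struct net_device *dev;
	unsigned long event;	/* FDB add vs. del */
};

static void foo_switchdev_event_work(struct work_struct *work)
{
	struct foo_switchdev_event_work *sw_work =
		container_of(work, struct foo_switchdev_event_work, work);

	rtnl_lock();
	/* Program or remove the hardware FDB entry here and, on a
	 * successful add, notify SWITCHDEV_FDB_OFFLOADED back to the
	 * bridge (as mlxsw_sp_switchdev_event_work() does above).
	 */
	rtnl_unlock();

	kfree(sw_work->fdb_info.addr);
	dev_put(sw_work->dev);
	kfree(sw_work);
}

/* Called under rcu_read_lock(); must not sleep, hence GFP_ATOMIC and
 * the deferral to process context.
 */
static int foo_switchdev_event(struct notifier_block *nb,
			       unsigned long event, void *ptr)
{
	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
	struct switchdev_notifier_fdb_info *fdb_info = ptr;
	struct foo_switchdev_event_work *sw_work;

	if (event != SWITCHDEV_FDB_ADD_TO_DEVICE &&
	    event != SWITCHDEV_FDB_DEL_TO_DEVICE)
		return NOTIFY_DONE;

	sw_work = kzalloc(sizeof(*sw_work), GFP_ATOMIC);
	if (!sw_work)
		return NOTIFY_BAD;

	INIT_WORK(&sw_work->work, foo_switchdev_event_work);
	sw_work->dev = dev;
	sw_work->event = event;
	sw_work->fdb_info = *fdb_info;
	sw_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
	if (!sw_work->fdb_info.addr) {
		kfree(sw_work);
		return NOTIFY_BAD;
	}
	ether_addr_copy((u8 *)sw_work->fdb_info.addr, fdb_info->addr);

	/* Keep the netdevice alive until the work item runs */
	dev_hold(dev);
	schedule_work(&sw_work->work);

	return NOTIFY_DONE;
}

static struct notifier_block foo_switchdev_nb = {
	.notifier_call = foo_switchdev_event,
};

/* Registered/unregistered from the driver's switchdev init/fini:
 *	register_switchdev_notifier(&foo_switchdev_nb);
 *	unregister_switchdev_notifier(&foo_switchdev_nb);
 */

Deferring to a work item is what keeps the notifier atomic-safe while still letting the FDB write sleep and take rtnl_lock(); the dev_hold()/dev_put() pair spans the deferral exactly as in the mlxsw code above, which uses mlxsw_core_schedule_work() where this sketch uses the generic schedule_work().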