diff options
73 files changed, 1497 insertions, 827 deletions
diff --git a/Documentation/devicetree/bindings/net/sh_eth.txt b/Documentation/devicetree/bindings/net/sh_eth.txt index 5172799a7f1a..82a4cf2c145d 100644 --- a/Documentation/devicetree/bindings/net/sh_eth.txt +++ b/Documentation/devicetree/bindings/net/sh_eth.txt @@ -14,6 +14,7 @@ Required properties: "renesas,ether-r8a7791" if the device is a part of R8A7791 SoC. "renesas,ether-r8a7793" if the device is a part of R8A7793 SoC. "renesas,ether-r8a7794" if the device is a part of R8A7794 SoC. + "renesas,gether-r8a77980" if the device is a part of R8A77980 SoC. "renesas,ether-r7s72100" if the device is a part of R7S72100 SoC. "renesas,rcar-gen1-ether" for a generic R-Car Gen1 device. "renesas,rcar-gen2-ether" for a generic R-Car Gen2 or RZ/G1 diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ea304a23c8d7..924bd51327b7 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -525,6 +525,19 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max tcp_sack - BOOLEAN Enable select acknowledgments (SACKS). +tcp_comp_sack_delay_ns - LONG INTEGER + TCP tries to reduce number of SACK sent, using a timer + based on 5% of SRTT, capped by this sysctl, in nano seconds. + The default is 1ms, based on TSO autosizing period. + + Default : 1,000,000 ns (1 ms) + +tcp_comp_sack_nr - INTEGER + Max number of SACK that can be compressed. + Using 0 disables SACK compression. + + Default : 44 + tcp_slow_start_after_idle - BOOLEAN If set, provide RFC2861 behavior and time out the congestion window after an idle period. 
An idle period is defined at diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 77d257ec899b..6d52ea03574e 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -849,7 +849,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, return 0; err_cqb: - kfree(*cqb); + kvfree(*cqb); err_db: mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index 740a18ba4229..c333e25620a7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -62,6 +62,18 @@ struct cudbg_hw_sched { u32 map; }; +#define SGE_QBASE_DATA_REG_NUM 4 + +struct sge_qbase_reg_field { + u32 reg_addr; + u32 reg_data[SGE_QBASE_DATA_REG_NUM]; + /* Max supported PFs */ + u32 pf_data_value[PCIE_FW_MASTER_M + 1][SGE_QBASE_DATA_REG_NUM]; + /* Max supported VFs */ + u32 vf_data_value[T6_VF_M + 1][SGE_QBASE_DATA_REG_NUM]; + u32 vfcount; /* Actual number of max vfs in current configuration */ +}; + struct ireg_field { u32 ireg_addr; u32 ireg_data; @@ -357,6 +369,11 @@ static const u32 t5_sge_dbg_index_array[2][IREG_NUM_ELEM] = { {0x10cc, 0x10d4, 0x0, 16}, }; +static const u32 t6_sge_qbase_index_array[] = { + /* 1 addr reg SGE_QBASE_INDEX and 4 data reg SGE_QBASE_MAP[0-3] */ + 0x1250, 0x1240, 0x1244, 0x1248, 0x124c, +}; + static const u32 t5_pcie_pdbg_array[][IREG_NUM_ELEM] = { {0x5a04, 0x5a0c, 0x00, 0x20}, /* t5_pcie_pdbg_regs_00_to_20 */ {0x5a04, 0x5a0c, 0x21, 0x20}, /* t5_pcie_pdbg_regs_21_to_40 */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 4feb7eca0acf..0afcfe99bff3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -1339,16 +1339,39 @@ int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init, return 
cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } +static void cudbg_read_sge_qbase_indirect_reg(struct adapter *padap, + struct sge_qbase_reg_field *qbase, + u32 func, bool is_pf) +{ + u32 *buff, i; + + if (is_pf) { + buff = qbase->pf_data_value[func]; + } else { + buff = qbase->vf_data_value[func]; + /* In SGE_QBASE_INDEX, + * Entries 0->7 are PF0->7, Entries 8->263 are VFID0->256. + */ + func += 8; + } + + t4_write_reg(padap, qbase->reg_addr, func); + for (i = 0; i < SGE_QBASE_DATA_REG_NUM; i++, buff++) + *buff = t4_read_reg(padap, qbase->reg_data[i]); +} + int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err) { struct adapter *padap = pdbg_init->adap; struct cudbg_buffer temp_buff = { 0 }; + struct sge_qbase_reg_field *sge_qbase; struct ireg_buf *ch_sge_dbg; int i, rc; - rc = cudbg_get_buff(pdbg_init, dbg_buff, sizeof(*ch_sge_dbg) * 2, + rc = cudbg_get_buff(pdbg_init, dbg_buff, + sizeof(*ch_sge_dbg) * 2 + sizeof(*sge_qbase), &temp_buff); if (rc) return rc; @@ -1370,6 +1393,28 @@ int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, sge_pio->ireg_local_offset); ch_sge_dbg++; } + + if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5) { + sge_qbase = (struct sge_qbase_reg_field *)ch_sge_dbg; + /* 1 addr reg SGE_QBASE_INDEX and 4 data reg + * SGE_QBASE_MAP[0-3] + */ + sge_qbase->reg_addr = t6_sge_qbase_index_array[0]; + for (i = 0; i < SGE_QBASE_DATA_REG_NUM; i++) + sge_qbase->reg_data[i] = + t6_sge_qbase_index_array[i + 1]; + + for (i = 0; i <= PCIE_FW_MASTER_M; i++) + cudbg_read_sge_qbase_indirect_reg(padap, sge_qbase, + i, true); + + for (i = 0; i < padap->params.arch.vfcount; i++) + cudbg_read_sge_qbase_indirect_reg(padap, sge_qbase, + i, false); + + sge_qbase->vfcount = padap->params.arch.vfcount; + } + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index 085691eb2b95..8d751efcb90e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -214,7 +214,8 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) len = sizeof(struct ireg_buf) * n; break; case CUDBG_SGE_INDIRECT: - len = sizeof(struct ireg_buf) * 2; + len = sizeof(struct ireg_buf) * 2 + + sizeof(struct sge_qbase_reg_field); break; case CUDBG_ULPRX_LA: len = sizeof(struct cudbg_ulprx_la); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 5ddfb93ed491..a52d92e182ee 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6698,15 +6698,8 @@ static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake) rtnl_lock(); netif_device_detach(netdev); - if (netif_running(netdev)) { - /* Suspend takes a long time, device_shutdown may be - * parallelized this function, so drop lock for the - * duration of this call. 
- */ - rtnl_unlock(); + if (netif_running(netdev)) ixgbe_close_suspend(adapter); - rtnl_lock(); - } ixgbe_clear_interrupt_scheme(adapter); rtnl_unlock(); diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 7ba7e5938c2e..6847cd431aa0 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -1756,7 +1756,6 @@ static void mvpp2_prs_tcam_ai_update(struct mvpp2_prs_entry *pe, int i, ai_idx = MVPP2_PRS_TCAM_AI_BYTE; for (i = 0; i < MVPP2_PRS_AI_BITS; i++) { - if (!(enable & BIT(i))) continue; @@ -1840,7 +1839,6 @@ static void mvpp2_prs_sram_ai_update(struct mvpp2_prs_entry *pe, int ai_off = MVPP2_PRS_SRAM_AI_OFFS; for (i = 0; i < MVPP2_PRS_SRAM_AI_CTRL_BITS; i++) { - if (!(mask & BIT(i))) continue; @@ -2130,6 +2128,9 @@ static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add, mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK); + /* Set result info bits to 'single vlan' */ + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_SINGLE, + MVPP2_PRS_RI_VLAN_MASK); /* If packet is tagged continue check vid filtering */ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID); } else { @@ -4936,7 +4937,7 @@ static void mvpp22_gop_mask_irq(struct mvpp2_port *port) if (port->gop_id == 0) { val = readl(port->base + MVPP22_XLG_EXT_INT_MASK); val &= ~(MVPP22_XLG_EXT_INT_MASK_XLG | - MVPP22_XLG_EXT_INT_MASK_GIG); + MVPP22_XLG_EXT_INT_MASK_GIG); writel(val, port->base + MVPP22_XLG_EXT_INT_MASK); } @@ -5470,7 +5471,6 @@ static void mvpp2_aggr_txq_pend_desc_add(struct mvpp2_port *port, int pending) MVPP2_AGGR_TXQ_UPDATE_REG, pending); } - /* Check if there are enough free descriptors in aggregated txq. * If not, update the number of occupied descriptors and repeat the check. * @@ -5550,7 +5550,7 @@ static int mvpp2_txq_reserved_desc_num_proc(struct mvpp2 *priv, txq_pcpu->reserved_num += mvpp2_txq_alloc_reserved_desc(priv, txq, req); - /* OK, the descriptor cound has been updated: check again. 
*/ + /* OK, the descriptor could have been updated: check again. */ if (txq_pcpu->reserved_num < num) return -ENOMEM; return 0; @@ -6032,7 +6032,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port, /* Calculate base address in prefetch buffer. We reserve 16 descriptors * for each existing TXQ. * TCONTS for PON port must be continuous from 0 to MVPP2_MAX_TCONT - * GBE ports assumed to be continious from 0 to MVPP2_MAX_PORTS + * GBE ports assumed to be continuous from 0 to MVPP2_MAX_PORTS */ desc_per_txq = 16; desc = (port->id * MVPP2_MAX_TXQ * desc_per_txq) + @@ -6602,8 +6602,7 @@ static int mvpp2_tx_frag_process(struct mvpp2_port *port, struct sk_buff *skb, mvpp2_txdesc_size_set(port, tx_desc, frag->size); buf_dma_addr = dma_map_single(port->dev->dev.parent, addr, - frag->size, - DMA_TO_DEVICE); + frag->size, DMA_TO_DEVICE); if (dma_mapping_error(port->dev->dev.parent, buf_dma_addr)) { mvpp2_txq_desc_put(txq); goto cleanup; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index d93ff567b40d..b3820a34e773 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -235,7 +235,7 @@ const char *parse_fs_dst(struct trace_seq *p, switch (dst->type) { case MLX5_FLOW_DESTINATION_TYPE_VPORT: - trace_seq_printf(p, "vport=%u\n", dst->vport_num); + trace_seq_printf(p, "vport=%u\n", dst->vport.num); break; case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: trace_seq_printf(p, "ft=%p\n", dst->ft); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 7c930088e96e..bc91a7335c93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -634,7 +634,6 @@ struct mlx5e_flow_table { struct mlx5e_tc_table { struct mlx5_flow_table *t; - struct rhashtable_params ht_params; struct rhashtable ht; 
DECLARE_HASHTABLE(mod_hdr_tbl, 8); @@ -1118,9 +1117,6 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, struct ethtool_flash *flash); -int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, - void *cb_priv); - /* mlx5e generic netdev management API */ struct net_device* mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 417bf2e8ab85..b5a7580b12fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3136,22 +3136,23 @@ out: #ifdef CONFIG_MLX5_ESWITCH static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *cls_flower) + struct tc_cls_flower_offload *cls_flower, + int flags) { switch (cls_flower->command) { case TC_CLSFLOWER_REPLACE: - return mlx5e_configure_flower(priv, cls_flower); + return mlx5e_configure_flower(priv, cls_flower, flags); case TC_CLSFLOWER_DESTROY: - return mlx5e_delete_flower(priv, cls_flower); + return mlx5e_delete_flower(priv, cls_flower, flags); case TC_CLSFLOWER_STATS: - return mlx5e_stats_flower(priv, cls_flower); + return mlx5e_stats_flower(priv, cls_flower, flags); default: return -EOPNOTSUPP; } } -int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, - void *cb_priv) +static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) { struct mlx5e_priv *priv = cb_priv; @@ -3160,7 +3161,7 @@ int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_setup_tc_cls_flower(priv, type_data); + return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS); default: return -EOPNOTSUPP; } @@ -4461,7 +4462,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) goto err_destroy_direct_tirs; } - 
err = mlx5e_tc_init(priv); + err = mlx5e_tc_nic_init(priv); if (err) goto err_destroy_flow_steering; @@ -4482,7 +4483,7 @@ err_destroy_indirect_rqts: static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) { - mlx5e_tc_cleanup(priv); + mlx5e_tc_nic_cleanup(priv); mlx5e_destroy_flow_steering(priv); mlx5e_destroy_direct_tirs(priv); mlx5e_destroy_indirect_tirs(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index a689f4c90fe3..c3034f58aa33 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -66,18 +66,36 @@ static const struct counter_desc sw_rep_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, }; -#define NUM_VPORT_REP_COUNTERS ARRAY_SIZE(sw_rep_stats_desc) +struct vport_stats { + u64 vport_rx_packets; + u64 vport_tx_packets; + u64 vport_rx_bytes; + u64 vport_tx_bytes; +}; + +static const struct counter_desc vport_rep_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct vport_stats, vport_rx_packets) }, + { MLX5E_DECLARE_STAT(struct vport_stats, vport_rx_bytes) }, + { MLX5E_DECLARE_STAT(struct vport_stats, vport_tx_packets) }, + { MLX5E_DECLARE_STAT(struct vport_stats, vport_tx_bytes) }, +}; + +#define NUM_VPORT_REP_SW_COUNTERS ARRAY_SIZE(sw_rep_stats_desc) +#define NUM_VPORT_REP_HW_COUNTERS ARRAY_SIZE(vport_rep_stats_desc) static void mlx5e_rep_get_strings(struct net_device *dev, u32 stringset, uint8_t *data) { - int i; + int i, j; switch (stringset) { case ETH_SS_STATS: - for (i = 0; i < NUM_VPORT_REP_COUNTERS; i++) + for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) strcpy(data + (i * ETH_GSTRING_LEN), sw_rep_stats_desc[i].format); + for (j = 0; j < NUM_VPORT_REP_HW_COUNTERS; j++, i++) + strcpy(data + (i * ETH_GSTRING_LEN), + vport_rep_stats_desc[j].format); break; } } @@ -140,7 +158,7 @@ static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct 
mlx5e_priv *priv = netdev_priv(dev); - int i; + int i, j; if (!data) return; @@ -148,18 +166,23 @@ static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, mutex_lock(&priv->state_lock); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_rep_update_sw_counters(priv); + mlx5e_rep_update_hw_counters(priv); mutex_unlock(&priv->state_lock); - for (i = 0; i < NUM_VPORT_REP_COUNTERS; i++) + for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, sw_rep_stats_desc, i); + + for (j = 0; j < NUM_VPORT_REP_HW_COUNTERS; j++, i++) + data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.vf_vport, + vport_rep_stats_desc, j); } static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset) { switch (sset) { case ETH_SS_STATS: - return NUM_VPORT_REP_COUNTERS; + return NUM_VPORT_REP_SW_COUNTERS + NUM_VPORT_REP_HW_COUNTERS; default: return -EOPNOTSUPP; } @@ -723,15 +746,31 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, static int mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *cls_flower) + struct tc_cls_flower_offload *cls_flower, int flags) { switch (cls_flower->command) { case TC_CLSFLOWER_REPLACE: - return mlx5e_configure_flower(priv, cls_flower); + return mlx5e_configure_flower(priv, cls_flower, flags); case TC_CLSFLOWER_DESTROY: - return mlx5e_delete_flower(priv, cls_flower); + return mlx5e_delete_flower(priv, cls_flower, flags); case TC_CLSFLOWER_STATS: - return mlx5e_stats_flower(priv, cls_flower); + return mlx5e_stats_flower(priv, cls_flower, flags); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct mlx5e_priv *priv = cb_priv; + + if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS); default: return -EOPNOTSUPP; } @@ 
-747,7 +786,7 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_rep_setup_tc_cls_flower(priv, type_data); + return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS); default: return -EOPNOTSUPP; } @@ -965,14 +1004,8 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) } rpriv->vport_rx_rule = flow_rule; - err = mlx5e_tc_init(priv); - if (err) - goto err_del_flow_rule; - return 0; -err_del_flow_rule: - mlx5_del_flow_rules(rpriv->vport_rx_rule); err_destroy_direct_tirs: mlx5e_destroy_direct_tirs(priv); err_destroy_direct_rqts: @@ -984,7 +1017,6 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) { struct mlx5e_rep_priv *rpriv = priv->ppriv; - mlx5e_tc_cleanup(priv); mlx5_del_flow_rules(rpriv->vport_rx_rule); mlx5e_destroy_direct_tirs(priv); mlx5e_destroy_direct_rqts(priv); @@ -1042,8 +1074,15 @@ mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) if (err) goto err_remove_sqs; + /* init shared tc flow table */ + err = mlx5e_tc_esw_init(&rpriv->tc_ht); + if (err) + goto err_neigh_cleanup; + return 0; +err_neigh_cleanup: + mlx5e_rep_neigh_cleanup(rpriv); err_remove_sqs: mlx5e_remove_sqs_fwd_rules(priv); return err; @@ -1058,9 +1097,8 @@ mlx5e_nic_rep_unload(struct mlx5_eswitch_rep *rep) if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_remove_sqs_fwd_rules(priv); - /* clean (and re-init) existing uplink offloaded TC rules */ - mlx5e_tc_cleanup(priv); - mlx5e_tc_init(priv); + /* clean uplink offloaded TC rules, delete shared tc flow table */ + mlx5e_tc_esw_cleanup(&rpriv->tc_ht); mlx5e_rep_neigh_cleanup(rpriv); } @@ -1107,7 +1145,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) uplink_rpriv = mlx5_eswitch_get_uplink_priv(dev->priv.eswitch, REP_ETH); upriv = netdev_priv(uplink_rpriv->netdev); - err = tc_setup_cb_egdev_register(netdev, mlx5e_setup_tc_block_cb, + err = tc_setup_cb_egdev_register(netdev, 
mlx5e_rep_setup_tc_cb_egdev, upriv); if (err) goto err_neigh_cleanup; @@ -1122,7 +1160,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) return 0; err_egdev_cleanup: - tc_setup_cb_egdev_unregister(netdev, mlx5e_setup_tc_block_cb, + tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev, upriv); err_neigh_cleanup: @@ -1151,7 +1189,7 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) uplink_rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH); upriv = netdev_priv(uplink_rpriv->netdev); - tc_setup_cb_egdev_unregister(netdev, mlx5e_setup_tc_block_cb, + tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev, upriv); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index b9b481f2833a..844d32d5c29f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -59,6 +59,7 @@ struct mlx5e_rep_priv { struct net_device *netdev; struct mlx5_flow_handle *vport_rx_rule; struct list_head vport_sqs_list; + struct rhashtable tc_ht; /* valid for uplink rep */ }; static inline diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 1dc24e3a0841..674f1d7d2737 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -62,16 +62,21 @@ struct mlx5_nic_flow_attr { struct mlx5_flow_table *hairpin_ft; }; +#define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1) + enum { - MLX5E_TC_FLOW_ESWITCH = BIT(0), - MLX5E_TC_FLOW_NIC = BIT(1), - MLX5E_TC_FLOW_OFFLOADED = BIT(2), - MLX5E_TC_FLOW_HAIRPIN = BIT(3), - MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(4), + MLX5E_TC_FLOW_INGRESS = MLX5E_TC_INGRESS, + MLX5E_TC_FLOW_EGRESS = MLX5E_TC_EGRESS, + MLX5E_TC_FLOW_ESWITCH = BIT(MLX5E_TC_FLOW_BASE), + MLX5E_TC_FLOW_NIC = 
BIT(MLX5E_TC_FLOW_BASE + 1), + MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2), + MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3), + MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4), }; struct mlx5e_tc_flow { struct rhash_head node; + struct mlx5e_priv *priv; u64 cookie; u8 flags; struct mlx5_flow_handle *rule; @@ -839,6 +844,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; attr->out_rep = rpriv->rep; + attr->out_mdev = out_priv->mdev; } err = mlx5_eswitch_add_vlan_action(esw, attr); @@ -2076,6 +2082,20 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, return 0; } +static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, + struct net_device *peer_netdev) +{ + struct mlx5e_priv *peer_priv; + + peer_priv = netdev_priv(peer_netdev); + + return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) && + (priv->netdev->netdev_ops == peer_netdev->netdev_ops) && + same_hw_devs(priv, peer_priv) && + MLX5_VPORT_MANAGER(peer_priv->mdev) && + (peer_priv->mdev->priv.eswitch->mode == SRIOV_OFFLOADS)); +} + static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct net_device **out_dev, @@ -2497,6 +2517,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; attr->in_rep = rpriv->rep; + attr->in_mdev = priv->mdev; tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { @@ -2533,12 +2554,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, out_dev = tcf_mirred_dev(a); if (switchdev_port_same_parent_id(priv->netdev, - out_dev)) { + out_dev) || + is_merged_eswitch_dev(priv, out_dev)) { action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; out_priv = netdev_priv(out_dev); rpriv = out_priv->ppriv; attr->out_rep = rpriv->rep; + attr->out_mdev = out_priv->mdev; } else if (encap) { parse_attr->mirred_ifindex = out_dev->ifindex; parse_attr->tun_info 
= *info; @@ -2600,21 +2623,60 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return 0; } +static void get_flags(int flags, u8 *flow_flags) +{ + u8 __flow_flags = 0; + + if (flags & MLX5E_TC_INGRESS) + __flow_flags |= MLX5E_TC_FLOW_INGRESS; + if (flags & MLX5E_TC_EGRESS) + __flow_flags |= MLX5E_TC_FLOW_EGRESS; + + *flow_flags = __flow_flags; +} + +static const struct rhashtable_params tc_ht_params = { + .head_offset = offsetof(struct mlx5e_tc_flow, node), + .key_offset = offsetof(struct mlx5e_tc_flow, cookie), + .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie), + .automatic_shrinking = true, +}; + +static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *uplink_rpriv; + + if (MLX5_VPORT_MANAGER(priv->mdev) && esw->mode == SRIOV_OFFLOADS) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + return &uplink_rpriv->tc_ht; + } else + return &priv->fs.tc.ht; +} + int mlx5e_configure_flower(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f) + struct tc_cls_flower_offload *f, int flags) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5e_tc_table *tc = &priv->fs.tc; + struct rhashtable *tc_ht = get_tc_ht(priv); struct mlx5e_tc_flow *flow; int attr_size, err = 0; u8 flow_flags = 0; + get_flags(flags, &flow_flags); + + flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); + if (flow) { + netdev_warn_once(priv->netdev, "flow cookie %lx already exists, ignoring\n", f->cookie); + return 0; + } + if (esw && esw->mode == SRIOV_OFFLOADS) { - flow_flags = MLX5E_TC_FLOW_ESWITCH; + flow_flags |= MLX5E_TC_FLOW_ESWITCH; attr_size = sizeof(struct mlx5_esw_flow_attr); } else { - flow_flags = MLX5E_TC_FLOW_NIC; + flow_flags |= MLX5E_TC_FLOW_NIC; attr_size = sizeof(struct mlx5_nic_flow_attr); } @@ -2627,6 +2689,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, 
flow->cookie = f->cookie; flow->flags = flow_flags; + flow->priv = priv; err = parse_cls_flower(priv, flow, &parse_attr->spec, f); if (err < 0) @@ -2657,8 +2720,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)) kvfree(parse_attr); - err = rhashtable_insert_fast(&tc->ht, &flow->node, - tc->ht_params); + err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params); if (err) { mlx5e_tc_del_flow(priv, flow); kfree(flow); @@ -2672,18 +2734,28 @@ err_free: return err; } +#define DIRECTION_MASK (MLX5E_TC_INGRESS | MLX5E_TC_EGRESS) +#define FLOW_DIRECTION_MASK (MLX5E_TC_FLOW_INGRESS | MLX5E_TC_FLOW_EGRESS) + +static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) +{ + if ((flow->flags & FLOW_DIRECTION_MASK) == (flags & DIRECTION_MASK)) + return true; + + return false; +} + int mlx5e_delete_flower(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f) + struct tc_cls_flower_offload *f, int flags) { + struct rhashtable *tc_ht = get_tc_ht(priv); struct mlx5e_tc_flow *flow; - struct mlx5e_tc_table *tc = &priv->fs.tc; - flow = rhashtable_lookup_fast(&tc->ht, &f->cookie, - tc->ht_params); - if (!flow) + flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); + if (!flow || !same_flow_direction(flow, flags)) return -EINVAL; - rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params); + rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params); mlx5e_tc_del_flow(priv, flow); @@ -2693,18 +2765,17 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, } int mlx5e_stats_flower(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f) + struct tc_cls_flower_offload *f, int flags) { - struct mlx5e_tc_table *tc = &priv->fs.tc; + struct rhashtable *tc_ht = get_tc_ht(priv); struct mlx5e_tc_flow *flow; struct mlx5_fc *counter; u64 bytes; u64 packets; u64 lastuse; - flow = rhashtable_lookup_fast(&tc->ht, &f->cookie, - tc->ht_params); - if (!flow) + flow = rhashtable_lookup_fast(tc_ht, 
&f->cookie, tc_ht_params); + if (!flow || !same_flow_direction(flow, flags)) return -EINVAL; if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED)) @@ -2721,41 +2792,43 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, return 0; } -static const struct rhashtable_params mlx5e_tc_flow_ht_params = { - .head_offset = offsetof(struct mlx5e_tc_flow, node), - .key_offset = offsetof(struct mlx5e_tc_flow, cookie), - .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie), - .automatic_shrinking = true, -}; - -int mlx5e_tc_init(struct mlx5e_priv *priv) +int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { struct mlx5e_tc_table *tc = &priv->fs.tc; hash_init(tc->mod_hdr_tbl); hash_init(tc->hairpin_tbl); - tc->ht_params = mlx5e_tc_flow_ht_params; - return rhashtable_init(&tc->ht, &tc->ht_params); + return rhashtable_init(&tc->ht, &tc_ht_params); } static void _mlx5e_tc_del_flow(void *ptr, void *arg) { struct mlx5e_tc_flow *flow = ptr; - struct mlx5e_priv *priv = arg; + struct mlx5e_priv *priv = flow->priv; mlx5e_tc_del_flow(priv, flow); kfree(flow); } -void mlx5e_tc_cleanup(struct mlx5e_priv *priv) +void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) { struct mlx5e_tc_table *tc = &priv->fs.tc; - rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, priv); + rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL); if (!IS_ERR_OR_NULL(tc->t)) { mlx5_destroy_flow_table(tc->t); tc->t = NULL; } } + +int mlx5e_tc_esw_init(struct rhashtable *tc_ht) +{ + return rhashtable_init(tc_ht, &tc_ht_params); +} + +void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) +{ + rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index c14c263a739b..59e52b845beb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -38,16 +38,26 @@ #define MLX5E_TC_FLOW_ID_MASK 0x0000ffff #ifdef CONFIG_MLX5_ESWITCH -int 
mlx5e_tc_init(struct mlx5e_priv *priv); -void mlx5e_tc_cleanup(struct mlx5e_priv *priv); + +enum { + MLX5E_TC_INGRESS = BIT(0), + MLX5E_TC_EGRESS = BIT(1), + MLX5E_TC_LAST_EXPORTED_BIT = 1, +}; + +int mlx5e_tc_nic_init(struct mlx5e_priv *priv); +void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv); + +int mlx5e_tc_esw_init(struct rhashtable *tc_ht); +void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht); int mlx5e_configure_flower(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f); + struct tc_cls_flower_offload *f, int flags); int mlx5e_delete_flower(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f); + struct tc_cls_flower_offload *f, int flags); int mlx5e_stats_flower(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f); + struct tc_cls_flower_offload *f, int flags); struct mlx5e_encap_entry; void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, @@ -64,8 +74,8 @@ static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) } #else /* CONFIG_MLX5_ESWITCH */ -static inline int mlx5e_tc_init(struct mlx5e_priv *priv) { return 0; } -static inline void mlx5e_tc_cleanup(struct mlx5e_priv *priv) {} +static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return 0; } #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1352d13eedb3..09f0e11c6ffc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -192,7 +192,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, } dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport_num = vport; + dest.vport.num = vport; esw_debug(esw->dev, "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index edf47a4d549e..f47a14e31b7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -237,6 +237,8 @@ enum mlx5_flow_match_level { struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; struct mlx5_eswitch_rep *out_rep; + struct mlx5_core_dev *out_mdev; + struct mlx5_core_dev *in_mdev; int action; __be16 vlan_proto; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 8dd0eca03202..b9ea464bcfa9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -71,7 +71,12 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport_num = attr->out_rep->vport; + dest[i].vport.num = attr->out_rep->vport; + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + dest[i].vport.vhca_id = + MLX5_CAP_GEN(attr->out_mdev, vhca_id); + dest[i].vport.vhca_id_valid = 1; + } i++; } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { @@ -88,8 +93,16 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); if (attr->match_level == MLX5_MATCH_NONE) spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; @@ -347,7 +360,7 @@ 
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport_num = vport; + dest.vport.num = vport; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec, @@ -391,7 +404,7 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dmac_c[0] = 0x01; dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport_num = 0; + dest.vport.num = 0; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec, @@ -667,7 +680,7 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) esw->offloads.vport_rx_group = g; out: - kfree(flow_group_in); + kvfree(flow_group_in); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index ef5afd7c9325..5a00deff5457 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -372,6 +372,15 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { id = dst->dest_attr.ft->id; + } else if (dst->dest_attr.type == + MLX5_FLOW_DESTINATION_TYPE_VPORT) { + id = dst->dest_attr.vport.num; + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id_valid, + dst->dest_attr.vport.vhca_id_valid); + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id, + dst->dest_attr.vport.vhca_id); } else { id = dst->dest_attr.tir_num; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index c39c1692e674..806e95523f9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1374,6 +1374,8 @@ static int create_auto_flow_group(struct 
mlx5_flow_table *ft, struct mlx5_core_dev *dev = get_dev(&ft->node); int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); void *match_criteria_addr; + u8 src_esw_owner_mask_on; + void *misc; int err; u32 *in; @@ -1386,6 +1388,14 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft, MLX5_SET(create_flow_group_in, in, start_flow_index, fg->start_index); MLX5_SET(create_flow_group_in, in, end_flow_index, fg->start_index + fg->max_ftes - 1); + + misc = MLX5_ADDR_OF(fte_match_param, fg->mask.match_criteria, + misc_parameters); + src_esw_owner_mask_on = !!MLX5_GET(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + MLX5_SET(create_flow_group_in, in, + source_eswitch_owner_vhca_id_valid, src_esw_owner_mask_on); + match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); memcpy(match_criteria_addr, fg->mask.match_criteria, @@ -1406,7 +1416,7 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, { if (d1->type == d2->type) { if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT && - d1->vport_num == d2->vport_num) || + d1->vport.num == d2->vport.num) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && d1->ft == d2->ft) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 177e076b8d17..719cecb182c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -511,7 +511,7 @@ int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, *system_image_guid = MLX5_GET64(query_nic_vport_context_out, out, nic_vport_context.system_image_guid); - kfree(out); + kvfree(out); return 0; } @@ -531,7 +531,7 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) *node_guid = MLX5_GET64(query_nic_vport_context_out, out, nic_vport_context.node_guid); - kfree(out); + kvfree(out); return 0; } @@ -587,7 +587,7 @@ int 
mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, *qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out, nic_vport_context.qkey_violation_counter); - kfree(out); + kvfree(out); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index e13ac3b8dff7..a38faec45b30 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1714,15 +1714,16 @@ EXPORT_SYMBOL(mlxsw_core_port_fini); void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port, void *port_driver_priv, struct net_device *dev, - bool split, u32 split_group) + u32 port_number, bool split, + u32 split_port_subnumber) { struct mlxsw_core_port *mlxsw_core_port = &mlxsw_core->ports[local_port]; struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; mlxsw_core_port->port_driver_priv = port_driver_priv; - if (split) - devlink_port_split_set(devlink_port, split_group); + devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, + port_number, split, split_port_subnumber); devlink_port_type_eth_set(devlink_port, dev); } EXPORT_SYMBOL(mlxsw_core_port_eth_set); @@ -1762,6 +1763,17 @@ enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_port_type_get); +int mlxsw_core_port_get_phys_port_name(struct mlxsw_core *mlxsw_core, + u8 local_port, char *name, size_t len) +{ + struct mlxsw_core_port *mlxsw_core_port = + &mlxsw_core->ports[local_port]; + struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; + + return devlink_port_get_phys_port_name(devlink_port, name, len); +} +EXPORT_SYMBOL(mlxsw_core_port_get_phys_port_name); + static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core, const char *buf, size_t size) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 092d39399f3c..4eac7fbd07d5 100644 --- 
a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -201,13 +201,16 @@ int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port); void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port); void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port, void *port_driver_priv, struct net_device *dev, - bool split, u32 split_group); + u32 port_number, bool split, + u32 split_port_subnumber); void mlxsw_core_port_ib_set(struct mlxsw_core *mlxsw_core, u8 local_port, void *port_driver_priv); void mlxsw_core_port_clear(struct mlxsw_core *mlxsw_core, u8 local_port, void *port_driver_priv); enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core, u8 local_port); +int mlxsw_core_port_get_phys_port_name(struct mlxsw_core *mlxsw_core, + u8 local_port, char *name, size_t len); int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay); bool mlxsw_core_schedule_work(struct work_struct *work); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 94132f6cec61..bb252b36994d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1238,21 +1238,10 @@ static int mlxsw_sp_port_get_phys_port_name(struct net_device *dev, char *name, size_t len) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - u8 module = mlxsw_sp_port->mapping.module; - u8 width = mlxsw_sp_port->mapping.width; - u8 lane = mlxsw_sp_port->mapping.lane; - int err; - - if (!mlxsw_sp_port->split) - err = snprintf(name, len, "p%d", module + 1); - else - err = snprintf(name, len, "p%ds%d", module + 1, - lane / width); - if (err >= len) - return -EINVAL; - - return 0; + return mlxsw_core_port_get_phys_port_name(mlxsw_sp_port->mlxsw_sp->core, + mlxsw_sp_port->local_port, + name, len); } static struct mlxsw_sp_port_mall_tc_entry * @@ -2927,8 +2916,8 @@ static int 
mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, } mlxsw_core_port_eth_set(mlxsw_sp->core, mlxsw_sp_port->local_port, - mlxsw_sp_port, dev, mlxsw_sp_port->split, - module); + mlxsw_sp_port, dev, module + 1, + mlxsw_sp_port->split, lane / width); mlxsw_core_schedule_dw(&mlxsw_sp_port->periodic_hw_stats.update_dw, 0); return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index a655c5850aa6..3922c1cfe5f5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -417,13 +417,10 @@ static int mlxsw_sx_port_get_phys_port_name(struct net_device *dev, char *name, size_t len) { struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); - int err; - - err = snprintf(name, len, "p%d", mlxsw_sx_port->mapping.module + 1); - if (err >= len) - return -EINVAL; - return 0; + return mlxsw_core_port_get_phys_port_name(mlxsw_sx_port->mlxsw_sx->core, + mlxsw_sx_port->local_port, + name, len); } static const struct net_device_ops mlxsw_sx_port_netdev_ops = { @@ -1149,7 +1146,7 @@ static int __mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port, } mlxsw_core_port_eth_set(mlxsw_sx->core, mlxsw_sx_port->local_port, - mlxsw_sx_port, dev, false, 0); + mlxsw_sx_port, dev, module + 1, false, 0); mlxsw_sx->ports[local_port] = mlxsw_sx_port; return 0; diff --git a/drivers/net/ethernet/mscc/ocelot_qsys.h b/drivers/net/ethernet/mscc/ocelot_qsys.h index aa7267d5ca77..d8c63aa761be 100644 --- a/drivers/net/ethernet/mscc/ocelot_qsys.h +++ b/drivers/net/ethernet/mscc/ocelot_qsys.h @@ -1,7 +1,7 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ /* * Microsemi Ocelot Switch driver * - * License: Dual MIT/GPL * Copyright (c) 2017 Microsemi Corporation */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index eb0fc614673d..b1e67cf4257a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c 
+++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -175,8 +175,9 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) return ret; devlink_port_type_eth_set(&port->dl_port, port->netdev); - if (eth_port.is_split) - devlink_port_split_set(&port->dl_port, eth_port.label_port); + devlink_port_attrs_set(&port->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, + eth_port.label_port, eth_port.is_split, + eth_port.label_subport); devlink = priv_to_devlink(app->pf); diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 092718a03786..031f6e6ee9c1 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -683,10 +683,11 @@ static int emac_tx_q_desc_alloc(struct emac_adapter *adpt, struct emac_tx_queue *tx_q) { struct emac_ring_header *ring_header = &adpt->ring_header; + int node = dev_to_node(adpt->netdev->dev.parent); size_t size; size = sizeof(struct emac_buffer) * tx_q->tpd.count; - tx_q->tpd.tpbuff = kzalloc(size, GFP_KERNEL); + tx_q->tpd.tpbuff = kzalloc_node(size, GFP_KERNEL, node); if (!tx_q->tpd.tpbuff) return -ENOMEM; @@ -723,11 +724,12 @@ static void emac_rx_q_bufs_free(struct emac_adapter *adpt) static int emac_rx_descs_alloc(struct emac_adapter *adpt) { struct emac_ring_header *ring_header = &adpt->ring_header; + int node = dev_to_node(adpt->netdev->dev.parent); struct emac_rx_queue *rx_q = &adpt->rx_q; size_t size; size = sizeof(struct emac_buffer) * rx_q->rfd.count; - rx_q->rfd.rfbuff = kzalloc(size, GFP_KERNEL); + rx_q->rfd.rfbuff = kzalloc_node(size, GFP_KERNEL, node); if (!rx_q->rfd.rfbuff) return -ENOMEM; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 8dd41e08a6c6..83148ca61317 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -466,6 +466,9 @@ static void sh_eth_select_mii(struct net_device *ndev) u32 value; switch 
(mdp->phy_interface) { + case PHY_INTERFACE_MODE_RGMII ... PHY_INTERFACE_MODE_RGMII_TXID: + value = 0x3; + break; case PHY_INTERFACE_MODE_GMII: value = 0x2; break; @@ -750,6 +753,49 @@ static struct sh_eth_cpu_data rcar_gen2_data = { .rmiimode = 1, .magic = 1, }; + +/* R8A77980 */ +static struct sh_eth_cpu_data r8a77980_data = { + .soft_reset = sh_eth_soft_reset_gether, + + .set_duplex = sh_eth_set_duplex, + .set_rate = sh_eth_set_rate_gether, + + .register_type = SH_ETH_REG_GIGABIT, + + .edtrr_trns = EDTRR_TRNS_GETHER, + .ecsr_value = ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD | ECSR_MPD, + .ecsipr_value = ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP | + ECSIPR_MPDIP, + .eesipr_value = EESIPR_RFCOFIP | EESIPR_ECIIP | + EESIPR_FTCIP | EESIPR_TDEIP | EESIPR_TFUFIP | + EESIPR_FRIP | EESIPR_RDEIP | EESIPR_RFOFIP | + EESIPR_RMAFIP | EESIPR_RRFIP | + EESIPR_RTLFIP | EESIPR_RTSFIP | + EESIPR_PREIP | EESIPR_CERFIP, + + .tx_check = EESR_FTC | EESR_CD | EESR_RTO, + .eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT | + EESR_RFE | EESR_RDE | EESR_RFRMER | + EESR_TFE | EESR_TDE | EESR_ECI, + .fdr_value = 0x0000070f, + + .apr = 1, + .mpr = 1, + .tpauser = 1, + .bculr = 1, + .hw_swap = 1, + .nbst = 1, + .rpadir = 1, + .rpadir_value = 2 << 16, + .no_trimd = 1, + .no_ade = 1, + .xdfar_rw = 1, + .hw_checksum = 1, + .select_mii = 1, + .magic = 1, + .cexcr = 1, +}; #endif /* CONFIG_OF */ static void sh_eth_set_rate_sh7724(struct net_device *ndev) @@ -1431,6 +1477,10 @@ static int sh_eth_dev_init(struct net_device *ndev) sh_eth_write(ndev, mdp->cd->trscer_err_mask, TRSCER); + /* DMA transfer burst mode */ + if (mdp->cd->nbst) + sh_eth_modify(ndev, EDMR, EDMR_NBST, EDMR_NBST); + if (mdp->cd->bculr) sh_eth_write(ndev, 0x800, BCULR); /* Burst sycle set */ @@ -3127,6 +3177,7 @@ static const struct of_device_id sh_eth_match_table[] = { { .compatible = "renesas,ether-r8a7791", .data = &rcar_gen2_data }, { .compatible = "renesas,ether-r8a7793", .data = &rcar_gen2_data }, { .compatible 
= "renesas,ether-r8a7794", .data = &rcar_gen2_data }, + { .compatible = "renesas,gether-r8a77980", .data = &r8a77980_data }, { .compatible = "renesas,ether-r7s72100", .data = &r7s72100_data }, { .compatible = "renesas,rcar-gen1-ether", .data = &rcar_gen1_data }, { .compatible = "renesas,rcar-gen2-ether", .data = &rcar_gen2_data }, diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h index a5b792ce2ae7..7d5aaba1384a 100644 --- a/drivers/net/ethernet/renesas/sh_eth.h +++ b/drivers/net/ethernet/renesas/sh_eth.h @@ -184,6 +184,7 @@ enum GECMR_BIT { /* EDMR */ enum DMAC_M_BIT { + EDMR_NBST = 0x80, EDMR_EL = 0x40, /* Litte endian */ EDMR_DL1 = 0x20, EDMR_DL0 = 0x10, EDMR_SRST_GETHER = 0x03, @@ -505,6 +506,7 @@ struct sh_eth_cpu_data { unsigned bculr:1; /* EtherC have BCULR */ unsigned tsu:1; /* EtherC have TSU */ unsigned hw_swap:1; /* E-DMAC have DE bit in EDMR */ + unsigned nbst:1; /* E-DMAC has NBST bit in EDMR */ unsigned rpadir:1; /* E-DMAC have RPADIR */ unsigned no_trimd:1; /* E-DMAC DO NOT have TRIMD */ unsigned no_ade:1; /* E-DMAC DO NOT have ADE bit in EESR */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 2f7f0915f071..2e6e2a96b4f2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -276,17 +276,28 @@ static int sun8i_dwmac_dma_reset(void __iomem *ioaddr) * Called from stmmac via stmmac_dma_ops->init */ static void sun8i_dwmac_dma_init(void __iomem *ioaddr, - struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx, u32 dma_rx, int atds) + struct stmmac_dma_cfg *dma_cfg, int atds) { - /* Write TX and RX descriptors address */ - writel(dma_rx, ioaddr + EMAC_RX_DESC_LIST); - writel(dma_tx, ioaddr + EMAC_TX_DESC_LIST); - writel(EMAC_RX_INT | EMAC_TX_INT, ioaddr + EMAC_INT_EN); writel(0x1FFFFFF, ioaddr + EMAC_INT_STA); } +static void sun8i_dwmac_dma_init_rx(void __iomem *ioaddr, + struct 
stmmac_dma_cfg *dma_cfg, + u32 dma_rx_phy, u32 chan) +{ + /* Write RX descriptors address */ + writel(dma_rx_phy, ioaddr + EMAC_RX_DESC_LIST); +} + +static void sun8i_dwmac_dma_init_tx(void __iomem *ioaddr, + struct stmmac_dma_cfg *dma_cfg, + u32 dma_tx_phy, u32 chan) +{ + /* Write TX descriptors address */ + writel(dma_tx_phy, ioaddr + EMAC_TX_DESC_LIST); +} + /* sun8i_dwmac_dump_regs() - Dump EMAC address space * Called from stmmac_dma_ops->dump_regs * Used for ethtool @@ -437,13 +448,36 @@ static int sun8i_dwmac_dma_interrupt(void __iomem *ioaddr, return ret; } -static void sun8i_dwmac_dma_operation_mode(void __iomem *ioaddr, int txmode, - int rxmode, int rxfifosz) +static void sun8i_dwmac_dma_operation_mode_rx(void __iomem *ioaddr, int mode, + u32 channel, int fifosz, u8 qmode) +{ + u32 v; + + v = readl(ioaddr + EMAC_RX_CTL1); + if (mode == SF_DMA_MODE) { + v |= EMAC_RX_MD; + } else { + v &= ~EMAC_RX_MD; + v &= ~EMAC_RX_TH_MASK; + if (mode < 32) + v |= EMAC_RX_TH_32; + else if (mode < 64) + v |= EMAC_RX_TH_64; + else if (mode < 96) + v |= EMAC_RX_TH_96; + else if (mode < 128) + v |= EMAC_RX_TH_128; + } + writel(v, ioaddr + EMAC_RX_CTL1); +} + +static void sun8i_dwmac_dma_operation_mode_tx(void __iomem *ioaddr, int mode, + u32 channel, int fifosz, u8 qmode) { u32 v; v = readl(ioaddr + EMAC_TX_CTL1); - if (txmode == SF_DMA_MODE) { + if (mode == SF_DMA_MODE) { v |= EMAC_TX_MD; /* Undocumented bit (called TX_NEXT_FRM in BSP), the original * comment is @@ -454,40 +488,26 @@ static void sun8i_dwmac_dma_operation_mode(void __iomem *ioaddr, int txmode, } else { v &= ~EMAC_TX_MD; v &= ~EMAC_TX_TH_MASK; - if (txmode < 64) + if (mode < 64) v |= EMAC_TX_TH_64; - else if (txmode < 128) + else if (mode < 128) v |= EMAC_TX_TH_128; - else if (txmode < 192) + else if (mode < 192) v |= EMAC_TX_TH_192; - else if (txmode < 256) + else if (mode < 256) v |= EMAC_TX_TH_256; } writel(v, ioaddr + EMAC_TX_CTL1); - - v = readl(ioaddr + EMAC_RX_CTL1); - if (rxmode == SF_DMA_MODE) { - v |= 
EMAC_RX_MD; - } else { - v &= ~EMAC_RX_MD; - v &= ~EMAC_RX_TH_MASK; - if (rxmode < 32) - v |= EMAC_RX_TH_32; - else if (rxmode < 64) - v |= EMAC_RX_TH_64; - else if (rxmode < 96) - v |= EMAC_RX_TH_96; - else if (rxmode < 128) - v |= EMAC_RX_TH_128; - } - writel(v, ioaddr + EMAC_RX_CTL1); } static const struct stmmac_dma_ops sun8i_dwmac_dma_ops = { .reset = sun8i_dwmac_dma_reset, .init = sun8i_dwmac_dma_init, + .init_rx_chan = sun8i_dwmac_dma_init_rx, + .init_tx_chan = sun8i_dwmac_dma_init_tx, .dump_regs = sun8i_dwmac_dump_regs, - .dma_mode = sun8i_dwmac_dma_operation_mode, + .dma_rx_mode = sun8i_dwmac_dma_operation_mode_rx, + .dma_tx_mode = sun8i_dwmac_dma_operation_mode_tx, .enable_dma_transmission = sun8i_dwmac_enable_dma_transmission, .enable_dma_irq = sun8i_dwmac_enable_dma_irq, .disable_dma_irq = sun8i_dwmac_disable_dma_irq, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c index 7ecf549c7f1c..aacc4aa80e3c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c @@ -81,8 +81,7 @@ static void dwmac1000_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi) } static void dwmac1000_dma_init(void __iomem *ioaddr, - struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx, u32 dma_rx, int atds) + struct stmmac_dma_cfg *dma_cfg, int atds) { u32 value = readl(ioaddr + DMA_BUS_MODE); int txpbl = dma_cfg->txpbl ?: dma_cfg->pbl; @@ -119,12 +118,22 @@ static void dwmac1000_dma_init(void __iomem *ioaddr, /* Mask interrupts by writing to CSR7 */ writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA); +} - /* RX/TX descriptor base address lists must be written into - * DMA CSR3 and CSR4, respectively - */ - writel(dma_tx, ioaddr + DMA_TX_BASE_ADDR); - writel(dma_rx, ioaddr + DMA_RCV_BASE_ADDR); +static void dwmac1000_dma_init_rx(void __iomem *ioaddr, + struct stmmac_dma_cfg *dma_cfg, + u32 dma_rx_phy, u32 chan) +{ + /* RX descriptor base address list must 
be written into DMA CSR3 */ + writel(dma_rx_phy, ioaddr + DMA_RCV_BASE_ADDR); +} + +static void dwmac1000_dma_init_tx(void __iomem *ioaddr, + struct stmmac_dma_cfg *dma_cfg, + u32 dma_tx_phy, u32 chan) +{ + /* TX descriptor base address list must be written into DMA CSR4 */ + writel(dma_tx_phy, ioaddr + DMA_TX_BASE_ADDR); } static u32 dwmac1000_configure_fc(u32 csr6, int rxfifosz) @@ -148,12 +157,40 @@ static u32 dwmac1000_configure_fc(u32 csr6, int rxfifosz) return csr6; } -static void dwmac1000_dma_operation_mode(void __iomem *ioaddr, int txmode, - int rxmode, int rxfifosz) +static void dwmac1000_dma_operation_mode_rx(void __iomem *ioaddr, int mode, + u32 channel, int fifosz, u8 qmode) +{ + u32 csr6 = readl(ioaddr + DMA_CONTROL); + + if (mode == SF_DMA_MODE) { + pr_debug("GMAC: enable RX store and forward mode\n"); + csr6 |= DMA_CONTROL_RSF; + } else { + pr_debug("GMAC: disable RX SF mode (threshold %d)\n", mode); + csr6 &= ~DMA_CONTROL_RSF; + csr6 &= DMA_CONTROL_TC_RX_MASK; + if (mode <= 32) + csr6 |= DMA_CONTROL_RTC_32; + else if (mode <= 64) + csr6 |= DMA_CONTROL_RTC_64; + else if (mode <= 96) + csr6 |= DMA_CONTROL_RTC_96; + else + csr6 |= DMA_CONTROL_RTC_128; + } + + /* Configure flow control based on rx fifo size */ + csr6 = dwmac1000_configure_fc(csr6, fifosz); + + writel(csr6, ioaddr + DMA_CONTROL); +} + +static void dwmac1000_dma_operation_mode_tx(void __iomem *ioaddr, int mode, + u32 channel, int fifosz, u8 qmode) { u32 csr6 = readl(ioaddr + DMA_CONTROL); - if (txmode == SF_DMA_MODE) { + if (mode == SF_DMA_MODE) { pr_debug("GMAC: enable TX store and forward mode\n"); /* Transmit COE type 2 cannot be done in cut-through mode. 
*/ csr6 |= DMA_CONTROL_TSF; @@ -162,42 +199,22 @@ static void dwmac1000_dma_operation_mode(void __iomem *ioaddr, int txmode, */ csr6 |= DMA_CONTROL_OSF; } else { - pr_debug("GMAC: disabling TX SF (threshold %d)\n", txmode); + pr_debug("GMAC: disabling TX SF (threshold %d)\n", mode); csr6 &= ~DMA_CONTROL_TSF; csr6 &= DMA_CONTROL_TC_TX_MASK; /* Set the transmit threshold */ - if (txmode <= 32) + if (mode <= 32) csr6 |= DMA_CONTROL_TTC_32; - else if (txmode <= 64) + else if (mode <= 64) csr6 |= DMA_CONTROL_TTC_64; - else if (txmode <= 128) + else if (mode <= 128) csr6 |= DMA_CONTROL_TTC_128; - else if (txmode <= 192) + else if (mode <= 192) csr6 |= DMA_CONTROL_TTC_192; else csr6 |= DMA_CONTROL_TTC_256; } - if (rxmode == SF_DMA_MODE) { - pr_debug("GMAC: enable RX store and forward mode\n"); - csr6 |= DMA_CONTROL_RSF; - } else { - pr_debug("GMAC: disable RX SF mode (threshold %d)\n", rxmode); - csr6 &= ~DMA_CONTROL_RSF; - csr6 &= DMA_CONTROL_TC_RX_MASK; - if (rxmode <= 32) - csr6 |= DMA_CONTROL_RTC_32; - else if (rxmode <= 64) - csr6 |= DMA_CONTROL_RTC_64; - else if (rxmode <= 96) - csr6 |= DMA_CONTROL_RTC_96; - else - csr6 |= DMA_CONTROL_RTC_128; - } - - /* Configure flow control based on rx fifo size */ - csr6 = dwmac1000_configure_fc(csr6, rxfifosz); - writel(csr6, ioaddr + DMA_CONTROL); } @@ -256,9 +273,12 @@ static void dwmac1000_rx_watchdog(void __iomem *ioaddr, u32 riwt, const struct stmmac_dma_ops dwmac1000_dma_ops = { .reset = dwmac_dma_reset, .init = dwmac1000_dma_init, + .init_rx_chan = dwmac1000_dma_init_rx, + .init_tx_chan = dwmac1000_dma_init_tx, .axi = dwmac1000_dma_axi, .dump_regs = dwmac1000_dump_dma_regs, - .dma_mode = dwmac1000_dma_operation_mode, + .dma_rx_mode = dwmac1000_dma_operation_mode_rx, + .dma_tx_mode = dwmac1000_dma_operation_mode_tx, .enable_dma_transmission = dwmac_enable_dma_transmission, .enable_dma_irq = dwmac_enable_dma_irq, .disable_dma_irq = dwmac_disable_dma_irq, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c 
b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c index 6502b9aa3bf5..21dee25ee570 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c @@ -29,8 +29,7 @@ #include "dwmac_dma.h" static void dwmac100_dma_init(void __iomem *ioaddr, - struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx, u32 dma_rx, int atds) + struct stmmac_dma_cfg *dma_cfg, int atds) { /* Enable Application Access by writing to DMA CSR0 */ writel(DMA_BUS_MODE_DEFAULT | (dma_cfg->pbl << DMA_BUS_MODE_PBL_SHIFT), @@ -38,12 +37,22 @@ static void dwmac100_dma_init(void __iomem *ioaddr, /* Mask interrupts by writing to CSR7 */ writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA); +} - /* RX/TX descriptor base addr lists must be written into - * DMA CSR3 and CSR4, respectively - */ - writel(dma_tx, ioaddr + DMA_TX_BASE_ADDR); - writel(dma_rx, ioaddr + DMA_RCV_BASE_ADDR); +static void dwmac100_dma_init_rx(void __iomem *ioaddr, + struct stmmac_dma_cfg *dma_cfg, + u32 dma_rx_phy, u32 chan) +{ + /* RX descriptor base addr lists must be written into DMA CSR3 */ + writel(dma_rx_phy, ioaddr + DMA_RCV_BASE_ADDR); +} + +static void dwmac100_dma_init_tx(void __iomem *ioaddr, + struct stmmac_dma_cfg *dma_cfg, + u32 dma_tx_phy, u32 chan) +{ + /* TX descriptor base addr lists must be written into DMA CSR4 */ + writel(dma_tx_phy, ioaddr + DMA_TX_BASE_ADDR); } /* Store and Forward capability is not used at all. @@ -51,14 +60,14 @@ static void dwmac100_dma_init(void __iomem *ioaddr, * The transmit threshold can be programmed by setting the TTC bits in the DMA * control register. 
*/ -static void dwmac100_dma_operation_mode(void __iomem *ioaddr, int txmode, - int rxmode, int rxfifosz) +static void dwmac100_dma_operation_mode_tx(void __iomem *ioaddr, int mode, + u32 channel, int fifosz, u8 qmode) { u32 csr6 = readl(ioaddr + DMA_CONTROL); - if (txmode <= 32) + if (mode <= 32) csr6 |= DMA_CONTROL_TTC_32; - else if (txmode <= 64) + else if (mode <= 64) csr6 |= DMA_CONTROL_TTC_64; else csr6 |= DMA_CONTROL_TTC_128; @@ -112,8 +121,10 @@ static void dwmac100_dma_diagnostic_fr(void *data, struct stmmac_extra_stats *x, const struct stmmac_dma_ops dwmac100_dma_ops = { .reset = dwmac_dma_reset, .init = dwmac100_dma_init, + .init_rx_chan = dwmac100_dma_init_rx, + .init_tx_chan = dwmac100_dma_init_tx, .dump_regs = dwmac100_dump_dma_regs, - .dma_mode = dwmac100_dma_operation_mode, + .dma_tx_mode = dwmac100_dma_operation_mode_tx, .dma_diagnostic_fr = dwmac100_dma_diagnostic_fr, .enable_dma_transmission = dwmac_enable_dma_transmission, .enable_dma_irq = dwmac_enable_dma_irq, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 65ed896c13cb..20299f6f65fc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -189,9 +189,12 @@ static void dwmac4_set_tx_owner(struct dma_desc *p) p->des3 |= cpu_to_le32(TDES3_OWN); } -static void dwmac4_set_rx_owner(struct dma_desc *p) +static void dwmac4_set_rx_owner(struct dma_desc *p, int disable_rx_ic) { - p->des3 |= cpu_to_le32(RDES3_OWN); + p->des3 = cpu_to_le32(RDES3_OWN | RDES3_BUFFER1_VALID_ADDR); + + if (!disable_rx_ic) + p->des3 |= cpu_to_le32(RDES3_INT_ON_COMPLETION_EN); } static int dwmac4_get_tx_ls(struct dma_desc *p) @@ -292,10 +295,7 @@ exit: static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, int end) { - p->des3 = cpu_to_le32(RDES3_OWN | RDES3_BUFFER1_VALID_ADDR); - - if (!disable_rx_ic) - p->des3 |= cpu_to_le32(RDES3_INT_ON_COMPLETION_EN); + 
dwmac4_set_rx_owner(p, disable_rx_ic); } static void dwmac4_rd_init_tx_desc(struct dma_desc *p, int mode, int end) @@ -424,6 +424,25 @@ static void dwmac4_set_mss_ctxt(struct dma_desc *p, unsigned int mss) p->des3 = cpu_to_le32(TDES3_CONTEXT_TYPE | TDES3_CTXT_TCMSSV); } +static void dwmac4_get_addr(struct dma_desc *p, unsigned int *addr) +{ + *addr = le32_to_cpu(p->des0); +} + +static void dwmac4_set_addr(struct dma_desc *p, dma_addr_t addr) +{ + p->des0 = cpu_to_le32(addr); + p->des1 = 0; +} + +static void dwmac4_clear(struct dma_desc *p) +{ + p->des0 = 0; + p->des1 = 0; + p->des2 = 0; + p->des3 = 0; +} + const struct stmmac_desc_ops dwmac4_desc_ops = { .tx_status = dwmac4_wrback_get_tx_status, .rx_status = dwmac4_wrback_get_rx_status, @@ -445,6 +464,9 @@ const struct stmmac_desc_ops dwmac4_desc_ops = { .init_tx_desc = dwmac4_rd_init_tx_desc, .display_ring = dwmac4_display_ring, .set_mss = dwmac4_set_mss_ctxt, + .get_addr = dwmac4_get_addr, + .set_addr = dwmac4_set_addr, + .clear = dwmac4_clear, }; const struct stmmac_mode_ops dwmac4_ring_mode_ops = { }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 117c3a5288f0..bf8e5a16f11c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -94,6 +94,10 @@ static void dwmac4_dma_init_tx_chan(void __iomem *ioaddr, value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan)); value = value | (txpbl << DMA_BUS_MODE_PBL_SHIFT); + + /* Enable OSP to get best performance */ + value |= DMA_CONTROL_OSP; + writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan)); writel(dma_tx_phy, ioaddr + DMA_CHAN_TX_BASE_ADDR(chan)); @@ -116,8 +120,7 @@ static void dwmac4_dma_init_channel(void __iomem *ioaddr, } static void dwmac4_dma_init(void __iomem *ioaddr, - struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx, u32 dma_rx, int atds) + struct stmmac_dma_cfg *dma_cfg, int atds) { u32 value = readl(ioaddr + DMA_SYS_BUS_MODE); diff 
--git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h index 8474bf961dd0..c63c1fe3f26b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h @@ -184,7 +184,6 @@ #define DMA_CHAN0_DBG_STAT_RPS_SHIFT 8 int dwmac4_dma_reset(void __iomem *ioaddr); -void dwmac4_enable_dma_transmission(void __iomem *ioaddr, u32 tail_ptr); void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan); void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan); void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan); diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 3bfb3f584be2..77914c89d749 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -292,7 +292,7 @@ static void enh_desc_set_tx_owner(struct dma_desc *p) p->des0 |= cpu_to_le32(ETDES0_OWN); } -static void enh_desc_set_rx_owner(struct dma_desc *p) +static void enh_desc_set_rx_owner(struct dma_desc *p, int disable_rx_ic) { p->des0 |= cpu_to_le32(RDES0_OWN); } @@ -437,6 +437,21 @@ static void enh_desc_display_ring(void *head, unsigned int size, bool rx) pr_info("\n"); } +static void enh_desc_get_addr(struct dma_desc *p, unsigned int *addr) +{ + *addr = le32_to_cpu(p->des2); +} + +static void enh_desc_set_addr(struct dma_desc *p, dma_addr_t addr) +{ + p->des2 = cpu_to_le32(addr); +} + +static void enh_desc_clear(struct dma_desc *p) +{ + p->des2 = 0; +} + const struct stmmac_desc_ops enh_desc_ops = { .tx_status = enh_desc_get_tx_status, .rx_status = enh_desc_get_rx_status, @@ -457,4 +472,7 @@ const struct stmmac_desc_ops enh_desc_ops = { .get_timestamp = enh_desc_get_timestamp, .get_rx_timestamp_status = enh_desc_get_rx_timestamp_status, .display_ring = enh_desc_display_ring, + .get_addr = enh_desc_get_addr, + .set_addr = enh_desc_set_addr, + .clear = enh_desc_clear, }; diff --git 
a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 9acc8d2f1039..14770fc8865e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -6,6 +6,7 @@ #include "common.h" #include "stmmac.h" +#include "stmmac_ptp.h" static u32 stmmac_get_id(struct stmmac_priv *priv, u32 id_reg) { @@ -72,6 +73,7 @@ static const struct stmmac_hwif_entry { bool gmac; bool gmac4; u32 min_id; + const struct stmmac_regs_off regs; const void *desc; const void *dma; const void *mac; @@ -86,6 +88,10 @@ static const struct stmmac_hwif_entry { .gmac = false, .gmac4 = false, .min_id = 0, + .regs = { + .ptp_off = PTP_GMAC3_X_OFFSET, + .mmc_off = MMC_GMAC3_X_OFFSET, + }, .desc = NULL, .dma = &dwmac100_dma_ops, .mac = &dwmac100_ops, @@ -98,6 +104,10 @@ static const struct stmmac_hwif_entry { .gmac = true, .gmac4 = false, .min_id = 0, + .regs = { + .ptp_off = PTP_GMAC3_X_OFFSET, + .mmc_off = MMC_GMAC3_X_OFFSET, + }, .desc = NULL, .dma = &dwmac1000_dma_ops, .mac = &dwmac1000_ops, @@ -110,6 +120,10 @@ static const struct stmmac_hwif_entry { .gmac = false, .gmac4 = true, .min_id = 0, + .regs = { + .ptp_off = PTP_GMAC4_OFFSET, + .mmc_off = MMC_GMAC4_OFFSET, + }, .desc = &dwmac4_desc_ops, .dma = &dwmac4_dma_ops, .mac = &dwmac4_ops, @@ -122,6 +136,10 @@ static const struct stmmac_hwif_entry { .gmac = false, .gmac4 = true, .min_id = DWMAC_CORE_4_00, + .regs = { + .ptp_off = PTP_GMAC4_OFFSET, + .mmc_off = MMC_GMAC4_OFFSET, + }, .desc = &dwmac4_desc_ops, .dma = &dwmac4_dma_ops, .mac = &dwmac410_ops, @@ -134,6 +152,10 @@ static const struct stmmac_hwif_entry { .gmac = false, .gmac4 = true, .min_id = DWMAC_CORE_4_10, + .regs = { + .ptp_off = PTP_GMAC4_OFFSET, + .mmc_off = MMC_GMAC4_OFFSET, + }, .desc = &dwmac4_desc_ops, .dma = &dwmac410_dma_ops, .mac = &dwmac410_ops, @@ -146,6 +168,10 @@ static const struct stmmac_hwif_entry { .gmac = false, .gmac4 = true, .min_id = DWMAC_CORE_5_10, + .regs = { + .ptp_off = 
PTP_GMAC4_OFFSET, + .mmc_off = MMC_GMAC4_OFFSET, + }, .desc = &dwmac4_desc_ops, .dma = &dwmac410_dma_ops, .mac = &dwmac510_ops, @@ -163,27 +189,35 @@ int stmmac_hwif_init(struct stmmac_priv *priv) bool needs_gmac = priv->plat->has_gmac; const struct stmmac_hwif_entry *entry; struct mac_device_info *mac; + bool needs_setup = true; int i, ret; u32 id; if (needs_gmac) { id = stmmac_get_id(priv, GMAC_VERSION); - } else { + } else if (needs_gmac4) { id = stmmac_get_id(priv, GMAC4_VERSION); + } else { + id = 0; } /* Save ID for later use */ priv->synopsys_id = id; + /* Lets assume some safe values first */ + priv->ptpaddr = priv->ioaddr + + (needs_gmac4 ? PTP_GMAC4_OFFSET : PTP_GMAC3_X_OFFSET); + priv->mmcaddr = priv->ioaddr + + (needs_gmac4 ? MMC_GMAC4_OFFSET : MMC_GMAC3_X_OFFSET); + /* Check for HW specific setup first */ if (priv->plat->setup) { - priv->hw = priv->plat->setup(priv); - if (!priv->hw) - return -ENOMEM; - return 0; + mac = priv->plat->setup(priv); + needs_setup = false; + } else { + mac = devm_kzalloc(priv->device, sizeof(*mac), GFP_KERNEL); } - mac = devm_kzalloc(priv->device, sizeof(*mac), GFP_KERNEL); if (!mac) return -ENOMEM; @@ -195,22 +229,28 @@ int stmmac_hwif_init(struct stmmac_priv *priv) continue; if (needs_gmac4 ^ entry->gmac4) continue; - if (id < entry->min_id) + /* Use synopsys_id var because some setups can override this */ + if (priv->synopsys_id < entry->min_id) continue; - mac->desc = entry->desc; - mac->dma = entry->dma; - mac->mac = entry->mac; - mac->ptp = entry->hwtimestamp; - mac->mode = entry->mode; - mac->tc = entry->tc; + /* Only use generic HW helpers if needed */ + mac->desc = mac->desc ? : entry->desc; + mac->dma = mac->dma ? : entry->dma; + mac->mac = mac->mac ? : entry->mac; + mac->ptp = mac->ptp ? : entry->hwtimestamp; + mac->mode = mac->mode ? : entry->mode; + mac->tc = mac->tc ? 
: entry->tc; priv->hw = mac; + priv->ptpaddr = priv->ioaddr + entry->regs.ptp_off; + priv->mmcaddr = priv->ioaddr + entry->regs.mmc_off; /* Entry found */ - ret = entry->setup(priv); - if (ret) - return ret; + if (needs_setup) { + ret = entry->setup(priv); + if (ret) + return ret; + } /* Run quirks, if needed */ if (entry->quirks) { diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index b7539a14e6ad..f499a7fad6f0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -59,7 +59,7 @@ struct stmmac_desc_ops { /* Get the buffer size from the descriptor */ int (*get_tx_len)(struct dma_desc *p); /* Handle extra events on specific interrupts hw dependent */ - void (*set_rx_owner)(struct dma_desc *p); + void (*set_rx_owner)(struct dma_desc *p, int disable_rx_ic); /* Get the receive frame size */ int (*get_rx_frame_len)(struct dma_desc *p, int rx_coe_type); /* Return the reception status looking at the RDES1 */ @@ -79,6 +79,12 @@ struct stmmac_desc_ops { void (*display_ring)(void *head, unsigned int size, bool rx); /* set MSS via context descriptor */ void (*set_mss)(struct dma_desc *p, unsigned int mss); + /* get descriptor skbuff address */ + void (*get_addr)(struct dma_desc *p, unsigned int *addr); + /* set descriptor skbuff address */ + void (*set_addr)(struct dma_desc *p, dma_addr_t addr); + /* clear descriptor */ + void (*clear)(struct dma_desc *p); }; #define stmmac_init_rx_desc(__priv, __args...) \ @@ -123,6 +129,12 @@ struct stmmac_desc_ops { stmmac_do_void_callback(__priv, desc, display_ring, __args) #define stmmac_set_mss(__priv, __args...) \ stmmac_do_void_callback(__priv, desc, set_mss, __args) +#define stmmac_get_desc_addr(__priv, __args...) \ + stmmac_do_void_callback(__priv, desc, get_addr, __args) +#define stmmac_set_desc_addr(__priv, __args...) \ + stmmac_do_void_callback(__priv, desc, set_addr, __args) +#define stmmac_clear_desc(__priv, __args...) 
\ + stmmac_do_void_callback(__priv, desc, clear, __args) struct stmmac_dma_cfg; struct dma_features; @@ -132,7 +144,7 @@ struct stmmac_dma_ops { /* DMA core initialization */ int (*reset)(void __iomem *ioaddr); void (*init)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx, u32 dma_rx, int atds); + int atds); void (*init_chan)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, u32 chan); void (*init_rx_chan)(void __iomem *ioaddr, @@ -145,10 +157,6 @@ struct stmmac_dma_ops { void (*axi)(void __iomem *ioaddr, struct stmmac_axi *axi); /* Dump DMA registers */ void (*dump_regs)(void __iomem *ioaddr, u32 *reg_space); - /* Set tx/rx threshold in the csr6 register - * An invalid value enables the store-and-forward mode */ - void (*dma_mode)(void __iomem *ioaddr, int txmode, int rxmode, - int rxfifosz); void (*dma_rx_mode)(void __iomem *ioaddr, int mode, u32 channel, int fifosz, u8 qmode); void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel, @@ -191,8 +199,6 @@ struct stmmac_dma_ops { stmmac_do_void_callback(__priv, dma, axi, __args) #define stmmac_dump_dma_regs(__priv, __args...) \ stmmac_do_void_callback(__priv, dma, dump_regs, __args) -#define stmmac_dma_mode(__priv, __args...) \ - stmmac_do_void_callback(__priv, dma, dma_mode, __args) #define stmmac_dma_rx_mode(__priv, __args...) \ stmmac_do_void_callback(__priv, dma, dma_rx_mode, __args) #define stmmac_dma_tx_mode(__priv, __args...) \ @@ -440,6 +446,11 @@ struct stmmac_tc_ops { #define stmmac_tc_setup_cls_u32(__priv, __args...) 
\ stmmac_do_callback(__priv, tc, setup_cls_u32, __args) +struct stmmac_regs_off { + u32 ptp_off; + u32 mmc_off; +}; + extern const struct stmmac_ops dwmac100_ops; extern const struct stmmac_dma_ops dwmac100_dma_ops; extern const struct stmmac_ops dwmac1000_ops; diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index 7b1d901bf5bc..de65bb29feba 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -168,7 +168,7 @@ static void ndesc_set_tx_owner(struct dma_desc *p) p->des0 |= cpu_to_le32(TDES0_OWN); } -static void ndesc_set_rx_owner(struct dma_desc *p) +static void ndesc_set_rx_owner(struct dma_desc *p, int disable_rx_ic) { p->des0 |= cpu_to_le32(RDES0_OWN); } @@ -297,6 +297,21 @@ static void ndesc_display_ring(void *head, unsigned int size, bool rx) pr_info("\n"); } +static void ndesc_get_addr(struct dma_desc *p, unsigned int *addr) +{ + *addr = le32_to_cpu(p->des2); +} + +static void ndesc_set_addr(struct dma_desc *p, dma_addr_t addr) +{ + p->des2 = cpu_to_le32(addr); +} + +static void ndesc_clear(struct dma_desc *p) +{ + p->des2 = 0; +} + const struct stmmac_desc_ops ndesc_ops = { .tx_status = ndesc_get_tx_status, .rx_status = ndesc_get_rx_status, @@ -316,4 +331,7 @@ const struct stmmac_desc_ops ndesc_ops = { .get_timestamp = ndesc_get_timestamp, .get_rx_timestamp_status = ndesc_get_rx_timestamp_status, .display_ring = ndesc_display_ring, + .get_addr = ndesc_get_addr, + .set_addr = ndesc_set_addr, + .clear = ndesc_clear, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 42fc76e76bf9..4d425b1a0c59 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -105,6 +105,7 @@ struct stmmac_priv { u32 tx_count_frames; u32 tx_coal_frames; u32 tx_coal_timer; + bool tx_timer_armed; int tx_coalesce; int hwts_tx_en; diff --git 
a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d9dbe1355896..c32de53a00d3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1156,10 +1156,7 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, return -EINVAL; } - if (priv->synopsys_id >= DWMAC_CORE_4_00) - p->des0 = cpu_to_le32(rx_q->rx_skbuff_dma[i]); - else - p->des2 = cpu_to_le32(rx_q->rx_skbuff_dma[i]); + stmmac_set_desc_addr(priv, p, rx_q->rx_skbuff_dma[i]); if (priv->dma_buf_sz == BUF_SIZE_16KiB) stmmac_init_desc3(priv, p); @@ -1344,14 +1341,7 @@ static int init_dma_tx_desc_rings(struct net_device *dev) else p = tx_q->dma_tx + i; - if (priv->synopsys_id >= DWMAC_CORE_4_00) { - p->des0 = 0; - p->des1 = 0; - p->des2 = 0; - p->des3 = 0; - } else { - p->des2 = 0; - } + stmmac_clear_desc(priv, p); tx_q->tx_skbuff_dma[i].buf = 0; tx_q->tx_skbuff_dma[i].map_as_page = false; @@ -1797,22 +1787,18 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) } /* configure all channels */ - if (priv->synopsys_id >= DWMAC_CORE_4_00) { - for (chan = 0; chan < rx_channels_count; chan++) { - qmode = priv->plat->rx_queues_cfg[chan].mode_to_use; + for (chan = 0; chan < rx_channels_count; chan++) { + qmode = priv->plat->rx_queues_cfg[chan].mode_to_use; - stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan, - rxfifosz, qmode); - } + stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan, + rxfifosz, qmode); + } - for (chan = 0; chan < tx_channels_count; chan++) { - qmode = priv->plat->tx_queues_cfg[chan].mode_to_use; + for (chan = 0; chan < tx_channels_count; chan++) { + qmode = priv->plat->tx_queues_cfg[chan].mode_to_use; - stmmac_dma_tx_mode(priv, priv->ioaddr, txmode, chan, - txfifosz, qmode); - } - } else { - stmmac_dma_mode(priv, priv->ioaddr, txmode, rxmode, rxfifosz); + stmmac_dma_tx_mode(priv, priv->ioaddr, txmode, chan, + txfifosz, qmode); } } @@ 
-1981,23 +1967,14 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, rxfifosz /= rx_channels_count; txfifosz /= tx_channels_count; - if (priv->synopsys_id >= DWMAC_CORE_4_00) { - stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan, rxfifosz, - rxqmode); - stmmac_dma_tx_mode(priv, priv->ioaddr, txmode, chan, txfifosz, - txqmode); - } else { - stmmac_dma_mode(priv, priv->ioaddr, txmode, rxmode, rxfifosz); - } + stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan, rxfifosz, rxqmode); + stmmac_dma_tx_mode(priv, priv->ioaddr, txmode, chan, txfifosz, txqmode); } static bool stmmac_safety_feat_interrupt(struct stmmac_priv *priv) { - int ret = false; + int ret; - /* Safety features are only available in cores >= 5.10 */ - if (priv->synopsys_id < DWMAC_CORE_5_10) - return ret; ret = stmmac_safety_feat_irq_status(priv, priv->dev, priv->ioaddr, priv->dma_cap.asp, &priv->sstats); if (ret && (ret != -EINVAL)) { @@ -2108,14 +2085,6 @@ static void stmmac_mmc_setup(struct stmmac_priv *priv) unsigned int mode = MMC_CNTRL_RESET_ON_READ | MMC_CNTRL_COUNTER_RESET | MMC_CNTRL_PRESET | MMC_CNTRL_FULL_HALF_PRESET; - if (priv->synopsys_id >= DWMAC_CORE_4_00) { - priv->ptpaddr = priv->ioaddr + PTP_GMAC4_OFFSET; - priv->mmcaddr = priv->ioaddr + MMC_GMAC4_OFFSET; - } else { - priv->ptpaddr = priv->ioaddr + PTP_GMAC3_X_OFFSET; - priv->mmcaddr = priv->ioaddr + MMC_GMAC3_X_OFFSET; - } - dwmac_mmc_intr_all_mask(priv->mmcaddr); if (priv->dma_cap.rmon) { @@ -2169,10 +2138,9 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) { u32 rx_channels_count = priv->plat->rx_queues_to_use; u32 tx_channels_count = priv->plat->tx_queues_to_use; + u32 dma_csr_ch = max(rx_channels_count, tx_channels_count); struct stmmac_rx_queue *rx_q; struct stmmac_tx_queue *tx_q; - u32 dummy_dma_rx_phy = 0; - u32 dummy_dma_tx_phy = 0; u32 chan = 0; int atds = 0; int ret = 0; @@ -2191,48 +2159,39 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) return ret; } - if 
(priv->synopsys_id >= DWMAC_CORE_4_00) { - /* DMA Configuration */ - stmmac_dma_init(priv, priv->ioaddr, priv->plat->dma_cfg, - dummy_dma_tx_phy, dummy_dma_rx_phy, atds); - - /* DMA RX Channel Configuration */ - for (chan = 0; chan < rx_channels_count; chan++) { - rx_q = &priv->rx_queue[chan]; - - stmmac_init_rx_chan(priv, priv->ioaddr, - priv->plat->dma_cfg, rx_q->dma_rx_phy, - chan); - - rx_q->rx_tail_addr = rx_q->dma_rx_phy + - (DMA_RX_SIZE * sizeof(struct dma_desc)); - stmmac_set_rx_tail_ptr(priv, priv->ioaddr, - rx_q->rx_tail_addr, chan); - } - - /* DMA TX Channel Configuration */ - for (chan = 0; chan < tx_channels_count; chan++) { - tx_q = &priv->tx_queue[chan]; + /* DMA RX Channel Configuration */ + for (chan = 0; chan < rx_channels_count; chan++) { + rx_q = &priv->rx_queue[chan]; - stmmac_init_chan(priv, priv->ioaddr, - priv->plat->dma_cfg, chan); + stmmac_init_rx_chan(priv, priv->ioaddr, priv->plat->dma_cfg, + rx_q->dma_rx_phy, chan); - stmmac_init_tx_chan(priv, priv->ioaddr, - priv->plat->dma_cfg, tx_q->dma_tx_phy, - chan); + rx_q->rx_tail_addr = rx_q->dma_rx_phy + + (DMA_RX_SIZE * sizeof(struct dma_desc)); + stmmac_set_rx_tail_ptr(priv, priv->ioaddr, + rx_q->rx_tail_addr, chan); + } - tx_q->tx_tail_addr = tx_q->dma_tx_phy + - (DMA_TX_SIZE * sizeof(struct dma_desc)); - stmmac_set_tx_tail_ptr(priv, priv->ioaddr, - tx_q->tx_tail_addr, chan); - } - } else { - rx_q = &priv->rx_queue[chan]; + /* DMA TX Channel Configuration */ + for (chan = 0; chan < tx_channels_count; chan++) { tx_q = &priv->tx_queue[chan]; - stmmac_dma_init(priv, priv->ioaddr, priv->plat->dma_cfg, - tx_q->dma_tx_phy, rx_q->dma_rx_phy, atds); + + stmmac_init_tx_chan(priv, priv->ioaddr, priv->plat->dma_cfg, + tx_q->dma_tx_phy, chan); + + tx_q->tx_tail_addr = tx_q->dma_tx_phy + + (DMA_TX_SIZE * sizeof(struct dma_desc)); + stmmac_set_tx_tail_ptr(priv, priv->ioaddr, + tx_q->tx_tail_addr, chan); } + /* DMA CSR Channel configuration */ + for (chan = 0; chan < dma_csr_ch; chan++) + 
stmmac_init_chan(priv, priv->ioaddr, priv->plat->dma_cfg, chan); + + /* DMA Configuration */ + stmmac_dma_init(priv, priv->ioaddr, priv->plat->dma_cfg, atds); + if (priv->plat->axi) stmmac_axi(priv, priv->ioaddr, priv->plat->axi); @@ -2515,12 +2474,10 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) stmmac_core_init(priv, priv->hw, dev); /* Initialize MTL*/ - if (priv->synopsys_id >= DWMAC_CORE_4_00) - stmmac_mtl_configuration(priv); + stmmac_mtl_configuration(priv); /* Initialize Safety Features */ - if (priv->synopsys_id >= DWMAC_CORE_5_10) - stmmac_safety_feat_configuration(priv); + stmmac_safety_feat_configuration(priv); ret = stmmac_rx_ipc(priv, priv->hw); if (!ret) { @@ -3074,10 +3031,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (enh_desc) is_jumbo = stmmac_is_jumbo_frm(priv, skb->len, enh_desc); - if (unlikely(is_jumbo) && likely(priv->synopsys_id < - DWMAC_CORE_4_00)) { + if (unlikely(is_jumbo)) { entry = stmmac_jumbo_frm(priv, tx_q, skb, csum_insertion); - if (unlikely(entry < 0)) + if (unlikely(entry < 0) && (entry != -EINVAL)) goto dma_map_err; } @@ -3100,10 +3056,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) goto dma_map_err; /* should reuse desc w/o issues */ tx_q->tx_skbuff_dma[entry].buf = des; - if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) - desc->des0 = cpu_to_le32(des); - else - desc->des2 = cpu_to_le32(des); + + stmmac_set_desc_addr(priv, desc, des); tx_q->tx_skbuff_dma[entry].map_as_page = true; tx_q->tx_skbuff_dma[entry].len = len; @@ -3158,13 +3112,16 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) * element in case of no SG. 
*/ priv->tx_count_frames += nfrags + 1; - if (likely(priv->tx_coal_frames > priv->tx_count_frames)) { + if (likely(priv->tx_coal_frames > priv->tx_count_frames) && + !priv->tx_timer_armed) { mod_timer(&priv->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer)); + priv->tx_timer_armed = true; } else { priv->tx_count_frames = 0; stmmac_set_tx_ic(priv, desc); priv->xstats.tx_set_ic_bit++; + priv->tx_timer_armed = false; } skb_tx_timestamp(skb); @@ -3182,10 +3139,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) goto dma_map_err; tx_q->tx_skbuff_dma[first_entry].buf = des; - if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) - first->des0 = cpu_to_le32(des); - else - first->des2 = cpu_to_le32(des); + + stmmac_set_desc_addr(priv, first, des); tx_q->tx_skbuff_dma[first_entry].len = nopaged_len; tx_q->tx_skbuff_dma[first_entry].last_segment = last_segment; @@ -3211,11 +3166,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); - if (priv->synopsys_id < DWMAC_CORE_4_00) - stmmac_enable_dma_transmission(priv, priv->ioaddr); - else - stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, - queue); + stmmac_enable_dma_transmission(priv, priv->ioaddr); + stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue); return NETDEV_TX_OK; @@ -3299,13 +3251,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) break; } - if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) { - p->des0 = cpu_to_le32(rx_q->rx_skbuff_dma[entry]); - p->des1 = 0; - } else { - p->des2 = cpu_to_le32(rx_q->rx_skbuff_dma[entry]); - } - + stmmac_set_desc_addr(priv, p, rx_q->rx_skbuff_dma[entry]); stmmac_refill_desc3(priv, rx_q, p); if (rx_q->rx_zeroc_thresh > 0) @@ -3316,10 +3262,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) } dma_wmb(); - if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) - stmmac_init_rx_desc(priv, 
p, priv->use_riwt, 0, 0); - else - stmmac_set_rx_owner(priv, p); + stmmac_set_rx_owner(priv, p, priv->use_riwt); dma_wmb(); @@ -3407,11 +3350,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) int frame_len; unsigned int des; - if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) - des = le32_to_cpu(p->des0); - else - des = le32_to_cpu(p->des2); - + stmmac_get_desc_addr(priv, p, &des); frame_len = stmmac_get_rx_frame_len(priv, p, coe); /* If frame length is greater than skb buffer size @@ -3705,6 +3644,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id) /* To handle GMAC own interrupts */ if ((priv->plat->has_gmac) || (priv->plat->has_gmac4)) { int status = stmmac_host_irq_status(priv, priv->hw, &priv->xstats); + int mtl_status; if (unlikely(status)) { /* For LPI we need to save the tx status */ @@ -3714,20 +3654,18 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id) priv->tx_path_in_lpi_mode = false; } - if (priv->synopsys_id >= DWMAC_CORE_4_00) { - for (queue = 0; queue < queues_count; queue++) { - struct stmmac_rx_queue *rx_q = - &priv->rx_queue[queue]; + for (queue = 0; queue < queues_count; queue++) { + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; - status |= stmmac_host_mtl_irq_status(priv, - priv->hw, queue); + mtl_status = stmmac_host_mtl_irq_status(priv, priv->hw, + queue); + if (mtl_status != -EINVAL) + status |= mtl_status; - if (status & CORE_IRQ_MTL_RX_OVERFLOW) - stmmac_set_rx_tail_ptr(priv, - priv->ioaddr, - rx_q->rx_tail_addr, - queue); - } + if (status & CORE_IRQ_MTL_RX_OVERFLOW) + stmmac_set_rx_tail_ptr(priv, priv->ioaddr, + rx_q->rx_tail_addr, + queue); } /* PCS link status */ diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c index 1ab97d99b9ba..f41116488079 100644 --- a/drivers/net/hippi/rrunner.c +++ b/drivers/net/hippi/rrunner.c @@ -867,7 +867,7 @@ static u32 rr_handle_event(struct net_device *dev, u32 prodidx, u32 eidx) dev->name); goto drop; case E_FRM_ERR: - 
printk(KERN_WARNING "%s: Framming Error\n", + printk(KERN_WARNING "%s: Framing Error\n", dev->name); goto drop; case E_FLG_SYN_ERR: diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 47aecc4fa8c2..9f4d32e41c06 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -90,8 +90,12 @@ struct mlx5_flow_destination { union { u32 tir_num; struct mlx5_flow_table *ft; - u32 vport_num; struct mlx5_fc *counter; + struct { + u16 num; + u16 vhca_id; + bool vhca_id_valid; + } vport; }; }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index b8918a1da11f..b4ea8a9914c4 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -396,7 +396,7 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 reserved_at_0[0x8]; u8 source_sqn[0x18]; - u8 reserved_at_20[0x10]; + u8 source_eswitch_owner_vhca_id[0x10]; u8 source_port[0x10]; u8 outer_second_prio[0x3]; @@ -541,7 +541,8 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x19]; + u8 reserved_at_5[0x18]; + u8 merged_eswitch[0x1]; u8 nic_vport_node_guid_modify[0x1]; u8 nic_vport_port_guid_modify[0x1]; @@ -1131,8 +1132,9 @@ enum mlx5_flow_destination_type { struct mlx5_ifc_dest_format_struct_bits { u8 destination_type[0x8]; u8 destination_id[0x18]; - - u8 reserved_at_20[0x20]; + u8 destination_eswitch_owner_vhca_id_valid[0x1]; + u8 reserved_at_21[0xf]; + u8 destination_eswitch_owner_vhca_id[0x10]; }; struct mlx5_ifc_flow_counter_list_bits { @@ -6977,7 +6979,9 @@ struct mlx5_ifc_create_flow_group_in_bits { u8 reserved_at_a0[0x8]; u8 table_id[0x18]; - u8 reserved_at_c0[0x20]; + u8 source_eswitch_owner_vhca_id_valid[0x1]; + + u8 reserved_at_c1[0x1f]; u8 start_flow_index[0x20]; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 807776928cb8..72705eaf4b84 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -218,6 +218,7 @@ struct 
tcp_sock { reord:1; /* reordering detected */ } rack; u16 advmss; /* Advertised MSS */ + u8 compressed_ack; u32 chrono_start; /* Start time in jiffies of a TCP chrono */ u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u8 chrono_type:2, /* current chronograph type */ @@ -297,6 +298,7 @@ struct tcp_sock { u32 sacked_out; /* SACK'd packets */ struct hrtimer pacing_timer; + struct hrtimer compressed_ack_timer; /* from STCP, retrans queue hinting */ struct sk_buff* lost_skb_hint; diff --git a/include/net/devlink.h b/include/net/devlink.h index 2e4f71e16e95..9686a1aa4ec9 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -35,6 +35,14 @@ struct devlink { char priv[0] __aligned(NETDEV_ALIGN); }; +struct devlink_port_attrs { + bool set; + enum devlink_port_flavour flavour; + u32 port_number; /* same value as "split group" */ + bool split; + u32 split_subport_number; +}; + struct devlink_port { struct list_head list; struct devlink *devlink; @@ -43,8 +51,7 @@ struct devlink_port { enum devlink_port_type type; enum devlink_port_type desired_type; void *type_dev; - bool split; - u32 split_group; + struct devlink_port_attrs attrs; }; struct devlink_sb_pool_info { @@ -367,8 +374,12 @@ void devlink_port_type_eth_set(struct devlink_port *devlink_port, void devlink_port_type_ib_set(struct devlink_port *devlink_port, struct ib_device *ibdev); void devlink_port_type_clear(struct devlink_port *devlink_port); -void devlink_port_split_set(struct devlink_port *devlink_port, - u32 split_group); +void devlink_port_attrs_set(struct devlink_port *devlink_port, + enum devlink_port_flavour flavour, + u32 port_number, bool split, + u32 split_subport_number); +int devlink_port_get_phys_port_name(struct devlink_port *devlink_port, + char *name, size_t len); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, @@ -466,11 +477,20 @@ static inline void 
devlink_port_type_clear(struct devlink_port *devlink_port) { } -static inline void devlink_port_split_set(struct devlink_port *devlink_port, - u32 split_group) +static inline void devlink_port_attrs_set(struct devlink_port *devlink_port, + enum devlink_port_flavour flavour, + u32 port_number, bool split, + u32 split_subport_number) { } +static inline int +devlink_port_get_phys_port_name(struct devlink_port *devlink_port, + char *name, size_t len) +{ + return -EOPNOTSUPP; +} + static inline int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 8491bc9c86b1..661348f23ea5 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -160,6 +160,8 @@ struct netns_ipv4 { int sysctl_tcp_pacing_ca_ratio; int sysctl_tcp_wmem[3]; int sysctl_tcp_rmem[3]; + int sysctl_tcp_comp_sack_nr; + unsigned long sysctl_tcp_comp_sack_delay_ns; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 6deb540297cc..952d842a604a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -559,7 +559,10 @@ void tcp_init_xmit_timers(struct sock *); static inline void tcp_clear_xmit_timers(struct sock *sk) { if (hrtimer_try_to_cancel(&tcp_sk(sk)->pacing_timer) == 1) - sock_put(sk); + __sock_put(sk); + + if (hrtimer_try_to_cancel(&tcp_sk(sk)->compressed_ack_timer) == 1) + __sock_put(sk); inet_csk_clear_xmit_timers(sk); } diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 1df65a4c2044..75cb5450c851 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -132,6 +132,16 @@ enum devlink_eswitch_encap_mode { DEVLINK_ESWITCH_ENCAP_MODE_BASIC, }; +enum devlink_port_flavour { + DEVLINK_PORT_FLAVOUR_PHYSICAL, /* Any kind of a port physically + * facing the user. 
+ */ + DEVLINK_PORT_FLAVOUR_CPU, /* CPU port */ + DEVLINK_PORT_FLAVOUR_DSA, /* Distributed switch architecture + * interconnect port. + */ +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, @@ -224,6 +234,10 @@ enum devlink_attr { DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID, /* u64 */ DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,/* u64 */ + DEVLINK_ATTR_PORT_FLAVOUR, /* u16 */ + DEVLINK_ATTR_PORT_NUMBER, /* u32 */ + DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER, /* u32 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index d02e859301ff..750d89120335 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -278,6 +278,7 @@ enum LINUX_MIB_TCPMTUPSUCCESS, /* TCPMTUPSuccess */ LINUX_MIB_TCPDELIVERED, /* TCPDelivered */ LINUX_MIB_TCPDELIVEREDCE, /* TCPDeliveredCE */ + LINUX_MIB_TCPACKCOMPRESSED, /* TCPAckCompressed */ __LINUX_MIB_MAX }; diff --git a/net/core/devlink.c b/net/core/devlink.c index ad1317376798..5c8a40e1a01e 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -453,6 +453,27 @@ static void devlink_notify(struct devlink *devlink, enum devlink_command cmd) msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } +static int devlink_nl_port_attrs_put(struct sk_buff *msg, + struct devlink_port *devlink_port) +{ + struct devlink_port_attrs *attrs = &devlink_port->attrs; + + if (!attrs->set) + return 0; + if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour)) + return -EMSGSIZE; + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER, attrs->port_number)) + return -EMSGSIZE; + if (!attrs->split) + return 0; + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP, attrs->port_number)) + return -EMSGSIZE; + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER, + attrs->split_subport_number)) + return -EMSGSIZE; + return 0; +} + static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink 
*devlink, struct devlink_port *devlink_port, enum devlink_command cmd, u32 portid, @@ -492,9 +513,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, ibdev->name)) goto nla_put_failure; } - if (devlink_port->split && - nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP, - devlink_port->split_group)) + if (devlink_nl_port_attrs_put(msg, devlink_port)) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -2971,19 +2990,64 @@ void devlink_port_type_clear(struct devlink_port *devlink_port) EXPORT_SYMBOL_GPL(devlink_port_type_clear); /** - * devlink_port_split_set - Set port is split + * devlink_port_attrs_set - Set port attributes * * @devlink_port: devlink port - * @split_group: split group - identifies group split port is part of + * @flavour: flavour of the port + * @port_number: number of the port that is facing user, for example + * the front panel port number + * @split: indicates if this is split port + * @split_subport_number: if the port is split, this is the number + * of subport. 
*/ -void devlink_port_split_set(struct devlink_port *devlink_port, - u32 split_group) -{ - devlink_port->split = true; - devlink_port->split_group = split_group; +void devlink_port_attrs_set(struct devlink_port *devlink_port, + enum devlink_port_flavour flavour, + u32 port_number, bool split, + u32 split_subport_number) +{ + struct devlink_port_attrs *attrs = &devlink_port->attrs; + + attrs->set = true; + attrs->flavour = flavour; + attrs->port_number = port_number; + attrs->split = split; + attrs->split_subport_number = split_subport_number; devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); } -EXPORT_SYMBOL_GPL(devlink_port_split_set); +EXPORT_SYMBOL_GPL(devlink_port_attrs_set); + +int devlink_port_get_phys_port_name(struct devlink_port *devlink_port, + char *name, size_t len) +{ + struct devlink_port_attrs *attrs = &devlink_port->attrs; + int n = 0; + + if (!attrs->set) + return -EOPNOTSUPP; + + switch (attrs->flavour) { + case DEVLINK_PORT_FLAVOUR_PHYSICAL: + if (!attrs->split) + n = snprintf(name, len, "p%u", attrs->port_number); + else + n = snprintf(name, len, "p%us%u", attrs->port_number, + attrs->split_subport_number); + break; + case DEVLINK_PORT_FLAVOUR_CPU: + case DEVLINK_PORT_FLAVOUR_DSA: + /* As CPU and DSA ports do not have a netdevice associated + * case should not ever happen. + */ + WARN_ON(1); + return -EINVAL; + } + + if (n >= len) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_port_get_phys_port_name); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index adf50fbc4c13..00126cda4319 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -270,7 +270,28 @@ static int dsa_port_setup(struct dsa_port *dp) case DSA_PORT_TYPE_UNUSED: break; case DSA_PORT_TYPE_CPU: + /* dp->index is used now as port_number. However + * CPU ports should have separate numbering + * independent from front panel port numbers. 
+ */ + devlink_port_attrs_set(&dp->devlink_port, + DEVLINK_PORT_FLAVOUR_CPU, + dp->index, false, 0); + err = dsa_port_link_register_of(dp); + if (err) { + dev_err(ds->dev, "failed to setup link for port %d.%d\n", + ds->index, dp->index); + return err; + } + break; case DSA_PORT_TYPE_DSA: + /* dp->index is used now as port_number. However + * DSA ports should have separate numbering + * independent from front panel port numbers. + */ + devlink_port_attrs_set(&dp->devlink_port, + DEVLINK_PORT_FLAVOUR_DSA, + dp->index, false, 0); err = dsa_port_link_register_of(dp); if (err) { dev_err(ds->dev, "failed to setup link for port %d.%d\n", @@ -279,6 +300,9 @@ static int dsa_port_setup(struct dsa_port *dp) } break; case DSA_PORT_TYPE_USER: + devlink_port_attrs_set(&dp->devlink_port, + DEVLINK_PORT_FLAVOUR_PHYSICAL, + dp->index, false, 0); err = dsa_slave_create(dp); if (err) dev_err(ds->dev, "failed to create slave for port %d.%d\n", diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 261b71d0ccc5..6c1ff89a60fa 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -298,6 +298,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPMTUPSuccess", LINUX_MIB_TCPMTUPSUCCESS), SNMP_MIB_ITEM("TCPDelivered", LINUX_MIB_TCPDELIVERED), SNMP_MIB_ITEM("TCPDeliveredCE", LINUX_MIB_TCPDELIVEREDCE), + SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4b195bac8ac0..d2eed3ddcb0a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -46,6 +46,7 @@ static int tcp_syn_retries_min = 1; static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; +static int comp_sack_nr_max = 255; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -1152,6 +1153,22 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = &one, }, { + .procname = 
"tcp_comp_sack_delay_ns", + .data = &init_net.ipv4.sysctl_tcp_comp_sack_delay_ns, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, + { + .procname = "tcp_comp_sack_nr", + .data = &init_net.ipv4.sysctl_tcp_comp_sack_nr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &comp_sack_nr_max, + }, + { .procname = "udp_rmem_min", .data = &init_net.ipv4.sysctl_udp_rmem_min, .maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 62b776f90037..0a2ea0bbf867 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2595,6 +2595,7 @@ int tcp_disconnect(struct sock *sk, int flags) dst_release(sk->sk_rx_dst); sk->sk_rx_dst = NULL; tcp_saved_syn_free(tp); + tp->compressed_ack = 0; /* Clean up fastopen related fields */ tcp_free_fastopen_req(tp); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0bf032839548..aebb29ab2fdf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4249,6 +4249,8 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) * If the sack array is full, forget about the last one. */ if (this_sack >= TCP_NUM_SACKS) { + if (tp->compressed_ack) + tcp_send_ack(sk); this_sack--; tp->rx_opt.num_sacks--; sp--; @@ -4715,8 +4717,6 @@ drop: if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp))) goto out_of_window; - tcp_enter_quickack_mode(sk); - if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { /* Partial packet, seq < rcv_next < end_seq */ SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n", @@ -5083,6 +5083,7 @@ static inline void tcp_data_snd_check(struct sock *sk) static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) { struct tcp_sock *tp = tcp_sk(sk); + unsigned long rtt, delay; /* More than one full frame received... 
*/ if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && @@ -5094,15 +5095,36 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || __tcp_select_window(sk) >= tp->rcv_wnd)) || /* We ACK each frame or... */ - tcp_in_quickack_mode(sk) || - /* We have out of order data. */ - (ofo_possible && !RB_EMPTY_ROOT(&tp->out_of_order_queue))) { - /* Then ack it now */ + tcp_in_quickack_mode(sk)) { +send_now: tcp_send_ack(sk); - } else { - /* Else, send delayed ack. */ + return; + } + + if (!ofo_possible || RB_EMPTY_ROOT(&tp->out_of_order_queue)) { tcp_send_delayed_ack(sk); + return; } + + if (!tcp_is_sack(tp) || + tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr) + goto send_now; + tp->compressed_ack++; + + if (hrtimer_is_queued(&tp->compressed_ack_timer)) + return; + + /* compress ack timer : 5 % of rtt, but no more than tcp_comp_sack_delay_ns */ + + rtt = tp->rcv_rtt_est.rtt_us; + if (tp->srtt_us && tp->srtt_us < rtt) + rtt = tp->srtt_us; + + delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns, + rtt * (NSEC_PER_USEC >> 3)/20); + sock_hold(sk); + hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay), + HRTIMER_MODE_REL_PINNED_SOFT); } static inline void tcp_ack_snd_check(struct sock *sk) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index caf23de88f8a..adbdb503db0c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2572,6 +2572,8 @@ static int __net_init tcp_sk_init(struct net *net) init_net.ipv4.sysctl_tcp_wmem, sizeof(init_net.ipv4.sysctl_tcp_wmem)); } + net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC; + net->ipv4.sysctl_tcp_comp_sack_nr = 44; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0d8f950a9006..437bb7ceba7f 100644 --- 
a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -162,6 +162,15 @@ static void tcp_event_data_sent(struct tcp_sock *tp, /* Account for an ACK we sent. */ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) { + struct tcp_sock *tp = tcp_sk(sk); + + if (unlikely(tp->compressed_ack)) { + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED, + tp->compressed_ack); + tp->compressed_ack = 0; + if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1) + __sock_put(sk); + } tcp_dec_quickack_mode(sk, pkts); inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 30cbfb69b1de..71593e4400ab 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -21,7 +21,7 @@ static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) return t1 > t2 || (t1 == t2 && after(seq1, seq2)); } -u32 tcp_rack_reo_wnd(const struct sock *sk) +static u32 tcp_rack_reo_wnd(const struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 92bdf64fffae..3b3611729928 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -708,6 +708,27 @@ out: sock_put(sk); } +static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer) +{ + struct tcp_sock *tp = container_of(timer, struct tcp_sock, compressed_ack_timer); + struct sock *sk = (struct sock *)tp; + + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) { + if (tp->compressed_ack) + tcp_send_ack(sk); + } else { + if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, + &sk->sk_tsq_flags)) + sock_hold(sk); + } + bh_unlock_sock(sk); + + sock_put(sk); + + return HRTIMER_NORESTART; +} + void tcp_init_xmit_timers(struct sock *sk) { inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, @@ -715,4 +736,8 @@ void tcp_init_xmit_timers(struct sock *sk) hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_SOFT); tcp_sk(sk)->pacing_timer.function = 
tcp_pace_kick; + + hrtimer_init(&tcp_sk(sk)->compressed_ack_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_PINNED_SOFT); + tcp_sk(sk)->compressed_ack_timer.function = tcp_compressed_ack_kick; } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 6ad4f6c771c3..48530dab5c94 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -46,11 +46,6 @@ static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group * creation */ -struct smc_lgr_list smc_lgr_list = { /* established link groups */ - .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock), - .list = LIST_HEAD_INIT(smc_lgr_list.list), -}; - static void smc_tcp_listen_work(struct work_struct *); static void smc_set_keepalive(struct sock *sk, int val) @@ -382,10 +377,13 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc) static void smc_conn_save_peer_info(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *clc) { - smc->conn.peer_conn_idx = clc->conn_idx; + int bufsize = smc_uncompress_bufsize(clc->rmbe_size); + + smc->conn.peer_rmbe_idx = clc->rmbe_idx; smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token); - smc->conn.peer_rmbe_size = smc_uncompress_bufsize(clc->rmbe_size); + smc->conn.peer_rmbe_size = bufsize; atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); + smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1); } static void smc_link_save_peer_info(struct smc_link *link, @@ -398,165 +396,186 @@ static void smc_link_save_peer_info(struct smc_link *link, link->peer_mtu = clc->qp_mtu; } -/* setup for RDMA connection of client */ -static int smc_connect_rdma(struct smc_sock *smc) +/* fall back during connect */ +static int smc_connect_fallback(struct smc_sock *smc) { - struct smc_clc_msg_accept_confirm aclc; - int local_contact = SMC_FIRST_CONTACT; - struct smc_ib_device *smcibdev; - struct smc_link *link; - u8 srv_first_contact; - int reason_code = 0; - int rc = 0; - u8 ibport; - - sock_hold(&smc->sk); /* sock put in passive closing */ + smc->use_fallback = true; + 
smc_copy_sock_settings_to_clc(smc); + if (smc->sk.sk_state == SMC_INIT) + smc->sk.sk_state = SMC_ACTIVE; + return 0; +} - if (smc->use_fallback) - goto out_connected; +/* decline and fall back during connect */ +static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code) +{ + int rc; - if (!tcp_sk(smc->clcsock->sk)->syn_smc) { - /* peer has not signalled SMC-capability */ - smc->use_fallback = true; - goto out_connected; + if (reason_code < 0) /* error, fallback is not possible */ + return reason_code; + if (reason_code != SMC_CLC_DECL_REPLY) { + rc = smc_clc_send_decline(smc, reason_code); + if (rc < 0) + return rc; } + return smc_connect_fallback(smc); +} - /* IPSec connections opt out of SMC-R optimizations */ - if (using_ipsec(smc)) { - reason_code = SMC_CLC_DECL_IPSEC; - goto decline_rdma; - } +/* abort connecting */ +static int smc_connect_abort(struct smc_sock *smc, int reason_code, + int local_contact) +{ + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(smc->conn.lgr); + mutex_unlock(&smc_create_lgr_pending); + smc_conn_free(&smc->conn); + if (reason_code < 0 && smc->sk.sk_state == SMC_INIT) + sock_put(&smc->sk); /* passive closing */ + return reason_code; +} + +/* check if there is a rdma device available for this connection. 
*/ +/* called for connect and listen */ +static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev, + u8 *ibport) +{ + int reason_code = 0; /* PNET table look up: search active ib_device and port * within same PNETID that also contains the ethernet device * used for the internal TCP socket */ - smc_pnet_find_roce_resource(smc->clcsock->sk, &smcibdev, &ibport); - if (!smcibdev) { + smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport); + if (!(*ibdev)) reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ - goto decline_rdma; - } + + return reason_code; +} + +/* CLC handshake during connect */ +static int smc_connect_clc(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm *aclc, + struct smc_ib_device *ibdev, u8 ibport) +{ + int rc = 0; /* do inband token exchange */ - reason_code = smc_clc_send_proposal(smc, smcibdev, ibport); - if (reason_code < 0) { - rc = reason_code; - goto out_err; - } - if (reason_code > 0) /* configuration error */ - goto decline_rdma; + rc = smc_clc_send_proposal(smc, ibdev, ibport); + if (rc) + return rc; /* receive SMC Accept CLC message */ - reason_code = smc_clc_wait_msg(smc, &aclc, sizeof(aclc), - SMC_CLC_ACCEPT); - if (reason_code < 0) { - rc = reason_code; - goto out_err; - } - if (reason_code > 0) - goto decline_rdma; + return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT); +} + +/* setup for RDMA connection of client */ +static int smc_connect_rdma(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm *aclc, + struct smc_ib_device *ibdev, u8 ibport) +{ + int local_contact = SMC_FIRST_CONTACT; + struct smc_link *link; + int reason_code = 0; - srv_first_contact = aclc.hdr.flag; mutex_lock(&smc_create_lgr_pending); - local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl, - srv_first_contact); + local_contact = smc_conn_create(smc, ibdev, ibport, &aclc->lcl, + aclc->hdr.flag); if (local_contact < 0) { - rc = local_contact; - if (rc == -ENOMEM) + if (local_contact == 
-ENOMEM) reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/ - else if (rc == -ENOLINK) + else if (local_contact == -ENOLINK) reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */ else reason_code = SMC_CLC_DECL_INTERR; /* other error */ - goto decline_rdma_unlock; + return smc_connect_abort(smc, reason_code, 0); } link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK]; - smc_conn_save_peer_info(smc, &aclc); + smc_conn_save_peer_info(smc, aclc); /* create send buffer and rmb */ - rc = smc_buf_create(smc); - if (rc) { - reason_code = SMC_CLC_DECL_MEM; - goto decline_rdma_unlock; - } + if (smc_buf_create(smc)) + return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact); if (local_contact == SMC_FIRST_CONTACT) - smc_link_save_peer_info(link, &aclc); + smc_link_save_peer_info(link, aclc); - rc = smc_rmb_rtoken_handling(&smc->conn, &aclc); - if (rc) { - reason_code = SMC_CLC_DECL_INTERR; - goto decline_rdma_unlock; - } + if (smc_rmb_rtoken_handling(&smc->conn, aclc)) + return smc_connect_abort(smc, SMC_CLC_DECL_INTERR, + local_contact); smc_close_init(smc); smc_rx_init(smc); if (local_contact == SMC_FIRST_CONTACT) { - rc = smc_ib_ready_link(link); - if (rc) { - reason_code = SMC_CLC_DECL_INTERR; - goto decline_rdma_unlock; - } + if (smc_ib_ready_link(link)) + return smc_connect_abort(smc, SMC_CLC_DECL_INTERR, + local_contact); } else { - if (!smc->conn.rmb_desc->reused) { - if (smc_reg_rmb(link, smc->conn.rmb_desc, true)) { - reason_code = SMC_CLC_DECL_INTERR; - goto decline_rdma_unlock; - } - } + if (!smc->conn.rmb_desc->reused && + smc_reg_rmb(link, smc->conn.rmb_desc, true)) + return smc_connect_abort(smc, SMC_CLC_DECL_INTERR, + local_contact); } smc_rmb_sync_sg_for_device(&smc->conn); - rc = smc_clc_send_confirm(smc); - if (rc) - goto out_err_unlock; + reason_code = smc_clc_send_confirm(smc); + if (reason_code) + return smc_connect_abort(smc, reason_code, local_contact); + + smc_tx_init(smc); if (local_contact == SMC_FIRST_CONTACT) { /* QP confirmation over RoCE 
fabric */ reason_code = smc_clnt_conf_first_link(smc); - if (reason_code < 0) { - rc = reason_code; - goto out_err_unlock; - } - if (reason_code > 0) - goto decline_rdma_unlock; + if (reason_code) + return smc_connect_abort(smc, reason_code, + local_contact); } - mutex_unlock(&smc_create_lgr_pending); - smc_tx_init(smc); -out_connected: smc_copy_sock_settings_to_clc(smc); if (smc->sk.sk_state == SMC_INIT) smc->sk.sk_state = SMC_ACTIVE; - return rc ? rc : local_contact; + return 0; +} + +/* perform steps before actually connecting */ +static int __smc_connect(struct smc_sock *smc) +{ + struct smc_clc_msg_accept_confirm aclc; + struct smc_ib_device *ibdev; + int rc = 0; + u8 ibport; -decline_rdma_unlock: - if (local_contact == SMC_FIRST_CONTACT) - smc_lgr_forget(smc->conn.lgr); - mutex_unlock(&smc_create_lgr_pending); - smc_conn_free(&smc->conn); -decline_rdma: - /* RDMA setup failed, switch back to TCP */ - smc->use_fallback = true; - if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { - rc = smc_clc_send_decline(smc, reason_code); - if (rc < 0) - goto out_err; - } - goto out_connected; + sock_hold(&smc->sk); /* sock put in passive closing */ -out_err_unlock: - if (local_contact == SMC_FIRST_CONTACT) - smc_lgr_forget(smc->conn.lgr); - mutex_unlock(&smc_create_lgr_pending); - smc_conn_free(&smc->conn); -out_err: - if (smc->sk.sk_state == SMC_INIT) - sock_put(&smc->sk); /* passive closing */ - return rc; + if (smc->use_fallback) + return smc_connect_fallback(smc); + + /* if peer has not signalled SMC-capability, fall back */ + if (!tcp_sk(smc->clcsock->sk)->syn_smc) + return smc_connect_fallback(smc); + + /* IPSec connections opt out of SMC-R optimizations */ + if (using_ipsec(smc)) + return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC); + + /* check if a RDMA device is available; if not, fall back */ + if (smc_check_rdma(smc, &ibdev, &ibport)) + return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR); + + /* perform CLC handshake */ + rc = 
smc_connect_clc(smc, &aclc, ibdev, ibport); + if (rc) + return smc_connect_decline_fallback(smc, rc); + + /* connect using rdma */ + rc = smc_connect_rdma(smc, &aclc, ibdev, ibport); + if (rc) + return smc_connect_decline_fallback(smc, rc); + + return 0; } static int smc_connect(struct socket *sock, struct sockaddr *addr, @@ -592,8 +611,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, if (rc) goto out; - /* setup RDMA connection */ - rc = smc_connect_rdma(smc); + rc = __smc_connect(smc); if (rc < 0) goto out; else @@ -791,182 +809,239 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) return 0; } -/* setup for RDMA connection of server */ -static void smc_listen_work(struct work_struct *work) +/* listen worker: finish */ +static void smc_listen_out(struct smc_sock *new_smc) { - struct smc_sock *new_smc = container_of(work, struct smc_sock, - smc_listen_work); - struct smc_clc_msg_proposal_prefix *pclc_prfx; - struct socket *newclcsock = new_smc->clcsock; struct smc_sock *lsmc = new_smc->listen_smc; - struct smc_clc_msg_accept_confirm cclc; - int local_contact = SMC_REUSE_CONTACT; struct sock *newsmcsk = &new_smc->sk; - struct smc_clc_msg_proposal *pclc; - struct smc_ib_device *smcibdev; - u8 buf[SMC_CLC_MAX_LEN]; - struct smc_link *link; - int reason_code = 0; - int rc = 0; - u8 ibport; - - if (new_smc->use_fallback) - goto out_connected; - /* check if peer is smc capable */ - if (!tcp_sk(newclcsock->sk)->syn_smc) { - new_smc->use_fallback = true; - goto out_connected; + lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); + if (lsmc->sk.sk_state == SMC_LISTEN) { + smc_accept_enqueue(&lsmc->sk, newsmcsk); + } else { /* no longer listening */ + smc_close_non_accepted(newsmcsk); } + release_sock(&lsmc->sk); - /* do inband token exchange - - *wait for and receive SMC Proposal CLC message - */ - reason_code = smc_clc_wait_msg(new_smc, &buf, sizeof(buf), - SMC_CLC_PROPOSAL); - if (reason_code < 0) - goto out_err; - if (reason_code > 0) - 
goto decline_rdma; + /* Wake up accept */ + lsmc->sk.sk_data_ready(&lsmc->sk); + sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */ +} - /* IPSec connections opt out of SMC-R optimizations */ - if (using_ipsec(new_smc)) { - reason_code = SMC_CLC_DECL_IPSEC; - goto decline_rdma; - } +/* listen worker: finish in state connected */ +static void smc_listen_out_connected(struct smc_sock *new_smc) +{ + struct sock *newsmcsk = &new_smc->sk; - /* PNET table look up: search active ib_device and port - * within same PNETID that also contains the ethernet device - * used for the internal TCP socket - */ - smc_pnet_find_roce_resource(newclcsock->sk, &smcibdev, &ibport); - if (!smcibdev) { - reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ - goto decline_rdma; + sk_refcnt_debug_inc(newsmcsk); + if (newsmcsk->sk_state == SMC_INIT) + newsmcsk->sk_state = SMC_ACTIVE; + + smc_listen_out(new_smc); +} + +/* listen worker: finish in error state */ +static void smc_listen_out_err(struct smc_sock *new_smc) +{ + struct sock *newsmcsk = &new_smc->sk; + + if (newsmcsk->sk_state == SMC_INIT) + sock_put(&new_smc->sk); /* passive closing */ + newsmcsk->sk_state = SMC_CLOSED; + smc_conn_free(&new_smc->conn); + + smc_listen_out(new_smc); +} + +/* listen worker: decline and fall back if possible */ +static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, + int local_contact) +{ + /* RDMA setup failed, switch back to TCP */ + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(new_smc->conn.lgr); + if (reason_code < 0) { /* error, no fallback possible */ + smc_listen_out_err(new_smc); + return; + } + smc_conn_free(&new_smc->conn); + new_smc->use_fallback = true; + if (reason_code && reason_code != SMC_CLC_DECL_REPLY) { + if (smc_clc_send_decline(new_smc, reason_code) < 0) { + smc_listen_out_err(new_smc); + return; + } } + smc_listen_out_connected(new_smc); +} + +/* listen worker: check prefixes */ +static int smc_listen_rdma_check(struct smc_sock 
*new_smc, + struct smc_clc_msg_proposal *pclc) +{ + struct smc_clc_msg_proposal_prefix *pclc_prfx; + struct socket *newclcsock = new_smc->clcsock; - pclc = (struct smc_clc_msg_proposal *)&buf; pclc_prfx = smc_clc_proposal_get_prefix(pclc); + if (smc_clc_prfx_match(newclcsock, pclc_prfx)) + return SMC_CLC_DECL_CNFERR; - rc = smc_clc_prfx_match(newclcsock, pclc_prfx); - if (rc) { - reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ - goto decline_rdma; - } + return 0; +} +/* listen worker: initialize connection and buffers */ +static int smc_listen_rdma_init(struct smc_sock *new_smc, + struct smc_clc_msg_proposal *pclc, + struct smc_ib_device *ibdev, u8 ibport, + int *local_contact) +{ /* allocate connection / link group */ - mutex_lock(&smc_create_lgr_pending); - local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl, - 0); - if (local_contact < 0) { - rc = local_contact; - if (rc == -ENOMEM) - reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/ - goto decline_rdma_unlock; + *local_contact = smc_conn_create(new_smc, ibdev, ibport, &pclc->lcl, 0); + if (*local_contact < 0) { + if (*local_contact == -ENOMEM) + return SMC_CLC_DECL_MEM;/* insufficient memory*/ + return SMC_CLC_DECL_INTERR; /* other error */ } - link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; /* create send buffer and rmb */ - rc = smc_buf_create(new_smc); - if (rc) { - reason_code = SMC_CLC_DECL_MEM; - goto decline_rdma_unlock; - } + if (smc_buf_create(new_smc)) + return SMC_CLC_DECL_MEM; - smc_close_init(new_smc); - smc_rx_init(new_smc); + return 0; +} + +/* listen worker: register buffers */ +static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact) +{ + struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; if (local_contact != SMC_FIRST_CONTACT) { if (!new_smc->conn.rmb_desc->reused) { - if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true)) { - reason_code = SMC_CLC_DECL_INTERR; - goto decline_rdma_unlock; - } + if (smc_reg_rmb(link, 
new_smc->conn.rmb_desc, true)) + return SMC_CLC_DECL_INTERR; } } smc_rmb_sync_sg_for_device(&new_smc->conn); - rc = smc_clc_send_accept(new_smc, local_contact); - if (rc) - goto out_err_unlock; + return 0; +} + +/* listen worker: finish RDMA setup */ +static void smc_listen_rdma_finish(struct smc_sock *new_smc, + struct smc_clc_msg_accept_confirm *cclc, + int local_contact) +{ + struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; + int reason_code = 0; - /* receive SMC Confirm CLC message */ - reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), - SMC_CLC_CONFIRM); - if (reason_code < 0) - goto out_err_unlock; - if (reason_code > 0) - goto decline_rdma_unlock; - smc_conn_save_peer_info(new_smc, &cclc); if (local_contact == SMC_FIRST_CONTACT) - smc_link_save_peer_info(link, &cclc); + smc_link_save_peer_info(link, cclc); - rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc); - if (rc) { + if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) { reason_code = SMC_CLC_DECL_INTERR; - goto decline_rdma_unlock; + goto decline; } if (local_contact == SMC_FIRST_CONTACT) { - rc = smc_ib_ready_link(link); - if (rc) { + if (smc_ib_ready_link(link)) { reason_code = SMC_CLC_DECL_INTERR; - goto decline_rdma_unlock; + goto decline; } /* QP confirmation over RoCE fabric */ reason_code = smc_serv_conf_first_link(new_smc); - if (reason_code < 0) - /* peer is not aware of a problem */ - goto out_err_unlock; - if (reason_code > 0) - goto decline_rdma_unlock; + if (reason_code) + goto decline; } + return; - smc_tx_init(new_smc); +decline: mutex_unlock(&smc_create_lgr_pending); + smc_listen_decline(new_smc, reason_code, local_contact); +} -out_connected: - sk_refcnt_debug_inc(newsmcsk); - if (newsmcsk->sk_state == SMC_INIT) - newsmcsk->sk_state = SMC_ACTIVE; -enqueue: - lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); - if (lsmc->sk.sk_state == SMC_LISTEN) { - smc_accept_enqueue(&lsmc->sk, newsmcsk); - } else { /* no longer listening */ - 
smc_close_non_accepted(newsmcsk); +/* setup for RDMA connection of server */ +static void smc_listen_work(struct work_struct *work) +{ + struct smc_sock *new_smc = container_of(work, struct smc_sock, + smc_listen_work); + struct socket *newclcsock = new_smc->clcsock; + struct smc_clc_msg_accept_confirm cclc; + struct smc_clc_msg_proposal *pclc; + struct smc_ib_device *ibdev; + u8 buf[SMC_CLC_MAX_LEN]; + int local_contact = 0; + int reason_code = 0; + int rc = 0; + u8 ibport; + + if (new_smc->use_fallback) { + smc_listen_out_connected(new_smc); + return; } - release_sock(&lsmc->sk); - /* Wake up accept */ - lsmc->sk.sk_data_ready(&lsmc->sk); - sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */ - return; + /* check if peer is smc capable */ + if (!tcp_sk(newclcsock->sk)->syn_smc) { + new_smc->use_fallback = true; + smc_listen_out_connected(new_smc); + return; + } -decline_rdma_unlock: - if (local_contact == SMC_FIRST_CONTACT) - smc_lgr_forget(new_smc->conn.lgr); - mutex_unlock(&smc_create_lgr_pending); -decline_rdma: - /* RDMA setup failed, switch back to TCP */ - smc_conn_free(&new_smc->conn); - new_smc->use_fallback = true; - if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { - if (smc_clc_send_decline(new_smc, reason_code) < 0) - goto out_err; + /* do inband token exchange - + * wait for and receive SMC Proposal CLC message + */ + pclc = (struct smc_clc_msg_proposal *)&buf; + reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN, + SMC_CLC_PROPOSAL); + if (reason_code) { + smc_listen_decline(new_smc, reason_code, 0); + return; } - goto out_connected; -out_err_unlock: - if (local_contact == SMC_FIRST_CONTACT) - smc_lgr_forget(new_smc->conn.lgr); + /* IPSec connections opt out of SMC-R optimizations */ + if (using_ipsec(new_smc)) { + smc_listen_decline(new_smc, SMC_CLC_DECL_IPSEC, 0); + return; + } + + mutex_lock(&smc_create_lgr_pending); + smc_close_init(new_smc); + smc_rx_init(new_smc); + smc_tx_init(new_smc); + + /* check if RDMA is 
available */ + if (smc_check_rdma(new_smc, &ibdev, &ibport) || + smc_listen_rdma_check(new_smc, pclc) || + smc_listen_rdma_init(new_smc, pclc, ibdev, ibport, + &local_contact) || + smc_listen_rdma_reg(new_smc, local_contact)) { + /* SMC not supported, decline */ + mutex_unlock(&smc_create_lgr_pending); + smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact); + return; + } + + /* send SMC Accept CLC message */ + rc = smc_clc_send_accept(new_smc, local_contact); + if (rc) { + mutex_unlock(&smc_create_lgr_pending); + smc_listen_decline(new_smc, rc, local_contact); + return; + } + + /* receive SMC Confirm CLC message */ + reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), + SMC_CLC_CONFIRM); + if (reason_code) { + mutex_unlock(&smc_create_lgr_pending); + smc_listen_decline(new_smc, reason_code, local_contact); + return; + } + + /* finish worker */ + smc_listen_rdma_finish(new_smc, &cclc, local_contact); + smc_conn_save_peer_info(new_smc, &cclc); mutex_unlock(&smc_create_lgr_pending); -out_err: - if (newsmcsk->sk_state == SMC_INIT) - sock_put(&new_smc->sk); /* passive closing */ - newsmcsk->sk_state = SMC_CLOSED; - smc_conn_free(&new_smc->conn); - goto enqueue; /* queue new sock with sk_err set */ + smc_listen_out_connected(new_smc); } static void smc_tcp_listen_work(struct work_struct *work) @@ -1227,7 +1302,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, if (sk->sk_state == SMC_INIT && mask & EPOLLOUT && smc->clcsock->sk->sk_state != TCP_CLOSE) { - rc = smc_connect_rdma(smc); + rc = __smc_connect(smc); if (rc < 0) mask |= EPOLLERR; /* success cases including fallback */ @@ -1421,7 +1496,7 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd, /* output queue size (not send + not acked) */ if (smc->sk.sk_state == SMC_LISTEN) return -EINVAL; - answ = smc->conn.sndbuf_size - + answ = smc->conn.sndbuf_desc->len - atomic_read(&smc->conn.sndbuf_space); break; case SIOCOUTQNSD: @@ -1637,19 +1712,7 @@ out_pnet: static void 
__exit smc_exit(void) { - struct smc_link_group *lgr, *lg; - LIST_HEAD(lgr_freeing_list); - - spin_lock_bh(&smc_lgr_list.lock); - if (!list_empty(&smc_lgr_list.list)) - list_splice_init(&smc_lgr_list.list, &lgr_freeing_list); - spin_unlock_bh(&smc_lgr_list.lock); - list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) { - list_del_init(&lgr->list); - smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); - cancel_delayed_work_sync(&lgr->free_work); - smc_lgr_free(lgr); /* free link group */ - } + smc_core_exit(); static_branch_disable(&tcp_have_smc); smc_ib_unregister_client(); sock_unregister(PF_SMC); diff --git a/net/smc/smc.h b/net/smc/smc.h index ec209cd48d42..a1467e411645 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -118,7 +118,7 @@ struct smc_connection { struct rb_node alert_node; struct smc_link_group *lgr; /* link group of connection */ u32 alert_token_local; /* unique conn. id */ - u8 peer_conn_idx; /* from tcp handshake */ + u8 peer_rmbe_idx; /* from tcp handshake */ int peer_rmbe_size; /* size of peer rx buffer */ atomic_t peer_rmbe_space;/* remaining free bytes in peer * rmbe @@ -126,9 +126,7 @@ struct smc_connection { int rtoken_idx; /* idx to peer RMB rkey/addr */ struct smc_buf_desc *sndbuf_desc; /* send buffer descriptor */ - int sndbuf_size; /* sndbuf size <== sock wmem */ struct smc_buf_desc *rmb_desc; /* RMBE descriptor */ - int rmbe_size; /* RMBE size <== sock rmem */ int rmbe_size_short;/* compressed notation */ int rmbe_update_limit; /* lower limit for consumer @@ -153,6 +151,7 @@ struct smc_connection { u16 tx_cdc_seq; /* sequence # for CDC send */ spinlock_t send_lock; /* protect wr_sends */ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ + u32 tx_off; /* base offset in peer rmb */ struct smc_host_cdc_msg local_rx_ctrl; /* filled during event_handl. * .prod cf. 
TCP rcv_nxt @@ -221,41 +220,6 @@ static inline u32 ntoh24(u8 *net) return be32_to_cpu(t); } -#define SMC_BUF_MIN_SIZE 16384 /* minimum size of an RMB */ - -#define SMC_RMBE_SIZES 16 /* number of distinct sizes for an RMBE */ -/* theoretically, the RFC states that largest size would be 512K, - * i.e. compressed 5 and thus 6 sizes (0..5), despite - * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15) - */ - -/* convert the RMB size into the compressed notation - minimum 16K. - * In contrast to plain ilog2, this rounds towards the next power of 2, - * so the socket application gets at least its desired sndbuf / rcvbuf size. - */ -static inline u8 smc_compress_bufsize(int size) -{ - u8 compressed; - - if (size <= SMC_BUF_MIN_SIZE) - return 0; - - size = (size - 1) >> 14; - compressed = ilog2(size) + 1; - if (compressed >= SMC_RMBE_SIZES) - compressed = SMC_RMBE_SIZES - 1; - return compressed; -} - -/* convert the RMB size from compressed notation into integer */ -static inline int smc_uncompress_bufsize(u8 compressed) -{ - u32 size; - - size = 0x00000001 << (((int)compressed) + 14); - return (int)size; -} - #ifdef CONFIG_XFRM static inline bool using_ipsec(struct smc_sock *smc) { @@ -269,12 +233,6 @@ static inline bool using_ipsec(struct smc_sock *smc) } #endif -struct smc_clc_msg_local; - -void smc_conn_free(struct smc_connection *conn); -int smc_conn_create(struct smc_sock *smc, - struct smc_ib_device *smcibdev, u8 ibport, - struct smc_clc_msg_local *lcl, int srv_first_contact); struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock); void smc_close_non_accepted(struct sock *sk); diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 42ad57365eca..8d2c079c87b0 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -44,13 +44,13 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, smc = container_of(cdcpend->conn, struct smc_sock, conn); bh_lock_sock(&smc->sk); if (!wc_status) { - diff = 
smc_curs_diff(cdcpend->conn->sndbuf_size, + diff = smc_curs_diff(cdcpend->conn->sndbuf_desc->len, &cdcpend->conn->tx_curs_fin, &cdcpend->cursor); /* sndbuf_space is decreased in smc_sendmsg */ smp_mb__before_atomic(); atomic_add(diff, &cdcpend->conn->sndbuf_space); - /* guarantee 0 <= sndbuf_space <= sndbuf_size */ + /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */ smp_mb__after_atomic(); smc_curs_write(&cdcpend->conn->tx_curs_fin, smc_curs_read(&cdcpend->cursor, cdcpend->conn), @@ -165,19 +165,12 @@ static inline bool smc_cdc_before(u16 seq1, u16 seq2) } static void smc_cdc_msg_recv_action(struct smc_sock *smc, - struct smc_link *link, struct smc_cdc_msg *cdc) { union smc_host_cursor cons_old, prod_old; struct smc_connection *conn = &smc->conn; int diff_cons, diff_prod; - if (!cdc->prod_flags.failover_validation) { - if (smc_cdc_before(ntohs(cdc->seqno), - conn->local_rx_ctrl.seqno)) - /* received seqno is old */ - return; - } smc_curs_write(&prod_old, smc_curs_read(&conn->local_rx_ctrl.prod, conn), conn); @@ -198,13 +191,13 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, smp_mb__after_atomic(); } - diff_prod = smc_curs_diff(conn->rmbe_size, &prod_old, + diff_prod = smc_curs_diff(conn->rmb_desc->len, &prod_old, &conn->local_rx_ctrl.prod); if (diff_prod) { /* bytes_to_rcv is decreased in smc_recvmsg */ smp_mb__before_atomic(); atomic_add(diff_prod, &conn->bytes_to_rcv); - /* guarantee 0 <= bytes_to_rcv <= rmbe_size */ + /* guarantee 0 <= bytes_to_rcv <= rmb_desc->len */ smp_mb__after_atomic(); smc->sk.sk_data_ready(&smc->sk); } else if ((conn->local_rx_ctrl.prod_flags.write_blocked) || @@ -236,26 +229,11 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, } /* called under tasklet context */ -static inline void smc_cdc_msg_recv(struct smc_cdc_msg *cdc, - struct smc_link *link, u64 wr_id) +static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc) { - struct smc_link_group *lgr = container_of(link, struct smc_link_group, - 
lnk[SMC_SINGLE_LINK]); - struct smc_connection *connection; - struct smc_sock *smc; - - /* lookup connection */ - read_lock_bh(&lgr->conns_lock); - connection = smc_lgr_find_conn(ntohl(cdc->token), lgr); - if (!connection) { - read_unlock_bh(&lgr->conns_lock); - return; - } - smc = container_of(connection, struct smc_sock, conn); sock_hold(&smc->sk); - read_unlock_bh(&lgr->conns_lock); bh_lock_sock(&smc->sk); - smc_cdc_msg_recv_action(smc, link, cdc); + smc_cdc_msg_recv_action(smc, cdc); bh_unlock_sock(&smc->sk); sock_put(&smc->sk); /* no free sk in softirq-context */ } @@ -266,12 +244,31 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf) { struct smc_link *link = (struct smc_link *)wc->qp->qp_context; struct smc_cdc_msg *cdc = buf; + struct smc_connection *conn; + struct smc_link_group *lgr; + struct smc_sock *smc; if (wc->byte_len < offsetof(struct smc_cdc_msg, reserved)) return; /* short message */ if (cdc->len != SMC_WR_TX_SIZE) return; /* invalid message */ - smc_cdc_msg_recv(cdc, link, wc->wr_id); + + /* lookup connection */ + lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); + read_lock_bh(&lgr->conns_lock); + conn = smc_lgr_find_conn(ntohl(cdc->token), lgr); + read_unlock_bh(&lgr->conns_lock); + if (!conn) + return; + smc = container_of(conn, struct smc_sock, conn); + + if (!cdc->prod_flags.failover_validation) { + if (smc_cdc_before(ntohs(cdc->seqno), + conn->local_rx_ctrl.seqno)) + /* received seqno is old */ + return; + } + smc_cdc_msg_recv(smc, cdc); } static struct smc_wr_rx_handler smc_cdc_rx_handlers[] = { diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 236cb3f12c71..717449b1da0b 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -442,7 +442,7 @@ int smc_clc_send_confirm(struct smc_sock *smc) hton24(cclc.qpn, link->roce_qp->qp_num); cclc.rmb_rkey = htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); - cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */ + cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ 
cclc.rmbe_alert_token = htonl(conn->alert_token_local); cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); cclc.rmbe_size = conn->rmbe_size_short; @@ -494,7 +494,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) hton24(aclc.qpn, link->roce_qp->qp_num); aclc.rmb_rkey = htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); - aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */ + aclc.rmbe_idx = 1; /* as long as 1 RMB = 1 RMBE */ aclc.rmbe_alert_token = htonl(conn->alert_token_local); aclc.qp_mtu = link->path_mtu; aclc.rmbe_size = conn->rmbe_size_short, diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 63bf1dc2c1f9..41ff9ea96139 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -97,7 +97,7 @@ struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ struct smc_clc_msg_local lcl; u8 qpn[3]; /* QP number */ __be32 rmb_rkey; /* RMB rkey */ - u8 conn_idx; /* Connection index, which RMBE in RMB */ + u8 rmbe_idx; /* Index of RMBE in RMB */ __be32 rmbe_alert_token;/* unique connection id */ #if defined(__BIG_ENDIAN_BITFIELD) u8 rmbe_size : 4, /* RMBE buf size (compressed notation) */ diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 08c05cd0bbae..1e5c0e90a706 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -30,10 +30,14 @@ #define SMC_LGR_FREE_DELAY_SERV (600 * HZ) #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10) -static u32 smc_lgr_num; /* unique link group number */ +static struct smc_lgr_list smc_lgr_list = { /* established link groups */ + .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock), + .list = LIST_HEAD_INIT(smc_lgr_list.list), + .num = 0, +}; -static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk, - bool is_rmb); +static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, + struct smc_buf_desc *buf_desc); static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) { @@ -181,8 +185,8 @@ static int smc_lgr_create(struct smc_sock *smc, 
INIT_LIST_HEAD(&lgr->sndbufs[i]); INIT_LIST_HEAD(&lgr->rmbs[i]); } - smc_lgr_num += SMC_LGR_NUM_INCR; - memcpy(&lgr->id, (u8 *)&smc_lgr_num, SMC_LGR_ID_SIZE); + smc_lgr_list.num += SMC_LGR_NUM_INCR; + memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE); INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work); lgr->conns_all = RB_ROOT; @@ -236,26 +240,21 @@ out: static void smc_buf_unuse(struct smc_connection *conn) { - if (conn->sndbuf_desc) { + if (conn->sndbuf_desc) conn->sndbuf_desc->used = 0; - conn->sndbuf_size = 0; - } if (conn->rmb_desc) { if (!conn->rmb_desc->regerr) { conn->rmb_desc->reused = 1; conn->rmb_desc->used = 0; - conn->rmbe_size = 0; } else { /* buf registration failed, reuse not possible */ struct smc_link_group *lgr = conn->lgr; - struct smc_link *lnk; write_lock_bh(&lgr->rmbs_lock); list_del(&conn->rmb_desc->list); write_unlock_bh(&lgr->rmbs_lock); - lnk = &lgr->lnk[SMC_SINGLE_LINK]; - smc_buf_free(conn->rmb_desc, lnk, true); + smc_buf_free(lgr, true, conn->rmb_desc); } } } @@ -281,9 +280,11 @@ static void smc_link_clear(struct smc_link *lnk) smc_wr_free_link_mem(lnk); } -static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk, - bool is_rmb) +static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, + struct smc_buf_desc *buf_desc) { + struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; + if (is_rmb) { if (buf_desc->mr_rx[SMC_SINGLE_LINK]) smc_ib_put_memory_region( @@ -302,7 +303,6 @@ static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk, static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) { - struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; struct smc_buf_desc *buf_desc, *bf_desc; struct list_head *buf_list; int i; @@ -315,7 +315,7 @@ static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) list_for_each_entry_safe(buf_desc, bf_desc, buf_list, list) { list_del(&buf_desc->list); - smc_buf_free(buf_desc, lnk, is_rmb); + smc_buf_free(lgr, is_rmb, 
buf_desc); } } } @@ -377,6 +377,18 @@ void smc_lgr_terminate(struct smc_link_group *lgr) smc_lgr_schedule_free_work(lgr); } +/* Called when IB port is terminated */ +void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport) +{ + struct smc_link_group *lgr, *l; + + list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { + if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev && + lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) + smc_lgr_terminate(lgr); + } +} + /* Determine vlan of internal TCP socket. * @vlan_id: address to store the determined vlan id into */ @@ -461,10 +473,10 @@ int smc_conn_create(struct smc_sock *smc, struct smc_clc_msg_local *lcl, int srv_first_contact) { struct smc_connection *conn = &smc->conn; + int local_contact = SMC_FIRST_CONTACT; struct smc_link_group *lgr; unsigned short vlan_id; enum smc_lgr_role role; - int local_contact = SMC_FIRST_CONTACT; int rc = 0; role = smc->listen_smc ? SMC_SERV : SMC_CLNT; @@ -530,14 +542,39 @@ out: return rc ? rc : local_contact; } +/* convert the RMB size into the compressed notation - minimum 16K. + * In contrast to plain ilog2, this rounds towards the next power of 2, + * so the socket application gets at least its desired sndbuf / rcvbuf size. 
+ */ +static u8 smc_compress_bufsize(int size) +{ + u8 compressed; + + if (size <= SMC_BUF_MIN_SIZE) + return 0; + + size = (size - 1) >> 14; + compressed = ilog2(size) + 1; + if (compressed >= SMC_RMBE_SIZES) + compressed = SMC_RMBE_SIZES - 1; + return compressed; +} + +/* convert the RMB size from compressed notation into integer */ +int smc_uncompress_bufsize(u8 compressed) +{ + u32 size; + + size = 0x00000001 << (((int)compressed) + 14); + return (int)size; +} + /* try to reuse a sndbuf or rmb description slot for a certain * buffer size; if not available, return NULL */ -static inline -struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr, - int compressed_bufsize, - rwlock_t *lock, - struct list_head *buf_list) +static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize, + rwlock_t *lock, + struct list_head *buf_list) { struct smc_buf_desc *buf_slot; @@ -589,7 +626,7 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1, GFP_KERNEL); if (rc) { - smc_buf_free(buf_desc, lnk, is_rmb); + smc_buf_free(lgr, is_rmb, buf_desc); return ERR_PTR(rc); } sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl, @@ -600,7 +637,7 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, is_rmb ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE); /* SMC protocol depends on mapping to one DMA address only */ if (rc != 1) { - smc_buf_free(buf_desc, lnk, is_rmb); + smc_buf_free(lgr, is_rmb, buf_desc); return ERR_PTR(-EAGAIN); } @@ -611,19 +648,20 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, IB_ACCESS_LOCAL_WRITE, buf_desc); if (rc) { - smc_buf_free(buf_desc, lnk, is_rmb); + smc_buf_free(lgr, is_rmb, buf_desc); return ERR_PTR(rc); } } + buf_desc->len = bufsize; return buf_desc; } static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) { + struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM); struct smc_connection *conn = &smc->conn; struct smc_link_group *lgr = conn->lgr; - struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM); struct list_head *buf_list; int bufsize, bufsize_short; int sk_buf_size; @@ -651,7 +689,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) continue; /* check for reusable slot in the link group */ - buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list); + buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list); if (buf_desc) { memset(buf_desc->cpu_addr, 0, bufsize); break; /* found reusable slot */ @@ -675,14 +713,12 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) if (is_rmb) { conn->rmb_desc = buf_desc; - conn->rmbe_size = bufsize; conn->rmbe_size_short = bufsize_short; smc->sk.sk_rcvbuf = bufsize * 2; atomic_set(&conn->bytes_to_rcv, 0); conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize); } else { conn->sndbuf_desc = buf_desc; - conn->sndbuf_size = bufsize; smc->sk.sk_sndbuf = bufsize * 2; atomic_set(&conn->sndbuf_space, bufsize); } @@ -738,8 +774,7 @@ int smc_buf_create(struct smc_sock *smc) /* create rmb */ rc = __smc_buf_create(smc, true); if (rc) - smc_buf_free(smc->conn.sndbuf_desc, - &smc->conn.lgr->lnk[SMC_SINGLE_LINK], false); + smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc); return rc; } @@ -806,3 +841,21 @@ int smc_rmb_rtoken_handling(struct 
smc_connection *conn, return conn->rtoken_idx; return 0; } + +/* Called (from smc_exit) when module is removed */ +void smc_core_exit(void) +{ + struct smc_link_group *lgr, *lg; + LIST_HEAD(lgr_freeing_list); + + spin_lock_bh(&smc_lgr_list.lock); + if (!list_empty(&smc_lgr_list.list)) + list_splice_init(&smc_lgr_list.list, &lgr_freeing_list); + spin_unlock_bh(&smc_lgr_list.lock); + list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) { + list_del_init(&lgr->list); + smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); + cancel_delayed_work_sync(&lgr->free_work); + smc_lgr_free(lgr); /* free link group */ + } +} diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 845dc073de13..93cb3523bf50 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -23,10 +23,9 @@ struct smc_lgr_list { /* list of link group definition */ struct list_head list; spinlock_t lock; /* protects list of link groups */ + u32 num; /* unique link group number */ }; -extern struct smc_lgr_list smc_lgr_list; /* list of link groups */ - enum smc_lgr_role { /* possible roles of a link group */ SMC_CLNT, /* client */ SMC_SERV /* server */ @@ -124,6 +123,7 @@ struct smc_buf_desc { struct list_head list; void *cpu_addr; /* virtual address of buffer */ struct page *pages; + int len; /* length of buffer */ struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */ struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; /* for rmb only: memory region @@ -141,6 +141,12 @@ struct smc_rtoken { /* address/key of remote RMB */ }; #define SMC_LGR_ID_SIZE 4 +#define SMC_BUF_MIN_SIZE 16384 /* minimum size of an RMB */ +#define SMC_RMBE_SIZES 16 /* number of distinct RMBE sizes */ +/* theoretically, the RFC states that largest size would be 512K, + * i.e. 
compressed 5 and thus 6 sizes (0..5), despite + * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15) + */ struct smc_link_group { struct list_head list; @@ -205,11 +211,14 @@ static inline struct smc_connection *smc_lgr_find_conn( struct smc_sock; struct smc_clc_msg_accept_confirm; +struct smc_clc_msg_local; void smc_lgr_free(struct smc_link_group *lgr); void smc_lgr_forget(struct smc_link_group *lgr); void smc_lgr_terminate(struct smc_link_group *lgr); +void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport); int smc_buf_create(struct smc_sock *smc); +int smc_uncompress_bufsize(u8 compressed); int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_clc_msg_accept_confirm *clc); int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey); @@ -218,4 +227,9 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn); void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn); void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn); void smc_rmb_sync_sg_for_device(struct smc_connection *conn); +void smc_conn_free(struct smc_connection *conn); +int smc_conn_create(struct smc_sock *smc, + struct smc_ib_device *smcibdev, u8 ibport, + struct smc_clc_msg_local *lcl, int srv_first_contact); +void smc_core_exit(void); #endif diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 05dd7e6d314d..839354402215 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -101,8 +101,9 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, struct smc_connection *conn = &smc->conn; struct smc_diag_conninfo cinfo = { .token = conn->alert_token_local, - .sndbuf_size = conn->sndbuf_size, - .rmbe_size = conn->rmbe_size, + .sndbuf_size = conn->sndbuf_desc ? + conn->sndbuf_desc->len : 0, + .rmbe_size = conn->rmb_desc ? 
conn->rmb_desc->len : 0, .peer_rmbe_size = conn->peer_rmbe_size, .rx_prod.wrap = conn->local_rx_ctrl.prod.wrap, diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 26df554f7588..0eed7ab9f28b 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -143,17 +143,6 @@ out: return rc; } -static void smc_ib_port_terminate(struct smc_ib_device *smcibdev, u8 ibport) -{ - struct smc_link_group *lgr, *l; - - list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { - if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev && - lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) - smc_lgr_terminate(lgr); - } -} - /* process context wrapper for might_sleep smc_ib_remember_port_attr */ static void smc_ib_port_event_work(struct work_struct *work) { @@ -165,7 +154,7 @@ static void smc_ib_port_event_work(struct work_struct *work) smc_ib_remember_port_attr(smcibdev, port_idx + 1); clear_bit(port_idx, &smcibdev->port_event_mask); if (!smc_ib_port_active(smcibdev, port_idx + 1)) - smc_ib_port_terminate(smcibdev, port_idx + 1); + smc_port_terminate(smcibdev, port_idx + 1); } } diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index ed45569289f5..290a434471d1 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -51,7 +51,7 @@ static void smc_rx_wake_up(struct sock *sk) static void smc_rx_update_consumer(struct smc_connection *conn, union smc_host_cursor cons, size_t len) { - smc_curs_add(conn->rmbe_size, &cons, len); + smc_curs_add(conn->rmb_desc->len, &cons, len); smc_curs_write(&conn->local_tx_ctrl.cons, smc_curs_read(&cons, conn), conn); /* send consumer cursor update if required */ @@ -288,11 +288,11 @@ copy: conn); /* subsequent splice() calls pick up where previous left */ if (splbytes) - smc_curs_add(conn->rmbe_size, &cons, splbytes); + smc_curs_add(conn->rmb_desc->len, &cons, splbytes); /* determine chunks where to read from rcvbuf */ /* either unwrapped case, or 1st chunk of wrapped case */ - chunk_len = min_t(size_t, - copylen, conn->rmbe_size - cons.count); + chunk_len = 
min_t(size_t, copylen, conn->rmb_desc->len - + cons.count); chunk_len_sum = chunk_len; chunk_off = cons.count; smc_rmb_sync_sg_for_cpu(conn); @@ -331,7 +331,7 @@ copy: /* increased in recv tasklet smc_cdc_msg_rcv() */ smp_mb__before_atomic(); atomic_sub(copylen, &conn->bytes_to_rcv); - /* guarantee 0 <= bytes_to_rcv <= rmbe_size */ + /* guarantee 0 <= bytes_to_rcv <= rmb_desc->len */ smp_mb__after_atomic(); if (msg) smc_rx_update_consumer(conn, cons, copylen); diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 08a7de98bb03..1f4a38b857f0 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -180,8 +180,8 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) tx_cnt_prep = prep.count; /* determine chunks where to write into sndbuf */ /* either unwrapped case, or 1st chunk of wrapped case */ - chunk_len = min_t(size_t, - copylen, conn->sndbuf_size - tx_cnt_prep); + chunk_len = min_t(size_t, copylen, conn->sndbuf_desc->len - + tx_cnt_prep); chunk_len_sum = chunk_len; chunk_off = tx_cnt_prep; smc_sndbuf_sync_sg_for_cpu(conn); @@ -206,21 +206,21 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) } smc_sndbuf_sync_sg_for_device(conn); /* update cursors */ - smc_curs_add(conn->sndbuf_size, &prep, copylen); + smc_curs_add(conn->sndbuf_desc->len, &prep, copylen); smc_curs_write(&conn->tx_curs_prep, smc_curs_read(&prep, conn), conn); /* increased in send tasklet smc_cdc_tx_handler() */ smp_mb__before_atomic(); atomic_sub(copylen, &conn->sndbuf_space); - /* guarantee 0 <= sndbuf_space <= sndbuf_size */ + /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */ smp_mb__after_atomic(); /* since we just produced more new data into sndbuf, * trigger sndbuf consumer: RDMA write into peer RMBE and CDC */ if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc)) && (atomic_read(&conn->sndbuf_space) > - (conn->sndbuf_size >> 1))) + (conn->sndbuf_desc->len >> 1))) /* for a corked socket defer the RDMA writes if there * is still sufficient 
sndbuf_space available */ @@ -261,7 +261,7 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, rdma_wr.remote_addr = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr + /* RMBE within RMB */ - ((conn->peer_conn_idx - 1) * conn->peer_rmbe_size) + + conn->tx_off + /* offset within RMBE */ peer_rmbe_offset; rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; @@ -286,7 +286,7 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn, atomic_sub(len, &conn->peer_rmbe_space); /* guarantee 0 <= peer_rmbe_space <= peer_rmbe_size */ smp_mb__after_atomic(); - smc_curs_add(conn->sndbuf_size, sent, len); + smc_curs_add(conn->sndbuf_desc->len, sent, len); } /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit; @@ -309,7 +309,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn); smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn); /* cf. 
wmem_alloc - (snd_max - snd_una) */ - to_send = smc_curs_diff(conn->sndbuf_size, &sent, &prep); + to_send = smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep); if (to_send <= 0) return 0; @@ -351,12 +351,12 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) dst_len_sum = dst_len; src_off = sent.count; /* dst_len determines the maximum src_len */ - if (sent.count + dst_len <= conn->sndbuf_size) { + if (sent.count + dst_len <= conn->sndbuf_desc->len) { /* unwrapped src case: single chunk of entire dst_len */ src_len = dst_len; } else { /* wrapped src case: 2 chunks of sum dst_len; start with 1st: */ - src_len = conn->sndbuf_size - sent.count; + src_len = conn->sndbuf_desc->len - sent.count; } src_len_sum = src_len; dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); @@ -368,8 +368,8 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) sges[srcchunk].lkey = link->roce_pd->local_dma_lkey; num_sges++; src_off += src_len; - if (src_off >= conn->sndbuf_size) - src_off -= conn->sndbuf_size; + if (src_off >= conn->sndbuf_desc->len) + src_off -= conn->sndbuf_desc->len; /* modulo in send ring */ if (src_len_sum == dst_len) break; /* either on 1st or 2nd iteration */ @@ -387,7 +387,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) dst_len = len - dst_len; /* remainder */ dst_len_sum += dst_len; src_len = min_t(int, - dst_len, conn->sndbuf_size - sent.count); + dst_len, conn->sndbuf_desc->len - sent.count); src_len_sum = src_len; } @@ -484,11 +484,11 @@ void smc_tx_consumer_update(struct smc_connection *conn) smc_curs_write(&cfed, smc_curs_read(&conn->rx_curs_confirmed, conn), conn); - to_confirm = smc_curs_diff(conn->rmbe_size, &cfed, &cons); + to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons); if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || ((to_confirm > conn->rmbe_update_limit) && - ((to_confirm > (conn->rmbe_size / 2)) || + ((to_confirm > (conn->rmb_desc->len / 2)) || 
conn->local_rx_ctrl.prod_flags.write_blocked))) { if ((smc_cdc_get_slot_and_msg_send(conn) < 0) && conn->alert_token_local) { /* connection healthy */ diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h index 8f64b12bf03c..44d077942976 100644 --- a/net/smc/smc_tx.h +++ b/net/smc/smc_tx.h @@ -24,7 +24,7 @@ static inline int smc_tx_prepared_sends(struct smc_connection *conn) smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn); smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn); - return smc_curs_diff(conn->sndbuf_size, &sent, &prep); + return smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep); } void smc_tx_work(struct work_struct *work); |