From bbac1c94880cb8c7e093718897f4822f3933dd3c Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 1 Jun 2015 22:48:34 -0700 Subject: s390/bpf: fix stack allocation On s390x we have to provide 160 bytes stack space before we can call the next function. From the 160 bytes that we got from the previous function we only use 11 * 8 bytes and have 160 - 11 * 8 bytes left. Currently for BPF we allocate additional 160 - 11 * 8 bytes for the next function. This is wrong because then the next function only gets: (160 - 11 * 8) + (160 - 11 * 8) = 2 * 72 = 144 bytes Fix this and allocate enough memory for the next function. Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend") Signed-off-by: Michael Holzheu Acked-by: Heiko Carstens Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/s390/net/bpf_jit.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h index ba8593a515ba..de156ba3bd71 100644 --- a/arch/s390/net/bpf_jit.h +++ b/arch/s390/net/bpf_jit.h @@ -48,7 +48,9 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; * We get 160 bytes stack space from calling function, but only use * 11 * 8 byte (old backchain + r15 - r6) for storing registers. */ -#define STK_OFF (MAX_BPF_STACK + 8 + 4 + 4 + (160 - 11 * 8)) +#define STK_SPACE (MAX_BPF_STACK + 8 + 4 + 4 + 160) +#define STK_160_UNUSED (160 - 11 * 8) +#define STK_OFF (STK_SPACE - STK_160_UNUSED) #define STK_OFF_TMP 160 /* Offset of tmp buffer on stack */ #define STK_OFF_HLEN 168 /* Offset of SKB header length on stack */ -- cgit v1.2.3-55-g7522 From 88aeca15d637c279171ba441730ef41e4c4ce0ed Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 1 Jun 2015 22:48:35 -0700 Subject: s390/bpf: fix bpf frame pointer setup Currently the bpf frame pointer is set to the old r15. This is wrong because of packed stack. Fix this and adjust the frame pointer to respect packed stack. This now generates a prolog like the following: 3ff8001c3fa: eb67f0480024 stmg %r6,%r7,72(%r15) 3ff8001c400: ebcff0780024 stmg %r12,%r15,120(%r15) 3ff8001c406: b904001f lgr %r1,%r15 <- load backchain 3ff8001c40a: 41d0f048 la %r13,72(%r15) <- load adjusted bfp 3ff8001c40e: a7fbfd98 aghi %r15,-616 3ff8001c412: e310f0980024 stg %r1,152(%r15) <- save backchain Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend") Signed-off-by: Michael Holzheu Acked-by: Heiko Carstens Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/s390/net/bpf_jit_comp.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 20c146d1251a..55423d8be580 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -384,13 +384,16 @@ static void bpf_jit_prologue(struct bpf_jit *jit) } /* Setup stack and backchain */ if (jit->seen & SEEN_STACK) { - /* lgr %bfp,%r15 (BPF frame pointer) */ - EMIT4(0xb9040000, BPF_REG_FP, REG_15); + if (jit->seen & SEEN_FUNC) + /* lgr %w1,%r15 (backchain) */ + EMIT4(0xb9040000, REG_W1, REG_15); + /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */ + EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED); /* aghi %r15,-STK_OFF */ EMIT4_IMM(0xa70b0000, REG_15, -STK_OFF); if (jit->seen & SEEN_FUNC) - /* stg %bfp,152(%r15) (backchain) */ - EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_FP, REG_0, + /* stg %w1,152(%r15) (backchain) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, 152); } /* -- cgit v1.2.3-55-g7522 From 640b2b107cec23c754214b62a811465fa8f9257f Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 2 Jun 2015 14:36:34 +0200 Subject: openvswitch: disable LRO Currently, openvswitch tries to disable LRO from the user space. This does not work correctly when the device added is a vlan interface, though. Instead of dealing with possibly complex stacked cross name space relations in the user space, do the same as bridging does and call dev_disable_lro in the kernel. Signed-off-by: Jiri Benc Acked-by: Flavio Leitner Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/vport-netdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 4776282c6417..33e6d6e2908f 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -125,6 +125,7 @@ static struct vport *netdev_create(const struct vport_parms *parms) if (err) goto error_master_upper_dev_unlink; + dev_disable_lro(netdev_vport->dev); dev_set_promiscuity(netdev_vport->dev, 1); netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH; rtnl_unlock(); -- cgit v1.2.3-55-g7522 From 6e540309326188f769e03bb4c6dd8ff6752930c2 Mon Sep 17 00:00:00 2001 From: Shawn Bohrer Date: Wed, 3 Jun 2015 16:27:38 -0500 Subject: ipv4/udp: Verify multicast group is ours in upd_v4_early_demux() 421b3885bf6d56391297844f43fb7154a6396e12 "udp: ipv4: Add udp early demux" introduced a regression that allowed sockets bound to INADDR_ANY to receive packets from multicast groups that the socket had not joined. For example a socket that had joined 224.168.2.9 could also receive packets from 225.168.2.9 despite not having joined that group if ip_early_demux is enabled. Fix this by calling ip_check_mc_rcu() in udp_v4_early_demux() to verify that the multicast packet is indeed ours. Signed-off-by: Shawn Bohrer Reported-by: Yurij M. Plotnikov Signed-off-by: David S. Miller --- net/ipv4/udp.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1c92ea67baef..83aa604f9273 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -90,6 +90,7 @@ #include #include #include +#include #include #include #include @@ -1960,6 +1961,7 @@ void udp_v4_early_demux(struct sk_buff *skb) struct sock *sk; struct dst_entry *dst; int dif = skb->dev->ifindex; + int ours; /* validate the packet */ if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) @@ -1969,14 +1971,24 @@ void udp_v4_early_demux(struct sk_buff *skb) uh = udp_hdr(skb); if (skb->pkt_type == PACKET_BROADCAST || - skb->pkt_type == PACKET_MULTICAST) + skb->pkt_type == PACKET_MULTICAST) { + struct in_device *in_dev = __in_dev_get_rcu(skb->dev); + + if (!in_dev) + return; + + ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, + iph->protocol); + if (!ours) + return; sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, uh->source, iph->saddr, dif); - else if (skb->pkt_type == PACKET_HOST) + } else if (skb->pkt_type == PACKET_HOST) { sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr, uh->source, iph->saddr, dif); - else + } else { return; + } if (!sk) return; -- cgit v1.2.3-55-g7522 From 30520831f058cd9d75c0f6b360bc5c5ae49b5f27 Mon Sep 17 00:00:00 2001 From: Anjali Singhai Jain Date: Fri, 8 May 2015 15:35:52 -0700 Subject: i40e/i40evf: Fix mixed size frags and linearization This patch fixes a bug where the i40e Tx queue will hang if this skb is passed to the driver. With mixed size fragments while using TSO there was a corner case where we needed to linearize but we were not. This was seen with iSCSI traffic and could be reproduced with a frag list that looks like this: num_frags = 17, gso_segs = 17, hdr_len = 66, skb_shinfo(skb)->gso_size = 1448 size = 3002, j = 1, frag_size = 2936, num_frags = 17 size = 4268, j = 1, frag_size = 4096, num_frags = 16 size = 5534, j = 1, frag_size = 4096, num_frags = 15 size = 5352, j = 1, frag_size = 4096, num_frags = 14 size = 5170, j = 1, frag_size = 4096, num_frags = 13 size = 3468, j = 1, frag_size = 2576, num_frags = 12 size = 750, j = 1, frag_size = 112, num_frags = 11 size = 862, j = 2, frag_size = 112, num_frags = 10 size = 974, j = 3, frag_size = 112, num_frags = 9 size = 1126, j = 4, frag_size = 152, num_frags = 8 size = 1330, j = 5, frag_size = 204, num_frags = 7 size = 1534, j = 6, frag_size = 204, num_frags = 6 size = 356, j = 1, frag_size = 204, num_frags = 5 size = 560, j = 2, frag_size = 204, num_frags = 4 size = 764, j = 3, frag_size = 204, num_frags = 3 size = 968, j = 4, frag_size = 204, num_frags = 2 size = 1140, j = 5, frag_size = 172, num_frags = 1 result: linearize = 0, j = 6 Change-ID: I79bb1aeab0af255fe2ce28e93672a85d85bf47e8 Signed-off-by: Anjali Singhai Jain Signed-off-by: Jesse Brandeburg Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 25 ++++++++++--------------- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 25 ++++++++++--------------- 2 files changed, 20 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 4bd3a80aba82..9d95042d5a0f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2410,14 +2410,12 @@ static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) * i40e_chk_linearize - Check if there are more than 8 fragments per packet * @skb: send buffer * @tx_flags: collected send information - * @hdr_len: size of the packet header * * Note: Our HW can't scatter-gather more than 8 fragments to build * a packet on the wire and so we need to figure out the cases where we * need to linearize the skb. **/ -static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags, - const u8 hdr_len) +static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags) { struct skb_frag_struct *frag; bool linearize = false; @@ -2429,7 +2427,7 @@ static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags, gso_segs = skb_shinfo(skb)->gso_segs; if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) { - u16 j = 1; + u16 j = 0; if (num_frags < (I40E_MAX_BUFFER_TXD)) goto linearize_chk_done; @@ -2440,21 +2438,18 @@ static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags, goto linearize_chk_done; } frag = &skb_shinfo(skb)->frags[0]; - size = hdr_len; /* we might still have more fragments per segment */ do { size += skb_frag_size(frag); frag++; j++; + if ((size >= skb_shinfo(skb)->gso_size) && + (j < I40E_MAX_BUFFER_TXD)) { + size = (size % skb_shinfo(skb)->gso_size); + j = (size) ? 1 : 0; + } if (j == I40E_MAX_BUFFER_TXD) { - if (size < skb_shinfo(skb)->gso_size) { - linearize = true; - break; - } - j = 1; - size -= skb_shinfo(skb)->gso_size; - if (size) - j++; - size += hdr_len; + linearize = true; + break; } num_frags--; } while (num_frags); @@ -2724,7 +2719,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, if (tsyn) tx_flags |= I40E_TX_FLAGS_TSYN; - if (i40e_chk_linearize(skb, tx_flags, hdr_len)) + if (i40e_chk_linearize(skb, tx_flags)) if (skb_linearize(skb)) goto out_drop; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index b077e02a0cc7..458fbb421090 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1619,14 +1619,12 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, * i40e_chk_linearize - Check if there are more than 8 fragments per packet * @skb: send buffer * @tx_flags: collected send information - * @hdr_len: size of the packet header * * Note: Our HW can't scatter-gather more than 8 fragments to build * a packet on the wire and so we need to figure out the cases where we * need to linearize the skb. **/ -static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags, - const u8 hdr_len) +static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags) { struct skb_frag_struct *frag; bool linearize = false; @@ -1638,7 +1636,7 @@ static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags, gso_segs = skb_shinfo(skb)->gso_segs; if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) { - u16 j = 1; + u16 j = 0; if (num_frags < (I40E_MAX_BUFFER_TXD)) goto linearize_chk_done; @@ -1649,21 +1647,18 @@ static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags, goto linearize_chk_done; } frag = &skb_shinfo(skb)->frags[0]; - size = hdr_len; /* we might still have more fragments per segment */ do { size += skb_frag_size(frag); frag++; j++; + if ((size >= skb_shinfo(skb)->gso_size) && + (j < I40E_MAX_BUFFER_TXD)) { + size = (size % skb_shinfo(skb)->gso_size); + j = (size) ? 1 : 0; + } if (j == I40E_MAX_BUFFER_TXD) { - if (size < skb_shinfo(skb)->gso_size) { - linearize = true; - break; - } - j = 1; - size -= skb_shinfo(skb)->gso_size; - if (size) - j++; - size += hdr_len; + linearize = true; + break; } num_frags--; } while (num_frags); @@ -1950,7 +1945,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (tso) tx_flags |= I40E_TX_FLAGS_TSO; - if (i40e_chk_linearize(skb, tx_flags, hdr_len)) + if (i40e_chk_linearize(skb, tx_flags)) if (skb_linearize(skb)) goto out_drop; -- cgit v1.2.3-55-g7522 From fc60861e9b00388fd11d7995a60bf0b1e61dba93 Mon Sep 17 00:00:00 2001 From: Anjali Singhai Jain Date: Fri, 8 May 2015 15:35:57 -0700 Subject: i40e: start up in VEPA mode by default The patch fixes a bug in the default configuration which prevented a software bridge loaded on the PF interface from working correctly because broadcast packets are incorrectly looped back. Fix the general case, by loading the driver in VEPA mode Until a VF or VMDq VSI is added. This way loopback on the Main VSI is turned off until needed and can resolve the issue of unnecessary reflection for users that do not have VF or VMDq VSIs setup. The driver must now coordinate the loopback setting for the Flow Director (FDIR) VSI to make sure it is in sync with the current VEB or VEPA mode setting. The user can still switch bridge modes from the bridge commands and choose to be in VEPA mode with VF VSIs. Because of hardware requirements, the call to switch to VEB mode when no VF/VMDqs are present will be rejected. NOTE: This patch uses BIT_ULL as that is preferred going forward, a followup patch in the lower priority queue to net-next will fix up the remaining 1 << usages. Change-ID: Ib121ddb18fe4b3c4f52e9deda6fcbeb9105683d1 Signed-off-by: Anjali Singhai Jain Signed-off-by: Jesse Brandeburg Tested-by: Jim Young Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 9 +++++++++ drivers/net/ethernet/intel/i40e/i40e_main.c | 21 +++++++++++++++++---- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 10 +++++++++- 4 files changed, 36 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 33c35d3b7420..5d47307121ab 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -317,6 +317,7 @@ struct i40e_pf { #endif #define I40E_FLAG_PORT_ID_VALID (u64)(1 << 28) #define I40E_FLAG_DCB_CAPABLE (u64)(1 << 29) +#define I40E_FLAG_VEB_MODE_ENABLED BIT_ULL(40) /* tracks features that get auto disabled by errors */ u64 auto_disable_flags; diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 34170eabca7d..da0faf478af0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1021,6 +1021,15 @@ static ssize_t i40e_dbg_command_write(struct file *filp, goto command_write_done; } + /* By default we are in VEPA mode, if this is the first VF/VMDq + * VSI to be added switch to VEB mode. + */ + if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { + pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + i40e_do_reset_safe(pf, + BIT_ULL(__I40E_PF_RESET_REQUESTED)); + } + vsi = i40e_vsi_setup(pf, I40E_VSI_VMDQ2, vsi_seid, 0); if (vsi) dev_info(&pf->pdev->dev, "added VSI %d to relay %d\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index a54c14491e3b..853eb2f7e558 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -6097,6 +6097,10 @@ static int i40e_reconstitute_veb(struct i40e_veb *veb) if (ret) goto end_reconstitute; + if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) + veb->bridge_mode = BRIDGE_MODE_VEB; + else + veb->bridge_mode = BRIDGE_MODE_VEPA; i40e_config_bridge_mode(veb); /* create the remaining VSIs attached to this VEB */ @@ -8031,7 +8035,12 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, } else if (mode != veb->bridge_mode) { /* Existing HW bridge but different mode needs reset */ veb->bridge_mode = mode; - i40e_do_reset(pf, (1 << __I40E_PF_RESET_REQUESTED)); + /* TODO: If no VFs or VMDq VSIs, disallow VEB mode */ + if (mode == BRIDGE_MODE_VEB) + pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + else + pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; + i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED)); break; } } @@ -8343,11 +8352,12 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) ctxt.uplink_seid = vsi->uplink_seid; ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL; ctxt.flags = I40E_AQ_VSI_TYPE_PF; - if (i40e_is_vsi_uplink_mode_veb(vsi)) { + if ((pf->flags & I40E_FLAG_VEB_MODE_ENABLED) && + (i40e_is_vsi_uplink_mode_veb(vsi))) { ctxt.info.valid_sections |= - cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); + cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); ctxt.info.switch_id = - cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); + cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); } i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true); break; @@ -8746,6 +8756,9 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, __func__); return NULL; } + /* We come up by default in VEPA mode */ + veb->bridge_mode = BRIDGE_MODE_VEPA; + pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; i40e_config_bridge_mode(veb); } for (i = 0; i < I40E_MAX_VEB && !veb; i++) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 78d1c4ff565e..4e9376da0518 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1018,11 +1018,19 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) { struct i40e_pf *pf = pci_get_drvdata(pdev); - if (num_vfs) + if (num_vfs) { + if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { + pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + i40e_do_reset_safe(pf, + BIT_ULL(__I40E_PF_RESET_REQUESTED)); + } return i40e_pci_sriov_enable(pdev, num_vfs); + } if (!pci_vfs_assigned(pf->pdev)) { i40e_free_vfs(pf); + pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; + i40e_do_reset_safe(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED)); } else { dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n"); return -EINVAL; -- cgit v1.2.3-55-g7522 From fa11cb3d16a9b9b296a2b811a49faf1356240348 Mon Sep 17 00:00:00 2001 From: Anjali Singhai Jain Date: Wed, 27 May 2015 12:06:14 -0400 Subject: i40e: Make sure to be in VEB mode if SRIOV is enabled at probe If SRIOV is enabled we need to be in VEB mode not VEPA mode at probe. This fixes an NPAR bug when SRIOV is enabled in the BIOS. Change-ID: Ibf006abafd9a0ca3698ec24848cd771cf345cbbc Signed-off-by: Anjali Singhai Jain Tested-by: Jim Young Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 853eb2f7e558..5b5bea159bd5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8756,9 +8756,14 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, __func__); return NULL; } - /* We come up by default in VEPA mode */ - veb->bridge_mode = BRIDGE_MODE_VEPA; - pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; + /* We come up by default in VEPA mode if SRIOV is not + * already enabled, in which case we can't force VEPA + * mode. + */ + if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { + veb->bridge_mode = BRIDGE_MODE_VEPA; + pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; + } i40e_config_bridge_mode(veb); } for (i = 0; i < I40E_MAX_VEB && !veb; i++) { @@ -9869,6 +9874,15 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_switch_setup; } +#ifdef CONFIG_PCI_IOV + /* prep for VF support */ + if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) && + (pf->flags & I40E_FLAG_MSIX_ENABLED) && + !test_bit(__I40E_BAD_EEPROM, &pf->state)) { + if (pci_num_vf(pdev)) + pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + } +#endif err = i40e_setup_pf_switch(pf, false); if (err) { dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err); -- cgit v1.2.3-55-g7522 From 6d7954130c8d7100b7aba3c986fc4eefedf1a1ad Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 6 Jun 2015 22:07:23 +0200 Subject: rhashtable: add missing import rhashtable uses EXPORT_SYMBOL_GPL() without importing linux/export.h directly it is only imported indirectly through some other includes. Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller --- lib/rhashtable.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 4396434e4715..8609378e6505 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -26,6 +26,7 @@ #include #include #include +#include #define HASH_DEFAULT_SIZE 64UL #define HASH_MIN_SIZE 4U -- cgit v1.2.3-55-g7522 From 078b29d7e92e4254b6de16097d0369dde17efe21 Mon Sep 17 00:00:00 2001 From: Lendacky, Thomas Date: Fri, 5 Jun 2015 16:02:26 -0500 Subject: amd-xgbe: Use disable_irq_nosync from within timer function Since the Tx timer function runs in softirq context the driver needs to call disable_irq_nosync instead of a disable_irq. Reported-by: Josh Stone Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index db84ddcfec84..9fd6c69a8bac 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -423,7 +423,7 @@ static void xgbe_tx_timer(unsigned long data) if (napi_schedule_prep(napi)) { /* Disable Tx and Rx interrupts */ if (pdata->per_channel_irq) - disable_irq(channel->dma_irq); + disable_irq_nosync(channel->dma_irq); else xgbe_disable_rx_tx_ints(pdata); -- cgit v1.2.3-55-g7522 From 1d7c49037b12016e7056b9f2c990380e2187e766 Mon Sep 17 00:00:00 2001 From: Wilson Kok Date: Fri, 5 Jun 2015 00:52:57 -0700 Subject: bridge: use _bh spinlock variant for br_fdb_update to avoid lockup br_fdb_update() can be called in process context in the following way: br_fdb_add() -> __br_fdb_add() -> br_fdb_update() (if NTF_USE flag is set) so we need to use spin_lock_bh because there are softirq users of the hash_lock. One easy way to reproduce this is to modify the bridge utility to set NTF_USE, enable stp and then set maxageing to a low value so br_fdb_cleanup() is called frequently and then just add new entries in a loop. This happens because br_fdb_cleanup() is called from timer/softirq context. These locks were _bh before commit f8ae737deea1 ("[BRIDGE]: forwarding remove unneeded preempt and bh diasables") and at the time that commit was correct because br_fdb_update() couldn't be called from process context, but that changed after commit: 292d1398983f ("bridge: add NTF_USE support") Signed-off-by: Wilson Kok Signed-off-by: Nikolay Aleksandrov Fixes: 292d1398983f ("bridge: add NTF_USE support") Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index e0670d7054f9..7eacc8ae9779 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -569,7 +569,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, fdb_notify(br, fdb, RTM_NEWNEIGH); } } else { - spin_lock(&br->hash_lock); + spin_lock_bh(&br->hash_lock); if (likely(!fdb_find(head, addr, vid))) { fdb = fdb_create(head, source, addr, vid); if (fdb) { @@ -581,7 +581,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, /* else we lose race and someone else inserts * it first, don't bother updating */ - spin_unlock(&br->hash_lock); + spin_unlock_bh(&br->hash_lock); } } -- cgit v1.2.3-55-g7522 From e51000db4c880165eab06ec0990605f24e75203f Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Fri, 5 Jun 2015 15:33:59 +0530 Subject: be2net: Replace dma/pci_alloc_coherent() calls with dma_zalloc_coherent() There are several places in the driver (all in control paths) where coherent dma memory is being allocated using either dma_alloc_coherent() or the deprecated pci_alloc_consistent(). All these calls should be changed to use dma_zalloc_coherent() to avoid uninitialized fields in data structures backed by this memory. Reported-by: Joerg Roedel Tested-by: Joerg Roedel Signed-off-by: Sriharsha Basavapatna Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 87 +++++++++++++++----------- drivers/net/ethernet/emulex/benet/be_ethtool.c | 18 +++--- drivers/net/ethernet/emulex/benet/be_main.c | 15 +++-- 3 files changed, 67 insertions(+), 53 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index fb140faeafb1..c5e1d0ac75f9 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -1720,9 +1720,9 @@ int be_cmd_get_regs(struct be_adapter *adapter, u32 buf_len, void *buf) total_size = buf_len; get_fat_cmd.size = sizeof(struct be_cmd_req_get_fat) + 60*1024; - get_fat_cmd.va = pci_alloc_consistent(adapter->pdev, - get_fat_cmd.size, - &get_fat_cmd.dma); + get_fat_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, + get_fat_cmd.size, + &get_fat_cmd.dma, GFP_ATOMIC); if (!get_fat_cmd.va) { dev_err(&adapter->pdev->dev, "Memory allocation failure while reading FAT data\n"); @@ -1767,8 +1767,8 @@ int be_cmd_get_regs(struct be_adapter *adapter, u32 buf_len, void *buf) log_offset += buf_size; } err: - pci_free_consistent(adapter->pdev, get_fat_cmd.size, - get_fat_cmd.va, get_fat_cmd.dma); + dma_free_coherent(&adapter->pdev->dev, get_fat_cmd.size, + get_fat_cmd.va, get_fat_cmd.dma); spin_unlock_bh(&adapter->mcc_lock); return status; } @@ -2215,12 +2215,12 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, return -EINVAL; cmd.size = sizeof(struct be_cmd_resp_port_type); - cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma); + cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma, + GFP_ATOMIC); if (!cmd.va) { dev_err(&adapter->pdev->dev, "Memory allocation failed\n"); return -ENOMEM; } - memset(cmd.va, 0, cmd.size); spin_lock_bh(&adapter->mcc_lock); @@ -2245,7 +2245,7 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, } err: spin_unlock_bh(&adapter->mcc_lock); - pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma); + dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); return status; } @@ -2720,7 +2720,8 @@ int be_cmd_get_phy_info(struct be_adapter *adapter) goto err; } cmd.size = sizeof(struct be_cmd_req_get_phy_info); - cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma); + cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma, + GFP_ATOMIC); if (!cmd.va) { dev_err(&adapter->pdev->dev, "Memory alloc failure\n"); status = -ENOMEM; @@ -2754,7 +2755,7 @@ int be_cmd_get_phy_info(struct be_adapter *adapter) BE_SUPPORTED_SPEED_1GBPS; } } - pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma); + dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); err: spin_unlock_bh(&adapter->mcc_lock); return status; @@ -2805,8 +2806,9 @@ int be_cmd_get_cntl_attributes(struct be_adapter *adapter) memset(&attribs_cmd, 0, sizeof(struct be_dma_mem)); attribs_cmd.size = sizeof(struct be_cmd_resp_cntl_attribs); - attribs_cmd.va = pci_alloc_consistent(adapter->pdev, attribs_cmd.size, - &attribs_cmd.dma); + attribs_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, + attribs_cmd.size, + &attribs_cmd.dma, GFP_ATOMIC); if (!attribs_cmd.va) { dev_err(&adapter->pdev->dev, "Memory allocation failure\n"); status = -ENOMEM; @@ -2833,8 +2835,8 @@ int be_cmd_get_cntl_attributes(struct be_adapter *adapter) err: mutex_unlock(&adapter->mbox_lock); if (attribs_cmd.va) - pci_free_consistent(adapter->pdev, attribs_cmd.size, - attribs_cmd.va, attribs_cmd.dma); + dma_free_coherent(&adapter->pdev->dev, attribs_cmd.size, + attribs_cmd.va, attribs_cmd.dma); return status; } @@ -2972,9 +2974,10 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac, memset(&get_mac_list_cmd, 0, sizeof(struct be_dma_mem)); get_mac_list_cmd.size = sizeof(struct be_cmd_resp_get_mac_list); - get_mac_list_cmd.va = pci_alloc_consistent(adapter->pdev, - get_mac_list_cmd.size, - &get_mac_list_cmd.dma); + get_mac_list_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, + get_mac_list_cmd.size, + &get_mac_list_cmd.dma, + GFP_ATOMIC); if (!get_mac_list_cmd.va) { dev_err(&adapter->pdev->dev, @@ -3047,8 +3050,8 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac, out: spin_unlock_bh(&adapter->mcc_lock); - pci_free_consistent(adapter->pdev, get_mac_list_cmd.size, - get_mac_list_cmd.va, get_mac_list_cmd.dma); + dma_free_coherent(&adapter->pdev->dev, get_mac_list_cmd.size, + get_mac_list_cmd.va, get_mac_list_cmd.dma); return status; } @@ -3101,8 +3104,8 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array, memset(&cmd, 0, sizeof(struct be_dma_mem)); cmd.size = sizeof(struct be_cmd_req_set_mac_list); - cmd.va = dma_alloc_coherent(&adapter->pdev->dev, cmd.size, - &cmd.dma, GFP_KERNEL); + cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma, + GFP_KERNEL); if (!cmd.va) return -ENOMEM; @@ -3291,7 +3294,8 @@ int be_cmd_get_acpi_wol_cap(struct be_adapter *adapter) memset(&cmd, 0, sizeof(struct be_dma_mem)); cmd.size = sizeof(struct be_cmd_resp_acpi_wol_magic_config_v1); - cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma); + cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma, + GFP_ATOMIC); if (!cmd.va) { dev_err(&adapter->pdev->dev, "Memory allocation failure\n"); status = -ENOMEM; @@ -3326,7 +3330,8 @@ int be_cmd_get_acpi_wol_cap(struct be_adapter *adapter) err: mutex_unlock(&adapter->mbox_lock); if (cmd.va) - pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma); + dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, + cmd.dma); return status; } @@ -3340,8 +3345,9 @@ int be_cmd_set_fw_log_level(struct be_adapter *adapter, u32 level) memset(&extfat_cmd, 0, sizeof(struct be_dma_mem)); extfat_cmd.size = sizeof(struct be_cmd_resp_get_ext_fat_caps); - extfat_cmd.va = pci_alloc_consistent(adapter->pdev, extfat_cmd.size, - &extfat_cmd.dma); + extfat_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, + extfat_cmd.size, &extfat_cmd.dma, + GFP_ATOMIC); if (!extfat_cmd.va) return -ENOMEM; @@ -3363,8 +3369,8 @@ int be_cmd_set_fw_log_level(struct be_adapter *adapter, u32 level) status = be_cmd_set_ext_fat_capabilites(adapter, &extfat_cmd, cfgs); err: - pci_free_consistent(adapter->pdev, extfat_cmd.size, extfat_cmd.va, - extfat_cmd.dma); + dma_free_coherent(&adapter->pdev->dev, extfat_cmd.size, extfat_cmd.va, + extfat_cmd.dma); return status; } @@ -3377,8 +3383,9 @@ int be_cmd_get_fw_log_level(struct be_adapter *adapter) memset(&extfat_cmd, 0, sizeof(struct be_dma_mem)); extfat_cmd.size = sizeof(struct be_cmd_resp_get_ext_fat_caps); - extfat_cmd.va = pci_alloc_consistent(adapter->pdev, extfat_cmd.size, - &extfat_cmd.dma); + extfat_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, + extfat_cmd.size, &extfat_cmd.dma, + GFP_ATOMIC); if (!extfat_cmd.va) { dev_err(&adapter->pdev->dev, "%s: Memory allocation failure\n", @@ -3396,8 +3403,8 @@ int be_cmd_get_fw_log_level(struct be_adapter *adapter) level = cfgs->module[0].trace_lvl[j].dbg_lvl; } } - pci_free_consistent(adapter->pdev, extfat_cmd.size, extfat_cmd.va, - extfat_cmd.dma); + dma_free_coherent(&adapter->pdev->dev, extfat_cmd.size, extfat_cmd.va, + extfat_cmd.dma); err: return level; } @@ -3595,7 +3602,8 @@ int be_cmd_get_func_config(struct be_adapter *adapter, struct be_resources *res) memset(&cmd, 0, sizeof(struct be_dma_mem)); cmd.size = sizeof(struct be_cmd_resp_get_func_config); - cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma); + cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma, + GFP_ATOMIC); if (!cmd.va) { dev_err(&adapter->pdev->dev, "Memory alloc failure\n"); status = -ENOMEM; @@ -3635,7 +3643,8 @@ int be_cmd_get_func_config(struct be_adapter *adapter, struct be_resources *res) err: mutex_unlock(&adapter->mbox_lock); if (cmd.va) - pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma); + dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, + cmd.dma); return status; } @@ -3656,7 +3665,8 @@ int be_cmd_get_profile_config(struct be_adapter *adapter, memset(&cmd, 0, sizeof(struct be_dma_mem)); cmd.size = sizeof(struct be_cmd_resp_get_profile_config); - cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma); + cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma, + GFP_ATOMIC); if (!cmd.va) return -ENOMEM; @@ -3702,7 +3712,8 @@ int be_cmd_get_profile_config(struct be_adapter *adapter, res->vf_if_cap_flags = vf_res->cap_flags; err: if (cmd.va) - pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma); + dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, + cmd.dma); return status; } @@ -3717,7 +3728,8 @@ static int be_cmd_set_profile_config(struct be_adapter *adapter, void *desc, memset(&cmd, 0, sizeof(struct be_dma_mem)); cmd.size = sizeof(struct be_cmd_req_set_profile_config); - cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma); + cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma, + GFP_ATOMIC); if (!cmd.va) return -ENOMEM; @@ -3733,7 +3745,8 @@ static int be_cmd_set_profile_config(struct be_adapter *adapter, void *desc, status = be_cmd_notify_wait(adapter, &wrb); if (cmd.va) - pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma); + dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, + cmd.dma); return status; } diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index b765c24625bf..2835dee5dc39 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -264,8 +264,8 @@ static int lancer_cmd_read_file(struct be_adapter *adapter, u8 *file_name, int status = 0; read_cmd.size = LANCER_READ_FILE_CHUNK; - read_cmd.va = pci_alloc_consistent(adapter->pdev, read_cmd.size, - &read_cmd.dma); + read_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, read_cmd.size, + &read_cmd.dma, GFP_ATOMIC); if (!read_cmd.va) { dev_err(&adapter->pdev->dev, @@ -289,8 +289,8 @@ static int lancer_cmd_read_file(struct be_adapter *adapter, u8 *file_name, break; } } - pci_free_consistent(adapter->pdev, read_cmd.size, read_cmd.va, - read_cmd.dma); + dma_free_coherent(&adapter->pdev->dev, read_cmd.size, read_cmd.va, + read_cmd.dma); return status; } @@ -818,8 +818,9 @@ static int be_test_ddr_dma(struct be_adapter *adapter) }; ddrdma_cmd.size = sizeof(struct be_cmd_req_ddrdma_test); - ddrdma_cmd.va = dma_alloc_coherent(&adapter->pdev->dev, ddrdma_cmd.size, - &ddrdma_cmd.dma, GFP_KERNEL); + ddrdma_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, + ddrdma_cmd.size, &ddrdma_cmd.dma, + GFP_KERNEL); if (!ddrdma_cmd.va) return -ENOMEM; @@ -941,8 +942,9 @@ static int be_read_eeprom(struct net_device *netdev, memset(&eeprom_cmd, 0, sizeof(struct be_dma_mem)); eeprom_cmd.size = sizeof(struct be_cmd_req_seeprom_read); - eeprom_cmd.va = dma_alloc_coherent(&adapter->pdev->dev, eeprom_cmd.size, - &eeprom_cmd.dma, GFP_KERNEL); + eeprom_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, + eeprom_cmd.size, &eeprom_cmd.dma, + GFP_KERNEL); if (!eeprom_cmd.va) return -ENOMEM; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 6f9ffb9026cd..e43cc8a73ea7 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4605,8 +4605,8 @@ static int lancer_fw_download(struct be_adapter *adapter, flash_cmd.size = sizeof(struct lancer_cmd_req_write_object) + LANCER_FW_DOWNLOAD_CHUNK; - flash_cmd.va = dma_alloc_coherent(dev, flash_cmd.size, - &flash_cmd.dma, GFP_KERNEL); + flash_cmd.va = dma_zalloc_coherent(dev, flash_cmd.size, + &flash_cmd.dma, GFP_KERNEL); if (!flash_cmd.va) return -ENOMEM; @@ -4739,8 +4739,8 @@ static int be_fw_download(struct be_adapter *adapter, const struct firmware* fw) } flash_cmd.size = sizeof(struct be_cmd_write_flashrom); - flash_cmd.va = dma_alloc_coherent(dev, flash_cmd.size, &flash_cmd.dma, - GFP_KERNEL); + flash_cmd.va = dma_zalloc_coherent(dev, flash_cmd.size, &flash_cmd.dma, + GFP_KERNEL); if (!flash_cmd.va) return -ENOMEM; @@ -5291,16 +5291,15 @@ static int be_drv_init(struct be_adapter *adapter) int status = 0; mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16; - mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size, - &mbox_mem_alloc->dma, - GFP_KERNEL); + mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size, + &mbox_mem_alloc->dma, + GFP_KERNEL); if (!mbox_mem_alloc->va) return -ENOMEM; mbox_mem_align->size = sizeof(struct be_mcc_mailbox); mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16); mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16); - memset(mbox_mem_align->va, 0, sizeof(struct be_mcc_mailbox)); rx_filter->size = sizeof(struct be_cmd_req_rx_filter); rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size, -- cgit v1.2.3-55-g7522 From 25cc8f0763c972911b1a65099cd10d9f8a45a7b0 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Fri, 5 Jun 2015 18:54:45 +0100 Subject: mpls: fix possible use after free of device The mpls device is used in an RCU read context without a lock being held. As the memory is freed without waiting for the RCU grace period to elapse, the freed memory could still be in use. Address this by using kfree_rcu to free the memory for the mpls device after the RCU grace period has elapsed. Fixes: 03c57747a702 ("mpls: Per-device MPLS state") Signed-off-by: Robert Shearman Acked-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 2 +- net/mpls/internal.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 7b3f732269e4..bff427f31924 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -541,7 +541,7 @@ static void mpls_ifdown(struct net_device *dev) RCU_INIT_POINTER(dev->mpls_ptr, NULL); - kfree(mdev); + kfree_rcu(mdev, rcu); } static int mpls_dev_notify(struct notifier_block *this, unsigned long event, diff --git a/net/mpls/internal.h b/net/mpls/internal.h index b064c345042c..8cabeb5a1cb9 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -16,6 +16,7 @@ struct mpls_dev { int input_enabled; struct ctl_table_header *sysctl; + struct rcu_head rcu; }; struct sk_buff; -- cgit v1.2.3-55-g7522 From 7ff46e79fb7df5b09c46c36857929fdf039f8b31 Mon Sep 17 00:00:00 2001 From: David S. Miller Date: Sun, 7 Jun 2015 19:43:47 -0700 Subject: Revert "bridge: use _bh spinlock variant for br_fdb_update to avoid lockup" This reverts commit 1d7c49037b12016e7056b9f2c990380e2187e766. Nikolay Aleksandrov has a better version of this fix. Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 7eacc8ae9779..e0670d7054f9 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -569,7 +569,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, fdb_notify(br, fdb, RTM_NEWNEIGH); } } else { - spin_lock_bh(&br->hash_lock); + spin_lock(&br->hash_lock); if (likely(!fdb_find(head, addr, vid))) { fdb = fdb_create(head, source, addr, vid); if (fdb) { @@ -581,7 +581,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, /* else we lose race and someone else inserts * it first, don't bother updating */ - spin_unlock_bh(&br->hash_lock); + spin_unlock(&br->hash_lock); } } -- cgit v1.2.3-55-g7522 From c4c832f89dc468cf11dc0dd17206bace44526651 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 6 Jun 2015 06:49:00 -0700 Subject: bridge: disable softirqs around br_fdb_update to avoid lockup br_fdb_update() can be called in process context in the following way: br_fdb_add() -> __br_fdb_add() -> br_fdb_update() (if NTF_USE flag is set) so we need to disable softirqs because there are softirq users of the hash_lock. One easy way to reproduce this is to modify the bridge utility to set NTF_USE, enable stp and then set maxageing to a low value so br_fdb_cleanup() is called frequently and then just add new entries in a loop. This happens because br_fdb_cleanup() is called from timer/softirq context. The spin locks in br_fdb_update were _bh before commit f8ae737deea1 ("[BRIDGE]: forwarding remove unneeded preempt and bh diasables") and at the time that commit was correct because br_fdb_update() couldn't be called from process context, but that changed after commit: 292d1398983f ("bridge: add NTF_USE support") Using local_bh_disable/enable around br_fdb_update() allows us to keep using the spin_lock/unlock in br_fdb_update for the fast-path. Signed-off-by: Nikolay Aleksandrov Fixes: 292d1398983f ("bridge: add NTF_USE support") Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index e0670d7054f9..659fb96672e4 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -796,9 +796,11 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p, int err = 0; if (ndm->ndm_flags & NTF_USE) { + local_bh_disable(); rcu_read_lock(); br_fdb_update(p->br, p, addr, vid, true); rcu_read_unlock(); + local_bh_enable(); } else { spin_lock_bh(&p->br->hash_lock); err = fdb_add_entry(p, addr, ndm->ndm_state, -- cgit v1.2.3-55-g7522 From 1489bdeeae1a47171926e255956c9fc251db13a0 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sun, 7 Jun 2015 14:11:48 +0200 Subject: b44: call netif_napi_del() When the driver gets unregistered a call to netif_napi_del() was missing, this all was also missing in the error paths of b44_init_one(). Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/b44.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 77363d680532..a3b1c07ae0af 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2464,6 +2464,7 @@ err_out_powerdown: ssb_bus_may_powerdown(sdev->bus); err_out_free_dev: + netif_napi_del(&bp->napi); free_netdev(dev); out: @@ -2480,6 +2481,7 @@ static void b44_remove_one(struct ssb_device *sdev) b44_unregister_phy_one(bp); ssb_device_disable(sdev, 0); ssb_bus_may_powerdown(sdev->bus); + netif_napi_del(&bp->napi); free_netdev(dev); ssb_pcihost_set_power_state(sdev, PCI_D3hot); ssb_set_drvdata(sdev, NULL); -- cgit v1.2.3-55-g7522 From 27e41fcfa6b326ad44eee7e0b1930d080b270895 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Fri, 5 Jun 2015 18:51:54 +0100 Subject: ipv6: fix possible use after free of dev stats The memory pointed to by idev->stats.icmpv6msgdev, idev->stats.icmpv6dev and idev->stats.ipv6 can each be used in an RCU read context without taking a reference on idev. For example, through IP6_*_STATS_* calls in ip6_rcv. These memory blocks are freed without waiting for an RCU grace period to elapse. This could lead to the memory being written to after it has been freed. Fix this by using call_rcu to free the memory used for stats, as well as idev after an RCU grace period has elapsed. Signed-off-by: Robert Shearman Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/addrconf_core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index d873ceea86e6..ca09bf49ac68 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -133,6 +133,14 @@ static void snmp6_free_dev(struct inet6_dev *idev) free_percpu(idev->stats.ipv6); } +static void in6_dev_finish_destroy_rcu(struct rcu_head *head) +{ + struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu); + + snmp6_free_dev(idev); + kfree(idev); +} + /* Nobody refers to this device, we may destroy it. */ void in6_dev_finish_destroy(struct inet6_dev *idev) @@ -151,7 +159,6 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) pr_warn("Freeing alive inet6 device %p\n", idev); return; } - snmp6_free_dev(idev); - kfree_rcu(idev, rcu); + call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu); } EXPORT_SYMBOL(in6_dev_finish_destroy); -- cgit v1.2.3-55-g7522 From 0243508edd317ff1fa63b495643a7c192fbfcd92 Mon Sep 17 00:00:00 2001 From: Josh Hunt Date: Mon, 8 Jun 2015 12:00:59 -0400 Subject: ipv6: Fix protocol resubmission UDP encapsulation is broken on IPv6. This is because the logic to resubmit the nexthdr is inverted, checking for a ret value > 0 instead of < 0. Also, the resubmit label is in the wrong position since we already get the nexthdr value when performing decapsulation. In addition the skb pull is no longer necessary either. This changes the return value check to look for < 0, using it for the nexthdr on the next iteration, and moves the resubmit label to the proper location. With these changes the v6 code now matches what we do in the v4 ip input code wrt resubmitting when decapsulating. Signed-off-by: Josh Hunt Acked-by: "Tom Herbert" Signed-off-by: David S. Miller --- net/ipv6/ip6_input.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index f2e464eba5ef..41a73da371a9 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -212,13 +212,13 @@ static int ip6_input_finish(struct sock *sk, struct sk_buff *skb) */ rcu_read_lock(); -resubmit: idev = ip6_dst_idev(skb_dst(skb)); if (!pskb_pull(skb, skb_transport_offset(skb))) goto discard; nhoff = IP6CB(skb)->nhoff; nexthdr = skb_network_header(skb)[nhoff]; +resubmit: raw = raw6_local_deliver(skb, nexthdr); ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot) { @@ -246,10 +246,12 @@ resubmit: goto discard; ret = ipprot->handler(skb); - if (ret > 0) + if (ret < 0) { + nexthdr = -ret; goto resubmit; - else if (ret == 0) + } else if (ret == 0) { IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); + } } else { if (!raw) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { -- cgit v1.2.3-55-g7522 From afe3f907d20f39c0eaf81f2baec247ba672f34a9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 8 Jun 2015 10:47:57 -0700 Subject: net: bcmgenet: power on MII block for all MII modes The RGMII block is currently only powered on when using RGMII or RGMII_NO_ID, which is not correct when using the GENET interface in MII or Reverse MII modes. We always need to power on the RGMII interface for this block to properly work, regardless of the MII mode in which we operate. Fixes: aa09677cba423 ("net: bcmgenet: add MDIO routines") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index e7651b3c6c57..420949cc55aa 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -299,9 +299,6 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) phy_name = "external RGMII (no delay)"; else phy_name = "external RGMII (TX delay)"; - reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); - reg |= RGMII_MODE_EN | id_mode_dis; - bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); bcmgenet_sys_writel(priv, PORT_MODE_EXT_GPHY, SYS_PORT_CTRL); break; @@ -310,6 +307,15 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) return -EINVAL; } + /* This is an external PHY (xMII), so we need to enable the RGMII + * block for the interface to work + */ + if (priv->ext_phy) { + reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); + reg |= RGMII_MODE_EN | id_mode_dis; + bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); + } + if (init) dev_info(kdev, "configuring instance for %s\n", phy_name); -- cgit v1.2.3-55-g7522 From bbbf2df0039d31c6a0a9708ce4fe220a54bd5379 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 8 Jun 2015 11:53:08 -0400 Subject: net: replace last open coded skb_orphan_frags with function call Commit 70008aa50e92 ("skbuff: convert to skb_orphan_frags") replaced open coded tests of SKBTX_DEV_ZEROCOPY and skb_copy_ubufs with calls to helper function skb_orphan_frags. Apply that to the last remaining open coded site. Signed-off-by: Willem de Bruijn Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/core/dev.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 2c1c67fad64d..aa82f9ab6a36 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1718,15 +1718,8 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { - if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { - if (skb_copy_ubufs(skb, GFP_ATOMIC)) { - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; - } - } - - if (unlikely(!is_skb_forwardable(dev, skb))) { + if (skb_orphan_frags(skb, GFP_ATOMIC) || + unlikely(!is_skb_forwardable(dev, skb))) { atomic_long_inc(&dev->rx_dropped); kfree_skb(skb); return NET_RX_DROP; -- cgit v1.2.3-55-g7522