From 90017accff61ae89283ad9a51f9ac46ca01633fb Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 2 Jun 2016 15:05:43 -0300 Subject: sctp: Add GSO support SCTP has this pecualiarity that its packets cannot be just segmented to (P)MTU. Its chunks must be contained in IP segments, padding respected. So we can't just generate a big skb, set gso_size to the fragmentation point and deliver it to IP layer. This patch takes a different approach. SCTP will now build a skb as it would be if it was received using GRO. That is, there will be a cover skb with protocol headers and children ones containing the actual segments, already segmented to a way that respects SCTP RFCs. With that, we can tell skb_segment() to just split based on frag_list, trusting its sizes are already in accordance. This way SCTP can benefit from GSO and instead of passing several packets through the stack, it can pass a single large packet. v2: - Added support for receiving GSO frames, as requested by Dave Miller. - Clear skb->cb if packet is GSO (otherwise it's not used by SCTP) - Added heuristics similar to what we have in TCP for not generating single GSO packets that fills cwnd. v3: - consider sctphdr size in skb_gso_transport_seglen() - rebased due to 5c7cdf339af5 ("gso: Remove arbitrary checks for unsupported GSO") Signed-off-by: Marcelo Ricardo Leitner Tested-by: Xin Long Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f45929ce8157..fa6df2699532 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4012,6 +4012,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; } -- cgit v1.2.3-55-g7522 From f9eb8aea2a1e12fc2f584d1627deeb957435a801 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jun 2016 09:37:15 -0700 Subject: net_sched: transform qdisc running bit into a seqcount Instead of using a single bit (__QDISC___STATE_RUNNING) in sch->__state, use a seqcount. This adds lockdep support, but more importantly it will allow us to sample qdisc/class statistics without having to grab qdisc root lock. Signed-off-by: Eric Dumazet Cc: Cong Wang Cc: Jamal Hadi Salim Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 ++ drivers/net/ppp/ppp_generic.c | 3 +++ drivers/net/team/team.c | 2 ++ include/linux/netdevice.h | 1 + include/net/sch_generic.h | 15 ++++----------- net/bluetooth/6lowpan.c | 2 ++ net/core/dev.c | 2 +- net/ieee802154/6lowpan/core.c | 3 +++ net/l2tp/l2tp_eth.c | 4 ++++ net/sched/sch_generic.c | 14 ++++++++++---- 10 files changed, 32 insertions(+), 16 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 941ec99cd3b6..681af31a60ed 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4610,6 +4610,7 @@ static int bond_check_params(struct bond_params *params) static struct lock_class_key bonding_netdev_xmit_lock_key; static struct lock_class_key bonding_netdev_addr_lock_key; static struct lock_class_key bonding_tx_busylock_key; +static struct lock_class_key bonding_qdisc_running_key; static void bond_set_lockdep_class_one(struct net_device *dev, struct netdev_queue *txq, @@ -4625,6 +4626,7 @@ static void bond_set_lockdep_class(struct net_device *dev) &bonding_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, bond_set_lockdep_class_one, NULL); dev->qdisc_tx_busylock = &bonding_tx_busylock_key; + dev->qdisc_running_key = &bonding_qdisc_running_key; } /* Called from registration process */ diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 8dedafa1a95d..aeabaa42317f 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1313,9 +1313,12 @@ ppp_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats64) } static struct lock_class_key ppp_tx_busylock; +static struct lock_class_key ppp_qdisc_running_key; + static int ppp_dev_init(struct net_device *dev) { dev->qdisc_tx_busylock = &ppp_tx_busylock; + dev->qdisc_running_key = &ppp_qdisc_running_key; return 0; } diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 2ace126533cd..00eb38956a2c 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1577,6 +1577,7 @@ static const struct team_option team_options[] = { static struct lock_class_key team_netdev_xmit_lock_key; static struct lock_class_key team_netdev_addr_lock_key; static struct lock_class_key team_tx_busylock_key; +static struct lock_class_key team_qdisc_running_key; static void team_set_lockdep_class_one(struct net_device *dev, struct netdev_queue *txq, @@ -1590,6 +1591,7 @@ static void team_set_lockdep_class(struct net_device *dev) lockdep_set_class(&dev->addr_list_lock, &team_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, team_set_lockdep_class_one, NULL); dev->qdisc_tx_busylock = &team_tx_busylock_key; + dev->qdisc_running_key = &team_qdisc_running_key; } static int team_init(struct net_device *dev) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fa6df2699532..59d7e06d88d5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1862,6 +1862,7 @@ struct net_device { #endif struct phy_device *phydev; struct lock_class_key *qdisc_tx_busylock; + struct lock_class_key *qdisc_running_key; bool proto_down; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a1fd76c22a59..bff8d895ef8a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -29,13 +29,6 @@ enum qdisc_state_t { __QDISC_STATE_THROTTLED, }; -/* - * following bits are only changed while qdisc lock is held - */ -enum qdisc___state_t { - __QDISC___STATE_RUNNING = 1, -}; - struct qdisc_size_table { struct rcu_head rcu; struct list_head list; @@ -93,7 +86,7 @@ struct Qdisc { unsigned long state; struct sk_buff_head q; struct gnet_stats_basic_packed bstats; - unsigned int __state; + seqcount_t running; struct gnet_stats_queue qstats; struct rcu_head rcu_head; int padded; @@ -104,20 +97,20 @@ struct Qdisc { static inline bool qdisc_is_running(const struct Qdisc *qdisc) { - return (qdisc->__state & __QDISC___STATE_RUNNING) ? true : false; + return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; } static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc_is_running(qdisc)) return false; - qdisc->__state |= __QDISC___STATE_RUNNING; + write_seqcount_begin(&qdisc->running); return true; } static inline void qdisc_run_end(struct Qdisc *qdisc) { - qdisc->__state &= ~__QDISC___STATE_RUNNING; + write_seqcount_end(&qdisc->running); } static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 780089d75915..977a11e418d0 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -629,6 +629,7 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev) static struct lock_class_key bt_tx_busylock; static struct lock_class_key bt_netdev_xmit_lock_key; +static struct lock_class_key bt_qdisc_running_key; static void bt_set_lockdep_class_one(struct net_device *dev, struct netdev_queue *txq, @@ -641,6 +642,7 @@ static int bt_dev_init(struct net_device *dev) { netdev_for_each_tx_queue(dev, bt_set_lockdep_class_one, NULL); dev->qdisc_tx_busylock = &bt_tx_busylock; + dev->qdisc_running_key = &bt_qdisc_running_key; return 0; } diff --git a/net/core/dev.c b/net/core/dev.c index 896b686d1966..e0bcc39f4a7d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3075,7 +3075,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, /* * Heuristic to force contended enqueues to serialize on a * separate lock before trying to get qdisc main lock. - * This permits __QDISC___STATE_RUNNING owner to get the lock more + * This permits qdisc->running owner to get the lock more * often and dequeue packets faster. */ contended = qdisc_is_running(q); diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index dd085db8580e..14aa5effd29a 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -60,6 +60,7 @@ static struct header_ops lowpan_header_ops = { static struct lock_class_key lowpan_tx_busylock; static struct lock_class_key lowpan_netdev_xmit_lock_key; +static struct lock_class_key lowpan_qdisc_running_key; static void lowpan_set_lockdep_class_one(struct net_device *ldev, struct netdev_queue *txq, @@ -73,6 +74,8 @@ static int lowpan_dev_init(struct net_device *ldev) { netdev_for_each_tx_queue(ldev, lowpan_set_lockdep_class_one, NULL); ldev->qdisc_tx_busylock = &lowpan_tx_busylock; + ldev->qdisc_running_key = &lowpan_qdisc_running_key; + return 0; } diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index e253c26f31ac..c00d72d182fa 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -68,6 +68,8 @@ static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net) } static struct lock_class_key l2tp_eth_tx_busylock; +static struct lock_class_key l2tp_qdisc_running_key; + static int l2tp_eth_dev_init(struct net_device *dev) { struct l2tp_eth *priv = netdev_priv(dev); @@ -76,6 +78,8 @@ static int l2tp_eth_dev_init(struct net_device *dev) eth_hw_addr_random(dev); eth_broadcast_addr(dev->broadcast); dev->qdisc_tx_busylock = &l2tp_eth_tx_busylock; + dev->qdisc_running_key = &l2tp_qdisc_running_key; + return 0; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 269dd71b3828..cebea73e70ac 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -110,7 +110,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, /* * Transmit possibly several skbs, and handle the return status as - * required. Holding the __QDISC___STATE_RUNNING bit guarantees that + * required. Owning running seqcount bit guarantees that * only one CPU can execute this function. * * Returns to the caller: @@ -137,10 +137,10 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, HARD_TX_UNLOCK(dev, txq); } else { - spin_lock(root_lock); + spin_lock_nested(root_lock, SINGLE_DEPTH_NESTING); return qdisc_qlen(q); } - spin_lock(root_lock); + spin_lock_nested(root_lock, SINGLE_DEPTH_NESTING); if (dev_xmit_complete(ret)) { /* Driver sent out skb successfully or skb was consumed */ @@ -163,7 +163,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, /* * NOTE: Called under qdisc_lock(q) with locally disabled BH. * - * __QDISC___STATE_RUNNING guarantees only one CPU can process + * running seqcount guarantees only one CPU can process * this qdisc at a time. qdisc_lock(q) serializes queue accesses for * this queue. * @@ -379,6 +379,7 @@ struct Qdisc noop_qdisc = { .list = LIST_HEAD_INIT(noop_qdisc.list), .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, + .running = SEQCNT_ZERO(noop_qdisc.running), .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), }; EXPORT_SYMBOL(noop_qdisc); @@ -537,6 +538,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = { EXPORT_SYMBOL(pfifo_fast_ops); static struct lock_class_key qdisc_tx_busylock; +static struct lock_class_key qdisc_running_key; struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops) @@ -570,6 +572,10 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, lockdep_set_class(&sch->busylock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); + seqcount_init(&sch->running); + lockdep_set_class(&sch->running, + dev->qdisc_running_key ?: &qdisc_running_key); + sch->ops = ops; sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; -- cgit v1.2.3-55-g7522 From 123b36526592f009bf8eccb7c8833aeda296d9cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2016 07:22:49 -0700 Subject: net: sched: fix missing doc annotations "make htmldocs" complains otherwise: .//net/core/gen_stats.c:168: warning: No description found for parameter 'running' .//include/linux/netdevice.h:1867: warning: No description found for parameter 'qdisc_running_key' Fixes: f9eb8aea2a1e ("net_sched: transform qdisc running bit into a seqcount") Fixes: edb09eb17ed8 ("net: sched: do not acquire qdisc spinlock in qdisc/class stats dump") Signed-off-by: Eric Dumazet Reported-by: kbuild test robot Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- net/core/gen_stats.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 59d7e06d88d5..541562333ba5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1594,7 +1594,8 @@ enum netdev_priv_flags { * @phydev: Physical device may attach itself * for hardware timestamping * - * @qdisc_tx_busylock: XXX: need comments on this one + * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock + * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount * * @proto_down: protocol port state information can be sent to the * switch driver and used to set the phys state of the diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index d9c210caff32..32207e6a942c 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -150,6 +150,7 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic); /** * gnet_stats_copy_basic - copy basic statistics into statistic TLV + * @running: seqcount_t pointer * @d: dumping handle * @cpu: copy statistic per cpu * @b: basic statistics -- cgit v1.2.3-55-g7522 From d3fff6c443fe8f8a5ef2bdcea45e2ff39db948c7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Jun 2016 07:45:12 -0700 Subject: net: add netdev_lockdep_set_classes() helper It is time to add netdev_lockdep_set_classes() helper so that lockdep annotations per device type are easier to manage. This removes a lot of copies and missing annotations. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 24 +----------------------- drivers/net/ppp/ppp_generic.c | 6 +----- drivers/net/team/team.c | 21 +-------------------- include/linux/netdevice.h | 17 +++++++++++++++++ net/bluetooth/6lowpan.c | 15 +-------------- net/ieee802154/6lowpan/core.c | 16 +--------------- net/l2tp/l2tp_eth.c | 6 +----- 7 files changed, 23 insertions(+), 82 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 681af31a60ed..90157e20357e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4607,28 +4607,6 @@ static int bond_check_params(struct bond_params *params) return 0; } -static struct lock_class_key bonding_netdev_xmit_lock_key; -static struct lock_class_key bonding_netdev_addr_lock_key; -static struct lock_class_key bonding_tx_busylock_key; -static struct lock_class_key bonding_qdisc_running_key; - -static void bond_set_lockdep_class_one(struct net_device *dev, - struct netdev_queue *txq, - void *_unused) -{ - lockdep_set_class(&txq->_xmit_lock, - &bonding_netdev_xmit_lock_key); -} - -static void bond_set_lockdep_class(struct net_device *dev) -{ - lockdep_set_class(&dev->addr_list_lock, - &bonding_netdev_addr_lock_key); - netdev_for_each_tx_queue(dev, bond_set_lockdep_class_one, NULL); - dev->qdisc_tx_busylock = &bonding_tx_busylock_key; - dev->qdisc_running_key = &bonding_qdisc_running_key; -} - /* Called from registration process */ static int bond_init(struct net_device *bond_dev) { @@ -4641,7 +4619,7 @@ static int bond_init(struct net_device *bond_dev) if (!bond->wq) return -ENOMEM; - bond_set_lockdep_class(bond_dev); + netdev_lockdep_set_classes(bond_dev); list_add_tail(&bond->bond_list, &bn->dev_list); diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index aeabaa42317f..17953ab15000 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1312,13 +1312,9 @@ ppp_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats64) return stats64; } -static struct lock_class_key ppp_tx_busylock; -static struct lock_class_key ppp_qdisc_running_key; - static int ppp_dev_init(struct net_device *dev) { - dev->qdisc_tx_busylock = &ppp_tx_busylock; - dev->qdisc_running_key = &ppp_qdisc_running_key; + netdev_lockdep_set_classes(dev); return 0; } diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 00eb38956a2c..0a1bb8387d96 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1574,25 +1574,6 @@ static const struct team_option team_options[] = { }, }; -static struct lock_class_key team_netdev_xmit_lock_key; -static struct lock_class_key team_netdev_addr_lock_key; -static struct lock_class_key team_tx_busylock_key; -static struct lock_class_key team_qdisc_running_key; - -static void team_set_lockdep_class_one(struct net_device *dev, - struct netdev_queue *txq, - void *unused) -{ - lockdep_set_class(&txq->_xmit_lock, &team_netdev_xmit_lock_key); -} - -static void team_set_lockdep_class(struct net_device *dev) -{ - lockdep_set_class(&dev->addr_list_lock, &team_netdev_addr_lock_key); - netdev_for_each_tx_queue(dev, team_set_lockdep_class_one, NULL); - dev->qdisc_tx_busylock = &team_tx_busylock_key; - dev->qdisc_running_key = &team_qdisc_running_key; -} static int team_init(struct net_device *dev) { @@ -1628,7 +1609,7 @@ static int team_init(struct net_device *dev) goto err_options_register; netif_carrier_off(dev); - team_set_lockdep_class(dev); + netdev_lockdep_set_classes(dev); return 0; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 541562333ba5..4f234b102892 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1946,6 +1946,23 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, f(dev, &dev->_tx[i], arg); } +#define netdev_lockdep_set_classes(dev) \ +{ \ + static struct lock_class_key qdisc_tx_busylock_key; \ + static struct lock_class_key qdisc_running_key; \ + static struct lock_class_key qdisc_xmit_lock_key; \ + static struct lock_class_key dev_addr_list_lock_key; \ + unsigned int i; \ + \ + (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ + (dev)->qdisc_running_key = &qdisc_running_key; \ + lockdep_set_class(&(dev)->addr_list_lock, \ + &dev_addr_list_lock_key); \ + for (i = 0; i < (dev)->num_tx_queues; i++) \ + lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \ + &qdisc_xmit_lock_key); \ +} + struct netdev_queue *netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, void *accel_priv); diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 977a11e418d0..d020299baba4 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -627,22 +627,9 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev) return err < 0 ? NET_XMIT_DROP : err; } -static struct lock_class_key bt_tx_busylock; -static struct lock_class_key bt_netdev_xmit_lock_key; -static struct lock_class_key bt_qdisc_running_key; - -static void bt_set_lockdep_class_one(struct net_device *dev, - struct netdev_queue *txq, - void *_unused) -{ - lockdep_set_class(&txq->_xmit_lock, &bt_netdev_xmit_lock_key); -} - static int bt_dev_init(struct net_device *dev) { - netdev_for_each_tx_queue(dev, bt_set_lockdep_class_one, NULL); - dev->qdisc_tx_busylock = &bt_tx_busylock; - dev->qdisc_running_key = &bt_qdisc_running_key; + netdev_lockdep_set_classes(dev); return 0; } diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 14aa5effd29a..4e2b30894224 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -58,23 +58,9 @@ static struct header_ops lowpan_header_ops = { .create = lowpan_header_create, }; -static struct lock_class_key lowpan_tx_busylock; -static struct lock_class_key lowpan_netdev_xmit_lock_key; -static struct lock_class_key lowpan_qdisc_running_key; - -static void lowpan_set_lockdep_class_one(struct net_device *ldev, - struct netdev_queue *txq, - void *_unused) -{ - lockdep_set_class(&txq->_xmit_lock, - &lowpan_netdev_xmit_lock_key); -} - static int lowpan_dev_init(struct net_device *ldev) { - netdev_for_each_tx_queue(ldev, lowpan_set_lockdep_class_one, NULL); - ldev->qdisc_tx_busylock = &lowpan_tx_busylock; - ldev->qdisc_running_key = &lowpan_qdisc_running_key; + netdev_lockdep_set_classes(ldev); return 0; } diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index c00d72d182fa..57fc5a46ce06 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -67,9 +67,6 @@ static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net) return net_generic(net, l2tp_eth_net_id); } -static struct lock_class_key l2tp_eth_tx_busylock; -static struct lock_class_key l2tp_qdisc_running_key; - static int l2tp_eth_dev_init(struct net_device *dev) { struct l2tp_eth *priv = netdev_priv(dev); @@ -77,8 +74,7 @@ static int l2tp_eth_dev_init(struct net_device *dev) priv->dev = dev; eth_hw_addr_random(dev); eth_broadcast_addr(dev->broadcast); - dev->qdisc_tx_busylock = &l2tp_eth_tx_busylock; - dev->qdisc_running_key = &l2tp_qdisc_running_key; + netdev_lockdep_set_classes(dev); return 0; } -- cgit v1.2.3-55-g7522 From a70b506efe899dc8d650eafcc0b11fc9ee746627 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 10 Jun 2016 21:19:06 +0200 Subject: bpf: enforce recursion limit on redirects Respect the stack's xmit_recursion limit for calls into dev_queue_xmit(). Currently, they are not handeled by the limiter when attached to clsact's egress parent, for example, and a buggy program redirecting it to the same device again could run into stack overflow eventually. It would be good if we could notify an admin to give him a chance to react. We reuse xmit_recursion instead of having one private to eBPF, so that the stack's current recursion depth will be taken into account as well. Follow-up to commit 3896d655f4d4 ("bpf: introduce bpf_clone_redirect() helper") and 27b29f63058d ("bpf: add bpf_redirect() helper"). Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ net/core/dev.c | 6 ++---- net/core/filter.c | 55 +++++++++++++++++++++++++++++------------------ 3 files changed, 38 insertions(+), 25 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4f234b102892..94eef356a65f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2389,6 +2389,8 @@ void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); DECLARE_PER_CPU(int, xmit_recursion); +#define XMIT_RECURSION_LIMIT 10 + static inline int dev_recursion_level(void) { return this_cpu_read(xmit_recursion); diff --git a/net/core/dev.c b/net/core/dev.c index c43c9d2a88cf..b14835757141 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3144,8 +3144,6 @@ static void skb_update_prio(struct sk_buff *skb) DEFINE_PER_CPU(int, xmit_recursion); EXPORT_SYMBOL(xmit_recursion); -#define RECURSION_LIMIT 10 - /** * dev_loopback_xmit - loop back @skb * @net: network namespace this loopback is happening in @@ -3388,8 +3386,8 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) int cpu = smp_processor_id(); /* ok because BHs are off */ if (txq->xmit_lock_owner != cpu) { - - if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT) + if (unlikely(__this_cpu_read(xmit_recursion) > + XMIT_RECURSION_LIMIT)) goto recursion_alert; skb = validate_xmit_skb(skb, dev); diff --git a/net/core/filter.c b/net/core/filter.c index 68adb5f52110..d11744d10e00 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1603,9 +1603,36 @@ static const struct bpf_func_proto bpf_csum_diff_proto = { .arg5_type = ARG_ANYTHING, }; +static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb) +{ + if (skb_at_tc_ingress(skb)) + skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len); + + return dev_forward_skb(dev, skb); +} + +static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) +{ + int ret; + + if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) { + net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n"); + kfree_skb(skb); + return -ENETDOWN; + } + + skb->dev = dev; + + __this_cpu_inc(xmit_recursion); + ret = dev_queue_xmit(skb); + __this_cpu_dec(xmit_recursion); + + return ret; +} + static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) { - struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2; + struct sk_buff *skb = (struct sk_buff *) (long) r1; struct net_device *dev; if (unlikely(flags & ~(BPF_F_INGRESS))) @@ -1615,19 +1642,12 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) if (unlikely(!dev)) return -EINVAL; - skb2 = skb_clone(skb, GFP_ATOMIC); - if (unlikely(!skb2)) + skb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!skb)) return -ENOMEM; - if (flags & BPF_F_INGRESS) { - if (skb_at_tc_ingress(skb2)) - skb_postpush_rcsum(skb2, skb_mac_header(skb2), - skb2->mac_len); - return dev_forward_skb(dev, skb2); - } - - skb2->dev = dev; - return dev_queue_xmit(skb2); + return flags & BPF_F_INGRESS ? + __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); } static const struct bpf_func_proto bpf_clone_redirect_proto = { @@ -1671,15 +1691,8 @@ int skb_do_redirect(struct sk_buff *skb) return -EINVAL; } - if (ri->flags & BPF_F_INGRESS) { - if (skb_at_tc_ingress(skb)) - skb_postpush_rcsum(skb, skb_mac_header(skb), - skb->mac_len); - return dev_forward_skb(dev, skb); - } - - skb->dev = dev; - return dev_queue_xmit(skb); + return ri->flags & BPF_F_INGRESS ? + __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); } static const struct bpf_func_proto bpf_redirect_proto = { -- cgit v1.2.3-55-g7522 From 99860208bc62d8ebd5c57495b84856506fe075bc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 11 Jun 2016 12:46:04 +0200 Subject: sched: remove NET_XMIT_POLICED sch_atm returns this when TC_ACT_SHOT classification occurs. But all other schedulers that use tc_classify (htb, hfsc, drr, fq_codel ...) return NET_XMIT_SUCCESS | __BYPASS in this case so just do that in atm. BATMAN uses it as an intermediate return value to signal forwarding vs. buffering, but it did not return POLICED to callers outside of BATMAN. Reviewed-by: Sven Eckelmann Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - net/batman-adv/routing.c | 2 +- net/batman-adv/send.c | 4 ++-- net/core/pktgen.c | 1 - net/sched/sch_api.c | 2 -- net/sched/sch_atm.c | 2 +- 6 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 94eef356a65f..d101e4d904ba 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -90,7 +90,6 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, #define NET_XMIT_SUCCESS 0x00 #define NET_XMIT_DROP 0x01 /* skb dropped */ #define NET_XMIT_CN 0x02 /* congestion notification */ -#define NET_XMIT_POLICED 0x03 /* skb is shot by police */ #define NET_XMIT_MASK 0x0f /* qdisc flags in net/sch_generic.h */ /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index e3857ed4057f..f75091c983ee 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -653,7 +653,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, len + ETH_HLEN); ret = NET_RX_SUCCESS; - } else if (res == NET_XMIT_POLICED) { + } else if (res == -EINPROGRESS) { /* skb was buffered and consumed */ ret = NET_RX_SUCCESS; } diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index f2f125684ed9..b1a4e8a811c8 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -156,7 +156,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb, * attempted. * * Return: NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or - * NET_XMIT_POLICED if the skb is buffered for later transmit. + * -EINPROGRESS if the skb is buffered for later transmit. */ int batadv_send_skb_to_orig(struct sk_buff *skb, struct batadv_orig_node *orig_node, @@ -188,7 +188,7 @@ int batadv_send_skb_to_orig(struct sk_buff *skb, * network coding fails, then send the packet as usual. */ if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) { - ret = NET_XMIT_POLICED; + ret = -EINPROGRESS; } else { batadv_send_unicast_skb(skb, neigh_node); ret = NET_XMIT_SUCCESS; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 8b02df0d354d..f74ab9c3b38f 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3463,7 +3463,6 @@ xmit_more: break; case NET_XMIT_DROP: case NET_XMIT_CN: - case NET_XMIT_POLICED: /* skb has been consumed */ pkt_dev->errors++; break; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 401eda6de682..12ebde845523 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -95,8 +95,6 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, Expected action: do not backoff, but wait until queue will clear. NET_XMIT_CN - probably this packet enqueued, but another one dropped. Expected action: backoff or ignore - NET_XMIT_POLICED - dropped by police. - Expected action: backoff or error to real-time apps. Auxiliary routines: diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 7e6c12dfc66a..0785b239ddf9 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -363,7 +363,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) struct atm_flow_data *flow; struct tcf_result res; int result; - int ret = NET_XMIT_POLICED; + int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); result = TC_POLICE_OK; /* be nice to gcc */ -- cgit v1.2.3-55-g7522 From 8626a0c83b0d471d859bcd908d016874df951fc3 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 15 Jun 2016 21:20:16 +0200 Subject: 6lowpan: add private neighbour data This patch will introduce a 6lowpan neighbour private data. Like the interface private data we handle private data for generic 6lowpan and for link-layer specific 6lowpan. The current first use case if to save the short address for a 802.15.4 6lowpan neighbour. Cc: David S. Miller Reviewed-by: Stefan Schmidt Acked-by: YOSHIFUJI Hideaki Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +-- include/net/6lowpan.h | 10 ++++++++++ net/ieee802154/6lowpan/core.c | 12 ++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d101e4d904ba..36e43bd422f8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1483,8 +1483,7 @@ enum netdev_priv_flags { * @perm_addr: Permanent hw address * @addr_assign_type: Hw address assignment type * @addr_len: Hardware address length - * @neigh_priv_len; Used in neigh_alloc(), - * initialized only in atm/clip.c + * @neigh_priv_len: Used in neigh_alloc() * @dev_id: Used to differentiate devices that share * the same link layer address * @dev_port: Used to differentiate devices that share diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index da84cf920b78..2d9b9d39221e 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -141,6 +141,16 @@ struct lowpan_dev { u8 priv[0] __aligned(sizeof(void *)); }; +struct lowpan_802154_neigh { + __le16 short_addr; +}; + +static inline +struct lowpan_802154_neigh *lowpan_802154_neigh(void *neigh_priv) +{ + return neigh_priv; +} + static inline struct lowpan_dev *lowpan_dev(const struct net_device *dev) { diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 4e2b30894224..8c004a0c8d64 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -81,11 +81,21 @@ static int lowpan_stop(struct net_device *dev) return 0; } +static int lowpan_neigh_construct(struct neighbour *n) +{ + struct lowpan_802154_neigh *neigh = lowpan_802154_neigh(neighbour_priv(n)); + + /* default no short_addr is available for a neighbour */ + neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC); + return 0; +} + static const struct net_device_ops lowpan_netdev_ops = { .ndo_init = lowpan_dev_init, .ndo_start_xmit = lowpan_xmit, .ndo_open = lowpan_open, .ndo_stop = lowpan_stop, + .ndo_neigh_construct = lowpan_neigh_construct, }; static void lowpan_setup(struct net_device *ldev) @@ -150,6 +160,8 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev, wdev->needed_headroom; ldev->needed_tailroom = wdev->needed_tailroom; + ldev->neigh_priv_len = sizeof(struct lowpan_802154_neigh); + ret = lowpan_register_netdevice(ldev, LOWPAN_LLTYPE_IEEE802154); if (ret < 0) { dev_put(wdev); -- cgit v1.2.3-55-g7522 From f997c55c1dc8841b3ee4df0493d0ac7966d42165 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 15 Jun 2016 21:20:23 +0200 Subject: ipv6: introduce neighbour discovery ops This patch introduces neighbour discovery ops callback structure. The idea is to separate the handling for 6LoWPAN into the 6lowpan module. These callback offers 6lowpan different handling, such as 802.15.4 short address handling or RFC6775 (Neighbor Discovery Optimization for IPv6 over 6LoWPANs). Cc: David S. Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Acked-by: YOSHIFUJI Hideaki Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ++ include/net/ndisc.h | 197 +++++++++++++++++++++++++++++++++++++++++++++- net/ipv6/addrconf.c | 13 ++- net/ipv6/ndisc.c | 101 ++++++++++++++++-------- net/ipv6/route.c | 8 +- 5 files changed, 284 insertions(+), 40 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 36e43bd422f8..890158e99159 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1456,6 +1456,8 @@ enum netdev_priv_flags { * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions * @ethtool_ops: Management operations + * @ndisc_ops: Includes callbacks for different IPv6 neighbour + * discovery handling. Necessary for e.g. 6LoWPAN. * @header_ops: Includes callbacks for creating,parsing,caching,etc * of Layer 2 headers. * @@ -1672,6 +1674,9 @@ struct net_device { #ifdef CONFIG_NET_L3_MASTER_DEV const struct l3mdev_ops *l3mdev_ops; #endif +#if IS_ENABLED(CONFIG_IPV6) + const struct ndisc_ops *ndisc_ops; +#endif const struct header_ops *header_ops; diff --git a/include/net/ndisc.h b/include/net/ndisc.h index c8962adf24d0..a5e276703cb3 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -58,6 +58,7 @@ struct inet6_dev; struct net_device; struct net_proto_family; struct sk_buff; +struct prefix_info; extern struct neigh_table nd_tbl; @@ -110,9 +111,182 @@ struct ndisc_options { #define NDISC_OPT_SPACE(len) (((len)+2+7)&~7) -struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, +struct ndisc_options *ndisc_parse_options(const struct net_device *dev, + u8 *opt, int opt_len, struct ndisc_options *ndopts); +#define NDISC_OPS_REDIRECT_DATA_SPACE 2 + +/* + * This structure defines the hooks for IPv6 neighbour discovery. + * The following hooks can be defined; unless noted otherwise, they are + * optional and can be filled with a null pointer. + * + * int (*is_useropt)(u8 nd_opt_type): + * This function is called when IPv6 decide RA userspace options. if + * this function returns 1 then the option given by nd_opt_type will + * be handled as userspace option additional to the IPv6 options. + * + * int (*parse_options)(const struct net_device *dev, + * struct nd_opt_hdr *nd_opt, + * struct ndisc_options *ndopts): + * This function is called while parsing ndisc ops and put each position + * as pointer into ndopts. If this function return unequal 0, then this + * function took care about the ndisc option, if 0 then the IPv6 ndisc + * option parser will take care about that option. + * + * void (*update)(const struct net_device *dev, struct neighbour *n, + * u32 flags, u8 icmp6_type, + * const struct ndisc_options *ndopts): + * This function is called when IPv6 ndisc updates the neighbour cache + * entry. Additional options which can be updated may be previously + * parsed by parse_opts callback and accessible over ndopts parameter. + * + * int (*opt_addr_space)(const struct net_device *dev, u8 icmp6_type, + * struct neighbour *neigh, u8 *ha_buf, + * u8 **ha): + * This function is called when the necessary option space will be + * calculated before allocating a skb. The parameters neigh, ha_buf + * abd ha are available on NDISC_REDIRECT messages only. + * + * void (*fill_addr_option)(const struct net_device *dev, + * struct sk_buff *skb, u8 icmp6_type, + * const u8 *ha): + * This function is called when the skb will finally fill the option + * fields inside skb. NOTE: this callback should fill the option + * fields to the skb which are previously indicated by opt_space + * parameter. That means the decision to add such option should + * not lost between these two callbacks, e.g. protected by interface + * up state. + * + * void (*prefix_rcv_add_addr)(struct net *net, struct net_device *dev, + * const struct prefix_info *pinfo, + * struct inet6_dev *in6_dev, + * struct in6_addr *addr, + * int addr_type, u32 addr_flags, + * bool sllao, bool tokenized, + * __u32 valid_lft, u32 prefered_lft, + * bool dev_addr_generated): + * This function is called when a RA messages is received with valid + * PIO option fields and an IPv6 address will be added to the interface + * for autoconfiguration. The parameter dev_addr_generated reports about + * if the address was based on dev->dev_addr or not. This can be used + * to add a second address if link-layer operates with two link layer + * addresses. E.g. 802.15.4 6LoWPAN. + */ +struct ndisc_ops { + int (*is_useropt)(u8 nd_opt_type); + int (*parse_options)(const struct net_device *dev, + struct nd_opt_hdr *nd_opt, + struct ndisc_options *ndopts); + void (*update)(const struct net_device *dev, struct neighbour *n, + u32 flags, u8 icmp6_type, + const struct ndisc_options *ndopts); + int (*opt_addr_space)(const struct net_device *dev, u8 icmp6_type, + struct neighbour *neigh, u8 *ha_buf, + u8 **ha); + void (*fill_addr_option)(const struct net_device *dev, + struct sk_buff *skb, u8 icmp6_type, + const u8 *ha); + void (*prefix_rcv_add_addr)(struct net *net, struct net_device *dev, + const struct prefix_info *pinfo, + struct inet6_dev *in6_dev, + struct in6_addr *addr, + int addr_type, u32 addr_flags, + bool sllao, bool tokenized, + __u32 valid_lft, u32 prefered_lft, + bool dev_addr_generated); +}; + +#if IS_ENABLED(CONFIG_IPV6) +static inline int ndisc_ops_is_useropt(const struct net_device *dev, + u8 nd_opt_type) +{ + if (dev->ndisc_ops && dev->ndisc_ops->is_useropt) + return dev->ndisc_ops->is_useropt(nd_opt_type); + else + return 0; +} + +static inline int ndisc_ops_parse_options(const struct net_device *dev, + struct nd_opt_hdr *nd_opt, + struct ndisc_options *ndopts) +{ + if (dev->ndisc_ops && dev->ndisc_ops->parse_options) + return dev->ndisc_ops->parse_options(dev, nd_opt, ndopts); + else + return 0; +} + +static inline void ndisc_ops_update(const struct net_device *dev, + struct neighbour *n, u32 flags, + u8 icmp6_type, + const struct ndisc_options *ndopts) +{ + if (dev->ndisc_ops && dev->ndisc_ops->update) + dev->ndisc_ops->update(dev, n, flags, icmp6_type, ndopts); +} + +static inline int ndisc_ops_opt_addr_space(const struct net_device *dev, + u8 icmp6_type) +{ + if (dev->ndisc_ops && dev->ndisc_ops->opt_addr_space && + icmp6_type != NDISC_REDIRECT) + return dev->ndisc_ops->opt_addr_space(dev, icmp6_type, NULL, + NULL, NULL); + else + return 0; +} + +static inline int ndisc_ops_redirect_opt_addr_space(const struct net_device *dev, + struct neighbour *neigh, + u8 *ha_buf, u8 **ha) +{ + if (dev->ndisc_ops && dev->ndisc_ops->opt_addr_space) + return dev->ndisc_ops->opt_addr_space(dev, NDISC_REDIRECT, + neigh, ha_buf, ha); + else + return 0; +} + +static inline void ndisc_ops_fill_addr_option(const struct net_device *dev, + struct sk_buff *skb, + u8 icmp6_type) +{ + if (dev->ndisc_ops && dev->ndisc_ops->fill_addr_option && + icmp6_type != NDISC_REDIRECT) + dev->ndisc_ops->fill_addr_option(dev, skb, icmp6_type, NULL); +} + +static inline void ndisc_ops_fill_redirect_addr_option(const struct net_device *dev, + struct sk_buff *skb, + const u8 *ha) +{ + if (dev->ndisc_ops && dev->ndisc_ops->fill_addr_option) + dev->ndisc_ops->fill_addr_option(dev, skb, NDISC_REDIRECT, ha); +} + +static inline void ndisc_ops_prefix_rcv_add_addr(struct net *net, + struct net_device *dev, + const struct prefix_info *pinfo, + struct inet6_dev *in6_dev, + struct in6_addr *addr, + int addr_type, u32 addr_flags, + bool sllao, bool tokenized, + __u32 valid_lft, + u32 prefered_lft, + bool dev_addr_generated) +{ + if (dev->ndisc_ops && dev->ndisc_ops->prefix_rcv_add_addr) + dev->ndisc_ops->prefix_rcv_add_addr(net, dev, pinfo, in6_dev, + addr, addr_type, + addr_flags, sllao, + tokenized, valid_lft, + prefered_lft, + dev_addr_generated); +} +#endif + /* * Return the padding between the option length and the start of the * link addr. Currently only IP-over-InfiniBand needs this, although @@ -132,11 +306,25 @@ static inline int __ndisc_opt_addr_space(unsigned char addr_len, int pad) return NDISC_OPT_SPACE(addr_len + pad); } -static inline int ndisc_opt_addr_space(struct net_device *dev) +#if IS_ENABLED(CONFIG_IPV6) +static inline int ndisc_opt_addr_space(struct net_device *dev, u8 icmp6_type) +{ + return __ndisc_opt_addr_space(dev->addr_len, + ndisc_addr_option_pad(dev->type)) + + ndisc_ops_opt_addr_space(dev, icmp6_type); +} + +static inline int ndisc_redirect_opt_addr_space(struct net_device *dev, + struct neighbour *neigh, + u8 *ops_data_buf, + u8 **ops_data) { return __ndisc_opt_addr_space(dev->addr_len, - ndisc_addr_option_pad(dev->type)); + ndisc_addr_option_pad(dev->type)) + + ndisc_ops_redirect_opt_addr_space(dev, neigh, ops_data_buf, + ops_data); } +#endif static inline u8 *__ndisc_opt_addr_data(struct nd_opt_hdr *p, unsigned char addr_len, int prepad) @@ -205,6 +393,9 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target); int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir); +void ndisc_update(const struct net_device *dev, struct neighbour *neigh, + const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type, + struct ndisc_options *ndopts); /* * IGMP diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 44c1cefdb299..b6e9bdc610f2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2531,7 +2531,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) if (pinfo->autoconf && in6_dev->cnf.autoconf) { struct in6_addr addr; - bool tokenized = false; + bool tokenized = false, dev_addr_generated = false; if (pinfo->prefix_len == 64) { memcpy(&addr, &pinfo->prefix, 8); @@ -2550,6 +2550,8 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) } else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { goto put; + } else { + dev_addr_generated = true; } goto ok; } @@ -2565,6 +2567,15 @@ ok: prefered_lft); if (err) goto put; + + /* Ignore error case here because previous prefix add addr was + * successful which will be notified. + */ + ndisc_ops_prefix_rcv_add_addr(net, dev, pinfo, in6_dev, &addr, + addr_type, addr_flags, sllao, + tokenized, valid_lft, + prefered_lft, + dev_addr_generated); } inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo); put: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index a7b9468e684a..2f4afd17954b 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -172,10 +172,19 @@ static void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data, } static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type, - void *data) + void *data, u8 icmp6_type) { __ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len, ndisc_addr_option_pad(skb->dev->type)); + ndisc_ops_fill_addr_option(skb->dev, skb, icmp6_type); +} + +static inline void ndisc_fill_redirect_addr_option(struct sk_buff *skb, + void *ha, + const u8 *ops_data) +{ + ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha, NDISC_REDIRECT); + ndisc_ops_fill_redirect_addr_option(skb->dev, skb, ops_data); } static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, @@ -191,24 +200,28 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, return cur <= end && cur->nd_opt_type == type ? cur : NULL; } -static inline int ndisc_is_useropt(struct nd_opt_hdr *opt) +static inline int ndisc_is_useropt(const struct net_device *dev, + struct nd_opt_hdr *opt) { return opt->nd_opt_type == ND_OPT_RDNSS || - opt->nd_opt_type == ND_OPT_DNSSL; + opt->nd_opt_type == ND_OPT_DNSSL || + ndisc_ops_is_useropt(dev, opt->nd_opt_type); } -static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, +static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev, + struct nd_opt_hdr *cur, struct nd_opt_hdr *end) { if (!cur || !end || cur >= end) return NULL; do { cur = ((void *)cur) + (cur->nd_opt_len << 3); - } while (cur < end && !ndisc_is_useropt(cur)); - return cur <= end && ndisc_is_useropt(cur) ? cur : NULL; + } while (cur < end && !ndisc_is_useropt(dev, cur)); + return cur <= end && ndisc_is_useropt(dev, cur) ? cur : NULL; } -struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, +struct ndisc_options *ndisc_parse_options(const struct net_device *dev, + u8 *opt, int opt_len, struct ndisc_options *ndopts) { struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt; @@ -223,6 +236,8 @@ struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, l = nd_opt->nd_opt_len << 3; if (opt_len < l || l == 0) return NULL; + if (ndisc_ops_parse_options(dev, nd_opt, ndopts)) + goto next_opt; switch (nd_opt->nd_opt_type) { case ND_OPT_SOURCE_LL_ADDR: case ND_OPT_TARGET_LL_ADDR: @@ -249,7 +264,7 @@ struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, break; #endif default: - if (ndisc_is_useropt(nd_opt)) { + if (ndisc_is_useropt(dev, nd_opt)) { ndopts->nd_useropts_end = nd_opt; if (!ndopts->nd_useropts) ndopts->nd_useropts = nd_opt; @@ -266,6 +281,7 @@ struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, nd_opt->nd_opt_len); } } +next_opt: opt_len -= l; nd_opt = ((void *)nd_opt) + l; } @@ -515,7 +531,8 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, if (!dev->addr_len) inc_opt = 0; if (inc_opt) - optlen += ndisc_opt_addr_space(dev); + optlen += ndisc_opt_addr_space(dev, + NDISC_NEIGHBOUR_ADVERTISEMENT); skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) @@ -534,8 +551,8 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, if (inc_opt) ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, - dev->dev_addr); - + dev->dev_addr, + NDISC_NEIGHBOUR_ADVERTISEMENT); ndisc_send_skb(skb, daddr, src_addr); } @@ -580,7 +597,8 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, if (ipv6_addr_any(saddr)) inc_opt = false; if (inc_opt) - optlen += ndisc_opt_addr_space(dev); + optlen += ndisc_opt_addr_space(dev, + NDISC_NEIGHBOUR_SOLICITATION); skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) @@ -596,7 +614,8 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, if (inc_opt) ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, - dev->dev_addr); + dev->dev_addr, + NDISC_NEIGHBOUR_SOLICITATION); ndisc_send_skb(skb, daddr, saddr); } @@ -632,7 +651,7 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, } #endif if (send_sllao) - optlen += ndisc_opt_addr_space(dev); + optlen += ndisc_opt_addr_space(dev, NDISC_ROUTER_SOLICITATION); skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) @@ -647,7 +666,8 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, if (send_sllao) ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, - dev->dev_addr); + dev->dev_addr, + NDISC_ROUTER_SOLICITATION); ndisc_send_skb(skb, daddr, saddr); } @@ -708,6 +728,15 @@ static int pndisc_is_router(const void *pkey, return ret; } +void ndisc_update(const struct net_device *dev, struct neighbour *neigh, + const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type, + struct ndisc_options *ndopts) +{ + neigh_update(neigh, lladdr, new, flags); + /* report ndisc ops about neighbour update */ + ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts); +} + static void ndisc_recv_ns(struct sk_buff *skb) { struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); @@ -744,7 +773,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) return; } - if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) { + if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) { ND_PRINTK(2, warn, "NS: invalid ND options\n"); return; } @@ -862,9 +891,10 @@ have_ifp: neigh = __neigh_lookup(&nd_tbl, saddr, dev, !inc || lladdr || !dev->addr_len); if (neigh) - neigh_update(neigh, lladdr, NUD_STALE, + ndisc_update(dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| - NEIGH_UPDATE_F_OVERRIDE); + NEIGH_UPDATE_F_OVERRIDE, + NDISC_NEIGHBOUR_SOLICITATION, &ndopts); if (neigh || !dev->header_ops) { ndisc_send_na(dev, saddr, &msg->target, !!is_router, true, (ifp != NULL && inc), inc); @@ -917,7 +947,7 @@ static void ndisc_recv_na(struct sk_buff *skb) idev->cnf.drop_unsolicited_na) return; - if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) { + if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) { ND_PRINTK(2, warn, "NS: invalid ND option\n"); return; } @@ -973,12 +1003,13 @@ static void ndisc_recv_na(struct sk_buff *skb) goto out; } - neigh_update(neigh, lladdr, + ndisc_update(dev, neigh, lladdr, msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)| NEIGH_UPDATE_F_OVERRIDE_ISROUTER| - (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0)); + (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0), + NDISC_NEIGHBOUR_ADVERTISEMENT, &ndopts); if ((old_flags & ~neigh->flags) & NTF_ROUTER) { /* @@ -1023,7 +1054,7 @@ static void ndisc_recv_rs(struct sk_buff *skb) goto out; /* Parse ND options */ - if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) { + if (!ndisc_parse_options(skb->dev, rs_msg->opt, ndoptlen, &ndopts)) { ND_PRINTK(2, notice, "NS: invalid ND option, ignored\n"); goto out; } @@ -1037,10 +1068,11 @@ static void ndisc_recv_rs(struct sk_buff *skb) neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1); if (neigh) { - neigh_update(neigh, lladdr, NUD_STALE, + ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE| - NEIGH_UPDATE_F_OVERRIDE_ISROUTER); + NEIGH_UPDATE_F_OVERRIDE_ISROUTER, + NDISC_ROUTER_SOLICITATION, &ndopts); neigh_release(neigh); } out: @@ -1141,7 +1173,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } - if (!ndisc_parse_options(opt, optlen, &ndopts)) { + if (!ndisc_parse_options(skb->dev, opt, optlen, &ndopts)) { ND_PRINTK(2, warn, "RA: invalid ND options\n"); return; } @@ -1335,11 +1367,12 @@ skip_linkparms: goto out; } } - neigh_update(neigh, lladdr, NUD_STALE, + ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE_ISROUTER| - NEIGH_UPDATE_F_ISROUTER); + NEIGH_UPDATE_F_ISROUTER, + NDISC_ROUTER_ADVERTISEMENT, &ndopts); } if (!ipv6_accept_ra(in6_dev)) { @@ -1427,7 +1460,8 @@ skip_routeinfo: struct nd_opt_hdr *p; for (p = ndopts.nd_useropts; p; - p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) { + p = ndisc_next_useropt(skb->dev, p, + ndopts.nd_useropts_end)) { ndisc_ra_useropt(skb, p); } } @@ -1465,7 +1499,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) return; } - if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) + if (!ndisc_parse_options(skb->dev, msg->opt, ndoptlen, &ndopts)) return; if (!ndopts.nd_opts_rh) { @@ -1510,7 +1544,8 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) struct dst_entry *dst; struct flowi6 fl6; int rd_len; - u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; + u8 ha_buf[MAX_ADDR_LEN], *ha = NULL, + ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL; int oif = l3mdev_fib_oif(dev); bool ret; @@ -1569,7 +1604,9 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) memcpy(ha_buf, neigh->ha, dev->addr_len); read_unlock_bh(&neigh->lock); ha = ha_buf; - optlen += ndisc_opt_addr_space(dev); + optlen += ndisc_redirect_opt_addr_space(dev, neigh, + ops_data_buf, + &ops_data); } else read_unlock_bh(&neigh->lock); @@ -1600,7 +1637,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) */ if (ha) - ndisc_fill_addr_option(buff, ND_OPT_TARGET_LL_ADDR, ha); + ndisc_fill_redirect_addr_option(buff, ha, ops_data); /* * build redirect option and copy skb over to the new packet. diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d51a1a48b839..9e1516785dac 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2201,7 +2201,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu * first-hop router for the specified ICMP Destination Address. */ - if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) { + if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) { net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); return; } @@ -2236,12 +2236,12 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu * We have finally decided to accept it. */ - neigh_update(neigh, lladdr, NUD_STALE, + ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE| (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| - NEIGH_UPDATE_F_ISROUTER)) - ); + NEIGH_UPDATE_F_ISROUTER)), + NDISC_REDIRECT, &ndopts); nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL); if (!nrt) -- cgit v1.2.3-55-g7522 From 7c46a640de6fcc4f35d0702710356a024eadf68f Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 16 Jun 2016 12:21:00 -0700 Subject: net: Merge VXLAN and GENEVE push notifiers into a single notifier This patch merges the notifiers for VXLAN and GENEVE into a single UDP tunnel notifier. The idea is that we will want to only have to make one notifier call to receive the list of ports for VXLAN and GENEVE tunnels that need to be offloaded. In addition we add a new set of ndo functions named ndo_udp_tunnel_add and ndo_udp_tunnel_del that are meant to allow us to track the tunnel meta-data such as port and address family as tunnels are added and removed. The tunnel meta-data is now transported in a structure named udp_tunnel_info which for now carries the type, address family, and port number. In the future this could be updated so that we can include a tuple of values including things such as the destination IP address and other fields. I also ended up going with a naming scheme that consisted of using the prefix udp_tunnel on function names. I applied this to the notifier and ndo ops as well so that it hopefully points to the fact that these are primarily used in the udp_tunnel functions. Signed-off-by: Alexander Duyck Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- drivers/net/geneve.c | 2 +- drivers/net/vxlan.c | 2 +- include/linux/netdevice.h | 22 ++++++++++++++++++++-- include/net/geneve.h | 3 +-- include/net/udp_tunnel.h | 6 ++++++ include/net/vxlan.h | 4 ++-- net/ipv4/udp_tunnel.c | 10 ++++++++++ 7 files changed, 41 insertions(+), 8 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index af6f676ca444..aa61708bea69 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1497,7 +1497,7 @@ static int geneve_netdevice_event(struct notifier_block *unused, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - if (event == NETDEV_OFFLOAD_PUSH_GENEVE) + if (event == NETDEV_UDP_TUNNEL_PUSH_INFO) geneve_push_rx_ports(dev); return NOTIFY_DONE; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index adbd979da22d..31aeec967175 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3239,7 +3239,7 @@ static int vxlan_netdevice_event(struct notifier_block *unused, if (event == NETDEV_UNREGISTER) vxlan_handle_lowerdev_unregister(vn, dev); - else if (event == NETDEV_OFFLOAD_PUSH_VXLAN) + else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO) vxlan_push_rx_ports(dev); return NOTIFY_DONE; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 890158e99159..577d2a1814b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -61,6 +61,8 @@ struct wireless_dev; /* 802.15.4 specific */ struct wpan_dev; struct mpls_dev; +/* UDP Tunnel offloads */ +struct udp_tunnel_info; void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -1050,6 +1052,19 @@ struct tc_to_netdev { * address family that vxlan is not listening to anymore. The operation * is protected by the vxlan_net->sock_lock. * + * void (*ndo_udp_tunnel_add)(struct net_device *dev, + * struct udp_tunnel_info *ti); + * Called by UDP tunnel to notify a driver about the UDP port and socket + * address family that a UDP tunnel is listnening to. It is called only + * when a new port starts listening. The operation is protected by the + * RTNL. + * + * void (*ndo_udp_tunnel_del)(struct net_device *dev, + * struct udp_tunnel_info *ti); + * Called by UDP tunnel to notify the driver about a UDP port and socket + * address family that the UDP tunnel is not listening to anymore. The + * operation is protected by the RTNL. + * * void* (*ndo_dfwd_add_station)(struct net_device *pdev, * struct net_device *dev) * Called by upper layer devices to accelerate switching or other @@ -1269,6 +1284,10 @@ struct net_device_ops { void (*ndo_del_geneve_port)(struct net_device *dev, sa_family_t sa_family, __be16 port); + void (*ndo_udp_tunnel_add)(struct net_device *dev, + struct udp_tunnel_info *ti); + void (*ndo_udp_tunnel_del)(struct net_device *dev, + struct udp_tunnel_info *ti); void* (*ndo_dfwd_add_station)(struct net_device *pdev, struct net_device *dev); void (*ndo_dfwd_del_station)(struct net_device *pdev, @@ -2255,8 +2274,7 @@ struct netdev_lag_lower_state_info { #define NETDEV_BONDING_INFO 0x0019 #define NETDEV_PRECHANGEUPPER 0x001A #define NETDEV_CHANGELOWERSTATE 0x001B -#define NETDEV_OFFLOAD_PUSH_VXLAN 0x001C -#define NETDEV_OFFLOAD_PUSH_GENEVE 0x001D +#define NETDEV_UDP_TUNNEL_PUSH_INFO 0x001C int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); diff --git a/include/net/geneve.h b/include/net/geneve.h index f8aff18d6702..3410c4b5a382 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -61,8 +61,7 @@ struct genevehdr { static inline void geneve_get_rx_port(struct net_device *netdev) { - ASSERT_RTNL(); - call_netdevice_notifiers(NETDEV_OFFLOAD_PUSH_GENEVE, netdev); + udp_tunnel_get_rx_info(netdev); } #ifdef CONFIG_INET diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 71afbea873a0..1c9408a04213 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -117,6 +117,12 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type); void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type); +static inline void udp_tunnel_get_rx_info(struct net_device *dev) +{ + ASSERT_RTNL(); + call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev); +} + /* Transmit the skb using UDP encapsulation. */ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 7d944941f32f..c62e2ed1c3af 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -4,6 +4,7 @@ #include #include #include +#include /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -390,8 +391,7 @@ static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset) static inline void vxlan_get_rx_port(struct net_device *netdev) { - ASSERT_RTNL(); - call_netdevice_notifiers(NETDEV_OFFLOAD_PUSH_VXLAN, netdev); + udp_tunnel_get_rx_info(netdev); } static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 8174753e6494..683e494d9000 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -79,6 +79,11 @@ EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); static void __udp_tunnel_push_rx_port(struct net_device *dev, struct udp_tunnel_info *ti) { + if (dev->netdev_ops->ndo_udp_tunnel_add) { + dev->netdev_ops->ndo_udp_tunnel_add(dev, ti); + return; + } + switch (ti->type) { case UDP_TUNNEL_TYPE_VXLAN: if (!dev->netdev_ops->ndo_add_vxlan_port) @@ -137,6 +142,11 @@ EXPORT_SYMBOL_GPL(udp_tunnel_notify_add_rx_port); static void __udp_tunnel_pull_rx_port(struct net_device *dev, struct udp_tunnel_info *ti) { + if (dev->netdev_ops->ndo_udp_tunnel_del) { + dev->netdev_ops->ndo_udp_tunnel_del(dev, ti); + return; + } + switch (ti->type) { case UDP_TUNNEL_TYPE_VXLAN: if (!dev->netdev_ops->ndo_del_vxlan_port) -- cgit v1.2.3-55-g7522 From 1938ee1fd3de74d761a60806b048df652666afec Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 16 Jun 2016 12:23:12 -0700 Subject: net: Remove deprecated tunnel specific UDP offload functions Now that we have all the drivers using udp_tunnel_get_rx_ports, ndo_add_udp_enc_rx_port, and ndo_del_udp_enc_rx_port we can drop the function calls that were specific to VXLAN and GENEVE. Signed-off-by: Alexander Duyck Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/netdevice.h | 38 ----------------------- include/net/geneve.h | 5 --- include/net/vxlan.h | 5 --- net/ipv4/udp_tunnel.c | 79 +++++++++-------------------------------------- 4 files changed, 14 insertions(+), 113 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 577d2a1814b1..e84d9d23c2d5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1026,32 +1026,6 @@ struct tc_to_netdev { * not implement this, it is assumed that the hw is not able to have * multiple net devices on single physical port. * - * void (*ndo_add_vxlan_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); - * Called by vxlan to notify a driver about the UDP port and socket - * address family that vxlan is listening to. It is called only when - * a new port starts listening. The operation is protected by the - * vxlan_net->sock_lock. - * - * void (*ndo_add_geneve_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); - * Called by geneve to notify a driver about the UDP port and socket - * address family that geneve is listnening to. It is called only when - * a new port starts listening. The operation is protected by the - * geneve_net->sock_lock. - * - * void (*ndo_del_geneve_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); - * Called by geneve to notify the driver about a UDP port and socket - * address family that geneve is not listening to anymore. The operation - * is protected by the geneve_net->sock_lock. - * - * void (*ndo_del_vxlan_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); - * Called by vxlan to notify the driver about a UDP port and socket - * address family that vxlan is not listening to anymore. The operation - * is protected by the vxlan_net->sock_lock. - * * void (*ndo_udp_tunnel_add)(struct net_device *dev, * struct udp_tunnel_info *ti); * Called by UDP tunnel to notify a driver about the UDP port and socket @@ -1272,18 +1246,6 @@ struct net_device_ops { struct netdev_phys_item_id *ppid); int (*ndo_get_phys_port_name)(struct net_device *dev, char *name, size_t len); - void (*ndo_add_vxlan_port)(struct net_device *dev, - sa_family_t sa_family, - __be16 port); - void (*ndo_del_vxlan_port)(struct net_device *dev, - sa_family_t sa_family, - __be16 port); - void (*ndo_add_geneve_port)(struct net_device *dev, - sa_family_t sa_family, - __be16 port); - void (*ndo_del_geneve_port)(struct net_device *dev, - sa_family_t sa_family, - __be16 port); void (*ndo_udp_tunnel_add)(struct net_device *dev, struct udp_tunnel_info *ti); void (*ndo_udp_tunnel_del)(struct net_device *dev, diff --git a/include/net/geneve.h b/include/net/geneve.h index 3410c4b5a382..ec0327d4331b 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -59,11 +59,6 @@ struct genevehdr { struct geneve_opt options[]; }; -static inline void geneve_get_rx_port(struct net_device *netdev) -{ - udp_tunnel_get_rx_info(netdev); -} - #ifdef CONFIG_INET struct net_device *geneve_dev_create_fb(struct net *net, const char *name, u8 name_assign_type, u16 dst_port); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index c62e2ed1c3af..b96d0360c095 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -389,11 +389,6 @@ static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset) return vni_field; } -static inline void vxlan_get_rx_port(struct net_device *netdev) -{ - udp_tunnel_get_rx_info(netdev); -} - static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) { return vs->sock->sk->sk_family; diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 683e494d9000..58bd39fb14b4 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -76,47 +76,20 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); -static void __udp_tunnel_push_rx_port(struct net_device *dev, - struct udp_tunnel_info *ti) -{ - if (dev->netdev_ops->ndo_udp_tunnel_add) { - dev->netdev_ops->ndo_udp_tunnel_add(dev, ti); - return; - } - - switch (ti->type) { - case UDP_TUNNEL_TYPE_VXLAN: - if (!dev->netdev_ops->ndo_add_vxlan_port) - break; - - dev->netdev_ops->ndo_add_vxlan_port(dev, - ti->sa_family, - ti->port); - break; - case UDP_TUNNEL_TYPE_GENEVE: - if (!dev->netdev_ops->ndo_add_geneve_port) - break; - - dev->netdev_ops->ndo_add_geneve_port(dev, - ti->sa_family, - ti->port); - break; - default: - break; - } -} - void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, unsigned short type) { struct sock *sk = sock->sk; struct udp_tunnel_info ti; + if (!dev->netdev_ops->ndo_udp_tunnel_add) + return; + ti.type = type; ti.sa_family = sk->sk_family; ti.port = inet_sk(sk)->inet_sport; - __udp_tunnel_push_rx_port(dev, &ti); + dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti); } EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port); @@ -133,42 +106,15 @@ void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type) ti.port = inet_sk(sk)->inet_sport; rcu_read_lock(); - for_each_netdev_rcu(net, dev) - __udp_tunnel_push_rx_port(dev, &ti); + for_each_netdev_rcu(net, dev) { + if (!dev->netdev_ops->ndo_udp_tunnel_add) + continue; + dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti); + } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(udp_tunnel_notify_add_rx_port); -static void __udp_tunnel_pull_rx_port(struct net_device *dev, - struct udp_tunnel_info *ti) -{ - if (dev->netdev_ops->ndo_udp_tunnel_del) { - dev->netdev_ops->ndo_udp_tunnel_del(dev, ti); - return; - } - - switch (ti->type) { - case UDP_TUNNEL_TYPE_VXLAN: - if (!dev->netdev_ops->ndo_del_vxlan_port) - break; - - dev->netdev_ops->ndo_del_vxlan_port(dev, - ti->sa_family, - ti->port); - break; - case UDP_TUNNEL_TYPE_GENEVE: - if (!dev->netdev_ops->ndo_del_geneve_port) - break; - - dev->netdev_ops->ndo_del_geneve_port(dev, - ti->sa_family, - ti->port); - break; - default: - break; - } -} - /* Notify netdevs that UDP port is no more listening */ void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type) { @@ -182,8 +128,11 @@ void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type) ti.port = inet_sk(sk)->inet_sport; rcu_read_lock(); - for_each_netdev_rcu(net, dev) - __udp_tunnel_pull_rx_port(dev, &ti); + for_each_netdev_rcu(net, dev) { + if (!dev->netdev_ops->ndo_udp_tunnel_del) + continue; + dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti); + } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(udp_tunnel_notify_del_rx_port); -- cgit v1.2.3-55-g7522 From 08294a26e15d7baf1e14ee569e9f2bc82a7ae768 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 30 Jun 2016 14:45:35 +0800 Subject: net: introduce NETDEV_CHANGE_TX_QUEUE_LEN This patch introduces a new event - NETDEV_CHANGE_TX_QUEUE_LEN, this will be triggered when tx_queue_len. It could be used by net device who want to do some processing at that time. An example is tun who may want to resize tx array when tx_queue_len is changed. Cc: John Fastabend Signed-off-by: Jason Wang Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/net-sysfs.c | 15 ++++++++++++++- net/core/rtnetlink.c | 16 ++++++++++++---- 3 files changed, 27 insertions(+), 5 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e84d9d23c2d5..7dc2ec74122a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2237,6 +2237,7 @@ struct netdev_lag_lower_state_info { #define NETDEV_PRECHANGEUPPER 0x001A #define NETDEV_CHANGELOWERSTATE 0x001B #define NETDEV_UDP_TUNNEL_PUSH_INFO 0x001C +#define NETDEV_CHANGE_TX_QUEUE_LEN 0x001E int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 7a0b616557ab..6e4f34721080 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -322,7 +322,20 @@ NETDEVICE_SHOW_RW(flags, fmt_hex); static int change_tx_queue_len(struct net_device *dev, unsigned long new_len) { - dev->tx_queue_len = new_len; + int res, orig_len = dev->tx_queue_len; + + if (new_len != orig_len) { + dev->tx_queue_len = new_len; + res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev); + res = notifier_to_errno(res); + if (res) { + netdev_err(dev, + "refused to change device tx_queue_len\n"); + dev->tx_queue_len = orig_len; + return -EFAULT; + } + } + return 0; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index cfed7bc14ee6..a9e3805af739 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1927,11 +1927,19 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_TXQLEN]) { unsigned long value = nla_get_u32(tb[IFLA_TXQLEN]); - - if (dev->tx_queue_len ^ value) + unsigned long orig_len = dev->tx_queue_len; + + if (dev->tx_queue_len ^ value) { + dev->tx_queue_len = value; + err = call_netdevice_notifiers( + NETDEV_CHANGE_TX_QUEUE_LEN, dev); + err = notifier_to_errno(err); + if (err) { + dev->tx_queue_len = orig_len; + goto errout; + } status |= DO_SETLINK_NOTIFY; - - dev->tx_queue_len = value; + } } if (tb[IFLA_OPERSTATE]) -- cgit v1.2.3-55-g7522 From 7ce856aaaf13a5dc969ac5f998e5daaf1abe4cd2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 4 Jul 2016 08:23:12 +0200 Subject: mlxsw: spectrum: Add couple of lower device helper functions Add functions that iterate over lower devices and find port device. As a dependency add netdev_for_each_all_lower_dev and netdev_for_each_all_lower_dev_rcu macro with netdev_all_lower_get_next and netdev_all_lower_get_next_rcu shelpers. Also, add functions to return mlxsw struct according to lower device found and mlxsw_port struct with a reference to lower device. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 65 ++++++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 3 ++ include/linux/netdevice.h | 18 +++++++ net/core/dev.c | 46 ++++++++++++++++++ 4 files changed, 127 insertions(+), 5 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index f0799898817d..f54fd6ab6ec9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2567,6 +2567,66 @@ static struct mlxsw_driver mlxsw_sp_driver = { .profile = &mlxsw_sp_config_profile, }; +static bool mlxsw_sp_port_dev_check(const struct net_device *dev) +{ + return dev->netdev_ops == &mlxsw_sp_port_netdev_ops; +} + +static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev) +{ + struct net_device *lower_dev; + struct list_head *iter; + + if (mlxsw_sp_port_dev_check(dev)) + return netdev_priv(dev); + + netdev_for_each_all_lower_dev(dev, lower_dev, iter) { + if (mlxsw_sp_port_dev_check(lower_dev)) + return netdev_priv(lower_dev); + } + return NULL; +} + +static struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev); + return mlxsw_sp_port ? mlxsw_sp_port->mlxsw_sp : NULL; +} + +static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev) +{ + struct net_device *lower_dev; + struct list_head *iter; + + if (mlxsw_sp_port_dev_check(dev)) + return netdev_priv(dev); + + netdev_for_each_all_lower_dev_rcu(dev, lower_dev, iter) { + if (mlxsw_sp_port_dev_check(lower_dev)) + return netdev_priv(lower_dev); + } + return NULL; +} + +struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + + rcu_read_lock(); + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev); + if (mlxsw_sp_port) + dev_hold(mlxsw_sp_port->dev); + rcu_read_unlock(); + return mlxsw_sp_port; +} + +void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port) +{ + dev_put(mlxsw_sp_port->dev); +} + static bool mlxsw_sp_lag_port_fid_member(struct mlxsw_sp_port *lag_port, u16 fid) { @@ -2647,11 +2707,6 @@ int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid) return mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_port, fid); } -static bool mlxsw_sp_port_dev_check(const struct net_device *dev) -{ - return dev->netdev_ops == &mlxsw_sp_port_netdev_ops; -} - static bool mlxsw_sp_master_bridge_check(struct mlxsw_sp *mlxsw_sp, struct net_device *br_dev) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 877a8793fd97..fefff255b30d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -292,6 +292,9 @@ struct mlxsw_sp_port { struct list_head vports_list; }; +struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev); +void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port); + static inline bool mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port) { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7dc2ec74122a..0c6ee2c5099f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3804,12 +3804,30 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter); + #define netdev_for_each_lower_dev(dev, ldev, iter) \ for (iter = (dev)->adj_list.lower.next, \ ldev = netdev_lower_get_next(dev, &(iter)); \ ldev; \ ldev = netdev_lower_get_next(dev, &(iter))) +struct net_device *netdev_all_lower_get_next(struct net_device *dev, + struct list_head **iter); +struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, + struct list_head **iter); + +#define netdev_for_each_all_lower_dev(dev, ldev, iter) \ + for (iter = (dev)->all_adj_list.lower.next, \ + ldev = netdev_all_lower_get_next(dev, &(iter)); \ + ldev; \ + ldev = netdev_all_lower_get_next(dev, &(iter))) + +#define netdev_for_each_all_lower_dev_rcu(dev, ldev, iter) \ + for (iter = (dev)->all_adj_list.lower.next, \ + ldev = netdev_all_lower_get_next_rcu(dev, &(iter)); \ + ldev; \ + ldev = netdev_all_lower_get_next_rcu(dev, &(iter))) + void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); struct net_device *netdev_master_upper_dev_get(struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index aba10d2a8bc3..a4f3b0a9aeaf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5444,6 +5444,52 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter) } EXPORT_SYMBOL(netdev_lower_get_next); +/** + * netdev_all_lower_get_next - Get the next device from all lower neighbour list + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next netdev_adjacent from the dev's all lower neighbour + * list, starting from iter position. The caller must hold RTNL lock or + * its own locking that guarantees that the neighbour all lower + * list will remain unchanged. + */ +struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter) +{ + struct netdev_adjacent *lower; + + lower = list_entry(*iter, struct netdev_adjacent, list); + + if (&lower->list == &dev->all_adj_list.lower) + return NULL; + + *iter = lower->list.next; + + return lower->dev; +} +EXPORT_SYMBOL(netdev_all_lower_get_next); + +/** + * netdev_all_lower_get_next_rcu - Get the next device from all + * lower neighbour list, RCU variant + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next netdev_adjacent from the dev's all lower neighbour + * list, starting from iter position. The caller must hold RCU read lock. + */ +struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *lower; + + lower = list_first_or_null_rcu(&dev->all_adj_list.lower, + struct netdev_adjacent, list); + + return lower ? lower->dev : NULL; +} +EXPORT_SYMBOL(netdev_all_lower_get_next_rcu); + /** * netdev_lower_get_first_private_rcu - Get the first ->private from the * lower neighbour list, RCU -- cgit v1.2.3-55-g7522 From 503eebc265dcf5c512454fd5a6b6673ea4f1d7f2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 5 Jul 2016 11:27:37 +0200 Subject: net: add dev arg to ndo_neigh_construct/destroy As the following patch will allow upper devices to follow the call down lower devices, we need to add dev here and not rely on n->dev. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker_main.c | 3 ++- include/linux/netdevice.h | 6 ++++-- net/atm/clip.c | 2 +- net/core/neighbour.c | 4 ++-- net/ieee802154/6lowpan/core.c | 2 +- 5 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 28b775e5a9ad..f0b09b05ed3f 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -1996,7 +1996,8 @@ static int rocker_port_change_proto_down(struct net_device *dev, return 0; } -static void rocker_port_neigh_destroy(struct neighbour *n) +static void rocker_port_neigh_destroy(struct net_device *dev, + struct neighbour *n) { struct rocker_port *rocker_port = netdev_priv(n->dev); int err; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0c6ee2c5099f..91af73c9dd51 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1209,8 +1209,10 @@ struct net_device_ops { netdev_features_t features); int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); - int (*ndo_neigh_construct)(struct neighbour *n); - void (*ndo_neigh_destroy)(struct neighbour *n); + int (*ndo_neigh_construct)(struct net_device *dev, + struct neighbour *n); + void (*ndo_neigh_destroy)(struct net_device *dev, + struct neighbour *n); int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], diff --git a/net/atm/clip.c b/net/atm/clip.c index e07f551a863c..53b4ac09e7b7 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -286,7 +286,7 @@ static const struct neigh_ops clip_neigh_ops = { .connected_output = neigh_direct_output, }; -static int clip_constructor(struct neighbour *neigh) +static int clip_constructor(struct net_device *dev, struct neighbour *neigh) { struct atmarp_entry *entry = neighbour_priv(neigh); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 510cd62fcb99..952aabb5aa56 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -473,7 +473,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, } if (dev->netdev_ops->ndo_neigh_construct) { - error = dev->netdev_ops->ndo_neigh_construct(n); + error = dev->netdev_ops->ndo_neigh_construct(dev, n); if (error < 0) { rc = ERR_PTR(error); goto out_neigh_release; @@ -701,7 +701,7 @@ void neigh_destroy(struct neighbour *neigh) neigh->arp_queue_len_bytes = 0; if (dev->netdev_ops->ndo_neigh_destroy) - dev->netdev_ops->ndo_neigh_destroy(neigh); + dev->netdev_ops->ndo_neigh_destroy(dev, neigh); dev_put(dev); neigh_parms_put(neigh->parms); diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 8c004a0c8d64..935ab932e841 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -81,7 +81,7 @@ static int lowpan_stop(struct net_device *dev) return 0; } -static int lowpan_neigh_construct(struct neighbour *n) +static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n) { struct lowpan_802154_neigh *neigh = lowpan_802154_neigh(neighbour_priv(n)); -- cgit v1.2.3-55-g7522 From 18bfb924f0005a728caadd90ba755b2a660bf441 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 5 Jul 2016 11:27:38 +0200 Subject: net: introduce default neigh_construct/destroy ndo calls for L2 upper devices L2 upper device needs to propagate neigh_construct/destroy calls down to lower devices. Do this by defining default ndo functions and use them in team, bond, bridge and vlan. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 ++ drivers/net/team/team.c | 2 ++ include/linux/netdevice.h | 4 ++++ net/8021q/vlan_dev.c | 2 ++ net/bridge/br_device.c | 2 ++ net/core/dev.c | 44 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 56 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 90157e20357e..480d73ac7d1b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4137,6 +4137,8 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_add_slave = bond_enslave, .ndo_del_slave = bond_release, .ndo_fix_features = bond_fix_features, + .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, + .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_bridge_setlink = switchdev_port_bridge_setlink, .ndo_bridge_getlink = switchdev_port_bridge_getlink, .ndo_bridge_dellink = switchdev_port_bridge_dellink, diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index f9eebea83516..a380649bf6b5 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2002,6 +2002,8 @@ static const struct net_device_ops team_netdev_ops = { .ndo_add_slave = team_add_slave, .ndo_del_slave = team_del_slave, .ndo_fix_features = team_fix_features, + .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, + .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_change_carrier = team_change_carrier, .ndo_bridge_setlink = switchdev_port_bridge_setlink, .ndo_bridge_getlink = switchdev_port_bridge_getlink, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 91af73c9dd51..49736a31acaa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3845,6 +3845,10 @@ void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); void netdev_lower_state_changed(struct net_device *lower_dev, void *lower_state_info); +int netdev_default_l2upper_neigh_construct(struct net_device *dev, + struct neighbour *n); +void netdev_default_l2upper_neigh_destroy(struct net_device *dev, + struct neighbour *n); /* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 86ae75b77390..c8f422c90856 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -790,6 +790,8 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup, #endif .ndo_fix_features = vlan_dev_fix_features, + .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, + .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_fdb_add = switchdev_port_fdb_add, .ndo_fdb_del = switchdev_port_fdb_del, .ndo_fdb_dump = switchdev_port_fdb_dump, diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 0c39e0f6da09..8eecd0ec22f2 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -349,6 +349,8 @@ static const struct net_device_ops br_netdev_ops = { .ndo_add_slave = br_add_slave, .ndo_del_slave = br_del_slave, .ndo_fix_features = br_fix_features, + .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, + .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_fdb_add = br_fdb_add, .ndo_fdb_del = br_fdb_delete, .ndo_fdb_dump = br_fdb_dump, diff --git a/net/core/dev.c b/net/core/dev.c index a4f3b0a9aeaf..b92d63bfde7a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6087,6 +6087,50 @@ void netdev_lower_state_changed(struct net_device *lower_dev, } EXPORT_SYMBOL(netdev_lower_state_changed); +int netdev_default_l2upper_neigh_construct(struct net_device *dev, + struct neighbour *n) +{ + struct net_device *lower_dev, *stop_dev; + struct list_head *iter; + int err; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + if (!lower_dev->netdev_ops->ndo_neigh_construct) + continue; + err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n); + if (err) { + stop_dev = lower_dev; + goto rollback; + } + } + return 0; + +rollback: + netdev_for_each_lower_dev(dev, lower_dev, iter) { + if (lower_dev == stop_dev) + break; + if (!lower_dev->netdev_ops->ndo_neigh_destroy) + continue; + lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n); + } + return err; +} +EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_construct); + +void netdev_default_l2upper_neigh_destroy(struct net_device *dev, + struct neighbour *n) +{ + struct net_device *lower_dev; + struct list_head *iter; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + if (!lower_dev->netdev_ops->ndo_neigh_destroy) + continue; + lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n); + } +} +EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_destroy); + static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; -- cgit v1.2.3-55-g7522 From a7862b45849fe2f8610a2bec89235580f55d337f Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 19 Jul 2016 12:16:48 -0700 Subject: net: add ndo to setup/query xdp prog in adapter rx Add one new netdev op for drivers implementing the BPF_PROG_TYPE_XDP filter. The single op is used for both setup/query of the xdp program, modelled after ndo_setup_tc. Signed-off-by: Brenden Blanco Signed-off-by: David S. Miller --- include/linux/netdevice.h | 34 ++++++++++++++++++++++++++++++++++ net/core/dev.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 49736a31acaa..fab9a1c2a2ac 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -63,6 +63,7 @@ struct wpan_dev; struct mpls_dev; /* UDP Tunnel offloads */ struct udp_tunnel_info; +struct bpf_prog; void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -799,6 +800,33 @@ struct tc_to_netdev { }; }; +/* These structures hold the attributes of xdp state that are being passed + * to the netdevice through the xdp op. + */ +enum xdp_netdev_command { + /* Set or clear a bpf program used in the earliest stages of packet + * rx. The prog will have been loaded as BPF_PROG_TYPE_XDP. The callee + * is responsible for calling bpf_prog_put on any old progs that are + * stored. In case of error, the callee need not release the new prog + * reference, but on success it takes ownership and must bpf_prog_put + * when it is no longer used. + */ + XDP_SETUP_PROG, + /* Check if a bpf program is set on the device. The callee should + * return true if a program is currently attached and running. + */ + XDP_QUERY_PROG, +}; + +struct netdev_xdp { + enum xdp_netdev_command command; + union { + /* XDP_SETUP_PROG */ + struct bpf_prog *prog; + /* XDP_QUERY_PROG */ + bool prog_attached; + }; +}; /* * This structure defines the management hooks for network devices. @@ -1087,6 +1115,9 @@ struct tc_to_netdev { * appropriate rx headroom value allows avoiding skb head copy on * forward. Setting a negative value resets the rx headroom to the * default value. + * int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp); + * This function is used to set or query state related to XDP on the + * netdevice. See definition of enum xdp_netdev_command for details. * */ struct net_device_ops { @@ -1271,6 +1302,8 @@ struct net_device_ops { struct sk_buff *skb); void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom); + int (*ndo_xdp)(struct net_device *dev, + struct netdev_xdp *xdp); }; /** @@ -3257,6 +3290,7 @@ int dev_get_phys_port_id(struct net_device *dev, int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); int dev_change_proto_down(struct net_device *dev, bool proto_down); +int dev_change_xdp_fd(struct net_device *dev, int fd); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); diff --git a/net/core/dev.c b/net/core/dev.c index 7894e406c806..2a9c39f8824e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -94,6 +94,7 @@ #include #include #include +#include #include #include #include @@ -6614,6 +6615,38 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) } EXPORT_SYMBOL(dev_change_proto_down); +/** + * dev_change_xdp_fd - set or clear a bpf program for a device rx path + * @dev: device + * @fd: new program fd or negative value to clear + * + * Set or clear a bpf program for a device + */ +int dev_change_xdp_fd(struct net_device *dev, int fd) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct bpf_prog *prog = NULL; + struct netdev_xdp xdp = {}; + int err; + + if (!ops->ndo_xdp) + return -EOPNOTSUPP; + if (fd >= 0) { + prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); + if (IS_ERR(prog)) + return PTR_ERR(prog); + } + + xdp.command = XDP_SETUP_PROG; + xdp.prog = prog; + err = ops->ndo_xdp(dev, &xdp); + if (err < 0 && prog) + bpf_prog_put(prog); + + return err; +} +EXPORT_SYMBOL(dev_change_xdp_fd); + /** * dev_new_index - allocate an ifindex * @net: the applicable net namespace -- cgit v1.2.3-55-g7522 From b87f7936a93246804cf70e7e2e0568799c948bb1 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Thu, 21 Jul 2016 12:03:12 +0200 Subject: net/sched: Add match-all classifier hw offloading. Following the work that have been done on offloading classifiers like u32 and flower, now the match-all classifier hw offloading is possible. if the interface supports tc offloading. To control the offloading, two tc flags have been introduced: skip_sw and skip_hw. Typical usage: tc filter add dev eth25 parent ffff: \ matchall skip_sw \ action mirred egress mirror \ dev eth27 Signed-off-by: Yotam Gigi Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ include/net/pkt_cls.h | 11 +++++++ include/uapi/linux/pkt_cls.h | 1 + net/sched/cls_matchall.c | 76 ++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 87 insertions(+), 3 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 43c749b1b619..076df5360ba5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -787,6 +787,7 @@ enum { TC_SETUP_MQPRIO, TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, + TC_SETUP_MATCHALL, }; struct tc_cls_u32_offload; @@ -797,6 +798,7 @@ struct tc_to_netdev { u8 tc; struct tc_cls_u32_offload *cls_u32; struct tc_cls_flower_offload *cls_flower; + struct tc_cls_matchall_offload *cls_mall; }; }; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 3722dda0199d..6f8d65342d3a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -442,4 +442,15 @@ struct tc_cls_flower_offload { struct tcf_exts *exts; }; +enum tc_matchall_command { + TC_CLSMATCHALL_REPLACE, + TC_CLSMATCHALL_DESTROY, +}; + +struct tc_cls_matchall_offload { + enum tc_matchall_command command; + struct tcf_exts *exts; + unsigned long cookie; +}; + #endif diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index a32494887e01..d1c1ccaba787 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -439,6 +439,7 @@ enum { TCA_MATCHALL_UNSPEC, TCA_MATCHALL_CLASSID, TCA_MATCHALL_ACT, + TCA_MATCHALL_FLAGS, __TCA_MATCHALL_MAX, }; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 8a6b4de7a99a..25927b6c4436 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -21,6 +21,7 @@ struct cls_mall_filter { struct tcf_result res; u32 handle; struct rcu_head rcu; + u32 flags; }; struct cls_mall_head { @@ -34,6 +35,9 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_mall_head *head = rcu_dereference_bh(tp->root); struct cls_mall_filter *f = head->filter; + if (tc_skip_sw(f->flags)) + return -1; + return tcf_exts_exec(skb, &f->exts, res); } @@ -55,18 +59,61 @@ static void mall_destroy_filter(struct rcu_head *head) struct cls_mall_filter *f = container_of(head, struct cls_mall_filter, rcu); tcf_exts_destroy(&f->exts); + kfree(f); } +static int mall_replace_hw_filter(struct tcf_proto *tp, + struct cls_mall_filter *f, + unsigned long cookie) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_to_netdev offload; + struct tc_cls_matchall_offload mall_offload = {0}; + + offload.type = TC_SETUP_MATCHALL; + offload.cls_mall = &mall_offload; + offload.cls_mall->command = TC_CLSMATCHALL_REPLACE; + offload.cls_mall->exts = &f->exts; + offload.cls_mall->cookie = cookie; + + return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, + &offload); +} + +static void mall_destroy_hw_filter(struct tcf_proto *tp, + struct cls_mall_filter *f, + unsigned long cookie) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_to_netdev offload; + struct tc_cls_matchall_offload mall_offload = {0}; + + offload.type = TC_SETUP_MATCHALL; + offload.cls_mall = &mall_offload; + offload.cls_mall->command = TC_CLSMATCHALL_DESTROY; + offload.cls_mall->exts = NULL; + offload.cls_mall->cookie = cookie; + + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, + &offload); +} + static bool mall_destroy(struct tcf_proto *tp, bool force) { struct cls_mall_head *head = rtnl_dereference(tp->root); + struct net_device *dev = tp->q->dev_queue->dev; + struct cls_mall_filter *f = head->filter; - if (!force && head->filter) + if (!force && f) return false; - if (head->filter) - call_rcu(&head->filter->rcu, mall_destroy_filter); + if (f) { + if (tc_should_offload(dev, tp, f->flags)) + mall_destroy_hw_filter(tp, f, (unsigned long) f); + + call_rcu(&f->rcu, mall_destroy_filter); + } RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); return true; @@ -117,8 +164,10 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, { struct cls_mall_head *head = rtnl_dereference(tp->root); struct cls_mall_filter *fold = (struct cls_mall_filter *) *arg; + struct net_device *dev = tp->q->dev_queue->dev; struct cls_mall_filter *f; struct nlattr *tb[TCA_MATCHALL_MAX + 1]; + u32 flags = 0; int err; if (!tca[TCA_OPTIONS]) @@ -135,6 +184,12 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, if (err < 0) return err; + if (tb[TCA_MATCHALL_FLAGS]) { + flags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]); + if (!tc_flags_valid(flags)) + return -EINVAL; + } + f = kzalloc(sizeof(*f), GFP_KERNEL); if (!f) return -ENOBUFS; @@ -144,11 +199,22 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, if (!handle) handle = 1; f->handle = handle; + f->flags = flags; err = mall_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr); if (err) goto errout; + if (tc_should_offload(dev, tp, flags)) { + err = mall_replace_hw_filter(tp, f, (unsigned long) f); + if (err) { + if (tc_skip_sw(flags)) + goto errout; + else + err = 0; + } + } + *arg = (unsigned long) f; rcu_assign_pointer(head->filter, f); @@ -163,6 +229,10 @@ static int mall_delete(struct tcf_proto *tp, unsigned long arg) { struct cls_mall_head *head = rtnl_dereference(tp->root); struct cls_mall_filter *f = (struct cls_mall_filter *) arg; + struct net_device *dev = tp->q->dev_queue->dev; + + if (tc_should_offload(dev, tp, f->flags)) + mall_destroy_hw_filter(tp, f, (unsigned long) f); RCU_INIT_POINTER(head->filter, NULL); tcf_unbind_filter(tp, &f->res); -- cgit v1.2.3-55-g7522