summaryrefslogtreecommitdiffstats
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c593
1 files changed, 431 insertions, 162 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 11596a302a26..dda9d7b9a840 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -145,6 +145,7 @@
#include <linux/crash_dump.h>
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
+#include <linux/net_namespace.h>
#include "net-sysfs.h"
@@ -162,7 +163,6 @@ static struct list_head offload_base __read_mostly;
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
- struct net_device *dev,
struct netdev_notifier_info *info);
static struct napi_struct *napi_by_id(unsigned int napi_id);
@@ -188,6 +188,8 @@ static struct napi_struct *napi_by_id(unsigned int napi_id);
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);
+static DEFINE_MUTEX(ifalias_mutex);
+
/* protects napi_hash addition/deletion and napi_gen_id */
static DEFINE_SPINLOCK(napi_hash_lock);
@@ -1062,7 +1064,10 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
unsigned long *inuse;
struct net_device *d;
- p = strnchr(name, IFNAMSIZ-1, '%');
+ if (!dev_valid_name(name))
+ return -EINVAL;
+
+ p = strchr(name, '%');
if (p) {
/*
* Verify the string as this thing may have come from
@@ -1093,8 +1098,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
free_page((unsigned long) inuse);
}
- if (buf != name)
- snprintf(buf, IFNAMSIZ, name, i);
+ snprintf(buf, IFNAMSIZ, name, i);
if (!__dev_get_by_name(net, buf))
return i;
@@ -1105,6 +1109,20 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
return -ENFILE;
}
+static int dev_alloc_name_ns(struct net *net,
+ struct net_device *dev,
+ const char *name)
+{
+ char buf[IFNAMSIZ];
+ int ret;
+
+ BUG_ON(!net);
+ ret = __dev_alloc_name(net, name, buf);
+ if (ret >= 0)
+ strlcpy(dev->name, buf, IFNAMSIZ);
+ return ret;
+}
+
/**
* dev_alloc_name - allocate a name for a device
* @dev: device
@@ -1121,32 +1139,10 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
int dev_alloc_name(struct net_device *dev, const char *name)
{
- char buf[IFNAMSIZ];
- struct net *net;
- int ret;
-
- BUG_ON(!dev_net(dev));
- net = dev_net(dev);
- ret = __dev_alloc_name(net, name, buf);
- if (ret >= 0)
- strlcpy(dev->name, buf, IFNAMSIZ);
- return ret;
+ return dev_alloc_name_ns(dev_net(dev), dev, name);
}
EXPORT_SYMBOL(dev_alloc_name);
-static int dev_alloc_name_ns(struct net *net,
- struct net_device *dev,
- const char *name)
-{
- char buf[IFNAMSIZ];
- int ret;
-
- ret = __dev_alloc_name(net, name, buf);
- if (ret >= 0)
- strlcpy(dev->name, buf, IFNAMSIZ);
- return ret;
-}
-
int dev_get_valid_name(struct net *net, struct net_device *dev,
const char *name)
{
@@ -1265,29 +1261,53 @@ rollback:
*/
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
- char *new_ifalias;
-
- ASSERT_RTNL();
+ struct dev_ifalias *new_alias = NULL;
if (len >= IFALIASZ)
return -EINVAL;
- if (!len) {
- kfree(dev->ifalias);
- dev->ifalias = NULL;
- return 0;
+ if (len) {
+ new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL);
+ if (!new_alias)
+ return -ENOMEM;
+
+ memcpy(new_alias->ifalias, alias, len);
+ new_alias->ifalias[len] = 0;
}
- new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
- if (!new_ifalias)
- return -ENOMEM;
- dev->ifalias = new_ifalias;
- memcpy(dev->ifalias, alias, len);
- dev->ifalias[len] = 0;
+ mutex_lock(&ifalias_mutex);
+ rcu_swap_protected(dev->ifalias, new_alias,
+ mutex_is_locked(&ifalias_mutex));
+ mutex_unlock(&ifalias_mutex);
+
+ if (new_alias)
+ kfree_rcu(new_alias, rcuhead);
return len;
}
+/**
+ * dev_get_alias - get ifalias of a device
+ * @dev: device
+ * @name: buffer to store name of ifalias
+ * @len: size of buffer
+ *
+ * get ifalias for a device. Caller must make sure dev cannot go
+ * away, e.g. rcu read lock or own a reference count to device.
+ */
+int dev_get_alias(const struct net_device *dev, char *name, size_t len)
+{
+ const struct dev_ifalias *alias;
+ int ret = 0;
+
+ rcu_read_lock();
+ alias = rcu_dereference(dev->ifalias);
+ if (alias)
+ ret = snprintf(name, len, "%s", alias->ifalias);
+ rcu_read_unlock();
+
+ return ret;
+}
/**
* netdev_features_change - device changes features
@@ -1312,10 +1332,11 @@ EXPORT_SYMBOL(netdev_features_change);
void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
- struct netdev_notifier_change_info change_info;
+ struct netdev_notifier_change_info change_info = {
+ .info.dev = dev,
+ };
- change_info.flags_changed = 0;
- call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGE,
&change_info.info);
rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
}
@@ -1533,12 +1554,30 @@ void dev_disable_lro(struct net_device *dev)
}
EXPORT_SYMBOL(dev_disable_lro);
+/**
+ * dev_disable_gro_hw - disable HW Generic Receive Offload on a device
+ * @dev: device
+ *
+ * Disable HW Generic Receive Offload (GRO_HW) on a net device. Must be
+ * called under RTNL. This is needed if Generic XDP is installed on
+ * the device.
+ */
+static void dev_disable_gro_hw(struct net_device *dev)
+{
+ dev->wanted_features &= ~NETIF_F_GRO_HW;
+ netdev_update_features(dev);
+
+ if (unlikely(dev->features & NETIF_F_GRO_HW))
+ netdev_WARN(dev, "failed to disable GRO_HW!\n");
+}
+
static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
struct net_device *dev)
{
- struct netdev_notifier_info info;
+ struct netdev_notifier_info info = {
+ .dev = dev,
+ };
- netdev_notifier_info_init(&info, dev);
return nb->notifier_call(nb, val, &info);
}
@@ -1655,7 +1694,6 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
/**
* call_netdevice_notifiers_info - call all network notifier blocks
* @val: value passed unmodified to notifier function
- * @dev: net_device pointer passed unmodified to notifier function
* @info: notifier information data
*
* Call all network notifier blocks. Parameters and return value
@@ -1663,11 +1701,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
*/
static int call_netdevice_notifiers_info(unsigned long val,
- struct net_device *dev,
struct netdev_notifier_info *info)
{
ASSERT_RTNL();
- netdev_notifier_info_init(info, dev);
return raw_notifier_call_chain(&netdev_chain, val, info);
}
@@ -1682,9 +1718,11 @@ static int call_netdevice_notifiers_info(unsigned long val,
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
- struct netdev_notifier_info info;
+ struct netdev_notifier_info info = {
+ .dev = dev,
+ };
- return call_netdevice_notifiers_info(val, dev, &info);
+ return call_netdevice_notifiers_info(val, &info);
}
EXPORT_SYMBOL(call_netdevice_notifiers);
@@ -2012,6 +2050,7 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
return 0;
}
+EXPORT_SYMBOL(netdev_txq_to_tc);
#ifdef CONFIG_XPS
static DEFINE_MUTEX(xps_map_mutex);
@@ -2735,7 +2774,8 @@ EXPORT_SYMBOL(skb_mac_gso_segment);
static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
{
if (tx_path)
- return skb->ip_summed != CHECKSUM_PARTIAL;
+ return skb->ip_summed != CHECKSUM_PARTIAL &&
+ skb->ip_summed != CHECKSUM_UNNECESSARY;
return skb->ip_summed == CHECKSUM_NONE;
}
@@ -2791,7 +2831,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
segs = skb_mac_gso_segment(skb, features);
- if (unlikely(skb_needs_check(skb, tx_path)))
+ if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
skb_warn_bad_offload(skb);
return segs;
@@ -3030,7 +3070,7 @@ int skb_csum_hwoffload_help(struct sk_buff *skb,
}
EXPORT_SYMBOL(skb_csum_hwoffload_help);
-static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool *again)
{
netdev_features_t features;
@@ -3054,9 +3094,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
__skb_linearize(skb))
goto out_kfree_skb;
- if (validate_xmit_xfrm(skb, features))
- goto out_kfree_skb;
-
/* If packet is not checksummed and device does not
* support checksumming for this protocol, complete
* checksumming here.
@@ -3073,6 +3110,8 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
}
}
+ skb = validate_xmit_xfrm(skb, features, again);
+
return skb;
out_kfree_skb:
@@ -3082,7 +3121,7 @@ out_null:
return NULL;
}
-struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev)
+struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again)
{
struct sk_buff *next, *head = NULL, *tail;
@@ -3093,7 +3132,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d
/* in case skb wont be segmented, point to itself */
skb->prev = skb;
- skb = validate_xmit_skb(skb, dev);
+ skb = validate_xmit_skb(skb, dev, again);
if (!skb)
continue;
@@ -3127,10 +3166,21 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
/* + transport layer */
- if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
- hdr_len += tcp_hdrlen(skb);
- else
- hdr_len += sizeof(struct udphdr);
+ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
+ const struct tcphdr *th;
+ struct tcphdr _tcphdr;
+
+ th = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_tcphdr), &_tcphdr);
+ if (likely(th))
+ hdr_len += __tcp_hdrlen(th);
+ } else {
+ struct udphdr _udphdr;
+
+ if (skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_udphdr), &_udphdr))
+ hdr_len += sizeof(struct udphdr);
+ }
if (shinfo->gso_type & SKB_GSO_DODGY)
gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
@@ -3150,6 +3200,21 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
int rc;
qdisc_calculate_pkt_len(skb, q);
+
+ if (q->flags & TCQ_F_NOLOCK) {
+ if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+ __qdisc_drop(skb, &to_free);
+ rc = NET_XMIT_DROP;
+ } else {
+ rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ __qdisc_run(q);
+ }
+
+ if (unlikely(to_free))
+ kfree_skb_list(to_free);
+ return rc;
+ }
+
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
@@ -3180,9 +3245,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
contended = false;
}
__qdisc_run(q);
- } else
- qdisc_run_end(q);
+ }
+ qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
} else {
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
@@ -3192,6 +3257,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
contended = false;
}
__qdisc_run(q);
+ qdisc_run_end(q);
}
}
spin_unlock(root_lock);
@@ -3245,22 +3311,22 @@ EXPORT_SYMBOL(dev_loopback_xmit);
static struct sk_buff *
sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
- struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list);
+ struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
struct tcf_result cl_res;
- if (!cl)
+ if (!miniq)
return skb;
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
- qdisc_bstats_cpu_update(cl->q, skb);
+ mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, cl, &cl_res, false)) {
+ switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
break;
case TC_ACT_SHOT:
- qdisc_qstats_cpu_drop(cl->q);
+ mini_qdisc_qstats_cpu_drop(miniq);
*ret = NET_XMIT_DROP;
kfree_skb(skb);
return NULL;
@@ -3364,8 +3430,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
else
queue_index = __netdev_pick_tx(dev, skb);
- if (!accel_priv)
- queue_index = netdev_cap_txqueue(dev, queue_index);
+ queue_index = netdev_cap_txqueue(dev, queue_index);
}
skb_set_queue_mapping(skb, queue_index);
@@ -3404,6 +3469,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
struct netdev_queue *txq;
struct Qdisc *q;
int rc = -ENOMEM;
+ bool again = false;
skb_reset_mac_header(skb);
@@ -3465,7 +3531,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
XMIT_RECURSION_LIMIT))
goto recursion_alert;
- skb = validate_xmit_skb(skb, dev);
+ skb = validate_xmit_skb(skb, dev, &again);
if (!skb)
goto out;
@@ -3725,7 +3791,7 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
flow_table = rcu_dereference(rxqueue->rps_flow_table);
if (flow_table && flow_id <= flow_table->mask) {
rflow = &flow_table->flows[flow_id];
- cpu = ACCESS_ONCE(rflow->cpu);
+ cpu = READ_ONCE(rflow->cpu);
if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
((int)(per_cpu(softnet_data, cpu).input_queue_head -
rflow->last_qtail) <
@@ -3861,11 +3927,35 @@ drop:
return NET_RX_DROP;
}
+static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ struct netdev_rx_queue *rxqueue;
+
+ rxqueue = dev->_rx;
+
+ if (skb_rx_queue_recorded(skb)) {
+ u16 index = skb_get_rx_queue(skb);
+
+ if (unlikely(index >= dev->real_num_rx_queues)) {
+ WARN_ONCE(dev->real_num_rx_queues > 1,
+ "%s received packet on queue %u, but number "
+ "of RX queues is %u\n",
+ dev->name, index, dev->real_num_rx_queues);
+
+ return rxqueue; /* Return first rxqueue */
+ }
+ rxqueue += index;
+ }
+ return rxqueue;
+}
+
static u32 netif_receive_generic_xdp(struct sk_buff *skb,
struct bpf_prog *xdp_prog)
{
+ struct netdev_rx_queue *rxqueue;
+ u32 metalen, act = XDP_DROP;
struct xdp_buff xdp;
- u32 act = XDP_DROP;
void *orig_data;
int hlen, off;
u32 mac_len;
@@ -3876,8 +3966,25 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
if (skb_cloned(skb))
return XDP_PASS;
- if (skb_linearize(skb))
- goto do_drop;
+ /* XDP packets must be linear and must have sufficient headroom
+ * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
+ * native XDP provides, thus we need to do it here as well.
+ */
+ if (skb_is_nonlinear(skb) ||
+ skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+ int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+ int troom = skb->tail + skb->data_len - skb->end;
+
+ /* In case we have to go down the path and also linearize,
+ * then lets do the pskb_expand_head() work just once here.
+ */
+ if (pskb_expand_head(skb,
+ hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+ troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
+ goto do_drop;
+ if (skb_linearize(skb))
+ goto do_drop;
+ }
/* The XDP program wants to see the packet starting at the MAC
* header.
@@ -3885,10 +3992,14 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
mac_len = skb->data - skb_mac_header(skb);
hlen = skb_headlen(skb) + mac_len;
xdp.data = skb->data - mac_len;
+ xdp.data_meta = xdp.data;
xdp.data_end = xdp.data + hlen;
xdp.data_hard_start = skb->data - skb_headroom(skb);
orig_data = xdp.data;
+ rxqueue = netif_get_rxqueue(skb);
+ xdp.rxq = &rxqueue->xdp_rxq;
+
act = bpf_prog_run_xdp(xdp_prog, &xdp);
off = xdp.data - orig_data;
@@ -3902,10 +4013,12 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
case XDP_REDIRECT:
case XDP_TX:
__skb_push(skb, mac_len);
- /* fall through */
+ break;
case XDP_PASS:
+ metalen = xdp.data - xdp.data_meta;
+ if (metalen)
+ skb_metadata_set(skb, metalen);
break;
-
default:
bpf_warn_invalid_xdp_action(act);
/* fall through */
@@ -4111,21 +4224,26 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
while (head) {
struct Qdisc *q = head;
- spinlock_t *root_lock;
+ spinlock_t *root_lock = NULL;
head = head->next_sched;
- root_lock = qdisc_lock(q);
- spin_lock(root_lock);
+ if (!(q->flags & TCQ_F_NOLOCK)) {
+ root_lock = qdisc_lock(q);
+ spin_lock(root_lock);
+ }
/* We need to make sure head->next_sched is read
* before clearing __QDISC_STATE_SCHED
*/
smp_mb__before_atomic();
clear_bit(__QDISC_STATE_SCHED, &q->state);
qdisc_run(q);
- spin_unlock(root_lock);
+ if (root_lock)
+ spin_unlock(root_lock);
}
}
+
+ xfrm_dev_backlog(sd);
}
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE)
@@ -4140,7 +4258,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
struct net_device *orig_dev)
{
#ifdef CONFIG_NET_CLS_ACT
- struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
+ struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
struct tcf_result cl_res;
/* If there's at least one ingress present somewhere (so
@@ -4148,8 +4266,9 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
* that are not configured with an ingress qdisc will bail
* out here.
*/
- if (!cl)
+ if (!miniq)
return skb;
+
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
@@ -4157,15 +4276,15 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
qdisc_skb_cb(skb)->pkt_len = skb->len;
skb->tc_at_ingress = 1;
- qdisc_bstats_cpu_update(cl->q, skb);
+ mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, cl, &cl_res, false)) {
+ switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
break;
case TC_ACT_SHOT:
- qdisc_qstats_cpu_drop(cl->q);
+ mini_qdisc_qstats_cpu_drop(miniq);
kfree_skb(skb);
return NULL;
case TC_ACT_STOLEN:
@@ -4443,6 +4562,33 @@ out:
return ret;
}
+/**
+ * netif_receive_skb_core - special purpose version of netif_receive_skb
+ * @skb: buffer to process
+ *
+ * More direct receive version of netif_receive_skb(). It should
+ * only be used by callers that have a need to skip RPS and Generic XDP.
+ * Caller must also take care of handling if (page_is_)pfmemalloc.
+ *
+ * This function may only be called from softirq context and interrupts
+ * should be enabled.
+ *
+ * Return values (usually ignored):
+ * NET_RX_SUCCESS: no congestion
+ * NET_RX_DROP: packet was dropped
+ */
+int netif_receive_skb_core(struct sk_buff *skb)
+{
+ int ret;
+
+ rcu_read_lock();
+ ret = __netif_receive_skb_core(skb, false);
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(netif_receive_skb_core);
+
static int __netif_receive_skb(struct sk_buff *skb)
{
int ret;
@@ -4468,7 +4614,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
return ret;
}
-static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
+static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
{
struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
struct bpf_prog *new = xdp->prog;
@@ -4485,6 +4631,7 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
} else if (new && !old) {
static_key_slow_inc(&generic_xdp_needed);
dev_disable_lro(dev);
+ dev_disable_gro_hw(dev);
}
break;
@@ -4695,6 +4842,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
diffs |= p->vlan_tci ^ skb->vlan_tci;
diffs |= skb_metadata_dst_cmp(p, skb);
+ diffs |= skb_metadata_differs(p, skb);
if (maclen == ETH_HLEN)
diffs |= compare_ether_header(skb_mac_header(p),
skb_mac_header(skb));
@@ -6228,9 +6376,19 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master,
- void *upper_priv, void *upper_info)
-{
- struct netdev_notifier_changeupper_info changeupper_info;
+ void *upper_priv, void *upper_info,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_changeupper_info changeupper_info = {
+ .info = {
+ .dev = dev,
+ .extack = extack,
+ },
+ .upper_dev = upper_dev,
+ .master = master,
+ .linking = true,
+ .upper_info = upper_info,
+ };
int ret = 0;
ASSERT_RTNL();
@@ -6248,12 +6406,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (master && netdev_master_upper_dev_get(dev))
return -EBUSY;
- changeupper_info.upper_dev = upper_dev;
- changeupper_info.master = master;
- changeupper_info.linking = true;
- changeupper_info.upper_info = upper_info;
-
- ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
&changeupper_info.info);
ret = notifier_to_errno(ret);
if (ret)
@@ -6264,7 +6417,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (ret)
return ret;
- ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
ret = notifier_to_errno(ret);
if (ret)
@@ -6282,6 +6435,7 @@ rollback:
* netdev_upper_dev_link - Add a link to the upper device
* @dev: device
* @upper_dev: new upper device
+ * @extack: netlink extended ack
*
* Adds a link to device which is upper to this one. The caller must hold
* the RTNL lock. On a failure a negative errno code is returned.
@@ -6289,9 +6443,11 @@ rollback:
* returns zero.
*/
int netdev_upper_dev_link(struct net_device *dev,
- struct net_device *upper_dev)
+ struct net_device *upper_dev,
+ struct netlink_ext_ack *extack)
{
- return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL);
+ return __netdev_upper_dev_link(dev, upper_dev, false,
+ NULL, NULL, extack);
}
EXPORT_SYMBOL(netdev_upper_dev_link);
@@ -6301,6 +6457,7 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
* @upper_dev: new upper device
* @upper_priv: upper device private
* @upper_info: upper info to be passed down via notifier
+ * @extack: netlink extended ack
*
* Adds a link to device which is upper to this one. In this case, only
* one master upper device can be linked, although other non-master devices
@@ -6310,10 +6467,11 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
*/
int netdev_master_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev,
- void *upper_priv, void *upper_info)
+ void *upper_priv, void *upper_info,
+ struct netlink_ext_ack *extack)
{
return __netdev_upper_dev_link(dev, upper_dev, true,
- upper_priv, upper_info);
+ upper_priv, upper_info, extack);
}
EXPORT_SYMBOL(netdev_master_upper_dev_link);
@@ -6328,20 +6486,24 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link);
void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
- struct netdev_notifier_changeupper_info changeupper_info;
+ struct netdev_notifier_changeupper_info changeupper_info = {
+ .info = {
+ .dev = dev,
+ },
+ .upper_dev = upper_dev,
+ .linking = false,
+ };
ASSERT_RTNL();
- changeupper_info.upper_dev = upper_dev;
changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
- changeupper_info.linking = false;
- call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
&changeupper_info.info);
__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
- call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -6357,11 +6519,13 @@ EXPORT_SYMBOL(netdev_upper_dev_unlink);
void netdev_bonding_info_change(struct net_device *dev,
struct netdev_bonding_info *bonding_info)
{
- struct netdev_notifier_bonding_info info;
+ struct netdev_notifier_bonding_info info = {
+ .info.dev = dev,
+ };
memcpy(&info.bonding_info, bonding_info,
sizeof(struct netdev_bonding_info));
- call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
+ call_netdevice_notifiers_info(NETDEV_BONDING_INFO,
&info.info);
}
EXPORT_SYMBOL(netdev_bonding_info_change);
@@ -6487,11 +6651,13 @@ EXPORT_SYMBOL(dev_get_nest_level);
void netdev_lower_state_changed(struct net_device *lower_dev,
void *lower_state_info)
{
- struct netdev_notifier_changelowerstate_info changelowerstate_info;
+ struct netdev_notifier_changelowerstate_info changelowerstate_info = {
+ .info.dev = lower_dev,
+ };
ASSERT_RTNL();
changelowerstate_info.lower_state_info = lower_state_info;
- call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE,
&changelowerstate_info.info);
}
EXPORT_SYMBOL(netdev_lower_state_changed);
@@ -6782,11 +6948,14 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
if (dev->flags & IFF_UP &&
(changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
- struct netdev_notifier_change_info change_info;
+ struct netdev_notifier_change_info change_info = {
+ .info = {
+ .dev = dev,
+ },
+ .flags_changed = changes,
+ };
- change_info.flags_changed = changes;
- call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
- &change_info.info);
+ call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info);
}
}
@@ -6879,6 +7048,35 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
EXPORT_SYMBOL(dev_set_mtu);
/**
+ * dev_change_tx_queue_len - Change TX queue length of a netdevice
+ * @dev: device
+ * @new_len: new tx queue length
+ */
+int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
+{
+ unsigned int orig_len = dev->tx_queue_len;
+ int res;
+
+ if (new_len != (unsigned int)new_len)
+ return -ERANGE;
+
+ if (new_len != orig_len) {
+ dev->tx_queue_len = new_len;
+ res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
+ res = notifier_to_errno(res);
+ if (res) {
+ netdev_err(dev,
+ "refused to change device tx_queue_len\n");
+ dev->tx_queue_len = orig_len;
+ return res;
+ }
+ return dev_qdisc_change_tx_queue_len(dev);
+ }
+
+ return 0;
+}
+
+/**
* dev_set_group - Change group this device belongs to
* @dev: device
* @new_group: group this device should belong to
@@ -6993,26 +7191,30 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
}
EXPORT_SYMBOL(dev_change_proto_down);
-u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id)
+void __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
+ struct netdev_bpf *xdp)
{
- struct netdev_xdp xdp;
-
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_QUERY_PROG;
+ memset(xdp, 0, sizeof(*xdp));
+ xdp->command = XDP_QUERY_PROG;
/* Query must always succeed. */
- WARN_ON(xdp_op(dev, &xdp) < 0);
- if (prog_id)
- *prog_id = xdp.prog_id;
+ WARN_ON(bpf_op(dev, xdp) < 0);
+}
+
+static u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op)
+{
+ struct netdev_bpf xdp;
+
+ __dev_xdp_query(dev, bpf_op, &xdp);
return xdp.prog_attached;
}
-static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
+static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
struct netlink_ext_ack *extack, u32 flags,
struct bpf_prog *prog)
{
- struct netdev_xdp xdp;
+ struct netdev_bpf xdp;
memset(&xdp, 0, sizeof(xdp));
if (flags & XDP_FLAGS_HW_MODE)
@@ -7023,7 +7225,28 @@ static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
xdp.flags = flags;
xdp.prog = prog;
- return xdp_op(dev, &xdp);
+ return bpf_op(dev, &xdp);
+}
+
+static void dev_xdp_uninstall(struct net_device *dev)
+{
+ struct netdev_bpf xdp;
+ bpf_op_t ndo_bpf;
+
+ /* Remove generic XDP */
+ WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL));
+
+ /* Remove from the driver */
+ ndo_bpf = dev->netdev_ops->ndo_bpf;
+ if (!ndo_bpf)
+ return;
+
+ __dev_xdp_query(dev, ndo_bpf, &xdp);
+ if (xdp.prog_attached == XDP_ATTACHED_NONE)
+ return;
+
+ /* Program removal should always succeed */
+ WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, NULL));
}
/**
@@ -7040,32 +7263,40 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
{
const struct net_device_ops *ops = dev->netdev_ops;
struct bpf_prog *prog = NULL;
- xdp_op_t xdp_op, xdp_chk;
+ bpf_op_t bpf_op, bpf_chk;
int err;
ASSERT_RTNL();
- xdp_op = xdp_chk = ops->ndo_xdp;
- if (!xdp_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
+ bpf_op = bpf_chk = ops->ndo_bpf;
+ if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
return -EOPNOTSUPP;
- if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
- xdp_op = generic_xdp_install;
- if (xdp_op == xdp_chk)
- xdp_chk = generic_xdp_install;
+ if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
+ bpf_op = generic_xdp_install;
+ if (bpf_op == bpf_chk)
+ bpf_chk = generic_xdp_install;
if (fd >= 0) {
- if (xdp_chk && __dev_xdp_attached(dev, xdp_chk, NULL))
+ if (bpf_chk && __dev_xdp_attached(dev, bpf_chk))
return -EEXIST;
if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
- __dev_xdp_attached(dev, xdp_op, NULL))
+ __dev_xdp_attached(dev, bpf_op))
return -EBUSY;
- prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+ prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
+ bpf_op == ops->ndo_bpf);
if (IS_ERR(prog))
return PTR_ERR(prog);
+
+ if (!(flags & XDP_FLAGS_HW_MODE) &&
+ bpf_prog_is_dev_bound(prog->aux)) {
+ NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported");
+ bpf_prog_put(prog);
+ return -EINVAL;
+ }
}
- err = dev_xdp_install(dev, xdp_op, extack, flags, prog);
+ err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
if (err < 0 && prog)
bpf_prog_put(prog);
@@ -7148,6 +7379,7 @@ static void rollback_registered_many(struct list_head *head)
/* Shutdown queueing discipline. */
dev_shutdown(dev);
+ dev_xdp_uninstall(dev);
/* Notify protocols, that we are about to destroy
* this device. They should clean all the things.
@@ -7157,7 +7389,7 @@ static void rollback_registered_many(struct list_head *head)
if (!dev->rtnl_link_ops ||
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
- GFP_KERNEL);
+ GFP_KERNEL, NULL, 0);
/*
* Flush the unicast and multicast chains
@@ -7291,6 +7523,18 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
features &= ~dev->gso_partial_features;
}
+ if (!(features & NETIF_F_RXCSUM)) {
+ /* NETIF_F_GRO_HW implies doing RXCSUM since every packet
+ * successfully merged by hardware must also have the
+ * checksum verified by hardware. If the user does not
+ * want to enable RXCSUM, logically, we should disable GRO_HW.
+ */
+ if (features & NETIF_F_GRO_HW) {
+ netdev_dbg(dev, "Dropping NETIF_F_GRO_HW since no RXCSUM feature.\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
return features;
}
@@ -7424,12 +7668,12 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
}
EXPORT_SYMBOL(netif_stacked_transfer_operstate);
-#ifdef CONFIG_SYSFS
static int netif_alloc_rx_queues(struct net_device *dev)
{
unsigned int i, count = dev->num_rx_queues;
struct netdev_rx_queue *rx;
size_t sz = count * sizeof(*rx);
+ int err = 0;
BUG_ON(count < 1);
@@ -7439,11 +7683,38 @@ static int netif_alloc_rx_queues(struct net_device *dev)
dev->_rx = rx;
- for (i = 0; i < count; i++)
+ for (i = 0; i < count; i++) {
rx[i].dev = dev;
+
+ /* XDP RX-queue setup */
+ err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i);
+ if (err < 0)
+ goto err_rxq_info;
+ }
return 0;
+
+err_rxq_info:
+ /* Rollback successful reg's and free other resources */
+ while (i--)
+ xdp_rxq_info_unreg(&rx[i].xdp_rxq);
+ kvfree(dev->_rx);
+ dev->_rx = NULL;
+ return err;
+}
+
+static void netif_free_rx_queues(struct net_device *dev)
+{
+ unsigned int i, count = dev->num_rx_queues;
+
+ /* netif_alloc_rx_queues alloc failed, resources have been unreg'ed */
+ if (!dev->_rx)
+ return;
+
+ for (i = 0; i < count; i++)
+ xdp_rxq_info_unreg(&dev->_rx[i].xdp_rxq);
+
+ kvfree(dev->_rx);
}
-#endif
static void netdev_init_one_queue(struct net_device *dev,
struct netdev_queue *queue, void *_unused)
@@ -7994,7 +8265,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
unsigned int txqs, unsigned int rxqs)
{
struct net_device *dev;
- size_t alloc_size;
+ unsigned int alloc_size;
struct net_device *p;
BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -8004,12 +8275,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
return NULL;
}
-#ifdef CONFIG_SYSFS
if (rxqs < 1) {
pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
return NULL;
}
-#endif
alloc_size = sizeof(struct net_device);
if (sizeof_priv) {
@@ -8066,12 +8335,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
if (netif_alloc_netdev_queues(dev))
goto free_all;
-#ifdef CONFIG_SYSFS
dev->num_rx_queues = rxqs;
dev->real_num_rx_queues = rxqs;
if (netif_alloc_rx_queues(dev))
goto free_all;
-#endif
strcpy(dev->name, name);
dev->name_assign_type = name_assign_type;
@@ -8107,13 +8374,10 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
void free_netdev(struct net_device *dev)
{
struct napi_struct *p, *n;
- struct bpf_prog *prog;
might_sleep();
netif_free_tx_queues(dev);
-#ifdef CONFIG_SYSFS
- kvfree(dev->_rx);
-#endif
+ netif_free_rx_queues(dev);
kfree(rcu_dereference_protected(dev->ingress_queue, 1));
@@ -8126,12 +8390,6 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->pcpu_refcnt);
dev->pcpu_refcnt = NULL;
- prog = rcu_dereference_protected(dev->xdp_prog, 1);
- if (prog) {
- bpf_prog_put(prog);
- static_key_slow_dec(&generic_xdp_needed);
- }
-
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
netdev_freemem(dev);
@@ -8244,7 +8502,7 @@ EXPORT_SYMBOL(unregister_netdev);
int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
- int err;
+ int err, new_nsid, new_ifindex;
ASSERT_RTNL();
@@ -8300,7 +8558,16 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
rcu_barrier();
call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
- rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
+
+ new_nsid = peernet2id_alloc(dev_net(dev), net);
+ /* If there is an ifindex conflict assign a new one */
+ if (__dev_get_by_index(net, dev->ifindex))
+ new_ifindex = dev_new_index(net);
+ else
+ new_ifindex = dev->ifindex;
+
+ rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
+ new_ifindex);
/*
* Flush the unicast and multicast chains
@@ -8314,10 +8581,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
/* Actually switch the network namespace */
dev_net_set(dev, net);
-
- /* If there is an ifindex conflict assign a new one */
- if (__dev_get_by_index(net, dev->ifindex))
- dev->ifindex = dev_new_index(net);
+ dev->ifindex = new_ifindex;
/* Send a netdev-add uevent to the new namespace */
kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
@@ -8562,6 +8826,8 @@ static void __net_exit netdev_exit(struct net *net)
{
kfree(net->dev_name_head);
kfree(net->dev_index_head);
+ if (net != &init_net)
+ WARN_ON_ONCE(!list_empty(&net->dev_base_head));
}
static struct pernet_operations __net_initdata netdev_net_ops = {
@@ -8713,6 +8979,9 @@ static int __init net_dev_init(void)
skb_queue_head_init(&sd->input_pkt_queue);
skb_queue_head_init(&sd->process_queue);
+#ifdef CONFIG_XFRM_OFFLOAD
+ skb_queue_head_init(&sd->xfrm_backlog);
+#endif
INIT_LIST_HEAD(&sd->poll_list);
sd->output_queue_tailp = &sd->output_queue;
#ifdef CONFIG_RPS