summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/datagram.c14
-rw-r--r--net/core/dev.c1001
-rw-r--r--net/core/drop_monitor.c33
-rw-r--r--net/core/dst.c41
-rw-r--r--net/core/ethtool.c203
-rw-r--r--net/core/fib_rules.c37
-rw-r--r--net/core/filter.c226
-rw-r--r--net/core/flow.c91
-rw-r--r--net/core/gen_estimator.c17
-rw-r--r--net/core/gen_stats.c14
-rw-r--r--net/core/iovec.c32
-rw-r--r--net/core/link_watch.c1
-rw-r--r--net/core/neighbour.c486
-rw-r--r--net/core/net-sysfs.c78
-rw-r--r--net/core/net-sysfs.h4
-rw-r--r--net/core/net-traces.c1
-rw-r--r--net/core/net_namespace.c4
-rw-r--r--net/core/netevent.c5
-rw-r--r--net/core/netpoll.c186
-rw-r--r--net/core/pktgen.c261
-rw-r--r--net/core/rtnetlink.c50
-rw-r--r--net/core/scm.c33
-rw-r--r--net/core/skbuff.c115
-rw-r--r--net/core/sock.c68
-rw-r--r--net/core/stream.c14
-rw-r--r--net/core/sysctl_net_core.c3
-rw-r--r--net/core/timestamping.c126
-rw-r--r--net/core/utils.c18
29 files changed, 1975 insertions, 1189 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 51c3eec850ef..8a04dd22cf77 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -18,4 +18,4 @@ obj-$(CONFIG_NET_DMA) += user_dma.o
obj-$(CONFIG_FIB_RULES) += fib_rules.o
obj-$(CONFIG_TRACEPOINTS) += net-traces.o
obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
-
+obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index f5b6f43a4c2e..cd1e039c8755 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -219,6 +219,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
&peeked, err);
}
+EXPORT_SYMBOL(skb_recv_datagram);
void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
@@ -242,6 +243,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
unlock_sock_fast(sk, slow);
/* skb is now orphaned, can be freed outside of locked section */
+ trace_kfree_skb(skb, skb_free_datagram_locked);
__kfree_skb(skb);
}
EXPORT_SYMBOL(skb_free_datagram_locked);
@@ -288,7 +290,6 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
return err;
}
-
EXPORT_SYMBOL(skb_kill_datagram);
/**
@@ -373,6 +374,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
fault:
return -EFAULT;
}
+EXPORT_SYMBOL(skb_copy_datagram_iovec);
/**
* skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
@@ -716,6 +718,7 @@ csum_error:
fault:
return -EFAULT;
}
+EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
/**
* datagram_poll - generic datagram poll
@@ -744,13 +747,12 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
mask |= POLLERR;
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP;
+ mask |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
mask |= POLLHUP;
/* readable? */
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
- (sk->sk_shutdown & RCV_SHUTDOWN))
+ if (!skb_queue_empty(&sk->sk_receive_queue))
mask |= POLLIN | POLLRDNORM;
/* Connection-based need to check for termination and startup */
@@ -770,8 +772,4 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
return mask;
}
-
EXPORT_SYMBOL(datagram_poll);
-EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
-EXPORT_SYMBOL(skb_copy_datagram_iovec);
-EXPORT_SYMBOL(skb_recv_datagram);
diff --git a/net/core/dev.c b/net/core/dev.c
index 1f466e82ac33..35dfb8318483 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -101,8 +101,6 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
-#include <linux/if_bridge.h>
-#include <linux/if_macvlan.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
@@ -130,7 +128,10 @@
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>
+#include <trace/events/net.h>
+#include <trace/events/skb.h>
#include <linux/pci.h>
+#include <linux/inetdevice.h>
#include "net-sysfs.h"
@@ -373,6 +374,14 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
* --ANK (980803)
*/
+static inline struct list_head *ptype_head(const struct packet_type *pt)
+{
+ if (pt->type == htons(ETH_P_ALL))
+ return &ptype_all;
+ else
+ return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
+}
+
/**
* dev_add_pack - add packet handler
* @pt: packet type declaration
@@ -388,16 +397,11 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
void dev_add_pack(struct packet_type *pt)
{
- int hash;
+ struct list_head *head = ptype_head(pt);
- spin_lock_bh(&ptype_lock);
- if (pt->type == htons(ETH_P_ALL))
- list_add_rcu(&pt->list, &ptype_all);
- else {
- hash = ntohs(pt->type) & PTYPE_HASH_MASK;
- list_add_rcu(&pt->list, &ptype_base[hash]);
- }
- spin_unlock_bh(&ptype_lock);
+ spin_lock(&ptype_lock);
+ list_add_rcu(&pt->list, head);
+ spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);
@@ -416,15 +420,10 @@ EXPORT_SYMBOL(dev_add_pack);
*/
void __dev_remove_pack(struct packet_type *pt)
{
- struct list_head *head;
+ struct list_head *head = ptype_head(pt);
struct packet_type *pt1;
- spin_lock_bh(&ptype_lock);
-
- if (pt->type == htons(ETH_P_ALL))
- head = &ptype_all;
- else
- head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
+ spin_lock(&ptype_lock);
list_for_each_entry(pt1, head, list) {
if (pt == pt1) {
@@ -435,7 +434,7 @@ void __dev_remove_pack(struct packet_type *pt)
printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
- spin_unlock_bh(&ptype_lock);
+ spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(__dev_remove_pack);
@@ -803,35 +802,31 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
EXPORT_SYMBOL(dev_getfirstbyhwtype);
/**
- * dev_get_by_flags - find any device with given flags
+ * dev_get_by_flags_rcu - find any device with given flags
* @net: the applicable net namespace
* @if_flags: IFF_* values
* @mask: bitmask of bits in if_flags to check
*
* Search for any interface with the given flags. Returns NULL if a device
- * is not found or a pointer to the device. The device returned has
- * had a reference added and the pointer is safe until the user calls
- * dev_put to indicate they have finished with it.
+ * is not found or a pointer to the device. Must be called inside
+ * rcu_read_lock(), and result refcount is unchanged.
*/
-struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
+struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
unsigned short mask)
{
struct net_device *dev, *ret;
ret = NULL;
- rcu_read_lock();
for_each_netdev_rcu(net, dev) {
if (((dev->flags ^ if_flags) & mask) == 0) {
- dev_hold(dev);
ret = dev;
break;
}
}
- rcu_read_unlock();
return ret;
}
-EXPORT_SYMBOL(dev_get_by_flags);
+EXPORT_SYMBOL(dev_get_by_flags_rcu);
/**
* dev_valid_name - check if name is okay for network device
@@ -1490,8 +1485,9 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
skb_orphan(skb);
nf_reset(skb);
- if (!(dev->flags & IFF_UP) ||
- (skb->len > (dev->mtu + dev->hard_header_len))) {
+ if (unlikely(!(dev->flags & IFF_UP) ||
+ (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) {
+ atomic_long_inc(&dev->rx_dropped);
kfree_skb(skb);
return NET_RX_DROP;
}
@@ -1542,7 +1538,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
if (net_ratelimit())
printk(KERN_CRIT "protocol %04x is "
"buggy, dev %s\n",
- skb2->protocol, dev->name);
+ ntohs(skb2->protocol),
+ dev->name);
skb_reset_network_header(skb2);
}
@@ -1558,21 +1555,56 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
* Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
* greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
*/
-void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
- unsigned int real_num = dev->real_num_tx_queues;
+ if (txq < 1 || txq > dev->num_tx_queues)
+ return -EINVAL;
+
+ if (dev->reg_state == NETREG_REGISTERED) {
+ ASSERT_RTNL();
- if (unlikely(txq > dev->num_tx_queues))
- ;
- else if (txq > real_num)
- dev->real_num_tx_queues = txq;
- else if (txq < real_num) {
- dev->real_num_tx_queues = txq;
- qdisc_reset_all_tx_gt(dev, txq);
+ if (txq < dev->real_num_tx_queues)
+ qdisc_reset_all_tx_gt(dev, txq);
}
+
+ dev->real_num_tx_queues = txq;
+ return 0;
}
EXPORT_SYMBOL(netif_set_real_num_tx_queues);
+#ifdef CONFIG_RPS
+/**
+ * netif_set_real_num_rx_queues - set actual number of RX queues used
+ * @dev: Network device
+ * @rxq: Actual number of RX queues
+ *
+ * This must be called either with the rtnl_lock held or before
+ * registration of the net device. Returns 0 on success, or a
+ * negative error code. If called before registration, it always
+ * succeeds.
+ */
+int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
+{
+ int rc;
+
+ if (rxq < 1 || rxq > dev->num_rx_queues)
+ return -EINVAL;
+
+ if (dev->reg_state == NETREG_REGISTERED) {
+ ASSERT_RTNL();
+
+ rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
+ rxq);
+ if (rc)
+ return rc;
+ }
+
+ dev->real_num_rx_queues = rxq;
+ return 0;
+}
+EXPORT_SYMBOL(netif_set_real_num_rx_queues);
+#endif
+
static inline void __netif_reschedule(struct Qdisc *q)
{
struct softnet_data *sd;
@@ -1653,10 +1685,10 @@ EXPORT_SYMBOL(netif_device_attach);
static bool can_checksum_protocol(unsigned long features, __be16 protocol)
{
- return ((features & NETIF_F_GEN_CSUM) ||
- ((features & NETIF_F_IP_CSUM) &&
+ return ((features & NETIF_F_NO_CSUM) ||
+ ((features & NETIF_F_V4_CSUM) &&
protocol == htons(ETH_P_IP)) ||
- ((features & NETIF_F_IPV6_CSUM) &&
+ ((features & NETIF_F_V6_CSUM) &&
protocol == htons(ETH_P_IPV6)) ||
((features & NETIF_F_FCOE_CRC) &&
protocol == htons(ETH_P_FCOE)));
@@ -1664,17 +1696,18 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
{
- if (can_checksum_protocol(dev->features, skb->protocol))
- return true;
+ __be16 protocol = skb->protocol;
+ int features = dev->features;
- if (skb->protocol == htons(ETH_P_8021Q)) {
+ if (vlan_tx_tag_present(skb)) {
+ features &= dev->vlan_features;
+ } else if (protocol == htons(ETH_P_8021Q)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
- if (can_checksum_protocol(dev->features & dev->vlan_features,
- veh->h_vlan_encapsulated_proto))
- return true;
+ protocol = veh->h_vlan_encapsulated_proto;
+ features &= dev->vlan_features;
}
- return false;
+ return can_checksum_protocol(features, protocol);
}
/**
@@ -1763,6 +1796,16 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
__be16 type = skb->protocol;
int err;
+ if (type == htons(ETH_P_8021Q)) {
+ struct vlan_ethhdr *veh;
+
+ if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
+ return ERR_PTR(-EINVAL);
+
+ veh = (struct vlan_ethhdr *)skb->data;
+ type = veh->h_vlan_encapsulated_proto;
+ }
+
skb_reset_mac_header(skb);
skb->mac_len = skb->network_header - skb->mac_header;
__skb_pull(skb, skb->mac_len);
@@ -1907,14 +1950,14 @@ static int dev_gso_segment(struct sk_buff *skb)
/*
* Try to orphan skb early, right before transmission by the device.
- * We cannot orphan skb if tx timestamp is requested, since
- * drivers need to call skb_tstamp_tx() to send the timestamp.
+ * We cannot orphan skb if tx timestamp is requested or the sk-reference
+ * is needed on driver level for other reasons, e.g. see net/can/raw.c
*/
static inline void skb_orphan_try(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
- if (sk && !skb_tx(skb)->flags) {
+ if (sk && !skb_shinfo(skb)->tx_flags) {
/* skb_tx_hash() wont be able to get sk.
* We copy sk_hash into skb->rxhash
*/
@@ -1924,6 +1967,27 @@ static inline void skb_orphan_try(struct sk_buff *skb)
}
}
+/*
+ * Returns true if either:
+ * 1. skb has frag_list and the device doesn't support FRAGLIST, or
+ * 2. skb is fragmented and the device does not support SG, or if
+ * at least one of fragments is in highmem and device does not
+ * support DMA from it.
+ */
+static inline int skb_needs_linearize(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ int features = dev->features;
+
+ if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb))
+ features &= dev->vlan_features;
+
+ return skb_is_nonlinear(skb) &&
+ ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
+ (skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) ||
+ illegal_highdma(dev, skb))));
+}
+
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
{
@@ -1943,14 +2007,40 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
skb_orphan_try(skb);
+ if (vlan_tx_tag_present(skb) &&
+ !(dev->features & NETIF_F_HW_VLAN_TX)) {
+ skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+ if (unlikely(!skb))
+ goto out;
+
+ skb->vlan_tci = 0;
+ }
+
if (netif_needs_gso(dev, skb)) {
if (unlikely(dev_gso_segment(skb)))
goto out_kfree_skb;
if (skb->next)
goto gso;
+ } else {
+ if (skb_needs_linearize(skb, dev) &&
+ __skb_linearize(skb))
+ goto out_kfree_skb;
+
+ /* If packet is not checksummed and device does not
+ * support checksumming for this protocol, complete
+ * checksumming here.
+ */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ skb_set_transport_header(skb, skb->csum_start -
+ skb_headroom(skb));
+ if (!dev_can_checksum(dev, skb) &&
+ skb_checksum_help(skb))
+ goto out_kfree_skb;
+ }
}
rc = ops->ndo_start_xmit(skb, dev);
+ trace_net_dev_xmit(skb, rc);
if (rc == NETDEV_TX_OK)
txq_trans_update(txq);
return rc;
@@ -1971,6 +2061,7 @@ gso:
skb_dst_drop(nskb);
rc = ops->ndo_start_xmit(nskb, dev);
+ trace_net_dev_xmit(nskb, rc);
if (unlikely(rc != NETDEV_TX_OK)) {
if (rc & ~NETDEV_TX_MASK)
goto out_kfree_gso_skb;
@@ -1988,6 +2079,7 @@ out_kfree_gso_skb:
skb->destructor = DEV_GSO_CB(skb)->destructor;
out_kfree_skb:
kfree_skb(skb);
+out:
return rc;
}
@@ -2031,16 +2123,16 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
struct sk_buff *skb)
{
int queue_index;
- struct sock *sk = skb->sk;
+ const struct net_device_ops *ops = dev->netdev_ops;
- queue_index = sk_tx_queue_get(sk);
- if (queue_index < 0) {
- const struct net_device_ops *ops = dev->netdev_ops;
+ if (ops->ndo_select_queue) {
+ queue_index = ops->ndo_select_queue(dev, skb);
+ queue_index = dev_cap_txqueue(dev, queue_index);
+ } else {
+ struct sock *sk = skb->sk;
+ queue_index = sk_tx_queue_get(sk);
+ if (queue_index < 0) {
- if (ops->ndo_select_queue) {
- queue_index = ops->ndo_select_queue(dev, skb);
- queue_index = dev_cap_txqueue(dev, queue_index);
- } else {
queue_index = 0;
if (dev->real_num_tx_queues > 1)
queue_index = skb_tx_hash(dev, skb);
@@ -2063,14 +2155,24 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
struct netdev_queue *txq)
{
spinlock_t *root_lock = qdisc_lock(q);
+ bool contended = qdisc_is_running(q);
int rc;
+ /*
+ * Heuristic to force contended enqueues to serialize on a
+ * separate lock before trying to get qdisc main lock.
+ * This permits __QDISC_STATE_RUNNING owner to get the lock more often
+ * and dequeue packets faster.
+ */
+ if (unlikely(contended))
+ spin_lock(&q->busylock);
+
spin_lock(root_lock);
if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
kfree_skb(skb);
rc = NET_XMIT_DROP;
} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
- !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
+ qdisc_run_begin(q)) {
/*
* This is a work-conserving queue; there are no old skbs
* waiting to be sent out; and the qdisc is not running -
@@ -2079,36 +2181,35 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
skb_dst_force(skb);
__qdisc_update_bstats(q, skb->len);
- if (sch_direct_xmit(skb, q, dev, txq, root_lock))
+ if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
+ if (unlikely(contended)) {
+ spin_unlock(&q->busylock);
+ contended = false;
+ }
__qdisc_run(q);
- else
- clear_bit(__QDISC_STATE_RUNNING, &q->state);
+ } else
+ qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
} else {
skb_dst_force(skb);
rc = qdisc_enqueue_root(skb, q);
- qdisc_run(q);
+ if (qdisc_run_begin(q)) {
+ if (unlikely(contended)) {
+ spin_unlock(&q->busylock);
+ contended = false;
+ }
+ __qdisc_run(q);
+ }
}
spin_unlock(root_lock);
-
+ if (unlikely(contended))
+ spin_unlock(&q->busylock);
return rc;
}
-/*
- * Returns true if either:
- * 1. skb has frag_list and the device doesn't support FRAGLIST, or
- * 2. skb is fragmented and the device does not support SG, or if
- * at least one of fragments is in highmem and device does not
- * support DMA from it.
- */
-static inline int skb_needs_linearize(struct sk_buff *skb,
- struct net_device *dev)
-{
- return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
- (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
- illegal_highdma(dev, skb)));
-}
+static DEFINE_PER_CPU(int, xmit_recursion);
+#define RECURSION_LIMIT 10
/**
* dev_queue_xmit - transmit a buffer
@@ -2142,25 +2243,6 @@ int dev_queue_xmit(struct sk_buff *skb)
struct Qdisc *q;
int rc = -ENOMEM;
- /* GSO will handle the following emulations directly. */
- if (netif_needs_gso(dev, skb))
- goto gso;
-
- /* Convert a paged skb to linear, if required */
- if (skb_needs_linearize(skb, dev) && __skb_linearize(skb))
- goto out_kfree_skb;
-
- /* If packet is not checksummed and device does not support
- * checksumming for this protocol, complete checksumming here.
- */
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- skb_set_transport_header(skb, skb->csum_start -
- skb_headroom(skb));
- if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
- goto out_kfree_skb;
- }
-
-gso:
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
*/
@@ -2172,6 +2254,7 @@ gso:
#ifdef CONFIG_NET_CLS_ACT
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
#endif
+ trace_net_dev_queue(skb);
if (q->enqueue) {
rc = __dev_xmit_skb(skb, q, dev, txq);
goto out;
@@ -2194,10 +2277,15 @@ gso:
if (txq->xmit_lock_owner != cpu) {
+ if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
+ goto recursion_alert;
+
HARD_TX_LOCK(dev, txq, cpu);
if (!netif_tx_queue_stopped(txq)) {
+ __this_cpu_inc(xmit_recursion);
rc = dev_hard_start_xmit(skb, dev, txq);
+ __this_cpu_dec(xmit_recursion);
if (dev_xmit_complete(rc)) {
HARD_TX_UNLOCK(dev, txq);
goto out;
@@ -2209,7 +2297,9 @@ gso:
"queue packet!\n", dev->name);
} else {
/* Recursion is detected! It is possible,
- * unfortunately */
+ * unfortunately
+ */
+recursion_alert:
if (net_ratelimit())
printk(KERN_CRIT "Dead loop on virtual device "
"%s, fix it urgently!\n", dev->name);
@@ -2219,7 +2309,6 @@ gso:
rc = -ENETDOWN;
rcu_read_unlock_bh();
-out_kfree_skb:
kfree_skb(skb);
return rc;
out:
@@ -2246,69 +2335,44 @@ static inline void ____napi_schedule(struct softnet_data *sd,
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
-#ifdef CONFIG_RPS
-
-/* One global table that all flow-based protocols share. */
-struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
-EXPORT_SYMBOL(rps_sock_flow_table);
-
/*
- * get_rps_cpu is called from netif_receive_skb and returns the target
- * CPU from the RPS map of the receiving queue for a given skb.
- * rcu_read_lock must be held on entry.
+ * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
+ * and src/dst port numbers. Returns a non-zero hash number on success
+ * and 0 on failure.
*/
-static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
- struct rps_dev_flow **rflowp)
+__u32 __skb_get_rxhash(struct sk_buff *skb)
{
+ int nhoff, hash = 0, poff;
struct ipv6hdr *ip6;
struct iphdr *ip;
- struct netdev_rx_queue *rxqueue;
- struct rps_map *map;
- struct rps_dev_flow_table *flow_table;
- struct rps_sock_flow_table *sock_flow_table;
- int cpu = -1;
u8 ip_proto;
- u16 tcpu;
u32 addr1, addr2, ihl;
union {
u32 v32;
u16 v16[2];
} ports;
- if (skb_rx_queue_recorded(skb)) {
- u16 index = skb_get_rx_queue(skb);
- if (unlikely(index >= dev->num_rx_queues)) {
- WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
- "on queue %u, but number of RX queues is %u\n",
- dev->name, index, dev->num_rx_queues);
- goto done;
- }
- rxqueue = dev->_rx + index;
- } else
- rxqueue = dev->_rx;
-
- if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
- goto done;
-
- if (skb->rxhash)
- goto got_hash; /* Skip hash computation on packet header */
+ nhoff = skb_network_offset(skb);
switch (skb->protocol) {
case __constant_htons(ETH_P_IP):
- if (!pskb_may_pull(skb, sizeof(*ip)))
+ if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
goto done;
- ip = (struct iphdr *) skb->data;
- ip_proto = ip->protocol;
+ ip = (struct iphdr *) (skb->data + nhoff);
+ if (ip->frag_off & htons(IP_MF | IP_OFFSET))
+ ip_proto = 0;
+ else
+ ip_proto = ip->protocol;
addr1 = (__force u32) ip->saddr;
addr2 = (__force u32) ip->daddr;
ihl = ip->ihl;
break;
case __constant_htons(ETH_P_IPV6):
- if (!pskb_may_pull(skb, sizeof(*ip6)))
+ if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
goto done;
- ip6 = (struct ipv6hdr *) skb->data;
+ ip6 = (struct ipv6hdr *) (skb->data + nhoff);
ip_proto = ip6->nexthdr;
addr1 = (__force u32) ip6->saddr.s6_addr32[3];
addr2 = (__force u32) ip6->daddr.s6_addr32[3];
@@ -2317,33 +2381,81 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
default:
goto done;
}
- switch (ip_proto) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- case IPPROTO_DCCP:
- case IPPROTO_ESP:
- case IPPROTO_AH:
- case IPPROTO_SCTP:
- case IPPROTO_UDPLITE:
- if (pskb_may_pull(skb, (ihl * 4) + 4)) {
- ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
+
+ ports.v32 = 0;
+ poff = proto_ports_offset(ip_proto);
+ if (poff >= 0) {
+ nhoff += ihl * 4 + poff;
+ if (pskb_may_pull(skb, nhoff + 4)) {
+ ports.v32 = * (__force u32 *) (skb->data + nhoff);
if (ports.v16[1] < ports.v16[0])
swap(ports.v16[0], ports.v16[1]);
- break;
}
- default:
- ports.v32 = 0;
- break;
}
/* get a consistent hash (same value on both flow directions) */
if (addr2 < addr1)
swap(addr1, addr2);
- skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
- if (!skb->rxhash)
- skb->rxhash = 1;
-got_hash:
+ hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
+ if (!hash)
+ hash = 1;
+
+done:
+ return hash;
+}
+EXPORT_SYMBOL(__skb_get_rxhash);
+
+#ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
+/*
+ * get_rps_cpu is called from netif_receive_skb and returns the target
+ * CPU from the RPS map of the receiving queue for a given skb.
+ * rcu_read_lock must be held on entry.
+ */
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+ struct rps_dev_flow **rflowp)
+{
+ struct netdev_rx_queue *rxqueue;
+ struct rps_map *map;
+ struct rps_dev_flow_table *flow_table;
+ struct rps_sock_flow_table *sock_flow_table;
+ int cpu = -1;
+ u16 tcpu;
+
+ if (skb_rx_queue_recorded(skb)) {
+ u16 index = skb_get_rx_queue(skb);
+ if (unlikely(index >= dev->real_num_rx_queues)) {
+ WARN_ONCE(dev->real_num_rx_queues > 1,
+ "%s received packet on queue %u, but number "
+ "of RX queues is %u\n",
+ dev->name, index, dev->real_num_rx_queues);
+ goto done;
+ }
+ rxqueue = dev->_rx + index;
+ } else
+ rxqueue = dev->_rx;
+
+ map = rcu_dereference(rxqueue->rps_map);
+ if (map) {
+ if (map->len == 1) {
+ tcpu = map->cpus[0];
+ if (cpu_online(tcpu))
+ cpu = tcpu;
+ goto done;
+ }
+ } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) {
+ goto done;
+ }
+
+ skb_reset_network_header(skb);
+ if (!skb_get_rxhash(skb))
+ goto done;
+
flow_table = rcu_dereference(rxqueue->rps_flow_table);
sock_flow_table = rcu_dereference(rps_sock_flow_table);
if (flow_table && sock_flow_table) {
@@ -2383,7 +2495,6 @@ got_hash:
}
}
- map = rcu_dereference(rxqueue->rps_map);
if (map) {
tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
@@ -2469,6 +2580,7 @@ enqueue:
local_irq_restore(flags);
+ atomic_long_inc(&skb->dev->rx_dropped);
kfree_skb(skb);
return NET_RX_DROP;
}
@@ -2499,11 +2611,13 @@ int netif_rx(struct sk_buff *skb)
if (netdev_tstamp_prequeue)
net_timestamp_check(skb);
+ trace_netif_rx(skb);
#ifdef CONFIG_RPS
{
struct rps_dev_flow voidflow, *rflow = &voidflow;
int cpu;
+ preempt_disable();
rcu_read_lock();
cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -2513,6 +2627,7 @@ int netif_rx(struct sk_buff *skb)
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
rcu_read_unlock();
+ preempt_enable();
}
#else
{
@@ -2556,6 +2671,7 @@ static void net_tx_action(struct softirq_action *h)
clist = clist->next;
WARN_ON(atomic_read(&skb->users));
+ trace_kfree_skb(skb, net_tx_action);
__kfree_skb(skb);
}
}
@@ -2604,70 +2720,14 @@ static inline int deliver_skb(struct sk_buff *skb,
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
-#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-
-#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
+ (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
/* This hook is defined here for ATM LANE */
int (*br_fdb_test_addr_hook)(struct net_device *dev,
unsigned char *addr) __read_mostly;
EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
#endif
-/*
- * If bridge module is loaded call bridging hook.
- * returns NULL if packet was consumed.
- */
-struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
- struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(br_handle_frame_hook);
-
-static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
- struct packet_type **pt_prev, int *ret,
- struct net_device *orig_dev)
-{
- struct net_bridge_port *port;
-
- if (skb->pkt_type == PACKET_LOOPBACK ||
- (port = rcu_dereference(skb->dev->br_port)) == NULL)
- return skb;
-
- if (*pt_prev) {
- *ret = deliver_skb(skb, *pt_prev, orig_dev);
- *pt_prev = NULL;
- }
-
- return br_handle_frame_hook(port, skb);
-}
-#else
-#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
-#endif
-
-#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
-struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
- struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
-
-static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
- struct packet_type **pt_prev,
- int *ret,
- struct net_device *orig_dev)
-{
- struct macvlan_port *port;
-
- port = rcu_dereference(skb->dev->macvlan_port);
- if (!port)
- return skb;
-
- if (*pt_prev) {
- *ret = deliver_skb(skb, *pt_prev, orig_dev);
- *pt_prev = NULL;
- }
- return macvlan_handle_frame_hook(port, skb);
-}
-#else
-#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
-#endif
-
#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
* when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
@@ -2677,26 +2737,23 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
* the ingress scheduler, you just cant add policies on ingress.
*
*/
-static int ing_filter(struct sk_buff *skb)
+static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
{
struct net_device *dev = skb->dev;
u32 ttl = G_TC_RTTL(skb->tc_verd);
- struct netdev_queue *rxq;
int result = TC_ACT_OK;
struct Qdisc *q;
- if (MAX_RED_LOOP < ttl++) {
- printk(KERN_WARNING
- "Redir loop detected Dropping packet (%d->%d)\n",
- skb->skb_iif, dev->ifindex);
+ if (unlikely(MAX_RED_LOOP < ttl++)) {
+ if (net_ratelimit())
+ pr_warning( "Redir loop detected Dropping packet (%d->%d)\n",
+ skb->skb_iif, dev->ifindex);
return TC_ACT_SHOT;
}
skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
- rxq = &dev->rx_queue;
-
q = rxq->qdisc;
if (q != &noop_qdisc) {
spin_lock(qdisc_lock(q));
@@ -2712,18 +2769,17 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
struct packet_type **pt_prev,
int *ret, struct net_device *orig_dev)
{
- if (skb->dev->rx_queue.qdisc == &noop_qdisc)
+ struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
+
+ if (!rxq || rxq->qdisc == &noop_qdisc)
goto out;
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
- } else {
- /* Huh? Why does turning on AF_PACKET affect this? */
- skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
}
- switch (ing_filter(skb)) {
+ switch (ing_filter(skb, rxq)) {
case TC_ACT_SHOT:
case TC_ACT_STOLEN:
kfree_skb(skb);
@@ -2736,32 +2792,50 @@ out:
}
#endif
-/*
- * netif_nit_deliver - deliver received packets to network taps
- * @skb: buffer
+/**
+ * netdev_rx_handler_register - register receive handler
+ * @dev: device to register a handler for
+ * @rx_handler: receive handler to register
+ * @rx_handler_data: data pointer that is used by rx handler
+ *
+ * Register a receive hander for a device. This handler will then be
+ * called from __netif_receive_skb. A negative errno code is returned
+ * on a failure.
*
- * This function is used to deliver incoming packets to network
- * taps. It should be used when the normal netif_receive_skb path
- * is bypassed, for example because of VLAN acceleration.
+ * The caller must hold the rtnl_mutex.
*/
-void netif_nit_deliver(struct sk_buff *skb)
+int netdev_rx_handler_register(struct net_device *dev,
+ rx_handler_func_t *rx_handler,
+ void *rx_handler_data)
{
- struct packet_type *ptype;
+ ASSERT_RTNL();
- if (list_empty(&ptype_all))
- return;
+ if (dev->rx_handler)
+ return -EBUSY;
- skb_reset_network_header(skb);
- skb_reset_transport_header(skb);
- skb->mac_len = skb->network_header - skb->mac_header;
+ rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
+ rcu_assign_pointer(dev->rx_handler, rx_handler);
- rcu_read_lock();
- list_for_each_entry_rcu(ptype, &ptype_all, list) {
- if (!ptype->dev || ptype->dev == skb->dev)
- deliver_skb(skb, ptype, skb->dev);
- }
- rcu_read_unlock();
+ return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
+
+/**
+ * netdev_rx_handler_unregister - unregister receive handler
+ * @dev: device to unregister a handler from
+ *
+ * Unregister a receive hander from a device.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+void netdev_rx_handler_unregister(struct net_device *dev)
+{
+
+ ASSERT_RTNL();
+ rcu_assign_pointer(dev->rx_handler, NULL);
+ rcu_assign_pointer(dev->rx_handler_data, NULL);
}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
struct net_device *master)
@@ -2784,7 +2858,8 @@ int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
if (master->priv_flags & IFF_MASTER_ARPMON)
dev->last_rx = jiffies;
- if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) {
+ if ((master->priv_flags & IFF_MASTER_ALB) &&
+ (master->priv_flags & IFF_BRIDGE_PORT)) {
/* Do address unmangle. The local destination address
* will be always the one master has. Provides the right
* functionality in a bridge.
@@ -2815,6 +2890,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop);
static int __netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
+ rx_handler_func_t *rx_handler;
struct net_device *orig_dev;
struct net_device *master;
struct net_device *null_or_orig;
@@ -2825,8 +2901,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
if (!netdev_tstamp_prequeue)
net_timestamp_check(skb);
- if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
- return NET_RX_SUCCESS;
+ trace_netif_receive_skb(skb);
/* if we've gotten here through NAPI, check netpoll */
if (netpoll_receive_skb(skb))
@@ -2840,8 +2915,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
* be delivered to pkt handlers that are exact matches. Also
* the deliver_no_wcard flag will be set. If packet handlers
* are sensitive to duplicate packets these skbs will need to
- * be dropped at the handler. The vlan accel path may have
- * already set the deliver_no_wcard flag.
+ * be dropped at the handler.
*/
null_or_orig = NULL;
orig_dev = skb->dev;
@@ -2856,8 +2930,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
skb->dev = master;
}
- __get_cpu_var(softnet_data).processed++;
-
+ __this_cpu_inc(softnet_data.processed);
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
skb->mac_len = skb->network_header - skb->mac_header;
@@ -2889,12 +2962,29 @@ static int __netif_receive_skb(struct sk_buff *skb)
ncls:
#endif
- skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
- if (!skb)
- goto out;
- skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
- if (!skb)
- goto out;
+ /* Handle special case of bridge or macvlan */
+ rx_handler = rcu_dereference(skb->dev->rx_handler);
+ if (rx_handler) {
+ if (pt_prev) {
+ ret = deliver_skb(skb, pt_prev, orig_dev);
+ pt_prev = NULL;
+ }
+ skb = rx_handler(skb);
+ if (!skb)
+ goto out;
+ }
+
+ if (vlan_tx_tag_present(skb)) {
+ if (pt_prev) {
+ ret = deliver_skb(skb, pt_prev, orig_dev);
+ pt_prev = NULL;
+ }
+ if (vlan_hwaccel_do_receive(&skb)) {
+ ret = __netif_receive_skb(skb);
+ goto out;
+ } else if (unlikely(!skb))
+ goto out;
+ }
/*
* Make sure frames received on VLAN interfaces stacked on
@@ -2923,6 +3013,7 @@ ncls:
if (pt_prev) {
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
+ atomic_long_inc(&skb->dev->rx_dropped);
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
@@ -2955,6 +3046,9 @@ int netif_receive_skb(struct sk_buff *skb)
if (netdev_tstamp_prequeue)
net_timestamp_check(skb);
+ if (skb_defer_rx_timestamp(skb))
+ return NET_RX_SUCCESS;
+
#ifdef CONFIG_RPS
{
struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -3040,7 +3134,7 @@ out:
return netif_receive_skb(skb);
}
-static void napi_gro_flush(struct napi_struct *napi)
+inline void napi_gro_flush(struct napi_struct *napi)
{
struct sk_buff *skb, *next;
@@ -3053,6 +3147,7 @@ static void napi_gro_flush(struct napi_struct *napi)
napi->gro_count = 0;
napi->gro_list = NULL;
}
+EXPORT_SYMBOL(napi_gro_flush);
enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
@@ -3064,10 +3159,10 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
int mac_len;
enum gro_result ret;
- if (!(skb->dev->features & NETIF_F_GRO))
+ if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
goto normal;
- if (skb_is_gso(skb) || skb_has_frags(skb))
+ if (skb_is_gso(skb) || skb_has_frag_list(skb))
goto normal;
rcu_read_lock();
@@ -3133,7 +3228,7 @@ pull:
put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
- --skb_shinfo(skb)->nr_frags);
+ --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
}
}
@@ -3146,19 +3241,19 @@ normal:
}
EXPORT_SYMBOL(dev_gro_receive);
-static gro_result_t
+static inline gro_result_t
__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
struct sk_buff *p;
- if (netpoll_rx_on(skb))
- return GRO_NORMAL;
-
for (p = napi->gro_list; p; p = p->next) {
- NAPI_GRO_CB(p)->same_flow =
- (p->dev == skb->dev) &&
- !compare_ether_header(skb_mac_header(p),
+ unsigned long diffs;
+
+ diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
+ diffs |= p->vlan_tci ^ skb->vlan_tci;
+ diffs |= compare_ether_header(skb_mac_header(p),
skb_gro_mac_header(skb));
+ NAPI_GRO_CB(p)->same_flow = !diffs;
NAPI_GRO_CB(p)->flush = 0;
}
@@ -3211,14 +3306,14 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
}
EXPORT_SYMBOL(napi_gro_receive);
-void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
+static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
{
__skb_pull(skb, skb_headlen(skb));
skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
+ skb->vlan_tci = 0;
napi->skb = skb;
}
-EXPORT_SYMBOL(napi_reuse_skb);
struct sk_buff *napi_get_frags(struct napi_struct *napi)
{
@@ -3719,10 +3814,11 @@ void dev_seq_stop(struct seq_file *seq, void *v)
static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
{
- const struct net_device_stats *stats = dev_get_stats(dev);
+ struct rtnl_link_stats64 temp;
+ const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
- seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
- "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
+ seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
+ "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
dev->name, stats->rx_bytes, stats->rx_packets,
stats->rx_errors,
stats->rx_dropped + stats->rx_missed_errors,
@@ -4837,7 +4933,7 @@ static void rollback_registered_many(struct list_head *head)
dev = list_first_entry(head, struct net_device, unreg_list);
call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
- synchronize_net();
+ rcu_barrier();
list_for_each_entry(dev, head, unreg_list)
dev_put(dev);
@@ -4851,21 +4947,6 @@ static void rollback_registered(struct net_device *dev)
rollback_registered_many(&single);
}
-static void __netdev_init_queue_locks_one(struct net_device *dev,
- struct netdev_queue *dev_queue,
- void *_unused)
-{
- spin_lock_init(&dev_queue->_xmit_lock);
- netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
- dev_queue->xmit_lock_owner = -1;
-}
-
-static void netdev_init_queue_locks(struct net_device *dev)
-{
- netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
- __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
-}
-
unsigned long netdev_fix_features(unsigned long features, const char *name)
{
/* Fix illegal SG+CSUM combinations. */
@@ -4933,6 +5014,66 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
}
EXPORT_SYMBOL(netif_stacked_transfer_operstate);
+static int netif_alloc_rx_queues(struct net_device *dev)
+{
+#ifdef CONFIG_RPS
+ unsigned int i, count = dev->num_rx_queues;
+ struct netdev_rx_queue *rx;
+
+ BUG_ON(count < 1);
+
+ rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
+ if (!rx) {
+ pr_err("netdev: Unable to allocate %u rx queues.\n", count);
+ return -ENOMEM;
+ }
+ dev->_rx = rx;
+
+ /*
+ * Set a pointer to first element in the array which holds the
+ * reference count.
+ */
+ for (i = 0; i < count; i++)
+ rx[i].first = rx;
+#endif
+ return 0;
+}
+
+static int netif_alloc_netdev_queues(struct net_device *dev)
+{
+ unsigned int count = dev->num_tx_queues;
+ struct netdev_queue *tx;
+
+ BUG_ON(count < 1);
+
+ tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
+ if (!tx) {
+ pr_err("netdev: Unable to allocate %u tx queues.\n",
+ count);
+ return -ENOMEM;
+ }
+ dev->_tx = tx;
+ return 0;
+}
+
+static void netdev_init_one_queue(struct net_device *dev,
+ struct netdev_queue *queue,
+ void *_unused)
+{
+ queue->dev = dev;
+
+ /* Initialize queue lock */
+ spin_lock_init(&queue->_xmit_lock);
+ netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
+ queue->xmit_lock_owner = -1;
+}
+
+static void netdev_init_queues(struct net_device *dev)
+{
+ netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+ spin_lock_init(&dev->tx_global_lock);
+}
+
/**
* register_netdevice - register a network device
* @dev: device to register
@@ -4966,28 +5107,19 @@ int register_netdevice(struct net_device *dev)
spin_lock_init(&dev->addr_list_lock);
netdev_set_addr_lockdep_class(dev);
- netdev_init_queue_locks(dev);
dev->iflink = -1;
-#ifdef CONFIG_RPS
- if (!dev->num_rx_queues) {
- /*
- * Allocate a single RX queue if driver never called
- * alloc_netdev_mq
- */
+ ret = netif_alloc_rx_queues(dev);
+ if (ret)
+ goto out;
- dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
- if (!dev->_rx) {
- ret = -ENOMEM;
- goto out;
- }
+ ret = netif_alloc_netdev_queues(dev);
+ if (ret)
+ goto out;
+
+ netdev_init_queues(dev);
- dev->_rx->first = dev->_rx;
- atomic_set(&dev->_rx->count, 1);
- dev->num_rx_queues = 1;
- }
-#endif
/* Init, if this function is available */
if (dev->netdev_ops->ndo_init) {
ret = dev->netdev_ops->ndo_init(dev);
@@ -5027,6 +5159,12 @@ int register_netdevice(struct net_device *dev)
if (dev->features & NETIF_F_SG)
dev->features |= NETIF_F_GSO;
+ /* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
+ * vlan_dev_init() will do the dev->features check, so these features
+ * are enabled only if supported by underlying device.
+ */
+ dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA);
+
ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
ret = notifier_to_errno(ret);
if (ret)
@@ -5097,9 +5235,6 @@ int init_dummy_netdev(struct net_device *dev)
*/
dev->reg_state = NETREG_DUMMY;
- /* initialize the ref count */
- atomic_set(&dev->refcnt, 1);
-
/* NAPI wants this */
INIT_LIST_HEAD(&dev->napi_list);
@@ -5107,6 +5242,11 @@ int init_dummy_netdev(struct net_device *dev)
set_bit(__LINK_STATE_PRESENT, &dev->state);
set_bit(__LINK_STATE_START, &dev->state);
+ /* Note : We dont allocate pcpu_refcnt for dummy devices,
+ * because users of this 'device' dont need to change
+ * its refcount.
+ */
+
return 0;
}
EXPORT_SYMBOL_GPL(init_dummy_netdev);
@@ -5148,6 +5288,16 @@ out:
}
EXPORT_SYMBOL(register_netdev);
+int netdev_refcnt_read(const struct net_device *dev)
+{
+ int i, refcnt = 0;
+
+ for_each_possible_cpu(i)
+ refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
+ return refcnt;
+}
+EXPORT_SYMBOL(netdev_refcnt_read);
+
/*
* netdev_wait_allrefs - wait until all references are gone.
*
@@ -5162,11 +5312,14 @@ EXPORT_SYMBOL(register_netdev);
static void netdev_wait_allrefs(struct net_device *dev)
{
unsigned long rebroadcast_time, warning_time;
+ int refcnt;
linkwatch_forget_dev(dev);
rebroadcast_time = warning_time = jiffies;
- while (atomic_read(&dev->refcnt) != 0) {
+ refcnt = netdev_refcnt_read(dev);
+
+ while (refcnt != 0) {
if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
rtnl_lock();
@@ -5193,11 +5346,13 @@ static void netdev_wait_allrefs(struct net_device *dev)
msleep(250);
+ refcnt = netdev_refcnt_read(dev);
+
if (time_after(jiffies, warning_time + 10 * HZ)) {
printk(KERN_EMERG "unregister_netdevice: "
"waiting for %s to become free. Usage "
"count = %d\n",
- dev->name, atomic_read(&dev->refcnt));
+ dev->name, refcnt);
warning_time = jiffies;
}
}
@@ -5255,9 +5410,9 @@ void netdev_run_todo(void)
netdev_wait_allrefs(dev);
/* paranoia */
- BUG_ON(atomic_read(&dev->refcnt));
- WARN_ON(dev->ip_ptr);
- WARN_ON(dev->ip6_ptr);
+ BUG_ON(netdev_refcnt_read(dev));
+ WARN_ON(rcu_dereference_raw(dev->ip_ptr));
+ WARN_ON(rcu_dereference_raw(dev->ip6_ptr));
WARN_ON(dev->dn_ptr);
if (dev->destructor)
@@ -5271,20 +5426,22 @@ void netdev_run_todo(void)
/**
* dev_txq_stats_fold - fold tx_queues stats
* @dev: device to get statistics from
- * @stats: struct net_device_stats to hold results
+ * @stats: struct rtnl_link_stats64 to hold results
*/
void dev_txq_stats_fold(const struct net_device *dev,
- struct net_device_stats *stats)
+ struct rtnl_link_stats64 *stats)
{
- unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
+ u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
unsigned int i;
struct netdev_queue *txq;
for (i = 0; i < dev->num_tx_queues; i++) {
txq = netdev_get_tx_queue(dev, i);
+ spin_lock_bh(&txq->_xmit_lock);
tx_bytes += txq->tx_bytes;
tx_packets += txq->tx_packets;
tx_dropped += txq->tx_dropped;
+ spin_unlock_bh(&txq->_xmit_lock);
}
if (tx_bytes || tx_packets || tx_dropped) {
stats->tx_bytes = tx_bytes;
@@ -5294,38 +5451,72 @@ void dev_txq_stats_fold(const struct net_device *dev,
}
EXPORT_SYMBOL(dev_txq_stats_fold);
+/* Convert net_device_stats to rtnl_link_stats64. They have the same
+ * fields in the same order, with only the type differing.
+ */
+static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
+ const struct net_device_stats *netdev_stats)
+{
+#if BITS_PER_LONG == 64
+ BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
+ memcpy(stats64, netdev_stats, sizeof(*stats64));
+#else
+ size_t i, n = sizeof(*stats64) / sizeof(u64);
+ const unsigned long *src = (const unsigned long *)netdev_stats;
+ u64 *dst = (u64 *)stats64;
+
+ BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
+ sizeof(*stats64) / sizeof(u64));
+ for (i = 0; i < n; i++)
+ dst[i] = src[i];
+#endif
+}
+
/**
* dev_get_stats - get network device statistics
* @dev: device to get statistics from
+ * @storage: place to store stats
*
- * Get network statistics from device. The device driver may provide
- * its own method by setting dev->netdev_ops->get_stats; otherwise
- * the internal statistics structure is used.
+ * Get network statistics from device. Return @storage.
+ * The device driver may provide its own method by setting
+ * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
+ * otherwise the internal statistics structure is used.
*/
-const struct net_device_stats *dev_get_stats(struct net_device *dev)
+struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
+ struct rtnl_link_stats64 *storage)
{
const struct net_device_ops *ops = dev->netdev_ops;
- if (ops->ndo_get_stats)
- return ops->ndo_get_stats(dev);
-
- dev_txq_stats_fold(dev, &dev->stats);
- return &dev->stats;
+ if (ops->ndo_get_stats64) {
+ memset(storage, 0, sizeof(*storage));
+ ops->ndo_get_stats64(dev, storage);
+ } else if (ops->ndo_get_stats) {
+ netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
+ } else {
+ netdev_stats_to_stats64(storage, &dev->stats);
+ dev_txq_stats_fold(dev, storage);
+ }
+ storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
+ return storage;
}
EXPORT_SYMBOL(dev_get_stats);
-static void netdev_init_one_queue(struct net_device *dev,
- struct netdev_queue *queue,
- void *_unused)
+struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
{
- queue->dev = dev;
-}
+ struct netdev_queue *queue = dev_ingress_queue(dev);
-static void netdev_init_queues(struct net_device *dev)
-{
- netdev_init_one_queue(dev, &dev->rx_queue, NULL);
- netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
- spin_lock_init(&dev->tx_global_lock);
+#ifdef CONFIG_NET_CLS_ACT
+ if (queue)
+ return queue;
+ queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+ if (!queue)
+ return NULL;
+ netdev_init_one_queue(dev, queue, NULL);
+ queue->qdisc = &noop_qdisc;
+ queue->qdisc_sleeping = &noop_qdisc;
+ rcu_assign_pointer(dev->ingress_queue, queue);
+#endif
+ return queue;
}
/**
@@ -5342,17 +5533,18 @@ static void netdev_init_queues(struct net_device *dev)
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
void (*setup)(struct net_device *), unsigned int queue_count)
{
- struct netdev_queue *tx;
struct net_device *dev;
size_t alloc_size;
struct net_device *p;
-#ifdef CONFIG_RPS
- struct netdev_rx_queue *rx;
- int i;
-#endif
BUG_ON(strlen(name) >= sizeof(dev->name));
+ if (queue_count < 1) {
+ pr_err("alloc_netdev: Unable to allocate device "
+ "with zero queues.\n");
+ return NULL;
+ }
+
alloc_size = sizeof(struct net_device);
if (sizeof_priv) {
/* ensure 32-byte alignment of private area */
@@ -5368,55 +5560,31 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
return NULL;
}
- tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
- if (!tx) {
- printk(KERN_ERR "alloc_netdev: Unable to allocate "
- "tx qdiscs.\n");
- goto free_p;
- }
-
-#ifdef CONFIG_RPS
- rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
- if (!rx) {
- printk(KERN_ERR "alloc_netdev: Unable to allocate "
- "rx queues.\n");
- goto free_tx;
- }
-
- atomic_set(&rx->count, queue_count);
-
- /*
- * Set a pointer to first element in the array which holds the
- * reference count.
- */
- for (i = 0; i < queue_count; i++)
- rx[i].first = rx;
-#endif
-
dev = PTR_ALIGN(p, NETDEV_ALIGN);
dev->padded = (char *)dev - (char *)p;
+ dev->pcpu_refcnt = alloc_percpu(int);
+ if (!dev->pcpu_refcnt)
+ goto free_p;
+
if (dev_addr_init(dev))
- goto free_rx;
+ goto free_pcpu;
dev_mc_init(dev);
dev_uc_init(dev);
dev_net_set(dev, &init_net);
- dev->_tx = tx;
dev->num_tx_queues = queue_count;
dev->real_num_tx_queues = queue_count;
#ifdef CONFIG_RPS
- dev->_rx = rx;
dev->num_rx_queues = queue_count;
+ dev->real_num_rx_queues = queue_count;
#endif
dev->gso_max_size = GSO_MAX_SIZE;
- netdev_init_queues(dev);
-
INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
dev->ethtool_ntuple_list.count = 0;
INIT_LIST_HEAD(&dev->napi_list);
@@ -5427,12 +5595,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
strcpy(dev->name, name);
return dev;
-free_rx:
-#ifdef CONFIG_RPS
- kfree(rx);
-free_tx:
-#endif
- kfree(tx);
+free_pcpu:
+ free_percpu(dev->pcpu_refcnt);
free_p:
kfree(p);
return NULL;
@@ -5455,6 +5619,8 @@ void free_netdev(struct net_device *dev)
kfree(dev->_tx);
+ kfree(rcu_dereference_raw(dev->ingress_queue));
+
/* Flush device addresses */
dev_addr_flush(dev);
@@ -5464,6 +5630,9 @@ void free_netdev(struct net_device *dev)
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
netif_napi_del(p);
+ free_percpu(dev->pcpu_refcnt);
+ dev->pcpu_refcnt = NULL;
+
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
kfree((char *)dev - dev->padded);
@@ -5618,6 +5787,10 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
/* Notify protocols, that we are about to destroy
this device. They should clean all the things.
+
+ Note that dev->reg_state stays at NETREG_REGISTERED.
+ This is wanted because this way 8021q and macvlan know
+ the device is just moving and can keep their slaves up.
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
@@ -5815,6 +5988,68 @@ char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
return buffer;
}
+static int __netdev_printk(const char *level, const struct net_device *dev,
+ struct va_format *vaf)
+{
+ int r;
+
+ if (dev && dev->dev.parent)
+ r = dev_printk(level, dev->dev.parent, "%s: %pV",
+ netdev_name(dev), vaf);
+ else if (dev)
+ r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
+ else
+ r = printk("%s(NULL net_device): %pV", level, vaf);
+
+ return r;
+}
+
+int netdev_printk(const char *level, const struct net_device *dev,
+ const char *format, ...)
+{
+ struct va_format vaf;
+ va_list args;
+ int r;
+
+ va_start(args, format);
+
+ vaf.fmt = format;
+ vaf.va = &args;
+
+ r = __netdev_printk(level, dev, &vaf);
+ va_end(args);
+
+ return r;
+}
+EXPORT_SYMBOL(netdev_printk);
+
+#define define_netdev_printk_level(func, level) \
+int func(const struct net_device *dev, const char *fmt, ...) \
+{ \
+ int r; \
+ struct va_format vaf; \
+ va_list args; \
+ \
+ va_start(args, fmt); \
+ \
+ vaf.fmt = fmt; \
+ vaf.va = &args; \
+ \
+ r = __netdev_printk(level, dev, &vaf); \
+ va_end(args); \
+ \
+ return r; \
+} \
+EXPORT_SYMBOL(func);
+
+define_netdev_printk_level(netdev_emerg, KERN_EMERG);
+define_netdev_printk_level(netdev_alert, KERN_ALERT);
+define_netdev_printk_level(netdev_crit, KERN_CRIT);
+define_netdev_printk_level(netdev_err, KERN_ERR);
+define_netdev_printk_level(netdev_warn, KERN_WARNING);
+define_netdev_printk_level(netdev_notice, KERN_NOTICE);
+define_netdev_printk_level(netdev_info, KERN_INFO);
+
static void __net_exit netdev_exit(struct net *net)
{
kfree(net->dev_name_head);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index ad41529fb60f..36e603c78ce9 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -223,6 +223,11 @@ static int set_all_monitor_traces(int state)
spin_lock(&trace_state_lock);
+ if (state == trace_state) {
+ rc = -EAGAIN;
+ goto out_unlock;
+ }
+
switch (state) {
case TRACE_ON:
rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
@@ -251,11 +256,12 @@ static int set_all_monitor_traces(int state)
if (!rc)
trace_state = state;
+ else
+ rc = -EINPROGRESS;
+out_unlock:
spin_unlock(&trace_state_lock);
- if (rc)
- return -EINPROGRESS;
return rc;
}
@@ -341,9 +347,9 @@ static struct notifier_block dropmon_net_notifier = {
static int __init init_net_drop_monitor(void)
{
- int cpu;
- int rc, i, ret;
struct per_cpu_dm_data *data;
+ int cpu, rc;
+
printk(KERN_INFO "Initalizing network drop monitor service\n");
if (sizeof(void *) > 8) {
@@ -351,21 +357,12 @@ static int __init init_net_drop_monitor(void)
return -ENOSPC;
}
- if (genl_register_family(&net_drop_monitor_family) < 0) {
+ rc = genl_register_family_with_ops(&net_drop_monitor_family,
+ dropmon_ops,
+ ARRAY_SIZE(dropmon_ops));
+ if (rc) {
printk(KERN_ERR "Could not create drop monitor netlink family\n");
- return -EFAULT;
- }
-
- rc = -EFAULT;
-
- for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) {
- ret = genl_register_ops(&net_drop_monitor_family,
- &dropmon_ops[i]);
- if (ret) {
- printk(KERN_CRIT "Failed to register operation %d\n",
- dropmon_ops[i].cmd);
- goto out_unreg;
- }
+ return rc;
}
rc = register_netdevice_notifier(&dropmon_net_notifier);
diff --git a/net/core/dst.c b/net/core/dst.c
index 9920722cc82b..8abe628b79f1 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -168,7 +168,7 @@ void *dst_alloc(struct dst_ops *ops)
{
struct dst_entry *dst;
- if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
+ if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
if (ops->gc(ops))
return NULL;
}
@@ -183,7 +183,7 @@ void *dst_alloc(struct dst_ops *ops)
#if RT_CACHE_DEBUG >= 2
atomic_inc(&dst_total);
#endif
- atomic_inc(&ops->entries);
+ dst_entries_add(ops, 1);
return dst;
}
EXPORT_SYMBOL(dst_alloc);
@@ -197,7 +197,6 @@ static void ___dst_free(struct dst_entry *dst)
dst->input = dst->output = dst_discard;
dst->obsolete = 2;
}
-EXPORT_SYMBOL(__dst_free);
void __dst_free(struct dst_entry *dst)
{
@@ -213,6 +212,7 @@ void __dst_free(struct dst_entry *dst)
}
spin_unlock_bh(&dst_garbage.lock);
}
+EXPORT_SYMBOL(__dst_free);
struct dst_entry *dst_destroy(struct dst_entry * dst)
{
@@ -228,15 +228,15 @@ again:
child = dst->child;
dst->hh = NULL;
- if (hh && atomic_dec_and_test(&hh->hh_refcnt))
- kfree(hh);
+ if (hh)
+ hh_cache_put(hh);
if (neigh) {
dst->neighbour = NULL;
neigh_release(neigh);
}
- atomic_dec(&dst->ops->entries);
+ dst_entries_add(dst->ops, -1);
if (dst->ops->destroy)
dst->ops->destroy(dst);
@@ -271,13 +271,40 @@ void dst_release(struct dst_entry *dst)
if (dst) {
int newrefcnt;
- smp_mb__before_atomic_dec();
newrefcnt = atomic_dec_return(&dst->__refcnt);
WARN_ON(newrefcnt < 0);
+ if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
+ dst = dst_destroy(dst);
+ if (dst)
+ __dst_free(dst);
+ }
}
}
EXPORT_SYMBOL(dst_release);
+/**
+ * skb_dst_set_noref - sets skb dst, without a reference
+ * @skb: buffer
+ * @dst: dst entry
+ *
+ * Sets skb dst, assuming a reference was not taken on dst
+ * skb_dst_drop() should not dst_release() this dst
+ */
+void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
+{
+ WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+ /* If dst not in cache, we must take a reference, because
+ * dst_release() will destroy dst as soon as its refcount becomes zero
+ */
+ if (unlikely(dst->flags & DST_NOCACHE)) {
+ dst_hold(dst);
+ skb_dst_set(skb, dst);
+ } else {
+ skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
+ }
+}
+EXPORT_SYMBOL(skb_dst_set_noref);
+
/* Dirty hack. We did it in 2.2 (in __dst_free),
* we have _very_ good reasons not to repeat
* this mistake in 2.3, but we have no choice
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 75e4ffeb8cc9..956a9f4971cb 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -19,6 +19,7 @@
#include <linux/netdevice.h>
#include <linux/bitops.h>
#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
#include <linux/slab.h>
/*
@@ -131,7 +132,8 @@ EXPORT_SYMBOL(ethtool_op_set_ufo);
* NETIF_F_xxx values in include/linux/netdevice.h
*/
static const u32 flags_dup_features =
- (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
+ (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | ETH_FLAG_NTUPLE |
+ ETH_FLAG_RXHASH);
u32 ethtool_op_get_flags(struct net_device *dev)
{
@@ -144,31 +146,13 @@ u32 ethtool_op_get_flags(struct net_device *dev)
}
EXPORT_SYMBOL(ethtool_op_get_flags);
-int ethtool_op_set_flags(struct net_device *dev, u32 data)
+int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
{
- const struct ethtool_ops *ops = dev->ethtool_ops;
- unsigned long features = dev->features;
-
- if (data & ETH_FLAG_LRO)
- features |= NETIF_F_LRO;
- else
- features &= ~NETIF_F_LRO;
-
- if (data & ETH_FLAG_NTUPLE) {
- if (!ops->set_rx_ntuple)
- return -EOPNOTSUPP;
- features |= NETIF_F_NTUPLE;
- } else {
- /* safe to clear regardless */
- features &= ~NETIF_F_NTUPLE;
- }
-
- if (data & ETH_FLAG_RXHASH)
- features |= NETIF_F_RXHASH;
- else
- features &= ~NETIF_F_RXHASH;
+ if (data & ~supported)
+ return -EINVAL;
- dev->features = features;
+ dev->features = ((dev->features & ~flags_dup_features) |
+ (data & flags_dup_features));
return 0;
}
EXPORT_SYMBOL(ethtool_op_set_flags);
@@ -223,18 +207,24 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo info;
const struct ethtool_ops *ops = dev->ethtool_ops;
- if (!ops->get_drvinfo)
- return -EOPNOTSUPP;
-
memset(&info, 0, sizeof(info));
info.cmd = ETHTOOL_GDRVINFO;
- ops->get_drvinfo(dev, &info);
+ if (ops && ops->get_drvinfo) {
+ ops->get_drvinfo(dev, &info);
+ } else if (dev->dev.parent && dev->dev.parent->driver) {
+ strlcpy(info.bus_info, dev_name(dev->dev.parent),
+ sizeof(info.bus_info));
+ strlcpy(info.driver, dev->dev.parent->driver->name,
+ sizeof(info.driver));
+ } else {
+ return -EOPNOTSUPP;
+ }
/*
* this method of obtaining string set info is deprecated;
* Use ETHTOOL_GSSET_INFO instead.
*/
- if (ops->get_sset_count) {
+ if (ops && ops->get_sset_count) {
int rc;
rc = ops->get_sset_count(dev, ETH_SS_TEST);
@@ -247,9 +237,9 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
if (rc >= 0)
info.n_priv_flags = rc;
}
- if (ops->get_regs_len)
+ if (ops && ops->get_regs_len)
info.regdump_len = ops->get_regs_len(dev);
- if (ops->get_eeprom_len)
+ if (ops && ops->get_eeprom_len)
info.eedump_len = ops->get_eeprom_len(dev);
if (copy_to_user(useraddr, &info, sizeof(info)))
@@ -366,7 +356,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
if (info.cmd == ETHTOOL_GRXCLSRLALL) {
if (info.rule_cnt > 0) {
if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
- rule_buf = kmalloc(info.rule_cnt * sizeof(u32),
+ rule_buf = kzalloc(info.rule_cnt * sizeof(u32),
GFP_USER);
if (!rule_buf)
return -ENOMEM;
@@ -395,6 +385,80 @@ err_out:
return ret;
}
+static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
+ void __user *useraddr)
+{
+ struct ethtool_rxfh_indir *indir;
+ u32 table_size;
+ size_t full_size;
+ int ret;
+
+ if (!dev->ethtool_ops->get_rxfh_indir)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&table_size,
+ useraddr + offsetof(struct ethtool_rxfh_indir, size),
+ sizeof(table_size)))
+ return -EFAULT;
+
+ if (table_size >
+ (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
+ return -ENOMEM;
+ full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
+ indir = kzalloc(full_size, GFP_USER);
+ if (!indir)
+ return -ENOMEM;
+
+ indir->cmd = ETHTOOL_GRXFHINDIR;
+ indir->size = table_size;
+ ret = dev->ethtool_ops->get_rxfh_indir(dev, indir);
+ if (ret)
+ goto out;
+
+ if (copy_to_user(useraddr, indir, full_size))
+ ret = -EFAULT;
+
+out:
+ kfree(indir);
+ return ret;
+}
+
+static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
+ void __user *useraddr)
+{
+ struct ethtool_rxfh_indir *indir;
+ u32 table_size;
+ size_t full_size;
+ int ret;
+
+ if (!dev->ethtool_ops->set_rxfh_indir)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&table_size,
+ useraddr + offsetof(struct ethtool_rxfh_indir, size),
+ sizeof(table_size)))
+ return -EFAULT;
+
+ if (table_size >
+ (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
+ return -ENOMEM;
+ full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
+ indir = kmalloc(full_size, GFP_USER);
+ if (!indir)
+ return -ENOMEM;
+
+ if (copy_from_user(indir, useraddr, full_size)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ ret = dev->ethtool_ops->set_rxfh_indir(dev, indir);
+
+out:
+ kfree(indir);
+ return ret;
+}
+
static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
struct ethtool_rx_ntuple_flow_spec *spec,
struct ethtool_rx_ntuple_flow_spec_container *fsc)
@@ -423,6 +487,38 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
list->count++;
}
+/*
+ * ethtool does not (or did not) set masks for flow parameters that are
+ * not specified, so if both value and mask are 0 then this must be
+ * treated as equivalent to a mask with all bits set. Implement that
+ * here rather than in drivers.
+ */
+static void rx_ntuple_fix_masks(struct ethtool_rx_ntuple_flow_spec *fs)
+{
+ struct ethtool_tcpip4_spec *entry = &fs->h_u.tcp_ip4_spec;
+ struct ethtool_tcpip4_spec *mask = &fs->m_u.tcp_ip4_spec;
+
+ if (fs->flow_type != TCP_V4_FLOW &&
+ fs->flow_type != UDP_V4_FLOW &&
+ fs->flow_type != SCTP_V4_FLOW)
+ return;
+
+ if (!(entry->ip4src | mask->ip4src))
+ mask->ip4src = htonl(0xffffffff);
+ if (!(entry->ip4dst | mask->ip4dst))
+ mask->ip4dst = htonl(0xffffffff);
+ if (!(entry->psrc | mask->psrc))
+ mask->psrc = htons(0xffff);
+ if (!(entry->pdst | mask->pdst))
+ mask->pdst = htons(0xffff);
+ if (!(entry->tos | mask->tos))
+ mask->tos = 0xff;
+ if (!(fs->vlan_tag | fs->vlan_tag_mask))
+ fs->vlan_tag_mask = 0xffff;
+ if (!(fs->data | fs->data_mask))
+ fs->data_mask = 0xffffffffffffffffULL;
+}
+
static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
void __user *useraddr)
{
@@ -437,6 +533,8 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
return -EFAULT;
+ rx_ntuple_fix_masks(&cmd.fs);
+
/*
* Cache filter in dev struct for GET operation only if
* the underlying driver doesn't have its own GET operation, and
@@ -482,7 +580,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
gstrings.len = ret;
- data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
+ data = kzalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
if (!data)
return -ENOMEM;
@@ -611,19 +709,19 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
break;
case IP_USER_FLOW:
sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.raw_ip4_spec.ip4src);
+ fsc->fs.h_u.usr_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.raw_ip4_spec.ip4src);
+ fsc->fs.m_u.usr_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.raw_ip4_spec.ip4dst);
+ fsc->fs.h_u.usr_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.raw_ip4_spec.ip4dst);
+ fsc->fs.m_u.usr_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
break;
@@ -719,7 +817,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
if (regs.len > reglen)
regs.len = reglen;
- regbuf = kmalloc(reglen, GFP_USER);
+ regbuf = vmalloc(reglen);
if (!regbuf)
return -ENOMEM;
@@ -734,7 +832,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
ret = 0;
out:
- kfree(regbuf);
+ vfree(regbuf);
return ret;
}
@@ -1119,8 +1217,11 @@ static int ethtool_set_gro(struct net_device *dev, char __user *useraddr)
return -EFAULT;
if (edata.data) {
- if (!dev->ethtool_ops->get_rx_csum ||
- !dev->ethtool_ops->get_rx_csum(dev))
+ u32 rxcsum = dev->ethtool_ops->get_rx_csum ?
+ dev->ethtool_ops->get_rx_csum(dev) :
+ ethtool_op_get_rx_csum(dev);
+
+ if (!rxcsum)
return -EINVAL;
dev->features |= NETIF_F_GRO;
} else
@@ -1346,14 +1447,22 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
if (!dev || !netif_device_present(dev))
return -ENODEV;
- if (!dev->ethtool_ops)
- return -EOPNOTSUPP;
-
if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
return -EFAULT;
+ if (!dev->ethtool_ops) {
+ /* ETHTOOL_GDRVINFO does not require any driver support.
+ * It is also unprivileged and does not change anything,
+ * so we can take a shortcut to it. */
+ if (ethcmd == ETHTOOL_GDRVINFO)
+ return ethtool_get_drvinfo(dev, useraddr);
+ else
+ return -EOPNOTSUPP;
+ }
+
/* Allow some commands to be done by anyone */
switch (ethcmd) {
+ case ETHTOOL_GSET:
case ETHTOOL_GDRVINFO:
case ETHTOOL_GMSGLVL:
case ETHTOOL_GCOALESCE:
@@ -1563,6 +1672,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GSSET_INFO:
rc = ethtool_get_sset_info(dev, useraddr);
break;
+ case ETHTOOL_GRXFHINDIR:
+ rc = ethtool_get_rxfh_indir(dev, useraddr);
+ break;
+ case ETHTOOL_SRXFHINDIR:
+ rc = ethtool_set_rxfh_indir(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 42e84e08a1be..82a4369ae150 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -144,7 +144,7 @@ fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net)
}
EXPORT_SYMBOL_GPL(fib_rules_register);
-void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
+static void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
{
struct fib_rule *rule, *tmp;
@@ -153,7 +153,6 @@ void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
fib_rule_put(rule);
}
}
-EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops);
static void fib_rules_put_rcu(struct rcu_head *head)
{
@@ -182,7 +181,8 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
{
int ret = 0;
- if (rule->iifindex && (rule->iifindex != fl->iif))
+ if (rule->iifindex && (rule->iifindex != fl->iif) &&
+ !(fl->flags & FLOWI_FLAG_MATCH_ANY_IIF))
goto out;
if (rule->oifindex && (rule->oifindex != fl->oif))
@@ -225,9 +225,12 @@ jumped:
err = ops->action(rule, fl, flags, arg);
if (err != -EAGAIN) {
- fib_rule_get(rule);
- arg->rule = rule;
- goto out;
+ if ((arg->flags & FIB_LOOKUP_NOREF) ||
+ likely(atomic_inc_not_zero(&rule->refcnt))) {
+ arg->rule = rule;
+ goto out;
+ }
+ break;
}
}
@@ -348,12 +351,12 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
list_for_each_entry(r, &ops->rules_list, list) {
if (r->pref == rule->target) {
- rule->ctarget = r;
+ RCU_INIT_POINTER(rule->ctarget, r);
break;
}
}
- if (rule->ctarget == NULL)
+ if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
unresolved = 1;
} else if (rule->action == FR_ACT_GOTO)
goto errout_free;
@@ -370,6 +373,11 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
fib_rule_get(rule);
+ if (last)
+ list_add_rcu(&rule->list, &last->list);
+ else
+ list_add_rcu(&rule->list, &ops->rules_list);
+
if (ops->unresolved_rules) {
/*
* There are unresolved goto rules in the list, check if
@@ -378,7 +386,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
list_for_each_entry(r, &ops->rules_list, list) {
if (r->action == FR_ACT_GOTO &&
r->target == rule->pref) {
- BUG_ON(r->ctarget != NULL);
+ BUG_ON(rtnl_dereference(r->ctarget) != NULL);
rcu_assign_pointer(r->ctarget, rule);
if (--ops->unresolved_rules == 0)
break;
@@ -392,11 +400,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (unresolved)
ops->unresolved_rules++;
- if (last)
- list_add_rcu(&rule->list, &last->list);
- else
- list_add_rcu(&rule->list, &ops->rules_list);
-
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
flush_route_cache(ops);
rules_ops_put(ops);
@@ -484,14 +487,13 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
*/
if (ops->nr_goto_rules > 0) {
list_for_each_entry(tmp, &ops->rules_list, list) {
- if (tmp->ctarget == rule) {
+ if (rtnl_dereference(tmp->ctarget) == rule) {
rcu_assign_pointer(tmp->ctarget, NULL);
ops->unresolved_rules++;
}
}
}
- synchronize_rcu();
notify_rule_change(RTM_DELRULE, rule, ops, nlh,
NETLINK_CB(skb).pid);
fib_rule_put(rule);
@@ -543,7 +545,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh->action = rule->action;
frh->flags = rule->flags;
- if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
+ if (rule->action == FR_ACT_GOTO &&
+ rcu_dereference_raw(rule->ctarget) == NULL)
frh->flags |= FIB_RULE_UNRESOLVED;
if (rule->iifname[0]) {
diff --git a/net/core/filter.c b/net/core/filter.c
index da69fb728d32..7beaec36b541 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -89,8 +89,8 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
rcu_read_lock_bh();
filter = rcu_dereference_bh(sk->sk_filter);
if (filter) {
- unsigned int pkt_len = sk_run_filter(skb, filter->insns,
- filter->len);
+ unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len);
+
err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
}
rcu_read_unlock_bh();
@@ -128,87 +128,87 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
fentry = &filter[pc];
switch (fentry->code) {
- case BPF_ALU|BPF_ADD|BPF_X:
+ case BPF_S_ALU_ADD_X:
A += X;
continue;
- case BPF_ALU|BPF_ADD|BPF_K:
+ case BPF_S_ALU_ADD_K:
A += fentry->k;
continue;
- case BPF_ALU|BPF_SUB|BPF_X:
+ case BPF_S_ALU_SUB_X:
A -= X;
continue;
- case BPF_ALU|BPF_SUB|BPF_K:
+ case BPF_S_ALU_SUB_K:
A -= fentry->k;
continue;
- case BPF_ALU|BPF_MUL|BPF_X:
+ case BPF_S_ALU_MUL_X:
A *= X;
continue;
- case BPF_ALU|BPF_MUL|BPF_K:
+ case BPF_S_ALU_MUL_K:
A *= fentry->k;
continue;
- case BPF_ALU|BPF_DIV|BPF_X:
+ case BPF_S_ALU_DIV_X:
if (X == 0)
return 0;
A /= X;
continue;
- case BPF_ALU|BPF_DIV|BPF_K:
+ case BPF_S_ALU_DIV_K:
A /= fentry->k;
continue;
- case BPF_ALU|BPF_AND|BPF_X:
+ case BPF_S_ALU_AND_X:
A &= X;
continue;
- case BPF_ALU|BPF_AND|BPF_K:
+ case BPF_S_ALU_AND_K:
A &= fentry->k;
continue;
- case BPF_ALU|BPF_OR|BPF_X:
+ case BPF_S_ALU_OR_X:
A |= X;
continue;
- case BPF_ALU|BPF_OR|BPF_K:
+ case BPF_S_ALU_OR_K:
A |= fentry->k;
continue;
- case BPF_ALU|BPF_LSH|BPF_X:
+ case BPF_S_ALU_LSH_X:
A <<= X;
continue;
- case BPF_ALU|BPF_LSH|BPF_K:
+ case BPF_S_ALU_LSH_K:
A <<= fentry->k;
continue;
- case BPF_ALU|BPF_RSH|BPF_X:
+ case BPF_S_ALU_RSH_X:
A >>= X;
continue;
- case BPF_ALU|BPF_RSH|BPF_K:
+ case BPF_S_ALU_RSH_K:
A >>= fentry->k;
continue;
- case BPF_ALU|BPF_NEG:
+ case BPF_S_ALU_NEG:
A = -A;
continue;
- case BPF_JMP|BPF_JA:
+ case BPF_S_JMP_JA:
pc += fentry->k;
continue;
- case BPF_JMP|BPF_JGT|BPF_K:
+ case BPF_S_JMP_JGT_K:
pc += (A > fentry->k) ? fentry->jt : fentry->jf;
continue;
- case BPF_JMP|BPF_JGE|BPF_K:
+ case BPF_S_JMP_JGE_K:
pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
continue;
- case BPF_JMP|BPF_JEQ|BPF_K:
+ case BPF_S_JMP_JEQ_K:
pc += (A == fentry->k) ? fentry->jt : fentry->jf;
continue;
- case BPF_JMP|BPF_JSET|BPF_K:
+ case BPF_S_JMP_JSET_K:
pc += (A & fentry->k) ? fentry->jt : fentry->jf;
continue;
- case BPF_JMP|BPF_JGT|BPF_X:
+ case BPF_S_JMP_JGT_X:
pc += (A > X) ? fentry->jt : fentry->jf;
continue;
- case BPF_JMP|BPF_JGE|BPF_X:
+ case BPF_S_JMP_JGE_X:
pc += (A >= X) ? fentry->jt : fentry->jf;
continue;
- case BPF_JMP|BPF_JEQ|BPF_X:
+ case BPF_S_JMP_JEQ_X:
pc += (A == X) ? fentry->jt : fentry->jf;
continue;
- case BPF_JMP|BPF_JSET|BPF_X:
+ case BPF_S_JMP_JSET_X:
pc += (A & X) ? fentry->jt : fentry->jf;
continue;
- case BPF_LD|BPF_W|BPF_ABS:
+ case BPF_S_LD_W_ABS:
k = fentry->k;
load_w:
ptr = load_pointer(skb, k, 4, &tmp);
@@ -217,7 +217,7 @@ load_w:
continue;
}
break;
- case BPF_LD|BPF_H|BPF_ABS:
+ case BPF_S_LD_H_ABS:
k = fentry->k;
load_h:
ptr = load_pointer(skb, k, 2, &tmp);
@@ -226,7 +226,7 @@ load_h:
continue;
}
break;
- case BPF_LD|BPF_B|BPF_ABS:
+ case BPF_S_LD_B_ABS:
k = fentry->k;
load_b:
ptr = load_pointer(skb, k, 1, &tmp);
@@ -235,54 +235,54 @@ load_b:
continue;
}
break;
- case BPF_LD|BPF_W|BPF_LEN:
+ case BPF_S_LD_W_LEN:
A = skb->len;
continue;
- case BPF_LDX|BPF_W|BPF_LEN:
+ case BPF_S_LDX_W_LEN:
X = skb->len;
continue;
- case BPF_LD|BPF_W|BPF_IND:
+ case BPF_S_LD_W_IND:
k = X + fentry->k;
goto load_w;
- case BPF_LD|BPF_H|BPF_IND:
+ case BPF_S_LD_H_IND:
k = X + fentry->k;
goto load_h;
- case BPF_LD|BPF_B|BPF_IND:
+ case BPF_S_LD_B_IND:
k = X + fentry->k;
goto load_b;
- case BPF_LDX|BPF_B|BPF_MSH:
+ case BPF_S_LDX_B_MSH:
ptr = load_pointer(skb, fentry->k, 1, &tmp);
if (ptr != NULL) {
X = (*(u8 *)ptr & 0xf) << 2;
continue;
}
return 0;
- case BPF_LD|BPF_IMM:
+ case BPF_S_LD_IMM:
A = fentry->k;
continue;
- case BPF_LDX|BPF_IMM:
+ case BPF_S_LDX_IMM:
X = fentry->k;
continue;
- case BPF_LD|BPF_MEM:
+ case BPF_S_LD_MEM:
A = mem[fentry->k];
continue;
- case BPF_LDX|BPF_MEM:
+ case BPF_S_LDX_MEM:
X = mem[fentry->k];
continue;
- case BPF_MISC|BPF_TAX:
+ case BPF_S_MISC_TAX:
X = A;
continue;
- case BPF_MISC|BPF_TXA:
+ case BPF_S_MISC_TXA:
A = X;
continue;
- case BPF_RET|BPF_K:
+ case BPF_S_RET_K:
return fentry->k;
- case BPF_RET|BPF_A:
+ case BPF_S_RET_A:
return A;
- case BPF_ST:
+ case BPF_S_ST:
mem[fentry->k] = A;
continue;
- case BPF_STX:
+ case BPF_S_STX:
mem[fentry->k] = X;
continue;
default:
@@ -390,53 +390,128 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
/* Only allow valid instructions */
switch (ftest->code) {
case BPF_ALU|BPF_ADD|BPF_K:
+ ftest->code = BPF_S_ALU_ADD_K;
+ break;
case BPF_ALU|BPF_ADD|BPF_X:
+ ftest->code = BPF_S_ALU_ADD_X;
+ break;
case BPF_ALU|BPF_SUB|BPF_K:
+ ftest->code = BPF_S_ALU_SUB_K;
+ break;
case BPF_ALU|BPF_SUB|BPF_X:
+ ftest->code = BPF_S_ALU_SUB_X;
+ break;
case BPF_ALU|BPF_MUL|BPF_K:
+ ftest->code = BPF_S_ALU_MUL_K;
+ break;
case BPF_ALU|BPF_MUL|BPF_X:
+ ftest->code = BPF_S_ALU_MUL_X;
+ break;
case BPF_ALU|BPF_DIV|BPF_X:
+ ftest->code = BPF_S_ALU_DIV_X;
+ break;
case BPF_ALU|BPF_AND|BPF_K:
+ ftest->code = BPF_S_ALU_AND_K;
+ break;
case BPF_ALU|BPF_AND|BPF_X:
+ ftest->code = BPF_S_ALU_AND_X;
+ break;
case BPF_ALU|BPF_OR|BPF_K:
+ ftest->code = BPF_S_ALU_OR_K;
+ break;
case BPF_ALU|BPF_OR|BPF_X:
+ ftest->code = BPF_S_ALU_OR_X;
+ break;
case BPF_ALU|BPF_LSH|BPF_K:
+ ftest->code = BPF_S_ALU_LSH_K;
+ break;
case BPF_ALU|BPF_LSH|BPF_X:
+ ftest->code = BPF_S_ALU_LSH_X;
+ break;
case BPF_ALU|BPF_RSH|BPF_K:
+ ftest->code = BPF_S_ALU_RSH_K;
+ break;
case BPF_ALU|BPF_RSH|BPF_X:
+ ftest->code = BPF_S_ALU_RSH_X;
+ break;
case BPF_ALU|BPF_NEG:
+ ftest->code = BPF_S_ALU_NEG;
+ break;
case BPF_LD|BPF_W|BPF_ABS:
+ ftest->code = BPF_S_LD_W_ABS;
+ break;
case BPF_LD|BPF_H|BPF_ABS:
+ ftest->code = BPF_S_LD_H_ABS;
+ break;
case BPF_LD|BPF_B|BPF_ABS:
+ ftest->code = BPF_S_LD_B_ABS;
+ break;
case BPF_LD|BPF_W|BPF_LEN:
+ ftest->code = BPF_S_LD_W_LEN;
+ break;
case BPF_LD|BPF_W|BPF_IND:
+ ftest->code = BPF_S_LD_W_IND;
+ break;
case BPF_LD|BPF_H|BPF_IND:
+ ftest->code = BPF_S_LD_H_IND;
+ break;
case BPF_LD|BPF_B|BPF_IND:
+ ftest->code = BPF_S_LD_B_IND;
+ break;
case BPF_LD|BPF_IMM:
+ ftest->code = BPF_S_LD_IMM;
+ break;
case BPF_LDX|BPF_W|BPF_LEN:
+ ftest->code = BPF_S_LDX_W_LEN;
+ break;
case BPF_LDX|BPF_B|BPF_MSH:
+ ftest->code = BPF_S_LDX_B_MSH;
+ break;
case BPF_LDX|BPF_IMM:
+ ftest->code = BPF_S_LDX_IMM;
+ break;
case BPF_MISC|BPF_TAX:
+ ftest->code = BPF_S_MISC_TAX;
+ break;
case BPF_MISC|BPF_TXA:
+ ftest->code = BPF_S_MISC_TXA;
+ break;
case BPF_RET|BPF_K:
+ ftest->code = BPF_S_RET_K;
+ break;
case BPF_RET|BPF_A:
+ ftest->code = BPF_S_RET_A;
break;
/* Some instructions need special checks */
- case BPF_ALU|BPF_DIV|BPF_K:
/* check for division by zero */
+ case BPF_ALU|BPF_DIV|BPF_K:
if (ftest->k == 0)
return -EINVAL;
+ ftest->code = BPF_S_ALU_DIV_K;
break;
+ /* check for invalid memory addresses */
case BPF_LD|BPF_MEM:
+ if (ftest->k >= BPF_MEMWORDS)
+ return -EINVAL;
+ ftest->code = BPF_S_LD_MEM;
+ break;
case BPF_LDX|BPF_MEM:
+ if (ftest->k >= BPF_MEMWORDS)
+ return -EINVAL;
+ ftest->code = BPF_S_LDX_MEM;
+ break;
case BPF_ST:
+ if (ftest->k >= BPF_MEMWORDS)
+ return -EINVAL;
+ ftest->code = BPF_S_ST;
+ break;
case BPF_STX:
- /* check for invalid memory addresses */
if (ftest->k >= BPF_MEMWORDS)
return -EINVAL;
+ ftest->code = BPF_S_STX;
break;
case BPF_JMP|BPF_JA:
@@ -447,28 +522,63 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
*/
if (ftest->k >= (unsigned)(flen-pc-1))
return -EINVAL;
+ ftest->code = BPF_S_JMP_JA;
break;
case BPF_JMP|BPF_JEQ|BPF_K:
+ ftest->code = BPF_S_JMP_JEQ_K;
+ break;
case BPF_JMP|BPF_JEQ|BPF_X:
+ ftest->code = BPF_S_JMP_JEQ_X;
+ break;
case BPF_JMP|BPF_JGE|BPF_K:
+ ftest->code = BPF_S_JMP_JGE_K;
+ break;
case BPF_JMP|BPF_JGE|BPF_X:
+ ftest->code = BPF_S_JMP_JGE_X;
+ break;
case BPF_JMP|BPF_JGT|BPF_K:
+ ftest->code = BPF_S_JMP_JGT_K;
+ break;
case BPF_JMP|BPF_JGT|BPF_X:
+ ftest->code = BPF_S_JMP_JGT_X;
+ break;
case BPF_JMP|BPF_JSET|BPF_K:
+ ftest->code = BPF_S_JMP_JSET_K;
+ break;
case BPF_JMP|BPF_JSET|BPF_X:
+ ftest->code = BPF_S_JMP_JSET_X;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
/* for conditionals both must be safe */
+ switch (ftest->code) {
+ case BPF_S_JMP_JEQ_K:
+ case BPF_S_JMP_JEQ_X:
+ case BPF_S_JMP_JGE_K:
+ case BPF_S_JMP_JGE_X:
+ case BPF_S_JMP_JGT_K:
+ case BPF_S_JMP_JGT_X:
+ case BPF_S_JMP_JSET_X:
+ case BPF_S_JMP_JSET_K:
if (pc + ftest->jt + 1 >= flen ||
pc + ftest->jf + 1 >= flen)
return -EINVAL;
- break;
+ }
+ }
+ /* last instruction must be a RET code */
+ switch (filter[flen - 1].code) {
+ case BPF_S_RET_K:
+ case BPF_S_RET_A:
+ return 0;
+ break;
default:
return -EINVAL;
}
- }
-
- return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);
@@ -528,10 +638,9 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
return err;
}
- rcu_read_lock_bh();
- old_fp = rcu_dereference_bh(sk->sk_filter);
+ old_fp = rcu_dereference_protected(sk->sk_filter,
+ sock_owned_by_user(sk));
rcu_assign_pointer(sk->sk_filter, fp);
- rcu_read_unlock_bh();
if (old_fp)
sk_filter_delayed_uncharge(sk, old_fp);
@@ -544,14 +653,13 @@ int sk_detach_filter(struct sock *sk)
int ret = -ENOENT;
struct sk_filter *filter;
- rcu_read_lock_bh();
- filter = rcu_dereference_bh(sk->sk_filter);
+ filter = rcu_dereference_protected(sk->sk_filter,
+ sock_owned_by_user(sk));
if (filter) {
rcu_assign_pointer(sk->sk_filter, NULL);
sk_filter_delayed_uncharge(sk, filter);
ret = 0;
}
- rcu_read_unlock_bh();
return ret;
}
EXPORT_SYMBOL_GPL(sk_detach_filter);
diff --git a/net/core/flow.c b/net/core/flow.c
index 161900674009..127c8a7ffd61 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -53,8 +53,7 @@ struct flow_flush_info {
struct flow_cache {
u32 hash_shift;
- unsigned long order;
- struct flow_cache_percpu *percpu;
+ struct flow_cache_percpu __percpu *percpu;
struct notifier_block hotcpu_notifier;
int low_watermark;
int high_watermark;
@@ -62,8 +61,9 @@ struct flow_cache {
};
atomic_t flow_cache_genid = ATOMIC_INIT(0);
+EXPORT_SYMBOL(flow_cache_genid);
static struct flow_cache flow_cache_global;
-static struct kmem_cache *flow_cachep;
+static struct kmem_cache *flow_cachep __read_mostly;
static DEFINE_SPINLOCK(flow_cache_gc_lock);
static LIST_HEAD(flow_cache_gc_list);
@@ -176,15 +176,11 @@ static u32 flow_hash_code(struct flow_cache *fc,
{
u32 *k = (u32 *) key;
- return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
- & (flow_cache_hash_size(fc) - 1));
+ return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
+ & (flow_cache_hash_size(fc) - 1);
}
-#if (BITS_PER_LONG == 64)
-typedef u64 flow_compare_t;
-#else
-typedef u32 flow_compare_t;
-#endif
+typedef unsigned long flow_compare_t;
/* I hear what you're saying, use memcmp. But memcmp cannot make
* important assumptions that we can here, such as alignment and
@@ -222,7 +218,7 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
unsigned int hash;
local_bh_disable();
- fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
+ fcp = this_cpu_ptr(fc->percpu);
fle = NULL;
flo = NULL;
@@ -291,6 +287,7 @@ ret_object:
local_bh_enable();
return flo;
}
+EXPORT_SYMBOL(flow_cache_lookup);
static void flow_cache_flush_tasklet(unsigned long data)
{
@@ -302,7 +299,7 @@ static void flow_cache_flush_tasklet(unsigned long data)
LIST_HEAD(gc_list);
int i, deleted = 0;
- fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
+ fcp = this_cpu_ptr(fc->percpu);
for (i = 0; i < flow_cache_hash_size(fc); i++) {
hlist_for_each_entry_safe(fle, entry, tmp,
&fcp->hash_table[i], u.hlist) {
@@ -355,62 +352,73 @@ void flow_cache_flush(void)
put_online_cpus();
}
-static void __init flow_cache_cpu_prepare(struct flow_cache *fc,
- struct flow_cache_percpu *fcp)
+static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
{
- fcp->hash_table = (struct hlist_head *)
- __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order);
- if (!fcp->hash_table)
- panic("NET: failed to allocate flow cache order %lu\n", fc->order);
+ struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
+ size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
- fcp->hash_rnd_recalc = 1;
- fcp->hash_count = 0;
- tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
+ if (!fcp->hash_table) {
+ fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
+ if (!fcp->hash_table) {
+ pr_err("NET: failed to allocate flow cache sz %zu\n", sz);
+ return -ENOMEM;
+ }
+ fcp->hash_rnd_recalc = 1;
+ fcp->hash_count = 0;
+ tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
+ }
+ return 0;
}
-static int flow_cache_cpu(struct notifier_block *nfb,
+static int __cpuinit flow_cache_cpu(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
- int cpu = (unsigned long) hcpu;
+ int res, cpu = (unsigned long) hcpu;
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ res = flow_cache_cpu_prepare(fc, cpu);
+ if (res)
+ return notifier_from_errno(res);
+ break;
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
__flow_cache_shrink(fc, fcp, 0);
+ break;
+ }
return NOTIFY_OK;
}
-static int flow_cache_init(struct flow_cache *fc)
+static int __init flow_cache_init(struct flow_cache *fc)
{
- unsigned long order;
int i;
fc->hash_shift = 10;
fc->low_watermark = 2 * flow_cache_hash_size(fc);
fc->high_watermark = 4 * flow_cache_hash_size(fc);
- for (order = 0;
- (PAGE_SIZE << order) <
- (sizeof(struct hlist_head)*flow_cache_hash_size(fc));
- order++)
- /* NOTHING */;
- fc->order = order;
fc->percpu = alloc_percpu(struct flow_cache_percpu);
+ if (!fc->percpu)
+ return -ENOMEM;
- setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
- (unsigned long) fc);
- fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
- add_timer(&fc->rnd_timer);
-
- for_each_possible_cpu(i)
- flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i));
-
+ for_each_online_cpu(i) {
+ if (flow_cache_cpu_prepare(fc, i))
+ return -ENOMEM;
+ }
fc->hotcpu_notifier = (struct notifier_block){
.notifier_call = flow_cache_cpu,
};
register_hotcpu_notifier(&fc->hotcpu_notifier);
+ setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
+ (unsigned long) fc);
+ fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+ add_timer(&fc->rnd_timer);
+
return 0;
}
@@ -424,6 +432,3 @@ static int __init flow_cache_init_global(void)
}
module_init(flow_cache_init_global);
-
-EXPORT_SYMBOL(flow_cache_genid);
-EXPORT_SYMBOL(flow_cache_lookup);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 785e5276a300..7c2373321b74 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -232,7 +232,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
est->last_packets = bstats->packets;
est->avpps = rate_est->pps<<10;
- spin_lock(&est_tree_lock);
+ spin_lock_bh(&est_tree_lock);
if (!elist[idx].timer.function) {
INIT_LIST_HEAD(&elist[idx].list);
setup_timer(&elist[idx].timer, est_timer, idx);
@@ -243,7 +243,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
list_add_rcu(&est->list, &elist[idx].list);
gen_add_node(est);
- spin_unlock(&est_tree_lock);
+ spin_unlock_bh(&est_tree_lock);
return 0;
}
@@ -263,24 +263,25 @@ static void __gen_kill_estimator(struct rcu_head *head)
*
* Removes the rate estimator specified by &bstats and &rate_est.
*
+ * Note : Caller should respect an RCU grace period before freeing stats_lock
*/
void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_rate_est *rate_est)
{
struct gen_estimator *e;
- spin_lock(&est_tree_lock);
+ spin_lock_bh(&est_tree_lock);
while ((e = gen_find_node(bstats, rate_est))) {
rb_erase(&e->node, &est_root);
- write_lock_bh(&est_lock);
+ write_lock(&est_lock);
e->bstats = NULL;
- write_unlock_bh(&est_lock);
+ write_unlock(&est_lock);
list_del_rcu(&e->list);
call_rcu(&e->e_rcu, __gen_kill_estimator);
}
- spin_unlock(&est_tree_lock);
+ spin_unlock_bh(&est_tree_lock);
}
EXPORT_SYMBOL(gen_kill_estimator);
@@ -319,9 +320,9 @@ bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
ASSERT_RTNL();
- spin_lock(&est_tree_lock);
+ spin_lock_bh(&est_tree_lock);
res = gen_find_node(bstats, rate_est) != NULL;
- spin_unlock(&est_tree_lock);
+ spin_unlock_bh(&est_tree_lock);
return res;
}
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 393b1d8618e2..0452eb27a272 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -73,6 +73,7 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
return 0;
}
+EXPORT_SYMBOL(gnet_stats_start_copy_compat);
/**
* gnet_stats_start_copy_compat - start dumping procedure in compatibility mode
@@ -93,6 +94,7 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
{
return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d);
}
+EXPORT_SYMBOL(gnet_stats_start_copy);
/**
* gnet_stats_copy_basic - copy basic statistics into statistic TLV
@@ -123,6 +125,7 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
}
return 0;
}
+EXPORT_SYMBOL(gnet_stats_copy_basic);
/**
* gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
@@ -154,6 +157,7 @@ gnet_stats_copy_rate_est(struct gnet_dump *d,
return 0;
}
+EXPORT_SYMBOL(gnet_stats_copy_rate_est);
/**
* gnet_stats_copy_queue - copy queue statistics into statistics TLV
@@ -181,6 +185,7 @@ gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q)
return 0;
}
+EXPORT_SYMBOL(gnet_stats_copy_queue);
/**
* gnet_stats_copy_app - copy application specific statistics into statistics TLV
@@ -208,6 +213,7 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
return 0;
}
+EXPORT_SYMBOL(gnet_stats_copy_app);
/**
* gnet_stats_finish_copy - finish dumping procedure
@@ -241,12 +247,4 @@ gnet_stats_finish_copy(struct gnet_dump *d)
spin_unlock_bh(d->lock);
return 0;
}
-
-
-EXPORT_SYMBOL(gnet_stats_start_copy);
-EXPORT_SYMBOL(gnet_stats_start_copy_compat);
-EXPORT_SYMBOL(gnet_stats_copy_basic);
-EXPORT_SYMBOL(gnet_stats_copy_rate_est);
-EXPORT_SYMBOL(gnet_stats_copy_queue);
-EXPORT_SYMBOL(gnet_stats_copy_app);
EXPORT_SYMBOL(gnet_stats_finish_copy);
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 1e7f4e91a935..c40f27e7d208 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -37,11 +37,13 @@
int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
{
- int size, err, ct;
+ int size, ct, err;
if (m->msg_namelen) {
if (mode == VERIFY_READ) {
- err = move_addr_to_kernel(m->msg_name, m->msg_namelen,
+ void __user *namep;
+ namep = (void __user __force *) m->msg_name;
+ err = move_addr_to_kernel(namep, m->msg_namelen,
address);
if (err < 0)
return err;
@@ -52,21 +54,20 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
}
size = m->msg_iovlen * sizeof(struct iovec);
- if (copy_from_user(iov, m->msg_iov, size))
+ if (copy_from_user(iov, (void __user __force *) m->msg_iov, size))
return -EFAULT;
m->msg_iov = iov;
err = 0;
for (ct = 0; ct < m->msg_iovlen; ct++) {
- err += iov[ct].iov_len;
- /*
- * Goal is not to verify user data, but to prevent returning
- * negative value, which is interpreted as errno.
- * Overflow is still possible, but it is harmless.
- */
- if (err < 0)
- return -EMSGSIZE;
+ size_t len = iov[ct].iov_len;
+
+ if (len > INT_MAX - err) {
+ len = INT_MAX - err;
+ iov[ct].iov_len = len;
+ }
+ err += len;
}
return err;
@@ -95,6 +96,7 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
return 0;
}
+EXPORT_SYMBOL(memcpy_toiovec);
/*
* Copy kernel to iovec. Returns -EFAULT on error.
@@ -120,6 +122,7 @@ int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
return 0;
}
+EXPORT_SYMBOL(memcpy_toiovecend);
/*
* Copy iovec to kernel. Returns -EFAULT on error.
@@ -144,6 +147,7 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
return 0;
}
+EXPORT_SYMBOL(memcpy_fromiovec);
/*
* Copy iovec from kernel. Returns -EFAULT on error.
@@ -172,6 +176,7 @@ int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
return 0;
}
+EXPORT_SYMBOL(memcpy_fromiovecend);
/*
* And now for the all-in-one: copy and checksum from a user iovec
@@ -256,9 +261,4 @@ out_fault:
err = -EFAULT;
goto out;
}
-
EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
-EXPORT_SYMBOL(memcpy_fromiovec);
-EXPORT_SYMBOL(memcpy_fromiovecend);
-EXPORT_SYMBOL(memcpy_toiovec);
-EXPORT_SYMBOL(memcpy_toiovecend);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index bdbce2f5875b..01a1101b5936 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -243,5 +243,4 @@ void linkwatch_fire_event(struct net_device *dev)
linkwatch_schedule_work(urgent);
}
-
EXPORT_SYMBOL(linkwatch_fire_event);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a4e0a7482c2b..8cc8f9a79db9 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -122,7 +122,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
unsigned long neigh_rand_reach_time(unsigned long base)
{
- return (base ? (net_random() % base) + (base >> 1) : 0);
+ return base ? (net_random() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
@@ -131,15 +131,20 @@ static int neigh_forced_gc(struct neigh_table *tbl)
{
int shrunk = 0;
int i;
+ struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
write_lock_bh(&tbl->lock);
- for (i = 0; i <= tbl->hash_mask; i++) {
- struct neighbour *n, **np;
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
+ for (i = 0; i <= nht->hash_mask; i++) {
+ struct neighbour *n;
+ struct neighbour __rcu **np;
- np = &tbl->hash_buckets[i];
- while ((n = *np) != NULL) {
+ np = &nht->hash_buckets[i];
+ while ((n = rcu_dereference_protected(*np,
+ lockdep_is_held(&tbl->lock))) != NULL) {
/* Neighbour record may be discarded if:
* - nobody refers to it.
* - it is not permanent
@@ -147,7 +152,9 @@ static int neigh_forced_gc(struct neigh_table *tbl)
write_lock(&n->lock);
if (atomic_read(&n->refcnt) == 1 &&
!(n->nud_state & NUD_PERMANENT)) {
- *np = n->next;
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(n->next,
+ lockdep_is_held(&tbl->lock)));
n->dead = 1;
shrunk = 1;
write_unlock(&n->lock);
@@ -199,16 +206,24 @@ static void pneigh_queue_purge(struct sk_buff_head *list)
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
int i;
+ struct neigh_hash_table *nht;
- for (i = 0; i <= tbl->hash_mask; i++) {
- struct neighbour *n, **np = &tbl->hash_buckets[i];
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
- while ((n = *np) != NULL) {
+ for (i = 0; i <= nht->hash_mask; i++) {
+ struct neighbour *n;
+ struct neighbour __rcu **np = &nht->hash_buckets[i];
+
+ while ((n = rcu_dereference_protected(*np,
+ lockdep_is_held(&tbl->lock))) != NULL) {
if (dev && n->dev != dev) {
np = &n->next;
continue;
}
- *np = n->next;
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(n->next,
+ lockdep_is_held(&tbl->lock)));
write_lock(&n->lock);
neigh_del_timer(n);
n->dead = 1;
@@ -279,6 +294,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
skb_queue_head_init(&n->arp_queue);
rwlock_init(&n->lock);
+ seqlock_init(&n->ha_lock);
n->updated = n->used = now;
n->nud_state = NUD_NONE;
n->output = neigh_blackhole;
@@ -297,64 +313,86 @@ out_entries:
goto out;
}
-static struct neighbour **neigh_hash_alloc(unsigned int entries)
+static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
{
- unsigned long size = entries * sizeof(struct neighbour *);
- struct neighbour **ret;
+ size_t size = entries * sizeof(struct neighbour *);
+ struct neigh_hash_table *ret;
+ struct neighbour **buckets;
- if (size <= PAGE_SIZE) {
- ret = kzalloc(size, GFP_ATOMIC);
- } else {
- ret = (struct neighbour **)
- __get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size));
+ ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
+ if (!ret)
+ return NULL;
+ if (size <= PAGE_SIZE)
+ buckets = kzalloc(size, GFP_ATOMIC);
+ else
+ buckets = (struct neighbour **)
+ __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
+ get_order(size));
+ if (!buckets) {
+ kfree(ret);
+ return NULL;
}
+ rcu_assign_pointer(ret->hash_buckets, buckets);
+ ret->hash_mask = entries - 1;
+ get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
return ret;
}
-static void neigh_hash_free(struct neighbour **hash, unsigned int entries)
+static void neigh_hash_free_rcu(struct rcu_head *head)
{
- unsigned long size = entries * sizeof(struct neighbour *);
+ struct neigh_hash_table *nht = container_of(head,
+ struct neigh_hash_table,
+ rcu);
+ size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
+ struct neighbour **buckets = nht->hash_buckets;
if (size <= PAGE_SIZE)
- kfree(hash);
+ kfree(buckets);
else
- free_pages((unsigned long)hash, get_order(size));
+ free_pages((unsigned long)buckets, get_order(size));
+ kfree(nht);
}
-static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
+static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
+ unsigned long new_entries)
{
- struct neighbour **new_hash, **old_hash;
- unsigned int i, new_hash_mask, old_entries;
+ unsigned int i, hash;
+ struct neigh_hash_table *new_nht, *old_nht;
NEIGH_CACHE_STAT_INC(tbl, hash_grows);
BUG_ON(!is_power_of_2(new_entries));
- new_hash = neigh_hash_alloc(new_entries);
- if (!new_hash)
- return;
-
- old_entries = tbl->hash_mask + 1;
- new_hash_mask = new_entries - 1;
- old_hash = tbl->hash_buckets;
+ old_nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
+ new_nht = neigh_hash_alloc(new_entries);
+ if (!new_nht)
+ return old_nht;
- get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
- for (i = 0; i < old_entries; i++) {
+ for (i = 0; i <= old_nht->hash_mask; i++) {
struct neighbour *n, *next;
- for (n = old_hash[i]; n; n = next) {
- unsigned int hash_val = tbl->hash(n->primary_key, n->dev);
-
- hash_val &= new_hash_mask;
- next = n->next;
-
- n->next = new_hash[hash_val];
- new_hash[hash_val] = n;
+ for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
+ lockdep_is_held(&tbl->lock));
+ n != NULL;
+ n = next) {
+ hash = tbl->hash(n->primary_key, n->dev,
+ new_nht->hash_rnd);
+
+ hash &= new_nht->hash_mask;
+ next = rcu_dereference_protected(n->next,
+ lockdep_is_held(&tbl->lock));
+
+ rcu_assign_pointer(n->next,
+ rcu_dereference_protected(
+ new_nht->hash_buckets[hash],
+ lockdep_is_held(&tbl->lock)));
+ rcu_assign_pointer(new_nht->hash_buckets[hash], n);
}
}
- tbl->hash_buckets = new_hash;
- tbl->hash_mask = new_hash_mask;
- neigh_hash_free(old_hash, old_entries);
+ rcu_assign_pointer(tbl->nht, new_nht);
+ call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
+ return new_nht;
}
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
@@ -363,19 +401,26 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
struct neighbour *n;
int key_len = tbl->key_len;
u32 hash_val;
+ struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, lookups);
- read_lock_bh(&tbl->lock);
- hash_val = tbl->hash(pkey, dev);
- for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
+ rcu_read_lock_bh();
+ nht = rcu_dereference_bh(tbl->nht);
+ hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
+
+ for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
+ n != NULL;
+ n = rcu_dereference_bh(n->next)) {
if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
- neigh_hold(n);
+ if (!atomic_inc_not_zero(&n->refcnt))
+ n = NULL;
NEIGH_CACHE_STAT_INC(tbl, hits);
break;
}
}
- read_unlock_bh(&tbl->lock);
+
+ rcu_read_unlock_bh();
return n;
}
EXPORT_SYMBOL(neigh_lookup);
@@ -386,20 +431,27 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
struct neighbour *n;
int key_len = tbl->key_len;
u32 hash_val;
+ struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, lookups);
- read_lock_bh(&tbl->lock);
- hash_val = tbl->hash(pkey, NULL);
- for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
+ rcu_read_lock_bh();
+ nht = rcu_dereference_bh(tbl->nht);
+ hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask;
+
+ for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
+ n != NULL;
+ n = rcu_dereference_bh(n->next)) {
if (!memcmp(n->primary_key, pkey, key_len) &&
net_eq(dev_net(n->dev), net)) {
- neigh_hold(n);
+ if (!atomic_inc_not_zero(&n->refcnt))
+ n = NULL;
NEIGH_CACHE_STAT_INC(tbl, hits);
break;
}
}
- read_unlock_bh(&tbl->lock);
+
+ rcu_read_unlock_bh();
return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
@@ -411,6 +463,7 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
int key_len = tbl->key_len;
int error;
struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
+ struct neigh_hash_table *nht;
if (!n) {
rc = ERR_PTR(-ENOBUFS);
@@ -437,18 +490,24 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
write_lock_bh(&tbl->lock);
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
- if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1))
- neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);
+ if (atomic_read(&tbl->entries) > (nht->hash_mask + 1))
+ nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1);
- hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
+ hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
if (n->parms->dead) {
rc = ERR_PTR(-EINVAL);
goto out_tbl_unlock;
}
- for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
+ for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
+ lockdep_is_held(&tbl->lock));
+ n1 != NULL;
+ n1 = rcu_dereference_protected(n1->next,
+ lockdep_is_held(&tbl->lock))) {
if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
neigh_hold(n1);
rc = n1;
@@ -456,10 +515,12 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
}
}
- n->next = tbl->hash_buckets[hash_val];
- tbl->hash_buckets[hash_val] = n;
n->dead = 0;
neigh_hold(n);
+ rcu_assign_pointer(n->next,
+ rcu_dereference_protected(nht->hash_buckets[hash_val],
+ lockdep_is_held(&tbl->lock)));
+ rcu_assign_pointer(nht->hash_buckets[hash_val], n);
write_unlock_bh(&tbl->lock);
NEIGH_PRINTK2("neigh %p is created.\n", n);
rc = n;
@@ -616,6 +677,12 @@ static inline void neigh_parms_put(struct neigh_parms *parms)
neigh_parms_destroy(parms);
}
+static void neigh_destroy_rcu(struct rcu_head *head)
+{
+ struct neighbour *neigh = container_of(head, struct neighbour, rcu);
+
+ kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
+}
/*
* neighbour must already be out of the table;
*
@@ -643,8 +710,7 @@ void neigh_destroy(struct neighbour *neigh)
write_seqlock_bh(&hh->hh_lock);
hh->hh_output = neigh_blackhole;
write_sequnlock_bh(&hh->hh_lock);
- if (atomic_dec_and_test(&hh->hh_refcnt))
- kfree(hh);
+ hh_cache_put(hh);
}
skb_queue_purge(&neigh->arp_queue);
@@ -655,7 +721,7 @@ void neigh_destroy(struct neighbour *neigh)
NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
atomic_dec(&neigh->tbl->entries);
- kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
+ call_rcu(&neigh->rcu, neigh_destroy_rcu);
}
EXPORT_SYMBOL(neigh_destroy);
@@ -696,12 +762,16 @@ static void neigh_connect(struct neighbour *neigh)
static void neigh_periodic_work(struct work_struct *work)
{
struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
- struct neighbour *n, **np;
+ struct neighbour *n;
+ struct neighbour __rcu **np;
unsigned int i;
+ struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
write_lock_bh(&tbl->lock);
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
/*
* periodically recompute ReachableTime from random function
@@ -715,10 +785,11 @@ static void neigh_periodic_work(struct work_struct *work)
neigh_rand_reach_time(p->base_reachable_time);
}
- for (i = 0 ; i <= tbl->hash_mask; i++) {
- np = &tbl->hash_buckets[i];
+ for (i = 0 ; i <= nht->hash_mask; i++) {
+ np = &nht->hash_buckets[i];
- while ((n = *np) != NULL) {
+ while ((n = rcu_dereference_protected(*np,
+ lockdep_is_held(&tbl->lock))) != NULL) {
unsigned int state;
write_lock(&n->lock);
@@ -766,9 +837,9 @@ next_elt:
static __inline__ int neigh_max_probes(struct neighbour *n)
{
struct neigh_parms *p = n->parms;
- return (n->nud_state & NUD_PROBE ?
+ return (n->nud_state & NUD_PROBE) ?
p->ucast_probes :
- p->ucast_probes + p->app_probes + p->mcast_probes);
+ p->ucast_probes + p->app_probes + p->mcast_probes;
}
static void neigh_invalidate(struct neighbour *neigh)
@@ -945,7 +1016,7 @@ out_unlock_bh:
}
EXPORT_SYMBOL(__neigh_event_send);
-static void neigh_update_hhs(struct neighbour *neigh)
+static void neigh_update_hhs(const struct neighbour *neigh)
{
struct hh_cache *hh;
void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
@@ -1081,7 +1152,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
}
if (lladdr != neigh->ha) {
+ write_seqlock(&neigh->ha_lock);
memcpy(&neigh->ha, lladdr, dev->addr_len);
+ write_sequnlock(&neigh->ha_lock);
neigh_update_hhs(neigh);
if (!(new & NUD_CONNECTED))
neigh->confirmed = jiffies -
@@ -1139,44 +1212,73 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
}
EXPORT_SYMBOL(neigh_event_ns);
+static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
+ __be16 protocol)
+{
+ struct hh_cache *hh;
+
+ smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
+ for (hh = n->hh; hh; hh = hh->hh_next) {
+ if (hh->hh_type == protocol) {
+ atomic_inc(&hh->hh_refcnt);
+ if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
+ hh_cache_put(hh);
+ return true;
+ }
+ }
+ return false;
+}
+
+/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
__be16 protocol)
{
struct hh_cache *hh;
struct net_device *dev = dst->dev;
- for (hh = n->hh; hh; hh = hh->hh_next)
- if (hh->hh_type == protocol)
- break;
+ if (likely(neigh_hh_lookup(n, dst, protocol)))
+ return;
- if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
- seqlock_init(&hh->hh_lock);
- hh->hh_type = protocol;
- atomic_set(&hh->hh_refcnt, 0);
- hh->hh_next = NULL;
+ /* slow path */
+ hh = kzalloc(sizeof(*hh), GFP_ATOMIC);
+ if (!hh)
+ return;
- if (dev->header_ops->cache(n, hh)) {
- kfree(hh);
- hh = NULL;
- } else {
- atomic_inc(&hh->hh_refcnt);
- hh->hh_next = n->hh;
- n->hh = hh;
- if (n->nud_state & NUD_CONNECTED)
- hh->hh_output = n->ops->hh_output;
- else
- hh->hh_output = n->ops->output;
- }
+ seqlock_init(&hh->hh_lock);
+ hh->hh_type = protocol;
+ atomic_set(&hh->hh_refcnt, 2);
+
+ if (dev->header_ops->cache(n, hh)) {
+ kfree(hh);
+ return;
}
- if (hh) {
- atomic_inc(&hh->hh_refcnt);
- dst->hh = hh;
+
+ write_lock_bh(&n->lock);
+
+ /* must check if another thread already did the insert */
+ if (neigh_hh_lookup(n, dst, protocol)) {
+ kfree(hh);
+ goto end;
}
+
+ if (n->nud_state & NUD_CONNECTED)
+ hh->hh_output = n->ops->hh_output;
+ else
+ hh->hh_output = n->ops->output;
+
+ hh->hh_next = n->hh;
+ smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
+ n->hh = hh;
+
+ if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
+ hh_cache_put(hh);
+end:
+ write_unlock_bh(&n->lock);
}
/* This function can be used in contexts, where only old dev_queue_xmit
- worked, f.e. if you want to override normal output path (eql, shaper),
- but resolution is not made yet.
+ * worked, f.e. if you want to override normal output path (eql, shaper),
+ * but resolution is not made yet.
*/
int neigh_compat_output(struct sk_buff *skb)
@@ -1210,19 +1312,19 @@ int neigh_resolve_output(struct sk_buff *skb)
if (!neigh_event_send(neigh, skb)) {
int err;
struct net_device *dev = neigh->dev;
- if (dev->header_ops->cache && !dst->hh) {
- write_lock_bh(&neigh->lock);
- if (!dst->hh)
- neigh_hh_init(neigh, dst, dst->ops->protocol);
- err = dev_hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, NULL, skb->len);
- write_unlock_bh(&neigh->lock);
- } else {
- read_lock_bh(&neigh->lock);
+ unsigned int seq;
+
+ if (dev->header_ops->cache &&
+ !dst->hh &&
+ !(dst->flags & DST_NOCACHE))
+ neigh_hh_init(neigh, dst, dst->ops->protocol);
+
+ do {
+ seq = read_seqbegin(&neigh->ha_lock);
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
neigh->ha, NULL, skb->len);
- read_unlock_bh(&neigh->lock);
- }
+ } while (read_seqretry(&neigh->ha_lock, seq));
+
if (err >= 0)
rc = neigh->ops->queue_xmit(skb);
else
@@ -1248,13 +1350,16 @@ int neigh_connected_output(struct sk_buff *skb)
struct dst_entry *dst = skb_dst(skb);
struct neighbour *neigh = dst->neighbour;
struct net_device *dev = neigh->dev;
+ unsigned int seq;
__skb_pull(skb, skb_network_offset(skb));
- read_lock_bh(&neigh->lock);
- err = dev_hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, NULL, skb->len);
- read_unlock_bh(&neigh->lock);
+ do {
+ seq = read_seqbegin(&neigh->ha_lock);
+ err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+ neigh->ha, NULL, skb->len);
+ } while (read_seqretry(&neigh->ha_lock, seq));
+
if (err >= 0)
err = neigh->ops->queue_xmit(skb);
else {
@@ -1436,17 +1541,14 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
panic("cannot create neighbour proc dir entry");
#endif
- tbl->hash_mask = 1;
- tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1);
+ tbl->nht = neigh_hash_alloc(8);
phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
- if (!tbl->hash_buckets || !tbl->phash_buckets)
+ if (!tbl->nht || !tbl->phash_buckets)
panic("cannot allocate neighbour cache hashes");
- get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
-
rwlock_init(&tbl->lock);
INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
@@ -1486,8 +1588,7 @@ int neigh_table_clear(struct neigh_table *tbl)
struct neigh_table **tp;
/* It is not clean... Fix it to unload IPv6 module safely */
- cancel_delayed_work(&tbl->gc_work);
- flush_scheduled_work();
+ cancel_delayed_work_sync(&tbl->gc_work);
del_timer_sync(&tbl->proxy_timer);
pneigh_queue_purge(&tbl->proxy_queue);
neigh_ifdown(tbl, NULL);
@@ -1502,8 +1603,8 @@ int neigh_table_clear(struct neigh_table *tbl)
}
write_unlock(&neigh_tbl_lock);
- neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1);
- tbl->hash_buckets = NULL;
+ call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu);
+ tbl->nht = NULL;
kfree(tbl->phash_buckets);
tbl->phash_buckets = NULL;
@@ -1529,6 +1630,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
struct net_device *dev = NULL;
int err = -EINVAL;
+ ASSERT_RTNL();
if (nlmsg_len(nlh) < sizeof(*ndm))
goto out;
@@ -1538,7 +1640,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex) {
- dev = dev_get_by_index(net, ndm->ndm_ifindex);
+ dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
err = -ENODEV;
goto out;
@@ -1554,34 +1656,31 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
read_unlock(&neigh_tbl_lock);
if (nla_len(dst_attr) < tbl->key_len)
- goto out_dev_put;
+ goto out;
if (ndm->ndm_flags & NTF_PROXY) {
err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
- goto out_dev_put;
+ goto out;
}
if (dev == NULL)
- goto out_dev_put;
+ goto out;
neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
if (neigh == NULL) {
err = -ENOENT;
- goto out_dev_put;
+ goto out;
}
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE |
NEIGH_UPDATE_F_ADMIN);
neigh_release(neigh);
- goto out_dev_put;
+ goto out;
}
read_unlock(&neigh_tbl_lock);
err = -EAFNOSUPPORT;
-out_dev_put:
- if (dev)
- dev_put(dev);
out:
return err;
}
@@ -1595,6 +1694,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
struct net_device *dev = NULL;
int err;
+ ASSERT_RTNL();
err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
if (err < 0)
goto out;
@@ -1605,14 +1705,14 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex) {
- dev = dev_get_by_index(net, ndm->ndm_ifindex);
+ dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
err = -ENODEV;
goto out;
}
if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
- goto out_dev_put;
+ goto out;
}
read_lock(&neigh_tbl_lock);
@@ -1626,7 +1726,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
read_unlock(&neigh_tbl_lock);
if (nla_len(tb[NDA_DST]) < tbl->key_len)
- goto out_dev_put;
+ goto out;
dst = nla_data(tb[NDA_DST]);
lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
@@ -1639,29 +1739,29 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
pn->flags = ndm->ndm_flags;
err = 0;
}
- goto out_dev_put;
+ goto out;
}
if (dev == NULL)
- goto out_dev_put;
+ goto out;
neigh = neigh_lookup(tbl, dst, dev);
if (neigh == NULL) {
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
err = -ENOENT;
- goto out_dev_put;
+ goto out;
}
neigh = __neigh_lookup_errno(tbl, dst, dev);
if (IS_ERR(neigh)) {
err = PTR_ERR(neigh);
- goto out_dev_put;
+ goto out;
}
} else {
if (nlh->nlmsg_flags & NLM_F_EXCL) {
err = -EEXIST;
neigh_release(neigh);
- goto out_dev_put;
+ goto out;
}
if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
@@ -1674,15 +1774,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
} else
err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
neigh_release(neigh);
- goto out_dev_put;
+ goto out;
}
read_unlock(&neigh_tbl_lock);
err = -EAFNOSUPPORT;
-
-out_dev_put:
- if (dev)
- dev_put(dev);
out:
return err;
}
@@ -1748,18 +1844,22 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
unsigned long now = jiffies;
unsigned int flush_delta = now - tbl->last_flush;
unsigned int rand_delta = now - tbl->last_rand;
-
+ struct neigh_hash_table *nht;
struct ndt_config ndc = {
.ndtc_key_len = tbl->key_len,
.ndtc_entry_size = tbl->entry_size,
.ndtc_entries = atomic_read(&tbl->entries),
.ndtc_last_flush = jiffies_to_msecs(flush_delta),
.ndtc_last_rand = jiffies_to_msecs(rand_delta),
- .ndtc_hash_rnd = tbl->hash_rnd,
- .ndtc_hash_mask = tbl->hash_mask,
.ndtc_proxy_qlen = tbl->proxy_queue.qlen,
};
+ rcu_read_lock_bh();
+ nht = rcu_dereference_bh(tbl->nht);
+ ndc.ndtc_hash_rnd = nht->hash_rnd;
+ ndc.ndtc_hash_mask = nht->hash_mask;
+ rcu_read_unlock_bh();
+
NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
}
@@ -2056,10 +2156,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
read_lock_bh(&neigh->lock);
ndm->ndm_state = neigh->nud_state;
- if ((neigh->nud_state & NUD_VALID) &&
- nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
- read_unlock_bh(&neigh->lock);
- goto nla_put_failure;
+ if (neigh->nud_state & NUD_VALID) {
+ char haddr[MAX_ADDR_LEN];
+
+ neigh_ha_snapshot(haddr, neigh, neigh->dev);
+ if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
+ read_unlock_bh(&neigh->lock);
+ goto nla_put_failure;
+ }
}
ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
@@ -2087,18 +2191,23 @@ static void neigh_update_notify(struct neighbour *neigh)
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
struct netlink_callback *cb)
{
- struct net * net = sock_net(skb->sk);
+ struct net *net = sock_net(skb->sk);
struct neighbour *n;
int rc, h, s_h = cb->args[1];
int idx, s_idx = idx = cb->args[2];
+ struct neigh_hash_table *nht;
- read_lock_bh(&tbl->lock);
- for (h = 0; h <= tbl->hash_mask; h++) {
+ rcu_read_lock_bh();
+ nht = rcu_dereference_bh(tbl->nht);
+
+ for (h = 0; h <= nht->hash_mask; h++) {
if (h < s_h)
continue;
if (h > s_h)
s_idx = 0;
- for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) {
+ for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
+ n != NULL;
+ n = rcu_dereference_bh(n->next)) {
if (!net_eq(dev_net(n->dev), net))
continue;
if (idx < s_idx)
@@ -2107,17 +2216,16 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
NLM_F_MULTI) <= 0) {
- read_unlock_bh(&tbl->lock);
rc = -1;
goto out;
}
- next:
+next:
idx++;
}
}
- read_unlock_bh(&tbl->lock);
rc = skb->len;
out:
+ rcu_read_unlock_bh();
cb->args[1] = h;
cb->args[2] = idx;
return rc;
@@ -2150,15 +2258,22 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
int chain;
+ struct neigh_hash_table *nht;
- read_lock_bh(&tbl->lock);
- for (chain = 0; chain <= tbl->hash_mask; chain++) {
+ rcu_read_lock_bh();
+ nht = rcu_dereference_bh(tbl->nht);
+
+ read_lock(&tbl->lock); /* avoid resizes */
+ for (chain = 0; chain <= nht->hash_mask; chain++) {
struct neighbour *n;
- for (n = tbl->hash_buckets[chain]; n; n = n->next)
+ for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
+ n != NULL;
+ n = rcu_dereference_bh(n->next))
cb(n, cookie);
}
- read_unlock_bh(&tbl->lock);
+ read_unlock(&tbl->lock);
+ rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
@@ -2167,18 +2282,25 @@ void __neigh_for_each_release(struct neigh_table *tbl,
int (*cb)(struct neighbour *))
{
int chain;
+ struct neigh_hash_table *nht;
- for (chain = 0; chain <= tbl->hash_mask; chain++) {
- struct neighbour *n, **np;
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
+ for (chain = 0; chain <= nht->hash_mask; chain++) {
+ struct neighbour *n;
+ struct neighbour __rcu **np;
- np = &tbl->hash_buckets[chain];
- while ((n = *np) != NULL) {
+ np = &nht->hash_buckets[chain];
+ while ((n = rcu_dereference_protected(*np,
+ lockdep_is_held(&tbl->lock))) != NULL) {
int release;
write_lock(&n->lock);
release = cb(n);
if (release) {
- *np = n->next;
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(n->next,
+ lockdep_is_held(&tbl->lock)));
n->dead = 1;
} else
np = &n->next;
@@ -2196,13 +2318,13 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
{
struct neigh_seq_state *state = seq->private;
struct net *net = seq_file_net(seq);
- struct neigh_table *tbl = state->tbl;
+ struct neigh_hash_table *nht = state->nht;
struct neighbour *n = NULL;
int bucket = state->bucket;
state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
- for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
- n = tbl->hash_buckets[bucket];
+ for (bucket = 0; bucket <= nht->hash_mask; bucket++) {
+ n = rcu_dereference_bh(nht->hash_buckets[bucket]);
while (n) {
if (!net_eq(dev_net(n->dev), net))
@@ -2219,8 +2341,8 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
break;
if (n->nud_state & ~NUD_NOARP)
break;
- next:
- n = n->next;
+next:
+ n = rcu_dereference_bh(n->next);
}
if (n)
@@ -2237,14 +2359,14 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
{
struct neigh_seq_state *state = seq->private;
struct net *net = seq_file_net(seq);
- struct neigh_table *tbl = state->tbl;
+ struct neigh_hash_table *nht = state->nht;
if (state->neigh_sub_iter) {
void *v = state->neigh_sub_iter(state, n, pos);
if (v)
return n;
}
- n = n->next;
+ n = rcu_dereference_bh(n->next);
while (1) {
while (n) {
@@ -2261,17 +2383,17 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
if (n->nud_state & ~NUD_NOARP)
break;
- next:
- n = n->next;
+next:
+ n = rcu_dereference_bh(n->next);
}
if (n)
break;
- if (++state->bucket > tbl->hash_mask)
+ if (++state->bucket > nht->hash_mask)
break;
- n = tbl->hash_buckets[state->bucket];
+ n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
}
if (n && pos)
@@ -2369,7 +2491,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
}
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
- __acquires(tbl->lock)
+ __acquires(rcu_bh)
{
struct neigh_seq_state *state = seq->private;
@@ -2377,7 +2499,8 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl
state->bucket = 0;
state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
- read_lock_bh(&tbl->lock);
+ rcu_read_lock_bh();
+ state->nht = rcu_dereference_bh(tbl->nht);
return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
@@ -2411,12 +2534,9 @@ out:
EXPORT_SYMBOL(neigh_seq_next);
void neigh_seq_stop(struct seq_file *seq, void *v)
- __releases(tbl->lock)
+ __releases(rcu_bh)
{
- struct neigh_seq_state *state = seq->private;
- struct neigh_table *tbl = state->tbl;
-
- read_unlock_bh(&tbl->lock);
+ rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 99e7052d7323..a5ff5a89f376 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -29,6 +29,7 @@ static const char fmt_hex[] = "%#x\n";
static const char fmt_long_hex[] = "%#lx\n";
static const char fmt_dec[] = "%d\n";
static const char fmt_ulong[] = "%lu\n";
+static const char fmt_u64[] = "%llu\n";
static inline int dev_isalive(const struct net_device *dev)
{
@@ -94,6 +95,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
}
NETDEVICE_SHOW(dev_id, fmt_hex);
+NETDEVICE_SHOW(addr_assign_type, fmt_dec);
NETDEVICE_SHOW(addr_len, fmt_dec);
NETDEVICE_SHOW(iflink, fmt_dec);
NETDEVICE_SHOW(ifindex, fmt_dec);
@@ -294,6 +296,7 @@ static ssize_t show_ifalias(struct device *dev,
}
static struct device_attribute net_class_attributes[] = {
+ __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL),
__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
__ATTR(dev_id, S_IRUGO, show_dev_id, NULL),
__ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias),
@@ -324,14 +327,15 @@ static ssize_t netstat_show(const struct device *d,
struct net_device *dev = to_net_dev(d);
ssize_t ret = -EINVAL;
- WARN_ON(offset > sizeof(struct net_device_stats) ||
- offset % sizeof(unsigned long) != 0);
+ WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
+ offset % sizeof(u64) != 0);
read_lock(&dev_base_lock);
if (dev_isalive(dev)) {
- const struct net_device_stats *stats = dev_get_stats(dev);
- ret = sprintf(buf, fmt_ulong,
- *(unsigned long *)(((u8 *) stats) + offset));
+ struct rtnl_link_stats64 temp;
+ const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
+
+ ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset));
}
read_unlock(&dev_base_lock);
return ret;
@@ -343,7 +347,7 @@ static ssize_t show_##name(struct device *d, \
struct device_attribute *attr, char *buf) \
{ \
return netstat_show(d, attr, buf, \
- offsetof(struct net_device_stats, name)); \
+ offsetof(struct rtnl_link_stats64, name)); \
} \
static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
@@ -511,7 +515,7 @@ static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
return attribute->store(queue, attribute, buf, count);
}
-static struct sysfs_ops rx_queue_sysfs_ops = {
+static const struct sysfs_ops rx_queue_sysfs_ops = {
.show = rx_queue_attr_show,
.store = rx_queue_attr_store,
};
@@ -594,7 +598,8 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
}
spin_lock(&rps_map_lock);
- old_map = queue->rps_map;
+ old_map = rcu_dereference_protected(queue->rps_map,
+ lockdep_is_held(&rps_map_lock));
rcu_assign_pointer(queue->rps_map, map);
spin_unlock(&rps_map_lock);
@@ -673,7 +678,8 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
table = NULL;
spin_lock(&rps_dev_flow_lock);
- old_table = queue->rps_flow_table;
+ old_table = rcu_dereference_protected(queue->rps_flow_table,
+ lockdep_is_held(&rps_dev_flow_lock));
rcu_assign_pointer(queue->rps_flow_table, table);
spin_unlock(&rps_dev_flow_lock);
@@ -701,13 +707,17 @@ static void rx_queue_release(struct kobject *kobj)
{
struct netdev_rx_queue *queue = to_rx_queue(kobj);
struct netdev_rx_queue *first = queue->first;
+ struct rps_map *map;
+ struct rps_dev_flow_table *flow_table;
+
- if (queue->rps_map)
- call_rcu(&queue->rps_map->rcu, rps_map_release);
+ map = rcu_dereference_raw(queue->rps_map);
+ if (map)
+ call_rcu(&map->rcu, rps_map_release);
- if (queue->rps_flow_table)
- call_rcu(&queue->rps_flow_table->rcu,
- rps_dev_flow_table_release);
+ flow_table = rcu_dereference_raw(queue->rps_flow_table);
+ if (flow_table)
+ call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
if (atomic_dec_and_test(&first->count))
kfree(first);
@@ -722,6 +732,7 @@ static struct kobj_type rx_queue_ktype = {
static int rx_queue_add_kobject(struct net_device *net, int index)
{
struct netdev_rx_queue *queue = net->_rx + index;
+ struct netdev_rx_queue *first = queue->first;
struct kobject *kobj = &queue->kobj;
int error = 0;
@@ -734,38 +745,43 @@ static int rx_queue_add_kobject(struct net_device *net, int index)
}
kobject_uevent(kobj, KOBJ_ADD);
+ atomic_inc(&first->count);
return error;
}
-static int rx_queue_register_kobjects(struct net_device *net)
+int
+net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
{
int i;
int error = 0;
- net->queues_kset = kset_create_and_add("queues",
- NULL, &net->dev.kobj);
- if (!net->queues_kset)
- return -ENOMEM;
- for (i = 0; i < net->num_rx_queues; i++) {
+ for (i = old_num; i < new_num; i++) {
error = rx_queue_add_kobject(net, i);
- if (error)
+ if (error) {
+ new_num = old_num;
break;
+ }
}
- if (error)
- while (--i >= 0)
- kobject_put(&net->_rx[i].kobj);
+ while (--i >= new_num)
+ kobject_put(&net->_rx[i].kobj);
return error;
}
-static void rx_queue_remove_kobjects(struct net_device *net)
+static int rx_queue_register_kobjects(struct net_device *net)
{
- int i;
+ net->queues_kset = kset_create_and_add("queues",
+ NULL, &net->dev.kobj);
+ if (!net->queues_kset)
+ return -ENOMEM;
+ return net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues);
+}
- for (i = 0; i < net->num_rx_queues; i++)
- kobject_put(&net->_rx[i].kobj);
+static void rx_queue_remove_kobjects(struct net_device *net)
+{
+ net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0);
kset_unregister(net->queues_kset);
}
#endif /* CONFIG_RPS */
@@ -785,12 +801,13 @@ static const void *net_netlink_ns(struct sock *sk)
return sock_net(sk);
}
-static struct kobj_ns_type_operations net_ns_type_operations = {
+struct kobj_ns_type_operations net_ns_type_operations = {
.type = KOBJ_NS_TYPE_NET,
.current_ns = net_current_ns,
.netlink_ns = net_netlink_ns,
.initial_ns = net_initial_ns,
};
+EXPORT_SYMBOL_GPL(net_ns_type_operations);
static void net_kobj_ns_exit(struct net *net)
{
@@ -922,13 +939,12 @@ int netdev_class_create_file(struct class_attribute *class_attr)
{
return class_create_file(&net_class, class_attr);
}
+EXPORT_SYMBOL(netdev_class_create_file);
void netdev_class_remove_file(struct class_attribute *class_attr)
{
class_remove_file(&net_class, class_attr);
}
-
-EXPORT_SYMBOL(netdev_class_create_file);
EXPORT_SYMBOL(netdev_class_remove_file);
int netdev_kobject_init(void)
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 805555e8b187..778e1571548d 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -4,4 +4,8 @@
int netdev_kobject_init(void);
int netdev_register_kobject(struct net_device *);
void netdev_unregister_kobject(struct net_device *);
+#ifdef CONFIG_RPS
+int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num);
+#endif
+
#endif
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index afa6380ed88a..7f1bb2aba03b 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -26,6 +26,7 @@
#define CREATE_TRACE_POINTS
#include <trace/events/skb.h>
+#include <trace/events/net.h>
#include <trace/events/napi.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index c988e685433a..3f860261c5ee 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -42,7 +42,9 @@ static int net_assign_generic(struct net *net, int id, void *data)
BUG_ON(!mutex_is_locked(&net_mutex));
BUG_ON(id == 0);
- ng = old_ng = net->gen;
+ old_ng = rcu_dereference_protected(net->gen,
+ lockdep_is_held(&net_mutex));
+ ng = old_ng;
if (old_ng->len >= id)
goto assign;
diff --git a/net/core/netevent.c b/net/core/netevent.c
index 95f81de87502..865f0ceb81fb 100644
--- a/net/core/netevent.c
+++ b/net/core/netevent.c
@@ -35,6 +35,7 @@ int register_netevent_notifier(struct notifier_block *nb)
err = atomic_notifier_chain_register(&netevent_notif_chain, nb);
return err;
}
+EXPORT_SYMBOL_GPL(register_netevent_notifier);
/**
* netevent_unregister_notifier - unregister a netevent notifier block
@@ -50,6 +51,7 @@ int unregister_netevent_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_unregister(&netevent_notif_chain, nb);
}
+EXPORT_SYMBOL_GPL(unregister_netevent_notifier);
/**
* call_netevent_notifiers - call all netevent notifier blocks
@@ -64,7 +66,4 @@ int call_netevent_notifiers(unsigned long val, void *v)
{
return atomic_notifier_call_chain(&netevent_notif_chain, val, v);
}
-
-EXPORT_SYMBOL_GPL(register_netevent_notifier);
-EXPORT_SYMBOL_GPL(unregister_netevent_notifier);
EXPORT_SYMBOL_GPL(call_netevent_notifiers);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 94825b109551..4e98ffac3af0 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -199,11 +199,13 @@ void netpoll_poll_dev(struct net_device *dev)
zap_completion_queue();
}
+EXPORT_SYMBOL(netpoll_poll_dev);
void netpoll_poll(struct netpoll *np)
{
netpoll_poll_dev(np->dev);
}
+EXPORT_SYMBOL(netpoll_poll);
static void refill_skbs(void)
{
@@ -286,12 +288,13 @@ static int netpoll_owner_active(struct net_device *dev)
return 0;
}
-void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
+ struct net_device *dev)
{
int status = NETDEV_TX_BUSY;
unsigned long tries;
- struct net_device *dev = np->dev;
const struct net_device_ops *ops = dev->netdev_ops;
+ /* It is up to the caller to keep npinfo alive. */
struct netpoll_info *npinfo = np->dev->npinfo;
if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
@@ -343,6 +346,7 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
schedule_delayed_work(&npinfo->tx_work,0);
}
}
+EXPORT_SYMBOL(netpoll_send_skb_on_dev);
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
@@ -404,6 +408,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
netpoll_send_skb(np, skb);
}
+EXPORT_SYMBOL(netpoll_send_udp);
static void arp_reply(struct sk_buff *skb)
{
@@ -630,6 +635,7 @@ void netpoll_print_options(struct netpoll *np)
printk(KERN_INFO "%s: remote ethernet address %pM\n",
np->name, np->remote_mac);
}
+EXPORT_SYMBOL(netpoll_print_options);
int netpoll_parse_options(struct netpoll *np, char *opt)
{
@@ -722,30 +728,29 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
np->name, cur);
return -1;
}
+EXPORT_SYMBOL(netpoll_parse_options);
-int netpoll_setup(struct netpoll *np)
+int __netpoll_setup(struct netpoll *np)
{
- struct net_device *ndev = NULL;
- struct in_device *in_dev;
+ struct net_device *ndev = np->dev;
struct netpoll_info *npinfo;
- struct netpoll *npe, *tmp;
+ const struct net_device_ops *ops;
unsigned long flags;
int err;
- if (np->dev_name)
- ndev = dev_get_by_name(&init_net, np->dev_name);
- if (!ndev) {
- printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
+ if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
+ !ndev->netdev_ops->ndo_poll_controller) {
+ printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
np->name, np->dev_name);
- return -ENODEV;
+ err = -ENOTSUPP;
+ goto out;
}
- np->dev = ndev;
if (!ndev->npinfo) {
npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
if (!npinfo) {
err = -ENOMEM;
- goto put;
+ goto out;
}
npinfo->rx_flags = 0;
@@ -757,6 +762,13 @@ int netpoll_setup(struct netpoll *np)
INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
atomic_set(&npinfo->refcnt, 1);
+
+ ops = np->dev->netdev_ops;
+ if (ops->ndo_netpoll_setup) {
+ err = ops->ndo_netpoll_setup(ndev, npinfo);
+ if (err)
+ goto free_npinfo;
+ }
} else {
npinfo = ndev->npinfo;
atomic_inc(&npinfo->refcnt);
@@ -764,12 +776,37 @@ int netpoll_setup(struct netpoll *np)
npinfo->netpoll = np;
- if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
- !ndev->netdev_ops->ndo_poll_controller) {
- printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
+ if (np->rx_hook) {
+ spin_lock_irqsave(&npinfo->rx_lock, flags);
+ npinfo->rx_flags |= NETPOLL_RX_ENABLED;
+ list_add_tail(&np->rx, &npinfo->rx_np);
+ spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+ }
+
+ /* last thing to do is link it to the net device structure */
+ rcu_assign_pointer(ndev->npinfo, npinfo);
+
+ return 0;
+
+free_npinfo:
+ kfree(npinfo);
+out:
+ return err;
+}
+EXPORT_SYMBOL_GPL(__netpoll_setup);
+
+int netpoll_setup(struct netpoll *np)
+{
+ struct net_device *ndev = NULL;
+ struct in_device *in_dev;
+ int err;
+
+ if (np->dev_name)
+ ndev = dev_get_by_name(&init_net, np->dev_name);
+ if (!ndev) {
+ printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
np->name, np->dev_name);
- err = -ENOTSUPP;
- goto release;
+ return -ENODEV;
}
if (!netif_running(ndev)) {
@@ -785,7 +822,7 @@ int netpoll_setup(struct netpoll *np)
if (err) {
printk(KERN_ERR "%s: failed to open %s\n",
np->name, ndev->name);
- goto release;
+ goto put;
}
atleast = jiffies + HZ/10;
@@ -822,7 +859,7 @@ int netpoll_setup(struct netpoll *np)
printk(KERN_ERR "%s: no IP address for %s, aborting\n",
np->name, np->dev_name);
err = -EDESTADDRREQ;
- goto release;
+ goto put;
}
np->local_ip = in_dev->ifa_list->ifa_local;
@@ -830,38 +867,25 @@ int netpoll_setup(struct netpoll *np)
printk(KERN_INFO "%s: local IP %pI4\n", np->name, &np->local_ip);
}
- if (np->rx_hook) {
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- npinfo->rx_flags |= NETPOLL_RX_ENABLED;
- list_add_tail(&np->rx, &npinfo->rx_np);
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
- }
+ np->dev = ndev;
/* fill up the skb queue */
refill_skbs();
- /* last thing to do is link it to the net device structure */
- ndev->npinfo = npinfo;
+ rtnl_lock();
+ err = __netpoll_setup(np);
+ rtnl_unlock();
- /* avoid racing with NAPI reading npinfo */
- synchronize_rcu();
+ if (err)
+ goto put;
return 0;
- release:
- if (!ndev->npinfo) {
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- list_for_each_entry_safe(npe, tmp, &npinfo->rx_np, rx) {
- npe->dev = NULL;
- }
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
-
- kfree(npinfo);
- }
put:
dev_put(ndev);
return err;
}
+EXPORT_SYMBOL(netpoll_setup);
static int __init netpoll_init(void)
{
@@ -870,49 +894,65 @@ static int __init netpoll_init(void)
}
core_initcall(netpoll_init);
-void netpoll_cleanup(struct netpoll *np)
+void __netpoll_cleanup(struct netpoll *np)
{
struct netpoll_info *npinfo;
unsigned long flags;
- if (np->dev) {
- npinfo = np->dev->npinfo;
- if (npinfo) {
- if (!list_empty(&npinfo->rx_np)) {
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- list_del(&np->rx);
- if (list_empty(&npinfo->rx_np))
- npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
- }
+ npinfo = np->dev->npinfo;
+ if (!npinfo)
+ return;
- if (atomic_dec_and_test(&npinfo->refcnt)) {
- const struct net_device_ops *ops;
- skb_queue_purge(&npinfo->arp_tx);
- skb_queue_purge(&npinfo->txq);
- cancel_rearming_delayed_work(&npinfo->tx_work);
-
- /* clean after last, unfinished work */
- __skb_queue_purge(&npinfo->txq);
- kfree(npinfo);
- ops = np->dev->netdev_ops;
- if (ops->ndo_netpoll_cleanup)
- ops->ndo_netpoll_cleanup(np->dev);
- else
- np->dev->npinfo = NULL;
- }
- }
+ if (!list_empty(&npinfo->rx_np)) {
+ spin_lock_irqsave(&npinfo->rx_lock, flags);
+ list_del(&np->rx);
+ if (list_empty(&npinfo->rx_np))
+ npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
+ spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+ }
+
+ if (atomic_dec_and_test(&npinfo->refcnt)) {
+ const struct net_device_ops *ops;
+
+ ops = np->dev->netdev_ops;
+ if (ops->ndo_netpoll_cleanup)
+ ops->ndo_netpoll_cleanup(np->dev);
+
+ rcu_assign_pointer(np->dev->npinfo, NULL);
+
+ /* avoid racing with NAPI reading npinfo */
+ synchronize_rcu_bh();
+
+ skb_queue_purge(&npinfo->arp_tx);
+ skb_queue_purge(&npinfo->txq);
+ cancel_rearming_delayed_work(&npinfo->tx_work);
- dev_put(np->dev);
+ /* clean after last, unfinished work */
+ __skb_queue_purge(&npinfo->txq);
+ kfree(npinfo);
}
+}
+EXPORT_SYMBOL_GPL(__netpoll_cleanup);
+
+void netpoll_cleanup(struct netpoll *np)
+{
+ if (!np->dev)
+ return;
+ rtnl_lock();
+ __netpoll_cleanup(np);
+ rtnl_unlock();
+
+ dev_put(np->dev);
np->dev = NULL;
}
+EXPORT_SYMBOL(netpoll_cleanup);
int netpoll_trap(void)
{
return atomic_read(&trapped);
}
+EXPORT_SYMBOL(netpoll_trap);
void netpoll_set_trap(int trap)
{
@@ -921,14 +961,4 @@ void netpoll_set_trap(int trap)
else
atomic_dec(&trapped);
}
-
-EXPORT_SYMBOL(netpoll_send_skb);
EXPORT_SYMBOL(netpoll_set_trap);
-EXPORT_SYMBOL(netpoll_trap);
-EXPORT_SYMBOL(netpoll_print_options);
-EXPORT_SYMBOL(netpoll_parse_options);
-EXPORT_SYMBOL(netpoll_setup);
-EXPORT_SYMBOL(netpoll_cleanup);
-EXPORT_SYMBOL(netpoll_send_udp);
-EXPORT_SYMBOL(netpoll_poll_dev);
-EXPORT_SYMBOL(netpoll_poll);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 1dacd7ba8dbb..fbce4b05a53e 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -115,6 +115,9 @@
* command by Adit Ranadive <adit.262@gmail.com>
*
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/sys.h>
#include <linux/types.h>
#include <linux/module.h>
@@ -169,11 +172,13 @@
#include <asm/dma.h>
#include <asm/div64.h> /* do_div */
-#define VERSION "2.73"
+#define VERSION "2.74"
#define IP_NAME_SZ 32
#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
#define MPLS_STACK_BOTTOM htonl(0x00000100)
+#define func_enter() pr_debug("entering %s\n", __func__);
+
/* Device flag bits */
#define F_IPSRC_RND (1<<0) /* IP-Src Random */
#define F_IPDST_RND (1<<1) /* IP-Dst Random */
@@ -424,7 +429,8 @@ static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2)
}
static const char version[] =
- "pktgen " VERSION ": Packet Generator for packet performance testing.\n";
+ "Packet Generator for packet performance testing. "
+ "Version: " VERSION "\n";
static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i);
static int pktgen_add_device(struct pktgen_thread *t, const char *ifname);
@@ -495,7 +501,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
pktgen_reset_all_threads();
else
- printk(KERN_WARNING "pktgen: Unknown command: %s\n", data);
+ pr_warning("Unknown command: %s\n", data);
err = count;
@@ -723,16 +729,14 @@ static int hex32_arg(const char __user *user_buffer, unsigned long maxlen,
*num = 0;
for (; i < maxlen; i++) {
+ int value;
char c;
*num <<= 4;
if (get_user(c, &user_buffer[i]))
return -EFAULT;
- if ((c >= '0') && (c <= '9'))
- *num |= c - '0';
- else if ((c >= 'a') && (c <= 'f'))
- *num |= c - 'a' + 10;
- else if ((c >= 'A') && (c <= 'F'))
- *num |= c - 'A' + 10;
+ value = hex_to_bin(c);
+ if (value >= 0)
+ *num |= value;
else
break;
}
@@ -767,10 +771,10 @@ done:
static unsigned long num_arg(const char __user * user_buffer,
unsigned long maxlen, unsigned long *num)
{
- int i = 0;
+ int i;
*num = 0;
- for (; i < maxlen; i++) {
+ for (i = 0; i < maxlen; i++) {
char c;
if (get_user(c, &user_buffer[i]))
return -EFAULT;
@@ -785,9 +789,9 @@ static unsigned long num_arg(const char __user * user_buffer,
static int strn_len(const char __user * user_buffer, unsigned int maxlen)
{
- int i = 0;
+ int i;
- for (; i < maxlen; i++) {
+ for (i = 0; i < maxlen; i++) {
char c;
if (get_user(c, &user_buffer[i]))
return -EFAULT;
@@ -840,9 +844,9 @@ static ssize_t pktgen_if_write(struct file *file,
const char __user * user_buffer, size_t count,
loff_t * offset)
{
- struct seq_file *seq = (struct seq_file *)file->private_data;
+ struct seq_file *seq = file->private_data;
struct pktgen_dev *pkt_dev = seq->private;
- int i = 0, max, len;
+ int i, max, len;
char name[16], valstr[32];
unsigned long value = 0;
char *pg_result = NULL;
@@ -852,17 +856,17 @@ static ssize_t pktgen_if_write(struct file *file,
pg_result = &(pkt_dev->result[0]);
if (count < 1) {
- printk(KERN_WARNING "pktgen: wrong command format\n");
+ pr_warning("wrong command format\n");
return -EINVAL;
}
- max = count - i;
- tmp = count_trail_chars(&user_buffer[i], max);
+ max = count;
+ tmp = count_trail_chars(user_buffer, max);
if (tmp < 0) {
- printk(KERN_WARNING "pktgen: illegal format\n");
+ pr_warning("illegal format\n");
return tmp;
}
- i += tmp;
+ i = tmp;
/* Read variable name */
@@ -883,10 +887,11 @@ static ssize_t pktgen_if_write(struct file *file,
i += len;
if (debug) {
- char tb[count + 1];
- if (copy_from_user(tb, user_buffer, count))
+ size_t copy = min(count, 1023);
+ char tb[copy + 1];
+ if (copy_from_user(tb, user_buffer, copy))
return -EFAULT;
- tb[count] = 0;
+ tb[copy] = 0;
printk(KERN_DEBUG "pktgen: %s,%lu buffer -:%s:-\n", name,
(unsigned long)count, tb);
}
@@ -980,6 +985,36 @@ static ssize_t pktgen_if_write(struct file *file,
(unsigned long long) pkt_dev->delay);
return count;
}
+ if (!strcmp(name, "rate")) {
+ len = num_arg(&user_buffer[i], 10, &value);
+ if (len < 0)
+ return len;
+
+ i += len;
+ if (!value)
+ return len;
+ pkt_dev->delay = pkt_dev->min_pkt_size*8*NSEC_PER_USEC/value;
+ if (debug)
+ pr_info("Delay set at: %llu ns\n", pkt_dev->delay);
+
+ sprintf(pg_result, "OK: rate=%lu", value);
+ return count;
+ }
+ if (!strcmp(name, "ratep")) {
+ len = num_arg(&user_buffer[i], 10, &value);
+ if (len < 0)
+ return len;
+
+ i += len;
+ if (!value)
+ return len;
+ pkt_dev->delay = NSEC_PER_SEC/value;
+ if (debug)
+ pr_info("Delay set at: %llu ns\n", pkt_dev->delay);
+
+ sprintf(pg_result, "OK: rate=%lu", value);
+ return count;
+ }
if (!strcmp(name, "udp_src_min")) {
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
@@ -1398,18 +1433,12 @@ static ssize_t pktgen_if_write(struct file *file,
i += len;
for (*m = 0; *v && m < pkt_dev->dst_mac + 6; v++) {
- if (*v >= '0' && *v <= '9') {
- *m *= 16;
- *m += *v - '0';
- }
- if (*v >= 'A' && *v <= 'F') {
- *m *= 16;
- *m += *v - 'A' + 10;
- }
- if (*v >= 'a' && *v <= 'f') {
- *m *= 16;
- *m += *v - 'a' + 10;
- }
+ int value;
+
+ value = hex_to_bin(*v);
+ if (value >= 0)
+ *m = *m * 16 + value;
+
if (*v == ':') {
m++;
*m = 0;
@@ -1440,18 +1469,12 @@ static ssize_t pktgen_if_write(struct file *file,
i += len;
for (*m = 0; *v && m < pkt_dev->src_mac + 6; v++) {
- if (*v >= '0' && *v <= '9') {
- *m *= 16;
- *m += *v - '0';
- }
- if (*v >= 'A' && *v <= 'F') {
- *m *= 16;
- *m += *v - 'A' + 10;
- }
- if (*v >= 'a' && *v <= 'f') {
- *m *= 16;
- *m += *v - 'a' + 10;
- }
+ int value;
+
+ value = hex_to_bin(*v);
+ if (value >= 0)
+ *m = *m * 16 + value;
+
if (*v == ':') {
m++;
*m = 0;
@@ -1740,9 +1763,9 @@ static ssize_t pktgen_thread_write(struct file *file,
const char __user * user_buffer,
size_t count, loff_t * offset)
{
- struct seq_file *seq = (struct seq_file *)file->private_data;
+ struct seq_file *seq = file->private_data;
struct pktgen_thread *t = seq->private;
- int i = 0, max, len, ret;
+ int i, max, len, ret;
char name[40];
char *pg_result;
@@ -1751,12 +1774,12 @@ static ssize_t pktgen_thread_write(struct file *file,
return -EINVAL;
}
- max = count - i;
- len = count_trail_chars(&user_buffer[i], max);
+ max = count;
+ len = count_trail_chars(user_buffer, max);
if (len < 0)
return len;
- i += len;
+ i = len;
/* Read variable name */
@@ -1781,7 +1804,7 @@ static ssize_t pktgen_thread_write(struct file *file,
name, (unsigned long)count);
if (!t) {
- printk(KERN_ERR "pktgen: ERROR: No thread\n");
+ pr_err("ERROR: No thread\n");
ret = -EINVAL;
goto out;
}
@@ -1874,7 +1897,7 @@ static void pktgen_mark_device(const char *ifname)
int i = 0;
mutex_lock(&pktgen_thread_lock);
- pr_debug("pktgen: pktgen_mark_device marking %s for removal\n", ifname);
+ pr_debug("%s: marking %s for removal\n", __func__, ifname);
while (1) {
@@ -1883,15 +1906,14 @@ static void pktgen_mark_device(const char *ifname)
break; /* success */
mutex_unlock(&pktgen_thread_lock);
- pr_debug("pktgen: pktgen_mark_device waiting for %s "
- "to disappear....\n", ifname);
+ pr_debug("%s: waiting for %s to disappear....\n",
+ __func__, ifname);
schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try));
mutex_lock(&pktgen_thread_lock);
if (++i >= max_tries) {
- printk(KERN_ERR "pktgen_mark_device: timed out after "
- "waiting %d msec for device %s to be removed\n",
- msec_per_try * i, ifname);
+ pr_err("%s: timed out after waiting %d msec for device %s to be removed\n",
+ __func__, msec_per_try * i, ifname);
break;
}
@@ -1918,8 +1940,8 @@ static void pktgen_change_name(struct net_device *dev)
&pktgen_if_fops,
pkt_dev);
if (!pkt_dev->entry)
- printk(KERN_ERR "pktgen: can't move proc "
- " entry for '%s'\n", dev->name);
+ pr_err("can't move proc entry for '%s'\n",
+ dev->name);
break;
}
}
@@ -1954,7 +1976,7 @@ static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev,
const char *ifname)
{
char b[IFNAMSIZ+5];
- int i = 0;
+ int i;
for (i = 0; ifname[i] != '@'; i++) {
if (i == IFNAMSIZ)
@@ -1983,15 +2005,15 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
odev = pktgen_dev_get_by_name(pkt_dev, ifname);
if (!odev) {
- printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname);
+ pr_err("no such netdevice: \"%s\"\n", ifname);
return -ENODEV;
}
if (odev->type != ARPHRD_ETHER) {
- printk(KERN_ERR "pktgen: not an ethernet device: \"%s\"\n", ifname);
+ pr_err("not an ethernet device: \"%s\"\n", ifname);
err = -EINVAL;
} else if (!netif_running(odev)) {
- printk(KERN_ERR "pktgen: device is down: \"%s\"\n", ifname);
+ pr_err("device is down: \"%s\"\n", ifname);
err = -ENETDOWN;
} else {
pkt_dev->odev = odev;
@@ -2010,8 +2032,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
int ntxq;
if (!pkt_dev->odev) {
- printk(KERN_ERR "pktgen: ERROR: pkt_dev->odev == NULL in "
- "setup_inject.\n");
+ pr_err("ERROR: pkt_dev->odev == NULL in setup_inject\n");
sprintf(pkt_dev->result,
"ERROR: pkt_dev->odev == NULL in setup_inject.\n");
return;
@@ -2021,19 +2042,15 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
ntxq = pkt_dev->odev->real_num_tx_queues;
if (ntxq <= pkt_dev->queue_map_min) {
- printk(KERN_WARNING "pktgen: WARNING: Requested "
- "queue_map_min (zero-based) (%d) exceeds valid range "
- "[0 - %d] for (%d) queues on %s, resetting\n",
- pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq,
- pkt_dev->odevname);
+ pr_warning("WARNING: Requested queue_map_min (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n",
+ pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq,
+ pkt_dev->odevname);
pkt_dev->queue_map_min = ntxq - 1;
}
if (pkt_dev->queue_map_max >= ntxq) {
- printk(KERN_WARNING "pktgen: WARNING: Requested "
- "queue_map_max (zero-based) (%d) exceeds valid range "
- "[0 - %d] for (%d) queues on %s, resetting\n",
- pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq,
- pkt_dev->odevname);
+ pr_warning("WARNING: Requested queue_map_max (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n",
+ pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq,
+ pkt_dev->odevname);
pkt_dev->queue_map_max = ntxq - 1;
}
@@ -2093,8 +2110,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
}
rcu_read_unlock();
if (err)
- printk(KERN_ERR "pktgen: ERROR: IPv6 link "
- "address not availble.\n");
+ pr_err("ERROR: IPv6 link address not available\n");
}
#endif
} else {
@@ -2142,15 +2158,15 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
hrtimer_set_expires(&t.timer, spin_until);
- remaining = ktime_to_us(hrtimer_expires_remaining(&t.timer));
+ remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer));
if (remaining <= 0) {
pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
return;
}
start_time = ktime_now();
- if (remaining < 100)
- udelay(remaining); /* really small just spin */
+ if (remaining < 100000)
+ ndelay(remaining); /* really small just spin */
else {
/* see do_nanosleep */
hrtimer_init_sleeper(&t, current);
@@ -2504,8 +2520,8 @@ static void free_SAs(struct pktgen_dev *pkt_dev)
{
if (pkt_dev->cflows) {
/* let go of the SAs if we have them */
- int i = 0;
- for (; i < pkt_dev->cflows; i++) {
+ int i;
+ for (i = 0; i < pkt_dev->cflows; i++) {
struct xfrm_state *x = pkt_dev->flows[i].x;
if (x) {
xfrm_state_put(x);
@@ -2528,8 +2544,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
if (nhead > 0) {
ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
if (ret < 0) {
- printk(KERN_ERR "Error expanding "
- "ipsec packet %d\n", ret);
+ pr_err("Error expanding ipsec packet %d\n",
+ ret);
goto err;
}
}
@@ -2538,8 +2554,7 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
skb_pull(skb, ETH_HLEN);
ret = pktgen_output_ipsec(skb, pkt_dev);
if (ret) {
- printk(KERN_ERR "Error creating ipsec "
- "packet %d\n", ret);
+ pr_err("Error creating ipsec packet %d\n", ret);
goto err;
}
/* restore ll */
@@ -3015,8 +3030,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
if (datalen < sizeof(struct pktgen_hdr)) {
datalen = sizeof(struct pktgen_hdr);
if (net_ratelimit())
- printk(KERN_INFO "pktgen: increased datalen to %d\n",
- datalen);
+ pr_info("increased datalen to %d\n", datalen);
}
udph->source = htons(pkt_dev->cur_udp_src);
@@ -3143,7 +3157,7 @@ static void pktgen_run(struct pktgen_thread *t)
struct pktgen_dev *pkt_dev;
int started = 0;
- pr_debug("pktgen: entering pktgen_run. %p\n", t);
+ func_enter();
if_lock(t);
list_for_each_entry(pkt_dev, &t->if_list, list) {
@@ -3176,7 +3190,7 @@ static void pktgen_stop_all_threads_ifs(void)
{
struct pktgen_thread *t;
- pr_debug("pktgen: entering pktgen_stop_all_threads_ifs.\n");
+ func_enter();
mutex_lock(&pktgen_thread_lock);
@@ -3241,7 +3255,7 @@ static void pktgen_run_all_threads(void)
{
struct pktgen_thread *t;
- pr_debug("pktgen: entering pktgen_run_all_threads.\n");
+ func_enter();
mutex_lock(&pktgen_thread_lock);
@@ -3260,7 +3274,7 @@ static void pktgen_reset_all_threads(void)
{
struct pktgen_thread *t;
- pr_debug("pktgen: entering pktgen_reset_all_threads.\n");
+ func_enter();
mutex_lock(&pktgen_thread_lock);
@@ -3310,8 +3324,8 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1;
if (!pkt_dev->running) {
- printk(KERN_WARNING "pktgen: interface: %s is already "
- "stopped\n", pkt_dev->odevname);
+ pr_warning("interface: %s is already stopped\n",
+ pkt_dev->odevname);
return -EINVAL;
}
@@ -3347,7 +3361,7 @@ static void pktgen_stop(struct pktgen_thread *t)
{
struct pktgen_dev *pkt_dev;
- pr_debug("pktgen: entering pktgen_stop\n");
+ func_enter();
if_lock(t);
@@ -3367,7 +3381,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
struct list_head *q, *n;
struct pktgen_dev *cur;
- pr_debug("pktgen: entering pktgen_rem_one_if\n");
+ func_enter();
if_lock(t);
@@ -3393,9 +3407,10 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
struct list_head *q, *n;
struct pktgen_dev *cur;
+ func_enter();
+
/* Remove all devices, free mem */
- pr_debug("pktgen: entering pktgen_rem_all_ifs\n");
if_lock(t);
list_for_each_safe(q, n, &t->if_list) {
@@ -3477,8 +3492,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
pkt_dev->skb = fill_packet(odev, pkt_dev);
if (pkt_dev->skb == NULL) {
- printk(KERN_ERR "pktgen: ERROR: couldn't "
- "allocate skb in fill_packet.\n");
+ pr_err("ERROR: couldn't allocate skb in fill_packet\n");
schedule();
pkt_dev->clone_count--; /* back out increment, OOM */
return;
@@ -3558,8 +3572,7 @@ static int pktgen_thread_worker(void *arg)
init_waitqueue_head(&t->queue);
complete(&t->start_done);
- pr_debug("pktgen: starting pktgen/%d: pid=%d\n",
- cpu, task_pid_nr(current));
+ pr_debug("starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current));
set_current_state(TASK_INTERRUPTIBLE);
@@ -3612,13 +3625,13 @@ static int pktgen_thread_worker(void *arg)
set_current_state(TASK_INTERRUPTIBLE);
}
- pr_debug("pktgen: %s stopping all device\n", t->tsk->comm);
+ pr_debug("%s stopping all device\n", t->tsk->comm);
pktgen_stop(t);
- pr_debug("pktgen: %s removing all device\n", t->tsk->comm);
+ pr_debug("%s removing all device\n", t->tsk->comm);
pktgen_rem_all_ifs(t);
- pr_debug("pktgen: %s removing thread.\n", t->tsk->comm);
+ pr_debug("%s removing thread\n", t->tsk->comm);
pktgen_rem_thread(t);
return 0;
@@ -3642,7 +3655,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
}
if_unlock(t);
- pr_debug("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev);
+ pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev);
return pkt_dev;
}
@@ -3658,8 +3671,7 @@ static int add_dev_to_thread(struct pktgen_thread *t,
if_lock(t);
if (pkt_dev->pg_thread) {
- printk(KERN_ERR "pktgen: ERROR: already assigned "
- "to a thread.\n");
+ pr_err("ERROR: already assigned to a thread\n");
rv = -EBUSY;
goto out;
}
@@ -3685,7 +3697,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev = __pktgen_NN_threads(ifname, FIND);
if (pkt_dev) {
- printk(KERN_ERR "pktgen: ERROR: interface already used.\n");
+ pr_err("ERROR: interface already used\n");
return -EBUSY;
}
@@ -3730,7 +3742,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir,
&pktgen_if_fops, pkt_dev);
if (!pkt_dev->entry) {
- printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n",
+ pr_err("cannot create %s/%s procfs entry\n",
PG_PROC_DIR, ifname);
err = -EINVAL;
goto out2;
@@ -3761,8 +3773,7 @@ static int __init pktgen_create_thread(int cpu)
t = kzalloc_node(sizeof(struct pktgen_thread), GFP_KERNEL,
cpu_to_node(cpu));
if (!t) {
- printk(KERN_ERR "pktgen: ERROR: out of memory, can't "
- "create new thread.\n");
+ pr_err("ERROR: out of memory, can't create new thread\n");
return -ENOMEM;
}
@@ -3776,8 +3787,7 @@ static int __init pktgen_create_thread(int cpu)
p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
if (IS_ERR(p)) {
- printk(KERN_ERR "pktgen: kernel_thread() failed "
- "for cpu %d\n", t->cpu);
+ pr_err("kernel_thread() failed for cpu %d\n", t->cpu);
list_del(&t->th_list);
kfree(t);
return PTR_ERR(p);
@@ -3788,7 +3798,7 @@ static int __init pktgen_create_thread(int cpu)
pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir,
&pktgen_thread_fops, t);
if (!pe) {
- printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n",
+ pr_err("cannot create %s/%s procfs entry\n",
PG_PROC_DIR, t->tsk->comm);
kthread_stop(p);
list_del(&t->th_list);
@@ -3822,11 +3832,10 @@ static int pktgen_remove_device(struct pktgen_thread *t,
struct pktgen_dev *pkt_dev)
{
- pr_debug("pktgen: remove_device pkt_dev=%p\n", pkt_dev);
+ pr_debug("remove_device pkt_dev=%p\n", pkt_dev);
if (pkt_dev->running) {
- printk(KERN_WARNING "pktgen: WARNING: trying to remove a "
- "running interface, stopping it now.\n");
+ pr_warning("WARNING: trying to remove a running interface, stopping it now\n");
pktgen_stop_device(pkt_dev);
}
@@ -3857,7 +3866,7 @@ static int __init pg_init(void)
int cpu;
struct proc_dir_entry *pe;
- printk(KERN_INFO "%s", version);
+ pr_info("%s", version);
pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net);
if (!pg_proc_dir)
@@ -3865,8 +3874,7 @@ static int __init pg_init(void)
pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops);
if (pe == NULL) {
- printk(KERN_ERR "pktgen: ERROR: cannot create %s "
- "procfs entry.\n", PGCTRL);
+ pr_err("ERROR: cannot create %s procfs entry\n", PGCTRL);
proc_net_remove(&init_net, PG_PROC_DIR);
return -EINVAL;
}
@@ -3879,13 +3887,12 @@ static int __init pg_init(void)
err = pktgen_create_thread(cpu);
if (err)
- printk(KERN_WARNING "pktgen: WARNING: Cannot create "
- "thread for cpu %d (%d)\n", cpu, err);
+ pr_warning("WARNING: Cannot create thread for cpu %d (%d)\n",
+ cpu, err);
}
if (list_empty(&pktgen_threads)) {
- printk(KERN_ERR "pktgen: ERROR: Initialization failed for "
- "all threads\n");
+ pr_err("ERROR: Initialization failed for all threads\n");
unregister_netdevice_notifier(&pktgen_notifier_block);
remove_proc_entry(PGCTRL, pg_proc_dir);
proc_net_remove(&init_net, PG_PROC_DIR);
@@ -3899,8 +3906,6 @@ static void __exit pg_cleanup(void)
{
struct pktgen_thread *t;
struct list_head *q, *n;
- wait_queue_head_t queue;
- init_waitqueue_head(&queue);
/* Stop all interfaces & threads */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1a2af24e9e3d..8121268ddbdd 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -299,14 +299,6 @@ static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
unregister_netdevice_many(&list_kill);
}
-void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
-{
- rtnl_lock();
- __rtnl_kill_links(net, ops);
- rtnl_unlock();
-}
-EXPORT_SYMBOL_GPL(rtnl_kill_links);
-
/**
* __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
* @ops: struct rtnl_link_ops * to unregister
@@ -579,7 +571,7 @@ static unsigned int rtnl_dev_combine_flags(const struct net_device *dev,
}
static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
- const struct net_device_stats *b)
+ const struct rtnl_link_stats64 *b)
{
a->rx_packets = b->rx_packets;
a->tx_packets = b->tx_packets;
@@ -610,38 +602,9 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
a->tx_compressed = b->tx_compressed;
}
-static void copy_rtnl_link_stats64(void *v, const struct net_device_stats *b)
+static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
{
- struct rtnl_link_stats64 a;
-
- a.rx_packets = b->rx_packets;
- a.tx_packets = b->tx_packets;
- a.rx_bytes = b->rx_bytes;
- a.tx_bytes = b->tx_bytes;
- a.rx_errors = b->rx_errors;
- a.tx_errors = b->tx_errors;
- a.rx_dropped = b->rx_dropped;
- a.tx_dropped = b->tx_dropped;
-
- a.multicast = b->multicast;
- a.collisions = b->collisions;
-
- a.rx_length_errors = b->rx_length_errors;
- a.rx_over_errors = b->rx_over_errors;
- a.rx_crc_errors = b->rx_crc_errors;
- a.rx_frame_errors = b->rx_frame_errors;
- a.rx_fifo_errors = b->rx_fifo_errors;
- a.rx_missed_errors = b->rx_missed_errors;
-
- a.tx_aborted_errors = b->tx_aborted_errors;
- a.tx_carrier_errors = b->tx_carrier_errors;
- a.tx_fifo_errors = b->tx_fifo_errors;
- a.tx_heartbeat_errors = b->tx_heartbeat_errors;
- a.tx_window_errors = b->tx_window_errors;
-
- a.rx_compressed = b->rx_compressed;
- a.tx_compressed = b->tx_compressed;
- memcpy(v, &a, sizeof(a));
+ memcpy(v, b, sizeof(*b));
}
/* All VF info */
@@ -686,7 +649,7 @@ static size_t rtnl_port_size(const struct net_device *dev)
return port_self_size;
}
-static inline size_t if_nlmsg_size(const struct net_device *dev)
+static noinline size_t if_nlmsg_size(const struct net_device *dev)
{
return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+ nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
@@ -791,7 +754,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
- const struct net_device_stats *stats;
+ struct rtnl_link_stats64 temp;
+ const struct rtnl_link_stats64 *stats;
struct nlattr *attr;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -847,7 +811,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (attr == NULL)
goto nla_put_failure;
- stats = dev_get_stats(dev);
+ stats = dev_get_stats(dev, &temp);
copy_rtnl_link_stats(nla_data(attr), stats);
attr = nla_reserve(skb, IFLA_STATS64,
diff --git a/net/core/scm.c b/net/core/scm.c
index b88f6f9d0b97..413cab89017d 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -130,6 +130,7 @@ void __scm_destroy(struct scm_cookie *scm)
}
}
}
+EXPORT_SYMBOL(__scm_destroy);
int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
{
@@ -170,6 +171,30 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
err = scm_check_creds(&p->creds);
if (err)
goto error;
+
+ if (pid_vnr(p->pid) != p->creds.pid) {
+ struct pid *pid;
+ err = -ESRCH;
+ pid = find_get_pid(p->creds.pid);
+ if (!pid)
+ goto error;
+ put_pid(p->pid);
+ p->pid = pid;
+ }
+
+ if ((p->cred->euid != p->creds.uid) ||
+ (p->cred->egid != p->creds.gid)) {
+ struct cred *cred;
+ err = -ENOMEM;
+ cred = prepare_creds();
+ if (!cred)
+ goto error;
+
+ cred->uid = cred->euid = p->creds.uid;
+ cred->gid = cred->egid = p->creds.uid;
+ put_cred(p->cred);
+ p->cred = cred;
+ }
break;
default:
goto error;
@@ -187,6 +212,7 @@ error:
scm_destroy(p);
return err;
}
+EXPORT_SYMBOL(__scm_send);
int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
{
@@ -225,6 +251,7 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
out:
return err;
}
+EXPORT_SYMBOL(put_cmsg);
void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
{
@@ -294,6 +321,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
*/
__scm_destroy(scm);
}
+EXPORT_SYMBOL(scm_detach_fds);
struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
{
@@ -311,9 +339,4 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
}
return new_fpl;
}
-
-EXPORT_SYMBOL(__scm_destroy);
-EXPORT_SYMBOL(__scm_send);
-EXPORT_SYMBOL(put_cmsg);
-EXPORT_SYMBOL(scm_detach_fds);
EXPORT_SYMBOL(scm_fp_dup);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ce88293a34e2..104f8444754a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -202,8 +202,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
skb->data = data;
skb_reset_tail_pointer(skb);
skb->end = skb->tail + size;
- kmemcheck_annotate_bitfield(skb, flags1);
- kmemcheck_annotate_bitfield(skb, flags2);
#ifdef NET_SKBUFF_DATA_USES_OFFSET
skb->mac_header = ~0U;
#endif
@@ -249,10 +247,9 @@ EXPORT_SYMBOL(__alloc_skb);
struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
unsigned int length, gfp_t gfp_mask)
{
- int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
struct sk_buff *skb;
- skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node);
+ skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
if (likely(skb)) {
skb_reserve(skb, NET_SKB_PAD);
skb->dev = dev;
@@ -261,16 +258,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
}
EXPORT_SYMBOL(__netdev_alloc_skb);
-struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask)
-{
- int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
- struct page *page;
-
- page = alloc_pages_node(node, gfp_mask, 0);
- return page;
-}
-EXPORT_SYMBOL(__netdev_alloc_page);
-
void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
int size)
{
@@ -340,7 +327,7 @@ static void skb_release_data(struct sk_buff *skb)
put_page(skb_shinfo(skb)->frags[i].page);
}
- if (skb_has_frags(skb))
+ if (skb_has_frag_list(skb))
skb_drop_fraglist(skb);
kfree(skb->head);
@@ -466,6 +453,7 @@ void consume_skb(struct sk_buff *skb)
smp_rmb();
else if (likely(!atomic_dec_and_test(&skb->users)))
return;
+ trace_consume_skb(skb);
__kfree_skb(skb);
}
EXPORT_SYMBOL(consume_skb);
@@ -685,16 +673,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
{
- int headerlen = skb->data - skb->head;
- /*
- * Allocate the copy buffer
- */
- struct sk_buff *n;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- n = alloc_skb(skb->end + skb->data_len, gfp_mask);
-#else
- n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
-#endif
+ int headerlen = skb_headroom(skb);
+ unsigned int size = (skb_end_pointer(skb) - skb->head) + skb->data_len;
+ struct sk_buff *n = alloc_skb(size, gfp_mask);
+
if (!n)
return NULL;
@@ -726,20 +708,14 @@ EXPORT_SYMBOL(skb_copy);
struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
{
- /*
- * Allocate the copy buffer
- */
- struct sk_buff *n;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- n = alloc_skb(skb->end, gfp_mask);
-#else
- n = alloc_skb(skb->end - skb->head, gfp_mask);
-#endif
+ unsigned int size = skb_end_pointer(skb) - skb->head;
+ struct sk_buff *n = alloc_skb(size, gfp_mask);
+
if (!n)
goto out;
/* Set the data pointer */
- skb_reserve(n, skb->data - skb->head);
+ skb_reserve(n, skb_headroom(skb));
/* Set the tail pointer and length */
skb_put(n, skb_headlen(skb));
/* Copy the bytes */
@@ -759,7 +735,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
skb_shinfo(n)->nr_frags = i;
}
- if (skb_has_frags(skb)) {
+ if (skb_has_frag_list(skb)) {
skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
skb_clone_fraglist(n);
}
@@ -791,12 +767,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
{
int i;
u8 *data;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- int size = nhead + skb->end + ntail;
-#else
- int size = nhead + (skb->end - skb->head) + ntail;
-#endif
+ int size = nhead + (skb_end_pointer(skb) - skb->head) + ntail;
long off;
+ bool fastpath;
BUG_ON(nhead < 0);
@@ -810,23 +783,36 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
goto nodata;
/* Copy only real data... and, alas, header. This should be
- * optimized for the cases when header is void. */
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- memcpy(data + nhead, skb->head, skb->tail);
-#else
- memcpy(data + nhead, skb->head, skb->tail - skb->head);
-#endif
- memcpy(data + size, skb_end_pointer(skb),
- sizeof(struct skb_shared_info));
+ * optimized for the cases when header is void.
+ */
+ memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ memcpy((struct skb_shared_info *)(data + size),
+ skb_shinfo(skb),
+ offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
- if (skb_has_frags(skb))
- skb_clone_fraglist(skb);
+ /* Check if we can avoid taking references on fragments if we own
+ * the last reference on skb->head. (see skb_release_data())
+ */
+ if (!skb->cloned)
+ fastpath = true;
+ else {
+ int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
- skb_release_data(skb);
+ fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
+ }
+
+ if (fastpath) {
+ kfree(skb->head);
+ } else {
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ get_page(skb_shinfo(skb)->frags[i].page);
+ if (skb_has_frag_list(skb))
+ skb_clone_fraglist(skb);
+
+ skb_release_data(skb);
+ }
off = (data + nhead) - skb->head;
skb->head = data;
@@ -1099,7 +1085,7 @@ drop_pages:
for (; i < nfrags; i++)
put_page(skb_shinfo(skb)->frags[i].page);
- if (skb_has_frags(skb))
+ if (skb_has_frag_list(skb))
skb_drop_fraglist(skb);
goto done;
}
@@ -1194,7 +1180,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
/* Optimization: no fragments, no reasons to preestimate
* size of pulled pages. Superb.
*/
- if (!skb_has_frags(skb))
+ if (!skb_has_frag_list(skb))
goto pull_pages;
/* Estimate size of pulled pages. */
@@ -2323,7 +2309,7 @@ next_skb:
st->frag_data = NULL;
}
- if (st->root_skb == st->cur_skb && skb_has_frags(st->root_skb)) {
+ if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) {
st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
st->frag_idx = 0;
goto next_skb;
@@ -2486,7 +2472,6 @@ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
skb_postpull_rcsum(skb, skb->data, len);
return skb->data += len;
}
-
EXPORT_SYMBOL_GPL(skb_pull_rcsum);
/**
@@ -2574,6 +2559,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
__copy_skb_header(nskb, skb);
nskb->mac_len = skb->mac_len;
+ /* nskb and skb might have different headroom */
+ if (nskb->ip_summed == CHECKSUM_PARTIAL)
+ nskb->csum_start += skb_headroom(nskb) - headroom;
+
skb_reset_mac_header(nskb);
skb_set_network_header(nskb, skb->mac_len);
nskb->transport_header = (nskb->network_header +
@@ -2704,7 +2693,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
return -E2BIG;
headroom = skb_headroom(p);
- nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p));
+ nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC);
if (unlikely(!nskb))
return -ENOMEM;
@@ -2890,7 +2879,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
return -ENOMEM;
/* Easy case. Most of packets will go this way. */
- if (!skb_has_frags(skb)) {
+ if (!skb_has_frag_list(skb)) {
/* A little of trouble, not enough of space for trailer.
* This should not happen, when stack is tuned to generate
* good frames. OK, on miss we reallocate and reserve even more
@@ -2925,7 +2914,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
if (skb1->next == NULL && tailbits) {
if (skb_shinfo(skb1)->nr_frags ||
- skb_has_frags(skb1) ||
+ skb_has_frag_list(skb1) ||
skb_tailroom(skb1) < tailbits)
ntail = tailbits + 128;
}
@@ -2934,7 +2923,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
skb_cloned(skb1) ||
ntail ||
skb_shinfo(skb1)->nr_frags ||
- skb_has_frags(skb1)) {
+ skb_has_frag_list(skb1)) {
struct sk_buff *skb2;
/* Fuck, we are miserable poor guys... */
@@ -3017,7 +3006,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
} else {
/*
* no hardware time stamps available,
- * so keep the skb_shared_tx and only
+ * so keep the shared tx_flags and only
* store software time stamp
*/
skb->tstamp = ktime_get_real();
diff --git a/net/core/sock.c b/net/core/sock.c
index 2cf7f9f7e775..3eed5424e659 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -110,6 +110,7 @@
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>
+#include <linux/user_namespace.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -156,7 +157,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
"sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
"sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
- "sk_lock-AF_IEEE802154",
+ "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" ,
"sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
@@ -172,7 +173,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-27" , "slock-28" , "slock-AF_CAN" ,
"slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
"slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
- "slock-AF_IEEE802154",
+ "slock-AF_IEEE802154", "slock-AF_CAIF" ,
"slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
@@ -188,7 +189,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-27" , "clock-28" , "clock-AF_CAN" ,
"clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
"clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
- "clock-AF_IEEE802154",
+ "clock-AF_IEEE802154", "clock-AF_CAIF" ,
"clock-AF_MAX"
};
@@ -749,6 +750,20 @@ set_rcvbuf:
EXPORT_SYMBOL(sock_setsockopt);
+void cred_to_ucred(struct pid *pid, const struct cred *cred,
+ struct ucred *ucred)
+{
+ ucred->pid = pid_vnr(pid);
+ ucred->uid = ucred->gid = -1;
+ if (cred) {
+ struct user_namespace *current_ns = current_user_ns();
+
+ ucred->uid = user_ns_map_uid(current_ns, cred, cred->euid);
+ ucred->gid = user_ns_map_gid(current_ns, cred, cred->egid);
+ }
+}
+EXPORT_SYMBOL_GPL(cred_to_ucred);
+
int sock_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -901,11 +916,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_PEERCRED:
- if (len > sizeof(sk->sk_peercred))
- len = sizeof(sk->sk_peercred);
- if (copy_to_user(optval, &sk->sk_peercred, len))
+ {
+ struct ucred peercred;
+ if (len > sizeof(peercred))
+ len = sizeof(peercred);
+ cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+ if (copy_to_user(optval, &peercred, len))
return -EFAULT;
goto lenout;
+ }
case SO_PEERNAME:
{
@@ -1059,8 +1078,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
#ifdef CONFIG_CGROUPS
void sock_update_classid(struct sock *sk)
{
- u32 classid = task_cls_classid(current);
+ u32 classid;
+ rcu_read_lock(); /* doing current task, which cannot vanish. */
+ classid = task_cls_classid(current);
+ rcu_read_unlock();
if (classid && classid != sk->sk_classid)
sk->sk_classid = classid;
}
@@ -1119,6 +1141,9 @@ static void __sk_free(struct sock *sk)
printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
__func__, atomic_read(&sk->sk_omem_alloc));
+ if (sk->sk_peer_cred)
+ put_cred(sk->sk_peer_cred);
+ put_pid(sk->sk_peer_pid);
put_net(sock_net(sk));
sk_prot_free(sk->sk_prot_creator, sk);
}
@@ -1200,7 +1225,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
sock_reset_flag(newsk, SOCK_DONE);
skb_queue_head_init(&newsk->sk_error_queue);
- filter = newsk->sk_filter;
+ filter = rcu_dereference_protected(newsk->sk_filter, 1);
if (filter != NULL)
sk_filter_charge(newsk, filter);
@@ -1317,9 +1342,10 @@ EXPORT_SYMBOL(sock_wfree);
void sock_rfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
+ unsigned int len = skb->truesize;
- atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
- sk_mem_uncharge(skb->sk, skb->truesize);
+ atomic_sub(len, &sk->sk_rmem_alloc);
+ sk_mem_uncharge(sk, len);
}
EXPORT_SYMBOL(sock_rfree);
@@ -1328,9 +1354,9 @@ int sock_i_uid(struct sock *sk)
{
int uid;
- read_lock(&sk->sk_callback_lock);
+ read_lock_bh(&sk->sk_callback_lock);
uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
- read_unlock(&sk->sk_callback_lock);
+ read_unlock_bh(&sk->sk_callback_lock);
return uid;
}
EXPORT_SYMBOL(sock_i_uid);
@@ -1339,9 +1365,9 @@ unsigned long sock_i_ino(struct sock *sk)
{
unsigned long ino;
- read_lock(&sk->sk_callback_lock);
+ read_lock_bh(&sk->sk_callback_lock);
ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
- read_unlock(&sk->sk_callback_lock);
+ read_unlock_bh(&sk->sk_callback_lock);
return ino;
}
EXPORT_SYMBOL(sock_i_ino);
@@ -1534,6 +1560,8 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
EXPORT_SYMBOL(sock_alloc_send_skb);
static void __lock_sock(struct sock *sk)
+ __releases(&sk->sk_lock.slock)
+ __acquires(&sk->sk_lock.slock)
{
DEFINE_WAIT(wait);
@@ -1550,6 +1578,8 @@ static void __lock_sock(struct sock *sk)
}
static void __release_sock(struct sock *sk)
+ __releases(&sk->sk_lock.slock)
+ __acquires(&sk->sk_lock.slock)
{
struct sk_buff *skb = sk->sk_backlog.head;
@@ -1954,9 +1984,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_sndmsg_page = NULL;
sk->sk_sndmsg_off = 0;
- sk->sk_peercred.pid = 0;
- sk->sk_peercred.uid = -1;
- sk->sk_peercred.gid = -1;
+ sk->sk_peer_pid = NULL;
+ sk->sk_peer_cred = NULL;
sk->sk_write_pending = 0;
sk->sk_rcvlowat = 1;
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
@@ -2210,8 +2239,7 @@ static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
#ifdef CONFIG_NET_NS
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
- int cpu = smp_processor_id();
- per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val;
+ __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
@@ -2257,7 +2285,7 @@ static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
- __get_cpu_var(prot_inuse).val[prot->inuse_idx] += val;
+ __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
diff --git a/net/core/stream.c b/net/core/stream.c
index cc196f42b8d8..f5df85dcd20b 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -43,7 +43,6 @@ void sk_stream_write_space(struct sock *sk)
rcu_read_unlock();
}
}
-
EXPORT_SYMBOL(sk_stream_write_space);
/**
@@ -81,7 +80,6 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
} while (!done);
return 0;
}
-
EXPORT_SYMBOL(sk_stream_wait_connect);
/**
@@ -109,7 +107,6 @@ void sk_stream_wait_close(struct sock *sk, long timeout)
finish_wait(sk_sleep(sk), &wait);
}
}
-
EXPORT_SYMBOL(sk_stream_wait_close);
/**
@@ -144,10 +141,10 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
sk->sk_write_pending++;
- sk_wait_event(sk, &current_timeo, !sk->sk_err &&
- !(sk->sk_shutdown & SEND_SHUTDOWN) &&
- sk_stream_memory_free(sk) &&
- vm_wait);
+ sk_wait_event(sk, &current_timeo, sk->sk_err ||
+ (sk->sk_shutdown & SEND_SHUTDOWN) ||
+ (sk_stream_memory_free(sk) &&
+ !vm_wait));
sk->sk_write_pending--;
if (vm_wait) {
@@ -174,7 +171,6 @@ do_interrupted:
err = sock_intr_errno(*timeo_p);
goto out;
}
-
EXPORT_SYMBOL(sk_stream_wait_memory);
int sk_stream_error(struct sock *sk, int flags, int err)
@@ -185,7 +181,6 @@ int sk_stream_error(struct sock *sk, int flags, int err)
send_sig(SIGPIPE, current, 0);
return err;
}
-
EXPORT_SYMBOL(sk_stream_error);
void sk_stream_kill_queues(struct sock *sk)
@@ -210,5 +205,4 @@ void sk_stream_kill_queues(struct sock *sk)
* have gone away, only the net layer knows can touch it.
*/
}
-
EXPORT_SYMBOL(sk_stream_kill_queues);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 01eee5d984be..385b6095fdc4 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -34,7 +34,8 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
mutex_lock(&sock_flow_mutex);
- orig_sock_table = rps_sock_flow_table;
+ orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
+ lockdep_is_held(&sock_flow_mutex));
size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
new file mode 100644
index 000000000000..0ae6c22da85b
--- /dev/null
+++ b/net/core/timestamping.c
@@ -0,0 +1,126 @@
+/*
+ * PTP 1588 clock support - support for timestamping in PHY devices
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/errqueue.h>
+#include <linux/phy.h>
+#include <linux/ptp_classify.h>
+#include <linux/skbuff.h>
+
+static struct sock_filter ptp_filter[] = {
+ PTP_FILTER
+};
+
+static unsigned int classify(struct sk_buff *skb)
+{
+ if (likely(skb->dev &&
+ skb->dev->phydev &&
+ skb->dev->phydev->drv))
+ return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter));
+ else
+ return PTP_CLASS_NONE;
+}
+
+void skb_clone_tx_timestamp(struct sk_buff *skb)
+{
+ struct phy_device *phydev;
+ struct sk_buff *clone;
+ struct sock *sk = skb->sk;
+ unsigned int type;
+
+ if (!sk)
+ return;
+
+ type = classify(skb);
+
+ switch (type) {
+ case PTP_CLASS_V1_IPV4:
+ case PTP_CLASS_V1_IPV6:
+ case PTP_CLASS_V2_IPV4:
+ case PTP_CLASS_V2_IPV6:
+ case PTP_CLASS_V2_L2:
+ case PTP_CLASS_V2_VLAN:
+ phydev = skb->dev->phydev;
+ if (likely(phydev->drv->txtstamp)) {
+ clone = skb_clone(skb, GFP_ATOMIC);
+ if (!clone)
+ return;
+ clone->sk = sk;
+ phydev->drv->txtstamp(phydev, clone, type);
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+void skb_complete_tx_timestamp(struct sk_buff *skb,
+ struct skb_shared_hwtstamps *hwtstamps)
+{
+ struct sock *sk = skb->sk;
+ struct sock_exterr_skb *serr;
+ int err;
+
+ if (!hwtstamps)
+ return;
+
+ *skb_hwtstamps(skb) = *hwtstamps;
+ serr = SKB_EXT_ERR(skb);
+ memset(serr, 0, sizeof(*serr));
+ serr->ee.ee_errno = ENOMSG;
+ serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+ skb->sk = NULL;
+ err = sock_queue_err_skb(sk, skb);
+ if (err)
+ kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
+
+bool skb_defer_rx_timestamp(struct sk_buff *skb)
+{
+ struct phy_device *phydev;
+ unsigned int type;
+
+ skb_push(skb, ETH_HLEN);
+
+ type = classify(skb);
+
+ skb_pull(skb, ETH_HLEN);
+
+ switch (type) {
+ case PTP_CLASS_V1_IPV4:
+ case PTP_CLASS_V1_IPV6:
+ case PTP_CLASS_V2_IPV4:
+ case PTP_CLASS_V2_IPV6:
+ case PTP_CLASS_V2_L2:
+ case PTP_CLASS_V2_VLAN:
+ phydev = skb->dev->phydev;
+ if (likely(phydev->drv->rxtstamp))
+ return phydev->drv->rxtstamp(phydev, skb, type);
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+void __init skb_timestamping_init(void)
+{
+ BUG_ON(sk_chk_filter(ptp_filter, ARRAY_SIZE(ptp_filter)));
+}
diff --git a/net/core/utils.c b/net/core/utils.c
index 838250241d26..5fea0ab21902 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -75,9 +75,8 @@ __be32 in_aton(const char *str)
str++;
}
}
- return(htonl(l));
+ return htonl(l);
}
-
EXPORT_SYMBOL(in_aton);
#define IN6PTON_XDIGIT 0x00010000
@@ -93,18 +92,19 @@ EXPORT_SYMBOL(in_aton);
static inline int xdigit2bin(char c, int delim)
{
+ int val;
+
if (c == delim || c == '\0')
return IN6PTON_DELIM;
if (c == ':')
return IN6PTON_COLON_MASK;
if (c == '.')
return IN6PTON_DOT;
- if (c >= '0' && c <= '9')
- return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0'));
- if (c >= 'a' && c <= 'f')
- return (IN6PTON_XDIGIT | (c - 'a' + 10));
- if (c >= 'A' && c <= 'F')
- return (IN6PTON_XDIGIT | (c - 'A' + 10));
+
+ val = hex_to_bin(c);
+ if (val >= 0)
+ return val | IN6PTON_XDIGIT | (val < 10 ? IN6PTON_DIGIT : 0);
+
if (delim == -1)
return IN6PTON_DELIM;
return IN6PTON_UNKNOWN;
@@ -162,7 +162,6 @@ out:
*end = s;
return ret;
}
-
EXPORT_SYMBOL(in4_pton);
int in6_pton(const char *src, int srclen,
@@ -280,7 +279,6 @@ out:
*end = s;
return ret;
}
-
EXPORT_SYMBOL(in6_pton);
void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,