summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c18
-rw-r--r--net/8021q/vlan_core.c9
-rw-r--r--net/8021q/vlan_dev.c12
-rw-r--r--net/atm/br2684.c36
-rw-r--r--net/batman-adv/Kconfig11
-rw-r--r--net/batman-adv/Makefile1
-rw-r--r--net/batman-adv/bat_iv_ogm.c51
-rw-r--r--net/batman-adv/bitarray.c23
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c148
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h6
-rw-r--r--net/batman-adv/debugfs.c60
-rw-r--r--net/batman-adv/distributed-arp-table.c1066
-rw-r--r--net/batman-adv/distributed-arp-table.h167
-rw-r--r--net/batman-adv/gateway_client.c19
-rw-r--r--net/batman-adv/hard-interface.c48
-rw-r--r--net/batman-adv/hash.h22
-rw-r--r--net/batman-adv/icmp_socket.c16
-rw-r--r--net/batman-adv/main.c89
-rw-r--r--net/batman-adv/main.h36
-rw-r--r--net/batman-adv/originator.c22
-rw-r--r--net/batman-adv/packet.h84
-rw-r--r--net/batman-adv/routing.c309
-rw-r--r--net/batman-adv/send.c37
-rw-r--r--net/batman-adv/send.h3
-rw-r--r--net/batman-adv/soft-interface.c97
-rw-r--r--net/batman-adv/sysfs.c58
-rw-r--r--net/batman-adv/translation-table.c541
-rw-r--r--net/batman-adv/translation-table.h8
-rw-r--r--net/batman-adv/types.h104
-rw-r--r--net/batman-adv/unicast.c143
-rw-r--r--net/batman-adv/unicast.h36
-rw-r--r--net/batman-adv/vis.c44
-rw-r--r--net/bridge/br_device.c4
-rw-r--r--net/bridge/br_input.c17
-rw-r--r--net/bridge/br_ioctl.c25
-rw-r--r--net/bridge/br_netlink.c247
-rw-r--r--net/bridge/br_private.h18
-rw-r--r--net/bridge/br_stp.c22
-rw-r--r--net/bridge/br_stp_bpdu.c7
-rw-r--r--net/bridge/br_sysfs_br.c22
-rw-r--r--net/bridge/br_sysfs_if.c44
-rw-r--r--net/caif/cfctrl.c3
-rw-r--r--net/can/gw.c6
-rw-r--r--net/ceph/messenger.c6
-rw-r--r--net/core/dev.c158
-rw-r--r--net/core/dev_addr_lists.c3
-rw-r--r--net/core/ethtool.c2
-rw-r--r--net/core/filter.c139
-rw-r--r--net/core/flow.c4
-rw-r--r--net/core/neighbour.c4
-rw-r--r--net/core/net-sysfs.c17
-rw-r--r--net/core/net_namespace.c23
-rw-r--r--net/core/netprio_cgroup.c2
-rw-r--r--net/core/pktgen.c47
-rw-r--r--net/core/rtnetlink.c229
-rw-r--r--net/core/scm.c6
-rw-r--r--net/core/skbuff.c30
-rw-r--r--net/core/sock.c84
-rw-r--r--net/core/sysctl_net_core.c5
-rw-r--r--net/dcb/dcbnl.c3
-rw-r--r--net/dccp/minisocks.c3
-rw-r--r--net/decnet/dn_dev.c6
-rw-r--r--net/decnet/dn_fib.c6
-rw-r--r--net/dsa/Kconfig18
-rw-r--r--net/ipv4/af_inet.c93
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/devinet.c192
-rw-r--r--net/ipv4/fib_frontend.c2
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/inet_connection_sock.c25
-rw-r--r--net/ipv4/inet_diag.c10
-rw-r--r--net/ipv4/ip_fragment.c4
-rw-r--r--net/ipv4/ip_gre.c32
-rw-r--r--net/ipv4/ip_options.c6
-rw-r--r--net/ipv4/ip_sockglue.c40
-rw-r--r--net/ipv4/ip_vti.c31
-rw-r--r--net/ipv4/ipconfig.c6
-rw-r--r--net/ipv4/ipip.c271
-rw-r--r--net/ipv4/ipmr.c30
-rw-r--r--net/ipv4/netfilter/arp_tables.c8
-rw-r--r--net/ipv4/netfilter/ip_tables.c8
-rw-r--r--net/ipv4/netfilter/iptable_nat.c8
-rw-r--r--net/ipv4/protocol.c21
-rw-r--r--net/ipv4/route.c22
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/sysctl_net_ipv4.c3
-rw-r--r--net/ipv4/tcp.c21
-rw-r--r--net/ipv4/tcp_cong.c3
-rw-r--r--net/ipv4/tcp_illinois.c8
-rw-r--r--net/ipv4/tcp_input.c76
-rw-r--r--net/ipv4/tcp_ipv4.c39
-rw-r--r--net/ipv4/tcp_metrics.c14
-rw-r--r--net/ipv4/tcp_minisocks.c9
-rw-r--r--net/ipv4/tcp_output.c9
-rw-r--r--net/ipv4/tcp_timer.c12
-rw-r--r--net/ipv4/xfrm4_policy.c13
-rw-r--r--net/ipv6/Makefile5
-rw-r--r--net/ipv6/addrconf.c204
-rw-r--r--net/ipv6/af_inet6.c245
-rw-r--r--net/ipv6/ah6.c10
-rw-r--r--net/ipv6/anycast.c4
-rw-r--r--net/ipv6/datagram.c8
-rw-r--r--net/ipv6/exthdrs.c70
-rw-r--r--net/ipv6/exthdrs_core.c44
-rw-r--r--net/ipv6/exthdrs_offload.c41
-rw-r--r--net/ipv6/fib6_rules.c2
-rw-r--r--net/ipv6/icmp.c2
-rw-r--r--net/ipv6/inet6_connection_sock.c3
-rw-r--r--net/ipv6/ip6_fib.c57
-rw-r--r--net/ipv6/ip6_flowlabel.c3
-rw-r--r--net/ipv6/ip6_gre.c37
-rw-r--r--net/ipv6/ip6_offload.c282
-rw-r--r--net/ipv6/ip6_offload.h18
-rw-r--r--net/ipv6/ip6_output.c72
-rw-r--r--net/ipv6/ip6_tunnel.c239
-rw-r--r--net/ipv6/ip6mr.c14
-rw-r--r--net/ipv6/ipv6_sockglue.c10
-rw-r--r--net/ipv6/mcast.c4
-rw-r--r--net/ipv6/ndisc.c20
-rw-r--r--net/ipv6/netfilter/ip6_tables.c14
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c2
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c8
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c70
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c4
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c4
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c6
-rw-r--r--net/ipv6/netfilter/nf_nat_proto_icmpv6.c2
-rw-r--r--net/ipv6/output_core.c76
-rw-r--r--net/ipv6/protocol.c25
-rw-r--r--net/ipv6/raw.c6
-rw-r--r--net/ipv6/reassembly.c4
-rw-r--r--net/ipv6/route.c190
-rw-r--r--net/ipv6/sit.c426
-rw-r--r--net/ipv6/syncookies.c2
-rw-r--r--net/ipv6/tcp_ipv6.c124
-rw-r--r--net/ipv6/tcpv6_offload.c95
-rw-r--r--net/ipv6/udp.c94
-rw-r--r--net/ipv6/udp_offload.c120
-rw-r--r--net/ipv6/xfrm6_policy.c21
-rw-r--r--net/ipv6/xfrm6_state.c4
-rw-r--r--net/irda/ircomm/ircomm_tty.c2
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/l2tp/l2tp_eth.c1
-rw-r--r--net/l2tp/l2tp_netlink.c2
-rw-r--r--net/llc/af_llc.c2
-rw-r--r--net/netfilter/ipset/ip_set_core.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c4
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c7
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c7
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c7
-rw-r--r--net/netfilter/ipvs/Kconfig7
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c15
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c404
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c11
-rw-r--r--net/netfilter/ipvs/ip_vs_dh.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c7
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_nfct.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c18
-rw-r--r--net/netfilter/ipvs/ip_vs_proto.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_ah_esp.c9
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c42
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c40
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c41
-rw-r--r--net/netfilter/ipvs/ip_vs_sched.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c81
-rw-r--r--net/netfilter/nf_conntrack_acct.c4
-rw-r--r--net/netfilter/nf_conntrack_ecache.c4
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c3
-rw-r--r--net/netfilter/nf_conntrack_helper.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c8
-rw-r--r--net/netfilter/nf_conntrack_standalone.c4
-rw-r--r--net/netfilter/nf_conntrack_timestamp.c4
-rw-r--r--net/netfilter/nfnetlink.c2
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c3
-rw-r--r--net/netfilter/xt_CT.c10
-rw-r--r--net/netfilter/xt_TEE.c1
-rw-r--r--net/netfilter/xt_ipvs.c4
-rw-r--r--net/netfilter/xt_nat.c8
-rw-r--r--net/netlink/af_netlink.c21
-rw-r--r--net/packet/af_packet.c50
-rw-r--r--net/packet/internal.h1
-rw-r--r--net/phonet/pn_netlink.c6
-rw-r--r--net/rds/ib.h2
-rw-r--r--net/rds/ib_recv.c24
-rw-r--r--net/sched/Kconfig2
-rw-r--r--net/sched/act_api.c3
-rw-r--r--net/sched/cls_api.c2
-rw-r--r--net/sched/cls_cgroup.c24
-rw-r--r--net/sched/sch_api.c18
-rw-r--r--net/sched/sch_cbq.c3
-rw-r--r--net/sched/sch_htb.c139
-rw-r--r--net/sched/sch_qfq.c109
-rw-r--r--net/sctp/Kconfig39
-rw-r--r--net/sctp/proc.c8
-rw-r--r--net/sctp/protocol.c9
-rw-r--r--net/sctp/sm_make_chunk.c19
-rw-r--r--net/sctp/sm_sideeffect.c57
-rw-r--r--net/sctp/socket.c21
-rw-r--r--net/sctp/sysctl.c59
-rw-r--r--net/sctp/tsnmap.c8
-rw-r--r--net/sctp/ulpqueue.c3
-rw-r--r--net/socket.c8
-rw-r--r--net/sunrpc/backchannel_rqst.c2
-rw-r--r--net/sunrpc/cache.c4
-rw-r--r--net/sunrpc/xprtsock.c41
-rw-r--r--net/sysctl_net.c15
-rw-r--r--net/tipc/Kconfig13
-rw-r--r--net/tipc/bcast.c27
-rw-r--r--net/tipc/bearer.c110
-rw-r--r--net/tipc/bearer.h24
-rw-r--r--net/tipc/core.c5
-rw-r--r--net/tipc/discover.c2
-rw-r--r--net/tipc/handler.c1
-rw-r--r--net/tipc/link.c188
-rw-r--r--net/tipc/link.h4
-rw-r--r--net/tipc/name_distr.c2
-rw-r--r--net/tipc/node.c15
-rw-r--r--net/tipc/node.h6
-rw-r--r--net/tipc/socket.c58
-rw-r--r--net/unix/diag.c3
-rw-r--r--net/unix/sysctl_net_unix.c4
-rw-r--r--net/xfrm/xfrm_ipcomp.c8
-rw-r--r--net/xfrm/xfrm_replay.c13
-rw-r--r--net/xfrm/xfrm_sysctl.c4
-rw-r--r--net/xfrm/xfrm_user.c2
227 files changed, 7290 insertions, 2933 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 9096bcb08132..afba51e60310 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -294,7 +294,7 @@ static void vlan_transfer_features(struct net_device *dev,
else
vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN;
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#if IS_ENABLED(CONFIG_FCOE)
vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid;
#endif
@@ -463,7 +463,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
case NETDEV_PRE_TYPE_CHANGE:
/* Forbid underlaying device to change its type. */
- return NOTIFY_BAD;
+ if (vlan_uses_dev(dev))
+ return NOTIFY_BAD;
+ break;
case NETDEV_NOTIFY_PEERS:
case NETDEV_BONDING_FAILOVER:
@@ -527,7 +529,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
switch (args.cmd) {
case SET_VLAN_INGRESS_PRIORITY_CMD:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
vlan_dev_set_ingress_priority(dev,
args.u.skb_priority,
@@ -537,7 +539,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
case SET_VLAN_EGRESS_PRIORITY_CMD:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
err = vlan_dev_set_egress_priority(dev,
args.u.skb_priority,
@@ -546,7 +548,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
case SET_VLAN_FLAG_CMD:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
err = vlan_dev_change_flags(dev,
args.vlan_qos ? args.u.flag : 0,
@@ -555,7 +557,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
case SET_VLAN_NAME_TYPE_CMD:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
if ((args.u.name_type >= 0) &&
(args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) {
@@ -571,14 +573,14 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
case ADD_VLAN_CMD:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
err = register_vlan_device(dev, args.u.VID);
break;
case DEL_VLAN_CMD:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
unregister_vlan_dev(dev, NULL);
err = 0;
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index fbbf1fa00940..65e06abe023f 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -366,6 +366,13 @@ EXPORT_SYMBOL(vlan_vids_del_by_dev);
bool vlan_uses_dev(const struct net_device *dev)
{
- return rtnl_dereference(dev->vlan_info) ? true : false;
+ struct vlan_info *vlan_info;
+
+ ASSERT_RTNL();
+
+ vlan_info = rtnl_dereference(dev->vlan_info);
+ if (!vlan_info)
+ return false;
+ return vlan_info->grp.nr_vlan_devs ? true : false;
}
EXPORT_SYMBOL(vlan_uses_dev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 402442402af7..4a6d31a082b9 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -409,7 +409,7 @@ static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa)
return err;
}
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#if IS_ENABLED(CONFIG_FCOE)
static int vlan_dev_fcoe_ddp_setup(struct net_device *dev, u16 xid,
struct scatterlist *sgl, unsigned int sgc)
{
@@ -531,6 +531,10 @@ static const struct header_ops vlan_header_ops = {
.parse = eth_header_parse,
};
+static struct device_type vlan_type = {
+ .name = "vlan",
+};
+
static const struct net_device_ops vlan_netdev_ops;
static int vlan_dev_init(struct net_device *dev)
@@ -564,7 +568,7 @@ static int vlan_dev_init(struct net_device *dev)
if (is_zero_ether_addr(dev->broadcast))
memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len);
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#if IS_ENABLED(CONFIG_FCOE)
dev->fcoe_ddp_xid = real_dev->fcoe_ddp_xid;
#endif
@@ -579,6 +583,8 @@ static int vlan_dev_init(struct net_device *dev)
dev->netdev_ops = &vlan_netdev_ops;
+ SET_NETDEV_DEVTYPE(dev, &vlan_type);
+
if (is_vlan_dev(real_dev))
subclass = 1;
@@ -741,7 +747,7 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_do_ioctl = vlan_dev_ioctl,
.ndo_neigh_setup = vlan_dev_neigh_setup,
.ndo_get_stats64 = vlan_dev_get_stats64,
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#if IS_ENABLED(CONFIG_FCOE)
.ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup,
.ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done,
.ndo_fcoe_enable = vlan_dev_fcoe_enable,
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 4819d31533e0..8eb6fbe8d8dd 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -74,6 +74,7 @@ struct br2684_vcc {
struct br2684_filter filter;
#endif /* CONFIG_ATM_BR2684_IPFILTER */
unsigned int copies_needed, copies_failed;
+ atomic_t qspace;
};
struct br2684_dev {
@@ -181,18 +182,15 @@ static struct notifier_block atm_dev_notifier = {
static void br2684_pop(struct atm_vcc *vcc, struct sk_buff *skb)
{
struct br2684_vcc *brvcc = BR2684_VCC(vcc);
- struct net_device *net_dev = skb->dev;
- pr_debug("(vcc %p ; net_dev %p )\n", vcc, net_dev);
+ pr_debug("(vcc %p ; net_dev %p )\n", vcc, brvcc->device);
brvcc->old_pop(vcc, skb);
- if (!net_dev)
- return;
-
- if (atm_may_send(vcc, 0))
- netif_wake_queue(net_dev);
-
+ /* If the queue space just went up from zero, wake */
+ if (atomic_inc_return(&brvcc->qspace) == 1)
+ netif_wake_queue(brvcc->device);
}
+
/*
* Send a packet out a particular vcc. Not to useful right now, but paves
* the way for multiple vcc's per itf. Returns true if we can send,
@@ -256,16 +254,19 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
ATM_SKB(skb)->atm_options = atmvcc->atm_options;
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
- atmvcc->send(atmvcc, skb);
- if (!atm_may_send(atmvcc, 0)) {
+ if (atomic_dec_return(&brvcc->qspace) < 1) {
+ /* No more please! */
netif_stop_queue(brvcc->device);
- /*check for race with br2684_pop*/
- if (atm_may_send(atmvcc, 0))
- netif_start_queue(brvcc->device);
+ /* We might have raced with br2684_pop() */
+ if (unlikely(atomic_read(&brvcc->qspace) > 0))
+ netif_wake_queue(brvcc->device);
}
- return 1;
+ /* If this fails immediately, the skb will be freed and br2684_pop()
+ will wake the queue if appropriate. Just return an error so that
+ the stats are updated correctly */
+ return !atmvcc->send(atmvcc, skb);
}
static inline struct br2684_vcc *pick_outgoing_vcc(const struct sk_buff *skb,
@@ -504,6 +505,13 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
brvcc = kzalloc(sizeof(struct br2684_vcc), GFP_KERNEL);
if (!brvcc)
return -ENOMEM;
+ /*
+ * Allow two packets in the ATM queue. One actually being sent, and one
+ * for the ATM 'TX done' handler to send. It shouldn't take long to get
+ * the next one from the netdev queue, when we need it. More than that
+ * would be bufferbloat.
+ */
+ atomic_set(&brvcc->qspace, 2);
write_lock_irq(&devs_lock);
net_dev = br2684_find_dev(&be.ifspec);
if (net_dev == NULL) {
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index 53f5244e28f8..8d8afb134b3a 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -6,6 +6,7 @@ config BATMAN_ADV
tristate "B.A.T.M.A.N. Advanced Meshing Protocol"
depends on NET
select CRC16
+ select LIBCRC32C
default n
help
B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
@@ -25,6 +26,16 @@ config BATMAN_ADV_BLA
more than one mesh node in the same LAN, you can safely remove
this feature and save some space.
+config BATMAN_ADV_DAT
+ bool "Distributed ARP Table"
+ depends on BATMAN_ADV && INET
+ default n
+ help
+ This option enables DAT (Distributed ARP Table), a DHT based
+ mechanism that increases ARP reliability on sparse wireless
+ mesh networks. If you think that your network does not need
+ this option you can safely remove it and save some space.
+
config BATMAN_ADV_DEBUG
bool "B.A.T.M.A.N. debugging"
depends on BATMAN_ADV
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 8676d2b1d574..e45e3b4e32e3 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -23,6 +23,7 @@ batman-adv-y += bat_iv_ogm.o
batman-adv-y += bitarray.o
batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o
batman-adv-y += debugfs.o
+batman-adv-$(CONFIG_BATMAN_ADV_DAT) += distributed-arp-table.o
batman-adv-y += gateway_client.o
batman-adv-y += gateway_common.o
batman-adv-y += hard-interface.o
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index b02b75dae3a8..9f3925a85aab 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -57,20 +57,22 @@ out:
static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
{
struct batadv_ogm_packet *batadv_ogm_packet;
+ unsigned char *ogm_buff;
uint32_t random_seqno;
int res = -ENOMEM;
/* randomize initial seqno to avoid collision */
get_random_bytes(&random_seqno, sizeof(random_seqno));
- atomic_set(&hard_iface->seqno, random_seqno);
+ atomic_set(&hard_iface->bat_iv.ogm_seqno, random_seqno);
- hard_iface->packet_len = BATADV_OGM_HLEN;
- hard_iface->packet_buff = kmalloc(hard_iface->packet_len, GFP_ATOMIC);
-
- if (!hard_iface->packet_buff)
+ hard_iface->bat_iv.ogm_buff_len = BATADV_OGM_HLEN;
+ ogm_buff = kmalloc(hard_iface->bat_iv.ogm_buff_len, GFP_ATOMIC);
+ if (!ogm_buff)
goto out;
- batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff;
+ hard_iface->bat_iv.ogm_buff = ogm_buff;
+
+ batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
batadv_ogm_packet->header.packet_type = BATADV_IV_OGM;
batadv_ogm_packet->header.version = BATADV_COMPAT_VERSION;
batadv_ogm_packet->header.ttl = 2;
@@ -87,15 +89,16 @@ out:
static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface)
{
- kfree(hard_iface->packet_buff);
- hard_iface->packet_buff = NULL;
+ kfree(hard_iface->bat_iv.ogm_buff);
+ hard_iface->bat_iv.ogm_buff = NULL;
}
static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface)
{
struct batadv_ogm_packet *batadv_ogm_packet;
+ unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff;
- batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff;
+ batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
memcpy(batadv_ogm_packet->orig,
hard_iface->net_dev->dev_addr, ETH_ALEN);
memcpy(batadv_ogm_packet->prev_sender,
@@ -106,8 +109,9 @@ static void
batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface)
{
struct batadv_ogm_packet *batadv_ogm_packet;
+ unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff;
- batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff;
+ batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
batadv_ogm_packet->flags = BATADV_PRIMARIES_FIRST_HOP;
batadv_ogm_packet->header.ttl = BATADV_TTL;
}
@@ -407,9 +411,11 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
if ((atomic_read(&bat_priv->aggregated_ogms)) &&
(packet_len < BATADV_MAX_AGGREGATION_BYTES))
- skb_size = BATADV_MAX_AGGREGATION_BYTES + ETH_HLEN;
+ skb_size = BATADV_MAX_AGGREGATION_BYTES;
else
- skb_size = packet_len + ETH_HLEN;
+ skb_size = packet_len;
+
+ skb_size += ETH_HLEN + NET_IP_ALIGN;
forw_packet_aggr->skb = dev_alloc_skb(skb_size);
if (!forw_packet_aggr->skb) {
@@ -418,7 +424,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
kfree(forw_packet_aggr);
goto out;
}
- skb_reserve(forw_packet_aggr->skb, ETH_HLEN);
+ skb_reserve(forw_packet_aggr->skb, ETH_HLEN + NET_IP_ALIGN);
INIT_HLIST_NODE(&forw_packet_aggr->list);
@@ -590,8 +596,10 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node,
static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
+ unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff;
struct batadv_ogm_packet *batadv_ogm_packet;
struct batadv_hard_iface *primary_if;
+ int *ogm_buff_len = &hard_iface->bat_iv.ogm_buff_len;
int vis_server, tt_num_changes = 0;
uint32_t seqno;
uint8_t bandwidth;
@@ -600,17 +608,16 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
primary_if = batadv_primary_if_get_selected(bat_priv);
if (hard_iface == primary_if)
- tt_num_changes = batadv_tt_append_diff(bat_priv,
- &hard_iface->packet_buff,
- &hard_iface->packet_len,
+ tt_num_changes = batadv_tt_append_diff(bat_priv, ogm_buff,
+ ogm_buff_len,
BATADV_OGM_HLEN);
- batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff;
+ batadv_ogm_packet = (struct batadv_ogm_packet *)(*ogm_buff);
/* change sequence number to network order */
- seqno = (uint32_t)atomic_read(&hard_iface->seqno);
+ seqno = (uint32_t)atomic_read(&hard_iface->bat_iv.ogm_seqno);
batadv_ogm_packet->seqno = htonl(seqno);
- atomic_inc(&hard_iface->seqno);
+ atomic_inc(&hard_iface->bat_iv.ogm_seqno);
batadv_ogm_packet->ttvn = atomic_read(&bat_priv->tt.vn);
batadv_ogm_packet->tt_crc = htons(bat_priv->tt.local_crc);
@@ -631,8 +638,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
}
batadv_slide_own_bcast_window(hard_iface);
- batadv_iv_ogm_queue_add(bat_priv, hard_iface->packet_buff,
- hard_iface->packet_len, hard_iface, 1,
+ batadv_iv_ogm_queue_add(bat_priv, hard_iface->bat_iv.ogm_buff,
+ hard_iface->bat_iv.ogm_buff_len, hard_iface, 1,
batadv_iv_ogm_emit_send_time(bat_priv));
if (primary_if)
@@ -1015,7 +1022,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
return;
/* could be changed by schedule_own_packet() */
- if_incoming_seqno = atomic_read(&if_incoming->seqno);
+ if_incoming_seqno = atomic_read(&if_incoming->bat_iv.ogm_seqno);
if (batadv_ogm_packet->flags & BATADV_DIRECTLINK)
has_directlink_flag = 1;
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index aea174cdbfbd..5453b17d8df2 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -79,20 +79,17 @@ int batadv_bit_get_packet(void *priv, unsigned long *seq_bits,
* or the old packet got delayed somewhere in the network. The
* packet should be dropped without calling this function if the
* seqno window is protected.
+ *
+ * seq_num_diff <= -BATADV_TQ_LOCAL_WINDOW_SIZE
+ * or
+ * seq_num_diff >= BATADV_EXPECTED_SEQNO_RANGE
*/
- if (seq_num_diff <= -BATADV_TQ_LOCAL_WINDOW_SIZE ||
- seq_num_diff >= BATADV_EXPECTED_SEQNO_RANGE) {
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+ "Other host probably restarted!\n");
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Other host probably restarted!\n");
-
- bitmap_zero(seq_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
- if (set_mark)
- batadv_set_bit(seq_bits, 0);
-
- return 1;
- }
+ bitmap_zero(seq_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
+ if (set_mark)
+ batadv_set_bit(seq_bits, 0);
- /* never reached */
- return 0;
+ return 1;
}
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 0a9084ad19a6..5aebe9327d68 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -40,15 +40,11 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv,
/* return the index of the claim */
static inline uint32_t batadv_choose_claim(const void *data, uint32_t size)
{
- const unsigned char *key = data;
+ struct batadv_claim *claim = (struct batadv_claim *)data;
uint32_t hash = 0;
- size_t i;
- for (i = 0; i < ETH_ALEN + sizeof(short); i++) {
- hash += key[i];
- hash += (hash << 10);
- hash ^= (hash >> 6);
- }
+ hash = batadv_hash_bytes(hash, &claim->addr, sizeof(claim->addr));
+ hash = batadv_hash_bytes(hash, &claim->vid, sizeof(claim->vid));
hash += (hash << 3);
hash ^= (hash >> 11);
@@ -61,15 +57,11 @@ static inline uint32_t batadv_choose_claim(const void *data, uint32_t size)
static inline uint32_t batadv_choose_backbone_gw(const void *data,
uint32_t size)
{
- const unsigned char *key = data;
+ struct batadv_claim *claim = (struct batadv_claim *)data;
uint32_t hash = 0;
- size_t i;
- for (i = 0; i < ETH_ALEN + sizeof(short); i++) {
- hash += key[i];
- hash += (hash << 10);
- hash ^= (hash >> 6);
- }
+ hash = batadv_hash_bytes(hash, &claim->addr, sizeof(claim->addr));
+ hash = batadv_hash_bytes(hash, &claim->vid, sizeof(claim->vid));
hash += (hash << 3);
hash ^= (hash >> 11);
@@ -85,8 +77,15 @@ static int batadv_compare_backbone_gw(const struct hlist_node *node,
{
const void *data1 = container_of(node, struct batadv_backbone_gw,
hash_entry);
+ const struct batadv_backbone_gw *gw1 = data1, *gw2 = data2;
+
+ if (!batadv_compare_eth(gw1->orig, gw2->orig))
+ return 0;
- return (memcmp(data1, data2, ETH_ALEN + sizeof(short)) == 0 ? 1 : 0);
+ if (gw1->vid != gw2->vid)
+ return 0;
+
+ return 1;
}
/* compares address and vid of two claims */
@@ -95,8 +94,15 @@ static int batadv_compare_claim(const struct hlist_node *node,
{
const void *data1 = container_of(node, struct batadv_claim,
hash_entry);
+ const struct batadv_claim *cl1 = data1, *cl2 = data2;
- return (memcmp(data1, data2, ETH_ALEN + sizeof(short)) == 0 ? 1 : 0);
+ if (!batadv_compare_eth(cl1->addr, cl2->addr))
+ return 0;
+
+ if (cl1->vid != cl2->vid)
+ return 0;
+
+ return 1;
}
/* free a backbone gw */
@@ -362,7 +368,7 @@ out:
*/
static struct batadv_backbone_gw *
batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig,
- short vid)
+ short vid, bool own_backbone)
{
struct batadv_backbone_gw *entry;
struct batadv_orig_node *orig_node;
@@ -386,6 +392,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig,
entry->crc = BATADV_BLA_CRC_INIT;
entry->bat_priv = bat_priv;
atomic_set(&entry->request_sent, 0);
+ atomic_set(&entry->wait_periods, 0);
memcpy(entry->orig, orig, ETH_ALEN);
/* one for the hash, one for returning */
@@ -409,6 +416,16 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig,
"became a backbone gateway");
batadv_orig_node_free_ref(orig_node);
}
+
+ if (own_backbone) {
+ batadv_bla_send_announce(bat_priv, entry);
+
+ /* this will be decreased in the worker thread */
+ atomic_inc(&entry->request_sent);
+ atomic_set(&entry->wait_periods, BATADV_BLA_WAIT_PERIODS);
+ atomic_inc(&bat_priv->bla.num_requests);
+ }
+
return entry;
}
@@ -424,7 +441,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv,
backbone_gw = batadv_bla_get_backbone_gw(bat_priv,
primary_if->net_dev->dev_addr,
- vid);
+ vid, true);
if (unlikely(!backbone_gw))
return;
@@ -632,7 +649,8 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv,
if (memcmp(an_addr, batadv_announce_mac, 4) != 0)
return 0;
- backbone_gw = batadv_bla_get_backbone_gw(bat_priv, backbone_addr, vid);
+ backbone_gw = batadv_bla_get_backbone_gw(bat_priv, backbone_addr, vid,
+ false);
if (unlikely(!backbone_gw))
return 1;
@@ -730,7 +748,8 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv,
/* register the gateway if not yet available, and add the claim. */
- backbone_gw = batadv_bla_get_backbone_gw(bat_priv, backbone_addr, vid);
+ backbone_gw = batadv_bla_get_backbone_gw(bat_priv, backbone_addr, vid,
+ false);
if (unlikely(!backbone_gw))
return 1;
@@ -1140,6 +1159,24 @@ static void batadv_bla_periodic_work(struct work_struct *work)
backbone_gw->lasttime = jiffies;
batadv_bla_send_announce(bat_priv, backbone_gw);
+
+ /* request_sent is only set after creation to avoid
+ * problems when we are not yet known as backbone gw
+ * in the backbone.
+ *
+ * We can reset this now after we waited some periods
+ * to give bridge forward delays and bla group forming
+ * some grace time.
+ */
+
+ if (atomic_read(&backbone_gw->request_sent) == 0)
+ continue;
+
+ if (!atomic_dec_and_test(&backbone_gw->wait_periods))
+ continue;
+
+ atomic_dec(&backbone_gw->bat_priv->bla.num_requests);
+ atomic_set(&backbone_gw->request_sent, 0);
}
rcu_read_unlock();
}
@@ -1167,6 +1204,8 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
uint16_t crc;
unsigned long entrytime;
+ spin_lock_init(&bat_priv->bla.bcast_duplist_lock);
+
batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla hash registering\n");
/* setting claim destination address */
@@ -1210,8 +1249,7 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
/**
* batadv_bla_check_bcast_duplist
* @bat_priv: the bat priv with all the soft interface information
- * @bcast_packet: originator mac address
- * @hdr_size: maximum length of the frame
+ * @skb: contains the bcast_packet to be checked
*
* check if it is on our broadcast list. Another gateway might
* have sent the same packet because it is connected to the same backbone,
@@ -1223,20 +1261,19 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
* the same host however as this might be intended.
*/
int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
- struct batadv_bcast_packet *bcast_packet,
- int hdr_size)
+ struct sk_buff *skb)
{
- int i, length, curr;
- uint8_t *content;
- uint16_t crc;
+ int i, curr, ret = 0;
+ __be32 crc;
+ struct batadv_bcast_packet *bcast_packet;
struct batadv_bcast_duplist_entry *entry;
- length = hdr_size - sizeof(*bcast_packet);
- content = (uint8_t *)bcast_packet;
- content += sizeof(*bcast_packet);
+ bcast_packet = (struct batadv_bcast_packet *)skb->data;
/* calculate the crc ... */
- crc = crc16(0, content, length);
+ crc = batadv_skb_crc32(skb, (u8 *)(bcast_packet + 1));
+
+ spin_lock_bh(&bat_priv->bla.bcast_duplist_lock);
for (i = 0; i < BATADV_DUPLIST_SIZE; i++) {
curr = (bat_priv->bla.bcast_duplist_curr + i);
@@ -1259,9 +1296,12 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
/* this entry seems to match: same crc, not too old,
* and from another gw. therefore return 1 to forbid it.
*/
- return 1;
+ ret = 1;
+ goto out;
}
- /* not found, add a new entry (overwrite the oldest entry) */
+ /* not found, add a new entry (overwrite the oldest entry)
+ * and allow it, its the first occurence.
+ */
curr = (bat_priv->bla.bcast_duplist_curr + BATADV_DUPLIST_SIZE - 1);
curr %= BATADV_DUPLIST_SIZE;
entry = &bat_priv->bla.bcast_duplist[curr];
@@ -1270,8 +1310,10 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
memcpy(entry->orig, bcast_packet->orig, ETH_ALEN);
bat_priv->bla.bcast_duplist_curr = curr;
- /* allow it, its the first occurence. */
- return 0;
+out:
+ spin_unlock_bh(&bat_priv->bla.bcast_duplist_lock);
+
+ return ret;
}
@@ -1576,23 +1618,11 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
struct hlist_head *head;
uint32_t i;
bool is_own;
- int ret = 0;
uint8_t *primary_addr;
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if) {
- ret = seq_printf(seq,
- "BATMAN mesh %s disabled - please specify interfaces to enable it\n",
- net_dev->name);
- goto out;
- }
-
- if (primary_if->if_status != BATADV_IF_ACTIVE) {
- ret = seq_printf(seq,
- "BATMAN mesh %s disabled - primary interface not active\n",
- net_dev->name);
+ primary_if = batadv_seq_print_text_primary_if_get(seq);
+ if (!primary_if)
goto out;
- }
primary_addr = primary_if->net_dev->dev_addr;
seq_printf(seq,
@@ -1619,7 +1649,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
out:
if (primary_if)
batadv_hardif_free_ref(primary_if);
- return ret;
+ return 0;
}
int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
@@ -1634,23 +1664,11 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
int secs, msecs;
uint32_t i;
bool is_own;
- int ret = 0;
uint8_t *primary_addr;
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if) {
- ret = seq_printf(seq,
- "BATMAN mesh %s disabled - please specify interfaces to enable it\n",
- net_dev->name);
- goto out;
- }
-
- if (primary_if->if_status != BATADV_IF_ACTIVE) {
- ret = seq_printf(seq,
- "BATMAN mesh %s disabled - primary interface not active\n",
- net_dev->name);
+ primary_if = batadv_seq_print_text_primary_if_get(seq);
+ if (!primary_if)
goto out;
- }
primary_addr = primary_if->net_dev->dev_addr;
seq_printf(seq,
@@ -1684,5 +1702,5 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
out:
if (primary_if)
batadv_hardif_free_ref(primary_if);
- return ret;
+ return 0;
}
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 789cb73bde67..196d9a0254bc 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -31,8 +31,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq,
void *offset);
int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig);
int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
- struct batadv_bcast_packet *bcast_packet,
- int hdr_size);
+ struct sk_buff *skb);
void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
struct batadv_hard_iface *oldif);
@@ -81,8 +80,7 @@ static inline int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv,
static inline int
batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
- struct batadv_bcast_packet *bcast_packet,
- int hdr_size)
+ struct sk_buff *skb)
{
return 0;
}
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 391d4fb2026f..6f58ddd53bff 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -31,6 +31,7 @@
#include "vis.h"
#include "icmp_socket.h"
#include "bridge_loop_avoidance.h"
+#include "distributed-arp-table.h"
static struct dentry *batadv_debugfs;
@@ -99,15 +100,17 @@ int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
static int batadv_log_open(struct inode *inode, struct file *file)
{
+ if (!try_module_get(THIS_MODULE))
+ return -EBUSY;
+
nonseekable_open(inode, file);
file->private_data = inode->i_private;
- batadv_inc_module_count();
return 0;
}
static int batadv_log_release(struct inode *inode, struct file *file)
{
- batadv_dec_module_count();
+ module_put(THIS_MODULE);
return 0;
}
@@ -278,6 +281,19 @@ static int batadv_bla_backbone_table_open(struct inode *inode,
#endif
+#ifdef CONFIG_BATMAN_ADV_DAT
+/**
+ * batadv_dat_cache_open - Prepare file handler for reads from dat_chache
+ * @inode: inode which was opened
+ * @file: file handle to be initialized
+ */
+static int batadv_dat_cache_open(struct inode *inode, struct file *file)
+{
+ struct net_device *net_dev = (struct net_device *)inode->i_private;
+ return single_open(file, batadv_dat_cache_seq_print_text, net_dev);
+}
+#endif
+
static int batadv_transtable_local_open(struct inode *inode, struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
@@ -307,7 +323,17 @@ struct batadv_debuginfo batadv_debuginfo_##_name = { \
} \
};
+/* the following attributes are general and therefore they will be directly
+ * placed in the BATADV_DEBUGFS_SUBDIR subdirectory of debugfs
+ */
static BATADV_DEBUGINFO(routing_algos, S_IRUGO, batadv_algorithms_open);
+
+static struct batadv_debuginfo *batadv_general_debuginfos[] = {
+ &batadv_debuginfo_routing_algos,
+ NULL,
+};
+
+/* The following attributes are per soft interface */
static BATADV_DEBUGINFO(originators, S_IRUGO, batadv_originators_open);
static BATADV_DEBUGINFO(gateways, S_IRUGO, batadv_gateways_open);
static BATADV_DEBUGINFO(transtable_global, S_IRUGO,
@@ -317,6 +343,9 @@ static BATADV_DEBUGINFO(bla_claim_table, S_IRUGO, batadv_bla_claim_table_open);
static BATADV_DEBUGINFO(bla_backbone_table, S_IRUGO,
batadv_bla_backbone_table_open);
#endif
+#ifdef CONFIG_BATMAN_ADV_DAT
+static BATADV_DEBUGINFO(dat_cache, S_IRUGO, batadv_dat_cache_open);
+#endif
static BATADV_DEBUGINFO(transtable_local, S_IRUGO,
batadv_transtable_local_open);
static BATADV_DEBUGINFO(vis_data, S_IRUGO, batadv_vis_data_open);
@@ -329,6 +358,9 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
&batadv_debuginfo_bla_claim_table,
&batadv_debuginfo_bla_backbone_table,
#endif
+#ifdef CONFIG_BATMAN_ADV_DAT
+ &batadv_debuginfo_dat_cache,
+#endif
&batadv_debuginfo_transtable_local,
&batadv_debuginfo_vis_data,
NULL,
@@ -336,7 +368,7 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
void batadv_debugfs_init(void)
{
- struct batadv_debuginfo *bat_debug;
+ struct batadv_debuginfo **bat_debug;
struct dentry *file;
batadv_debugfs = debugfs_create_dir(BATADV_DEBUGFS_SUBDIR, NULL);
@@ -344,17 +376,23 @@ void batadv_debugfs_init(void)
batadv_debugfs = NULL;
if (!batadv_debugfs)
- goto out;
+ goto err;
- bat_debug = &batadv_debuginfo_routing_algos;
- file = debugfs_create_file(bat_debug->attr.name,
- S_IFREG | bat_debug->attr.mode,
- batadv_debugfs, NULL, &bat_debug->fops);
- if (!file)
- pr_err("Can't add debugfs file: %s\n", bat_debug->attr.name);
+ for (bat_debug = batadv_general_debuginfos; *bat_debug; ++bat_debug) {
+ file = debugfs_create_file(((*bat_debug)->attr).name,
+ S_IFREG | ((*bat_debug)->attr).mode,
+ batadv_debugfs, NULL,
+ &(*bat_debug)->fops);
+ if (!file) {
+ pr_err("Can't add general debugfs file: %s\n",
+ ((*bat_debug)->attr).name);
+ goto err;
+ }
+ }
-out:
return;
+err:
+ debugfs_remove_recursive(batadv_debugfs);
}
void batadv_debugfs_destroy(void)
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
new file mode 100644
index 000000000000..8e1d89d2b1c1
--- /dev/null
+++ b/net/batman-adv/distributed-arp-table.c
@@ -0,0 +1,1066 @@
+/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors:
+ *
+ * Antonio Quartulli
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/if_ether.h>
+#include <linux/if_arp.h>
+#include <net/arp.h>
+
+#include "main.h"
+#include "hash.h"
+#include "distributed-arp-table.h"
+#include "hard-interface.h"
+#include "originator.h"
+#include "send.h"
+#include "types.h"
+#include "translation-table.h"
+#include "unicast.h"
+
+static void batadv_dat_purge(struct work_struct *work);
+
+/**
+ * batadv_dat_start_timer - initialise the DAT periodic worker
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_dat_start_timer(struct batadv_priv *bat_priv)
+{
+ INIT_DELAYED_WORK(&bat_priv->dat.work, batadv_dat_purge);
+ queue_delayed_work(batadv_event_workqueue, &bat_priv->dat.work,
+ msecs_to_jiffies(10000));
+}
+
+/**
+ * batadv_dat_entry_free_ref - decrements the dat_entry refcounter and possibly
+ * free it
+ * @dat_entry: the oentry to free
+ */
+static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry)
+{
+ if (atomic_dec_and_test(&dat_entry->refcount))
+ kfree_rcu(dat_entry, rcu);
+}
+
+/**
+ * batadv_dat_to_purge - checks whether a dat_entry has to be purged or not
+ * @dat_entry: the entry to check
+ *
+ * Returns true if the entry has to be purged now, false otherwise
+ */
+static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry)
+{
+ return batadv_has_timed_out(dat_entry->last_update,
+ BATADV_DAT_ENTRY_TIMEOUT);
+}
+
+/**
+ * __batadv_dat_purge - delete entries from the DAT local storage
+ * @bat_priv: the bat priv with all the soft interface information
+ * @to_purge: function in charge to decide whether an entry has to be purged or
+ * not. This function takes the dat_entry as argument and has to
+ * returns a boolean value: true is the entry has to be deleted,
+ * false otherwise
+ *
+ * Loops over each entry in the DAT local storage and delete it if and only if
+ * the to_purge function passed as argument returns true
+ */
+static void __batadv_dat_purge(struct batadv_priv *bat_priv,
+ bool (*to_purge)(struct batadv_dat_entry *))
+{
+ spinlock_t *list_lock; /* protects write access to the hash lists */
+ struct batadv_dat_entry *dat_entry;
+ struct hlist_node *node, *node_tmp;
+ struct hlist_head *head;
+ uint32_t i;
+
+ if (!bat_priv->dat.hash)
+ return;
+
+ for (i = 0; i < bat_priv->dat.hash->size; i++) {
+ head = &bat_priv->dat.hash->table[i];
+ list_lock = &bat_priv->dat.hash->list_locks[i];
+
+ spin_lock_bh(list_lock);
+ hlist_for_each_entry_safe(dat_entry, node, node_tmp, head,
+ hash_entry) {
+ /* if an helper function has been passed as parameter,
+ * ask it if the entry has to be purged or not
+ */
+ if (to_purge && !to_purge(dat_entry))
+ continue;
+
+ hlist_del_rcu(node);
+ batadv_dat_entry_free_ref(dat_entry);
+ }
+ spin_unlock_bh(list_lock);
+ }
+}
+
+/**
+ * batadv_dat_purge - periodic task that deletes old entries from the local DAT
+ * hash table
+ * @work: kernel work struct
+ */
+static void batadv_dat_purge(struct work_struct *work)
+{
+ struct delayed_work *delayed_work;
+ struct batadv_priv_dat *priv_dat;
+ struct batadv_priv *bat_priv;
+
+ delayed_work = container_of(work, struct delayed_work, work);
+ priv_dat = container_of(delayed_work, struct batadv_priv_dat, work);
+ bat_priv = container_of(priv_dat, struct batadv_priv, dat);
+
+ __batadv_dat_purge(bat_priv, batadv_dat_to_purge);
+ batadv_dat_start_timer(bat_priv);
+}
+
+/**
+ * batadv_compare_dat - comparing function used in the local DAT hash table
+ * @node: node in the local table
+ * @data2: second object to compare the node to
+ *
+ * Returns 1 if the two entry are the same, 0 otherwise
+ */
+static int batadv_compare_dat(const struct hlist_node *node, const void *data2)
+{
+ const void *data1 = container_of(node, struct batadv_dat_entry,
+ hash_entry);
+
+ return (memcmp(data1, data2, sizeof(__be32)) == 0 ? 1 : 0);
+}
+
+/**
+ * batadv_arp_hw_src - extract the hw_src field from an ARP packet
+ * @skb: ARP packet
+ * @hdr_size: size of the possible header before the ARP packet
+ *
+ * Returns the value of the hw_src field in the ARP packet
+ */
+static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size)
+{
+ uint8_t *addr;
+
+ addr = (uint8_t *)(skb->data + hdr_size);
+ addr += ETH_HLEN + sizeof(struct arphdr);
+
+ return addr;
+}
+
+/**
+ * batadv_arp_ip_src - extract the ip_src field from an ARP packet
+ * @skb: ARP packet
+ * @hdr_size: size of the possible header before the ARP packet
+ *
+ * Returns the value of the ip_src field in the ARP packet
+ */
+static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size)
+{
+ return *(__be32 *)(batadv_arp_hw_src(skb, hdr_size) + ETH_ALEN);
+}
+
+/**
+ * batadv_arp_hw_dst - extract the hw_dst field from an ARP packet
+ * @skb: ARP packet
+ * @hdr_size: size of the possible header before the ARP packet
+ *
+ * Returns the value of the hw_dst field in the ARP packet
+ */
+static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size)
+{
+ return batadv_arp_hw_src(skb, hdr_size) + ETH_ALEN + 4;
+}
+
+/**
+ * batadv_arp_ip_dst - extract the ip_dst field from an ARP packet
+ * @skb: ARP packet
+ * @hdr_size: size of the possible header before the ARP packet
+ *
+ * Returns the value of the ip_dst field in the ARP packet
+ */
+static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size)
+{
+ return *(__be32 *)(batadv_arp_hw_src(skb, hdr_size) + ETH_ALEN * 2 + 4);
+}
+
+/**
+ * batadv_hash_dat - compute the hash value for an IP address
+ * @data: data to hash
+ * @size: size of the hash table
+ *
+ * Returns the selected index in the hash table for the given data
+ */
+static uint32_t batadv_hash_dat(const void *data, uint32_t size)
+{
+ const unsigned char *key = data;
+ uint32_t hash = 0;
+ size_t i;
+
+ for (i = 0; i < 4; i++) {
+ hash += key[i];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ hash += (hash << 15);
+
+ return hash % size;
+}
+
+/**
+ * batadv_dat_entry_hash_find - looks for a given dat_entry in the local hash
+ * table
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ip: search key
+ *
+ * Returns the dat_entry if found, NULL otherwise
+ */
+static struct batadv_dat_entry *
+batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct batadv_dat_entry *dat_entry, *dat_entry_tmp = NULL;
+ struct batadv_hashtable *hash = bat_priv->dat.hash;
+ uint32_t index;
+
+ if (!hash)
+ return NULL;
+
+ index = batadv_hash_dat(&ip, hash->size);
+ head = &hash->table[index];
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(dat_entry, node, head, hash_entry) {
+ if (dat_entry->ip != ip)
+ continue;
+
+ if (!atomic_inc_not_zero(&dat_entry->refcount))
+ continue;
+
+ dat_entry_tmp = dat_entry;
+ break;
+ }
+ rcu_read_unlock();
+
+ return dat_entry_tmp;
+}
+
+/**
+ * batadv_dat_entry_add - add a new dat entry or update it if already exists
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ip: ipv4 to add/edit
+ * @mac_addr: mac address to assign to the given ipv4
+ */
+static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
+ uint8_t *mac_addr)
+{
+ struct batadv_dat_entry *dat_entry;
+ int hash_added;
+
+ dat_entry = batadv_dat_entry_hash_find(bat_priv, ip);
+ /* if this entry is already known, just update it */
+ if (dat_entry) {
+ if (!batadv_compare_eth(dat_entry->mac_addr, mac_addr))
+ memcpy(dat_entry->mac_addr, mac_addr, ETH_ALEN);
+ dat_entry->last_update = jiffies;
+ batadv_dbg(BATADV_DBG_DAT, bat_priv,
+ "Entry updated: %pI4 %pM\n", &dat_entry->ip,
+ dat_entry->mac_addr);
+ goto out;
+ }
+
+ dat_entry = kmalloc(sizeof(*dat_entry), GFP_ATOMIC);
+ if (!dat_entry)
+ goto out;
+
+ dat_entry->ip = ip;
+ memcpy(dat_entry->mac_addr, mac_addr, ETH_ALEN);
+ dat_entry->last_update = jiffies;
+ atomic_set(&dat_entry->refcount, 2);
+
+ hash_added = batadv_hash_add(bat_priv->dat.hash, batadv_compare_dat,
+ batadv_hash_dat, &dat_entry->ip,
+ &dat_entry->hash_entry);
+
+ if (unlikely(hash_added != 0)) {
+ /* remove the reference for the hash */
+ batadv_dat_entry_free_ref(dat_entry);
+ goto out;
+ }
+
+ batadv_dbg(BATADV_DBG_DAT, bat_priv, "New entry added: %pI4 %pM\n",
+ &dat_entry->ip, dat_entry->mac_addr);
+
+out:
+ if (dat_entry)
+ batadv_dat_entry_free_ref(dat_entry);
+}
+
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+
+/**
+ * batadv_dbg_arp - print a debug message containing all the ARP packet details
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: ARP packet
+ * @type: ARP type
+ * @hdr_size: size of the possible header before the ARP packet
+ * @msg: message to print together with the debugging information
+ */
+static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ uint16_t type, int hdr_size, char *msg)
+{
+ struct batadv_unicast_4addr_packet *unicast_4addr_packet;
+ struct batadv_bcast_packet *bcast_pkt;
+ uint8_t *orig_addr;
+ __be32 ip_src, ip_dst;
+
+ if (msg)
+ batadv_dbg(BATADV_DBG_DAT, bat_priv, "%s\n", msg);
+
+ ip_src = batadv_arp_ip_src(skb, hdr_size);
+ ip_dst = batadv_arp_ip_dst(skb, hdr_size);
+ batadv_dbg(BATADV_DBG_DAT, bat_priv,
+ "ARP MSG = [src: %pM-%pI4 dst: %pM-%pI4]\n",
+ batadv_arp_hw_src(skb, hdr_size), &ip_src,
+ batadv_arp_hw_dst(skb, hdr_size), &ip_dst);
+
+ if (hdr_size == 0)
+ return;
+
+ /* if the ARP packet is encapsulated in a batman packet, let's print
+ * some debug messages
+ */
+ unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
+
+ switch (unicast_4addr_packet->u.header.packet_type) {
+ case BATADV_UNICAST:
+ batadv_dbg(BATADV_DBG_DAT, bat_priv,
+ "* encapsulated within a UNICAST packet\n");
+ break;
+ case BATADV_UNICAST_4ADDR:
+ batadv_dbg(BATADV_DBG_DAT, bat_priv,
+ "* encapsulated within a UNICAST_4ADDR packet (src: %pM)\n",
+ unicast_4addr_packet->src);
+ switch (unicast_4addr_packet->subtype) {
+ case BATADV_P_DAT_DHT_PUT:
+ batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: DAT_DHT_PUT\n");
+ break;
+ case BATADV_P_DAT_DHT_GET:
+ batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: DAT_DHT_GET\n");
+ break;
+ case BATADV_P_DAT_CACHE_REPLY:
+ batadv_dbg(BATADV_DBG_DAT, bat_priv,
+ "* type: DAT_CACHE_REPLY\n");
+ break;
+ case BATADV_P_DATA:
+ batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: DATA\n");
+ break;
+ default:
+ batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: Unknown (%u)!\n",
+ unicast_4addr_packet->u.header.packet_type);
+ }
+ break;
+ case BATADV_BCAST:
+ bcast_pkt = (struct batadv_bcast_packet *)unicast_4addr_packet;
+ orig_addr = bcast_pkt->orig;
+ batadv_dbg(BATADV_DBG_DAT, bat_priv,
+ "* encapsulated within a BCAST packet (src: %pM)\n",
+ orig_addr);
+ break;
+ default:
+ batadv_dbg(BATADV_DBG_DAT, bat_priv,
+ "* encapsulated within an unknown packet type (0x%x)\n",
+ unicast_4addr_packet->u.header.packet_type);
+ }
+}
+
+#else
+
+static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ uint16_t type, int hdr_size, char *msg)
+{
+}
+
+#endif /* CONFIG_BATMAN_ADV_DEBUG */
+
+/**
+ * batadv_is_orig_node_eligible - check whether a node can be a DHT candidate
+ * @res: the array with the already selected candidates
+ * @select: number of already selected candidates
+ * @tmp_max: address of the currently evaluated node
+ * @max: current round max address
+ * @last_max: address of the last selected candidate
+ * @candidate: orig_node under evaluation
+ * @max_orig_node: last selected candidate
+ *
+ * Returns true if the node has been elected as next candidate or false othrwise
+ */
+static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
+ int select, batadv_dat_addr_t tmp_max,
+ batadv_dat_addr_t max,
+ batadv_dat_addr_t last_max,
+ struct batadv_orig_node *candidate,
+ struct batadv_orig_node *max_orig_node)
+{
+ bool ret = false;
+ int j;
+
+ /* Check if this node has already been selected... */
+ for (j = 0; j < select; j++)
+ if (res[j].orig_node == candidate)
+ break;
+ /* ..and possibly skip it */
+ if (j < select)
+ goto out;
+ /* sanity check: has it already been selected? This should not happen */
+ if (tmp_max > last_max)
+ goto out;
+ /* check if during this iteration an originator with a closer dht
+ * address has already been found
+ */
+ if (tmp_max < max)
+ goto out;
+ /* this is an hash collision with the temporary selected node. Choose
+ * the one with the lowest address
+ */
+ if ((tmp_max == max) &&
+ (batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0))
+ goto out;
+
+ ret = true;
+out:
+ return ret;
+}
+
+/**
+ * batadv_choose_next_candidate - select the next DHT candidate
+ * @bat_priv: the bat priv with all the soft interface information
+ * @cands: candidates array
+ * @select: number of candidates already present in the array
+ * @ip_key: key to look up in the DHT
+ * @last_max: pointer where the address of the selected candidate will be saved
+ */
+static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
+ struct batadv_dat_candidate *cands,
+ int select, batadv_dat_addr_t ip_key,
+ batadv_dat_addr_t *last_max)
+{
+ batadv_dat_addr_t max = 0, tmp_max = 0;
+ struct batadv_orig_node *orig_node, *max_orig_node = NULL;
+ struct batadv_hashtable *hash = bat_priv->orig_hash;
+ struct hlist_node *node;
+ struct hlist_head *head;
+ int i;
+
+ /* if no node is eligible as candidate, leave the candidate type as
+ * NOT_FOUND
+ */
+ cands[select].type = BATADV_DAT_CANDIDATE_NOT_FOUND;
+
+ /* iterate over the originator list and find the node with closest
+ * dat_address which has not been selected yet
+ */
+ for (i = 0; i < hash->size; i++) {
+ head = &hash->table[i];
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
+ /* the dht space is a ring and addresses are unsigned */
+ tmp_max = BATADV_DAT_ADDR_MAX - orig_node->dat_addr +
+ ip_key;
+
+ if (!batadv_is_orig_node_eligible(cands, select,
+ tmp_max, max,
+ *last_max, orig_node,
+ max_orig_node))
+ continue;
+
+ if (!atomic_inc_not_zero(&orig_node->refcount))
+ continue;
+
+ max = tmp_max;
+ if (max_orig_node)
+ batadv_orig_node_free_ref(max_orig_node);
+ max_orig_node = orig_node;
+ }
+ rcu_read_unlock();
+ }
+ if (max_orig_node) {
+ cands[select].type = BATADV_DAT_CANDIDATE_ORIG;
+ cands[select].orig_node = max_orig_node;
+ batadv_dbg(BATADV_DBG_DAT, bat_priv,
+ "dat_select_candidates() %d: selected %pM addr=%u dist=%u\n",
+ select, max_orig_node->orig, max_orig_node->dat_addr,
+ max);
+ }
+ *last_max = max;
+}
+
+/**
+ * batadv_dat_select_candidates - selects the nodes which the DHT message has to
+ * be sent to
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ip_dst: ipv4 to look up in the DHT
+ *
+ * An originator O is selected if and only if its DHT_ID value is one of three
+ * closest values (from the LEFT, with wrap around if needed) then the hash
+ * value of the key. ip_dst is the key.
+ *
+ * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM
+ */
+static struct batadv_dat_candidate *
+batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
+{
+ int select;
+ batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key;
+ struct batadv_dat_candidate *res;
+
+ if (!bat_priv->orig_hash)
+ return NULL;
+
+ res = kmalloc(BATADV_DAT_CANDIDATES_NUM * sizeof(*res), GFP_ATOMIC);
+ if (!res)
+ return NULL;
+
+ ip_key = (batadv_dat_addr_t)batadv_hash_dat(&ip_dst,
+ BATADV_DAT_ADDR_MAX);
+
+ batadv_dbg(BATADV_DBG_DAT, bat_priv,
+ "dat_select_candidates(): IP=%pI4 hash(IP)=%u\n", &ip_dst,
+ ip_key);
+
+ for (select = 0; select < BATADV_DAT_CANDIDATES_NUM; select++)
+ batadv_choose_next_candidate(bat_priv, res, select, ip_key,
+ &last_max);
+
+ return res;
+}
+
+/**
+ * batadv_dat_send_data - send a payload to the selected candidates
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: payload to send
+ * @ip: the DHT key
+ * @packet_subtype: unicast4addr packet subtype to use
+ *
+ * In this function the skb is copied by means of pskb_copy() and is sent as
+ * unicast packet to each of the selected candidates
+ *
+ * Returns true if the packet is sent to at least one candidate, false otherwise
+ */
+static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, __be32 ip,
+ int packet_subtype)
+{
+ int i;
+ bool ret = false;
+ int send_status;
+ struct batadv_neigh_node *neigh_node = NULL;
+ struct sk_buff *tmp_skb;
+ struct batadv_dat_candidate *cand;
+
+ cand = batadv_dat_select_candidates(bat_priv, ip);
+ if (!cand)
+ goto out;
+
+ batadv_dbg(BATADV_DBG_DAT, bat_priv, "DHT_SEND for %pI4\n", &ip);
+
+ for (i = 0; i < BATADV_DAT_CANDIDATES_NUM; i++) {
+ if (cand[i].type == BATADV_DAT_CANDIDATE_NOT_FOUND)
+ continue;
+
+ neigh_node = batadv_orig_node_get_router(cand[i].orig_node);
+ if (!neigh_node)
+ goto free_orig;
+
+ tmp_skb = pskb_copy(skb, GFP_ATOMIC);
+ if (!batadv_unicast_4addr_prepare_skb(bat_priv, tmp_skb,
+ cand[i].orig_node,
+ packet_subtype)) {
+ kfree_skb(tmp_skb);
+ goto free_neigh;
+ }
+
+ send_status = batadv_send_skb_packet(tmp_skb,
+ neigh_node->if_incoming,
+ neigh_node->addr);
+ if (send_status == NET_XMIT_SUCCESS) {
+ /* count the sent packet */
+ switch (packet_subtype) {
+ case BATADV_P_DAT_DHT_GET:
+ batadv_inc_counter(bat_priv,
+ BATADV_CNT_DAT_GET_TX);
+ break;
+ case BATADV_P_DAT_DHT_PUT:
+ batadv_inc_counter(bat_priv,
+ BATADV_CNT_DAT_PUT_TX);
+ break;
+ }
+
+ /* packet sent to a candidate: return true */
+ ret = true;
+ }
+free_neigh:
+ batadv_neigh_node_free_ref(neigh_node);
+free_orig:
+ batadv_orig_node_free_ref(cand[i].orig_node);
+ }
+
+out:
+ kfree(cand);
+ return ret;
+}
+
+/**
+ * batadv_dat_hash_free - free the local DAT hash table
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_dat_hash_free(struct batadv_priv *bat_priv)
+{
+ if (!bat_priv->dat.hash)
+ return;
+
+ __batadv_dat_purge(bat_priv, NULL);
+
+ batadv_hash_destroy(bat_priv->dat.hash);
+
+ bat_priv->dat.hash = NULL;
+}
+
+/**
+ * batadv_dat_init - initialise the DAT internals
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+int batadv_dat_init(struct batadv_priv *bat_priv)
+{
+ if (bat_priv->dat.hash)
+ return 0;
+
+ bat_priv->dat.hash = batadv_hash_new(1024);
+
+ if (!bat_priv->dat.hash)
+ return -ENOMEM;
+
+ batadv_dat_start_timer(bat_priv);
+
+ return 0;
+}
+
+/**
+ * batadv_dat_free - free the DAT internals
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+void batadv_dat_free(struct batadv_priv *bat_priv)
+{
+ cancel_delayed_work_sync(&bat_priv->dat.work);
+
+ batadv_dat_hash_free(bat_priv);
+}
+
+/**
+ * batadv_dat_cache_seq_print_text - print the local DAT hash table
+ * @seq: seq file to print on
+ * @offset: not used
+ */
+int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
+{
+ struct net_device *net_dev = (struct net_device *)seq->private;
+ struct batadv_priv *bat_priv = netdev_priv(net_dev);
+ struct batadv_hashtable *hash = bat_priv->dat.hash;
+ struct batadv_dat_entry *dat_entry;
+ struct batadv_hard_iface *primary_if;
+ struct hlist_node *node;
+ struct hlist_head *head;
+ unsigned long last_seen_jiffies;
+ int last_seen_msecs, last_seen_secs, last_seen_mins;
+ uint32_t i;
+
+ primary_if = batadv_seq_print_text_primary_if_get(seq);
+ if (!primary_if)
+ goto out;
+
+ seq_printf(seq, "Distributed ARP Table (%s):\n", net_dev->name);
+ seq_printf(seq, " %-7s %-13s %5s\n", "IPv4", "MAC",
+ "last-seen");
+
+ for (i = 0; i < hash->size; i++) {
+ head = &hash->table[i];
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(dat_entry, node, head, hash_entry) {
+ last_seen_jiffies = jiffies - dat_entry->last_update;
+ last_seen_msecs = jiffies_to_msecs(last_seen_jiffies);
+ last_seen_mins = last_seen_msecs / 60000;
+ last_seen_msecs = last_seen_msecs % 60000;
+ last_seen_secs = last_seen_msecs / 1000;
+
+ seq_printf(seq, " * %15pI4 %14pM %6i:%02i\n",
+ &dat_entry->ip, dat_entry->mac_addr,
+ last_seen_mins, last_seen_secs);
+ }
+ rcu_read_unlock();
+ }
+
+out:
+ if (primary_if)
+ batadv_hardif_free_ref(primary_if);
+ return 0;
+}
+
+/**
+ * batadv_arp_get_type - parse an ARP packet and gets the type
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: packet to analyse
+ * @hdr_size: size of the possible header before the ARP packet in the skb
+ *
+ * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise
+ */
+static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, int hdr_size)
+{
+ struct arphdr *arphdr;
+ struct ethhdr *ethhdr;
+ __be32 ip_src, ip_dst;
+ uint16_t type = 0;
+
+ /* pull the ethernet header */
+ if (unlikely(!pskb_may_pull(skb, hdr_size + ETH_HLEN)))
+ goto out;
+
+ ethhdr = (struct ethhdr *)(skb->data + hdr_size);
+
+ if (ethhdr->h_proto != htons(ETH_P_ARP))
+ goto out;
+
+ /* pull the ARP payload */
+ if (unlikely(!pskb_may_pull(skb, hdr_size + ETH_HLEN +
+ arp_hdr_len(skb->dev))))
+ goto out;
+
+ arphdr = (struct arphdr *)(skb->data + hdr_size + ETH_HLEN);
+
+ /* Check whether the ARP packet carries a valid
+ * IP information
+ */
+ if (arphdr->ar_hrd != htons(ARPHRD_ETHER))
+ goto out;
+
+ if (arphdr->ar_pro != htons(ETH_P_IP))
+ goto out;
+
+ if (arphdr->ar_hln != ETH_ALEN)
+ goto out;
+
+ if (arphdr->ar_pln != 4)
+ goto out;
+
+ /* Check for bad reply/request. If the ARP message is not sane, DAT
+ * will simply ignore it
+ */
+ ip_src = batadv_arp_ip_src(skb, hdr_size);
+ ip_dst = batadv_arp_ip_dst(skb, hdr_size);
+ if (ipv4_is_loopback(ip_src) || ipv4_is_multicast(ip_src) ||
+ ipv4_is_loopback(ip_dst) || ipv4_is_multicast(ip_dst))
+ goto out;
+
+ type = ntohs(arphdr->ar_op);
+out:
+ return type;
+}
+
+/**
+ * batadv_dat_snoop_outgoing_arp_request - snoop the ARP request and try to
+ * answer using DAT
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: packet to check
+ *
+ * Returns true if the message has been sent to the dht candidates, false
+ * otherwise. In case of true the message has to be enqueued to permit the
+ * fallback
+ */
+bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ uint16_t type = 0;
+ __be32 ip_dst, ip_src;
+ uint8_t *hw_src;
+ bool ret = false;
+ struct batadv_dat_entry *dat_entry = NULL;
+ struct sk_buff *skb_new;
+ struct batadv_hard_iface *primary_if = NULL;
+
+ if (!atomic_read(&bat_priv->distributed_arp_table))
+ goto out;
+
+ type = batadv_arp_get_type(bat_priv, skb, 0);
+ /* If the node gets an ARP_REQUEST it has to send a DHT_GET unicast
+ * message to the selected DHT candidates
+ */
+ if (type != ARPOP_REQUEST)
+ goto out;
+
+ batadv_dbg_arp(bat_priv, skb, type, 0, "Parsing outgoing ARP REQUEST");
+
+ ip_src = batadv_arp_ip_src(skb, 0);
+ hw_src = batadv_arp_hw_src(skb, 0);
+ ip_dst = batadv_arp_ip_dst(skb, 0);
+
+ batadv_dat_entry_add(bat_priv, ip_src, hw_src);
+
+ dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst);
+ if (dat_entry) {
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if)
+ goto out;
+
+ skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src,
+ primary_if->soft_iface, ip_dst, hw_src,
+ dat_entry->mac_addr, hw_src);
+ if (!skb_new)
+ goto out;
+
+ skb_reset_mac_header(skb_new);
+ skb_new->protocol = eth_type_trans(skb_new,
+ primary_if->soft_iface);
+ bat_priv->stats.rx_packets++;
+ bat_priv->stats.rx_bytes += skb->len + ETH_HLEN;
+ primary_if->soft_iface->last_rx = jiffies;
+
+ netif_rx(skb_new);
+ batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n");
+ ret = true;
+ } else {
+ /* Send the request on the DHT */
+ ret = batadv_dat_send_data(bat_priv, skb, ip_dst,
+ BATADV_P_DAT_DHT_GET);
+ }
+out:
+ if (dat_entry)
+ batadv_dat_entry_free_ref(dat_entry);
+ if (primary_if)
+ batadv_hardif_free_ref(primary_if);
+ return ret;
+}
+
+/**
+ * batadv_dat_snoop_incoming_arp_request - snoop the ARP request and try to
+ * answer using the local DAT storage
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: packet to check
+ * @hdr_size: size of the encapsulation header
+ *
+ * Returns true if the request has been answered, false otherwise
+ */
+bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, int hdr_size)
+{
+ uint16_t type;
+ __be32 ip_src, ip_dst;
+ uint8_t *hw_src;
+ struct sk_buff *skb_new;
+ struct batadv_hard_iface *primary_if = NULL;
+ struct batadv_dat_entry *dat_entry = NULL;
+ bool ret = false;
+ int err;
+
+ if (!atomic_read(&bat_priv->distributed_arp_table))
+ goto out;
+
+ type = batadv_arp_get_type(bat_priv, skb, hdr_size);
+ if (type != ARPOP_REQUEST)
+ goto out;
+
+ hw_src = batadv_arp_hw_src(skb, hdr_size);
+ ip_src = batadv_arp_ip_src(skb, hdr_size);
+ ip_dst = batadv_arp_ip_dst(skb, hdr_size);
+
+ batadv_dbg_arp(bat_priv, skb, type, hdr_size,
+ "Parsing incoming ARP REQUEST");
+
+ batadv_dat_entry_add(bat_priv, ip_src, hw_src);
+
+ dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst);
+ if (!dat_entry)
+ goto out;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if)
+ goto out;
+
+ skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src,
+ primary_if->soft_iface, ip_dst, hw_src,
+ dat_entry->mac_addr, hw_src);
+
+ if (!skb_new)
+ goto out;
+
+ /* to preserve backwards compatibility, here the node has to answer
+ * using the same packet type it received for the request. This is due
+ * to that if a node is not using the 4addr packet format it may not
+ * support it.
+ */
+ if (hdr_size == sizeof(struct batadv_unicast_4addr_packet))
+ err = batadv_unicast_4addr_send_skb(bat_priv, skb_new,
+ BATADV_P_DAT_CACHE_REPLY);
+ else
+ err = batadv_unicast_send_skb(bat_priv, skb_new);
+
+ if (!err) {
+ batadv_inc_counter(bat_priv, BATADV_CNT_DAT_CACHED_REPLY_TX);
+ ret = true;
+ }
+out:
+ if (dat_entry)
+ batadv_dat_entry_free_ref(dat_entry);
+ if (primary_if)
+ batadv_hardif_free_ref(primary_if);
+ if (ret)
+ kfree_skb(skb);
+ return ret;
+}
+
+/**
+ * batadv_dat_snoop_outgoing_arp_reply - snoop the ARP reply and fill the DHT
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: packet to check
+ */
+void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ uint16_t type;
+ __be32 ip_src, ip_dst;
+ uint8_t *hw_src, *hw_dst;
+
+ if (!atomic_read(&bat_priv->distributed_arp_table))
+ return;
+
+ type = batadv_arp_get_type(bat_priv, skb, 0);
+ if (type != ARPOP_REPLY)
+ return;
+
+ batadv_dbg_arp(bat_priv, skb, type, 0, "Parsing outgoing ARP REPLY");
+
+ hw_src = batadv_arp_hw_src(skb, 0);
+ ip_src = batadv_arp_ip_src(skb, 0);
+ hw_dst = batadv_arp_hw_dst(skb, 0);
+ ip_dst = batadv_arp_ip_dst(skb, 0);
+
+ batadv_dat_entry_add(bat_priv, ip_src, hw_src);
+ batadv_dat_entry_add(bat_priv, ip_dst, hw_dst);
+
+ /* Send the ARP reply to the candidates for both the IP addresses that
+ * the node got within the ARP reply
+ */
+ batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT);
+ batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT);
+}
+/**
+ * batadv_dat_snoop_incoming_arp_reply - snoop the ARP reply and fill the local
+ * DAT storage only
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: packet to check
+ * @hdr_size: siaze of the encapsulation header
+ */
+bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, int hdr_size)
+{
+ uint16_t type;
+ __be32 ip_src, ip_dst;
+ uint8_t *hw_src, *hw_dst;
+ bool ret = false;
+
+ if (!atomic_read(&bat_priv->distributed_arp_table))
+ goto out;
+
+ type = batadv_arp_get_type(bat_priv, skb, hdr_size);
+ if (type != ARPOP_REPLY)
+ goto out;
+
+ batadv_dbg_arp(bat_priv, skb, type, hdr_size,
+ "Parsing incoming ARP REPLY");
+
+ hw_src = batadv_arp_hw_src(skb, hdr_size);
+ ip_src = batadv_arp_ip_src(skb, hdr_size);
+ hw_dst = batadv_arp_hw_dst(skb, hdr_size);
+ ip_dst = batadv_arp_ip_dst(skb, hdr_size);
+
+ /* Update our internal cache with both the IP addresses the node got
+ * within the ARP reply
+ */
+ batadv_dat_entry_add(bat_priv, ip_src, hw_src);
+ batadv_dat_entry_add(bat_priv, ip_dst, hw_dst);
+
+ /* if this REPLY is directed to a client of mine, let's deliver the
+ * packet to the interface
+ */
+ ret = !batadv_is_my_client(bat_priv, hw_dst);
+out:
+ /* if ret == false -> packet has to be delivered to the interface */
+ return ret;
+}
+
+/**
+ * batadv_dat_drop_broadcast_packet - check if an ARP request has to be dropped
+ * (because the node has already got the reply via DAT) or not
+ * @bat_priv: the bat priv with all the soft interface information
+ * @forw_packet: the broadcast packet
+ *
+ * Returns true if the node can drop the packet, false otherwise
+ */
+bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
+ struct batadv_forw_packet *forw_packet)
+{
+ uint16_t type;
+ __be32 ip_dst;
+ struct batadv_dat_entry *dat_entry = NULL;
+ bool ret = false;
+ const size_t bcast_len = sizeof(struct batadv_bcast_packet);
+
+ if (!atomic_read(&bat_priv->distributed_arp_table))
+ goto out;
+
+ /* If this packet is an ARP_REQUEST and the node already has the
+ * information that it is going to ask, then the packet can be dropped
+ */
+ if (forw_packet->num_packets)
+ goto out;
+
+ type = batadv_arp_get_type(bat_priv, forw_packet->skb, bcast_len);
+ if (type != ARPOP_REQUEST)
+ goto out;
+
+ ip_dst = batadv_arp_ip_dst(forw_packet->skb, bcast_len);
+ dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst);
+ /* check if the node already got this entry */
+ if (!dat_entry) {
+ batadv_dbg(BATADV_DBG_DAT, bat_priv,
+ "ARP Request for %pI4: fallback\n", &ip_dst);
+ goto out;
+ }
+
+ batadv_dbg(BATADV_DBG_DAT, bat_priv,
+ "ARP Request for %pI4: fallback prevented\n", &ip_dst);
+ ret = true;
+
+out:
+ if (dat_entry)
+ batadv_dat_entry_free_ref(dat_entry);
+ return ret;
+}
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
new file mode 100644
index 000000000000..d060c033e7de
--- /dev/null
+++ b/net/batman-adv/distributed-arp-table.h
@@ -0,0 +1,167 @@
+/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors:
+ *
+ * Antonio Quartulli
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef _NET_BATMAN_ADV_ARP_H_
+#define _NET_BATMAN_ADV_ARP_H_
+
+#ifdef CONFIG_BATMAN_ADV_DAT
+
+#include "types.h"
+#include "originator.h"
+
+#include <linux/if_arp.h>
+
+#define BATADV_DAT_ADDR_MAX ((batadv_dat_addr_t)~(batadv_dat_addr_t)0)
+
+bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
+ struct sk_buff *skb);
+bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, int hdr_size);
+void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
+ struct sk_buff *skb);
+bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, int hdr_size);
+bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
+ struct batadv_forw_packet *forw_packet);
+
+/**
+ * batadv_dat_init_orig_node_addr - assign a DAT address to the orig_node
+ * @orig_node: the node to assign the DAT address to
+ */
+static inline void
+batadv_dat_init_orig_node_addr(struct batadv_orig_node *orig_node)
+{
+ uint32_t addr;
+
+ addr = batadv_choose_orig(orig_node->orig, BATADV_DAT_ADDR_MAX);
+ orig_node->dat_addr = (batadv_dat_addr_t)addr;
+}
+
+/**
+ * batadv_dat_init_own_addr - assign a DAT address to the node itself
+ * @bat_priv: the bat priv with all the soft interface information
+ * @primary_if: a pointer to the primary interface
+ */
+static inline void
+batadv_dat_init_own_addr(struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *primary_if)
+{
+ uint32_t addr;
+
+ addr = batadv_choose_orig(primary_if->net_dev->dev_addr,
+ BATADV_DAT_ADDR_MAX);
+
+ bat_priv->dat.addr = (batadv_dat_addr_t)addr;
+}
+
+int batadv_dat_init(struct batadv_priv *bat_priv);
+void batadv_dat_free(struct batadv_priv *bat_priv);
+int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset);
+
+/**
+ * batadv_dat_inc_counter - increment the correct DAT packet counter
+ * @bat_priv: the bat priv with all the soft interface information
+ * @subtype: the 4addr subtype of the packet to be counted
+ *
+ * Updates the ethtool statistics for the received packet if it is a DAT subtype
+ */
+static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv,
+ uint8_t subtype)
+{
+ switch (subtype) {
+ case BATADV_P_DAT_DHT_GET:
+ batadv_inc_counter(bat_priv,
+ BATADV_CNT_DAT_GET_RX);
+ break;
+ case BATADV_P_DAT_DHT_PUT:
+ batadv_inc_counter(bat_priv,
+ BATADV_CNT_DAT_PUT_RX);
+ break;
+ }
+}
+
+#else
+
+static inline bool
+batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ return false;
+}
+
+static inline bool
+batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, int hdr_size)
+{
+ return false;
+}
+
+static inline bool
+batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ return false;
+}
+
+static inline bool
+batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, int hdr_size)
+{
+ return false;
+}
+
+static inline bool
+batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
+ struct batadv_forw_packet *forw_packet)
+{
+ return false;
+}
+
+static inline void
+batadv_dat_init_orig_node_addr(struct batadv_orig_node *orig_node)
+{
+}
+
+static inline void batadv_dat_init_own_addr(struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *iface)
+{
+}
+
+static inline void batadv_arp_change_timeout(struct net_device *soft_iface,
+ const char *name)
+{
+}
+
+static inline int batadv_dat_init(struct batadv_priv *bat_priv)
+{
+ return 0;
+}
+
+static inline void batadv_dat_free(struct batadv_priv *bat_priv)
+{
+}
+
+static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv,
+ uint8_t subtype)
+{
+}
+
+#endif /* CONFIG_BATMAN_ADV_DAT */
+
+#endif /* _NET_BATMAN_ADV_ARP_H_ */
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 15d67abc10a4..dd07c7e3654f 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -477,22 +477,11 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
struct batadv_hard_iface *primary_if;
struct batadv_gw_node *gw_node;
struct hlist_node *node;
- int gw_count = 0, ret = 0;
+ int gw_count = 0;
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if) {
- ret = seq_printf(seq,
- "BATMAN mesh %s disabled - please specify interfaces to enable it\n",
- net_dev->name);
+ primary_if = batadv_seq_print_text_primary_if_get(seq);
+ if (!primary_if)
goto out;
- }
-
- if (primary_if->if_status != BATADV_IF_ACTIVE) {
- ret = seq_printf(seq,
- "BATMAN mesh %s disabled - primary interface not active\n",
- net_dev->name);
- goto out;
- }
seq_printf(seq,
" %-12s (%s/%i) %17s [%10s]: gw_class ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n",
@@ -519,7 +508,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
out:
if (primary_if)
batadv_hardif_free_ref(primary_if);
- return ret;
+ return 0;
}
static bool batadv_is_type_dhcprequest(struct sk_buff *skb, int header_len)
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index d112fd6750b0..365ed74f3946 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -18,6 +18,7 @@
*/
#include "main.h"
+#include "distributed-arp-table.h"
#include "hard-interface.h"
#include "soft-interface.h"
#include "send.h"
@@ -58,6 +59,45 @@ out:
return hard_iface;
}
+/**
+ * batadv_is_on_batman_iface - check if a device is a batman iface descendant
+ * @net_dev: the device to check
+ *
+ * If the user creates any virtual device on top of a batman-adv interface, it
+ * is important to prevent this new interface to be used to create a new mesh
+ * network (this behaviour would lead to a batman-over-batman configuration).
+ * This function recursively checks all the fathers of the device passed as
+ * argument looking for a batman-adv soft interface.
+ *
+ * Returns true if the device is descendant of a batman-adv mesh interface (or
+ * if it is a batman-adv interface itself), false otherwise
+ */
+static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
+{
+ struct net_device *parent_dev;
+ bool ret;
+
+ /* check if this is a batman-adv mesh interface */
+ if (batadv_softif_is_valid(net_dev))
+ return true;
+
+ /* no more parents..stop recursion */
+ if (net_dev->iflink == net_dev->ifindex)
+ return false;
+
+ /* recurse over the parent device */
+ parent_dev = dev_get_by_index(&init_net, net_dev->iflink);
+ /* if we got a NULL parent_dev there is something broken.. */
+ if (WARN(!parent_dev, "Cannot find parent device"))
+ return false;
+
+ ret = batadv_is_on_batman_iface(parent_dev);
+
+ if (parent_dev)
+ dev_put(parent_dev);
+ return ret;
+}
+
static int batadv_is_valid_iface(const struct net_device *net_dev)
{
if (net_dev->flags & IFF_LOOPBACK)
@@ -70,7 +110,7 @@ static int batadv_is_valid_iface(const struct net_device *net_dev)
return 0;
/* no batman over batman */
- if (batadv_softif_is_valid(net_dev))
+ if (batadv_is_on_batman_iface(net_dev))
return 0;
return 1;
@@ -109,6 +149,8 @@ static void batadv_primary_if_update_addr(struct batadv_priv *bat_priv,
if (!primary_if)
goto out;
+ batadv_dat_init_own_addr(bat_priv, primary_if);
+
skb = bat_priv->vis.my_info->skb_packet;
vis_packet = (struct batadv_vis_packet *)skb->data;
memcpy(vis_packet->vis_orig, primary_if->net_dev->dev_addr, ETH_ALEN);
@@ -450,8 +492,8 @@ batadv_hardif_add_interface(struct net_device *net_dev)
/* This can't be called via a bat_priv callback because
* we have no bat_priv yet.
*/
- atomic_set(&hard_iface->seqno, 1);
- hard_iface->packet_buff = NULL;
+ atomic_set(&hard_iface->bat_iv.ogm_seqno, 1);
+ hard_iface->bat_iv.ogm_buff = NULL;
return hard_iface;
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 977de9c75fc2..e05333905afd 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -82,6 +82,28 @@ static inline void batadv_hash_delete(struct batadv_hashtable *hash,
}
/**
+ * batadv_hash_bytes - hash some bytes and add them to the previous hash
+ * @hash: previous hash value
+ * @data: data to be hashed
+ * @size: number of bytes to be hashed
+ *
+ * Returns the new hash value.
+ */
+static inline uint32_t batadv_hash_bytes(uint32_t hash, void *data,
+ uint32_t size)
+{
+ const unsigned char *key = data;
+ int i;
+
+ for (i = 0; i < size; i++) {
+ hash += key[i];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+ return hash;
+}
+
+/**
* batadv_hash_add - adds data to the hashtable
* @hash: storage hash table
* @compare: callback to determine if 2 hash elements are identical
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index bde3cf747507..87ca8095b011 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -42,12 +42,16 @@ static int batadv_socket_open(struct inode *inode, struct file *file)
unsigned int i;
struct batadv_socket_client *socket_client;
+ if (!try_module_get(THIS_MODULE))
+ return -EBUSY;
+
nonseekable_open(inode, file);
socket_client = kmalloc(sizeof(*socket_client), GFP_KERNEL);
-
- if (!socket_client)
+ if (!socket_client) {
+ module_put(THIS_MODULE);
return -ENOMEM;
+ }
for (i = 0; i < ARRAY_SIZE(batadv_socket_client_hash); i++) {
if (!batadv_socket_client_hash[i]) {
@@ -59,6 +63,7 @@ static int batadv_socket_open(struct inode *inode, struct file *file)
if (i == ARRAY_SIZE(batadv_socket_client_hash)) {
pr_err("Error - can't add another packet client: maximum number of clients reached\n");
kfree(socket_client);
+ module_put(THIS_MODULE);
return -EXFULL;
}
@@ -71,7 +76,6 @@ static int batadv_socket_open(struct inode *inode, struct file *file)
file->private_data = socket_client;
- batadv_inc_module_count();
return 0;
}
@@ -96,7 +100,7 @@ static int batadv_socket_release(struct inode *inode, struct file *file)
spin_unlock_bh(&socket_client->lock);
kfree(socket_client);
- batadv_dec_module_count();
+ module_put(THIS_MODULE);
return 0;
}
@@ -173,13 +177,13 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
if (len >= sizeof(struct batadv_icmp_packet_rr))
packet_len = sizeof(struct batadv_icmp_packet_rr);
- skb = dev_alloc_skb(packet_len + ETH_HLEN);
+ skb = dev_alloc_skb(packet_len + ETH_HLEN + NET_IP_ALIGN);
if (!skb) {
len = -ENOMEM;
goto out;
}
- skb_reserve(skb, ETH_HLEN);
+ skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len);
if (copy_from_user(icmp_packet, buff, packet_len)) {
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index b4aa470bc4a6..f65a222b7b83 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -17,6 +17,8 @@
* 02110-1301, USA
*/
+#include <linux/crc32c.h>
+#include <linux/highmem.h>
#include "main.h"
#include "sysfs.h"
#include "debugfs.h"
@@ -29,6 +31,7 @@
#include "hard-interface.h"
#include "gateway_client.h"
#include "bridge_loop_avoidance.h"
+#include "distributed-arp-table.h"
#include "vis.h"
#include "hash.h"
#include "bat_algo.h"
@@ -128,6 +131,10 @@ int batadv_mesh_init(struct net_device *soft_iface)
if (ret < 0)
goto err;
+ ret = batadv_dat_init(bat_priv);
+ if (ret < 0)
+ goto err;
+
atomic_set(&bat_priv->gw.reselect, 0);
atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE);
@@ -155,21 +162,13 @@ void batadv_mesh_free(struct net_device *soft_iface)
batadv_bla_free(bat_priv);
+ batadv_dat_free(bat_priv);
+
free_percpu(bat_priv->bat_counters);
atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE);
}
-void batadv_inc_module_count(void)
-{
- try_module_get(THIS_MODULE);
-}
-
-void batadv_dec_module_count(void)
-{
- module_put(THIS_MODULE);
-}
-
int batadv_is_my_mac(const uint8_t *addr)
{
const struct batadv_hard_iface *hard_iface;
@@ -188,6 +187,42 @@ int batadv_is_my_mac(const uint8_t *addr)
return 0;
}
+/**
+ * batadv_seq_print_text_primary_if_get - called from debugfs table printing
+ * function that requires the primary interface
+ * @seq: debugfs table seq_file struct
+ *
+ * Returns primary interface if found or NULL otherwise.
+ */
+struct batadv_hard_iface *
+batadv_seq_print_text_primary_if_get(struct seq_file *seq)
+{
+ struct net_device *net_dev = (struct net_device *)seq->private;
+ struct batadv_priv *bat_priv = netdev_priv(net_dev);
+ struct batadv_hard_iface *primary_if;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+
+ if (!primary_if) {
+ seq_printf(seq,
+ "BATMAN mesh %s disabled - please specify interfaces to enable it\n",
+ net_dev->name);
+ goto out;
+ }
+
+ if (primary_if->if_status == BATADV_IF_ACTIVE)
+ goto out;
+
+ seq_printf(seq,
+ "BATMAN mesh %s disabled - primary interface not active\n",
+ net_dev->name);
+ batadv_hardif_free_ref(primary_if);
+ primary_if = NULL;
+
+out:
+ return primary_if;
+}
+
static int batadv_recv_unhandled_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if)
{
@@ -274,6 +309,8 @@ static void batadv_recv_handler_init(void)
/* batman icmp packet */
batadv_rx_handler[BATADV_ICMP] = batadv_recv_icmp_packet;
+ /* unicast with 4 addresses packet */
+ batadv_rx_handler[BATADV_UNICAST_4ADDR] = batadv_recv_unicast_packet;
/* unicast packet */
batadv_rx_handler[BATADV_UNICAST] = batadv_recv_unicast_packet;
/* fragmented unicast packet */
@@ -385,6 +422,38 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
return 0;
}
+/**
+ * batadv_skb_crc32 - calculate CRC32 of the whole packet and skip bytes in
+ * the header
+ * @skb: skb pointing to fragmented socket buffers
+ * @payload_ptr: Pointer to position inside the head buffer of the skb
+ * marking the start of the data to be CRC'ed
+ *
+ * payload_ptr must always point to an address in the skb head buffer and not to
+ * a fragment.
+ */
+__be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr)
+{
+ u32 crc = 0;
+ unsigned int from;
+ unsigned int to = skb->len;
+ struct skb_seq_state st;
+ const u8 *data;
+ unsigned int len;
+ unsigned int consumed = 0;
+
+ from = (unsigned int)(payload_ptr - skb->data);
+
+ skb_prepare_seq_read(skb, from, to, &st);
+ while ((len = skb_seq_read(consumed, &data, &st)) != 0) {
+ crc = crc32c(crc, data, len);
+ consumed += len;
+ }
+ skb_abort_seq_read(&st);
+
+ return htonl(crc);
+}
+
static int batadv_param_set_ra(const char *val, const struct kernel_param *kp)
{
struct batadv_algo_ops *bat_algo_ops;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index d57b746219de..2f85577086a7 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -26,7 +26,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2012.4.0"
+#define BATADV_SOURCE_VERSION "2012.5.0"
#endif
/* B.A.T.M.A.N. parameters */
@@ -44,6 +44,7 @@
#define BATADV_TT_LOCAL_TIMEOUT 3600000 /* in milliseconds */
#define BATADV_TT_CLIENT_ROAM_TIMEOUT 600000 /* in milliseconds */
#define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */
+#define BATADV_DAT_ENTRY_TIMEOUT (5*60000) /* 5 mins in milliseconds */
/* sliding packet range of received originator messages in sequence numbers
* (should be a multiple of our word size)
*/
@@ -73,6 +74,11 @@
#define BATADV_LOG_BUF_LEN 8192 /* has to be a power of 2 */
+/* msecs after which an ARP_REQUEST is sent in broadcast as fallback */
+#define ARP_REQ_DELAY 250
+/* numbers of originator to contact for any PUT/GET DHT operation */
+#define BATADV_DAT_CANDIDATES_NUM 3
+
#define BATADV_VIS_INTERVAL 5000 /* 5 seconds */
/* how much worse secondary interfaces may be to be considered as bonding
@@ -89,6 +95,7 @@
#define BATADV_BLA_PERIOD_LENGTH 10000 /* 10 seconds */
#define BATADV_BLA_BACKBONE_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 3)
#define BATADV_BLA_CLAIM_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 10)
+#define BATADV_BLA_WAIT_PERIODS 3
#define BATADV_DUPLIST_SIZE 16
#define BATADV_DUPLIST_TIMEOUT 500 /* 500 ms */
@@ -117,6 +124,9 @@ enum batadv_uev_type {
#define BATADV_GW_THRESHOLD 50
+#define BATADV_DAT_CANDIDATE_NOT_FOUND 0
+#define BATADV_DAT_CANDIDATE_ORIG 1
+
/* Debug Messages */
#ifdef pr_fmt
#undef pr_fmt
@@ -150,9 +160,9 @@ extern struct workqueue_struct *batadv_event_workqueue;
int batadv_mesh_init(struct net_device *soft_iface);
void batadv_mesh_free(struct net_device *soft_iface);
-void batadv_inc_module_count(void);
-void batadv_dec_module_count(void);
int batadv_is_my_mac(const uint8_t *addr);
+struct batadv_hard_iface *
+batadv_seq_print_text_primary_if_get(struct seq_file *seq);
int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *ptype,
struct net_device *orig_dev);
@@ -164,14 +174,24 @@ void batadv_recv_handler_unregister(uint8_t packet_type);
int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops);
int batadv_algo_select(struct batadv_priv *bat_priv, char *name);
int batadv_algo_seq_print_text(struct seq_file *seq, void *offset);
+__be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr);
-/* all messages related to routing / flooding / broadcasting / etc */
+/**
+ * enum batadv_dbg_level - available log levels
+ * @BATADV_DBG_BATMAN: OGM and TQ computations related messages
+ * @BATADV_DBG_ROUTES: route added / changed / deleted
+ * @BATADV_DBG_TT: translation table messages
+ * @BATADV_DBG_BLA: bridge loop avoidance messages
+ * @BATADV_DBG_DAT: ARP snooping and DAT related messages
+ * @BATADV_DBG_ALL: the union of all the above log levels
+ */
enum batadv_dbg_level {
BATADV_DBG_BATMAN = BIT(0),
- BATADV_DBG_ROUTES = BIT(1), /* route added / changed / deleted */
- BATADV_DBG_TT = BIT(2), /* translation table operations */
- BATADV_DBG_BLA = BIT(3), /* bridge loop avoidance */
- BATADV_DBG_ALL = 15,
+ BATADV_DBG_ROUTES = BIT(1),
+ BATADV_DBG_TT = BIT(2),
+ BATADV_DBG_BLA = BIT(3),
+ BATADV_DBG_DAT = BIT(4),
+ BATADV_DBG_ALL = 31,
};
#ifdef CONFIG_BATMAN_ADV_DEBUG
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index ac9bdf8f80a6..8c32cf1c2dec 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -18,6 +18,7 @@
*/
#include "main.h"
+#include "distributed-arp-table.h"
#include "originator.h"
#include "hash.h"
#include "translation-table.h"
@@ -220,9 +221,9 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv,
atomic_set(&orig_node->refcount, 2);
orig_node->tt_initialised = false;
- orig_node->tt_poss_change = false;
orig_node->bat_priv = bat_priv;
memcpy(orig_node->orig, addr, ETH_ALEN);
+ batadv_dat_init_orig_node_addr(orig_node);
orig_node->router = NULL;
orig_node->tt_crc = 0;
atomic_set(&orig_node->last_ttvn, 0);
@@ -415,23 +416,10 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
int last_seen_msecs;
unsigned long last_seen_jiffies;
uint32_t i;
- int ret = 0;
- primary_if = batadv_primary_if_get_selected(bat_priv);
-
- if (!primary_if) {
- ret = seq_printf(seq,
- "BATMAN mesh %s disabled - please specify interfaces to enable it\n",
- net_dev->name);
- goto out;
- }
-
- if (primary_if->if_status != BATADV_IF_ACTIVE) {
- ret = seq_printf(seq,
- "BATMAN mesh %s disabled - primary interface not active\n",
- net_dev->name);
+ primary_if = batadv_seq_print_text_primary_if_get(seq);
+ if (!primary_if)
goto out;
- }
seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n",
BATADV_SOURCE_VERSION, primary_if->net_dev->name,
@@ -485,7 +473,7 @@ next:
out:
if (primary_if)
batadv_hardif_free_ref(primary_if);
- return ret;
+ return 0;
}
static int batadv_orig_node_add_if(struct batadv_orig_node *orig_node,
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 2d23a14c220e..1c5454d33f67 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -23,14 +23,29 @@
#define BATADV_ETH_P_BATMAN 0x4305 /* unofficial/not registered Ethertype */
enum batadv_packettype {
- BATADV_IV_OGM = 0x01,
- BATADV_ICMP = 0x02,
- BATADV_UNICAST = 0x03,
- BATADV_BCAST = 0x04,
- BATADV_VIS = 0x05,
- BATADV_UNICAST_FRAG = 0x06,
- BATADV_TT_QUERY = 0x07,
- BATADV_ROAM_ADV = 0x08,
+ BATADV_IV_OGM = 0x01,
+ BATADV_ICMP = 0x02,
+ BATADV_UNICAST = 0x03,
+ BATADV_BCAST = 0x04,
+ BATADV_VIS = 0x05,
+ BATADV_UNICAST_FRAG = 0x06,
+ BATADV_TT_QUERY = 0x07,
+ BATADV_ROAM_ADV = 0x08,
+ BATADV_UNICAST_4ADDR = 0x09,
+};
+
+/**
+ * enum batadv_subtype - packet subtype for unicast4addr
+ * @BATADV_P_DATA: user payload
+ * @BATADV_P_DAT_DHT_GET: DHT request message
+ * @BATADV_P_DAT_DHT_PUT: DHT store message
+ * @BATADV_P_DAT_CACHE_REPLY: ARP reply generated by DAT
+ */
+enum batadv_subtype {
+ BATADV_P_DATA = 0x01,
+ BATADV_P_DAT_DHT_GET = 0x02,
+ BATADV_P_DAT_DHT_PUT = 0x03,
+ BATADV_P_DAT_CACHE_REPLY = 0x04,
};
/* this file is included by batctl which needs these defines */
@@ -106,13 +121,16 @@ struct batadv_bla_claim_dst {
uint8_t magic[3]; /* FF:43:05 */
uint8_t type; /* bla_claimframe */
__be16 group; /* group id */
-} __packed;
+};
struct batadv_header {
uint8_t packet_type;
uint8_t version; /* batman version field */
uint8_t ttl;
-} __packed;
+ /* the parent struct has to add a byte after the header to make
+ * everything 4 bytes aligned again
+ */
+};
struct batadv_ogm_packet {
struct batadv_header header;
@@ -137,7 +155,7 @@ struct batadv_icmp_packet {
__be16 seqno;
uint8_t uid;
uint8_t reserved;
-} __packed;
+};
#define BATADV_RR_LEN 16
@@ -153,13 +171,44 @@ struct batadv_icmp_packet_rr {
uint8_t uid;
uint8_t rr_cur;
uint8_t rr[BATADV_RR_LEN][ETH_ALEN];
-} __packed;
+};
+
+/* All packet headers in front of an ethernet header have to be completely
+ * divisible by 2 but not by 4 to make the payload after the ethernet
+ * header again 4 bytes boundary aligned.
+ *
+ * A packing of 2 is necessary to avoid extra padding at the end of the struct
+ * caused by a structure member which is larger than two bytes. Otherwise
+ * the structure would not fulfill the previously mentioned rule to avoid the
+ * misalignment of the payload after the ethernet header. It may also lead to
+ * leakage of information when the padding it not initialized before sending.
+ */
+#pragma pack(2)
struct batadv_unicast_packet {
struct batadv_header header;
uint8_t ttvn; /* destination translation table version number */
uint8_t dest[ETH_ALEN];
-} __packed;
+ /* "4 bytes boundary + 2 bytes" long to make the payload after the
+ * following ethernet header again 4 bytes boundary aligned
+ */
+};
+
+/**
+ * struct batadv_unicast_4addr_packet - extended unicast packet
+ * @u: common unicast packet header
+ * @src: address of the source
+ * @subtype: packet subtype
+ */
+struct batadv_unicast_4addr_packet {
+ struct batadv_unicast_packet u;
+ uint8_t src[ETH_ALEN];
+ uint8_t subtype;
+ uint8_t reserved;
+ /* "4 bytes boundary + 2 bytes" long to make the payload after the
+ * following ethernet header again 4 bytes boundary aligned
+ */
+};
struct batadv_unicast_frag_packet {
struct batadv_header header;
@@ -176,7 +225,12 @@ struct batadv_bcast_packet {
uint8_t reserved;
__be32 seqno;
uint8_t orig[ETH_ALEN];
-} __packed;
+ /* "4 bytes boundary + 2 bytes" long to make the payload after the
+ * following ethernet header again 4 bytes boundary aligned
+ */
+};
+
+#pragma pack()
struct batadv_vis_packet {
struct batadv_header header;
@@ -187,7 +241,7 @@ struct batadv_vis_packet {
uint8_t vis_orig[ETH_ALEN]; /* originator reporting its neighbors */
uint8_t target_orig[ETH_ALEN]; /* who should receive this packet */
uint8_t sender_orig[ETH_ALEN]; /* who sent or forwarded this packet */
-} __packed;
+};
struct batadv_tt_query_packet {
struct batadv_header header;
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 939fc01371df..1aa1722d0187 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -28,6 +28,7 @@
#include "vis.h"
#include "unicast.h"
#include "bridge_loop_avoidance.h"
+#include "distributed-arp-table.h"
static int batadv_route_unicast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
@@ -284,7 +285,6 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
{
struct batadv_hard_iface *primary_if = NULL;
struct batadv_orig_node *orig_node = NULL;
- struct batadv_neigh_node *router = NULL;
struct batadv_icmp_packet_rr *icmp_packet;
int ret = NET_RX_DROP;
@@ -306,10 +306,6 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
if (!orig_node)
goto out;
- router = batadv_orig_node_get_router(orig_node);
- if (!router)
- goto out;
-
/* create a copy of the skb, if needed, to modify it. */
if (skb_cow(skb, ETH_HLEN) < 0)
goto out;
@@ -321,14 +317,12 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
icmp_packet->msg_type = BATADV_ECHO_REPLY;
icmp_packet->header.ttl = BATADV_TTL;
- batadv_send_skb_packet(skb, router->if_incoming, router->addr);
- ret = NET_RX_SUCCESS;
+ if (batadv_send_skb_to_orig(skb, orig_node, NULL))
+ ret = NET_RX_SUCCESS;
out:
if (primary_if)
batadv_hardif_free_ref(primary_if);
- if (router)
- batadv_neigh_node_free_ref(router);
if (orig_node)
batadv_orig_node_free_ref(orig_node);
return ret;
@@ -339,7 +333,6 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
{
struct batadv_hard_iface *primary_if = NULL;
struct batadv_orig_node *orig_node = NULL;
- struct batadv_neigh_node *router = NULL;
struct batadv_icmp_packet *icmp_packet;
int ret = NET_RX_DROP;
@@ -361,10 +354,6 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
if (!orig_node)
goto out;
- router = batadv_orig_node_get_router(orig_node);
- if (!router)
- goto out;
-
/* create a copy of the skb, if needed, to modify it. */
if (skb_cow(skb, ETH_HLEN) < 0)
goto out;
@@ -376,14 +365,12 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
icmp_packet->msg_type = BATADV_TTL_EXCEEDED;
icmp_packet->header.ttl = BATADV_TTL;
- batadv_send_skb_packet(skb, router->if_incoming, router->addr);
- ret = NET_RX_SUCCESS;
+ if (batadv_send_skb_to_orig(skb, orig_node, NULL))
+ ret = NET_RX_SUCCESS;
out:
if (primary_if)
batadv_hardif_free_ref(primary_if);
- if (router)
- batadv_neigh_node_free_ref(router);
if (orig_node)
batadv_orig_node_free_ref(orig_node);
return ret;
@@ -397,7 +384,6 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
struct batadv_icmp_packet_rr *icmp_packet;
struct ethhdr *ethhdr;
struct batadv_orig_node *orig_node = NULL;
- struct batadv_neigh_node *router = NULL;
int hdr_size = sizeof(struct batadv_icmp_packet);
int ret = NET_RX_DROP;
@@ -446,10 +432,6 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
if (!orig_node)
goto out;
- router = batadv_orig_node_get_router(orig_node);
- if (!router)
- goto out;
-
/* create a copy of the skb, if needed, to modify it. */
if (skb_cow(skb, ETH_HLEN) < 0)
goto out;
@@ -460,12 +442,10 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
icmp_packet->header.ttl--;
/* route it */
- batadv_send_skb_packet(skb, router->if_incoming, router->addr);
- ret = NET_RX_SUCCESS;
+ if (batadv_send_skb_to_orig(skb, orig_node, recv_if))
+ ret = NET_RX_SUCCESS;
out:
- if (router)
- batadv_neigh_node_free_ref(router);
if (orig_node)
batadv_orig_node_free_ref(orig_node);
return ret;
@@ -549,25 +529,18 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig,
if (tmp_neigh_node->if_incoming == recv_if)
continue;
- if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
+ if (router && tmp_neigh_node->tq_avg <= router->tq_avg)
continue;
- /* if we don't have a router yet
- * or this one is better, choose it.
- */
- if ((!router) ||
- (tmp_neigh_node->tq_avg > router->tq_avg)) {
- /* decrement refcount of
- * previously selected router
- */
- if (router)
- batadv_neigh_node_free_ref(router);
+ if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
+ continue;
- router = tmp_neigh_node;
- atomic_inc_not_zero(&router->refcount);
- }
+ /* decrement refcount of previously selected router */
+ if (router)
+ batadv_neigh_node_free_ref(router);
- batadv_neigh_node_free_ref(tmp_neigh_node);
+ /* we found a better router (or at least one valid router) */
+ router = tmp_neigh_node;
}
/* use the first candidate if nothing was found. */
@@ -687,21 +660,8 @@ int batadv_recv_roam_adv(struct sk_buff *skb, struct batadv_hard_iface *recv_if)
struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
struct batadv_roam_adv_packet *roam_adv_packet;
struct batadv_orig_node *orig_node;
- struct ethhdr *ethhdr;
- /* drop packet if it has not necessary minimum size */
- if (unlikely(!pskb_may_pull(skb,
- sizeof(struct batadv_roam_adv_packet))))
- goto out;
-
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
-
- /* packet with unicast indication but broadcast recipient */
- if (is_broadcast_ether_addr(ethhdr->h_dest))
- goto out;
-
- /* packet with broadcast sender address */
- if (is_broadcast_ether_addr(ethhdr->h_source))
+ if (batadv_check_unicast_packet(skb, sizeof(*roam_adv_packet)) < 0)
goto out;
batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX);
@@ -730,12 +690,6 @@ int batadv_recv_roam_adv(struct sk_buff *skb, struct batadv_hard_iface *recv_if)
BATADV_TT_CLIENT_ROAM,
atomic_read(&orig_node->last_ttvn) + 1);
- /* Roaming phase starts: I have new information but the ttvn has not
- * been incremented yet. This flag will make me check all the incoming
- * packets for the correct destination.
- */
- bat_priv->tt.poss_change = true;
-
batadv_orig_node_free_ref(orig_node);
out:
/* returning NET_RX_DROP will make the caller function kfree the skb */
@@ -907,8 +861,8 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
skb->len + ETH_HLEN);
/* route it */
- batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
- ret = NET_RX_SUCCESS;
+ if (batadv_send_skb_to_orig(skb, orig_node, recv_if))
+ ret = NET_RX_SUCCESS;
out:
if (neigh_node)
@@ -918,80 +872,161 @@ out:
return ret;
}
+/**
+ * batadv_reroute_unicast_packet - update the unicast header for re-routing
+ * @bat_priv: the bat priv with all the soft interface information
+ * @unicast_packet: the unicast header to be updated
+ * @dst_addr: the payload destination
+ *
+ * Search the translation table for dst_addr and update the unicast header with
+ * the new corresponding information (originator address where the destination
+ * client currently is and its known TTVN)
+ *
+ * Returns true if the packet header has been updated, false otherwise
+ */
+static bool
+batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
+ struct batadv_unicast_packet *unicast_packet,
+ uint8_t *dst_addr)
+{
+ struct batadv_orig_node *orig_node = NULL;
+ struct batadv_hard_iface *primary_if = NULL;
+ bool ret = false;
+ uint8_t *orig_addr, orig_ttvn;
+
+ if (batadv_is_my_client(bat_priv, dst_addr)) {
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if)
+ goto out;
+ orig_addr = primary_if->net_dev->dev_addr;
+ orig_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn);
+ } else {
+ orig_node = batadv_transtable_search(bat_priv, NULL, dst_addr);
+ if (!orig_node)
+ goto out;
+
+ if (batadv_compare_eth(orig_node->orig, unicast_packet->dest))
+ goto out;
+
+ orig_addr = orig_node->orig;
+ orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
+ }
+
+ /* update the packet header */
+ memcpy(unicast_packet->dest, orig_addr, ETH_ALEN);
+ unicast_packet->ttvn = orig_ttvn;
+
+ ret = true;
+out:
+ if (primary_if)
+ batadv_hardif_free_ref(primary_if);
+ if (orig_node)
+ batadv_orig_node_free_ref(orig_node);
+
+ return ret;
+}
+
static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
struct sk_buff *skb) {
- uint8_t curr_ttvn;
+ uint8_t curr_ttvn, old_ttvn;
struct batadv_orig_node *orig_node;
struct ethhdr *ethhdr;
struct batadv_hard_iface *primary_if;
struct batadv_unicast_packet *unicast_packet;
- bool tt_poss_change;
int is_old_ttvn;
- /* I could need to modify it */
- if (skb_cow(skb, sizeof(struct batadv_unicast_packet)) < 0)
+ /* check if there is enough data before accessing it */
+ if (pskb_may_pull(skb, sizeof(*unicast_packet) + ETH_HLEN) < 0)
+ return 0;
+
+ /* create a copy of the skb (in case of for re-routing) to modify it. */
+ if (skb_cow(skb, sizeof(*unicast_packet)) < 0)
return 0;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
+ ethhdr = (struct ethhdr *)(skb->data + sizeof(*unicast_packet));
- if (batadv_is_my_mac(unicast_packet->dest)) {
- tt_poss_change = bat_priv->tt.poss_change;
- curr_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn);
- } else {
+ /* check if the destination client was served by this node and it is now
+ * roaming. In this case, it means that the node has got a ROAM_ADV
+ * message and that it knows the new destination in the mesh to re-route
+ * the packet to
+ */
+ if (batadv_tt_local_client_is_roaming(bat_priv, ethhdr->h_dest)) {
+ if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
+ ethhdr->h_dest))
+ net_ratelimited_function(batadv_dbg, BATADV_DBG_TT,
+ bat_priv,
+ "Rerouting unicast packet to %pM (dst=%pM): Local Roaming\n",
+ unicast_packet->dest,
+ ethhdr->h_dest);
+ /* at this point the mesh destination should have been
+ * substituted with the originator address found in the global
+ * table. If not, let the packet go untouched anyway because
+ * there is nothing the node can do
+ */
+ return 1;
+ }
+
+ /* retrieve the TTVN known by this node for the packet destination. This
+ * value is used later to check if the node which sent (or re-routed
+ * last time) the packet had an updated information or not
+ */
+ curr_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn);
+ if (!batadv_is_my_mac(unicast_packet->dest)) {
orig_node = batadv_orig_hash_find(bat_priv,
unicast_packet->dest);
-
+ /* if it is not possible to find the orig_node representing the
+ * destination, the packet can immediately be dropped as it will
+ * not be possible to deliver it
+ */
if (!orig_node)
return 0;
curr_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
- tt_poss_change = orig_node->tt_poss_change;
batadv_orig_node_free_ref(orig_node);
}
- /* Check whether I have to reroute the packet */
+ /* check if the TTVN contained in the packet is fresher than what the
+ * node knows
+ */
is_old_ttvn = batadv_seq_before(unicast_packet->ttvn, curr_ttvn);
- if (is_old_ttvn || tt_poss_change) {
- /* check if there is enough data before accessing it */
- if (pskb_may_pull(skb, sizeof(struct batadv_unicast_packet) +
- ETH_HLEN) < 0)
- return 0;
+ if (!is_old_ttvn)
+ return 1;
- ethhdr = (struct ethhdr *)(skb->data + sizeof(*unicast_packet));
+ old_ttvn = unicast_packet->ttvn;
+ /* the packet was forged based on outdated network information. Its
+ * destination can possibly be updated and forwarded towards the new
+ * target host
+ */
+ if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
+ ethhdr->h_dest)) {
+ net_ratelimited_function(batadv_dbg, BATADV_DBG_TT, bat_priv,
+ "Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n",
+ unicast_packet->dest, ethhdr->h_dest,
+ old_ttvn, curr_ttvn);
+ return 1;
+ }
- /* we don't have an updated route for this client, so we should
- * not try to reroute the packet!!
- */
- if (batadv_tt_global_client_is_roaming(bat_priv,
- ethhdr->h_dest))
- return 1;
+ /* the packet has not been re-routed: either the destination is
+ * currently served by this node or there is no destination at all and
+ * it is possible to drop the packet
+ */
+ if (!batadv_is_my_client(bat_priv, ethhdr->h_dest))
+ return 0;
- orig_node = batadv_transtable_search(bat_priv, NULL,
- ethhdr->h_dest);
-
- if (!orig_node) {
- if (!batadv_is_my_client(bat_priv, ethhdr->h_dest))
- return 0;
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if)
- return 0;
- memcpy(unicast_packet->dest,
- primary_if->net_dev->dev_addr, ETH_ALEN);
- batadv_hardif_free_ref(primary_if);
- } else {
- memcpy(unicast_packet->dest, orig_node->orig,
- ETH_ALEN);
- curr_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
- batadv_orig_node_free_ref(orig_node);
- }
+ /* update the header in order to let the packet be delivered to this
+ * node's soft interface
+ */
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if)
+ return 0;
- batadv_dbg(BATADV_DBG_ROUTES, bat_priv,
- "TTVN mismatch (old_ttvn %u new_ttvn %u)! Rerouting unicast packet (for %pM) to %pM\n",
- unicast_packet->ttvn, curr_ttvn, ethhdr->h_dest,
- unicast_packet->dest);
+ memcpy(unicast_packet->dest, primary_if->net_dev->dev_addr, ETH_ALEN);
+
+ batadv_hardif_free_ref(primary_if);
+
+ unicast_packet->ttvn = curr_ttvn;
- unicast_packet->ttvn = curr_ttvn;
- }
return 1;
}
@@ -1000,7 +1035,19 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
{
struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
struct batadv_unicast_packet *unicast_packet;
+ struct batadv_unicast_4addr_packet *unicast_4addr_packet;
+ uint8_t *orig_addr;
+ struct batadv_orig_node *orig_node = NULL;
int hdr_size = sizeof(*unicast_packet);
+ bool is4addr;
+
+ unicast_packet = (struct batadv_unicast_packet *)skb->data;
+ unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
+
+ is4addr = unicast_packet->header.packet_type == BATADV_UNICAST_4ADDR;
+ /* the caller function should have already pulled 2 bytes */
+ if (is4addr)
+ hdr_size = sizeof(*unicast_4addr_packet);
if (batadv_check_unicast_packet(skb, hdr_size) < 0)
return NET_RX_DROP;
@@ -1008,12 +1055,28 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
if (!batadv_check_unicast_ttvn(bat_priv, skb))
return NET_RX_DROP;
- unicast_packet = (struct batadv_unicast_packet *)skb->data;
-
/* packet for me */
if (batadv_is_my_mac(unicast_packet->dest)) {
+ if (is4addr) {
+ batadv_dat_inc_counter(bat_priv,
+ unicast_4addr_packet->subtype);
+ orig_addr = unicast_4addr_packet->src;
+ orig_node = batadv_orig_hash_find(bat_priv, orig_addr);
+ }
+
+ if (batadv_dat_snoop_incoming_arp_request(bat_priv, skb,
+ hdr_size))
+ goto rx_success;
+ if (batadv_dat_snoop_incoming_arp_reply(bat_priv, skb,
+ hdr_size))
+ goto rx_success;
+
batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size,
- NULL);
+ orig_node);
+
+rx_success:
+ if (orig_node)
+ batadv_orig_node_free_ref(orig_node);
return NET_RX_SUCCESS;
}
@@ -1050,8 +1113,17 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb,
if (!new_skb)
return NET_RX_SUCCESS;
+ if (batadv_dat_snoop_incoming_arp_request(bat_priv, new_skb,
+ hdr_size))
+ goto rx_success;
+ if (batadv_dat_snoop_incoming_arp_reply(bat_priv, new_skb,
+ hdr_size))
+ goto rx_success;
+
batadv_interface_rx(recv_if->soft_iface, new_skb, recv_if,
sizeof(struct batadv_unicast_packet), NULL);
+
+rx_success:
return NET_RX_SUCCESS;
}
@@ -1125,7 +1197,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
spin_unlock_bh(&orig_node->bcast_seqno_lock);
/* check whether this has been sent by another originator before */
- if (batadv_bla_check_bcast_duplist(bat_priv, bcast_packet, hdr_size))
+ if (batadv_bla_check_bcast_duplist(bat_priv, skb))
goto out;
/* rebroadcast packet */
@@ -1137,9 +1209,16 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
if (batadv_bla_is_backbone_gw(skb, orig_node, hdr_size))
goto out;
+ if (batadv_dat_snoop_incoming_arp_request(bat_priv, skb, hdr_size))
+ goto rx_success;
+ if (batadv_dat_snoop_incoming_arp_reply(bat_priv, skb, hdr_size))
+ goto rx_success;
+
/* broadcast for me */
batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size,
orig_node);
+
+rx_success:
ret = NET_RX_SUCCESS;
goto out;
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 570a8bce0364..c7f702376535 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -18,6 +18,7 @@
*/
#include "main.h"
+#include "distributed-arp-table.h"
#include "send.h"
#include "routing.h"
#include "translation-table.h"
@@ -77,6 +78,39 @@ send_skb_err:
return NET_XMIT_DROP;
}
+/**
+ * batadv_send_skb_to_orig - Lookup next-hop and transmit skb.
+ * @skb: Packet to be transmitted.
+ * @orig_node: Final destination of the packet.
+ * @recv_if: Interface used when receiving the packet (can be NULL).
+ *
+ * Looks up the best next-hop towards the passed originator and passes the
+ * skb on for preparation of MAC header. If the packet originated from this
+ * host, NULL can be passed as recv_if and no interface alternating is
+ * attempted.
+ *
+ * Returns TRUE on success; FALSE otherwise.
+ */
+bool batadv_send_skb_to_orig(struct sk_buff *skb,
+ struct batadv_orig_node *orig_node,
+ struct batadv_hard_iface *recv_if)
+{
+ struct batadv_priv *bat_priv = orig_node->bat_priv;
+ struct batadv_neigh_node *neigh_node;
+
+ /* batadv_find_router() increases neigh_nodes refcount if found. */
+ neigh_node = batadv_find_router(bat_priv, orig_node, recv_if);
+ if (!neigh_node)
+ return false;
+
+ /* route it */
+ batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
+
+ batadv_neigh_node_free_ref(neigh_node);
+
+ return true;
+}
+
void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
@@ -209,6 +243,9 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
goto out;
+ if (batadv_dat_drop_broadcast_packet(bat_priv, forw_packet))
+ goto out;
+
/* rebroadcast packet */
rcu_read_lock();
list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 643329b787ed..0078dece1abc 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -23,6 +23,9 @@
int batadv_send_skb_packet(struct sk_buff *skb,
struct batadv_hard_iface *hard_iface,
const uint8_t *dst_addr);
+bool batadv_send_skb_to_orig(struct sk_buff *skb,
+ struct batadv_orig_node *orig_node,
+ struct batadv_hard_iface *recv_if);
void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface);
int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
const struct sk_buff *skb,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index b9a28d2dd3e8..54800c783f96 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -20,6 +20,7 @@
#include "main.h"
#include "soft-interface.h"
#include "hard-interface.h"
+#include "distributed-arp-table.h"
#include "routing.h"
#include "send.h"
#include "debugfs.h"
@@ -146,13 +147,16 @@ static int batadv_interface_tx(struct sk_buff *skb,
struct batadv_bcast_packet *bcast_packet;
struct vlan_ethhdr *vhdr;
__be16 ethertype = __constant_htons(BATADV_ETH_P_BATMAN);
- static const uint8_t stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, 0x00,
- 0x00};
+ static const uint8_t stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00,
+ 0x00, 0x00};
+ static const uint8_t ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00,
+ 0x00, 0x00};
unsigned int header_len = 0;
int data_len = skb->len, ret;
short vid __maybe_unused = -1;
bool do_bcast = false;
uint32_t seqno;
+ unsigned long brd_delay = 1;
if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
goto dropped;
@@ -180,10 +184,16 @@ static int batadv_interface_tx(struct sk_buff *skb,
/* don't accept stp packets. STP does not help in meshes.
* better use the bridge loop avoidance ...
+ *
+ * The same goes for ECTP sent at least by some Cisco Switches,
+ * it might confuse the mesh when used with bridge loop avoidance.
*/
if (batadv_compare_eth(ethhdr->h_dest, stp_addr))
goto dropped;
+ if (batadv_compare_eth(ethhdr->h_dest, ectp_addr))
+ goto dropped;
+
if (is_multicast_ether_addr(ethhdr->h_dest)) {
do_bcast = true;
@@ -216,6 +226,13 @@ static int batadv_interface_tx(struct sk_buff *skb,
if (!primary_if)
goto dropped;
+ /* in case of ARP request, we do not immediately broadcasti the
+ * packet, instead we first wait for DAT to try to retrieve the
+ * correct ARP entry
+ */
+ if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb))
+ brd_delay = msecs_to_jiffies(ARP_REQ_DELAY);
+
if (batadv_skb_head_push(skb, sizeof(*bcast_packet)) < 0)
goto dropped;
@@ -237,7 +254,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
seqno = atomic_inc_return(&bat_priv->bcast_seqno);
bcast_packet->seqno = htonl(seqno);
- batadv_add_bcast_packet_to_list(bat_priv, skb, 1);
+ batadv_add_bcast_packet_to_list(bat_priv, skb, brd_delay);
/* a copy is stored in the bcast list, therefore removing
* the original skb.
@@ -252,7 +269,12 @@ static int batadv_interface_tx(struct sk_buff *skb,
goto dropped;
}
- ret = batadv_unicast_send_skb(skb, bat_priv);
+ if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb))
+ goto dropped;
+
+ batadv_dat_snoop_outgoing_arp_reply(bat_priv, skb);
+
+ ret = batadv_unicast_send_skb(bat_priv, skb);
if (ret != 0)
goto dropped_freed;
}
@@ -325,6 +347,12 @@ void batadv_interface_rx(struct net_device *soft_iface,
soft_iface->last_rx = jiffies;
+ /* Let the bridge loop avoidance check the packet. If will
+ * not handle it, we can safely push it up.
+ */
+ if (batadv_bla_rx(bat_priv, skb, vid, is_bcast))
+ goto out;
+
if (orig_node)
batadv_tt_add_temporary_global_entry(bat_priv, orig_node,
ethhdr->h_source);
@@ -332,12 +360,6 @@ void batadv_interface_rx(struct net_device *soft_iface,
if (batadv_is_ap_isolated(bat_priv, ethhdr->h_source, ethhdr->h_dest))
goto dropped;
- /* Let the bridge loop avoidance check the packet. If will
- * not handle it, we can safely push it up.
- */
- if (batadv_bla_rx(bat_priv, skb, vid, is_bcast))
- goto out;
-
netif_rx(skb);
goto out;
@@ -347,7 +369,51 @@ out:
return;
}
+/* batman-adv network devices have devices nesting below it and are a special
+ * "super class" of normal network devices; split their locks off into a
+ * separate class since they always nest.
+ */
+static struct lock_class_key batadv_netdev_xmit_lock_key;
+static struct lock_class_key batadv_netdev_addr_lock_key;
+
+/**
+ * batadv_set_lockdep_class_one - Set lockdep class for a single tx queue
+ * @dev: device which owns the tx queue
+ * @txq: tx queue to modify
+ * @_unused: always NULL
+ */
+static void batadv_set_lockdep_class_one(struct net_device *dev,
+ struct netdev_queue *txq,
+ void *_unused)
+{
+ lockdep_set_class(&txq->_xmit_lock, &batadv_netdev_xmit_lock_key);
+}
+
+/**
+ * batadv_set_lockdep_class - Set txq and addr_list lockdep class
+ * @dev: network device to modify
+ */
+static void batadv_set_lockdep_class(struct net_device *dev)
+{
+ lockdep_set_class(&dev->addr_list_lock, &batadv_netdev_addr_lock_key);
+ netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL);
+}
+
+/**
+ * batadv_softif_init - Late stage initialization of soft interface
+ * @dev: registered network device to modify
+ *
+ * Returns error code on failures
+ */
+static int batadv_softif_init(struct net_device *dev)
+{
+ batadv_set_lockdep_class(dev);
+
+ return 0;
+}
+
static const struct net_device_ops batadv_netdev_ops = {
+ .ndo_init = batadv_softif_init,
.ndo_open = batadv_interface_open,
.ndo_stop = batadv_interface_release,
.ndo_get_stats = batadv_interface_stats,
@@ -414,6 +480,9 @@ struct net_device *batadv_softif_create(const char *name)
atomic_set(&bat_priv->aggregated_ogms, 1);
atomic_set(&bat_priv->bonding, 0);
atomic_set(&bat_priv->bridge_loop_avoidance, 0);
+#ifdef CONFIG_BATMAN_ADV_DAT
+ atomic_set(&bat_priv->distributed_arp_table, 1);
+#endif
atomic_set(&bat_priv->ap_isolation, 0);
atomic_set(&bat_priv->vis_mode, BATADV_VIS_TYPE_CLIENT_UPDATE);
atomic_set(&bat_priv->gw_mode, BATADV_GW_MODE_OFF);
@@ -436,7 +505,6 @@ struct net_device *batadv_softif_create(const char *name)
#endif
bat_priv->tt.last_changeset = NULL;
bat_priv->tt.last_changeset_len = 0;
- bat_priv->tt.poss_change = false;
bat_priv->primary_if = NULL;
bat_priv->num_ifaces = 0;
@@ -556,6 +624,13 @@ static const struct {
{ "tt_response_rx" },
{ "tt_roam_adv_tx" },
{ "tt_roam_adv_rx" },
+#ifdef CONFIG_BATMAN_ADV_DAT
+ { "dat_get_tx" },
+ { "dat_get_rx" },
+ { "dat_put_tx" },
+ { "dat_put_rx" },
+ { "dat_cached_reply_tx" },
+#endif
};
static void batadv_get_strings(struct net_device *dev, uint32_t stringset,
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 66518c75c217..84a55cb19b0b 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -20,6 +20,7 @@
#include "main.h"
#include "sysfs.h"
#include "translation-table.h"
+#include "distributed-arp-table.h"
#include "originator.h"
#include "hard-interface.h"
#include "gateway_common.h"
@@ -122,55 +123,6 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \
batadv_store_##_name)
-#define BATADV_ATTR_HIF_STORE_UINT(_name, _min, _max, _post_func) \
-ssize_t batadv_store_##_name(struct kobject *kobj, \
- struct attribute *attr, char *buff, \
- size_t count) \
-{ \
- struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \
- struct batadv_hard_iface *hard_iface; \
- ssize_t length; \
- \
- hard_iface = batadv_hardif_get_by_netdev(net_dev); \
- if (!hard_iface) \
- return 0; \
- \
- length = __batadv_store_uint_attr(buff, count, _min, _max, \
- _post_func, attr, \
- &hard_iface->_name, net_dev); \
- \
- batadv_hardif_free_ref(hard_iface); \
- return length; \
-}
-
-#define BATADV_ATTR_HIF_SHOW_UINT(_name) \
-ssize_t batadv_show_##_name(struct kobject *kobj, \
- struct attribute *attr, char *buff) \
-{ \
- struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \
- struct batadv_hard_iface *hard_iface; \
- ssize_t length; \
- \
- hard_iface = batadv_hardif_get_by_netdev(net_dev); \
- if (!hard_iface) \
- return 0; \
- \
- length = sprintf(buff, "%i\n", atomic_read(&hard_iface->_name));\
- \
- batadv_hardif_free_ref(hard_iface); \
- return length; \
-}
-
-/* Use this, if you are going to set [name] in hard_iface to an
- * unsigned integer value
- */
-#define BATADV_ATTR_HIF_UINT(_name, _mode, _min, _max, _post_func) \
- static BATADV_ATTR_HIF_STORE_UINT(_name, _min, _max, _post_func)\
- static BATADV_ATTR_HIF_SHOW_UINT(_name) \
- static BATADV_ATTR(_name, _mode, batadv_show_##_name, \
- batadv_store_##_name)
-
-
static int batadv_store_bool_attr(char *buff, size_t count,
struct net_device *net_dev,
const char *attr_name, atomic_t *attr)
@@ -469,6 +421,9 @@ BATADV_ATTR_SIF_BOOL(bonding, S_IRUGO | S_IWUSR, NULL);
#ifdef CONFIG_BATMAN_ADV_BLA
BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, S_IRUGO | S_IWUSR, NULL);
#endif
+#ifdef CONFIG_BATMAN_ADV_DAT
+BATADV_ATTR_SIF_BOOL(distributed_arp_table, S_IRUGO | S_IWUSR, NULL);
+#endif
BATADV_ATTR_SIF_BOOL(fragmentation, S_IRUGO | S_IWUSR, batadv_update_min_mtu);
BATADV_ATTR_SIF_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL);
static BATADV_ATTR(vis_mode, S_IRUGO | S_IWUSR, batadv_show_vis_mode,
@@ -494,6 +449,9 @@ static struct batadv_attribute *batadv_mesh_attrs[] = {
#ifdef CONFIG_BATMAN_ADV_BLA
&batadv_attr_bridge_loop_avoidance,
#endif
+#ifdef CONFIG_BATMAN_ADV_DAT
+ &batadv_attr_distributed_arp_table,
+#endif
&batadv_attr_fragmentation,
&batadv_attr_ap_isolation,
&batadv_attr_vis_mode,
@@ -730,7 +688,7 @@ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type,
enum batadv_uev_action action, const char *data)
{
int ret = -ENOMEM;
- struct batadv_hard_iface *primary_if = NULL;
+ struct batadv_hard_iface *primary_if;
struct kobject *bat_kobj;
char *uevent_env[4] = { NULL, NULL, NULL, NULL };
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 112edd371b2f..22457a7952ba 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -238,92 +238,134 @@ static int batadv_tt_local_init(struct batadv_priv *bat_priv)
return 0;
}
+static void batadv_tt_global_free(struct batadv_priv *bat_priv,
+ struct batadv_tt_global_entry *tt_global,
+ const char *message)
+{
+ batadv_dbg(BATADV_DBG_TT, bat_priv,
+ "Deleting global tt entry %pM: %s\n",
+ tt_global->common.addr, message);
+
+ batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt,
+ batadv_choose_orig, tt_global->common.addr);
+ batadv_tt_global_entry_free_ref(tt_global);
+
+}
+
void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
int ifindex)
{
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
- struct batadv_tt_local_entry *tt_local_entry = NULL;
- struct batadv_tt_global_entry *tt_global_entry = NULL;
+ struct batadv_tt_local_entry *tt_local;
+ struct batadv_tt_global_entry *tt_global;
struct hlist_head *head;
struct hlist_node *node;
struct batadv_tt_orig_list_entry *orig_entry;
int hash_added;
+ bool roamed_back = false;
- tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr);
+ tt_local = batadv_tt_local_hash_find(bat_priv, addr);
+ tt_global = batadv_tt_global_hash_find(bat_priv, addr);
- if (tt_local_entry) {
- tt_local_entry->last_seen = jiffies;
- /* possibly unset the BATADV_TT_CLIENT_PENDING flag */
- tt_local_entry->common.flags &= ~BATADV_TT_CLIENT_PENDING;
- goto out;
+ if (tt_local) {
+ tt_local->last_seen = jiffies;
+ if (tt_local->common.flags & BATADV_TT_CLIENT_PENDING) {
+ batadv_dbg(BATADV_DBG_TT, bat_priv,
+ "Re-adding pending client %pM\n", addr);
+ /* whatever the reason why the PENDING flag was set,
+ * this is a client which was enqueued to be removed in
+ * this orig_interval. Since it popped up again, the
+ * flag can be reset like it was never enqueued
+ */
+ tt_local->common.flags &= ~BATADV_TT_CLIENT_PENDING;
+ goto add_event;
+ }
+
+ if (tt_local->common.flags & BATADV_TT_CLIENT_ROAM) {
+ batadv_dbg(BATADV_DBG_TT, bat_priv,
+ "Roaming client %pM came back to its original location\n",
+ addr);
+ /* the ROAM flag is set because this client roamed away
+ * and the node got a roaming_advertisement message. Now
+ * that the client popped up again at its original
+ * location such flag can be unset
+ */
+ tt_local->common.flags &= ~BATADV_TT_CLIENT_ROAM;
+ roamed_back = true;
+ }
+ goto check_roaming;
}
- tt_local_entry = kmalloc(sizeof(*tt_local_entry), GFP_ATOMIC);
- if (!tt_local_entry)
+ tt_local = kmalloc(sizeof(*tt_local), GFP_ATOMIC);
+ if (!tt_local)
goto out;
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Creating new local tt entry: %pM (ttvn: %d)\n", addr,
(uint8_t)atomic_read(&bat_priv->tt.vn));
- memcpy(tt_local_entry->common.addr, addr, ETH_ALEN);
- tt_local_entry->common.flags = BATADV_NO_FLAGS;
+ memcpy(tt_local->common.addr, addr, ETH_ALEN);
+ tt_local->common.flags = BATADV_NO_FLAGS;
if (batadv_is_wifi_iface(ifindex))
- tt_local_entry->common.flags |= BATADV_TT_CLIENT_WIFI;
- atomic_set(&tt_local_entry->common.refcount, 2);
- tt_local_entry->last_seen = jiffies;
- tt_local_entry->common.added_at = tt_local_entry->last_seen;
+ tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
+ atomic_set(&tt_local->common.refcount, 2);
+ tt_local->last_seen = jiffies;
+ tt_local->common.added_at = tt_local->last_seen;
/* the batman interface mac address should never be purged */
if (batadv_compare_eth(addr, soft_iface->dev_addr))
- tt_local_entry->common.flags |= BATADV_TT_CLIENT_NOPURGE;
+ tt_local->common.flags |= BATADV_TT_CLIENT_NOPURGE;
/* The local entry has to be marked as NEW to avoid to send it in
* a full table response going out before the next ttvn increment
* (consistency check)
*/
- tt_local_entry->common.flags |= BATADV_TT_CLIENT_NEW;
+ tt_local->common.flags |= BATADV_TT_CLIENT_NEW;
hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt,
- batadv_choose_orig,
- &tt_local_entry->common,
- &tt_local_entry->common.hash_entry);
+ batadv_choose_orig, &tt_local->common,
+ &tt_local->common.hash_entry);
if (unlikely(hash_added != 0)) {
/* remove the reference for the hash */
- batadv_tt_local_entry_free_ref(tt_local_entry);
+ batadv_tt_local_entry_free_ref(tt_local);
goto out;
}
- batadv_tt_local_event(bat_priv, addr, tt_local_entry->common.flags);
-
- /* remove address from global hash if present */
- tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr);
+add_event:
+ batadv_tt_local_event(bat_priv, addr, tt_local->common.flags);
- /* Check whether it is a roaming! */
- if (tt_global_entry) {
+check_roaming:
+ /* Check whether it is a roaming, but don't do anything if the roaming
+ * process has already been handled
+ */
+ if (tt_global && !(tt_global->common.flags & BATADV_TT_CLIENT_ROAM)) {
/* These node are probably going to update their tt table */
- head = &tt_global_entry->orig_list;
+ head = &tt_global->orig_list;
rcu_read_lock();
hlist_for_each_entry_rcu(orig_entry, node, head, list) {
- orig_entry->orig_node->tt_poss_change = true;
-
- batadv_send_roam_adv(bat_priv,
- tt_global_entry->common.addr,
+ batadv_send_roam_adv(bat_priv, tt_global->common.addr,
orig_entry->orig_node);
}
rcu_read_unlock();
- /* The global entry has to be marked as ROAMING and
- * has to be kept for consistency purpose
- */
- tt_global_entry->common.flags |= BATADV_TT_CLIENT_ROAM;
- tt_global_entry->roam_at = jiffies;
+ if (roamed_back) {
+ batadv_tt_global_free(bat_priv, tt_global,
+ "Roaming canceled");
+ tt_global = NULL;
+ } else {
+ /* The global entry has to be marked as ROAMING and
+ * has to be kept for consistency purpose
+ */
+ tt_global->common.flags |= BATADV_TT_CLIENT_ROAM;
+ tt_global->roam_at = jiffies;
+ }
}
+
out:
- if (tt_local_entry)
- batadv_tt_local_entry_free_ref(tt_local_entry);
- if (tt_global_entry)
- batadv_tt_global_entry_free_ref(tt_global_entry);
+ if (tt_local)
+ batadv_tt_local_entry_free_ref(tt_local);
+ if (tt_global)
+ batadv_tt_global_entry_free_ref(tt_global);
}
static void batadv_tt_realloc_packet_buff(unsigned char **packet_buff,
@@ -434,22 +476,10 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
struct hlist_node *node;
struct hlist_head *head;
uint32_t i;
- int ret = 0;
-
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if) {
- ret = seq_printf(seq,
- "BATMAN mesh %s disabled - please specify interfaces to enable it\n",
- net_dev->name);
- goto out;
- }
- if (primary_if->if_status != BATADV_IF_ACTIVE) {
- ret = seq_printf(seq,
- "BATMAN mesh %s disabled - primary interface not active\n",
- net_dev->name);
+ primary_if = batadv_seq_print_text_primary_if_get(seq);
+ if (!primary_if)
goto out;
- }
seq_printf(seq,
"Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n",
@@ -479,7 +509,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
out:
if (primary_if)
batadv_hardif_free_ref(primary_if);
- return ret;
+ return 0;
}
static void
@@ -501,24 +531,57 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
tt_local_entry->common.addr, message);
}
-void batadv_tt_local_remove(struct batadv_priv *bat_priv, const uint8_t *addr,
- const char *message, bool roaming)
+/**
+ * batadv_tt_local_remove - logically remove an entry from the local table
+ * @bat_priv: the bat priv with all the soft interface information
+ * @addr: the MAC address of the client to remove
+ * @message: message to append to the log on deletion
+ * @roaming: true if the deletion is due to a roaming event
+ *
+ * Returns the flags assigned to the local entry before being deleted
+ */
+uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
+ const uint8_t *addr, const char *message,
+ bool roaming)
{
- struct batadv_tt_local_entry *tt_local_entry = NULL;
- uint16_t flags;
+ struct batadv_tt_local_entry *tt_local_entry;
+ uint16_t flags, curr_flags = BATADV_NO_FLAGS;
tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr);
if (!tt_local_entry)
goto out;
+ curr_flags = tt_local_entry->common.flags;
+
flags = BATADV_TT_CLIENT_DEL;
- if (roaming)
+ /* if this global entry addition is due to a roaming, the node has to
+ * mark the local entry as "roamed" in order to correctly reroute
+ * packets later
+ */
+ if (roaming) {
flags |= BATADV_TT_CLIENT_ROAM;
+ /* mark the local client as ROAMed */
+ tt_local_entry->common.flags |= BATADV_TT_CLIENT_ROAM;
+ }
+
+ if (!(tt_local_entry->common.flags & BATADV_TT_CLIENT_NEW)) {
+ batadv_tt_local_set_pending(bat_priv, tt_local_entry, flags,
+ message);
+ goto out;
+ }
+ /* if this client has been added right now, it is possible to
+ * immediately purge it
+ */
+ batadv_tt_local_event(bat_priv, tt_local_entry->common.addr,
+ curr_flags | BATADV_TT_CLIENT_DEL);
+ hlist_del_rcu(&tt_local_entry->common.hash_entry);
+ batadv_tt_local_entry_free_ref(tt_local_entry);
- batadv_tt_local_set_pending(bat_priv, tt_local_entry, flags, message);
out:
if (tt_local_entry)
batadv_tt_local_entry_free_ref(tt_local_entry);
+
+ return curr_flags;
}
static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv,
@@ -721,12 +784,23 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv,
const unsigned char *tt_addr, uint8_t flags,
uint8_t ttvn)
{
- struct batadv_tt_global_entry *tt_global_entry = NULL;
+ struct batadv_tt_global_entry *tt_global_entry;
+ struct batadv_tt_local_entry *tt_local_entry;
int ret = 0;
int hash_added;
struct batadv_tt_common_entry *common;
+ uint16_t local_flags;
tt_global_entry = batadv_tt_global_hash_find(bat_priv, tt_addr);
+ tt_local_entry = batadv_tt_local_hash_find(bat_priv, tt_addr);
+
+ /* if the node already has a local client for this entry, it has to wait
+ * for a roaming advertisement instead of manually messing up the global
+ * table
+ */
+ if ((flags & BATADV_TT_CLIENT_TEMP) && tt_local_entry &&
+ !(tt_local_entry->common.flags & BATADV_TT_CLIENT_NEW))
+ goto out;
if (!tt_global_entry) {
tt_global_entry = kzalloc(sizeof(*tt_global_entry), GFP_ATOMIC);
@@ -738,6 +812,12 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv,
common->flags = flags;
tt_global_entry->roam_at = 0;
+ /* node must store current time in case of roaming. This is
+ * needed to purge this entry out on timeout (if nobody claims
+ * it)
+ */
+ if (flags & BATADV_TT_CLIENT_ROAM)
+ tt_global_entry->roam_at = jiffies;
atomic_set(&common->refcount, 2);
common->added_at = jiffies;
@@ -755,19 +835,37 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv,
goto out_remove;
}
} else {
+ common = &tt_global_entry->common;
/* If there is already a global entry, we can use this one for
* our processing.
- * But if we are trying to add a temporary client we can exit
- * directly because the temporary information should never
- * override any already known client state (whatever it is)
+ * But if we are trying to add a temporary client then here are
+ * two options at this point:
+ * 1) the global client is not a temporary client: the global
+ * client has to be left as it is, temporary information
+ * should never override any already known client state
+ * 2) the global client is a temporary client: purge the
+ * originator list and add the new one orig_entry
*/
- if (flags & BATADV_TT_CLIENT_TEMP)
- goto out;
+ if (flags & BATADV_TT_CLIENT_TEMP) {
+ if (!(common->flags & BATADV_TT_CLIENT_TEMP))
+ goto out;
+ if (batadv_tt_global_entry_has_orig(tt_global_entry,
+ orig_node))
+ goto out_remove;
+ batadv_tt_global_del_orig_list(tt_global_entry);
+ goto add_orig_entry;
+ }
/* if the client was temporary added before receiving the first
* OGM announcing it, we have to clear the TEMP flag
*/
- tt_global_entry->common.flags &= ~BATADV_TT_CLIENT_TEMP;
+ common->flags &= ~BATADV_TT_CLIENT_TEMP;
+
+ /* the change can carry possible "attribute" flags like the
+ * TT_CLIENT_WIFI, therefore they have to be copied in the
+ * client entry
+ */
+ tt_global_entry->common.flags |= flags;
/* If there is the BATADV_TT_CLIENT_ROAM flag set, there is only
* one originator left in the list and we previously received a
@@ -776,33 +874,81 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv,
* We should first delete the old originator before adding the
* new one.
*/
- if (tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM) {
+ if (common->flags & BATADV_TT_CLIENT_ROAM) {
batadv_tt_global_del_orig_list(tt_global_entry);
- tt_global_entry->common.flags &= ~BATADV_TT_CLIENT_ROAM;
+ common->flags &= ~BATADV_TT_CLIENT_ROAM;
tt_global_entry->roam_at = 0;
}
}
+add_orig_entry:
/* add the new orig_entry (if needed) or update it */
batadv_tt_global_orig_entry_add(tt_global_entry, orig_node, ttvn);
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Creating new global tt entry: %pM (via %pM)\n",
- tt_global_entry->common.addr, orig_node->orig);
+ common->addr, orig_node->orig);
+ ret = 1;
out_remove:
+
/* remove address from local hash if present */
- batadv_tt_local_remove(bat_priv, tt_global_entry->common.addr,
- "global tt received",
- flags & BATADV_TT_CLIENT_ROAM);
- ret = 1;
+ local_flags = batadv_tt_local_remove(bat_priv, tt_addr,
+ "global tt received",
+ !!(flags & BATADV_TT_CLIENT_ROAM));
+ tt_global_entry->common.flags |= local_flags & BATADV_TT_CLIENT_WIFI;
+
+ if (!(flags & BATADV_TT_CLIENT_ROAM))
+ /* this is a normal global add. Therefore the client is not in a
+ * roaming state anymore.
+ */
+ tt_global_entry->common.flags &= ~BATADV_TT_CLIENT_ROAM;
+
out:
if (tt_global_entry)
batadv_tt_global_entry_free_ref(tt_global_entry);
+ if (tt_local_entry)
+ batadv_tt_local_entry_free_ref(tt_local_entry);
return ret;
}
-/* print all orig nodes who announce the address for this global entry.
- * it is assumed that the caller holds rcu_read_lock();
+/* batadv_transtable_best_orig - Get best originator list entry from tt entry
+ * @tt_global_entry: global translation table entry to be analyzed
+ *
+ * This functon assumes the caller holds rcu_read_lock().
+ * Returns best originator list entry or NULL on errors.
+ */
+static struct batadv_tt_orig_list_entry *
+batadv_transtable_best_orig(struct batadv_tt_global_entry *tt_global_entry)
+{
+ struct batadv_neigh_node *router = NULL;
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct batadv_tt_orig_list_entry *orig_entry, *best_entry = NULL;
+ int best_tq = 0;
+
+ head = &tt_global_entry->orig_list;
+ hlist_for_each_entry_rcu(orig_entry, node, head, list) {
+ router = batadv_orig_node_get_router(orig_entry->orig_node);
+ if (!router)
+ continue;
+
+ if (router->tq_avg > best_tq) {
+ best_entry = orig_entry;
+ best_tq = router->tq_avg;
+ }
+
+ batadv_neigh_node_free_ref(router);
+ }
+
+ return best_entry;
+}
+
+/* batadv_tt_global_print_entry - print all orig nodes who announce the address
+ * for this global entry
+ * @tt_global_entry: global translation table entry to be printed
+ * @seq: debugfs table seq_file struct
+ *
+ * This functon assumes the caller holds rcu_read_lock().
*/
static void
batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry,
@@ -810,21 +956,37 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry,
{
struct hlist_head *head;
struct hlist_node *node;
- struct batadv_tt_orig_list_entry *orig_entry;
+ struct batadv_tt_orig_list_entry *orig_entry, *best_entry;
struct batadv_tt_common_entry *tt_common_entry;
uint16_t flags;
uint8_t last_ttvn;
tt_common_entry = &tt_global_entry->common;
+ flags = tt_common_entry->flags;
+
+ best_entry = batadv_transtable_best_orig(tt_global_entry);
+ if (best_entry) {
+ last_ttvn = atomic_read(&best_entry->orig_node->last_ttvn);
+ seq_printf(seq, " %c %pM (%3u) via %pM (%3u) [%c%c%c]\n",
+ '*', tt_global_entry->common.addr,
+ best_entry->ttvn, best_entry->orig_node->orig,
+ last_ttvn,
+ (flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'),
+ (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'),
+ (flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.'));
+ }
head = &tt_global_entry->orig_list;
hlist_for_each_entry_rcu(orig_entry, node, head, list) {
- flags = tt_common_entry->flags;
+ if (best_entry == orig_entry)
+ continue;
+
last_ttvn = atomic_read(&orig_entry->orig_node->last_ttvn);
- seq_printf(seq, " * %pM (%3u) via %pM (%3u) [%c%c%c]\n",
- tt_global_entry->common.addr, orig_entry->ttvn,
- orig_entry->orig_node->orig, last_ttvn,
+ seq_printf(seq, " %c %pM (%3u) via %pM (%3u) [%c%c%c]\n",
+ '+', tt_global_entry->common.addr,
+ orig_entry->ttvn, orig_entry->orig_node->orig,
+ last_ttvn,
(flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'),
(flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'),
(flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.'));
@@ -842,22 +1004,10 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
struct hlist_node *node;
struct hlist_head *head;
uint32_t i;
- int ret = 0;
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if) {
- ret = seq_printf(seq,
- "BATMAN mesh %s disabled - please specify interfaces to enable it\n",
- net_dev->name);
- goto out;
- }
-
- if (primary_if->if_status != BATADV_IF_ACTIVE) {
- ret = seq_printf(seq,
- "BATMAN mesh %s disabled - primary interface not active\n",
- net_dev->name);
+ primary_if = batadv_seq_print_text_primary_if_get(seq);
+ if (!primary_if)
goto out;
- }
seq_printf(seq,
"Globally announced TT entries received via the mesh %s\n",
@@ -881,7 +1031,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
out:
if (primary_if)
batadv_hardif_free_ref(primary_if);
- return ret;
+ return 0;
}
/* deletes the orig list of a tt_global_entry */
@@ -927,21 +1077,6 @@ batadv_tt_global_del_orig_entry(struct batadv_priv *bat_priv,
spin_unlock_bh(&tt_global_entry->list_lock);
}
-static void
-batadv_tt_global_del_struct(struct batadv_priv *bat_priv,
- struct batadv_tt_global_entry *tt_global_entry,
- const char *message)
-{
- batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Deleting global tt entry %pM: %s\n",
- tt_global_entry->common.addr, message);
-
- batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt,
- batadv_choose_orig, tt_global_entry->common.addr);
- batadv_tt_global_entry_free_ref(tt_global_entry);
-
-}
-
/* If the client is to be deleted, we check if it is the last origantor entry
* within tt_global entry. If yes, we set the BATADV_TT_CLIENT_ROAM flag and the
* timer, otherwise we simply remove the originator scheduled for deletion.
@@ -990,7 +1125,7 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
const unsigned char *addr,
const char *message, bool roaming)
{
- struct batadv_tt_global_entry *tt_global_entry = NULL;
+ struct batadv_tt_global_entry *tt_global_entry;
struct batadv_tt_local_entry *local_entry = NULL;
tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr);
@@ -1002,8 +1137,8 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
orig_node, message);
if (hlist_empty(&tt_global_entry->orig_list))
- batadv_tt_global_del_struct(bat_priv, tt_global_entry,
- message);
+ batadv_tt_global_free(bat_priv, tt_global_entry,
+ message);
goto out;
}
@@ -1026,7 +1161,7 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
if (local_entry) {
/* local entry exists, case 2: client roamed to us. */
batadv_tt_global_del_orig_list(tt_global_entry);
- batadv_tt_global_del_struct(bat_priv, tt_global_entry, message);
+ batadv_tt_global_free(bat_priv, tt_global_entry, message);
} else
/* no local entry exists, case 1: check for roaming */
batadv_tt_global_del_roaming(bat_priv, tt_global_entry,
@@ -1197,15 +1332,12 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
struct batadv_tt_local_entry *tt_local_entry = NULL;
struct batadv_tt_global_entry *tt_global_entry = NULL;
struct batadv_orig_node *orig_node = NULL;
- struct batadv_neigh_node *router = NULL;
- struct hlist_head *head;
- struct hlist_node *node;
- struct batadv_tt_orig_list_entry *orig_entry;
- int best_tq;
+ struct batadv_tt_orig_list_entry *best_entry;
if (src && atomic_read(&bat_priv->ap_isolation)) {
tt_local_entry = batadv_tt_local_hash_find(bat_priv, src);
- if (!tt_local_entry)
+ if (!tt_local_entry ||
+ (tt_local_entry->common.flags & BATADV_TT_CLIENT_PENDING))
goto out;
}
@@ -1220,25 +1352,15 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
_batadv_is_ap_isolated(tt_local_entry, tt_global_entry))
goto out;
- best_tq = 0;
-
rcu_read_lock();
- head = &tt_global_entry->orig_list;
- hlist_for_each_entry_rcu(orig_entry, node, head, list) {
- router = batadv_orig_node_get_router(orig_entry->orig_node);
- if (!router)
- continue;
-
- if (router->tq_avg > best_tq) {
- orig_node = orig_entry->orig_node;
- best_tq = router->tq_avg;
- }
- batadv_neigh_node_free_ref(router);
- }
+ best_entry = batadv_transtable_best_orig(tt_global_entry);
/* found anything? */
+ if (best_entry)
+ orig_node = best_entry->orig_node;
if (orig_node && !atomic_inc_not_zero(&orig_node->refcount))
orig_node = NULL;
rcu_read_unlock();
+
out:
if (tt_global_entry)
batadv_tt_global_entry_free_ref(tt_global_entry);
@@ -1471,11 +1593,11 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
tt_tot = tt_len / sizeof(struct batadv_tt_change);
len = tt_query_size + tt_len;
- skb = dev_alloc_skb(len + ETH_HLEN);
+ skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN);
if (!skb)
goto out;
- skb_reserve(skb, ETH_HLEN);
+ skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
tt_response = (struct batadv_tt_query_packet *)skb_put(skb, len);
tt_response->ttvn = ttvn;
@@ -1496,7 +1618,7 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
memcpy(tt_change->addr, tt_common_entry->addr,
ETH_ALEN);
- tt_change->flags = BATADV_NO_FLAGS;
+ tt_change->flags = tt_common_entry->flags;
tt_count++;
tt_change++;
@@ -1520,7 +1642,6 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv,
{
struct sk_buff *skb = NULL;
struct batadv_tt_query_packet *tt_request;
- struct batadv_neigh_node *neigh_node = NULL;
struct batadv_hard_iface *primary_if;
struct batadv_tt_req_node *tt_req_node = NULL;
int ret = 1;
@@ -1537,11 +1658,11 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv,
if (!tt_req_node)
goto out;
- skb = dev_alloc_skb(sizeof(*tt_request) + ETH_HLEN);
+ skb = dev_alloc_skb(sizeof(*tt_request) + ETH_HLEN + NET_IP_ALIGN);
if (!skb)
goto out;
- skb_reserve(skb, ETH_HLEN);
+ skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
tt_req_len = sizeof(*tt_request);
tt_request = (struct batadv_tt_query_packet *)skb_put(skb, tt_req_len);
@@ -1558,23 +1679,15 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv,
if (full_table)
tt_request->flags |= BATADV_TT_FULL_TABLE;
- neigh_node = batadv_orig_node_get_router(dst_orig_node);
- if (!neigh_node)
- goto out;
-
- batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Sending TT_REQUEST to %pM via %pM [%c]\n",
- dst_orig_node->orig, neigh_node->addr,
- (full_table ? 'F' : '.'));
+ batadv_dbg(BATADV_DBG_TT, bat_priv, "Sending TT_REQUEST to %pM [%c]\n",
+ dst_orig_node->orig, (full_table ? 'F' : '.'));
batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_TX);
- batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
- ret = 0;
+ if (batadv_send_skb_to_orig(skb, dst_orig_node, NULL))
+ ret = 0;
out:
- if (neigh_node)
- batadv_neigh_node_free_ref(neigh_node);
if (primary_if)
batadv_hardif_free_ref(primary_if);
if (ret)
@@ -1592,9 +1705,8 @@ static bool
batadv_send_other_tt_response(struct batadv_priv *bat_priv,
struct batadv_tt_query_packet *tt_request)
{
- struct batadv_orig_node *req_dst_orig_node = NULL;
+ struct batadv_orig_node *req_dst_orig_node;
struct batadv_orig_node *res_dst_orig_node = NULL;
- struct batadv_neigh_node *neigh_node = NULL;
struct batadv_hard_iface *primary_if = NULL;
uint8_t orig_ttvn, req_ttvn, ttvn;
int ret = false;
@@ -1620,10 +1732,6 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
if (!res_dst_orig_node)
goto out;
- neigh_node = batadv_orig_node_get_router(res_dst_orig_node);
- if (!neigh_node)
- goto out;
-
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if)
goto out;
@@ -1652,11 +1760,11 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
tt_tot = tt_len / sizeof(struct batadv_tt_change);
len = sizeof(*tt_response) + tt_len;
- skb = dev_alloc_skb(len + ETH_HLEN);
+ skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN);
if (!skb)
goto unlock;
- skb_reserve(skb, ETH_HLEN);
+ skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
packet_pos = skb_put(skb, len);
tt_response = (struct batadv_tt_query_packet *)packet_pos;
tt_response->ttvn = req_ttvn;
@@ -1695,14 +1803,13 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
tt_response->flags |= BATADV_TT_FULL_TABLE;
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Sending TT_RESPONSE %pM via %pM for %pM (ttvn: %u)\n",
- res_dst_orig_node->orig, neigh_node->addr,
- req_dst_orig_node->orig, req_ttvn);
+ "Sending TT_RESPONSE %pM for %pM (ttvn: %u)\n",
+ res_dst_orig_node->orig, req_dst_orig_node->orig, req_ttvn);
batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX);
- batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
- ret = true;
+ if (batadv_send_skb_to_orig(skb, res_dst_orig_node, NULL))
+ ret = true;
goto out;
unlock:
@@ -1713,8 +1820,6 @@ out:
batadv_orig_node_free_ref(res_dst_orig_node);
if (req_dst_orig_node)
batadv_orig_node_free_ref(req_dst_orig_node);
- if (neigh_node)
- batadv_neigh_node_free_ref(neigh_node);
if (primary_if)
batadv_hardif_free_ref(primary_if);
if (!ret)
@@ -1727,8 +1832,7 @@ static bool
batadv_send_my_tt_response(struct batadv_priv *bat_priv,
struct batadv_tt_query_packet *tt_request)
{
- struct batadv_orig_node *orig_node = NULL;
- struct batadv_neigh_node *neigh_node = NULL;
+ struct batadv_orig_node *orig_node;
struct batadv_hard_iface *primary_if = NULL;
uint8_t my_ttvn, req_ttvn, ttvn;
int ret = false;
@@ -1753,10 +1857,6 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
if (!orig_node)
goto out;
- neigh_node = batadv_orig_node_get_router(orig_node);
- if (!neigh_node)
- goto out;
-
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if)
goto out;
@@ -1779,11 +1879,11 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
tt_tot = tt_len / sizeof(struct batadv_tt_change);
len = sizeof(*tt_response) + tt_len;
- skb = dev_alloc_skb(len + ETH_HLEN);
+ skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN);
if (!skb)
goto unlock;
- skb_reserve(skb, ETH_HLEN);
+ skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
packet_pos = skb_put(skb, len);
tt_response = (struct batadv_tt_query_packet *)packet_pos;
tt_response->ttvn = req_ttvn;
@@ -1820,14 +1920,14 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
tt_response->flags |= BATADV_TT_FULL_TABLE;
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Sending TT_RESPONSE to %pM via %pM [%c]\n",
- orig_node->orig, neigh_node->addr,
+ "Sending TT_RESPONSE to %pM [%c]\n",
+ orig_node->orig,
(tt_response->flags & BATADV_TT_FULL_TABLE ? 'F' : '.'));
batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX);
- batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
- ret = true;
+ if (batadv_send_skb_to_orig(skb, orig_node, NULL))
+ ret = true;
goto out;
unlock:
@@ -1835,8 +1935,6 @@ unlock:
out:
if (orig_node)
batadv_orig_node_free_ref(orig_node);
- if (neigh_node)
- batadv_neigh_node_free_ref(neigh_node);
if (primary_if)
batadv_hardif_free_ref(primary_if);
if (!ret)
@@ -1893,7 +1991,7 @@ static void _batadv_tt_update_changes(struct batadv_priv *bat_priv,
static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv,
struct batadv_tt_query_packet *tt_response)
{
- struct batadv_orig_node *orig_node = NULL;
+ struct batadv_orig_node *orig_node;
orig_node = batadv_orig_hash_find(bat_priv, tt_response->src);
if (!orig_node)
@@ -1935,7 +2033,7 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv,
bool batadv_is_my_client(struct batadv_priv *bat_priv, const uint8_t *addr)
{
- struct batadv_tt_local_entry *tt_local_entry = NULL;
+ struct batadv_tt_local_entry *tt_local_entry;
bool ret = false;
tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr);
@@ -1944,7 +2042,8 @@ bool batadv_is_my_client(struct batadv_priv *bat_priv, const uint8_t *addr)
/* Check if the client has been logically deleted (but is kept for
* consistency purpose)
*/
- if (tt_local_entry->common.flags & BATADV_TT_CLIENT_PENDING)
+ if ((tt_local_entry->common.flags & BATADV_TT_CLIENT_PENDING) ||
+ (tt_local_entry->common.flags & BATADV_TT_CLIENT_ROAM))
goto out;
ret = true;
out:
@@ -1995,10 +2094,6 @@ void batadv_handle_tt_response(struct batadv_priv *bat_priv,
/* Recalculate the CRC for this orig_node and store it */
orig_node->tt_crc = batadv_tt_global_crc(bat_priv, orig_node);
- /* Roaming phase is over: tables are in sync again. I can
- * unset the flag
- */
- orig_node->tt_poss_change = false;
out:
if (orig_node)
batadv_orig_node_free_ref(orig_node);
@@ -2104,7 +2199,6 @@ unlock:
static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
struct batadv_orig_node *orig_node)
{
- struct batadv_neigh_node *neigh_node = NULL;
struct sk_buff *skb = NULL;
struct batadv_roam_adv_packet *roam_adv_packet;
int ret = 1;
@@ -2117,11 +2211,11 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
if (!batadv_tt_check_roam_count(bat_priv, client))
goto out;
- skb = dev_alloc_skb(sizeof(*roam_adv_packet) + ETH_HLEN);
+ skb = dev_alloc_skb(sizeof(*roam_adv_packet) + ETH_HLEN + NET_IP_ALIGN);
if (!skb)
goto out;
- skb_reserve(skb, ETH_HLEN);
+ skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len);
@@ -2137,23 +2231,17 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
memcpy(roam_adv_packet->dst, orig_node->orig, ETH_ALEN);
memcpy(roam_adv_packet->client, client, ETH_ALEN);
- neigh_node = batadv_orig_node_get_router(orig_node);
- if (!neigh_node)
- goto out;
-
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Sending ROAMING_ADV to %pM (client %pM) via %pM\n",
- orig_node->orig, client, neigh_node->addr);
+ "Sending ROAMING_ADV to %pM (client %pM)\n",
+ orig_node->orig, client);
batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX);
- batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
- ret = 0;
+ if (batadv_send_skb_to_orig(skb, orig_node, NULL))
+ ret = 0;
out:
- if (neigh_node)
- batadv_neigh_node_free_ref(neigh_node);
- if (ret)
+ if (ret && skb)
kfree_skb(skb);
return;
}
@@ -2289,7 +2377,6 @@ static int batadv_tt_commit_changes(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Local changes committed, updating to ttvn %u\n",
(uint8_t)atomic_read(&bat_priv->tt.vn));
- bat_priv->tt.poss_change = false;
/* reset the sending counter */
atomic_set(&bat_priv->tt.ogm_append_cnt, BATADV_TT_OGM_APPEND_MAX);
@@ -2401,11 +2488,6 @@ void batadv_tt_update_orig(struct batadv_priv *bat_priv,
*/
if (orig_node->tt_crc != tt_crc)
goto request_table;
-
- /* Roaming phase is over: tables are in sync again. I can
- * unset the flag
- */
- orig_node->tt_poss_change = false;
} else {
/* if we missed more than one change or our tables are not
* in sync anymore -> request fresh tt data
@@ -2438,18 +2520,51 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv,
if (!tt_global_entry)
goto out;
- ret = tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM;
+ ret = !!(tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM);
batadv_tt_global_entry_free_ref(tt_global_entry);
out:
return ret;
}
+/**
+ * batadv_tt_local_client_is_roaming - tells whether the client is roaming
+ * @bat_priv: the bat priv with all the soft interface information
+ * @addr: the MAC address of the local client to query
+ *
+ * Returns true if the local client is known to be roaming (it is not served by
+ * this node anymore) or not. If yes, the client is still present in the table
+ * to keep the latter consistent with the node TTVN
+ */
+bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv,
+ uint8_t *addr)
+{
+ struct batadv_tt_local_entry *tt_local_entry;
+ bool ret = false;
+
+ tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr);
+ if (!tt_local_entry)
+ goto out;
+
+ ret = tt_local_entry->common.flags & BATADV_TT_CLIENT_ROAM;
+ batadv_tt_local_entry_free_ref(tt_local_entry);
+out:
+ return ret;
+
+}
+
bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
const unsigned char *addr)
{
bool ret = false;
+ /* if the originator is a backbone node (meaning it belongs to the same
+ * LAN of this node) the temporary client must not be added because to
+ * reach such destination the node must use the LAN instead of the mesh
+ */
+ if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig))
+ goto out;
+
if (!batadv_tt_global_add(bat_priv, orig_node, addr,
BATADV_TT_CLIENT_TEMP,
atomic_read(&orig_node->last_ttvn)))
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 811fffd4760c..46d4451a59ee 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -24,9 +24,9 @@ int batadv_tt_len(int changes_num);
int batadv_tt_init(struct batadv_priv *bat_priv);
void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
int ifindex);
-void batadv_tt_local_remove(struct batadv_priv *bat_priv,
- const uint8_t *addr, const char *message,
- bool roaming);
+uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
+ const uint8_t *addr, const char *message,
+ bool roaming);
int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset);
void batadv_tt_global_add_orig(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
@@ -59,6 +59,8 @@ int batadv_tt_append_diff(struct batadv_priv *bat_priv,
int packet_min_len);
bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv,
uint8_t *addr);
+bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv,
+ uint8_t *addr);
bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
const unsigned char *addr);
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 2ed82caacdca..ae9ac9aca8c5 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -28,20 +28,41 @@
(ETH_HLEN + max(sizeof(struct batadv_unicast_packet), \
sizeof(struct batadv_bcast_packet)))
+#ifdef CONFIG_BATMAN_ADV_DAT
+
+/* batadv_dat_addr_t is the type used for all DHT addresses. If it is changed,
+ * BATADV_DAT_ADDR_MAX is changed as well.
+ *
+ * *Please be careful: batadv_dat_addr_t must be UNSIGNED*
+ */
+#define batadv_dat_addr_t uint16_t
+
+#endif /* CONFIG_BATMAN_ADV_DAT */
+
+/**
+ * struct batadv_hard_iface_bat_iv - per hard interface B.A.T.M.A.N. IV data
+ * @ogm_buff: buffer holding the OGM packet
+ * @ogm_buff_len: length of the OGM packet buffer
+ * @ogm_seqno: OGM sequence number - used to identify each OGM
+ */
+struct batadv_hard_iface_bat_iv {
+ unsigned char *ogm_buff;
+ int ogm_buff_len;
+ atomic_t ogm_seqno;
+};
+
struct batadv_hard_iface {
struct list_head list;
int16_t if_num;
char if_status;
struct net_device *net_dev;
- atomic_t seqno;
atomic_t frag_seqno;
- unsigned char *packet_buff;
- int packet_len;
struct kobject *hardif_obj;
atomic_t refcount;
struct packet_type batman_adv_ptype;
struct net_device *soft_iface;
struct rcu_head rcu;
+ struct batadv_hard_iface_bat_iv bat_iv;
};
/**
@@ -63,6 +84,9 @@ struct batadv_orig_node {
uint8_t orig[ETH_ALEN];
uint8_t primary_addr[ETH_ALEN];
struct batadv_neigh_node __rcu *router; /* rcu protected pointer */
+#ifdef CONFIG_BATMAN_ADV_DAT
+ batadv_dat_addr_t dat_addr;
+#endif
unsigned long *bcast_own;
uint8_t *bcast_own_sum;
unsigned long last_seen;
@@ -77,13 +101,6 @@ struct batadv_orig_node {
spinlock_t tt_buff_lock; /* protects tt_buff */
atomic_t tt_size;
bool tt_initialised;
- /* The tt_poss_change flag is used to detect an ongoing roaming phase.
- * If true, then I sent a Roaming_adv to this orig_node and I have to
- * inspect every packet directed to it to check whether it is still
- * the true destination or not. This flag will be reset to false as
- * soon as I receive a new TTVN from this orig_node
- */
- bool tt_poss_change;
uint32_t last_real_seqno;
uint8_t last_ttl;
DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
@@ -139,7 +156,7 @@ struct batadv_neigh_node {
#ifdef CONFIG_BATMAN_ADV_BLA
struct batadv_bcast_duplist_entry {
uint8_t orig[ETH_ALEN];
- uint16_t crc;
+ __be32 crc;
unsigned long entrytime;
};
#endif
@@ -162,6 +179,13 @@ enum batadv_counters {
BATADV_CNT_TT_RESPONSE_RX,
BATADV_CNT_TT_ROAM_ADV_TX,
BATADV_CNT_TT_ROAM_ADV_RX,
+#ifdef CONFIG_BATMAN_ADV_DAT
+ BATADV_CNT_DAT_GET_TX,
+ BATADV_CNT_DAT_GET_RX,
+ BATADV_CNT_DAT_PUT_TX,
+ BATADV_CNT_DAT_PUT_RX,
+ BATADV_CNT_DAT_CACHED_REPLY_TX,
+#endif
BATADV_CNT_NUM,
};
@@ -181,7 +205,6 @@ struct batadv_priv_tt {
atomic_t vn;
atomic_t ogm_append_cnt;
atomic_t local_changes;
- bool poss_change;
struct list_head changes_list;
struct batadv_hashtable *local_hash;
struct batadv_hashtable *global_hash;
@@ -205,6 +228,8 @@ struct batadv_priv_bla {
struct batadv_hashtable *backbone_hash;
struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE];
int bcast_duplist_curr;
+ /* protects bcast_duplist and bcast_duplist_curr */
+ spinlock_t bcast_duplist_lock;
struct batadv_bla_claim_dst claim_dest;
struct delayed_work work;
};
@@ -226,6 +251,20 @@ struct batadv_priv_vis {
struct batadv_vis_info *my_info;
};
+/**
+ * struct batadv_priv_dat - per mesh interface DAT private data
+ * @addr: node DAT address
+ * @hash: hashtable representing the local ARP cache
+ * @work: work queue callback item for cache purging
+ */
+#ifdef CONFIG_BATMAN_ADV_DAT
+struct batadv_priv_dat {
+ batadv_dat_addr_t addr;
+ struct batadv_hashtable *hash;
+ struct delayed_work work;
+};
+#endif
+
struct batadv_priv {
atomic_t mesh_state;
struct net_device_stats stats;
@@ -235,6 +274,9 @@ struct batadv_priv {
atomic_t fragmentation; /* boolean */
atomic_t ap_isolation; /* boolean */
atomic_t bridge_loop_avoidance; /* boolean */
+#ifdef CONFIG_BATMAN_ADV_DAT
+ atomic_t distributed_arp_table; /* boolean */
+#endif
atomic_t vis_mode; /* VIS_TYPE_* */
atomic_t gw_mode; /* GW_MODE_* */
atomic_t gw_sel_class; /* uint */
@@ -253,7 +295,7 @@ struct batadv_priv {
struct hlist_head forw_bcast_list;
struct batadv_hashtable *orig_hash;
spinlock_t forw_bat_list_lock; /* protects forw_bat_list */
- spinlock_t forw_bcast_list_lock; /* protects */
+ spinlock_t forw_bcast_list_lock; /* protects forw_bcast_list */
struct delayed_work orig_work;
struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */
struct batadv_algo_ops *bat_algo_ops;
@@ -263,6 +305,9 @@ struct batadv_priv {
struct batadv_priv_gw gw;
struct batadv_priv_tt tt;
struct batadv_priv_vis vis;
+#ifdef CONFIG_BATMAN_ADV_DAT
+ struct batadv_priv_dat dat;
+#endif
};
struct batadv_socket_client {
@@ -316,6 +361,7 @@ struct batadv_backbone_gw {
struct hlist_node hash_entry;
struct batadv_priv *bat_priv;
unsigned long lasttime; /* last time we heard of this backbone gw */
+ atomic_t wait_periods;
atomic_t request_sent;
atomic_t refcount;
struct rcu_head rcu;
@@ -435,4 +481,36 @@ struct batadv_algo_ops {
void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet);
};
+/**
+ * struct batadv_dat_entry - it is a single entry of batman-adv ARP backend. It
+ * is used to stored ARP entries needed for the global DAT cache
+ * @ip: the IPv4 corresponding to this DAT/ARP entry
+ * @mac_addr: the MAC address associated to the stored IPv4
+ * @last_update: time in jiffies when this entry was refreshed last time
+ * @hash_entry: hlist node for batadv_priv_dat::hash
+ * @refcount: number of contexts the object is used
+ * @rcu: struct used for freeing in an RCU-safe manner
+ */
+struct batadv_dat_entry {
+ __be32 ip;
+ uint8_t mac_addr[ETH_ALEN];
+ unsigned long last_update;
+ struct hlist_node hash_entry;
+ atomic_t refcount;
+ struct rcu_head rcu;
+};
+
+/**
+ * struct batadv_dat_candidate - candidate destination for DAT operations
+ * @type: the type of the selected candidate. It can one of the following:
+ * - BATADV_DAT_CANDIDATE_NOT_FOUND
+ * - BATADV_DAT_CANDIDATE_ORIG
+ * @orig_node: if type is BATADV_DAT_CANDIDATE_ORIG this field points to the
+ * corresponding originator node structure
+ */
+struct batadv_dat_candidate {
+ int type;
+ struct batadv_orig_node *orig_node;
+};
+
#endif /* _NET_BATMAN_ADV_TYPES_H_ */
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index f39723281ca1..10aff49fcf25 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -291,14 +291,118 @@ out:
return ret;
}
-int batadv_unicast_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv)
+/**
+ * batadv_unicast_push_and_fill_skb - extends the buffer and initializes the
+ * common fields for unicast packets
+ * @skb: packet
+ * @hdr_size: amount of bytes to push at the beginning of the skb
+ * @orig_node: the destination node
+ *
+ * Returns false if the buffer extension was not possible or true otherwise
+ */
+static bool batadv_unicast_push_and_fill_skb(struct sk_buff *skb, int hdr_size,
+ struct batadv_orig_node *orig_node)
+{
+ struct batadv_unicast_packet *unicast_packet;
+ uint8_t ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
+
+ if (batadv_skb_head_push(skb, hdr_size) < 0)
+ return false;
+
+ unicast_packet = (struct batadv_unicast_packet *)skb->data;
+ unicast_packet->header.version = BATADV_COMPAT_VERSION;
+ /* batman packet type: unicast */
+ unicast_packet->header.packet_type = BATADV_UNICAST;
+ /* set unicast ttl */
+ unicast_packet->header.ttl = BATADV_TTL;
+ /* copy the destination for faster routing */
+ memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN);
+ /* set the destination tt version number */
+ unicast_packet->ttvn = ttvn;
+
+ return true;
+}
+
+/**
+ * batadv_unicast_prepare_skb - encapsulate an skb with a unicast header
+ * @skb: the skb containing the payload to encapsulate
+ * @orig_node: the destination node
+ *
+ * Returns false if the payload could not be encapsulated or true otherwise
+ */
+static bool batadv_unicast_prepare_skb(struct sk_buff *skb,
+ struct batadv_orig_node *orig_node)
+{
+ size_t uni_size = sizeof(struct batadv_unicast_packet);
+ return batadv_unicast_push_and_fill_skb(skb, uni_size, orig_node);
+}
+
+/**
+ * batadv_unicast_4addr_prepare_skb - encapsulate an skb with a unicast4addr
+ * header
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb containing the payload to encapsulate
+ * @orig_node: the destination node
+ * @packet_subtype: the batman 4addr packet subtype to use
+ *
+ * Returns false if the payload could not be encapsulated or true otherwise
+ */
+bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ struct batadv_orig_node *orig,
+ int packet_subtype)
+{
+ struct batadv_hard_iface *primary_if;
+ struct batadv_unicast_4addr_packet *unicast_4addr_packet;
+ bool ret = false;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if)
+ goto out;
+
+ /* pull the header space and fill the unicast_packet substructure.
+ * We can do that because the first member of the unicast_4addr_packet
+ * is of type struct unicast_packet
+ */
+ if (!batadv_unicast_push_and_fill_skb(skb,
+ sizeof(*unicast_4addr_packet),
+ orig))
+ goto out;
+
+ unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
+ unicast_4addr_packet->u.header.packet_type = BATADV_UNICAST_4ADDR;
+ memcpy(unicast_4addr_packet->src, primary_if->net_dev->dev_addr,
+ ETH_ALEN);
+ unicast_4addr_packet->subtype = packet_subtype;
+ unicast_4addr_packet->reserved = 0;
+
+ ret = true;
+out:
+ if (primary_if)
+ batadv_hardif_free_ref(primary_if);
+ return ret;
+}
+
+/**
+ * batadv_unicast_generic_send_skb - send an skb as unicast
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: payload to send
+ * @packet_type: the batman unicast packet type to use
+ * @packet_subtype: the batman packet subtype. It is ignored if packet_type is
+ * not BATADV_UNICAT_4ADDR
+ *
+ * Returns 1 in case of error or 0 otherwise
+ */
+int batadv_unicast_generic_send_skb(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, int packet_type,
+ int packet_subtype)
{
struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
struct batadv_unicast_packet *unicast_packet;
struct batadv_orig_node *orig_node;
struct batadv_neigh_node *neigh_node;
int data_len = skb->len;
- int ret = 1;
+ int ret = NET_RX_DROP;
unsigned int dev_mtu;
/* get routing information */
@@ -324,21 +428,23 @@ find_router:
if (!neigh_node)
goto out;
- if (batadv_skb_head_push(skb, sizeof(*unicast_packet)) < 0)
+ switch (packet_type) {
+ case BATADV_UNICAST:
+ batadv_unicast_prepare_skb(skb, orig_node);
+ break;
+ case BATADV_UNICAST_4ADDR:
+ batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node,
+ packet_subtype);
+ break;
+ default:
+ /* this function supports UNICAST and UNICAST_4ADDR only. It
+ * should never be invoked with any other packet type
+ */
goto out;
+ }
unicast_packet = (struct batadv_unicast_packet *)skb->data;
- unicast_packet->header.version = BATADV_COMPAT_VERSION;
- /* batman packet type: unicast */
- unicast_packet->header.packet_type = BATADV_UNICAST;
- /* set unicast ttl */
- unicast_packet->header.ttl = BATADV_TTL;
- /* copy the destination for faster routing */
- memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN);
- /* set the destination tt version number */
- unicast_packet->ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
-
/* inform the destination node that we are still missing a correct route
* for this client. The destination will receive this packet and will
* try to reroute it because the ttvn contained in the header is less
@@ -348,7 +454,9 @@ find_router:
unicast_packet->ttvn = unicast_packet->ttvn - 1;
dev_mtu = neigh_node->if_incoming->net_dev->mtu;
- if (atomic_read(&bat_priv->fragmentation) &&
+ /* fragmentation mechanism only works for UNICAST (now) */
+ if (packet_type == BATADV_UNICAST &&
+ atomic_read(&bat_priv->fragmentation) &&
data_len + sizeof(*unicast_packet) > dev_mtu) {
/* send frag skb decreases ttl */
unicast_packet->header.ttl++;
@@ -358,16 +466,15 @@ find_router:
goto out;
}
- batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
- ret = 0;
- goto out;
+ if (batadv_send_skb_to_orig(skb, orig_node, NULL))
+ ret = 0;
out:
if (neigh_node)
batadv_neigh_node_free_ref(neigh_node);
if (orig_node)
batadv_orig_node_free_ref(orig_node);
- if (ret == 1)
+ if (ret == NET_RX_DROP)
kfree_skb(skb);
return ret;
}
diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h
index 1c46e2eb1ef9..61abba58bd8f 100644
--- a/net/batman-adv/unicast.h
+++ b/net/batman-adv/unicast.h
@@ -29,10 +29,44 @@ int batadv_frag_reassemble_skb(struct sk_buff *skb,
struct batadv_priv *bat_priv,
struct sk_buff **new_skb);
void batadv_frag_list_free(struct list_head *head);
-int batadv_unicast_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv);
int batadv_frag_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv,
struct batadv_hard_iface *hard_iface,
const uint8_t dstaddr[]);
+bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ struct batadv_orig_node *orig_node,
+ int packet_subtype);
+int batadv_unicast_generic_send_skb(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, int packet_type,
+ int packet_subtype);
+
+
+/**
+ * batadv_unicast_send_skb - send the skb encapsulated in a unicast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the payload to send
+ */
+static inline int batadv_unicast_send_skb(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ return batadv_unicast_generic_send_skb(bat_priv, skb, BATADV_UNICAST,
+ 0);
+}
+
+/**
+ * batadv_unicast_send_skb - send the skb encapsulated in a unicast4addr packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the payload to send
+ * @packet_subtype: the batman 4addr packet subtype to use
+ */
+static inline int batadv_unicast_4addr_send_skb(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ int packet_subtype)
+{
+ return batadv_unicast_generic_send_skb(bat_priv, skb,
+ BATADV_UNICAST_4ADDR,
+ packet_subtype);
+}
static inline int batadv_frag_can_reassemble(const struct sk_buff *skb, int mtu)
{
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index 5abd1454fb07..0f65a9de5f74 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -396,12 +396,12 @@ batadv_add_packet(struct batadv_priv *bat_priv,
return NULL;
len = sizeof(*packet) + vis_info_len;
- info->skb_packet = dev_alloc_skb(len + ETH_HLEN);
+ info->skb_packet = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN);
if (!info->skb_packet) {
kfree(info);
return NULL;
}
- skb_reserve(info->skb_packet, ETH_HLEN);
+ skb_reserve(info->skb_packet, ETH_HLEN + NET_IP_ALIGN);
packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len);
kref_init(&info->refcount);
@@ -698,15 +698,12 @@ static void batadv_purge_vis_packets(struct batadv_priv *bat_priv)
static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv,
struct batadv_vis_info *info)
{
- struct batadv_neigh_node *router;
struct batadv_hashtable *hash = bat_priv->orig_hash;
struct hlist_node *node;
struct hlist_head *head;
struct batadv_orig_node *orig_node;
struct batadv_vis_packet *packet;
struct sk_buff *skb;
- struct batadv_hard_iface *hard_iface;
- uint8_t dstaddr[ETH_ALEN];
uint32_t i;
@@ -722,30 +719,20 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv,
if (!(orig_node->flags & BATADV_VIS_SERVER))
continue;
- router = batadv_orig_node_get_router(orig_node);
- if (!router)
- continue;
-
/* don't send it if we already received the packet from
* this node.
*/
if (batadv_recv_list_is_in(bat_priv, &info->recv_list,
- orig_node->orig)) {
- batadv_neigh_node_free_ref(router);
+ orig_node->orig))
continue;
- }
memcpy(packet->target_orig, orig_node->orig, ETH_ALEN);
- hard_iface = router->if_incoming;
- memcpy(dstaddr, router->addr, ETH_ALEN);
-
- batadv_neigh_node_free_ref(router);
-
skb = skb_clone(info->skb_packet, GFP_ATOMIC);
- if (skb)
- batadv_send_skb_packet(skb, hard_iface,
- dstaddr);
+ if (!skb)
+ continue;
+ if (!batadv_send_skb_to_orig(skb, orig_node, NULL))
+ kfree_skb(skb);
}
rcu_read_unlock();
}
@@ -755,7 +742,6 @@ static void batadv_unicast_vis_packet(struct batadv_priv *bat_priv,
struct batadv_vis_info *info)
{
struct batadv_orig_node *orig_node;
- struct batadv_neigh_node *router = NULL;
struct sk_buff *skb;
struct batadv_vis_packet *packet;
@@ -765,17 +751,14 @@ static void batadv_unicast_vis_packet(struct batadv_priv *bat_priv,
if (!orig_node)
goto out;
- router = batadv_orig_node_get_router(orig_node);
- if (!router)
+ skb = skb_clone(info->skb_packet, GFP_ATOMIC);
+ if (!skb)
goto out;
- skb = skb_clone(info->skb_packet, GFP_ATOMIC);
- if (skb)
- batadv_send_skb_packet(skb, router->if_incoming, router->addr);
+ if (!batadv_send_skb_to_orig(skb, orig_node, NULL))
+ kfree_skb(skb);
out:
- if (router)
- batadv_neigh_node_free_ref(router);
if (orig_node)
batadv_orig_node_free_ref(orig_node);
}
@@ -873,12 +856,13 @@ int batadv_vis_init(struct batadv_priv *bat_priv)
if (!bat_priv->vis.my_info)
goto err;
- len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE + ETH_HLEN;
+ len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE;
+ len += ETH_HLEN + NET_IP_ALIGN;
bat_priv->vis.my_info->skb_packet = dev_alloc_skb(len);
if (!bat_priv->vis.my_info->skb_packet)
goto free_info;
- skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN);
+ skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN + NET_IP_ALIGN);
tmp_skb = bat_priv->vis.my_info->skb_packet;
packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet));
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 070e8a68cfc6..7c78e2640190 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -313,6 +313,8 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_fdb_add = br_fdb_add,
.ndo_fdb_del = br_fdb_delete,
.ndo_fdb_dump = br_fdb_dump,
+ .ndo_bridge_getlink = br_getlink,
+ .ndo_bridge_setlink = br_setlink,
};
static void br_dev_free(struct net_device *dev)
@@ -356,7 +358,7 @@ void br_dev_setup(struct net_device *dev)
br->bridge_id.prio[0] = 0x80;
br->bridge_id.prio[1] = 0x00;
- memcpy(br->group_addr, br_group_address, ETH_ALEN);
+ memcpy(br->group_addr, eth_reserved_addr_base, ETH_ALEN);
br->stp_enabled = BR_NO_STP;
br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 76f15fda0212..4b34207419b1 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -19,9 +19,6 @@
#include <linux/export.h>
#include "br_private.h"
-/* Bridge group multicast address 802.1d (pg 51). */
-const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
-
/* Hook for brouter */
br_should_route_hook_t __rcu *br_should_route_hook __read_mostly;
EXPORT_SYMBOL(br_should_route_hook);
@@ -127,18 +124,6 @@ static int br_handle_local_finish(struct sk_buff *skb)
return 0; /* process further */
}
-/* Does address match the link local multicast address.
- * 01:80:c2:00:00:0X
- */
-static inline int is_link_local(const unsigned char *dest)
-{
- __be16 *a = (__be16 *)dest;
- static const __be16 *b = (const __be16 *)br_group_address;
- static const __be16 m = cpu_to_be16(0xfff0);
-
- return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0;
-}
-
/*
* Return NULL if skb is handled
* note: already called with rcu_read_lock
@@ -162,7 +147,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
p = br_port_get_rcu(skb->dev);
- if (unlikely(is_link_local(dest))) {
+ if (unlikely(is_link_local_ether_addr(dest))) {
/*
* See IEEE 802.1D Table 7-10 Reserved addresses
*
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 7222fe1d5460..cd8c3a44ab7d 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -85,13 +85,14 @@ static int get_fdb_entries(struct net_bridge *br, void __user *userbuf,
/* called with RTNL */
static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
{
+ struct net *net = dev_net(br->dev);
struct net_device *dev;
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- dev = __dev_get_by_index(dev_net(br->dev), ifindex);
+ dev = __dev_get_by_index(net, ifindex);
if (dev == NULL)
return -EINVAL;
@@ -178,25 +179,25 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
}
case BRCTL_SET_BRIDGE_FORWARD_DELAY:
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
return br_set_forward_delay(br, args[1]);
case BRCTL_SET_BRIDGE_HELLO_TIME:
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
return br_set_hello_time(br, args[1]);
case BRCTL_SET_BRIDGE_MAX_AGE:
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
return br_set_max_age(br, args[1]);
case BRCTL_SET_AGEING_TIME:
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
br->ageing_time = clock_t_to_jiffies(args[1]);
@@ -236,14 +237,14 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
}
case BRCTL_SET_BRIDGE_STP_STATE:
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
br_stp_set_enabled(br, args[1]);
return 0;
case BRCTL_SET_BRIDGE_PRIORITY:
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
spin_lock_bh(&br->lock);
@@ -256,7 +257,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
struct net_bridge_port *p;
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
spin_lock_bh(&br->lock);
@@ -273,7 +274,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
struct net_bridge_port *p;
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
spin_lock_bh(&br->lock);
@@ -330,7 +331,7 @@ static int old_deviceless(struct net *net, void __user *uarg)
{
char buf[IFNAMSIZ];
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (copy_from_user(buf, (void __user *)args[1], IFNAMSIZ))
@@ -360,7 +361,7 @@ int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uar
{
char buf[IFNAMSIZ];
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (copy_from_user(buf, uarg, IFNAMSIZ))
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 093f527276a3..65429b99a2a3 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -20,16 +20,43 @@
#include "br_private.h"
#include "br_private_stp.h"
+static inline size_t br_port_info_size(void)
+{
+ return nla_total_size(1) /* IFLA_BRPORT_STATE */
+ + nla_total_size(2) /* IFLA_BRPORT_PRIORITY */
+ + nla_total_size(4) /* IFLA_BRPORT_COST */
+ + nla_total_size(1) /* IFLA_BRPORT_MODE */
+ + nla_total_size(1) /* IFLA_BRPORT_GUARD */
+ + nla_total_size(1) /* IFLA_BRPORT_PROTECT */
+ + 0;
+}
+
static inline size_t br_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct ifinfomsg))
- + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
- + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
- + nla_total_size(4) /* IFLA_MASTER */
- + nla_total_size(4) /* IFLA_MTU */
- + nla_total_size(4) /* IFLA_LINK */
- + nla_total_size(1) /* IFLA_OPERSTATE */
- + nla_total_size(1); /* IFLA_PROTINFO */
+ + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+ + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+ + nla_total_size(4) /* IFLA_MASTER */
+ + nla_total_size(4) /* IFLA_MTU */
+ + nla_total_size(4) /* IFLA_LINK */
+ + nla_total_size(1) /* IFLA_OPERSTATE */
+ + nla_total_size(br_port_info_size()); /* IFLA_PROTINFO */
+}
+
+static int br_port_fill_attrs(struct sk_buff *skb,
+ const struct net_bridge_port *p)
+{
+ u8 mode = !!(p->flags & BR_HAIRPIN_MODE);
+
+ if (nla_put_u8(skb, IFLA_BRPORT_STATE, p->state) ||
+ nla_put_u16(skb, IFLA_BRPORT_PRIORITY, p->priority) ||
+ nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) ||
+ nla_put_u8(skb, IFLA_BRPORT_MODE, mode) ||
+ nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) ||
+ nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)))
+ return -EMSGSIZE;
+
+ return 0;
}
/*
@@ -67,10 +94,18 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por
(dev->addr_len &&
nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
(dev->ifindex != dev->iflink &&
- nla_put_u32(skb, IFLA_LINK, dev->iflink)) ||
- (event == RTM_NEWLINK &&
- nla_put_u8(skb, IFLA_PROTINFO, port->state)))
+ nla_put_u32(skb, IFLA_LINK, dev->iflink)))
goto nla_put_failure;
+
+ if (event == RTM_NEWLINK) {
+ struct nlattr *nest
+ = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED);
+
+ if (nest == NULL || br_port_fill_attrs(skb, port) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+ }
+
return nlmsg_end(skb, nlh);
nla_put_failure:
@@ -111,89 +146,133 @@ errout:
/*
* Dump information about all ports, in response to GETLINK
*/
-static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+ struct net_device *dev)
{
- struct net *net = sock_net(skb->sk);
- struct net_device *dev;
- int idx;
-
- idx = 0;
- rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
- struct net_bridge_port *port = br_port_get_rcu(dev);
-
- /* not a bridge port */
- if (!port || idx < cb->args[0])
- goto skip;
-
- if (br_fill_ifinfo(skb, port,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, RTM_NEWLINK,
- NLM_F_MULTI) < 0)
- break;
-skip:
- ++idx;
+ int err = 0;
+ struct net_bridge_port *port = br_port_get_rcu(dev);
+
+ /* not a bridge port */
+ if (!port)
+ goto out;
+
+ err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI);
+out:
+ return err;
+}
+
+static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = {
+ [IFLA_BRPORT_STATE] = { .type = NLA_U8 },
+ [IFLA_BRPORT_COST] = { .type = NLA_U32 },
+ [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 },
+ [IFLA_BRPORT_MODE] = { .type = NLA_U8 },
+ [IFLA_BRPORT_GUARD] = { .type = NLA_U8 },
+ [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 },
+};
+
+/* Change the state of the port and notify spanning tree */
+static int br_set_port_state(struct net_bridge_port *p, u8 state)
+{
+ if (state > BR_STATE_BLOCKING)
+ return -EINVAL;
+
+ /* if kernel STP is running, don't allow changes */
+ if (p->br->stp_enabled == BR_KERNEL_STP)
+ return -EBUSY;
+
+ if (!netif_running(p->dev) ||
+ (!netif_carrier_ok(p->dev) && state != BR_STATE_DISABLED))
+ return -ENETDOWN;
+
+ p->state = state;
+ br_log_state(p);
+ br_port_state_selection(p->br);
+ return 0;
+}
+
+/* Set/clear or port flags based on attribute */
+static void br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[],
+ int attrtype, unsigned long mask)
+{
+ if (tb[attrtype]) {
+ u8 flag = nla_get_u8(tb[attrtype]);
+ if (flag)
+ p->flags |= mask;
+ else
+ p->flags &= ~mask;
}
- rcu_read_unlock();
- cb->args[0] = idx;
+}
+
+/* Process bridge protocol info on port */
+static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
+{
+ int err;
- return skb->len;
+ br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE);
+ br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD);
+
+ if (tb[IFLA_BRPORT_COST]) {
+ err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST]));
+ if (err)
+ return err;
+ }
+
+ if (tb[IFLA_BRPORT_PRIORITY]) {
+ err = br_stp_set_port_priority(p, nla_get_u16(tb[IFLA_BRPORT_PRIORITY]));
+ if (err)
+ return err;
+ }
+
+ if (tb[IFLA_BRPORT_STATE]) {
+ err = br_set_port_state(p, nla_get_u8(tb[IFLA_BRPORT_STATE]));
+ if (err)
+ return err;
+ }
+ return 0;
}
-/*
- * Change state of port (ie from forwarding to blocking etc)
- * Used by spanning tree in user space.
- */
-static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+/* Change state and parameters on port. */
+int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
{
- struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
struct nlattr *protinfo;
- struct net_device *dev;
struct net_bridge_port *p;
- u8 new_state;
-
- if (nlmsg_len(nlh) < sizeof(*ifm))
- return -EINVAL;
+ struct nlattr *tb[IFLA_BRPORT_MAX];
+ int err;
ifm = nlmsg_data(nlh);
- if (ifm->ifi_family != AF_BRIDGE)
- return -EPFNOSUPPORT;
protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO);
- if (!protinfo || nla_len(protinfo) < sizeof(u8))
- return -EINVAL;
-
- new_state = nla_get_u8(protinfo);
- if (new_state > BR_STATE_BLOCKING)
- return -EINVAL;
-
- dev = __dev_get_by_index(net, ifm->ifi_index);
- if (!dev)
- return -ENODEV;
+ if (!protinfo)
+ return 0;
p = br_port_get_rtnl(dev);
if (!p)
return -EINVAL;
- /* if kernel STP is running, don't allow changes */
- if (p->br->stp_enabled == BR_KERNEL_STP)
- return -EBUSY;
-
- if (!netif_running(dev) ||
- (!netif_carrier_ok(dev) && new_state != BR_STATE_DISABLED))
- return -ENETDOWN;
-
- p->state = new_state;
- br_log_state(p);
+ if (protinfo->nla_type & NLA_F_NESTED) {
+ err = nla_parse_nested(tb, IFLA_BRPORT_MAX,
+ protinfo, ifla_brport_policy);
+ if (err)
+ return err;
+
+ spin_lock_bh(&p->br->lock);
+ err = br_setport(p, tb);
+ spin_unlock_bh(&p->br->lock);
+ } else {
+ /* Binary compatability with old RSTP */
+ if (nla_len(protinfo) < sizeof(u8))
+ return -EINVAL;
- spin_lock_bh(&p->br->lock);
- br_port_state_selection(p->br);
- spin_unlock_bh(&p->br->lock);
+ spin_lock_bh(&p->br->lock);
+ err = br_set_port_state(p, nla_get_u8(protinfo));
+ spin_unlock_bh(&p->br->lock);
+ }
- br_ifinfo_notify(RTM_NEWLINK, p);
+ if (err == 0)
+ br_ifinfo_notify(RTM_NEWLINK, p);
- return 0;
+ return err;
}
static int br_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -218,29 +297,7 @@ struct rtnl_link_ops br_link_ops __read_mostly = {
int __init br_netlink_init(void)
{
- int err;
-
- err = rtnl_link_register(&br_link_ops);
- if (err < 0)
- goto err1;
-
- err = __rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL,
- br_dump_ifinfo, NULL);
- if (err)
- goto err2;
- err = __rtnl_register(PF_BRIDGE, RTM_SETLINK,
- br_rtm_setlink, NULL, NULL);
- if (err)
- goto err3;
-
- return 0;
-
-err3:
- rtnl_unregister_all(PF_BRIDGE);
-err2:
- rtnl_link_unregister(&br_link_ops);
-err1:
- return err;
+ return rtnl_link_register(&br_link_ops);
}
void __exit br_netlink_fini(void)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 9b278c4ebee1..eb9cd42146a5 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -135,6 +135,8 @@ struct net_bridge_port
unsigned long flags;
#define BR_HAIRPIN_MODE 0x00000001
+#define BR_BPDU_GUARD 0x00000002
+#define BR_ROOT_BLOCK 0x00000004
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
u32 multicast_startup_queries_sent;
@@ -158,7 +160,9 @@ struct net_bridge_port
static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev)
{
- struct net_bridge_port *port = rcu_dereference(dev->rx_handler_data);
+ struct net_bridge_port *port =
+ rcu_dereference_rtnl(dev->rx_handler_data);
+
return br_port_exists(dev) ? port : NULL;
}
@@ -288,7 +292,6 @@ struct br_input_skb_cb {
pr_debug("%s: " format, (br)->dev->name, ##args)
extern struct notifier_block br_device_notifier;
-extern const u8 br_group_address[ETH_ALEN];
/* called under bridge lock */
static inline int br_is_root_bridge(const struct net_bridge *br)
@@ -553,6 +556,9 @@ extern struct rtnl_link_ops br_link_ops;
extern int br_netlink_init(void);
extern void br_netlink_fini(void);
extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
+extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg);
+extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+ struct net_device *dev);
#ifdef CONFIG_SYSFS
/* br_sysfs_if.c */
@@ -566,10 +572,10 @@ extern void br_sysfs_delbr(struct net_device *dev);
#else
-#define br_sysfs_addif(p) (0)
-#define br_sysfs_renameif(p) (0)
-#define br_sysfs_addbr(dev) (0)
-#define br_sysfs_delbr(dev) do { } while(0)
+static inline int br_sysfs_addif(struct net_bridge_port *p) { return 0; }
+static inline int br_sysfs_renameif(struct net_bridge_port *p) { return 0; }
+static inline int br_sysfs_addbr(struct net_device *dev) { return 0; }
+static inline void br_sysfs_delbr(struct net_device *dev) { return; }
#endif /* CONFIG_SYSFS */
#endif
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index af9a12099ba4..b01849a74310 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -100,6 +100,21 @@ static int br_should_become_root_port(const struct net_bridge_port *p,
return 0;
}
+static void br_root_port_block(const struct net_bridge *br,
+ struct net_bridge_port *p)
+{
+
+ br_notice(br, "port %u(%s) tried to become root port (blocked)",
+ (unsigned int) p->port_no, p->dev->name);
+
+ p->state = BR_STATE_LISTENING;
+ br_log_state(p);
+ br_ifinfo_notify(RTM_NEWLINK, p);
+
+ if (br->forward_delay > 0)
+ mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay);
+}
+
/* called under bridge lock */
static void br_root_selection(struct net_bridge *br)
{
@@ -107,7 +122,12 @@ static void br_root_selection(struct net_bridge *br)
u16 root_port = 0;
list_for_each_entry(p, &br->port_list, list) {
- if (br_should_become_root_port(p, root_port))
+ if (!br_should_become_root_port(p, root_port))
+ continue;
+
+ if (p->flags & BR_ROOT_BLOCK)
+ br_root_port_block(br, p);
+ else
root_port = p->port_no;
}
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index fd30a6022dea..7f884e3fb955 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -170,6 +170,13 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
if (!ether_addr_equal(dest, br->group_addr))
goto out;
+ if (p->flags & BR_BPDU_GUARD) {
+ br_notice(br, "BPDU received on blocked port %u(%s)\n",
+ (unsigned int) p->port_no, p->dev->name);
+ br_stp_disable_port(p);
+ goto out;
+ }
+
buf = skb_pull(skb, 3);
if (buf[0] == BPDU_TYPE_CONFIG) {
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index c5c059333eab..5913a3a0047b 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -14,6 +14,7 @@
#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
#include <linux/if_bridge.h>
#include <linux/rtnetlink.h>
#include <linux/spinlock.h>
@@ -36,7 +37,7 @@ static ssize_t store_bridge_parm(struct device *d,
unsigned long val;
int err;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
val = simple_strtoul(buf, &endp, 0);
@@ -132,7 +133,7 @@ static ssize_t store_stp_state(struct device *d,
char *endp;
unsigned long val;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
val = simple_strtoul(buf, &endp, 0);
@@ -165,7 +166,7 @@ static ssize_t store_group_fwd_mask(struct device *d,
char *endp;
unsigned long val;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
val = simple_strtoul(buf, &endp, 0);
@@ -297,23 +298,18 @@ static ssize_t store_group_addr(struct device *d,
const char *buf, size_t len)
{
struct net_bridge *br = to_bridge(d);
- unsigned int new_addr[6];
+ u8 new_addr[6];
int i;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
- if (sscanf(buf, "%x:%x:%x:%x:%x:%x",
+ if (sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
&new_addr[0], &new_addr[1], &new_addr[2],
&new_addr[3], &new_addr[4], &new_addr[5]) != 6)
return -EINVAL;
- /* Must be 01:80:c2:00:00:0X */
- for (i = 0; i < 5; i++)
- if (new_addr[i] != br_group_address[i])
- return -EINVAL;
-
- if (new_addr[5] & ~0xf)
+ if (!is_link_local_ether_addr(new_addr))
return -EINVAL;
if (new_addr[5] == 1 || /* 802.3x Pause address */
@@ -337,7 +333,7 @@ static ssize_t store_flush(struct device *d,
{
struct net_bridge *br = to_bridge(d);
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
br_fdb_flush(br);
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 13b36bdc76a7..7ff95ba21982 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -34,6 +34,28 @@ const struct brport_attribute brport_attr_##_name = { \
.store = _store, \
};
+#define BRPORT_ATTR_FLAG(_name, _mask) \
+static ssize_t show_##_name(struct net_bridge_port *p, char *buf) \
+{ \
+ return sprintf(buf, "%d\n", !!(p->flags & _mask)); \
+} \
+static int store_##_name(struct net_bridge_port *p, unsigned long v) \
+{ \
+ unsigned long flags = p->flags; \
+ if (v) \
+ flags |= _mask; \
+ else \
+ flags &= ~_mask; \
+ if (flags != p->flags) { \
+ p->flags = flags; \
+ br_ifinfo_notify(RTM_NEWLINK, p); \
+ } \
+ return 0; \
+} \
+static BRPORT_ATTR(_name, S_IRUGO | S_IWUSR, \
+ show_##_name, store_##_name)
+
+
static ssize_t show_path_cost(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->path_cost);
@@ -133,21 +155,9 @@ static int store_flush(struct net_bridge_port *p, unsigned long v)
}
static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
-static ssize_t show_hairpin_mode(struct net_bridge_port *p, char *buf)
-{
- int hairpin_mode = (p->flags & BR_HAIRPIN_MODE) ? 1 : 0;
- return sprintf(buf, "%d\n", hairpin_mode);
-}
-static int store_hairpin_mode(struct net_bridge_port *p, unsigned long v)
-{
- if (v)
- p->flags |= BR_HAIRPIN_MODE;
- else
- p->flags &= ~BR_HAIRPIN_MODE;
- return 0;
-}
-static BRPORT_ATTR(hairpin_mode, S_IRUGO | S_IWUSR,
- show_hairpin_mode, store_hairpin_mode);
+BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE);
+BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD);
+BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK);
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
@@ -181,6 +191,8 @@ static const struct brport_attribute *brport_attrs[] = {
&brport_attr_hold_timer,
&brport_attr_flush,
&brport_attr_hairpin_mode,
+ &brport_attr_bpdu_guard,
+ &brport_attr_root_block,
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
&brport_attr_multicast_router,
#endif
@@ -209,7 +221,7 @@ static ssize_t brport_store(struct kobject * kobj,
char *endp;
unsigned long val;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(dev_net(p->dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
val = simple_strtoul(buf, &endp, 0);
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 44f270fc2d06..a376ec1ac0a7 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -515,8 +515,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
client_layer : NULL);
}
- if (req != NULL)
- kfree(req);
+ kfree(req);
spin_unlock_bh(&cfctrl->info_list_lock);
}
diff --git a/net/can/gw.c b/net/can/gw.c
index 1f5c9785a262..574dda78eb0f 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -751,6 +751,9 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh,
struct cgw_job *gwj;
int err = 0;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (nlmsg_len(nlh) < sizeof(*r))
return -EINVAL;
@@ -839,6 +842,9 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
struct can_can_gw ccgw;
int err = 0;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (nlmsg_len(nlh) < sizeof(*r))
return -EINVAL;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 159aa8bef9e7..3ef1759403b4 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2300,10 +2300,11 @@ restart:
mutex_unlock(&con->mutex);
return;
} else {
- con->ops->put(con);
dout("con_work %p FAILED to back off %lu\n", con,
con->delay);
+ set_bit(CON_FLAG_BACKOFF, &con->flags);
}
+ goto done;
}
if (con->state == CON_STATE_STANDBY) {
@@ -2749,7 +2750,8 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
msg = con->ops->alloc_msg(con, hdr, skip);
mutex_lock(&con->mutex);
if (con->state != CON_STATE_OPEN) {
- ceph_msg_put(msg);
+ if (msg)
+ ceph_msg_put(msg);
return -EAGAIN;
}
con->in_msg = msg;
diff --git a/net/core/dev.c b/net/core/dev.c
index 09cb3f6dc40c..2a5f55866429 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -176,8 +176,10 @@
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
static DEFINE_SPINLOCK(ptype_lock);
+static DEFINE_SPINLOCK(offload_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly; /* Taps */
+static struct list_head offload_base __read_mostly;
/*
* The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -201,6 +203,8 @@ static struct list_head ptype_all __read_mostly; /* Taps */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);
+DEFINE_SEQLOCK(devnet_rename_seq);
+
static inline void dev_base_seq_inc(struct net *net)
{
while (++net->dev_base_seq == 0);
@@ -470,6 +474,82 @@ void dev_remove_pack(struct packet_type *pt)
}
EXPORT_SYMBOL(dev_remove_pack);
+
+/**
+ * dev_add_offload - register offload handlers
+ * @po: protocol offload declaration
+ *
+ * Add protocol offload handlers to the networking stack. The passed
+ * &proto_offload is linked into kernel lists and may not be freed until
+ * it has been removed from the kernel lists.
+ *
+ * This call does not sleep therefore it can not
+ * guarantee all CPU's that are in middle of receiving packets
+ * will see the new offload handlers (until the next received packet).
+ */
+void dev_add_offload(struct packet_offload *po)
+{
+ struct list_head *head = &offload_base;
+
+ spin_lock(&offload_lock);
+ list_add_rcu(&po->list, head);
+ spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(dev_add_offload);
+
+/**
+ * __dev_remove_offload - remove offload handler
+ * @po: packet offload declaration
+ *
+ * Remove a protocol offload handler that was previously added to the
+ * kernel offload handlers by dev_add_offload(). The passed &offload_type
+ * is removed from the kernel lists and can be freed or reused once this
+ * function returns.
+ *
+ * The packet type might still be in use by receivers
+ * and must not be freed until after all the CPU's have gone
+ * through a quiescent state.
+ */
+void __dev_remove_offload(struct packet_offload *po)
+{
+ struct list_head *head = &offload_base;
+ struct packet_offload *po1;
+
+ spin_lock(&offload_lock);
+
+ list_for_each_entry(po1, head, list) {
+ if (po == po1) {
+ list_del_rcu(&po->list);
+ goto out;
+ }
+ }
+
+ pr_warn("dev_remove_offload: %p not found\n", po);
+out:
+ spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(__dev_remove_offload);
+
+/**
+ * dev_remove_offload - remove packet offload handler
+ * @po: packet offload declaration
+ *
+ * Remove a packet offload handler that was previously added to the kernel
+ * offload handlers by dev_add_offload(). The passed &offload_type is
+ * removed from the kernel lists and can be freed or reused once this
+ * function returns.
+ *
+ * This call sleeps to guarantee that no CPU is looking at the packet
+ * type after return.
+ */
+void dev_remove_offload(struct packet_offload *po)
+{
+ __dev_remove_offload(po);
+
+ synchronize_net();
+}
+EXPORT_SYMBOL(dev_remove_offload);
+
/******************************************************************************
Device Boot-time Settings Routines
@@ -1013,22 +1093,31 @@ int dev_change_name(struct net_device *dev, const char *newname)
if (dev->flags & IFF_UP)
return -EBUSY;
- if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
+ write_seqlock(&devnet_rename_seq);
+
+ if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
+ write_sequnlock(&devnet_rename_seq);
return 0;
+ }
memcpy(oldname, dev->name, IFNAMSIZ);
err = dev_get_valid_name(net, dev, newname);
- if (err < 0)
+ if (err < 0) {
+ write_sequnlock(&devnet_rename_seq);
return err;
+ }
rollback:
ret = device_rename(&dev->dev, dev->name);
if (ret) {
memcpy(dev->name, oldname, IFNAMSIZ);
+ write_sequnlock(&devnet_rename_seq);
return ret;
}
+ write_sequnlock(&devnet_rename_seq);
+
write_lock_bh(&dev_base_lock);
hlist_del_rcu(&dev->name_hlist);
write_unlock_bh(&dev_base_lock);
@@ -1046,6 +1135,7 @@ rollback:
/* err >= 0 after dev_alloc_name() or stores the first errno */
if (err >= 0) {
err = ret;
+ write_seqlock(&devnet_rename_seq);
memcpy(dev->name, oldname, IFNAMSIZ);
goto rollback;
} else {
@@ -1075,10 +1165,8 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
return -EINVAL;
if (!len) {
- if (dev->ifalias) {
- kfree(dev->ifalias);
- dev->ifalias = NULL;
- }
+ kfree(dev->ifalias);
+ dev->ifalias = NULL;
return 0;
}
@@ -1666,7 +1754,7 @@ static inline int deliver_skb(struct sk_buff *skb,
static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
{
- if (ptype->af_packet_priv == NULL)
+ if (!ptype->af_packet_priv || !skb->sk)
return false;
if (ptype->id_match)
@@ -1994,7 +2082,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
- struct packet_type *ptype;
+ struct packet_offload *ptype;
__be16 type = skb->protocol;
int vlan_depth = ETH_HLEN;
int err;
@@ -2023,18 +2111,17 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
}
rcu_read_lock();
- list_for_each_entry_rcu(ptype,
- &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
- if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+ list_for_each_entry_rcu(ptype, &offload_base, list) {
+ if (ptype->type == type && ptype->callbacks.gso_segment) {
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
- err = ptype->gso_send_check(skb);
+ err = ptype->callbacks.gso_send_check(skb);
segs = ERR_PTR(err);
if (err || skb_gso_ok(skb, features))
break;
__skb_push(skb, (skb->data -
skb_network_header(skb)));
}
- segs = ptype->gso_segment(skb, features);
+ segs = ptype->callbacks.gso_segment(skb, features);
break;
}
}
@@ -2818,8 +2905,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
if (unlikely(tcpu != next_cpu) &&
(tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
((int)(per_cpu(softnet_data, tcpu).input_queue_head -
- rflow->last_qtail)) >= 0))
+ rflow->last_qtail)) >= 0)) {
+ tcpu = next_cpu;
rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
+ }
if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
*rflowp = rflow;
@@ -3444,9 +3533,9 @@ static void flush_backlog(void *arg)
static int napi_gro_complete(struct sk_buff *skb)
{
- struct packet_type *ptype;
+ struct packet_offload *ptype;
__be16 type = skb->protocol;
- struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+ struct list_head *head = &offload_base;
int err = -ENOENT;
if (NAPI_GRO_CB(skb)->count == 1) {
@@ -3456,10 +3545,10 @@ static int napi_gro_complete(struct sk_buff *skb)
rcu_read_lock();
list_for_each_entry_rcu(ptype, head, list) {
- if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+ if (ptype->type != type || !ptype->callbacks.gro_complete)
continue;
- err = ptype->gro_complete(skb);
+ err = ptype->callbacks.gro_complete(skb);
break;
}
rcu_read_unlock();
@@ -3506,9 +3595,9 @@ EXPORT_SYMBOL(napi_gro_flush);
enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
struct sk_buff **pp = NULL;
- struct packet_type *ptype;
+ struct packet_offload *ptype;
__be16 type = skb->protocol;
- struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+ struct list_head *head = &offload_base;
int same_flow;
int mac_len;
enum gro_result ret;
@@ -3521,7 +3610,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
rcu_read_lock();
list_for_each_entry_rcu(ptype, head, list) {
- if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+ if (ptype->type != type || !ptype->callbacks.gro_receive)
continue;
skb_set_network_header(skb, skb_gro_offset(skb));
@@ -3531,7 +3620,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
NAPI_GRO_CB(skb)->flush = 0;
NAPI_GRO_CB(skb)->free = 0;
- pp = ptype->gro_receive(&napi->gro_list, skb);
+ pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
break;
}
rcu_read_unlock();
@@ -4071,6 +4160,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
{
struct net_device *dev;
struct ifreq ifr;
+ unsigned seq;
/*
* Fetch the caller's info block.
@@ -4079,6 +4169,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
+retry:
+ seq = read_seqbegin(&devnet_rename_seq);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
if (!dev) {
@@ -4088,6 +4180,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
strcpy(ifr.ifr_name, dev->name);
rcu_read_unlock();
+ if (read_seqretry(&devnet_rename_seq, seq))
+ goto retry;
if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
return -EFAULT;
@@ -5200,7 +5294,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSIFNAME:
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
dev_load(net, ifr.ifr_name);
rtnl_lock();
@@ -5221,16 +5315,25 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
* - require strict serialization.
* - do not return a value
*/
+ case SIOCSIFMAP:
+ case SIOCSIFTXQLEN:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ /* fall through */
+ /*
+ * These ioctl calls:
+ * - require local superuser power.
+ * - require strict serialization.
+ * - do not return a value
+ */
case SIOCSIFFLAGS:
case SIOCSIFMETRIC:
case SIOCSIFMTU:
- case SIOCSIFMAP:
case SIOCSIFHWADDR:
case SIOCSIFSLAVE:
case SIOCADDMULTI:
case SIOCDELMULTI:
case SIOCSIFHWBROADCAST:
- case SIOCSIFTXQLEN:
case SIOCSMIIREG:
case SIOCBONDENSLAVE:
case SIOCBONDRELEASE:
@@ -5239,7 +5342,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCBRADDIF:
case SIOCBRDELIF:
case SIOCSHWTSTAMP:
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
/* fall through */
case SIOCBONDSLAVEINFOQUERY:
@@ -6264,7 +6367,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
goto out;
/* Ensure the device has been registrered */
- err = -EINVAL;
if (dev->reg_state != NETREG_REGISTERED)
goto out;
@@ -6662,6 +6764,8 @@ static int __init net_dev_init(void)
for (i = 0; i < PTYPE_HASH_SIZE; i++)
INIT_LIST_HEAD(&ptype_base[i]);
+ INIT_LIST_HEAD(&offload_base);
+
if (register_pernet_subsys(&netdev_net_ops))
goto out;
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 87cc17db2d56..b079c7bbc157 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -319,7 +319,8 @@ int dev_addr_del(struct net_device *dev, const unsigned char *addr,
*/
ha = list_first_entry(&dev->dev_addrs.list,
struct netdev_hw_addr, list);
- if (ha->addr == dev->dev_addr && ha->refcount == 1)
+ if (!memcmp(ha->addr, addr, dev->addr_len) &&
+ ha->type == addr_type && ha->refcount == 1)
return -ENOENT;
err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 4d64cc2e3fa9..a8705432e4b1 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1460,7 +1460,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GEEE:
break;
default:
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 3d92ebb7fbcf..c23543cba132 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -39,6 +39,7 @@
#include <linux/reciprocal_div.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
+#include <linux/if_vlan.h>
/* No hurry in this branch
*
@@ -341,6 +342,12 @@ load_b:
case BPF_S_ANC_CPU:
A = raw_smp_processor_id();
continue;
+ case BPF_S_ANC_VLAN_TAG:
+ A = vlan_tx_tag_get(skb);
+ continue;
+ case BPF_S_ANC_VLAN_TAG_PRESENT:
+ A = !!vlan_tx_tag_present(skb);
+ continue;
case BPF_S_ANC_NLATTR: {
struct nlattr *nla;
@@ -600,6 +607,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
ANCILLARY(RXHASH);
ANCILLARY(CPU);
ANCILLARY(ALU_XOR_X);
+ ANCILLARY(VLAN_TAG);
+ ANCILLARY(VLAN_TAG_PRESENT);
}
}
ftest->code = code;
@@ -751,3 +760,133 @@ int sk_detach_filter(struct sock *sk)
return ret;
}
EXPORT_SYMBOL_GPL(sk_detach_filter);
+
+static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
+{
+ static const u16 decodes[] = {
+ [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K,
+ [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X,
+ [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K,
+ [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X,
+ [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K,
+ [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X,
+ [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X,
+ [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K,
+ [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X,
+ [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K,
+ [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X,
+ [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K,
+ [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X,
+ [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K,
+ [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X,
+ [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K,
+ [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X,
+ [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K,
+ [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X,
+ [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG,
+ [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS,
+ [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS,
+ [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN,
+ [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND,
+ [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND,
+ [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND,
+ [BPF_S_LD_IMM] = BPF_LD|BPF_IMM,
+ [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN,
+ [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH,
+ [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM,
+ [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX,
+ [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA,
+ [BPF_S_RET_K] = BPF_RET|BPF_K,
+ [BPF_S_RET_A] = BPF_RET|BPF_A,
+ [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K,
+ [BPF_S_LD_MEM] = BPF_LD|BPF_MEM,
+ [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM,
+ [BPF_S_ST] = BPF_ST,
+ [BPF_S_STX] = BPF_STX,
+ [BPF_S_JMP_JA] = BPF_JMP|BPF_JA,
+ [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K,
+ [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X,
+ [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K,
+ [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X,
+ [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K,
+ [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X,
+ [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K,
+ [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X,
+ };
+ u16 code;
+
+ code = filt->code;
+
+ to->code = decodes[code];
+ to->jt = filt->jt;
+ to->jf = filt->jf;
+
+ if (code == BPF_S_ALU_DIV_K) {
+ /*
+ * When loaded this rule user gave us X, which was
+ * translated into R = r(X). Now we calculate the
+ * RR = r(R) and report it back. If next time this
+ * value is loaded and RRR = r(RR) is calculated
+ * then the R == RRR will be true.
+ *
+ * One exception. X == 1 translates into R == 0 and
+ * we can't calculate RR out of it with r().
+ */
+
+ if (filt->k == 0)
+ to->k = 1;
+ else
+ to->k = reciprocal_value(filt->k);
+
+ BUG_ON(reciprocal_value(to->k) != filt->k);
+ } else
+ to->k = filt->k;
+}
+
+int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len)
+{
+ struct sk_filter *filter;
+ int i, ret;
+
+ lock_sock(sk);
+ filter = rcu_dereference_protected(sk->sk_filter,
+ sock_owned_by_user(sk));
+ ret = 0;
+ if (!filter)
+ goto out;
+ ret = filter->len;
+ if (!len)
+ goto out;
+ ret = -EINVAL;
+ if (len < filter->len)
+ goto out;
+
+ ret = -EFAULT;
+ for (i = 0; i < filter->len; i++) {
+ struct sock_filter fb;
+
+ sk_decode_filter(&filter->insns[i], &fb);
+ if (copy_to_user(&ubuf[i], &fb, sizeof(fb)))
+ goto out;
+ }
+
+ ret = filter->len;
+out:
+ release_sock(sk);
+ return ret;
+}
diff --git a/net/core/flow.c b/net/core/flow.c
index e318c7e98042..b0901ee5a002 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -327,11 +327,9 @@ static void flow_cache_flush_tasklet(unsigned long data)
static void flow_cache_flush_per_cpu(void *data)
{
struct flow_flush_info *info = data;
- int cpu;
struct tasklet_struct *tasklet;
- cpu = smp_processor_id();
- tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet;
+ tasklet = this_cpu_ptr(&info->cache->percpu->flush_tasklet);
tasklet->data = (unsigned long)info;
tasklet_schedule(tasklet);
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 22571488730a..f1c0c2e9cad5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2987,6 +2987,10 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
}
+ /* Don't export sysctls to unprivileged users */
+ if (neigh_parms_net(p)->user_ns != &init_user_ns)
+ t->neigh_vars[0].procname = NULL;
+
snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
p_name, dev_name_source);
t->sysctl_header =
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 017a8bacfb27..334efd5d67a9 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -18,11 +18,9 @@
#include <net/sock.h>
#include <net/net_namespace.h>
#include <linux/rtnetlink.h>
-#include <linux/wireless.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/jiffies.h>
-#include <net/wext.h>
#include "net-sysfs.h"
@@ -73,11 +71,12 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len,
int (*set)(struct net_device *, unsigned long))
{
- struct net_device *net = to_net_dev(dev);
+ struct net_device *netdev = to_net_dev(dev);
+ struct net *net = dev_net(netdev);
unsigned long new;
int ret = -EINVAL;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
ret = kstrtoul(buf, 0, &new);
@@ -87,8 +86,8 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
if (!rtnl_trylock())
return restart_syscall();
- if (dev_isalive(net)) {
- if ((ret = (*set)(net, new)) == 0)
+ if (dev_isalive(netdev)) {
+ if ((ret = (*set)(netdev, new)) == 0)
ret = len;
}
rtnl_unlock();
@@ -264,6 +263,9 @@ static ssize_t store_tx_queue_len(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
return netdev_store(dev, attr, buf, len, change_tx_queue_len);
}
@@ -271,10 +273,11 @@ static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
struct net_device *netdev = to_net_dev(dev);
+ struct net *net = dev_net(netdev);
size_t count = len;
ssize_t ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
/* ignore trailing newline */
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 42f1e1c7514f..6456439cbbd9 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -13,6 +13,7 @@
#include <linux/proc_fs.h>
#include <linux/file.h>
#include <linux/export.h>
+#include <linux/user_namespace.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -145,7 +146,7 @@ static void ops_free_list(const struct pernet_operations *ops,
/*
* setup_net runs the initializers for the network namespace object.
*/
-static __net_init int setup_net(struct net *net)
+static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
/* Must be called with net_mutex held */
const struct pernet_operations *ops, *saved_ops;
@@ -155,6 +156,7 @@ static __net_init int setup_net(struct net *net)
atomic_set(&net->count, 1);
atomic_set(&net->passive, 1);
net->dev_base_seq = 1;
+ net->user_ns = user_ns;
#ifdef NETNS_REFCNT_DEBUG
atomic_set(&net->use_count, 0);
@@ -232,7 +234,8 @@ void net_drop_ns(void *p)
net_free(ns);
}
-struct net *copy_net_ns(unsigned long flags, struct net *old_net)
+struct net *copy_net_ns(unsigned long flags,
+ struct user_namespace *user_ns, struct net *old_net)
{
struct net *net;
int rv;
@@ -243,8 +246,11 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
net = net_alloc();
if (!net)
return ERR_PTR(-ENOMEM);
+
+ get_user_ns(user_ns);
+
mutex_lock(&net_mutex);
- rv = setup_net(net);
+ rv = setup_net(net, user_ns);
if (rv == 0) {
rtnl_lock();
list_add_tail_rcu(&net->list, &net_namespace_list);
@@ -252,6 +258,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
}
mutex_unlock(&net_mutex);
if (rv < 0) {
+ put_user_ns(user_ns);
net_drop_ns(net);
return ERR_PTR(rv);
}
@@ -308,6 +315,7 @@ static void cleanup_net(struct work_struct *work)
/* Finally it is safe to free my network namespace structure */
list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
list_del_init(&net->exit_list);
+ put_user_ns(net->user_ns);
net_drop_ns(net);
}
}
@@ -347,13 +355,6 @@ struct net *get_net_ns_by_fd(int fd)
}
#else
-struct net *copy_net_ns(unsigned long flags, struct net *old_net)
-{
- if (flags & CLONE_NEWNET)
- return ERR_PTR(-EINVAL);
- return old_net;
-}
-
struct net *get_net_ns_by_fd(int fd)
{
return ERR_PTR(-EINVAL);
@@ -402,7 +403,7 @@ static int __init net_ns_init(void)
rcu_assign_pointer(init_net.gen, ng);
mutex_lock(&net_mutex);
- if (setup_net(&init_net))
+ if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
rtnl_lock();
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 79285a36035f..847c02b197b0 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -248,7 +248,7 @@ static int update_netprio(const void *v, struct file *file, unsigned n)
return 0;
}
-void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
struct task_struct *p;
void *v;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index d1dc14c2aac4..b29dacf900f9 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -419,20 +419,6 @@ struct pktgen_thread {
#define REMOVE 1
#define FIND 0
-static inline ktime_t ktime_now(void)
-{
- struct timespec ts;
- ktime_get_ts(&ts);
-
- return timespec_to_ktime(ts);
-}
-
-/* This works even if 32 bit because of careful byte order choice */
-static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2)
-{
- return cmp1.tv64 < cmp2.tv64;
-}
-
static const char version[] =
"Packet Generator for packet performance testing. "
"Version: " VERSION "\n";
@@ -675,7 +661,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
seq_puts(seq, "\n");
/* not really stopped, more like last-running-at */
- stopped = pkt_dev->running ? ktime_now() : pkt_dev->stopped_at;
+ stopped = pkt_dev->running ? ktime_get() : pkt_dev->stopped_at;
idle = pkt_dev->idle_acc;
do_div(idle, NSEC_PER_USEC);
@@ -2141,12 +2127,12 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
return;
}
- start_time = ktime_now();
+ start_time = ktime_get();
if (remaining < 100000) {
/* for small delays (<100us), just loop until limit is reached */
do {
- end_time = ktime_now();
- } while (ktime_lt(end_time, spin_until));
+ end_time = ktime_get();
+ } while (ktime_compare(end_time, spin_until) < 0);
} else {
/* see do_nanosleep */
hrtimer_init_sleeper(&t, current);
@@ -2162,7 +2148,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
hrtimer_cancel(&t.timer);
} while (t.task && pkt_dev->running && !signal_pending(current));
__set_current_state(TASK_RUNNING);
- end_time = ktime_now();
+ end_time = ktime_get();
}
pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
@@ -2427,11 +2413,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
}
} else { /* IPV6 * */
- if (pkt_dev->min_in6_daddr.s6_addr32[0] == 0 &&
- pkt_dev->min_in6_daddr.s6_addr32[1] == 0 &&
- pkt_dev->min_in6_daddr.s6_addr32[2] == 0 &&
- pkt_dev->min_in6_daddr.s6_addr32[3] == 0) ;
- else {
+ if (!ipv6_addr_any(&pkt_dev->min_in6_daddr)) {
int i;
/* Only random destinations yet */
@@ -2916,8 +2898,7 @@ static void pktgen_run(struct pktgen_thread *t)
pktgen_clear_counters(pkt_dev);
pkt_dev->running = 1; /* Cranke yeself! */
pkt_dev->skb = NULL;
- pkt_dev->started_at =
- pkt_dev->next_tx = ktime_now();
+ pkt_dev->started_at = pkt_dev->next_tx = ktime_get();
set_pkt_overhead(pkt_dev);
@@ -3076,7 +3057,7 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
kfree_skb(pkt_dev->skb);
pkt_dev->skb = NULL;
- pkt_dev->stopped_at = ktime_now();
+ pkt_dev->stopped_at = ktime_get();
pkt_dev->running = 0;
show_results(pkt_dev, nr_frags);
@@ -3095,7 +3076,7 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
continue;
if (best == NULL)
best = pkt_dev;
- else if (ktime_lt(pkt_dev->next_tx, best->next_tx))
+ else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0)
best = pkt_dev;
}
if_unlock(t);
@@ -3180,14 +3161,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
static void pktgen_resched(struct pktgen_dev *pkt_dev)
{
- ktime_t idle_start = ktime_now();
+ ktime_t idle_start = ktime_get();
schedule();
- pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start));
+ pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start));
}
static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
{
- ktime_t idle_start = ktime_now();
+ ktime_t idle_start = ktime_get();
while (atomic_read(&(pkt_dev->skb->users)) != 1) {
if (signal_pending(current))
@@ -3198,7 +3179,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
else
cpu_relax();
}
- pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start));
+ pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start));
}
static void pktgen_xmit(struct pktgen_dev *pkt_dev)
@@ -3220,7 +3201,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
* "never transmit"
*/
if (unlikely(pkt_dev->delay == ULLONG_MAX)) {
- pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX);
+ pkt_dev->next_tx = ktime_add_ns(ktime_get(), ULONG_MAX);
return;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 76d4c2c3c89b..575a6ee89944 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -128,7 +128,7 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
if (tab == NULL || tab[msgindex].doit == NULL)
tab = rtnl_msg_handlers[PF_UNSPEC];
- return tab ? tab[msgindex].doit : NULL;
+ return tab[msgindex].doit;
}
static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
@@ -143,7 +143,7 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
if (tab == NULL || tab[msgindex].dumpit == NULL)
tab = rtnl_msg_handlers[PF_UNSPEC];
- return tab ? tab[msgindex].dumpit : NULL;
+ return tab[msgindex].dumpit;
}
static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
@@ -158,7 +158,7 @@ static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
if (tab == NULL || tab[msgindex].calcit == NULL)
tab = rtnl_msg_handlers[PF_UNSPEC];
- return tab ? tab[msgindex].calcit : NULL;
+ return tab[msgindex].calcit;
}
/**
@@ -1316,6 +1316,10 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
err = PTR_ERR(net);
goto errout;
}
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+ err = -EPERM;
+ goto errout;
+ }
err = dev_change_net_namespace(dev, net, ifname);
put_net(net);
if (err)
@@ -2057,6 +2061,9 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
u8 *addr;
int err;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
if (err < 0)
return err;
@@ -2123,6 +2130,9 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
int err = -EINVAL;
__u8 *addr;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (nlmsg_len(nlh) < sizeof(*ndm))
return -EINVAL;
@@ -2192,7 +2202,8 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
goto skip;
err = nlmsg_populate_fdb_fill(skb, dev, ha->addr,
- portid, seq, 0, NTF_SELF);
+ portid, seq,
+ RTM_NEWNEIGH, NTF_SELF);
if (err < 0)
return err;
skip:
@@ -2252,6 +2263,211 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
+int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+ struct net_device *dev, u16 mode)
+{
+ struct nlmsghdr *nlh;
+ struct ifinfomsg *ifm;
+ struct nlattr *br_afspec;
+ u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
+
+ nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI);
+ if (nlh == NULL)
+ return -EMSGSIZE;
+
+ ifm = nlmsg_data(nlh);
+ ifm->ifi_family = AF_BRIDGE;
+ ifm->__ifi_pad = 0;
+ ifm->ifi_type = dev->type;
+ ifm->ifi_index = dev->ifindex;
+ ifm->ifi_flags = dev_get_flags(dev);
+ ifm->ifi_change = 0;
+
+
+ if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
+ nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
+ nla_put_u8(skb, IFLA_OPERSTATE, operstate) ||
+ (dev->master &&
+ nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) ||
+ (dev->addr_len &&
+ nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
+ (dev->ifindex != dev->iflink &&
+ nla_put_u32(skb, IFLA_LINK, dev->iflink)))
+ goto nla_put_failure;
+
+ br_afspec = nla_nest_start(skb, IFLA_AF_SPEC);
+ if (!br_afspec)
+ goto nla_put_failure;
+
+ if (nla_put_u16(skb, IFLA_BRIDGE_FLAGS, BRIDGE_FLAGS_SELF) ||
+ nla_put_u16(skb, IFLA_BRIDGE_MODE, mode)) {
+ nla_nest_cancel(skb, br_afspec);
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, br_afspec);
+
+ return nlmsg_end(skb, nlh);
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL(ndo_dflt_bridge_getlink);
+
+static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ int idx = 0;
+ u32 portid = NETLINK_CB(cb->skb).portid;
+ u32 seq = cb->nlh->nlmsg_seq;
+
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct net_device *master = dev->master;
+
+ if (master && master->netdev_ops->ndo_bridge_getlink) {
+ if (idx >= cb->args[0] &&
+ master->netdev_ops->ndo_bridge_getlink(
+ skb, portid, seq, dev) < 0)
+ break;
+ idx++;
+ }
+
+ if (ops->ndo_bridge_getlink) {
+ if (idx >= cb->args[0] &&
+ ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0)
+ break;
+ idx++;
+ }
+ }
+ rcu_read_unlock();
+ cb->args[0] = idx;
+
+ return skb->len;
+}
+
+static inline size_t bridge_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+ + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+ + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+ + nla_total_size(sizeof(u32)) /* IFLA_MASTER */
+ + nla_total_size(sizeof(u32)) /* IFLA_MTU */
+ + nla_total_size(sizeof(u32)) /* IFLA_LINK */
+ + nla_total_size(sizeof(u32)) /* IFLA_OPERSTATE */
+ + nla_total_size(sizeof(u8)) /* IFLA_PROTINFO */
+ + nla_total_size(sizeof(struct nlattr)) /* IFLA_AF_SPEC */
+ + nla_total_size(sizeof(u16)) /* IFLA_BRIDGE_FLAGS */
+ + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */
+}
+
+static int rtnl_bridge_notify(struct net_device *dev, u16 flags)
+{
+ struct net *net = dev_net(dev);
+ struct net_device *master = dev->master;
+ struct sk_buff *skb;
+ int err = -EOPNOTSUPP;
+
+ skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC);
+ if (!skb) {
+ err = -ENOMEM;
+ goto errout;
+ }
+
+ if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) &&
+ master && master->netdev_ops->ndo_bridge_getlink) {
+ err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev);
+ if (err < 0)
+ goto errout;
+ }
+
+ if ((flags & BRIDGE_FLAGS_SELF) &&
+ dev->netdev_ops->ndo_bridge_getlink) {
+ err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev);
+ if (err < 0)
+ goto errout;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+ return 0;
+errout:
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ rtnl_set_sk_err(net, RTNLGRP_LINK, err);
+ return err;
+}
+
+static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ void *arg)
+{
+ struct net *net = sock_net(skb->sk);
+ struct ifinfomsg *ifm;
+ struct net_device *dev;
+ struct nlattr *br_spec, *attr = NULL;
+ int rem, err = -EOPNOTSUPP;
+ u16 oflags, flags = 0;
+ bool have_flags = false;
+
+ if (nlmsg_len(nlh) < sizeof(*ifm))
+ return -EINVAL;
+
+ ifm = nlmsg_data(nlh);
+ if (ifm->ifi_family != AF_BRIDGE)
+ return -EPFNOSUPPORT;
+
+ dev = __dev_get_by_index(net, ifm->ifi_index);
+ if (!dev) {
+ pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+ return -ENODEV;
+ }
+
+ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+ if (br_spec) {
+ nla_for_each_nested(attr, br_spec, rem) {
+ if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
+ have_flags = true;
+ flags = nla_get_u16(attr);
+ break;
+ }
+ }
+ }
+
+ oflags = flags;
+
+ if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
+ if (!dev->master ||
+ !dev->master->netdev_ops->ndo_bridge_setlink) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh);
+ if (err)
+ goto out;
+
+ flags &= ~BRIDGE_FLAGS_MASTER;
+ }
+
+ if ((flags & BRIDGE_FLAGS_SELF)) {
+ if (!dev->netdev_ops->ndo_bridge_setlink)
+ err = -EOPNOTSUPP;
+ else
+ err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh);
+
+ if (!err)
+ flags &= ~BRIDGE_FLAGS_SELF;
+ }
+
+ if (have_flags)
+ memcpy(nla_data(attr), &flags, sizeof(flags));
+ /* Generate event to notify upper layer of bridge change */
+ if (!err)
+ err = rtnl_bridge_notify(dev, oflags);
+out:
+ return err;
+}
+
/* Protected by RTNL sempahore. */
static struct rtattr **rta_buf;
static int rtattr_max;
@@ -2282,7 +2498,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
sz_idx = type>>2;
kind = type&3;
- if (kind != 2 && !capable(CAP_NET_ADMIN))
+ if (kind != 2 && !ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
@@ -2433,5 +2649,8 @@ void __init rtnetlink_init(void)
rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL);
rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL);
rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL);
+
+ rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL);
+ rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
}
diff --git a/net/core/scm.c b/net/core/scm.c
index ab570841a532..57fb1ee6649f 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -51,11 +51,11 @@ static __inline__ int scm_check_creds(struct ucred *creds)
if (!uid_valid(uid) || !gid_valid(gid))
return -EINVAL;
- if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
+ if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) &&
((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) ||
- uid_eq(uid, cred->suid)) || capable(CAP_SETUID)) &&
+ uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) &&
((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) ||
- gid_eq(gid, cred->sgid)) || capable(CAP_SETGID))) {
+ gid_eq(gid, cred->sgid)) || nsown_capable(CAP_SETGID))) {
return 0;
}
return -EPERM;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6e04b1fa11f2..880722e22cc5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -519,7 +519,7 @@ static void skb_release_data(struct sk_buff *skb)
uarg = skb_shinfo(skb)->destructor_arg;
if (uarg->callback)
- uarg->callback(uarg);
+ uarg->callback(uarg, true);
}
if (skb_has_frag_list(skb))
@@ -635,6 +635,26 @@ void kfree_skb(struct sk_buff *skb)
EXPORT_SYMBOL(kfree_skb);
/**
+ * skb_tx_error - report an sk_buff xmit error
+ * @skb: buffer that triggered an error
+ *
+ * Report xmit error if a device callback is tracking this skb.
+ * skb must be freed afterwards.
+ */
+void skb_tx_error(struct sk_buff *skb)
+{
+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+ struct ubuf_info *uarg;
+
+ uarg = skb_shinfo(skb)->destructor_arg;
+ if (uarg->callback)
+ uarg->callback(uarg, false);
+ skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
+ }
+}
+EXPORT_SYMBOL(skb_tx_error);
+
+/**
* consume_skb - free an skbuff
* @skb: buffer to free
*
@@ -797,7 +817,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
for (i = 0; i < num_frags; i++)
skb_frag_unref(skb, i);
- uarg->callback(uarg);
+ uarg->callback(uarg, false);
/* skb frags point to kernel buffers */
for (i = num_frags - 1; i >= 0; i--) {
@@ -3379,10 +3399,12 @@ EXPORT_SYMBOL(__skb_warn_lro_forwarding);
void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
{
- if (head_stolen)
+ if (head_stolen) {
+ skb_release_head_state(skb);
kmem_cache_free(skbuff_head_cache, skb);
- else
+ } else {
__kfree_skb(skb);
+ }
}
EXPORT_SYMBOL(kfree_skb_partial);
diff --git a/net/core/sock.c b/net/core/sock.c
index 8a146cfcc366..a692ef49c9bb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -505,7 +505,8 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
}
EXPORT_SYMBOL(sk_dst_check);
-static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
+static int sock_setbindtodevice(struct sock *sk, char __user *optval,
+ int optlen)
{
int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
@@ -515,7 +516,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
/* Sorry... */
ret = -EPERM;
- if (!capable(CAP_NET_RAW))
+ if (!ns_capable(net->user_ns, CAP_NET_RAW))
goto out;
ret = -EINVAL;
@@ -562,6 +563,59 @@ out:
return ret;
}
+static int sock_getbindtodevice(struct sock *sk, char __user *optval,
+ int __user *optlen, int len)
+{
+ int ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+ struct net *net = sock_net(sk);
+ struct net_device *dev;
+ char devname[IFNAMSIZ];
+ unsigned seq;
+
+ if (sk->sk_bound_dev_if == 0) {
+ len = 0;
+ goto zero;
+ }
+
+ ret = -EINVAL;
+ if (len < IFNAMSIZ)
+ goto out;
+
+retry:
+ seq = read_seqbegin(&devnet_rename_seq);
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+ ret = -ENODEV;
+ if (!dev) {
+ rcu_read_unlock();
+ goto out;
+ }
+
+ strcpy(devname, dev->name);
+ rcu_read_unlock();
+ if (read_seqretry(&devnet_rename_seq, seq))
+ goto retry;
+
+ len = strlen(devname) + 1;
+
+ ret = -EFAULT;
+ if (copy_to_user(optval, devname, len))
+ goto out;
+
+zero:
+ ret = -EFAULT;
+ if (put_user(len, optlen))
+ goto out;
+
+ ret = 0;
+
+out:
+#endif
+
+ return ret;
+}
+
static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
{
if (valbool)
@@ -589,7 +643,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
*/
if (optname == SO_BINDTODEVICE)
- return sock_bindtodevice(sk, optval, optlen);
+ return sock_setbindtodevice(sk, optval, optlen);
if (optlen < sizeof(int))
return -EINVAL;
@@ -696,7 +750,8 @@ set_rcvbuf:
break;
case SO_PRIORITY:
- if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
+ if ((val >= 0 && val <= 6) ||
+ ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
sk->sk_priority = val;
else
ret = -EPERM;
@@ -813,7 +868,7 @@ set_rcvbuf:
clear_bit(SOCK_PASSSEC, &sock->flags);
break;
case SO_MARK:
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
ret = -EPERM;
else
sk->sk_mark = val;
@@ -1074,6 +1129,17 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
case SO_NOFCS:
v.val = sock_flag(sk, SOCK_NOFCS);
break;
+
+ case SO_BINDTODEVICE:
+ return sock_getbindtodevice(sk, optval, optlen, len);
+
+ case SO_GET_FILTER:
+ len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
+ if (len < 0)
+ return len;
+
+ goto lenout;
+
default:
return -ENOPROTOOPT;
}
@@ -1214,13 +1280,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
#ifdef CONFIG_CGROUPS
#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
-void sock_update_classid(struct sock *sk)
+void sock_update_classid(struct sock *sk, struct task_struct *task)
{
u32 classid;
- rcu_read_lock(); /* doing current task, which cannot vanish. */
- classid = task_cls_classid(current);
- rcu_read_unlock();
+ classid = task_cls_classid(task);
if (classid != sk->sk_classid)
sk->sk_classid = classid;
}
@@ -1263,7 +1327,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
sock_net_set(sk, get_net(net));
atomic_set(&sk->sk_wmem_alloc, 1);
- sock_update_classid(sk);
+ sock_update_classid(sk, current);
sock_update_netprioidx(sk, current);
}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index a7c36845b123..d1b08045a9df 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -216,6 +216,11 @@ static __net_init int sysctl_core_net_init(struct net *net)
goto err_dup;
tbl[0].data = &net->core.sysctl_somaxconn;
+
+ /* Don't export any sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns) {
+ tbl[0].procname = NULL;
+ }
}
net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl);
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 70989e672304..b07c75d37e91 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1662,6 +1662,9 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
struct nlmsghdr *reply_nlh = NULL;
const struct reply_func *fn;
+ if ((nlh->nlmsg_type == RTM_SETDCB) && !capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (!net_eq(net, &init_net))
return -EINVAL;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index ea850ce35d4a..662071b249cc 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -174,8 +174,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
* To protect against Request floods, increment retrans
* counter (backoff, monitored by dccp_response_timer).
*/
- req->retrans++;
- req->rsk_ops->rtx_syn_ack(sk, req, NULL);
+ inet_rtx_syn_ack(sk, req);
}
/* Network Duplicate, discard packet */
return NULL;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 7b7e561412d3..e47ba9fc4a0e 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -573,6 +573,9 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
struct dn_ifaddr __rcu **ifap;
int err = -EINVAL;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (!net_eq(net, &init_net))
goto errout;
@@ -614,6 +617,9 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
struct dn_ifaddr *ifa;
int err;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (!net_eq(net, &init_net))
return -EINVAL;
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 102d6106a942..e36614eccc04 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -520,6 +520,9 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *
struct rtattr **rta = arg;
struct rtmsg *r = NLMSG_DATA(nlh);
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (!net_eq(net, &init_net))
return -EINVAL;
@@ -540,6 +543,9 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *
struct rtattr **rta = arg;
struct rtmsg *r = NLMSG_DATA(nlh);
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (!net_eq(net, &init_net))
return -EINVAL;
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 274791cd7a35..f5eede1d6cb8 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -1,26 +1,24 @@
+config HAVE_NET_DSA
+ def_bool y
+ depends on NETDEVICES && !S390
+
+# Drivers must select NET_DSA and the appropriate tagging format
+
config NET_DSA
- tristate "Distributed Switch Architecture support"
- default n
- depends on EXPERIMENTAL && NETDEVICES && !S390
+ tristate
+ depends on HAVE_NET_DSA
select PHYLIB
- ---help---
- This allows you to use hardware switch chips that use
- the Distributed Switch Architecture.
-
if NET_DSA
# tagging formats
config NET_DSA_TAG_DSA
bool
- default n
config NET_DSA_TAG_EDSA
bool
- default n
config NET_DSA_TAG_TRAILER
bool
- default n
endif
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 766c59658563..24b384b7903e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -346,7 +346,8 @@ lookup_protocol:
}
err = -EPERM;
- if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
+ if (sock->type == SOCK_RAW && !kern &&
+ !ns_capable(net->user_ns, CAP_NET_RAW))
goto out_rcu_unlock;
err = -EAFNOSUPPORT;
@@ -473,6 +474,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
+ struct net *net = sock_net(sk);
unsigned short snum;
int chk_addr_ret;
int err;
@@ -496,7 +498,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
}
- chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
+ chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
/* Not specified by any standard per-se, however it breaks too
* many applications when removed. It is unfortunate since
@@ -516,7 +518,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
snum = ntohs(addr->sin_port);
err = -EACCES;
- if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+ if (snum && snum < PROT_SOCK &&
+ !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
goto out;
/* We keep a pair of addresses. rcv_saddr is the one
@@ -1251,7 +1254,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
static int inet_gso_send_check(struct sk_buff *skb)
{
- const struct net_protocol *ops;
+ const struct net_offload *ops;
const struct iphdr *iph;
int proto;
int ihl;
@@ -1275,9 +1278,9 @@ static int inet_gso_send_check(struct sk_buff *skb)
err = -EPROTONOSUPPORT;
rcu_read_lock();
- ops = rcu_dereference(inet_protos[proto]);
- if (likely(ops && ops->gso_send_check))
- err = ops->gso_send_check(skb);
+ ops = rcu_dereference(inet_offloads[proto]);
+ if (likely(ops && ops->callbacks.gso_send_check))
+ err = ops->callbacks.gso_send_check(skb);
rcu_read_unlock();
out:
@@ -1288,7 +1291,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
- const struct net_protocol *ops;
+ const struct net_offload *ops;
struct iphdr *iph;
int proto;
int ihl;
@@ -1325,9 +1328,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
segs = ERR_PTR(-EPROTONOSUPPORT);
rcu_read_lock();
- ops = rcu_dereference(inet_protos[proto]);
- if (likely(ops && ops->gso_segment))
- segs = ops->gso_segment(skb, features);
+ ops = rcu_dereference(inet_offloads[proto]);
+ if (likely(ops && ops->callbacks.gso_segment))
+ segs = ops->callbacks.gso_segment(skb, features);
rcu_read_unlock();
if (!segs || IS_ERR(segs))
@@ -1356,7 +1359,7 @@ out:
static struct sk_buff **inet_gro_receive(struct sk_buff **head,
struct sk_buff *skb)
{
- const struct net_protocol *ops;
+ const struct net_offload *ops;
struct sk_buff **pp = NULL;
struct sk_buff *p;
const struct iphdr *iph;
@@ -1378,8 +1381,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
proto = iph->protocol;
rcu_read_lock();
- ops = rcu_dereference(inet_protos[proto]);
- if (!ops || !ops->gro_receive)
+ ops = rcu_dereference(inet_offloads[proto]);
+ if (!ops || !ops->callbacks.gro_receive)
goto out_unlock;
if (*(u8 *)iph != 0x45)
@@ -1420,7 +1423,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
skb_gro_pull(skb, sizeof(*iph));
skb_set_transport_header(skb, skb_gro_offset(skb));
- pp = ops->gro_receive(head, skb);
+ pp = ops->callbacks.gro_receive(head, skb);
out_unlock:
rcu_read_unlock();
@@ -1435,7 +1438,7 @@ static int inet_gro_complete(struct sk_buff *skb)
{
__be16 newlen = htons(skb->len - skb_network_offset(skb));
struct iphdr *iph = ip_hdr(skb);
- const struct net_protocol *ops;
+ const struct net_offload *ops;
int proto = iph->protocol;
int err = -ENOSYS;
@@ -1443,11 +1446,11 @@ static int inet_gro_complete(struct sk_buff *skb)
iph->tot_len = newlen;
rcu_read_lock();
- ops = rcu_dereference(inet_protos[proto]);
- if (WARN_ON(!ops || !ops->gro_complete))
+ ops = rcu_dereference(inet_offloads[proto]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_complete))
goto out_unlock;
- err = ops->gro_complete(skb);
+ err = ops->callbacks.gro_complete(skb);
out_unlock:
rcu_read_unlock();
@@ -1558,23 +1561,33 @@ static const struct net_protocol tcp_protocol = {
.early_demux = tcp_v4_early_demux,
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
- .gso_send_check = tcp_v4_gso_send_check,
- .gso_segment = tcp_tso_segment,
- .gro_receive = tcp4_gro_receive,
- .gro_complete = tcp4_gro_complete,
.no_policy = 1,
.netns_ok = 1,
};
+static const struct net_offload tcp_offload = {
+ .callbacks = {
+ .gso_send_check = tcp_v4_gso_send_check,
+ .gso_segment = tcp_tso_segment,
+ .gro_receive = tcp4_gro_receive,
+ .gro_complete = tcp4_gro_complete,
+ },
+};
+
static const struct net_protocol udp_protocol = {
.handler = udp_rcv,
.err_handler = udp_err,
- .gso_send_check = udp4_ufo_send_check,
- .gso_segment = udp4_ufo_fragment,
.no_policy = 1,
.netns_ok = 1,
};
+static const struct net_offload udp_offload = {
+ .callbacks = {
+ .gso_send_check = udp4_ufo_send_check,
+ .gso_segment = udp4_ufo_fragment,
+ },
+};
+
static const struct net_protocol icmp_protocol = {
.handler = icmp_rcv,
.err_handler = ping_err,
@@ -1659,13 +1672,35 @@ static int ipv4_proc_init(void);
* IP protocol layer initialiser
*/
+static struct packet_offload ip_packet_offload __read_mostly = {
+ .type = cpu_to_be16(ETH_P_IP),
+ .callbacks = {
+ .gso_send_check = inet_gso_send_check,
+ .gso_segment = inet_gso_segment,
+ .gro_receive = inet_gro_receive,
+ .gro_complete = inet_gro_complete,
+ },
+};
+
+static int __init ipv4_offload_init(void)
+{
+ /*
+ * Add offloads
+ */
+ if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0)
+ pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
+ if (inet_add_offload(&tcp_offload, IPPROTO_TCP) < 0)
+ pr_crit("%s: Cannot add TCP protocol offlaod\n", __func__);
+
+ dev_add_offload(&ip_packet_offload);
+ return 0;
+}
+
+fs_initcall(ipv4_offload_init);
+
static struct packet_type ip_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_IP),
.func = ip_rcv,
- .gso_send_check = inet_gso_send_check,
- .gso_segment = inet_gso_segment,
- .gro_receive = inet_gro_receive,
- .gro_complete = inet_gro_complete,
};
static int __init inet_init(void)
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 47800459e4cb..ce6fbdfd40b8 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1161,7 +1161,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
switch (cmd) {
case SIOCDARP:
case SIOCSARP:
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
case SIOCGARP:
err = copy_from_user(&r, arg, sizeof(struct arpreq));
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 2a6abc163ed2..e13183abd7f6 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -55,6 +55,7 @@
#include <linux/sysctl.h>
#endif
#include <linux/kmod.h>
+#include <linux/netconf.h>
#include <net/arp.h>
#include <net/ip.h>
@@ -723,7 +724,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCSIFFLAGS:
ret = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto out;
break;
case SIOCSIFADDR: /* Set interface address (and family) */
@@ -731,7 +732,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCSIFDSTADDR: /* Set the destination address */
case SIOCSIFNETMASK: /* Set the netmask for the interface */
ret = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto out;
ret = -EINVAL;
if (sin->sin_family != AF_INET)
@@ -1442,6 +1443,149 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
return 0;
}
+static int inet_netconf_msgsize_devconf(int type)
+{
+ int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
+ + nla_total_size(4); /* NETCONFA_IFINDEX */
+
+ /* type -1 is used for ALL */
+ if (type == -1 || type == NETCONFA_FORWARDING)
+ size += nla_total_size(4);
+ if (type == -1 || type == NETCONFA_RP_FILTER)
+ size += nla_total_size(4);
+
+ return size;
+}
+
+static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
+ struct ipv4_devconf *devconf, u32 portid,
+ u32 seq, int event, unsigned int flags,
+ int type)
+{
+ struct nlmsghdr *nlh;
+ struct netconfmsg *ncm;
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
+ flags);
+ if (nlh == NULL)
+ return -EMSGSIZE;
+
+ ncm = nlmsg_data(nlh);
+ ncm->ncm_family = AF_INET;
+
+ if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
+ goto nla_put_failure;
+
+ /* type -1 is used for ALL */
+ if ((type == -1 || type == NETCONFA_FORWARDING) &&
+ nla_put_s32(skb, NETCONFA_FORWARDING,
+ IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
+ goto nla_put_failure;
+ if ((type == -1 || type == NETCONFA_RP_FILTER) &&
+ nla_put_s32(skb, NETCONFA_RP_FILTER,
+ IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
+ goto nla_put_failure;
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
+ struct ipv4_devconf *devconf)
+{
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
+ RTM_NEWNETCONF, 0, type);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
+ return;
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
+}
+
+static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
+ [NETCONFA_IFINDEX] = { .len = sizeof(int) },
+ [NETCONFA_FORWARDING] = { .len = sizeof(int) },
+ [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
+};
+
+static int inet_netconf_get_devconf(struct sk_buff *in_skb,
+ struct nlmsghdr *nlh,
+ void *arg)
+{
+ struct net *net = sock_net(in_skb->sk);
+ struct nlattr *tb[NETCONFA_MAX+1];
+ struct netconfmsg *ncm;
+ struct sk_buff *skb;
+ struct ipv4_devconf *devconf;
+ struct in_device *in_dev;
+ struct net_device *dev;
+ int ifindex;
+ int err;
+
+ err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
+ devconf_ipv4_policy);
+ if (err < 0)
+ goto errout;
+
+ err = EINVAL;
+ if (!tb[NETCONFA_IFINDEX])
+ goto errout;
+
+ ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
+ switch (ifindex) {
+ case NETCONFA_IFINDEX_ALL:
+ devconf = net->ipv4.devconf_all;
+ break;
+ case NETCONFA_IFINDEX_DEFAULT:
+ devconf = net->ipv4.devconf_dflt;
+ break;
+ default:
+ dev = __dev_get_by_index(net, ifindex);
+ if (dev == NULL)
+ goto errout;
+ in_dev = __in_dev_get_rtnl(dev);
+ if (in_dev == NULL)
+ goto errout;
+ devconf = &in_dev->cnf;
+ break;
+ }
+
+ err = -ENOBUFS;
+ skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ err = inet_netconf_fill_devconf(skb, ifindex, devconf,
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
+ -1);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+errout:
+ return err;
+}
+
#ifdef CONFIG_SYSCTL
static void devinet_copy_dflt_conf(struct net *net, int i)
@@ -1467,6 +1611,12 @@ static void inet_forward_change(struct net *net)
IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
+ inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ NETCONFA_IFINDEX_ALL,
+ net->ipv4.devconf_all);
+ inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ NETCONFA_IFINDEX_DEFAULT,
+ net->ipv4.devconf_dflt);
for_each_netdev(net, dev) {
struct in_device *in_dev;
@@ -1474,8 +1624,11 @@ static void inet_forward_change(struct net *net)
dev_disable_lro(dev);
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
- if (in_dev)
+ if (in_dev) {
IN_DEV_CONF_SET(in_dev, FORWARDING, on);
+ inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ dev->ifindex, &in_dev->cnf);
+ }
rcu_read_unlock();
}
}
@@ -1501,6 +1654,23 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
if ((new_value == 0) && (old_value != 0))
rt_cache_flush(net);
+ if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
+ new_value != old_value) {
+ int ifindex;
+
+ if (cnf == net->ipv4.devconf_dflt)
+ ifindex = NETCONFA_IFINDEX_DEFAULT;
+ else if (cnf == net->ipv4.devconf_all)
+ ifindex = NETCONFA_IFINDEX_ALL;
+ else {
+ struct in_device *idev =
+ container_of(cnf, struct in_device,
+ cnf);
+ ifindex = idev->dev->ifindex;
+ }
+ inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
+ ifindex, cnf);
+ }
}
return ret;
@@ -1527,15 +1697,23 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
}
if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
inet_forward_change(net);
- } else if (*valp) {
+ } else {
struct ipv4_devconf *cnf = ctl->extra1;
struct in_device *idev =
container_of(cnf, struct in_device, cnf);
- dev_disable_lro(idev->dev);
+ if (*valp)
+ dev_disable_lro(idev->dev);
+ inet_netconf_notify_devconf(net,
+ NETCONFA_FORWARDING,
+ idev->dev->ifindex,
+ cnf);
}
rtnl_unlock();
rt_cache_flush(net);
- }
+ } else
+ inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ NETCONFA_IFINDEX_DEFAULT,
+ net->ipv4.devconf_dflt);
}
return ret;
@@ -1809,5 +1987,7 @@ void __init devinet_init(void)
rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
+ rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
+ NULL, NULL);
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 825c608826de..5cd75e2dab2c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -488,7 +488,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
switch (cmd) {
case SIOCADDRT: /* Add a route */
case SIOCDELRT: /* Delete a route */
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (copy_from_user(&rt, arg, sizeof(rt)))
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 71b125cd5db1..4797a800faf8 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -803,7 +803,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
unsigned int bytes;
if (!new_size)
- new_size = 1;
+ new_size = 16;
bytes = new_size * sizeof(struct hlist_head *);
new_info_hash = fib_info_hash_alloc(bytes);
new_laddrhash = fib_info_hash_alloc(bytes);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d34ce2972c8f..2026542d6836 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -521,21 +521,31 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
int *expire, int *resend)
{
if (!rskq_defer_accept) {
- *expire = req->retrans >= thresh;
+ *expire = req->num_timeout >= thresh;
*resend = 1;
return;
}
- *expire = req->retrans >= thresh &&
- (!inet_rsk(req)->acked || req->retrans >= max_retries);
+ *expire = req->num_timeout >= thresh &&
+ (!inet_rsk(req)->acked || req->num_timeout >= max_retries);
/*
* Do not resend while waiting for data after ACK,
* start to resend on end of deferring period to give
* last chance for data or ACK to create established socket.
*/
*resend = !inet_rsk(req)->acked ||
- req->retrans >= rskq_defer_accept - 1;
+ req->num_timeout >= rskq_defer_accept - 1;
}
+int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
+{
+ int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL);
+
+ if (!err)
+ req->num_retrans++;
+ return err;
+}
+EXPORT_SYMBOL(inet_rtx_syn_ack);
+
void inet_csk_reqsk_queue_prune(struct sock *parent,
const unsigned long interval,
const unsigned long timeout,
@@ -599,13 +609,14 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
req->rsk_ops->syn_ack_timeout(parent, req);
if (!expire &&
(!resend ||
- !req->rsk_ops->rtx_syn_ack(parent, req, NULL) ||
+ !inet_rtx_syn_ack(parent, req) ||
inet_rsk(req)->acked)) {
unsigned long timeo;
- if (req->retrans++ == 0)
+ if (req->num_timeout++ == 0)
lopt->qlen_young--;
- timeo = min((timeout << req->retrans), max_rto);
+ timeo = min(timeout << req->num_timeout,
+ max_rto);
req->expires = now + timeo;
reqp = &req->dl_next;
continue;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 535584c00f91..cb98cbed1973 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -105,6 +105,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
r->id.idiag_src[0] = inet->inet_rcv_saddr;
r->id.idiag_dst[0] = inet->inet_daddr;
+ if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown))
+ goto errout;
+
/* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
* hence this needs to be included regardless of socket family.
*/
@@ -617,7 +620,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
r->idiag_family = sk->sk_family;
r->idiag_state = TCP_SYN_RECV;
r->idiag_timer = 1;
- r->idiag_retrans = req->retrans;
+ r->idiag_retrans = req->num_retrans;
r->id.idiag_if = sk->sk_bound_dev_if;
sock_diag_save_cookie(req, r->id.idiag_cookie);
@@ -892,13 +895,16 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct inet_diag_req_v2 *r, struct nlattr *bc)
{
const struct inet_diag_handler *handler;
+ int err = 0;
handler = inet_diag_lock_handler(r->sdiag_protocol);
if (!IS_ERR(handler))
handler->dump(skb, cb, r, bc);
+ else
+ err = PTR_ERR(handler);
inet_diag_unlock_handler(handler);
- return skb->len;
+ return err ? : skb->len;
}
static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 448e68546827..1cf6a768cd53 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -802,6 +802,10 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
table[0].data = &net->ipv4.frags.high_thresh;
table[1].data = &net->ipv4.frags.low_thresh;
table[2].data = &net->ipv4.frags.timeout;
+
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ table[0].procname = NULL;
}
hdr = register_net_sysctl(net, "net/ipv4", table);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 7240f8e2dd45..a85ae2f7a21c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -164,21 +164,6 @@ struct ipgre_net {
#define tunnels_r tunnels[2]
#define tunnels_l tunnels[1]
#define tunnels_wc tunnels[0]
-/*
- * Locking : hash tables are protected by RCU and RTNL
- */
-
-#define for_each_ip_tunnel_rcu(start) \
- for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
-
-/* often modified stats are per cpu, other are shared (netdev->stats) */
-struct pcpu_tstats {
- u64 rx_packets;
- u64 rx_bytes;
- u64 tx_packets;
- u64 tx_bytes;
- struct u64_stats_sync syncp;
-};
static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *tot)
@@ -250,7 +235,7 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
ARPHRD_ETHER : ARPHRD_IPGRE;
int score, cand_score = 4;
- for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
+ for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
if (local != t->parms.iph.saddr ||
remote != t->parms.iph.daddr ||
!(t->dev->flags & IFF_UP))
@@ -277,7 +262,7 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
}
}
- for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
+ for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) {
if (remote != t->parms.iph.daddr ||
!(t->dev->flags & IFF_UP))
continue;
@@ -303,7 +288,7 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
}
}
- for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
+ for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) {
if ((local != t->parms.iph.saddr &&
(local != t->parms.iph.daddr ||
!ipv4_is_multicast(local))) ||
@@ -331,7 +316,7 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
}
}
- for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
+ for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) {
if (t->parms.i_key != key ||
!(t->dev->flags & IFF_UP))
continue;
@@ -753,7 +738,6 @@ drop:
static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct pcpu_tstats *tstats;
const struct iphdr *old_iph = ip_hdr(skb);
const struct iphdr *tiph;
struct flowi4 fl4;
@@ -977,9 +961,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
}
}
- nf_reset(skb);
- tstats = this_cpu_ptr(dev->tstats);
- __IPTUNNEL_XMIT(tstats, &dev->stats);
+ iptunnel_xmit(skb, dev);
return NETDEV_TX_OK;
#if IS_ENABLED(CONFIG_IPV6)
@@ -1082,7 +1064,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCADDTUNNEL:
case SIOCCHGTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
err = -EFAULT;
@@ -1157,7 +1139,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCDELTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
if (dev == ign->fb_tunnel_dev) {
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 1dc01f9793d5..f6289bf6f332 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -409,7 +409,7 @@ int ip_options_compile(struct net *net,
optptr[2] += 8;
break;
default:
- if (!skb && !capable(CAP_NET_RAW)) {
+ if (!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) {
pp_ptr = optptr + 3;
goto error;
}
@@ -445,7 +445,7 @@ int ip_options_compile(struct net *net,
opt->router_alert = optptr - iph;
break;
case IPOPT_CIPSO:
- if ((!skb && !capable(CAP_NET_RAW)) || opt->cipso) {
+ if ((!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) || opt->cipso) {
pp_ptr = optptr;
goto error;
}
@@ -458,7 +458,7 @@ int ip_options_compile(struct net *net,
case IPOPT_SEC:
case IPOPT_SID:
default:
- if (!skb && !capable(CAP_NET_RAW)) {
+ if (!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) {
pp_ptr = optptr;
goto error;
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 5eea4a811042..3c9d20880283 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -457,19 +457,28 @@ static int do_ip_setsockopt(struct sock *sk, int level,
struct inet_sock *inet = inet_sk(sk);
int val = 0, err;
- if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) |
- (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) |
- (1<<IP_RETOPTS) | (1<<IP_TOS) |
- (1<<IP_TTL) | (1<<IP_HDRINCL) |
- (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
- (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
- (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
- (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) ||
- optname == IP_UNICAST_IF ||
- optname == IP_MULTICAST_TTL ||
- optname == IP_MULTICAST_ALL ||
- optname == IP_MULTICAST_LOOP ||
- optname == IP_RECVORIGDSTADDR) {
+ switch (optname) {
+ case IP_PKTINFO:
+ case IP_RECVTTL:
+ case IP_RECVOPTS:
+ case IP_RECVTOS:
+ case IP_RETOPTS:
+ case IP_TOS:
+ case IP_TTL:
+ case IP_HDRINCL:
+ case IP_MTU_DISCOVER:
+ case IP_RECVERR:
+ case IP_ROUTER_ALERT:
+ case IP_FREEBIND:
+ case IP_PASSSEC:
+ case IP_TRANSPARENT:
+ case IP_MINTTL:
+ case IP_NODEFRAG:
+ case IP_UNICAST_IF:
+ case IP_MULTICAST_TTL:
+ case IP_MULTICAST_ALL:
+ case IP_MULTICAST_LOOP:
+ case IP_RECVORIGDSTADDR:
if (optlen >= sizeof(int)) {
if (get_user(val, (int __user *) optval))
return -EFAULT;
@@ -980,13 +989,14 @@ mc_msf_out:
case IP_IPSEC_POLICY:
case IP_XFRM_POLICY:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
break;
err = xfrm_user_policy(sk, optname, optval, optlen);
break;
case IP_TRANSPARENT:
- if (!!val && !capable(CAP_NET_RAW) && !capable(CAP_NET_ADMIN)) {
+ if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
+ !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
err = -EPERM;
break;
}
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 1831092f999f..c3a4233c0ac2 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -66,20 +66,6 @@ static void vti_tunnel_setup(struct net_device *dev);
static void vti_dev_free(struct net_device *dev);
static int vti_tunnel_bind_dev(struct net_device *dev);
-/* Locking : hash tables are protected by RCU and RTNL */
-
-#define for_each_ip_tunnel_rcu(start) \
- for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
-
-/* often modified stats are per cpu, other are shared (netdev->stats) */
-struct pcpu_tstats {
- u64 rx_packets;
- u64 rx_bytes;
- u64 tx_packets;
- u64 tx_bytes;
- struct u64_stats_sync syncp;
-};
-
#define VTI_XMIT(stats1, stats2) do { \
int err; \
int pkt_len = skb->len; \
@@ -142,19 +128,19 @@ static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
struct ip_tunnel *t;
struct vti_net *ipn = net_generic(net, vti_net_id);
- for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
+ for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
return t;
- for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
+ for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
return t;
- for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
+ for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
return t;
- for_each_ip_tunnel_rcu(ipn->tunnels_wc[0])
+ for_each_ip_tunnel_rcu(t, ipn->tunnels_wc[0])
if (t && (t->dev->flags&IFF_UP))
return t;
return NULL;
@@ -338,12 +324,17 @@ static int vti_rcv(struct sk_buff *skb)
if (tunnel != NULL) {
struct pcpu_tstats *tstats;
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+ return -1;
+
tstats = this_cpu_ptr(tunnel->dev->tstats);
u64_stats_update_begin(&tstats->syncp);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
u64_stats_update_end(&tstats->syncp);
+ skb->mark = 0;
+ secpath_reset(skb);
skb->dev = tunnel->dev;
return 1;
}
@@ -497,7 +488,7 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCADDTUNNEL:
case SIOCCHGTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
err = -EFAULT;
@@ -562,7 +553,7 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCDELTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
if (dev == ipn->fb_tunnel_dev) {
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 798358b10717..d763701cff1b 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1500,8 +1500,10 @@ static int __init ip_auto_config(void)
* Clue in the operator.
*/
pr_info("IP-Config: Complete:\n");
- pr_info(" device=%s, addr=%pI4, mask=%pI4, gw=%pI4\n",
- ic_dev->name, &ic_myaddr, &ic_netmask, &ic_gateway);
+
+ pr_info(" device=%s, hwaddr=%*phC, ipaddr=%pI4, mask=%pI4, gw=%pI4\n",
+ ic_dev->name, ic_dev->addr_len, ic_dev->dev_addr,
+ &ic_myaddr, &ic_netmask, &ic_gateway);
pr_info(" host=%s, domain=%s, nis-domain=%s\n",
utsname()->nodename, ic_domain, utsname()->domainname);
pr_info(" bootserver=%pI4, rootserver=%pI4, rootpath=%s",
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index e15b45297c09..191fc24a745a 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -138,22 +138,7 @@ struct ipip_net {
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);
static void ipip_dev_free(struct net_device *dev);
-
-/*
- * Locking : hash tables are protected by RCU and RTNL
- */
-
-#define for_each_ip_tunnel_rcu(start) \
- for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
-
-/* often modified stats are per cpu, other are shared (netdev->stats) */
-struct pcpu_tstats {
- u64 rx_packets;
- u64 rx_bytes;
- u64 tx_packets;
- u64 tx_bytes;
- struct u64_stats_sync syncp;
-};
+static struct rtnl_link_ops ipip_link_ops __read_mostly;
static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *tot)
@@ -197,16 +182,16 @@ static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
struct ip_tunnel *t;
struct ipip_net *ipn = net_generic(net, ipip_net_id);
- for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
+ for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
return t;
- for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
+ for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
return t;
- for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
+ for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
return t;
@@ -264,6 +249,32 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
rcu_assign_pointer(*tp, t);
}
+static int ipip_tunnel_create(struct net_device *dev)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct ipip_net *ipn = net_generic(net, ipip_net_id);
+ int err;
+
+ err = ipip_tunnel_init(dev);
+ if (err < 0)
+ goto out;
+
+ err = register_netdevice(dev);
+ if (err < 0)
+ goto out;
+
+ strcpy(t->parms.name, dev->name);
+ dev->rtnl_link_ops = &ipip_link_ops;
+
+ dev_hold(dev);
+ ipip_tunnel_link(ipn, t);
+ return 0;
+
+out:
+ return err;
+}
+
static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
struct ip_tunnel_parm *parms, int create)
{
@@ -298,16 +309,9 @@ static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
nt = netdev_priv(dev);
nt->parms = *parms;
- if (ipip_tunnel_init(dev) < 0)
+ if (ipip_tunnel_create(dev) < 0)
goto failed_free;
- if (register_netdevice(dev) < 0)
- goto failed_free;
-
- strcpy(nt->parms.name, dev->name);
-
- dev_hold(dev);
- ipip_tunnel_link(ipn, nt);
return nt;
failed_free:
@@ -463,7 +467,6 @@ drop:
static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct pcpu_tstats *tstats;
const struct iphdr *tiph = &tunnel->parms.iph;
u8 tos = tunnel->parms.iph.tos;
__be16 df = tiph->frag_off;
@@ -479,6 +482,10 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (skb->protocol != htons(ETH_P_IP))
goto tx_error;
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_help(skb))
+ goto tx_error;
+
if (tos & 1)
tos = old_iph->tos;
@@ -586,9 +593,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if ((iph->ttl = tiph->ttl) == 0)
iph->ttl = old_iph->ttl;
- nf_reset(skb);
- tstats = this_cpu_ptr(dev->tstats);
- __IPTUNNEL_XMIT(tstats, &dev->stats);
+ iptunnel_xmit(skb, dev);
return NETDEV_TX_OK;
tx_error_icmp:
@@ -635,6 +640,28 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
dev->iflink = tunnel->parms.link;
}
+static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
+{
+ struct net *net = dev_net(t->dev);
+ struct ipip_net *ipn = net_generic(net, ipip_net_id);
+
+ ipip_tunnel_unlink(ipn, t);
+ synchronize_net();
+ t->parms.iph.saddr = p->iph.saddr;
+ t->parms.iph.daddr = p->iph.daddr;
+ memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
+ memcpy(t->dev->broadcast, &p->iph.daddr, 4);
+ ipip_tunnel_link(ipn, t);
+ t->parms.iph.ttl = p->iph.ttl;
+ t->parms.iph.tos = p->iph.tos;
+ t->parms.iph.frag_off = p->iph.frag_off;
+ if (t->parms.link != p->link) {
+ t->parms.link = p->link;
+ ipip_tunnel_bind_dev(t->dev);
+ }
+ netdev_state_change(t->dev);
+}
+
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
@@ -664,7 +691,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCADDTUNNEL:
case SIOCCHGTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
err = -EFAULT;
@@ -693,29 +720,13 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
break;
}
t = netdev_priv(dev);
- ipip_tunnel_unlink(ipn, t);
- synchronize_net();
- t->parms.iph.saddr = p.iph.saddr;
- t->parms.iph.daddr = p.iph.daddr;
- memcpy(dev->dev_addr, &p.iph.saddr, 4);
- memcpy(dev->broadcast, &p.iph.daddr, 4);
- ipip_tunnel_link(ipn, t);
- netdev_state_change(dev);
}
+
+ ipip_tunnel_update(t, &p);
}
if (t) {
err = 0;
- if (cmd == SIOCCHGTUNNEL) {
- t->parms.iph.ttl = p.iph.ttl;
- t->parms.iph.tos = p.iph.tos;
- t->parms.iph.frag_off = p.iph.frag_off;
- if (t->parms.link != p.link) {
- t->parms.link = p.link;
- ipip_tunnel_bind_dev(dev);
- netdev_state_change(dev);
- }
- }
if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
err = -EFAULT;
} else
@@ -724,7 +735,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCDELTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
if (dev == ipn->fb_tunnel_dev) {
@@ -773,6 +784,11 @@ static void ipip_dev_free(struct net_device *dev)
free_netdev(dev);
}
+#define IPIP_FEATURES (NETIF_F_SG | \
+ NETIF_F_FRAGLIST | \
+ NETIF_F_HIGHDMA | \
+ NETIF_F_HW_CSUM)
+
static void ipip_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ipip_netdev_ops;
@@ -787,6 +803,9 @@ static void ipip_tunnel_setup(struct net_device *dev)
dev->features |= NETIF_F_NETNS_LOCAL;
dev->features |= NETIF_F_LLTX;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+
+ dev->features |= IPIP_FEATURES;
+ dev->hw_features |= IPIP_FEATURES;
}
static int ipip_tunnel_init(struct net_device *dev)
@@ -829,6 +848,142 @@ static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
return 0;
}
+static void ipip_netlink_parms(struct nlattr *data[],
+ struct ip_tunnel_parm *parms)
+{
+ memset(parms, 0, sizeof(*parms));
+
+ parms->iph.version = 4;
+ parms->iph.protocol = IPPROTO_IPIP;
+ parms->iph.ihl = 5;
+
+ if (!data)
+ return;
+
+ if (data[IFLA_IPTUN_LINK])
+ parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
+
+ if (data[IFLA_IPTUN_LOCAL])
+ parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
+
+ if (data[IFLA_IPTUN_REMOTE])
+ parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
+
+ if (data[IFLA_IPTUN_TTL]) {
+ parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
+ if (parms->iph.ttl)
+ parms->iph.frag_off = htons(IP_DF);
+ }
+
+ if (data[IFLA_IPTUN_TOS])
+ parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
+
+ if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
+ parms->iph.frag_off = htons(IP_DF);
+}
+
+static int ipip_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct net *net = dev_net(dev);
+ struct ip_tunnel *nt;
+
+ nt = netdev_priv(dev);
+ ipip_netlink_parms(data, &nt->parms);
+
+ if (ipip_tunnel_locate(net, &nt->parms, 0))
+ return -EEXIST;
+
+ return ipip_tunnel_create(dev);
+}
+
+static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[])
+{
+ struct ip_tunnel *t;
+ struct ip_tunnel_parm p;
+ struct net *net = dev_net(dev);
+ struct ipip_net *ipn = net_generic(net, ipip_net_id);
+
+ if (dev == ipn->fb_tunnel_dev)
+ return -EINVAL;
+
+ ipip_netlink_parms(data, &p);
+
+ if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
+ (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
+ return -EINVAL;
+
+ t = ipip_tunnel_locate(net, &p, 0);
+
+ if (t) {
+ if (t->dev != dev)
+ return -EEXIST;
+ } else
+ t = netdev_priv(dev);
+
+ ipip_tunnel_update(t, &p);
+ return 0;
+}
+
+static size_t ipip_get_size(const struct net_device *dev)
+{
+ return
+ /* IFLA_IPTUN_LINK */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_LOCAL */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_REMOTE */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_TTL */
+ nla_total_size(1) +
+ /* IFLA_IPTUN_TOS */
+ nla_total_size(1) +
+ /* IFLA_IPTUN_PMTUDISC */
+ nla_total_size(1) +
+ 0;
+}
+
+static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct ip_tunnel_parm *parm = &tunnel->parms;
+
+ if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
+ nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
+ nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
+ nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
+ nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
+ nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
+ !!(parm->iph.frag_off & htons(IP_DF))))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
+ [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
+ [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 },
+ [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
+ [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
+ [IFLA_IPTUN_TOS] = { .type = NLA_U8 },
+ [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
+};
+
+static struct rtnl_link_ops ipip_link_ops __read_mostly = {
+ .kind = "ipip",
+ .maxtype = IFLA_IPTUN_MAX,
+ .policy = ipip_policy,
+ .priv_size = sizeof(struct ip_tunnel),
+ .setup = ipip_tunnel_setup,
+ .newlink = ipip_newlink,
+ .changelink = ipip_changelink,
+ .get_size = ipip_get_size,
+ .fill_info = ipip_fill_info,
+};
+
static struct xfrm_tunnel ipip_handler __read_mostly = {
.handler = ipip_rcv,
.err_handler = ipip_err,
@@ -925,14 +1080,26 @@ static int __init ipip_init(void)
return err;
err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
if (err < 0) {
- unregister_pernet_device(&ipip_net_ops);
pr_info("%s: can't register tunnel\n", __func__);
+ goto xfrm_tunnel_failed;
}
+ err = rtnl_link_register(&ipip_link_ops);
+ if (err < 0)
+ goto rtnl_link_failed;
+
+out:
return err;
+
+rtnl_link_failed:
+ xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
+xfrm_tunnel_failed:
+ unregister_pernet_device(&ipip_net_ops);
+ goto out;
}
static void __exit ipip_fini(void)
{
+ rtnl_link_unregister(&ipip_link_ops);
if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
pr_info("%s: can't deregister tunnel\n", __func__);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 6168c4dc58b1..fc09ef936636 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -83,8 +83,8 @@ struct mr_table {
struct vif_device vif_table[MAXVIFS];
int maxvif;
atomic_t cache_resolve_queue_len;
- int mroute_do_assert;
- int mroute_do_pim;
+ bool mroute_do_assert;
+ bool mroute_do_pim;
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
int mroute_reg_vif_num;
#endif
@@ -1207,23 +1207,24 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
struct net *net = sock_net(sk);
struct mr_table *mrt;
+ if (sk->sk_type != SOCK_RAW ||
+ inet_sk(sk)->inet_num != IPPROTO_IGMP)
+ return -EOPNOTSUPP;
+
mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
if (mrt == NULL)
return -ENOENT;
if (optname != MRT_INIT) {
if (sk != rcu_access_pointer(mrt->mroute_sk) &&
- !capable(CAP_NET_ADMIN))
+ !ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EACCES;
}
switch (optname) {
case MRT_INIT:
- if (sk->sk_type != SOCK_RAW ||
- inet_sk(sk)->inet_num != IPPROTO_IGMP)
- return -EOPNOTSUPP;
if (optlen != sizeof(int))
- return -ENOPROTOOPT;
+ return -EINVAL;
rtnl_lock();
if (rtnl_dereference(mrt->mroute_sk)) {
@@ -1284,9 +1285,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
case MRT_ASSERT:
{
int v;
+ if (optlen != sizeof(v))
+ return -EINVAL;
if (get_user(v, (int __user *)optval))
return -EFAULT;
- mrt->mroute_do_assert = (v) ? 1 : 0;
+ mrt->mroute_do_assert = v;
return 0;
}
#ifdef CONFIG_IP_PIMSM
@@ -1294,9 +1297,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
{
int v;
+ if (optlen != sizeof(v))
+ return -EINVAL;
if (get_user(v, (int __user *)optval))
return -EFAULT;
- v = (v) ? 1 : 0;
+ v = !!v;
rtnl_lock();
ret = 0;
@@ -1325,7 +1330,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
} else {
if (!ipmr_new_table(net, v))
ret = -ENOMEM;
- raw_sk(sk)->ipmr_table = v;
+ else
+ raw_sk(sk)->ipmr_table = v;
}
rtnl_unlock();
return ret;
@@ -1351,6 +1357,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
struct net *net = sock_net(sk);
struct mr_table *mrt;
+ if (sk->sk_type != SOCK_RAW ||
+ inet_sk(sk)->inet_num != IPPROTO_IGMP)
+ return -EOPNOTSUPP;
+
mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
if (mrt == NULL)
return -ENOENT;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 97e61eadf580..3ea4127404d6 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1533,7 +1533,7 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -1677,7 +1677,7 @@ static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user,
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -1698,7 +1698,7 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -1722,7 +1722,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 170b1fdd6b72..17c5e06da662 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1846,7 +1846,7 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -1961,7 +1961,7 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -1983,7 +1983,7 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -2008,7 +2008,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 9e0ffaf1d942..ac635a7b4416 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -184,7 +184,8 @@ nf_nat_ipv4_out(unsigned int hooknum,
if ((ct->tuplehash[dir].tuple.src.u3.ip !=
ct->tuplehash[!dir].tuple.dst.u3.ip) ||
- (ct->tuplehash[dir].tuple.src.u.all !=
+ (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
+ ct->tuplehash[dir].tuple.src.u.all !=
ct->tuplehash[!dir].tuple.dst.u.all))
if (nf_xfrm_me_harder(skb, AF_INET) < 0)
ret = NF_DROP;
@@ -221,6 +222,7 @@ nf_nat_ipv4_local_fn(unsigned int hooknum,
}
#ifdef CONFIG_XFRM
else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
ct->tuplehash[dir].tuple.dst.u.all !=
ct->tuplehash[!dir].tuple.src.u.all)
if (nf_xfrm_me_harder(skb, AF_INET) < 0)
@@ -274,9 +276,7 @@ static int __net_init iptable_nat_net_init(struct net *net)
return -ENOMEM;
net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
kfree(repl);
- if (IS_ERR(net->ipv4.nat_table))
- return PTR_ERR(net->ipv4.nat_table);
- return 0;
+ return PTR_RET(net->ipv4.nat_table);
}
static void __net_exit iptable_nat_net_exit(struct net *net)
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 8918eff1426d..0f9d09f54bd9 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -29,6 +29,7 @@
#include <net/protocol.h>
const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
+const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
/*
* Add a protocol handler to the hash tables
@@ -41,6 +42,13 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
}
EXPORT_SYMBOL(inet_add_protocol);
+int inet_add_offload(const struct net_offload *prot, unsigned char protocol)
+{
+ return !cmpxchg((const struct net_offload **)&inet_offloads[protocol],
+ NULL, prot) ? 0 : -1;
+}
+EXPORT_SYMBOL(inet_add_offload);
+
/*
* Remove a protocol from the hash tables.
*/
@@ -57,3 +65,16 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
return ret;
}
EXPORT_SYMBOL(inet_del_protocol);
+
+int inet_del_offload(const struct net_offload *prot, unsigned char protocol)
+{
+ int ret;
+
+ ret = (cmpxchg((const struct net_offload **)&inet_offloads[protocol],
+ prot, NULL) == prot) ? 0 : -1;
+
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL(inet_del_offload);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 432f4bb77238..baa9b289d7ab 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1163,8 +1163,12 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
spin_lock_bh(&fnhe_lock);
if (daddr == fnhe->fnhe_daddr) {
- struct rtable *orig;
-
+ struct rtable *orig = rcu_dereference(fnhe->fnhe_rth);
+ if (orig && rt_is_expired(orig)) {
+ fnhe->fnhe_gw = 0;
+ fnhe->fnhe_pmtu = 0;
+ fnhe->fnhe_expires = 0;
+ }
if (fnhe->fnhe_pmtu) {
unsigned long expires = fnhe->fnhe_expires;
unsigned long diff = expires - jiffies;
@@ -1181,7 +1185,6 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
} else if (!rt->rt_gateway)
rt->rt_gateway = daddr;
- orig = rcu_dereference(fnhe->fnhe_rth);
rcu_assign_pointer(fnhe->fnhe_rth, rt);
if (orig)
rt_free(orig);
@@ -1782,6 +1785,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
if (dev_out->flags & IFF_LOOPBACK)
flags |= RTCF_LOCAL;
+ do_cache = true;
if (type == RTN_BROADCAST) {
flags |= RTCF_BROADCAST | RTCF_LOCAL;
fi = NULL;
@@ -1790,6 +1794,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
fl4->flowi4_proto))
flags &= ~RTCF_LOCAL;
+ else
+ do_cache = false;
/* If multicast route do not exist use
* default one, but do not gateway in this case.
* Yes, it is hack.
@@ -1799,8 +1805,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
}
fnhe = NULL;
- do_cache = fi != NULL;
- if (fi) {
+ do_cache &= fi != NULL;
+ if (do_cache) {
struct rtable __rcu **prth;
struct fib_nh *nh = &FIB_RES_NH(*res);
@@ -2490,6 +2496,10 @@ static __net_init int sysctl_route_net_init(struct net *net)
tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
if (tbl == NULL)
goto err_dup;
+
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ tbl[0].procname = NULL;
}
tbl[0].extra1 = net;
@@ -2594,7 +2604,7 @@ int __init ip_rt_init(void)
pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
xfrm_init();
- xfrm4_init(ip_rt_max_size);
+ xfrm4_init();
#endif
rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index ba48e799b031..b236ef04914f 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -340,7 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
}
req->expires = 0UL;
- req->retrans = 0;
+ req->num_retrans = 0;
/*
* We need to lookup the route here to get at the correct
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 63d4eccc674d..d84400b65049 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -883,6 +883,9 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
table[6].data =
&net->ipv4.sysctl_ping_group_range;
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ table[0].procname = NULL;
}
/*
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f32c02e2a543..e6eace1c2bdb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -536,30 +536,29 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
struct tcp_sock *tp = tcp_sk(sk);
int answ;
+ bool slow;
switch (cmd) {
case SIOCINQ:
if (sk->sk_state == TCP_LISTEN)
return -EINVAL;
- lock_sock(sk);
+ slow = lock_sock_fast(sk);
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
answ = 0;
else if (sock_flag(sk, SOCK_URGINLINE) ||
!tp->urg_data ||
before(tp->urg_seq, tp->copied_seq) ||
!before(tp->urg_seq, tp->rcv_nxt)) {
- struct sk_buff *skb;
answ = tp->rcv_nxt - tp->copied_seq;
- /* Subtract 1, if FIN is in queue. */
- skb = skb_peek_tail(&sk->sk_receive_queue);
- if (answ && skb)
- answ -= tcp_hdr(skb)->fin;
+ /* Subtract 1, if FIN was received */
+ if (answ && sock_flag(sk, SOCK_DONE))
+ answ--;
} else
answ = tp->urg_seq - tp->copied_seq;
- release_sock(sk);
+ unlock_sock_fast(sk, slow);
break;
case SIOCATMARK:
answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
@@ -1214,7 +1213,7 @@ new_segment:
wait_for_sndbuf:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
- if (copied && likely(!tp->repair))
+ if (copied)
tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
@@ -1225,7 +1224,7 @@ wait_for_memory:
}
out:
- if (copied && likely(!tp->repair))
+ if (copied)
tcp_push(sk, flags, mss_now, tp->nonagle);
release_sock(sk);
return copied + copied_syn;
@@ -2305,7 +2304,7 @@ void tcp_sock_destruct(struct sock *sk)
static inline bool tcp_can_repair_sock(const struct sock *sk)
{
- return capable(CAP_NET_ADMIN) &&
+ return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
}
@@ -2766,6 +2765,8 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
info->tcpi_options |= TCPI_OPT_ECN;
if (tp->ecn_flags & TCP_ECN_SEEN)
info->tcpi_options |= TCPI_OPT_ECN_SEEN;
+ if (tp->syn_data_acked)
+ info->tcpi_options |= TCPI_OPT_SYN_DATA;
info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 1432cdb0644c..baf28611b334 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -259,7 +259,8 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
if (!ca)
err = -ENOENT;
- else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN)))
+ else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
+ ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)))
err = -EPERM;
else if (!try_module_get(ca->owner))
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 813b43a76fec..834857f3c871 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -313,11 +313,13 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
.tcpv_rttcnt = ca->cnt_rtt,
.tcpv_minrtt = ca->base_rtt,
};
- u64 t = ca->sum_rtt;
- do_div(t, ca->cnt_rtt);
- info.tcpv_rtt = t;
+ if (info.tcpv_rttcnt > 0) {
+ u64 t = ca->sum_rtt;
+ do_div(t, info.tcpv_rttcnt);
+ info.tcpv_rtt = t;
+ }
nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
}
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 432c36649db3..fc67831656e5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3552,6 +3552,24 @@ static bool tcp_process_frto(struct sock *sk, int flag)
return false;
}
+/* RFC 5961 7 [ACK Throttling] */
+static void tcp_send_challenge_ack(struct sock *sk)
+{
+ /* unprotected vars, we dont care of overwrites */
+ static u32 challenge_timestamp;
+ static unsigned int challenge_count;
+ u32 now = jiffies / HZ;
+
+ if (now != challenge_timestamp) {
+ challenge_timestamp = now;
+ challenge_count = 0;
+ }
+ if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
+ tcp_send_ack(sk);
+ }
+}
+
/* This routine deals with incoming acks, but not outgoing ones. */
static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
{
@@ -3571,8 +3589,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
/* If the ack is older than previous acks
* then we can probably ignore it.
*/
- if (before(ack, prior_snd_una))
+ if (before(ack, prior_snd_una)) {
+ /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
+ if (before(ack, prior_snd_una - tp->max_window)) {
+ tcp_send_challenge_ack(sk);
+ return -1;
+ }
goto old_ack;
+ }
/* If the ack includes data we haven't sent yet, discard
* this segment (RFC793 Section 3.9).
@@ -4529,6 +4553,9 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
struct tcphdr *th;
bool fragstolen;
+ if (size == 0)
+ return 0;
+
skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
if (!skb)
goto err;
@@ -5241,23 +5268,6 @@ out:
}
#endif /* CONFIG_NET_DMA */
-static void tcp_send_challenge_ack(struct sock *sk)
-{
- /* unprotected vars, we dont care of overwrites */
- static u32 challenge_timestamp;
- static unsigned int challenge_count;
- u32 now = jiffies / HZ;
-
- if (now != challenge_timestamp) {
- challenge_timestamp = now;
- challenge_count = 0;
- }
- if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
- tcp_send_ack(sk);
- }
-}
-
/* Does PAWS and seqno based validation of an incoming segment, flags will
* play significant role here.
*/
@@ -5310,11 +5320,6 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
goto discard;
}
- /* ts_recent update must be made after we are sure that the packet
- * is in window.
- */
- tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
-
/* step 3: check security and precedence [ignored] */
/* step 4: Check for a SYN
@@ -5549,6 +5554,11 @@ step5:
if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0)
goto discard;
+ /* ts_recent update must be made after we are sure that the packet
+ * is in window.
+ */
+ tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
+
tcp_rcv_rtt_measure_ts(sk, skb);
/* Process urgent data. */
@@ -5646,6 +5656,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
tcp_rearm_rto(sk);
return true;
}
+ tp->syn_data_acked = tp->syn_data;
return false;
}
@@ -5963,7 +5974,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
req = tp->fastopen_rsk;
if (req != NULL) {
- BUG_ON(sk->sk_state != TCP_SYN_RECV &&
+ WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
sk->sk_state != TCP_FIN_WAIT1);
if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
@@ -5984,7 +5995,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
*/
if (req) {
tcp_synack_rtt_meas(sk, req);
- tp->total_retrans = req->retrans;
+ tp->total_retrans = req->num_retrans;
reqsk_fastopen_remove(sk, req, false);
} else {
@@ -6052,7 +6063,15 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
* ACK we have received, this would have acknowledged
* our SYNACK so stop the SYNACK timer.
*/
- if (acceptable && req != NULL) {
+ if (req != NULL) {
+ /* Return RST if ack_seq is invalid.
+ * Note that RFC793 only says to generate a
+ * DUPACK for it but for TCP Fast Open it seems
+ * better to treat this case like TCP_SYN_RECV
+ * above.
+ */
+ if (!acceptable)
+ return 1;
/* We no longer need the request sock. */
reqsk_fastopen_remove(sk, req, false);
tcp_rearm_rto(sk);
@@ -6118,6 +6137,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
} else
goto discard;
+ /* ts_recent update must be made after we are sure that the packet
+ * is in window.
+ */
+ tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
+
/* step 6: check the URG bit */
tcp_urg(sk, skb, th);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ef998b008a57..1ed230716d51 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -138,14 +138,6 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
-static int tcp_repair_connect(struct sock *sk)
-{
- tcp_connect_init(sk);
- tcp_finish_connect(sk, NULL);
-
- return 0;
-}
-
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
@@ -250,10 +242,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_id = tp->write_seq ^ jiffies;
- if (likely(!tp->repair))
- err = tcp_connect(sk);
- else
- err = tcp_repair_connect(sk);
+ err = tcp_connect(sk);
rt = NULL;
if (err)
@@ -877,10 +866,13 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
}
static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
- struct request_values *rvp)
+ struct request_values *rvp)
{
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
- return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
+ int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
+
+ if (!res)
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ return res;
}
/*
@@ -1070,7 +1062,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
}
EXPORT_SYMBOL(tcp_md5_do_del);
-void tcp_clear_md5_list(struct sock *sk)
+static void tcp_clear_md5_list(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
@@ -1386,7 +1378,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
struct sock *child;
int err;
- req->retrans = 0;
+ req->num_retrans = 0;
+ req->num_timeout = 0;
req->sk = NULL;
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
@@ -1461,6 +1454,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
skb_set_owner_r(skb, child);
__skb_queue_tail(&child->sk_receive_queue, skb);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ tp->syn_data_acked = 1;
}
sk->sk_data_ready(sk, 0);
bh_unlock_sock(child);
@@ -1740,7 +1734,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
tcp_initialize_rcv_mss(newsk);
tcp_synack_rtt_meas(newsk, req);
- newtp->total_retrans = req->retrans;
+ newtp->total_retrans = req->num_retrans;
#ifdef CONFIG_TCP_MD5SIG
/* Copy over the MD5 key from the original socket */
@@ -1918,7 +1912,6 @@ EXPORT_SYMBOL(tcp_v4_do_rcv);
void tcp_v4_early_demux(struct sk_buff *skb)
{
- struct net *net = dev_net(skb->dev);
const struct iphdr *iph;
const struct tcphdr *th;
struct sock *sk;
@@ -1926,16 +1919,16 @@ void tcp_v4_early_demux(struct sk_buff *skb)
if (skb->pkt_type != PACKET_HOST)
return;
- if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr)))
+ if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
return;
iph = ip_hdr(skb);
- th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb));
+ th = tcp_hdr(skb);
if (th->doff < sizeof(struct tcphdr) / 4)
return;
- sk = __inet_lookup_established(net, &tcp_hashinfo,
+ sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
iph->saddr, th->source,
iph->daddr, ntohs(th->dest),
skb->skb_iif);
@@ -2639,7 +2632,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req,
0, 0, /* could print option size, but that is af dependent. */
1, /* timers active (only the expire timer) */
jiffies_delta_to_clock_t(delta),
- req->retrans,
+ req->num_timeout,
from_kuid_munged(seq_user_ns(f), uid),
0, /* non standard timer */
0, /* open_requests have no inode */
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 4c752a6e0bcd..f696d7c2e9fa 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1,7 +1,6 @@
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>
-#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/slab.h>
@@ -9,6 +8,7 @@
#include <linux/tcp.h>
#include <linux/hash.h>
#include <linux/tcp_metrics.h>
+#include <linux/vmalloc.h>
#include <net/inet_connection_sock.h>
#include <net/net_namespace.h>
@@ -864,7 +864,7 @@ static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
}
a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV6];
if (a) {
- if (nla_len(a) != sizeof(sizeof(struct in6_addr)))
+ if (nla_len(a) != sizeof(struct in6_addr))
return -EINVAL;
addr->family = AF_INET6;
memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6));
@@ -1034,7 +1034,10 @@ static int __net_init tcp_net_metrics_init(struct net *net)
net->ipv4.tcp_metrics_hash_log = order_base_2(slots);
size = sizeof(struct tcpm_hash_bucket) << net->ipv4.tcp_metrics_hash_log;
- net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL);
+ net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
+ if (!net->ipv4.tcp_metrics_hash)
+ net->ipv4.tcp_metrics_hash = vzalloc(size);
+
if (!net->ipv4.tcp_metrics_hash)
return -ENOMEM;
@@ -1055,7 +1058,10 @@ static void __net_exit tcp_net_metrics_exit(struct net *net)
tm = next;
}
}
- kfree(net->ipv4.tcp_metrics_hash);
+ if (is_vmalloc_addr(net->ipv4.tcp_metrics_hash))
+ vfree(net->ipv4.tcp_metrics_hash);
+ else
+ kfree(net->ipv4.tcp_metrics_hash);
}
static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 27536ba16c9d..f35f2dfb6401 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -510,6 +510,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->rx_opt.mss_clamp = req->mss;
TCP_ECN_openreq_child(newtp, req);
newtp->fastopen_rsk = NULL;
+ newtp->syn_data_acked = 0;
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
}
@@ -552,7 +553,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
* it can be estimated (approximately)
* from another data.
*/
- tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
+ tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
}
}
@@ -581,7 +582,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
* Note that even if there is new data in the SYN packet
* they will be thrown away too.
*/
- req->rsk_ops->rtx_syn_ack(sk, req, NULL);
+ inet_rtx_syn_ack(sk, req);
return NULL;
}
@@ -695,7 +696,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
/* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */
if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
- else if (req->retrans) /* don't take RTT sample if retrans && ~TS */
+ else if (req->num_retrans) /* don't take RTT sample if retrans && ~TS */
tcp_rsk(req)->snt_synack = 0;
/* For Fast Open no more processing is needed (sk is the
@@ -705,7 +706,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
return sk;
/* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
- if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+ if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
inet_rsk(req)->acked = 1;
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cfe6ffe1c177..8ac085573217 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1986,6 +1986,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
BUG_ON(!tso_segs);
+ if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE)
+ goto repair; /* Skip network transmission */
+
cwnd_quota = tcp_cwnd_test(tp, skb);
if (!cwnd_quota)
break;
@@ -2026,6 +2029,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
break;
+repair:
/* Advance the send_head. This one is sent out.
* This call will increment packets_out.
*/
@@ -2983,6 +2987,11 @@ int tcp_connect(struct sock *sk)
tcp_connect_init(sk);
+ if (unlikely(tp->repair)) {
+ tcp_finish_connect(sk, NULL);
+ return 0;
+ }
+
buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
if (unlikely(buff == NULL))
return -ENOBUFS;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index fc04711e80c8..b78aac30c498 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -318,7 +318,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
req = tcp_sk(sk)->fastopen_rsk;
req->rsk_ops->syn_ack_timeout(sk, req);
- if (req->retrans >= max_retries) {
+ if (req->num_timeout >= max_retries) {
tcp_write_err(sk);
return;
}
@@ -327,10 +327,10 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
* regular retransmit because if the child socket has been accepted
* it's not good to give up too easily.
*/
- req->rsk_ops->rtx_syn_ack(sk, req, NULL);
- req->retrans++;
+ inet_rtx_syn_ack(sk, req);
+ req->num_timeout++;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- TCP_TIMEOUT_INIT << req->retrans, TCP_RTO_MAX);
+ TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
}
/*
@@ -347,8 +347,8 @@ void tcp_retransmit_timer(struct sock *sk)
return;
}
if (tp->fastopen_rsk) {
- BUG_ON(sk->sk_state != TCP_SYN_RECV &&
- sk->sk_state != TCP_FIN_WAIT1);
+ WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
+ sk->sk_state != TCP_FIN_WAIT1);
tcp_fastopen_synack_timer(sk);
/* Before we receive ACK to our SYN-ACK don't retransmit
* anything else (e.g., data or FIN segments).
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 05c5ab8d983c..3be0ac2c1920 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -279,19 +279,8 @@ static void __exit xfrm4_policy_fini(void)
xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo);
}
-void __init xfrm4_init(int rt_max_size)
+void __init xfrm4_init(void)
{
- /*
- * Select a default value for the gc_thresh based on the main route
- * table hash size. It seems to me the worst case scenario is when
- * we have ipsec operating in transport mode, in which we create a
- * dst_entry per socket. The xfrm gc algorithm starts trying to remove
- * entries at gc_thresh, and prevents new allocations as 2*gc_thresh
- * so lets set an initial xfrm gc_thresh value at the rt_max_size/2.
- * That will let us store an ipsec connection per route table entry,
- * and start cleaning when were 1/2 full
- */
- xfrm4_dst_ops.gc_thresh = rt_max_size/2;
dst_entries_init(&xfrm4_dst_ops);
xfrm4_state_init();
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index b6d3f79151e2..2068ac4fbdad 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -7,9 +7,11 @@ obj-$(CONFIG_IPV6) += ipv6.o
ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
addrlabel.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
- raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
+ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o
+ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o
+
ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o
@@ -39,5 +41,6 @@ obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
obj-y += addrconf_core.o exthdrs_core.o
+obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6_offload)
obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d7c56f8a5b4e..fc0e13ad6337 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -81,6 +81,7 @@
#include <net/pkt_sched.h>
#include <linux/if_tunnel.h>
#include <linux/rtnetlink.h>
+#include <linux/netconf.h>
#ifdef CONFIG_IPV6_PRIVACY
#include <linux/random.h>
@@ -401,7 +402,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
ndev->cnf.accept_dad = -1;
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) {
pr_info("%s: Disabled Multicast RS\n", dev->name);
ndev->cnf.rtr_solicits = 0;
@@ -460,6 +461,141 @@ static struct inet6_dev *ipv6_find_idev(struct net_device *dev)
return idev;
}
+static int inet6_netconf_msgsize_devconf(int type)
+{
+ int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
+ + nla_total_size(4); /* NETCONFA_IFINDEX */
+
+ /* type -1 is used for ALL */
+ if (type == -1 || type == NETCONFA_FORWARDING)
+ size += nla_total_size(4);
+
+ return size;
+}
+
+static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
+ struct ipv6_devconf *devconf, u32 portid,
+ u32 seq, int event, unsigned int flags,
+ int type)
+{
+ struct nlmsghdr *nlh;
+ struct netconfmsg *ncm;
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
+ flags);
+ if (nlh == NULL)
+ return -EMSGSIZE;
+
+ ncm = nlmsg_data(nlh);
+ ncm->ncm_family = AF_INET6;
+
+ if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
+ goto nla_put_failure;
+
+ /* type -1 is used for ALL */
+ if ((type == -1 || type == NETCONFA_FORWARDING) &&
+ nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0)
+ goto nla_put_failure;
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
+ struct ipv6_devconf *devconf)
+{
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
+ RTM_NEWNETCONF, 0, type);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC);
+ return;
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err);
+}
+
+static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {
+ [NETCONFA_IFINDEX] = { .len = sizeof(int) },
+ [NETCONFA_FORWARDING] = { .len = sizeof(int) },
+};
+
+static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
+ struct nlmsghdr *nlh,
+ void *arg)
+{
+ struct net *net = sock_net(in_skb->sk);
+ struct nlattr *tb[NETCONFA_MAX+1];
+ struct netconfmsg *ncm;
+ struct sk_buff *skb;
+ struct ipv6_devconf *devconf;
+ struct inet6_dev *in6_dev;
+ struct net_device *dev;
+ int ifindex;
+ int err;
+
+ err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
+ devconf_ipv6_policy);
+ if (err < 0)
+ goto errout;
+
+ err = EINVAL;
+ if (!tb[NETCONFA_IFINDEX])
+ goto errout;
+
+ ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
+ switch (ifindex) {
+ case NETCONFA_IFINDEX_ALL:
+ devconf = net->ipv6.devconf_all;
+ break;
+ case NETCONFA_IFINDEX_DEFAULT:
+ devconf = net->ipv6.devconf_dflt;
+ break;
+ default:
+ dev = __dev_get_by_index(net, ifindex);
+ if (dev == NULL)
+ goto errout;
+ in6_dev = __in6_dev_get(dev);
+ if (in6_dev == NULL)
+ goto errout;
+ devconf = &in6_dev->cnf;
+ break;
+ }
+
+ err = -ENOBUFS;
+ skb = nlmsg_new(inet6_netconf_msgsize_devconf(-1), GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ err = inet6_netconf_fill_devconf(skb, ifindex, devconf,
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
+ -1);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+errout:
+ return err;
+}
+
#ifdef CONFIG_SYSCTL
static void dev_forward_change(struct inet6_dev *idev)
{
@@ -471,7 +607,7 @@ static void dev_forward_change(struct inet6_dev *idev)
dev = idev->dev;
if (idev->cnf.forwarding)
dev_disable_lro(dev);
- if (dev && (dev->flags & IFF_MULTICAST)) {
+ if (dev->flags & IFF_MULTICAST) {
if (idev->cnf.forwarding)
ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
else
@@ -486,6 +622,8 @@ static void dev_forward_change(struct inet6_dev *idev)
else
addrconf_leave_anycast(ifa);
}
+ inet6_netconf_notify_devconf(dev_net(dev), NETCONFA_FORWARDING,
+ dev->ifindex, &idev->cnf);
}
@@ -518,6 +656,10 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
*p = newf;
if (p == &net->ipv6.devconf_dflt->forwarding) {
+ if ((!newf) ^ (!old))
+ inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ NETCONFA_IFINDEX_DEFAULT,
+ net->ipv6.devconf_dflt);
rtnl_unlock();
return 0;
}
@@ -525,6 +667,10 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
if (p == &net->ipv6.devconf_all->forwarding) {
net->ipv6.devconf_dflt->forwarding = newf;
addrconf_forward_change(net, newf);
+ if ((!newf) ^ (!old))
+ inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all);
} else if ((!newf) ^ (!old))
dev_forward_change((struct inet6_dev *)table->extra1);
rtnl_unlock();
@@ -553,7 +699,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
pr_warn("Freeing alive inet6 address %p\n", ifp);
return;
}
- dst_release(&ifp->rt->dst);
+ ip6_rt_put(ifp->rt);
kfree_rcu(ifp, rcu);
}
@@ -805,7 +951,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
rt6_set_expires(rt, expires);
}
}
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
}
/* clean up prefsrc entries */
@@ -1692,7 +1838,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
This thing is done here expecting that the whole
class of non-broadcast devices need not cloning.
*/
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT))
cfg.fc_flags |= RTF_NONEXTHOP;
#endif
@@ -1752,7 +1898,7 @@ static void addrconf_add_mroute(struct net_device *dev)
ip6_route_add(&cfg);
}
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
static void sit_route_add(struct net_device *dev)
{
struct fib6_config cfg = {
@@ -1881,8 +2027,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
dev, expires, flags);
}
- if (rt)
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
}
/* Try to figure out our local address for this prefix */
@@ -2104,7 +2249,7 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg)
if (dev == NULL)
goto err_exit;
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
if (dev->type == ARPHRD_SIT) {
const struct net_device_ops *ops = dev->netdev_ops;
struct ifreq ifr;
@@ -2268,7 +2413,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
struct in6_ifreq ireq;
int err;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
@@ -2287,7 +2432,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
struct in6_ifreq ireq;
int err;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
@@ -2315,7 +2460,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
}
}
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
static void sit_add_v4_addrs(struct inet6_dev *idev)
{
struct in6_addr addr;
@@ -2434,7 +2579,7 @@ static void addrconf_dev_config(struct net_device *dev)
addrconf_add_linklocal(idev, &addr);
}
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
static void addrconf_sit_config(struct net_device *dev)
{
struct inet6_dev *idev;
@@ -2471,7 +2616,7 @@ static void addrconf_sit_config(struct net_device *dev)
}
#endif
-#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE)
+#if IS_ENABLED(CONFIG_NET_IPGRE)
static void addrconf_gre_config(struct net_device *dev)
{
struct inet6_dev *idev;
@@ -2601,12 +2746,12 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
}
switch (dev->type) {
-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_SIT)
case ARPHRD_SIT:
addrconf_sit_config(dev);
break;
#endif
-#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE)
+#if IS_ENABLED(CONFIG_NET_IPGRE)
case ARPHRD_IPGRE:
addrconf_gre_config(dev);
break;
@@ -3064,14 +3209,15 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
struct hlist_node *n;
hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket],
addr_lst) {
+ if (!net_eq(dev_net(ifa->idev->dev), net))
+ continue;
/* sync with offset */
if (p < state->offset) {
p++;
continue;
}
state->offset++;
- if (net_eq(dev_net(ifa->idev->dev), net))
- return ifa;
+ return ifa;
}
/* prepare for next bucket */
@@ -3089,18 +3235,20 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
struct hlist_node *n = &ifa->addr_lst;
hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst) {
+ if (!net_eq(dev_net(ifa->idev->dev), net))
+ continue;
state->offset++;
- if (net_eq(dev_net(ifa->idev->dev), net))
- return ifa;
+ return ifa;
}
while (++state->bucket < IN6_ADDR_HSIZE) {
state->offset = 0;
hlist_for_each_entry_rcu_bh(ifa, n,
&inet6_addr_lst[state->bucket], addr_lst) {
+ if (!net_eq(dev_net(ifa->idev->dev), net))
+ continue;
state->offset++;
- if (net_eq(dev_net(ifa->idev->dev), net))
- return ifa;
+ return ifa;
}
}
@@ -3191,7 +3339,7 @@ void if6_proc_exit(void)
}
#endif /* CONFIG_PROC_FS */
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
/* Check if address is a home address configured on any interface. */
int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
{
@@ -3889,6 +4037,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
+ array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
}
static inline size_t inet6_ifla6_size(void)
@@ -4557,6 +4706,13 @@ static struct addrconf_sysctl_table
.proc_handler = proc_dointvec
},
{
+ .procname = "ndisc_notify",
+ .data = &ipv6_devconf.ndisc_notify,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
/* sentinel */
}
},
@@ -4781,6 +4937,8 @@ int __init addrconf_init(void)
inet6_dump_ifmcaddr, NULL);
__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
inet6_dump_ifacaddr, NULL);
+ __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
+ NULL, NULL);
ipv6_addr_label_rtnl_register();
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a974247a9ae4..b043c60429bd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -160,7 +160,8 @@ lookup_protocol:
}
err = -EPERM;
- if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
+ if (sock->type == SOCK_RAW && !kern &&
+ !ns_capable(net->user_ns, CAP_NET_RAW))
goto out_rcu_unlock;
sock->ops = answer->ops;
@@ -282,7 +283,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
return -EINVAL;
snum = ntohs(addr->sin6_port);
- if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+ if (snum && snum < PROT_SOCK && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
lock_sock(sk);
@@ -699,249 +700,9 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
-static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
-{
- const struct inet6_protocol *ops = NULL;
-
- for (;;) {
- struct ipv6_opt_hdr *opth;
- int len;
-
- if (proto != NEXTHDR_HOP) {
- ops = rcu_dereference(inet6_protos[proto]);
-
- if (unlikely(!ops))
- break;
-
- if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
- break;
- }
-
- if (unlikely(!pskb_may_pull(skb, 8)))
- break;
-
- opth = (void *)skb->data;
- len = ipv6_optlen(opth);
-
- if (unlikely(!pskb_may_pull(skb, len)))
- break;
-
- proto = opth->nexthdr;
- __skb_pull(skb, len);
- }
-
- return proto;
-}
-
-static int ipv6_gso_send_check(struct sk_buff *skb)
-{
- const struct ipv6hdr *ipv6h;
- const struct inet6_protocol *ops;
- int err = -EINVAL;
-
- if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
- goto out;
-
- ipv6h = ipv6_hdr(skb);
- __skb_pull(skb, sizeof(*ipv6h));
- err = -EPROTONOSUPPORT;
-
- rcu_read_lock();
- ops = rcu_dereference(inet6_protos[
- ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]);
-
- if (likely(ops && ops->gso_send_check)) {
- skb_reset_transport_header(skb);
- err = ops->gso_send_check(skb);
- }
- rcu_read_unlock();
-
-out:
- return err;
-}
-
-static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
- netdev_features_t features)
-{
- struct sk_buff *segs = ERR_PTR(-EINVAL);
- struct ipv6hdr *ipv6h;
- const struct inet6_protocol *ops;
- int proto;
- struct frag_hdr *fptr;
- unsigned int unfrag_ip6hlen;
- u8 *prevhdr;
- int offset = 0;
-
- if (!(features & NETIF_F_V6_CSUM))
- features &= ~NETIF_F_SG;
-
- if (unlikely(skb_shinfo(skb)->gso_type &
- ~(SKB_GSO_UDP |
- SKB_GSO_DODGY |
- SKB_GSO_TCP_ECN |
- SKB_GSO_TCPV6 |
- 0)))
- goto out;
-
- if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
- goto out;
-
- ipv6h = ipv6_hdr(skb);
- __skb_pull(skb, sizeof(*ipv6h));
- segs = ERR_PTR(-EPROTONOSUPPORT);
-
- proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
- rcu_read_lock();
- ops = rcu_dereference(inet6_protos[proto]);
- if (likely(ops && ops->gso_segment)) {
- skb_reset_transport_header(skb);
- segs = ops->gso_segment(skb, features);
- }
- rcu_read_unlock();
-
- if (IS_ERR(segs))
- goto out;
-
- for (skb = segs; skb; skb = skb->next) {
- ipv6h = ipv6_hdr(skb);
- ipv6h->payload_len = htons(skb->len - skb->mac_len -
- sizeof(*ipv6h));
- if (proto == IPPROTO_UDP) {
- unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
- fptr = (struct frag_hdr *)(skb_network_header(skb) +
- unfrag_ip6hlen);
- fptr->frag_off = htons(offset);
- if (skb->next != NULL)
- fptr->frag_off |= htons(IP6_MF);
- offset += (ntohs(ipv6h->payload_len) -
- sizeof(struct frag_hdr));
- }
- }
-
-out:
- return segs;
-}
-
-static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
-{
- const struct inet6_protocol *ops;
- struct sk_buff **pp = NULL;
- struct sk_buff *p;
- struct ipv6hdr *iph;
- unsigned int nlen;
- unsigned int hlen;
- unsigned int off;
- int flush = 1;
- int proto;
- __wsum csum;
-
- off = skb_gro_offset(skb);
- hlen = off + sizeof(*iph);
- iph = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen)) {
- iph = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!iph))
- goto out;
- }
-
- skb_gro_pull(skb, sizeof(*iph));
- skb_set_transport_header(skb, skb_gro_offset(skb));
-
- flush += ntohs(iph->payload_len) != skb_gro_len(skb);
-
- rcu_read_lock();
- proto = iph->nexthdr;
- ops = rcu_dereference(inet6_protos[proto]);
- if (!ops || !ops->gro_receive) {
- __pskb_pull(skb, skb_gro_offset(skb));
- proto = ipv6_gso_pull_exthdrs(skb, proto);
- skb_gro_pull(skb, -skb_transport_offset(skb));
- skb_reset_transport_header(skb);
- __skb_push(skb, skb_gro_offset(skb));
-
- ops = rcu_dereference(inet6_protos[proto]);
- if (!ops || !ops->gro_receive)
- goto out_unlock;
-
- iph = ipv6_hdr(skb);
- }
-
- NAPI_GRO_CB(skb)->proto = proto;
-
- flush--;
- nlen = skb_network_header_len(skb);
-
- for (p = *head; p; p = p->next) {
- const struct ipv6hdr *iph2;
- __be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */
-
- if (!NAPI_GRO_CB(p)->same_flow)
- continue;
-
- iph2 = ipv6_hdr(p);
- first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ;
-
- /* All fields must match except length and Traffic Class. */
- if (nlen != skb_network_header_len(p) ||
- (first_word & htonl(0xF00FFFFF)) ||
- memcmp(&iph->nexthdr, &iph2->nexthdr,
- nlen - offsetof(struct ipv6hdr, nexthdr))) {
- NAPI_GRO_CB(p)->same_flow = 0;
- continue;
- }
- /* flush if Traffic Class fields are different */
- NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
- NAPI_GRO_CB(p)->flush |= flush;
- }
-
- NAPI_GRO_CB(skb)->flush |= flush;
-
- csum = skb->csum;
- skb_postpull_rcsum(skb, iph, skb_network_header_len(skb));
-
- pp = ops->gro_receive(head, skb);
-
- skb->csum = csum;
-
-out_unlock:
- rcu_read_unlock();
-
-out:
- NAPI_GRO_CB(skb)->flush |= flush;
-
- return pp;
-}
-
-static int ipv6_gro_complete(struct sk_buff *skb)
-{
- const struct inet6_protocol *ops;
- struct ipv6hdr *iph = ipv6_hdr(skb);
- int err = -ENOSYS;
-
- iph->payload_len = htons(skb->len - skb_network_offset(skb) -
- sizeof(*iph));
-
- rcu_read_lock();
- ops = rcu_dereference(inet6_protos[NAPI_GRO_CB(skb)->proto]);
- if (WARN_ON(!ops || !ops->gro_complete))
- goto out_unlock;
-
- err = ops->gro_complete(skb);
-
-out_unlock:
- rcu_read_unlock();
-
- return err;
-}
-
static struct packet_type ipv6_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_IPV6),
.func = ipv6_rcv,
- .gso_send_check = ipv6_gso_send_check,
- .gso_segment = ipv6_gso_segment,
- .gro_receive = ipv6_gro_receive,
- .gro_complete = ipv6_gro_complete,
};
static int __init ipv6_packet_init(void)
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 7e6139508ee7..ecc35b93314b 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -44,7 +44,7 @@
#define IPV6HDR_BASELEN 8
struct tmp_ext {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
struct in6_addr saddr;
#endif
struct in6_addr daddr;
@@ -152,7 +152,7 @@ bad:
return false;
}
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
/**
* ipv6_rearrange_destopt - rearrange IPv6 destination options header
* @iph: IPv6 header
@@ -320,7 +320,7 @@ static void ah6_output_done(struct crypto_async_request *base, int err)
memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
if (extlen) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
memcpy(&top_iph->saddr, iph_ext, extlen);
#else
memcpy(&top_iph->daddr, iph_ext, extlen);
@@ -385,7 +385,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
memcpy(iph_base, top_iph, IPV6HDR_BASELEN);
if (extlen) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
memcpy(iph_ext, &top_iph->saddr, extlen);
#else
memcpy(iph_ext, &top_iph->daddr, extlen);
@@ -434,7 +434,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
if (extlen) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
memcpy(&top_iph->saddr, iph_ext, extlen);
#else
memcpy(&top_iph->daddr, iph_ext, extlen);
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index cdf02be5f191..2f4f584d796d 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -64,7 +64,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
int ishost = !net->ipv6.devconf_all->forwarding;
int err = 0;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (ipv6_addr_is_multicast(addr))
return -EINVAL;
@@ -84,7 +84,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
rt = rt6_lookup(net, addr, NULL, 0, 0);
if (rt) {
dev = rt->dst.dev;
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
} else if (ishost) {
err = -EADDRNOTAVAIL;
goto error;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index be2b67d631e5..8edf2601065a 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -701,7 +701,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
err = -EINVAL;
goto exit_f;
}
- if (!capable(CAP_NET_RAW)) {
+ if (!ns_capable(net->user_ns, CAP_NET_RAW)) {
err = -EPERM;
goto exit_f;
}
@@ -721,7 +721,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
err = -EINVAL;
goto exit_f;
}
- if (!capable(CAP_NET_RAW)) {
+ if (!ns_capable(net->user_ns, CAP_NET_RAW)) {
err = -EPERM;
goto exit_f;
}
@@ -746,7 +746,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
err = -EINVAL;
goto exit_f;
}
- if (!capable(CAP_NET_RAW)) {
+ if (!ns_capable(net->user_ns, CAP_NET_RAW)) {
err = -EPERM;
goto exit_f;
}
@@ -769,7 +769,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
switch (rthdr->type) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPV6_SRCRT_TYPE_2:
if (rthdr->hdrlen != 2 ||
rthdr->segments_left != 1) {
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index fa3d9c328092..473f628f9f20 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -43,56 +43,12 @@
#include <net/ndisc.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/xfrm.h>
#endif
#include <asm/uaccess.h>
-int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
-{
- const unsigned char *nh = skb_network_header(skb);
- int packet_len = skb->tail - skb->network_header;
- struct ipv6_opt_hdr *hdr;
- int len;
-
- if (offset + 2 > packet_len)
- goto bad;
- hdr = (struct ipv6_opt_hdr *)(nh + offset);
- len = ((hdr->hdrlen + 1) << 3);
-
- if (offset + len > packet_len)
- goto bad;
-
- offset += 2;
- len -= 2;
-
- while (len > 0) {
- int opttype = nh[offset];
- int optlen;
-
- if (opttype == type)
- return offset;
-
- switch (opttype) {
- case IPV6_TLV_PAD1:
- optlen = 1;
- break;
- default:
- optlen = nh[offset + 1] + 2;
- if (optlen > len)
- goto bad;
- break;
- }
- offset += optlen;
- len -= optlen;
- }
- /* not_found */
- bad:
- return -1;
-}
-EXPORT_SYMBOL_GPL(ipv6_find_tlv);
-
/*
* Parsing tlv encoded headers.
*
@@ -224,7 +180,7 @@ bad:
Destination options header.
*****************************/
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
{
struct ipv6_destopt_hao *hao;
@@ -288,7 +244,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
#endif
static const struct tlvtype_proc tlvprocdestopt_lst[] = {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
{
.type = IPV6_TLV_HAO,
.func = ipv6_dest_hao,
@@ -300,7 +256,7 @@ static const struct tlvtype_proc tlvprocdestopt_lst[] = {
static int ipv6_destopt_rcv(struct sk_buff *skb)
{
struct inet6_skb_parm *opt = IP6CB(skb);
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
__u16 dstbuf;
#endif
struct dst_entry *dst = skb_dst(skb);
@@ -315,14 +271,14 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
}
opt->lastopt = opt->dst1 = skb_network_header_len(skb);
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
dstbuf = opt->dst1;
#endif
if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
opt = IP6CB(skb);
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
opt->nhoff = dstbuf;
#else
opt->nhoff = opt->dst1;
@@ -378,7 +334,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
looped_back:
if (hdr->segments_left == 0) {
switch (hdr->type) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPV6_SRCRT_TYPE_2:
/* Silently discard type 2 header unless it was
* processed by own
@@ -404,7 +360,7 @@ looped_back:
}
switch (hdr->type) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPV6_SRCRT_TYPE_2:
if (accept_source_route < 0)
goto unknown_rh;
@@ -461,7 +417,7 @@ looped_back:
addr += i - 1;
switch (hdr->type) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPV6_SRCRT_TYPE_2:
if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
(xfrm_address_t *)&ipv6_hdr(skb)->saddr,
@@ -528,12 +484,12 @@ unknown_rh:
static const struct inet6_protocol rthdr_protocol = {
.handler = ipv6_rthdr_rcv,
- .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
+ .flags = INET6_PROTO_NOPOLICY,
};
static const struct inet6_protocol destopt_protocol = {
.handler = ipv6_destopt_rcv,
- .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
+ .flags = INET6_PROTO_NOPOLICY,
};
static const struct inet6_protocol nodata_protocol = {
@@ -559,10 +515,10 @@ int __init ipv6_exthdrs_init(void)
out:
return ret;
-out_rthdr:
- inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
out_destopt:
inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
+out_rthdr:
+ inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
goto out;
};
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index f73d59a14131..e7d756e19d1d 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -111,3 +111,47 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
return start;
}
EXPORT_SYMBOL(ipv6_skip_exthdr);
+
+int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
+{
+ const unsigned char *nh = skb_network_header(skb);
+ int packet_len = skb->tail - skb->network_header;
+ struct ipv6_opt_hdr *hdr;
+ int len;
+
+ if (offset + 2 > packet_len)
+ goto bad;
+ hdr = (struct ipv6_opt_hdr *)(nh + offset);
+ len = ((hdr->hdrlen + 1) << 3);
+
+ if (offset + len > packet_len)
+ goto bad;
+
+ offset += 2;
+ len -= 2;
+
+ while (len > 0) {
+ int opttype = nh[offset];
+ int optlen;
+
+ if (opttype == type)
+ return offset;
+
+ switch (opttype) {
+ case IPV6_TLV_PAD1:
+ optlen = 1;
+ break;
+ default:
+ optlen = nh[offset + 1] + 2;
+ if (optlen > len)
+ goto bad;
+ break;
+ }
+ offset += optlen;
+ len -= optlen;
+ }
+ /* not_found */
+ bad:
+ return -1;
+}
+EXPORT_SYMBOL_GPL(ipv6_find_tlv);
diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c
new file mode 100644
index 000000000000..cf77f3abfd06
--- /dev/null
+++ b/net/ipv6/exthdrs_offload.c
@@ -0,0 +1,41 @@
+/*
+ * IPV6 GSO/GRO offload support
+ * Linux INET6 implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * IPV6 Extension Header GSO/GRO support
+ */
+#include <net/protocol.h>
+#include "ip6_offload.h"
+
+static const struct net_offload rthdr_offload = {
+ .flags = INET6_PROTO_GSO_EXTHDR,
+};
+
+static const struct net_offload dstopt_offload = {
+ .flags = INET6_PROTO_GSO_EXTHDR,
+};
+
+int __init ipv6_exthdrs_offload_init(void)
+{
+ int ret;
+
+ ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING);
+ if (!ret)
+ goto out;
+
+ ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS);
+ if (!ret)
+ goto out_rt;
+
+out:
+ return ret;
+
+out_rt:
+ inet_del_offload(&rthdr_offload, IPPROTO_ROUTING);
+ goto out;
+}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index d9fb9110f607..2e1a432867c0 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -100,7 +100,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
goto out;
}
again:
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
rt = NULL;
goto out;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 24d69dbca4d6..b4a9fd51dae7 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -280,7 +280,7 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
return 0;
}
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
static void mip6_addr_swap(struct sk_buff *skb)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index c4f934176cab..30647857a375 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -252,6 +252,7 @@ struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
return NULL;
dst->ops->update_pmtu(dst, sk, NULL, mtu);
- return inet6_csk_route_socket(sk, &fl6);
+ dst = inet6_csk_route_socket(sk, &fl6);
+ return IS_ERR(dst) ? NULL : dst;
}
EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 24995a93ef8c..710cafd2e1a9 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -672,6 +672,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
iter->rt6i_idev == rt->rt6i_idev &&
ipv6_addr_equal(&iter->rt6i_gateway,
&rt->rt6i_gateway)) {
+ if (rt->rt6i_nsiblings)
+ rt->rt6i_nsiblings = 0;
if (!(iter->rt6i_flags & RTF_EXPIRES))
return -EEXIST;
if (!(rt->rt6i_flags & RTF_EXPIRES))
@@ -680,6 +682,21 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
rt6_set_expires(iter, rt->dst.expires);
return -EEXIST;
}
+ /* If we have the same destination and the same metric,
+ * but not the same gateway, then the route we try to
+ * add is sibling to this route, increment our counter
+ * of siblings, and later we will add our route to the
+ * list.
+ * Only static routes (which don't have flag
+ * RTF_EXPIRES) are used for ECMPv6.
+ *
+ * To avoid long list, we only had siblings if the
+ * route have a gateway.
+ */
+ if (rt->rt6i_flags & RTF_GATEWAY &&
+ !(rt->rt6i_flags & RTF_EXPIRES) &&
+ !(iter->rt6i_flags & RTF_EXPIRES))
+ rt->rt6i_nsiblings++;
}
if (iter->rt6i_metric > rt->rt6i_metric)
@@ -692,6 +709,35 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
if (ins == &fn->leaf)
fn->rr_ptr = NULL;
+ /* Link this route to others same route. */
+ if (rt->rt6i_nsiblings) {
+ unsigned int rt6i_nsiblings;
+ struct rt6_info *sibling, *temp_sibling;
+
+ /* Find the first route that have the same metric */
+ sibling = fn->leaf;
+ while (sibling) {
+ if (sibling->rt6i_metric == rt->rt6i_metric) {
+ list_add_tail(&rt->rt6i_siblings,
+ &sibling->rt6i_siblings);
+ break;
+ }
+ sibling = sibling->dst.rt6_next;
+ }
+ /* For each sibling in the list, increment the counter of
+ * siblings. BUG() if counters does not match, list of siblings
+ * is broken!
+ */
+ rt6i_nsiblings = 0;
+ list_for_each_entry_safe(sibling, temp_sibling,
+ &rt->rt6i_siblings, rt6i_siblings) {
+ sibling->rt6i_nsiblings++;
+ BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings);
+ rt6i_nsiblings++;
+ }
+ BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings);
+ }
+
/*
* insert node
*/
@@ -1193,6 +1239,17 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
if (fn->rr_ptr == rt)
fn->rr_ptr = NULL;
+ /* Remove this entry from other siblings */
+ if (rt->rt6i_nsiblings) {
+ struct rt6_info *sibling, *next_sibling;
+
+ list_for_each_entry_safe(sibling, next_sibling,
+ &rt->rt6i_siblings, rt6i_siblings)
+ sibling->rt6i_nsiblings--;
+ rt->rt6i_nsiblings = 0;
+ list_del_init(&rt->rt6i_siblings);
+ }
+
/* Adjust walkers */
read_lock(&fib6_walker_lock);
FOR_WALKERS(w) {
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 90bbefb57943..29124b7a04c8 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -519,7 +519,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
}
read_unlock_bh(&ip6_sk_fl_lock);
- if (freq.flr_share == IPV6_FL_S_NONE && capable(CAP_NET_ADMIN)) {
+ if (freq.flr_share == IPV6_FL_S_NONE &&
+ ns_capable(net->user_ns, CAP_NET_ADMIN)) {
fl = fl_lookup(net, freq.flr_label);
if (fl) {
err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 0185679c5f53..867466c96aac 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -109,21 +109,6 @@ static u32 HASH_ADDR(const struct in6_addr *addr)
#define tunnels_r tunnels[2]
#define tunnels_l tunnels[1]
#define tunnels_wc tunnels[0]
-/*
- * Locking : hash tables are protected by RCU and RTNL
- */
-
-#define for_each_ip_tunnel_rcu(start) \
- for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
-
-/* often modified stats are per cpu, other are shared (netdev->stats) */
-struct pcpu_tstats {
- u64 rx_packets;
- u64 rx_bytes;
- u64 tx_packets;
- u64 tx_bytes;
- struct u64_stats_sync syncp;
-};
static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *tot)
@@ -181,7 +166,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
ARPHRD_ETHER : ARPHRD_IP6GRE;
int score, cand_score = 4;
- for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
+ for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
if (!ipv6_addr_equal(local, &t->parms.laddr) ||
!ipv6_addr_equal(remote, &t->parms.raddr) ||
key != t->parms.i_key ||
@@ -206,7 +191,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
}
}
- for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
+ for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) {
if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
key != t->parms.i_key ||
!(t->dev->flags & IFF_UP))
@@ -230,7 +215,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
}
}
- for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
+ for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) {
if ((!ipv6_addr_equal(local, &t->parms.laddr) &&
(!ipv6_addr_equal(local, &t->parms.raddr) ||
!ipv6_addr_is_multicast(local))) ||
@@ -256,7 +241,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
}
}
- for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
+ for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) {
if (t->parms.i_key != key ||
!(t->dev->flags & IFF_UP))
continue;
@@ -1069,7 +1054,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
dev->mtu = IPV6_MIN_MTU;
}
}
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
}
t->hlen = addend;
@@ -1161,7 +1146,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
case SIOCADDTUNNEL:
case SIOCCHGTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
err = -EFAULT;
@@ -1209,7 +1194,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
case SIOCDELTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
if (dev == ign->fb_tunnel_dev) {
@@ -1633,9 +1618,9 @@ static size_t ip6gre_get_size(const struct net_device *dev)
/* IFLA_GRE_OKEY */
nla_total_size(4) +
/* IFLA_GRE_LOCAL */
- nla_total_size(4) +
+ nla_total_size(sizeof(struct in6_addr)) +
/* IFLA_GRE_REMOTE */
- nla_total_size(4) +
+ nla_total_size(sizeof(struct in6_addr)) +
/* IFLA_GRE_TTL */
nla_total_size(1) +
/* IFLA_GRE_TOS */
@@ -1659,8 +1644,8 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
- nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->raddr) ||
- nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->laddr) ||
+ nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->laddr) ||
+ nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->raddr) ||
nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
/*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/
nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
new file mode 100644
index 000000000000..f26f0da7f095
--- /dev/null
+++ b/net/ipv6/ip6_offload.c
@@ -0,0 +1,282 @@
+/*
+ * IPV6 GSO/GRO offload support
+ * Linux INET6 implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/printk.h>
+
+#include <net/protocol.h>
+#include <net/ipv6.h>
+
+#include "ip6_offload.h"
+
+static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
+{
+ const struct net_offload *ops = NULL;
+
+ for (;;) {
+ struct ipv6_opt_hdr *opth;
+ int len;
+
+ if (proto != NEXTHDR_HOP) {
+ ops = rcu_dereference(inet6_offloads[proto]);
+
+ if (unlikely(!ops))
+ break;
+
+ if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
+ break;
+ }
+
+ if (unlikely(!pskb_may_pull(skb, 8)))
+ break;
+
+ opth = (void *)skb->data;
+ len = ipv6_optlen(opth);
+
+ if (unlikely(!pskb_may_pull(skb, len)))
+ break;
+
+ proto = opth->nexthdr;
+ __skb_pull(skb, len);
+ }
+
+ return proto;
+}
+
+static int ipv6_gso_send_check(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ipv6h;
+ const struct net_offload *ops;
+ int err = -EINVAL;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+ goto out;
+
+ ipv6h = ipv6_hdr(skb);
+ __skb_pull(skb, sizeof(*ipv6h));
+ err = -EPROTONOSUPPORT;
+
+ rcu_read_lock();
+ ops = rcu_dereference(inet6_offloads[
+ ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]);
+
+ if (likely(ops && ops->callbacks.gso_send_check)) {
+ skb_reset_transport_header(skb);
+ err = ops->callbacks.gso_send_check(skb);
+ }
+ rcu_read_unlock();
+
+out:
+ return err;
+}
+
+static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct ipv6hdr *ipv6h;
+ const struct net_offload *ops;
+ int proto;
+ struct frag_hdr *fptr;
+ unsigned int unfrag_ip6hlen;
+ u8 *prevhdr;
+ int offset = 0;
+
+ if (!(features & NETIF_F_V6_CSUM))
+ features &= ~NETIF_F_SG;
+
+ if (unlikely(skb_shinfo(skb)->gso_type &
+ ~(SKB_GSO_UDP |
+ SKB_GSO_DODGY |
+ SKB_GSO_TCP_ECN |
+ SKB_GSO_TCPV6 |
+ 0)))
+ goto out;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+ goto out;
+
+ ipv6h = ipv6_hdr(skb);
+ __skb_pull(skb, sizeof(*ipv6h));
+ segs = ERR_PTR(-EPROTONOSUPPORT);
+
+ proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
+ rcu_read_lock();
+ ops = rcu_dereference(inet6_offloads[proto]);
+ if (likely(ops && ops->callbacks.gso_segment)) {
+ skb_reset_transport_header(skb);
+ segs = ops->callbacks.gso_segment(skb, features);
+ }
+ rcu_read_unlock();
+
+ if (IS_ERR(segs))
+ goto out;
+
+ for (skb = segs; skb; skb = skb->next) {
+ ipv6h = ipv6_hdr(skb);
+ ipv6h->payload_len = htons(skb->len - skb->mac_len -
+ sizeof(*ipv6h));
+ if (proto == IPPROTO_UDP) {
+ unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+ fptr = (struct frag_hdr *)(skb_network_header(skb) +
+ unfrag_ip6hlen);
+ fptr->frag_off = htons(offset);
+ if (skb->next != NULL)
+ fptr->frag_off |= htons(IP6_MF);
+ offset += (ntohs(ipv6h->payload_len) -
+ sizeof(struct frag_hdr));
+ }
+ }
+
+out:
+ return segs;
+}
+
+static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ const struct net_offload *ops;
+ struct sk_buff **pp = NULL;
+ struct sk_buff *p;
+ struct ipv6hdr *iph;
+ unsigned int nlen;
+ unsigned int hlen;
+ unsigned int off;
+ int flush = 1;
+ int proto;
+ __wsum csum;
+
+ off = skb_gro_offset(skb);
+ hlen = off + sizeof(*iph);
+ iph = skb_gro_header_fast(skb, off);
+ if (skb_gro_header_hard(skb, hlen)) {
+ iph = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!iph))
+ goto out;
+ }
+
+ skb_gro_pull(skb, sizeof(*iph));
+ skb_set_transport_header(skb, skb_gro_offset(skb));
+
+ flush += ntohs(iph->payload_len) != skb_gro_len(skb);
+
+ rcu_read_lock();
+ proto = iph->nexthdr;
+ ops = rcu_dereference(inet6_offloads[proto]);
+ if (!ops || !ops->callbacks.gro_receive) {
+ __pskb_pull(skb, skb_gro_offset(skb));
+ proto = ipv6_gso_pull_exthdrs(skb, proto);
+ skb_gro_pull(skb, -skb_transport_offset(skb));
+ skb_reset_transport_header(skb);
+ __skb_push(skb, skb_gro_offset(skb));
+
+ ops = rcu_dereference(inet6_offloads[proto]);
+ if (!ops || !ops->callbacks.gro_receive)
+ goto out_unlock;
+
+ iph = ipv6_hdr(skb);
+ }
+
+ NAPI_GRO_CB(skb)->proto = proto;
+
+ flush--;
+ nlen = skb_network_header_len(skb);
+
+ for (p = *head; p; p = p->next) {
+ const struct ipv6hdr *iph2;
+ __be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */
+
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ iph2 = ipv6_hdr(p);
+ first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ;
+
+ /* All fields must match except length and Traffic Class. */
+ if (nlen != skb_network_header_len(p) ||
+ (first_word & htonl(0xF00FFFFF)) ||
+ memcmp(&iph->nexthdr, &iph2->nexthdr,
+ nlen - offsetof(struct ipv6hdr, nexthdr))) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+ /* flush if Traffic Class fields are different */
+ NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
+ NAPI_GRO_CB(p)->flush |= flush;
+ }
+
+ NAPI_GRO_CB(skb)->flush |= flush;
+
+ csum = skb->csum;
+ skb_postpull_rcsum(skb, iph, skb_network_header_len(skb));
+
+ pp = ops->callbacks.gro_receive(head, skb);
+
+ skb->csum = csum;
+
+out_unlock:
+ rcu_read_unlock();
+
+out:
+ NAPI_GRO_CB(skb)->flush |= flush;
+
+ return pp;
+}
+
+static int ipv6_gro_complete(struct sk_buff *skb)
+{
+ const struct net_offload *ops;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ int err = -ENOSYS;
+
+ iph->payload_len = htons(skb->len - skb_network_offset(skb) -
+ sizeof(*iph));
+
+ rcu_read_lock();
+ ops = rcu_dereference(inet6_offloads[NAPI_GRO_CB(skb)->proto]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_complete))
+ goto out_unlock;
+
+ err = ops->callbacks.gro_complete(skb);
+
+out_unlock:
+ rcu_read_unlock();
+
+ return err;
+}
+
+static struct packet_offload ipv6_packet_offload __read_mostly = {
+ .type = cpu_to_be16(ETH_P_IPV6),
+ .callbacks = {
+ .gso_send_check = ipv6_gso_send_check,
+ .gso_segment = ipv6_gso_segment,
+ .gro_receive = ipv6_gro_receive,
+ .gro_complete = ipv6_gro_complete,
+ },
+};
+
+static int __init ipv6_offload_init(void)
+{
+
+ if (tcpv6_offload_init() < 0)
+ pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
+ if (udp_offload_init() < 0)
+ pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
+ if (ipv6_exthdrs_offload_init() < 0)
+ pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);
+
+ dev_add_offload(&ipv6_packet_offload);
+ return 0;
+}
+
+fs_initcall(ipv6_offload_init);
diff --git a/net/ipv6/ip6_offload.h b/net/ipv6/ip6_offload.h
new file mode 100644
index 000000000000..2e155c651b35
--- /dev/null
+++ b/net/ipv6/ip6_offload.h
@@ -0,0 +1,18 @@
+/*
+ * IPV6 GSO/GRO offload support
+ * Linux INET6 implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __ip6_offload_h
+#define __ip6_offload_h
+
+int ipv6_exthdrs_offload_init(void);
+int udp_offload_init(void);
+int tcpv6_offload_init(void);
+
+#endif
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index aece3e792f84..5552d13ae92f 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -538,78 +538,12 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
to->nf_trace = from->nf_trace;
#endif
skb_copy_secmark(to, from);
}
-int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
-{
- u16 offset = sizeof(struct ipv6hdr);
- struct ipv6_opt_hdr *exthdr =
- (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
- unsigned int packet_len = skb->tail - skb->network_header;
- int found_rhdr = 0;
- *nexthdr = &ipv6_hdr(skb)->nexthdr;
-
- while (offset + 1 <= packet_len) {
-
- switch (**nexthdr) {
-
- case NEXTHDR_HOP:
- break;
- case NEXTHDR_ROUTING:
- found_rhdr = 1;
- break;
- case NEXTHDR_DEST:
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
- break;
-#endif
- if (found_rhdr)
- return offset;
- break;
- default :
- return offset;
- }
-
- offset += ipv6_optlen(exthdr);
- *nexthdr = &exthdr->nexthdr;
- exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
- offset);
- }
-
- return offset;
-}
-
-void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
-{
- static atomic_t ipv6_fragmentation_id;
- int old, new;
-
- if (rt && !(rt->dst.flags & DST_NOPEER)) {
- struct inet_peer *peer;
- struct net *net;
-
- net = dev_net(rt->dst.dev);
- peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
- if (peer) {
- fhdr->identification = htonl(inet_getid(peer, 0));
- inet_putpeer(peer);
- return;
- }
- }
- do {
- old = atomic_read(&ipv6_fragmentation_id);
- new = old + 1;
- if (!new)
- new = 1;
- } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
- fhdr->identification = htonl(new);
-}
-
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
struct sk_buff *frag;
@@ -756,7 +690,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
if (err == 0) {
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGOKS);
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
return 0;
}
@@ -768,7 +702,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGFAILS);
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
return err;
slow_path_clean:
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index cb7e2ded6f08..fb828e9fe8e0 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -83,6 +83,7 @@ static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
static int ip6_tnl_dev_init(struct net_device *dev);
static void ip6_tnl_dev_setup(struct net_device *dev);
+static struct rtnl_link_ops ip6_link_ops __read_mostly;
static int ip6_tnl_net_id __read_mostly;
struct ip6_tnl_net {
@@ -94,14 +95,6 @@ struct ip6_tnl_net {
struct ip6_tnl __rcu **tnls[2];
};
-/* often modified stats are per cpu, other are shared (netdev->stats) */
-struct pcpu_tstats {
- unsigned long rx_packets;
- unsigned long rx_bytes;
- unsigned long tx_packets;
- unsigned long tx_bytes;
-} __attribute__((aligned(4*sizeof(unsigned long))));
-
static struct net_device_stats *ip6_get_stats(struct net_device *dev)
{
struct pcpu_tstats sum = { 0 };
@@ -258,6 +251,33 @@ static void ip6_dev_free(struct net_device *dev)
free_netdev(dev);
}
+static int ip6_tnl_create2(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+ int err;
+
+ t = netdev_priv(dev);
+ err = ip6_tnl_dev_init(dev);
+ if (err < 0)
+ goto out;
+
+ err = register_netdevice(dev);
+ if (err < 0)
+ goto out;
+
+ strcpy(t->parms.name, dev->name);
+ dev->rtnl_link_ops = &ip6_link_ops;
+
+ dev_hold(dev);
+ ip6_tnl_link(ip6n, t);
+ return 0;
+
+out:
+ return err;
+}
+
/**
* ip6_tnl_create - create a new tunnel
* @p: tunnel parameters
@@ -276,7 +296,6 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
struct ip6_tnl *t;
char name[IFNAMSIZ];
int err;
- struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
if (p->name[0])
strlcpy(name, p->name, IFNAMSIZ);
@@ -291,17 +310,10 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
t = netdev_priv(dev);
t->parms = *p;
- err = ip6_tnl_dev_init(dev);
+ err = ip6_tnl_create2(dev);
if (err < 0)
goto failed_free;
- if ((err = register_netdevice(dev)) < 0)
- goto failed_free;
-
- strcpy(t->parms.name, dev->name);
-
- dev_hold(dev);
- ip6_tnl_link(ip6n, t);
return t;
failed_free:
@@ -663,8 +675,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
icmpv6_send(skb2, rel_type, rel_code, rel_info);
- if (rt)
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
kfree_skb(skb2);
}
@@ -1208,7 +1219,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
}
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
}
}
@@ -1237,6 +1248,20 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
return 0;
}
+static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
+{
+ struct net *net = dev_net(t->dev);
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+ int err;
+
+ ip6_tnl_unlink(ip6n, t);
+ synchronize_net();
+ err = ip6_tnl_change(t, p);
+ ip6_tnl_link(ip6n, t);
+ netdev_state_change(t->dev);
+ return err;
+}
+
static void
ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
{
@@ -1325,7 +1350,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCADDTUNNEL:
case SIOCCHGTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
err = -EFAULT;
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
@@ -1345,11 +1370,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
} else
t = netdev_priv(dev);
- ip6_tnl_unlink(ip6n, t);
- synchronize_net();
- err = ip6_tnl_change(t, &p1);
- ip6_tnl_link(ip6n, t);
- netdev_state_change(dev);
+ err = ip6_tnl_update(t, &p1);
}
if (t) {
err = 0;
@@ -1362,7 +1383,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
break;
case SIOCDELTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
if (dev == ip6n->fb_tnl_dev) {
@@ -1505,6 +1526,164 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
return 0;
}
+static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ u8 proto;
+
+ if (!data)
+ return 0;
+
+ proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+ if (proto != IPPROTO_IPV6 &&
+ proto != IPPROTO_IPIP &&
+ proto != 0)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void ip6_tnl_netlink_parms(struct nlattr *data[],
+ struct __ip6_tnl_parm *parms)
+{
+ memset(parms, 0, sizeof(*parms));
+
+ if (!data)
+ return;
+
+ if (data[IFLA_IPTUN_LINK])
+ parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
+
+ if (data[IFLA_IPTUN_LOCAL])
+ nla_memcpy(&parms->laddr, data[IFLA_IPTUN_LOCAL],
+ sizeof(struct in6_addr));
+
+ if (data[IFLA_IPTUN_REMOTE])
+ nla_memcpy(&parms->raddr, data[IFLA_IPTUN_REMOTE],
+ sizeof(struct in6_addr));
+
+ if (data[IFLA_IPTUN_TTL])
+ parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]);
+
+ if (data[IFLA_IPTUN_ENCAP_LIMIT])
+ parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]);
+
+ if (data[IFLA_IPTUN_FLOWINFO])
+ parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]);
+
+ if (data[IFLA_IPTUN_FLAGS])
+ parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]);
+
+ if (data[IFLA_IPTUN_PROTO])
+ parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+}
+
+static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct net *net = dev_net(dev);
+ struct ip6_tnl *nt;
+
+ nt = netdev_priv(dev);
+ ip6_tnl_netlink_parms(data, &nt->parms);
+
+ if (ip6_tnl_locate(net, &nt->parms, 0))
+ return -EEXIST;
+
+ return ip6_tnl_create2(dev);
+}
+
+static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[])
+{
+ struct ip6_tnl *t;
+ struct __ip6_tnl_parm p;
+ struct net *net = dev_net(dev);
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+
+ if (dev == ip6n->fb_tnl_dev)
+ return -EINVAL;
+
+ ip6_tnl_netlink_parms(data, &p);
+
+ t = ip6_tnl_locate(net, &p, 0);
+
+ if (t) {
+ if (t->dev != dev)
+ return -EEXIST;
+ } else
+ t = netdev_priv(dev);
+
+ return ip6_tnl_update(t, &p);
+}
+
+static size_t ip6_tnl_get_size(const struct net_device *dev)
+{
+ return
+ /* IFLA_IPTUN_LINK */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_LOCAL */
+ nla_total_size(sizeof(struct in6_addr)) +
+ /* IFLA_IPTUN_REMOTE */
+ nla_total_size(sizeof(struct in6_addr)) +
+ /* IFLA_IPTUN_TTL */
+ nla_total_size(1) +
+ /* IFLA_IPTUN_ENCAP_LIMIT */
+ nla_total_size(1) +
+ /* IFLA_IPTUN_FLOWINFO */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_FLAGS */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_PROTO */
+ nla_total_size(1) +
+ 0;
+}
+
+static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct ip6_tnl *tunnel = netdev_priv(dev);
+ struct __ip6_tnl_parm *parm = &tunnel->parms;
+
+ if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
+ nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr),
+ &parm->raddr) ||
+ nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
+ &parm->laddr) ||
+ nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
+ nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
+ nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
+ nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
+ nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
+ [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
+ [IFLA_IPTUN_LOCAL] = { .len = sizeof(struct in6_addr) },
+ [IFLA_IPTUN_REMOTE] = { .len = sizeof(struct in6_addr) },
+ [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
+ [IFLA_IPTUN_ENCAP_LIMIT] = { .type = NLA_U8 },
+ [IFLA_IPTUN_FLOWINFO] = { .type = NLA_U32 },
+ [IFLA_IPTUN_FLAGS] = { .type = NLA_U32 },
+ [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
+};
+
+static struct rtnl_link_ops ip6_link_ops __read_mostly = {
+ .kind = "ip6tnl",
+ .maxtype = IFLA_IPTUN_MAX,
+ .policy = ip6_tnl_policy,
+ .priv_size = sizeof(struct ip6_tnl),
+ .setup = ip6_tnl_dev_setup,
+ .validate = ip6_tnl_validate,
+ .newlink = ip6_tnl_newlink,
+ .changelink = ip6_tnl_changelink,
+ .get_size = ip6_tnl_get_size,
+ .fill_info = ip6_tnl_fill_info,
+};
+
static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
.handler = ip4ip6_rcv,
.err_handler = ip4ip6_err,
@@ -1613,9 +1792,14 @@ static int __init ip6_tunnel_init(void)
pr_err("%s: can't register ip6ip6\n", __func__);
goto out_ip6ip6;
}
+ err = rtnl_link_register(&ip6_link_ops);
+ if (err < 0)
+ goto rtnl_link_failed;
return 0;
+rtnl_link_failed:
+ xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
out_ip6ip6:
xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
out_ip4ip6:
@@ -1630,6 +1814,7 @@ out_pernet:
static void __exit ip6_tunnel_cleanup(void)
{
+ rtnl_link_unregister(&ip6_link_ops);
if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
pr_info("%s: can't deregister ip4ip6\n", __func__);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index f7c7c6319720..926ea544f499 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -66,8 +66,8 @@ struct mr6_table {
struct mif_device vif6_table[MAXMIFS];
int maxvif;
atomic_t cache_resolve_queue_len;
- int mroute_do_assert;
- int mroute_do_pim;
+ bool mroute_do_assert;
+ bool mroute_do_pim;
#ifdef CONFIG_IPV6_PIMSM_V2
int mroute_reg_vif_num;
#endif
@@ -1583,7 +1583,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
return -ENOENT;
if (optname != MRT6_INIT) {
- if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
+ if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EACCES;
}
@@ -1646,9 +1646,12 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
case MRT6_ASSERT:
{
int v;
+
+ if (optlen != sizeof(v))
+ return -EINVAL;
if (get_user(v, (int __user *)optval))
return -EFAULT;
- mrt->mroute_do_assert = !!v;
+ mrt->mroute_do_assert = v;
return 0;
}
@@ -1656,6 +1659,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
case MRT6_PIM:
{
int v;
+
+ if (optlen != sizeof(v))
+ return -EINVAL;
if (get_user(v, (int __user *)optval))
return -EFAULT;
v = !!v;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index ba6d13d1f1e1..ee94d31c9d4d 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -343,7 +343,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_TRANSPARENT:
- if (valbool && !capable(CAP_NET_ADMIN) && !capable(CAP_NET_RAW)) {
+ if (valbool && !ns_capable(net->user_ns, CAP_NET_ADMIN) &&
+ !ns_capable(net->user_ns, CAP_NET_RAW)) {
retv = -EPERM;
break;
}
@@ -381,7 +382,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
/* hop-by-hop / destination options are privileged option */
retv = -EPERM;
- if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW))
+ if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
break;
opt = ipv6_renew_options(sk, np->opt, optname,
@@ -397,7 +398,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optname == IPV6_RTHDR && opt && opt->srcrt) {
struct ipv6_rt_hdr *rthdr = opt->srcrt;
switch (rthdr->type) {
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPV6_SRCRT_TYPE_2:
if (rthdr->hdrlen != 2 ||
rthdr->segments_left != 1)
@@ -754,7 +755,7 @@ done:
case IPV6_IPSEC_POLICY:
case IPV6_XFRM_POLICY:
retv = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
retv = xfrm_user_policy(sk, optname, optval, optlen);
break;
@@ -827,6 +828,7 @@ pref_skip_coa:
if (val < 0 || val > 255)
goto e_inval;
np->min_hopcount = val;
+ retv = 0;
break;
case IPV6_DONTFRAG:
np->dontfrag = valbool;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 92f8e48e4ba4..b19ed51a45bb 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -163,7 +163,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
rt = rt6_lookup(net, addr, NULL, 0, 0);
if (rt) {
dev = rt->dst.dev;
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
}
} else
dev = dev_get_by_index_rcu(net, ifindex);
@@ -260,7 +260,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
if (rt) {
dev = rt->dst.dev;
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
}
} else
dev = dev_get_by_index_rcu(net, ifindex);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index ff36194a71aa..f41853bca428 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -535,7 +535,6 @@ static void ndisc_send_unsol_na(struct net_device *dev)
{
struct inet6_dev *idev;
struct inet6_ifaddr *ifa;
- struct in6_addr mcaddr;
idev = in6_dev_get(dev);
if (!idev)
@@ -543,8 +542,7 @@ static void ndisc_send_unsol_na(struct net_device *dev)
read_lock_bh(&idev->lock);
list_for_each_entry(ifa, &idev->addr_list, if_list) {
- addrconf_addr_solict_mult(&ifa->addr, &mcaddr);
- ndisc_send_na(dev, NULL, &mcaddr, &ifa->addr,
+ ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &ifa->addr,
/*router=*/ !!idev->cnf.forwarding,
/*solicited=*/ false, /*override=*/ true,
/*inc_opt=*/ true);
@@ -906,7 +904,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp &&
pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) {
- /* XXX: idev->cnf.prixy_ndp */
+ /* XXX: idev->cnf.proxy_ndp */
goto out;
}
@@ -1145,7 +1143,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
ND_PRINTK(0, err,
"RA: %s got default router without neighbour\n",
__func__);
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
return;
}
}
@@ -1170,7 +1168,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
ND_PRINTK(0, err,
"RA: %s got default router without neighbour\n",
__func__);
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
return;
}
neigh->flags |= NTF_ROUTER;
@@ -1326,8 +1324,7 @@ skip_routeinfo:
ND_PRINTK(2, warn, "RA: invalid RA options\n");
}
out:
- if (rt)
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
if (neigh)
neigh_release(neigh);
}
@@ -1575,11 +1572,18 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
{
struct net_device *dev = ptr;
struct net *net = dev_net(dev);
+ struct inet6_dev *idev;
switch (event) {
case NETDEV_CHANGEADDR:
neigh_changeaddr(&nd_tbl, dev);
fib6_run_gc(~0UL, net);
+ idev = in6_dev_get(dev);
+ if (!idev)
+ break;
+ if (idev->cnf.ndisc_notify)
+ ndisc_send_unsol_na(dev);
+ in6_dev_put(idev);
break;
case NETDEV_DOWN:
neigh_ifdown(&nd_tbl, dev);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index d7cb04506c3d..74cadd0719a5 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -207,8 +207,7 @@ ip6t_get_target_c(const struct ip6t_entry *e)
return ip6t_get_target((struct ip6t_entry *)e);
}
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* This cries for unification! */
static const char *const hooknames[] = {
[NF_INET_PRE_ROUTING] = "PREROUTING",
@@ -381,8 +380,7 @@ ip6t_do_table(struct sk_buff *skb,
t = ip6t_get_target_c(e);
IP_NF_ASSERT(t->u.kernel.target);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* The packet is traced: log it */
if (unlikely(skb->nf_trace))
trace_packet(skb, hook, in, out,
@@ -1856,7 +1854,7 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -1971,7 +1969,7 @@ compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -1993,7 +1991,7 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -2018,7 +2016,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 5d1d8b04d694..5060d54199ab 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -67,7 +67,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,
if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE))
ret = true;
out:
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
return ret;
}
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index e418bd6350a4..fa84cf8ec6bc 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -186,7 +186,8 @@ nf_nat_ipv6_out(unsigned int hooknum,
if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
&ct->tuplehash[!dir].tuple.dst.u3) ||
- (ct->tuplehash[dir].tuple.src.u.all !=
+ (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
+ ct->tuplehash[dir].tuple.src.u.all !=
ct->tuplehash[!dir].tuple.dst.u.all))
if (nf_xfrm_me_harder(skb, AF_INET6) < 0)
ret = NF_DROP;
@@ -222,6 +223,7 @@ nf_nat_ipv6_local_fn(unsigned int hooknum,
}
#ifdef CONFIG_XFRM
else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
ct->tuplehash[dir].tuple.dst.u.all !=
ct->tuplehash[!dir].tuple.src.u.all)
if (nf_xfrm_me_harder(skb, AF_INET6))
@@ -275,9 +277,7 @@ static int __net_init ip6table_nat_net_init(struct net *net)
return -ENOMEM;
net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl);
kfree(repl);
- if (IS_ERR(net->ipv6.ip6table_nat))
- return PTR_ERR(net->ipv6.ip6table_nat);
- return 0;
+ return PTR_RET(net->ipv6.ip6table_nat);
}
static void __net_exit ip6table_nat_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 8860d23e61cf..00ee17c3e893 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -21,6 +21,7 @@
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
@@ -295,7 +296,56 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
},
};
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+static int
+ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct ipv6_pinfo *inet6 = inet6_sk(sk);
+ const struct nf_conntrack_tuple_hash *h;
+ struct sockaddr_in6 sin6;
+ struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
+ struct nf_conn *ct;
+
+ tuple.src.u3.in6 = inet6->rcv_saddr;
+ tuple.src.u.tcp.port = inet->inet_sport;
+ tuple.dst.u3.in6 = inet6->daddr;
+ tuple.dst.u.tcp.port = inet->inet_dport;
+ tuple.dst.protonum = sk->sk_protocol;
+
+ if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP)
+ return -ENOPROTOOPT;
+
+ if (*len < 0 || (unsigned int) *len < sizeof(sin6))
+ return -EINVAL;
+
+ h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
+ if (!h) {
+ pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
+ &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
+ &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
+ return -ENOENT;
+ }
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
+ sin6.sin6_flowinfo = inet6->flow_label & IPV6_FLOWINFO_MASK;
+ memcpy(&sin6.sin6_addr,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
+ sizeof(sin6.sin6_addr));
+
+ nf_ct_put(ct);
+
+ if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin6.sin6_scope_id = sk->sk_bound_dev_if;
+ else
+ sin6.sin6_scope_id = 0;
+
+ return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
@@ -346,7 +396,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
.invert_tuple = ipv6_invert_tuple,
.print_tuple = ipv6_print_tuple,
.get_l4proto = ipv6_get_l4proto,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = ipv6_tuple_to_nlattr,
.nlattr_tuple_size = ipv6_nlattr_tuple_size,
.nlattr_to_tuple = ipv6_nlattr_to_tuple,
@@ -359,6 +409,14 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
+static struct nf_sockopt_ops so_getorigdst6 = {
+ .pf = NFPROTO_IPV6,
+ .get_optmin = IP6T_SO_ORIGINAL_DST,
+ .get_optmax = IP6T_SO_ORIGINAL_DST + 1,
+ .get = ipv6_getorigdst,
+ .owner = THIS_MODULE,
+};
+
static int ipv6_net_init(struct net *net)
{
int ret = 0;
@@ -425,6 +483,12 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
need_conntrack();
nf_defrag_ipv6_enable();
+ ret = nf_register_sockopt(&so_getorigdst6);
+ if (ret < 0) {
+ pr_err("Unable to register netfilter socket option\n");
+ return ret;
+ }
+
ret = register_pernet_subsys(&ipv6_net_ops);
if (ret < 0)
goto cleanup_pernet;
@@ -440,6 +504,7 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
cleanup_ipv6:
unregister_pernet_subsys(&ipv6_net_ops);
cleanup_pernet:
+ nf_unregister_sockopt(&so_getorigdst6);
return ret;
}
@@ -448,6 +513,7 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void)
synchronize_net();
nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
unregister_pernet_subsys(&ipv6_net_ops);
+ nf_unregister_sockopt(&so_getorigdst6);
}
module_init(nf_conntrack_l3proto_ipv6_init);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 2d54b2061d68..24df3dde0076 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -232,7 +232,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum);
}
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
@@ -375,7 +375,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
.get_timeouts = icmpv6_get_timeouts,
.new = icmpv6_new,
.error = icmpv6_error,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = icmpv6_tuple_to_nlattr,
.nlattr_tuple_size = icmpv6_nlattr_tuple_size,
.nlattr_to_tuple = icmpv6_nlattr_to_tuple,
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 18bd9bbbd1c6..22c8ea951185 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -85,7 +85,7 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{ }
};
-static int __net_init nf_ct_frag6_sysctl_register(struct net *net)
+static int nf_ct_frag6_sysctl_register(struct net *net)
{
struct ctl_table *table;
struct ctl_table_header *hdr;
@@ -127,7 +127,7 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
}
#else
-static int __net_init nf_ct_frag6_sysctl_register(struct net *net)
+static int nf_ct_frag6_sysctl_register(struct net *net)
{
return 0;
}
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index cdd6d045e42e..aacd121fe8c5 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -19,7 +19,7 @@
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
@@ -35,7 +35,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
{
u16 zone = NF_CT_DEFAULT_ZONE;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
if (skb->nfct)
zone = nf_ct_zone((struct nf_conn *)skb->nfct);
#endif
@@ -60,7 +60,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
{
struct sk_buff *reasm;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* Previously seen (loopback)? */
if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
return NF_ACCEPT;
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
index 5d6da784305b..61aaf70f376e 100644
--- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -84,7 +84,7 @@ const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = {
.manip_pkt = icmpv6_manip_pkt,
.in_range = icmpv6_in_range,
.unique_tuple = icmpv6_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
new file mode 100644
index 000000000000..c2e73e647e44
--- /dev/null
+++ b/net/ipv6/output_core.c
@@ -0,0 +1,76 @@
+/*
+ * IPv6 library code, needed by static components when full IPv6 support is
+ * not configured or static. These functions are needed by GSO/GRO implementation.
+ */
+#include <linux/export.h>
+#include <net/ipv6.h>
+#include <net/ip6_fib.h>
+
+void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
+{
+ static atomic_t ipv6_fragmentation_id;
+ int old, new;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (rt && !(rt->dst.flags & DST_NOPEER)) {
+ struct inet_peer *peer;
+ struct net *net;
+
+ net = dev_net(rt->dst.dev);
+ peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+ if (peer) {
+ fhdr->identification = htonl(inet_getid(peer, 0));
+ inet_putpeer(peer);
+ return;
+ }
+ }
+#endif
+ do {
+ old = atomic_read(&ipv6_fragmentation_id);
+ new = old + 1;
+ if (!new)
+ new = 1;
+ } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
+ fhdr->identification = htonl(new);
+}
+EXPORT_SYMBOL(ipv6_select_ident);
+
+int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
+{
+ u16 offset = sizeof(struct ipv6hdr);
+ struct ipv6_opt_hdr *exthdr =
+ (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+ unsigned int packet_len = skb->tail - skb->network_header;
+ int found_rhdr = 0;
+ *nexthdr = &ipv6_hdr(skb)->nexthdr;
+
+ while (offset + 1 <= packet_len) {
+
+ switch (**nexthdr) {
+
+ case NEXTHDR_HOP:
+ break;
+ case NEXTHDR_ROUTING:
+ found_rhdr = 1;
+ break;
+ case NEXTHDR_DEST:
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+ if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
+ break;
+#endif
+ if (found_rhdr)
+ return offset;
+ break;
+ default :
+ return offset;
+ }
+
+ offset += ipv6_optlen(exthdr);
+ *nexthdr = &exthdr->nexthdr;
+ exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
+ offset);
+ }
+
+ return offset;
+}
+EXPORT_SYMBOL(ip6_find_1stfragopt);
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 053082dfc93e..22d1bd4670da 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -25,7 +25,9 @@
#include <linux/spinlock.h>
#include <net/protocol.h>
+#if IS_ENABLED(CONFIG_IPV6)
const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
+EXPORT_SYMBOL(inet6_protos);
int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
{
@@ -50,3 +52,26 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol
return ret;
}
EXPORT_SYMBOL(inet6_del_protocol);
+#endif
+
+const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS] __read_mostly;
+
+int inet6_add_offload(const struct net_offload *prot, unsigned char protocol)
+{
+ return !cmpxchg((const struct net_offload **)&inet6_offloads[protocol],
+ NULL, prot) ? 0 : -1;
+}
+EXPORT_SYMBOL(inet6_add_offload);
+
+int inet6_del_offload(const struct net_offload *prot, unsigned char protocol)
+{
+ int ret;
+
+ ret = (cmpxchg((const struct net_offload **)&inet6_offloads[protocol],
+ prot, NULL) == prot) ? 0 : -1;
+
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL(inet6_del_offload);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index d8e95c77db99..6cd29b1e8b92 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -50,7 +50,7 @@
#include <net/udp.h>
#include <net/inet_common.h>
#include <net/tcp_states.h>
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/mip6.h>
#endif
#include <linux/mroute6.h>
@@ -123,7 +123,7 @@ static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb)
return 1;
}
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb);
static mh_filter_t __rcu *mh_filter __read_mostly;
@@ -184,7 +184,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
filtered = icmpv6_filter(sk, skb);
break;
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPPROTO_MH:
{
/* XXX: To validate MH only once for each packet,
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index da8a4e301b1b..e5253ec9e0fc 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -616,6 +616,10 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
table[0].data = &net->ipv6.frags.high_thresh;
table[1].data = &net->ipv6.frags.low_thresh;
table[2].data = &net->ipv6.frags.timeout;
+
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ table[0].procname = NULL;
}
hdr = register_net_sysctl(net, "net/ipv6", table);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7c7e963260e1..8f124f575116 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -57,6 +57,7 @@
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
+#include <net/nexthop.h>
#include <asm/uaccess.h>
@@ -219,7 +220,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
};
static const u32 ip6_template_metrics[RTAX_MAX] = {
- [RTAX_HOPLIMIT - 1] = 255,
+ [RTAX_HOPLIMIT - 1] = 0,
};
static const struct rt6_info ip6_null_entry_template = {
@@ -289,6 +290,8 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
rt->rt6i_genid = rt_genid(net);
+ INIT_LIST_HEAD(&rt->rt6i_siblings);
+ rt->rt6i_nsiblings = 0;
}
return rt;
}
@@ -318,13 +321,6 @@ static void ip6_dst_destroy(struct dst_entry *dst)
}
}
-static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
-
-static u32 rt6_peer_genid(void)
-{
- return atomic_read(&__rt6_peer_genid);
-}
-
void rt6_bind_peer(struct rt6_info *rt, int create)
{
struct inet_peer_base *base;
@@ -338,8 +334,6 @@ void rt6_bind_peer(struct rt6_info *rt, int create)
if (peer) {
if (!rt6_set_peer(rt, peer))
inet_putpeer(peer);
- else
- rt->rt6i_peer_genid = rt6_peer_genid();
}
}
@@ -385,6 +379,69 @@ static bool rt6_need_strict(const struct in6_addr *daddr)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}
+/* Multipath route selection:
+ * Hash based function using packet header and flowlabel.
+ * Adapted from fib_info_hashfn()
+ */
+static int rt6_info_hash_nhsfn(unsigned int candidate_count,
+ const struct flowi6 *fl6)
+{
+ unsigned int val = fl6->flowi6_proto;
+
+ val ^= (__force u32)fl6->daddr.s6_addr32[0];
+ val ^= (__force u32)fl6->daddr.s6_addr32[1];
+ val ^= (__force u32)fl6->daddr.s6_addr32[2];
+ val ^= (__force u32)fl6->daddr.s6_addr32[3];
+
+ val ^= (__force u32)fl6->saddr.s6_addr32[0];
+ val ^= (__force u32)fl6->saddr.s6_addr32[1];
+ val ^= (__force u32)fl6->saddr.s6_addr32[2];
+ val ^= (__force u32)fl6->saddr.s6_addr32[3];
+
+ /* Work only if this not encapsulated */
+ switch (fl6->flowi6_proto) {
+ case IPPROTO_UDP:
+ case IPPROTO_TCP:
+ case IPPROTO_SCTP:
+ val ^= (__force u16)fl6->fl6_sport;
+ val ^= (__force u16)fl6->fl6_dport;
+ break;
+
+ case IPPROTO_ICMPV6:
+ val ^= (__force u16)fl6->fl6_icmp_type;
+ val ^= (__force u16)fl6->fl6_icmp_code;
+ break;
+ }
+ /* RFC6438 recommands to use flowlabel */
+ val ^= (__force u32)fl6->flowlabel;
+
+ /* Perhaps, we need to tune, this function? */
+ val = val ^ (val >> 7) ^ (val >> 12);
+ return val % candidate_count;
+}
+
+static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+ struct flowi6 *fl6)
+{
+ struct rt6_info *sibling, *next_sibling;
+ int route_choosen;
+
+ route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
+ /* Don't change the route, if route_choosen == 0
+ * (siblings does not include ourself)
+ */
+ if (route_choosen)
+ list_for_each_entry_safe(sibling, next_sibling,
+ &match->rt6i_siblings, rt6i_siblings) {
+ route_choosen--;
+ if (route_choosen == 0) {
+ match = sibling;
+ break;
+ }
+ }
+ return match;
+}
+
/*
* Route lookup. Any table->tb6_lock is implied.
*/
@@ -666,7 +723,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
else
rt6_set_expires(rt, jiffies + HZ * lifetime);
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
}
return 0;
}
@@ -702,6 +759,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
restart:
rt = fn->leaf;
rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
+ if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
+ rt = rt6_multipath_select(rt, fl6);
BACKTRACK(net, &fl6->saddr);
out:
dst_use(&rt->dst, jiffies);
@@ -863,7 +922,8 @@ restart_2:
restart:
rt = rt6_select(fn, oif, strict | reachable);
-
+ if (rt->rt6i_nsiblings && oif == 0)
+ rt = rt6_multipath_select(rt, fl6);
BACKTRACK(net, &fl6->saddr);
if (rt == net->ipv6.ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
@@ -879,7 +939,7 @@ restart:
else
goto out2;
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
rt = nrt ? : net->ipv6.ip6_null_entry;
dst_hold(&rt->dst);
@@ -896,7 +956,7 @@ restart:
* Race condition! In the gap, when table->tb6_lock was
* released someone could insert this route. Relookup.
*/
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
goto relookup;
out:
@@ -1030,14 +1090,9 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
return NULL;
- if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
- if (rt->rt6i_peer_genid != rt6_peer_genid()) {
- if (!rt6_has_peer(rt))
- rt6_bind_peer(rt, 0);
- rt->rt6i_peer_genid = rt6_peer_genid();
- }
+ if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
return dst;
- }
+
return NULL;
}
@@ -1232,7 +1287,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
rt->rt6i_dst.addr = fl6->daddr;
rt->rt6i_dst.plen = 128;
rt->rt6i_idev = idev;
- dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
+ dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
spin_lock_bh(&icmp6_dst_lock);
rt->dst.next = icmp6_dst_gc_list;
@@ -1316,12 +1371,6 @@ out:
return entries > rt_max_size;
}
-/* Clean host part of a prefix. Not necessary in radix tree,
- but results in cleaner routing tables.
-
- Remove it only when all the things will work!
- */
-
int ip6_dst_hoplimit(struct dst_entry *dst)
{
int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
@@ -1507,7 +1556,7 @@ int ip6_route_add(struct fib6_config *cfg)
goto out;
if (dev) {
if (dev != grt->dst.dev) {
- dst_release(&grt->dst);
+ ip6_rt_put(grt);
goto out;
}
} else {
@@ -1518,7 +1567,7 @@ int ip6_route_add(struct fib6_config *cfg)
}
if (!(grt->rt6i_flags & RTF_GATEWAY))
err = 0;
- dst_release(&grt->dst);
+ ip6_rt_put(grt);
if (err)
goto out;
@@ -1604,7 +1653,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
write_unlock_bh(&table->tb6_lock);
out:
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
return err;
}
@@ -1987,7 +2036,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
switch(cmd) {
case SIOCADDRT: /* Add a route */
case SIOCDELRT: /* Delete a route */
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
err = copy_from_user(&rtmsg, arg,
sizeof(struct in6_rtmsg));
@@ -2249,6 +2298,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_IIF] = { .type = NLA_U32 },
[RTA_PRIORITY] = { .type = NLA_U32 },
[RTA_METRICS] = { .type = NLA_NESTED },
+ [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2326,11 +2376,71 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[RTA_TABLE])
cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
+ if (tb[RTA_MULTIPATH]) {
+ cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
+ cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
+ }
+
err = 0;
errout:
return err;
}
+static int ip6_route_multipath(struct fib6_config *cfg, int add)
+{
+ struct fib6_config r_cfg;
+ struct rtnexthop *rtnh;
+ int remaining;
+ int attrlen;
+ int err = 0, last_err = 0;
+
+beginning:
+ rtnh = (struct rtnexthop *)cfg->fc_mp;
+ remaining = cfg->fc_mp_len;
+
+ /* Parse a Multipath Entry */
+ while (rtnh_ok(rtnh, remaining)) {
+ memcpy(&r_cfg, cfg, sizeof(*cfg));
+ if (rtnh->rtnh_ifindex)
+ r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
+
+ attrlen = rtnh_attrlen(rtnh);
+ if (attrlen > 0) {
+ struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+ nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+ if (nla) {
+ nla_memcpy(&r_cfg.fc_gateway, nla, 16);
+ r_cfg.fc_flags |= RTF_GATEWAY;
+ }
+ }
+ err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
+ if (err) {
+ last_err = err;
+ /* If we are trying to remove a route, do not stop the
+ * loop when ip6_route_del() fails (because next hop is
+ * already gone), we should try to remove all next hops.
+ */
+ if (add) {
+ /* If add fails, we should try to delete all
+ * next hops that have been already added.
+ */
+ add = 0;
+ goto beginning;
+ }
+ }
+ /* Because each route is added like a single route we remove
+ * this flag after the first nexthop (if there is a collision,
+ * we have already fail to add the first nexthop:
+ * fib6_add_rt2node() has reject it).
+ */
+ cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
+ rtnh = rtnh_next(rtnh, &remaining);
+ }
+
+ return last_err;
+}
+
static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib6_config cfg;
@@ -2340,7 +2450,10 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
if (err < 0)
return err;
- return ip6_route_del(&cfg);
+ if (cfg.fc_mp)
+ return ip6_route_multipath(&cfg, 0);
+ else
+ return ip6_route_del(&cfg);
}
static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
@@ -2352,7 +2465,10 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
if (err < 0)
return err;
- return ip6_route_add(&cfg);
+ if (cfg.fc_mp)
+ return ip6_route_multipath(&cfg, 1);
+ else
+ return ip6_route_add(&cfg);
}
static inline size_t rt6_nlmsg_size(void)
@@ -2596,7 +2712,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb) {
- dst_release(&rt->dst);
+ ip6_rt_put(rt);
err = -ENOBUFS;
goto errout;
}
@@ -2873,6 +2989,10 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
+
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ table[0].procname = NULL;
}
return table;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 3ed54ffd8d50..80cb3829831c 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -68,6 +68,7 @@
static int ipip6_tunnel_init(struct net_device *dev);
static void ipip6_tunnel_setup(struct net_device *dev);
static void ipip6_dev_free(struct net_device *dev);
+static struct rtnl_link_ops sit_link_ops __read_mostly;
static int sit_net_id __read_mostly;
struct sit_net {
@@ -80,22 +81,6 @@ struct sit_net {
struct net_device *fb_tunnel_dev;
};
-/*
- * Locking : hash tables are protected by RCU and RTNL
- */
-
-#define for_each_ip_tunnel_rcu(start) \
- for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
-
-/* often modified stats are per cpu, other are shared (netdev->stats) */
-struct pcpu_tstats {
- u64 rx_packets;
- u64 rx_bytes;
- u64 tx_packets;
- u64 tx_bytes;
- struct u64_stats_sync syncp;
-};
-
static struct rtnl_link_stats64 *ipip6_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *tot)
{
@@ -141,20 +126,20 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
struct ip_tunnel *t;
struct sit_net *sitn = net_generic(net, sit_net_id);
- for_each_ip_tunnel_rcu(sitn->tunnels_r_l[h0 ^ h1]) {
+ for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
(!dev || !t->parms.link || dev->iflink == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
- for_each_ip_tunnel_rcu(sitn->tunnels_r[h0]) {
+ for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
if (remote == t->parms.iph.daddr &&
(!dev || !t->parms.link || dev->iflink == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
- for_each_ip_tunnel_rcu(sitn->tunnels_l[h1]) {
+ for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
if (local == t->parms.iph.saddr &&
(!dev || !t->parms.link || dev->iflink == t->parms.link) &&
(t->dev->flags & IFF_UP))
@@ -231,6 +216,37 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
#endif
}
+static int ipip6_tunnel_create(struct net_device *dev)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+ int err;
+
+ err = ipip6_tunnel_init(dev);
+ if (err < 0)
+ goto out;
+ ipip6_tunnel_clone_6rd(dev, sitn);
+
+ if ((__force u16)t->parms.i_flags & SIT_ISATAP)
+ dev->priv_flags |= IFF_ISATAP;
+
+ err = register_netdevice(dev);
+ if (err < 0)
+ goto out;
+
+ strcpy(t->parms.name, dev->name);
+ dev->rtnl_link_ops = &sit_link_ops;
+
+ dev_hold(dev);
+
+ ipip6_tunnel_link(sitn, t);
+ return 0;
+
+out:
+ return err;
+}
+
static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
struct ip_tunnel_parm *parms, int create)
{
@@ -271,21 +287,9 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
nt = netdev_priv(dev);
nt->parms = *parms;
- if (ipip6_tunnel_init(dev) < 0)
+ if (ipip6_tunnel_create(dev) < 0)
goto failed_free;
- ipip6_tunnel_clone_6rd(dev, sitn);
- if (parms->i_flags & SIT_ISATAP)
- dev->priv_flags |= IFF_ISATAP;
-
- if (register_netdevice(dev) < 0)
- goto failed_free;
-
- strcpy(nt->parms.name, dev->name);
-
- dev_hold(dev);
-
- ipip6_tunnel_link(sitn, nt);
return nt;
failed_free:
@@ -683,7 +687,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct pcpu_tstats *tstats;
const struct iphdr *tiph = &tunnel->parms.iph;
const struct ipv6hdr *iph6 = ipv6_hdr(skb);
u8 tos = tunnel->parms.iph.tos;
@@ -864,9 +867,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
if ((iph->ttl = tiph->ttl) == 0)
iph->ttl = iph6->hop_limit;
- nf_reset(skb);
- tstats = this_cpu_ptr(dev->tstats);
- __IPTUNNEL_XMIT(tstats, &dev->stats);
+ iptunnel_xmit(skb, dev);
return NETDEV_TX_OK;
tx_error_icmp:
@@ -914,6 +915,59 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
dev->iflink = tunnel->parms.link;
}
+static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
+{
+ struct net *net = dev_net(t->dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+
+ ipip6_tunnel_unlink(sitn, t);
+ synchronize_net();
+ t->parms.iph.saddr = p->iph.saddr;
+ t->parms.iph.daddr = p->iph.daddr;
+ memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
+ memcpy(t->dev->broadcast, &p->iph.daddr, 4);
+ ipip6_tunnel_link(sitn, t);
+ t->parms.iph.ttl = p->iph.ttl;
+ t->parms.iph.tos = p->iph.tos;
+ if (t->parms.link != p->link) {
+ t->parms.link = p->link;
+ ipip6_tunnel_bind_dev(t->dev);
+ }
+ netdev_state_change(t->dev);
+}
+
+#ifdef CONFIG_IPV6_SIT_6RD
+static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
+ struct ip_tunnel_6rd *ip6rd)
+{
+ struct in6_addr prefix;
+ __be32 relay_prefix;
+
+ if (ip6rd->relay_prefixlen > 32 ||
+ ip6rd->prefixlen + (32 - ip6rd->relay_prefixlen) > 64)
+ return -EINVAL;
+
+ ipv6_addr_prefix(&prefix, &ip6rd->prefix, ip6rd->prefixlen);
+ if (!ipv6_addr_equal(&prefix, &ip6rd->prefix))
+ return -EINVAL;
+ if (ip6rd->relay_prefixlen)
+ relay_prefix = ip6rd->relay_prefix &
+ htonl(0xffffffffUL <<
+ (32 - ip6rd->relay_prefixlen));
+ else
+ relay_prefix = 0;
+ if (relay_prefix != ip6rd->relay_prefix)
+ return -EINVAL;
+
+ t->ip6rd.prefix = prefix;
+ t->ip6rd.relay_prefix = relay_prefix;
+ t->ip6rd.prefixlen = ip6rd->prefixlen;
+ t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;
+ netdev_state_change(t->dev);
+ return 0;
+}
+#endif
+
static int
ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
@@ -966,7 +1020,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCADDTUNNEL:
case SIOCCHGTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
err = -EFAULT;
@@ -995,28 +1049,13 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
break;
}
t = netdev_priv(dev);
- ipip6_tunnel_unlink(sitn, t);
- synchronize_net();
- t->parms.iph.saddr = p.iph.saddr;
- t->parms.iph.daddr = p.iph.daddr;
- memcpy(dev->dev_addr, &p.iph.saddr, 4);
- memcpy(dev->broadcast, &p.iph.daddr, 4);
- ipip6_tunnel_link(sitn, t);
- netdev_state_change(dev);
}
+
+ ipip6_tunnel_update(t, &p);
}
if (t) {
err = 0;
- if (cmd == SIOCCHGTUNNEL) {
- t->parms.iph.ttl = p.iph.ttl;
- t->parms.iph.tos = p.iph.tos;
- if (t->parms.link != p.link) {
- t->parms.link = p.link;
- ipip6_tunnel_bind_dev(dev);
- netdev_state_change(dev);
- }
- }
if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
err = -EFAULT;
} else
@@ -1025,7 +1064,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCDELTUNNEL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
if (dev == sitn->fb_tunnel_dev) {
@@ -1058,7 +1097,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCDELPRL:
case SIOCCHGPRL:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
err = -EINVAL;
if (dev == sitn->fb_tunnel_dev)
@@ -1087,7 +1126,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCCHG6RD:
case SIOCDEL6RD:
err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto done;
err = -EFAULT;
@@ -1098,31 +1137,9 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
t = netdev_priv(dev);
if (cmd != SIOCDEL6RD) {
- struct in6_addr prefix;
- __be32 relay_prefix;
-
- err = -EINVAL;
- if (ip6rd.relay_prefixlen > 32 ||
- ip6rd.prefixlen + (32 - ip6rd.relay_prefixlen) > 64)
- goto done;
-
- ipv6_addr_prefix(&prefix, &ip6rd.prefix,
- ip6rd.prefixlen);
- if (!ipv6_addr_equal(&prefix, &ip6rd.prefix))
+ err = ipip6_tunnel_update_6rd(t, &ip6rd);
+ if (err < 0)
goto done;
- if (ip6rd.relay_prefixlen)
- relay_prefix = ip6rd.relay_prefix &
- htonl(0xffffffffUL <<
- (32 - ip6rd.relay_prefixlen));
- else
- relay_prefix = 0;
- if (relay_prefix != ip6rd.relay_prefix)
- goto done;
-
- t->ip6rd.prefix = prefix;
- t->ip6rd.relay_prefix = relay_prefix;
- t->ip6rd.prefixlen = ip6rd.prefixlen;
- t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen;
} else
ipip6_tunnel_clone_6rd(dev, sitn);
@@ -1216,6 +1233,239 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
return 0;
}
+static void ipip6_netlink_parms(struct nlattr *data[],
+ struct ip_tunnel_parm *parms)
+{
+ memset(parms, 0, sizeof(*parms));
+
+ parms->iph.version = 4;
+ parms->iph.protocol = IPPROTO_IPV6;
+ parms->iph.ihl = 5;
+ parms->iph.ttl = 64;
+
+ if (!data)
+ return;
+
+ if (data[IFLA_IPTUN_LINK])
+ parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
+
+ if (data[IFLA_IPTUN_LOCAL])
+ parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
+
+ if (data[IFLA_IPTUN_REMOTE])
+ parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
+
+ if (data[IFLA_IPTUN_TTL]) {
+ parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
+ if (parms->iph.ttl)
+ parms->iph.frag_off = htons(IP_DF);
+ }
+
+ if (data[IFLA_IPTUN_TOS])
+ parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
+
+ if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
+ parms->iph.frag_off = htons(IP_DF);
+
+ if (data[IFLA_IPTUN_FLAGS])
+ parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
+}
+
+#ifdef CONFIG_IPV6_SIT_6RD
+/* This function returns true when 6RD attributes are present in the nl msg */
+static bool ipip6_netlink_6rd_parms(struct nlattr *data[],
+ struct ip_tunnel_6rd *ip6rd)
+{
+ bool ret = false;
+ memset(ip6rd, 0, sizeof(*ip6rd));
+
+ if (!data)
+ return ret;
+
+ if (data[IFLA_IPTUN_6RD_PREFIX]) {
+ ret = true;
+ nla_memcpy(&ip6rd->prefix, data[IFLA_IPTUN_6RD_PREFIX],
+ sizeof(struct in6_addr));
+ }
+
+ if (data[IFLA_IPTUN_6RD_RELAY_PREFIX]) {
+ ret = true;
+ ip6rd->relay_prefix =
+ nla_get_be32(data[IFLA_IPTUN_6RD_RELAY_PREFIX]);
+ }
+
+ if (data[IFLA_IPTUN_6RD_PREFIXLEN]) {
+ ret = true;
+ ip6rd->prefixlen = nla_get_u16(data[IFLA_IPTUN_6RD_PREFIXLEN]);
+ }
+
+ if (data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]) {
+ ret = true;
+ ip6rd->relay_prefixlen =
+ nla_get_u16(data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]);
+ }
+
+ return ret;
+}
+#endif
+
+static int ipip6_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct net *net = dev_net(dev);
+ struct ip_tunnel *nt;
+#ifdef CONFIG_IPV6_SIT_6RD
+ struct ip_tunnel_6rd ip6rd;
+#endif
+ int err;
+
+ nt = netdev_priv(dev);
+ ipip6_netlink_parms(data, &nt->parms);
+
+ if (ipip6_tunnel_locate(net, &nt->parms, 0))
+ return -EEXIST;
+
+ err = ipip6_tunnel_create(dev);
+ if (err < 0)
+ return err;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+ if (ipip6_netlink_6rd_parms(data, &ip6rd))
+ err = ipip6_tunnel_update_6rd(nt, &ip6rd);
+#endif
+
+ return err;
+}
+
+static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[])
+{
+ struct ip_tunnel *t;
+ struct ip_tunnel_parm p;
+ struct net *net = dev_net(dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+#ifdef CONFIG_IPV6_SIT_6RD
+ struct ip_tunnel_6rd ip6rd;
+#endif
+
+ if (dev == sitn->fb_tunnel_dev)
+ return -EINVAL;
+
+ ipip6_netlink_parms(data, &p);
+
+ if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
+ (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
+ return -EINVAL;
+
+ t = ipip6_tunnel_locate(net, &p, 0);
+
+ if (t) {
+ if (t->dev != dev)
+ return -EEXIST;
+ } else
+ t = netdev_priv(dev);
+
+ ipip6_tunnel_update(t, &p);
+
+#ifdef CONFIG_IPV6_SIT_6RD
+ if (ipip6_netlink_6rd_parms(data, &ip6rd))
+ return ipip6_tunnel_update_6rd(t, &ip6rd);
+#endif
+
+ return 0;
+}
+
+static size_t ipip6_get_size(const struct net_device *dev)
+{
+ return
+ /* IFLA_IPTUN_LINK */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_LOCAL */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_REMOTE */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_TTL */
+ nla_total_size(1) +
+ /* IFLA_IPTUN_TOS */
+ nla_total_size(1) +
+ /* IFLA_IPTUN_PMTUDISC */
+ nla_total_size(1) +
+ /* IFLA_IPTUN_FLAGS */
+ nla_total_size(2) +
+#ifdef CONFIG_IPV6_SIT_6RD
+ /* IFLA_IPTUN_6RD_PREFIX */
+ nla_total_size(sizeof(struct in6_addr)) +
+ /* IFLA_IPTUN_6RD_RELAY_PREFIX */
+ nla_total_size(4) +
+ /* IFLA_IPTUN_6RD_PREFIXLEN */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */
+ nla_total_size(2) +
+#endif
+ 0;
+}
+
+static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct ip_tunnel_parm *parm = &tunnel->parms;
+
+ if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
+ nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
+ nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
+ nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
+ nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
+ nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
+ !!(parm->iph.frag_off & htons(IP_DF))) ||
+ nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags))
+ goto nla_put_failure;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+ if (nla_put(skb, IFLA_IPTUN_6RD_PREFIX, sizeof(struct in6_addr),
+ &tunnel->ip6rd.prefix) ||
+ nla_put_be32(skb, IFLA_IPTUN_6RD_RELAY_PREFIX,
+ tunnel->ip6rd.relay_prefix) ||
+ nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN,
+ tunnel->ip6rd.prefixlen) ||
+ nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN,
+ tunnel->ip6rd.relay_prefixlen))
+ goto nla_put_failure;
+#endif
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
+ [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
+ [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 },
+ [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
+ [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
+ [IFLA_IPTUN_TOS] = { .type = NLA_U8 },
+ [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
+ [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 },
+#ifdef CONFIG_IPV6_SIT_6RD
+ [IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) },
+ [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 },
+ [IFLA_IPTUN_6RD_PREFIXLEN] = { .type = NLA_U16 },
+ [IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 },
+#endif
+};
+
+static struct rtnl_link_ops sit_link_ops __read_mostly = {
+ .kind = "sit",
+ .maxtype = IFLA_IPTUN_MAX,
+ .policy = ipip6_policy,
+ .priv_size = sizeof(struct ip_tunnel),
+ .setup = ipip6_tunnel_setup,
+ .newlink = ipip6_newlink,
+ .changelink = ipip6_changelink,
+ .get_size = ipip6_get_size,
+ .fill_info = ipip6_fill_info,
+};
+
static struct xfrm_tunnel sit_handler __read_mostly = {
.handler = ipip6_rcv,
.err_handler = ipip6_err,
@@ -1302,6 +1552,7 @@ static struct pernet_operations sit_net_ops = {
static void __exit sit_cleanup(void)
{
+ rtnl_link_unregister(&sit_link_ops);
xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
unregister_pernet_device(&sit_net_ops);
@@ -1319,10 +1570,21 @@ static int __init sit_init(void)
return err;
err = xfrm4_tunnel_register(&sit_handler, AF_INET6);
if (err < 0) {
- unregister_pernet_device(&sit_net_ops);
pr_info("%s: can't add protocol\n", __func__);
+ goto xfrm_tunnel_failed;
}
+ err = rtnl_link_register(&sit_link_ops);
+ if (err < 0)
+ goto rtnl_link_failed;
+
+out:
return err;
+
+rtnl_link_failed:
+ xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
+xfrm_tunnel_failed:
+ unregister_pernet_device(&sit_net_ops);
+ goto out;
}
module_init(sit_init);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 182ab9a85d6c..40161977f7cf 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -214,7 +214,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq6->iif = inet6_iif(skb);
req->expires = 0UL;
- req->retrans = 0;
+ req->num_retrans = 0;
ireq->ecn_ok = ecn_ok;
ireq->snd_wscale = tcp_opt.snd_wscale;
ireq->sack_ok = tcp_opt.sack_ok;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 26175bffbaa0..6565cf55eb1e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -77,9 +77,6 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
-static void __tcp_v6_send_check(struct sk_buff *skb,
- const struct in6_addr *saddr,
- const struct in6_addr *daddr);
static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
@@ -119,14 +116,6 @@ static void tcp_v6_hash(struct sock *sk)
}
}
-static __inline__ __sum16 tcp_v6_check(int len,
- const struct in6_addr *saddr,
- const struct in6_addr *daddr,
- __wsum base)
-{
- return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
-}
-
static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
{
return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
@@ -306,7 +295,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (err)
goto late_failure;
- if (!tp->write_seq)
+ if (!tp->write_seq && likely(!tp->repair))
tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
np->daddr.s6_addr32,
inet->inet_sport,
@@ -495,9 +484,12 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
struct request_values *rvp)
{
struct flowi6 fl6;
+ int res;
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
- return tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
+ res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
+ if (!res)
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ return res;
}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
@@ -719,94 +711,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
};
#endif
-static void __tcp_v6_send_check(struct sk_buff *skb,
- const struct in6_addr *saddr, const struct in6_addr *daddr)
-{
- struct tcphdr *th = tcp_hdr(skb);
-
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct tcphdr, check);
- } else {
- th->check = tcp_v6_check(skb->len, saddr, daddr,
- csum_partial(th, th->doff << 2,
- skb->csum));
- }
-}
-
-static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
-{
- struct ipv6_pinfo *np = inet6_sk(sk);
-
- __tcp_v6_send_check(skb, &np->saddr, &np->daddr);
-}
-
-static int tcp_v6_gso_send_check(struct sk_buff *skb)
-{
- const struct ipv6hdr *ipv6h;
- struct tcphdr *th;
-
- if (!pskb_may_pull(skb, sizeof(*th)))
- return -EINVAL;
-
- ipv6h = ipv6_hdr(skb);
- th = tcp_hdr(skb);
-
- th->check = 0;
- skb->ip_summed = CHECKSUM_PARTIAL;
- __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
- return 0;
-}
-
-static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
-{
- const struct ipv6hdr *iph = skb_gro_network_header(skb);
- __wsum wsum;
- __sum16 sum;
-
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
- skb->csum)) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- break;
- }
-flush:
- NAPI_GRO_CB(skb)->flush = 1;
- return NULL;
-
- case CHECKSUM_NONE:
- wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
- skb_gro_len(skb),
- IPPROTO_TCP, 0));
- sum = csum_fold(skb_checksum(skb,
- skb_gro_offset(skb),
- skb_gro_len(skb),
- wsum));
- if (sum)
- goto flush;
-
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- break;
- }
-
- return tcp_gro_receive(head, skb);
-}
-
-static int tcp6_gro_complete(struct sk_buff *skb)
-{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
- struct tcphdr *th = tcp_hdr(skb);
-
- th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
- &iph->saddr, &iph->daddr, 0);
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
-
- return tcp_gro_complete(skb);
-}
-
static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass)
{
@@ -1364,7 +1268,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
tcp_initialize_rcv_mss(newsk);
tcp_synack_rtt_meas(newsk, req);
- newtp->total_retrans = req->retrans;
+ newtp->total_retrans = req->num_retrans;
newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
@@ -1741,11 +1645,11 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
skb->destructor = sock_edemux;
if (sk->sk_state != TCP_TIME_WAIT) {
struct dst_entry *dst = sk->sk_rx_dst;
- struct inet_sock *icsk = inet_sk(sk);
+
if (dst)
dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
if (dst &&
- icsk->rx_dst_ifindex == skb->skb_iif)
+ inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
skb_dst_set_noref(skb, dst);
}
}
@@ -1866,7 +1770,7 @@ static void get_openreq6(struct seq_file *seq,
0,0, /* could print option size, but that is af dependent. */
1, /* timers active (only the expire timer) */
jiffies_to_clock_t(ttd),
- req->retrans,
+ req->num_timeout,
from_kuid_munged(seq_user_ns(seq), uid),
0, /* non standard timer */
0, /* open_requests have no inode */
@@ -2063,10 +1967,6 @@ static const struct inet6_protocol tcpv6_protocol = {
.early_demux = tcp_v6_early_demux,
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
- .gso_send_check = tcp_v6_gso_send_check,
- .gso_segment = tcp_tso_segment,
- .gro_receive = tcp6_gro_receive,
- .gro_complete = tcp6_gro_complete,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
@@ -2121,10 +2021,10 @@ int __init tcpv6_init(void)
out:
return ret;
-out_tcpv6_protocol:
- inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
out_tcpv6_protosw:
inet6_unregister_protosw(&tcpv6_protosw);
+out_tcpv6_protocol:
+ inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
goto out;
}
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
new file mode 100644
index 000000000000..2ec6bf6a0aa0
--- /dev/null
+++ b/net/ipv6/tcpv6_offload.c
@@ -0,0 +1,95 @@
+/*
+ * IPV6 GSO/GRO offload support
+ * Linux INET6 implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * TCPv6 GSO/GRO support
+ */
+#include <linux/skbuff.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <net/ip6_checksum.h>
+#include "ip6_offload.h"
+
+static int tcp_v6_gso_send_check(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ipv6h;
+ struct tcphdr *th;
+
+ if (!pskb_may_pull(skb, sizeof(*th)))
+ return -EINVAL;
+
+ ipv6h = ipv6_hdr(skb);
+ th = tcp_hdr(skb);
+
+ th->check = 0;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
+ return 0;
+}
+
+static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ const struct ipv6hdr *iph = skb_gro_network_header(skb);
+ __wsum wsum;
+ __sum16 sum;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
+ skb->csum)) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
+ }
+flush:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+
+ case CHECKSUM_NONE:
+ wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
+ skb_gro_len(skb),
+ IPPROTO_TCP, 0));
+ sum = csum_fold(skb_checksum(skb,
+ skb_gro_offset(skb),
+ skb_gro_len(skb),
+ wsum));
+ if (sum)
+ goto flush;
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
+ }
+
+ return tcp_gro_receive(head, skb);
+}
+
+static int tcp6_gro_complete(struct sk_buff *skb)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct tcphdr *th = tcp_hdr(skb);
+
+ th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
+ &iph->saddr, &iph->daddr, 0);
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+
+ return tcp_gro_complete(skb);
+}
+
+static const struct net_offload tcpv6_offload = {
+ .callbacks = {
+ .gso_send_check = tcp_v6_gso_send_check,
+ .gso_segment = tcp_tso_segment,
+ .gro_receive = tcp6_gro_receive,
+ .gro_complete = tcp6_gro_complete,
+ },
+};
+
+int __init tcpv6_offload_init(void)
+{
+ return inet6_add_offload(&tcpv6_offload, IPPROTO_TCP);
+}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index fc9997260a6b..dfaa29b8b293 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1343,103 +1343,9 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
}
#endif
-static int udp6_ufo_send_check(struct sk_buff *skb)
-{
- const struct ipv6hdr *ipv6h;
- struct udphdr *uh;
-
- if (!pskb_may_pull(skb, sizeof(*uh)))
- return -EINVAL;
-
- ipv6h = ipv6_hdr(skb);
- uh = udp_hdr(skb);
-
- uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
- IPPROTO_UDP, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct udphdr, check);
- skb->ip_summed = CHECKSUM_PARTIAL;
- return 0;
-}
-
-static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
- netdev_features_t features)
-{
- struct sk_buff *segs = ERR_PTR(-EINVAL);
- unsigned int mss;
- unsigned int unfrag_ip6hlen, unfrag_len;
- struct frag_hdr *fptr;
- u8 *mac_start, *prevhdr;
- u8 nexthdr;
- u8 frag_hdr_sz = sizeof(struct frag_hdr);
- int offset;
- __wsum csum;
-
- mss = skb_shinfo(skb)->gso_size;
- if (unlikely(skb->len <= mss))
- goto out;
-
- if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
- /* Packet is from an untrusted source, reset gso_segs. */
- int type = skb_shinfo(skb)->gso_type;
-
- if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
- !(type & (SKB_GSO_UDP))))
- goto out;
-
- skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
-
- segs = NULL;
- goto out;
- }
-
- /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
- * do checksum of UDP packets sent as multiple IP fragments.
- */
- offset = skb_checksum_start_offset(skb);
- csum = skb_checksum(skb, offset, skb->len - offset, 0);
- offset += skb->csum_offset;
- *(__sum16 *)(skb->data + offset) = csum_fold(csum);
- skb->ip_summed = CHECKSUM_NONE;
-
- /* Check if there is enough headroom to insert fragment header. */
- if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) &&
- pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC))
- goto out;
-
- /* Find the unfragmentable header and shift it left by frag_hdr_sz
- * bytes to insert fragment header.
- */
- unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
- nexthdr = *prevhdr;
- *prevhdr = NEXTHDR_FRAGMENT;
- unfrag_len = skb_network_header(skb) - skb_mac_header(skb) +
- unfrag_ip6hlen;
- mac_start = skb_mac_header(skb);
- memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len);
-
- skb->mac_header -= frag_hdr_sz;
- skb->network_header -= frag_hdr_sz;
-
- fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
- fptr->nexthdr = nexthdr;
- fptr->reserved = 0;
- ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
-
- /* Fragment the skb. ipv6 header and the remaining fields of the
- * fragment header are updated in ipv6_gso_segment()
- */
- segs = skb_segment(skb, features);
-
-out:
- return segs;
-}
-
static const struct inet6_protocol udpv6_protocol = {
.handler = udpv6_rcv,
.err_handler = udpv6_err,
- .gso_send_check = udp6_ufo_send_check,
- .gso_segment = udp6_ufo_fragment,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
new file mode 100644
index 000000000000..0c8934a317c2
--- /dev/null
+++ b/net/ipv6/udp_offload.c
@@ -0,0 +1,120 @@
+/*
+ * IPV6 GSO/GRO offload support
+ * Linux INET6 implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * UDPv6 GSO support
+ */
+#include <linux/skbuff.h>
+#include <net/protocol.h>
+#include <net/ipv6.h>
+#include <net/udp.h>
+#include <net/ip6_checksum.h>
+#include "ip6_offload.h"
+
+static int udp6_ufo_send_check(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ipv6h;
+ struct udphdr *uh;
+
+ if (!pskb_may_pull(skb, sizeof(*uh)))
+ return -EINVAL;
+
+ ipv6h = ipv6_hdr(skb);
+ uh = udp_hdr(skb);
+
+ uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
+ IPPROTO_UDP, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ return 0;
+}
+
+static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ unsigned int mss;
+ unsigned int unfrag_ip6hlen, unfrag_len;
+ struct frag_hdr *fptr;
+ u8 *mac_start, *prevhdr;
+ u8 nexthdr;
+ u8 frag_hdr_sz = sizeof(struct frag_hdr);
+ int offset;
+ __wsum csum;
+
+ mss = skb_shinfo(skb)->gso_size;
+ if (unlikely(skb->len <= mss))
+ goto out;
+
+ if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
+ /* Packet is from an untrusted source, reset gso_segs. */
+ int type = skb_shinfo(skb)->gso_type;
+
+ if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
+ !(type & (SKB_GSO_UDP))))
+ goto out;
+
+ skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
+
+ segs = NULL;
+ goto out;
+ }
+
+ /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
+ * do checksum of UDP packets sent as multiple IP fragments.
+ */
+ offset = skb_checksum_start_offset(skb);
+ csum = skb_checksum(skb, offset, skb->len - offset, 0);
+ offset += skb->csum_offset;
+ *(__sum16 *)(skb->data + offset) = csum_fold(csum);
+ skb->ip_summed = CHECKSUM_NONE;
+
+ /* Check if there is enough headroom to insert fragment header. */
+ if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) &&
+ pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC))
+ goto out;
+
+ /* Find the unfragmentable header and shift it left by frag_hdr_sz
+ * bytes to insert fragment header.
+ */
+ unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+ nexthdr = *prevhdr;
+ *prevhdr = NEXTHDR_FRAGMENT;
+ unfrag_len = skb_network_header(skb) - skb_mac_header(skb) +
+ unfrag_ip6hlen;
+ mac_start = skb_mac_header(skb);
+ memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len);
+
+ skb->mac_header -= frag_hdr_sz;
+ skb->network_header -= frag_hdr_sz;
+
+ fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
+ fptr->nexthdr = nexthdr;
+ fptr->reserved = 0;
+ ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
+
+ /* Fragment the skb. ipv6 header and the remaining fields of the
+ * fragment header are updated in ipv6_gso_segment()
+ */
+ segs = skb_segment(skb, features);
+
+out:
+ return segs;
+}
+static const struct net_offload udpv6_offload = {
+ .callbacks = {
+ .gso_send_check = udp6_ufo_send_check,
+ .gso_segment = udp6_ufo_fragment,
+ },
+};
+
+int __init udp_offload_init(void)
+{
+ return inet6_add_offload(&udpv6_offload, IPPROTO_UDP);
+}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index f8c4c08ffb60..c9844135c9ca 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -20,7 +20,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/mip6.h>
#endif
@@ -182,7 +182,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
fl6->flowi6_proto = nexthdr;
return;
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPPROTO_MH:
if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
struct ip6_mh *mh;
@@ -327,21 +327,7 @@ static struct ctl_table_header *sysctl_hdr;
int __init xfrm6_init(void)
{
int ret;
- unsigned int gc_thresh;
-
- /*
- * We need a good default value for the xfrm6 gc threshold.
- * In ipv4 we set it to the route hash table size * 8, which
- * is half the size of the maximaum route cache for ipv4. It
- * would be good to do the same thing for v6, except the table is
- * constructed differently here. Here each table for a net namespace
- * can have FIB_TABLE_HASHSZ entries, so lets go with the same
- * computation that we used for ipv4 here. Also, lets keep the initial
- * gc_thresh to a minimum of 1024, since, the ipv6 route cache defaults
- * to that as a minimum as well
- */
- gc_thresh = FIB6_TABLE_HASHSZ * 8;
- xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh;
+
dst_entries_init(&xfrm6_dst_ops);
ret = xfrm6_policy_init();
@@ -370,7 +356,6 @@ void xfrm6_fini(void)
if (sysctl_hdr)
unregister_net_sysctl_table(sysctl_hdr);
#endif
- //xfrm6_input_fini();
xfrm6_policy_fini();
xfrm6_state_fini();
dst_entries_destroy(&xfrm6_dst_ops);
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 3f2f7c4ab721..d8c70b8efc24 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -101,7 +101,7 @@ static int __xfrm6_state_sort_cmp(void *p)
return 1;
else
return 3;
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case XFRM_MODE_ROUTEOPTIMIZATION:
case XFRM_MODE_IN_TRIGGER:
return 2;
@@ -134,7 +134,7 @@ static int __xfrm6_tmpl_sort_cmp(void *p)
switch (v->mode) {
case XFRM_MODE_TRANSPORT:
return 1;
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
case XFRM_MODE_ROUTEOPTIMIZATION:
case XFRM_MODE_IN_TRIGGER:
return 2;
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 95a3a7a336ba..496ce2cebcd7 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -421,6 +421,8 @@ static int ircomm_tty_install(struct tty_driver *driver, struct tty_struct *tty)
hashbin_insert(ircomm_tty, (irda_queue_t *) self, line, NULL);
}
+ tty->driver_data = self;
+
return tty_port_install(&self->port, driver, tty);
}
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 08897a3c7ec7..5b426a646544 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -141,7 +141,7 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
struct sock *sk;
int err;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 37b8b8ba31f7..76125c57ee6d 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -291,6 +291,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
out_del_dev:
free_netdev(dev);
+ spriv->dev = NULL;
out_del_session:
l2tp_session_delete(session);
out:
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 6c4cc12c7414..bbba3a19e944 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -632,7 +632,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
nla_put_u16(skb, L2TP_ATTR_MRU, session->mru)))
goto nla_put_failure;
- if ((session->ifname && session->ifname[0] &&
+ if ((session->ifname[0] &&
nla_put_string(skb, L2TP_ATTR_IFNAME, session->ifname)) ||
(session->cookie_len &&
nla_put(skb, L2TP_ATTR_COOKIE, session->cookie_len,
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index c2190005a114..88709882c464 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -160,7 +160,7 @@ static int llc_ui_create(struct net *net, struct socket *sock, int protocol,
struct sock *sk;
int rc = -ESOCKTNOSUPPORT;
- if (!capable(CAP_NET_RAW))
+ if (!ns_capable(net->user_ns, CAP_NET_RAW))
return -EPERM;
if (!net_eq(net, &init_net))
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 778465f217fa..fed899f600b2 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1643,7 +1643,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
void *data;
int copylen = *len, ret = 0;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (optval != SO_IP_SET)
return -EBADF;
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index ec3dba5dcd62..5c0b78528e55 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -173,6 +173,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
return adtfn(set, &nip, timeout, flags);
}
+ ip_to = ip;
if (tb[IPSET_ATTR_IP_TO]) {
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
@@ -185,8 +186,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (!cidr || cidr > 32)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
- } else
- ip_to = ip;
+ }
hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 0171f7502fa5..6283351f4eeb 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -162,7 +162,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
const struct ip_set_hash *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipport4_elem data = { };
- u32 ip, ip_to = 0, p = 0, port, port_to;
+ u32 ip, ip_to, p = 0, port, port_to;
u32 timeout = h->timeout;
bool with_ports = false;
int ret;
@@ -210,7 +210,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
return ip_set_eexist(ret, flags) ? 0 : ret;
}
- ip = ntohl(data.ip);
+ ip_to = ip = ntohl(data.ip);
if (tb[IPSET_ATTR_IP_TO]) {
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
@@ -223,8 +223,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (!cidr || cidr > 32)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
- } else
- ip_to = ip;
+ }
port_to = port = ntohs(data.port);
if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 6344ef551ec8..6a21271c8d5a 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -166,7 +166,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
const struct ip_set_hash *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportip4_elem data = { };
- u32 ip, ip_to = 0, p = 0, port, port_to;
+ u32 ip, ip_to, p = 0, port, port_to;
u32 timeout = h->timeout;
bool with_ports = false;
int ret;
@@ -218,7 +218,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
return ip_set_eexist(ret, flags) ? 0 : ret;
}
- ip = ntohl(data.ip);
+ ip_to = ip = ntohl(data.ip);
if (tb[IPSET_ATTR_IP_TO]) {
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
@@ -231,8 +231,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (!cidr || cidr > 32)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
- } else
- ip_to = ip;
+ }
port_to = port = ntohs(data.port);
if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index cb71f9a774e7..2d5cd4ee30eb 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -215,8 +215,8 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
const struct ip_set_hash *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet4_elem data = { .cidr = HOST_MASK - 1 };
- u32 ip, ip_to = 0, p = 0, port, port_to;
- u32 ip2_from = 0, ip2_to, ip2_last, ip2;
+ u32 ip, ip_to, p = 0, port, port_to;
+ u32 ip2_from, ip2_to, ip2_last, ip2;
u32 timeout = h->timeout;
bool with_ports = false;
u8 cidr;
@@ -286,6 +286,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
return ip_set_eexist(ret, flags) ? 0 : ret;
}
+ ip_to = ip;
if (tb[IPSET_ATTR_IP_TO]) {
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
@@ -306,6 +307,8 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (port > port_to)
swap(port, port_to);
}
+
+ ip2_to = ip2_from;
if (tb[IPSET_ATTR_IP2_TO]) {
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
if (ret)
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 8b2cffdfdd99..0c3b1670b0d1 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -28,12 +28,11 @@ if IP_VS
config IP_VS_IPV6
bool "IPv6 support for IPVS"
depends on IPV6 = y || IP_VS = IPV6
+ select IP6_NF_IPTABLES
---help---
- Add IPv6 support to IPVS. This is incomplete and might be dangerous.
+ Add IPv6 support to IPVS.
- See http://www.mindbasket.com/ipvs for more information.
-
- Say N if unsure.
+ Say Y if unsure.
config IP_VS_DEBUG
bool "IP virtual server debugging"
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 1548df9a7524..30e764ad021f 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -308,13 +308,12 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
static int
ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph,
- unsigned int proto_off, int inverse,
- struct ip_vs_conn_param *p)
+ int inverse, struct ip_vs_conn_param *p)
{
__be16 _ports[2], *pptr;
struct net *net = skb_net(skb);
- pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+ pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
if (pptr == NULL)
return 1;
@@ -329,12 +328,11 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
struct ip_vs_conn *
ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
- const struct ip_vs_iphdr *iph,
- unsigned int proto_off, int inverse)
+ const struct ip_vs_iphdr *iph, int inverse)
{
struct ip_vs_conn_param p;
- if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p))
+ if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p))
return NULL;
return ip_vs_conn_in_get(&p);
@@ -432,12 +430,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
struct ip_vs_conn *
ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
- const struct ip_vs_iphdr *iph,
- unsigned int proto_off, int inverse)
+ const struct ip_vs_iphdr *iph, int inverse)
{
struct ip_vs_conn_param p;
- if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p))
+ if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p))
return NULL;
return ip_vs_conn_out_get(&p);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 58918e20f9d5..fb45640dc1fb 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -222,11 +222,10 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
*/
static struct ip_vs_conn *
ip_vs_sched_persist(struct ip_vs_service *svc,
- struct sk_buff *skb,
- __be16 src_port, __be16 dst_port, int *ignored)
+ struct sk_buff *skb, __be16 src_port, __be16 dst_port,
+ int *ignored, struct ip_vs_iphdr *iph)
{
struct ip_vs_conn *cp = NULL;
- struct ip_vs_iphdr iph;
struct ip_vs_dest *dest;
struct ip_vs_conn *ct;
__be16 dport = 0; /* destination port to forward */
@@ -236,20 +235,18 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
union nf_inet_addr snet; /* source network of the client,
after masking */
- ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
-
/* Mask saddr with the netmask to adjust template granularity */
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
- ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask);
+ ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, svc->netmask);
else
#endif
- snet.ip = iph.saddr.ip & svc->netmask;
+ snet.ip = iph->saddr.ip & svc->netmask;
IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
"mnet %s\n",
- IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port),
- IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port),
+ IP_VS_DBG_ADDR(svc->af, &iph->saddr), ntohs(src_port),
+ IP_VS_DBG_ADDR(svc->af, &iph->daddr), ntohs(dst_port),
IP_VS_DBG_ADDR(svc->af, &snet));
/*
@@ -266,8 +263,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* is created for other persistent services.
*/
{
- int protocol = iph.protocol;
- const union nf_inet_addr *vaddr = &iph.daddr;
+ int protocol = iph->protocol;
+ const union nf_inet_addr *vaddr = &iph->daddr;
__be16 vport = 0;
if (dst_port == svc->port) {
@@ -342,14 +339,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
dport = dest->port;
flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
- && iph.protocol == IPPROTO_UDP)?
+ && iph->protocol == IPPROTO_UDP) ?
IP_VS_CONN_F_ONE_PACKET : 0;
/*
* Create a new connection according to the template
*/
- ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr,
- src_port, &iph.daddr, dst_port, &param);
+ ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr,
+ src_port, &iph->daddr, dst_port, &param);
cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
if (cp == NULL) {
@@ -392,18 +389,20 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
*/
struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
- struct ip_vs_proto_data *pd, int *ignored)
+ struct ip_vs_proto_data *pd, int *ignored,
+ struct ip_vs_iphdr *iph)
{
struct ip_vs_protocol *pp = pd->pp;
struct ip_vs_conn *cp = NULL;
- struct ip_vs_iphdr iph;
struct ip_vs_dest *dest;
__be16 _ports[2], *pptr;
unsigned int flags;
*ignored = 1;
- ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
- pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
+ /*
+ * IPv6 frags, only the first hit here.
+ */
+ pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
if (pptr == NULL)
return NULL;
@@ -423,7 +422,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
* Do not schedule replies from local real server.
*/
if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
- (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) {
+ (cp = pp->conn_in_get(svc->af, skb, iph, 1))) {
IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
"Not scheduling reply for existing connection");
__ip_vs_conn_put(cp);
@@ -434,7 +433,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
* Persistent service
*/
if (svc->flags & IP_VS_SVC_F_PERSISTENT)
- return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored);
+ return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored,
+ iph);
*ignored = 0;
@@ -456,7 +456,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
}
flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
- && iph.protocol == IPPROTO_UDP)?
+ && iph->protocol == IPPROTO_UDP) ?
IP_VS_CONN_F_ONE_PACKET : 0;
/*
@@ -465,9 +465,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
- &iph.saddr, pptr[0], &iph.daddr, pptr[1],
- &p);
+ ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
+ &iph->saddr, pptr[0], &iph->daddr,
+ pptr[1], &p);
cp = ip_vs_conn_new(&p, &dest->addr,
dest->port ? dest->port : pptr[1],
flags, dest, skb->mark);
@@ -496,19 +496,16 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
* no destination is available for a new connection.
*/
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
- struct ip_vs_proto_data *pd)
+ struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph)
{
__be16 _ports[2], *pptr;
- struct ip_vs_iphdr iph;
#ifdef CONFIG_SYSCTL
struct net *net;
struct netns_ipvs *ipvs;
int unicast;
#endif
- ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
-
- pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
+ pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
if (pptr == NULL) {
ip_vs_service_put(svc);
return NF_DROP;
@@ -519,10 +516,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
- unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
+ unicast = ipv6_addr_type(&iph->daddr.in6) & IPV6_ADDR_UNICAST;
else
#endif
- unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST);
+ unicast = (inet_addr_type(net, iph->daddr.ip) == RTN_UNICAST);
/* if it is fwmark-based service, the cache_bypass sysctl is up
and the destination is a non-local unicast, then create
@@ -532,7 +529,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
int ret;
struct ip_vs_conn *cp;
unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
- iph.protocol == IPPROTO_UDP)?
+ iph->protocol == IPPROTO_UDP) ?
IP_VS_CONN_F_ONE_PACKET : 0;
union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
@@ -542,9 +539,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
- &iph.saddr, pptr[0],
- &iph.daddr, pptr[1], &p);
+ ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
+ &iph->saddr, pptr[0],
+ &iph->daddr, pptr[1], &p);
cp = ip_vs_conn_new(&p, &daddr, 0,
IP_VS_CONN_F_BYPASS | flags,
NULL, skb->mark);
@@ -559,7 +556,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
/* transmit the first SYN packet */
- ret = cp->packet_xmit(skb, cp, pd->pp);
+ ret = cp->packet_xmit(skb, cp, pd->pp, iph);
/* do not touch skb anymore */
atomic_inc(&cp->in_pkts);
@@ -654,14 +651,6 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
return err;
}
-#ifdef CONFIG_IP_VS_IPV6
-static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
-{
- /* TODO IPv6: Find out what to do here for IPv6 */
- return 0;
-}
-#endif
-
static int ip_vs_route_me_harder(int af, struct sk_buff *skb)
{
#ifdef CONFIG_IP_VS_IPV6
@@ -732,10 +721,19 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, int inout)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
- unsigned int icmp_offset = sizeof(struct ipv6hdr);
- struct icmp6hdr *icmph = (struct icmp6hdr *)(skb_network_header(skb) +
- icmp_offset);
- struct ipv6hdr *ciph = (struct ipv6hdr *)(icmph + 1);
+ unsigned int icmp_offset = 0;
+ unsigned int offs = 0; /* header offset*/
+ int protocol;
+ struct icmp6hdr *icmph;
+ struct ipv6hdr *ciph;
+ unsigned short fragoffs;
+
+ ipv6_find_hdr(skb, &icmp_offset, IPPROTO_ICMPV6, &fragoffs, NULL);
+ icmph = (struct icmp6hdr *)(skb_network_header(skb) + icmp_offset);
+ offs = icmp_offset + sizeof(struct icmp6hdr);
+ ciph = (struct ipv6hdr *)(skb_network_header(skb) + offs);
+
+ protocol = ipv6_find_hdr(skb, &offs, -1, &fragoffs, NULL);
if (inout) {
iph->saddr = cp->vaddr.in6;
@@ -746,10 +744,13 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
}
/* the TCP/UDP/SCTP port */
- if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr ||
- IPPROTO_SCTP == ciph->nexthdr) {
- __be16 *ports = (void *)ciph + sizeof(struct ipv6hdr);
+ if (!fragoffs && (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
+ IPPROTO_SCTP == protocol)) {
+ __be16 *ports = (void *)(skb_network_header(skb) + offs);
+ IP_VS_DBG(11, "%s() changed port %d to %d\n", __func__,
+ ntohs(inout ? ports[1] : ports[0]),
+ ntohs(inout ? cp->vport : cp->dport));
if (inout)
ports[1] = cp->vport;
else
@@ -898,51 +899,35 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
"Checking outgoing ICMP for");
- offset += cih->ihl * 4;
-
- ip_vs_fill_iphdr(AF_INET, cih, &ciph);
+ ip_vs_fill_ip4hdr(cih, &ciph);
+ ciph.len += offset;
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
+ cp = pp->conn_out_get(AF_INET, skb, &ciph, 1);
if (!cp)
return NF_ACCEPT;
snet.ip = iph->saddr;
return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp,
- pp, offset, ihl);
+ pp, ciph.len, ihl);
}
#ifdef CONFIG_IP_VS_IPV6
static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
- unsigned int hooknum)
+ unsigned int hooknum, struct ip_vs_iphdr *ipvsh)
{
- struct ipv6hdr *iph;
struct icmp6hdr _icmph, *ic;
- struct ipv6hdr _ciph, *cih; /* The ip header contained
- within the ICMP */
- struct ip_vs_iphdr ciph;
+ struct ipv6hdr _ip6h, *ip6h; /* The ip header contained within ICMP */
+ struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
- unsigned int offset;
union nf_inet_addr snet;
+ unsigned int writable;
*related = 1;
-
- /* reassemble IP fragments */
- if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
- if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
- return NF_STOLEN;
- }
-
- iph = ipv6_hdr(skb);
- offset = sizeof(struct ipv6hdr);
- ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+ ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh);
if (ic == NULL)
return NF_DROP;
- IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) %pI6->%pI6\n",
- ic->icmp6_type, ntohs(icmpv6_id(ic)),
- &iph->saddr, &iph->daddr);
-
/*
* Work through seeing if this is for us.
* These checks are supposed to be in an order that means easy
@@ -950,42 +935,45 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
* this means that some packets will manage to get a long way
* down this stack and then be rejected, but that's life.
*/
- if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) &&
- (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
- (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
+ if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) {
*related = 0;
return NF_ACCEPT;
}
+ /* Fragment header that is before ICMP header tells us that:
+ * it's not an error message since they can't be fragmented.
+ */
+ if (ipvsh->flags & IP6T_FH_F_FRAG)
+ return NF_DROP;
+
+ IP_VS_DBG(8, "Outgoing ICMPv6 (%d,%d) %pI6c->%pI6c\n",
+ ic->icmp6_type, ntohs(icmpv6_id(ic)),
+ &ipvsh->saddr, &ipvsh->daddr);
/* Now find the contained IP header */
- offset += sizeof(_icmph);
- cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
- if (cih == NULL)
+ ciph.len = ipvsh->len + sizeof(_icmph);
+ ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
+ if (ip6h == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
-
- pp = ip_vs_proto_get(cih->nexthdr);
+ ciph.saddr.in6 = ip6h->saddr; /* conn_out_get() handles reverse order */
+ ciph.daddr.in6 = ip6h->daddr;
+ /* skip possible IPv6 exthdrs of contained IPv6 packet */
+ ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
+ if (ciph.protocol < 0)
+ return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
+
+ pp = ip_vs_proto_get(ciph.protocol);
if (!pp)
return NF_ACCEPT;
- /* Is the embedded protocol header present? */
- /* TODO: we don't support fragmentation at the moment anyways */
- if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
- return NF_ACCEPT;
-
- IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
- "Checking outgoing ICMPv6 for");
-
- offset += sizeof(struct ipv6hdr);
-
- ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
+ cp = pp->conn_out_get(AF_INET6, skb, &ciph, 1);
if (!cp)
return NF_ACCEPT;
- snet.in6 = iph->saddr;
- return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp,
- pp, offset, sizeof(struct ipv6hdr));
+ snet.in6 = ciph.saddr.in6;
+ writable = ciph.len;
+ return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp,
+ pp, writable, sizeof(struct ipv6hdr));
}
#endif
@@ -1018,17 +1006,17 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
*/
static unsigned int
handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
- struct ip_vs_conn *cp, int ihl)
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
{
struct ip_vs_protocol *pp = pd->pp;
IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
- if (!skb_make_writable(skb, ihl))
+ if (!skb_make_writable(skb, iph->len))
goto drop;
/* mangle the packet */
- if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
+ if (pp->snat_handler && !pp->snat_handler(skb, pp, cp, iph))
goto drop;
#ifdef CONFIG_IP_VS_IPV6
@@ -1115,17 +1103,22 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
if (!net_ipvs(net)->enable)
return NF_ACCEPT;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+ ip_vs_fill_iph_skb(af, skb, &iph);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
+ if (!iph.fragoffs && skb_nfct_reasm(skb)) {
+ struct sk_buff *reasm = skb_nfct_reasm(skb);
+ /* Save fw mark for coming frags */
+ reasm->ipvs_property = 1;
+ reasm->mark = skb->mark;
+ }
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related;
int verdict = ip_vs_out_icmp_v6(skb, &related,
- hooknum);
+ hooknum, &iph);
if (related)
return verdict;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
} else
#endif
@@ -1135,7 +1128,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
if (related)
return verdict;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
pd = ip_vs_proto_data_get(net, iph.protocol);
@@ -1145,39 +1137,31 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
/* reassemble IP fragments */
#ifdef CONFIG_IP_VS_IPV6
- if (af == AF_INET6) {
- if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
- if (ip_vs_gather_frags_v6(skb,
- ip_vs_defrag_user(hooknum)))
- return NF_STOLEN;
- }
-
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
- } else
+ if (af == AF_INET)
#endif
if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) {
if (ip_vs_gather_frags(skb,
ip_vs_defrag_user(hooknum)))
return NF_STOLEN;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+ ip_vs_fill_ip4hdr(skb_network_header(skb), &iph);
}
/*
* Check if the packet belongs to an existing entry
*/
- cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
+ cp = pp->conn_out_get(af, skb, &iph, 0);
if (likely(cp))
- return handle_response(af, skb, pd, cp, iph.len);
+ return handle_response(af, skb, pd, cp, &iph);
if (sysctl_nat_icmp_send(net) &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP ||
pp->protocol == IPPROTO_SCTP)) {
__be16 _ports[2], *pptr;
- pptr = skb_header_pointer(skb, iph.len,
- sizeof(_ports), _ports);
+ pptr = frag_safe_skb_hp(skb, iph.len,
+ sizeof(_ports), _ports, &iph);
if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
if (ip_vs_lookup_real_service(net, af, iph.protocol,
@@ -1375,13 +1359,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
"Checking incoming ICMP for");
offset2 = offset;
- offset += cih->ihl * 4;
-
- ip_vs_fill_iphdr(AF_INET, cih, &ciph);
+ ip_vs_fill_ip4hdr(cih, &ciph);
+ ciph.len += offset;
+ offset = ciph.len;
/* The embedded headers contain source and dest in reverse order.
* For IPIP this is error for request, not for reply.
*/
- cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, ipip ? 0 : 1);
+ cp = pp->conn_in_get(AF_INET, skb, &ciph, ipip ? 0 : 1);
if (!cp)
return NF_ACCEPT;
@@ -1450,7 +1434,7 @@ ignore_ipip:
ip_vs_in_stats(cp, skb);
if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
offset += 2 * sizeof(__u16);
- verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum);
+ verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph);
out:
__ip_vs_conn_put(cp);
@@ -1459,38 +1443,24 @@ out:
}
#ifdef CONFIG_IP_VS_IPV6
-static int
-ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
+static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
+ unsigned int hooknum, struct ip_vs_iphdr *iph)
{
struct net *net = NULL;
- struct ipv6hdr *iph;
+ struct ipv6hdr _ip6h, *ip6h;
struct icmp6hdr _icmph, *ic;
- struct ipv6hdr _ciph, *cih; /* The ip header contained
- within the ICMP */
- struct ip_vs_iphdr ciph;
+ struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
- unsigned int offset, verdict;
+ unsigned int offs_ciph, writable, verdict;
*related = 1;
- /* reassemble IP fragments */
- if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
- if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
- return NF_STOLEN;
- }
-
- iph = ipv6_hdr(skb);
- offset = sizeof(struct ipv6hdr);
- ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+ ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph, iph);
if (ic == NULL)
return NF_DROP;
- IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) %pI6->%pI6\n",
- ic->icmp6_type, ntohs(icmpv6_id(ic)),
- &iph->saddr, &iph->daddr);
-
/*
* Work through seeing if this is for us.
* These checks are supposed to be in an order that means easy
@@ -1498,47 +1468,71 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
* this means that some packets will manage to get a long way
* down this stack and then be rejected, but that's life.
*/
- if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) &&
- (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
- (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
+ if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) {
*related = 0;
return NF_ACCEPT;
}
+ /* Fragment header that is before ICMP header tells us that:
+ * it's not an error message since they can't be fragmented.
+ */
+ if (iph->flags & IP6T_FH_F_FRAG)
+ return NF_DROP;
+
+ IP_VS_DBG(8, "Incoming ICMPv6 (%d,%d) %pI6c->%pI6c\n",
+ ic->icmp6_type, ntohs(icmpv6_id(ic)),
+ &iph->saddr, &iph->daddr);
/* Now find the contained IP header */
- offset += sizeof(_icmph);
- cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
- if (cih == NULL)
+ ciph.len = iph->len + sizeof(_icmph);
+ offs_ciph = ciph.len; /* Save ip header offset */
+ ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
+ if (ip6h == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
+ ciph.saddr.in6 = ip6h->saddr; /* conn_in_get() handles reverse order */
+ ciph.daddr.in6 = ip6h->daddr;
+ /* skip possible IPv6 exthdrs of contained IPv6 packet */
+ ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
+ if (ciph.protocol < 0)
+ return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
net = skb_net(skb);
- pd = ip_vs_proto_data_get(net, cih->nexthdr);
+ pd = ip_vs_proto_data_get(net, ciph.protocol);
if (!pd)
return NF_ACCEPT;
pp = pd->pp;
- /* Is the embedded protocol header present? */
- /* TODO: we don't support fragmentation at the moment anyways */
- if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
+ /* Cannot handle fragmented embedded protocol */
+ if (ciph.fragoffs)
return NF_ACCEPT;
- IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
+ IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph,
"Checking incoming ICMPv6 for");
- offset += sizeof(struct ipv6hdr);
+ /* The embedded headers contain source and dest in reverse order
+ * if not from localhost
+ */
+ cp = pp->conn_in_get(AF_INET6, skb, &ciph,
+ (hooknum == NF_INET_LOCAL_OUT) ? 0 : 1);
- ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
- /* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
if (!cp)
return NF_ACCEPT;
+ /* VS/TUN, VS/DR and LOCALNODE just let it go */
+ if ((hooknum == NF_INET_LOCAL_OUT) &&
+ (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) {
+ __ip_vs_conn_put(cp);
+ return NF_ACCEPT;
+ }
/* do the statistics and put it back */
ip_vs_in_stats(cp, skb);
- if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr ||
- IPPROTO_SCTP == cih->nexthdr)
- offset += 2 * sizeof(__u16);
- verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum);
+
+ /* Need to mangle contained IPv6 header in ICMPv6 packet */
+ writable = ciph.len;
+ if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol ||
+ IPPROTO_SCTP == ciph.protocol)
+ writable += 2 * sizeof(__u16); /* Also mangle ports */
+
+ verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, writable, hooknum, &ciph);
__ip_vs_conn_put(cp);
@@ -1574,7 +1568,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
if (unlikely((skb->pkt_type != PACKET_HOST &&
hooknum != NF_INET_LOCAL_OUT) ||
!skb_dst(skb))) {
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+ ip_vs_fill_iph_skb(af, skb, &iph);
IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
" ignored in hook %u\n",
skb->pkt_type, iph.protocol,
@@ -1586,7 +1580,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
if (!net_ipvs(net)->enable)
return NF_ACCEPT;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+ ip_vs_fill_iph_skb(af, skb, &iph);
/* Bad... Do not break raw sockets */
if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
@@ -1600,13 +1594,19 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
+ if (!iph.fragoffs && skb_nfct_reasm(skb)) {
+ struct sk_buff *reasm = skb_nfct_reasm(skb);
+ /* Save fw mark for coming frags. */
+ reasm->ipvs_property = 1;
+ reasm->mark = skb->mark;
+ }
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related;
- int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum);
+ int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum,
+ &iph);
if (related)
return verdict;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
} else
#endif
@@ -1616,7 +1616,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
if (related)
return verdict;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
/* Protocol supported? */
@@ -1627,12 +1626,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
/*
* Check if the packet belongs to an existing connection entry
*/
- cp = pp->conn_in_get(af, skb, &iph, iph.len, 0);
-
- if (unlikely(!cp)) {
+ cp = pp->conn_in_get(af, skb, &iph, 0);
+ if (unlikely(!cp) && !iph.fragoffs) {
+ /* No (second) fragments need to enter here, as nf_defrag_ipv6
+ * replayed fragment zero will already have created the cp
+ */
int v;
- if (!pp->conn_schedule(af, skb, pd, &v, &cp))
+ /* Schedule and create new connection entry into &cp */
+ if (!pp->conn_schedule(af, skb, pd, &v, &cp, &iph))
return v;
}
@@ -1640,6 +1642,14 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
/* sorry, all this trouble for a no-hit :) */
IP_VS_DBG_PKT(12, af, pp, skb, 0,
"ip_vs_in: packet continues traversal as normal");
+ if (iph.fragoffs && !skb_nfct_reasm(skb)) {
+ /* Fragment that couldn't be mapped to a conn entry
+ * and don't have any pointer to a reasm skb
+ * is missing module nf_defrag_ipv6
+ */
+ IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
+ IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment");
+ }
return NF_ACCEPT;
}
@@ -1662,7 +1672,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
ip_vs_in_stats(cp, skb);
ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
if (cp->packet_xmit)
- ret = cp->packet_xmit(skb, cp, pp);
+ ret = cp->packet_xmit(skb, cp, pp, &iph);
/* do not touch skb anymore */
else {
IP_VS_DBG_RL("warning: packet_xmit is null");
@@ -1724,6 +1734,38 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
#ifdef CONFIG_IP_VS_IPV6
/*
+ * AF_INET6 fragment handling
+ * Copy info from first fragment, to the rest of them.
+ */
+static unsigned int
+ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct sk_buff *reasm = skb_nfct_reasm(skb);
+ struct net *net;
+
+ /* Skip if not a "replay" from nf_ct_frag6_output or first fragment.
+ * ipvs_property is set when checking first fragment
+ * in ip_vs_in() and ip_vs_out().
+ */
+ if (reasm)
+ IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property);
+ if (!reasm || !reasm->ipvs_property)
+ return NF_ACCEPT;
+
+ net = skb_net(skb);
+ if (!net_ipvs(net)->enable)
+ return NF_ACCEPT;
+
+ /* Copy stored fw mark, saved in ip_vs_{in,out} */
+ skb->mark = reasm->mark;
+
+ return NF_ACCEPT;
+}
+
+/*
* AF_INET6 handler in NF_INET_LOCAL_IN chain
* Schedule and forward packets from remote clients
*/
@@ -1793,8 +1835,10 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
{
int r;
struct net *net;
+ struct ip_vs_iphdr iphdr;
- if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6)
+ ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr);
+ if (iphdr.protocol != IPPROTO_ICMPV6)
return NF_ACCEPT;
/* ipvs enabled in this netns ? */
@@ -1802,7 +1846,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
if (!net_ipvs(net)->enable)
return NF_ACCEPT;
- return ip_vs_in_icmp_v6(skb, &r, hooknum);
+ return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr);
}
#endif
@@ -1860,6 +1904,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.priority = 100,
},
#ifdef CONFIG_IP_VS_IPV6
+ /* After mangle & nat fetch 2:nd fragment and following */
+ {
+ .hook = ip_vs_preroute_frag6,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP6_PRI_NAT_DST + 1,
+ },
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply6,
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 7e7198b51c06..ec664cbb119f 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2339,7 +2339,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
struct ip_vs_dest_user_kern udest;
struct netns_ipvs *ipvs = net_ipvs(net);
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
@@ -2589,6 +2589,8 @@ __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
struct ip_vs_proto_data *pd;
#endif
+ memset(u, 0, sizeof (*u));
+
#ifdef CONFIG_IP_VS_PROTO_TCP
pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
@@ -2630,7 +2632,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
struct netns_ipvs *ipvs = net_ipvs(net);
BUG_ON(!net);
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
@@ -2766,7 +2768,6 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
struct ip_vs_timeout_user t;
- memset(&t, 0, sizeof(t));
__ip_vs_get_timeouts(net, &t);
if (copy_to_user(user, &t, sizeof(t)) != 0)
ret = -EFAULT;
@@ -3698,6 +3699,10 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
if (tbl == NULL)
return -ENOMEM;
+
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ tbl[0].procname = NULL;
} else
tbl = vs_vars;
/* Initialize sysctl defaults */
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index 8b7dca9ea422..7f3b0cc00b7a 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -215,7 +215,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
struct ip_vs_dh_bucket *tbl;
struct ip_vs_iphdr iph;
- ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
+ ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index df646ccf08a7..fdd89b9564ea 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -479,7 +479,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
struct ip_vs_dest *dest = NULL;
struct ip_vs_lblc_entry *en;
- ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
+ ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
@@ -560,6 +560,11 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
GFP_KERNEL);
if (ipvs->lblc_ctl_table == NULL)
return -ENOMEM;
+
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ ipvs->lblc_ctl_table[0].procname = NULL;
+
} else
ipvs->lblc_ctl_table = vs_vars_table;
ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION;
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 570e31ea427a..c03b6a3ade2f 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -649,7 +649,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
struct ip_vs_dest *dest = NULL;
struct ip_vs_lblcr_entry *en;
- ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
+ ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
@@ -754,6 +754,10 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
GFP_KERNEL);
if (ipvs->lblcr_ctl_table == NULL)
return -ENOMEM;
+
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ ipvs->lblcr_ctl_table[0].procname = NULL;
} else
ipvs->lblcr_ctl_table = vs_vars_table;
ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 022e77e1e766..c8beafd401aa 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -82,7 +82,7 @@ void
ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
{
enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
struct nf_conntrack_tuple new_tuple;
if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) ||
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 1aa5cac748c4..12475ef88daf 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -68,23 +68,31 @@ static int get_callid(const char *dptr, unsigned int dataoff,
static int
ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
{
+ struct sk_buff *reasm = skb_nfct_reasm(skb);
struct ip_vs_iphdr iph;
unsigned int dataoff, datalen, matchoff, matchlen;
const char *dptr;
int retc;
- ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph);
+ ip_vs_fill_iph_skb(p->af, skb, &iph);
/* Only useful with UDP */
if (iph.protocol != IPPROTO_UDP)
return -EINVAL;
+ /* todo: IPv6 fragments:
+ * I think this only should be done for the first fragment. /HS
+ */
+ if (reasm) {
+ skb = reasm;
+ dataoff = iph.thoff_reasm + sizeof(struct udphdr);
+ } else
+ dataoff = iph.len + sizeof(struct udphdr);
- /* No Data ? */
- dataoff = iph.len + sizeof(struct udphdr);
if (dataoff >= skb->len)
return -EINVAL;
-
- if ((retc=skb_linearize(skb)) < 0)
+ /* todo: Check if this will mess-up the reasm skb !!! /HS */
+ retc = skb_linearize(skb);
+ if (retc < 0)
return retc;
dptr = skb->data + dataoff;
datalen = skb->len - dataoff;
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 50d82186da87..939f7fbe9b46 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -280,17 +280,17 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
if (ih == NULL)
sprintf(buf, "TRUNCATED");
else if (ih->nexthdr == IPPROTO_FRAGMENT)
- sprintf(buf, "%pI6->%pI6 frag", &ih->saddr, &ih->daddr);
+ sprintf(buf, "%pI6c->%pI6c frag", &ih->saddr, &ih->daddr);
else {
__be16 _ports[2], *pptr;
pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
sizeof(_ports), _ports);
if (pptr == NULL)
- sprintf(buf, "TRUNCATED %pI6->%pI6",
+ sprintf(buf, "TRUNCATED %pI6c->%pI6c",
&ih->saddr, &ih->daddr);
else
- sprintf(buf, "%pI6:%u->%pI6:%u",
+ sprintf(buf, "%pI6c:%u->%pI6c:%u",
&ih->saddr, ntohs(pptr[0]),
&ih->daddr, ntohs(pptr[1]));
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 5b8eb8b12c3e..5de3dd312c0f 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -57,7 +57,7 @@ ah_esp_conn_fill_param_proto(struct net *net, int af,
static struct ip_vs_conn *
ah_esp_conn_in_get(int af, const struct sk_buff *skb,
- const struct ip_vs_iphdr *iph, unsigned int proto_off,
+ const struct ip_vs_iphdr *iph,
int inverse)
{
struct ip_vs_conn *cp;
@@ -85,9 +85,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb,
static struct ip_vs_conn *
ah_esp_conn_out_get(int af, const struct sk_buff *skb,
- const struct ip_vs_iphdr *iph,
- unsigned int proto_off,
- int inverse)
+ const struct ip_vs_iphdr *iph, int inverse)
{
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
@@ -110,7 +108,8 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
static int
ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
- int *verdict, struct ip_vs_conn **cpp)
+ int *verdict, struct ip_vs_conn **cpp,
+ struct ip_vs_iphdr *iph)
{
/*
* AH/ESP is only related traffic. Pass the packet to IP stack.
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 9f3fb751c491..746048b13ef3 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -10,28 +10,26 @@
static int
sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
- int *verdict, struct ip_vs_conn **cpp)
+ int *verdict, struct ip_vs_conn **cpp,
+ struct ip_vs_iphdr *iph)
{
struct net *net;
struct ip_vs_service *svc;
sctp_chunkhdr_t _schunkh, *sch;
sctp_sctphdr_t *sh, _sctph;
- struct ip_vs_iphdr iph;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
-
- sh = skb_header_pointer(skb, iph.len, sizeof(_sctph), &_sctph);
+ sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
if (sh == NULL)
return 0;
- sch = skb_header_pointer(skb, iph.len + sizeof(sctp_sctphdr_t),
+ sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t),
sizeof(_schunkh), &_schunkh);
if (sch == NULL)
return 0;
net = skb_net(skb);
if ((sch->type == SCTP_CID_INIT) &&
- (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
- &iph.daddr, sh->dest))) {
+ (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
+ &iph->daddr, sh->dest))) {
int ignored;
if (ip_vs_todrop(net_ipvs(net))) {
@@ -47,10 +45,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* Let the virtual server select a real server for the
* incoming connection, and create a connection entry.
*/
- *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+ *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
if (!*cpp && ignored <= 0) {
if (!ignored)
- *verdict = ip_vs_leave(svc, skb, pd);
+ *verdict = ip_vs_leave(svc, skb, pd, iph);
else {
ip_vs_service_put(svc);
*verdict = NF_DROP;
@@ -64,20 +62,18 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
}
static int
-sctp_snat_handler(struct sk_buff *skb,
- struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
{
sctp_sctphdr_t *sctph;
- unsigned int sctphoff;
+ unsigned int sctphoff = iph->len;
struct sk_buff *iter;
__be32 crc32;
#ifdef CONFIG_IP_VS_IPV6
- if (cp->af == AF_INET6)
- sctphoff = sizeof(struct ipv6hdr);
- else
+ if (cp->af == AF_INET6 && iph->fragoffs)
+ return 1;
#endif
- sctphoff = ip_hdrlen(skb);
/* csum_check requires unshared skb */
if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
@@ -108,20 +104,18 @@ sctp_snat_handler(struct sk_buff *skb,
}
static int
-sctp_dnat_handler(struct sk_buff *skb,
- struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
{
sctp_sctphdr_t *sctph;
- unsigned int sctphoff;
+ unsigned int sctphoff = iph->len;
struct sk_buff *iter;
__be32 crc32;
#ifdef CONFIG_IP_VS_IPV6
- if (cp->af == AF_INET6)
- sctphoff = sizeof(struct ipv6hdr);
- else
+ if (cp->af == AF_INET6 && iph->fragoffs)
+ return 1;
#endif
- sctphoff = ip_hdrlen(skb);
/* csum_check requires unshared skb */
if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index cd609cc62721..9af653a75825 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -33,16 +33,14 @@
static int
tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
- int *verdict, struct ip_vs_conn **cpp)
+ int *verdict, struct ip_vs_conn **cpp,
+ struct ip_vs_iphdr *iph)
{
struct net *net;
struct ip_vs_service *svc;
struct tcphdr _tcph, *th;
- struct ip_vs_iphdr iph;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
-
- th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
if (th == NULL) {
*verdict = NF_DROP;
return 0;
@@ -50,8 +48,8 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
net = skb_net(skb);
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
if (th->syn &&
- (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
- &iph.daddr, th->dest))) {
+ (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
+ &iph->daddr, th->dest))) {
int ignored;
if (ip_vs_todrop(net_ipvs(net))) {
@@ -68,10 +66,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* Let the virtual server select a real server for the
* incoming connection, and create a connection entry.
*/
- *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+ *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
if (!*cpp && ignored <= 0) {
if (!ignored)
- *verdict = ip_vs_leave(svc, skb, pd);
+ *verdict = ip_vs_leave(svc, skb, pd, iph);
else {
ip_vs_service_put(svc);
*verdict = NF_DROP;
@@ -128,20 +126,18 @@ tcp_partial_csum_update(int af, struct tcphdr *tcph,
static int
-tcp_snat_handler(struct sk_buff *skb,
- struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
{
struct tcphdr *tcph;
- unsigned int tcphoff;
+ unsigned int tcphoff = iph->len;
int oldlen;
int payload_csum = 0;
#ifdef CONFIG_IP_VS_IPV6
- if (cp->af == AF_INET6)
- tcphoff = sizeof(struct ipv6hdr);
- else
+ if (cp->af == AF_INET6 && iph->fragoffs)
+ return 1;
#endif
- tcphoff = ip_hdrlen(skb);
oldlen = skb->len - tcphoff;
/* csum_check requires unshared skb */
@@ -208,20 +204,18 @@ tcp_snat_handler(struct sk_buff *skb,
static int
-tcp_dnat_handler(struct sk_buff *skb,
- struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
{
struct tcphdr *tcph;
- unsigned int tcphoff;
+ unsigned int tcphoff = iph->len;
int oldlen;
int payload_csum = 0;
#ifdef CONFIG_IP_VS_IPV6
- if (cp->af == AF_INET6)
- tcphoff = sizeof(struct ipv6hdr);
- else
+ if (cp->af == AF_INET6 && iph->fragoffs)
+ return 1;
#endif
- tcphoff = ip_hdrlen(skb);
oldlen = skb->len - tcphoff;
/* csum_check requires unshared skb */
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 2fedb2dcb3d1..503a842c90d2 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -30,23 +30,22 @@
static int
udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
- int *verdict, struct ip_vs_conn **cpp)
+ int *verdict, struct ip_vs_conn **cpp,
+ struct ip_vs_iphdr *iph)
{
struct net *net;
struct ip_vs_service *svc;
struct udphdr _udph, *uh;
- struct ip_vs_iphdr iph;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
-
- uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
+ /* IPv6 fragments, only first fragment will hit this */
+ uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
if (uh == NULL) {
*verdict = NF_DROP;
return 0;
}
net = skb_net(skb);
- svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
- &iph.daddr, uh->dest);
+ svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
+ &iph->daddr, uh->dest);
if (svc) {
int ignored;
@@ -64,10 +63,10 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* Let the virtual server select a real server for the
* incoming connection, and create a connection entry.
*/
- *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+ *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
if (!*cpp && ignored <= 0) {
if (!ignored)
- *verdict = ip_vs_leave(svc, skb, pd);
+ *verdict = ip_vs_leave(svc, skb, pd, iph);
else {
ip_vs_service_put(svc);
*verdict = NF_DROP;
@@ -125,20 +124,18 @@ udp_partial_csum_update(int af, struct udphdr *uhdr,
static int
-udp_snat_handler(struct sk_buff *skb,
- struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
{
struct udphdr *udph;
- unsigned int udphoff;
+ unsigned int udphoff = iph->len;
int oldlen;
int payload_csum = 0;
#ifdef CONFIG_IP_VS_IPV6
- if (cp->af == AF_INET6)
- udphoff = sizeof(struct ipv6hdr);
- else
+ if (cp->af == AF_INET6 && iph->fragoffs)
+ return 1;
#endif
- udphoff = ip_hdrlen(skb);
oldlen = skb->len - udphoff;
/* csum_check requires unshared skb */
@@ -210,20 +207,18 @@ udp_snat_handler(struct sk_buff *skb,
static int
-udp_dnat_handler(struct sk_buff *skb,
- struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
{
struct udphdr *udph;
- unsigned int udphoff;
+ unsigned int udphoff = iph->len;
int oldlen;
int payload_csum = 0;
#ifdef CONFIG_IP_VS_IPV6
- if (cp->af == AF_INET6)
- udphoff = sizeof(struct ipv6hdr);
- else
+ if (cp->af == AF_INET6 && iph->fragoffs)
+ return 1;
#endif
- udphoff = ip_hdrlen(skb);
oldlen = skb->len - udphoff;
/* csum_check requires unshared skb */
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index 08dbdd5bc18f..d6bf20d6cdbe 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -159,7 +159,7 @@ void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
svc->fwmark, msg);
#ifdef CONFIG_IP_VS_IPV6
} else if (svc->af == AF_INET6) {
- IP_VS_ERR_RL("%s: %s [%pI6]:%d - %s\n",
+ IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
svc->scheduler->name,
ip_vs_proto_name(svc->protocol),
&svc->addr.in6, ntohs(svc->port), msg);
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 05126521743e..e33126994628 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -228,7 +228,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
struct ip_vs_sh_bucket *tbl;
struct ip_vs_iphdr iph;
- ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
+ ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index cc4c8095681a..ee6b7a9f1ec2 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -338,7 +338,7 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
local = __ip_vs_is_local_route6(rt);
if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
rt_mode)) {
- IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n",
+ IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
local ? "local":"non-local", daddr);
dst_release(&rt->dst);
return NULL;
@@ -346,8 +346,8 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
!((ort = (struct rt6_info *) skb_dst(skb)) &&
__ip_vs_is_local_route6(ort))) {
- IP_VS_DBG_RL("Redirect from non-local address %pI6 to local "
- "requires NAT method, dest: %pI6\n",
+ IP_VS_DBG_RL("Redirect from non-local address %pI6c to local "
+ "requires NAT method, dest: %pI6c\n",
&ipv6_hdr(skb)->daddr, daddr);
dst_release(&rt->dst);
return NULL;
@@ -355,8 +355,8 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
IPV6_ADDR_LOOPBACK)) {
- IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 "
- "to non-local address, dest: %pI6\n",
+ IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c "
+ "to non-local address, dest: %pI6c\n",
&ipv6_hdr(skb)->saddr, daddr);
dst_release(&rt->dst);
return NULL;
@@ -427,7 +427,7 @@ do { \
*/
int
ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
- struct ip_vs_protocol *pp)
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
/* we do not touch skb and do not need pskb ptr */
IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
@@ -441,7 +441,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
*/
int
ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
- struct ip_vs_protocol *pp)
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
struct rtable *rt; /* Route to the other host */
struct iphdr *iph = ip_hdr(skb);
@@ -496,16 +496,16 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
- struct ip_vs_protocol *pp)
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
{
struct rt6_info *rt; /* Route to the other host */
- struct ipv6hdr *iph = ipv6_hdr(skb);
int mtu;
EnterFunction(10);
- if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0,
- IP_VS_RT_MODE_NON_LOCAL)))
+ rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0,
+ IP_VS_RT_MODE_NON_LOCAL);
+ if (!rt)
goto tx_error_icmp;
/* MTU checking */
@@ -516,7 +516,9 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->dev = net->loopback_dev;
}
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ /* only send ICMP too big on first fragment */
+ if (!iph->fragoffs)
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
dst_release(&rt->dst);
IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error;
@@ -559,7 +561,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
*/
int
ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
- struct ip_vs_protocol *pp)
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
struct rtable *rt; /* Route to the other host */
int mtu;
@@ -592,7 +594,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
if (cp->flags & IP_VS_CONN_F_SYNC && local) {
enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct)) {
IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
@@ -629,7 +631,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error_put;
/* mangle the packet */
- if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
+ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
goto tx_error_put;
ip_hdr(skb)->daddr = cp->daddr.ip;
ip_send_check(ip_hdr(skb));
@@ -677,7 +679,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
- struct ip_vs_protocol *pp)
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
{
struct rt6_info *rt; /* Route to the other host */
int mtu;
@@ -686,10 +688,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
/* check if it is a connection of no-client-port */
- if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) {
__be16 _pt, *p;
- p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
- sizeof(_pt), &_pt);
+ p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt);
if (p == NULL)
goto tx_error;
ip_vs_conn_fill_cport(cp, *p);
@@ -709,7 +710,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
if (cp->flags & IP_VS_CONN_F_SYNC && local) {
enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct)) {
IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
@@ -737,7 +738,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->dev = net->loopback_dev;
}
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ /* only send ICMP too big on first fragment */
+ if (!iph->fragoffs)
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
"ip_vs_nat_xmit_v6(): frag needed for");
goto tx_error_put;
@@ -751,7 +754,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error_put;
/* mangle the packet */
- if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
+ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph))
goto tx_error;
ipv6_hdr(skb)->daddr = cp->daddr.in6;
@@ -812,7 +815,7 @@ tx_error_put:
*/
int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
- struct ip_vs_protocol *pp)
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
struct rtable *rt; /* Route to the other host */
@@ -932,7 +935,7 @@ tx_error_put:
#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
- struct ip_vs_protocol *pp)
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
struct rt6_info *rt; /* Route to the other host */
struct in6_addr saddr; /* Source for tunnel */
@@ -972,7 +975,9 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->dev = net->loopback_dev;
}
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ /* only send ICMP too big on first fragment */
+ if (!ipvsh->fragoffs)
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error_put;
}
@@ -1053,7 +1058,7 @@ tx_error_put:
*/
int
ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
- struct ip_vs_protocol *pp)
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
struct rtable *rt; /* Route to the other host */
struct iphdr *iph = ip_hdr(skb);
@@ -1115,7 +1120,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
- struct ip_vs_protocol *pp)
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
{
struct rt6_info *rt; /* Route to the other host */
int mtu;
@@ -1139,7 +1144,9 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->dev = net->loopback_dev;
}
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ /* only send ICMP too big on first fragment */
+ if (!iph->fragoffs)
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
dst_release(&rt->dst);
IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error;
@@ -1183,7 +1190,8 @@ tx_error:
*/
int
ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
- struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
+ struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
+ struct ip_vs_iphdr *iph)
{
struct rtable *rt; /* Route to the other host */
int mtu;
@@ -1198,7 +1206,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
translate address/port back */
if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
if (cp->packet_xmit)
- rc = cp->packet_xmit(skb, cp, pp);
+ rc = cp->packet_xmit(skb, cp, pp, iph);
else
rc = NF_ACCEPT;
/* do not touch skb anymore */
@@ -1227,7 +1235,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
if (cp->flags & IP_VS_CONN_F_SYNC && local) {
enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct)) {
IP_VS_DBG(10, "%s(): "
@@ -1304,7 +1312,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
- struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
+ struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
+ struct ip_vs_iphdr *iph)
{
struct rt6_info *rt; /* Route to the other host */
int mtu;
@@ -1319,7 +1328,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
translate address/port back */
if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
if (cp->packet_xmit)
- rc = cp->packet_xmit(skb, cp, pp);
+ rc = cp->packet_xmit(skb, cp, pp, iph);
else
rc = NF_ACCEPT;
/* do not touch skb anymore */
@@ -1347,7 +1356,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
if (cp->flags & IP_VS_CONN_F_SYNC && local) {
enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct)) {
IP_VS_DBG(10, "%s(): "
@@ -1375,7 +1384,9 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->dev = net->loopback_dev;
}
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ /* only send ICMP too big on first fragment */
+ if (!iph->fragoffs)
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error_put;
}
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index d61e0782a797..7df424e2d10c 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -69,6 +69,10 @@ static int nf_conntrack_acct_init_sysctl(struct net *net)
table[0].data = &net->ct.sysctl_acct;
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ table[0].procname = NULL;
+
net->ct.acct_sysctl_header = register_net_sysctl(net, "net/netfilter",
table);
if (!net->ct.acct_sysctl_header) {
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index de9781b6464f..faa978f1714b 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -196,6 +196,10 @@ static int nf_conntrack_event_init_sysctl(struct net *net)
table[0].data = &net->ct.sysctl_events;
table[1].data = &net->ct.sysctl_events_retry_timeout;
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ table[0].procname = NULL;
+
net->ct.event_sysctl_header =
register_net_sysctl(net, "net/netfilter", table);
if (!net->ct.event_sysctl_header) {
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 1b30b0dee708..962795e839ab 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -753,7 +753,8 @@ static int callforward_do_filter(const union nf_inet_addr *src,
flowi4_to_flowi(&fl1), false)) {
if (!afinfo->route(&init_net, (struct dst_entry **)&rt2,
flowi4_to_flowi(&fl2), false)) {
- if (rt1->rt_gateway == rt2->rt_gateway &&
+ if (rt_nexthop(rt1, fl1.daddr) ==
+ rt_nexthop(rt2, fl2.daddr) &&
rt1->dst.dev == rt2->dst.dev)
ret = 1;
dst_release(&rt2->dst);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index c4bc637feb76..884f2b39319a 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -64,6 +64,10 @@ static int nf_conntrack_helper_init_sysctl(struct net *net)
table[0].data = &net->ct.sysctl_auto_assign_helper;
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ table[0].procname = NULL;
+
net->ct.helper_sysctl_header =
register_net_sysctl(net, "net/netfilter", table);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 6535326cf07c..a8ae287bc7af 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -815,7 +815,7 @@ static struct ctl_table dccp_sysctl_table[] = {
};
#endif /* CONFIG_SYSCTL */
-static int dccp_kmemdup_sysctl_table(struct nf_proto_net *pn,
+static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
struct dccp_net *dn)
{
#ifdef CONFIG_SYSCTL
@@ -836,6 +836,10 @@ static int dccp_kmemdup_sysctl_table(struct nf_proto_net *pn,
pn->ctl_table[5].data = &dn->dccp_timeout[CT_DCCP_CLOSING];
pn->ctl_table[6].data = &dn->dccp_timeout[CT_DCCP_TIMEWAIT];
pn->ctl_table[7].data = &dn->dccp_loose;
+
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ pn->ctl_table[0].procname = NULL;
#endif
return 0;
}
@@ -857,7 +861,7 @@ static int dccp_init_net(struct net *net, u_int16_t proto)
dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL;
}
- return dccp_kmemdup_sysctl_table(pn, dn);
+ return dccp_kmemdup_sysctl_table(net, pn, dn);
}
static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 9b3943252a5e..363285d544a1 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -489,6 +489,10 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
table[3].data = &net->ct.sysctl_checksum;
table[4].data = &net->ct.sysctl_log_invalid;
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ table[0].procname = NULL;
+
net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table);
if (!net->ct.sysctl_header)
goto out_unregister_netfilter;
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
index dbb364f62d6f..7ea8026f07c9 100644
--- a/net/netfilter/nf_conntrack_timestamp.c
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -51,6 +51,10 @@ static int nf_conntrack_tstamp_init_sysctl(struct net *net)
table[0].data = &net->ct.sysctl_tstamp;
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ table[0].procname = NULL;
+
net->ct.tstamp_sysctl_header = register_net_sysctl(net, "net/netfilter",
table);
if (!net->ct.tstamp_sysctl_header) {
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index ffb92c03a358..58a09b7c3f6d 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -138,7 +138,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
const struct nfnetlink_subsystem *ss;
int type, err;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
/* All the messages must at least contain nfgenmsg */
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 8847b4d8be06..701c88a20fea 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -41,7 +41,8 @@ MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tu
static LIST_HEAD(cttimeout_list);
static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
- [CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING },
+ [CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING,
+ .len = CTNL_TIMEOUT_NAME_MAX - 1},
[CTA_TIMEOUT_L3PROTO] = { .type = NLA_U16 },
[CTA_TIMEOUT_L4PROTO] = { .type = NLA_U8 },
[CTA_TIMEOUT_DATA] = { .type = NLA_NESTED },
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 16c712563860..ae7f5daeee43 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -180,9 +180,9 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
typeof(nf_ct_timeout_find_get_hook) timeout_find_get;
struct ctnl_timeout *timeout;
struct nf_conn_timeout *timeout_ext;
- const struct ipt_entry *e = par->entryinfo;
struct nf_conntrack_l4proto *l4proto;
int ret = 0;
+ u8 proto;
rcu_read_lock();
timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook);
@@ -192,9 +192,11 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
goto out;
}
- if (e->ip.invflags & IPT_INV_PROTO) {
+ proto = xt_ct_find_proto(par);
+ if (!proto) {
ret = -EINVAL;
- pr_info("You cannot use inversion on L4 protocol\n");
+ pr_info("You must specify a L4 protocol, and not use "
+ "inversions on it.\n");
goto out;
}
@@ -214,7 +216,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
/* Make sure the timeout policy matches any existing protocol tracker,
* otherwise default to generic.
*/
- l4proto = __nf_ct_l4proto_find(par->family, e->ip.proto);
+ l4proto = __nf_ct_l4proto_find(par->family, proto);
if (timeout->l4proto->l4proto != l4proto->l4proto) {
ret = -EINVAL;
pr_info("Timeout policy `%s' can only be used by L4 protocol "
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index ee2e5bc5a8c7..bd93e51d30ac 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -70,6 +70,7 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
fl4.daddr = info->gw.ip;
fl4.flowi4_tos = RT_TOS(iph->tos);
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
+ fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH;
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
return false;
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index bb10b0717f1b..8d47c3780fda 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -67,7 +67,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
goto out;
}
- ip_vs_fill_iphdr(family, skb_network_header(skb), &iph);
+ ip_vs_fill_iph_skb(family, skb, &iph);
if (data->bitmask & XT_IPVS_PROTO)
if ((iph.protocol == data->l4proto) ^
@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
/*
* Check if the packet belongs to an existing entry
*/
- cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */);
+ cp = pp->conn_out_get(family, skb, &iph, 1 /* inverse */);
if (unlikely(cp == NULL)) {
match = false;
goto out;
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index 81aafa8e4fef..bea7464cc43f 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -111,7 +111,7 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
.family = NFPROTO_IPV4,
.table = "nat",
.hooks = (1 << NF_INET_POST_ROUTING) |
- (1 << NF_INET_LOCAL_OUT),
+ (1 << NF_INET_LOCAL_IN),
.me = THIS_MODULE,
},
{
@@ -123,7 +123,7 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
.family = NFPROTO_IPV4,
.table = "nat",
.hooks = (1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_LOCAL_IN),
+ (1 << NF_INET_LOCAL_OUT),
.me = THIS_MODULE,
},
{
@@ -133,7 +133,7 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
.targetsize = sizeof(struct nf_nat_range),
.table = "nat",
.hooks = (1 << NF_INET_POST_ROUTING) |
- (1 << NF_INET_LOCAL_OUT),
+ (1 << NF_INET_LOCAL_IN),
.me = THIS_MODULE,
},
{
@@ -143,7 +143,7 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
.targetsize = sizeof(struct nf_nat_range),
.table = "nat",
.hooks = (1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_LOCAL_IN),
+ (1 << NF_INET_LOCAL_OUT),
.me = THIS_MODULE,
},
};
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 01e944a017a4..c8a1eb6eca2d 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -138,6 +138,8 @@ static int netlink_dump(struct sock *sk);
static DEFINE_RWLOCK(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);
+#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));
+
static ATOMIC_NOTIFIER_HEAD(netlink_chain);
static inline u32 netlink_group_mask(u32 group)
@@ -345,6 +347,11 @@ netlink_update_listeners(struct sock *sk)
struct hlist_node *node;
unsigned long mask;
unsigned int i;
+ struct listeners *listeners;
+
+ listeners = nl_deref_protected(tbl->listeners);
+ if (!listeners)
+ return;
for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
mask = 0;
@@ -352,7 +359,7 @@ netlink_update_listeners(struct sock *sk)
if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
mask |= nlk_sk(sk)->groups[i];
}
- tbl->listeners->masks[i] = mask;
+ listeners->masks[i] = mask;
}
/* this function is only called with the netlink table "grabbed", which
* makes sure updates are visible before bind or setsockopt return. */
@@ -536,7 +543,11 @@ static int netlink_release(struct socket *sock)
if (netlink_is_kernel(sk)) {
BUG_ON(nl_table[sk->sk_protocol].registered == 0);
if (--nl_table[sk->sk_protocol].registered == 0) {
- kfree(nl_table[sk->sk_protocol].listeners);
+ struct listeners *old;
+
+ old = nl_deref_protected(nl_table[sk->sk_protocol].listeners);
+ RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL);
+ kfree_rcu(old, rcu);
nl_table[sk->sk_protocol].module = NULL;
nl_table[sk->sk_protocol].bind = NULL;
nl_table[sk->sk_protocol].flags = 0;
@@ -601,7 +612,7 @@ retry:
static inline int netlink_capable(const struct socket *sock, unsigned int flag)
{
return (nl_table[sock->sk->sk_protocol].flags & flag) ||
- capable(CAP_NET_ADMIN);
+ ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN);
}
static void
@@ -982,7 +993,7 @@ int netlink_has_listeners(struct sock *sk, unsigned int group)
rcu_read_lock();
listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);
- if (group - 1 < nl_table[sk->sk_protocol].groups)
+ if (listeners && group - 1 < nl_table[sk->sk_protocol].groups)
res = test_bit(group - 1, listeners->masks);
rcu_read_unlock();
@@ -1625,7 +1636,7 @@ int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
if (!new)
return -ENOMEM;
- old = rcu_dereference_protected(tbl->listeners, 1);
+ old = nl_deref_protected(tbl->listeners);
memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
rcu_assign_pointer(tbl->listeners, new);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 94060edbbd70..e639645e8fec 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1881,7 +1881,35 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb_reserve(skb, hlen);
skb_reset_network_header(skb);
- data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
+ if (po->tp_tx_has_off) {
+ int off_min, off_max, off;
+ off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
+ off_max = po->tx_ring.frame_size - tp_len;
+ if (sock->type == SOCK_DGRAM) {
+ switch (po->tp_version) {
+ case TPACKET_V2:
+ off = ph.h2->tp_net;
+ break;
+ default:
+ off = ph.h1->tp_net;
+ break;
+ }
+ } else {
+ switch (po->tp_version) {
+ case TPACKET_V2:
+ off = ph.h2->tp_mac;
+ break;
+ default:
+ off = ph.h1->tp_mac;
+ break;
+ }
+ }
+ if (unlikely((off < off_min) || (off_max < off)))
+ return -EINVAL;
+ data = ph.raw + off;
+ } else {
+ data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
+ }
to_write = tp_len;
if (sock->type == SOCK_DGRAM) {
@@ -1907,7 +1935,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
to_write -= dev->hard_header_len;
}
- err = -EFAULT;
offset = offset_in_page(data);
len_max = PAGE_SIZE - offset;
len = ((to_write > len_max) ? len_max : to_write);
@@ -1957,7 +1984,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
mutex_lock(&po->pg_vec_lock);
- err = -EBUSY;
if (saddr == NULL) {
dev = po->prot_hook.dev;
proto = po->num;
@@ -2478,7 +2504,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
__be16 proto = (__force __be16)protocol; /* weird, but documented */
int err;
- if (!capable(CAP_NET_RAW))
+ if (!ns_capable(net->user_ns, CAP_NET_RAW))
return -EPERM;
if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
sock->type != SOCK_PACKET)
@@ -3111,6 +3137,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
return fanout_add(sk, val & 0xffff, val >> 16);
}
+ case PACKET_TX_HAS_OFF:
+ {
+ unsigned int val;
+
+ if (optlen != sizeof(val))
+ return -EINVAL;
+ if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
+ return -EBUSY;
+ if (copy_from_user(&val, optval, sizeof(val)))
+ return -EFAULT;
+ po->tp_tx_has_off = !!val;
+ return 0;
+ }
default:
return -ENOPROTOOPT;
}
@@ -3202,6 +3241,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
((u32)po->fanout->type << 16)) :
0);
break;
+ case PACKET_TX_HAS_OFF:
+ val = po->tp_tx_has_off;
+ break;
default:
return -ENOPROTOOPT;
}
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 44945f6b7252..e84cab8cb7a9 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -109,6 +109,7 @@ struct packet_sock {
unsigned int tp_hdrlen;
unsigned int tp_reserve;
unsigned int tp_loss:1;
+ unsigned int tp_tx_has_off:1;
unsigned int tp_tstamp;
struct packet_type prot_hook ____cacheline_aligned_in_smp;
};
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 83a8389619aa..0193630d3061 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -70,6 +70,9 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr)
int err;
u8 pnaddr;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -230,6 +233,9 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr)
int err;
u8 dst;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 8d2b3d5a7c21..7280ab8810c2 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -50,7 +50,7 @@ struct rds_ib_cache_head {
};
struct rds_ib_refill_cache {
- struct rds_ib_cache_head *percpu;
+ struct rds_ib_cache_head __percpu *percpu;
struct list_head *xfer;
struct list_head *ready;
};
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 8d194912c695..8c5bc857f04d 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -418,20 +418,21 @@ static void rds_ib_recv_cache_put(struct list_head *new_item,
struct rds_ib_refill_cache *cache)
{
unsigned long flags;
- struct rds_ib_cache_head *chp;
struct list_head *old;
+ struct list_head __percpu *chpfirst;
local_irq_save(flags);
- chp = per_cpu_ptr(cache->percpu, smp_processor_id());
- if (!chp->first)
+ chpfirst = __this_cpu_read(cache->percpu->first);
+ if (!chpfirst)
INIT_LIST_HEAD(new_item);
else /* put on front */
- list_add_tail(new_item, chp->first);
- chp->first = new_item;
- chp->count++;
+ list_add_tail(new_item, chpfirst);
- if (chp->count < RDS_IB_RECYCLE_BATCH_COUNT)
+ __this_cpu_write(chpfirst, new_item);
+ __this_cpu_inc(cache->percpu->count);
+
+ if (__this_cpu_read(cache->percpu->count) < RDS_IB_RECYCLE_BATCH_COUNT)
goto end;
/*
@@ -443,12 +444,13 @@ static void rds_ib_recv_cache_put(struct list_head *new_item,
do {
old = xchg(&cache->xfer, NULL);
if (old)
- list_splice_entire_tail(old, chp->first);
- old = cmpxchg(&cache->xfer, NULL, chp->first);
+ list_splice_entire_tail(old, chpfirst);
+ old = cmpxchg(&cache->xfer, NULL, chpfirst);
} while (old);
- chp->first = NULL;
- chp->count = 0;
+
+ __this_cpu_write(chpfirst, NULL);
+ __this_cpu_write(cache->percpu->count, 0);
end:
local_irq_restore(flags);
}
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 62fb51face8a..235e01acac51 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -509,7 +509,7 @@ config NET_EMATCH_TEXT
config NET_EMATCH_CANID
tristate "CAN Identifier"
- depends on NET_EMATCH && CAN
+ depends on NET_EMATCH && (CAN=y || CAN=m)
---help---
Say Y here if you want to be able to classify CAN frames based
on CAN Identifier.
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 102761d294cb..65d240cbf74b 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -987,6 +987,9 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
u32 portid = skb ? NETLINK_CB(skb).portid : 0;
int ret = 0, ovr = 0;
+ if ((n->nlmsg_type != RTM_GETACTION) && !capable(CAP_NET_ADMIN))
+ return -EPERM;
+
ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
if (ret < 0)
return ret;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 7ae02892437c..ff55ed6c49b2 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -139,6 +139,8 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
int err;
int tp_created = 0;
+ if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN))
+ return -EPERM;
replay:
t = nlmsg_data(n);
protocol = TC_H_MIN(t->tcm_info);
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 2ecde225ae60..709b0fb38a18 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -17,6 +17,7 @@
#include <linux/skbuff.h>
#include <linux/cgroup.h>
#include <linux/rcupdate.h>
+#include <linux/fdtable.h>
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/sock.h>
@@ -53,6 +54,28 @@ static void cgrp_destroy(struct cgroup *cgrp)
kfree(cgrp_cls_state(cgrp));
}
+static int update_classid(const void *v, struct file *file, unsigned n)
+{
+ int err;
+ struct socket *sock = sock_from_file(file, &err);
+ if (sock)
+ sock->sk->sk_classid = (u32)(unsigned long)v;
+ return 0;
+}
+
+static void cgrp_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+{
+ struct task_struct *p;
+ void *v;
+
+ cgroup_taskset_for_each(p, cgrp, tset) {
+ task_lock(p);
+ v = (void *)(unsigned long)task_cls_classid(p);
+ iterate_fd(p->files, 0, update_classid, v);
+ task_unlock(p);
+ }
+}
+
static u64 read_classid(struct cgroup *cgrp, struct cftype *cft)
{
return cgrp_cls_state(cgrp)->classid;
@@ -77,6 +100,7 @@ struct cgroup_subsys net_cls_subsys = {
.name = "net_cls",
.create = cgrp_create,
.destroy = cgrp_destroy,
+ .attach = cgrp_attach,
.subsys_id = net_cls_subsys_id,
.base_cftypes = ss_files,
.module = THIS_MODULE,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index a18d975db59c..4799c4840c1a 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -495,16 +495,15 @@ EXPORT_SYMBOL(qdisc_watchdog_init);
void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
{
- ktime_t time;
-
if (test_bit(__QDISC_STATE_DEACTIVATED,
&qdisc_root_sleeping(wd->qdisc)->state))
return;
qdisc_throttled(wd->qdisc);
- time = ktime_set(0, 0);
- time = ktime_add_ns(time, PSCHED_TICKS2NS(expires));
- hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
+
+ hrtimer_start(&wd->timer,
+ ns_to_ktime(PSCHED_TICKS2NS(expires)),
+ HRTIMER_MODE_ABS);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule);
@@ -981,6 +980,9 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
struct Qdisc *p = NULL;
int err;
+ if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN))
+ return -EPERM;
+
dev = __dev_get_by_index(net, tcm->tcm_ifindex);
if (!dev)
return -ENODEV;
@@ -1044,6 +1046,9 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
struct Qdisc *q, *p;
int err;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
replay:
/* Reinit, just in case something touches this. */
tcm = nlmsg_data(n);
@@ -1380,6 +1385,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
u32 qid = TC_H_MAJ(clid);
int err;
+ if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN))
+ return -EPERM;
+
dev = __dev_get_by_index(net, tcm->tcm_ifindex);
if (!dev)
return -ENODEV;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 564b9fc8efd3..0e19948470b8 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -509,8 +509,7 @@ static void cbq_ovl_delay(struct cbq_class *cl)
cl->cpriority = TC_CBQ_MAXPRIO;
q->pmask |= (1<<TC_CBQ_MAXPRIO);
- expires = ktime_set(0, 0);
- expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
+ expires = ns_to_ktime(PSCHED_TICKS2NS(sched));
if (hrtimer_try_to_cancel(&q->delay_timer) &&
ktime_to_ns(ktime_sub(
hrtimer_get_expires(&q->delay_timer),
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 9d75b7761313..d2922c0ef57a 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -71,6 +71,12 @@ enum htb_cmode {
HTB_CAN_SEND /* class can send */
};
+struct htb_rate_cfg {
+ u64 rate_bps;
+ u32 mult;
+ u32 shift;
+};
+
/* interior & leaf nodes; props specific to leaves are marked L: */
struct htb_class {
struct Qdisc_class_common common;
@@ -118,11 +124,11 @@ struct htb_class {
int filter_cnt;
/* token bucket parameters */
- struct qdisc_rate_table *rate; /* rate table of the class itself */
- struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */
- long buffer, cbuffer; /* token bucket depth/rate */
+ struct htb_rate_cfg rate;
+ struct htb_rate_cfg ceil;
+ s64 buffer, cbuffer; /* token bucket depth/rate */
psched_tdiff_t mbuffer; /* max wait time */
- long tokens, ctokens; /* current number of tokens */
+ s64 tokens, ctokens; /* current number of tokens */
psched_time_t t_c; /* checkpoint time */
};
@@ -162,6 +168,45 @@ struct htb_sched {
struct work_struct work;
};
+static u64 l2t_ns(struct htb_rate_cfg *r, unsigned int len)
+{
+ return ((u64)len * r->mult) >> r->shift;
+}
+
+static void htb_precompute_ratedata(struct htb_rate_cfg *r)
+{
+ u64 factor;
+ u64 mult;
+ int shift;
+
+ r->shift = 0;
+ r->mult = 1;
+ /*
+ * Calibrate mult, shift so that token counting is accurate
+ * for smallest packet size (64 bytes). Token (time in ns) is
+ * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will
+ * work as long as the smallest packet transfer time can be
+ * accurately represented in nanosec.
+ */
+ if (r->rate_bps > 0) {
+ /*
+ * Higher shift gives better accuracy. Find the largest
+ * shift such that mult fits in 32 bits.
+ */
+ for (shift = 0; shift < 16; shift++) {
+ r->shift = shift;
+ factor = 8LLU * NSEC_PER_SEC * (1 << r->shift);
+ mult = div64_u64(factor, r->rate_bps);
+ if (mult > UINT_MAX)
+ break;
+ }
+
+ r->shift = shift - 1;
+ factor = 8LLU * NSEC_PER_SEC * (1 << r->shift);
+ r->mult = div64_u64(factor, r->rate_bps);
+ }
+}
+
/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
@@ -273,7 +318,7 @@ static void htb_add_to_id_tree(struct rb_root *root,
* already in the queue.
*/
static void htb_add_to_wait_tree(struct htb_sched *q,
- struct htb_class *cl, long delay)
+ struct htb_class *cl, s64 delay)
{
struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
@@ -441,14 +486,14 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
htb_remove_class_from_row(q, cl, mask);
}
-static inline long htb_lowater(const struct htb_class *cl)
+static inline s64 htb_lowater(const struct htb_class *cl)
{
if (htb_hysteresis)
return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
else
return 0;
}
-static inline long htb_hiwater(const struct htb_class *cl)
+static inline s64 htb_hiwater(const struct htb_class *cl)
{
if (htb_hysteresis)
return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
@@ -469,9 +514,9 @@ static inline long htb_hiwater(const struct htb_class *cl)
* mode transitions per time unit. The speed gain is about 1/6.
*/
static inline enum htb_cmode
-htb_class_mode(struct htb_class *cl, long *diff)
+htb_class_mode(struct htb_class *cl, s64 *diff)
{
- long toks;
+ s64 toks;
if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
*diff = -toks;
@@ -495,7 +540,7 @@ htb_class_mode(struct htb_class *cl, long *diff)
* to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
*/
static void
-htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
+htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
{
enum htb_cmode new_mode = htb_class_mode(cl, diff);
@@ -581,26 +626,26 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_SUCCESS;
}
-static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff)
+static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
{
- long toks = diff + cl->tokens;
+ s64 toks = diff + cl->tokens;
if (toks > cl->buffer)
toks = cl->buffer;
- toks -= (long) qdisc_l2t(cl->rate, bytes);
+ toks -= (s64) l2t_ns(&cl->rate, bytes);
if (toks <= -cl->mbuffer)
toks = 1 - cl->mbuffer;
cl->tokens = toks;
}
-static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff)
+static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
{
- long toks = diff + cl->ctokens;
+ s64 toks = diff + cl->ctokens;
if (toks > cl->cbuffer)
toks = cl->cbuffer;
- toks -= (long) qdisc_l2t(cl->ceil, bytes);
+ toks -= (s64) l2t_ns(&cl->ceil, bytes);
if (toks <= -cl->mbuffer)
toks = 1 - cl->mbuffer;
@@ -623,10 +668,10 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
{
int bytes = qdisc_pkt_len(skb);
enum htb_cmode old_mode;
- long diff;
+ s64 diff;
while (cl) {
- diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
+ diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
if (cl->level >= level) {
if (cl->level == level)
cl->xstats.lends++;
@@ -673,7 +718,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
unsigned long stop_at = start + 2;
while (time_before(jiffies, stop_at)) {
struct htb_class *cl;
- long diff;
+ s64 diff;
struct rb_node *p = rb_first(&q->wait_pq[level]);
if (!p)
@@ -684,7 +729,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
return cl->pq_key;
htb_safe_rb_erase(p, q->wait_pq + level);
- diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
+ diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
htb_change_class_mode(q, cl, &diff);
if (cl->cmode != HTB_CAN_SEND)
htb_add_to_wait_tree(q, cl, diff);
@@ -871,10 +916,10 @@ ok:
if (!sch->q.qlen)
goto fin;
- q->now = psched_get_time();
+ q->now = ktime_to_ns(ktime_get());
start_at = jiffies;
- next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
+ next_event = q->now + 5 * NSEC_PER_SEC;
for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
/* common case optimization - skip event handler quickly */
@@ -884,7 +929,7 @@ ok:
if (q->now >= q->near_ev_cache[level]) {
event = htb_do_events(q, level, start_at);
if (!event)
- event = q->now + PSCHED_TICKS_PER_SEC;
+ event = q->now + NSEC_PER_SEC;
q->near_ev_cache[level] = event;
} else
event = q->near_ev_cache[level];
@@ -903,10 +948,17 @@ ok:
}
}
sch->qstats.overlimits++;
- if (likely(next_event > q->now))
- qdisc_watchdog_schedule(&q->watchdog, next_event);
- else
+ if (likely(next_event > q->now)) {
+ if (!test_bit(__QDISC_STATE_DEACTIVATED,
+ &qdisc_root_sleeping(q->watchdog.qdisc)->state)) {
+ ktime_t time = ns_to_ktime(next_event);
+ qdisc_throttled(q->watchdog.qdisc);
+ hrtimer_start(&q->watchdog.timer, time,
+ HRTIMER_MODE_ABS);
+ }
+ } else {
schedule_work(&q->work);
+ }
fin:
return skb;
}
@@ -1082,9 +1134,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
memset(&opt, 0, sizeof(opt));
- opt.rate = cl->rate->rate;
+ opt.rate.rate = cl->rate.rate_bps >> 3;
opt.buffer = cl->buffer;
- opt.ceil = cl->ceil->rate;
+ opt.ceil.rate = cl->ceil.rate_bps >> 3;
opt.cbuffer = cl->cbuffer;
opt.quantum = cl->quantum;
opt.prio = cl->prio;
@@ -1203,9 +1255,6 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
qdisc_destroy(cl->un.leaf.q);
}
gen_kill_estimator(&cl->bstats, &cl->rate_est);
- qdisc_put_rtab(cl->rate);
- qdisc_put_rtab(cl->ceil);
-
tcf_destroy_chain(&cl->filter_list);
kfree(cl);
}
@@ -1307,7 +1356,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)*arg, *parent;
struct nlattr *opt = tca[TCA_OPTIONS];
- struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
struct nlattr *tb[__TCA_HTB_MAX];
struct tc_htb_opt *hopt;
@@ -1326,10 +1374,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
hopt = nla_data(tb[TCA_HTB_PARMS]);
-
- rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
- ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
- if (!rtab || !ctab)
+ if (!hopt->rate.rate || !hopt->ceil.rate)
goto failure;
if (!cl) { /* new class */
@@ -1439,7 +1484,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
* is really leaf before changing cl->un.leaf !
*/
if (!cl->level) {
- cl->quantum = rtab->rate.rate / q->rate2quantum;
+ cl->quantum = hopt->rate.rate / q->rate2quantum;
if (!hopt->quantum && cl->quantum < 1000) {
pr_warning(
"HTB: quantum of class %X is small. Consider r2q change.\n",
@@ -1460,12 +1505,16 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
cl->buffer = hopt->buffer;
cl->cbuffer = hopt->cbuffer;
- if (cl->rate)
- qdisc_put_rtab(cl->rate);
- cl->rate = rtab;
- if (cl->ceil)
- qdisc_put_rtab(cl->ceil);
- cl->ceil = ctab;
+
+ cl->rate.rate_bps = (u64)hopt->rate.rate << 3;
+ cl->ceil.rate_bps = (u64)hopt->ceil.rate << 3;
+
+ htb_precompute_ratedata(&cl->rate);
+ htb_precompute_ratedata(&cl->ceil);
+
+ cl->buffer = hopt->buffer << PSCHED_SHIFT;
+ cl->cbuffer = hopt->buffer << PSCHED_SHIFT;
+
sch_tree_unlock(sch);
qdisc_class_hash_grow(sch, &q->clhash);
@@ -1474,10 +1523,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
return 0;
failure:
- if (rtab)
- qdisc_put_rtab(rtab);
- if (ctab)
- qdisc_put_rtab(ctab);
return err;
}
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index f0dd83cff906..9687fa1c2275 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -84,18 +84,19 @@
* grp->index is the index of the group; and grp->slot_shift
* is the shift for the corresponding (scaled) sigma_i.
*/
-#define QFQ_MAX_INDEX 19
-#define QFQ_MAX_WSHIFT 16
+#define QFQ_MAX_INDEX 24
+#define QFQ_MAX_WSHIFT 12
#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
-#define QFQ_MAX_WSUM (2*QFQ_MAX_WEIGHT)
+#define QFQ_MAX_WSUM (16*QFQ_MAX_WEIGHT)
#define FRAC_BITS 30 /* fixed point arithmetic */
#define ONE_FP (1UL << FRAC_BITS)
#define IWSUM (ONE_FP/QFQ_MAX_WSUM)
-#define QFQ_MTU_SHIFT 11
+#define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */
#define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
+#define QFQ_MIN_LMAX 256 /* min possible lmax for a class */
/*
* Possible group states. These values are used as indexes for the bitmaps
@@ -231,6 +232,32 @@ static void qfq_update_class_params(struct qfq_sched *q, struct qfq_class *cl,
q->wsum += delta_w;
}
+static void qfq_update_reactivate_class(struct qfq_sched *q,
+ struct qfq_class *cl,
+ u32 inv_w, u32 lmax, int delta_w)
+{
+ bool need_reactivation = false;
+ int i = qfq_calc_index(inv_w, lmax);
+
+ if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) {
+ /*
+ * shift cl->F back, to not charge the
+ * class for the not-yet-served head
+ * packet
+ */
+ cl->F = cl->S;
+ /* remove class from its slot in the old group */
+ qfq_deactivate_class(q, cl);
+ need_reactivation = true;
+ }
+
+ qfq_update_class_params(q, cl, lmax, inv_w, delta_w);
+
+ if (need_reactivation) /* activate in new group */
+ qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc));
+}
+
+
static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
struct nlattr **tca, unsigned long *arg)
{
@@ -238,7 +265,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
struct qfq_class *cl = (struct qfq_class *)*arg;
struct nlattr *tb[TCA_QFQ_MAX + 1];
u32 weight, lmax, inv_w;
- int i, err;
+ int err;
int delta_w;
if (tca[TCA_OPTIONS] == NULL) {
@@ -270,16 +297,14 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tb[TCA_QFQ_LMAX]) {
lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
- if (!lmax || lmax > (1UL << QFQ_MTU_SHIFT)) {
+ if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
pr_notice("qfq: invalid max length %u\n", lmax);
return -EINVAL;
}
} else
- lmax = 1UL << QFQ_MTU_SHIFT;
+ lmax = psched_mtu(qdisc_dev(sch));
if (cl != NULL) {
- bool need_reactivation = false;
-
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
qdisc_root_sleeping_lock(sch),
@@ -291,24 +316,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (lmax == cl->lmax && inv_w == cl->inv_w)
return 0; /* nothing to update */
- i = qfq_calc_index(inv_w, lmax);
sch_tree_lock(sch);
- if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) {
- /*
- * shift cl->F back, to not charge the
- * class for the not-yet-served head
- * packet
- */
- cl->F = cl->S;
- /* remove class from its slot in the old group */
- qfq_deactivate_class(q, cl);
- need_reactivation = true;
- }
-
- qfq_update_class_params(q, cl, lmax, inv_w, delta_w);
-
- if (need_reactivation) /* activate in new group */
- qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc));
+ qfq_update_reactivate_class(q, cl, inv_w, lmax, delta_w);
sch_tree_unlock(sch);
return 0;
@@ -663,15 +672,48 @@ static void qfq_make_eligible(struct qfq_sched *q, u64 old_V)
/*
- * XXX we should make sure that slot becomes less than 32.
- * This is guaranteed by the input values.
- * roundedS is always cl->S rounded on grp->slot_shift bits.
+ * If the weight and lmax (max_pkt_size) of the classes do not change,
+ * then QFQ guarantees that the slot index is never higher than
+ * 2 + ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) * (QFQ_MAX_WEIGHT/QFQ_MAX_WSUM).
+ *
+ * With the current values of the above constants, the index is
+ * then guaranteed to never be higher than 2 + 256 * (1 / 16) = 18.
+ *
+ * When the weight of a class is increased or the lmax of the class is
+ * decreased, a new class with smaller slot size may happen to be
+ * activated. The activation of this class should be properly delayed
+ * to when the service of the class has finished in the ideal system
+ * tracked by QFQ. If the activation of the class is not delayed to
+ * this reference time instant, then this class may be unjustly served
+ * before other classes waiting for service. This may cause
+ * (unfrequently) the above bound to the slot index to be violated for
+ * some of these unlucky classes.
+ *
+ * Instead of delaying the activation of the new class, which is quite
+ * complex, the following inaccurate but simple solution is used: if
+ * the slot index is higher than QFQ_MAX_SLOTS-2, then the timestamps
+ * of the class are shifted backward so as to let the slot index
+ * become equal to QFQ_MAX_SLOTS-2. This threshold is used because, if
+ * the slot index is above it, then the data structure implementing
+ * the bucket list either gets immediately corrupted or may get
+ * corrupted on a possible next packet arrival that causes the start
+ * time of the group to be shifted backward.
*/
static void qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl,
u64 roundedS)
{
u64 slot = (roundedS - grp->S) >> grp->slot_shift;
- unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS;
+ unsigned int i; /* slot index in the bucket list */
+
+ if (unlikely(slot > QFQ_MAX_SLOTS - 2)) {
+ u64 deltaS = roundedS - grp->S -
+ ((u64)(QFQ_MAX_SLOTS - 2)<<grp->slot_shift);
+ cl->S -= deltaS;
+ cl->F -= deltaS;
+ slot = QFQ_MAX_SLOTS - 2;
+ }
+
+ i = (grp->front + slot) % QFQ_MAX_SLOTS;
hlist_add_head(&cl->next, &grp->slots[i]);
__set_bit(slot, &grp->full_slots);
@@ -892,6 +934,13 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
+ if (unlikely(cl->lmax < qdisc_pkt_len(skb))) {
+ pr_debug("qfq: increasing maxpkt from %u to %u for class %u",
+ cl->lmax, qdisc_pkt_len(skb), cl->common.classid);
+ qfq_update_reactivate_class(q, cl, cl->inv_w,
+ qdisc_pkt_len(skb), 0);
+ }
+
err = qdisc_enqueue(skb, cl->qdisc);
if (unlikely(err != NET_XMIT_SUCCESS)) {
pr_debug("qfq_enqueue: enqueue failed %d\n", err);
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index 126b014eb79b..a9edd2e205f4 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -9,7 +9,6 @@ menuconfig IP_SCTP
select CRYPTO
select CRYPTO_HMAC
select CRYPTO_SHA1
- select CRYPTO_MD5 if SCTP_HMAC_MD5
select LIBCRC32C
---help---
Stream Control Transmission Protocol
@@ -68,33 +67,21 @@ config SCTP_DBG_OBJCNT
If unsure, say N
-choice
- prompt "SCTP: Cookie HMAC Algorithm"
- default SCTP_HMAC_MD5
+config SCTP_COOKIE_HMAC_MD5
+ bool "Enable optional MD5 hmac cookie generation"
help
- HMAC algorithm to be used during association initialization. It
- is strongly recommended to use HMAC-SHA1 or HMAC-MD5. See
- configuration for Cryptographic API and enable those algorithms
- to make usable by SCTP.
-
-config SCTP_HMAC_NONE
- bool "None"
- help
- Choosing this disables the use of an HMAC during association
- establishment. It is advised to use either HMAC-MD5 or HMAC-SHA1.
-
-config SCTP_HMAC_SHA1
- bool "HMAC-SHA1"
- help
- Enable the use of HMAC-SHA1 during association establishment. It
- is advised to use either HMAC-MD5 or HMAC-SHA1.
-
-config SCTP_HMAC_MD5
- bool "HMAC-MD5"
+ Enable optional MD5 hmac based SCTP cookie generation
+ default y
+ select CRYPTO_HMAC if SCTP_COOKIE_HMAC_MD5
+ select CRYPTO_MD5 if SCTP_COOKIE_HMAC_MD5
+
+config SCTP_COOKIE_HMAC_SHA1
+ bool "Enable optional SHA1 hmac cookie generation"
help
- Enable the use of HMAC-MD5 during association establishment. It is
- advised to use either HMAC-MD5 or HMAC-SHA1.
+ Enable optional SHA1 hmac based SCTP cookie generation
+ default y
+ select CRYPTO_HMAC if SCTP_COOKIE_HMAC_SHA1
+ select CRYPTO_SHA1 if SCTP_COOKIE_HMAC_SHA1
-endchoice
endif # IP_SCTP
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index c3bea269faf4..9966e7b16451 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -102,7 +102,7 @@ static const struct file_operations sctp_snmp_seq_fops = {
.open = sctp_snmp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = single_release_net,
};
/* Set up the proc fs entry for 'snmp' object. */
@@ -251,7 +251,7 @@ static const struct file_operations sctp_eps_seq_fops = {
.open = sctp_eps_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
};
/* Set up the proc fs entry for 'eps' object. */
@@ -372,7 +372,7 @@ static const struct file_operations sctp_assocs_seq_fops = {
.open = sctp_assocs_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
};
/* Set up the proc fs entry for 'assocs' object. */
@@ -517,7 +517,7 @@ static const struct file_operations sctp_remaddr_seq_fops = {
.open = sctp_remaddr_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
};
int __net_init sctp_remaddr_proc_init(struct net *net)
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 2d518425d598..456bc3dbdd51 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1190,6 +1190,15 @@ static int sctp_net_init(struct net *net)
/* Whether Cookie Preservative is enabled(1) or not(0) */
net->sctp.cookie_preserve_enable = 1;
+ /* Default sctp sockets to use md5 as their hmac alg */
+#if defined (CONFIG_CRYPTO_MD5)
+ net->sctp.sctp_hmac_alg = "md5";
+#elif defined (CONFIG_CRYPTO_SHA1)
+ net->sctp.sctp_hmac_alg = "sha1";
+#else
+ net->sctp.sctp_hmac_alg = NULL;
+#endif
+
/* Max.Burst - 4 */
net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index fbe1636309a7..e0f01a4e8cd6 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1090,6 +1090,25 @@ nodata:
return retval;
}
+struct sctp_chunk *sctp_make_violation_max_retrans(
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk)
+{
+ struct sctp_chunk *retval;
+ static const char error[] = "Association exceeded its max_retans count";
+ size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t);
+
+ retval = sctp_make_abort(asoc, chunk, payload_len);
+ if (!retval)
+ goto nodata;
+
+ sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, sizeof(error));
+ sctp_addto_chunk(retval, sizeof(error), error);
+
+nodata:
+ return retval;
+}
+
/* Make a HEARTBEAT chunk. */
struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
const struct sctp_transport *transport)
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 57f7de839b03..c0769569b05d 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -577,7 +577,7 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
unsigned int error)
{
struct sctp_ulpevent *event;
-
+ struct sctp_chunk *abort;
/* Cancel any partial delivery in progress. */
sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
@@ -593,6 +593,13 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
SCTP_ULPEVENT(event));
+ if (asoc->overall_error_count >= asoc->max_retrans) {
+ abort = sctp_make_violation_max_retrans(asoc, chunk);
+ if (abort)
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
+ SCTP_CHUNK(abort));
+ }
+
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_CLOSED));
@@ -1268,14 +1275,14 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
sctp_outq_uncork(&asoc->outqueue);
local_cork = 0;
}
- asoc = cmd->obj.ptr;
+ asoc = cmd->obj.asoc;
/* Register with the endpoint. */
sctp_endpoint_add_asoc(ep, asoc);
sctp_hash_established(asoc);
break;
case SCTP_CMD_UPDATE_ASSOC:
- sctp_assoc_update(asoc, cmd->obj.ptr);
+ sctp_assoc_update(asoc, cmd->obj.asoc);
break;
case SCTP_CMD_PURGE_OUTQUEUE:
@@ -1315,7 +1322,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
break;
case SCTP_CMD_PROCESS_FWDTSN:
- sctp_cmd_process_fwdtsn(&asoc->ulpq, cmd->obj.ptr);
+ sctp_cmd_process_fwdtsn(&asoc->ulpq, cmd->obj.chunk);
break;
case SCTP_CMD_GEN_SACK:
@@ -1331,7 +1338,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
case SCTP_CMD_PROCESS_SACK:
/* Process an inbound SACK. */
error = sctp_cmd_process_sack(commands, asoc,
- cmd->obj.ptr);
+ cmd->obj.chunk);
break;
case SCTP_CMD_GEN_INIT_ACK:
@@ -1352,15 +1359,15 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
* layer which will bail.
*/
error = sctp_cmd_process_init(commands, asoc, chunk,
- cmd->obj.ptr, gfp);
+ cmd->obj.init, gfp);
break;
case SCTP_CMD_GEN_COOKIE_ECHO:
/* Generate a COOKIE ECHO chunk. */
new_obj = sctp_make_cookie_echo(asoc, chunk);
if (!new_obj) {
- if (cmd->obj.ptr)
- sctp_chunk_free(cmd->obj.ptr);
+ if (cmd->obj.chunk)
+ sctp_chunk_free(cmd->obj.chunk);
goto nomem;
}
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
@@ -1369,9 +1376,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
/* If there is an ERROR chunk to be sent along with
* the COOKIE_ECHO, send it, too.
*/
- if (cmd->obj.ptr)
+ if (cmd->obj.chunk)
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
- SCTP_CHUNK(cmd->obj.ptr));
+ SCTP_CHUNK(cmd->obj.chunk));
if (new_obj->transport) {
new_obj->transport->init_sent_count++;
@@ -1417,18 +1424,18 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
case SCTP_CMD_CHUNK_ULP:
/* Send a chunk to the sockets layer. */
SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n",
- "chunk_up:", cmd->obj.ptr,
+ "chunk_up:", cmd->obj.chunk,
"ulpq:", &asoc->ulpq);
- sctp_ulpq_tail_data(&asoc->ulpq, cmd->obj.ptr,
+ sctp_ulpq_tail_data(&asoc->ulpq, cmd->obj.chunk,
GFP_ATOMIC);
break;
case SCTP_CMD_EVENT_ULP:
/* Send a notification to the sockets layer. */
SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n",
- "event_up:",cmd->obj.ptr,
+ "event_up:",cmd->obj.ulpevent,
"ulpq:",&asoc->ulpq);
- sctp_ulpq_tail_event(&asoc->ulpq, cmd->obj.ptr);
+ sctp_ulpq_tail_event(&asoc->ulpq, cmd->obj.ulpevent);
break;
case SCTP_CMD_REPLY:
@@ -1438,12 +1445,12 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
local_cork = 1;
}
/* Send a chunk to our peer. */
- error = sctp_outq_tail(&asoc->outqueue, cmd->obj.ptr);
+ error = sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk);
break;
case SCTP_CMD_SEND_PKT:
/* Send a full packet to our peer. */
- packet = cmd->obj.ptr;
+ packet = cmd->obj.packet;
sctp_packet_transmit(packet);
sctp_ootb_pkt_free(packet);
break;
@@ -1480,7 +1487,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
break;
case SCTP_CMD_SETUP_T2:
- sctp_cmd_setup_t2(commands, asoc, cmd->obj.ptr);
+ sctp_cmd_setup_t2(commands, asoc, cmd->obj.chunk);
break;
case SCTP_CMD_TIMER_START_ONCE:
@@ -1514,7 +1521,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
break;
case SCTP_CMD_INIT_CHOOSE_TRANSPORT:
- chunk = cmd->obj.ptr;
+ chunk = cmd->obj.chunk;
t = sctp_assoc_choose_alter_transport(asoc,
asoc->init_last_sent_to);
asoc->init_last_sent_to = t;
@@ -1642,8 +1649,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
asoc->outqueue.outstanding_bytes;
sackh.num_gap_ack_blocks = 0;
sackh.num_dup_tsns = 0;
+ chunk->subh.sack_hdr = &sackh;
sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK,
- SCTP_SACKH(&sackh));
+ SCTP_CHUNK(chunk));
break;
case SCTP_CMD_DISCARD_PACKET:
@@ -1664,17 +1672,16 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
break;
case SCTP_CMD_PART_DELIVER:
- sctp_ulpq_partial_delivery(&asoc->ulpq, cmd->obj.ptr,
- GFP_ATOMIC);
+ sctp_ulpq_partial_delivery(&asoc->ulpq, GFP_ATOMIC);
break;
case SCTP_CMD_RENEGE:
- sctp_ulpq_renege(&asoc->ulpq, cmd->obj.ptr,
+ sctp_ulpq_renege(&asoc->ulpq, cmd->obj.chunk,
GFP_ATOMIC);
break;
case SCTP_CMD_SETUP_T4:
- sctp_cmd_setup_t4(commands, asoc, cmd->obj.ptr);
+ sctp_cmd_setup_t4(commands, asoc, cmd->obj.chunk);
break;
case SCTP_CMD_PROCESS_OPERR:
@@ -1733,8 +1740,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
break;
default:
- pr_warn("Impossible command: %u, %p\n",
- cmd->verb, cmd->obj.ptr);
+ pr_warn("Impossible command: %u\n",
+ cmd->verb);
break;
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 59d16ea927f0..2e897069310a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -110,7 +110,6 @@ static int sctp_do_bind(struct sock *, union sctp_addr *, int);
static int sctp_autobind(struct sock *sk);
static void sctp_sock_migrate(struct sock *, struct sock *,
struct sctp_association *, sctp_socket_type_t);
-static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG;
extern struct kmem_cache *sctp_bucket_cachep;
extern long sysctl_sctp_mem[3];
@@ -336,6 +335,7 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
/* Bind a local address either to an endpoint or to an association. */
SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
{
+ struct net *net = sock_net(sk);
struct sctp_sock *sp = sctp_sk(sk);
struct sctp_endpoint *ep = sp->ep;
struct sctp_bind_addr *bp = &ep->base.bind_addr;
@@ -379,7 +379,8 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
}
}
- if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+ if (snum && snum < PROT_SOCK &&
+ !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
/* See if the address matches any of the addresses we may have
@@ -974,7 +975,7 @@ SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk,
void *addr_buf;
struct sctp_af *af;
- SCTP_DEBUG_PRINTK("sctp_setsocktopt_bindx: sk %p addrs %p"
+ SCTP_DEBUG_PRINTK("sctp_setsockopt_bindx: sk %p addrs %p"
" addrs_size %d opt %d\n", sk, addrs, addrs_size, op);
if (unlikely(addrs_size <= 0))
@@ -1162,7 +1163,7 @@ static int __sctp_connect(struct sock* sk,
* be permitted to open new associations.
*/
if (ep->base.bind_addr.port < PROT_SOCK &&
- !capable(CAP_NET_BIND_SERVICE)) {
+ !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) {
err = -EACCES;
goto out_free;
}
@@ -1791,7 +1792,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
* associations.
*/
if (ep->base.bind_addr.port < PROT_SOCK &&
- !capable(CAP_NET_BIND_SERVICE)) {
+ !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) {
err = -EACCES;
goto out_unlock;
}
@@ -3890,6 +3891,8 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
sp->default_rcv_context = 0;
sp->max_burst = net->sctp.max_burst;
+ sp->sctp_hmac_alg = net->sctp.sctp_hmac_alg;
+
/* Initialize default setup parameters. These parameters
* can be modified with the SCTP_INITMSG socket option or
* overridden by the SCTP_INIT CMSG.
@@ -5981,13 +5984,15 @@ SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog)
struct sctp_sock *sp = sctp_sk(sk);
struct sctp_endpoint *ep = sp->ep;
struct crypto_hash *tfm = NULL;
+ char alg[32];
/* Allocate HMAC for generating cookie. */
- if (!sctp_sk(sk)->hmac && sctp_hmac_alg) {
- tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC);
+ if (!sp->hmac && sp->sctp_hmac_alg) {
+ sprintf(alg, "hmac(%s)", sp->sctp_hmac_alg);
+ tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm)) {
net_info_ratelimited("failed to load transform for %s: %ld\n",
- sctp_hmac_alg, PTR_ERR(tfm));
+ sp->sctp_hmac_alg, PTR_ERR(tfm));
return -ENOSYS;
}
sctp_sk(sk)->hmac = tfm;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 70e3ba5cb50b..043889ac86c0 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -62,6 +62,11 @@ extern long sysctl_sctp_mem[3];
extern int sysctl_sctp_rmem[3];
extern int sysctl_sctp_wmem[3];
+static int proc_sctp_do_hmac_alg(ctl_table *ctl,
+ int write,
+ void __user *buffer, size_t *lenp,
+
+ loff_t *ppos);
static ctl_table sctp_table[] = {
{
.procname = "sctp_mem",
@@ -147,6 +152,12 @@ static ctl_table sctp_net_table[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "cookie_hmac_alg",
+ .maxlen = 8,
+ .mode = 0644,
+ .proc_handler = proc_sctp_do_hmac_alg,
+ },
+ {
.procname = "valid_cookie_life",
.data = &init_net.sctp.valid_cookie_life,
.maxlen = sizeof(unsigned int),
@@ -289,6 +300,54 @@ static ctl_table sctp_net_table[] = {
{ /* sentinel */ }
};
+static int proc_sctp_do_hmac_alg(ctl_table *ctl,
+ int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct net *net = current->nsproxy->net_ns;
+ char tmp[8];
+ ctl_table tbl;
+ int ret;
+ int changed = 0;
+ char *none = "none";
+
+ memset(&tbl, 0, sizeof(struct ctl_table));
+
+ if (write) {
+ tbl.data = tmp;
+ tbl.maxlen = 8;
+ } else {
+ tbl.data = net->sctp.sctp_hmac_alg ? : none;
+ tbl.maxlen = strlen(tbl.data);
+ }
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
+
+ if (write) {
+#ifdef CONFIG_CRYPTO_MD5
+ if (!strncmp(tmp, "md5", 3)) {
+ net->sctp.sctp_hmac_alg = "md5";
+ changed = 1;
+ }
+#endif
+#ifdef CONFIG_CRYPTO_SHA1
+ if (!strncmp(tmp, "sha1", 4)) {
+ net->sctp.sctp_hmac_alg = "sha1";
+ changed = 1;
+ }
+#endif
+ if (!strncmp(tmp, "none", 4)) {
+ net->sctp.sctp_hmac_alg = NULL;
+ changed = 1;
+ }
+
+ if (!changed)
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
int sctp_sysctl_net_register(struct net *net)
{
struct ctl_table *table;
diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c
index b5fb7c409023..5f25e0c92c31 100644
--- a/net/sctp/tsnmap.c
+++ b/net/sctp/tsnmap.c
@@ -272,7 +272,7 @@ __u16 sctp_tsnmap_pending(struct sctp_tsnmap *map)
__u32 max_tsn = map->max_tsn_seen;
__u32 base_tsn = map->base_tsn;
__u16 pending_data;
- u32 gap, i;
+ u32 gap;
pending_data = max_tsn - cum_tsn;
gap = max_tsn - base_tsn;
@@ -280,11 +280,7 @@ __u16 sctp_tsnmap_pending(struct sctp_tsnmap *map)
if (gap == 0 || gap >= map->len)
goto out;
- for (i = 0; i < gap+1; i++) {
- if (test_bit(i, map->tsn_map))
- pending_data--;
- }
-
+ pending_data -= bitmap_weight(map->tsn_map, gap + 1);
out:
return pending_data;
}
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 360d8697b95c..ada17464b65b 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -997,7 +997,6 @@ static __u16 sctp_ulpq_renege_frags(struct sctp_ulpq *ulpq, __u16 needed)
/* Partial deliver the first message as there is pressure on rwnd. */
void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
- struct sctp_chunk *chunk,
gfp_t gfp)
{
struct sctp_ulpevent *event;
@@ -1060,7 +1059,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn, chunk->transport);
sctp_ulpq_tail_data(ulpq, chunk, gfp);
- sctp_ulpq_partial_delivery(ulpq, chunk, gfp);
+ sctp_ulpq_partial_delivery(ulpq, gfp);
}
sk_mem_reclaim(asoc->base.sk);
diff --git a/net/socket.c b/net/socket.c
index d92c490e66fa..2ca51c719ef9 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -620,8 +620,6 @@ static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
{
struct sock_iocb *si = kiocb_to_siocb(iocb);
- sock_update_classid(sock->sk);
-
si->sock = sock;
si->scm = NULL;
si->msg = msg;
@@ -784,8 +782,6 @@ static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
{
struct sock_iocb *si = kiocb_to_siocb(iocb);
- sock_update_classid(sock->sk);
-
si->sock = sock;
si->scm = NULL;
si->msg = msg;
@@ -896,8 +892,6 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
if (unlikely(!sock->ops->splice_read))
return -EINVAL;
- sock_update_classid(sock->sk);
-
return sock->ops->splice_read(sock, ppos, pipe, len, flags);
}
@@ -3437,8 +3431,6 @@ EXPORT_SYMBOL(kernel_setsockopt);
int kernel_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags)
{
- sock_update_classid(sock->sk);
-
if (sock->ops->sendpage)
return sock->ops->sendpage(sock, page, offset, size, flags);
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 5a3d675d2f2f..a9c0bbccad6b 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -172,7 +172,7 @@ out_free:
xprt_free_allocation(req);
dprintk("RPC: setup backchannel transport failed\n");
- return -1;
+ return -ENOMEM;
}
EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 2a68bb3db772..fc2f7aa4dca7 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1409,11 +1409,11 @@ static ssize_t read_flush(struct file *file, char __user *buf,
size_t count, loff_t *ppos,
struct cache_detail *cd)
{
- char tbuf[20];
+ char tbuf[22];
unsigned long p = *ppos;
size_t len;
- sprintf(tbuf, "%lu\n", convert_to_wallclock(cd->flush_time));
+ snprintf(tbuf, sizeof(tbuf), "%lu\n", convert_to_wallclock(cd->flush_time));
len = strlen(tbuf);
if (p >= len)
return 0;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index aaaadfbe36e9..75853cabf4c9 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -254,7 +254,6 @@ struct sock_xprt {
void (*old_data_ready)(struct sock *, int);
void (*old_state_change)(struct sock *);
void (*old_write_space)(struct sock *);
- void (*old_error_report)(struct sock *);
};
/*
@@ -737,10 +736,10 @@ static int xs_tcp_send_request(struct rpc_task *task)
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
case -ECONNRESET:
- case -EPIPE:
xs_tcp_shutdown(xprt);
case -ECONNREFUSED:
case -ENOTCONN:
+ case -EPIPE:
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
}
@@ -781,7 +780,6 @@ static void xs_save_old_callbacks(struct sock_xprt *transport, struct sock *sk)
transport->old_data_ready = sk->sk_data_ready;
transport->old_state_change = sk->sk_state_change;
transport->old_write_space = sk->sk_write_space;
- transport->old_error_report = sk->sk_error_report;
}
static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *sk)
@@ -789,7 +787,6 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s
sk->sk_data_ready = transport->old_data_ready;
sk->sk_state_change = transport->old_state_change;
sk->sk_write_space = transport->old_write_space;
- sk->sk_error_report = transport->old_error_report;
}
static void xs_reset_transport(struct sock_xprt *transport)
@@ -1453,7 +1450,7 @@ static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
xprt_clear_connecting(xprt);
}
-static void xs_sock_mark_closed(struct rpc_xprt *xprt)
+static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
{
smp_mb__before_clear_bit();
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
@@ -1461,6 +1458,11 @@ static void xs_sock_mark_closed(struct rpc_xprt *xprt)
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
clear_bit(XPRT_CLOSING, &xprt->state);
smp_mb__after_clear_bit();
+}
+
+static void xs_sock_mark_closed(struct rpc_xprt *xprt)
+{
+ xs_sock_reset_connection_flags(xprt);
/* Mark transport as closed and wake up all pending tasks */
xprt_disconnect_done(xprt);
}
@@ -1516,6 +1518,7 @@ static void xs_tcp_state_change(struct sock *sk)
case TCP_CLOSE_WAIT:
/* The server initiated a shutdown of the socket */
xprt->connect_cookie++;
+ clear_bit(XPRT_CONNECTED, &xprt->state);
xs_tcp_force_close(xprt);
case TCP_CLOSING:
/*
@@ -1540,25 +1543,6 @@ static void xs_tcp_state_change(struct sock *sk)
read_unlock_bh(&sk->sk_callback_lock);
}
-/**
- * xs_error_report - callback mainly for catching socket errors
- * @sk: socket
- */
-static void xs_error_report(struct sock *sk)
-{
- struct rpc_xprt *xprt;
-
- read_lock_bh(&sk->sk_callback_lock);
- if (!(xprt = xprt_from_sock(sk)))
- goto out;
- dprintk("RPC: %s client %p...\n"
- "RPC: error %d\n",
- __func__, xprt, sk->sk_err);
- xprt_wake_pending_tasks(xprt, -EAGAIN);
-out:
- read_unlock_bh(&sk->sk_callback_lock);
-}
-
static void xs_write_space(struct sock *sk)
{
struct socket *sock;
@@ -1858,7 +1842,6 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
sk->sk_user_data = xprt;
sk->sk_data_ready = xs_local_data_ready;
sk->sk_write_space = xs_udp_write_space;
- sk->sk_error_report = xs_error_report;
sk->sk_allocation = GFP_ATOMIC;
xprt_clear_connected(xprt);
@@ -1983,7 +1966,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_user_data = xprt;
sk->sk_data_ready = xs_udp_data_ready;
sk->sk_write_space = xs_udp_write_space;
- sk->sk_error_report = xs_error_report;
sk->sk_no_check = UDP_CSUM_NORCV;
sk->sk_allocation = GFP_ATOMIC;
@@ -2050,10 +2032,8 @@ static void xs_abort_connection(struct sock_xprt *transport)
any.sa_family = AF_UNSPEC;
result = kernel_connect(transport->sock, &any, sizeof(any), 0);
if (!result)
- xs_sock_mark_closed(&transport->xprt);
- else
- dprintk("RPC: AF_UNSPEC connect return code %d\n",
- result);
+ xs_sock_reset_connection_flags(&transport->xprt);
+ dprintk("RPC: AF_UNSPEC connect return code %d\n", result);
}
static void xs_tcp_reuse_connection(struct sock_xprt *transport)
@@ -2098,7 +2078,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_data_ready = xs_tcp_data_ready;
sk->sk_state_change = xs_tcp_state_change;
sk->sk_write_space = xs_tcp_write_space;
- sk->sk_error_report = xs_error_report;
sk->sk_allocation = GFP_ATOMIC;
/* socket options */
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index e3a6e37cd1c5..9bc6db04be3e 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -38,15 +38,24 @@ static int is_seen(struct ctl_table_set *set)
}
/* Return standard mode bits for table entry. */
-static int net_ctl_permissions(struct ctl_table_root *root,
- struct nsproxy *nsproxy,
+static int net_ctl_permissions(struct ctl_table_header *head,
struct ctl_table *table)
{
+ struct net *net = container_of(head->set, struct net, sysctls);
+ kuid_t root_uid = make_kuid(net->user_ns, 0);
+ kgid_t root_gid = make_kgid(net->user_ns, 0);
+
/* Allow network administrator to have same access as root. */
- if (capable(CAP_NET_ADMIN)) {
+ if (ns_capable(net->user_ns, CAP_NET_ADMIN) ||
+ uid_eq(root_uid, current_uid())) {
int mode = (table->mode >> 6) & 7;
return (mode << 6) | (mode << 3) | mode;
}
+ /* Allow netns root group to have the same access as the root group */
+ if (gid_eq(root_gid, current_gid())) {
+ int mode = (table->mode >> 3) & 7;
+ return (mode << 3) | mode;
+ }
return table->mode;
}
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 585460180ffb..bc41bd31eadc 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -20,18 +20,9 @@ menuconfig TIPC
If in doubt, say N.
-if TIPC
-
-config TIPC_ADVANCED
- bool "Advanced TIPC configuration"
- default n
- help
- Saying Y here will open some advanced configuration for TIPC.
- Most users do not need to bother; if unsure, just say N.
-
config TIPC_PORTS
int "Maximum number of ports in a node"
- depends on TIPC_ADVANCED
+ depends on TIPC
range 127 65535
default "8191"
help
@@ -40,5 +31,3 @@ config TIPC_PORTS
Setting this to a smaller value saves some memory,
setting it to higher allows for more ports.
-
-endif # TIPC
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index e4e6d8cd47e6..54f89f90ac33 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -347,7 +347,7 @@ static void bclink_peek_nack(struct tipc_msg *msg)
tipc_node_lock(n_ptr);
- if (n_ptr->bclink.supported &&
+ if (n_ptr->bclink.recv_permitted &&
(n_ptr->bclink.last_in != n_ptr->bclink.last_sent) &&
(n_ptr->bclink.last_in == msg_bcgap_after(msg)))
n_ptr->bclink.oos_state = 2;
@@ -429,7 +429,7 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
goto exit;
tipc_node_lock(node);
- if (unlikely(!node->bclink.supported))
+ if (unlikely(!node->bclink.recv_permitted))
goto unlock;
/* Handle broadcast protocol message */
@@ -564,7 +564,7 @@ exit:
u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr)
{
- return (n_ptr->bclink.supported &&
+ return (n_ptr->bclink.recv_permitted &&
(tipc_bclink_get_last_sent() != n_ptr->bclink.acked));
}
@@ -619,16 +619,14 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
if (bcbearer->remains_new.count == bcbearer->remains.count)
continue; /* bearer pair doesn't add anything */
- if (p->blocked ||
- p->media->send_msg(buf, p, &p->media->bcast_addr)) {
+ if (!tipc_bearer_blocked(p))
+ tipc_bearer_send(p, buf, &p->media->bcast_addr);
+ else if (s && !tipc_bearer_blocked(s))
/* unable to send on primary bearer */
- if (!s || s->blocked ||
- s->media->send_msg(buf, s,
- &s->media->bcast_addr)) {
- /* unable to send on either bearer */
- continue;
- }
- }
+ tipc_bearer_send(s, buf, &s->media->bcast_addr);
+ else
+ /* unable to send on either bearer */
+ continue;
if (s) {
bcbearer->bpairs[bp_index].primary = s;
@@ -731,8 +729,8 @@ int tipc_bclink_stats(char *buf, const u32 buf_size)
" TX naks:%u acks:%u dups:%u\n",
s->sent_nacks, s->sent_acks, s->retransmitted);
ret += tipc_snprintf(buf + ret, buf_size - ret,
- " Congestion bearer:%u link:%u Send queue max:%u avg:%u\n",
- s->bearer_congs, s->link_congs, s->max_queue_sz,
+ " Congestion link:%u Send queue max:%u avg:%u\n",
+ s->link_congs, s->max_queue_sz,
s->queue_sz_counts ?
(s->accu_queue_sz / s->queue_sz_counts) : 0);
@@ -766,7 +764,6 @@ int tipc_bclink_set_queue_limits(u32 limit)
void tipc_bclink_init(void)
{
- INIT_LIST_HEAD(&bcbearer->bearer.cong_links);
bcbearer->bearer.media = &bcbearer->media;
bcbearer->media.send_msg = tipc_bcbearer_send;
sprintf(bcbearer->media.name, "tipc-broadcast");
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 4ec5c80e8a7c..aa62f93a9127 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -279,116 +279,31 @@ void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest)
}
/*
- * bearer_push(): Resolve bearer congestion. Force the waiting
- * links to push out their unsent packets, one packet per link
- * per iteration, until all packets are gone or congestion reoccurs.
- * 'tipc_net_lock' is read_locked when this function is called
- * bearer.lock must be taken before calling
- * Returns binary true(1) ore false(0)
- */
-static int bearer_push(struct tipc_bearer *b_ptr)
-{
- u32 res = 0;
- struct tipc_link *ln, *tln;
-
- if (b_ptr->blocked)
- return 0;
-
- while (!list_empty(&b_ptr->cong_links) && (res != PUSH_FAILED)) {
- list_for_each_entry_safe(ln, tln, &b_ptr->cong_links, link_list) {
- res = tipc_link_push_packet(ln);
- if (res == PUSH_FAILED)
- break;
- if (res == PUSH_FINISHED)
- list_move_tail(&ln->link_list, &b_ptr->links);
- }
- }
- return list_empty(&b_ptr->cong_links);
-}
-
-void tipc_bearer_lock_push(struct tipc_bearer *b_ptr)
-{
- spin_lock_bh(&b_ptr->lock);
- bearer_push(b_ptr);
- spin_unlock_bh(&b_ptr->lock);
-}
-
-
-/*
- * Interrupt enabling new requests after bearer congestion or blocking:
+ * Interrupt enabling new requests after bearer blocking:
* See bearer_send().
*/
-void tipc_continue(struct tipc_bearer *b_ptr)
+void tipc_continue(struct tipc_bearer *b)
{
- spin_lock_bh(&b_ptr->lock);
- if (!list_empty(&b_ptr->cong_links))
- tipc_k_signal((Handler)tipc_bearer_lock_push, (unsigned long)b_ptr);
- b_ptr->blocked = 0;
- spin_unlock_bh(&b_ptr->lock);
+ spin_lock_bh(&b->lock);
+ b->blocked = 0;
+ spin_unlock_bh(&b->lock);
}
/*
- * Schedule link for sending of messages after the bearer
- * has been deblocked by 'continue()'. This method is called
- * when somebody tries to send a message via this link while
- * the bearer is congested. 'tipc_net_lock' is in read_lock here
- * bearer.lock is busy
+ * tipc_bearer_blocked - determines if bearer is currently blocked
*/
-static void tipc_bearer_schedule_unlocked(struct tipc_bearer *b_ptr,
- struct tipc_link *l_ptr)
+int tipc_bearer_blocked(struct tipc_bearer *b)
{
- list_move_tail(&l_ptr->link_list, &b_ptr->cong_links);
-}
-
-/*
- * Schedule link for sending of messages after the bearer
- * has been deblocked by 'continue()'. This method is called
- * when somebody tries to send a message via this link while
- * the bearer is congested. 'tipc_net_lock' is in read_lock here,
- * bearer.lock is free
- */
-void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct tipc_link *l_ptr)
-{
- spin_lock_bh(&b_ptr->lock);
- tipc_bearer_schedule_unlocked(b_ptr, l_ptr);
- spin_unlock_bh(&b_ptr->lock);
-}
-
+ int res;
-/*
- * tipc_bearer_resolve_congestion(): Check if there is bearer congestion,
- * and if there is, try to resolve it before returning.
- * 'tipc_net_lock' is read_locked when this function is called
- */
-int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr,
- struct tipc_link *l_ptr)
-{
- int res = 1;
+ spin_lock_bh(&b->lock);
+ res = b->blocked;
+ spin_unlock_bh(&b->lock);
- if (list_empty(&b_ptr->cong_links))
- return 1;
- spin_lock_bh(&b_ptr->lock);
- if (!bearer_push(b_ptr)) {
- tipc_bearer_schedule_unlocked(b_ptr, l_ptr);
- res = 0;
- }
- spin_unlock_bh(&b_ptr->lock);
return res;
}
/**
- * tipc_bearer_congested - determines if bearer is currently congested
- */
-int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct tipc_link *l_ptr)
-{
- if (unlikely(b_ptr->blocked))
- return 1;
- if (likely(list_empty(&b_ptr->cong_links)))
- return 0;
- return !tipc_bearer_resolve_congestion(b_ptr, l_ptr);
-}
-
-/**
* tipc_enable_bearer - enable bearer with the given name
*/
int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority)
@@ -489,7 +404,6 @@ restart:
b_ptr->net_plane = bearer_id + 'A';
b_ptr->active = 1;
b_ptr->priority = priority;
- INIT_LIST_HEAD(&b_ptr->cong_links);
INIT_LIST_HEAD(&b_ptr->links);
spin_lock_init(&b_ptr->lock);
@@ -528,7 +442,6 @@ int tipc_block_bearer(const char *name)
pr_info("Blocking bearer <%s>\n", name);
spin_lock_bh(&b_ptr->lock);
b_ptr->blocked = 1;
- list_splice_init(&b_ptr->cong_links, &b_ptr->links);
list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
struct tipc_node *n_ptr = l_ptr->owner;
@@ -555,7 +468,6 @@ static void bearer_disable(struct tipc_bearer *b_ptr)
spin_lock_bh(&b_ptr->lock);
b_ptr->blocked = 1;
b_ptr->media->disable_bearer(b_ptr);
- list_splice_init(&b_ptr->cong_links, &b_ptr->links);
list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
tipc_link_delete(l_ptr);
}
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index dd4c2abf08e7..39f1192d04bf 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -120,7 +120,6 @@ struct tipc_media {
* @identity: array index of this bearer within TIPC bearer array
* @link_req: ptr to (optional) structure making periodic link setup requests
* @links: list of non-congested links associated with bearer
- * @cong_links: list of congested links associated with bearer
* @active: non-zero if bearer structure is represents a bearer
* @net_plane: network plane ('A' through 'H') currently associated with bearer
* @nodes: indicates which nodes in cluster can be reached through bearer
@@ -143,7 +142,6 @@ struct tipc_bearer {
u32 identity;
struct tipc_link_req *link_req;
struct list_head links;
- struct list_head cong_links;
int active;
char net_plane;
struct tipc_node_map nodes;
@@ -185,39 +183,23 @@ struct sk_buff *tipc_media_get_names(void);
struct sk_buff *tipc_bearer_get_names(void);
void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest);
void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest);
-void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct tipc_link *l_ptr);
struct tipc_bearer *tipc_bearer_find(const char *name);
struct tipc_bearer *tipc_bearer_find_interface(const char *if_name);
struct tipc_media *tipc_media_find(const char *name);
-int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr,
- struct tipc_link *l_ptr);
-int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct tipc_link *l_ptr);
+int tipc_bearer_blocked(struct tipc_bearer *b_ptr);
void tipc_bearer_stop(void);
-void tipc_bearer_lock_push(struct tipc_bearer *b_ptr);
-
/**
* tipc_bearer_send- sends buffer to destination over bearer
*
- * Returns true (1) if successful, or false (0) if unable to send
- *
* IMPORTANT:
* The media send routine must not alter the buffer being passed in
* as it may be needed for later retransmission!
- *
- * If the media send routine returns a non-zero value (indicating that
- * it was unable to send the buffer), it must:
- * 1) mark the bearer as blocked,
- * 2) call tipc_continue() once the bearer is able to send again.
- * Media types that are unable to meet these two critera must ensure their
- * send routine always returns success -- even if the buffer was not sent --
- * and let TIPC's link code deal with the undelivered message.
*/
-static inline int tipc_bearer_send(struct tipc_bearer *b_ptr,
- struct sk_buff *buf,
+static inline void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf,
struct tipc_media_addr *dest)
{
- return !b_ptr->media->send_msg(buf, b_ptr, dest);
+ b->media->send_msg(buf, b, dest);
}
#endif /* _TIPC_BEARER_H */
diff --git a/net/tipc/core.c b/net/tipc/core.c
index bfe8af88469a..fc05cecd7481 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -42,11 +42,6 @@
#include <linux/module.h>
-#ifndef CONFIG_TIPC_PORTS
-#define CONFIG_TIPC_PORTS 8191
-#endif
-
-
/* global variables used by multiple sub-systems within TIPC */
int tipc_random __read_mostly;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 50eaa403eb6e..1074b9587e81 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -243,7 +243,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr)
if ((type == DSC_REQ_MSG) && !link_fully_up && !b_ptr->blocked) {
rbuf = tipc_disc_init_msg(DSC_RESP_MSG, orig, b_ptr);
if (rbuf) {
- b_ptr->media->send_msg(rbuf, b_ptr, &media_addr);
+ tipc_bearer_send(b_ptr, rbuf, &media_addr);
kfree_skb(rbuf);
}
}
diff --git a/net/tipc/handler.c b/net/tipc/handler.c
index 111ff8300ae5..b36f0fcd9bdf 100644
--- a/net/tipc/handler.c
+++ b/net/tipc/handler.c
@@ -116,7 +116,6 @@ void tipc_handler_stop(void)
return;
handler_enabled = 0;
- tasklet_disable(&tipc_tasklet);
tasklet_kill(&tipc_tasklet);
spin_lock_bh(&qitem_lock);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index a79c755cb417..87bf5aad704b 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1,7 +1,7 @@
/*
* net/tipc/link.c: TIPC link code
*
- * Copyright (c) 1996-2007, Ericsson AB
+ * Copyright (c) 1996-2007, 2012, Ericsson AB
* Copyright (c) 2004-2007, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -103,6 +103,8 @@ static void link_reset_statistics(struct tipc_link *l_ptr);
static void link_print(struct tipc_link *l_ptr, const char *str);
static void link_start(struct tipc_link *l_ptr);
static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf);
+static void tipc_link_send_sync(struct tipc_link *l);
+static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf);
/*
* Simple link routines
@@ -712,6 +714,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
link_activate(l_ptr);
tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
+ if (l_ptr->owner->working_links == 1)
+ tipc_link_send_sync(l_ptr);
link_set_timer(l_ptr, cont_intv);
break;
case RESET_MSG:
@@ -745,6 +749,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
link_activate(l_ptr);
tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
+ if (l_ptr->owner->working_links == 1)
+ tipc_link_send_sync(l_ptr);
link_set_timer(l_ptr, cont_intv);
break;
case RESET_MSG:
@@ -872,17 +878,12 @@ int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
return link_send_long_buf(l_ptr, buf);
/* Packet can be queued or sent. */
- if (likely(!tipc_bearer_congested(l_ptr->b_ptr, l_ptr) &&
+ if (likely(!tipc_bearer_blocked(l_ptr->b_ptr) &&
!link_congested(l_ptr))) {
link_add_to_outqueue(l_ptr, buf, msg);
- if (likely(tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr))) {
- l_ptr->unacked_window = 0;
- } else {
- tipc_bearer_schedule(l_ptr->b_ptr, l_ptr);
- l_ptr->stats.bearer_congs++;
- l_ptr->next_out = buf;
- }
+ tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+ l_ptr->unacked_window = 0;
return dsz;
}
/* Congestion: can message be bundled ? */
@@ -891,10 +892,8 @@ int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
/* Try adding message to an existing bundle */
if (l_ptr->next_out &&
- link_bundle_buf(l_ptr, l_ptr->last_out, buf)) {
- tipc_bearer_resolve_congestion(l_ptr->b_ptr, l_ptr);
+ link_bundle_buf(l_ptr, l_ptr->last_out, buf))
return dsz;
- }
/* Try creating a new bundle */
if (size <= max_packet * 2 / 3) {
@@ -917,7 +916,6 @@ int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
if (!l_ptr->next_out)
l_ptr->next_out = buf;
link_add_to_outqueue(l_ptr, buf, msg);
- tipc_bearer_resolve_congestion(l_ptr->b_ptr, l_ptr);
return dsz;
}
@@ -949,7 +947,48 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector)
return res;
}
-/**
+/*
+ * tipc_link_send_sync - synchronize broadcast link endpoints.
+ *
+ * Give a newly added peer node the sequence number where it should
+ * start receiving and acking broadcast packets.
+ *
+ * Called with node locked
+ */
+static void tipc_link_send_sync(struct tipc_link *l)
+{
+ struct sk_buff *buf;
+ struct tipc_msg *msg;
+
+ buf = tipc_buf_acquire(INT_H_SIZE);
+ if (!buf)
+ return;
+
+ msg = buf_msg(buf);
+ tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, l->addr);
+ msg_set_last_bcast(msg, l->owner->bclink.acked);
+ link_add_chain_to_outqueue(l, buf, 0);
+ tipc_link_push_queue(l);
+}
+
+/*
+ * tipc_link_recv_sync - synchronize broadcast link endpoints.
+ * Receive the sequence number where we should start receiving and
+ * acking broadcast packets from a newly added peer node, and open
+ * up for reception of such packets.
+ *
+ * Called with node locked
+ */
+static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf)
+{
+ struct tipc_msg *msg = buf_msg(buf);
+
+ n->bclink.last_sent = n->bclink.last_in = msg_last_bcast(msg);
+ n->bclink.recv_permitted = true;
+ kfree_skb(buf);
+}
+
+/*
* tipc_link_send_names - send name table entries to new neighbor
*
* Send routine for bulk delivery of name table messages when contact
@@ -1006,16 +1045,11 @@ static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf,
if (likely(!link_congested(l_ptr))) {
if (likely(msg_size(msg) <= l_ptr->max_pkt)) {
- if (likely(list_empty(&l_ptr->b_ptr->cong_links))) {
+ if (likely(!tipc_bearer_blocked(l_ptr->b_ptr))) {
link_add_to_outqueue(l_ptr, buf, msg);
- if (likely(tipc_bearer_send(l_ptr->b_ptr, buf,
- &l_ptr->media_addr))) {
- l_ptr->unacked_window = 0;
- return res;
- }
- tipc_bearer_schedule(l_ptr->b_ptr, l_ptr);
- l_ptr->stats.bearer_congs++;
- l_ptr->next_out = buf;
+ tipc_bearer_send(l_ptr->b_ptr, buf,
+ &l_ptr->media_addr);
+ l_ptr->unacked_window = 0;
return res;
}
} else
@@ -1106,7 +1140,7 @@ exit:
/* Exit if link (or bearer) is congested */
if (link_congested(l_ptr) ||
- !list_empty(&l_ptr->b_ptr->cong_links)) {
+ tipc_bearer_blocked(l_ptr->b_ptr)) {
res = link_schedule_port(l_ptr,
sender->ref, res);
goto exit;
@@ -1329,15 +1363,11 @@ u32 tipc_link_push_packet(struct tipc_link *l_ptr)
if (r_q_size && buf) {
msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in);
- if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
- l_ptr->retransm_queue_head = mod(++r_q_head);
- l_ptr->retransm_queue_size = --r_q_size;
- l_ptr->stats.retransmitted++;
- return 0;
- } else {
- l_ptr->stats.bearer_congs++;
- return PUSH_FAILED;
- }
+ tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+ l_ptr->retransm_queue_head = mod(++r_q_head);
+ l_ptr->retransm_queue_size = --r_q_size;
+ l_ptr->stats.retransmitted++;
+ return 0;
}
/* Send deferred protocol message, if any: */
@@ -1345,15 +1375,11 @@ u32 tipc_link_push_packet(struct tipc_link *l_ptr)
if (buf) {
msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in);
- if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
- l_ptr->unacked_window = 0;
- kfree_skb(buf);
- l_ptr->proto_msg_queue = NULL;
- return 0;
- } else {
- l_ptr->stats.bearer_congs++;
- return PUSH_FAILED;
- }
+ tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+ l_ptr->unacked_window = 0;
+ kfree_skb(buf);
+ l_ptr->proto_msg_queue = NULL;
+ return 0;
}
/* Send one deferred data message, if send window not full: */
@@ -1366,18 +1392,14 @@ u32 tipc_link_push_packet(struct tipc_link *l_ptr)
if (mod(next - first) < l_ptr->queue_limit[0]) {
msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
- if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
- if (msg_user(msg) == MSG_BUNDLER)
- msg_set_type(msg, CLOSED_MSG);
- l_ptr->next_out = buf->next;
- return 0;
- } else {
- l_ptr->stats.bearer_congs++;
- return PUSH_FAILED;
- }
+ tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+ if (msg_user(msg) == MSG_BUNDLER)
+ msg_set_type(msg, CLOSED_MSG);
+ l_ptr->next_out = buf->next;
+ return 0;
}
}
- return PUSH_FINISHED;
+ return 1;
}
/*
@@ -1388,15 +1410,12 @@ void tipc_link_push_queue(struct tipc_link *l_ptr)
{
u32 res;
- if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr))
+ if (tipc_bearer_blocked(l_ptr->b_ptr))
return;
do {
res = tipc_link_push_packet(l_ptr);
} while (!res);
-
- if (res == PUSH_FAILED)
- tipc_bearer_schedule(l_ptr->b_ptr, l_ptr);
}
static void link_reset_all(unsigned long addr)
@@ -1454,9 +1473,8 @@ static void link_retransmit_failure(struct tipc_link *l_ptr,
tipc_addr_string_fill(addr_string, n_ptr->addr);
pr_info("Broadcast link info for %s\n", addr_string);
- pr_info("Supportable: %d, Supported: %d, Acked: %u\n",
- n_ptr->bclink.supportable,
- n_ptr->bclink.supported,
+ pr_info("Reception permitted: %d, Acked: %u\n",
+ n_ptr->bclink.recv_permitted,
n_ptr->bclink.acked);
pr_info("Last in: %u, Oos state: %u, Last sent: %u\n",
n_ptr->bclink.last_in,
@@ -1481,7 +1499,7 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *buf,
msg = buf_msg(buf);
- if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) {
+ if (tipc_bearer_blocked(l_ptr->b_ptr)) {
if (l_ptr->retransm_queue_size == 0) {
l_ptr->retransm_queue_head = msg_seqno(msg);
l_ptr->retransm_queue_size = retransmits;
@@ -1491,7 +1509,7 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *buf,
}
return;
} else {
- /* Detect repeated retransmit failures on uncongested bearer */
+ /* Detect repeated retransmit failures on unblocked bearer */
if (l_ptr->last_retransmitted == msg_seqno(msg)) {
if (++l_ptr->stale_count > 100) {
link_retransmit_failure(l_ptr, buf);
@@ -1507,17 +1525,10 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *buf,
msg = buf_msg(buf);
msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
- if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
- buf = buf->next;
- retransmits--;
- l_ptr->stats.retransmitted++;
- } else {
- tipc_bearer_schedule(l_ptr->b_ptr, l_ptr);
- l_ptr->stats.bearer_congs++;
- l_ptr->retransm_queue_head = buf_seqno(buf);
- l_ptr->retransm_queue_size = retransmits;
- return;
- }
+ tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+ buf = buf->next;
+ retransmits--;
+ l_ptr->stats.retransmitted++;
}
l_ptr->retransm_queue_head = l_ptr->retransm_queue_size = 0;
@@ -1676,7 +1687,7 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr)
ackd = msg_ack(msg);
/* Release acked messages */
- if (n_ptr->bclink.supported)
+ if (n_ptr->bclink.recv_permitted)
tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg));
crs = l_ptr->first_out;
@@ -1727,9 +1738,14 @@ deliver:
tipc_link_recv_bundle(buf);
continue;
case NAME_DISTRIBUTOR:
+ n_ptr->bclink.recv_permitted = true;
tipc_node_unlock(n_ptr);
tipc_named_recv(buf);
continue;
+ case BCAST_PROTOCOL:
+ tipc_link_recv_sync(n_ptr, buf);
+ tipc_node_unlock(n_ptr);
+ continue;
case CONN_MANAGER:
tipc_node_unlock(n_ptr);
tipc_port_recv_proto_msg(buf);
@@ -1772,16 +1788,19 @@ deliver:
continue;
}
+ /* Link is not in state WORKING_WORKING */
if (msg_user(msg) == LINK_PROTOCOL) {
link_recv_proto_msg(l_ptr, buf);
head = link_insert_deferred_queue(l_ptr, head);
tipc_node_unlock(n_ptr);
continue;
}
+
+ /* Traffic message. Conditionally activate link */
link_state_event(l_ptr, TRAFFIC_MSG_EVT);
if (link_working_working(l_ptr)) {
- /* Re-insert in front of queue */
+ /* Re-insert buffer in front of queue */
buf->next = head;
head = buf;
tipc_node_unlock(n_ptr);
@@ -1972,21 +1991,13 @@ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ,
skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
- /* Defer message if bearer is already congested */
- if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) {
- l_ptr->proto_msg_queue = buf;
- return;
- }
-
- /* Defer message if attempting to send results in bearer congestion */
- if (!tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
- tipc_bearer_schedule(l_ptr->b_ptr, l_ptr);
+ /* Defer message if bearer is already blocked */
+ if (tipc_bearer_blocked(l_ptr->b_ptr)) {
l_ptr->proto_msg_queue = buf;
- l_ptr->stats.bearer_congs++;
return;
}
- /* Discard message if it was sent successfully */
+ tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
l_ptr->unacked_window = 0;
kfree_skb(buf);
}
@@ -2057,7 +2068,6 @@ static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf)
} else {
l_ptr->max_pkt = l_ptr->max_pkt_target;
}
- l_ptr->owner->bclink.supportable = (max_pkt_info != 0);
/* Synchronize broadcast link info, if not done previously */
if (!tipc_node_is_up(l_ptr->owner)) {
@@ -2112,7 +2122,7 @@ static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf)
}
/* Protocol message before retransmits, reduce loss risk */
- if (l_ptr->owner->bclink.supported)
+ if (l_ptr->owner->bclink.recv_permitted)
tipc_bclink_update_link_state(l_ptr->owner,
msg_last_bcast(msg));
@@ -2937,8 +2947,8 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size)
s->sent_nacks, s->sent_acks, s->retransmitted);
ret += tipc_snprintf(buf + ret, buf_size - ret,
- " Congestion bearer:%u link:%u Send queue"
- " max:%u avg:%u\n", s->bearer_congs, s->link_congs,
+ " Congestion link:%u Send queue"
+ " max:%u avg:%u\n", s->link_congs,
s->max_queue_sz, s->queue_sz_counts ?
(s->accu_queue_sz / s->queue_sz_counts) : 0);
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 6e921121be06..c048ed1cbd76 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -40,9 +40,6 @@
#include "msg.h"
#include "node.h"
-#define PUSH_FAILED 1
-#define PUSH_FINISHED 2
-
/*
* Out-of-range value for link sequence numbers
*/
@@ -82,7 +79,6 @@ struct tipc_stats {
u32 recv_fragmented;
u32 recv_fragments;
u32 link_congs; /* # port sends blocked by congestion */
- u32 bearer_congs;
u32 deferred_recv;
u32 duplicates;
u32 max_queue_sz; /* send queue size high water mark */
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 55d3928dfd67..e0d08055754e 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -262,7 +262,7 @@ void tipc_named_node_up(unsigned long nodearg)
named_distribute(&message_list, node, &publ_zone, max_item_buf);
read_unlock_bh(&tipc_nametbl_lock);
- tipc_link_send_names(&message_list, (u32)node);
+ tipc_link_send_names(&message_list, node);
}
/**
diff --git a/net/tipc/node.c b/net/tipc/node.c
index d21db204e25a..48f39dd3eae8 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1,7 +1,7 @@
/*
* net/tipc/node.c: TIPC node management routines
*
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2006, 2012 Ericsson AB
* Copyright (c) 2005-2006, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -263,12 +263,9 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
static void node_established_contact(struct tipc_node *n_ptr)
{
tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr);
-
- if (n_ptr->bclink.supportable) {
- n_ptr->bclink.acked = tipc_bclink_get_last_sent();
- tipc_bclink_add_node(n_ptr->addr);
- n_ptr->bclink.supported = 1;
- }
+ n_ptr->bclink.oos_state = 0;
+ n_ptr->bclink.acked = tipc_bclink_get_last_sent();
+ tipc_bclink_add_node(n_ptr->addr);
}
static void node_name_purge_complete(unsigned long node_addr)
@@ -294,7 +291,7 @@ static void node_lost_contact(struct tipc_node *n_ptr)
tipc_addr_string_fill(addr_string, n_ptr->addr));
/* Flush broadcast link info associated with lost node */
- if (n_ptr->bclink.supported) {
+ if (n_ptr->bclink.recv_permitted) {
while (n_ptr->bclink.deferred_head) {
struct sk_buff *buf = n_ptr->bclink.deferred_head;
n_ptr->bclink.deferred_head = buf->next;
@@ -310,7 +307,7 @@ static void node_lost_contact(struct tipc_node *n_ptr)
tipc_bclink_remove_node(n_ptr->addr);
tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ);
- n_ptr->bclink.supported = 0;
+ n_ptr->bclink.recv_permitted = false;
}
/* Abort link changeover */
diff --git a/net/tipc/node.h b/net/tipc/node.h
index cfcaf4d6e480..3c189b35b102 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -67,8 +67,6 @@
* @permit_changeover: non-zero if node has redundant links to this system
* @signature: node instance identifier
* @bclink: broadcast-related info
- * @supportable: non-zero if node supports TIPC b'cast link capability
- * @supported: non-zero if node supports TIPC b'cast capability
* @acked: sequence # of last outbound b'cast message acknowledged by node
* @last_in: sequence # of last in-sequence b'cast message received from node
* @last_sent: sequence # of last b'cast message sent by node
@@ -77,6 +75,7 @@
* @deferred_head: oldest OOS b'cast message received from node
* @deferred_tail: newest OOS b'cast message received from node
* @defragm: list of partially reassembled b'cast message fragments from node
+ * @recv_permitted: true if node is allowed to receive b'cast messages
*/
struct tipc_node {
u32 addr;
@@ -92,8 +91,6 @@ struct tipc_node {
int permit_changeover;
u32 signature;
struct {
- u8 supportable;
- u8 supported;
u32 acked;
u32 last_in;
u32 last_sent;
@@ -102,6 +99,7 @@ struct tipc_node {
struct sk_buff *deferred_head;
struct sk_buff *deferred_tail;
struct sk_buff *defragm;
+ bool recv_permitted;
} bclink;
};
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index fd5f042dbff4..1a720c86e80a 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -62,6 +62,8 @@ struct tipc_sock {
static int backlog_rcv(struct sock *sk, struct sk_buff *skb);
static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf);
static void wakeupdispatch(struct tipc_port *tport);
+static void tipc_data_ready(struct sock *sk, int len);
+static void tipc_write_space(struct sock *sk);
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
@@ -221,6 +223,8 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
sock_init_data(sock, sk);
sk->sk_backlog_rcv = backlog_rcv;
sk->sk_rcvbuf = TIPC_FLOW_CONTROL_WIN * 2 * TIPC_MAX_USER_MSG_SIZE * 2;
+ sk->sk_data_ready = tipc_data_ready;
+ sk->sk_write_space = tipc_write_space;
tipc_sk(sk)->p = tp_ptr;
tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT;
@@ -408,7 +412,7 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
* socket state flags set
* ------------ ---------
* unconnected no read flags
- * no write flags
+ * POLLOUT if port is not congested
*
* connecting POLLIN/POLLRDNORM if ACK/NACK in rx queue
* no write flags
@@ -435,9 +439,13 @@ static unsigned int poll(struct file *file, struct socket *sock,
struct sock *sk = sock->sk;
u32 mask = 0;
- poll_wait(file, sk_sleep(sk), wait);
+ sock_poll_wait(file, sk_sleep(sk), wait);
switch ((int)sock->state) {
+ case SS_UNCONNECTED:
+ if (!tipc_sk_port(sk)->congested)
+ mask |= POLLOUT;
+ break;
case SS_READY:
case SS_CONNECTED:
if (!tipc_sk_port(sk)->congested)
@@ -1126,6 +1134,39 @@ exit:
}
/**
+ * tipc_write_space - wake up thread if port congestion is released
+ * @sk: socket
+ */
+static void tipc_write_space(struct sock *sk)
+{
+ struct socket_wq *wq;
+
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (wq_has_sleeper(wq))
+ wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
+ POLLWRNORM | POLLWRBAND);
+ rcu_read_unlock();
+}
+
+/**
+ * tipc_data_ready - wake up threads to indicate messages have been received
+ * @sk: socket
+ * @len: the length of messages
+ */
+static void tipc_data_ready(struct sock *sk, int len)
+{
+ struct socket_wq *wq;
+
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (wq_has_sleeper(wq))
+ wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
+ POLLRDNORM | POLLRDBAND);
+ rcu_read_unlock();
+}
+
+/**
* rx_queue_full - determine if receive queue can accept another message
* @msg: message to be added to queue
* @queue_size: current size of queue
@@ -1222,8 +1263,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
tipc_disconnect_port(tipc_sk_port(sk));
}
- if (waitqueue_active(sk_sleep(sk)))
- wake_up_interruptible(sk_sleep(sk));
+ sk->sk_data_ready(sk, 0);
return TIPC_OK;
}
@@ -1290,8 +1330,7 @@ static void wakeupdispatch(struct tipc_port *tport)
{
struct sock *sk = (struct sock *)tport->usr_handle;
- if (waitqueue_active(sk_sleep(sk)))
- wake_up_interruptible(sk_sleep(sk));
+ sk->sk_write_space(sk);
}
/**
@@ -1556,10 +1595,11 @@ restart:
case SS_DISCONNECTING:
- /* Discard any unreceived messages; wake up sleeping tasks */
+ /* Discard any unreceived messages */
discard_rx_queue(sk);
- if (waitqueue_active(sk_sleep(sk)))
- wake_up_interruptible(sk_sleep(sk));
+
+ /* Wake up anyone sleeping in poll */
+ sk->sk_state_change(sk);
res = 0;
break;
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 06748f108a57..5ac19dc1d5e4 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -151,6 +151,9 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO))
goto out_nlmsg_trim;
+ if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown))
+ goto out_nlmsg_trim;
+
return nlmsg_end(skb, nlh);
out_nlmsg_trim:
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index b34b5b9792f0..8800604c93f4 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -34,6 +34,10 @@ int __net_init unix_sysctl_register(struct net *net)
if (table == NULL)
goto err_alloc;
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ table[0].procname = NULL;
+
table[0].data = &net->unx.sysctl_max_dgram_qlen;
net->unx.ctl = register_net_sysctl(net, "net/unix", table);
if (net->unx.ctl == NULL)
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index e5246fbe36c4..2906d520eea7 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -276,18 +276,16 @@ static struct crypto_comp * __percpu *ipcomp_alloc_tfms(const char *alg_name)
struct crypto_comp * __percpu *tfms;
int cpu;
- /* This can be any valid CPU ID so we don't need locking. */
- cpu = raw_smp_processor_id();
list_for_each_entry(pos, &ipcomp_tfms_list, list) {
struct crypto_comp *tfm;
- tfms = pos->tfms;
- tfm = *per_cpu_ptr(tfms, cpu);
+ /* This can be any valid CPU ID so we don't need locking. */
+ tfm = __this_cpu_read(*pos->tfms);
if (!strcmp(crypto_comp_name(tfm), alg_name)) {
pos->users++;
- return tfms;
+ return pos->tfms;
}
}
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 3efb07d3eb27..765f6fe951eb 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -521,13 +521,12 @@ int xfrm_init_replay(struct xfrm_state *x)
replay_esn->bmp_len * sizeof(__u32) * 8)
return -EINVAL;
- if ((x->props.flags & XFRM_STATE_ESN) && replay_esn->replay_window == 0)
- return -EINVAL;
-
- if ((x->props.flags & XFRM_STATE_ESN) && x->replay_esn)
- x->repl = &xfrm_replay_esn;
- else
- x->repl = &xfrm_replay_bmp;
+ if (x->props.flags & XFRM_STATE_ESN) {
+ if (replay_esn->replay_window == 0)
+ return -EINVAL;
+ x->repl = &xfrm_replay_esn;
+ } else
+ x->repl = &xfrm_replay_bmp;
} else
x->repl = &xfrm_replay_legacy;
diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c
index 380976f74c4c..05a6e3d9c258 100644
--- a/net/xfrm/xfrm_sysctl.c
+++ b/net/xfrm/xfrm_sysctl.c
@@ -54,6 +54,10 @@ int __net_init xfrm_sysctl_init(struct net *net)
table[2].data = &net->xfrm.sysctl_larval_drop;
table[3].data = &net->xfrm.sysctl_acq_expires;
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ table[0].procname = NULL;
+
net->xfrm.sysctl_hdr = register_net_sysctl(net, "net/core", table);
if (!net->xfrm.sysctl_hdr)
goto out_register;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 421f98444335..eb872b2e366e 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2349,7 +2349,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
link = &xfrm_dispatch[type];
/* All operations require privileges, even GET */
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||