summary | refs | log | tree | commit | diff | stats
path: root/net/netfilter
diff options
context:
space:
mode:
author Linus Torvalds 2019-07-11 19:55:49 +0200
committer Linus Torvalds 2019-07-11 19:55:49 +0200
commit 237f83dfbe668443b5e31c3c7576125871cca674 (patch)
tree 11848a8d0aa414a1d3ce2024e181071b1d9dea08 /net/netfilter
parent Merge tag 'clone3-v5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/brau... (diff)
parent net/mlx5e: Return in default case statement in tx_post_resync_params (diff)
download kernel-qcow2-linux-237f83dfbe668443b5e31c3c7576125871cca674.tar.gz
kernel-qcow2-linux-237f83dfbe668443b5e31c3c7576125871cca674.tar.xz
kernel-qcow2-linux-237f83dfbe668443b5e31c3c7576125871cca674.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Some highlights from this development cycle: 1) Big refactoring of ipv6 route and neigh handling to support nexthop objects configurable as units from userspace. From David Ahern. 2) Convert explored_states in BPF verifier into a hash table, significantly decreased state held for programs with bpf2bpf calls, from Alexei Starovoitov. 3) Implement bpf_send_signal() helper, from Yonghong Song. 4) Various classifier enhancements to mvpp2 driver, from Maxime Chevallier. 5) Add aRFS support to hns3 driver, from Jian Shen. 6) Fix use after free in inet frags by allocating fqdirs dynamically and reworking how rhashtable dismantle occurs, from Eric Dumazet. 7) Add act_ctinfo packet classifier action, from Kevin Darbyshire-Bryant. 8) Add TFO key backup infrastructure, from Jason Baron. 9) Remove several old and unused ISDN drivers, from Arnd Bergmann. 10) Add devlink notifications for flash update status to mlxsw driver, from Jiri Pirko. 11) Lots of kTLS offload infrastructure fixes, from Jakub Kicinski. 12) Add support for mv88e6250 DSA chips, from Rasmus Villemoes. 13) Various enhancements to ipv6 flow label handling, from Eric Dumazet and Willem de Bruijn. 14) Support TLS offload in nfp driver, from Jakub Kicinski, Dirk van der Merwe, and others. 15) Various improvements to axienet driver including converting it to phylink, from Robert Hancock. 16) Add PTP support to sja1105 DSA driver, from Vladimir Oltean. 17) Add mqprio qdisc offload support to dpaa2-eth, from Ioana Radulescu. 18) Add devlink health reporting to mlx5, from Moshe Shemesh. 19) Convert stmmac over to phylink, from Jose Abreu. 20) Add PTP PHC (Physical Hardware Clock) support to mlxsw, from Shalom Toledo. 21) Add nftables SYNPROXY support, from Fernando Fernandez Mancera. 22) Convert tcp_fastopen over to use SipHash, from Ard Biesheuvel. 23) Track spill/fill of constants in BPF verifier, from Alexei Starovoitov. 
24) Support bounded loops in BPF, from Alexei Starovoitov. 25) Various page_pool API fixes and improvements, from Jesper Dangaard Brouer. 26) Just like ipv4, support ref-countless ipv6 route handling. From Wei Wang. 27) Support VLAN offloading in aquantia driver, from Igor Russkikh. 28) Add AF_XDP zero-copy support to mlx5, from Maxim Mikityanskiy. 29) Add flower GRE encap/decap support to nfp driver, from Pieter Jansen van Vuuren. 30) Protect against stack overflow when using act_mirred, from John Hurley. 31) Allow devmap map lookups from eBPF, from Toke Høiland-Jørgensen. 32) Use page_pool API in netsec driver, Ilias Apalodimas. 33) Add Google gve network driver, from Catherine Sullivan. 34) More indirect call avoidance, from Paolo Abeni. 35) Add kTLS TX HW offload support to mlx5, from Tariq Toukan. 36) Add XDP_REDIRECT support to bnxt_en, from Andy Gospodarek. 37) Add MPLS manipulation actions to TC, from John Hurley. 38) Add sending a packet to connection tracking from TC actions, and then allow flower classifier matching on conntrack state. From Paul Blakey. 39) Netfilter hw offload support, from Pablo Neira Ayuso" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2080 commits) net/mlx5e: Return in default case statement in tx_post_resync_params mlx5: Return -EINVAL when WARN_ON_ONCE triggers in mlx5e_tls_resync(). 
net: dsa: add support for BRIDGE_MROUTER attribute pkt_sched: Include const.h net: netsec: remove static declaration for netsec_set_tx_de() net: netsec: remove superfluous if statement netfilter: nf_tables: add hardware offload support net: flow_offload: rename tc_cls_flower_offload to flow_cls_offload net: flow_offload: add flow_block_cb_is_busy() and use it net: sched: remove tcf block API drivers: net: use flow block API net: sched: use flow block API net: flow_offload: add flow_block_cb_{priv, incref, decref}() net: flow_offload: add list handling functions net: flow_offload: add flow_block_cb_alloc() and flow_block_cb_free() net: flow_offload: rename TCF_BLOCK_BINDER_TYPE_* to FLOW_BLOCK_BINDER_TYPE_* net: flow_offload: rename TC_BLOCK_{UN}BIND to FLOW_BLOCK_{UN}BIND net: flow_offload: add flow_block_cb_setup_simple() net: hisilicon: Add an tx_desc to adapt HI13X1_GMAC net: hisilicon: Add an rx_desc to adapt HI13X1_GMAC ...
Diffstat (limited to 'net/netfilter')
-rw-r--r-- net/netfilter/Kconfig 11
-rw-r--r-- net/netfilter/Makefile 3
-rw-r--r-- net/netfilter/core.c 24
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_gen.h 3
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_ip.c 4
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_ipmac.c 3
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_port.c 5
-rw-r--r-- net/netfilter/ipset/ip_set_core.c 97
-rw-r--r-- net/netfilter/ipset/ip_set_getport.c 6
-rw-r--r-- net/netfilter/ipset/ip_set_hash_gen.h 5
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ip.c 5
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipmark.c 4
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipport.c 5
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipportip.c 5
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipportnet.c 5
-rw-r--r-- net/netfilter/ipset/ip_set_hash_mac.c 5
-rw-r--r-- net/netfilter/ipset/ip_set_hash_net.c 5
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netiface.c 5
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netnet.c 2
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netport.c 5
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netportnet.c 3
-rw-r--r-- net/netfilter/ipset/ip_set_list_set.c 5
-rw-r--r-- net/netfilter/ipvs/ip_vs_app.c 4
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c 131
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c 88
-rw-r--r-- net/netfilter/ipvs/ip_vs_ftp.c 4
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_sctp.c 4
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_tcp.c 4
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_udp.c 4
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c 134
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c 215
-rw-r--r-- net/netfilter/nf_conntrack_broadcast.c 9
-rw-r--r-- net/netfilter/nf_conntrack_core.c 25
-rw-r--r-- net/netfilter/nf_conntrack_h323_main.c 2
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c 7
-rw-r--r-- net/netfilter/nf_conntrack_proto.c 126
-rw-r--r-- net/netfilter/nf_conntrack_proto_icmp.c 2
-rw-r--r-- net/netfilter/nf_conntrack_proto_sctp.c 2
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c 2
-rw-r--r-- net/netfilter/nf_conntrack_seqadj.c 4
-rw-r--r-- net/netfilter/nf_flow_table_core.c 1
-rw-r--r-- net/netfilter/nf_log.c 2
-rw-r--r-- net/netfilter/nf_nat_helper.c 4
-rw-r--r-- net/netfilter/nf_nat_proto.c 26
-rw-r--r-- net/netfilter/nf_nat_redirect.c 12
-rw-r--r-- net/netfilter/nf_nat_sip.c 2
-rw-r--r-- net/netfilter/nf_queue.c 14
-rw-r--r-- net/netfilter/nf_synproxy_core.c 898
-rw-r--r-- net/netfilter/nf_tables_api.c 127
-rw-r--r-- net/netfilter/nf_tables_core.c 1
-rw-r--r-- net/netfilter/nf_tables_offload.c 267
-rw-r--r-- net/netfilter/nfnetlink_osf.c 5
-rw-r--r-- net/netfilter/nfnetlink_queue.c 2
-rw-r--r-- net/netfilter/nft_cmp.c 53
-rw-r--r-- net/netfilter/nft_ct.c 142
-rw-r--r-- net/netfilter/nft_dynset.c 2
-rw-r--r-- net/netfilter/nft_exthdr.c 136
-rw-r--r-- net/netfilter/nft_immediate.c 31
-rw-r--r-- net/netfilter/nft_meta.c 112
-rw-r--r-- net/netfilter/nft_payload.c 193
-rw-r--r-- net/netfilter/nft_synproxy.c 287
-rw-r--r-- net/netfilter/utils.c 5
-rw-r--r-- net/netfilter/xt_DSCP.c 8
-rw-r--r-- net/netfilter/xt_HL.c 4
-rw-r--r-- net/netfilter/xt_TCPMSS.c 2
-rw-r--r-- net/netfilter/xt_TCPOPTSTRIP.c 28
-rw-r--r-- net/netfilter/xt_iprange.c 4
-rw-r--r-- net/netfilter/xt_owner.c 26
-rw-r--r-- net/netfilter/xt_set.c 45
69 files changed, 2909 insertions, 512 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 1837734ce85b..32a45c03786e 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -651,6 +651,17 @@ config NFT_TPROXY
help
This makes transparent proxy support available in nftables.
+config NFT_SYNPROXY
+ tristate "Netfilter nf_tables SYNPROXY expression support"
+ depends on NF_CONNTRACK && NETFILTER_ADVANCED
+ select NETFILTER_SYNPROXY
+ select SYN_COOKIES
+ help
+ The SYNPROXY expression allows you to intercept TCP connections and
+ establish them using syncookies before they are passed on to the
+ server. This allows to avoid conntrack and server resource usage
+ during SYN-flood attacks.
+
if NF_TABLES_NETDEV
config NF_DUP_NETDEV
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 72cca6b48960..9270a7fae484 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -78,7 +78,7 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \
nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o \
- nft_chain_route.o
+ nft_chain_route.o nf_tables_offload.o
nf_tables_set-objs := nf_tables_set_core.o \
nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o
@@ -110,6 +110,7 @@ obj-$(CONFIG_NFT_SOCKET) += nft_socket.o
obj-$(CONFIG_NFT_OSF) += nft_osf.o
obj-$(CONFIG_NFT_TPROXY) += nft_tproxy.o
obj-$(CONFIG_NFT_XFRM) += nft_xfrm.o
+obj-$(CONFIG_NFT_SYNPROXY) += nft_synproxy.o
obj-$(CONFIG_NFT_NAT) += nft_chain_nat.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index b96fd3f54705..5d5bdf450091 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -520,7 +520,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
ret = -EPERM;
return ret;
case NF_QUEUE:
- ret = nf_queue(skb, state, e, s, verdict);
+ ret = nf_queue(skb, state, s, verdict);
if (ret == 1)
continue;
return ret;
@@ -536,28 +536,6 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
}
EXPORT_SYMBOL(nf_hook_slow);
-
-int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
-{
- if (writable_len > skb->len)
- return 0;
-
- /* Not exclusive use of packet? Must copy. */
- if (!skb_cloned(skb)) {
- if (writable_len <= skb_headlen(skb))
- return 1;
- } else if (skb_clone_writable(skb, writable_len))
- return 1;
-
- if (writable_len <= skb_headlen(skb))
- writable_len = 0;
- else
- writable_len -= skb_headlen(skb);
-
- return !!__pskb_pull_tail(skb, writable_len);
-}
-EXPORT_SYMBOL(skb_make_writable);
-
/* This needs to be compiled in any case to avoid dependencies between the
* nfnetlink_queue code and nf_conntrack.
*/
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 8acc4e173167..063df74b4647 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -1,6 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- */
+/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
#ifndef __IP_SET_BITMAP_IP_GEN_H
#define __IP_SET_BITMAP_IP_GEN_H
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index e3884b0cca91..11ff9d4a7006 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
* Patrick Schaaf <bof@bof.de>
- * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
*/
/* Kernel module implementing an IP set type: the bitmap:ip type */
@@ -28,7 +28,7 @@
#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
IP_SET_MODULE_DESC("bitmap:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_bitmap:ip");
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index b73c37b3a791..ca7ac4a25ada 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -2,7 +2,6 @@
/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
* Patrick Schaaf <bof@bof.de>
* Martin Josefsson <gandalf@wlug.westbo.se>
- * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*/
/* Kernel module implementing an IP set type: the bitmap:ip,mac type */
@@ -28,7 +27,7 @@
#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_bitmap:ip,mac");
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index d8c140553379..704a0dda1609 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- */
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
/* Kernel module implementing an IP set type: the bitmap:port type */
@@ -23,7 +22,7 @@
#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
IP_SET_MODULE_DESC("bitmap:port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_bitmap:port");
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 3cdf171cd468..2e151856ad99 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
* Patrick Schaaf <bof@bof.de>
- * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
*/
/* Kernel module for IP set management */
@@ -48,7 +48,7 @@ static unsigned int max_sets;
module_param(max_sets, int, 0600);
MODULE_PARM_DESC(max_sets, "maximal number of sets");
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
MODULE_DESCRIPTION("core IP set support");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
@@ -1290,11 +1290,13 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
struct nlattr *attr = (void *)nlh + min_len;
u32 dump_type;
ip_set_id_t index;
+ int ret;
- /* Second pass, so parser can't fail */
- nla_parse_deprecated(cda, IPSET_ATTR_CMD_MAX, attr,
- nlh->nlmsg_len - min_len, ip_set_setname_policy,
- NULL);
+ ret = nla_parse_deprecated(cda, IPSET_ATTR_CMD_MAX, attr,
+ nlh->nlmsg_len - min_len,
+ ip_set_setname_policy, NULL);
+ if (ret)
+ return ret;
cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]);
if (cda[IPSET_ATTR_SETNAME]) {
@@ -1541,10 +1543,14 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
cmdattr = (void *)&errmsg->msg + min_len;
- nla_parse_deprecated(cda, IPSET_ATTR_CMD_MAX, cmdattr,
- nlh->nlmsg_len - min_len,
- ip_set_adt_policy, NULL);
+ ret = nla_parse_deprecated(cda, IPSET_ATTR_CMD_MAX, cmdattr,
+ nlh->nlmsg_len - min_len,
+ ip_set_adt_policy, NULL);
+ if (ret) {
+ nlmsg_free(skb2);
+ return ret;
+ }
errline = nla_data(cda[IPSET_ATTR_LINENO]);
*errline = lineno;
@@ -1558,10 +1564,12 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
return ret;
}
-static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+static int ip_set_ad(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ enum ipset_adt adt,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack)
{
struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set;
@@ -1590,18 +1598,17 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
if (attr[IPSET_ATTR_DATA]) {
if (nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
- ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags,
+ ret = call_ad(ctnl, skb, set, tb, adt, flags,
use_lineno);
} else {
int nla_rem;
nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
- memset(tb, 0, sizeof(tb));
if (nla_type(nla) != IPSET_ATTR_DATA ||
!flag_nested(nla) ||
nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, nla, set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
- ret = call_ad(ctnl, skb, set, tb, IPSET_ADD,
+ ret = call_ad(ctnl, skb, set, tb, adt,
flags, use_lineno);
if (ret < 0)
return ret;
@@ -1610,56 +1617,22 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
return ret;
}
-static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int ip_set_uadd(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const attr[],
struct netlink_ext_ack *extack)
{
- struct ip_set_net *inst = ip_set_pernet(net);
- struct ip_set *set;
- struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
- const struct nlattr *nla;
- u32 flags = flag_exist(nlh);
- bool use_lineno;
- int ret = 0;
-
- if (unlikely(protocol_min_failed(attr) ||
- !attr[IPSET_ATTR_SETNAME] ||
- !((attr[IPSET_ATTR_DATA] != NULL) ^
- (attr[IPSET_ATTR_ADT] != NULL)) ||
- (attr[IPSET_ATTR_DATA] &&
- !flag_nested(attr[IPSET_ATTR_DATA])) ||
- (attr[IPSET_ATTR_ADT] &&
- (!flag_nested(attr[IPSET_ATTR_ADT]) ||
- !attr[IPSET_ATTR_LINENO]))))
- return -IPSET_ERR_PROTOCOL;
-
- set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
- if (!set)
- return -ENOENT;
-
- use_lineno = !!attr[IPSET_ATTR_LINENO];
- if (attr[IPSET_ATTR_DATA]) {
- if (nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL))
- return -IPSET_ERR_PROTOCOL;
- ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags,
- use_lineno);
- } else {
- int nla_rem;
+ return ip_set_ad(net, ctnl, skb,
+ IPSET_ADD, nlh, attr, extack);
+}
- nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
- memset(tb, 0, sizeof(*tb));
- if (nla_type(nla) != IPSET_ATTR_DATA ||
- !flag_nested(nla) ||
- nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, nla, set->type->adt_policy, NULL))
- return -IPSET_ERR_PROTOCOL;
- ret = call_ad(ctnl, skb, set, tb, IPSET_DEL,
- flags, use_lineno);
- if (ret < 0)
- return ret;
- }
- }
- return ret;
+static int ip_set_udel(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack)
+{
+ return ip_set_ad(net, ctnl, skb,
+ IPSET_DEL, nlh, attr, extack);
}
static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 2384e36aef5c..2b8f959574b4 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -1,5 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
*/
/* Get Layer-4 data from the packets */
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 10f619625abd..0feb77fa9edc 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -1,6 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- */
+/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
#ifndef _IP_SET_HASH_GEN_H
#define _IP_SET_HASH_GEN_H
@@ -622,7 +621,7 @@ retry:
goto cleanup;
}
m->size = AHASH_INIT_SIZE;
- extsize = ext_size(AHASH_INIT_SIZE, dsize);
+ extsize += ext_size(AHASH_INIT_SIZE, dsize);
RCU_INIT_POINTER(hbucket(t, key), m);
} else if (m->pos >= m->size) {
struct hbucket *ht;
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 69d7576be2e6..f4432d9fcad0 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- */
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
/* Kernel module implementing an IP set type: the hash:ip type */
@@ -27,7 +26,7 @@
#define IPSET_TYPE_REV_MAX 4 /* skbinfo support */
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
IP_SET_MODULE_DESC("hash:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:ip");
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 6fe1ec0d2154..7a1734aad0c5 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- * Copyright (C) 2013 Smoothwall Ltd. <vytas.dauksa@smoothwall.net>
- */
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
/* Kernel module implementing an IP set type: the hash:ip,mark type */
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 74ec7e097e34..32e240658334 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- */
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
/* Kernel module implementing an IP set type: the hash:ip,port type */
@@ -29,7 +28,7 @@
#define IPSET_TYPE_REV_MAX 5 /* skbinfo support added */
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
IP_SET_MODULE_DESC("hash:ip,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:ip,port");
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index ced57d63b01f..15d419353179 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- */
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
/* Kernel module implementing an IP set type: the hash:ip,port,ip type */
@@ -29,7 +28,7 @@
#define IPSET_TYPE_REV_MAX 5 /* skbinfo support added */
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
IP_SET_MODULE_DESC("hash:ip,port,ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:ip,port,ip");
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 905f6cf0f55e..7a4d7afd4121 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- */
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
/* Kernel module implementing an IP set type: the hash:ip,port,net type */
@@ -31,7 +30,7 @@
#define IPSET_TYPE_REV_MAX 7 /* skbinfo support added */
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
IP_SET_MODULE_DESC("hash:ip,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:ip,port,net");
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index 853e772ab4d9..d94c585d33c5 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2014 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- */
+/* Copyright (C) 2014 Jozsef Kadlecsik <kadlec@netfilter.org> */
/* Kernel module implementing an IP set type: the hash:mac type */
@@ -20,7 +19,7 @@
#define IPSET_TYPE_REV_MAX 0
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
IP_SET_MODULE_DESC("hash:mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:mac");
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 06c91e49bf25..c259cbc3ef45 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- */
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
/* Kernel module implementing an IP set type: the hash:net type */
@@ -28,7 +27,7 @@
#define IPSET_TYPE_REV_MAX 6 /* skbinfo mapping support added */
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
IP_SET_MODULE_DESC("hash:net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:net");
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 0a8cbcdfb42b..87b29f971226 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2011-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- */
+/* Copyright (C) 2011-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
/* Kernel module implementing an IP set type: the hash:net,iface type */
@@ -29,7 +28,7 @@
#define IPSET_TYPE_REV_MAX 6 /* skbinfo support added */
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:net,iface");
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 832e4f5491cb..a3ae69bfee66 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
* Copyright (C) 2013 Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>
*/
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index a4f3f15b874a..799f2272cc65 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- */
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
/* Kernel module implementing an IP set type: the hash:net,port type */
@@ -30,7 +29,7 @@
#define IPSET_TYPE_REV_MAX 7 /* skbinfo support added */
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
IP_SET_MODULE_DESC("hash:net,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:net,port");
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index e54d415405f3..a82b70e8b9a6 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- */
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
/* Kernel module implementing an IP set type: the hash:ip,port,net type */
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 8ada318bf09d..6f9ead6319e0 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2008-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- */
+/* Copyright (C) 2008-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
/* Kernel module implementing an IP set type: the list:set type */
@@ -19,7 +18,7 @@
#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
IP_SET_MODULE_DESC("list:set", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_list:set");
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index bfd4365a8d73..4515056ef1c2 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -358,7 +358,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
struct tcphdr *th;
__u32 seq;
- if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
+ if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
return 0;
th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
@@ -435,7 +435,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
struct tcphdr *th;
__u32 seq;
- if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
+ if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
return 0;
th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 7138556b206b..46f06f92ab8f 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -34,6 +34,8 @@
#include <net/tcp.h>
#include <net/udp.h>
#include <net/icmp.h> /* for icmp_send */
+#include <net/gue.h>
+#include <net/gre.h>
#include <net/route.h>
#include <net/ip6_checksum.h>
#include <net/netns/generic.h> /* net_generic() */
@@ -892,7 +894,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
IPPROTO_SCTP == protocol)
offset += 2 * sizeof(__u16);
- if (!skb_make_writable(skb, offset))
+ if (skb_ensure_writable(skb, offset))
goto out;
#ifdef CONFIG_IP_VS_IPV6
@@ -1282,7 +1284,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet");
- if (!skb_make_writable(skb, iph->len))
+ if (skb_ensure_writable(skb, iph->len))
goto drop;
/* mangle the packet */
@@ -1574,6 +1576,73 @@ ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
return 1;
}
+/* Check the UDP (GUE) tunnel at @offset; return its header length and set *proto, or return 0 if unknown */
+static int ipvs_udp_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ unsigned int offset, __u16 af,
+ const union nf_inet_addr *daddr, __u8 *proto)
+{
+ struct udphdr _udph, *udph;
+ struct ip_vs_dest *dest;
+
+ udph = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
+ if (!udph)
+ goto unk;
+ offset += sizeof(struct udphdr); /* step past UDP to the encapsulation header */
+ dest = ip_vs_find_tunnel(ipvs, af, daddr, udph->dest); /* tunnel dest keyed by <af, addr, UDP dest port> */
+ if (!dest)
+ goto unk;
+ if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ struct guehdr _gueh, *gueh;
+
+ gueh = skb_header_pointer(skb, offset, sizeof(_gueh), &_gueh);
+ if (!gueh)
+ goto unk;
+ if (gueh->control != 0 || gueh->version != 0) /* accept only control == 0 and version == 0 */
+ goto unk;
+ /* Later we can support also IPPROTO_IPV6 */
+ if (gueh->proto_ctype != IPPROTO_IPIP)
+ goto unk;
+ *proto = gueh->proto_ctype; /* *proto is written only on this success path */
+ return sizeof(struct udphdr) + sizeof(struct guehdr) +
+ (gueh->hlen << 2); /* hlen is in 4-byte units (ipvs_gue_encap stores optlen >> 2) */
+ }
+
+unk: /* also reached when a dest is found but it is not a GUE tunnel */
+ return 0;
+}
+
+/* Check the GRE tunnel at @offset; return its header length and set *proto, or return 0 if unknown */
+static int ipvs_gre_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ unsigned int offset, __u16 af,
+ const union nf_inet_addr *daddr, __u8 *proto)
+{
+ struct gre_base_hdr _greh, *greh;
+ struct ip_vs_dest *dest;
+
+ greh = skb_header_pointer(skb, offset, sizeof(_greh), &_greh);
+ if (!greh)
+ goto unk;
+ dest = ip_vs_find_tunnel(ipvs, af, daddr, 0); /* no port for GRE: ip_vs_rs_hash() hashes GRE dests with port 0 */
+ if (!dest)
+ goto unk;
+ if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ __be16 type;
+
+ /* Only support version 0 and C (csum) */
+ if ((greh->flags & ~GRE_CSUM) != 0) /* any flag bit other than GRE_CSUM rejects the packet */
+ goto unk;
+ type = greh->protocol;
+ /* Later we can support also IPPROTO_IPV6 */
+ if (type != htons(ETH_P_IP))
+ goto unk;
+ *proto = IPPROTO_IPIP; /* inner payload is IPv4; *proto is written only on this success path */
+ return gre_calc_hlen(gre_flags_to_tnl_flags(greh->flags)); /* length depends on the flags (csum or none) */
+ }
+
+unk: /* also reached when a dest is found but it is not a GRE tunnel */
+ return 0;
+}
+
/*
* Handle ICMP messages in the outside-to-inside direction (incoming).
* Find any that might be relevant, check against existing connections,
@@ -1593,6 +1662,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
struct ip_vs_proto_data *pd;
unsigned int offset, offset2, ihl, verdict;
bool ipip, new_cp = false;
+ union nf_inet_addr *raddr;
*related = 1;
@@ -1631,20 +1701,56 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
+ raddr = (union nf_inet_addr *)&cih->daddr;
/* Special case for errors for IPIP packets */
ipip = false;
if (cih->protocol == IPPROTO_IPIP) {
+ struct ip_vs_dest *dest;
+
if (unlikely(cih->frag_off & htons(IP_OFFSET)))
return NF_ACCEPT;
/* Error for our IPIP must arrive at LOCAL_IN */
if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL))
return NF_ACCEPT;
+ dest = ip_vs_find_tunnel(ipvs, AF_INET, raddr, 0);
+ /* Only for known tunnel */
+ if (!dest || dest->tun_type != IP_VS_CONN_F_TUNNEL_TYPE_IPIP)
+ return NF_ACCEPT;
offset += cih->ihl * 4;
cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
ipip = true;
+ } else if ((cih->protocol == IPPROTO_UDP || /* Can be UDP encap */
+ cih->protocol == IPPROTO_GRE) && /* Can be GRE encap */
+ /* Error for our tunnel must arrive at LOCAL_IN */
+ (skb_rtable(skb)->rt_flags & RTCF_LOCAL)) {
+ __u8 iproto;
+ int ulen;
+
+ /* Non-first fragment has no UDP header */
+ if (unlikely(cih->frag_off & htons(IP_OFFSET)))
+ return NF_ACCEPT;
+ offset2 = offset + cih->ihl * 4;
+ if (cih->protocol == IPPROTO_UDP)
+ ulen = ipvs_udp_decap(ipvs, skb, offset2, AF_INET,
+ raddr, &iproto);
+ else
+ ulen = ipvs_gre_decap(ipvs, skb, offset2, AF_INET,
+ raddr, &iproto);
+ if (ulen > 0) {
+ /* Skip IP and UDP/GRE tunnel headers */
+ offset = offset2 + ulen;
+ /* Now we should be at the original IP header */
+ cih = skb_header_pointer(skb, offset, sizeof(_ciph),
+ &_ciph);
+ if (cih && cih->version == 4 && cih->ihl >= 5 &&
+ iproto == IPPROTO_IPIP)
+ ipip = true;
+ else
+ return NF_ACCEPT;
+ }
}
pd = ip_vs_proto_data_get(ipvs, cih->protocol);
@@ -2245,7 +2351,6 @@ static const struct nf_hook_ops ip_vs_ops[] = {
static int __net_init __ip_vs_init(struct net *net)
{
struct netns_ipvs *ipvs;
- int ret;
ipvs = net_generic(net, ip_vs_net_id);
if (ipvs == NULL)
@@ -2277,17 +2382,11 @@ static int __net_init __ip_vs_init(struct net *net)
if (ip_vs_sync_net_init(ipvs) < 0)
goto sync_fail;
- ret = nf_register_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
- if (ret < 0)
- goto hook_fail;
-
return 0;
/*
* Error handling
*/
-hook_fail:
- ip_vs_sync_net_cleanup(ipvs);
sync_fail:
ip_vs_conn_net_cleanup(ipvs);
conn_fail:
@@ -2317,6 +2416,19 @@ static void __net_exit __ip_vs_cleanup(struct net *net)
net->ipvs = NULL;
}
+static int __net_init __ip_vs_dev_init(struct net *net) /* pernet .init: register the ip_vs_ops netfilter hooks for @net */
+{
+ int ret;
+
+ ret = nf_register_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+ if (ret < 0)
+ goto hook_fail;
+ return 0; /* any non-negative result is reported as 0 */
+
+hook_fail: /* nothing else was set up here, so just propagate the negative error */
+ return ret;
+}
+
static void __net_exit __ip_vs_dev_cleanup(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
@@ -2336,6 +2448,7 @@ static struct pernet_operations ipvs_core_ops = {
};
static struct pernet_operations ipvs_core_dev_ops = {
+ .init = __ip_vs_dev_init, /* registers the ip_vs_ops netfilter hooks for the netns */
.exit = __ip_vs_dev_cleanup,
};
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 776c87ed4813..07e0967bf129 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -510,15 +510,37 @@ static inline unsigned int ip_vs_rs_hashkey(int af,
static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
{
unsigned int hash;
+ __be16 port;
if (dest->in_rs_table)
return;
+ switch (IP_VS_DFWD_METHOD(dest)) {
+ case IP_VS_CONN_F_MASQ:
+ port = dest->port;
+ break;
+ case IP_VS_CONN_F_TUNNEL:
+ switch (dest->tun_type) {
+ case IP_VS_CONN_F_TUNNEL_TYPE_GUE:
+ port = dest->tun_port;
+ break;
+ case IP_VS_CONN_F_TUNNEL_TYPE_IPIP:
+ case IP_VS_CONN_F_TUNNEL_TYPE_GRE:
+ port = 0;
+ break;
+ default:
+ return;
+ }
+ break;
+ default:
+ return;
+ }
+
/*
* Hash by proto,addr,port,
* which are the parameters of the real service.
*/
- hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
+ hash = ip_vs_rs_hashkey(dest->af, &dest->addr, port);
hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
dest->in_rs_table = 1;
@@ -550,7 +572,8 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
if (dest->port == dport &&
dest->af == af &&
ip_vs_addr_equal(af, &dest->addr, daddr) &&
- (dest->protocol == protocol || dest->vfwmark)) {
+ (dest->protocol == protocol || dest->vfwmark) &&
+ IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
/* HIT */
return true;
}
@@ -580,7 +603,37 @@ struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af,
if (dest->port == dport &&
dest->af == af &&
ip_vs_addr_equal(af, &dest->addr, daddr) &&
- (dest->protocol == protocol || dest->vfwmark)) {
+ (dest->protocol == protocol || dest->vfwmark) &&
+ IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
+ /* HIT */
+ return dest;
+ }
+ }
+
+ return NULL;
+}
+
+/* Find real service record by <af,addr,tun_port>.
+ * In case of multiple records with the same <af,addr,tun_port>, only
+ * the first found record is returned.
+ *
+ * To be called under RCU lock.
+ */
+struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af,
+ const union nf_inet_addr *daddr,
+ __be16 tun_port)
+{
+ struct ip_vs_dest *dest;
+ unsigned int hash;
+
+ /* Check for "full" addressed entries */
+ hash = ip_vs_rs_hashkey(af, daddr, tun_port);
+
+ hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
+ if (dest->tun_port == tun_port &&
+ dest->af == af &&
+ ip_vs_addr_equal(af, &dest->addr, daddr) &&
+ IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_TUNNEL) {
/* HIT */
return dest;
}
@@ -826,24 +879,29 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
conn_flags |= IP_VS_CONN_F_INACTIVE;
+ /* Need to rehash? */
+ if ((udest->conn_flags & IP_VS_CONN_F_FWD_MASK) !=
+ IP_VS_DFWD_METHOD(dest) ||
+ udest->tun_type != dest->tun_type ||
+ udest->tun_port != dest->tun_port)
+ ip_vs_rs_unhash(dest);
+
/* set the tunnel info */
dest->tun_type = udest->tun_type;
dest->tun_port = udest->tun_port;
+ dest->tun_flags = udest->tun_flags;
/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
conn_flags |= IP_VS_CONN_F_NOOUTPUT;
} else {
- /*
- * Put the real service in rs_table if not present.
- * For now only for NAT!
- */
- ip_vs_rs_hash(ipvs, dest);
/* FTP-NAT requires conntrack for mangling */
if (svc->port == FTPPORT)
ip_vs_register_conntrack(svc);
}
atomic_set(&dest->conn_flags, conn_flags);
+ /* Put the real service in rs_table if not present. */
+ ip_vs_rs_hash(ipvs, dest);
/* bind the service */
old_svc = rcu_dereference_protected(dest->svc, 1);
@@ -2396,9 +2454,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
cfg.syncid = dm->syncid;
ret = start_sync_thread(ipvs, &cfg, dm->state);
} else {
- mutex_lock(&ipvs->sync_mutex);
ret = stop_sync_thread(ipvs, dm->state);
- mutex_unlock(&ipvs->sync_mutex);
}
goto out_dec;
}
@@ -2906,6 +2962,7 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
[IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 },
[IPVS_DEST_ATTR_TUN_TYPE] = { .type = NLA_U8 },
[IPVS_DEST_ATTR_TUN_PORT] = { .type = NLA_U16 },
+ [IPVS_DEST_ATTR_TUN_FLAGS] = { .type = NLA_U16 },
};
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
@@ -3212,6 +3269,8 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
dest->tun_type) ||
nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT,
dest->tun_port) ||
+ nla_put_u16(skb, IPVS_DEST_ATTR_TUN_FLAGS,
+ dest->tun_flags) ||
nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
@@ -3332,7 +3391,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
/* If a full entry was requested, check for the additional fields */
if (full_entry) {
struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
- *nla_l_thresh, *nla_tun_type, *nla_tun_port;
+ *nla_l_thresh, *nla_tun_type, *nla_tun_port,
+ *nla_tun_flags;
nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
@@ -3340,6 +3400,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
nla_tun_type = attrs[IPVS_DEST_ATTR_TUN_TYPE];
nla_tun_port = attrs[IPVS_DEST_ATTR_TUN_PORT];
+ nla_tun_flags = attrs[IPVS_DEST_ATTR_TUN_FLAGS];
if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
return -EINVAL;
@@ -3355,6 +3416,9 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
if (nla_tun_port)
udest->tun_port = nla_get_be16(nla_tun_port);
+
+ if (nla_tun_flags)
+ udest->tun_flags = nla_get_u16(nla_tun_flags);
}
return 0;
@@ -3515,10 +3579,8 @@ static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
if (!attrs[IPVS_DAEMON_ATTR_STATE])
return -EINVAL;
- mutex_lock(&ipvs->sync_mutex);
ret = stop_sync_thread(ipvs,
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
- mutex_unlock(&ipvs->sync_mutex);
return ret;
}
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index c244b2545e24..cf925906f59b 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -267,7 +267,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
return 1;
/* Linear packets are much easier to deal with. */
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, skb->len))
return 0;
if (cp->app_data == (void *) IP_VS_FTP_PASV) {
@@ -433,7 +433,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
return 1;
/* Linear packets are much easier to deal with. */
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, skb->len))
return 0;
data = data_start = ip_vs_ftp_data_ptr(skb, ipvsh);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index b58ddb7dffd1..a0921adc31a9 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -101,7 +101,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
#endif
/* csum_check requires unshared skb */
- if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
+ if (skb_ensure_writable(skb, sctphoff + sizeof(*sctph)))
return 0;
if (unlikely(cp->app != NULL)) {
@@ -148,7 +148,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
#endif
/* csum_check requires unshared skb */
- if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
+ if (skb_ensure_writable(skb, sctphoff + sizeof(*sctph)))
return 0;
if (unlikely(cp->app != NULL)) {
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 915ac8206076..000d961b97e4 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -159,7 +159,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
oldlen = skb->len - tcphoff;
/* csum_check requires unshared skb */
- if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
+ if (skb_ensure_writable(skb, tcphoff + sizeof(*tcph)))
return 0;
if (unlikely(cp->app != NULL)) {
@@ -237,7 +237,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
oldlen = skb->len - tcphoff;
/* csum_check requires unshared skb */
- if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
+ if (skb_ensure_writable(skb, tcphoff + sizeof(*tcph)))
return 0;
if (unlikely(cp->app != NULL)) {
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 379140075e95..153d89647c87 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -148,7 +148,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
oldlen = skb->len - udphoff;
/* csum_check requires unshared skb */
- if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
+ if (skb_ensure_writable(skb, udphoff + sizeof(*udph)))
return 0;
if (unlikely(cp->app != NULL)) {
@@ -231,7 +231,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
oldlen = skb->len - udphoff;
/* csum_check requires unshared skb */
- if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
+ if (skb_ensure_writable(skb, udphoff + sizeof(*udph)))
return 0;
if (unlikely(cp->app != NULL)) {
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 2526be6b3d90..a4a78c4b06de 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -195,6 +195,7 @@ union ip_vs_sync_conn {
#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
struct ip_vs_sync_thread_data {
+ struct task_struct *task;
struct netns_ipvs *ipvs;
struct socket *sock;
char *buf;
@@ -374,8 +375,11 @@ static inline void sb_queue_tail(struct netns_ipvs *ipvs,
max(IPVS_SYNC_SEND_DELAY, 1));
ms->sync_queue_len++;
list_add_tail(&sb->list, &ms->sync_queue);
- if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE)
- wake_up_process(ms->master_thread);
+ if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE) {
+ int id = (int)(ms - ipvs->ms);
+
+ wake_up_process(ipvs->master_tinfo[id].task);
+ }
} else
ip_vs_sync_buff_release(sb);
spin_unlock(&ipvs->sync_lock);
@@ -1636,8 +1640,10 @@ static void master_wakeup_work_handler(struct work_struct *work)
spin_lock_bh(&ipvs->sync_lock);
if (ms->sync_queue_len &&
ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
+ int id = (int)(ms - ipvs->ms);
+
ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
- wake_up_process(ms->master_thread);
+ wake_up_process(ipvs->master_tinfo[id].task);
}
spin_unlock_bh(&ipvs->sync_lock);
}
@@ -1703,10 +1709,6 @@ done:
if (sb)
ip_vs_sync_buff_release(sb);
- /* release the sending multicast socket */
- sock_release(tinfo->sock);
- kfree(tinfo);
-
return 0;
}
@@ -1740,11 +1742,6 @@ static int sync_thread_backup(void *data)
}
}
- /* release the sending multicast socket */
- sock_release(tinfo->sock);
- kfree(tinfo->buf);
- kfree(tinfo);
-
return 0;
}
@@ -1752,8 +1749,8 @@ static int sync_thread_backup(void *data)
int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
int state)
{
- struct ip_vs_sync_thread_data *tinfo = NULL;
- struct task_struct **array = NULL, *task;
+ struct ip_vs_sync_thread_data *ti = NULL, *tinfo;
+ struct task_struct *task;
struct net_device *dev;
char *name;
int (*threadfn)(void *data);
@@ -1822,7 +1819,7 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
threadfn = sync_thread_master;
} else if (state == IP_VS_STATE_BACKUP) {
result = -EEXIST;
- if (ipvs->backup_threads)
+ if (ipvs->backup_tinfo)
goto out_early;
ipvs->bcfg = *c;
@@ -1849,28 +1846,22 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
master_wakeup_work_handler);
ms->ipvs = ipvs;
}
- } else {
- array = kcalloc(count, sizeof(struct task_struct *),
- GFP_KERNEL);
- result = -ENOMEM;
- if (!array)
- goto out;
}
+ result = -ENOMEM;
+ ti = kcalloc(count, sizeof(struct ip_vs_sync_thread_data),
+ GFP_KERNEL);
+ if (!ti)
+ goto out;
for (id = 0; id < count; id++) {
- result = -ENOMEM;
- tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
- if (!tinfo)
- goto out;
+ tinfo = &ti[id];
tinfo->ipvs = ipvs;
- tinfo->sock = NULL;
if (state == IP_VS_STATE_BACKUP) {
+ result = -ENOMEM;
tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
GFP_KERNEL);
if (!tinfo->buf)
goto out;
- } else {
- tinfo->buf = NULL;
}
tinfo->id = id;
if (state == IP_VS_STATE_MASTER)
@@ -1885,17 +1876,15 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
result = PTR_ERR(task);
goto out;
}
- tinfo = NULL;
- if (state == IP_VS_STATE_MASTER)
- ipvs->ms[id].master_thread = task;
- else
- array[id] = task;
+ tinfo->task = task;
}
/* mark as active */
- if (state == IP_VS_STATE_BACKUP)
- ipvs->backup_threads = array;
+ if (state == IP_VS_STATE_MASTER)
+ ipvs->master_tinfo = ti;
+ else
+ ipvs->backup_tinfo = ti;
spin_lock_bh(&ipvs->sync_buff_lock);
ipvs->sync_state |= state;
spin_unlock_bh(&ipvs->sync_buff_lock);
@@ -1910,29 +1899,31 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
out:
/* We do not need RTNL lock anymore, release it here so that
- * sock_release below and in the kthreads can use rtnl_lock
- * to leave the mcast group.
+ * sock_release below can use rtnl_lock to leave the mcast group.
*/
rtnl_unlock();
- count = id;
- while (count-- > 0) {
- if (state == IP_VS_STATE_MASTER)
- kthread_stop(ipvs->ms[count].master_thread);
- else
- kthread_stop(array[count]);
+ id = min(id, count - 1);
+ if (ti) {
+ for (tinfo = ti + id; tinfo >= ti; tinfo--) {
+ if (tinfo->task)
+ kthread_stop(tinfo->task);
+ }
}
if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
kfree(ipvs->ms);
ipvs->ms = NULL;
}
mutex_unlock(&ipvs->sync_mutex);
- if (tinfo) {
- if (tinfo->sock)
- sock_release(tinfo->sock);
- kfree(tinfo->buf);
- kfree(tinfo);
+
+ /* No more mutexes, release socks */
+ if (ti) {
+ for (tinfo = ti + id; tinfo >= ti; tinfo--) {
+ if (tinfo->sock)
+ sock_release(tinfo->sock);
+ kfree(tinfo->buf);
+ }
+ kfree(ti);
}
- kfree(array);
return result;
out_early:
@@ -1944,15 +1935,18 @@ out_early:
int stop_sync_thread(struct netns_ipvs *ipvs, int state)
{
- struct task_struct **array;
+ struct ip_vs_sync_thread_data *ti, *tinfo;
int id;
int retc = -EINVAL;
IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
+ mutex_lock(&ipvs->sync_mutex);
if (state == IP_VS_STATE_MASTER) {
+ retc = -ESRCH;
if (!ipvs->ms)
- return -ESRCH;
+ goto err;
+ ti = ipvs->master_tinfo;
/*
* The lock synchronizes with sb_queue_tail(), so that we don't
@@ -1971,38 +1965,56 @@ int stop_sync_thread(struct netns_ipvs *ipvs, int state)
struct ipvs_master_sync_state *ms = &ipvs->ms[id];
int ret;
+ tinfo = &ti[id];
pr_info("stopping master sync thread %d ...\n",
- task_pid_nr(ms->master_thread));
+ task_pid_nr(tinfo->task));
cancel_delayed_work_sync(&ms->master_wakeup_work);
- ret = kthread_stop(ms->master_thread);
+ ret = kthread_stop(tinfo->task);
if (retc >= 0)
retc = ret;
}
kfree(ipvs->ms);
ipvs->ms = NULL;
+ ipvs->master_tinfo = NULL;
} else if (state == IP_VS_STATE_BACKUP) {
- if (!ipvs->backup_threads)
- return -ESRCH;
+ retc = -ESRCH;
+ if (!ipvs->backup_tinfo)
+ goto err;
+ ti = ipvs->backup_tinfo;
ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
- array = ipvs->backup_threads;
retc = 0;
for (id = ipvs->threads_mask; id >= 0; id--) {
int ret;
+ tinfo = &ti[id];
pr_info("stopping backup sync thread %d ...\n",
- task_pid_nr(array[id]));
- ret = kthread_stop(array[id]);
+ task_pid_nr(tinfo->task));
+ ret = kthread_stop(tinfo->task);
if (retc >= 0)
retc = ret;
}
- kfree(array);
- ipvs->backup_threads = NULL;
+ ipvs->backup_tinfo = NULL;
+ } else {
+ goto err;
}
+ id = ipvs->threads_mask;
+ mutex_unlock(&ipvs->sync_mutex);
+
+ /* No more mutexes, release socks */
+ for (tinfo = ti + id; tinfo >= ti; tinfo--) {
+ if (tinfo->sock)
+ sock_release(tinfo->sock);
+ kfree(tinfo->buf);
+ }
+ kfree(ti);
/* decrease the module use count */
ip_vs_use_count_dec();
+ return retc;
+err:
+ mutex_unlock(&ipvs->sync_mutex);
return retc;
}
@@ -2021,7 +2033,6 @@ void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs)
{
int retc;
- mutex_lock(&ipvs->sync_mutex);
retc = stop_sync_thread(ipvs, IP_VS_STATE_MASTER);
if (retc && retc != -ESRCH)
pr_err("Failed to stop Master Daemon\n");
@@ -2029,5 +2040,4 @@ void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs)
retc = stop_sync_thread(ipvs, IP_VS_STATE_BACKUP);
if (retc && retc != -ESRCH)
pr_err("Failed to stop Backup Daemon\n");
- mutex_unlock(&ipvs->sync_mutex);
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index e101eda05d55..9c464d24beec 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -29,6 +29,7 @@
#include <linux/tcp.h> /* for tcphdr */
#include <net/ip.h>
#include <net/gue.h>
+#include <net/gre.h>
#include <net/tcp.h> /* for csum_tcpudp_magic */
#include <net/udp.h>
#include <net/icmp.h> /* for icmp_send */
@@ -36,6 +37,7 @@
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/ip_tunnels.h>
+#include <net/ip6_checksum.h>
#include <net/addrconf.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
@@ -275,7 +277,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs,
}
/* don't propagate ttl change to cloned packets */
- if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+ if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
return false;
ipv6_hdr(skb)->hop_limit--;
@@ -290,7 +292,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs,
}
/* don't propagate ttl change to cloned packets */
- if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return false;
/* Decrease ttl */
@@ -381,8 +383,19 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
if (!dest)
goto err_put;
- if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
+ if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
mtu -= sizeof(struct udphdr) + sizeof(struct guehdr);
+ if ((dest->tun_flags &
+ IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL)
+ mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ } else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ __be16 tflags = 0;
+
+ if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ tflags |= TUNNEL_CSUM;
+ mtu -= gre_calc_hlen(tflags);
+ }
if (mtu < 68) {
IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
goto err_put;
@@ -536,8 +549,19 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
if (!dest)
goto err_put;
- if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
+ if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
mtu -= sizeof(struct udphdr) + sizeof(struct guehdr);
+ if ((dest->tun_flags &
+ IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL)
+ mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ } else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ __be16 tflags = 0;
+
+ if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ tflags |= TUNNEL_CSUM;
+ mtu -= gre_calc_hlen(tflags);
+ }
if (mtu < IPV6_MIN_MTU) {
IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
IPV6_MIN_MTU);
@@ -792,7 +816,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
}
/* copy-on-write the packet before mangling it */
- if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
@@ -881,7 +905,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
}
/* copy-on-write the packet before mangling it */
- if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+ if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
@@ -1002,17 +1026,56 @@ ipvs_gue_encap(struct net *net, struct sk_buff *skb,
__be16 sport = udp_flow_src_port(net, skb, 0, 0, false);
struct udphdr *udph; /* Our new UDP header */
struct guehdr *gueh; /* Our new GUE header */
+ size_t hdrlen, optlen = 0;
+ void *data;
+ bool need_priv = false;
+
+ if ((cp->dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ need_priv = true;
+ }
- skb_push(skb, sizeof(struct guehdr));
+ hdrlen = sizeof(struct guehdr) + optlen;
+
+ skb_push(skb, hdrlen);
gueh = (struct guehdr *)skb->data;
gueh->control = 0;
gueh->version = 0;
- gueh->hlen = 0;
+ gueh->hlen = optlen >> 2;
gueh->flags = 0;
gueh->proto_ctype = *next_protocol;
+ data = &gueh[1];
+
+ if (need_priv) {
+ __be32 *flags = data;
+ u16 csum_start = skb_checksum_start_offset(skb);
+ __be16 *pd;
+
+ gueh->flags |= GUE_FLAG_PRIV;
+ *flags = 0;
+ data += GUE_LEN_PRIV;
+
+ if (csum_start < hdrlen)
+ return -EINVAL;
+
+ csum_start -= hdrlen;
+ pd = data;
+ pd[0] = htons(csum_start);
+ pd[1] = htons(csum_start + skb->csum_offset);
+
+ if (!skb_is_gso(skb)) {
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->encapsulation = 0;
+ }
+
+ *flags |= GUE_PFLAG_REMCSUM;
+ data += GUE_PLEN_REMCSUM;
+ }
+
skb_push(skb, sizeof(struct udphdr));
skb_reset_transport_header(skb);
@@ -1029,6 +1092,24 @@ ipvs_gue_encap(struct net *net, struct sk_buff *skb,
return 0;
}
+static void /* Build a GRE header on @skb for cp->dest and switch *next_protocol to IPPROTO_GRE */
+ipvs_gre_encap(struct net *net, struct sk_buff *skb,
+ struct ip_vs_conn *cp, __u8 *next_protocol)
+{
+ __be16 proto = *next_protocol == IPPROTO_IPIP ?
+ htons(ETH_P_IP) : htons(ETH_P_IPV6); /* anything other than IPPROTO_IPIP is treated as IPv6 */
+ __be16 tflags = 0;
+ size_t hdrlen;
+
+ if (cp->dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) /* per-destination option: request a GRE checksum */
+ tflags |= TUNNEL_CSUM;
+
+ hdrlen = gre_calc_hlen(tflags); /* header length is derived from the selected flags */
+ gre_build_header(skb, hdrlen, tflags, proto, 0, 0); /* @net is not used in this function */
+
+ *next_protocol = IPPROTO_GRE; /* caller's outer IP header will carry GRE as its protocol */
+}
+
/*
* IP Tunneling transmitter
*
@@ -1066,6 +1147,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
unsigned int max_headroom; /* The extra header space needed */
int ret, local;
int tun_type, gso_type;
+ int tun_flags;
EnterFunction(10);
@@ -1088,9 +1170,28 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
tun_type = cp->dest->tun_type;
+ tun_flags = cp->dest->tun_flags;
+
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ size_t gue_hdrlen, gue_optlen = 0;
+
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ gue_optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ }
+ gue_hdrlen = sizeof(struct guehdr) + gue_optlen;
+
+ max_headroom += sizeof(struct udphdr) + gue_hdrlen;
+ } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ size_t gre_hdrlen;
+ __be16 tflags = 0;
- if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
- max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr);
+ if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ tflags |= TUNNEL_CSUM;
+ gre_hdrlen = gre_calc_hlen(tflags);
+
+ max_headroom += gre_hdrlen;
+ }
/* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */
dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL;
@@ -1101,8 +1202,22 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error;
gso_type = __tun_gso_type_mask(AF_INET, cp->af);
- if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
- gso_type |= SKB_GSO_UDP_TUNNEL;
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+ (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+ gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+ else
+ gso_type |= SKB_GSO_UDP_TUNNEL;
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ gso_type |= SKB_GSO_TUNNEL_REMCSUM;
+ }
+ } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ gso_type |= SKB_GSO_GRE_CSUM;
+ else
+ gso_type |= SKB_GSO_GRE;
+ }
if (iptunnel_handle_offloads(skb, gso_type))
goto tx_error;
@@ -1111,8 +1226,19 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb_set_inner_ipproto(skb, next_protocol);
- if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
- ipvs_gue_encap(net, skb, cp, &next_protocol);
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ bool check = false;
+
+ if (ipvs_gue_encap(net, skb, cp, &next_protocol))
+ goto tx_error;
+
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+ (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+ check = true;
+
+ udp_set_csum(!check, skb, saddr, cp->daddr.ip, skb->len);
+ } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE)
+ ipvs_gre_encap(net, skb, cp, &next_protocol);
skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
@@ -1170,6 +1296,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
unsigned int max_headroom; /* The extra header space needed */
int ret, local;
int tun_type, gso_type;
+ int tun_flags;
EnterFunction(10);
@@ -1193,9 +1320,28 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
tun_type = cp->dest->tun_type;
+ tun_flags = cp->dest->tun_flags;
+
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ size_t gue_hdrlen, gue_optlen = 0;
+
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ gue_optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ }
+ gue_hdrlen = sizeof(struct guehdr) + gue_optlen;
+
+ max_headroom += sizeof(struct udphdr) + gue_hdrlen;
+ } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ size_t gre_hdrlen;
+ __be16 tflags = 0;
+
+ if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ tflags |= TUNNEL_CSUM;
+ gre_hdrlen = gre_calc_hlen(tflags);
- if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
- max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr);
+ max_headroom += gre_hdrlen;
+ }
skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
&next_protocol, &payload_len,
@@ -1204,8 +1350,22 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error;
gso_type = __tun_gso_type_mask(AF_INET6, cp->af);
- if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
- gso_type |= SKB_GSO_UDP_TUNNEL;
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+ (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+ gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+ else
+ gso_type |= SKB_GSO_UDP_TUNNEL;
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ gso_type |= SKB_GSO_TUNNEL_REMCSUM;
+ }
+ } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ gso_type |= SKB_GSO_GRE_CSUM;
+ else
+ gso_type |= SKB_GSO_GRE;
+ }
if (iptunnel_handle_offloads(skb, gso_type))
goto tx_error;
@@ -1214,8 +1374,19 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb_set_inner_ipproto(skb, next_protocol);
- if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
- ipvs_gue_encap(net, skb, cp, &next_protocol);
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ bool check = false;
+
+ if (ipvs_gue_encap(net, skb, cp, &next_protocol))
+ goto tx_error;
+
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+ (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+ check = true;
+
+ udp6_set_csum(!check, skb, &saddr, &cp->daddr.in6, skb->len);
+ } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE)
+ ipvs_gre_encap(net, skb, cp, &next_protocol);
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
@@ -1400,7 +1571,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
}
/* copy-on-write the packet before mangling it */
- if (!skb_make_writable(skb, offset))
+ if (skb_ensure_writable(skb, offset))
goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
@@ -1489,7 +1660,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
}
/* copy-on-write the packet before mangling it */
- if (!skb_make_writable(skb, offset))
+ if (skb_ensure_writable(skb, offset))
goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index e52fcb9c9a96..921a7b95be68 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -37,12 +37,17 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
in_dev = __in_dev_get_rcu(rt->dst.dev);
if (in_dev != NULL) {
- for_primary_ifa(in_dev) {
+ const struct in_ifaddr *ifa;
+
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
+ if (ifa->ifa_flags & IFA_F_SECONDARY)
+ continue;
+
if (ifa->ifa_broadcast == iph->daddr) {
mask = ifa->ifa_mask;
break;
}
- } endfor_ifa(in_dev);
+ }
}
if (mask == 0)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f4f9b8344a32..bdfeacee0817 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -749,9 +749,6 @@ begin:
continue;
}
- if (nf_ct_is_dying(ct))
- continue;
-
if (nf_ct_key_equal(h, tuple, zone, net))
return h;
}
@@ -777,20 +774,24 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
struct nf_conn *ct;
rcu_read_lock();
-begin:
+
h = ____nf_conntrack_find(net, zone, tuple, hash);
if (h) {
+ /* We have a candidate that matches the tuple we're interested
+ * in, try to obtain a reference and re-check tuple
+ */
ct = nf_ct_tuplehash_to_ctrack(h);
- if (unlikely(nf_ct_is_dying(ct) ||
- !atomic_inc_not_zero(&ct->ct_general.use)))
- h = NULL;
- else {
- if (unlikely(!nf_ct_key_equal(h, tuple, zone, net))) {
- nf_ct_put(ct);
- goto begin;
- }
+ if (likely(atomic_inc_not_zero(&ct->ct_general.use))) {
+ if (likely(nf_ct_key_equal(h, tuple, zone, net)))
+ goto found;
+
+ /* TYPESAFE_BY_RCU recycled the candidate */
+ nf_ct_put(ct);
}
+
+ h = NULL;
}
+found:
rcu_read_unlock();
return h;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index fac6986d37a8..6497e5fc0871 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -6,7 +6,7 @@
* Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* Based on the 'brute force' H.323 connection tracking module by
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Jozsef Kadlecsik <kadlec@netfilter.org>
*
* For more information, please see http://nath323.sourceforge.net/
*/
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 7db79c1b8084..1b77444d5b52 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1256,7 +1256,6 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
struct nf_conntrack_tuple tuple;
struct nf_conn *ct;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
struct nf_conntrack_zone zone;
int err;
@@ -1266,11 +1265,13 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
if (cda[CTA_TUPLE_ORIG])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
- u3, &zone);
+ nfmsg->nfgen_family, &zone);
else if (cda[CTA_TUPLE_REPLY])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
- u3, &zone);
+ nfmsg->nfgen_family, &zone);
else {
+ u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
+
return ctnetlink_flush_conntrack(net, cda,
NETLINK_CB(skb).portid,
nlmsg_report(nlh), u3);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 37bb530d848f..a0560d175a7f 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -16,6 +16,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_bridge.h>
#include <net/netfilter/nf_log.h>
#include <linux/ip.h>
@@ -120,10 +121,8 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto)
};
EXPORT_SYMBOL_GPL(nf_ct_l4proto_find);
-static unsigned int nf_confirm(struct sk_buff *skb,
- unsigned int protoff,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
+unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
const struct nf_conn_help *help;
@@ -154,6 +153,7 @@ static unsigned int nf_confirm(struct sk_buff *skb,
/* We've seen it coming out the other side: confirm it */
return nf_conntrack_confirm(skb);
}
+EXPORT_SYMBOL_GPL(nf_confirm);
static unsigned int ipv4_confirm(void *priv,
struct sk_buff *skb,
@@ -442,12 +442,14 @@ static int nf_ct_tcp_fixup(struct nf_conn *ct, void *_nfproto)
return 0;
}
+static struct nf_ct_bridge_info *nf_ct_bridge_info;
+
static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
{
struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
- bool fixup_needed = false;
+ bool fixup_needed = false, retry = true;
int err = 0;
-
+retry:
mutex_lock(&nf_ct_proto_mutex);
switch (nfproto) {
@@ -487,6 +489,32 @@ static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
fixup_needed = true;
break;
#endif
+ case NFPROTO_BRIDGE:
+ if (!nf_ct_bridge_info) {
+ if (!retry) {
+ err = -EPROTO;
+ goto out_unlock;
+ }
+ mutex_unlock(&nf_ct_proto_mutex);
+ request_module("nf_conntrack_bridge");
+ retry = false;
+ goto retry;
+ }
+ if (!try_module_get(nf_ct_bridge_info->me)) {
+ err = -EPROTO;
+ goto out_unlock;
+ }
+ cnet->users_bridge++;
+ if (cnet->users_bridge > 1)
+ goto out_unlock;
+
+ err = nf_register_net_hooks(net, nf_ct_bridge_info->ops,
+ nf_ct_bridge_info->ops_size);
+ if (err)
+ cnet->users_bridge = 0;
+ else
+ fixup_needed = true;
+ break;
default:
err = -EPROTO;
break;
@@ -519,47 +547,99 @@ static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
ARRAY_SIZE(ipv6_conntrack_ops));
break;
#endif
+ case NFPROTO_BRIDGE:
+ if (!nf_ct_bridge_info)
+ break;
+ if (cnet->users_bridge && (--cnet->users_bridge == 0))
+ nf_unregister_net_hooks(net, nf_ct_bridge_info->ops,
+ nf_ct_bridge_info->ops_size);
+
+ module_put(nf_ct_bridge_info->me);
+ break;
}
-
mutex_unlock(&nf_ct_proto_mutex);
}
-int nf_ct_netns_get(struct net *net, u8 nfproto)
+static int nf_ct_netns_inet_get(struct net *net)
{
int err;
- if (nfproto == NFPROTO_INET) {
- err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
- if (err < 0)
- goto err1;
- err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
- if (err < 0)
- goto err2;
- } else {
- err = nf_ct_netns_do_get(net, nfproto);
- if (err < 0)
- goto err1;
- }
- return 0;
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
+ if (err < 0)
+ goto err1;
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
+ if (err < 0)
+ goto err2;
+ return err;
err2:
nf_ct_netns_put(net, NFPROTO_IPV4);
err1:
return err;
}
+
+int nf_ct_netns_get(struct net *net, u8 nfproto)
+{
+ int err;
+
+ switch (nfproto) {
+ case NFPROTO_INET:
+ err = nf_ct_netns_inet_get(net);
+ break;
+ case NFPROTO_BRIDGE:
+ err = nf_ct_netns_do_get(net, NFPROTO_BRIDGE);
+ if (err < 0)
+ return err;
+
+ err = nf_ct_netns_inet_get(net);
+ if (err < 0) {
+ nf_ct_netns_put(net, NFPROTO_BRIDGE);
+ return err;
+ }
+ break;
+ default:
+ err = nf_ct_netns_do_get(net, nfproto);
+ break;
+ }
+ return err;
+}
EXPORT_SYMBOL_GPL(nf_ct_netns_get);
void nf_ct_netns_put(struct net *net, uint8_t nfproto)
{
- if (nfproto == NFPROTO_INET) {
+ switch (nfproto) {
+ case NFPROTO_BRIDGE:
+ nf_ct_netns_do_put(net, NFPROTO_BRIDGE);
+ /* fall through */
+ case NFPROTO_INET:
nf_ct_netns_do_put(net, NFPROTO_IPV4);
nf_ct_netns_do_put(net, NFPROTO_IPV6);
- } else {
+ break;
+ default:
nf_ct_netns_do_put(net, nfproto);
+ break;
}
}
EXPORT_SYMBOL_GPL(nf_ct_netns_put);
+void nf_ct_bridge_register(struct nf_ct_bridge_info *info)
+{
+ WARN_ON(nf_ct_bridge_info);
+ mutex_lock(&nf_ct_proto_mutex);
+ nf_ct_bridge_info = info;
+ mutex_unlock(&nf_ct_proto_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_ct_bridge_register);
+
+void nf_ct_bridge_unregister(struct nf_ct_bridge_info *info)
+{
+ WARN_ON(!nf_ct_bridge_info);
+ mutex_lock(&nf_ct_proto_mutex);
+ nf_ct_bridge_info = NULL;
+ mutex_unlock(&nf_ct_proto_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_ct_bridge_unregister);
+
int nf_conntrack_proto_init(void)
{
int ret;
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index a824367ed518..dd53e2b20f6b 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -218,7 +218,7 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
/* See ip_conntrack_proto_tcp.c */
if (state->net->ct.sysctl_checksum &&
state->hook == NF_INET_PRE_ROUTING &&
- nf_ip_checksum(skb, state->hook, dataoff, 0)) {
+ nf_ip_checksum(skb, state->hook, dataoff, IPPROTO_ICMP)) {
icmp_error_log(skb, state, "bad hw icmp checksum");
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 522c08c23600..fce3d93f1541 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -336,7 +336,7 @@ static bool sctp_error(struct sk_buff *skb,
if (state->hook == NF_INET_PRE_ROUTING &&
state->net->ct.sysctl_checksum &&
skb->ip_summed == CHECKSUM_NONE) {
- if (!skb_make_writable(skb, dataoff + sizeof(struct sctphdr))) {
+ if (skb_ensure_writable(skb, dataoff + sizeof(*sh))) {
logmsg = "nf_ct_sctp: failed to read header ";
goto out_invalid;
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 1e2cc83ff5da..d5fdfa00d683 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * (C) 2002-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
* (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*/
diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
index dc21a43cd145..3066449f8bd8 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -126,7 +126,7 @@ static unsigned int nf_ct_sack_adjust(struct sk_buff *skb,
optoff = protoff + sizeof(struct tcphdr);
optend = protoff + tcph->doff * 4;
- if (!skb_make_writable(skb, optend))
+ if (skb_ensure_writable(skb, optend))
return 0;
tcph = (void *)skb->data + protoff;
@@ -176,7 +176,7 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
this_way = &seqadj->seq[dir];
other_way = &seqadj->seq[!dir];
- if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
+ if (skb_ensure_writable(skb, protoff + sizeof(*tcph)))
return 0;
tcph = (void *)skb->data + protoff;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 948b4ebbe3fb..e3d797252a98 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -53,7 +53,6 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
ft->dst_port = ctt->dst.u.tcp.port;
ft->iifidx = other_dst->dev->ifindex;
- ft->oifidx = dst->dev->ifindex;
ft->dst_cache = dst;
}
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 3574a212bdc2..bb25d4c794c7 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -374,7 +374,7 @@ static int seq_show(struct seq_file *s, void *v)
continue;
logger = nft_log_dereference(loggers[*pos][i]);
- seq_printf(s, "%s", logger->name);
+ seq_puts(s, logger->name);
if (i == 0 && loggers[*pos][i + 1] != NULL)
seq_puts(s, ",");
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 98bf543e9891..a263505455fc 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -95,7 +95,7 @@ bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
struct tcphdr *tcph;
int oldlen, datalen;
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, skb->len))
return false;
if (rep_len > match_len &&
@@ -145,7 +145,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
struct udphdr *udph;
int datalen, oldlen;
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, skb->len))
return false;
if (rep_len > match_len &&
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index 07da07788f6b..7ac733ebd060 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -70,7 +70,7 @@ static bool udp_manip_pkt(struct sk_buff *skb,
struct udphdr *hdr;
bool do_csum;
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct udphdr *)(skb->data + hdroff);
@@ -88,7 +88,7 @@ static bool udplite_manip_pkt(struct sk_buff *skb,
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
struct udphdr *hdr;
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct udphdr *)(skb->data + hdroff);
@@ -114,7 +114,7 @@ sctp_manip_pkt(struct sk_buff *skb,
if (skb->len >= hdroff + sizeof(*hdr))
hdrsize = sizeof(*hdr);
- if (!skb_make_writable(skb, hdroff + hdrsize))
+ if (skb_ensure_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct sctphdr *)(skb->data + hdroff);
@@ -155,7 +155,7 @@ tcp_manip_pkt(struct sk_buff *skb,
if (skb->len >= hdroff + sizeof(struct tcphdr))
hdrsize = sizeof(struct tcphdr);
- if (!skb_make_writable(skb, hdroff + hdrsize))
+ if (skb_ensure_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct tcphdr *)(skb->data + hdroff);
@@ -195,7 +195,7 @@ dccp_manip_pkt(struct sk_buff *skb,
if (skb->len >= hdroff + sizeof(struct dccp_hdr))
hdrsize = sizeof(struct dccp_hdr);
- if (!skb_make_writable(skb, hdroff + hdrsize))
+ if (skb_ensure_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct dccp_hdr *)(skb->data + hdroff);
@@ -229,7 +229,7 @@ icmp_manip_pkt(struct sk_buff *skb,
{
struct icmphdr *hdr;
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct icmphdr *)(skb->data + hdroff);
@@ -247,7 +247,7 @@ icmpv6_manip_pkt(struct sk_buff *skb,
{
struct icmp6hdr *hdr;
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct icmp6hdr *)(skb->data + hdroff);
@@ -275,7 +275,7 @@ gre_manip_pkt(struct sk_buff *skb,
/* pgreh includes two optional 32bit fields which are not required
* to be there. That's where the magic '8' comes from */
- if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
+ if (skb_ensure_writable(skb, hdroff + sizeof(*pgreh) - 8))
return false;
greh = (void *)skb->data + hdroff;
@@ -347,7 +347,7 @@ static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
struct iphdr *iph;
unsigned int hdroff;
- if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
+ if (skb_ensure_writable(skb, iphdroff + sizeof(*iph)))
return false;
iph = (void *)skb->data + iphdroff;
@@ -378,7 +378,7 @@ static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
int hdroff;
u8 nexthdr;
- if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
+ if (skb_ensure_writable(skb, iphdroff + sizeof(*ipv6h)))
return false;
ipv6h = (void *)skb->data + iphdroff;
@@ -562,9 +562,9 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
- if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+ if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
return 0;
- if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
+ if (nf_ip_checksum(skb, hooknum, hdrlen, IPPROTO_ICMP))
return 0;
inside = (void *)skb->data + hdrlen;
@@ -784,7 +784,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
- if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+ if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
return 0;
if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
return 0;
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index 4ffe5e5e65ba..f91579c821e9 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -44,15 +44,17 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
if (hooknum == NF_INET_LOCAL_OUT) {
newdst = htonl(0x7F000001);
} else {
- struct in_device *indev;
- struct in_ifaddr *ifa;
+ const struct in_device *indev;
newdst = 0;
indev = __in_dev_get_rcu(skb->dev);
- if (indev && indev->ifa_list) {
- ifa = indev->ifa_list;
- newdst = ifa->ifa_local;
+ if (indev) {
+ const struct in_ifaddr *ifa;
+
+ ifa = rcu_dereference(indev->ifa_list);
+ if (ifa)
+ newdst = ifa->ifa_local;
}
if (!newdst)
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index 7de28fa0f14a..e338d91980d8 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -282,7 +282,7 @@ next:
if (dir == IP_CT_DIR_REPLY && ct_sip_info->forced_dport) {
struct udphdr *uh;
- if (!skb_make_writable(skb, skb->len)) {
+ if (skb_ensure_writable(skb, skb->len)) {
nf_ct_helper_log(skb, ct, "cannot mangle packet");
return NF_DROP;
}
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index b5b2be55ca82..a2b58de82600 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -156,7 +156,6 @@ static void nf_ip6_saveroute(const struct sk_buff *skb,
}
static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
- const struct nf_hook_entries *entries,
unsigned int index, unsigned int queuenum)
{
int status = -ENOENT;
@@ -190,6 +189,11 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
goto err;
}
+ if (!skb_dst_force(skb) && state->hook != NF_INET_PRE_ROUTING) {
+ status = -ENETDOWN;
+ goto err;
+ }
+
*entry = (struct nf_queue_entry) {
.skb = skb,
.state = *state,
@@ -198,7 +202,6 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
};
nf_queue_entry_get_refs(entry);
- skb_dst_force(skb);
switch (entry->state.pf) {
case AF_INET:
@@ -225,12 +228,11 @@ err:
/* Packets leaving via this function must come back through nf_reinject(). */
int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
- const struct nf_hook_entries *entries, unsigned int index,
- unsigned int verdict)
+ unsigned int index, unsigned int verdict)
{
int ret;
- ret = __nf_queue(skb, state, entries, index, verdict >> NF_VERDICT_QBITS);
+ ret = __nf_queue(skb, state, index, verdict >> NF_VERDICT_QBITS);
if (ret < 0) {
if (ret == -ESRCH &&
(verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
@@ -336,7 +338,7 @@ next_hook:
local_bh_enable();
break;
case NF_QUEUE:
- err = nf_queue(skb, &entry->state, hooks, i, verdict);
+ err = nf_queue(skb, &entry->state, i, verdict);
if (err == 1)
goto next_hook;
break;
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 8ce74ed985c0..b101f187eda8 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -10,16 +10,16 @@
#include <net/netns/generic.h>
#include <linux/proc_fs.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter/xt_tcpudp.h>
-#include <linux/netfilter/xt_SYNPROXY.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nf_synproxy.h>
#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_synproxy.h>
unsigned int synproxy_net_id;
EXPORT_SYMBOL_GPL(synproxy_net_id);
@@ -57,7 +57,7 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
case TCPOPT_MSS:
if (opsize == TCPOLEN_MSS) {
opts->mss = get_unaligned_be16(ptr);
- opts->options |= XT_SYNPROXY_OPT_MSS;
+ opts->options |= NF_SYNPROXY_OPT_MSS;
}
break;
case TCPOPT_WINDOW:
@@ -65,19 +65,19 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
opts->wscale = *ptr;
if (opts->wscale > TCP_MAX_WSCALE)
opts->wscale = TCP_MAX_WSCALE;
- opts->options |= XT_SYNPROXY_OPT_WSCALE;
+ opts->options |= NF_SYNPROXY_OPT_WSCALE;
}
break;
case TCPOPT_TIMESTAMP:
if (opsize == TCPOLEN_TIMESTAMP) {
opts->tsval = get_unaligned_be32(ptr);
opts->tsecr = get_unaligned_be32(ptr + 4);
- opts->options |= XT_SYNPROXY_OPT_TIMESTAMP;
+ opts->options |= NF_SYNPROXY_OPT_TIMESTAMP;
}
break;
case TCPOPT_SACK_PERM:
if (opsize == TCPOLEN_SACK_PERM)
- opts->options |= XT_SYNPROXY_OPT_SACK_PERM;
+ opts->options |= NF_SYNPROXY_OPT_SACK_PERM;
break;
}
@@ -89,36 +89,36 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
}
EXPORT_SYMBOL_GPL(synproxy_parse_options);
-unsigned int synproxy_options_size(const struct synproxy_options *opts)
+static unsigned int
+synproxy_options_size(const struct synproxy_options *opts)
{
unsigned int size = 0;
- if (opts->options & XT_SYNPROXY_OPT_MSS)
+ if (opts->options & NF_SYNPROXY_OPT_MSS)
size += TCPOLEN_MSS_ALIGNED;
- if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
+ if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
size += TCPOLEN_TSTAMP_ALIGNED;
- else if (opts->options & XT_SYNPROXY_OPT_SACK_PERM)
+ else if (opts->options & NF_SYNPROXY_OPT_SACK_PERM)
size += TCPOLEN_SACKPERM_ALIGNED;
- if (opts->options & XT_SYNPROXY_OPT_WSCALE)
+ if (opts->options & NF_SYNPROXY_OPT_WSCALE)
size += TCPOLEN_WSCALE_ALIGNED;
return size;
}
-EXPORT_SYMBOL_GPL(synproxy_options_size);
-void
+static void
synproxy_build_options(struct tcphdr *th, const struct synproxy_options *opts)
{
__be32 *ptr = (__be32 *)(th + 1);
u8 options = opts->options;
- if (options & XT_SYNPROXY_OPT_MSS)
+ if (options & NF_SYNPROXY_OPT_MSS)
*ptr++ = htonl((TCPOPT_MSS << 24) |
(TCPOLEN_MSS << 16) |
opts->mss);
- if (options & XT_SYNPROXY_OPT_TIMESTAMP) {
- if (options & XT_SYNPROXY_OPT_SACK_PERM)
+ if (options & NF_SYNPROXY_OPT_TIMESTAMP) {
+ if (options & NF_SYNPROXY_OPT_SACK_PERM)
*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
(TCPOLEN_SACK_PERM << 16) |
(TCPOPT_TIMESTAMP << 8) |
@@ -131,58 +131,56 @@ synproxy_build_options(struct tcphdr *th, const struct synproxy_options *opts)
*ptr++ = htonl(opts->tsval);
*ptr++ = htonl(opts->tsecr);
- } else if (options & XT_SYNPROXY_OPT_SACK_PERM)
+ } else if (options & NF_SYNPROXY_OPT_SACK_PERM)
*ptr++ = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
(TCPOPT_SACK_PERM << 8) |
TCPOLEN_SACK_PERM);
- if (options & XT_SYNPROXY_OPT_WSCALE)
+ if (options & NF_SYNPROXY_OPT_WSCALE)
*ptr++ = htonl((TCPOPT_NOP << 24) |
(TCPOPT_WINDOW << 16) |
(TCPOLEN_WINDOW << 8) |
opts->wscale);
}
-EXPORT_SYMBOL_GPL(synproxy_build_options);
-void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info,
+void synproxy_init_timestamp_cookie(const struct nf_synproxy_info *info,
struct synproxy_options *opts)
{
opts->tsecr = opts->tsval;
opts->tsval = tcp_time_stamp_raw() & ~0x3f;
- if (opts->options & XT_SYNPROXY_OPT_WSCALE) {
+ if (opts->options & NF_SYNPROXY_OPT_WSCALE) {
opts->tsval |= opts->wscale;
opts->wscale = info->wscale;
} else
opts->tsval |= 0xf;
- if (opts->options & XT_SYNPROXY_OPT_SACK_PERM)
+ if (opts->options & NF_SYNPROXY_OPT_SACK_PERM)
opts->tsval |= 1 << 4;
- if (opts->options & XT_SYNPROXY_OPT_ECN)
+ if (opts->options & NF_SYNPROXY_OPT_ECN)
opts->tsval |= 1 << 5;
}
EXPORT_SYMBOL_GPL(synproxy_init_timestamp_cookie);
-void synproxy_check_timestamp_cookie(struct synproxy_options *opts)
+static void
+synproxy_check_timestamp_cookie(struct synproxy_options *opts)
{
opts->wscale = opts->tsecr & 0xf;
if (opts->wscale != 0xf)
- opts->options |= XT_SYNPROXY_OPT_WSCALE;
+ opts->options |= NF_SYNPROXY_OPT_WSCALE;
- opts->options |= opts->tsecr & (1 << 4) ? XT_SYNPROXY_OPT_SACK_PERM : 0;
+ opts->options |= opts->tsecr & (1 << 4) ? NF_SYNPROXY_OPT_SACK_PERM : 0;
- opts->options |= opts->tsecr & (1 << 5) ? XT_SYNPROXY_OPT_ECN : 0;
+ opts->options |= opts->tsecr & (1 << 5) ? NF_SYNPROXY_OPT_ECN : 0;
}
-EXPORT_SYMBOL_GPL(synproxy_check_timestamp_cookie);
-unsigned int synproxy_tstamp_adjust(struct sk_buff *skb,
- unsigned int protoff,
- struct tcphdr *th,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- const struct nf_conn_synproxy *synproxy)
+static unsigned int
+synproxy_tstamp_adjust(struct sk_buff *skb, unsigned int protoff,
+ struct tcphdr *th, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_conn_synproxy *synproxy)
{
unsigned int optoff, optend;
__be32 *ptr, old;
@@ -193,7 +191,7 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb,
optoff = protoff + sizeof(struct tcphdr);
optend = protoff + th->doff * 4;
- if (!skb_make_writable(skb, optend))
+ if (skb_ensure_writable(skb, optend))
return 0;
while (optoff < optend) {
@@ -232,7 +230,6 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb,
}
return 1;
}
-EXPORT_SYMBOL_GPL(synproxy_tstamp_adjust);
static struct nf_ct_ext_type nf_ct_synproxy_extend __read_mostly = {
.len = sizeof(struct nf_conn_synproxy),
@@ -413,5 +410,830 @@ static void __exit synproxy_core_exit(void)
module_init(synproxy_core_init);
module_exit(synproxy_core_exit);
+static struct iphdr *
+synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr,
+ __be32 daddr)
+{
+ struct iphdr *iph;
+
+ skb_reset_network_header(skb);
+ iph = skb_put(skb, sizeof(*iph));
+ iph->version = 4;
+ iph->ihl = sizeof(*iph) / 4;
+ iph->tos = 0;
+ iph->id = 0;
+ iph->frag_off = htons(IP_DF);
+ iph->ttl = net->ipv4.sysctl_ip_default_ttl;
+ iph->protocol = IPPROTO_TCP;
+ iph->check = 0;
+ iph->saddr = saddr;
+ iph->daddr = daddr;
+
+ return iph;
+}
+
+static void
+synproxy_send_tcp(struct net *net,
+ const struct sk_buff *skb, struct sk_buff *nskb,
+ struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
+ struct iphdr *niph, struct tcphdr *nth,
+ unsigned int tcp_hdr_size)
+{
+ nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0);
+ nskb->ip_summed = CHECKSUM_PARTIAL;
+ nskb->csum_start = (unsigned char *)nth - nskb->head;
+ nskb->csum_offset = offsetof(struct tcphdr, check);
+
+ skb_dst_set_noref(nskb, skb_dst(skb));
+ nskb->protocol = htons(ETH_P_IP);
+ if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
+ goto free_nskb;
+
+ if (nfct) {
+ nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo);
+ nf_conntrack_get(nfct);
+ }
+
+ ip_local_out(net, nskb->sk, nskb);
+ return;
+
+free_nskb:
+ kfree_skb(nskb);
+}
+
+void
+synproxy_send_client_synack(struct net *net,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+ u16 mss = opts->mss;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (!nskb)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(__cookie_v4_init_sequence(iph, th, &mss));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+ if (opts->options & NF_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = 0;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(net, skb, nskb, skb_nfct(skb),
+ IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size);
+}
+EXPORT_SYMBOL_GPL(synproxy_send_client_synack);
+
+static void
+synproxy_send_server_syn(struct net *net,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts, u32 recv_seq)
+{
+ struct synproxy_net *snet = synproxy_pernet(net);
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (!nskb)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(recv_seq - 1);
+ /* ack_seq is used to relay our ISN to the synproxy hook to initialize
+ * sequence number translation once a connection tracking entry exists.
+ */
+ nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN;
+ if (opts->options & NF_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = th->window;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(net, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+ niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_ack(struct net *net,
+ const struct ip_ct_tcp *state,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (!nskb)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(ntohl(th->ack_seq));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(net, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+/* Send a bare ACK in the same direction as @skb.
+ *
+ * Addresses and ports are copied unchanged from @skb/@th, seq is
+ * th->seq + 1, ack_seq is th->ack_seq and the window is th->window shifted
+ * right by opts->wscale.  The packet is attached to the conntrack entry of
+ * @skb as IP_CT_ESTABLISHED_REPLY.  If the skb cannot be allocated nothing
+ * is sent.
+ */
+static void
+synproxy_send_client_ack(struct net *net,
+			 const struct sk_buff *skb, const struct tcphdr *th,
+			 const struct synproxy_options *opts)
+{
+	const struct iphdr *rx_iph = ip_hdr(skb);
+	unsigned int tcp_len;
+	struct sk_buff *ack_skb;
+	struct iphdr *ack_iph;
+	struct tcphdr *ack_th;
+
+	tcp_len = sizeof(struct tcphdr) + synproxy_options_size(opts);
+	ack_skb = alloc_skb(sizeof(struct iphdr) + tcp_len + MAX_TCP_HEADER,
+			    GFP_ATOMIC);
+	if (!ack_skb)
+		return;
+	skb_reserve(ack_skb, MAX_TCP_HEADER);
+
+	ack_iph = synproxy_build_ip(net, ack_skb, rx_iph->saddr, rx_iph->daddr);
+
+	skb_reset_transport_header(ack_skb);
+	ack_th = skb_put(ack_skb, tcp_len);
+	ack_th->source = th->source;
+	ack_th->dest = th->dest;
+	ack_th->seq = htonl(ntohl(th->seq) + 1);
+	ack_th->ack_seq = th->ack_seq;
+	/* The flag word is written before doff and window on purpose. */
+	tcp_flag_word(ack_th) = TCP_FLAG_ACK;
+	ack_th->doff = tcp_len / 4;
+	ack_th->window = htons(ntohs(th->window) >> opts->wscale);
+	ack_th->check = 0;
+	ack_th->urg_ptr = 0;
+
+	synproxy_build_options(ack_th, opts);
+
+	synproxy_send_tcp(net, skb, ack_skb, skb_nfct(skb),
+			  IP_CT_ESTABLISHED_REPLY, ack_iph, ack_th, tcp_len);
+}
+
+/* Validate the SYN cookie carried in the client's ACK (th->ack_seq - 1).
+ *
+ * On an invalid cookie the cookie_invalid counter is bumped and false is
+ * returned.  Otherwise cookie_valid is bumped, the decoded MSS is stored in
+ * @opts, the timestamp cookie is checked when the timestamp option is set,
+ * and a SYN is sent on via synproxy_send_server_syn(); returns true.
+ */
+bool
+synproxy_recv_client_ack(struct net *net,
+			 const struct sk_buff *skb, const struct tcphdr *th,
+			 struct synproxy_options *opts, u32 recv_seq)
+{
+	struct synproxy_net *snet = synproxy_pernet(net);
+	u32 cookie = ntohl(th->ack_seq) - 1;
+	int mss = __cookie_v4_check(ip_hdr(skb), th, cookie);
+
+	if (!mss) {
+		this_cpu_inc(snet->stats->cookie_invalid);
+		return false;
+	}
+
+	this_cpu_inc(snet->stats->cookie_valid);
+
+	opts->options |= NF_SYNPROXY_OPT_MSS;
+	opts->mss = mss;
+	if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
+		synproxy_check_timestamp_cookie(opts);
+
+	synproxy_send_server_syn(net, skb, th, opts, recv_seq);
+	return true;
+}
+EXPORT_SYMBOL_GPL(synproxy_recv_client_ack);
+/* Netfilter hook implementing the SYNPROXY handling for IPv4 TCP packets
+ * whose conntrack entry carries the synproxy extension.
+ *
+ * Returns NF_ACCEPT for packets that need no special treatment, NF_DROP when
+ * the TCP header or its options cannot be read or cookie validation fails,
+ * and NF_STOLEN after the packet has been consumed here.
+ */
+unsigned int
+ipv4_synproxy_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *nhs)
+{
+ struct net *net = nhs->net;
+ struct synproxy_net *snet = synproxy_pernet(net);
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ struct nf_conn_synproxy *synproxy;
+ struct synproxy_options opts = {};
+ const struct ip_ct_tcp *state;
+ struct tcphdr *th, _th;
+ unsigned int thoff;
+ /* Packets without conntrack or without the synproxy extension pass. */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return NF_ACCEPT;
+
+ synproxy = nfct_synproxy(ct);
+ if (!synproxy)
+ return NF_ACCEPT;
+ /* Loopback and non-TCP packets are not handled either. */
+ if (nf_is_loopback_packet(skb) ||
+ ip_hdr(skb)->protocol != IPPROTO_TCP)
+ return NF_ACCEPT;
+
+ thoff = ip_hdrlen(skb);
+ th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
+ if (!th)
+ return NF_DROP;
+ /* Dispatch on the conntrack TCP state. */
+ state = &ct->proto.tcp;
+ switch (state->state) {
+ case TCP_CONNTRACK_CLOSE:
+ if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
+ ntohl(th->seq) + 1);
+ break;
+ }
+ /* Only a plain SYN in the original direction reopens the connection. */
+ if (!th->syn || th->ack ||
+ CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ break;
+
+ /* Reopened connection - reset the sequence number and timestamp
+ * adjustments, they will get initialized once the connection is
+ * reestablished.
+ */
+ nf_ct_seqadj_init(ct, ctinfo, 0);
+ synproxy->tsoff = 0;
+ this_cpu_inc(snet->stats->conn_reopened);
+
+ /* fall through */
+ case TCP_CONNTRACK_SYN_SENT:
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+ /* A pure ACK from the originator here is re-validated as a cookie ACK. */
+ if (!th->syn && th->ack &&
+ CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+ /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
+ * therefore we need to add 1 to make the SYN sequence
+ * number match the one of first SYN.
+ */
+ if (synproxy_recv_client_ack(net, skb, th, &opts,
+ ntohl(th->seq) + 1)) {
+ this_cpu_inc(snet->stats->cookie_retrans);
+ consume_skb(skb);
+ return NF_STOLEN;
+ } else {
+ return NF_DROP;
+ }
+ }
+ /* Otherwise remember ack_seq as the ISN and, if present, tsecr. */
+ synproxy->isn = ntohl(th->ack_seq);
+ if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->its = opts.tsecr;
+
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+ break;
+ case TCP_CONNTRACK_SYN_RECV:
+ if (!th->syn || !th->ack)
+ break;
+
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+ /* Timestamp offset: this packet's tsval minus the stored its. */
+ if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP) {
+ synproxy->tsoff = opts.tsval - synproxy->its;
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+ }
+ /* MSS, window scale and SACK-permitted are not put into the ACKs below. */
+ opts.options &= ~(NF_SYNPROXY_OPT_MSS |
+ NF_SYNPROXY_OPT_WSCALE |
+ NF_SYNPROXY_OPT_SACK_PERM);
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_server_ack(net, state, skb, th, &opts);
+ /* Sequence adjustment is the stored ISN minus this packet's seq. */
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+ nf_conntrack_event_cache(IPCT_SEQADJ, ct);
+ /* Swap the timestamps back before building the second ACK. */
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_client_ack(net, skb, th, &opts);
+
+ consume_skb(skb);
+ return NF_STOLEN;
+ default:
+ break;
+ }
+
+ synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
+ return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(ipv4_synproxy_hook);
+
+/* IPv4 hook points for ipv4_synproxy_hook(): LOCAL_IN and POST_ROUTING, both
+ * at priority NF_IP_PRI_CONNTRACK_CONFIRM - 1.
+ */
+static const struct nf_hook_ops ipv4_synproxy_ops[] = {
+	{
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+		.hook		= ipv4_synproxy_hook,
+	},
+	{
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+		.hook		= ipv4_synproxy_hook,
+	},
+};
+
+/* Take a reference on the IPv4 synproxy hooks of @net.
+ *
+ * The hooks are registered when the reference count in @snet is zero.
+ * Returns 0 on success or the error from nf_register_net_hooks(), in which
+ * case the count is left untouched.
+ */
+int nf_synproxy_ipv4_init(struct synproxy_net *snet, struct net *net)
+{
+	if (!snet->hook_ref4) {
+		int ret = nf_register_net_hooks(net, ipv4_synproxy_ops,
+						ARRAY_SIZE(ipv4_synproxy_ops));
+
+		if (ret)
+			return ret;
+	}
+
+	snet->hook_ref4++;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_synproxy_ipv4_init);
+
+/* Drop a reference on the IPv4 synproxy hooks of @net and unregister them
+ * once the count in @snet reaches zero.
+ */
+void nf_synproxy_ipv4_fini(struct synproxy_net *snet, struct net *net)
+{
+	if (--snet->hook_ref4)
+		return;
+
+	nf_unregister_net_hooks(net, ipv4_synproxy_ops,
+				ARRAY_SIZE(ipv4_synproxy_ops));
+}
+EXPORT_SYMBOL_GPL(nf_synproxy_ipv4_fini);
+
+#if IS_ENABLED(CONFIG_IPV6)
+/* Append an IPv6 header to @skb and mark it as the network header.
+ *
+ * The header gets a zero traffic class / flow label, the hop limit from
+ * net->ipv6.devconf_all, next header TCP and the given addresses.  Returns
+ * a pointer to the new header.
+ */
+static struct ipv6hdr *
+synproxy_build_ip_ipv6(struct net *net, struct sk_buff *skb,
+		       const struct in6_addr *saddr,
+		       const struct in6_addr *daddr)
+{
+	struct ipv6hdr *hdr;
+
+	skb_reset_network_header(skb);
+	hdr = skb_put(skb, sizeof(struct ipv6hdr));
+
+	ip6_flow_hdr(hdr, 0, 0);
+	hdr->hop_limit = net->ipv6.devconf_all->hop_limit;
+	hdr->nexthdr = IPPROTO_TCP;
+	hdr->saddr = *saddr;
+	hdr->daddr = *daddr;
+
+	return hdr;
+}
+
+/* Finish and transmit a locally built IPv6 TCP packet.
+ *
+ * Sets up partial checksumming for @nth, routes @nskb from a flow built out
+ * of @niph/@nth (including the xfrm lookup), optionally attaches @nfct with
+ * @ctinfo (taking a reference) and sends it with ip6_local_out().  @nskb is
+ * freed when routing or the xfrm lookup fails.
+ */
+static void
+synproxy_send_tcp_ipv6(struct net *net,
+		       const struct sk_buff *skb, struct sk_buff *nskb,
+		       struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
+		       struct ipv6hdr *niph, struct tcphdr *nth,
+		       unsigned int tcp_hdr_size)
+{
+	struct dst_entry *route;
+	struct flowi6 fl6;
+
+	nth->check = ~tcp_v6_check(tcp_hdr_size, &niph->saddr, &niph->daddr, 0);
+	nskb->ip_summed = CHECKSUM_PARTIAL;
+	nskb->csum_start = (unsigned char *)nth - nskb->head;
+	nskb->csum_offset = offsetof(struct tcphdr, check);
+
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_proto = IPPROTO_TCP;
+	fl6.saddr = niph->saddr;
+	fl6.daddr = niph->daddr;
+	fl6.fl6_sport = nth->source;
+	fl6.fl6_dport = nth->dest;
+	security_skb_classify_flow((struct sk_buff *)skb,
+				   flowi6_to_flowi(&fl6));
+
+	if (nf_ip6_route(net, &route, flowi6_to_flowi(&fl6), false))
+		goto drop;
+
+	route = xfrm_lookup(net, route, flowi6_to_flowi(&fl6), NULL, 0);
+	if (IS_ERR(route))
+		goto drop;
+
+	skb_dst_set(nskb, route);
+
+	if (nfct) {
+		nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo);
+		nf_conntrack_get(nfct);
+	}
+
+	ip6_local_out(net, nskb->sk, nskb);
+	return;
+
+drop:
+	kfree_skb(nskb);
+}
+
+/* Answer the SYN in @skb with a SYN/ACK whose sequence number is a cookie.
+ *
+ * Addresses and ports are those of @skb/@th swapped, seq comes from
+ * nf_ipv6_cookie_init_sequence() (fed with a copy of opts->mss), ack_seq is
+ * th->seq + 1 and the window is zero.  ECE is set when @opts has the ECN
+ * option.  The packet is attached to the conntrack entry of @skb as
+ * IP_CT_ESTABLISHED_REPLY.  If the skb cannot be allocated nothing is sent.
+ */
+void
+synproxy_send_client_synack_ipv6(struct net *net,
+				 const struct sk_buff *skb,
+				 const struct tcphdr *th,
+				 const struct synproxy_options *opts)
+{
+	const struct ipv6hdr *rx_iph = ipv6_hdr(skb);
+	__be32 flags = TCP_FLAG_SYN | TCP_FLAG_ACK;
+	unsigned int tcp_len;
+	struct sk_buff *reply;
+	struct ipv6hdr *reply_iph;
+	struct tcphdr *reply_th;
+	u16 mss = opts->mss;
+
+	tcp_len = sizeof(struct tcphdr) + synproxy_options_size(opts);
+	reply = alloc_skb(sizeof(struct ipv6hdr) + tcp_len + MAX_TCP_HEADER,
+			  GFP_ATOMIC);
+	if (!reply)
+		return;
+	skb_reserve(reply, MAX_TCP_HEADER);
+
+	reply_iph = synproxy_build_ip_ipv6(net, reply, &rx_iph->daddr,
+					   &rx_iph->saddr);
+
+	if (opts->options & NF_SYNPROXY_OPT_ECN)
+		flags |= TCP_FLAG_ECE;
+
+	skb_reset_transport_header(reply);
+	reply_th = skb_put(reply, tcp_len);
+	reply_th->source = th->dest;
+	reply_th->dest = th->source;
+	reply_th->seq = htonl(nf_ipv6_cookie_init_sequence(rx_iph, th, &mss));
+	reply_th->ack_seq = htonl(ntohl(th->seq) + 1);
+	/* The flag word is written before doff and window on purpose. */
+	tcp_flag_word(reply_th) = flags;
+	reply_th->doff = tcp_len / 4;
+	reply_th->window = 0;
+	reply_th->check = 0;
+	reply_th->urg_ptr = 0;
+
+	synproxy_build_options(reply_th, opts);
+
+	synproxy_send_tcp_ipv6(net, skb, reply, skb_nfct(skb),
+			       IP_CT_ESTABLISHED_REPLY, reply_iph, reply_th,
+			       tcp_len);
+}
+EXPORT_SYMBOL_GPL(synproxy_send_client_synack_ipv6);
+
+/* Send a SYN in the same direction as @skb.
+ *
+ * Addresses, ports and window are copied from @skb/@th, seq is
+ * @recv_seq - 1, and ECE|CWR are set when @opts has the ECN option.  The
+ * packet is attached to the per-netns template conntrack as IP_CT_NEW.  If
+ * the skb cannot be allocated nothing is sent.
+ */
+static void
+synproxy_send_server_syn_ipv6(struct net *net, const struct sk_buff *skb,
+			      const struct tcphdr *th,
+			      const struct synproxy_options *opts, u32 recv_seq)
+{
+	struct synproxy_net *snet = synproxy_pernet(net);
+	const struct ipv6hdr *rx_iph = ipv6_hdr(skb);
+	__be32 flags = TCP_FLAG_SYN;
+	unsigned int tcp_len;
+	struct sk_buff *syn;
+	struct ipv6hdr *syn_iph;
+	struct tcphdr *syn_th;
+
+	tcp_len = sizeof(struct tcphdr) + synproxy_options_size(opts);
+	syn = alloc_skb(sizeof(struct ipv6hdr) + tcp_len + MAX_TCP_HEADER,
+			GFP_ATOMIC);
+	if (!syn)
+		return;
+	skb_reserve(syn, MAX_TCP_HEADER);
+
+	syn_iph = synproxy_build_ip_ipv6(net, syn, &rx_iph->saddr,
+					 &rx_iph->daddr);
+
+	if (opts->options & NF_SYNPROXY_OPT_ECN)
+		flags |= TCP_FLAG_ECE | TCP_FLAG_CWR;
+
+	skb_reset_transport_header(syn);
+	syn_th = skb_put(syn, tcp_len);
+	syn_th->source = th->source;
+	syn_th->dest = th->dest;
+	syn_th->seq = htonl(recv_seq - 1);
+	/* ack_seq is used to relay our ISN to the synproxy hook to initialize
+	 * sequence number translation once a connection tracking entry exists.
+	 */
+	syn_th->ack_seq = htonl(ntohl(th->ack_seq) - 1);
+	/* The flag word is written before doff and window on purpose. */
+	tcp_flag_word(syn_th) = flags;
+	syn_th->doff = tcp_len / 4;
+	syn_th->window = th->window;
+	syn_th->check = 0;
+	syn_th->urg_ptr = 0;
+
+	synproxy_build_options(syn_th, opts);
+
+	synproxy_send_tcp_ipv6(net, skb, syn, &snet->tmpl->ct_general,
+			       IP_CT_NEW, syn_iph, syn_th, tcp_len);
+}
+
+/* Send a bare ACK back towards the sender of @skb (IPv6).
+ *
+ * Addresses and ports are those of @skb/@th swapped, seq echoes th->ack_seq,
+ * ack_seq is th->seq + 1 and the advertised window is the td_maxwin stored
+ * in @state for the original direction.  The packet is sent without a
+ * conntrack reference.  If the skb cannot be allocated nothing is sent.
+ */
+static void
+synproxy_send_server_ack_ipv6(struct net *net, const struct ip_ct_tcp *state,
+			      const struct sk_buff *skb,
+			      const struct tcphdr *th,
+			      const struct synproxy_options *opts)
+{
+	const struct ipv6hdr *rx_iph = ipv6_hdr(skb);
+	unsigned int tcp_len;
+	struct sk_buff *ack_skb;
+	struct ipv6hdr *ack_iph;
+	struct tcphdr *ack_th;
+
+	tcp_len = sizeof(struct tcphdr) + synproxy_options_size(opts);
+	ack_skb = alloc_skb(sizeof(struct ipv6hdr) + tcp_len + MAX_TCP_HEADER,
+			    GFP_ATOMIC);
+	if (!ack_skb)
+		return;
+	skb_reserve(ack_skb, MAX_TCP_HEADER);
+
+	ack_iph = synproxy_build_ip_ipv6(net, ack_skb, &rx_iph->daddr,
+					 &rx_iph->saddr);
+
+	skb_reset_transport_header(ack_skb);
+	ack_th = skb_put(ack_skb, tcp_len);
+	ack_th->source = th->dest;
+	ack_th->dest = th->source;
+	/* Already in network byte order, no conversion round-trip needed. */
+	ack_th->seq = th->ack_seq;
+	ack_th->ack_seq = htonl(ntohl(th->seq) + 1);
+	/* The flag word is written before doff and window on purpose. */
+	tcp_flag_word(ack_th) = TCP_FLAG_ACK;
+	ack_th->doff = tcp_len / 4;
+	ack_th->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
+	ack_th->check = 0;
+	ack_th->urg_ptr = 0;
+
+	synproxy_build_options(ack_th, opts);
+
+	synproxy_send_tcp_ipv6(net, skb, ack_skb, NULL, 0, ack_iph, ack_th,
+			       tcp_len);
+}
+
+/* Send a bare ACK in the same direction as @skb (IPv6).
+ *
+ * Addresses and ports are copied unchanged from @skb/@th, seq is
+ * th->seq + 1, ack_seq is th->ack_seq and the window is th->window shifted
+ * right by opts->wscale.  The packet is attached to the conntrack entry of
+ * @skb as IP_CT_ESTABLISHED_REPLY.  If the skb cannot be allocated nothing
+ * is sent.
+ */
+static void
+synproxy_send_client_ack_ipv6(struct net *net, const struct sk_buff *skb,
+			      const struct tcphdr *th,
+			      const struct synproxy_options *opts)
+{
+	const struct ipv6hdr *rx_iph = ipv6_hdr(skb);
+	unsigned int tcp_len;
+	struct sk_buff *ack_skb;
+	struct ipv6hdr *ack_iph;
+	struct tcphdr *ack_th;
+
+	tcp_len = sizeof(struct tcphdr) + synproxy_options_size(opts);
+	ack_skb = alloc_skb(sizeof(struct ipv6hdr) + tcp_len + MAX_TCP_HEADER,
+			    GFP_ATOMIC);
+	if (!ack_skb)
+		return;
+	skb_reserve(ack_skb, MAX_TCP_HEADER);
+
+	ack_iph = synproxy_build_ip_ipv6(net, ack_skb, &rx_iph->saddr,
+					 &rx_iph->daddr);
+
+	skb_reset_transport_header(ack_skb);
+	ack_th = skb_put(ack_skb, tcp_len);
+	ack_th->source = th->source;
+	ack_th->dest = th->dest;
+	ack_th->seq = htonl(ntohl(th->seq) + 1);
+	ack_th->ack_seq = th->ack_seq;
+	/* The flag word is written before doff and window on purpose. */
+	tcp_flag_word(ack_th) = TCP_FLAG_ACK;
+	ack_th->doff = tcp_len / 4;
+	ack_th->window = htons(ntohs(th->window) >> opts->wscale);
+	ack_th->check = 0;
+	ack_th->urg_ptr = 0;
+
+	synproxy_build_options(ack_th, opts);
+
+	synproxy_send_tcp_ipv6(net, skb, ack_skb, skb_nfct(skb),
+			       IP_CT_ESTABLISHED_REPLY, ack_iph, ack_th,
+			       tcp_len);
+}
+
+/* Validate the SYN cookie carried in the client's ACK (th->ack_seq - 1).
+ *
+ * On an invalid cookie the cookie_invalid counter is bumped and false is
+ * returned.  Otherwise cookie_valid is bumped, the decoded MSS is stored in
+ * @opts, the timestamp cookie is checked when the timestamp option is set,
+ * and a SYN is sent on via synproxy_send_server_syn_ipv6(); returns true.
+ */
+bool
+synproxy_recv_client_ack_ipv6(struct net *net,
+			      const struct sk_buff *skb,
+			      const struct tcphdr *th,
+			      struct synproxy_options *opts, u32 recv_seq)
+{
+	struct synproxy_net *snet = synproxy_pernet(net);
+	u32 cookie = ntohl(th->ack_seq) - 1;
+	int mss = nf_cookie_v6_check(ipv6_hdr(skb), th, cookie);
+
+	if (!mss) {
+		this_cpu_inc(snet->stats->cookie_invalid);
+		return false;
+	}
+
+	this_cpu_inc(snet->stats->cookie_valid);
+
+	opts->options |= NF_SYNPROXY_OPT_MSS;
+	opts->mss = mss;
+	if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
+		synproxy_check_timestamp_cookie(opts);
+
+	synproxy_send_server_syn_ipv6(net, skb, th, opts, recv_seq);
+	return true;
+}
+EXPORT_SYMBOL_GPL(synproxy_recv_client_ack_ipv6);
+/* Netfilter hook implementing the SYNPROXY handling for IPv6 TCP packets
+ * whose conntrack entry carries the synproxy extension.
+ *
+ * Returns NF_ACCEPT for packets that need no special treatment, NF_DROP when
+ * the TCP header or its options cannot be read or cookie validation fails,
+ * and NF_STOLEN after the packet has been consumed here.
+ */
+unsigned int
+ipv6_synproxy_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *nhs)
+{
+ struct net *net = nhs->net;
+ struct synproxy_net *snet = synproxy_pernet(net);
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ struct nf_conn_synproxy *synproxy;
+ struct synproxy_options opts = {};
+ const struct ip_ct_tcp *state;
+ struct tcphdr *th, _th;
+ __be16 frag_off;
+ u8 nexthdr;
+ int thoff;
+ /* Packets without conntrack or without the synproxy extension pass. */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return NF_ACCEPT;
+
+ synproxy = nfct_synproxy(ct);
+ if (!synproxy)
+ return NF_ACCEPT;
+
+ if (nf_is_loopback_packet(skb))
+ return NF_ACCEPT;
+ /* Skip extension headers; anything that is not TCP is accepted as is. */
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+ &frag_off);
+ if (thoff < 0 || nexthdr != IPPROTO_TCP)
+ return NF_ACCEPT;
+
+ th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
+ if (!th)
+ return NF_DROP;
+ /* Dispatch on the conntrack TCP state. */
+ state = &ct->proto.tcp;
+ switch (state->state) {
+ case TCP_CONNTRACK_CLOSE:
+ if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
+ ntohl(th->seq) + 1);
+ break;
+ }
+ /* Only a plain SYN in the original direction reopens the connection. */
+ if (!th->syn || th->ack ||
+ CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ break;
+
+ /* Reopened connection - reset the sequence number and timestamp
+ * adjustments, they will get initialized once the connection is
+ * reestablished.
+ */
+ nf_ct_seqadj_init(ct, ctinfo, 0);
+ synproxy->tsoff = 0;
+ this_cpu_inc(snet->stats->conn_reopened);
+
+ /* fall through */
+ case TCP_CONNTRACK_SYN_SENT:
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+ /* A pure ACK from the originator here is re-validated as a cookie ACK. */
+ if (!th->syn && th->ack &&
+ CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+ /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
+ * therefore we need to add 1 to make the SYN sequence
+ * number match the one of first SYN.
+ */
+ if (synproxy_recv_client_ack_ipv6(net, skb, th, &opts,
+ ntohl(th->seq) + 1)) {
+ this_cpu_inc(snet->stats->cookie_retrans);
+ consume_skb(skb);
+ return NF_STOLEN;
+ } else {
+ return NF_DROP;
+ }
+ }
+ /* Otherwise remember ack_seq as the ISN and, if present, tsecr. */
+ synproxy->isn = ntohl(th->ack_seq);
+ if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->its = opts.tsecr;
+
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+ break;
+ case TCP_CONNTRACK_SYN_RECV:
+ if (!th->syn || !th->ack)
+ break;
+
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+ /* Timestamp offset: this packet's tsval minus the stored its. */
+ if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP) {
+ synproxy->tsoff = opts.tsval - synproxy->its;
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+ }
+ /* MSS, window scale and SACK-permitted are not put into the ACKs below. */
+ opts.options &= ~(NF_SYNPROXY_OPT_MSS |
+ NF_SYNPROXY_OPT_WSCALE |
+ NF_SYNPROXY_OPT_SACK_PERM);
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_server_ack_ipv6(net, state, skb, th, &opts);
+ /* Sequence adjustment is the stored ISN minus this packet's seq. */
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+ nf_conntrack_event_cache(IPCT_SEQADJ, ct);
+ /* Swap the timestamps back before building the second ACK. */
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_client_ack_ipv6(net, skb, th, &opts);
+
+ consume_skb(skb);
+ return NF_STOLEN;
+ default:
+ break;
+ }
+
+ synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
+ return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(ipv6_synproxy_hook);
+
+/* IPv6 hook points for ipv6_synproxy_hook(): LOCAL_IN and POST_ROUTING, both
+ * at priority NF_IP_PRI_CONNTRACK_CONFIRM - 1.
+ */
+static const struct nf_hook_ops ipv6_synproxy_ops[] = {
+	{
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+		.hook		= ipv6_synproxy_hook,
+	},
+	{
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+		.hook		= ipv6_synproxy_hook,
+	},
+};
+
+/* Take a reference on the IPv6 synproxy hooks of @net.
+ *
+ * The hooks are registered when the reference count in @snet is zero.
+ * Returns 0 on success or the error from nf_register_net_hooks(), in which
+ * case the count is left untouched.
+ */
+int
+nf_synproxy_ipv6_init(struct synproxy_net *snet, struct net *net)
+{
+	if (!snet->hook_ref6) {
+		int ret = nf_register_net_hooks(net, ipv6_synproxy_ops,
+						ARRAY_SIZE(ipv6_synproxy_ops));
+
+		if (ret)
+			return ret;
+	}
+
+	snet->hook_ref6++;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_init);
+
+/* Drop a reference on the IPv6 synproxy hooks of @net and unregister them
+ * once the count in @snet reaches zero.
+ */
+void
+nf_synproxy_ipv6_fini(struct synproxy_net *snet, struct net *net)
+{
+	if (--snet->hook_ref6)
+		return;
+
+	nf_unregister_net_hooks(net, ipv6_synproxy_ops,
+				ARRAY_SIZE(ipv6_synproxy_ops));
+}
+EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_fini);
+#endif /* CONFIG_IPV6 */
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index bcf17fb46d96..ed17a7c29b86 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -18,6 +18,7 @@
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -97,6 +98,7 @@ static void nft_ctx_init(struct nft_ctx *ctx,
ctx->nla = nla;
ctx->portid = NETLINK_CB(skb).portid;
ctx->report = nlmsg_report(nlh);
+ ctx->flags = nlh->nlmsg_flags;
ctx->seq = nlh->nlmsg_seq;
}
@@ -1169,6 +1171,7 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
[NFTA_CHAIN_POLICY] = { .type = NLA_U32 },
[NFTA_CHAIN_TYPE] = { .type = NLA_STRING },
[NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED },
+ [NFTA_CHAIN_FLAGS] = { .type = NLA_U32 },
};
static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
@@ -1446,25 +1449,18 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
return newstats;
}
-static void nft_chain_stats_replace(struct net *net,
- struct nft_base_chain *chain,
- struct nft_stats __percpu *newstats)
+static void nft_chain_stats_replace(struct nft_trans *trans)
{
- struct nft_stats __percpu *oldstats;
+ struct nft_base_chain *chain = nft_base_chain(trans->ctx.chain);
- if (newstats == NULL)
+ if (!nft_trans_chain_stats(trans))
return;
- if (rcu_access_pointer(chain->stats)) {
- oldstats = rcu_dereference_protected(chain->stats,
- lockdep_commit_lock_is_held(net));
- rcu_assign_pointer(chain->stats, newstats);
- synchronize_rcu();
- free_percpu(oldstats);
- } else {
- rcu_assign_pointer(chain->stats, newstats);
+ rcu_swap_protected(chain->stats, nft_trans_chain_stats(trans),
+ lockdep_commit_lock_is_held(trans->ctx.net));
+
+ if (!nft_trans_chain_stats(trans))
static_branch_inc(&nft_counters_enabled);
- }
}
static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
@@ -1610,7 +1606,7 @@ static struct nft_rule **nf_tables_chain_alloc_rules(const struct nft_chain *cha
}
static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
- u8 policy)
+ u8 policy, u32 flags)
{
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table;
@@ -1664,8 +1660,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
ops->hook = hook.type->hooks[ops->hooknum];
ops->dev = hook.dev;
- chain->flags |= NFT_BASE_CHAIN;
+ chain->flags |= NFT_BASE_CHAIN | flags;
basechain->policy = NF_ACCEPT;
+ INIT_LIST_HEAD(&basechain->cb_list);
} else {
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (chain == NULL)
@@ -1725,7 +1722,8 @@ err1:
return err;
}
-static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy)
+static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
+ u32 flags)
{
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table;
@@ -1737,6 +1735,9 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy)
struct nft_trans *trans;
int err;
+ if (chain->flags ^ flags)
+ return -EOPNOTSUPP;
+
if (nla[NFTA_CHAIN_HOOK]) {
if (!nft_is_base_chain(chain))
return -EBUSY;
@@ -1842,6 +1843,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
u8 policy = NF_ACCEPT;
struct nft_ctx ctx;
u64 handle = 0;
+ u32 flags = 0;
lockdep_assert_held(&net->nft.commit_mutex);
@@ -1896,6 +1898,9 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
}
}
+ if (nla[NFTA_CHAIN_FLAGS])
+ flags = ntohl(nla_get_be32(nla[NFTA_CHAIN_FLAGS]));
+
nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
if (chain != NULL) {
@@ -1906,10 +1911,10 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- return nf_tables_updchain(&ctx, genmask, policy);
+ return nf_tables_updchain(&ctx, genmask, policy, flags);
}
- return nf_tables_addchain(&ctx, family, genmask, policy);
+ return nf_tables_addchain(&ctx, family, genmask, policy, flags);
}
static int nf_tables_delchain(struct net *net, struct sock *nlsk,
@@ -2016,15 +2021,31 @@ EXPORT_SYMBOL_GPL(nft_unregister_expr);
static const struct nft_expr_type *__nft_expr_type_get(u8 family,
struct nlattr *nla)
{
- const struct nft_expr_type *type;
+ const struct nft_expr_type *type, *candidate = NULL;
list_for_each_entry(type, &nf_tables_expressions, list) {
- if (!nla_strcmp(nla, type->name) &&
- (!type->family || type->family == family))
- return type;
+ if (!nla_strcmp(nla, type->name)) {
+ if (!type->family && !candidate)
+ candidate = type;
+ else if (type->family == family)
+ candidate = type;
+ }
}
- return NULL;
+ return candidate;
+}
+
+#ifdef CONFIG_MODULES
+/* Request the module providing the family-specific expression named by @nla.
+ * Returns -EAGAIN when the expression type can be found afterwards, 0
+ * otherwise.
+ */
+static int nft_expr_type_request_module(struct net *net, u8 family,
+					struct nlattr *nla)
+{
+	nft_request_module(net, "nft-expr-%u-%.*s", family,
+			   nla_len(nla), (char *)nla_data(nla));
+
+	return __nft_expr_type_get(family, nla) ? -EAGAIN : 0;
 }
+#endif
static const struct nft_expr_type *nft_expr_type_get(struct net *net,
u8 family,
@@ -2042,9 +2063,7 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net,
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (type == NULL) {
- nft_request_module(net, "nft-expr-%u-%.*s", family,
- nla_len(nla), (char *)nla_data(nla));
- if (__nft_expr_type_get(family, nla))
+ if (nft_expr_type_request_module(net, family, nla) == -EAGAIN)
return ERR_PTR(-EAGAIN);
nft_request_module(net, "nft-expr-%.*s",
@@ -2137,6 +2156,12 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
(const struct nlattr * const *)info->tb);
if (IS_ERR(ops)) {
err = PTR_ERR(ops);
+#ifdef CONFIG_MODULES
+ if (err == -EAGAIN)
+ nft_expr_type_request_module(ctx->net,
+ ctx->family,
+ tb[NFTA_EXPR_NAME]);
+#endif
goto err1;
}
} else
@@ -2645,6 +2670,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
u8 genmask = nft_genmask_next(net);
struct nft_expr_info *info = NULL;
int family = nfmsg->nfgen_family;
+ struct nft_flow_rule *flow;
struct nft_table *table;
struct nft_chain *chain;
struct nft_rule *rule, *old_rule = NULL;
@@ -2791,7 +2817,8 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
list_add_tail_rcu(&rule->list, &old_rule->list);
} else {
- if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
+ trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
+ if (!trans) {
err = -ENOMEM;
goto err2;
}
@@ -2814,6 +2841,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
if (net->nft.validate_state == NFT_VALIDATE_DO)
return nft_table_validate(net, table);
+ if (chain->flags & NFT_CHAIN_HW_OFFLOAD) {
+ flow = nft_flow_rule_create(rule);
+ if (IS_ERR(flow))
+ return PTR_ERR(flow);
+
+ nft_trans_flow_rule(trans) = flow;
+ }
+
return 0;
err2:
nf_tables_rule_release(&ctx, rule);
@@ -3877,6 +3912,7 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
[NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 },
[NFTA_SET_ELEM_TIMEOUT] = { .type = NLA_U64 },
+ [NFTA_SET_ELEM_EXPIRATION] = { .type = NLA_U64 },
[NFTA_SET_ELEM_USERDATA] = { .type = NLA_BINARY,
.len = NFT_USERDATA_MAXLEN },
[NFTA_SET_ELEM_EXPR] = { .type = NLA_NESTED },
@@ -4330,7 +4366,7 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
void *nft_set_elem_init(const struct nft_set *set,
const struct nft_set_ext_tmpl *tmpl,
const u32 *key, const u32 *data,
- u64 timeout, gfp_t gfp)
+ u64 timeout, u64 expiration, gfp_t gfp)
{
struct nft_set_ext *ext;
void *elem;
@@ -4345,9 +4381,11 @@ void *nft_set_elem_init(const struct nft_set *set,
memcpy(nft_set_ext_key(ext), key, set->klen);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
memcpy(nft_set_ext_data(ext), data, set->dlen);
- if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION))
- *nft_set_ext_expiration(ext) =
- get_jiffies_64() + timeout;
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+ *nft_set_ext_expiration(ext) = get_jiffies_64() + expiration;
+ if (expiration == 0)
+ *nft_set_ext_expiration(ext) += timeout;
+ }
if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
*nft_set_ext_timeout(ext) = timeout;
@@ -4412,6 +4450,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_trans *trans;
u32 flags = 0;
u64 timeout;
+ u64 expiration;
u8 ulen;
int err;
@@ -4455,6 +4494,16 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
timeout = set->timeout;
}
+ expiration = 0;
+ if (nla[NFTA_SET_ELEM_EXPIRATION] != NULL) {
+ if (!(set->flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_EXPIRATION],
+ &expiration);
+ if (err)
+ return err;
+ }
+
err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &d1,
nla[NFTA_SET_ELEM_KEY]);
if (err < 0)
@@ -4537,7 +4586,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
err = -ENOMEM;
elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, data.data,
- timeout, GFP_KERNEL);
+ timeout, expiration, GFP_KERNEL);
if (elem.priv == NULL)
goto err3;
@@ -4739,7 +4788,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
err = -ENOMEM;
elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, NULL, 0,
- GFP_KERNEL);
+ 0, GFP_KERNEL);
if (elem.priv == NULL)
goto err2;
@@ -6359,9 +6408,9 @@ static void nft_chain_commit_update(struct nft_trans *trans)
if (!nft_is_base_chain(trans->ctx.chain))
return;
+ nft_chain_stats_replace(trans);
+
basechain = nft_base_chain(trans->ctx.chain);
- nft_chain_stats_replace(trans->ctx.net, basechain,
- nft_trans_chain_stats(trans));
switch (nft_trans_chain_policy(trans)) {
case NF_DROP:
@@ -6378,6 +6427,7 @@ static void nft_commit_release(struct nft_trans *trans)
nf_tables_table_destroy(&trans->ctx);
break;
case NFT_MSG_NEWCHAIN:
+ free_percpu(nft_trans_chain_stats(trans));
kfree(nft_trans_chain_name(trans));
break;
case NFT_MSG_DELCHAIN:
@@ -6596,6 +6646,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
struct nft_trans_elem *te;
struct nft_chain *chain;
struct nft_table *table;
+ int err;
if (list_empty(&net->nft.commit_list)) {
mutex_unlock(&net->nft.commit_mutex);
@@ -6606,6 +6657,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
if (nf_tables_validate(net) < 0)
return -EAGAIN;
+ err = nft_flow_rule_offload_commit(net);
+ if (err < 0)
+ return err;
+
/* 1. Allocate space for next generation rules_gen_X[] */
list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
int ret;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index b950cd31348b..96c74c4c7176 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -19,6 +19,7 @@
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_log.h>
+#include <net/netfilter/nft_meta.h>
static noinline void __nft_trace_packet(struct nft_traceinfo *info,
const struct nft_chain *chain,
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
new file mode 100644
index 000000000000..2c3302845f67
--- /dev/null
+++ b/net/netfilter/nf_tables_offload.c
@@ -0,0 +1,267 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <net/flow_offload.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
+#include <net/pkt_cls.h>
+
+/* Allocate a zeroed nft_flow_rule plus a flow_rule with room for
+ * @num_actions actions, and point the flow_rule's match at the dissector,
+ * mask and key embedded in the wrapper.  Returns NULL if either allocation
+ * fails.
+ */
+static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions)
+{
+	struct nft_flow_rule *flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+
+	if (!flow)
+		return NULL;
+
+	flow->rule = flow_rule_alloc(num_actions);
+	if (!flow->rule)
+		goto err_free;
+
+	flow->rule->match.dissector = &flow->match.dissector;
+	flow->rule->match.mask = &flow->match.mask;
+	flow->rule->match.key = &flow->match.key;
+
+	return flow;
+
+err_free:
+	kfree(flow);
+	return NULL;
+}
+
+/* Translate @rule into an nft_flow_rule.
+ *
+ * A first pass over the expressions counts those flagged
+ * NFT_OFFLOAD_F_ACTION to size the flow rule; a second pass calls each
+ * expression's ->offload() callback.  Returns the new flow rule, or
+ * ERR_PTR(-ENOMEM) on allocation failure, ERR_PTR(-EOPNOTSUPP) when an
+ * expression has no ->offload() callback, or the negative error returned by
+ * a callback.
+ */
+struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule)
+{
+	struct nft_offload_ctx ctx = {
+		.dep = {
+			.type = NFT_OFFLOAD_DEP_UNSPEC,
+		},
+	};
+	struct nft_flow_rule *flow;
+	struct nft_expr *expr;
+	int num_actions = 0;
+	int err;
+
+	for (expr = nft_expr_first(rule);
+	     expr->ops && expr != nft_expr_last(rule);
+	     expr = nft_expr_next(expr)) {
+		if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION)
+			num_actions++;
+	}
+
+	flow = nft_flow_rule_alloc(num_actions);
+	if (!flow)
+		return ERR_PTR(-ENOMEM);
+
+	for (expr = nft_expr_first(rule);
+	     expr->ops && expr != nft_expr_last(rule);
+	     expr = nft_expr_next(expr)) {
+		if (!expr->ops->offload) {
+			err = -EOPNOTSUPP;
+			goto err_out;
+		}
+
+		err = expr->ops->offload(&ctx, flow, expr);
+		if (err < 0)
+			goto err_out;
+	}
+	flow->proto = ctx.dep.l3num;
+
+	return flow;
+
+err_out:
+	nft_flow_rule_destroy(flow);
+
+	return ERR_PTR(err);
+}
+
+/* Free @flow together with the flow_rule it points to. */
+void nft_flow_rule_destroy(struct nft_flow_rule *flow)
+{
+	struct flow_rule *inner = flow->rule;
+
+	kfree(inner);
+	kfree(flow);
+}
+
+/* Record in @ctx which kind of dependency (@type) the next call to
+ * nft_offload_update_dependency() should fill in.
+ */
+void nft_offload_set_dependency(struct nft_offload_ctx *ctx,
+				enum nft_offload_dep_type type)
+{
+	ctx->dep.type = type;
+}
+
+/* Store @data as the pending dependency of @ctx and clear the pending type.
+ *
+ * For NFT_OFFLOAD_DEP_NETWORK two bytes are copied into dep.l3num, for
+ * NFT_OFFLOAD_DEP_TRANSPORT one byte into dep.protonum; a mismatching @len
+ * triggers a warning but the fixed-size copy is still done.  Any other
+ * pending type copies nothing.
+ */
+void nft_offload_update_dependency(struct nft_offload_ctx *ctx,
+				   const void *data, u32 len)
+{
+	if (ctx->dep.type == NFT_OFFLOAD_DEP_NETWORK) {
+		WARN_ON(len != sizeof(__u16));
+		memcpy(&ctx->dep.l3num, data, sizeof(__u16));
+	} else if (ctx->dep.type == NFT_OFFLOAD_DEP_TRANSPORT) {
+		WARN_ON(len != sizeof(__u8));
+		memcpy(&ctx->dep.protonum, data, sizeof(__u8));
+	}
+
+	ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC;
+}
+
+/* Fill the protocol and extack fields of @common. */
+static void nft_flow_offload_common_init(struct flow_cls_common_offload *common,
+					 __be16 proto,
+					 struct netlink_ext_ack *extack)
+{
+	common->extack = extack;
+	common->protocol = proto;
+}
+
+/* Invoke every block callback on @basechain's cb_list with @type and
+ * @type_data.  Stops at, and returns, the first negative result; returns 0
+ * when all callbacks succeed or the list is empty.
+ */
+static int nft_setup_cb_call(struct nft_base_chain *basechain,
+			     enum tc_setup_type type, void *type_data)
+{
+	struct flow_block_cb *cb;
+
+	list_for_each_entry(cb, &basechain->cb_list, list) {
+		int ret = cb->cb(type, type_data, cb->cb_priv);
+
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/* Send @command for the rule in @trans to the block callbacks bound to its
+ * base chain.
+ *
+ * The rule pointer is used as the cookie.  When the transaction carries a
+ * flow rule, its protocol and flow_rule are passed along; otherwise the
+ * protocol is ETH_P_ALL and no flow_rule is set.  Returns -EOPNOTSUPP when
+ * the chain is not a base chain, else the result of nft_setup_cb_call().
+ */
+static int nft_flow_offload_rule(struct nft_trans *trans,
+				 enum flow_cls_command command)
+{
+	struct nft_flow_rule *flow = nft_trans_flow_rule(trans);
+	struct nft_rule *rule = nft_trans_rule(trans);
+	struct flow_cls_offload cls_flow = {};
+	struct nft_base_chain *basechain;
+	/* Zero-initialized: the callbacks get a pointer to this and must not
+	 * see stack garbage in it.
+	 */
+	struct netlink_ext_ack extack = {};
+	/* NOTE(review): host-order constant stored in a __be16 - confirm
+	 * whether htons(ETH_P_ALL) is what the callbacks expect.
+	 */
+	__be16 proto = ETH_P_ALL;
+
+	if (!nft_is_base_chain(trans->ctx.chain))
+		return -EOPNOTSUPP;
+
+	basechain = nft_base_chain(trans->ctx.chain);
+
+	if (flow)
+		proto = flow->proto;
+
+	nft_flow_offload_common_init(&cls_flow.common, proto, &extack);
+	cls_flow.command = command;
+	cls_flow.cookie = (unsigned long) rule;
+	if (flow)
+		cls_flow.rule = flow->rule;
+
+	return nft_setup_cb_call(basechain, TC_SETUP_CLSFLOWER, &cls_flow);
+}
+
+/* Move the callbacks collected in @bo onto @basechain's cb_list.
+ * Always returns 0.
+ */
+static int nft_flow_offload_bind(struct flow_block_offload *bo,
+				 struct nft_base_chain *basechain)
+{
+	struct list_head *chain_cbs = &basechain->cb_list;
+
+	list_splice(&bo->cb_list, chain_cbs);
+
+	return 0;
+}
+
+/* Unlink and free every callback found on @bo's cb_list.  @basechain is not
+ * used here.  Always returns 0.
+ */
+static int nft_flow_offload_unbind(struct flow_block_offload *bo,
+				   struct nft_base_chain *basechain)
+{
+	struct flow_block_cb *cb, *tmp;
+
+	list_for_each_entry_safe(cb, tmp, &bo->cb_list, list) {
+		list_del(&cb->list);
+		flow_block_cb_free(cb);
+	}
+
+	return 0;
+}
+
+#define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
+
+/* Bind or unbind (@cmd) the base chain of @trans to its device's flow block.
+ *
+ * Returns -EOPNOTSUPP when the chain is not a base chain, when it has no
+ * device or the device lacks ndo_setup_tc, or when binding a chain whose
+ * requested policy is neither unset (-1) nor NF_ACCEPT.  Otherwise the
+ * driver is called with FLOW_SETUP_BLOCK; a negative result is returned as
+ * is, else the callbacks are spliced onto the chain (bind) or freed (unbind).
+ */
+static int nft_flow_offload_chain(struct nft_trans *trans,
+				  enum flow_block_command cmd)
+{
+	struct nft_chain *chain = trans->ctx.chain;
+	struct netlink_ext_ack extack = {};
+	struct flow_block_offload bo = {};
+	struct nft_base_chain *basechain;
+	struct net_device *dev;
+	int ret;
+
+	if (!nft_is_base_chain(chain))
+		return -EOPNOTSUPP;
+
+	basechain = nft_base_chain(chain);
+	dev = basechain->ops.dev;
+	if (!dev || !dev->netdev_ops->ndo_setup_tc)
+		return -EOPNOTSUPP;
+
+	/* Only default policy to accept is supported for now. */
+	if (cmd == FLOW_BLOCK_BIND &&
+	    nft_trans_chain_policy(trans) != -1 &&
+	    nft_trans_chain_policy(trans) != NF_ACCEPT)
+		return -EOPNOTSUPP;
+
+	bo.command = cmd;
+	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+	bo.extack = &extack;
+	INIT_LIST_HEAD(&bo.cb_list);
+
+	ret = dev->netdev_ops->ndo_setup_tc(dev, FLOW_SETUP_BLOCK, &bo);
+	if (ret < 0)
+		return ret;
+
+	if (cmd == FLOW_BLOCK_BIND)
+		return nft_flow_offload_bind(&bo, basechain);
+	if (cmd == FLOW_BLOCK_UNBIND)
+		return nft_flow_offload_unbind(&bo, basechain);
+
+	return ret;
+}
+
+/* Walk the pending transactions of @net and push those that belong to
+ * netdev-family chains flagged NFT_CHAIN_HW_OFFLOAD to the hardware.
+ *
+ * New/deleted chains are bound/unbound, new rules are offloaded with
+ * FLOW_CLS_REPLACE (their flow rule is destroyed afterwards) and deleted
+ * rules with FLOW_CLS_DESTROY.  A new rule that is a replace or is not an
+ * append yields -EOPNOTSUPP.  Processing stops at the first non-zero result,
+ * which is returned; 0 means every transaction was handled.
+ */
+int nft_flow_rule_offload_commit(struct net *net)
+{
+	struct nft_trans *trans;
+
+	list_for_each_entry(trans, &net->nft.commit_list, list) {
+		int ret = 0;
+
+		if (trans->ctx.family != NFPROTO_NETDEV)
+			continue;
+
+		switch (trans->msg_type) {
+		case NFT_MSG_NEWCHAIN:
+			if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+				ret = nft_flow_offload_chain(trans,
+							     FLOW_BLOCK_BIND);
+			break;
+		case NFT_MSG_DELCHAIN:
+			if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+				ret = nft_flow_offload_chain(trans,
+							     FLOW_BLOCK_UNBIND);
+			break;
+		case NFT_MSG_NEWRULE:
+			if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+				break;
+
+			if (trans->ctx.flags & NLM_F_REPLACE ||
+			    !(trans->ctx.flags & NLM_F_APPEND))
+				return -EOPNOTSUPP;
+
+			ret = nft_flow_offload_rule(trans, FLOW_CLS_REPLACE);
+			nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+			break;
+		case NFT_MSG_DELRULE:
+			if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+				ret = nft_flow_offload_rule(trans,
+							    FLOW_CLS_DESTROY);
+			break;
+		}
+
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index f42326b40d6f..9f5dea0064ea 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -33,6 +33,7 @@ static inline int nf_osf_ttl(const struct sk_buff *skb,
{
struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
const struct iphdr *ip = ip_hdr(skb);
+ const struct in_ifaddr *ifa;
int ret = 0;
if (ttl_check == NF_OSF_TTL_TRUE)
@@ -42,15 +43,13 @@ static inline int nf_osf_ttl(const struct sk_buff *skb,
else if (ip->ttl <= f_ttl)
return 1;
- for_ifa(in_dev) {
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
if (inet_ifa_match(ip->saddr, ifa)) {
ret = (ip->ttl == f_ttl);
break;
}
}
- endfor_ifa(in_dev);
-
return ret;
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 89750f74e3a2..b6a7ce622c72 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -859,7 +859,7 @@ nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
}
skb_put(e->skb, diff);
}
- if (!skb_make_writable(e->skb, data_len))
+ if (skb_ensure_writable(e->skb, data_len))
return -ENOMEM;
skb_copy_to_linear_data(e->skb, data, data_len);
e->skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 411c0cf741e3..bd173b1824c6 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -12,6 +12,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables_offload.h>
#include <net/netfilter/nf_tables.h>
struct nft_cmp_expr {
@@ -107,12 +108,44 @@ nla_put_failure:
return -1;
}
+/* Translate an equality comparison into the match of an offloaded flow rule.
+ *
+ * The source register description (ctx->regs[priv->sreg]) tells where in the
+ * flow key/mask the compared value lives. The value is copied into the key,
+ * the register mask into the mask, and the dissector key is marked as used.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for any operator other than NFT_CMP_EQ.
+ */
+static int __nft_cmp_offload(struct nft_offload_ctx *ctx,
+			     struct nft_flow_rule *flow,
+			     const struct nft_cmp_expr *priv)
+{
+	struct nft_offload_reg *src = &ctx->regs[priv->sreg];
+	u8 *match_key, *match_mask;
+
+	if (priv->op != NFT_CMP_EQ)
+		return -EOPNOTSUPP;
+
+	match_key = (u8 *)&flow->match.key + src->offset;
+	match_mask = (u8 *)&flow->match.mask + src->offset;
+
+	memcpy(match_key, &priv->data, priv->len);
+	memcpy(match_mask, &src->mask, priv->len);
+
+	flow->match.dissector.used_keys |= BIT(src->key);
+	flow->match.dissector.offset[src->key] = src->base_offset;
+
+	nft_offload_update_dependency(ctx, &priv->data, priv->len);
+
+	return 0;
+}
+
+/* .offload callback of the generic cmp expression: hand the expression's
+ * private data to __nft_cmp_offload() and return its result.
+ */
+static int nft_cmp_offload(struct nft_offload_ctx *ctx,
+			   struct nft_flow_rule *flow,
+			   const struct nft_expr *expr)
+{
+	return __nft_cmp_offload(ctx, flow, nft_expr_priv(expr));
+}
+
static const struct nft_expr_ops nft_cmp_ops = {
.type = &nft_cmp_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)),
.eval = nft_cmp_eval,
.init = nft_cmp_init,
.dump = nft_cmp_dump,
+ .offload = nft_cmp_offload,
};
static int nft_cmp_fast_init(const struct nft_ctx *ctx,
@@ -143,6 +176,25 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx,
return 0;
}
+/* .offload callback of the fast cmp expression.
+ *
+ * Builds a temporary generic nft_cmp_expr (equality only) from the fast
+ * expression and reuses __nft_cmp_offload(). The fast expression's length is
+ * divided by BITS_PER_BYTE to obtain the length stored in the generic one.
+ */
+static int nft_cmp_fast_offload(struct nft_offload_ctx *ctx,
+				struct nft_flow_rule *flow,
+				const struct nft_expr *expr)
+{
+	const struct nft_cmp_fast_expr *fast = nft_expr_priv(expr);
+	struct nft_cmp_expr cmp = {};
+
+	cmp.data.data[0] = fast->data;
+	cmp.sreg = fast->sreg;
+	cmp.len = fast->len / BITS_PER_BYTE;
+	cmp.op = NFT_CMP_EQ;
+
+	return __nft_cmp_offload(ctx, flow, &cmp);
+}
+
static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
@@ -169,6 +221,7 @@ const struct nft_expr_ops nft_cmp_fast_ops = {
.eval = NULL, /* inlined */
.init = nft_cmp_fast_init,
.dump = nft_cmp_fast_dump,
+ .offload = nft_cmp_fast_offload,
};
static const struct nft_expr_ops *
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index dfcdea6619f1..827ab6196df9 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -21,6 +21,7 @@
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_expect.h>
struct nft_ct {
enum nft_ct_keys key:8;
@@ -1153,6 +1154,135 @@ static struct nft_object_type nft_ct_helper_obj_type __read_mostly = {
.owner = THIS_MODULE,
};
+/* Private data of a ct expectation object, filled in by
+ * nft_ct_expect_obj_init() from the NFTA_CT_EXPECT_* attributes.
+ */
+struct nft_ct_expect_obj {
+ u16 l3num; /* layer 3 protocol; defaults to the context family */
+ __be16 dport; /* destination port passed to nf_ct_expect_init() */
+ u8 l4proto; /* layer 4 protocol passed to nf_ct_expect_init() */
+ u8 size; /* limit compared against help->expecting[] at eval time */
+ u32 timeout; /* multiplied by HZ to compute the expiry at eval time */
+};
+
+/* Initialise a ct expectation object from its netlink attributes.
+ *
+ * L4PROTO, DPORT, TIMEOUT and SIZE are mandatory; L3PROTO is optional and
+ * defaults to the family of the context.
+ *
+ * Returns -EINVAL if a mandatory attribute is missing, otherwise the result
+ * of nf_ct_netns_get().
+ */
+static int nft_ct_expect_obj_init(const struct nft_ctx *ctx,
+				  const struct nlattr * const tb[],
+				  struct nft_object *obj)
+{
+	struct nft_ct_expect_obj *expect = nft_obj_data(obj);
+	const struct nlattr *l3proto = tb[NFTA_CT_EXPECT_L3PROTO];
+
+	if (!(tb[NFTA_CT_EXPECT_L4PROTO] &&
+	      tb[NFTA_CT_EXPECT_DPORT] &&
+	      tb[NFTA_CT_EXPECT_TIMEOUT] &&
+	      tb[NFTA_CT_EXPECT_SIZE]))
+		return -EINVAL;
+
+	if (l3proto)
+		expect->l3num = ntohs(nla_get_be16(l3proto));
+	else
+		expect->l3num = ctx->family;
+
+	expect->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]);
+	expect->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]);
+	expect->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]);
+	expect->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]);
+
+	return nf_ct_netns_get(ctx->net, ctx->family);
+}
+
+/* Undo the nf_ct_netns_get() done by nft_ct_expect_obj_init() by calling
+ * nf_ct_netns_put() with the same net and family.
+ */
+static void nft_ct_expect_obj_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ nf_ct_netns_put(ctx->net, ctx->family);
+}
+
+/* Dump the object's parameters as NFTA_CT_EXPECT_* attributes into skb.
+ *
+ * Returns 0 on success, -1 as soon as one attribute cannot be added.
+ * The reset argument is not used.
+ */
+static int nft_ct_expect_obj_dump(struct sk_buff *skb,
+				  struct nft_object *obj, bool reset)
+{
+	const struct nft_ct_expect_obj *expect = nft_obj_data(obj);
+
+	if (nla_put_be16(skb, NFTA_CT_EXPECT_L3PROTO, htons(expect->l3num)))
+		goto nla_put_failure;
+	if (nla_put_u8(skb, NFTA_CT_EXPECT_L4PROTO, expect->l4proto))
+		goto nla_put_failure;
+	if (nla_put_be16(skb, NFTA_CT_EXPECT_DPORT, expect->dport))
+		goto nla_put_failure;
+	if (nla_put_u32(skb, NFTA_CT_EXPECT_TIMEOUT, expect->timeout))
+		goto nla_put_failure;
+	if (nla_put_u8(skb, NFTA_CT_EXPECT_SIZE, expect->size))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+/* Create a conntrack expectation for the connection the packet belongs to,
+ * using the parameters stored in the object.
+ *
+ * Verdicts set on the non-success paths:
+ *   NFT_BREAK - no conntrack entry, untracked packet, or the object's size
+ *               limit has been reached;
+ *   NF_DROP   - the helper extension or the expectation could not be
+ *               allocated, or nf_ct_expect_related() failed.
+ */
+static void nft_ct_expect_obj_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_ct_expect_obj *priv = nft_obj_data(obj);
+ struct nf_conntrack_expect *exp;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn_help *help;
+ enum ip_conntrack_dir dir;
+ u16 l3num = priv->l3num;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(pkt->skb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_UNTRACKED) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+ dir = CTINFO2DIR(ctinfo);
+
+ /* Reuse the existing helper extension, or add one if there is none. */
+ help = nfct_help(ct);
+ if (!help)
+ help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
+ if (!help) {
+ regs->verdict.code = NF_DROP;
+ return;
+ }
+
+ /* Do not create more expectations than the object allows. */
+ if (help->expecting[NF_CT_EXPECT_CLASS_DEFAULT] >= priv->size) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+ /* For the inet family, use the layer 3 protocol of the conntrack entry. */
+ if (l3num == NFPROTO_INET)
+ l3num = nf_ct_l3num(ct);
+
+ exp = nf_ct_expect_alloc(ct);
+ if (exp == NULL) {
+ regs->verdict.code = NF_DROP;
+ return;
+ }
+ /* Addresses come from the tuple of the opposite direction (!dir).
+ * The NULL argument presumably leaves the source port unspecified -
+ * confirm against nf_ct_expect_init().
+ */
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, l3num,
+ &ct->tuplehash[!dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ priv->l4proto, NULL, &priv->dport);
+ exp->timeout.expires = jiffies + priv->timeout * HZ;
+
+ /* NOTE(review): exp is never passed to nf_ct_expect_put() here; confirm
+ * whether the reference obtained from nf_ct_expect_alloc() must be
+ * dropped after nf_ct_expect_related().
+ */
+ if (nf_ct_expect_related(exp) != 0)
+ regs->verdict.code = NF_DROP;
+}
+
+/* Netlink attribute types accepted for ct expectation objects; referenced
+ * by nft_ct_expect_obj_type.policy.
+ */
+static const struct nla_policy nft_ct_expect_policy[NFTA_CT_EXPECT_MAX + 1] = {
+ [NFTA_CT_EXPECT_L3PROTO] = { .type = NLA_U16 },
+ [NFTA_CT_EXPECT_L4PROTO] = { .type = NLA_U8 },
+ [NFTA_CT_EXPECT_DPORT] = { .type = NLA_U16 },
+ [NFTA_CT_EXPECT_TIMEOUT] = { .type = NLA_U32 },
+ [NFTA_CT_EXPECT_SIZE] = { .type = NLA_U8 },
+};
+
+/* Forward declaration: the ops below point back at the object type, which
+ * in turn points at the ops.
+ */
+static struct nft_object_type nft_ct_expect_obj_type;
+
+/* Callbacks implementing the ct expectation object. */
+static const struct nft_object_ops nft_ct_expect_obj_ops = {
+ .type = &nft_ct_expect_obj_type,
+ .size = sizeof(struct nft_ct_expect_obj),
+ .eval = nft_ct_expect_obj_eval,
+ .init = nft_ct_expect_obj_init,
+ .destroy = nft_ct_expect_obj_destroy,
+ .dump = nft_ct_expect_obj_dump,
+};
+
+/* Object type descriptor for NFT_OBJECT_CT_EXPECT, tying together the ops
+ * and the attribute policy defined above.
+ */
+static struct nft_object_type nft_ct_expect_obj_type __read_mostly = {
+ .type = NFT_OBJECT_CT_EXPECT,
+ .ops = &nft_ct_expect_obj_ops,
+ .maxattr = NFTA_CT_EXPECT_MAX,
+ .policy = nft_ct_expect_policy,
+ .owner = THIS_MODULE,
+};
+
static int __init nft_ct_module_init(void)
{
int err;
@@ -1170,17 +1300,23 @@ static int __init nft_ct_module_init(void)
err = nft_register_obj(&nft_ct_helper_obj_type);
if (err < 0)
goto err2;
+
+ err = nft_register_obj(&nft_ct_expect_obj_type);
+ if (err < 0)
+ goto err3;
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
err = nft_register_obj(&nft_ct_timeout_obj_type);
if (err < 0)
- goto err3;
+ goto err4;
#endif
return 0;
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+err4:
+ nft_unregister_obj(&nft_ct_expect_obj_type);
+#endif
err3:
nft_unregister_obj(&nft_ct_helper_obj_type);
-#endif
err2:
nft_unregister_expr(&nft_notrack_type);
err1:
@@ -1193,6 +1329,7 @@ static void __exit nft_ct_module_exit(void)
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
nft_unregister_obj(&nft_ct_timeout_obj_type);
#endif
+ nft_unregister_obj(&nft_ct_expect_obj_type);
nft_unregister_obj(&nft_ct_helper_obj_type);
nft_unregister_expr(&nft_notrack_type);
nft_unregister_expr(&nft_ct_type);
@@ -1207,3 +1344,4 @@ MODULE_ALIAS_NFT_EXPR("ct");
MODULE_ALIAS_NFT_EXPR("notrack");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER);
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_TIMEOUT);
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_EXPECT);
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 505bdfc66801..33833a0cb989 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -56,7 +56,7 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
elem = nft_set_elem_init(set, &priv->tmpl,
&regs->data[priv->sreg_key],
&regs->data[priv->sreg_data],
- timeout, GFP_ATOMIC);
+ timeout, 0, GFP_ATOMIC);
if (elem == NULL)
goto err1;
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index a7aa6c5250a4..a5e8469859e3 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -59,6 +59,103 @@ err:
regs->verdict.code = NFT_BREAK;
}
+/* Find the offset of the specified IPv4 option.
+ *
+ * If the target option is found, its offset relative to the start of the
+ * IPv4 header is stored in *offset and the option number is returned.
+ * Otherwise a negative error is returned:
+ *   -EBADMSG    the header or its options could not be read or parsed,
+ *   -ENOENT     the header has no options, or not the requested one,
+ *   -EOPNOTSUPP the requested option type is not handled here.
+ *
+ * NOTE(review): the previous comment also said that a first fragment that
+ * does not contain the End of Options is considered invalid; no such check
+ * is visible in this function, confirm whether __ip_options_compile()
+ * enforces it.
+ */
+static int ipv4_find_option(struct net *net, struct sk_buff *skb,
+			    unsigned int *offset, int target)
+{
+	/* 40 extra bytes for the options copied in after struct ip_options. */
+	unsigned char optbuf[sizeof(struct ip_options) + 40];
+	struct ip_options *opt = (struct ip_options *)optbuf;
+	struct iphdr *iph, _iph;
+	bool found = false;
+	__be32 info;
+	int optlen;
+
+	iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
+	if (!iph)
+		return -EBADMSG;
+
+	optlen = iph->ihl * 4 - (int)sizeof(struct iphdr);
+	if (optlen <= 0)
+		return -ENOENT;
+
+	memset(opt, 0, sizeof(struct ip_options));
+	/* Copy the options since __ip_options_compile() modifies
+	 * the options.
+	 */
+	if (skb_copy_bits(skb, sizeof(struct iphdr), opt->__data, optlen))
+		return -EBADMSG;
+	opt->optlen = optlen;
+
+	if (__ip_options_compile(net, opt, NULL, &info))
+		return -EBADMSG;
+
+	/* The srr/rr/router_alert offsets filled in by
+	 * __ip_options_compile() are already relative to the start of the
+	 * IP header, so they must not be shifted by the header size again.
+	 */
+	switch (target) {
+	case IPOPT_SSRR:
+	case IPOPT_LSRR:
+		if (!opt->srr)
+			break;
+		found = target == IPOPT_SSRR ? opt->is_strictroute :
+					       !opt->is_strictroute;
+		if (found)
+			*offset = opt->srr;
+		break;
+	case IPOPT_RR:
+		if (!opt->rr)
+			break;
+		*offset = opt->rr;
+		found = true;
+		break;
+	case IPOPT_RA:
+		if (!opt->router_alert)
+			break;
+		*offset = opt->router_alert;
+		found = true;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return found ? target : -ENOENT;
+}
+
+/* Evaluate an IPv4 option expression.
+ *
+ * With NFT_EXTHDR_F_PRESENT the destination register receives 1 or 0
+ * depending on whether the option was found. Otherwise priv->len bytes
+ * starting at the option offset plus priv->offset are copied into the
+ * destination register(s).
+ *
+ * Sets the NFT_BREAK verdict if the packet is not IPv4, the option lookup
+ * fails (when not in presence mode), or the copy fails.
+ */
+static void nft_exthdr_ipv4_eval(const struct nft_expr *expr,
+				 struct nft_regs *regs,
+				 const struct nft_pktinfo *pkt)
+{
+	struct nft_exthdr *priv = nft_expr_priv(expr);
+	u32 *dest = &regs->data[priv->dreg];
+	struct sk_buff *skb = pkt->skb;
+	unsigned int offset;
+	int err;
+
+	if (skb->protocol != htons(ETH_P_IP))
+		goto err;
+
+	err = ipv4_find_option(nft_net(pkt), skb, &offset, priv->type);
+	if (priv->flags & NFT_EXTHDR_F_PRESENT) {
+		*dest = (err >= 0);
+		return;
+	} else if (err < 0) {
+		goto err;
+	}
+	offset += priv->offset;
+
+	/* Zero the last register only when the copy leaves it partially
+	 * filled. If len is a multiple of the register size, the word at
+	 * dest[len / NFT_REG32_SIZE] lies past the destination and must not
+	 * be written.
+	 */
+	if (priv->len % NFT_REG32_SIZE)
+		dest[priv->len / NFT_REG32_SIZE] = 0;
+	if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+		goto err;
+	return;
+err:
+	regs->verdict.code = NFT_BREAK;
+}
+
static void *
nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
unsigned int len, void *buffer, unsigned int *tcphdr_len)
@@ -153,7 +250,8 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
return;
- if (!skb_make_writable(pkt->skb, pkt->xt.thoff + i + priv->len))
+ if (skb_ensure_writable(pkt->skb,
+ pkt->xt.thoff + i + priv->len))
return;
tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
@@ -311,6 +409,28 @@ static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
return nft_validate_register_load(priv->sreg, priv->len);
}
+/* Initialise an IPv4 option expression.
+ *
+ * Runs the common nft_exthdr_init() and then accepts only the option types
+ * handled by the IPv4 evaluation path (SSRR, LSRR, RR, RA).
+ *
+ * Returns 0 on success, the error from nft_exthdr_init(), or -EOPNOTSUPP
+ * for any other option type.
+ */
+static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx,
+				const struct nft_expr *expr,
+				const struct nlattr * const tb[])
+{
+	struct nft_exthdr *priv = nft_expr_priv(expr);
+	int ret;
+
+	ret = nft_exthdr_init(ctx, expr, tb);
+	if (ret < 0)
+		return ret;
+
+	switch (priv->type) {
+	case IPOPT_SSRR:
+	case IPOPT_LSRR:
+	case IPOPT_RR:
+	case IPOPT_RA:
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
{
if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
@@ -357,6 +477,14 @@ static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
.dump = nft_exthdr_dump,
};
+/* Expression operations for IPv4 option matching; uses the IPv4-specific
+ * eval and init callbacks and the common nft_exthdr_dump().
+ */
+static const struct nft_expr_ops nft_exthdr_ipv4_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_ipv4_eval,
+ .init = nft_exthdr_ipv4_init,
+ .dump = nft_exthdr_dump,
+};
+
static const struct nft_expr_ops nft_exthdr_tcp_ops = {
.type = &nft_exthdr_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
@@ -397,6 +525,12 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_ipv6_ops;
break;
+ case NFT_EXTHDR_OP_IPV4:
+ if (ctx->family != NFPROTO_IPV6) {
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_ipv4_ops;
+ }
+ break;
}
return ERR_PTR(-EOPNOTSUPP);
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index cb8547f97220..ca2ae4b95a8d 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -13,6 +13,7 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
void nft_immediate_eval(const struct nft_expr *expr,
struct nft_regs *regs,
@@ -124,6 +125,34 @@ static int nft_immediate_validate(const struct nft_ctx *ctx,
return 0;
}
+/* .offload callback of the immediate expression.
+ *
+ * Only verdict immediates are handled: the next free action entry of the
+ * flow rule is taken (ctx->num_actions is advanced) and set to ACCEPT or
+ * DROP according to the verdict code.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if the destination is not the verdict
+ * register or the verdict is neither NF_ACCEPT nor NF_DROP.
+ */
+static int nft_immediate_offload(struct nft_offload_ctx *ctx,
+				 struct nft_flow_rule *flow,
+				 const struct nft_expr *expr)
+{
+	const struct nft_immediate_expr *imm = nft_expr_priv(expr);
+	struct flow_action_entry *act;
+
+	if (imm->dreg != NFT_REG_VERDICT)
+		return -EOPNOTSUPP;
+
+	/* The slot is consumed before the verdict code is inspected. */
+	act = &flow->rule->action.entries[ctx->num_actions++];
+
+	switch (imm->data.verdict.code) {
+	case NF_ACCEPT:
+		act->id = FLOW_ACTION_ACCEPT;
+		return 0;
+	case NF_DROP:
+		act->id = FLOW_ACTION_DROP;
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
static const struct nft_expr_ops nft_imm_ops = {
.type = &nft_imm_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
@@ -133,6 +162,8 @@ static const struct nft_expr_ops nft_imm_ops = {
.deactivate = nft_immediate_deactivate,
.dump = nft_immediate_dump,
.validate = nft_immediate_validate,
+ .offload = nft_immediate_offload,
+ .offload_flags = NFT_OFFLOAD_F_ACTION,
};
struct nft_expr_type nft_imm_type __read_mostly = {
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index a54329b8634a..76866f77e343 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -21,23 +21,13 @@
#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nft_meta.h>
+#include <net/netfilter/nf_tables_offload.h>
#include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */
-struct nft_meta {
- enum nft_meta_keys key:8;
- union {
- enum nft_registers dreg:8;
- enum nft_registers sreg:8;
- };
-};
-
static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
-#ifdef CONFIG_NF_TABLES_BRIDGE
-#include "../bridge/br_private.h"
-#endif
-
void nft_meta_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -47,9 +37,6 @@ void nft_meta_get_eval(const struct nft_expr *expr,
const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
struct sock *sk;
u32 *dest = &regs->data[priv->dreg];
-#ifdef CONFIG_NF_TABLES_BRIDGE
- const struct net_bridge_port *p;
-#endif
switch (priv->key) {
case NFT_META_LEN:
@@ -229,18 +216,6 @@ void nft_meta_get_eval(const struct nft_expr *expr,
nft_reg_store8(dest, secpath_exists(skb));
break;
#endif
-#ifdef CONFIG_NF_TABLES_BRIDGE
- case NFT_META_BRI_IIFNAME:
- if (in == NULL || (p = br_port_get_rcu(in)) == NULL)
- goto err;
- strncpy((char *)dest, p->br->dev->name, IFNAMSIZ);
- return;
- case NFT_META_BRI_OIFNAME:
- if (out == NULL || (p = br_port_get_rcu(out)) == NULL)
- goto err;
- strncpy((char *)dest, p->br->dev->name, IFNAMSIZ);
- return;
-#endif
case NFT_META_IIFKIND:
if (in == NULL || in->rtnl_link_ops == NULL)
goto err;
@@ -260,10 +235,11 @@ void nft_meta_get_eval(const struct nft_expr *expr,
err:
regs->verdict.code = NFT_BREAK;
}
+EXPORT_SYMBOL_GPL(nft_meta_get_eval);
-static void nft_meta_set_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+void nft_meta_set_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
const struct nft_meta *meta = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
@@ -300,16 +276,18 @@ static void nft_meta_set_eval(const struct nft_expr *expr,
WARN_ON(1);
}
}
+EXPORT_SYMBOL_GPL(nft_meta_set_eval);
-static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
[NFTA_META_DREG] = { .type = NLA_U32 },
[NFTA_META_KEY] = { .type = NLA_U32 },
[NFTA_META_SREG] = { .type = NLA_U32 },
};
+EXPORT_SYMBOL_GPL(nft_meta_policy);
-static int nft_meta_get_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+int nft_meta_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_meta *priv = nft_expr_priv(expr);
unsigned int len;
@@ -360,14 +338,6 @@ static int nft_meta_get_init(const struct nft_ctx *ctx,
len = sizeof(u8);
break;
#endif
-#ifdef CONFIG_NF_TABLES_BRIDGE
- case NFT_META_BRI_IIFNAME:
- case NFT_META_BRI_OIFNAME:
- if (ctx->family != NFPROTO_BRIDGE)
- return -EOPNOTSUPP;
- len = IFNAMSIZ;
- break;
-#endif
default:
return -EOPNOTSUPP;
}
@@ -376,6 +346,7 @@ static int nft_meta_get_init(const struct nft_ctx *ctx,
return nft_validate_register_store(ctx, priv->dreg, NULL,
NFT_DATA_VALUE, len);
}
+EXPORT_SYMBOL_GPL(nft_meta_get_init);
static int nft_meta_get_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
@@ -409,9 +380,9 @@ static int nft_meta_get_validate(const struct nft_ctx *ctx,
#endif
}
-static int nft_meta_set_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+int nft_meta_set_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
{
struct nft_meta *priv = nft_expr_priv(expr);
unsigned int hooks;
@@ -437,10 +408,11 @@ static int nft_meta_set_validate(const struct nft_ctx *ctx,
return nft_chain_validate_hooks(ctx->chain, hooks);
}
+EXPORT_SYMBOL_GPL(nft_meta_set_validate);
-static int nft_meta_set_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+int nft_meta_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_meta *priv = nft_expr_priv(expr);
unsigned int len;
@@ -475,9 +447,10 @@ static int nft_meta_set_init(const struct nft_ctx *ctx,
return 0;
}
+EXPORT_SYMBOL_GPL(nft_meta_set_init);
-static int nft_meta_get_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
+int nft_meta_get_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
@@ -490,8 +463,9 @@ static int nft_meta_get_dump(struct sk_buff *skb,
nla_put_failure:
return -1;
}
+EXPORT_SYMBOL_GPL(nft_meta_get_dump);
-static int nft_meta_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
+int nft_meta_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
@@ -505,15 +479,42 @@ static int nft_meta_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
nla_put_failure:
return -1;
}
+EXPORT_SYMBOL_GPL(nft_meta_set_dump);
-static void nft_meta_set_destroy(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
+void nft_meta_set_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
if (priv->key == NFT_META_NFTRACE)
static_branch_dec(&nft_trace_enabled);
}
+EXPORT_SYMBOL_GPL(nft_meta_set_destroy);
+
+/* .offload callback of the meta get expression.
+ *
+ * Describes, in the destination register's offload state, which flow
+ * dissector field the meta key maps to, and records the protocol dependency
+ * it introduces. Only NFT_META_PROTOCOL and NFT_META_L4PROTO are handled.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for any other key.
+ */
+static int nft_meta_get_offload(struct nft_offload_ctx *ctx,
+				struct nft_flow_rule *flow,
+				const struct nft_expr *expr)
+{
+	const struct nft_meta *priv = nft_expr_priv(expr);
+	struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+	switch (priv->key) {
+	case NFT_META_PROTOCOL:
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, n_proto,
+				  sizeof(__u16), reg);
+		nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK);
+		return 0;
+	case NFT_META_L4PROTO:
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
+				  sizeof(__u8), reg);
+		nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
static const struct nft_expr_ops nft_meta_get_ops = {
.type = &nft_meta_type,
@@ -522,6 +523,7 @@ static const struct nft_expr_ops nft_meta_get_ops = {
.init = nft_meta_get_init,
.dump = nft_meta_get_dump,
.validate = nft_meta_get_validate,
+ .offload = nft_meta_get_offload,
};
static const struct nft_expr_ops nft_meta_set_ops = {
@@ -544,6 +546,10 @@ nft_meta_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
return ERR_PTR(-EINVAL);
+#ifdef CONFIG_NF_TABLES_BRIDGE
+ if (ctx->family == NFPROTO_BRIDGE)
+ return ERR_PTR(-EAGAIN);
+#endif
if (tb[NFTA_META_DREG])
return &nft_meta_get_ops;
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 680bd9f38a81..22a80eb60222 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -15,10 +15,13 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmpv6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
/* add vlan header into the user buffer for if tag was removed by offloads */
static bool
@@ -150,12 +153,195 @@ nla_put_failure:
return -1;
}
+/* Map a link-layer payload load onto a flow dissector field.
+ *
+ * Only the Ethernet source and destination addresses are handled. Any other
+ * offset is rejected, as the other nft_payload_offload_*() helpers do, so
+ * that a load which cannot be expressed is never reported as offloaded
+ * without a match having been set up.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for an unsupported offset.
+ */
+static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
+				  struct nft_flow_rule *flow,
+				  const struct nft_payload *priv)
+{
+	struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+	switch (priv->offset) {
+	case offsetof(struct ethhdr, h_source):
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs,
+				  src, ETH_ALEN, reg);
+		break;
+	case offsetof(struct ethhdr, h_dest):
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs,
+				  dst, ETH_ALEN, reg);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+/* Map an IPv4 header payload load onto a flow dissector field.
+ *
+ * Handles the source address, destination address and protocol fields; the
+ * protocol field additionally records a transport dependency.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for any other offset.
+ */
+static int nft_payload_offload_ip(struct nft_offload_ctx *ctx,
+				  struct nft_flow_rule *flow,
+				  const struct nft_payload *priv)
+{
+	struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+	switch (priv->offset) {
+	case offsetof(struct iphdr, saddr):
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, src,
+				  sizeof(struct in_addr), reg);
+		return 0;
+	case offsetof(struct iphdr, daddr):
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, dst,
+				  sizeof(struct in_addr), reg);
+		return 0;
+	case offsetof(struct iphdr, protocol):
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
+				  sizeof(__u8), reg);
+		nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/* Map an IPv6 header payload load onto a flow dissector field.
+ *
+ * Handles the source address, destination address and next-header fields;
+ * the next-header field additionally records a transport dependency.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for any other offset.
+ */
+static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx,
+				   struct nft_flow_rule *flow,
+				   const struct nft_payload *priv)
+{
+	struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+	switch (priv->offset) {
+	case offsetof(struct ipv6hdr, saddr):
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, src,
+				  sizeof(struct in6_addr), reg);
+		return 0;
+	case offsetof(struct ipv6hdr, daddr):
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, dst,
+				  sizeof(struct in6_addr), reg);
+		return 0;
+	case offsetof(struct ipv6hdr, nexthdr):
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
+				  sizeof(__u8), reg);
+		nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/* Dispatch a network-header payload load to the IPv4 or IPv6 helper
+ * according to the layer 3 protocol recorded in ctx->dep.l3num.
+ *
+ * Returns the helper's result, or -EOPNOTSUPP for any other protocol.
+ */
+static int nft_payload_offload_nh(struct nft_offload_ctx *ctx,
+				  struct nft_flow_rule *flow,
+				  const struct nft_payload *priv)
+{
+	switch (ctx->dep.l3num) {
+	case htons(ETH_P_IP):
+		return nft_payload_offload_ip(ctx, flow, priv);
+	case htons(ETH_P_IPV6):
+		return nft_payload_offload_ip6(ctx, flow, priv);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/* Map a TCP header payload load onto the flow dissector port fields.
+ *
+ * Handles the source and destination port only.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for any other offset.
+ */
+static int nft_payload_offload_tcp(struct nft_offload_ctx *ctx,
+				   struct nft_flow_rule *flow,
+				   const struct nft_payload *priv)
+{
+	struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+	if (priv->offset == offsetof(struct tcphdr, source)) {
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src,
+				  sizeof(__be16), reg);
+	} else if (priv->offset == offsetof(struct tcphdr, dest)) {
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst,
+				  sizeof(__be16), reg);
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+/* Map a UDP header payload load onto the flow dissector port fields.
+ *
+ * Handles the source and destination port only.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for any other offset.
+ */
+static int nft_payload_offload_udp(struct nft_offload_ctx *ctx,
+				   struct nft_flow_rule *flow,
+				   const struct nft_payload *priv)
+{
+	struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
+
+	if (priv->offset == offsetof(struct udphdr, source)) {
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src,
+				  sizeof(__be16), reg);
+	} else if (priv->offset == offsetof(struct udphdr, dest)) {
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst,
+				  sizeof(__be16), reg);
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+/* Dispatch a transport-header payload load to the TCP or UDP helper
+ * according to the layer 4 protocol recorded in ctx->dep.protonum.
+ *
+ * Returns the helper's result, or -EOPNOTSUPP for any other protocol.
+ */
+static int nft_payload_offload_th(struct nft_offload_ctx *ctx,
+				  struct nft_flow_rule *flow,
+				  const struct nft_payload *priv)
+{
+	switch (ctx->dep.protonum) {
+	case IPPROTO_TCP:
+		return nft_payload_offload_tcp(ctx, flow, priv);
+	case IPPROTO_UDP:
+		return nft_payload_offload_udp(ctx, flow, priv);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/* .offload callback of the payload expression: dispatch on the payload base
+ * (link-layer, network or transport header) to the matching helper.
+ *
+ * Returns the helper's result, or -EOPNOTSUPP for any other base.
+ */
+static int nft_payload_offload(struct nft_offload_ctx *ctx,
+			       struct nft_flow_rule *flow,
+			       const struct nft_expr *expr)
+{
+	const struct nft_payload *payload = nft_expr_priv(expr);
+
+	switch (payload->base) {
+	case NFT_PAYLOAD_LL_HEADER:
+		return nft_payload_offload_ll(ctx, flow, payload);
+	case NFT_PAYLOAD_NETWORK_HEADER:
+		return nft_payload_offload_nh(ctx, flow, payload);
+	case NFT_PAYLOAD_TRANSPORT_HEADER:
+		return nft_payload_offload_th(ctx, flow, payload);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
static const struct nft_expr_ops nft_payload_ops = {
.type = &nft_payload_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_payload)),
.eval = nft_payload_eval,
.init = nft_payload_init,
.dump = nft_payload_dump,
+ .offload = nft_payload_offload,
};
const struct nft_expr_ops nft_payload_fast_ops = {
@@ -164,6 +350,7 @@ const struct nft_expr_ops nft_payload_fast_ops = {
.eval = nft_payload_eval,
.init = nft_payload_init,
.dump = nft_payload_dump,
+ .offload = nft_payload_offload,
};
static inline void nft_csum_replace(__sum16 *sum, __wsum fsum, __wsum tsum)
@@ -240,7 +427,7 @@ static int nft_payload_l4csum_update(const struct nft_pktinfo *pkt,
tsum));
}
- if (!skb_make_writable(skb, l4csum_offset + sizeof(sum)) ||
+ if (skb_ensure_writable(skb, l4csum_offset + sizeof(sum)) ||
skb_store_bits(skb, l4csum_offset, &sum, sizeof(sum)) < 0)
return -1;
@@ -256,7 +443,7 @@ static int nft_payload_csum_inet(struct sk_buff *skb, const u32 *src,
return -1;
nft_csum_replace(&sum, fsum, tsum);
- if (!skb_make_writable(skb, csum_offset + sizeof(sum)) ||
+ if (skb_ensure_writable(skb, csum_offset + sizeof(sum)) ||
skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
return -1;
@@ -309,7 +496,7 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
goto err;
}
- if (!skb_make_writable(skb, max(offset + priv->len, 0)) ||
+ if (skb_ensure_writable(skb, max(offset + priv->len, 0)) ||
skb_store_bits(skb, offset, src, priv->len) < 0)
goto err;
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
new file mode 100644
index 000000000000..80060ade8a5b
--- /dev/null
+++ b/net/netfilter/nft_synproxy.c
@@ -0,0 +1,287 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/netlink.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_synproxy.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/netfilter/nf_synproxy.h>
+
+/* Private data of one "synproxy" expression instance: the configured
+ * synproxy parameters.  The code in this file reads and writes its
+ * mss, wscale and options members.
+ */
+struct nft_synproxy {
+ struct nf_synproxy_info info;
+};
+
+/* Netlink attribute policy of the synproxy expression, indexed by
+ * NFTA_SYNPROXY_* attribute number.  Each entry names the payload type
+ * the attribute is declared with.
+ */
+static const struct nla_policy nft_synproxy_policy[NFTA_SYNPROXY_MAX + 1] = {
+	[NFTA_SYNPROXY_FLAGS]	= { .type = NLA_U32 },
+	[NFTA_SYNPROXY_WSCALE]	= { .type = NLA_U8 },
+	[NFTA_SYNPROXY_MSS]	= { .type = NLA_U16 },
+};
+
+/* Work out which TCP options are offered when answering a client SYN.
+ *
+ * @opts: options parsed from the SYN; updated in place
+ * @tcp:  TCP header of the SYN
+ * @snet: per-netns synproxy state, its syn_received counter is bumped
+ * @info: synproxy parameters handed to the timestamp cookie helper
+ * @priv: expression private data holding the configured option mask
+ *
+ * ECN is flagged when the SYN carries both ECE and CWR.  The result is
+ * then limited to the options enabled in the expression configuration.
+ * If the timestamp option survives, the timestamp cookie is initialised;
+ * otherwise window scaling, SACK-permitted and ECN are cleared as well.
+ */
+static void nft_synproxy_tcp_options(struct synproxy_options *opts,
+				     const struct tcphdr *tcp,
+				     struct synproxy_net *snet,
+				     struct nf_synproxy_info *info,
+				     struct nft_synproxy *priv)
+{
+	this_cpu_inc(snet->stats->syn_received);
+
+	if (tcp->ece && tcp->cwr)
+		opts->options |= NF_SYNPROXY_OPT_ECN;
+	opts->options &= priv->info.options;
+
+	if (!(opts->options & NF_SYNPROXY_OPT_TIMESTAMP)) {
+		/* no timestamp option: drop the options cleared alongside it */
+		opts->options &= ~(NF_SYNPROXY_OPT_WSCALE |
+				   NF_SYNPROXY_OPT_SACK_PERM |
+				   NF_SYNPROXY_OPT_ECN);
+		return;
+	}
+
+	synproxy_init_timestamp_cookie(info, opts);
+}
+
+/* IPv4 half of the synproxy evaluation.
+ *
+ * A segment with SYN set is answered with a SYN-ACK and the skb is
+ * consumed (verdict NF_STOLEN).  Otherwise a segment with ACK set is
+ * handed to synproxy_recv_client_ack(): when that returns non-zero the
+ * skb is consumed and the verdict is NF_STOLEN, else the verdict is
+ * NF_DROP.  Segments with neither flag leave the verdict untouched.
+ *
+ * @_tcph is not used by this function.
+ */
+static void nft_synproxy_eval_v4(const struct nft_expr *expr,
+				 struct nft_regs *regs,
+				 const struct nft_pktinfo *pkt,
+				 const struct tcphdr *tcp,
+				 struct tcphdr *_tcph,
+				 struct synproxy_options *opts)
+{
+	struct sk_buff *skb = pkt->skb;
+	struct net *net = nft_net(pkt);
+	struct synproxy_net *snet = synproxy_pernet(net);
+	struct nft_synproxy *priv = nft_expr_priv(expr);
+	struct nf_synproxy_info info = priv->info;
+
+	if (tcp->syn) {
+		/* Initial SYN from client */
+		nft_synproxy_tcp_options(opts, tcp, snet, &info, priv);
+		synproxy_send_client_synack(net, skb, tcp, opts);
+		consume_skb(skb);
+		regs->verdict.code = NF_STOLEN;
+		return;
+	}
+
+	if (!tcp->ack)
+		return;
+
+	/* ACK from client */
+	if (!synproxy_recv_client_ack(net, skb, tcp, opts, ntohl(tcp->seq))) {
+		regs->verdict.code = NF_DROP;
+		return;
+	}
+
+	consume_skb(skb);
+	regs->verdict.code = NF_STOLEN;
+}
+
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+/* IPv6 half of the synproxy evaluation; same flow as the IPv4 variant
+ * but using the *_ipv6 synproxy helpers.
+ *
+ * A segment with SYN set is answered with a SYN-ACK and the skb is
+ * consumed (verdict NF_STOLEN).  Otherwise a segment with ACK set is
+ * handed to synproxy_recv_client_ack_ipv6(): non-zero means the skb is
+ * consumed and the verdict is NF_STOLEN, zero means NF_DROP.  Segments
+ * with neither flag leave the verdict untouched.
+ *
+ * @_tcph is not used by this function.
+ */
+static void nft_synproxy_eval_v6(const struct nft_expr *expr,
+				 struct nft_regs *regs,
+				 const struct nft_pktinfo *pkt,
+				 const struct tcphdr *tcp,
+				 struct tcphdr *_tcph,
+				 struct synproxy_options *opts)
+{
+	struct sk_buff *skb = pkt->skb;
+	struct net *net = nft_net(pkt);
+	struct synproxy_net *snet = synproxy_pernet(net);
+	struct nft_synproxy *priv = nft_expr_priv(expr);
+	struct nf_synproxy_info info = priv->info;
+
+	if (tcp->syn) {
+		/* Initial SYN from client */
+		nft_synproxy_tcp_options(opts, tcp, snet, &info, priv);
+		synproxy_send_client_synack_ipv6(net, skb, tcp, opts);
+		consume_skb(skb);
+		regs->verdict.code = NF_STOLEN;
+		return;
+	}
+
+	if (!tcp->ack)
+		return;
+
+	/* ACK from client */
+	if (!synproxy_recv_client_ack_ipv6(net, skb, tcp, opts,
+					   ntohl(tcp->seq))) {
+		regs->verdict.code = NF_DROP;
+		return;
+	}
+
+	consume_skb(skb);
+	regs->verdict.code = NF_STOLEN;
+}
+#endif /* CONFIG_NF_TABLES_IPV6 */
+
+/* ->eval callback of the synproxy expression.
+ *
+ * Non-TCP packets get NFT_BREAK.  TCP packets are dropped when the
+ * checksum check fails, when the TCP header cannot be read or when the
+ * TCP options cannot be parsed.  Everything else is dispatched on
+ * skb->protocol to the IPv4 or (if built in) IPv6 handler; any other
+ * protocol gets NFT_BREAK.
+ */
+static void nft_synproxy_eval(const struct nft_expr *expr,
+			      struct nft_regs *regs,
+			      const struct nft_pktinfo *pkt)
+{
+	struct synproxy_options opts = {};
+	struct sk_buff *skb = pkt->skb;
+	int thoff = pkt->xt.thoff;
+	const struct tcphdr *tcp;
+	struct tcphdr _tcph;
+
+	if (pkt->tprot != IPPROTO_TCP) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+
+	/* NOTE(review): nf_ip_checksum() runs before the protocol dispatch
+	 * below, i.e. for IPv6 packets too - confirm this is intended.
+	 */
+	if (nf_ip_checksum(skb, nft_hook(pkt), thoff, IPPROTO_TCP))
+		goto drop;
+
+	tcp = skb_header_pointer(skb, thoff, sizeof(_tcph), &_tcph);
+	if (!tcp)
+		goto drop;
+
+	if (!synproxy_parse_options(skb, thoff, tcp, &opts))
+		goto drop;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		nft_synproxy_eval_v4(expr, regs, pkt, tcp, &_tcph, &opts);
+		return;
+	}
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+	if (skb->protocol == htons(ETH_P_IPV6)) {
+		nft_synproxy_eval_v6(expr, regs, pkt, tcp, &_tcph, &opts);
+		return;
+	}
+#endif
+	regs->verdict.code = NFT_BREAK;
+	return;
+
+drop:
+	regs->verdict.code = NF_DROP;
+}
+
+/* ->init callback: parse the netlink attributes into the expression's
+ * private data and take the conntrack and synproxy references needed
+ * for the chain family.
+ *
+ * @ctx:  rule context (netns and family are used)
+ * @expr: expression being initialised
+ * @tb:   parsed NFTA_SYNPROXY_* attributes; each one is optional
+ *
+ * Returns 0 on success, -EOPNOTSUPP for flag bits outside
+ * NF_SYNPROXY_OPT_MASK, or the error of the failing helper.  Every
+ * reference taken here is released again before an error is returned.
+ */
+static int nft_synproxy_init(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr,
+			     const struct nlattr * const tb[])
+{
+	struct synproxy_net *snet = synproxy_pernet(ctx->net);
+	struct nft_synproxy *priv = nft_expr_priv(expr);
+	u32 flags;
+	int err;
+
+	if (tb[NFTA_SYNPROXY_MSS])
+		priv->info.mss = ntohs(nla_get_be16(tb[NFTA_SYNPROXY_MSS]));
+	if (tb[NFTA_SYNPROXY_WSCALE])
+		priv->info.wscale = nla_get_u8(tb[NFTA_SYNPROXY_WSCALE]);
+	if (tb[NFTA_SYNPROXY_FLAGS]) {
+		flags = ntohl(nla_get_be32(tb[NFTA_SYNPROXY_FLAGS]));
+		if (flags & ~NF_SYNPROXY_OPT_MASK)
+			return -EOPNOTSUPP;
+		priv->info.options = flags;
+	}
+
+	err = nf_ct_netns_get(ctx->net, ctx->family);
+	if (err)
+		return err;
+
+	switch (ctx->family) {
+	case NFPROTO_IPV4:
+		err = nf_synproxy_ipv4_init(snet, ctx->net);
+		if (err)
+			goto nf_ct_failure;
+		break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+	case NFPROTO_IPV6:
+		err = nf_synproxy_ipv6_init(snet, ctx->net);
+		if (err)
+			goto nf_ct_failure;
+		break;
+#endif
+	case NFPROTO_INET:
+	case NFPROTO_BRIDGE:
+		err = nf_synproxy_ipv4_init(snet, ctx->net);
+		if (err)
+			goto nf_ct_failure;
+		err = nf_synproxy_ipv6_init(snet, ctx->net);
+		if (err) {
+			/* The IPv4 side was set up just above; undo it here,
+			 * the error path below only drops the conntrack
+			 * reference.
+			 */
+			nf_synproxy_ipv4_fini(snet, ctx->net);
+			goto nf_ct_failure;
+		}
+		break;
+	}
+
+	return 0;
+
+nf_ct_failure:
+	nf_ct_netns_put(ctx->net, ctx->family);
+	return err;
+}
+
+/* ->destroy callback: release, per chain family, the synproxy IPv4
+ * and/or IPv6 state and then the conntrack netns reference.
+ */
+static void nft_synproxy_destroy(const struct nft_ctx *ctx,
+				 const struct nft_expr *expr)
+{
+	struct synproxy_net *snet = synproxy_pernet(ctx->net);
+	bool put_v4 = false;
+	bool put_v6 = false;
+
+	switch (ctx->family) {
+	case NFPROTO_IPV4:
+		put_v4 = true;
+		break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+	case NFPROTO_IPV6:
+		put_v6 = true;
+		break;
+#endif
+	case NFPROTO_INET:
+	case NFPROTO_BRIDGE:
+		put_v4 = true;
+		put_v6 = true;
+		break;
+	}
+
+	if (put_v4)
+		nf_synproxy_ipv4_fini(snet, ctx->net);
+	if (put_v6)
+		nf_synproxy_ipv6_fini(snet, ctx->net);
+
+	nf_ct_netns_put(ctx->net, ctx->family);
+}
+
+/* ->dump callback: emit the configured MSS, window scale and option
+ * flags as netlink attributes (MSS and flags in network byte order).
+ *
+ * Returns 0 on success, -1 as soon as one attribute cannot be added.
+ */
+static int nft_synproxy_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_synproxy *priv = nft_expr_priv(expr);
+	const struct nf_synproxy_info *cfg = &priv->info;
+
+	if (nla_put_be16(skb, NFTA_SYNPROXY_MSS, htons(cfg->mss)))
+		return -1;
+	if (nla_put_u8(skb, NFTA_SYNPROXY_WSCALE, cfg->wscale))
+		return -1;
+	if (nla_put_be32(skb, NFTA_SYNPROXY_FLAGS, htonl(cfg->options)))
+		return -1;
+
+	return 0;
+}
+
+/* ->validate callback: passes the LOCAL_IN and FORWARD hook bits to
+ * nft_chain_validate_hooks() and returns its result.
+ */
+static int nft_synproxy_validate(const struct nft_ctx *ctx,
+				 const struct nft_expr *expr,
+				 const struct nft_data **data)
+{
+	const unsigned int hook_mask = (1 << NF_INET_LOCAL_IN) |
+				       (1 << NF_INET_FORWARD);
+
+	return nft_chain_validate_hooks(ctx->chain, hook_mask);
+}
+
+/* Forward declaration: the ops table and the type refer to each other. */
+static struct nft_expr_type nft_synproxy_type;
+
+/* Callback table of the synproxy expression. */
+static const struct nft_expr_ops nft_synproxy_ops = {
+	.type		= &nft_synproxy_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_synproxy)),
+	.eval		= nft_synproxy_eval,
+	.init		= nft_synproxy_init,
+	.destroy	= nft_synproxy_destroy,
+	.dump		= nft_synproxy_dump,
+	.validate	= nft_synproxy_validate,
+};
+
+/* Expression type registered under the name "synproxy". */
+static struct nft_expr_type nft_synproxy_type __read_mostly = {
+	.name		= "synproxy",
+	.ops		= &nft_synproxy_ops,
+	.policy		= nft_synproxy_policy,
+	.maxattr	= NFTA_SYNPROXY_MAX,
+	.owner		= THIS_MODULE,
+};
+
+/* Module init: register the synproxy expression type and return the
+ * result of nft_register_expr() unchanged.
+ */
+static int __init nft_synproxy_module_init(void)
+{
+ return nft_register_expr(&nft_synproxy_type);
+}
+
+/* Module exit: unregister the synproxy expression type. */
+static void __exit nft_synproxy_module_exit(void)
+{
+	/* void function: call the helper, do not "return" its expression */
+	nft_unregister_expr(&nft_synproxy_type);
+}
+
+module_init(nft_synproxy_module_init);
+module_exit(nft_synproxy_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Fernando Fernandez <ffmancera@riseup.net>");
+MODULE_ALIAS_NFT_EXPR("synproxy");
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
index 06dc55590441..51b454d8fa9c 100644
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -17,7 +17,8 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
case CHECKSUM_COMPLETE:
if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
break;
- if ((protocol == 0 && !csum_fold(skb->csum)) ||
+ if ((protocol != IPPROTO_TCP && protocol != IPPROTO_UDP &&
+ !csum_fold(skb->csum)) ||
!csum_tcpudp_magic(iph->saddr, iph->daddr,
skb->len - dataoff, protocol,
skb->csum)) {
@@ -26,7 +27,7 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
}
/* fall through */
case CHECKSUM_NONE:
- if (protocol == 0)
+ if (protocol != IPPROTO_TCP && protocol != IPPROTO_UDP)
skb->csum = 0;
else
skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index b1054a3d18c5..eababc354ff1 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -31,7 +31,7 @@ dscp_tg(struct sk_buff *skb, const struct xt_action_param *par)
u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
if (dscp != dinfo->dscp) {
- if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return NF_DROP;
ipv4_change_dsfield(ip_hdr(skb),
@@ -49,7 +49,7 @@ dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par)
u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
if (dscp != dinfo->dscp) {
- if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+ if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
return NF_DROP;
ipv6_change_dsfield(ipv6_hdr(skb),
@@ -79,7 +79,7 @@ tos_tg(struct sk_buff *skb, const struct xt_action_param *par)
nv = (orig & ~info->tos_mask) ^ info->tos_value;
if (orig != nv) {
- if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return NF_DROP;
iph = ip_hdr(skb);
ipv4_change_dsfield(iph, 0, nv);
@@ -99,7 +99,7 @@ tos_tg6(struct sk_buff *skb, const struct xt_action_param *par)
nv = (orig & ~info->tos_mask) ^ info->tos_value;
if (orig != nv) {
- if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return NF_DROP;
iph = ipv6_hdr(skb);
ipv6_change_dsfield(iph, 0, nv);
diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c
index 8221a5ce44bf..7873b834c300 100644
--- a/net/netfilter/xt_HL.c
+++ b/net/netfilter/xt_HL.c
@@ -29,7 +29,7 @@ ttl_tg(struct sk_buff *skb, const struct xt_action_param *par)
const struct ipt_TTL_info *info = par->targinfo;
int new_ttl;
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, sizeof(*iph)))
return NF_DROP;
iph = ip_hdr(skb);
@@ -69,7 +69,7 @@ hl_tg6(struct sk_buff *skb, const struct xt_action_param *par)
const struct ip6t_HL_info *info = par->targinfo;
int new_hl;
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, sizeof(*ip6h)))
return NF_DROP;
ip6h = ipv6_hdr(skb);
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 0b3a1b291c91..122db9fbb9f4 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -86,7 +86,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
if (par->fragoff != 0)
return 0;
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, skb->len))
return -1;
len = skb->len - tcphoff;
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index 666f4ca9b15f..30e99464171b 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -28,33 +28,33 @@ static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset)
static unsigned int
tcpoptstrip_mangle_packet(struct sk_buff *skb,
const struct xt_action_param *par,
- unsigned int tcphoff, unsigned int minlen)
+ unsigned int tcphoff)
{
const struct xt_tcpoptstrip_target_info *info = par->targinfo;
+ struct tcphdr *tcph, _th;
unsigned int optl, i, j;
- struct tcphdr *tcph;
u_int16_t n, o;
u_int8_t *opt;
- int len, tcp_hdrlen;
+ int tcp_hdrlen;
/* This is a fragment, no TCP header is available */
if (par->fragoff != 0)
return XT_CONTINUE;
- if (!skb_make_writable(skb, skb->len))
+ tcph = skb_header_pointer(skb, tcphoff, sizeof(_th), &_th);
+ if (!tcph)
return NF_DROP;
- len = skb->len - tcphoff;
- if (len < (int)sizeof(struct tcphdr))
- return NF_DROP;
-
- tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
tcp_hdrlen = tcph->doff * 4;
+ if (tcp_hdrlen < sizeof(struct tcphdr))
+ return NF_DROP;
- if (len < tcp_hdrlen)
+ if (skb_ensure_writable(skb, tcphoff + tcp_hdrlen))
return NF_DROP;
- opt = (u_int8_t *)tcph;
+ /* must reload tcph, might have been moved */
+ tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
+ opt = (u8 *)tcph;
/*
* Walk through all TCP options - if we find some option to remove,
@@ -88,8 +88,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb,
static unsigned int
tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
- return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb),
- sizeof(struct iphdr) + sizeof(struct tcphdr));
+ return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb));
}
#if IS_ENABLED(CONFIG_IP6_NF_MANGLE)
@@ -106,8 +105,7 @@ tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par)
if (tcphoff < 0)
return NF_DROP;
- return tcpoptstrip_mangle_packet(skb, par, tcphoff,
- sizeof(*ipv6h) + sizeof(struct tcphdr));
+ return tcpoptstrip_mangle_packet(skb, par, tcphoff);
}
#endif
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index 140ce6be639a..0c9e014e30b4 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -2,7 +2,7 @@
/*
* xt_iprange - Netfilter module to match IP address ranges
*
- * (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * (C) 2003 Jozsef Kadlecsik <kadlec@netfilter.org>
* (C) CC Computer Consultants GmbH, 2008
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -130,7 +130,7 @@ static void __exit iprange_mt_exit(void)
module_init(iprange_mt_init);
module_exit(iprange_mt_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
MODULE_DESCRIPTION("Xtables: arbitrary IPv4 range matching");
MODULE_ALIAS("ipt_iprange");
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 95f64a99e425..e85ce69924ae 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -22,6 +22,9 @@ static int owner_check(const struct xt_mtchk_param *par)
struct xt_owner_match_info *info = par->matchinfo;
struct net *net = par->net;
+ if (info->match & ~XT_OWNER_MASK)
+ return -EINVAL;
+
/* Only allow the common case where the userns of the writer
* matches the userns of the network namespace.
*/
@@ -88,11 +91,28 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
}
if (info->match & XT_OWNER_GID) {
+ unsigned int i, match = false;
kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);
- if ((gid_gte(filp->f_cred->fsgid, gid_min) &&
- gid_lte(filp->f_cred->fsgid, gid_max)) ^
- !(info->invert & XT_OWNER_GID))
+ struct group_info *gi = filp->f_cred->group_info;
+
+ if (gid_gte(filp->f_cred->fsgid, gid_min) &&
+ gid_lte(filp->f_cred->fsgid, gid_max))
+ match = true;
+
+ if (!match && (info->match & XT_OWNER_SUPPL_GROUPS) && gi) {
+ for (i = 0; i < gi->ngroups; ++i) {
+ kgid_t group = gi->gid[i];
+
+ if (gid_gte(group, gid_min) &&
+ gid_lte(group, gid_max)) {
+ match = true;
+ break;
+ }
+ }
+ }
+
+ if (match ^ !(info->invert & XT_OWNER_GID))
return false;
}
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index f099228cb9c4..ecbfa291fb70 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -2,7 +2,7 @@
/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
* Patrick Schaaf <bof@bof.de>
* Martin Josefsson <gandalf@wlug.westbo.se>
- * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
*/
/* Kernel module which implements the set match and SET target
@@ -18,7 +18,7 @@
#include <uapi/linux/netfilter/xt_set.h>
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
MODULE_DESCRIPTION("Xtables: IP set match and target module");
MODULE_ALIAS("xt_SET");
MODULE_ALIAS("ipt_set");
@@ -436,6 +436,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
{
const struct xt_set_info_target_v3 *info = par->targinfo;
ip_set_id_t index;
+ int ret = 0;
if (info->add_set.index != IPSET_INVALID_ID) {
index = ip_set_nfnl_get_byindex(par->net,
@@ -453,17 +454,16 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
if (index == IPSET_INVALID_ID) {
pr_info_ratelimited("Cannot find del_set index %u as target\n",
info->del_set.index);
- if (info->add_set.index != IPSET_INVALID_ID)
- ip_set_nfnl_put(par->net,
- info->add_set.index);
- return -ENOENT;
+ ret = -ENOENT;
+ goto cleanup_add;
}
}
if (info->map_set.index != IPSET_INVALID_ID) {
if (strncmp(par->table, "mangle", 7)) {
pr_info_ratelimited("--map-set only usable from mangle table\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto cleanup_del;
}
if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) |
(info->flags & IPSET_FLAG_MAP_SKBQUEUE)) &&
@@ -471,20 +471,16 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
1 << NF_INET_LOCAL_OUT |
1 << NF_INET_POST_ROUTING))) {
pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto cleanup_del;
}
index = ip_set_nfnl_get_byindex(par->net,
info->map_set.index);
if (index == IPSET_INVALID_ID) {
pr_info_ratelimited("Cannot find map_set index %u as target\n",
info->map_set.index);
- if (info->add_set.index != IPSET_INVALID_ID)
- ip_set_nfnl_put(par->net,
- info->add_set.index);
- if (info->del_set.index != IPSET_INVALID_ID)
- ip_set_nfnl_put(par->net,
- info->del_set.index);
- return -ENOENT;
+ ret = -ENOENT;
+ goto cleanup_del;
}
}
@@ -492,16 +488,21 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
info->del_set.dim > IPSET_DIM_MAX ||
info->map_set.dim > IPSET_DIM_MAX) {
pr_info_ratelimited("SET target dimension over the limit!\n");
- if (info->add_set.index != IPSET_INVALID_ID)
- ip_set_nfnl_put(par->net, info->add_set.index);
- if (info->del_set.index != IPSET_INVALID_ID)
- ip_set_nfnl_put(par->net, info->del_set.index);
- if (info->map_set.index != IPSET_INVALID_ID)
- ip_set_nfnl_put(par->net, info->map_set.index);
- return -ERANGE;
+ ret = -ERANGE;
+ goto cleanup_mark;
}
return 0;
+cleanup_mark:
+ if (info->map_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->map_set.index);
+cleanup_del:
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->del_set.index);
+cleanup_add:
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->add_set.index);
+ return ret;
}
static void