summaryrefslogtreecommitdiffstats
path: root/net/netfilter/nf_conntrack_core.c
diff options
context:
space:
mode:
authorDavid S. Miller2018-07-21 07:28:28 +0200
committerDavid S. Miller2018-07-21 07:28:28 +0200
commit99d20a461c43556242a4e1b65e309f1c1fadea4f (patch)
tree3e8db4f96e048c35500d802a4933da1d8a7e29d3 /net/netfilter/nf_conntrack_core.c
parentMerge ra.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux (diff)
parentnetfilter: nf_osf: add missing definitions to header file (diff)
downloadkernel-qcow2-linux-99d20a461c43556242a4e1b65e309f1c1fadea4f.tar.gz
kernel-qcow2-linux-99d20a461c43556242a4e1b65e309f1c1fadea4f.tar.xz
kernel-qcow2-linux-99d20a461c43556242a4e1b65e309f1c1fadea4f.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter/IPVS updates for net-next The following patchset contains Netfilter/IPVS updates for your net-next tree: 1) No need to set ttl from reject action for the bridge family, from Taehee Yoo. 2) Use a fixed timeout for flow that are passed up from the flowtable to conntrack, from Florian Westphal. 3) More preparation patches for tproxy support for nf_tables, from Mate Eckl. 4) Remove unnecessary indirection in core IPv6 checksum function, from Florian Westphal. 5) Use nf_ct_get_tuplepr() from openvswitch, instead of opencoding it. From Florian Westphal. 6) socket match now selects socket infrastructure, instead of depending on it. From Mate Eckl. 7) Patch series to simplify conntrack tuple building/parsing from packet path and ctnetlink, from Florian Westphal. 8) Fetch timeout policy from protocol helpers, instead of doing it from core, from Florian Westphal. 9) Merge IPv4 and IPv6 protocol trackers into conntrack core, from Florian Westphal. 10) Depend on CONFIG_NF_TABLES_IPV6 and CONFIG_IP6_NF_IPTABLES respectively, instead of IPV6. Patch from Mate Eckl. 11) Add specific function for garbage collection in conncount, from Yi-Hung Wei. 12) Catch number of elements in the connlimit list, from Yi-Hung Wei. 13) Move locking to nf_conncount, from Yi-Hung Wei. 14) Series of patches to add lockless tree traversal in nf_conncount, from Yi-Hung Wei. 15) Resolve clash in matching conntracks when race happens, from Martynas Pumputis. 16) If connection entry times out, remove template entry from the ip_vs_conn_tab table to improve behaviour under flood, from Julian Anastasov. 17) Remove useless parameter from nf_ct_helper_ext_add(), from Gao feng. 18) Call abort from 2-phase commit protocol before requesting modules, make sure this is done under the mutex, from Florian Westphal. 19) Grab module reference when starting transaction, also from Florian. 20) Dynamically allocate expression info array for pre-parsing, from Florian. 21) Add per netns mutex for nf_tables, from Florian Westphal. 22) A couple of patches to simplify and refactor nf_osf code to prepare for nft_osf support. 23) Break evaluation on missing socket, from Mate Eckl. 24) Allow to match socket mark from nft_socket, from Mate Eckl. 25) Remove dependency on nf_defrag_ipv6, now that IPv6 tracker is built-in into nf_conntrack. From Florian Westphal. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/netfilter/nf_conntrack_core.c')
-rw-r--r--net/netfilter/nf_conntrack_core.c252
1 files changed, 185 insertions, 67 deletions
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 805500197c22..8a113ca1eea2 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -37,7 +37,6 @@
#include <linux/rculist_nulls.h>
#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
@@ -55,6 +54,7 @@
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netns/hash.h>
+#include <net/ip.h>
#include "nf_internals.h"
@@ -222,7 +222,7 @@ static u32 hash_conntrack(const struct net *net,
return scale_hash(hash_conntrack_raw(tuple, net));
}
-bool
+static bool
nf_ct_get_tuple(const struct sk_buff *skb,
unsigned int nhoff,
unsigned int dataoff,
@@ -230,37 +230,151 @@ nf_ct_get_tuple(const struct sk_buff *skb,
u_int8_t protonum,
struct net *net,
struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
+ unsigned int size;
+ const __be32 *ap;
+ __be32 _addrs[8];
+ struct {
+ __be16 sport;
+ __be16 dport;
+ } _inet_hdr, *inet_hdr;
+
memset(tuple, 0, sizeof(*tuple));
tuple->src.l3num = l3num;
- if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
+ switch (l3num) {
+ case NFPROTO_IPV4:
+ nhoff += offsetof(struct iphdr, saddr);
+ size = 2 * sizeof(__be32);
+ break;
+ case NFPROTO_IPV6:
+ nhoff += offsetof(struct ipv6hdr, saddr);
+ size = sizeof(_addrs);
+ break;
+ default:
+ return true;
+ }
+
+ ap = skb_header_pointer(skb, nhoff, size, _addrs);
+ if (!ap)
return false;
+ switch (l3num) {
+ case NFPROTO_IPV4:
+ tuple->src.u3.ip = ap[0];
+ tuple->dst.u3.ip = ap[1];
+ break;
+ case NFPROTO_IPV6:
+ memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
+ memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
+ break;
+ }
+
tuple->dst.protonum = protonum;
tuple->dst.dir = IP_CT_DIR_ORIGINAL;
- return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
+ if (unlikely(l4proto->pkt_to_tuple))
+ return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
+
+ /* Actually only need first 4 bytes to get ports. */
+ inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr);
+ if (!inet_hdr)
+ return false;
+
+ tuple->src.u.udp.port = inet_hdr->sport;
+ tuple->dst.u.udp.port = inet_hdr->dport;
+ return true;
+}
+
+static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+ u_int8_t *protonum)
+{
+ int dataoff = -1;
+ const struct iphdr *iph;
+ struct iphdr _iph;
+
+ iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+ if (!iph)
+ return -1;
+
+ /* Conntrack defragments packets, we might still see fragments
+ * inside ICMP packets though.
+ */
+ if (iph->frag_off & htons(IP_OFFSET))
+ return -1;
+
+ dataoff = nhoff + (iph->ihl << 2);
+ *protonum = iph->protocol;
+
+ /* Check bogus IP headers */
+ if (dataoff > skb->len) {
+ pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n",
+ nhoff, iph->ihl << 2, skb->len);
+ return -1;
+ }
+ return dataoff;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+ u8 *protonum)
+{
+ int protoff = -1;
+ unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
+ __be16 frag_off;
+ u8 nexthdr;
+
+ if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
+ &nexthdr, sizeof(nexthdr)) != 0) {
+ pr_debug("can't get nexthdr\n");
+ return -1;
+ }
+ protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
+ /*
+ * (protoff == skb->len) means the packet has not data, just
+ * IPv6 and possibly extensions headers, but it is tracked anyway
+ */
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+ pr_debug("can't find proto in pkt\n");
+ return -1;
+ }
+
+ *protonum = nexthdr;
+ return protoff;
+}
+#endif
+
+static int get_l4proto(const struct sk_buff *skb,
+ unsigned int nhoff, u8 pf, u8 *l4num)
+{
+ switch (pf) {
+ case NFPROTO_IPV4:
+ return ipv4_get_l4proto(skb, nhoff, l4num);
+#if IS_ENABLED(CONFIG_IPV6)
+ case NFPROTO_IPV6:
+ return ipv6_get_l4proto(skb, nhoff, l4num);
+#endif
+ default:
+ *l4num = 0;
+ break;
+ }
+ return -1;
}
-EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
u_int16_t l3num,
struct net *net, struct nf_conntrack_tuple *tuple)
{
- const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
- unsigned int protoff;
- u_int8_t protonum;
+ u8 protonum;
+ int protoff;
int ret;
rcu_read_lock();
- l3proto = __nf_ct_l3proto_find(l3num);
- ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum);
- if (ret != NF_ACCEPT) {
+ protoff = get_l4proto(skb, nhoff, l3num, &protonum);
+ if (protoff <= 0) {
rcu_read_unlock();
return false;
}
@@ -268,7 +382,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
l4proto = __nf_ct_l4proto_find(l3num, protonum);
ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
- l3proto, l4proto);
+ l4proto);
rcu_read_unlock();
return ret;
@@ -278,19 +392,35 @@ EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
bool
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig,
- const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
memset(inverse, 0, sizeof(*inverse));
inverse->src.l3num = orig->src.l3num;
- if (l3proto->invert_tuple(inverse, orig) == 0)
- return false;
+
+ switch (orig->src.l3num) {
+ case NFPROTO_IPV4:
+ inverse->src.u3.ip = orig->dst.u3.ip;
+ inverse->dst.u3.ip = orig->src.u3.ip;
+ break;
+ case NFPROTO_IPV6:
+ inverse->src.u3.in6 = orig->dst.u3.in6;
+ inverse->dst.u3.in6 = orig->src.u3.in6;
+ break;
+ default:
+ break;
+ }
inverse->dst.dir = !orig->dst.dir;
inverse->dst.protonum = orig->dst.protonum;
- return l4proto->invert_tuple(inverse, orig);
+
+ if (unlikely(l4proto->invert_tuple))
+ return l4proto->invert_tuple(inverse, orig);
+
+ inverse->src.u.all = orig->dst.u.all;
+ inverse->dst.u.all = orig->src.u.all;
+ return true;
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
@@ -502,6 +632,18 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
net_eq(net, nf_ct_net(ct));
}
+static inline bool
+nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2)
+{
+ return nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ &ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple) &&
+ nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple,
+ &ct2->tuplehash[IP_CT_DIR_REPLY].tuple) &&
+ nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL) &&
+ nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_REPLY) &&
+ net_eq(nf_ct_net(ct1), nf_ct_net(ct2));
+}
+
/* caller must hold rcu readlock and none of the nf_conntrack_locks */
static void nf_ct_gc_expired(struct nf_conn *ct)
{
@@ -695,19 +837,21 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
/* This is the conntrack entry already in hashes that won race. */
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
const struct nf_conntrack_l4proto *l4proto;
+ enum ip_conntrack_info oldinfo;
+ struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->allow_clash &&
- ((ct->status & IPS_NAT_DONE_MASK) == 0) &&
!nf_ct_is_dying(ct) &&
atomic_inc_not_zero(&ct->ct_general.use)) {
- enum ip_conntrack_info oldinfo;
- struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
-
- nf_ct_acct_merge(ct, ctinfo, loser_ct);
- nf_conntrack_put(&loser_ct->ct_general);
- nf_ct_set(skb, ct, oldinfo);
- return NF_ACCEPT;
+ if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
+ nf_ct_match(ct, loser_ct)) {
+ nf_ct_acct_merge(ct, ctinfo, loser_ct);
+ nf_conntrack_put(&loser_ct->ct_general);
+ nf_ct_set(skb, ct, oldinfo);
+ return NF_ACCEPT;
+ }
+ nf_ct_put(ct);
}
NF_CT_STAT_INC(net, drop);
return NF_DROP;
@@ -1195,7 +1339,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto,
struct sk_buff *skb,
unsigned int dataoff, u32 hash)
@@ -1208,9 +1351,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_zone *zone;
struct nf_conn_timeout *timeout_ext;
struct nf_conntrack_zone tmp;
- unsigned int *timeouts;
- if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
+ if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) {
pr_debug("Can't invert tuple.\n");
return NULL;
}
@@ -1227,15 +1369,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
}
timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
- if (timeout_ext) {
- timeouts = nf_ct_timeout_data(timeout_ext);
- if (unlikely(!timeouts))
- timeouts = l4proto->get_timeouts(net);
- } else {
- timeouts = l4proto->get_timeouts(net);
- }
- if (!l4proto->new(ct, skb, dataoff, timeouts)) {
+ if (!l4proto->new(ct, skb, dataoff)) {
nf_conntrack_free(ct);
pr_debug("can't track with proto module\n");
return NULL;
@@ -1266,8 +1401,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
/* exp->master safe, refcnt bumped in nf_ct_find_expectation */
ct->master = exp->master;
if (exp->helper) {
- help = nf_ct_helper_ext_add(ct, exp->helper,
- GFP_ATOMIC);
+ help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
if (help)
rcu_assign_pointer(help->helper, exp->helper);
}
@@ -1307,7 +1441,6 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
- const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
const struct nf_conntrack_zone *zone;
@@ -1319,8 +1452,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
u32 hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
- dataoff, l3num, protonum, net, &tuple, l3proto,
- l4proto)) {
+ dataoff, l3num, protonum, net, &tuple, l4proto)) {
pr_debug("Can't get tuple\n");
return 0;
}
@@ -1330,7 +1462,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
hash = hash_conntrack_raw(&tuple, net);
h = __nf_conntrack_find_get(net, zone, &tuple, hash);
if (!h) {
- h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
+ h = init_conntrack(net, tmpl, &tuple, l4proto,
skb, dataoff, hash);
if (!h)
return 0;
@@ -1363,14 +1495,11 @@ unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
struct sk_buff *skb)
{
- const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
struct nf_conn *ct, *tmpl;
enum ip_conntrack_info ctinfo;
- unsigned int *timeouts;
- unsigned int dataoff;
u_int8_t protonum;
- int ret;
+ int dataoff, ret;
tmpl = nf_ct_get(skb, &ctinfo);
if (tmpl || ctinfo == IP_CT_UNTRACKED) {
@@ -1384,14 +1513,12 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
}
/* rcu_read_lock()ed by nf_hook_thresh */
- l3proto = __nf_ct_l3proto_find(pf);
- ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
- &dataoff, &protonum);
- if (ret <= 0) {
+ dataoff = get_l4proto(skb, skb_network_offset(skb), pf, &protonum);
+ if (dataoff <= 0) {
pr_debug("not prepared to track yet or error occurred\n");
NF_CT_STAT_INC_ATOMIC(net, error);
NF_CT_STAT_INC_ATOMIC(net, invalid);
- ret = -ret;
+ ret = NF_ACCEPT;
goto out;
}
@@ -1413,8 +1540,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
goto out;
}
repeat:
- ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
- l3proto, l4proto);
+ ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l4proto);
if (ret < 0) {
/* Too stressed to deal. */
NF_CT_STAT_INC_ATOMIC(net, drop);
@@ -1430,10 +1556,7 @@ repeat:
goto out;
}
- /* Decide what timeout policy we want to apply to this flow. */
- timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
-
- ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts);
+ ret = l4proto->packet(ct, skb, dataoff, ctinfo);
if (ret <= 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
@@ -1471,7 +1594,6 @@ bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
rcu_read_lock();
ret = nf_ct_invert_tuple(inverse, orig,
- __nf_ct_l3proto_find(orig->src.l3num),
__nf_ct_l4proto_find(orig->src.l3num,
orig->dst.protonum));
rcu_read_unlock();
@@ -1609,14 +1731,14 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
{
- const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
enum ip_conntrack_info ctinfo;
struct nf_nat_hook *nat_hook;
- unsigned int dataoff, status;
+ unsigned int status;
struct nf_conn *ct;
+ int dataoff;
u16 l3num;
u8 l4num;
@@ -1625,16 +1747,15 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
return 0;
l3num = nf_ct_l3num(ct);
- l3proto = nf_ct_l3proto_find_get(l3num);
- if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
- &l4num) <= 0)
+ dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num);
+ if (dataoff <= 0)
return -1;
l4proto = nf_ct_l4proto_find_get(l3num, l4num);
if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
- l4num, net, &tuple, l3proto, l4proto))
+ l4num, net, &tuple, l4proto))
return -1;
if (ct->status & IPS_SRC_NAT) {
@@ -2089,9 +2210,6 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
}
EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
-module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
- &nf_conntrack_htable_size, 0600);
-
static __always_inline unsigned int total_extension_size(void)
{
/* remember to add new extensions below */