From 66293c46c9314d2b3e80be829a48fed17a848146 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 12 Apr 2019 11:09:25 +0200 Subject: netfilter: nf_tables: delay chain policy update until transaction is complete When we process a long ruleset of the form chain input { type filter hook input priority filter; policy drop; ... } Then the base chain gets registered early on, we then continue to process/validate the next messages coming in the same transaction. Problem is that if the base chain policy is 'drop', it will take effect immediately, which causes all traffic to get blocked until the transaction completes or is aborted. Fix this by deferring the policy until the transaction has been processed and all of the rules have been flagged as active. Reported-by: Jann Haber Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 50 +++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 11 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1606eaa5ae0d..f2e12ae50544 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -214,33 +214,33 @@ static int nft_deltable(struct nft_ctx *ctx) return err; } -static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) +static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) { struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain)); if (trans == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (msg_type == NFT_MSG_NEWCHAIN) nft_activate_next(ctx->net, ctx->chain); list_add_tail(&trans->list, &ctx->net->nft.commit_list); - return 0; + return trans; } static int nft_delchain(struct nft_ctx *ctx) { - int err; + struct nft_trans *trans; - err = nft_trans_chain_add(ctx, NFT_MSG_DELCHAIN); - if (err < 0) - return err; + trans = nft_trans_chain_add(ctx, NFT_MSG_DELCHAIN); + if (IS_ERR(trans)) + return PTR_ERR(trans); ctx->table->use--; nft_deactivate_next(ctx->net, ctx->chain); - return err; + return 0; } static void nft_rule_expr_activate(const struct nft_ctx *ctx, @@ -1615,6 +1615,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_base_chain *basechain; struct nft_stats __percpu *stats; struct net *net = ctx->net; + struct nft_trans *trans; struct nft_chain *chain; struct nft_rule **rules; int err; @@ -1662,7 +1663,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, ops->dev = hook.dev; chain->flags |= NFT_BASE_CHAIN; - basechain->policy = policy; + basechain->policy = NF_ACCEPT; } else { chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (chain == NULL) @@ -1698,13 +1699,18 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (err) goto err2; - err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); - if (err < 0) { + trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); rhltable_remove(&table->chains_ht, &chain->rhlhead, nft_chain_ht_params); goto err2; } + nft_trans_chain_policy(trans) = -1; + if (nft_is_base_chain(chain)) + nft_trans_chain_policy(trans) = policy; + table->use++; list_add_tail_rcu(&chain->list, &table->chains); @@ -6311,6 +6317,27 @@ static int nf_tables_validate(struct net *net) return 0; } +/* a drop policy has to be deferred until all rules have been activated, + * otherwise a large ruleset that contains a drop-policy base chain will + * cause all packets to get dropped until the full transaction has been + * processed. + * + * We defer the drop policy until the transaction has been finalized. + */ +static void nft_chain_commit_drop_policy(struct nft_trans *trans) +{ + struct nft_base_chain *basechain; + + if (nft_trans_chain_policy(trans) != NF_DROP) + return; + + if (!nft_is_base_chain(trans->ctx.chain)) + return; + + basechain = nft_base_chain(trans->ctx.chain); + basechain->policy = NF_DROP; +} + static void nft_chain_commit_update(struct nft_trans *trans) { struct nft_base_chain *basechain; @@ -6632,6 +6659,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN); /* trans destroyed after rcu grace period */ } else { + nft_chain_commit_drop_policy(trans); nft_clear(net, trans->ctx.chain); nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN); nft_trans_destroy(trans); -- cgit v1.2.3-55-g7522 From 270a8a297f42ecff82060aaa53118361f09c1f7d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 29 Apr 2019 11:54:56 +0200 Subject: netfilter: nft_flow_offload: add entry to flowtable after confirmation This is fixing flow offload for UDP traffic where packets only follow one single direction. The flow_offload_fixup_tcp() mechanism works fine in case that the offloaded entry remains in SYN_RECV state, given sequence tracking is reset and that conntrack handles syn+ack packets as a retransmission, ie. sES + synack => sIG for reply traffic. Fixes: a3c90f7a2323 ("netfilter: nf_tables: flow offload expression") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_flow_offload.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 6e6b9adf7d38..8968c7f5a72e 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -94,8 +94,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (help) goto out; - if (ctinfo == IP_CT_NEW || - ctinfo == IP_CT_RELATED) + if (!nf_ct_is_confirmed(ct)) goto out; if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status)) -- cgit v1.2.3-55-g7522 From 26a302afbe328ecb7507cae2035d938e6635131b Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 30 Apr 2019 01:55:29 +0900 Subject: netfilter: nf_flow_table: fix netdev refcnt leak flow_offload_alloc() calls nf_route() to get a dst_entry. Internally, nf_route() calls ip_route_output_key() that allocates a dst_entry and holds it. So, a dst_entry should be released by dst_release() if nf_route() is successful. Otherwise, netns exit routine cannot be finished and the following message is printed: [ 257.490952] unregister_netdevice: waiting for lo to become free. Usage count = 1 Fixes: ac2a66665e23 ("netfilter: add generic flow table infrastructure") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_flow_offload.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 8968c7f5a72e..69d7a8439c7a 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -112,6 +112,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (ret < 0) goto err_flow_add; + dst_release(route.tuple[!dir].dst); return; err_flow_add: -- cgit v1.2.3-55-g7522 From 33cc3c0cfa64c86b6c4bbee86997aea638534931 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 30 Apr 2019 01:55:54 +0900 Subject: netfilter: nf_flow_table: check ttl value in flow offload data path nf_flow_offload_ip_hook() and nf_flow_offload_ipv6_hook() do not check ttl value. So, ttl value overflow may occur. Fixes: 97add9f0d66d ("netfilter: flow table support for IPv4") Fixes: 0995210753a2 ("netfilter: flow table support for IPv6") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_ip.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 1d291a51cd45..46022a2867d7 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -181,6 +181,9 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, iph->protocol != IPPROTO_UDP) return -1; + if (iph->ttl <= 1) + return -1; + thoff = iph->ihl * 4; if (!pskb_may_pull(skb, thoff + sizeof(*ports))) return -1; @@ -411,6 +414,9 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, ip6h->nexthdr != IPPROTO_UDP) return -1; + if (ip6h->hop_limit <= 1) + return -1; + thoff = sizeof(*ip6h); if (!pskb_may_pull(skb, thoff + sizeof(*ports))) return -1; -- cgit v1.2.3-55-g7522 From f5e85ce8e733c2547827f6268136b70b802eabdb Mon Sep 17 00:00:00 2001 From: Jakub Jankowski Date: Thu, 25 Apr 2019 23:46:50 +0200 Subject: netfilter: nf_conntrack_h323: restore boundary check correctness Since commit bc7d811ace4a ("netfilter: nf_ct_h323: Convert CHECK_BOUND macro to function"), NAT traversal for H.323 doesn't work, failing to parse H323-UserInformation. nf_h323_error_boundary() compares contents of the bitstring, not the addresses, preventing valid H.323 packets from being conntrack'd. This looks like an oversight from when CHECK_BOUND macro was converted to a function. To fix it, stop dereferencing bs->cur and bs->end. Fixes: bc7d811ace4a ("netfilter: nf_ct_h323: Convert CHECK_BOUND macro to function") Signed-off-by: Jakub Jankowski Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_h323_asn1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 1601275efe2d..4c2ef42e189c 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -172,7 +172,7 @@ static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes, size_t bits) if (bits % BITS_PER_BYTE > 0) bytes++; - if (*bs->cur + bytes > *bs->end) + if (bs->cur + bytes > bs->end) return 1; return 0; -- cgit v1.2.3-55-g7522 From edbd82c5fba009f68d20b5db585be1e667c605f6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Apr 2019 14:33:22 +0200 Subject: netfilter: nf_tables: fix base chain stat rcu_dereference usage Following splat gets triggered when nfnetlink monitor is running while xtables-nft selftests are running: net/netfilter/nf_tables_api.c:1272 suspicious rcu_dereference_check() usage! other info that might help us debug this: 1 lock held by xtables-nft-mul/27006: #0: 00000000e0f85be9 (&net->nft.commit_mutex){+.+.}, at: nf_tables_valid_genid+0x1a/0x50 Call Trace: nf_tables_fill_chain_info.isra.45+0x6cc/0x6e0 nf_tables_chain_notify+0xf8/0x1a0 nf_tables_commit+0x165c/0x1740 nf_tables_fill_chain_info() can be called both from dumps (rcu read locked) or from the transaction path if a userspace process subscribed to nftables notifications. In the 'table dump' case, rcu_access_pointer() cannot be used: We do not hold transaction mutex so the pointer can be NULLed right after the check. Just unconditionally fetch the value, then have the helper return immediately if its NULL. In the notification case we don't hold the rcu read lock, but updates are prevented due to transaction mutex. Use rcu_dereference_check() to make lockdep aware of this. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f2e12ae50544..e4f6ecac48c3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1190,6 +1190,9 @@ static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) u64 pkts, bytes; int cpu; + if (!stats) + return 0; + memset(&total, 0, sizeof(total)); for_each_possible_cpu(cpu) { cpu_stats = per_cpu_ptr(stats, cpu); @@ -1247,6 +1250,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nft_is_base_chain(chain)) { const struct nft_base_chain *basechain = nft_base_chain(chain); const struct nf_hook_ops *ops = &basechain->ops; + struct nft_stats __percpu *stats; struct nlattr *nest; nest = nla_nest_start(skb, NFTA_CHAIN_HOOK); @@ -1268,8 +1272,9 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name)) goto nla_put_failure; - if (rcu_access_pointer(basechain->stats) && - nft_dump_stats(skb, rcu_dereference(basechain->stats))) + stats = rcu_dereference_check(basechain->stats, + lockdep_commit_lock_is_held(net)); + if (nft_dump_stats(skb, stats)) goto nla_put_failure; } -- cgit v1.2.3-55-g7522 From 43c8f131184faf20c07221f3e09724611c6525d8 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 3 May 2019 01:56:38 +0900 Subject: netfilter: nf_flow_table: fix missing error check for rhashtable_insert_fast rhashtable_insert_fast() may return an error value when memory allocation fails, but flow_offload_add() does not check for errors. This patch just adds missing error checking. Fixes: ac2a66665e23 ("netfilter: add generic flow table infrastructure") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 7aabfd4b1e50..a9e4f74b1ff6 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -185,14 +185,25 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = { int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) { - flow->timeout = (u32)jiffies; + int err; - rhashtable_insert_fast(&flow_table->rhashtable, - &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, - nf_flow_offload_rhash_params); - rhashtable_insert_fast(&flow_table->rhashtable, - &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, - nf_flow_offload_rhash_params); + err = rhashtable_insert_fast(&flow_table->rhashtable, + &flow->tuplehash[0].node, + nf_flow_offload_rhash_params); + if (err < 0) + return err; + + err = rhashtable_insert_fast(&flow_table->rhashtable, + &flow->tuplehash[1].node, + nf_flow_offload_rhash_params); + if (err < 0) { + rhashtable_remove_fast(&flow_table->rhashtable, + &flow->tuplehash[0].node, + nf_flow_offload_rhash_params); + return err; + } + + flow->timeout = (u32)jiffies; return 0; } EXPORT_SYMBOL_GPL(flow_offload_add); -- cgit v1.2.3-55-g7522 From f8e608982022fad035160870f5b06086d3cba54d Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Fri, 3 May 2019 17:40:07 +0200 Subject: netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression Commit 59c08c69c278 ("netfilter: ctnetlink: Support L3 protocol-filter on flush") introduced a user-space regression when flushing connection track entries. Before this commit, the nfgen_family field was not used by the kernel and all entries were removed. Since this commit, nfgen_family is used to filter out entries that should not be removed. One example a broken tool is conntrack. conntrack always sets nfgen_family to AF_INET, so after 59c08c69c278 only IPv4 entries were removed with the -F parameter. Pablo Neira Ayuso suggested using nfgenmsg->version to resolve the regression, and this commit implements his suggestion. nfgenmsg->version is so far set to zero, so it is well-suited to be used as a flag for selecting old or new flush behavior. If version is 0, nfgen_family is ignored and all entries are used. If user-space sets the version to one (or any other value than 0), then the new behavior is used. As version only can have two valid values, I chose not to add a new NFNETLINK_VERSION-constant. Fixes: 59c08c69c278 ("netfilter: ctnetlink: Support L3 protocol-filter on flush") Reported-by: Nicolas Dichtel Suggested-by: Pablo Neira Ayuso Signed-off-by: Kristian Evensen Tested-by: Nicolas Dichtel Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d7f61b0547c6..d2715b4d2e72 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1254,7 +1254,7 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, struct nf_conntrack_tuple tuple; struct nf_conn *ct; struct nfgenmsg *nfmsg = nlmsg_data(nlh); - u_int8_t u3 = nfmsg->nfgen_family; + u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC; struct nf_conntrack_zone zone; int err; -- cgit v1.2.3-55-g7522 From b33c448c4f920d5399acea9ccbb508baec272f6f Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Fri, 3 May 2019 12:39:08 -0600 Subject: netfilter: nf_conntrack_h323: Remove deprecated config check CONFIG_NF_CONNTRACK_IPV6 has been deprecated so replace it with a check for IPV6 instead. Use nf_ip6_route6() instead of v6ops->route() and keep the IS_MODULE() in nf_ipv6_ops as mentioned by Florian so that direct calls are used when IPV6 is builtin and indirect calls are used only when IPV6 is a module. Fixes: a0ae2562c6c4b2 ("netfilter: conntrack: remove l3proto abstraction") Signed-off-by: Subash Abhinov Kasiviswanathan Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_h323_main.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 005589c6d0f6..12de40390e97 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -748,24 +748,19 @@ static int callforward_do_filter(struct net *net, } break; } -#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: { - const struct nf_ipv6_ops *v6ops; struct rt6_info *rt1, *rt2; struct flowi6 fl1, fl2; - v6ops = nf_get_ipv6_ops(); - if (!v6ops) - return 0; - memset(&fl1, 0, sizeof(fl1)); fl1.daddr = src->in6; memset(&fl2, 0, sizeof(fl2)); fl2.daddr = dst->in6; - if (!v6ops->route(net, (struct dst_entry **)&rt1, + if (!nf_ip6_route(net, (struct dst_entry **)&rt1, flowi6_to_flowi(&fl1), false)) { - if (!v6ops->route(net, (struct dst_entry **)&rt2, + if (!nf_ip6_route(net, (struct dst_entry **)&rt2, flowi6_to_flowi(&fl2), false)) { if (ipv6_addr_equal(rt6_nexthop(rt1, &fl1.daddr), rt6_nexthop(rt2, &fl2.daddr)) && -- cgit v1.2.3-55-g7522 From 8cd2bc981c5335cacc432cba7666c2741c3e912f Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 30 Apr 2019 22:56:14 +0900 Subject: netfilter: nf_flow_table: do not flow offload deleted conntrack entries Conntrack entries can be deleted by the masquerade module. In that case, flow offload should be deleted too, but GC and data-path of flow offload do not check for conntrack status bits, hence flow offload entries will be removed only by the timeout. Update garbage collector and data-path to check for ct->status. If IPS_DYING_BIT is set, garbage collector removes flow offload entries and data-path routine ignores them. Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index a9e4f74b1ff6..4469519a4879 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -243,6 +243,7 @@ flow_offload_lookup(struct nf_flowtable *flow_table, { struct flow_offload_tuple_rhash *tuplehash; struct flow_offload *flow; + struct flow_offload_entry *e; int dir; tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple, @@ -255,6 +256,10 @@ flow_offload_lookup(struct nf_flowtable *flow_table, if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN)) return NULL; + e = container_of(flow, struct flow_offload_entry, flow); + if (unlikely(nf_ct_is_dying(e->ct))) + return NULL; + return tuplehash; } EXPORT_SYMBOL_GPL(flow_offload_lookup); @@ -301,8 +306,10 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow) static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) { struct nf_flowtable *flow_table = data; + struct flow_offload_entry *e; - if (nf_flow_has_expired(flow) || + e = container_of(flow, struct flow_offload_entry, flow); + if (nf_flow_has_expired(flow) || nf_ct_is_dying(e->ct) || (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))) flow_offload_del(flow_table, flow); } -- cgit v1.2.3-55-g7522