From b21613aeb649293946556410ff10dbb8c639f298 Mon Sep 17 00:00:00 2001 From: holger@eitzenberger.org Date: Fri, 20 Sep 2013 22:43:04 +0200 Subject: netfilter: nf_ct_sip: extend RCU read lock in set_expected_rtp_rtcp() Currently set_expected_rtp_rtcp() in the SIP helper uses rcu_dereference() two times to access two different NAT hook functions. However, only the first one is protected by the RCU reader lock, but the 2nd isn't. Fix it by extending the RCU protected area. This is more a cosmetic thing since we rely on all netfilter hooks being rcu_read_lock()ed by nf_hook_slow() in many places anyways, as Patrick McHardy clarified. Signed-off-by: Holger Eitzenberger Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index e0c4373b4747..5ed8c441dffd 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -966,7 +966,6 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, #endif skip_expect = 1; } while (!skip_expect); - rcu_read_unlock(); base_port = ntohs(tuple.dst.u.udp.port) & ~1; rtp_port = htons(base_port); @@ -980,8 +979,10 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, goto err1; } - if (skip_expect) + if (skip_expect) { + rcu_read_unlock(); return NF_ACCEPT; + } rtp_exp = nf_ct_expect_alloc(ct); if (rtp_exp == NULL) @@ -1012,6 +1013,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, err2: nf_ct_expect_put(rtp_exp); err1: + rcu_read_unlock(); return ret; } -- cgit v1.2.3-55-g7522 From de1389b11686f436c81d696b5a33eec2bc975665 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 26 Sep 2013 15:00:30 +0800 Subject: netfilter: xt_TCPMSS: Get mtu only if clamp-mss-to-pmtu is specified This patch refactors the code to skip tcpmss_reverse_mtu if no clamp-mss-to-pmtu is specified. Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_TCPMSS.c | 70 ++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 34 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index cd24290f3b2f..62776de8293c 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -43,10 +43,41 @@ optlen(const u_int8_t *opt, unsigned int offset) return opt[offset+1]; } +static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, + unsigned int family) +{ + struct flowi fl; + const struct nf_afinfo *ai; + struct rtable *rt = NULL; + u_int32_t mtu = ~0U; + + if (family == PF_INET) { + struct flowi4 *fl4 = &fl.u.ip4; + memset(fl4, 0, sizeof(*fl4)); + fl4->daddr = ip_hdr(skb)->saddr; + } else { + struct flowi6 *fl6 = &fl.u.ip6; + + memset(fl6, 0, sizeof(*fl6)); + fl6->daddr = ipv6_hdr(skb)->saddr; + } + rcu_read_lock(); + ai = nf_get_afinfo(family); + if (ai != NULL) + ai->route(&init_net, (struct dst_entry **)&rt, &fl, false); + rcu_read_unlock(); + + if (rt != NULL) { + mtu = dst_mtu(&rt->dst); + dst_release(&rt->dst); + } + return mtu; +} + static int tcpmss_mangle_packet(struct sk_buff *skb, const struct xt_action_param *par, - unsigned int in_mtu, + unsigned int family, unsigned int tcphoff, unsigned int minlen) { @@ -76,6 +107,8 @@ tcpmss_mangle_packet(struct sk_buff *skb, return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { + unsigned int in_mtu = tcpmss_reverse_mtu(skb, family); + if (dst_mtu(skb_dst(skb)) <= minlen) { net_err_ratelimited("unknown or invalid path-MTU (%u)\n", dst_mtu(skb_dst(skb))); @@ -165,37 +198,6 @@ tcpmss_mangle_packet(struct sk_buff *skb, return TCPOLEN_MSS; } -static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, - unsigned int family) -{ - struct flowi fl; - const struct nf_afinfo *ai; - struct rtable *rt = NULL; - u_int32_t mtu = ~0U; - - if (family == PF_INET) { - struct flowi4 *fl4 = &fl.u.ip4; - memset(fl4, 0, sizeof(*fl4)); - fl4->daddr = ip_hdr(skb)->saddr; - } else { - struct flowi6 *fl6 = &fl.u.ip6; - - memset(fl6, 0, sizeof(*fl6)); - fl6->daddr = ipv6_hdr(skb)->saddr; - } - rcu_read_lock(); - ai = nf_get_afinfo(family); - if (ai != NULL) - ai->route(&init_net, (struct dst_entry **)&rt, &fl, false); - rcu_read_unlock(); - - if (rt != NULL) { - mtu = dst_mtu(&rt->dst); - dst_release(&rt->dst); - } - return mtu; -} - static unsigned int tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par) { @@ -204,7 +206,7 @@ tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par) int ret; ret = tcpmss_mangle_packet(skb, par, - tcpmss_reverse_mtu(skb, PF_INET), + PF_INET, iph->ihl * 4, sizeof(*iph) + sizeof(struct tcphdr)); if (ret < 0) @@ -233,7 +235,7 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par) if (tcphoff < 0) return NF_DROP; ret = tcpmss_mangle_packet(skb, par, - tcpmss_reverse_mtu(skb, PF_INET6), + PF_INET6, tcphoff, sizeof(*ipv6h) + sizeof(struct tcphdr)); if (ret < 0) -- cgit v1.2.3-55-g7522 From 7722e0d1c076d9610f00d79bde8af977157aa23b Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 26 Sep 2013 15:00:31 +0800 Subject: netfilter: xt_TCPMSS: lookup route from proper net namespace Otherwise the pmtu will be incorrect. Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_TCPMSS.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 62776de8293c..e762de5ee89b 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -43,7 +43,8 @@ optlen(const u_int8_t *opt, unsigned int offset) return opt[offset+1]; } -static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, +static u_int32_t tcpmss_reverse_mtu(struct net *net, + const struct sk_buff *skb, unsigned int family) { struct flowi fl; @@ -64,7 +65,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, rcu_read_lock(); ai = nf_get_afinfo(family); if (ai != NULL) - ai->route(&init_net, (struct dst_entry **)&rt, &fl, false); + ai->route(net, (struct dst_entry **)&rt, &fl, false); rcu_read_unlock(); if (rt != NULL) { @@ -107,7 +108,8 @@ tcpmss_mangle_packet(struct sk_buff *skb, return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { - unsigned int in_mtu = tcpmss_reverse_mtu(skb, family); + struct net *net = dev_net(par->in ? par->in : par->out); + unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); if (dst_mtu(skb_dst(skb)) <= minlen) { net_err_ratelimited("unknown or invalid path-MTU (%u)\n", -- cgit v1.2.3-55-g7522 From 122ebbf24c33ac13a9243248c9b8da0fa4c012bd Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 27 Apr 2013 21:02:59 +0200 Subject: netfilter: ipset: Don't call ip_nest_end needlessly in the error path Suggested-by: Pablo Neira Ayuso Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_gen.h | 2 +- net/netfilter/ipset/ip_set_hash_gen.h | 2 +- net/netfilter/ipset/ip_set_list_set.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 25243379b887..f6af97cf8d3e 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -228,11 +228,11 @@ mtype_list(const struct ip_set *set, nla_put_failure: nla_nest_cancel(skb, nested); - ipset_nest_end(skb, adt); if (unlikely(id == first)) { cb->args[2] = 0; return -EMSGSIZE; } + ipset_nest_end(skb, adt); return 0; } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 707bc520d629..7ff20ecbe185 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -909,13 +909,13 @@ mtype_list(const struct ip_set *set, nla_put_failure: nlmsg_trim(skb, incomplete); - ipset_nest_end(skb, atd); if (unlikely(first == cb->args[2])) { pr_warning("Can't list set %s: one bucket does not fit into " "a message. Please report it!\n", set->name); cb->args[2] = 0; return -EMSGSIZE; } + ipset_nest_end(skb, atd); return 0; } diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 979b8c90e422..68299ee15847 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -550,11 +550,11 @@ finish: nla_put_failure: nla_nest_cancel(skb, nested); - ipset_nest_end(skb, atd); if (unlikely(i == first)) { cb->args[2] = 0; return -EMSGSIZE; } + ipset_nest_end(skb, atd); return 0; } -- cgit v1.2.3-55-g7522 From b3aabd149c201a7c2a5f9ee673376948ae0724d8 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 30 Apr 2013 18:40:43 +0200 Subject: netfilter: ipset: Sparse warning about shadowed variable fixed net/netfilter/ipset/ip_set_hash_ipportnet.c:275:20: warning: symbol 'cidr' shadows an earlier one Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_hash_ipportnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index f15f3e28b9c3..9a80d8bc9f18 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -272,7 +272,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (ip > ip_to) swap(ip, ip_to); } else if (tb[IPSET_ATTR_CIDR]) { - u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; -- cgit v1.2.3-55-g7522 From a0f28dc754bb6e78158eb41bf729342f7c2bcd70 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 30 Apr 2013 21:23:18 +0200 Subject: netfilter: ipset: Fix sparse warnings due to missing rcu annotations Reported-by: Pablo Neira Ayuso Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_hash_gen.h | 87 ++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 32 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 7ff20ecbe185..09a21dd5f120 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -15,6 +15,8 @@ #define rcu_dereference_bh(p) rcu_dereference(p) #endif +#define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1) + #define CONCAT(a, b) a##b #define TOKEN(a, b) CONCAT(a, b) @@ -269,7 +271,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) /* The generic hash structure */ struct htype { - struct htable *table; /* the hash table */ + struct htable __rcu *table; /* the hash table */ u32 maxelem; /* max elements in the hash */ u32 elements; /* current element (vs timeout) */ u32 initval; /* random jhash init value */ @@ -347,10 +349,10 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length) /* Calculate the actual memory size of the set data */ static size_t -mtype_ahash_memsize(const struct htype *h, u8 nets_length) +mtype_ahash_memsize(const struct htype *h, const struct htable *t, + u8 nets_length) { u32 i; - struct htable *t = h->table; size_t memsize = sizeof(*h) + sizeof(*t) #ifdef IP_SET_HASH_WITH_NETS @@ -369,10 +371,11 @@ static void mtype_flush(struct ip_set *set) { struct htype *h = set->data; - struct htable *t = h->table; + struct htable *t; struct hbucket *n; u32 i; + t = rcu_dereference_bh_nfnl(h->table); for (i = 0; i < jhash_size(t->htable_bits); i++) { n = hbucket(t, i); if (n->size) { @@ -397,7 +400,7 @@ mtype_destroy(struct ip_set *set) if (set->extensions & IPSET_EXT_TIMEOUT) del_timer_sync(&h->gc); - ahash_destroy(h->table); + ahash_destroy(rcu_dereference_bh_nfnl(h->table)); #ifdef IP_SET_HASH_WITH_RBTREE rbtree_destroy(&h->rbtree); #endif @@ -443,12 +446,14 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) static void mtype_expire(struct htype *h, u8 nets_length, size_t dsize) { - struct htable *t = h->table; + struct htable *t; struct hbucket *n; struct mtype_elem *data; u32 i; int j; + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); for (i = 0; i < jhash_size(t->htable_bits); i++) { n = hbucket(t, i); for (j = 0; j < n->pos; j++) { @@ -481,6 +486,7 @@ mtype_expire(struct htype *h, u8 nets_length, size_t dsize) n->value = tmp; } } + rcu_read_unlock_bh(); } static void @@ -505,7 +511,7 @@ static int mtype_resize(struct ip_set *set, bool retried) { struct htype *h = set->data; - struct htable *t, *orig = h->table; + struct htable *t, *orig = rcu_dereference_bh_nfnl(h->table); u8 htable_bits = orig->htable_bits; #ifdef IP_SET_HASH_WITH_NETS u8 flags; @@ -682,13 +688,15 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { struct htype *h = set->data; - struct htable *t = h->table; + struct htable *t; const struct mtype_elem *d = value; struct mtype_elem *data; struct hbucket *n; - int i; + int i, ret = -IPSET_ERR_EXIST; u32 key, multi = 0; + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); key = HKEY(value, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { @@ -697,7 +705,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, continue; if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(data, h))) - return -IPSET_ERR_EXIST; + goto out; if (i != n->pos - 1) /* Not last one */ memcpy(data, ahash_data(n, n->pos - 1, h->dsize), @@ -712,17 +720,22 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) * h->dsize, GFP_ATOMIC); - if (!tmp) - return 0; + if (!tmp) { + ret = 0; + goto out; + } n->size -= AHASH_INIT_SIZE; memcpy(tmp, n->value, n->size * h->dsize); kfree(n->value); n->value = tmp; } - return 0; + ret = 0; + goto out; } - return -IPSET_ERR_EXIST; +out: + rcu_read_unlock_bh(); + return ret; } static inline int @@ -745,7 +758,7 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, struct ip_set_ext *mext, u32 flags) { struct htype *h = set->data; - struct htable *t = h->table; + struct htable *t = rcu_dereference_bh(h->table); struct hbucket *n; struct mtype_elem *data; int i, j = 0; @@ -785,18 +798,22 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { struct htype *h = set->data; - struct htable *t = h->table; + struct htable *t; struct mtype_elem *d = value; struct hbucket *n; struct mtype_elem *data; - int i; + int i, ret = 0; u32 key, multi = 0; + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); #ifdef IP_SET_HASH_WITH_NETS /* If we test an IP address and not a network address, * try all possible network sizes */ - if (CIDR(d->cidr) == SET_HOST_MASK(set->family)) - return mtype_test_cidrs(set, d, ext, mext, flags); + if (CIDR(d->cidr) == SET_HOST_MASK(set->family)) { + ret = mtype_test_cidrs(set, d, ext, mext, flags); + goto out; + } #endif key = HKEY(d, h->initval, t->htable_bits); @@ -805,10 +822,14 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, data = ahash_data(n, i, h->dsize); if (mtype_data_equal(data, d, &multi) && !(SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h)))) - return mtype_data_match(data, ext, mext, set, flags); + ip_set_timeout_expired(ext_timeout(data, h)))) { + ret = mtype_data_match(data, ext, mext, set, flags); + goto out; + } } - return 0; +out: + rcu_read_unlock_bh(); + return ret; } /* Reply a HEADER request: fill out the header part of the set */ @@ -816,18 +837,18 @@ static int mtype_head(struct ip_set *set, struct sk_buff *skb) { const struct htype *h = set->data; + const struct htable *t; struct nlattr *nested; size_t memsize; - read_lock_bh(&set->lock); - memsize = mtype_ahash_memsize(h, NETS_LENGTH(set->family)); - read_unlock_bh(&set->lock); + t = rcu_dereference_bh_nfnl(h->table); + memsize = mtype_ahash_memsize(h, t, NETS_LENGTH(set->family)); nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE, - htonl(jhash_size(h->table->htable_bits))) || + htonl(jhash_size(t->htable_bits))) || nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem))) goto nla_put_failure; #ifdef IP_SET_HASH_WITH_NETMASK @@ -856,7 +877,7 @@ mtype_list(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb) { const struct htype *h = set->data; - const struct htable *t = h->table; + const struct htable *t = rcu_dereference_bh_nfnl(h->table); struct nlattr *atd, *nested; const struct hbucket *n; const struct mtype_elem *e; @@ -956,6 +977,7 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) #endif size_t hsize; struct HTYPE *h; + struct htable *t; if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) return -IPSET_ERR_INVALID_FAMILY; @@ -1013,12 +1035,13 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) kfree(h); return -ENOMEM; } - h->table = ip_set_alloc(hsize); - if (!h->table) { + t = ip_set_alloc(hsize); + if (!t) { kfree(h); return -ENOMEM; } - h->table->htable_bits = hbits; + t->htable_bits = hbits; + rcu_assign_pointer(h->table, t); set->data = h; if (set->family == NFPROTO_IPV4) @@ -1096,8 +1119,8 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) } pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", - set->name, jhash_size(h->table->htable_bits), - h->table->htable_bits, h->maxelem, set->data, h->table); + set->name, jhash_size(t->htable_bits), + t->htable_bits, h->maxelem, set->data, t); return 0; } -- cgit v1.2.3-55-g7522 From 35b8dcf8c3a0be1feb1c8b29b22e1685ba0c2e14 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 30 Apr 2013 23:02:43 +0200 Subject: netfilter: ipset: Rename simple macro names to avoid namespace issues. Reported-by: David Laight Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 3 + net/netfilter/ipset/ip_set_bitmap_gen.h | 47 +++++---- net/netfilter/ipset/ip_set_bitmap_ip.c | 10 +- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 10 +- net/netfilter/ipset/ip_set_bitmap_port.c | 10 +- net/netfilter/ipset/ip_set_hash_gen.h | 147 +++++++++++++++------------- net/netfilter/ipset/ip_set_hash_ip.c | 10 +- net/netfilter/ipset/ip_set_hash_ipport.c | 12 +-- net/netfilter/ipset/ip_set_hash_ipportip.c | 12 +-- net/netfilter/ipset/ip_set_hash_ipportnet.c | 16 +-- net/netfilter/ipset/ip_set_hash_net.c | 14 +-- net/netfilter/ipset/ip_set_hash_netiface.c | 14 +-- net/netfilter/ipset/ip_set_hash_netport.c | 16 +-- net/netfilter/ipset/ip_set_list_set.c | 10 +- 14 files changed, 169 insertions(+), 162 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 9ac9fbde7b61..f900f33a5f3d 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -398,4 +398,7 @@ bitmap_bytes(u32 a, u32 b) { .bytes = ULLONG_MAX, .packets = ULLONG_MAX, \ .timeout = (map)->timeout } +#define IPSET_CONCAT(a, b) a##b +#define IPSET_TOKEN(a, b) IPSET_CONCAT(a, b) + #endif /*_IP_SET_H */ diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index f6af97cf8d3e..d39905e7e881 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -8,31 +8,28 @@ #ifndef __IP_SET_BITMAP_IP_GEN_H #define __IP_SET_BITMAP_IP_GEN_H -#define CONCAT(a, b) a##b -#define TOKEN(a,b) CONCAT(a, b) - -#define mtype_do_test TOKEN(MTYPE, _do_test) -#define mtype_gc_test TOKEN(MTYPE, _gc_test) -#define mtype_is_filled TOKEN(MTYPE, _is_filled) -#define mtype_do_add TOKEN(MTYPE, _do_add) -#define mtype_do_del TOKEN(MTYPE, _do_del) -#define mtype_do_list TOKEN(MTYPE, _do_list) -#define mtype_do_head TOKEN(MTYPE, _do_head) -#define mtype_adt_elem TOKEN(MTYPE, _adt_elem) -#define mtype_add_timeout TOKEN(MTYPE, _add_timeout) -#define mtype_gc_init TOKEN(MTYPE, _gc_init) -#define mtype_kadt TOKEN(MTYPE, _kadt) -#define mtype_uadt TOKEN(MTYPE, _uadt) -#define mtype_destroy TOKEN(MTYPE, _destroy) -#define mtype_flush TOKEN(MTYPE, _flush) -#define mtype_head TOKEN(MTYPE, _head) -#define mtype_same_set TOKEN(MTYPE, _same_set) -#define mtype_elem TOKEN(MTYPE, _elem) -#define mtype_test TOKEN(MTYPE, _test) -#define mtype_add TOKEN(MTYPE, _add) -#define mtype_del TOKEN(MTYPE, _del) -#define mtype_list TOKEN(MTYPE, _list) -#define mtype_gc TOKEN(MTYPE, _gc) +#define mtype_do_test IPSET_TOKEN(MTYPE, _do_test) +#define mtype_gc_test IPSET_TOKEN(MTYPE, _gc_test) +#define mtype_is_filled IPSET_TOKEN(MTYPE, _is_filled) +#define mtype_do_add IPSET_TOKEN(MTYPE, _do_add) +#define mtype_do_del IPSET_TOKEN(MTYPE, _do_del) +#define mtype_do_list IPSET_TOKEN(MTYPE, _do_list) +#define mtype_do_head IPSET_TOKEN(MTYPE, _do_head) +#define mtype_adt_elem IPSET_TOKEN(MTYPE, _adt_elem) +#define mtype_add_timeout IPSET_TOKEN(MTYPE, _add_timeout) +#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) +#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) +#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy) +#define mtype_flush IPSET_TOKEN(MTYPE, _flush) +#define mtype_head IPSET_TOKEN(MTYPE, _head) +#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) +#define mtype_elem IPSET_TOKEN(MTYPE, _elem) +#define mtype_test IPSET_TOKEN(MTYPE, _test) +#define mtype_add IPSET_TOKEN(MTYPE, _add) +#define mtype_del IPSET_TOKEN(MTYPE, _del) +#define mtype_list IPSET_TOKEN(MTYPE, _list) +#define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype MTYPE #define ext_timeout(e, m) \ diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index f1a8128bef01..c2f89b1c1d92 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -25,12 +25,12 @@ #include #include -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counter support added */ +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 1 /* Counter support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("bitmap:ip", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("bitmap:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:ip"); #define MTYPE bitmap_ip @@ -401,8 +401,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = NFPROTO_IPV4, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = bitmap_ip_create, .create_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 3b30e0bef890..1d6551cc590e 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -25,12 +25,12 @@ #include #include -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counter support added */ +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 1 /* Counter support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("bitmap:ip,mac", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:ip,mac"); #define MTYPE bitmap_ipmac @@ -460,8 +460,8 @@ static struct ip_set_type bitmap_ipmac_type = { .features = IPSET_TYPE_IP | IPSET_TYPE_MAC, .dimension = IPSET_DIM_TWO, .family = NFPROTO_IPV4, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = bitmap_ipmac_create, .create_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 8207d1fda528..b22048965d2a 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -20,12 +20,12 @@ #include #include -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counter support added */ +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 1 /* Counter support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("bitmap:port", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("bitmap:port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:port"); #define MTYPE bitmap_port @@ -333,8 +333,8 @@ static struct ip_set_type bitmap_port_type = { .features = IPSET_TYPE_PORT, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = bitmap_port_create, .create_policy = { [IPSET_ATTR_PORT] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 09a21dd5f120..68b9ccebabaa 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -17,9 +17,6 @@ #define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1) -#define CONCAT(a, b) a##b -#define TOKEN(a, b) CONCAT(a, b) - /* Hashing which uses arrays to resolve clashing. The hash table is resized * (doubled) when searching becomes too long. * Internally jhash is used with the assumption that the size of the @@ -222,41 +219,41 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #undef HKEY -#define mtype_data_equal TOKEN(MTYPE, _data_equal) +#define mtype_data_equal IPSET_TOKEN(MTYPE, _data_equal) #ifdef IP_SET_HASH_WITH_NETS -#define mtype_do_data_match TOKEN(MTYPE, _do_data_match) +#define mtype_do_data_match IPSET_TOKEN(MTYPE, _do_data_match) #else #define mtype_do_data_match(d) 1 #endif -#define mtype_data_set_flags TOKEN(MTYPE, _data_set_flags) -#define mtype_data_reset_flags TOKEN(MTYPE, _data_reset_flags) -#define mtype_data_netmask TOKEN(MTYPE, _data_netmask) -#define mtype_data_list TOKEN(MTYPE, _data_list) -#define mtype_data_next TOKEN(MTYPE, _data_next) -#define mtype_elem TOKEN(MTYPE, _elem) -#define mtype_add_cidr TOKEN(MTYPE, _add_cidr) -#define mtype_del_cidr TOKEN(MTYPE, _del_cidr) -#define mtype_ahash_memsize TOKEN(MTYPE, _ahash_memsize) -#define mtype_flush TOKEN(MTYPE, _flush) -#define mtype_destroy TOKEN(MTYPE, _destroy) -#define mtype_gc_init TOKEN(MTYPE, _gc_init) -#define mtype_same_set TOKEN(MTYPE, _same_set) -#define mtype_kadt TOKEN(MTYPE, _kadt) -#define mtype_uadt TOKEN(MTYPE, _uadt) +#define mtype_data_set_flags IPSET_TOKEN(MTYPE, _data_set_flags) +#define mtype_data_reset_flags IPSET_TOKEN(MTYPE, _data_reset_flags) +#define mtype_data_netmask IPSET_TOKEN(MTYPE, _data_netmask) +#define mtype_data_list IPSET_TOKEN(MTYPE, _data_list) +#define mtype_data_next IPSET_TOKEN(MTYPE, _data_next) +#define mtype_elem IPSET_TOKEN(MTYPE, _elem) +#define mtype_add_cidr IPSET_TOKEN(MTYPE, _add_cidr) +#define mtype_del_cidr IPSET_TOKEN(MTYPE, _del_cidr) +#define mtype_ahash_memsize IPSET_TOKEN(MTYPE, _ahash_memsize) +#define mtype_flush IPSET_TOKEN(MTYPE, _flush) +#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy) +#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) +#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) +#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) #define mtype MTYPE -#define mtype_elem TOKEN(MTYPE, _elem) -#define mtype_add TOKEN(MTYPE, _add) -#define mtype_del TOKEN(MTYPE, _del) -#define mtype_test_cidrs TOKEN(MTYPE, _test_cidrs) -#define mtype_test TOKEN(MTYPE, _test) -#define mtype_expire TOKEN(MTYPE, _expire) -#define mtype_resize TOKEN(MTYPE, _resize) -#define mtype_head TOKEN(MTYPE, _head) -#define mtype_list TOKEN(MTYPE, _list) -#define mtype_gc TOKEN(MTYPE, _gc) -#define mtype_variant TOKEN(MTYPE, _variant) -#define mtype_data_match TOKEN(MTYPE, _data_match) +#define mtype_elem IPSET_TOKEN(MTYPE, _elem) +#define mtype_add IPSET_TOKEN(MTYPE, _add) +#define mtype_del IPSET_TOKEN(MTYPE, _del) +#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) +#define mtype_test IPSET_TOKEN(MTYPE, _test) +#define mtype_expire IPSET_TOKEN(MTYPE, _expire) +#define mtype_resize IPSET_TOKEN(MTYPE, _resize) +#define mtype_head IPSET_TOKEN(MTYPE, _head) +#define mtype_list IPSET_TOKEN(MTYPE, _list) +#define mtype_gc IPSET_TOKEN(MTYPE, _gc) +#define mtype_variant IPSET_TOKEN(MTYPE, _variant) +#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) #ifndef HKEY_DATALEN #define HKEY_DATALEN sizeof(struct mtype_elem) @@ -941,13 +938,13 @@ nla_put_failure: } static int -TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt); +IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt); static int -TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); +IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); static const struct ip_set_type_variant mtype_variant = { .kadt = mtype_kadt, @@ -967,7 +964,7 @@ static const struct ip_set_type_variant mtype_variant = { #ifdef IP_SET_EMIT_CREATE static int -TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) +IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) { u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; u32 cadt_flags = 0; @@ -1045,9 +1042,9 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) set->data = h; if (set->family == NFPROTO_IPV4) - set->variant = &TOKEN(HTYPE, 4_variant); + set->variant = &IPSET_TOKEN(HTYPE, 4_variant); else - set->variant = &TOKEN(HTYPE, 6_variant); + set->variant = &IPSET_TOKEN(HTYPE, 6_variant); if (tb[IPSET_ATTR_CADT_FLAGS]) cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); @@ -1058,64 +1055,74 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; if (set->family == NFPROTO_IPV4) { - h->dsize = - sizeof(struct TOKEN(HTYPE, 4ct_elem)); + h->dsize = sizeof(struct + IPSET_TOKEN(HTYPE, 4ct_elem)); h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 4ct_elem), - timeout); + offsetof(struct + IPSET_TOKEN(HTYPE, 4ct_elem), + timeout); h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 4ct_elem), - counter); - TOKEN(HTYPE, 4_gc_init)(set, - TOKEN(HTYPE, 4_gc)); + offsetof(struct + IPSET_TOKEN(HTYPE, 4ct_elem), + counter); + IPSET_TOKEN(HTYPE, 4_gc_init)(set, + IPSET_TOKEN(HTYPE, 4_gc)); } else { - h->dsize = - sizeof(struct TOKEN(HTYPE, 6ct_elem)); + h->dsize = sizeof(struct + IPSET_TOKEN(HTYPE, 6ct_elem)); h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 6ct_elem), - timeout); + offsetof(struct + IPSET_TOKEN(HTYPE, 6ct_elem), + timeout); h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 6ct_elem), - counter); - TOKEN(HTYPE, 6_gc_init)(set, - TOKEN(HTYPE, 6_gc)); + offsetof(struct + IPSET_TOKEN(HTYPE, 6ct_elem), + counter); + IPSET_TOKEN(HTYPE, 6_gc_init)(set, + IPSET_TOKEN(HTYPE, 6_gc)); } } else { if (set->family == NFPROTO_IPV4) { h->dsize = - sizeof(struct TOKEN(HTYPE, 4c_elem)); + sizeof(struct + IPSET_TOKEN(HTYPE, 4c_elem)); h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 4c_elem), - counter); + offsetof(struct + IPSET_TOKEN(HTYPE, 4c_elem), + counter); } else { h->dsize = - sizeof(struct TOKEN(HTYPE, 6c_elem)); + sizeof(struct + IPSET_TOKEN(HTYPE, 6c_elem)); h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 6c_elem), - counter); + offsetof(struct + IPSET_TOKEN(HTYPE, 6c_elem), + counter); } } } else if (tb[IPSET_ATTR_TIMEOUT]) { h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; if (set->family == NFPROTO_IPV4) { - h->dsize = sizeof(struct TOKEN(HTYPE, 4t_elem)); + h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem)); h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 4t_elem), + offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem), timeout); - TOKEN(HTYPE, 4_gc_init)(set, TOKEN(HTYPE, 4_gc)); + IPSET_TOKEN(HTYPE, 4_gc_init)(set, + IPSET_TOKEN(HTYPE, 4_gc)); } else { - h->dsize = sizeof(struct TOKEN(HTYPE, 6t_elem)); + h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem)); h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 6t_elem), + offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem), timeout); - TOKEN(HTYPE, 6_gc_init)(set, TOKEN(HTYPE, 6_gc)); + IPSET_TOKEN(HTYPE, 6_gc_init)(set, + IPSET_TOKEN(HTYPE, 6_gc)); } } else { if (set->family == NFPROTO_IPV4) - h->dsize = sizeof(struct TOKEN(HTYPE, 4_elem)); + h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)); else - h->dsize = sizeof(struct TOKEN(HTYPE, 6_elem)); + h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)); } pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index c74e6e14cd93..de44fca687c3 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -23,12 +23,12 @@ #include #include -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counters support */ +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 1 /* Counters support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:ip", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip"); /* Type specific function prefix */ @@ -304,8 +304,8 @@ static struct ip_set_type hash_ip_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 7a2d2bd98d04..b514ff4e83ae 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -24,13 +24,13 @@ #include #include -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -#define REVISION_MAX 2 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +#define IPSET_TYPE_REV_MAX 2 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:ip,port", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port"); /* Type specific function prefix */ @@ -396,8 +396,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ipport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 34e8a1acce42..d05070d74604 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -24,13 +24,13 @@ #include #include -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -#define REVISION_MAX 2 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +#define IPSET_TYPE_REV_MAX 2 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:ip,port,ip", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip,port,ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port,ip"); /* Type specific function prefix */ @@ -414,8 +414,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, .dimension = IPSET_DIM_THREE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ipportip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 9a80d8bc9f18..7d1dede4ab6d 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -24,15 +24,15 @@ #include #include -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -/* 2 Range as input support for IPv4 added */ -/* 3 nomatch flag support added */ -#define REVISION_MAX 4 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +/* 2 Range as input support for IPv4 added */ +/* 3 nomatch flag support added */ +#define IPSET_TYPE_REV_MAX 4 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:ip,port,net", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port,net"); /* Type specific function prefix */ @@ -574,8 +574,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_THREE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ipportnet_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 223e9f546d0f..9cb9ef43d941 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -22,14 +22,14 @@ #include #include -#define REVISION_MIN 0 -/* 1 Range as input support for IPv4 added */ -/* 2 nomatch flag support added */ -#define REVISION_MAX 3 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 Range as input support for IPv4 added */ +/* 2 nomatch flag support added */ +#define IPSET_TYPE_REV_MAX 3 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:net", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net"); /* Type specific function prefix */ @@ -406,8 +406,8 @@ static struct ip_set_type hash_net_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_net_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 7d798d5d5cd3..2310fc29e89e 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -23,14 +23,14 @@ #include #include -#define REVISION_MIN 0 -/* 1 nomatch flag support added */ -/* 2 /0 support added */ -#define REVISION_MAX 3 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 nomatch flag support added */ +/* 2 /0 support added */ +#define IPSET_TYPE_REV_MAX 3 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:net,iface", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net,iface"); /* Interface name rbtree */ @@ -645,8 +645,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = { IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_netiface_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 09d6690bee6f..1601d489fdbc 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -23,15 +23,15 @@ #include #include -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -/* 2 Range as input support for IPv4 added */ -/* 3 nomatch flag support added */ -#define REVISION_MAX 4 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +/* 2 Range as input support for IPv4 added */ +/* 3 nomatch flag support added */ +#define IPSET_TYPE_REV_MAX 4 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:net,port", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:net,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net,port"); /* Type specific function prefix */ @@ -518,8 +518,8 @@ static struct ip_set_type hash_netport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_netport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 68299ee15847..a9e301f6e093 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -15,12 +15,12 @@ #include #include -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 1 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("list:set", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("list:set", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_list:set"); /* Member elements */ @@ -703,8 +703,8 @@ static struct ip_set_type list_set_type __read_mostly = { .features = IPSET_TYPE_NAME | IPSET_DUMP_LAST, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = list_set_create, .create_policy = { [IPSET_ATTR_SIZE] = { .type = NLA_U32 }, -- cgit v1.2.3-55-g7522 From 20b2fab483094d51c8d26784b81e12149474e0f2 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 1 May 2013 18:47:32 +0200 Subject: netfilter: ipset: Fix "may be used uninitialized" warnings Reported-by: Pablo Neira Ayuso Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_ip.c | 4 ++-- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 4 ++-- net/netfilter/ipset/ip_set_hash_ip.c | 2 +- net/netfilter/ipset/ip_set_hash_ipport.c | 2 +- net/netfilter/ipset/ip_set_hash_ipportip.c | 2 +- net/netfilter/ipset/ip_set_hash_ipportnet.c | 4 ++-- net/netfilter/ipset/ip_set_hash_net.c | 2 +- net/netfilter/ipset/ip_set_hash_netiface.c | 2 +- net/netfilter/ipset/ip_set_hash_netport.c | 2 +- 9 files changed, 12 insertions(+), 12 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index c2f89b1c1d92..ce99d2652d47 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -131,7 +131,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], { struct bitmap_ip *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - u32 ip, ip_to; + u32 ip = 0, ip_to = 0; struct bitmap_ip_adt_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(map); int ret = 0; @@ -264,7 +264,7 @@ static int bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { struct bitmap_ip *map; - u32 first_ip, last_ip, hosts, cadt_flags = 0; + u32 first_ip = 0, last_ip = 0, hosts, cadt_flags = 0; u64 elements; u8 netmask = 32; int ret; diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 1d6551cc590e..6d5bad93026b 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -246,7 +246,7 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ipmac_adt_elem e = {}; struct ip_set_ext ext = IP_SET_INIT_UEXT(map); - u32 ip; + u32 ip = 0; int ret = 0; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -355,7 +355,7 @@ static int bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { - u32 first_ip, last_ip, cadt_flags = 0; + u32 first_ip = 0, last_ip = 0, cadt_flags = 0; u64 elements; struct bitmap_ipmac *map; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index de44fca687c3..260c9a80d8a5 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -119,7 +119,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = {}; struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip, ip_to, hosts; + u32 ip = 0, ip_to = 0, hosts; int ret = 0; if (unlikely(!tb[IPSET_ATTR_IP] || diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index b514ff4e83ae..64caad35a391 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -137,7 +137,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip, ip_to, p = 0, port, port_to; + u32 ip, ip_to = 0, p = 0, port, port_to; bool with_ports = false; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index d05070d74604..2873bbc20d7a 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -142,7 +142,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip, ip_to, p = 0, port, port_to; + u32 ip, ip_to = 0, p = 0, port, port_to; bool with_ports = false; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 7d1dede4ab6d..f111558c4597 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -196,8 +196,8 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip, ip_to, p = 0, port, port_to; - u32 ip2_from, ip2_to, ip2_last, ip2; + u32 ip = 0, ip_to = 0, p = 0, port, port_to; + u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; bool with_ports = false; u8 cidr; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 9cb9ef43d941..0a64dad156d9 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -166,7 +166,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip = 0, ip_to, last; + u32 ip = 0, ip_to = 0, last; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 2310fc29e89e..846371b08630 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -320,7 +320,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip = 0, ip_to, last; + u32 ip = 0, ip_to = 0, last; char iface[IFNAMSIZ]; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 1601d489fdbc..d98a685cd916 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -187,7 +187,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 port, port_to, p = 0, ip = 0, ip_to, last; + u32 port, port_to, p = 0, ip = 0, ip_to = 0, last; bool with_ports = false; u8 cidr; int ret; -- cgit v1.2.3-55-g7522 From 60b0fe372499f66e0c965dc0594320438a3b711c Mon Sep 17 00:00:00 2001 From: Anders K. Pedersen Date: Fri, 3 May 2013 23:15:48 +0200 Subject: netfilter: ipset: Support package fragments for IPv4 protos without ports Enable ipset port set types to match IPv4 package fragments for protocols that doesn't have ports (or the port information isn't supported by ipset). For example this allows a hash:ip,port ipset containing the entry 192.168.0.1,gre:0 to match all package fragments for PPTP VPN tunnels to/from the host. Without this patch only the first package fragment (with fragment offset 0) was matched, while subsequent fragments wasn't. This is not possible for IPv6, where the protocol is in the fragmented part of the package unlike IPv4, where the protocol is in the IP header. IPPROTO_ICMPV6 is deliberately not included, because it isn't relevant for IPv4. Signed-off-by: Anders K. Pedersen Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_getport.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index dac156f819ac..29fb01ddff93 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -102,9 +102,25 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src, int protocol = iph->protocol; /* See comments at tcp_match in ip_tables.c */ - if (protocol <= 0 || (ntohs(iph->frag_off) & IP_OFFSET)) + if (protocol <= 0) return false; + if (ntohs(iph->frag_off) & IP_OFFSET) + switch (protocol) { + case IPPROTO_TCP: + case IPPROTO_SCTP: + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_ICMP: + /* Port info not available for fragment offset > 0 */ + return false; + default: + /* Other protocols doesn't have ports, + so we can match fragments */ + *proto = protocol; + return true; + } + return get_port(skb, protocol, protooff, src, port, proto); } EXPORT_SYMBOL_GPL(ip_set_get_ip4_port); -- cgit v1.2.3-55-g7522 From bd3129fc5e8868df74ffa803c7fec527976bbf1c Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 30 Sep 2013 07:49:47 +0200 Subject: netfilter: ipset: order matches and targets separatedly in xt_set.c Signed-off-by: Jozsef Kadlecsik --- net/netfilter/xt_set.c | 188 +++++++++++++++++++++++++------------------------ 1 file changed, 96 insertions(+), 92 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 31790e789e22..2095488684ff 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -109,6 +109,101 @@ set_match_v0_destroy(const struct xt_mtdtor_param *par) ip_set_nfnl_put(info->match_set.index); } +/* Revision 1 match */ + +static bool +set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_set_info_match_v1 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.dim, + info->match_set.flags, 0, UINT_MAX); + + if (opt.flags & IPSET_RETURN_NOMATCH) + opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH; + + return match_set(info->match_set.index, skb, par, &opt, + info->match_set.flags & IPSET_INV_MATCH); +} + +static int +set_match_v1_checkentry(const struct xt_mtchk_param *par) +{ + struct xt_set_info_match_v1 *info = par->matchinfo; + ip_set_id_t index; + + index = ip_set_nfnl_get_byindex(info->match_set.index); + + if (index == IPSET_INVALID_ID) { + pr_warning("Cannot find set indentified by id %u to match\n", + info->match_set.index); + return -ENOENT; + } + if (info->match_set.dim > IPSET_DIM_MAX) { + pr_warning("Protocol error: set match dimension " + "is over the limit!\n"); + ip_set_nfnl_put(info->match_set.index); + return -ERANGE; + } + + return 0; +} + +static void +set_match_v1_destroy(const struct xt_mtdtor_param *par) +{ + struct xt_set_info_match_v1 *info = par->matchinfo; + + ip_set_nfnl_put(info->match_set.index); +} + +/* Revision 3 match */ + +static bool +match_counter(u64 counter, const struct ip_set_counter_match *info) +{ + switch (info->op) { + case IPSET_COUNTER_NONE: + return true; + case IPSET_COUNTER_EQ: + return counter == info->value; + case IPSET_COUNTER_NE: + return counter != info->value; + case IPSET_COUNTER_LT: + return counter < info->value; + case IPSET_COUNTER_GT: + return counter > info->value; + } + return false; +} + +static bool +set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_set_info_match_v3 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.dim, + info->match_set.flags, info->flags, UINT_MAX); + int ret; + + if (info->packets.op != IPSET_COUNTER_NONE || + info->bytes.op != IPSET_COUNTER_NONE) + opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS; + + ret = match_set(info->match_set.index, skb, par, &opt, + info->match_set.flags & IPSET_INV_MATCH); + + if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS)) + return ret; + + if (!match_counter(opt.ext.packets, &info->packets)) + return 0; + return match_counter(opt.ext.bytes, &info->bytes); +} + +#define set_match_v3_checkentry set_match_v1_checkentry +#define set_match_v3_destroy set_match_v1_destroy + +/* Revision 0 interface: backward compatible with netfilter/iptables */ + static unsigned int set_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { @@ -180,52 +275,7 @@ set_target_v0_destroy(const struct xt_tgdtor_param *par) ip_set_nfnl_put(info->del_set.index); } -/* Revision 1 match and target */ - -static bool -set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) -{ - const struct xt_set_info_match_v1 *info = par->matchinfo; - ADT_OPT(opt, par->family, info->match_set.dim, - info->match_set.flags, 0, UINT_MAX); - - if (opt.flags & IPSET_RETURN_NOMATCH) - opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH; - - return match_set(info->match_set.index, skb, par, &opt, - info->match_set.flags & IPSET_INV_MATCH); -} - -static int -set_match_v1_checkentry(const struct xt_mtchk_param *par) -{ - struct xt_set_info_match_v1 *info = par->matchinfo; - ip_set_id_t index; - - index = ip_set_nfnl_get_byindex(info->match_set.index); - - if (index == IPSET_INVALID_ID) { - pr_warning("Cannot find set indentified by id %u to match\n", - info->match_set.index); - return -ENOENT; - } - if (info->match_set.dim > IPSET_DIM_MAX) { - pr_warning("Protocol error: set match dimension " - "is over the limit!\n"); - ip_set_nfnl_put(info->match_set.index); - return -ERANGE; - } - - return 0; -} - -static void -set_match_v1_destroy(const struct xt_mtdtor_param *par) -{ - struct xt_set_info_match_v1 *info = par->matchinfo; - - ip_set_nfnl_put(info->match_set.index); -} +/* Revision 1 target */ static unsigned int set_target_v1(struct sk_buff *skb, const struct xt_action_param *par) @@ -320,52 +370,6 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) #define set_target_v2_checkentry set_target_v1_checkentry #define set_target_v2_destroy set_target_v1_destroy -/* Revision 3 match */ - -static bool -match_counter(u64 counter, const struct ip_set_counter_match *info) -{ - switch (info->op) { - case IPSET_COUNTER_NONE: - return true; - case IPSET_COUNTER_EQ: - return counter == info->value; - case IPSET_COUNTER_NE: - return counter != info->value; - case IPSET_COUNTER_LT: - return counter < info->value; - case IPSET_COUNTER_GT: - return counter > info->value; - } - return false; -} - -static bool -set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) -{ - const struct xt_set_info_match_v3 *info = par->matchinfo; - ADT_OPT(opt, par->family, info->match_set.dim, - info->match_set.flags, info->flags, UINT_MAX); - int ret; - - if (info->packets.op != IPSET_COUNTER_NONE || - info->bytes.op != IPSET_COUNTER_NONE) - opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS; - - ret = match_set(info->match_set.index, skb, par, &opt, - info->match_set.flags & IPSET_INV_MATCH); - - if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS)) - return ret; - - if (!match_counter(opt.ext.packets, &info->packets)) - return 0; - return match_counter(opt.ext.bytes, &info->bytes); -} - -#define set_match_v3_checkentry set_match_v1_checkentry -#define set_match_v3_destroy set_match_v1_destroy - static struct xt_match set_matches[] __read_mostly = { { .name = "set", -- cgit v1.2.3-55-g7522 From 5e04c0c38c90f1f11a0e87800e4c22d4aba1d733 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 30 Sep 2013 07:57:18 +0200 Subject: netfilter: ipset: Introduce new operation to get both setname and family ip[6]tables set match and SET target need to know the family of the set in order to reject adding rules which refer to a set with a non-mathcing family. Currently such rules are silently accepted and then ignored instead of generating a clear error message to the user, which is not helpful. Signed-off-by: Jozsef Kadlecsik --- include/uapi/linux/netfilter/ipset/ip_set.h | 8 ++++++++ net/netfilter/ipset/ip_set_core.c | 17 +++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'net/netfilter') diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 8024cdf13b70..2b61ac44dcc1 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -250,6 +250,14 @@ struct ip_set_req_get_set { #define IP_SET_OP_GET_BYINDEX 0x00000007 /* Get set name by index */ /* Uses ip_set_req_get_set */ +#define IP_SET_OP_GET_FNAME 0x00000008 /* Get set index and family */ +struct ip_set_req_get_set_family { + unsigned int op; + unsigned int version; + unsigned int family; + union ip_set_name_index set; +}; + #define IP_SET_OP_VERSION 0x00000100 /* Ask kernel version */ struct ip_set_req_version { unsigned int op; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index f2e30fb31e78..428c30a8586f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1788,6 +1788,23 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; } + case IP_SET_OP_GET_FNAME: { + struct ip_set_req_get_set_family *req_get = data; + ip_set_id_t id; + + if (*len != sizeof(struct ip_set_req_get_set_family)) { + ret = -EINVAL; + goto done; + } + req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; + nfnl_lock(NFNL_SUBSYS_IPSET); + find_set_and_id(req_get->set.name, &id); + req_get->set.index = id; + if (id != IPSET_INVALID_ID) + req_get->family = nfnl_set(id)->family; + nfnl_unlock(NFNL_SUBSYS_IPSET); + goto copy; + } case IP_SET_OP_GET_BYINDEX: { struct ip_set_req_get_set *req_get = data; struct ip_set *set; -- cgit v1.2.3-55-g7522 From a04d8b6bd9113f3e7f0c216dcaa3c1ad498f2a96 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 30 Sep 2013 09:05:54 +0200 Subject: netfilter: ipset: Prepare ipset to support multiple networks for hash types In order to support hash:net,net, hash:net,port,net etc. types, arrays are introduced for the book-keeping of existing cidr sizes and network numbers in a set. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 2 + net/netfilter/ipset/ip_set_hash_gen.h | 78 +++++++++++++++-------------- net/netfilter/ipset/ip_set_hash_ipportnet.c | 4 +- net/netfilter/ipset/ip_set_hash_net.c | 4 +- net/netfilter/ipset/ip_set_hash_netiface.c | 4 +- net/netfilter/ipset/ip_set_hash_netport.c | 4 +- 6 files changed, 50 insertions(+), 46 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 69aa60487f05..56012a3431b2 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -398,6 +398,8 @@ bitmap_bytes(u32 a, u32 b) { .bytes = ULLONG_MAX, .packets = ULLONG_MAX, \ .timeout = (map)->timeout } +#define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b)) + #define IPSET_CONCAT(a, b) a##b #define IPSET_TOKEN(a, b) IPSET_CONCAT(a, b) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 68b9ccebabaa..a8332404d9b4 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -77,10 +77,14 @@ struct htable { #define hbucket(h, i) (&((h)->bucket[i])) +#ifndef IPSET_NET_COUNT +#define IPSET_NET_COUNT 1 +#endif + /* Book-keeping of the prefixes added to the set */ struct net_prefixes { - u8 cidr; /* the different cidr values in the set */ - u32 nets; /* number of elements per cidr */ + u32 nets[IPSET_NET_COUNT]; /* number of elements per cidr */ + u8 cidr[IPSET_NET_COUNT]; /* the different cidr values in the set */ }; /* Compute the hash table size */ @@ -165,13 +169,13 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128) #ifdef IP_SET_HASH_WITH_MULTI -#define NETS_LENGTH(family) (SET_HOST_MASK(family) + 1) +#define NLEN(family) (SET_HOST_MASK(family) + 1) #else -#define NETS_LENGTH(family) SET_HOST_MASK(family) +#define NLEN(family) SET_HOST_MASK(family) #endif #else -#define NETS_LENGTH(family) 0 +#define NLEN(family) 0 #endif /* IP_SET_HASH_WITH_NETS */ #define ext_timeout(e, h) \ @@ -296,49 +300,49 @@ struct htype { /* Network cidr size book keeping when the hash stores different * sized networks */ static void -mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length) +mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) { int i, j; /* Add in increasing prefix order, so larger cidr first */ - for (i = 0, j = -1; i < nets_length && h->nets[i].nets; i++) { + for (i = 0, j = -1; i < nets_length && h->nets[i].nets[n]; i++) { if (j != -1) continue; - else if (h->nets[i].cidr < cidr) + else if (h->nets[i].cidr[n] < cidr) j = i; - else if (h->nets[i].cidr == cidr) { - h->nets[i].nets++; + else if (h->nets[i].cidr[n] == cidr) { + h->nets[i].nets[n]++; return; } } if (j != -1) { for (; i > j; i--) { - h->nets[i].cidr = h->nets[i - 1].cidr; - h->nets[i].nets = h->nets[i - 1].nets; + h->nets[i].cidr[n] = h->nets[i - 1].cidr[n]; + h->nets[i].nets[n] = h->nets[i - 1].nets[n]; } } - h->nets[i].cidr = cidr; - h->nets[i].nets = 1; + h->nets[i].cidr[n] = cidr; + h->nets[i].nets[n] = 1; } static void -mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length) +mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) { u8 i, j, net_end = nets_length - 1; for (i = 0; i < nets_length; i++) { - if (h->nets[i].cidr != cidr) + if (h->nets[i].cidr[n] != cidr) continue; - if (h->nets[i].nets > 1 || i == net_end || - h->nets[i + 1].nets == 0) { - h->nets[i].nets--; + if (h->nets[i].nets[n] > 1 || i == net_end || + h->nets[i + 1].nets[n] == 0) { + h->nets[i].nets[n]--; return; } - for (j = i; j < net_end && h->nets[j].nets; j++) { - h->nets[j].cidr = h->nets[j + 1].cidr; - h->nets[j].nets = h->nets[j + 1].nets; + for (j = i; j < net_end && h->nets[j].nets[n]; j++) { + h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; + h->nets[j].nets[n] = h->nets[j + 1].nets[n]; } - h->nets[j].nets = 0; + h->nets[j].nets[n] = 0; return; } } @@ -382,8 +386,7 @@ mtype_flush(struct ip_set *set) } } #ifdef IP_SET_HASH_WITH_NETS - memset(h->nets, 0, sizeof(struct net_prefixes) - * NETS_LENGTH(set->family)); + memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family)); #endif h->elements = 0; } @@ -459,7 +462,7 @@ mtype_expire(struct htype *h, u8 nets_length, size_t dsize) pr_debug("expired %u/%u\n", i, j); #ifdef IP_SET_HASH_WITH_NETS mtype_del_cidr(h, CIDR(data->cidr), - nets_length); + nets_length, 0); #endif if (j != n->pos - 1) /* Not last one */ @@ -494,7 +497,7 @@ mtype_gc(unsigned long ul_set) pr_debug("called\n"); write_lock_bh(&set->lock); - mtype_expire(h, NETS_LENGTH(set->family), h->dsize); + mtype_expire(h, NLEN(set->family), h->dsize); write_unlock_bh(&set->lock); h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; @@ -523,8 +526,7 @@ mtype_resize(struct ip_set *set, bool retried) if (SET_WITH_TIMEOUT(set) && !retried) { i = h->elements; write_lock_bh(&set->lock); - mtype_expire(set->data, NETS_LENGTH(set->family), - h->dsize); + mtype_expire(set->data, NLEN(set->family), h->dsize); write_unlock_bh(&set->lock); if (h->elements < i) return 0; @@ -607,7 +609,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem) /* FIXME: when set is full, we slow down here */ - mtype_expire(h, NETS_LENGTH(set->family), h->dsize); + mtype_expire(h, NLEN(set->family), h->dsize); if (h->elements >= h->maxelem) { if (net_ratelimit()) @@ -645,8 +647,8 @@ reuse_slot: /* Fill out reused slot */ data = ahash_data(n, j, h->dsize); #ifdef IP_SET_HASH_WITH_NETS - mtype_del_cidr(h, CIDR(data->cidr), NETS_LENGTH(set->family)); - mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); + mtype_del_cidr(h, CIDR(data->cidr), NLEN(set->family), 0); + mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); #endif } else { /* Use/create a new slot */ @@ -659,7 +661,7 @@ reuse_slot: } data = ahash_data(n, n->pos++, h->dsize); #ifdef IP_SET_HASH_WITH_NETS - mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); + mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); #endif h->elements++; } @@ -711,7 +713,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, n->pos--; h->elements--; #ifdef IP_SET_HASH_WITH_NETS - mtype_del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); + mtype_del_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); #endif if (n->pos + AHASH_INIT_SIZE < n->size) { void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) @@ -760,11 +762,11 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, struct mtype_elem *data; int i, j = 0; u32 key, multi = 0; - u8 nets_length = NETS_LENGTH(set->family); + u8 nets_length = NLEN(set->family); pr_debug("test by nets\n"); - for (; j < nets_length && h->nets[j].nets && !multi; j++) { - mtype_data_netmask(d, h->nets[j].cidr); + for (; j < nets_length && h->nets[j].nets[0] && !multi; j++) { + mtype_data_netmask(d, h->nets[j].cidr[0]); key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { @@ -839,7 +841,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) size_t memsize; t = rcu_dereference_bh_nfnl(h->table); - memsize = mtype_ahash_memsize(h, t, NETS_LENGTH(set->family)); + memsize = mtype_ahash_memsize(h, t, NLEN(set->family)); nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index f111558c4597..6ce5a8e2c44e 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -170,7 +170,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1 + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); @@ -454,7 +454,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1 + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 0a64dad156d9..ec1c7dc10489 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -143,7 +143,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); @@ -338,7 +338,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net6_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 846371b08630..814b4e31a7f8 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -265,7 +265,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK, + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); @@ -534,7 +534,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK, + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index d98a685cd916..3bd923d3f179 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -162,7 +162,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1 + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); @@ -407,7 +407,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1, + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); -- cgit v1.2.3-55-g7522 From f925f7056920213889c5e61445f9529f1a86ae41 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 6 Sep 2013 22:31:40 +0200 Subject: netfilter: ipset: Rename extension offset ids to extension ids Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 16 ++++++++-------- net/netfilter/ipset/ip_set_bitmap_gen.h | 4 ++-- net/netfilter/ipset/ip_set_bitmap_ip.c | 10 +++++----- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 10 +++++----- net/netfilter/ipset/ip_set_bitmap_port.c | 10 +++++----- net/netfilter/ipset/ip_set_hash_gen.h | 22 +++++++++++----------- net/netfilter/ipset/ip_set_list_set.c | 14 +++++++------- 7 files changed, 43 insertions(+), 43 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 56012a3431b2..b4db7912bf0d 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -56,20 +56,20 @@ enum ip_set_extension { IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER), }; -/* Extension offsets */ -enum ip_set_offset { - IPSET_OFFSET_TIMEOUT = 0, - IPSET_OFFSET_COUNTER, - IPSET_OFFSET_MAX, -}; - #define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT) #define SET_WITH_COUNTER(s) ((s)->extensions & IPSET_EXT_COUNTER) +/* Extension id, in size order */ +enum ip_set_ext_id { + IPSET_EXT_ID_COUNTER = 0, + IPSET_EXT_ID_TIMEOUT, + IPSET_EXT_ID_MAX, +}; + struct ip_set_ext { - u32 timeout; u64 packets; u64 bytes; + u32 timeout; }; struct ip_set; diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index d39905e7e881..889a929b908f 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -33,9 +33,9 @@ #define mtype MTYPE #define ext_timeout(e, m) \ - (unsigned long *)((e) + (m)->offset[IPSET_OFFSET_TIMEOUT]) + (unsigned long *)((e) + (m)->offset[IPSET_EXT_ID_TIMEOUT]) #define ext_counter(e, m) \ - (struct ip_set_counter *)((e) + (m)->offset[IPSET_OFFSET_COUNTER]) + (struct ip_set_counter *)((e) + (m)->offset[IPSET_EXT_ID_COUNTER]) #define get_ext(map, id) ((map)->extensions + (map)->dsize * (id)) static void diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index ce99d2652d47..2ee210ef49a2 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -45,7 +45,7 @@ struct bitmap_ip { u32 hosts; /* number of hosts in a subnet */ size_t memsize; /* members size */ size_t dsize; /* extensions struct size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ u8 netmask; /* subnet netmask */ u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ @@ -342,9 +342,9 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { map->dsize = sizeof(struct bitmap_ipct_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipct_elem, counter); if (!init_map_ip(set, map, first_ip, last_ip, @@ -360,7 +360,7 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) bitmap_ip_gc_init(set, bitmap_ip_gc); } else { map->dsize = sizeof(struct bitmap_ipc_elem); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipc_elem, counter); if (!init_map_ip(set, map, first_ip, last_ip, @@ -371,7 +371,7 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) } } else if (tb[IPSET_ATTR_TIMEOUT]) { map->dsize = sizeof(struct bitmap_ipt_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipt_elem, timeout); if (!init_map_ip(set, map, first_ip, last_ip, diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 6d5bad93026b..e711875ea452 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -52,7 +52,7 @@ struct bitmap_ipmac { struct timer_list gc; /* garbage collector */ size_t memsize; /* members size */ size_t dsize; /* size of element */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ }; /* ADT structure for generic function args */ @@ -405,9 +405,9 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { map->dsize = sizeof(struct bitmap_ipmacct_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipmacct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipmacct_elem, counter); if (!init_map_ipmac(set, map, first_ip, last_ip, @@ -421,7 +421,7 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); } else { map->dsize = sizeof(struct bitmap_ipmacc_elem); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipmacc_elem, counter); if (!init_map_ipmac(set, map, first_ip, last_ip, @@ -432,7 +432,7 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], } } else if (tb[IPSET_ATTR_TIMEOUT]) { map->dsize = sizeof(struct bitmap_ipmact_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipmact_elem, timeout); if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index b22048965d2a..bebc137a5737 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -39,7 +39,7 @@ struct bitmap_port { u32 elements; /* number of max elements in the set */ size_t memsize; /* members size */ size_t dsize; /* extensions struct size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ }; @@ -282,9 +282,9 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { map->dsize = sizeof(struct bitmap_portct_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_portct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_portct_elem, counter); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); @@ -297,7 +297,7 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) bitmap_port_gc_init(set, bitmap_port_gc); } else { map->dsize = sizeof(struct bitmap_portc_elem); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_portc_elem, counter); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); @@ -306,7 +306,7 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) } } else if (tb[IPSET_ATTR_TIMEOUT]) { map->dsize = sizeof(struct bitmap_portt_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_portt_elem, timeout); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index a8332404d9b4..e4db9250f337 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -179,9 +179,9 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #endif /* IP_SET_HASH_WITH_NETS */ #define ext_timeout(e, h) \ -(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_TIMEOUT]) +(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_TIMEOUT]) #define ext_counter(e, h) \ -(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_COUNTER]) +(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_COUNTER]) #endif /* _IP_SET_HASH_GEN_H */ @@ -278,7 +278,7 @@ struct htype { u32 initval; /* random jhash init value */ u32 timeout; /* timeout value, if enabled */ size_t dsize; /* data struct size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ struct timer_list gc; /* garbage collection when timeout enabled */ struct mtype_elem next; /* temporary storage for uadd */ #ifdef IP_SET_HASH_WITH_MULTI @@ -1059,11 +1059,11 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) if (set->family == NFPROTO_IPV4) { h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4ct_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = + h->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 4ct_elem), timeout); - h->offset[IPSET_OFFSET_COUNTER] = + h->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 4ct_elem), counter); @@ -1072,11 +1072,11 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) } else { h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6ct_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = + h->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 6ct_elem), timeout); - h->offset[IPSET_OFFSET_COUNTER] = + h->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 6ct_elem), counter); @@ -1088,7 +1088,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4c_elem)); - h->offset[IPSET_OFFSET_COUNTER] = + h->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 4c_elem), counter); @@ -1096,7 +1096,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6c_elem)); - h->offset[IPSET_OFFSET_COUNTER] = + h->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 6c_elem), counter); @@ -1107,14 +1107,14 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) set->extensions |= IPSET_EXT_TIMEOUT; if (set->family == NFPROTO_IPV4) { h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = + h->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem), timeout); IPSET_TOKEN(HTYPE, 4_gc_init)(set, IPSET_TOKEN(HTYPE, 4_gc)); } else { h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = + h->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem), timeout); IPSET_TOKEN(HTYPE, 6_gc_init)(set, diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index a9e301f6e093..0ed19b5e675e 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -59,7 +59,7 @@ struct set_adt_elem { /* Type structure */ struct list_set { size_t dsize; /* element size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ u32 size; /* size of set list array */ u32 timeout; /* timeout value */ struct timer_list gc; /* garbage collection */ @@ -73,9 +73,9 @@ list_set_elem(const struct list_set *map, u32 id) } #define ext_timeout(e, m) \ -(unsigned long *)((void *)(e) + (m)->offset[IPSET_OFFSET_TIMEOUT]) +(unsigned long *)((void *)(e) + (m)->offset[IPSET_EXT_ID_TIMEOUT]) #define ext_counter(e, m) \ -(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_OFFSET_COUNTER]) +(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_EXT_ID_COUNTER]) static int list_set_ktest(struct ip_set *set, const struct sk_buff *skb, @@ -667,9 +667,9 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (!map) return -ENOMEM; set->extensions |= IPSET_EXT_TIMEOUT; - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct setct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct setct_elem, counter); list_set_gc_init(set, list_set_gc); } else { @@ -677,7 +677,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) sizeof(struct setc_elem), 0); if (!map) return -ENOMEM; - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct setc_elem, counter); } } else if (tb[IPSET_ATTR_TIMEOUT]) { @@ -686,7 +686,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (!map) return -ENOMEM; set->extensions |= IPSET_EXT_TIMEOUT; - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct sett_elem, timeout); list_set_gc_init(set, list_set_gc); } else { -- cgit v1.2.3-55-g7522 From ca134ce86451f3f5ac45ffbf1494a1f42110bf93 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 7 Sep 2013 00:10:07 +0200 Subject: netfilter: ipset: Move extension data to set structure Default timeout and extension offsets are moved to struct set, because all set types supports all extensions and it makes possible to generalize extension support. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 29 ++++-- include/linux/netfilter/ipset/ip_set_timeout.h | 4 +- net/netfilter/ipset/ip_set_bitmap_gen.h | 59 ++++++------ net/netfilter/ipset/ip_set_bitmap_ip.c | 45 +++++---- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 64 ++++++------- net/netfilter/ipset/ip_set_bitmap_port.c | 44 ++++----- net/netfilter/ipset/ip_set_hash_gen.h | 127 ++++++++++++------------- net/netfilter/ipset/ip_set_hash_ip.c | 8 +- net/netfilter/ipset/ip_set_hash_ipport.c | 10 +- net/netfilter/ipset/ip_set_hash_ipportip.c | 10 +- net/netfilter/ipset/ip_set_hash_ipportnet.c | 14 +-- net/netfilter/ipset/ip_set_hash_net.c | 9 +- net/netfilter/ipset/ip_set_hash_netiface.c | 8 +- net/netfilter/ipset/ip_set_hash_netport.c | 8 +- net/netfilter/ipset/ip_set_list_set.c | 116 ++++++++++------------ 15 files changed, 266 insertions(+), 289 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index b4db7912bf0d..992a2f58dbd3 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -72,6 +72,16 @@ struct ip_set_ext { u32 timeout; }; +struct ip_set_counter { + atomic64_t bytes; + atomic64_t packets; +}; + +#define ext_timeout(e, s) \ +(unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT]) +#define ext_counter(e, s) \ +(struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER]) + struct ip_set; typedef int (*ipset_adtfn)(struct ip_set *set, void *value, @@ -179,15 +189,16 @@ struct ip_set { u8 revision; /* Extensions */ u8 extensions; + /* Default timeout value, if enabled */ + u32 timeout; + /* Element data size */ + size_t dsize; + /* Offsets to extensions in elements */ + size_t offset[IPSET_EXT_ID_MAX]; /* The type specific data */ void *data; }; -struct ip_set_counter { - atomic64_t bytes; - atomic64_t packets; -}; - static inline void ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter) { @@ -390,13 +401,13 @@ bitmap_bytes(u32 a, u32 b) #include -#define IP_SET_INIT_KEXT(skb, opt, map) \ +#define IP_SET_INIT_KEXT(skb, opt, set) \ { .bytes = (skb)->len, .packets = 1, \ - .timeout = ip_set_adt_opt_timeout(opt, map) } + .timeout = ip_set_adt_opt_timeout(opt, set) } -#define IP_SET_INIT_UEXT(map) \ +#define IP_SET_INIT_UEXT(set) \ { .bytes = ULLONG_MAX, .packets = ULLONG_MAX, \ - .timeout = (map)->timeout } + .timeout = (set)->timeout } #define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b)) diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 3aac04167ca7..83c2f9e0886c 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -23,8 +23,8 @@ /* Set is defined with timeout support: timeout value may be 0 */ #define IPSET_NO_TIMEOUT UINT_MAX -#define ip_set_adt_opt_timeout(opt, map) \ -((opt)->ext.timeout != IPSET_NO_TIMEOUT ? (opt)->ext.timeout : (map)->timeout) +#define ip_set_adt_opt_timeout(opt, set) \ +((opt)->ext.timeout != IPSET_NO_TIMEOUT ? (opt)->ext.timeout : (set)->timeout) static inline unsigned int ip_set_timeout_uget(struct nlattr *tb) diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 889a929b908f..f32ddbca3923 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -32,11 +32,7 @@ #define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype MTYPE -#define ext_timeout(e, m) \ - (unsigned long *)((e) + (m)->offset[IPSET_EXT_ID_TIMEOUT]) -#define ext_counter(e, m) \ - (struct ip_set_counter *)((e) + (m)->offset[IPSET_EXT_ID_COUNTER]) -#define get_ext(map, id) ((map)->extensions + (map)->dsize * (id)) +#define get_ext(set, map, id) ((map)->extensions + (set)->dsize * (id)) static void mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) @@ -46,7 +42,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) init_timer(&map->gc); map->gc.data = (unsigned long) set; map->gc.function = gc; - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } @@ -59,7 +55,7 @@ mtype_destroy(struct ip_set *set) del_timer_sync(&map->gc); ip_set_free(map->members); - if (map->dsize) + if (set->dsize) ip_set_free(map->extensions); kfree(map); @@ -88,9 +84,9 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->memsize + - map->dsize * map->elements)) || + set->dsize * map->elements)) || (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) || (SET_WITH_COUNTER(set) && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(IPSET_FLAG_WITH_COUNTERS)))) @@ -108,16 +104,16 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct mtype *map = set->data; const struct mtype_adt_elem *e = value; - void *x = get_ext(map, e->id); - int ret = mtype_do_test(e, map); + void *x = get_ext(set, map, e->id); + int ret = mtype_do_test(e, map, set->dsize); if (ret <= 0) return ret; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, map))) + ip_set_timeout_expired(ext_timeout(x, set))) return 0; if (SET_WITH_COUNTER(set)) - ip_set_update_counter(ext_counter(x, map), ext, mext, flags); + ip_set_update_counter(ext_counter(x, set), ext, mext, flags); return 1; } @@ -127,12 +123,12 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct mtype *map = set->data; const struct mtype_adt_elem *e = value; - void *x = get_ext(map, e->id); - int ret = mtype_do_add(e, map, flags); + void *x = get_ext(set, map, e->id); + int ret = mtype_do_add(e, map, flags, set->dsize); if (ret == IPSET_ADD_FAILED) { if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, map))) + ip_set_timeout_expired(ext_timeout(x, set))) ret = 0; else if (!(flags & IPSET_FLAG_EXIST)) return -IPSET_ERR_EXIST; @@ -140,13 +136,13 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_TIMEOUT(set)) #ifdef IP_SET_BITMAP_STORED_TIMEOUT - mtype_add_timeout(ext_timeout(x, map), e, ext, map, ret); + mtype_add_timeout(ext_timeout(x, set), e, ext, set, map, ret); #else - ip_set_timeout_set(ext_timeout(x, map), ext->timeout); + ip_set_timeout_set(ext_timeout(x, set), ext->timeout); #endif if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(x, map), ext); + ip_set_init_counter(ext_counter(x, set), ext); return 0; } @@ -156,11 +152,11 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct mtype *map = set->data; const struct mtype_adt_elem *e = value; - const void *x = get_ext(map, e->id); + const void *x = get_ext(set, map, e->id); if (mtype_do_del(e, map) || (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, map)))) + ip_set_timeout_expired(ext_timeout(x, set)))) return -IPSET_ERR_EXIST; return 0; @@ -180,13 +176,13 @@ mtype_list(const struct ip_set *set, return -EMSGSIZE; for (; cb->args[2] < map->elements; cb->args[2]++) { id = cb->args[2]; - x = get_ext(map, id); + x = get_ext(set, map, id); if (!test_bit(id, map->members) || (SET_WITH_TIMEOUT(set) && #ifdef IP_SET_BITMAP_STORED_TIMEOUT mtype_is_filled((const struct mtype_elem *) x) && #endif - ip_set_timeout_expired(ext_timeout(x, map)))) + ip_set_timeout_expired(ext_timeout(x, set)))) continue; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { @@ -196,23 +192,24 @@ mtype_list(const struct ip_set *set, } else goto nla_put_failure; } - if (mtype_do_list(skb, map, id)) + if (mtype_do_list(skb, map, id, set->dsize)) goto nla_put_failure; if (SET_WITH_TIMEOUT(set)) { #ifdef IP_SET_BITMAP_STORED_TIMEOUT if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(ip_set_timeout_stored(map, id, - ext_timeout(x, map))))) + ext_timeout(x, set), + set->dsize)))) goto nla_put_failure; #else if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(ip_set_timeout_get( - ext_timeout(x, map))))) + ext_timeout(x, set))))) goto nla_put_failure; #endif } if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(x, map))) + ip_set_put_counter(skb, ext_counter(x, set))) goto nla_put_failure; ipset_nest_end(skb, nested); } @@ -245,14 +242,14 @@ mtype_gc(unsigned long ul_set) * but adding/deleting new entries is locked out */ read_lock_bh(&set->lock); for (id = 0; id < map->elements; id++) - if (mtype_gc_test(id, map)) { - x = get_ext(map, id); - if (ip_set_timeout_expired(ext_timeout(x, map))) + if (mtype_gc_test(id, map, set->dsize)) { + x = get_ext(set, map, id); + if (ip_set_timeout_expired(ext_timeout(x, set))) clear_bit(id, map->members); } read_unlock_bh(&set->lock); - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 2ee210ef49a2..363022edb8fb 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -44,10 +44,7 @@ struct bitmap_ip { u32 elements; /* number of max elements in the set */ u32 hosts; /* number of hosts in a subnet */ size_t memsize; /* members size */ - size_t dsize; /* extensions struct size */ - size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ u8 netmask; /* subnet netmask */ - u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ }; @@ -65,20 +62,21 @@ ip_to_id(const struct bitmap_ip *m, u32 ip) /* Common functions */ static inline int -bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map) +bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e, + struct bitmap_ip *map, size_t dsize) { return !!test_bit(e->id, map->members); } static inline int -bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map) +bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map, size_t dsize) { return !!test_bit(id, map->members); } static inline int bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map, - u32 flags) + u32 flags, size_t dsize) { return !!test_and_set_bit(e->id, map->members); } @@ -90,7 +88,8 @@ bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map) } static inline int -bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id) +bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id, + size_t dsize) { return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip + id * map->hosts)); @@ -113,7 +112,7 @@ bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb, struct bitmap_ip *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ip_adt_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); u32 ip; ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); @@ -133,7 +132,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; u32 ip = 0, ip_to = 0; struct bitmap_ip_adt_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret = 0; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -200,7 +199,7 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b) return x->first_ip == y->first_ip && x->last_ip == y->last_ip && x->netmask == y->netmask && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } @@ -240,8 +239,8 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, map->members = ip_set_alloc(map->memsize); if (!map->members) return false; - if (map->dsize) { - map->extensions = ip_set_alloc(map->dsize * elements); + if (set->dsize) { + map->extensions = ip_set_alloc(set->dsize * elements); if (!map->extensions) { kfree(map->members); return false; @@ -252,7 +251,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, map->elements = elements; map->hosts = hosts; map->netmask = netmask; - map->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; set->data = map; set->family = NFPROTO_IPV4; @@ -341,10 +340,10 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipct_elem); - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct bitmap_ipct_elem); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipct_elem, timeout); - map->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipct_elem, counter); if (!init_map_ip(set, map, first_ip, last_ip, @@ -353,14 +352,14 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) return -ENOMEM; } - map->timeout = ip_set_timeout_uget( + set->timeout = ip_set_timeout_uget( tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; bitmap_ip_gc_init(set, bitmap_ip_gc); } else { - map->dsize = sizeof(struct bitmap_ipc_elem); - map->offset[IPSET_EXT_ID_COUNTER] = + set->dsize = sizeof(struct bitmap_ipc_elem); + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipc_elem, counter); if (!init_map_ip(set, map, first_ip, last_ip, @@ -370,8 +369,8 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) } } } else if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipt_elem); - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct bitmap_ipt_elem); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipt_elem, timeout); if (!init_map_ip(set, map, first_ip, last_ip, @@ -380,12 +379,12 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) return -ENOMEM; } - map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; bitmap_ip_gc_init(set, bitmap_ip_gc); } else { - map->dsize = 0; + set->dsize = 0; if (!init_map_ip(set, map, first_ip, last_ip, elements, hosts, netmask)) { kfree(map); diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index e711875ea452..74576cb19264 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -48,11 +48,8 @@ struct bitmap_ipmac { u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ - u32 timeout; /* timeout value */ - struct timer_list gc; /* garbage collector */ size_t memsize; /* members size */ - size_t dsize; /* size of element */ - size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ + struct timer_list gc; /* garbage collector */ }; /* ADT structure for generic function args */ @@ -82,13 +79,13 @@ get_elem(void *extensions, u16 id, size_t dsize) static inline int bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, - const struct bitmap_ipmac *map) + const struct bitmap_ipmac *map, size_t dsize) { const struct bitmap_ipmac_elem *elem; if (!test_bit(e->id, map->members)) return 0; - elem = get_elem(map->extensions, e->id, map->dsize); + elem = get_elem(map->extensions, e->id, dsize); if (elem->filled == MAC_FILLED) return e->ether == NULL || ether_addr_equal(e->ether, elem->ether); @@ -97,13 +94,13 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, } static inline int -bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map) +bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize) { const struct bitmap_ipmac_elem *elem; if (!test_bit(id, map->members)) return 0; - elem = get_elem(map->extensions, id, map->dsize); + elem = get_elem(map->extensions, id, dsize); /* Timer not started for the incomplete elements */ return elem->filled == MAC_FILLED; } @@ -117,13 +114,13 @@ bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem) static inline int bitmap_ipmac_add_timeout(unsigned long *timeout, const struct bitmap_ipmac_adt_elem *e, - const struct ip_set_ext *ext, + const struct ip_set_ext *ext, struct ip_set *set, struct bitmap_ipmac *map, int mode) { u32 t = ext->timeout; if (mode == IPSET_ADD_START_STORED_TIMEOUT) { - if (t == map->timeout) + if (t == set->timeout) /* Timeout was not specified, get stored one */ t = *timeout; ip_set_timeout_set(timeout, t); @@ -142,11 +139,11 @@ bitmap_ipmac_add_timeout(unsigned long *timeout, static inline int bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, - struct bitmap_ipmac *map, u32 flags) + struct bitmap_ipmac *map, u32 flags, size_t dsize) { struct bitmap_ipmac_elem *elem; - elem = get_elem(map->extensions, e->id, map->dsize); + elem = get_elem(map->extensions, e->id, dsize); if (test_and_set_bit(e->id, map->members)) { if (elem->filled == MAC_FILLED) { if (e->ether && (flags & IPSET_FLAG_EXIST)) @@ -179,10 +176,11 @@ bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e, } static inline unsigned long -ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout) +ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout, + size_t dsize) { const struct bitmap_ipmac_elem *elem = - get_elem(map->extensions, id, map->dsize); + get_elem(map->extensions, id, dsize); return elem->filled == MAC_FILLED ? ip_set_timeout_get(timeout) : *timeout; @@ -190,10 +188,10 @@ ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout) static inline int bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map, - u32 id) + u32 id, size_t dsize) { const struct bitmap_ipmac_elem *elem = - get_elem(map->extensions, id, map->dsize); + get_elem(map->extensions, id, dsize); return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip + id)) || @@ -216,7 +214,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ipmac_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); u32 ip; /* MAC can be src only */ @@ -245,7 +243,7 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], const struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ipmac_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0; int ret = 0; @@ -285,7 +283,7 @@ bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b) return x->first_ip == y->first_ip && x->last_ip == y->last_ip && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } @@ -330,11 +328,11 @@ static bool init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, u32 first_ip, u32 last_ip, u32 elements) { - map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize); + map->members = ip_set_alloc((last_ip - first_ip + 1) * set->dsize); if (!map->members) return false; - if (map->dsize) { - map->extensions = ip_set_alloc(map->dsize * elements); + if (set->dsize) { + map->extensions = ip_set_alloc(set->dsize * elements); if (!map->extensions) { kfree(map->members); return false; @@ -343,7 +341,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, map->first_ip = first_ip; map->last_ip = last_ip; map->elements = elements; - map->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; set->data = map; set->family = NFPROTO_IPV4; @@ -404,10 +402,10 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipmacct_elem); - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct bitmap_ipmacct_elem); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipmacct_elem, timeout); - map->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipmacct_elem, counter); if (!init_map_ipmac(set, map, first_ip, last_ip, @@ -415,13 +413,13 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], kfree(map); return -ENOMEM; } - map->timeout = ip_set_timeout_uget( + set->timeout = ip_set_timeout_uget( tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); } else { - map->dsize = sizeof(struct bitmap_ipmacc_elem); - map->offset[IPSET_EXT_ID_COUNTER] = + set->dsize = sizeof(struct bitmap_ipmacc_elem); + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipmacc_elem, counter); if (!init_map_ipmac(set, map, first_ip, last_ip, @@ -431,19 +429,19 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], } } } else if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipmact_elem); - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct bitmap_ipmact_elem); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipmact_elem, timeout); if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { kfree(map); return -ENOMEM; } - map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); } else { - map->dsize = sizeof(struct bitmap_ipmac_elem); + set->dsize = sizeof(struct bitmap_ipmac_elem); if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { kfree(map); diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index bebc137a5737..71da31935499 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -38,9 +38,6 @@ struct bitmap_port { u16 last_port; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ size_t memsize; /* members size */ - size_t dsize; /* extensions struct size */ - size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ - u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ }; @@ -59,20 +56,20 @@ port_to_id(const struct bitmap_port *m, u16 port) static inline int bitmap_port_do_test(const struct bitmap_port_adt_elem *e, - const struct bitmap_port *map) + const struct bitmap_port *map, size_t dsize) { return !!test_bit(e->id, map->members); } static inline int -bitmap_port_gc_test(u16 id, const struct bitmap_port *map) +bitmap_port_gc_test(u16 id, const struct bitmap_port *map, size_t dsize) { return !!test_bit(id, map->members); } static inline int bitmap_port_do_add(const struct bitmap_port_adt_elem *e, - struct bitmap_port *map, u32 flags) + struct bitmap_port *map, u32 flags, size_t dsize) { return !!test_and_set_bit(e->id, map->members); } @@ -85,7 +82,8 @@ bitmap_port_do_del(const struct bitmap_port_adt_elem *e, } static inline int -bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id) +bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id, + size_t dsize) { return nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port + id)); @@ -106,7 +104,7 @@ bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb, struct bitmap_port *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_port_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); __be16 __port; u16 port = 0; @@ -131,7 +129,7 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], struct bitmap_port *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_port_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port; /* wraparound */ u16 port_to; int ret = 0; @@ -191,7 +189,7 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b) return x->first_port == y->first_port && x->last_port == y->last_port && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } @@ -230,8 +228,8 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, map->members = ip_set_alloc(map->memsize); if (!map->members) return false; - if (map->dsize) { - map->extensions = ip_set_alloc(map->dsize * map->elements); + if (set->dsize) { + map->extensions = ip_set_alloc(set->dsize * map->elements); if (!map->extensions) { kfree(map->members); return false; @@ -239,7 +237,7 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, } map->first_port = first_port; map->last_port = last_port; - map->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; set->data = map; set->family = NFPROTO_UNSPEC; @@ -281,23 +279,23 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_portct_elem); - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct bitmap_portct_elem); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_portct_elem, timeout); - map->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_portct_elem, counter); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); return -ENOMEM; } - map->timeout = + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; bitmap_port_gc_init(set, bitmap_port_gc); } else { - map->dsize = sizeof(struct bitmap_portc_elem); - map->offset[IPSET_EXT_ID_COUNTER] = + set->dsize = sizeof(struct bitmap_portc_elem); + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_portc_elem, counter); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); @@ -305,19 +303,19 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) } } } else if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_portt_elem); - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct bitmap_portt_elem); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_portt_elem, timeout); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); return -ENOMEM; } - map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; bitmap_port_gc_init(set, bitmap_port_gc); } else { - map->dsize = 0; + set->dsize = 0; if (!init_map_port(set, map, first_port, last_port)) { kfree(map); return -ENOMEM; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index e4db9250f337..0cb840e1f8ae 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -178,11 +178,6 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define NLEN(family) 0 #endif /* IP_SET_HASH_WITH_NETS */ -#define ext_timeout(e, h) \ -(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_TIMEOUT]) -#define ext_counter(e, h) \ -(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_COUNTER]) - #endif /* _IP_SET_HASH_GEN_H */ /* Family dependent templates */ @@ -276,9 +271,6 @@ struct htype { u32 maxelem; /* max elements in the hash */ u32 elements; /* current element (vs timeout) */ u32 initval; /* random jhash init value */ - u32 timeout; /* timeout value, if enabled */ - size_t dsize; /* data struct size */ - size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ struct timer_list gc; /* garbage collection when timeout enabled */ struct mtype_elem next; /* temporary storage for uadd */ #ifdef IP_SET_HASH_WITH_MULTI @@ -351,7 +343,7 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) /* Calculate the actual memory size of the set data */ static size_t mtype_ahash_memsize(const struct htype *h, const struct htable *t, - u8 nets_length) + u8 nets_length, size_t dsize) { u32 i; size_t memsize = sizeof(*h) @@ -362,7 +354,7 @@ mtype_ahash_memsize(const struct htype *h, const struct htable *t, + jhash_size(t->htable_bits) * sizeof(struct hbucket); for (i = 0; i < jhash_size(t->htable_bits); i++) - memsize += t->bucket[i].size * h->dsize; + memsize += t->bucket[i].size * dsize; return memsize; } @@ -417,10 +409,10 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) init_timer(&h->gc); h->gc.data = (unsigned long) set; h->gc.function = gc; - h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&h->gc); pr_debug("gc initialized, run in every %u\n", - IPSET_GC_PERIOD(h->timeout)); + IPSET_GC_PERIOD(set->timeout)); } static bool @@ -431,7 +423,7 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) /* Resizing changes htable_bits, so we ignore it */ return x->maxelem == y->maxelem && - x->timeout == y->timeout && + a->timeout == b->timeout && #ifdef IP_SET_HASH_WITH_NETMASK x->netmask == y->netmask && #endif @@ -444,7 +436,7 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) /* Delete expired elements from the hashtable */ static void -mtype_expire(struct htype *h, u8 nets_length, size_t dsize) +mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) { struct htable *t; struct hbucket *n; @@ -458,7 +450,7 @@ mtype_expire(struct htype *h, u8 nets_length, size_t dsize) n = hbucket(t, i); for (j = 0; j < n->pos; j++) { data = ahash_data(n, j, dsize); - if (ip_set_timeout_expired(ext_timeout(data, h))) { + if (ip_set_timeout_expired(ext_timeout(data, set))) { pr_debug("expired %u/%u\n", i, j); #ifdef IP_SET_HASH_WITH_NETS mtype_del_cidr(h, CIDR(data->cidr), @@ -497,10 +489,10 @@ mtype_gc(unsigned long ul_set) pr_debug("called\n"); write_lock_bh(&set->lock); - mtype_expire(h, NLEN(set->family), h->dsize); + mtype_expire(set, h, NLEN(set->family), set->dsize); write_unlock_bh(&set->lock); - h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&h->gc); } @@ -526,7 +518,7 @@ mtype_resize(struct ip_set *set, bool retried) if (SET_WITH_TIMEOUT(set) && !retried) { i = h->elements; write_lock_bh(&set->lock); - mtype_expire(set->data, NLEN(set->family), h->dsize); + mtype_expire(set, set->data, NLEN(set->family), set->dsize); write_unlock_bh(&set->lock); if (h->elements < i) return 0; @@ -553,13 +545,13 @@ retry: for (i = 0; i < jhash_size(orig->htable_bits); i++) { n = hbucket(orig, i); for (j = 0; j < n->pos; j++) { - data = ahash_data(n, j, h->dsize); + data = ahash_data(n, j, set->dsize); #ifdef IP_SET_HASH_WITH_NETS flags = 0; mtype_data_reset_flags(data, &flags); #endif m = hbucket(t, HKEY(data, h->initval, htable_bits)); - ret = hbucket_elem_add(m, AHASH_MAX(h), h->dsize); + ret = hbucket_elem_add(m, AHASH_MAX(h), set->dsize); if (ret < 0) { #ifdef IP_SET_HASH_WITH_NETS mtype_data_reset_flags(data, &flags); @@ -570,8 +562,8 @@ retry: goto retry; return ret; } - d = ahash_data(m, m->pos++, h->dsize); - memcpy(d, data, h->dsize); + d = ahash_data(m, m->pos++, set->dsize); + memcpy(d, data, set->dsize); #ifdef IP_SET_HASH_WITH_NETS mtype_data_reset_flags(d, &flags); #endif @@ -609,7 +601,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem) /* FIXME: when set is full, we slow down here */ - mtype_expire(h, NLEN(set->family), h->dsize); + mtype_expire(set, h, NLEN(set->family), set->dsize); if (h->elements >= h->maxelem) { if (net_ratelimit()) @@ -623,11 +615,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, key = HKEY(value, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (mtype_data_equal(data, d, &multi)) { if (flag_exist || (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h)))) { + ip_set_timeout_expired(ext_timeout(data, set)))) { /* Just the extensions could be overwritten */ j = i; goto reuse_slot; @@ -638,14 +630,14 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, } /* Reuse first timed out entry */ if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h)) && + ip_set_timeout_expired(ext_timeout(data, set)) && j != AHASH_MAX(h) + 1) j = i; } reuse_slot: if (j != AHASH_MAX(h) + 1) { /* Fill out reused slot */ - data = ahash_data(n, j, h->dsize); + data = ahash_data(n, j, set->dsize); #ifdef IP_SET_HASH_WITH_NETS mtype_del_cidr(h, CIDR(data->cidr), NLEN(set->family), 0); mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); @@ -653,13 +645,13 @@ reuse_slot: } else { /* Use/create a new slot */ TUNE_AHASH_MAX(h, multi); - ret = hbucket_elem_add(n, AHASH_MAX(h), h->dsize); + ret = hbucket_elem_add(n, AHASH_MAX(h), set->dsize); if (ret != 0) { if (ret == -EAGAIN) mtype_data_next(&h->next, d); goto out; } - data = ahash_data(n, n->pos++, h->dsize); + data = ahash_data(n, n->pos++, set->dsize); #ifdef IP_SET_HASH_WITH_NETS mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); #endif @@ -670,9 +662,9 @@ reuse_slot: mtype_data_set_flags(data, flags); #endif if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(data, h), ext->timeout); + ip_set_timeout_set(ext_timeout(data, set), ext->timeout); if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(data, h), ext); + ip_set_init_counter(ext_counter(data, set), ext); out: rcu_read_unlock_bh(); @@ -699,16 +691,16 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, key = HKEY(value, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (!mtype_data_equal(data, d, &multi)) continue; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h))) + ip_set_timeout_expired(ext_timeout(data, set))) goto out; if (i != n->pos - 1) /* Not last one */ - memcpy(data, ahash_data(n, n->pos - 1, h->dsize), - h->dsize); + memcpy(data, ahash_data(n, n->pos - 1, set->dsize), + set->dsize); n->pos--; h->elements--; @@ -717,14 +709,14 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, #endif if (n->pos + AHASH_INIT_SIZE < n->size) { void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) - * h->dsize, + * set->dsize, GFP_ATOMIC); if (!tmp) { ret = 0; goto out; } n->size -= AHASH_INIT_SIZE; - memcpy(tmp, n->value, n->size * h->dsize); + memcpy(tmp, n->value, n->size * set->dsize); kfree(n->value); n->value = tmp; } @@ -742,8 +734,7 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext, struct ip_set_ext *mext, struct ip_set *set, u32 flags) { if (SET_WITH_COUNTER(set)) - ip_set_update_counter(ext_counter(data, - (struct htype *)(set->data)), + ip_set_update_counter(ext_counter(data, set), ext, mext, flags); return mtype_do_data_match(data); } @@ -770,12 +761,12 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (!mtype_data_equal(data, d, &multi)) continue; if (SET_WITH_TIMEOUT(set)) { if (!ip_set_timeout_expired( - ext_timeout(data, h))) + ext_timeout(data, set))) return mtype_data_match(data, ext, mext, set, flags); @@ -818,10 +809,10 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (mtype_data_equal(data, d, &multi) && !(SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h)))) { + ip_set_timeout_expired(ext_timeout(data, set)))) { ret = mtype_data_match(data, ext, mext, set, flags); goto out; } @@ -841,7 +832,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) size_t memsize; t = rcu_dereference_bh_nfnl(h->table); - memsize = mtype_ahash_memsize(h, t, NLEN(set->family)); + memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize); nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) @@ -858,7 +849,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || ((set->extensions & IPSET_EXT_TIMEOUT) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout))) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) || ((set->extensions & IPSET_EXT_COUNTER) && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(IPSET_FLAG_WITH_COUNTERS)))) @@ -894,9 +885,9 @@ mtype_list(const struct ip_set *set, n = hbucket(t, cb->args[2]); pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n); for (i = 0; i < n->pos; i++) { - e = ahash_data(n, i, h->dsize); + e = ahash_data(n, i, set->dsize); if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, h))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; pr_debug("list hash %lu hbucket %p i %u, data %p\n", cb->args[2], n, i, e); @@ -913,10 +904,10 @@ mtype_list(const struct ip_set *set, if (SET_WITH_TIMEOUT(set) && nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(ip_set_timeout_get( - ext_timeout(e, h))))) + ext_timeout(e, set))))) goto nla_put_failure; if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, h))) + ip_set_put_counter(skb, ext_counter(e, set))) goto nla_put_failure; ipset_nest_end(skb, nested); } @@ -1026,7 +1017,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) h->netmask = netmask; #endif get_random_bytes(&h->initval, sizeof(h->initval)); - h->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; hbits = htable_bits(hashsize); hsize = htable_size(hbits); @@ -1053,30 +1044,30 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { - h->timeout = + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; if (set->family == NFPROTO_IPV4) { - h->dsize = sizeof(struct + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4ct_elem)); - h->offset[IPSET_EXT_ID_TIMEOUT] = + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 4ct_elem), timeout); - h->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 4ct_elem), counter); IPSET_TOKEN(HTYPE, 4_gc_init)(set, IPSET_TOKEN(HTYPE, 4_gc)); } else { - h->dsize = sizeof(struct + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6ct_elem)); - h->offset[IPSET_EXT_ID_TIMEOUT] = + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 6ct_elem), timeout); - h->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 6ct_elem), counter); @@ -1085,36 +1076,36 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) } } else { if (set->family == NFPROTO_IPV4) { - h->dsize = + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4c_elem)); - h->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 4c_elem), counter); } else { - h->dsize = + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6c_elem)); - h->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 6c_elem), counter); } } } else if (tb[IPSET_ATTR_TIMEOUT]) { - h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; if (set->family == NFPROTO_IPV4) { - h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem)); - h->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem)); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem), timeout); IPSET_TOKEN(HTYPE, 4_gc_init)(set, IPSET_TOKEN(HTYPE, 4_gc)); } else { - h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem)); - h->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem)); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem), timeout); IPSET_TOKEN(HTYPE, 6_gc_init)(set, @@ -1122,9 +1113,9 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) } } else { if (set->family == NFPROTO_IPV4) - h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)); + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)); else - h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)); + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)); } pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 260c9a80d8a5..bbde7c304622 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -99,7 +99,7 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); __be32 ip; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip); @@ -118,7 +118,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, hosts; int ret = 0; @@ -253,7 +253,7 @@ hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip6_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); hash_ip6_netmask(&e.ip, h->netmask); @@ -270,7 +270,7 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip6_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (unlikely(!tb[IPSET_ATTR_IP] || diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 64caad35a391..dd175d6f3965 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -116,10 +116,9 @@ hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -136,7 +135,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip, ip_to = 0, p = 0, port, port_to; bool with_ports = false; int ret; @@ -306,10 +305,9 @@ hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -326,7 +324,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 2873bbc20d7a..87a2cfab2568 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -120,10 +120,9 @@ hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -141,7 +140,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip, ip_to = 0, p = 0, port, port_to; bool with_ports = false; int ret; @@ -319,10 +318,9 @@ hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -340,7 +338,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 6ce5a8e2c44e..0b9a28d7c740 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -172,7 +172,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_ipportnet4_elem e = { .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -195,7 +195,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, p = 0, port, port_to; u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; bool with_ports = false; @@ -306,9 +306,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], : port; for (; p <= port_to; p++) { e.port = htons(p); - ip2 = retried - && ip == ntohl(h->next.ip) - && p == ntohs(h->next.port) + ip2 = retried && + ip == ntohl(h->next.ip) && + p == ntohs(h->next.port) ? ntohl(h->next.ip2) : ip2_from; while (!after(ip2, ip2_to)) { e.ip2 = htonl(ip2); @@ -456,7 +456,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_ipportnet6_elem e = { .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -479,7 +479,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; u8 cidr; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index ec1c7dc10489..1d4caa50dacb 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -145,7 +145,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_net4_elem e = { .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (e.cidr == 0) return -EINVAL; @@ -165,7 +165,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, last; int ret; @@ -340,7 +340,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_net6_elem e = { .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (e.cidr == 0) return -EINVAL; @@ -357,10 +357,9 @@ static int hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net6_elem e = { .cidr = HOST_MASK }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (unlikely(!tb[IPSET_ATTR_IP] || diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 814b4e31a7f8..2f0ffe35c408 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -268,7 +268,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); int ret; if (e.cidr == 0) @@ -319,7 +319,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, last; char iface[IFNAMSIZ]; int ret; @@ -537,7 +537,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); int ret; if (e.cidr == 0) @@ -584,7 +584,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); char iface[IFNAMSIZ]; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 3bd923d3f179..cab236625f97 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -164,7 +164,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netport4_elem e = { .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -186,7 +186,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to, p = 0, ip = 0, ip_to = 0, last; bool with_ports = false; u8 cidr; @@ -409,7 +409,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netport6_elem e = { .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -431,7 +431,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; u8 cidr; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 0ed19b5e675e..f22d05d6e366 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -58,24 +58,13 @@ struct set_adt_elem { /* Type structure */ struct list_set { - size_t dsize; /* element size */ - size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ u32 size; /* size of set list array */ - u32 timeout; /* timeout value */ struct timer_list gc; /* garbage collection */ struct set_elem members[0]; /* the set members */ }; -static inline struct set_elem * -list_set_elem(const struct list_set *map, u32 id) -{ - return (struct set_elem *)((void *)map->members + id * map->dsize); -} - -#define ext_timeout(e, m) \ -(unsigned long *)((void *)(e) + (m)->offset[IPSET_EXT_ID_TIMEOUT]) -#define ext_counter(e, m) \ -(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_EXT_ID_COUNTER]) +#define list_set_elem(set, map, id) \ + (struct set_elem *)((void *)(map)->members + (id) * (set)->dsize) static int list_set_ktest(struct ip_set *set, const struct sk_buff *skb, @@ -92,16 +81,16 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE) opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; ret = ip_set_test(e->id, skb, par, opt); if (ret > 0) { if (SET_WITH_COUNTER(set)) - ip_set_update_counter(ext_counter(e, map), + ip_set_update_counter(ext_counter(e, set), ext, &opt->ext, cmdflags); return ret; @@ -121,11 +110,11 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb, int ret; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; ret = ip_set_add(e->id, skb, par, opt); if (ret == 0) @@ -145,11 +134,11 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb, int ret; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; ret = ip_set_del(e->id, skb, par, opt); if (ret == 0) @@ -163,8 +152,7 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - struct list_set *map = set->data; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); switch (adt) { case IPSET_TEST: @@ -188,10 +176,10 @@ id_eq(const struct ip_set *set, u32 i, ip_set_id_t id) if (i >= map->size) return 0; - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); return !!(e->id == id && !(SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map)))); + ip_set_timeout_expired(ext_timeout(e, set)))); } static int @@ -199,28 +187,29 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, const struct ip_set_ext *ext) { struct list_set *map = set->data; - struct set_elem *e = list_set_elem(map, i); + struct set_elem *e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { if (i == map->size - 1) /* Last element replaced: e.g. add new,before,last */ ip_set_put_byindex(e->id); else { - struct set_elem *x = list_set_elem(map, map->size - 1); + struct set_elem *x = list_set_elem(set, map, + map->size - 1); /* Last element pushed off */ if (x->id != IPSET_INVALID_ID) ip_set_put_byindex(x->id); - memmove(list_set_elem(map, i + 1), e, - map->dsize * (map->size - (i + 1))); + memmove(list_set_elem(set, map, i + 1), e, + set->dsize * (map->size - (i + 1))); } } e->id = d->id; if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(e, map), ext->timeout); + ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(e, map), ext); + ip_set_init_counter(ext_counter(e, set), ext); return 0; } @@ -228,16 +217,16 @@ static int list_set_del(struct ip_set *set, u32 i) { struct list_set *map = set->data; - struct set_elem *e = list_set_elem(map, i); + struct set_elem *e = list_set_elem(set, map, i); ip_set_put_byindex(e->id); if (i < map->size - 1) - memmove(e, list_set_elem(map, i + 1), - map->dsize * (map->size - (i + 1))); + memmove(e, list_set_elem(set, map, i + 1), + set->dsize * (map->size - (i + 1))); /* Last element */ - e = list_set_elem(map, map->size - 1); + e = list_set_elem(set, map, map->size - 1); e->id = IPSET_INVALID_ID; return 0; } @@ -250,9 +239,9 @@ set_cleanup_entries(struct ip_set *set) u32 i; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) list_set_del(set, i); } } @@ -268,11 +257,11 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, int ret; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; else if (e->id != d->id) continue; @@ -301,11 +290,11 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* Check already added element */ for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) goto insert; else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; else if (e->id != d->id) continue; @@ -320,9 +309,9 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, return -IPSET_ERR_EXIST; /* Update extensions */ if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(e, map), ext->timeout); + ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(e, map), ext); + ip_set_init_counter(ext_counter(e, set), ext); /* Set is already added to the list */ ip_set_put_byindex(d->id); return 0; @@ -330,7 +319,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, insert: ret = -IPSET_ERR_LIST_FULL; for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) ret = d->before != 0 ? -IPSET_ERR_REF_EXIST : list_set_add(set, i, d, ext); @@ -355,12 +344,12 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext, u32 i; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return d->before != 0 ? -IPSET_ERR_REF_EXIST : -IPSET_ERR_EXIST; else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; else if (e->id != d->id) continue; @@ -383,10 +372,9 @@ static int list_set_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - struct list_set *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct set_adt_elem e = { .refid = IPSET_INVALID_ID }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); struct ip_set *s; int ret = 0; @@ -454,7 +442,7 @@ list_set_flush(struct ip_set *set) u32 i; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { ip_set_put_byindex(e->id); e->id = IPSET_INVALID_ID; @@ -486,13 +474,13 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) || (SET_WITH_COUNTER(set) && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(IPSET_FLAG_WITH_COUNTERS))) || nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, - htonl(sizeof(*map) + map->size * map->dsize))) + htonl(sizeof(*map) + map->size * set->dsize))) goto nla_put_failure; ipset_nest_end(skb, nested); @@ -515,11 +503,11 @@ list_set_list(const struct ip_set *set, return -EMSGSIZE; for (; cb->args[2] < map->size; cb->args[2]++) { i = cb->args[2]; - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) goto finish; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { @@ -535,10 +523,10 @@ list_set_list(const struct ip_set *set, if (SET_WITH_TIMEOUT(set) && nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(ip_set_timeout_get( - ext_timeout(e, map))))) + ext_timeout(e, set))))) goto nla_put_failure; if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, map))) + ip_set_put_counter(skb, ext_counter(e, set))) goto nla_put_failure; ipset_nest_end(skb, nested); } @@ -565,7 +553,7 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b) const struct list_set *y = b->data; return x->size == y->size && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } @@ -594,7 +582,7 @@ list_set_gc(unsigned long ul_set) set_cleanup_entries(set); write_unlock_bh(&set->lock); - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } @@ -606,7 +594,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) init_timer(&map->gc); map->gc.data = (unsigned long) set; map->gc.function = gc; - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } @@ -625,12 +613,12 @@ init_list_set(struct ip_set *set, u32 size, size_t dsize, return NULL; map->size = size; - map->dsize = dsize; - map->timeout = timeout; + set->dsize = dsize; + set->timeout = timeout; set->data = map; for (i = 0; i < size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); e->id = IPSET_INVALID_ID; } @@ -667,9 +655,9 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (!map) return -ENOMEM; set->extensions |= IPSET_EXT_TIMEOUT; - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct setct_elem, timeout); - map->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct setct_elem, counter); list_set_gc_init(set, list_set_gc); } else { @@ -677,7 +665,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) sizeof(struct setc_elem), 0); if (!map) return -ENOMEM; - map->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct setc_elem, counter); } } else if (tb[IPSET_ATTR_TIMEOUT]) { @@ -686,7 +674,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (!map) return -ENOMEM; set->extensions |= IPSET_EXT_TIMEOUT; - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct sett_elem, timeout); list_set_gc_init(set, list_set_gc); } else { -- cgit v1.2.3-55-g7522 From 03c8b234e61a9a3aab8d970b3bf681934ecfe443 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 7 Sep 2013 00:43:52 +0200 Subject: netfilter: ipset: Generalize extensions support Get rid of the structure based extensions and introduce a blob for the extensions. Thus we can support more extension types easily. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 13 +++++ net/netfilter/ipset/ip_set_bitmap_ip.c | 81 +++---------------------- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 91 +++-------------------------- net/netfilter/ipset/ip_set_bitmap_port.c | 71 ++-------------------- net/netfilter/ipset/ip_set_core.c | 46 +++++++++++++++ net/netfilter/ipset/ip_set_hash_gen.h | 86 ++++----------------------- net/netfilter/ipset/ip_set_hash_ip.c | 36 +----------- net/netfilter/ipset/ip_set_hash_ipport.c | 54 +---------------- net/netfilter/ipset/ip_set_hash_ipportip.c | 60 +------------------ net/netfilter/ipset/ip_set_hash_ipportnet.c | 66 +-------------------- net/netfilter/ipset/ip_set_hash_net.c | 54 +---------------- net/netfilter/ipset/ip_set_hash_netiface.c | 68 +-------------------- net/netfilter/ipset/ip_set_hash_netport.c | 60 +------------------ net/netfilter/ipset/ip_set_list_set.c | 81 ++++--------------------- 14 files changed, 118 insertions(+), 749 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 992a2f58dbd3..66d6bd404d64 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -66,6 +66,17 @@ enum ip_set_ext_id { IPSET_EXT_ID_MAX, }; +/* Extension type */ +struct ip_set_ext_type { + enum ip_set_extension type; + enum ipset_cadt_flags flag; + /* Size and minimal alignment */ + u8 len; + u8 align; +}; + +extern const struct ip_set_ext_type ip_set_extensions[]; + struct ip_set_ext { u64 packets; u64 bytes; @@ -283,6 +294,8 @@ extern void *ip_set_alloc(size_t size); extern void ip_set_free(void *members); extern int ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr); extern int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr); +extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], + size_t len); extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext); diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 363022edb8fb..94d985457c51 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -208,25 +208,6 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b) struct bitmap_ip_elem { }; -/* Timeout variant */ - -struct bitmap_ipt_elem { - unsigned long timeout; -}; - -/* Plain variant with counter */ - -struct bitmap_ipc_elem { - struct ip_set_counter counter; -}; - -/* Timeout variant with counter */ - -struct bitmap_ipct_elem { - unsigned long timeout; - struct ip_set_counter counter; -}; - #include "ip_set_bitmap_gen.h" /* Create bitmap:ip type of sets */ @@ -263,7 +244,7 @@ static int bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { struct bitmap_ip *map; - u32 first_ip = 0, last_ip = 0, hosts, cadt_flags = 0; + u32 first_ip = 0, last_ip = 0, hosts; u64 elements; u8 netmask = 32; int ret; @@ -335,61 +316,15 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) map->memsize = bitmap_bytes(0, elements - 1); set->variant = &bitmap_ip; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - set->dsize = sizeof(struct bitmap_ipct_elem); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct bitmap_ipct_elem, timeout); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct bitmap_ipct_elem, counter); - - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } - - set->timeout = ip_set_timeout_uget( - tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - - bitmap_ip_gc_init(set, bitmap_ip_gc); - } else { - set->dsize = sizeof(struct bitmap_ipc_elem); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct bitmap_ipc_elem, counter); - - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - set->dsize = sizeof(struct bitmap_ipt_elem); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct bitmap_ipt_elem, timeout); - - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } - + set->dsize = ip_set_elem_len(set, tb, 0); + if (!init_map_ip(set, map, first_ip, last_ip, + elements, hosts, netmask)) { + kfree(map); + return -ENOMEM; + } + if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - bitmap_ip_gc_init(set, bitmap_ip_gc); - } else { - set->dsize = 0; - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } } return 0; } diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 74576cb19264..654a97bedfe9 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -289,37 +289,6 @@ bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b) /* Plain variant */ -/* Timeout variant */ - -struct bitmap_ipmact_elem { - struct { - unsigned char ether[ETH_ALEN]; - unsigned char filled; - } __attribute__ ((aligned)); - unsigned long timeout; -}; - -/* Plain variant with counter */ - -struct bitmap_ipmacc_elem { - struct { - unsigned char ether[ETH_ALEN]; - unsigned char filled; - } __attribute__ ((aligned)); - struct ip_set_counter counter; -}; - -/* Timeout variant with counter */ - -struct bitmap_ipmacct_elem { - struct { - unsigned char ether[ETH_ALEN]; - unsigned char filled; - } __attribute__ ((aligned)); - unsigned long timeout; - struct ip_set_counter counter; -}; - #include "ip_set_bitmap_gen.h" /* Create bitmap:ip,mac type of sets */ @@ -328,7 +297,7 @@ static bool init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, u32 first_ip, u32 last_ip, u32 elements) { - map->members = ip_set_alloc((last_ip - first_ip + 1) * set->dsize); + map->members = ip_set_alloc(map->memsize); if (!map->members) return false; if (set->dsize) { @@ -353,7 +322,7 @@ static int bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { - u32 first_ip = 0, last_ip = 0, cadt_flags = 0; + u32 first_ip = 0, last_ip = 0; u64 elements; struct bitmap_ipmac *map; int ret; @@ -397,57 +366,15 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], map->memsize = bitmap_bytes(0, elements - 1); set->variant = &bitmap_ipmac; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - set->dsize = sizeof(struct bitmap_ipmacct_elem); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct bitmap_ipmacct_elem, timeout); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct bitmap_ipmacct_elem, counter); - - if (!init_map_ipmac(set, map, first_ip, last_ip, - elements)) { - kfree(map); - return -ENOMEM; - } - set->timeout = ip_set_timeout_uget( - tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); - } else { - set->dsize = sizeof(struct bitmap_ipmacc_elem); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct bitmap_ipmacc_elem, counter); - - if (!init_map_ipmac(set, map, first_ip, last_ip, - elements)) { - kfree(map); - return -ENOMEM; - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - set->dsize = sizeof(struct bitmap_ipmact_elem); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct bitmap_ipmact_elem, timeout); - - if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { - kfree(map); - return -ENOMEM; - } + set->dsize = ip_set_elem_len(set, tb, + sizeof(struct bitmap_ipmac_elem)); + if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { + kfree(map); + return -ENOMEM; + } + if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); - } else { - set->dsize = sizeof(struct bitmap_ipmac_elem); - - if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { - kfree(map); - return -ENOMEM; - } - set->variant = &bitmap_ipmac; } return 0; } diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 71da31935499..1ef2f3186b80 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -198,25 +198,6 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b) struct bitmap_port_elem { }; -/* Timeout variant */ - -struct bitmap_portt_elem { - unsigned long timeout; -}; - -/* Plain variant with counter */ - -struct bitmap_portc_elem { - struct ip_set_counter counter; -}; - -/* Timeout variant with counter */ - -struct bitmap_portct_elem { - unsigned long timeout; - struct ip_set_counter counter; -}; - #include "ip_set_bitmap_gen.h" /* Create bitmap:ip type of sets */ @@ -250,7 +231,6 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { struct bitmap_port *map; u16 first_port, last_port; - u32 cadt_flags = 0; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) || @@ -274,53 +254,14 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) map->elements = last_port - first_port + 1; map->memsize = map->elements * sizeof(unsigned long); set->variant = &bitmap_port; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - set->dsize = sizeof(struct bitmap_portct_elem); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct bitmap_portct_elem, timeout); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct bitmap_portct_elem, counter); - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - - set->timeout = - ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - bitmap_port_gc_init(set, bitmap_port_gc); - } else { - set->dsize = sizeof(struct bitmap_portc_elem); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct bitmap_portc_elem, counter); - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - set->dsize = sizeof(struct bitmap_portt_elem); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct bitmap_portt_elem, timeout); - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - + set->dsize = ip_set_elem_len(set, tb, 0); + if (!init_map_port(set, map, first_port, last_port)) { + kfree(map); + return -ENOMEM; + } + if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; bitmap_port_gc_init(set, bitmap_port_gc); - } else { - set->dsize = 0; - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - } return 0; } diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 428c30a8586f..f35afed3814f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -315,6 +315,52 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); +/* ipset data extension types, in size order */ + +const struct ip_set_ext_type ip_set_extensions[] = { + [IPSET_EXT_ID_COUNTER] = { + .type = IPSET_EXT_COUNTER, + .flag = IPSET_FLAG_WITH_COUNTERS, + .len = sizeof(struct ip_set_counter), + .align = __alignof__(struct ip_set_counter), + }, + [IPSET_EXT_ID_TIMEOUT] = { + .type = IPSET_EXT_TIMEOUT, + .len = sizeof(unsigned long), + .align = __alignof__(unsigned long), + }, +}; +EXPORT_SYMBOL_GPL(ip_set_extensions); + +static inline bool +add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[]) +{ + return ip_set_extensions[id].flag ? + (flags & ip_set_extensions[id].flag) : + !!tb[IPSET_ATTR_TIMEOUT]; +} + +size_t +ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len) +{ + enum ip_set_ext_id id; + size_t offset = 0; + u32 cadt_flags = 0; + + if (tb[IPSET_ATTR_CADT_FLAGS]) + cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + for (id = 0; id < IPSET_EXT_ID_MAX; id++) { + if (!add_extension(id, cadt_flags, tb)) + continue; + offset += ALIGN(len + offset, ip_set_extensions[id].align); + set->offset[id] = offset; + set->extensions |= ip_set_extensions[id].type; + offset += ip_set_extensions[id].len; + } + return len + offset; +} +EXPORT_SYMBOL_GPL(ip_set_elem_len); + int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 0cb840e1f8ae..3999f1719f69 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -960,7 +960,6 @@ static int IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) { u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; - u32 cadt_flags = 0; u8 hbits; #ifdef IP_SET_HASH_WITH_NETMASK u8 netmask; @@ -1034,88 +1033,23 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) rcu_assign_pointer(h->table, t); set->data = h; - if (set->family == NFPROTO_IPV4) + if (set->family == NFPROTO_IPV4) { set->variant = &IPSET_TOKEN(HTYPE, 4_variant); - else + set->dsize = ip_set_elem_len(set, tb, + sizeof(struct IPSET_TOKEN(HTYPE, 4_elem))); + } else { set->variant = &IPSET_TOKEN(HTYPE, 6_variant); - - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - set->timeout = - ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - if (set->family == NFPROTO_IPV4) { - set->dsize = sizeof(struct - IPSET_TOKEN(HTYPE, 4ct_elem)); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct - IPSET_TOKEN(HTYPE, 4ct_elem), - timeout); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct - IPSET_TOKEN(HTYPE, 4ct_elem), - counter); - IPSET_TOKEN(HTYPE, 4_gc_init)(set, - IPSET_TOKEN(HTYPE, 4_gc)); - } else { - set->dsize = sizeof(struct - IPSET_TOKEN(HTYPE, 6ct_elem)); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct - IPSET_TOKEN(HTYPE, 6ct_elem), - timeout); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct - IPSET_TOKEN(HTYPE, 6ct_elem), - counter); - IPSET_TOKEN(HTYPE, 6_gc_init)(set, - IPSET_TOKEN(HTYPE, 6_gc)); - } - } else { - if (set->family == NFPROTO_IPV4) { - set->dsize = - sizeof(struct - IPSET_TOKEN(HTYPE, 4c_elem)); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct - IPSET_TOKEN(HTYPE, 4c_elem), - counter); - } else { - set->dsize = - sizeof(struct - IPSET_TOKEN(HTYPE, 6c_elem)); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct - IPSET_TOKEN(HTYPE, 6c_elem), - counter); - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { + set->dsize = ip_set_elem_len(set, tb, + sizeof(struct IPSET_TOKEN(HTYPE, 6_elem))); + } + if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - if (set->family == NFPROTO_IPV4) { - set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem)); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem), - timeout); + if (set->family == NFPROTO_IPV4) IPSET_TOKEN(HTYPE, 4_gc_init)(set, IPSET_TOKEN(HTYPE, 4_gc)); - } else { - set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem)); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem), - timeout); + else IPSET_TOKEN(HTYPE, 6_gc_init)(set, IPSET_TOKEN(HTYPE, 6_gc)); - } - } else { - if (set->family == NFPROTO_IPV4) - set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)); - else - set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)); } pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index bbde7c304622..a111ffe40b46 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -35,7 +35,7 @@ MODULE_ALIAS("ip_set_hash:ip"); #define HTYPE hash_ip #define IP_SET_HASH_WITH_NETMASK -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ip4_elem { @@ -43,22 +43,6 @@ struct hash_ip4_elem { __be32 ip; }; -struct hash_ip4t_elem { - __be32 ip; - unsigned long timeout; -}; - -struct hash_ip4c_elem { - __be32 ip; - struct ip_set_counter counter; -}; - -struct hash_ip4ct_elem { - __be32 ip; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -178,29 +162,13 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ /* Member elements */ struct hash_ip6_elem { union nf_inet_addr ip; }; -struct hash_ip6t_elem { - union nf_inet_addr ip; - unsigned long timeout; -}; - -struct hash_ip6c_elem { - union nf_inet_addr ip; - struct ip_set_counter counter; -}; - -struct hash_ip6ct_elem { - union nf_inet_addr ip; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index dd175d6f3965..5dc735c4dac2 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -36,7 +36,7 @@ MODULE_ALIAS("ip_set_hash:ip,port"); /* Type specific function prefix */ #define HTYPE hash_ipport -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ipport4_elem { @@ -46,31 +46,6 @@ struct hash_ipport4_elem { u8 padding; }; -struct hash_ipport4t_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipport4c_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipport4ct_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -221,7 +196,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_ipport6_elem { union nf_inet_addr ip; @@ -230,31 +205,6 @@ struct hash_ipport6_elem { u8 padding; }; -struct hash_ipport6t_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipport6c_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipport6ct_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 87a2cfab2568..8c43dc7811cb 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -36,7 +36,7 @@ MODULE_ALIAS("ip_set_hash:ip,port,ip"); /* Type specific function prefix */ #define HTYPE hash_ipportip -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ipportip4_elem { @@ -47,34 +47,6 @@ struct hash_ipportip4_elem { u8 padding; }; -struct hash_ipportip4t_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipportip4c_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipportip4ct_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - static inline bool hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1, const struct hash_ipportip4_elem *ip2, @@ -230,7 +202,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_ipportip6_elem { union nf_inet_addr ip; @@ -240,34 +212,6 @@ struct hash_ipportip6_elem { u8 padding; }; -struct hash_ipportip6t_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipportip6c_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipportip6ct_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 0b9a28d7c740..34890452366c 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -46,7 +46,7 @@ MODULE_ALIAS("ip_set_hash:ip,port,net"); #define IP_SET_HASH_WITH_PROTO #define IP_SET_HASH_WITH_NETS -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ipportnet4_elem { @@ -58,37 +58,6 @@ struct hash_ipportnet4_elem { u8 proto; }; -struct hash_ipportnet4t_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - unsigned long timeout; -}; - -struct hash_ipportnet4c_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; -}; - -struct hash_ipportnet4ct_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -328,7 +297,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_ipportnet6_elem { union nf_inet_addr ip; @@ -339,37 +308,6 @@ struct hash_ipportnet6_elem { u8 proto; }; -struct hash_ipportnet6t_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - unsigned long timeout; -}; - -struct hash_ipportnet6c_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; -}; - -struct hash_ipportnet6ct_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 1d4caa50dacb..d5598557f4a9 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -36,7 +36,7 @@ MODULE_ALIAS("ip_set_hash:net"); #define HTYPE hash_net #define IP_SET_HASH_WITH_NETS -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_net4_elem { @@ -46,31 +46,6 @@ struct hash_net4_elem { u8 cidr; }; -struct hash_net4t_elem { - __be32 ip; - u16 padding0; - u8 nomatch; - u8 cidr; - unsigned long timeout; -}; - -struct hash_net4c_elem { - __be32 ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; -}; - -struct hash_net4ct_elem { - __be32 ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -228,7 +203,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_net6_elem { union nf_inet_addr ip; @@ -237,31 +212,6 @@ struct hash_net6_elem { u8 cidr; }; -struct hash_net6t_elem { - union nf_inet_addr ip; - u16 padding0; - u8 nomatch; - u8 cidr; - unsigned long timeout; -}; - -struct hash_net6c_elem { - union nf_inet_addr ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; -}; - -struct hash_net6ct_elem { - union nf_inet_addr ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 2f0ffe35c408..26703e9e5082 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -134,7 +134,7 @@ iface_add(struct rb_root *root, const char **iface) #define STREQ(a, b) (strcmp(a, b) == 0) -/* IPv4 variants */ +/* IPv4 variant */ struct hash_netiface4_elem_hashed { __be32 ip; @@ -144,7 +144,7 @@ struct hash_netiface4_elem_hashed { u8 elem; }; -/* Member elements without timeout */ +/* Member elements */ struct hash_netiface4_elem { __be32 ip; u8 physdev; @@ -154,37 +154,6 @@ struct hash_netiface4_elem { const char *iface; }; -struct hash_netiface4t_elem { - __be32 ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - unsigned long timeout; -}; - -struct hash_netiface4c_elem { - __be32 ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; -}; - -struct hash_netiface4ct_elem { - __be32 ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -399,7 +368,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_netiface6_elem_hashed { union nf_inet_addr ip; @@ -418,37 +387,6 @@ struct hash_netiface6_elem { const char *iface; }; -struct hash_netiface6t_elem { - union nf_inet_addr ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - unsigned long timeout; -}; - -struct hash_netiface6c_elem { - union nf_inet_addr ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; -}; - -struct hash_netiface6ct_elem { - union nf_inet_addr ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index cab236625f97..45b6e91b0636 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -45,7 +45,7 @@ MODULE_ALIAS("ip_set_hash:net,port"); */ #define IP_SET_HASH_WITH_NETS_PACKED -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_netport4_elem { @@ -56,34 +56,6 @@ struct hash_netport4_elem { u8 nomatch:1; }; -struct hash_netport4t_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - unsigned long timeout; -}; - -struct hash_netport4c_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; -}; - -struct hash_netport4ct_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -287,7 +259,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_netport6_elem { union nf_inet_addr ip; @@ -297,34 +269,6 @@ struct hash_netport6_elem { u8 nomatch:1; }; -struct hash_netport6t_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - unsigned long timeout; -}; - -struct hash_netport6c_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; -}; - -struct hash_netport6ct_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index f22d05d6e366..7fd11c79aff4 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -28,28 +28,6 @@ struct set_elem { ip_set_id_t id; }; -struct sett_elem { - struct { - ip_set_id_t id; - } __attribute__ ((aligned)); - unsigned long timeout; -}; - -struct setc_elem { - struct { - ip_set_id_t id; - } __attribute__ ((aligned)); - struct ip_set_counter counter; -}; - -struct setct_elem { - struct { - ip_set_id_t id; - } __attribute__ ((aligned)); - struct ip_set_counter counter; - unsigned long timeout; -}; - struct set_adt_elem { ip_set_id_t id; ip_set_id_t refid; @@ -600,21 +578,18 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) /* Create list:set type of sets */ -static struct list_set * -init_list_set(struct ip_set *set, u32 size, size_t dsize, - unsigned long timeout) +static bool +init_list_set(struct ip_set *set, u32 size) { struct list_set *map; struct set_elem *e; u32 i; - map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL); + map = kzalloc(sizeof(*map) + size * set->dsize, GFP_KERNEL); if (!map) - return NULL; + return false; map->size = size; - set->dsize = dsize; - set->timeout = timeout; set->data = map; for (i = 0; i < size; i++) { @@ -622,15 +597,13 @@ init_list_set(struct ip_set *set, u32 size, size_t dsize, e->id = IPSET_INVALID_ID; } - return map; + return true; } static int list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { - struct list_set *map; - u32 size = IP_SET_LIST_DEFAULT_SIZE, cadt_flags = 0; - unsigned long timeout = 0; + u32 size = IP_SET_LIST_DEFAULT_SIZE; if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || @@ -642,45 +615,13 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (size < IP_SET_LIST_MIN_SIZE) size = IP_SET_LIST_MIN_SIZE; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (tb[IPSET_ATTR_TIMEOUT]) - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->variant = &set_variant; - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - map = init_list_set(set, size, - sizeof(struct setct_elem), timeout); - if (!map) - return -ENOMEM; - set->extensions |= IPSET_EXT_TIMEOUT; - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct setct_elem, timeout); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct setct_elem, counter); - list_set_gc_init(set, list_set_gc); - } else { - map = init_list_set(set, size, - sizeof(struct setc_elem), 0); - if (!map) - return -ENOMEM; - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct setc_elem, counter); - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - map = init_list_set(set, size, - sizeof(struct sett_elem), timeout); - if (!map) - return -ENOMEM; - set->extensions |= IPSET_EXT_TIMEOUT; - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct sett_elem, timeout); + set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem)); + if (!init_list_set(set, size)) + return -ENOMEM; + if (tb[IPSET_ATTR_TIMEOUT]) { + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); list_set_gc_init(set, list_set_gc); - } else { - map = init_list_set(set, size, sizeof(struct set_elem), 0); - if (!map) - return -ENOMEM; } return 0; } -- cgit v1.2.3-55-g7522 From 40cd63bf33b21ef4d43776b1d49c605f876fe32c Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 9 Sep 2013 14:44:29 +0200 Subject: netfilter: ipset: Support extensions which need a per data destroy function Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 22 +++++++--- net/netfilter/ipset/ip_set_bitmap_gen.h | 38 ++++++++++++++---- net/netfilter/ipset/ip_set_hash_gen.h | 71 +++++++++++++++++++++------------ net/netfilter/ipset/ip_set_list_set.c | 19 ++++++--- 4 files changed, 107 insertions(+), 43 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 66d6bd404d64..6372ee224fe8 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -49,11 +49,13 @@ enum ip_set_feature { /* Set extensions */ enum ip_set_extension { - IPSET_EXT_NONE = 0, - IPSET_EXT_BIT_TIMEOUT = 1, + IPSET_EXT_BIT_TIMEOUT = 0, IPSET_EXT_TIMEOUT = (1 << IPSET_EXT_BIT_TIMEOUT), - IPSET_EXT_BIT_COUNTER = 2, + IPSET_EXT_BIT_COUNTER = 1, IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER), + /* Mark set with an extension which needs to call destroy */ + IPSET_EXT_BIT_DESTROY = 7, + IPSET_EXT_DESTROY = (1 << IPSET_EXT_BIT_DESTROY), }; #define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT) @@ -68,6 +70,8 @@ enum ip_set_ext_id { /* Extension type */ struct ip_set_ext_type { + /* Destroy extension private data (can be NULL) */ + void (*destroy)(void *ext); enum ip_set_extension type; enum ipset_cadt_flags flag; /* Size and minimal alignment */ @@ -88,13 +92,21 @@ struct ip_set_counter { atomic64_t packets; }; +struct ip_set; + +static inline void +ip_set_ext_destroy(struct ip_set *set, void *data) +{ + /* Check that the extension is enabled for the set and + * call it's destroy function for its extension part in data. + */ +} + #define ext_timeout(e, s) \ (unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT]) #define ext_counter(e, s) \ (struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER]) -struct ip_set; - typedef int (*ipset_adtfn)(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 cmdflags); diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index f32ddbca3923..4515fe8b83dd 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -12,6 +12,7 @@ #define mtype_gc_test IPSET_TOKEN(MTYPE, _gc_test) #define mtype_is_filled IPSET_TOKEN(MTYPE, _is_filled) #define mtype_do_add IPSET_TOKEN(MTYPE, _do_add) +#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup) #define mtype_do_del IPSET_TOKEN(MTYPE, _do_del) #define mtype_do_list IPSET_TOKEN(MTYPE, _do_list) #define mtype_do_head IPSET_TOKEN(MTYPE, _do_head) @@ -46,6 +47,17 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) add_timer(&map->gc); } +static void +mtype_ext_cleanup(struct ip_set *set) +{ + struct mtype *map = set->data; + u32 id; + + for (id = 0; id < map->elements; id++) + if (test_bit(id, map->members)) + ip_set_ext_destroy(set, get_ext(set, map, id)); +} + static void mtype_destroy(struct ip_set *set) { @@ -55,8 +67,11 @@ mtype_destroy(struct ip_set *set) del_timer_sync(&map->gc); ip_set_free(map->members); - if (set->dsize) + if (set->dsize) { + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set); ip_set_free(map->extensions); + } kfree(map); set->data = NULL; @@ -67,6 +82,8 @@ mtype_flush(struct ip_set *set) { struct mtype *map = set->data; + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set); memset(map->members, 0, map->memsize); } @@ -132,6 +149,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, ret = 0; else if (!(flags & IPSET_FLAG_EXIST)) return -IPSET_ERR_EXIST; + /* Element is re-added, cleanup extensions */ + ip_set_ext_destroy(set, x); } if (SET_WITH_TIMEOUT(set)) @@ -152,11 +171,14 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct mtype *map = set->data; const struct mtype_adt_elem *e = value; - const void *x = get_ext(set, map, e->id); + void *x = get_ext(set, map, e->id); - if (mtype_do_del(e, map) || - (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, set)))) + if (mtype_do_del(e, map)) + return -IPSET_ERR_EXIST; + + ip_set_ext_destroy(set, x); + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(x, set))) return -IPSET_ERR_EXIST; return 0; @@ -235,7 +257,7 @@ mtype_gc(unsigned long ul_set) { struct ip_set *set = (struct ip_set *) ul_set; struct mtype *map = set->data; - const void *x; + void *x; u32 id; /* We run parallel with other readers (test element) @@ -244,8 +266,10 @@ mtype_gc(unsigned long ul_set) for (id = 0; id < map->elements; id++) if (mtype_gc_test(id, map, set->dsize)) { x = get_ext(set, map, id); - if (ip_set_timeout_expired(ext_timeout(x, set))) + if (ip_set_timeout_expired(ext_timeout(x, set))) { clear_bit(id, map->members); + ip_set_ext_destroy(set, x); + } } read_unlock_bh(&set->lock); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 3999f1719f69..3c26e5b946f5 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -117,23 +117,6 @@ htable_bits(u32 hashsize) return bits; } -/* Destroy the hashtable part of the set */ -static void -ahash_destroy(struct htable *t) -{ - struct hbucket *n; - u32 i; - - for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = hbucket(t, i); - if (n->size) - /* FIXME: use slab cache */ - kfree(n->value); - } - - ip_set_free(t); -} - static int hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) { @@ -192,6 +175,8 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #undef mtype_data_next #undef mtype_elem +#undef mtype_ahash_destroy +#undef mtype_ext_cleanup #undef mtype_add_cidr #undef mtype_del_cidr #undef mtype_ahash_memsize @@ -230,6 +215,8 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define mtype_data_list IPSET_TOKEN(MTYPE, _data_list) #define mtype_data_next IPSET_TOKEN(MTYPE, _data_next) #define mtype_elem IPSET_TOKEN(MTYPE, _elem) +#define mtype_ahash_destroy IPSET_TOKEN(MTYPE, _ahash_destroy) +#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup) #define mtype_add_cidr IPSET_TOKEN(MTYPE, _add_cidr) #define mtype_del_cidr IPSET_TOKEN(MTYPE, _del_cidr) #define mtype_ahash_memsize IPSET_TOKEN(MTYPE, _ahash_memsize) @@ -359,6 +346,19 @@ mtype_ahash_memsize(const struct htype *h, const struct htable *t, return memsize; } +/* Get the ith element from the array block n */ +#define ahash_data(n, i, dsize) \ + ((struct mtype_elem *)((n)->value + ((i) * (dsize)))) + +static void +mtype_ext_cleanup(struct ip_set *set, struct hbucket *n) +{ + int i; + + for (i = 0; i < n->pos; i++) + ip_set_ext_destroy(set, ahash_data(n, i, set->dsize)); +} + /* Flush a hash type of set: destroy all elements */ static void mtype_flush(struct ip_set *set) @@ -372,6 +372,8 @@ mtype_flush(struct ip_set *set) for (i = 0; i < jhash_size(t->htable_bits); i++) { n = hbucket(t, i); if (n->size) { + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set, n); n->size = n->pos = 0; /* FIXME: use slab cache */ kfree(n->value); @@ -383,6 +385,26 @@ mtype_flush(struct ip_set *set) h->elements = 0; } +/* Destroy the hashtable part of the set */ +static void +mtype_ahash_destroy(struct ip_set *set, struct htable *t) +{ + struct hbucket *n; + u32 i; + + for (i = 0; i < jhash_size(t->htable_bits); i++) { + n = hbucket(t, i); + if (n->size) { + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set, n); + /* FIXME: use slab cache */ + kfree(n->value); + } + } + + ip_set_free(t); +} + /* Destroy a hash type of set */ static void mtype_destroy(struct ip_set *set) @@ -392,7 +414,7 @@ mtype_destroy(struct ip_set *set) if (set->extensions & IPSET_EXT_TIMEOUT) del_timer_sync(&h->gc); - ahash_destroy(rcu_dereference_bh_nfnl(h->table)); + mtype_ahash_destroy(set, rcu_dereference_bh_nfnl(h->table)); #ifdef IP_SET_HASH_WITH_RBTREE rbtree_destroy(&h->rbtree); #endif @@ -430,10 +452,6 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) a->extensions == b->extensions; } -/* Get the ith element from the array block n */ -#define ahash_data(n, i, dsize) \ - ((struct mtype_elem *)((n)->value + ((i) * (dsize)))) - /* Delete expired elements from the hashtable */ static void mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) @@ -456,6 +474,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) mtype_del_cidr(h, CIDR(data->cidr), nets_length, 0); #endif + ip_set_ext_destroy(set, data); if (j != n->pos - 1) /* Not last one */ memcpy(data, @@ -557,7 +576,7 @@ retry: mtype_data_reset_flags(data, &flags); #endif read_unlock_bh(&set->lock); - ahash_destroy(t); + mtype_ahash_destroy(set, t); if (ret == -EAGAIN) goto retry; return ret; @@ -578,7 +597,7 @@ retry: pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, orig->htable_bits, orig, t->htable_bits, t); - ahash_destroy(orig); + mtype_ahash_destroy(set, orig); return 0; } @@ -642,6 +661,7 @@ reuse_slot: mtype_del_cidr(h, CIDR(data->cidr), NLEN(set->family), 0); mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); #endif + ip_set_ext_destroy(set, data); } else { /* Use/create a new slot */ TUNE_AHASH_MAX(h, multi); @@ -707,6 +727,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, #ifdef IP_SET_HASH_WITH_NETS mtype_del_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); #endif + ip_set_ext_destroy(set, data); if (n->pos + AHASH_INIT_SIZE < n->size) { void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) * set->dsize, @@ -1033,7 +1054,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) rcu_assign_pointer(h->table, t); set->data = h; - if (set->family == NFPROTO_IPV4) { + if (set->family == NFPROTO_IPV4) { set->variant = &IPSET_TOKEN(HTYPE, 4_variant); set->dsize = ip_set_elem_len(set, tb, sizeof(struct IPSET_TOKEN(HTYPE, 4_elem))); diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 7fd11c79aff4..e44986af1fc2 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -168,16 +168,19 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, struct set_elem *e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { - if (i == map->size - 1) + if (i == map->size - 1) { /* Last element replaced: e.g. add new,before,last */ ip_set_put_byindex(e->id); - else { + ip_set_ext_destroy(set, e); + } else { struct set_elem *x = list_set_elem(set, map, map->size - 1); /* Last element pushed off */ - if (x->id != IPSET_INVALID_ID) + if (x->id != IPSET_INVALID_ID) { ip_set_put_byindex(x->id); + ip_set_ext_destroy(set, x); + } memmove(list_set_elem(set, map, i + 1), e, set->dsize * (map->size - (i + 1))); } @@ -198,6 +201,7 @@ list_set_del(struct ip_set *set, u32 i) struct set_elem *e = list_set_elem(set, map, i); ip_set_put_byindex(e->id); + ip_set_ext_destroy(set, e); if (i < map->size - 1) memmove(e, list_set_elem(set, map, i + 1), @@ -266,14 +270,14 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, bool flag_exist = flags & IPSET_FLAG_EXIST; u32 i, ret = 0; + if (SET_WITH_TIMEOUT(set)) + set_cleanup_entries(set); + /* Check already added element */ for (i = 0; i < map->size; i++) { e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) goto insert; - else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, set))) - continue; else if (e->id != d->id) continue; @@ -286,6 +290,8 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* Can't re-add */ return -IPSET_ERR_EXIST; /* Update extensions */ + ip_set_ext_destroy(set, e); + if (SET_WITH_TIMEOUT(set)) ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) @@ -423,6 +429,7 @@ list_set_flush(struct ip_set *set) e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { ip_set_put_byindex(e->id); + ip_set_ext_destroy(set, e); e->id = IPSET_INVALID_ID; } } -- cgit v1.2.3-55-g7522 From b91b396d5e35eef9938a56e781cb0171a53907ca Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 9 Sep 2013 17:10:20 +0200 Subject: netfilter: ipset: list:set: make sure all elements are checked by the gc When an element timed out, the next one was skipped by the garbage collector, fixed. Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_list_set.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index e44986af1fc2..30bf1dd11f04 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -218,13 +218,16 @@ set_cleanup_entries(struct ip_set *set) { struct list_set *map = set->data; struct set_elem *e; - u32 i; + u32 i = 0; - for (i = 0; i < map->size; i++) { + while (i < map->size) { e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID && ip_set_timeout_expired(ext_timeout(e, set))) list_set_del(set, i); + /* Check element moved to position i in next loop */ + else + i++; } } -- cgit v1.2.3-55-g7522 From d9628bbeca888fd1f9a9c57864dd90b7d25954e0 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 30 Sep 2013 09:15:44 +0200 Subject: netfilter: ipset: Kconfig: ipset needs NETFILTER_NETLINK Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index ba36c283d837..fd2c14b6c51b 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -1,7 +1,7 @@ menuconfig IP_SET tristate "IP set support" depends on INET && NETFILTER - depends on NETFILTER_NETLINK + select NETFILTER_NETLINK help This option adds IP set support to the kernel. In order to define and use the sets, you need the userspace utility -- cgit v1.2.3-55-g7522 From ea53ac5b630e813aec20c7cdcfe511daca70dee4 Mon Sep 17 00:00:00 2001 From: Oliver Smith Date: Fri, 20 Sep 2013 10:13:53 +0200 Subject: netfilter: ipset: Add hash:net,net module to kernel. This adds a new set that provides the ability to configure pairs of subnets. A small amount of additional handling code has been added to the generic hash header file - this code is conditionally activated by a preprocessor definition. Signed-off-by: Oliver Smith Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/Kconfig | 9 + net/netfilter/ipset/Makefile | 1 + net/netfilter/ipset/ip_set_hash_gen.h | 58 +++- net/netfilter/ipset/ip_set_hash_netnet.c | 482 +++++++++++++++++++++++++++++++ 4 files changed, 541 insertions(+), 9 deletions(-) create mode 100644 net/netfilter/ipset/ip_set_hash_netnet.c (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index fd2c14b6c51b..9119f658a69b 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -99,6 +99,15 @@ config IP_SET_HASH_NET To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_NETNET + tristate "hash:net,net set support" + depends on IP_SET + help + This option adds the hash:net,net set type support, by which + one can store IPv4/IPv6 network address/prefix pairs in a set. + + To compile it as a module, choose M here. If unsure, say N. + config IP_SET_HASH_NETPORT tristate "hash:net,port set support" depends on IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 6e965ecd5444..43eef7a19e3c 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o obj-$(CONFIG_IP_SET_HASH_NETIFACE) += ip_set_hash_netiface.o +obj-$(CONFIG_IP_SET_HASH_NETNET) += ip_set_hash_netnet.o # list types obj-$(CONFIG_IP_SET_LIST_SET) += ip_set_list_set.o diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 3c26e5b946f5..6bd2eef4f5d0 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -142,11 +142,16 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) } #ifdef IP_SET_HASH_WITH_NETS +#if IPSET_NET_COUNT > 1 +#define __CIDR(cidr, i) (cidr[i]) +#else +#define __CIDR(cidr, i) (cidr) +#endif #ifdef IP_SET_HASH_WITH_NETS_PACKED /* When cidr is packed with nomatch, cidr - 1 is stored in the entry */ -#define CIDR(cidr) (cidr + 1) +#define CIDR(cidr, i) (__CIDR(cidr, i) + 1) #else -#define CIDR(cidr) (cidr) +#define CIDR(cidr, i) (__CIDR(cidr, i)) #endif #define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128) @@ -210,6 +215,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define mtype_do_data_match(d) 1 #endif #define mtype_data_set_flags IPSET_TOKEN(MTYPE, _data_set_flags) +#define mtype_data_reset_elem IPSET_TOKEN(MTYPE, _data_reset_elem) #define mtype_data_reset_flags IPSET_TOKEN(MTYPE, _data_reset_flags) #define mtype_data_netmask IPSET_TOKEN(MTYPE, _data_netmask) #define mtype_data_list IPSET_TOKEN(MTYPE, _data_list) @@ -461,6 +467,9 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) struct mtype_elem *data; u32 i; int j; +#ifdef IP_SET_HASH_WITH_NETS + u8 k; +#endif rcu_read_lock_bh(); t = rcu_dereference_bh(h->table); @@ -471,8 +480,9 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) if (ip_set_timeout_expired(ext_timeout(data, set))) { pr_debug("expired %u/%u\n", i, j); #ifdef IP_SET_HASH_WITH_NETS - mtype_del_cidr(h, CIDR(data->cidr), - nets_length, 0); + for (k = 0; k < IPSET_NET_COUNT; k++) + mtype_del_cidr(h, CIDR(data->cidr, k), + nets_length, k); #endif ip_set_ext_destroy(set, data); if (j != n->pos - 1) @@ -658,8 +668,12 @@ reuse_slot: /* Fill out reused slot */ data = ahash_data(n, j, set->dsize); #ifdef IP_SET_HASH_WITH_NETS - mtype_del_cidr(h, CIDR(data->cidr), NLEN(set->family), 0); - mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); + for (i = 0; i < IPSET_NET_COUNT; i++) { + mtype_del_cidr(h, CIDR(data->cidr, i), + NLEN(set->family), i); + mtype_add_cidr(h, CIDR(d->cidr, i), + NLEN(set->family), i); + } #endif ip_set_ext_destroy(set, data); } else { @@ -673,7 +687,9 @@ reuse_slot: } data = ahash_data(n, n->pos++, set->dsize); #ifdef IP_SET_HASH_WITH_NETS - mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); + for (i = 0; i < IPSET_NET_COUNT; i++) + mtype_add_cidr(h, CIDR(d->cidr, i), NLEN(set->family), + i); #endif h->elements++; } @@ -704,6 +720,9 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct mtype_elem *data; struct hbucket *n; int i, ret = -IPSET_ERR_EXIST; +#ifdef IP_SET_HASH_WITH_NETS + u8 j; +#endif u32 key, multi = 0; rcu_read_lock_bh(); @@ -725,7 +744,9 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, n->pos--; h->elements--; #ifdef IP_SET_HASH_WITH_NETS - mtype_del_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); + for (j = 0; j < IPSET_NET_COUNT; j++) + mtype_del_cidr(h, CIDR(d->cidr, j), NLEN(set->family), + j); #endif ip_set_ext_destroy(set, data); if (n->pos + AHASH_INIT_SIZE < n->size) { @@ -772,13 +793,26 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, struct htable *t = rcu_dereference_bh(h->table); struct hbucket *n; struct mtype_elem *data; +#if IPSET_NET_COUNT == 2 + struct mtype_elem orig = *d; + int i, j = 0, k; +#else int i, j = 0; +#endif u32 key, multi = 0; u8 nets_length = NLEN(set->family); pr_debug("test by nets\n"); for (; j < nets_length && h->nets[j].nets[0] && !multi; j++) { +#if IPSET_NET_COUNT == 2 + mtype_data_reset_elem(d, &orig); + mtype_data_netmask(d, h->nets[j].cidr[0], false); + for (k = 0; k < nets_length && h->nets[k].nets[1] && !multi; + k++) { + mtype_data_netmask(d, h->nets[k].cidr[1], true); +#else mtype_data_netmask(d, h->nets[j].cidr[0]); +#endif key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { @@ -798,6 +832,9 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, return mtype_data_match(data, ext, mext, set, flags); } +#if IPSET_NET_COUNT == 2 + } +#endif } return 0; } @@ -821,7 +858,10 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, #ifdef IP_SET_HASH_WITH_NETS /* If we test an IP address and not a network address, * try all possible network sizes */ - if (CIDR(d->cidr) == SET_HOST_MASK(set->family)) { + for (i = 0; i < IPSET_NET_COUNT; i++) + if (CIDR(d->cidr, i) != SET_HOST_MASK(set->family)) + break; + if (i == IPSET_NET_COUNT) { ret = mtype_test_cidrs(set, d, ext, mext, flags); goto out; } diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c new file mode 100644 index 000000000000..771ce2b565a6 --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -0,0 +1,482 @@ +/* Copyright (C) 2003-2013 Jozsef Kadlecsik + * Copyright (C) 2013 Oliver Smith + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:net type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 0 + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Oliver Smith "); +IP_SET_MODULE_DESC("hash:net,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); +MODULE_ALIAS("ip_set_hash:net,net"); + +/* Type specific function prefix */ +#define HTYPE hash_netnet +#define IP_SET_HASH_WITH_NETS +#define IPSET_NET_COUNT 2 + +/* IPv4 variants */ + +/* Member elements */ +struct hash_netnet4_elem { + union { + __be32 ip[2]; + __be64 ipcmp; + }; + u8 nomatch; + union { + u8 cidr[2]; + u16 ccmp; + }; +}; + +/* Common functions */ + +static inline bool +hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1, + const struct hash_netnet4_elem *ip2, + u32 *multi) +{ + return ip1->ipcmp == ip2->ipcmp && + ip2->ccmp == ip2->ccmp; +} + +static inline int +hash_netnet4_do_data_match(const struct hash_netnet4_elem *elem) +{ + return elem->nomatch ? -ENOTEMPTY : 1; +} + +static inline void +hash_netnet4_data_set_flags(struct hash_netnet4_elem *elem, u32 flags) +{ + elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; +} + +static inline void +hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags) +{ + swap(*flags, elem->nomatch); +} + +static inline void +hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem, + struct hash_netnet4_elem *orig) +{ + elem->ip[1] = orig->ip[1]; +} + +static inline void +hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner) +{ + if (inner) { + elem->ip[1] &= ip_set_netmask(cidr); + elem->cidr[1] = cidr; + } else { + elem->ip[0] &= ip_set_netmask(cidr); + elem->cidr[0] = cidr; + } +} + +static bool +hash_netnet4_data_list(struct sk_buff *skb, + const struct hash_netnet4_elem *data) +{ + u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; + + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip[0]) || + nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip[1]) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} + +static inline void +hash_netnet4_data_next(struct hash_netnet4_elem *next, + const struct hash_netnet4_elem *d) +{ + next->ipcmp = d->ipcmp; +} + +#define MTYPE hash_netnet4 +#define PF 4 +#define HOST_MASK 32 +#include "ip_set_hash_gen.h" + +static int +hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + const struct hash_netnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet4_elem e = { + .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK, + .cidr[1] = h->nets[0].cidr[1] ? h->nets[0].cidr[1] : HOST_MASK, + }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + if (adt == IPSET_TEST) + e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; + + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0]); + ip4addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1]); + e.ip[0] &= ip_set_netmask(e.cidr[0]); + e.ip[1] &= ip_set_netmask(e.cidr[1]); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + const struct hash_netnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet4_elem e = { .cidr[0] = HOST_MASK, + .cidr[1] = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, last; + u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2; + u8 cidr, cidr2; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || + ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) { + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!cidr || cidr > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + e.cidr[0] = cidr; + } + + if (tb[IPSET_ATTR_CIDR2]) { + cidr2 = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + if (!cidr2 || cidr2 > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + e.cidr[1] = cidr2; + } + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); + } + + if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] && + tb[IPSET_ATTR_IP2_TO])) { + e.ip[0] = htonl(ip & ip_set_hostmask(e.cidr[0])); + e.ip[1] = htonl(ip2_from & ip_set_hostmask(e.cidr[1])); + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_enomatch(ret, flags, adt, set) ? -ret : + ip_set_eexist(ret, flags) ? 0 : ret; + } + + ip_to = ip; + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip_to < ip) + swap(ip, ip_to); + if (ip + UINT_MAX == ip_to) + return -IPSET_ERR_HASH_RANGE; + } + + ip2_to = ip2_from; + if (tb[IPSET_ATTR_IP2_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); + if (ret) + return ret; + if (ip2_to < ip2_from) + swap(ip2_from, ip2_to); + if (ip2_from + UINT_MAX == ip2_to) + return -IPSET_ERR_HASH_RANGE; + + } + + if (retried) + ip = ntohl(h->next.ip[0]); + + while (!after(ip, ip_to)) { + e.ip[0] = htonl(ip); + last = ip_set_range_to_cidr(ip, ip_to, &cidr); + e.cidr[0] = cidr; + ip2 = (retried && + ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1]) + : ip2_from; + while (!after(ip2, ip2_to)) { + e.ip[1] = htonl(ip2); + last2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr2); + e.cidr[1] = cidr2; + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + ip2 = last2 + 1; + } + ip = last + 1; + } + return ret; +} + +/* IPv6 variants */ + +struct hash_netnet6_elem { + union nf_inet_addr ip[2]; + u8 nomatch; + union { + u8 cidr[2]; + u16 ccmp; + }; +}; + +/* Common functions */ + +static inline bool +hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1, + const struct hash_netnet6_elem *ip2, + u32 *multi) +{ + return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) && + ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) && + ip1->ccmp == ip2->ccmp; +} + +static inline int +hash_netnet6_do_data_match(const struct hash_netnet6_elem *elem) +{ + return elem->nomatch ? -ENOTEMPTY : 1; +} + +static inline void +hash_netnet6_data_set_flags(struct hash_netnet6_elem *elem, u32 flags) +{ + elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; +} + +static inline void +hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags) +{ + swap(*flags, elem->nomatch); +} + +static inline void +hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem, + struct hash_netnet6_elem *orig) +{ + elem->ip[1] = orig->ip[1]; +} + +static inline void +hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner) +{ + if (inner) { + ip6_netmask(&elem->ip[1], cidr); + elem->cidr[1] = cidr; + } else { + ip6_netmask(&elem->ip[0], cidr); + elem->cidr[0] = cidr; + } +} + +static bool +hash_netnet6_data_list(struct sk_buff *skb, + const struct hash_netnet6_elem *data) +{ + u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; + + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip[0].in6) || + nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip[1].in6) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} + +static inline void +hash_netnet6_data_next(struct hash_netnet4_elem *next, + const struct hash_netnet6_elem *d) +{ +} + +#undef MTYPE +#undef PF +#undef HOST_MASK + +#define MTYPE hash_netnet6 +#define PF 6 +#define HOST_MASK 128 +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" + +static int +hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + const struct hash_netnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet6_elem e = { + .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK, + .cidr[1] = h->nets[0].cidr[1] ? h->nets[0].cidr[1] : HOST_MASK + }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + if (adt == IPSET_TEST) + e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK; + + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1].in6); + ip6_netmask(&e.ip[0], e.cidr[0]); + ip6_netmask(&e.ip[1], e.cidr[1]); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet6_elem e = { .cidr[0] = HOST_MASK, + .cidr[1] = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) || + ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) + e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (tb[IPSET_ATTR_CIDR2]) + e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + + if (!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] || + e.cidr[1] > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + + ip6_netmask(&e.ip[0], e.cidr[0]); + ip6_netmask(&e.ip[1], e.cidr[1]); + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); + } + + ret = adtfn(set, &e, &ext, &ext, flags); + + return ip_set_enomatch(ret, flags, adt, set) ? -ret : + ip_set_eexist(ret, flags) ? 0 : ret; +} + +static struct ip_set_type hash_netnet_type __read_mostly = { + .name = "hash:net,net", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_IP2 | IPSET_TYPE_NOMATCH, + .dimension = IPSET_DIM_TWO, + .family = NFPROTO_UNSPEC, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, + .create = hash_netnet_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_CIDR2] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_netnet_init(void) +{ + return ip_set_type_register(&hash_netnet_type); +} + +static void __exit +hash_netnet_fini(void) +{ + ip_set_type_unregister(&hash_netnet_type); +} + +module_init(hash_netnet_init); +module_exit(hash_netnet_fini); -- cgit v1.2.3-55-g7522 From 68b63f08d22f23161c43cd2417104aa213ff877f Mon Sep 17 00:00:00 2001 From: Oliver Smith Date: Sun, 22 Sep 2013 20:56:30 +0200 Subject: netfilter: ipset: Support comments for ipset entries in the core. This adds the core support for having comments on ipset entries. The comments are stored as standard null-terminated strings in dynamically allocated memory after being passed to the kernel. As a result of this, code has been added to the generic destroy function to iterate all extensions and call that extension's destroy task if the set has that extension activated, and if such a task is defined. Signed-off-by: Oliver Smith Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 51 +++++++++++++++++++---- include/linux/netfilter/ipset/ip_set_comment.h | 57 ++++++++++++++++++++++++++ include/uapi/linux/netfilter/ipset/ip_set.h | 8 +++- net/netfilter/ipset/ip_set_core.c | 14 +++++++ 4 files changed, 121 insertions(+), 9 deletions(-) create mode 100644 include/linux/netfilter/ipset/ip_set_comment.h (limited to 'net/netfilter') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 6372ee224fe8..407f84df6a47 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -53,6 +53,8 @@ enum ip_set_extension { IPSET_EXT_TIMEOUT = (1 << IPSET_EXT_BIT_TIMEOUT), IPSET_EXT_BIT_COUNTER = 1, IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER), + IPSET_EXT_BIT_COMMENT = 2, + IPSET_EXT_COMMENT = (1 << IPSET_EXT_BIT_COMMENT), /* Mark set with an extension which needs to call destroy */ IPSET_EXT_BIT_DESTROY = 7, IPSET_EXT_DESTROY = (1 << IPSET_EXT_BIT_DESTROY), @@ -60,11 +62,13 @@ enum ip_set_extension { #define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT) #define SET_WITH_COUNTER(s) ((s)->extensions & IPSET_EXT_COUNTER) +#define SET_WITH_COMMENT(s) ((s)->extensions & IPSET_EXT_COMMENT) /* Extension id, in size order */ enum ip_set_ext_id { IPSET_EXT_ID_COUNTER = 0, IPSET_EXT_ID_TIMEOUT, + IPSET_EXT_ID_COMMENT, IPSET_EXT_ID_MAX, }; @@ -85,6 +89,7 @@ struct ip_set_ext { u64 packets; u64 bytes; u32 timeout; + char *comment; }; struct ip_set_counter { @@ -92,20 +97,19 @@ struct ip_set_counter { atomic64_t packets; }; -struct ip_set; +struct ip_set_comment { + char *str; +}; -static inline void -ip_set_ext_destroy(struct ip_set *set, void *data) -{ - /* Check that the extension is enabled for the set and - * call it's destroy function for its extension part in data. - */ -} +struct ip_set; #define ext_timeout(e, s) \ (unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT]) #define ext_counter(e, s) \ (struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER]) +#define ext_comment(e, s) \ +(struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT]) + typedef int (*ipset_adtfn)(struct ip_set *set, void *value, const struct ip_set_ext *ext, @@ -222,6 +226,36 @@ struct ip_set { void *data; }; +static inline void +ip_set_ext_destroy(struct ip_set *set, void *data) +{ + /* Check that the extension is enabled for the set and + * call it's destroy function for its extension part in data. + */ + if (SET_WITH_COMMENT(set)) + ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy( + ext_comment(data, set)); +} + +static inline int +ip_set_put_flags(struct sk_buff *skb, struct ip_set *set) +{ + u32 cadt_flags = 0; + + if (SET_WITH_TIMEOUT(set)) + if (unlikely(nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(set->timeout)))) + return -EMSGSIZE; + if (SET_WITH_COUNTER(set)) + cadt_flags |= IPSET_FLAG_WITH_COUNTERS; + if (SET_WITH_COMMENT(set)) + cadt_flags |= IPSET_FLAG_WITH_COMMENT; + + if (!cadt_flags) + return 0; + return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags)); +} + static inline void ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter) { @@ -425,6 +459,7 @@ bitmap_bytes(u32 a, u32 b) } #include +#include #define IP_SET_INIT_KEXT(skb, opt, set) \ { .bytes = (skb)->len, .packets = 1, \ diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h new file mode 100644 index 000000000000..21217ea008d7 --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_comment.h @@ -0,0 +1,57 @@ +#ifndef _IP_SET_COMMENT_H +#define _IP_SET_COMMENT_H + +/* Copyright (C) 2013 Oliver Smith + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifdef __KERNEL__ + +static inline char* +ip_set_comment_uget(struct nlattr *tb) +{ + return nla_data(tb); +} + +static inline void +ip_set_init_comment(struct ip_set_comment *comment, + const struct ip_set_ext *ext) +{ + size_t len = ext->comment ? strlen(ext->comment) : 0; + + if (unlikely(comment->str)) { + kfree(comment->str); + comment->str = NULL; + } + if (!len) + return; + if (unlikely(len > IPSET_MAX_COMMENT_SIZE)) + len = IPSET_MAX_COMMENT_SIZE; + comment->str = kzalloc(len + 1, GFP_ATOMIC); + if (unlikely(!comment->str)) + return; + strlcpy(comment->str, ext->comment, len + 1); +} + +static inline int +ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment) +{ + if (!comment->str) + return 0; + return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str); +} + +static inline void +ip_set_comment_free(struct ip_set_comment *comment) +{ + if (unlikely(!comment->str)) + return; + kfree(comment->str); + comment->str = NULL; +} + +#endif +#endif diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 2b61ac44dcc1..25d3b2f79c02 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -10,12 +10,14 @@ #ifndef _UAPI_IP_SET_H #define _UAPI_IP_SET_H - #include /* The protocol version */ #define IPSET_PROTOCOL 6 +/* The maximum permissible comment length we will accept over netlink */ +#define IPSET_MAX_COMMENT_SIZE 255 + /* The max length of strings including NUL: set and type identifiers */ #define IPSET_MAXNAMELEN 32 @@ -110,6 +112,7 @@ enum { IPSET_ATTR_IFACE, IPSET_ATTR_BYTES, IPSET_ATTR_PACKETS, + IPSET_ATTR_COMMENT, __IPSET_ATTR_ADT_MAX, }; #define IPSET_ATTR_ADT_MAX (__IPSET_ATTR_ADT_MAX - 1) @@ -140,6 +143,7 @@ enum ipset_errno { IPSET_ERR_IPADDR_IPV4, IPSET_ERR_IPADDR_IPV6, IPSET_ERR_COUNTER, + IPSET_ERR_COMMENT, /* Type specific error codes */ IPSET_ERR_TYPE_SPECIFIC = 4352, @@ -176,6 +180,8 @@ enum ipset_cadt_flags { IPSET_FLAG_NOMATCH = (1 << IPSET_FLAG_BIT_NOMATCH), IPSET_FLAG_BIT_WITH_COUNTERS = 3, IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS), + IPSET_FLAG_BIT_WITH_COMMENT = 4, + IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT), IPSET_FLAG_CADT_MAX = 15, }; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index f35afed3814f..3bf9a3d29dff 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -315,6 +315,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); +typedef void (*destroyer)(void *); /* ipset data extension types, in size order */ const struct ip_set_ext_type ip_set_extensions[] = { @@ -329,6 +330,13 @@ const struct ip_set_ext_type ip_set_extensions[] = { .len = sizeof(unsigned long), .align = __alignof__(unsigned long), }, + [IPSET_EXT_ID_COMMENT] = { + .type = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY, + .flag = IPSET_FLAG_WITH_COMMENT, + .len = sizeof(struct ip_set_comment), + .align = __alignof__(struct ip_set_comment), + .destroy = (destroyer) ip_set_comment_free, + }, }; EXPORT_SYMBOL_GPL(ip_set_extensions); @@ -380,6 +388,12 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], ext->packets = be64_to_cpu(nla_get_be64( tb[IPSET_ATTR_PACKETS])); } + if (tb[IPSET_ATTR_COMMENT]) { + if (!(set->extensions & IPSET_EXT_COMMENT)) + return -IPSET_ERR_COMMENT; + ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]); + } + return 0; } EXPORT_SYMBOL_GPL(ip_set_get_extensions); -- cgit v1.2.3-55-g7522 From b90cb8ba19dac9b98add5e64adb583fccbf63f94 Mon Sep 17 00:00:00 2001 From: Oliver Smith Date: Sun, 22 Sep 2013 20:56:32 +0200 Subject: netfilter: ipset: Support comments in bitmap-type ipsets. This provides kernel support for creating bitmap ipsets with comment support. As is the case for hashes, this incurs a penalty when flushing or destroying the entire ipset as the entries must first be walked in order to free the comment strings. This penalty is of course far less than the cost of listing an ipset to userspace. Any set created without support for comments will be flushed/destroyed as before. Signed-off-by: Oliver Smith Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_gen.h | 14 ++++++++------ net/netfilter/ipset/ip_set_bitmap_ip.c | 4 +++- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 4 +++- net/netfilter/ipset/ip_set_bitmap_port.c | 4 +++- 4 files changed, 17 insertions(+), 9 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 4515fe8b83dd..6167fc9d0efe 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -101,12 +101,9 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->memsize + - set->dsize * map->elements)) || - (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) || - (SET_WITH_COUNTER(set) && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, - htonl(IPSET_FLAG_WITH_COUNTERS)))) + set->dsize * map->elements))) + goto nla_put_failure; + if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; ipset_nest_end(skb, nested); @@ -162,6 +159,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(x, set), ext); + if (SET_WITH_COMMENT(set)) + ip_set_init_comment(ext_comment(x, set), ext); return 0; } @@ -233,6 +232,9 @@ mtype_list(const struct ip_set *set, if (SET_WITH_COUNTER(set) && ip_set_put_counter(skb, ext_counter(x, set))) goto nla_put_failure; + if (SET_WITH_COMMENT(set) && + ip_set_put_comment(skb, ext_comment(x, set))) + goto nla_put_failure; ipset_nest_end(skb, nested); } ipset_nest_end(skb, adt); diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 94d985457c51..faac124e2645 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -26,7 +26,8 @@ #include #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 1 /* Counter support added */ +/* 1 Counter support added */ +#define IPSET_TYPE_REV_MAX 2 /* Comment support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -354,6 +355,7 @@ static struct ip_set_type bitmap_ip_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 654a97bedfe9..fb4d163dea82 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -26,7 +26,8 @@ #include #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 1 /* Counter support added */ +/* 1 Counter support added */ +#define IPSET_TYPE_REV_MAX 2 /* Comment support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -403,6 +404,7 @@ static struct ip_set_type bitmap_ipmac_type = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 1ef2f3186b80..407a63caee6b 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -21,7 +21,8 @@ #include #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 1 /* Counter support added */ +/* 1 Counter support added */ +#define IPSET_TYPE_REV_MAX 2 /* Comment support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -288,6 +289,7 @@ static struct ip_set_type bitmap_port_type = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; -- cgit v1.2.3-55-g7522 From 81b10bb4bd16ea975c007f9bb2c2676cef6ade10 Mon Sep 17 00:00:00 2001 From: Oliver Smith Date: Sun, 22 Sep 2013 20:56:33 +0200 Subject: netfilter: ipset: Support comments in the list-type ipset. This provides kernel support for creating list ipsets with the comment annotation extension. Signed-off-by: Oliver Smith Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_list_set.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 30bf1dd11f04..f9681dcf7476 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -16,7 +16,8 @@ #include #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 1 /* Counters support added */ +/* 1 Counters support added */ +#define IPSET_TYPE_REV_MAX 2 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -191,6 +192,8 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(e, set), ext); + if (SET_WITH_COMMENT(set) && ext->comment) + ip_set_init_comment(ext_comment(e, set), ext); return 0; } @@ -299,6 +302,8 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(e, set), ext); + if (SET_WITH_COMMENT(set)) + ip_set_init_comment(ext_comment(e, set), ext); /* Set is already added to the list */ ip_set_put_byindex(d->id); return 0; @@ -461,15 +466,12 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || - (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) || - (SET_WITH_COUNTER(set) && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, - htonl(IPSET_FLAG_WITH_COUNTERS))) || nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->size * set->dsize))) goto nla_put_failure; + if (unlikely(ip_set_put_flags(skb, set))) + goto nla_put_failure; ipset_nest_end(skb, nested); return 0; @@ -516,6 +518,9 @@ list_set_list(const struct ip_set *set, if (SET_WITH_COUNTER(set) && ip_set_put_counter(skb, ext_counter(e, set))) goto nla_put_failure; + if (SET_WITH_COMMENT(set) && + ip_set_put_comment(skb, ext_comment(e, set))) + goto nla_put_failure; ipset_nest_end(skb, nested); } finish: @@ -660,6 +665,7 @@ static struct ip_set_type list_set_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; -- cgit v1.2.3-55-g7522 From fda75c6d9e31a901e25b922e86c8fd505c899873 Mon Sep 17 00:00:00 2001 From: Oliver Smith Date: Sun, 22 Sep 2013 20:56:31 +0200 Subject: netfilter: ipset: Support comments in hash-type ipsets. This provides kernel support for creating ipsets with comment support. This does incur a penalty to flushing/destroying an ipset since all entries are walked in order to free the allocated strings, this penalty is of course less expensive than the operation of listing an ipset to userspace, so for general-purpose usage the overall impact is expected to be little to none. Signed-off-by: Oliver Smith Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_hash_gen.h | 14 ++++++++------ net/netfilter/ipset/ip_set_hash_ip.c | 4 +++- net/netfilter/ipset/ip_set_hash_ipport.c | 4 +++- net/netfilter/ipset/ip_set_hash_ipportip.c | 4 +++- net/netfilter/ipset/ip_set_hash_ipportnet.c | 4 +++- net/netfilter/ipset/ip_set_hash_net.c | 4 +++- net/netfilter/ipset/ip_set_hash_netiface.c | 4 +++- net/netfilter/ipset/ip_set_hash_netnet.c | 1 + net/netfilter/ipset/ip_set_hash_netport.c | 4 +++- 9 files changed, 30 insertions(+), 13 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 6bd2eef4f5d0..6efb65569e88 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -701,6 +701,8 @@ reuse_slot: ip_set_timeout_set(ext_timeout(data, set), ext->timeout); if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(data, set), ext); + if (SET_WITH_COMMENT(set)) + ip_set_init_comment(ext_comment(data, set), ext); out: rcu_read_unlock_bh(); @@ -908,12 +910,9 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; #endif if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || - nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || - ((set->extensions & IPSET_EXT_TIMEOUT) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) || - ((set->extensions & IPSET_EXT_COUNTER) && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, - htonl(IPSET_FLAG_WITH_COUNTERS)))) + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) + goto nla_put_failure; + if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; ipset_nest_end(skb, nested); @@ -970,6 +969,9 @@ mtype_list(const struct ip_set *set, if (SET_WITH_COUNTER(set) && ip_set_put_counter(skb, ext_counter(e, set))) goto nla_put_failure; + if (SET_WITH_COMMENT(set) && + ip_set_put_comment(skb, ext_comment(e, set))) + goto nla_put_failure; ipset_nest_end(skb, nested); } } diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index a111ffe40b46..e65fc2423d56 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -24,7 +24,8 @@ #include #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 1 /* Counters support */ +/* 1 Counters support */ +#define IPSET_TYPE_REV_MAX 2 /* Comments support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -292,6 +293,7 @@ static struct ip_set_type hash_ip_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 5dc735c4dac2..525a595dd1fe 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -26,7 +26,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 SCTP and UDPLITE support added */ -#define IPSET_TYPE_REV_MAX 2 /* Counters support added */ +/* 2 Counters support added */ +#define IPSET_TYPE_REV_MAX 3 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -367,6 +368,7 @@ static struct ip_set_type hash_ipport_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 8c43dc7811cb..f5636631466e 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -26,7 +26,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 SCTP and UDPLITE support added */ -#define IPSET_TYPE_REV_MAX 2 /* Counters support added */ +/* 2 Counters support added */ +#define IPSET_TYPE_REV_MAX 3 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -379,6 +380,7 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 34890452366c..5d87fe8a41ff 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -28,7 +28,8 @@ /* 1 SCTP and UDPLITE support added */ /* 2 Range as input support for IPv4 added */ /* 3 nomatch flag support added */ -#define IPSET_TYPE_REV_MAX 4 /* Counters support added */ +/* 4 Counters support added */ +#define IPSET_TYPE_REV_MAX 5 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -538,6 +539,7 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index d5598557f4a9..8295cf4f9fdc 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -25,7 +25,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 Range as input support for IPv4 added */ /* 2 nomatch flag support added */ -#define IPSET_TYPE_REV_MAX 3 /* Counters support added */ +/* 3 Counters support added */ +#define IPSET_TYPE_REV_MAX 4 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -374,6 +375,7 @@ static struct ip_set_type hash_net_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 26703e9e5082..3f64a66bf5d9 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -26,7 +26,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 nomatch flag support added */ /* 2 /0 support added */ -#define IPSET_TYPE_REV_MAX 3 /* Counters support added */ +/* 3 Counters support added */ +#define IPSET_TYPE_REV_MAX 4 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -606,6 +607,7 @@ static struct ip_set_type hash_netiface_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 771ce2b565a6..426032706ca9 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -462,6 +462,7 @@ static struct ip_set_type hash_netnet_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 45b6e91b0636..7097fb0141bf 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -27,7 +27,8 @@ /* 1 SCTP and UDPLITE support added */ /* 2 Range as input support for IPv4 added */ /* 3 nomatch flag support added */ -#define IPSET_TYPE_REV_MAX 4 /* Counters support added */ +/* 4 Counters support added */ +#define IPSET_TYPE_REV_MAX 5 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -486,6 +487,7 @@ static struct ip_set_type hash_netport_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; -- cgit v1.2.3-55-g7522 From 80571a9ea4ca9b6fee7ef0c13d3f31e774e0533b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 23 Sep 2013 17:45:21 +0200 Subject: netfilter: ipset: Fix hash resizing with comments The destroy function must take into account that resizing doesn't create new extensions so those cannot be destroyed at resize. Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_hash_gen.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 6efb65569e88..1cffeb977605 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -393,7 +393,7 @@ mtype_flush(struct ip_set *set) /* Destroy the hashtable part of the set */ static void -mtype_ahash_destroy(struct ip_set *set, struct htable *t) +mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) { struct hbucket *n; u32 i; @@ -401,7 +401,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t) for (i = 0; i < jhash_size(t->htable_bits); i++) { n = hbucket(t, i); if (n->size) { - if (set->extensions & IPSET_EXT_DESTROY) + if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) mtype_ext_cleanup(set, n); /* FIXME: use slab cache */ kfree(n->value); @@ -420,7 +420,7 @@ mtype_destroy(struct ip_set *set) if (set->extensions & IPSET_EXT_TIMEOUT) del_timer_sync(&h->gc); - mtype_ahash_destroy(set, rcu_dereference_bh_nfnl(h->table)); + mtype_ahash_destroy(set, rcu_dereference_bh_nfnl(h->table), true); #ifdef IP_SET_HASH_WITH_RBTREE rbtree_destroy(&h->rbtree); #endif @@ -586,7 +586,7 @@ retry: mtype_data_reset_flags(data, &flags); #endif read_unlock_bh(&set->lock); - mtype_ahash_destroy(set, t); + mtype_ahash_destroy(set, t, false); if (ret == -EAGAIN) goto retry; return ret; @@ -607,7 +607,7 @@ retry: pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, orig->htable_bits, orig, t->htable_bits, t); - mtype_ahash_destroy(set, orig); + mtype_ahash_destroy(set, orig, false); return 0; } -- cgit v1.2.3-55-g7522 From 8ec81f9a4db94fa5638c55793365b896dd9daecc Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 23 Sep 2013 21:28:06 +0200 Subject: netfilter: ipset: For set:list types, replaced elements must be zeroed out The new extensions require zero initialization for the new element to be added into a slot from where another element was pushed away. Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_list_set.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index f9681dcf7476..e23f33c14435 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -184,6 +184,8 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, } memmove(list_set_elem(set, map, i + 1), e, set->dsize * (map->size - (i + 1))); + /* Extensions must be initialized to zero */ + memset(e, 0, set->dsize); } } @@ -192,7 +194,7 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(e, set), ext); - if (SET_WITH_COMMENT(set) && ext->comment) + if (SET_WITH_COMMENT(set)) ip_set_init_comment(ext_comment(e, set), ext); return 0; } -- cgit v1.2.3-55-g7522 From 3fd986b3d99e3847f1cce6fc36043d0f16508e1d Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 25 Sep 2013 17:44:35 +0200 Subject: netfilter: ipset: Use a common function at listing the extensions Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 21 +++++++++++++++++++++ net/netfilter/ipset/ip_set_bitmap_gen.h | 29 ++++++++++------------------- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 11 ----------- net/netfilter/ipset/ip_set_hash_gen.h | 11 +---------- net/netfilter/ipset/ip_set_list_set.c | 11 +---------- 5 files changed, 33 insertions(+), 50 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 407f84df6a47..da2a45acf74c 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -461,6 +461,27 @@ bitmap_bytes(u32 a, u32 b) #include #include +static inline int +ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, + const void *e, bool active) +{ + if (SET_WITH_TIMEOUT(set)) { + unsigned long *timeout = ext_timeout(e, set); + + if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(active ? ip_set_timeout_get(timeout) + : *timeout))) + return -EMSGSIZE; + } + if (SET_WITH_COUNTER(set) && + ip_set_put_counter(skb, ext_counter(e, set))) + return -EMSGSIZE; + if (SET_WITH_COMMENT(set) && + ip_set_put_comment(skb, ext_comment(e, set))) + return -EMSGSIZE; + return 0; +} + #define IP_SET_INIT_KEXT(skb, opt, set) \ { .bytes = (skb)->len, .packets = 1, \ .timeout = ip_set_adt_opt_timeout(opt, set) } diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 6167fc9d0efe..a13e15be7911 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -183,6 +183,14 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, return 0; } +#ifndef IP_SET_BITMAP_STORED_TIMEOUT +static inline bool +mtype_is_filled(const struct mtype_elem *x) +{ + return true; +} +#endif + static int mtype_list(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb) @@ -215,25 +223,8 @@ mtype_list(const struct ip_set *set, } if (mtype_do_list(skb, map, id, set->dsize)) goto nla_put_failure; - if (SET_WITH_TIMEOUT(set)) { -#ifdef IP_SET_BITMAP_STORED_TIMEOUT - if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_stored(map, id, - ext_timeout(x, set), - set->dsize)))) - goto nla_put_failure; -#else - if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get( - ext_timeout(x, set))))) - goto nla_put_failure; -#endif - } - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(x, set))) - goto nla_put_failure; - if (SET_WITH_COMMENT(set) && - ip_set_put_comment(skb, ext_comment(x, set))) + if (ip_set_put_extensions(skb, set, x, + mtype_is_filled((const struct mtype_elem *) x))) goto nla_put_failure; ipset_nest_end(skb, nested); } diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index fb4d163dea82..87a218f8ab5f 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -176,17 +176,6 @@ bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e, return !test_and_clear_bit(e->id, map->members); } -static inline unsigned long -ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout, - size_t dsize) -{ - const struct bitmap_ipmac_elem *elem = - get_elem(map->extensions, id, dsize); - - return elem->filled == MAC_FILLED ? ip_set_timeout_get(timeout) : - *timeout; -} - static inline int bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map, u32 id, size_t dsize) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 1cffeb977605..b4add206c603 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -961,16 +961,7 @@ mtype_list(const struct ip_set *set, } if (mtype_data_list(skb, e)) goto nla_put_failure; - if (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get( - ext_timeout(e, set))))) - goto nla_put_failure; - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, set))) - goto nla_put_failure; - if (SET_WITH_COMMENT(set) && - ip_set_put_comment(skb, ext_comment(e, set))) + if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; ipset_nest_end(skb, nested); } diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index e23f33c14435..ba4232e451f4 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -512,16 +512,7 @@ list_set_list(const struct ip_set *set, if (nla_put_string(skb, IPSET_ATTR_NAME, ip_set_name_byindex(e->id))) goto nla_put_failure; - if (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get( - ext_timeout(e, set))))) - goto nla_put_failure; - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, set))) - goto nla_put_failure; - if (SET_WITH_COMMENT(set) && - ip_set_put_comment(skb, ext_comment(e, set))) + if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; ipset_nest_end(skb, nested); } -- cgit v1.2.3-55-g7522 From 1785e8f473082aa60d62c7165856cf6484077b99 Mon Sep 17 00:00:00 2001 From: Vitaly Lavrov Date: Mon, 30 Sep 2013 17:07:02 +0200 Subject: netfiler: ipset: Add net namespace for ipset This patch adds netns support for ipset. Major changes were made in ip_set_core.c and ip_set.h. Global variables are moved to per net namespace. Added initialization code and the destruction of the network namespace ipset subsystem. In the prototypes of public functions ip_set_* added parameter "struct net*". The remaining corrections related to the change prototypes of public functions ip_set_*. The patch for git://git.netfilter.org/ipset.git commit 6a4ec96c0b8caac5c35474e40e319704d92ca347 Signed-off-by: Vitaly Lavrov Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 16 +- net/netfilter/ipset/ip_set_bitmap_ip.c | 3 +- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 2 +- net/netfilter/ipset/ip_set_bitmap_port.c | 3 +- net/netfilter/ipset/ip_set_core.c | 288 +++++++++++++++++++----------- net/netfilter/ipset/ip_set_hash_gen.h | 3 +- net/netfilter/ipset/ip_set_list_set.c | 31 ++-- net/netfilter/xt_set.c | 40 ++--- net/sched/em_ipset.c | 7 +- 9 files changed, 245 insertions(+), 148 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index da2a45acf74c..7967516adc0d 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -184,7 +184,8 @@ struct ip_set_type { u8 revision_min, revision_max; /* Create set */ - int (*create)(struct ip_set *set, struct nlattr *tb[], u32 flags); + int (*create)(struct net *net, struct ip_set *set, + struct nlattr *tb[], u32 flags); /* Attribute policies */ const struct nla_policy create_policy[IPSET_ATTR_CREATE_MAX + 1]; @@ -316,12 +317,13 @@ ip_set_init_counter(struct ip_set_counter *counter, } /* register and unregister set references */ -extern ip_set_id_t ip_set_get_byname(const char *name, struct ip_set **set); -extern void ip_set_put_byindex(ip_set_id_t index); -extern const char *ip_set_name_byindex(ip_set_id_t index); -extern ip_set_id_t ip_set_nfnl_get(const char *name); -extern ip_set_id_t ip_set_nfnl_get_byindex(ip_set_id_t index); -extern void ip_set_nfnl_put(ip_set_id_t index); +extern ip_set_id_t ip_set_get_byname(struct net *net, + const char *name, struct ip_set **set); +extern void ip_set_put_byindex(struct net *net, ip_set_id_t index); +extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index); +extern ip_set_id_t ip_set_nfnl_get(struct net *net, const char *name); +extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index); +extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index); /* API for iptables set match, and SET target */ diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index faac124e2645..6f1f9f494808 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -242,7 +242,8 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, } static int -bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], + u32 flags) { struct bitmap_ip *map; u32 first_ip = 0, last_ip = 0, hosts; diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 87a218f8ab5f..740eabededd9 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -309,7 +309,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, } static int -bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], +bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], u32 flags) { u32 first_ip = 0, last_ip = 0; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 407a63caee6b..e7603c5b53d7 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -228,7 +228,8 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, } static int -bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], + u32 flags) { struct bitmap_port *map; u16 first_port, last_port; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 3bf9a3d29dff..dc9284bdd2dd 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include @@ -27,8 +29,17 @@ static LIST_HEAD(ip_set_type_list); /* all registered set types */ static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */ -static struct ip_set * __rcu *ip_set_list; /* all individual sets */ -static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */ +struct ip_set_net { + struct ip_set * __rcu *ip_set_list; /* all individual sets */ + ip_set_id_t ip_set_max; /* max number of sets */ + int is_deleted; /* deleted by ip_set_net_exit */ +}; +static int ip_set_net_id __read_mostly; + +static inline struct ip_set_net *ip_set_pernet(struct net *net) +{ + return net_generic(net, ip_set_net_id); +} #define IP_SET_INC 64 #define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0) @@ -45,8 +56,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); /* When the nfnl mutex is held: */ #define nfnl_dereference(p) \ rcu_dereference_protected(p, 1) -#define nfnl_set(id) \ - nfnl_dereference(ip_set_list)[id] +#define nfnl_set(inst, id) \ + nfnl_dereference((inst)->ip_set_list)[id] /* * The set types are implemented in modules and registered set types @@ -434,13 +445,14 @@ __ip_set_put(struct ip_set *set) */ static inline struct ip_set * -ip_set_rcu_get(ip_set_id_t index) +ip_set_rcu_get(struct net *net, ip_set_id_t index) { struct ip_set *set; + struct ip_set_net *inst = ip_set_pernet(net); rcu_read_lock(); /* ip_set_list itself needs to be protected */ - set = rcu_dereference(ip_set_list)[index]; + set = rcu_dereference(inst->ip_set_list)[index]; rcu_read_unlock(); return set; @@ -450,7 +462,8 @@ int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(index); + struct ip_set *set = ip_set_rcu_get( + dev_net(par->in ? par->in : par->out), index); int ret = 0; BUG_ON(set == NULL); @@ -488,7 +501,8 @@ int ip_set_add(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(index); + struct ip_set *set = ip_set_rcu_get( + dev_net(par->in ? par->in : par->out), index); int ret; BUG_ON(set == NULL); @@ -510,7 +524,8 @@ int ip_set_del(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(index); + struct ip_set *set = ip_set_rcu_get( + dev_net(par->in ? par->in : par->out), index); int ret = 0; BUG_ON(set == NULL); @@ -534,14 +549,15 @@ EXPORT_SYMBOL_GPL(ip_set_del); * */ ip_set_id_t -ip_set_get_byname(const char *name, struct ip_set **set) +ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) { ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; + struct ip_set_net *inst = ip_set_pernet(net); rcu_read_lock(); - for (i = 0; i < ip_set_max; i++) { - s = rcu_dereference(ip_set_list)[i]; + for (i = 0; i < inst->ip_set_max; i++) { + s = rcu_dereference(inst->ip_set_list)[i]; if (s != NULL && STREQ(s->name, name)) { __ip_set_get(s); index = i; @@ -561,17 +577,26 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname); * to be valid, after calling this function. * */ -void -ip_set_put_byindex(ip_set_id_t index) + +static inline void +__ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index) { struct ip_set *set; rcu_read_lock(); - set = rcu_dereference(ip_set_list)[index]; + set = rcu_dereference(inst->ip_set_list)[index]; if (set != NULL) __ip_set_put(set); rcu_read_unlock(); } + +void +ip_set_put_byindex(struct net *net, ip_set_id_t index) +{ + struct ip_set_net *inst = ip_set_pernet(net); + + __ip_set_put_byindex(inst, index); +} EXPORT_SYMBOL_GPL(ip_set_put_byindex); /* @@ -582,9 +607,9 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex); * */ const char * -ip_set_name_byindex(ip_set_id_t index) +ip_set_name_byindex(struct net *net, ip_set_id_t index) { - const struct ip_set *set = ip_set_rcu_get(index); + const struct ip_set *set = ip_set_rcu_get(net, index); BUG_ON(set == NULL); BUG_ON(set->ref == 0); @@ -606,14 +631,15 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex); * The nfnl mutex is used in the function. */ ip_set_id_t -ip_set_nfnl_get(const char *name) +ip_set_nfnl_get(struct net *net, const char *name) { ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; + struct ip_set_net *inst = ip_set_pernet(net); nfnl_lock(NFNL_SUBSYS_IPSET); - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL && STREQ(s->name, name)) { __ip_set_get(s); index = i; @@ -633,15 +659,16 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get); * The nfnl mutex is used in the function. */ ip_set_id_t -ip_set_nfnl_get_byindex(ip_set_id_t index) +ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) { struct ip_set *set; + struct ip_set_net *inst = ip_set_pernet(net); - if (index > ip_set_max) + if (index > inst->ip_set_max) return IPSET_INVALID_ID; nfnl_lock(NFNL_SUBSYS_IPSET); - set = nfnl_set(index); + set = nfnl_set(inst, index); if (set) __ip_set_get(set); else @@ -660,13 +687,17 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex); * The nfnl mutex is used in the function. */ void -ip_set_nfnl_put(ip_set_id_t index) +ip_set_nfnl_put(struct net *net, ip_set_id_t index) { struct ip_set *set; + struct ip_set_net *inst = ip_set_pernet(net); + nfnl_lock(NFNL_SUBSYS_IPSET); - set = nfnl_set(index); - if (set != NULL) - __ip_set_put(set); + if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */ + set = nfnl_set(inst, index); + if (set != NULL) + __ip_set_put(set); + } nfnl_unlock(NFNL_SUBSYS_IPSET); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); @@ -724,14 +755,14 @@ static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static struct ip_set * -find_set_and_id(const char *name, ip_set_id_t *id) +find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id) { struct ip_set *set = NULL; ip_set_id_t i; *id = IPSET_INVALID_ID; - for (i = 0; i < ip_set_max; i++) { - set = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + set = nfnl_set(inst, i); if (set != NULL && STREQ(set->name, name)) { *id = i; break; @@ -741,22 +772,23 @@ find_set_and_id(const char *name, ip_set_id_t *id) } static inline struct ip_set * -find_set(const char *name) +find_set(struct ip_set_net *inst, const char *name) { ip_set_id_t id; - return find_set_and_id(name, &id); + return find_set_and_id(inst, name, &id); } static int -find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set) +find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, + struct ip_set **set) { struct ip_set *s; ip_set_id_t i; *index = IPSET_INVALID_ID; - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s == NULL) { if (*index == IPSET_INVALID_ID) *index = i; @@ -785,6 +817,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct net *net = sock_net(ctnl); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *clash = NULL; ip_set_id_t index = IPSET_INVALID_ID; struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {}; @@ -843,7 +877,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, goto put_out; } - ret = set->type->create(set, tb, flags); + ret = set->type->create(net, set, tb, flags); if (ret != 0) goto put_out; @@ -854,7 +888,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, * by the nfnl mutex. Find the first free index in ip_set_list * and check clashing. */ - ret = find_free_id(set->name, &index, &clash); + ret = find_free_id(inst, set->name, &index, &clash); if (ret == -EEXIST) { /* If this is the same set and requested, ignore error */ if ((flags & IPSET_FLAG_EXIST) && @@ -867,9 +901,9 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, goto cleanup; } else if (ret == -IPSET_ERR_MAX_SETS) { struct ip_set **list, **tmp; - ip_set_id_t i = ip_set_max + IP_SET_INC; + ip_set_id_t i = inst->ip_set_max + IP_SET_INC; - if (i < ip_set_max || i == IPSET_INVALID_ID) + if (i < inst->ip_set_max || i == IPSET_INVALID_ID) /* Wraparound */ goto cleanup; @@ -877,14 +911,14 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, if (!list) goto cleanup; /* nfnl mutex is held, both lists are valid */ - tmp = nfnl_dereference(ip_set_list); - memcpy(list, tmp, sizeof(struct ip_set *) * ip_set_max); - rcu_assign_pointer(ip_set_list, list); + tmp = nfnl_dereference(inst->ip_set_list); + memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max); + rcu_assign_pointer(inst->ip_set_list, list); /* Make sure all current packets have passed through */ synchronize_net(); /* Use new list */ - index = ip_set_max; - ip_set_max = i; + index = inst->ip_set_max; + inst->ip_set_max = i; kfree(tmp); ret = 0; } else if (ret) @@ -894,7 +928,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, * Finally! Add our shiny new set to the list, and be done. */ pr_debug("create: '%s' created with index %u!\n", set->name, index); - nfnl_set(index) = set; + nfnl_set(inst, index) = set; return ret; @@ -917,12 +951,12 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static void -ip_set_destroy_set(ip_set_id_t index) +ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index) { - struct ip_set *set = nfnl_set(index); + struct ip_set *set = nfnl_set(inst, index); pr_debug("set: %s\n", set->name); - nfnl_set(index) = NULL; + nfnl_set(inst, index) = NULL; /* Must call it without holding any lock */ set->variant->destroy(set); @@ -935,6 +969,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *s; ip_set_id_t i; int ret = 0; @@ -954,21 +989,22 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, */ read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL && s->ref) { ret = -IPSET_ERR_BUSY; goto out; } } read_unlock_bh(&ip_set_ref_lock); - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL) - ip_set_destroy_set(i); + ip_set_destroy_set(inst, i); } } else { - s = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &i); + s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), + &i); if (s == NULL) { ret = -ENOENT; goto out; @@ -978,7 +1014,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, } read_unlock_bh(&ip_set_ref_lock); - ip_set_destroy_set(i); + ip_set_destroy_set(inst, i); } return 0; out: @@ -1003,6 +1039,7 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *s; ip_set_id_t i; @@ -1010,13 +1047,13 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, return -IPSET_ERR_PROTOCOL; if (!attr[IPSET_ATTR_SETNAME]) { - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL) ip_set_flush_set(s); } } else { - s = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (s == NULL) return -ENOENT; @@ -1042,6 +1079,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set, *s; const char *name2; ip_set_id_t i; @@ -1052,7 +1090,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_SETNAME2] == NULL)) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1063,8 +1101,8 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, } name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL && STREQ(s->name, name2)) { ret = -IPSET_ERR_EXIST_SETNAME2; goto out; @@ -1091,6 +1129,7 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *from, *to; ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; @@ -1100,11 +1139,13 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_SETNAME2] == NULL)) return -IPSET_ERR_PROTOCOL; - from = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &from_id); + from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), + &from_id); if (from == NULL) return -ENOENT; - to = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id); + to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]), + &to_id); if (to == NULL) return -IPSET_ERR_EXIST_SETNAME2; @@ -1121,8 +1162,8 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, write_lock_bh(&ip_set_ref_lock); swap(from->ref, to->ref); - nfnl_set(from_id) = to; - nfnl_set(to_id) = from; + nfnl_set(inst, from_id) = to; + nfnl_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); return 0; @@ -1141,9 +1182,10 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, static int ip_set_dump_done(struct netlink_callback *cb) { + struct ip_set_net *inst = (struct ip_set_net *)cb->data; if (cb->args[2]) { - pr_debug("release set %s\n", nfnl_set(cb->args[1])->name); - ip_set_put_byindex((ip_set_id_t) cb->args[1]); + pr_debug("release set %s\n", nfnl_set(inst, cb->args[1])->name); + __ip_set_put_byindex(inst, (ip_set_id_t) cb->args[1]); } return 0; } @@ -1169,6 +1211,7 @@ dump_init(struct netlink_callback *cb) struct nlattr *attr = (void *)nlh + min_len; u32 dump_type; ip_set_id_t index; + struct ip_set_net *inst = (struct ip_set_net *)cb->data; /* Second pass, so parser can't fail */ nla_parse(cda, IPSET_ATTR_CMD_MAX, @@ -1182,7 +1225,7 @@ dump_init(struct netlink_callback *cb) if (cda[IPSET_ATTR_SETNAME]) { struct ip_set *set; - set = find_set_and_id(nla_data(cda[IPSET_ATTR_SETNAME]), + set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]), &index); if (set == NULL) return -ENOENT; @@ -1210,6 +1253,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0; u32 dump_type, dump_flags; int ret = 0; + struct ip_set_net *inst = (struct ip_set_net *)cb->data; if (!cb->args[0]) { ret = dump_init(cb); @@ -1223,18 +1267,18 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) } } - if (cb->args[1] >= ip_set_max) + if (cb->args[1] >= inst->ip_set_max) goto out; dump_type = DUMP_TYPE(cb->args[0]); dump_flags = DUMP_FLAGS(cb->args[0]); - max = dump_type == DUMP_ONE ? cb->args[1] + 1 : ip_set_max; + max = dump_type == DUMP_ONE ? cb->args[1] + 1 : inst->ip_set_max; dump_last: pr_debug("args[0]: %u %u args[1]: %ld\n", dump_type, dump_flags, cb->args[1]); for (; cb->args[1] < max; cb->args[1]++) { index = (ip_set_id_t) cb->args[1]; - set = nfnl_set(index); + set = nfnl_set(inst, index); if (set == NULL) { if (dump_type == DUMP_ONE) { ret = -ENOENT; @@ -1312,8 +1356,8 @@ next_set: release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[2]) { - pr_debug("release set %s\n", nfnl_set(index)->name); - ip_set_put_byindex(index); + pr_debug("release set %s\n", nfnl_set(inst, index)->name); + __ip_set_put_byindex(inst, index); cb->args[2] = 0; } out: @@ -1331,6 +1375,8 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; @@ -1338,6 +1384,7 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, struct netlink_dump_control c = { .dump = ip_set_dump_start, .done = ip_set_dump_done, + .data = (void *)inst }; return netlink_dump_start(ctnl, skb, nlh, &c); } @@ -1416,6 +1463,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; const struct nlattr *nla; @@ -1434,7 +1482,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_LINENO] == NULL)))) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1470,6 +1518,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; const struct nlattr *nla; @@ -1488,7 +1537,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_LINENO] == NULL)))) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1524,6 +1573,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; int ret = 0; @@ -1534,7 +1584,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, !flag_nested(attr[IPSET_ATTR_DATA]))) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1559,6 +1609,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); const struct ip_set *set; struct sk_buff *skb2; struct nlmsghdr *nlh2; @@ -1568,7 +1619,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_SETNAME] == NULL)) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1793,8 +1844,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) unsigned int *op; void *data; int copylen = *len, ret = 0; + struct net *net = sock_net(sk); + struct ip_set_net *inst = ip_set_pernet(net); - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (optval != SO_IP_SET) return -EBADF; @@ -1843,7 +1896,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; nfnl_lock(NFNL_SUBSYS_IPSET); - find_set_and_id(req_get->set.name, &id); + find_set_and_id(inst, req_get->set.name, &id); req_get->set.index = id; nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; @@ -1858,10 +1911,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; nfnl_lock(NFNL_SUBSYS_IPSET); - find_set_and_id(req_get->set.name, &id); + find_set_and_id(inst, req_get->set.name, &id); req_get->set.index = id; if (id != IPSET_INVALID_ID) - req_get->family = nfnl_set(id)->family; + req_get->family = nfnl_set(inst, id)->family; nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; } @@ -1870,12 +1923,12 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) struct ip_set *set; if (*len != sizeof(struct ip_set_req_get_set) || - req_get->set.index >= ip_set_max) { + req_get->set.index >= inst->ip_set_max) { ret = -EINVAL; goto done; } nfnl_lock(NFNL_SUBSYS_IPSET); - set = nfnl_set(req_get->set.index); + set = nfnl_set(inst, req_get->set.index); strncpy(req_get->set.name, set ? set->name : "", IPSET_MAXNAMELEN); nfnl_unlock(NFNL_SUBSYS_IPSET); @@ -1904,49 +1957,82 @@ static struct nf_sockopt_ops so_set __read_mostly = { .owner = THIS_MODULE, }; -static int __init -ip_set_init(void) +static int __net_init +ip_set_net_init(struct net *net) { + struct ip_set_net *inst = ip_set_pernet(net); + struct ip_set **list; - int ret; - if (max_sets) - ip_set_max = max_sets; - if (ip_set_max >= IPSET_INVALID_ID) - ip_set_max = IPSET_INVALID_ID - 1; + inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX; + if (inst->ip_set_max >= IPSET_INVALID_ID) + inst->ip_set_max = IPSET_INVALID_ID - 1; - list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL); + list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL); if (!list) return -ENOMEM; + inst->is_deleted = 0; + rcu_assign_pointer(inst->ip_set_list, list); + pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL); + return 0; +} + +static void __net_exit +ip_set_net_exit(struct net *net) +{ + struct ip_set_net *inst = ip_set_pernet(net); + + struct ip_set *set = NULL; + ip_set_id_t i; + + inst->is_deleted = 1; /* flag for ip_set_nfnl_put */ + + for (i = 0; i < inst->ip_set_max; i++) { + set = nfnl_set(inst, i); + if (set != NULL) + ip_set_destroy_set(inst, i); + } + kfree(rcu_dereference_protected(inst->ip_set_list, 1)); +} + +static struct pernet_operations ip_set_net_ops = { + .init = ip_set_net_init, + .exit = ip_set_net_exit, + .id = &ip_set_net_id, + .size = sizeof(struct ip_set_net) +}; + - rcu_assign_pointer(ip_set_list, list); - ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); +static int __init +ip_set_init(void) +{ + int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); if (ret != 0) { pr_err("ip_set: cannot register with nfnetlink.\n"); - kfree(list); return ret; } ret = nf_register_sockopt(&so_set); if (ret != 0) { pr_err("SO_SET registry failed: %d\n", ret); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - kfree(list); return ret; } - - pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL); + ret = register_pernet_subsys(&ip_set_net_ops); + if (ret) { + pr_err("ip_set: cannot register pernet_subsys.\n"); + nf_unregister_sockopt(&so_set); + nfnetlink_subsys_unregister(&ip_set_netlink_subsys); + return ret; + } return 0; } static void __exit ip_set_fini(void) { - struct ip_set **list = rcu_dereference_protected(ip_set_list, 1); - - /* There can't be any existing set */ + unregister_pernet_subsys(&ip_set_net_ops); nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - kfree(list); pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index b4add206c603..6a80dbd30df7 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -1011,7 +1011,8 @@ static const struct ip_set_type_variant mtype_variant = { #ifdef IP_SET_EMIT_CREATE static int -IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) +IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, + struct nlattr *tb[], u32 flags) { u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; u8 hbits; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index ba4232e451f4..ec6f6d15dded 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -39,6 +39,7 @@ struct set_adt_elem { struct list_set { u32 size; /* size of set list array */ struct timer_list gc; /* garbage collection */ + struct net *net; /* namespace */ struct set_elem members[0]; /* the set members */ }; @@ -171,7 +172,7 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, if (e->id != IPSET_INVALID_ID) { if (i == map->size - 1) { /* Last element replaced: e.g. add new,before,last */ - ip_set_put_byindex(e->id); + ip_set_put_byindex(map->net, e->id); ip_set_ext_destroy(set, e); } else { struct set_elem *x = list_set_elem(set, map, @@ -179,7 +180,7 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, /* Last element pushed off */ if (x->id != IPSET_INVALID_ID) { - ip_set_put_byindex(x->id); + ip_set_put_byindex(map->net, x->id); ip_set_ext_destroy(set, x); } memmove(list_set_elem(set, map, i + 1), e, @@ -205,7 +206,7 @@ list_set_del(struct ip_set *set, u32 i) struct list_set *map = set->data; struct set_elem *e = list_set_elem(set, map, i); - ip_set_put_byindex(e->id); + ip_set_put_byindex(map->net, e->id); ip_set_ext_destroy(set, e); if (i < map->size - 1) @@ -307,7 +308,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_COMMENT(set)) ip_set_init_comment(ext_comment(e, set), ext); /* Set is already added to the list */ - ip_set_put_byindex(d->id); + ip_set_put_byindex(map->net, d->id); return 0; } insert: @@ -366,6 +367,7 @@ static int list_set_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { + struct list_set *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct set_adt_elem e = { .refid = IPSET_INVALID_ID }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -385,7 +387,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - e.id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s); + e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s); if (e.id == IPSET_INVALID_ID) return -IPSET_ERR_NAME; /* "Loop detection" */ @@ -405,7 +407,8 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], } if (tb[IPSET_ATTR_NAMEREF]) { - e.refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]), + e.refid = ip_set_get_byname(map->net, + nla_data(tb[IPSET_ATTR_NAMEREF]), &s); if (e.refid == IPSET_INVALID_ID) { ret = -IPSET_ERR_NAMEREF; @@ -421,9 +424,9 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], finish: if (e.refid != IPSET_INVALID_ID) - ip_set_put_byindex(e.refid); + ip_set_put_byindex(map->net, e.refid); if (adt != IPSET_ADD || ret) - ip_set_put_byindex(e.id); + ip_set_put_byindex(map->net, e.id); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -438,7 +441,7 @@ list_set_flush(struct ip_set *set) for (i = 0; i < map->size; i++) { e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { - ip_set_put_byindex(e->id); + ip_set_put_byindex(map->net, e->id); ip_set_ext_destroy(set, e); e->id = IPSET_INVALID_ID; } @@ -510,7 +513,7 @@ list_set_list(const struct ip_set *set, goto nla_put_failure; } if (nla_put_string(skb, IPSET_ATTR_NAME, - ip_set_name_byindex(e->id))) + ip_set_name_byindex(map->net, e->id))) goto nla_put_failure; if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; @@ -587,7 +590,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) /* Create list:set type of sets */ static bool -init_list_set(struct ip_set *set, u32 size) +init_list_set(struct net *net, struct ip_set *set, u32 size) { struct list_set *map; struct set_elem *e; @@ -598,6 +601,7 @@ init_list_set(struct ip_set *set, u32 size) return false; map->size = size; + map->net = net; set->data = map; for (i = 0; i < size; i++) { @@ -609,7 +613,8 @@ init_list_set(struct ip_set *set, u32 size) } static int -list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[], + u32 flags) { u32 size = IP_SET_LIST_DEFAULT_SIZE; @@ -625,7 +630,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) set->variant = &set_variant; set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem)); - if (!init_list_set(set, size)) + if (!init_list_set(net, set, size)) return -ENOMEM; if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 2095488684ff..e7c4e0e01ff5 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -81,7 +81,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) struct xt_set_info_match_v0 *info = par->matchinfo; ip_set_id_t index; - index = ip_set_nfnl_get_byindex(info->match_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->match_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find set indentified by id %u to match\n", @@ -91,7 +91,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) { pr_warning("Protocol error: set match dimension " "is over the limit!\n"); - ip_set_nfnl_put(info->match_set.index); + ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; } @@ -106,7 +106,7 @@ set_match_v0_destroy(const struct xt_mtdtor_param *par) { struct xt_set_info_match_v0 *info = par->matchinfo; - ip_set_nfnl_put(info->match_set.index); + ip_set_nfnl_put(par->net, info->match_set.index); } /* Revision 1 match */ @@ -131,7 +131,7 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par) struct xt_set_info_match_v1 *info = par->matchinfo; ip_set_id_t index; - index = ip_set_nfnl_get_byindex(info->match_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->match_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find set indentified by id %u to match\n", @@ -141,7 +141,7 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par) if (info->match_set.dim > IPSET_DIM_MAX) { pr_warning("Protocol error: set match dimension " "is over the limit!\n"); - ip_set_nfnl_put(info->match_set.index); + ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; } @@ -153,7 +153,7 @@ set_match_v1_destroy(const struct xt_mtdtor_param *par) { struct xt_set_info_match_v1 *info = par->matchinfo; - ip_set_nfnl_put(info->match_set.index); + ip_set_nfnl_put(par->net, info->match_set.index); } /* Revision 3 match */ @@ -228,7 +228,7 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) ip_set_id_t index; if (info->add_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->add_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find add_set index %u as target\n", info->add_set.index); @@ -237,12 +237,12 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) } if (info->del_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->del_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find del_set index %u as target\n", info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); return -ENOENT; } } @@ -251,9 +251,9 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) pr_warning("Protocol error: SET target dimension " "is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); return -ERANGE; } @@ -270,9 +270,9 @@ set_target_v0_destroy(const struct xt_tgdtor_param *par) const struct xt_set_info_target_v0 *info = par->targinfo; if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); } /* Revision 1 target */ @@ -301,7 +301,7 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) ip_set_id_t index; if (info->add_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->add_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find add_set index %u as target\n", info->add_set.index); @@ -310,12 +310,12 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) } if (info->del_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->del_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find del_set index %u as target\n", info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); return -ENOENT; } } @@ -324,9 +324,9 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) pr_warning("Protocol error: SET target dimension " "is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); return -ERANGE; } @@ -339,9 +339,9 @@ set_target_v1_destroy(const struct xt_tgdtor_param *par) const struct xt_set_info_target_v1 *info = par->targinfo; if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); } /* Revision 2 target */ diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index 938b7cbf5627..1ac41d3de5c3 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -24,11 +24,12 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len, { struct xt_set_info *set = data; ip_set_id_t index; + struct net *net = qdisc_dev(tp->q)->nd_net; if (data_len != sizeof(*set)) return -EINVAL; - index = ip_set_nfnl_get_byindex(set->index); + index = ip_set_nfnl_get_byindex(net, set->index); if (index == IPSET_INVALID_ID) return -ENOENT; @@ -37,7 +38,7 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len, if (em->data) return 0; - ip_set_nfnl_put(index); + ip_set_nfnl_put(net, index); return -ENOMEM; } @@ -45,7 +46,7 @@ static void em_ipset_destroy(struct tcf_proto *p, struct tcf_ematch *em) { const struct xt_set_info *set = (const void *) em->data; if (set) { - ip_set_nfnl_put(set->index); + ip_set_nfnl_put(qdisc_dev(p->q)->nd_net, set->index); kfree((void *) em->data); } } -- cgit v1.2.3-55-g7522 From 7c3ad056ef79fd10f5f111c807ccbd9fa9068c7f Mon Sep 17 00:00:00 2001 From: Oliver Smith Date: Sat, 28 Sep 2013 20:33:23 +0200 Subject: netfilter: ipset: Add hash:net,port,net module to kernel. This adds a new set that provides similar functionality to ip,port,net but permits arbitrary size subnets for both the first and last parameter. Signed-off-by: Oliver Smith Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/Kconfig | 9 + net/netfilter/ipset/Makefile | 1 + net/netfilter/ipset/ip_set_hash_netportnet.c | 588 +++++++++++++++++++++++++++ 3 files changed, 598 insertions(+) create mode 100644 net/netfilter/ipset/ip_set_hash_netportnet.c (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 9119f658a69b..a2d6263b6c64 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -90,6 +90,15 @@ config IP_SET_HASH_IPPORTNET To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_NETPORTNET + tristate "hash:net,port,net set support" + depends on IP_SET + help + This option adds the hash:net,port,net set type support, by which + one can store two IPv4/IPv6 subnets, and a protocol/port in a set. + + To compile it as a module, choose M here. If unsure, say N. + config IP_SET_HASH_NET tristate "hash:net set support" depends on IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 43eef7a19e3c..44b2d38476fa 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o obj-$(CONFIG_IP_SET_HASH_NETIFACE) += ip_set_hash_netiface.o obj-$(CONFIG_IP_SET_HASH_NETNET) += ip_set_hash_netnet.o +obj-$(CONFIG_IP_SET_HASH_NETPORTNET) += ip_set_hash_netportnet.o # list types obj-$(CONFIG_IP_SET_LIST_SET) += ip_set_list_set.o diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c new file mode 100644 index 000000000000..363fab933d48 --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -0,0 +1,588 @@ +/* Copyright (C) 2003-2013 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:ip,port,net type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 0 /* Comments support added */ + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Oliver Smith "); +IP_SET_MODULE_DESC("hash:net,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); +MODULE_ALIAS("ip_set_hash:net,port,net"); + +/* Type specific function prefix */ +#define HTYPE hash_netportnet +#define IP_SET_HASH_WITH_PROTO +#define IP_SET_HASH_WITH_NETS +#define IPSET_NET_COUNT 2 + +/* IPv4 variant */ + +/* Member elements */ +struct hash_netportnet4_elem { + union { + __be32 ip[2]; + __be64 ipcmp; + }; + __be16 port; + union { + u8 cidr[2]; + u16 ccmp; + }; + u8 nomatch:1; + u8 proto; +}; + +/* Common functions */ + +static inline bool +hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1, + const struct hash_netportnet4_elem *ip2, + u32 *multi) +{ + return ip1->ipcmp == ip2->ipcmp && + ip1->ccmp == ip2->ccmp && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline int +hash_netportnet4_do_data_match(const struct hash_netportnet4_elem *elem) +{ + return elem->nomatch ? -ENOTEMPTY : 1; +} + +static inline void +hash_netportnet4_data_set_flags(struct hash_netportnet4_elem *elem, u32 flags) +{ + elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags) +{ + swap(*flags, elem->nomatch); +} + +static inline void +hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem, + struct hash_netportnet4_elem *orig) +{ + elem->ip[1] = orig->ip[1]; +} + +static inline void +hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem, + u8 cidr, bool inner) +{ + if (inner) { + elem->ip[1] &= ip_set_netmask(cidr); + elem->cidr[1] = cidr; + } else { + elem->ip[0] &= ip_set_netmask(cidr); + elem->cidr[0] = cidr; + } +} + +static bool +hash_netportnet4_data_list(struct sk_buff *skb, + const struct hash_netportnet4_elem *data) +{ + u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; + + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip[0]) || + nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip[1]) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} + +static inline void +hash_netportnet4_data_next(struct hash_netportnet4_elem *next, + const struct hash_netportnet4_elem *d) +{ + next->ipcmp = d->ipcmp; + next->port = d->port; +} + +#define MTYPE hash_netportnet4 +#define PF 4 +#define HOST_MASK 32 +#include "ip_set_hash_gen.h" + +static int +hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + const struct hash_netportnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netportnet4_elem e = { + .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK), + }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + if (adt == IPSET_TEST) + e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; + + if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, + &e.port, &e.proto)) + return -EINVAL; + + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0]); + ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip[1]); + e.ip[0] &= ip_set_netmask(e.cidr[0]); + e.ip[1] &= ip_set_netmask(e.cidr[1]); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + const struct hash_netportnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netportnet4_elem e = { .cidr[0] = HOST_MASK, + .cidr[1] = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to; + u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; + bool with_ports = false; + u8 cidr, cidr2; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || + ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) { + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!cidr || cidr > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + e.cidr[0] = cidr; + } + + if (tb[IPSET_ATTR_CIDR2]) { + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + if (!cidr || cidr > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + e.cidr[1] = cidr; + } + + if (tb[IPSET_ATTR_PORT]) + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(e.proto); + + if (e.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + if (!(with_ports || e.proto == IPPROTO_ICMP)) + e.port = 0; + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); + } + + with_ports = with_ports && tb[IPSET_ATTR_PORT_TO]; + if (adt == IPSET_TEST || + !(tb[IPSET_ATTR_IP_TO] || with_ports || tb[IPSET_ATTR_IP2_TO])) { + e.ip[0] = htonl(ip & ip_set_hostmask(e.cidr[0])); + e.ip[1] = htonl(ip2_from & ip_set_hostmask(e.cidr[1])); + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_enomatch(ret, flags, adt, set) ? -ret : + ip_set_eexist(ret, flags) ? 0 : ret; + } + + ip_to = ip; + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip > ip_to) + swap(ip, ip_to); + if (unlikely(ip + UINT_MAX == ip_to)) + return -IPSET_ERR_HASH_RANGE; + } + + port_to = port = ntohs(e.port); + if (tb[IPSET_ATTR_PORT_TO]) { + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + } + + ip2_to = ip2_from; + if (tb[IPSET_ATTR_IP2_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); + if (ret) + return ret; + if (ip2_from > ip2_to) + swap(ip2_from, ip2_to); + if (unlikely(ip2_from + UINT_MAX == ip2_to)) + return -IPSET_ERR_HASH_RANGE; + } + + if (retried) + ip = ntohl(h->next.ip[0]); + + while (!after(ip, ip_to)) { + e.ip[0] = htonl(ip); + ip_last = ip_set_range_to_cidr(ip, ip_to, &cidr); + e.cidr[0] = cidr; + p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port) + : port; + for (; p <= port_to; p++) { + e.port = htons(p); + ip2 = (retried && ip == ntohl(h->next.ip[0]) && + p == ntohs(h->next.port)) ? ntohl(h->next.ip[1]) + : ip2_from; + while (!after(ip2, ip2_to)) { + e.ip[1] = htonl(ip2); + ip2_last = ip_set_range_to_cidr(ip2, ip2_to, + &cidr2); + e.cidr[1] = cidr2; + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + ip2 = ip2_last + 1; + } + } + ip = ip_last + 1; + } + return ret; +} + +/* IPv6 variant */ + +struct hash_netportnet6_elem { + union nf_inet_addr ip[2]; + __be16 port; + union { + u8 cidr[2]; + u16 ccmp; + }; + u8 nomatch:1; + u8 proto; +}; + +/* Common functions */ + +static inline bool +hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1, + const struct hash_netportnet6_elem *ip2, + u32 *multi) +{ + return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) && + ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) && + ip1->ccmp == ip2->ccmp && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline int +hash_netportnet6_do_data_match(const struct hash_netportnet6_elem *elem) +{ + return elem->nomatch ? -ENOTEMPTY : 1; +} + +static inline void +hash_netportnet6_data_set_flags(struct hash_netportnet6_elem *elem, u32 flags) +{ + elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags) +{ + swap(*flags, elem->nomatch); +} + +static inline void +hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem, + struct hash_netportnet6_elem *orig) +{ + elem->ip[1] = orig->ip[1]; +} + +static inline void +hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem, + u8 cidr, bool inner) +{ + if (inner) { + ip6_netmask(&elem->ip[1], cidr); + elem->cidr[1] = cidr; + } else { + ip6_netmask(&elem->ip[0], cidr); + elem->cidr[0] = cidr; + } +} + +static bool +hash_netportnet6_data_list(struct sk_buff *skb, + const struct hash_netportnet6_elem *data) +{ + u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; + + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip[0].in6) || + nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip[1].in6) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} + +static inline void +hash_netportnet6_data_next(struct hash_netportnet4_elem *next, + const struct hash_netportnet6_elem *d) +{ + next->port = d->port; +} + +#undef MTYPE +#undef PF +#undef HOST_MASK + +#define MTYPE hash_netportnet6 +#define PF 6 +#define HOST_MASK 128 +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" + +static int +hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + const struct hash_netportnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netportnet6_elem e = { + .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK), + }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + if (adt == IPSET_TEST) + e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK; + + if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, + &e.port, &e.proto)) + return -EINVAL; + + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip[1].in6); + ip6_netmask(&e.ip[0], e.cidr[0]); + ip6_netmask(&e.ip[1], e.cidr[1]); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + const struct hash_netportnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netportnet6_elem e = { .cidr[0] = HOST_MASK, + .cidr[1] = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 port, port_to; + bool with_ports = false; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) || + ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) + e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (tb[IPSET_ATTR_CIDR2]) + e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + + if (unlikely(!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] || + e.cidr[1] > HOST_MASK)) + return -IPSET_ERR_INVALID_CIDR; + + ip6_netmask(&e.ip[0], e.cidr[0]); + ip6_netmask(&e.ip[1], e.cidr[1]); + + if (tb[IPSET_ATTR_PORT]) + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(e.proto); + + if (e.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + if (!(with_ports || e.proto == IPPROTO_ICMPV6)) + e.port = 0; + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); + } + + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_enomatch(ret, flags, adt, set) ? -ret : + ip_set_eexist(ret, flags) ? 0 : ret; + } + + port = ntohs(e.port); + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + + if (retried) + port = ntohs(h->next.port); + for (; port <= port_to; port++) { + e.port = htons(port); + ret = adtfn(set, &e, &ext, &ext, flags); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +static struct ip_set_type hash_netportnet_type __read_mostly = { + .name = "hash:net,port,net", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2 | + IPSET_TYPE_NOMATCH, + .dimension = IPSET_DIM_THREE, + .family = NFPROTO_UNSPEC, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, + .create = hash_netportnet_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_CIDR2] = { .type = NLA_U8 }, + [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_netportnet_init(void) +{ + return ip_set_type_register(&hash_netportnet_type); +} + +static void __exit +hash_netportnet_fini(void) +{ + ip_set_type_unregister(&hash_netportnet_type); +} + +module_init(hash_netportnet_init); +module_exit(hash_netportnet_fini); -- cgit v1.2.3-55-g7522 From 7433268783eda21a04bf963f9992547a848d44a4 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 23 Sep 2013 19:20:55 +0800 Subject: netfilter: nfnetlink_queue: use proper net namespace to allocate skb Use proper net struct to allocate skb, otherwise netlink mmap will have no effect. Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index ae2e5c11d01a..21258cf70091 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -298,7 +298,7 @@ nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, } static struct sk_buff * -nfqnl_build_packet_message(struct nfqnl_instance *queue, +nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_queue_entry *entry, __be32 **packet_id_ptr) { @@ -372,7 +372,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (queue->flags & NFQA_CFG_F_CONNTRACK) ct = nfqnl_ct_get(entskb, &size, &ctinfo); - skb = nfnetlink_alloc_skb(&init_net, size, queue->peer_portid, + skb = nfnetlink_alloc_skb(net, size, queue->peer_portid, GFP_ATOMIC); if (!skb) return NULL; @@ -525,7 +525,7 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, __be32 *packet_id_ptr; int failopen = 0; - nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr); + nskb = nfqnl_build_packet_message(net, queue, entry, &packet_id_ptr); if (nskb == NULL) { err = -ENOMEM; goto err_out; -- cgit v1.2.3-55-g7522 From afff14f6089ac50f05f29e489c5b2a067077d560 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 23 Sep 2013 19:20:56 +0800 Subject: netfilter: nfnetlink_log: use proper net to allocate skb Use proper net struct to allocate skb, otherwise netlink mmap will be of no effect. Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index d92cc317bf8b..3c4b69e5fe17 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -319,7 +319,8 @@ nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags) } static struct sk_buff * -nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size) +nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, + unsigned int pkt_size) { struct sk_buff *skb; unsigned int n; @@ -328,13 +329,13 @@ nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size) * message. WARNING: has to be <= 128k due to slab restrictions */ n = max(inst_size, pkt_size); - skb = nfnetlink_alloc_skb(&init_net, n, peer_portid, GFP_ATOMIC); + skb = nfnetlink_alloc_skb(net, n, peer_portid, GFP_ATOMIC); if (!skb) { if (n > pkt_size) { /* try to allocate only as much as we need for current * packet */ - skb = nfnetlink_alloc_skb(&init_net, pkt_size, + skb = nfnetlink_alloc_skb(net, pkt_size, peer_portid, GFP_ATOMIC); if (!skb) pr_err("nfnetlink_log: can't even alloc %u bytes\n", @@ -702,8 +703,8 @@ nfulnl_log_packet(struct net *net, } if (!inst->skb) { - inst->skb = nfulnl_alloc_skb(inst->peer_portid, inst->nlbufsiz, - size); + inst->skb = nfulnl_alloc_skb(net, inst->peer_portid, + inst->nlbufsiz, size); if (!inst->skb) goto alloc_failure; } -- cgit v1.2.3-55-g7522 From 180cf72f56fab2810e00497c087c7126bfe53c85 Mon Sep 17 00:00:00 2001 From: holger@eitzenberger.org Date: Mon, 30 Sep 2013 17:07:28 +0200 Subject: netfilter: nf_ct_sip: consolidate NAT hook functions There are currently seven different NAT hooks used in both nf_conntrack_sip and nf_nat_sip, each of the hooks is exported in nf_conntrack_sip, then set from the nf_nat_sip NAT helper. And because each of them is exported there is quite some overhead introduced due of this. By introducing nf_nat_sip_hooks I am able to reduce both text/data somewhat. For nf_conntrack_sip e. g. I get text data bss dec old 15243 5256 32 20531 new 15010 5192 32 20234 Signed-off-by: Holger Eitzenberger Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_sip.h | 107 +++++++++++++----------- net/netfilter/nf_conntrack_sip.c | 127 ++++++++--------------------- net/netfilter/nf_nat_sip.c | 35 ++++---- 3 files changed, 107 insertions(+), 162 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index ba7f571a2b1c..4cb71551f611 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -107,55 +107,64 @@ enum sdp_header_types { SDP_HDR_MEDIA, }; -extern unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen); -extern void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, - unsigned int protoff, s16 off); -extern unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - struct nf_conntrack_expect *exp, - unsigned int matchoff, - unsigned int matchlen); -extern unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int sdpoff, - enum sdp_header_types type, - enum sdp_header_types term, - const union nf_inet_addr *addr); -extern unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int matchoff, - unsigned int matchlen, - u_int16_t port); -extern unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int sdpoff, - const union nf_inet_addr *addr); -extern unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - struct nf_conntrack_expect *rtp_exp, - struct nf_conntrack_expect *rtcp_exp, - unsigned int mediaoff, - unsigned int medialen, - union nf_inet_addr *rtp_addr); +struct nf_nat_sip_hooks { + unsigned int (*msg)(struct sk_buff *skb, + unsigned int protoff, + unsigned int dataoff, + const char **dptr, + unsigned int *datalen); + + void (*seq_adjust)(struct sk_buff *skb, + unsigned int protoff, s16 off); + + unsigned int (*expect)(struct sk_buff *skb, + unsigned int protoff, + unsigned int dataoff, + const char **dptr, + unsigned int *datalen, + struct nf_conntrack_expect *exp, + unsigned int matchoff, + unsigned int matchlen); + + unsigned int (*sdp_addr)(struct sk_buff *skb, + unsigned int protoff, + unsigned int dataoff, + const char **dptr, + unsigned int *datalen, + unsigned int sdpoff, + enum sdp_header_types type, + enum sdp_header_types term, + const union nf_inet_addr *addr); + + unsigned int (*sdp_port)(struct sk_buff *skb, + unsigned int protoff, + unsigned int dataoff, + const char **dptr, + unsigned int *datalen, + unsigned int matchoff, + unsigned int matchlen, + u_int16_t port); + + unsigned int (*sdp_session)(struct sk_buff *skb, + unsigned int protoff, + unsigned int dataoff, + const char **dptr, + unsigned int *datalen, + unsigned int sdpoff, + const union nf_inet_addr *addr); + + unsigned int (*sdp_media)(struct sk_buff *skb, + unsigned int protoff, + unsigned int dataoff, + const char **dptr, + unsigned int *datalen, + struct nf_conntrack_expect *rtp_exp, + struct nf_conntrack_expect *rtcp_exp, + unsigned int mediaoff, + unsigned int medialen, + union nf_inet_addr *rtp_addr); +}; +extern const struct nf_nat_sip_hooks *nf_nat_sip_hooks; extern int ct_sip_parse_request(const struct nf_conn *ct, const char *dptr, unsigned int datalen, diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 5ed8c441dffd..466410eaa482 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -52,66 +52,8 @@ module_param(sip_direct_media, int, 0600); MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling " "endpoints only (default 1)"); -unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, const char **dptr, - unsigned int *datalen) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sip_hook); - -void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, unsigned int protoff, - s16 off) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook); - -unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - struct nf_conntrack_expect *exp, - unsigned int matchoff, - unsigned int matchlen) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook); - -unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int sdpoff, - enum sdp_header_types type, - enum sdp_header_types term, - const union nf_inet_addr *addr) - __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook); - -unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int matchoff, - unsigned int matchlen, - u_int16_t port) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook); - -unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int sdpoff, - const union nf_inet_addr *addr) - __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook); - -unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - struct nf_conntrack_expect *rtp_exp, - struct nf_conntrack_expect *rtcp_exp, - unsigned int mediaoff, - unsigned int medialen, - union nf_inet_addr *rtp_addr) - __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_media_hook); +const struct nf_nat_sip_hooks *nf_nat_sip_hooks; +EXPORT_SYMBOL_GPL(nf_nat_sip_hooks); static int string_len(const struct nf_conn *ct, const char *dptr, const char *limit, int *shift) @@ -914,8 +856,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, int direct_rtp = 0, skip_expect = 0, ret = NF_DROP; u_int16_t base_port; __be16 rtp_port, rtcp_port; - typeof(nf_nat_sdp_port_hook) nf_nat_sdp_port; - typeof(nf_nat_sdp_media_hook) nf_nat_sdp_media; + const struct nf_nat_sip_hooks *hooks; saddr = NULL; if (sip_direct_media) { @@ -972,9 +913,9 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, rtcp_port = htons(base_port + 1); if (direct_rtp) { - nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook); - if (nf_nat_sdp_port && - !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen, + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && + !hooks->sdp_port(skb, protoff, dataoff, dptr, datalen, mediaoff, medialen, ntohs(rtp_port))) goto err1; } @@ -996,10 +937,10 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, nf_ct_expect_init(rtcp_exp, class, nf_ct_l3num(ct), saddr, daddr, IPPROTO_UDP, NULL, &rtcp_port); - nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook); - if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp) - ret = nf_nat_sdp_media(skb, protoff, dataoff, dptr, datalen, - rtp_exp, rtcp_exp, + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && ct->status & IPS_NAT_MASK && !direct_rtp) + ret = hooks->sdp_media(skb, protoff, dataoff, dptr, + datalen, rtp_exp, rtcp_exp, mediaoff, medialen, daddr); else { if (nf_ct_expect_related(rtp_exp) == 0) { @@ -1053,13 +994,12 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, unsigned int caddr_len, maddr_len; unsigned int i; union nf_inet_addr caddr, maddr, rtp_addr; + const struct nf_nat_sip_hooks *hooks; unsigned int port; const struct sdp_media_type *t; int ret = NF_ACCEPT; - typeof(nf_nat_sdp_addr_hook) nf_nat_sdp_addr; - typeof(nf_nat_sdp_session_hook) nf_nat_sdp_session; - nf_nat_sdp_addr = rcu_dereference(nf_nat_sdp_addr_hook); + hooks = rcu_dereference(nf_nat_sip_hooks); /* Find beginning of session description */ if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen, @@ -1127,10 +1067,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, } /* Update media connection address if present */ - if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) { - ret = nf_nat_sdp_addr(skb, protoff, dataoff, + if (maddr_len && hooks && ct->status & IPS_NAT_MASK) { + ret = hooks->sdp_addr(skb, protoff, dataoff, dptr, datalen, mediaoff, - SDP_HDR_CONNECTION, SDP_HDR_MEDIA, + SDP_HDR_CONNECTION, + SDP_HDR_MEDIA, &rtp_addr); if (ret != NF_ACCEPT) { nf_ct_helper_log(skb, ct, "cannot mangle SDP"); @@ -1141,10 +1082,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, } /* Update session connection and owner addresses */ - nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook); - if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK) - ret = nf_nat_sdp_session(skb, protoff, dataoff, - dptr, datalen, sdpoff, &rtp_addr); + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && ct->status & IPS_NAT_MASK) + ret = hooks->sdp_session(skb, protoff, dataoff, + dptr, datalen, sdpoff, + &rtp_addr); return ret; } @@ -1244,11 +1186,11 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, unsigned int matchoff, matchlen; struct nf_conntrack_expect *exp; union nf_inet_addr *saddr, daddr; + const struct nf_nat_sip_hooks *hooks; __be16 port; u8 proto; unsigned int expires = 0; int ret; - typeof(nf_nat_sip_expect_hook) nf_nat_sip_expect; /* Expected connections can not register again. */ if (ct->status & IPS_EXPECTED) @@ -1311,10 +1253,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, exp->helper = nfct_help(ct)->helper; exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; - nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook); - if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK) - ret = nf_nat_sip_expect(skb, protoff, dataoff, dptr, datalen, - exp, matchoff, matchlen); + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && ct->status & IPS_NAT_MASK) + ret = hooks->expect(skb, protoff, dataoff, dptr, datalen, + exp, matchoff, matchlen); else { if (nf_ct_expect_related(exp) != 0) { nf_ct_helper_log(skb, ct, "cannot add expectation"); @@ -1517,7 +1459,7 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, unsigned int protoff, unsigned int dataoff, const char **dptr, unsigned int *datalen) { - typeof(nf_nat_sip_hook) nf_nat_sip; + const struct nf_nat_sip_hooks *hooks; int ret; if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) @@ -1526,9 +1468,9 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, ret = process_sip_response(skb, protoff, dataoff, dptr, datalen); if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { - nf_nat_sip = rcu_dereference(nf_nat_sip_hook); - if (nf_nat_sip && !nf_nat_sip(skb, protoff, dataoff, - dptr, datalen)) { + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && !hooks->msg(skb, protoff, dataoff, + dptr, datalen)) { nf_ct_helper_log(skb, ct, "cannot NAT SIP message"); ret = NF_DROP; } @@ -1548,7 +1490,6 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, s16 diff, tdiff = 0; int ret = NF_ACCEPT; bool term; - typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) @@ -1612,9 +1553,11 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, } if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { - nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook); - if (nf_nat_sip_seq_adjust) - nf_nat_sip_seq_adjust(skb, protoff, tdiff); + const struct nf_nat_sip_hooks *hooks; + + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks) + hooks->seq_adjust(skb, protoff, tdiff); } return ret; diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index f9790405b7ff..b4d691db955e 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -625,33 +625,26 @@ static struct nf_ct_helper_expectfn sip_nat = { static void __exit nf_nat_sip_fini(void) { - RCU_INIT_POINTER(nf_nat_sip_hook, NULL); - RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL); - RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL); + RCU_INIT_POINTER(nf_nat_sip_hooks, NULL); + nf_ct_helper_expectfn_unregister(&sip_nat); synchronize_rcu(); } +static const struct nf_nat_sip_hooks sip_hooks = { + .msg = nf_nat_sip, + .seq_adjust = nf_nat_sip_seq_adjust, + .expect = nf_nat_sip_expect, + .sdp_addr = nf_nat_sdp_addr, + .sdp_port = nf_nat_sdp_port, + .sdp_session = nf_nat_sdp_session, + .sdp_media = nf_nat_sdp_media, +}; + static int __init nf_nat_sip_init(void) { - BUG_ON(nf_nat_sip_hook != NULL); - BUG_ON(nf_nat_sip_seq_adjust_hook != NULL); - BUG_ON(nf_nat_sip_expect_hook != NULL); - BUG_ON(nf_nat_sdp_addr_hook != NULL); - BUG_ON(nf_nat_sdp_port_hook != NULL); - BUG_ON(nf_nat_sdp_session_hook != NULL); - BUG_ON(nf_nat_sdp_media_hook != NULL); - RCU_INIT_POINTER(nf_nat_sip_hook, nf_nat_sip); - RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, nf_nat_sip_seq_adjust); - RCU_INIT_POINTER(nf_nat_sip_expect_hook, nf_nat_sip_expect); - RCU_INIT_POINTER(nf_nat_sdp_addr_hook, nf_nat_sdp_addr); - RCU_INIT_POINTER(nf_nat_sdp_port_hook, nf_nat_sdp_port); - RCU_INIT_POINTER(nf_nat_sdp_session_hook, nf_nat_sdp_session); - RCU_INIT_POINTER(nf_nat_sdp_media_hook, nf_nat_sdp_media); + BUG_ON(nf_nat_sip_hooks != NULL); + RCU_INIT_POINTER(nf_nat_sip_hooks, &sip_hooks); nf_ct_helper_expectfn_register(&sip_nat); return 0; } -- cgit v1.2.3-55-g7522 From 91cb498e6a34b429a032f8cfbb57dde28cd20e0c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 4 Sep 2013 20:57:48 +0200 Subject: netfilter: cttimeout: allow to set/get default protocol timeouts Default timeouts are currently set via proc/sysctl interface, the typical pattern is a file name like: /proc/sys/net/netfilter/nf_conntrack_PROTOCOL_timeout_STATE This results in one entry per default protocol state timeout. This patch simplifies this by allowing to set default protocol timeouts via cttimeout netlink interface. This should allow us to get rid of the existing proc/sysctl code in the midterm. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_cttimeout.h | 2 + net/netfilter/nfnetlink_cttimeout.c | 161 ++++++++++++++++++++- 2 files changed, 155 insertions(+), 8 deletions(-) (limited to 'net/netfilter') diff --git a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h index a2810a7c5e30..1ab0b97b3a1e 100644 --- a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h +++ b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h @@ -6,6 +6,8 @@ enum ctnl_timeout_msg_types { IPCTNL_MSG_TIMEOUT_NEW, IPCTNL_MSG_TIMEOUT_GET, IPCTNL_MSG_TIMEOUT_DELETE, + IPCTNL_MSG_TIMEOUT_DEFAULT_SET, + IPCTNL_MSG_TIMEOUT_DEFAULT_GET, IPCTNL_MSG_TIMEOUT_MAX }; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 50580494148d..476accd17145 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -49,10 +49,8 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { }; static int -ctnl_timeout_parse_policy(struct ctnl_timeout *timeout, - struct nf_conntrack_l4proto *l4proto, - struct net *net, - const struct nlattr *attr) +ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto, + struct net *net, const struct nlattr *attr) { int ret = 0; @@ -64,8 +62,7 @@ ctnl_timeout_parse_policy(struct ctnl_timeout *timeout, if (ret < 0) return ret; - ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, - &timeout->data); + ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts); } return ret; } @@ -123,7 +120,8 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, goto err_proto_put; } - ret = ctnl_timeout_parse_policy(matching, l4proto, net, + ret = ctnl_timeout_parse_policy(&matching->data, + l4proto, net, cda[CTA_TIMEOUT_DATA]); return ret; } @@ -138,7 +136,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, goto err_proto_put; } - ret = ctnl_timeout_parse_policy(timeout, l4proto, net, + ret = ctnl_timeout_parse_policy(&timeout->data, l4proto, net, cda[CTA_TIMEOUT_DATA]); if (ret < 0) goto err; @@ -342,6 +340,147 @@ cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb, return ret; } +static int +cttimeout_default_set(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + __u16 l3num; + __u8 l4num; + struct nf_conntrack_l4proto *l4proto; + struct net *net = sock_net(skb->sk); + unsigned int *timeouts; + int ret; + + if (!cda[CTA_TIMEOUT_L3PROTO] || + !cda[CTA_TIMEOUT_L4PROTO] || + !cda[CTA_TIMEOUT_DATA]) + return -EINVAL; + + l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); + l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); + l4proto = nf_ct_l4proto_find_get(l3num, l4num); + + /* This protocol is not supported, skip. */ + if (l4proto->l4proto != l4num) { + ret = -EOPNOTSUPP; + goto err; + } + + timeouts = l4proto->get_timeouts(net); + + ret = ctnl_timeout_parse_policy(timeouts, l4proto, net, + cda[CTA_TIMEOUT_DATA]); + if (ret < 0) + goto err; + + nf_ct_l4proto_put(l4proto); + return 0; +err: + nf_ct_l4proto_put(l4proto); + return ret; +} + +static int +cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, + u32 seq, u32 type, int event, + struct nf_conntrack_l4proto *l4proto) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = portid ? NLM_F_MULTI : 0; + + event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l4proto->l3proto)) || + nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto)) + goto nla_put_failure; + + if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) { + struct nlattr *nest_parms; + unsigned int *timeouts = l4proto->get_timeouts(net); + int ret; + + nest_parms = nla_nest_start(skb, + CTA_TIMEOUT_DATA | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + + ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts); + if (ret < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest_parms); + } + + nlmsg_end(skb, nlh); + return skb->len; + +nlmsg_failure: +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int cttimeout_default_get(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + __u16 l3num; + __u8 l4num; + struct nf_conntrack_l4proto *l4proto; + struct net *net = sock_net(skb->sk); + struct sk_buff *skb2; + int ret, err; + + if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO]) + return -EINVAL; + + l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); + l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); + l4proto = nf_ct_l4proto_find_get(l3num, l4num); + + /* This protocol is not supported, skip. */ + if (l4proto->l4proto != l4num) { + err = -EOPNOTSUPP; + goto err; + } + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { + err = -ENOMEM; + goto err; + } + + ret = cttimeout_default_fill_info(net, skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + IPCTNL_MSG_TIMEOUT_DEFAULT_SET, + l4proto); + if (ret <= 0) { + kfree_skb(skb2); + err = -ENOMEM; + goto err; + } + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); + if (ret > 0) + ret = 0; + + /* this avoids a loop in nfnetlink. */ + return ret == -EAGAIN ? -ENOBUFS : ret; +err: + nf_ct_l4proto_put(l4proto); + return err; +} + #ifdef CONFIG_NF_CONNTRACK_TIMEOUT static struct ctnl_timeout *ctnl_timeout_find_get(const char *name) { @@ -384,6 +523,12 @@ static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = { [IPCTNL_MSG_TIMEOUT_DELETE] = { .call = cttimeout_del_timeout, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, + [IPCTNL_MSG_TIMEOUT_DEFAULT_SET]= { .call = cttimeout_default_set, + .attr_count = CTA_TIMEOUT_MAX, + .policy = cttimeout_nla_policy }, + [IPCTNL_MSG_TIMEOUT_DEFAULT_GET]= { .call = cttimeout_default_get, + .attr_count = CTA_TIMEOUT_MAX, + .policy = cttimeout_nla_policy }, }; static const struct nfnetlink_subsystem cttimeout_subsys = { -- cgit v1.2.3-55-g7522 From efe4208f47f907b86f528788da711e8ab9dea44d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Oct 2013 15:42:29 -0700 Subject: ipv6: make lookups simpler and faster TCP listener refactoring, part 4 : To speed up inet lookups, we moved IPv4 addresses from inet to struct sock_common Now is time to do the same for IPv6, because it permits us to have fast lookups for all kind of sockets, including upcoming SYN_RECV. Getting IPv6 addresses in TCP lookups currently requires two extra cache lines, plus a dereference (and memory stall). inet6_sk(sk) does the dereference of inet_sk(__sk)->pinet6 This patch is way bigger than its IPv4 counter part, because for IPv4, we could add aliases (inet_daddr, inet_rcv_saddr), while on IPv6, it's not doable easily. inet6_sk(sk)->daddr becomes sk->sk_v6_daddr inet6_sk(sk)->rcv_saddr becomes sk->sk_v6_rcv_saddr And timewait socket also have tw->tw_v6_daddr & tw->tw_v6_rcv_saddr at the same offset. We get rid of INET6_TW_MATCH() as INET6_MATCH() is now the generic macro. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/ipv6.h | 46 +++----------------- include/net/inet6_hashtables.h | 5 +-- include/net/inet_timewait_sock.h | 4 +- include/net/ip.h | 2 +- include/net/ip6_checksum.h | 2 +- include/net/sock.h | 9 ++++ net/dccp/ipv6.c | 24 +++++------ net/dccp/ipv6.h | 1 - net/dccp/minisocks.c | 7 +--- net/ipv4/inet_diag.c | 35 +++++++--------- net/ipv4/ping.c | 15 ++++--- net/ipv4/tcp_metrics.c | 10 ++--- net/ipv4/tcp_minisocks.c | 7 +--- net/ipv4/tcp_probe.c | 29 +++++-------- net/ipv4/tcp_timer.c | 3 +- net/ipv6/af_inet6.c | 10 ++--- net/ipv6/datagram.c | 25 ++++++----- net/ipv6/inet6_connection_sock.c | 7 ++-- net/ipv6/inet6_hashtables.c | 58 +++++++++----------------- net/ipv6/ipv6_sockglue.c | 7 ++-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 4 +- net/ipv6/ping.c | 2 +- net/ipv6/raw.c | 17 ++++---- net/ipv6/tcp_ipv6.c | 44 ++++++++++--------- net/ipv6/udp.c | 48 ++++++++++----------- net/l2tp/l2tp_core.c | 10 ++--- net/l2tp/l2tp_debugfs.c | 5 ++- net/l2tp/l2tp_ip6.c | 16 +++---- net/l2tp/l2tp_netlink.c | 4 +- net/l2tp/l2tp_ppp.c | 12 +++--- net/netfilter/xt_TPROXY.c | 2 +- net/netfilter/xt_socket.c | 2 +- net/sctp/ipv6.c | 22 +++++----- net/sunrpc/svcsock.c | 2 +- security/lsm_audit.c | 5 +-- 35 files changed, 213 insertions(+), 288 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index b7f1f3bb346d..35f6c1b562c4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -141,8 +141,6 @@ struct ipv6_fl_socklist; */ struct ipv6_pinfo { struct in6_addr saddr; - struct in6_addr rcv_saddr; - struct in6_addr daddr; struct in6_pktinfo sticky_pktinfo; const struct in6_addr *daddr_cache; #ifdef CONFIG_IPV6_SUBTREES @@ -256,22 +254,10 @@ struct tcp6_sock { extern int inet6_sk_rebuild_header(struct sock *sk); -struct inet6_timewait_sock { - struct in6_addr tw_v6_daddr; - struct in6_addr tw_v6_rcv_saddr; -}; - struct tcp6_timewait_sock { struct tcp_timewait_sock tcp6tw_tcp; - struct inet6_timewait_sock tcp6tw_inet6; }; -static inline struct inet6_timewait_sock *inet6_twsk(const struct sock *sk) -{ - return (struct inet6_timewait_sock *)(((u8 *)sk) + - inet_twsk(sk)->tw_ipv6_offset); -} - #if IS_ENABLED(CONFIG_IPV6) static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) { @@ -321,21 +307,11 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, #define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only) #define ipv6_only_sock(sk) ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk)) -static inline u16 inet6_tw_offset(const struct proto *prot) -{ - return prot->twsk_prot->twsk_obj_size - - sizeof(struct inet6_timewait_sock); -} - -static inline struct in6_addr *__inet6_rcv_saddr(const struct sock *sk) +static inline const struct in6_addr *inet6_rcv_saddr(const struct sock *sk) { - return likely(sk->sk_state != TCP_TIME_WAIT) ? - &inet6_sk(sk)->rcv_saddr : &inet6_twsk(sk)->tw_v6_rcv_saddr; -} - -static inline struct in6_addr *inet6_rcv_saddr(const struct sock *sk) -{ - return sk->sk_family == AF_INET6 ? __inet6_rcv_saddr(sk) : NULL; + if (sk->sk_family == AF_INET6) + return &sk->sk_v6_rcv_saddr; + return NULL; } static inline int inet_v6_ipv6only(const struct sock *sk) @@ -363,7 +339,6 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) return NULL; } -#define __inet6_rcv_saddr(__sk) NULL #define inet6_rcv_saddr(__sk) NULL #define tcp_twsk_ipv6only(__sk) 0 #define inet_v6_ipv6only(__sk) 0 @@ -372,19 +347,10 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \ (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ - ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ - ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ + ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \ + ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \ (!(__sk)->sk_bound_dev_if || \ ((__sk)->sk_bound_dev_if == (__dif))) && \ net_eq(sock_net(__sk), (__net))) -#define INET6_TW_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \ - (((__sk)->sk_portpair == (__ports)) && \ - ((__sk)->sk_family == AF_INET6) && \ - ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_daddr, (__saddr)) && \ - ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_rcv_saddr, (__daddr)) && \ - (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif))) && \ - net_eq(sock_net(__sk), (__net))) - #endif /* _IPV6_H */ diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index f52fa88feb64..a105d1a2fc00 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -43,9 +43,8 @@ static inline unsigned int inet6_ehashfn(struct net *net, static inline int inet6_sk_ehashfn(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - const struct in6_addr *laddr = &np->rcv_saddr; - const struct in6_addr *faddr = &np->daddr; + const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr; + const struct in6_addr *faddr = &sk->sk_v6_daddr; const __u16 lport = inet->inet_num; const __be16 fport = inet->inet_dport; struct net *net = sock_net(sk); diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index de9e3ab7d43d..b647c6270eb7 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -116,7 +116,9 @@ struct inet_timewait_sock { #define tw_prot __tw_common.skc_prot #define tw_net __tw_common.skc_net #define tw_daddr __tw_common.skc_daddr +#define tw_v6_daddr __tw_common.skc_v6_daddr #define tw_rcv_saddr __tw_common.skc_rcv_saddr +#define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr #define tw_dport __tw_common.skc_dport #define tw_num __tw_common.skc_num @@ -133,7 +135,7 @@ struct inet_timewait_sock { tw_transparent : 1, tw_pad : 6, /* 6 bits hole */ tw_tos : 8, - tw_ipv6_offset : 16; + tw_pad2 : 16 /* 16 bits hole */ kmemcheck_bitfield_end(flags); u32 tw_ttd; struct inet_bind_bucket *tw_tb; diff --git a/include/net/ip.h b/include/net/ip.h index b39ebe5339ac..217bc5bfc6c6 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -374,7 +374,7 @@ static __inline__ void inet_reset_saddr(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); memset(&np->saddr, 0, sizeof(np->saddr)); - memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr)); + memset(&sk->sk_v6_rcv_saddr, 0, sizeof(sk->sk_v6_rcv_saddr)); } #endif } diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h index 7686e3f5033d..1944406949ba 100644 --- a/include/net/ip6_checksum.h +++ b/include/net/ip6_checksum.h @@ -70,7 +70,7 @@ static inline void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); - __tcp_v6_send_check(skb, &np->saddr, &np->daddr); + __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr); } int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto); diff --git a/include/net/sock.h b/include/net/sock.h index 3f3e48c4704d..7e50df5c71d4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -191,6 +191,12 @@ struct sock_common { #ifdef CONFIG_NET_NS struct net *skc_net; #endif + +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr skc_v6_daddr; + struct in6_addr skc_v6_rcv_saddr; +#endif + /* * fields between dontcopy_begin/dontcopy_end * are not copied in sock_copy() @@ -314,6 +320,9 @@ struct sock { #define sk_bind_node __sk_common.skc_bind_node #define sk_prot __sk_common.skc_prot #define sk_net __sk_common.skc_net +#define sk_v6_daddr __sk_common.skc_v6_daddr +#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr + socket_lock_t sk_lock; struct sk_buff_head sk_receive_queue; /* diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6cf9f7782ad4..7f075b83128a 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -67,7 +67,7 @@ static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb) struct dccp_hdr *dh = dccp_hdr(skb); dccp_csum_outgoing(skb); - dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr); + dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &sk->sk_v6_daddr); } static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb) @@ -467,11 +467,11 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr); + ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr); ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); - newnp->rcv_saddr = newnp->saddr; + newsk->sk_v6_rcv_saddr = newnp->saddr; inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped; newsk->sk_backlog_rcv = dccp_v4_do_rcv; @@ -538,9 +538,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - newnp->daddr = ireq6->rmt_addr; + newsk->sk_v6_daddr = ireq6->rmt_addr; newnp->saddr = ireq6->loc_addr; - newnp->rcv_saddr = ireq6->loc_addr; + newsk->sk_v6_rcv_saddr = ireq6->loc_addr; newsk->sk_bound_dev_if = ireq6->iif; /* Now IPv6 options... @@ -885,7 +885,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, return -EINVAL; } - np->daddr = usin->sin6_addr; + sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; /* @@ -915,16 +915,16 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, goto failure; } ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr); + ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &sk->sk_v6_rcv_saddr); return err; } - if (!ipv6_addr_any(&np->rcv_saddr)) - saddr = &np->rcv_saddr; + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) + saddr = &sk->sk_v6_rcv_saddr; fl6.flowi6_proto = IPPROTO_DCCP; - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; fl6.saddr = saddr ? *saddr : np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.fl6_dport = usin->sin6_port; @@ -941,7 +941,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (saddr == NULL) { saddr = &fl6.saddr; - np->rcv_saddr = *saddr; + sk->sk_v6_rcv_saddr = *saddr; } /* set the source address */ @@ -963,7 +963,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, goto late_failure; dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32, - np->daddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, inet->inet_sport, inet->inet_dport); err = dccp_connect(sk); diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h index 6eef81fdbe56..6604fc3fe953 100644 --- a/net/dccp/ipv6.h +++ b/net/dccp/ipv6.h @@ -30,7 +30,6 @@ struct dccp6_request_sock { struct dccp6_timewait_sock { struct inet_timewait_sock inet; - struct inet6_timewait_sock tw6; }; #endif /* _DCCP_IPV6_H */ diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 662071b249cc..32e80d96d4c0 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -56,12 +56,9 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); - struct inet6_timewait_sock *tw6; - tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); - tw6 = inet6_twsk((struct sock *)tw); - tw6->tw_v6_daddr = np->daddr; - tw6->tw_v6_rcv_saddr = np->rcv_saddr; + tw->tw_v6_daddr = sk->sk_v6_daddr; + tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_ipv6only = np->ipv6only; } #endif diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 8e1e40653357..ecc179d676e4 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -121,13 +121,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, #if IS_ENABLED(CONFIG_IPV6) if (r->idiag_family == AF_INET6) { - const struct ipv6_pinfo *np = inet6_sk(sk); - *(struct in6_addr *)r->id.idiag_src = np->rcv_saddr; - *(struct in6_addr *)r->id.idiag_dst = np->daddr; + *(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr; + *(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr; if (ext & (1 << (INET_DIAG_TCLASS - 1))) - if (nla_put_u8(skb, INET_DIAG_TCLASS, np->tclass) < 0) + if (nla_put_u8(skb, INET_DIAG_TCLASS, + inet6_sk(sk)->tclass) < 0) goto errout; } #endif @@ -255,11 +255,8 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r->idiag_inode = 0; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == AF_INET6) { - const struct inet6_timewait_sock *tw6 = - inet6_twsk((struct sock *)tw); - - *(struct in6_addr *)r->id.idiag_src = tw6->tw_v6_rcv_saddr; - *(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr; + *(struct in6_addr *)r->id.idiag_src = tw->tw_v6_rcv_saddr; + *(struct in6_addr *)r->id.idiag_dst = tw->tw_v6_daddr; } #endif @@ -273,10 +270,11 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) - return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, - skb, r, portid, seq, nlmsg_flags, - unlh); - return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh); + return inet_twsk_diag_fill(inet_twsk(sk), skb, r, portid, seq, + nlmsg_flags, unlh); + + return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, + nlmsg_flags, unlh); } int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, @@ -489,10 +487,9 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) entry.family = sk->sk_family; #if IS_ENABLED(CONFIG_IPV6) if (entry.family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - entry.saddr = np->rcv_saddr.s6_addr32; - entry.daddr = np->daddr.s6_addr32; + entry.saddr = sk->sk_v6_rcv_saddr.s6_addr32; + entry.daddr = sk->sk_v6_daddr.s6_addr32; } else #endif { @@ -649,10 +646,8 @@ static int inet_twsk_diag_dump(struct sock *sk, entry.family = tw->tw_family; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == AF_INET6) { - struct inet6_timewait_sock *tw6 = - inet6_twsk((struct sock *)tw); - entry.saddr = tw6->tw_v6_rcv_saddr.s6_addr32; - entry.daddr = tw6->tw_v6_daddr.s6_addr32; + entry.saddr = tw->tw_v6_rcv_saddr.s6_addr32; + entry.daddr = tw->tw_v6_daddr.s6_addr32; } else #endif { diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index a62610443152..ccefc07beacd 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -202,15 +202,14 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) #if IS_ENABLED(CONFIG_IPV6) } else if (skb->protocol == htons(ETH_P_IPV6) && sk->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk, (int) isk->inet_num, - &inet6_sk(sk)->rcv_saddr, + &sk->sk_v6_rcv_saddr, sk->sk_bound_dev_if); - if (!ipv6_addr_any(&np->rcv_saddr) && - !ipv6_addr_equal(&np->rcv_saddr, + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) && + !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, &ipv6_hdr(skb)->daddr)) continue; #endif @@ -362,7 +361,7 @@ static void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) } else if (saddr->sa_family == AF_INET6) { struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr; struct ipv6_pinfo *np = inet6_sk(sk); - np->rcv_saddr = np->saddr = addr->sin6_addr; + sk->sk_v6_rcv_saddr = np->saddr = addr->sin6_addr; #endif } } @@ -376,7 +375,7 @@ static void ping_clear_saddr(struct sock *sk, int dif) #if IS_ENABLED(CONFIG_IPV6) } else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr)); + memset(&sk->sk_v6_rcv_saddr, 0, sizeof(sk->sk_v6_rcv_saddr)); memset(&np->saddr, 0, sizeof(np->saddr)); #endif } @@ -418,7 +417,7 @@ int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) err = 0; if ((sk->sk_family == AF_INET && isk->inet_rcv_saddr) || (sk->sk_family == AF_INET6 && - !ipv6_addr_any(&inet6_sk(sk)->rcv_saddr))) + !ipv6_addr_any(&sk->sk_v6_rcv_saddr))) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; if (snum) @@ -429,7 +428,7 @@ int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) - memset(&inet6_sk(sk)->daddr, 0, sizeof(inet6_sk(sk)->daddr)); + memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr)); #endif sk_dst_reset(sk); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 52f3c6b971d2..27535fd5ea10 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -240,7 +240,6 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw) { - struct inet6_timewait_sock *tw6; struct tcp_metrics_block *tm; struct inetpeer_addr addr; unsigned int hash; @@ -253,9 +252,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock hash = (__force unsigned int) addr.addr.a4; break; case AF_INET6: - tw6 = inet6_twsk((struct sock *)tw); - *(struct in6_addr *)addr.addr.a6 = tw6->tw_v6_daddr; - hash = ipv6_addr_hash(&tw6->tw_v6_daddr); + *(struct in6_addr *)addr.addr.a6 = tw->tw_v6_daddr; + hash = ipv6_addr_hash(&tw->tw_v6_daddr); break; default: return NULL; @@ -289,8 +287,8 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, hash = (__force unsigned int) addr.addr.a4; break; case AF_INET6: - *(struct in6_addr *)addr.addr.a6 = inet6_sk(sk)->daddr; - hash = ipv6_addr_hash(&inet6_sk(sk)->daddr); + *(struct in6_addr *)addr.addr.a6 = sk->sk_v6_daddr; + hash = ipv6_addr_hash(&sk->sk_v6_daddr); break; default: return NULL; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 58a3e69aef64..97b684159861 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -293,12 +293,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - struct inet6_timewait_sock *tw6; - tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); - tw6 = inet6_twsk((struct sock *)tw); - tw6->tw_v6_daddr = np->daddr; - tw6->tw_v6_rcv_saddr = np->rcv_saddr; + tw->tw_v6_daddr = sk->sk_v6_daddr; + tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_tclass = np->tclass; tw->tw_ipv6only = np->ipv6only; } diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 611beab38a00..8b97d71e193b 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -101,22 +101,6 @@ static inline int tcp_probe_avail(void) si4.sin_addr.s_addr = inet->inet_##mem##addr; \ } while (0) \ -#if IS_ENABLED(CONFIG_IPV6) -#define tcp_probe_copy_fl_to_si6(inet, si6, mem) \ - do { \ - struct ipv6_pinfo *pi6 = inet->pinet6; \ - si6.sin6_family = AF_INET6; \ - si6.sin6_port = inet->inet_##mem##port; \ - si6.sin6_addr = pi6->mem##addr; \ - si6.sin6_flowinfo = 0; /* No need here. */ \ - si6.sin6_scope_id = 0; /* No need here. */ \ - } while (0) -#else -#define tcp_probe_copy_fl_to_si6(fl, si6, mem) \ - do { \ - memset(&si6, 0, sizeof(si6)); \ - } while (0) -#endif /* * Hook inserted to be called before each receive packet. @@ -147,8 +131,17 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_probe_copy_fl_to_si4(inet, p->dst.v4, d); break; case AF_INET6: - tcp_probe_copy_fl_to_si6(inet, p->src.v6, s); - tcp_probe_copy_fl_to_si6(inet, p->dst.v6, d); + memset(&p->src.v6, 0, sizeof(p->src.v6)); + memset(&p->dst.v6, 0, sizeof(p->dst.v6)); +#if IS_ENABLED(CONFIG_IPV6) + p->src.v6.sin6_family = AF_INET6; + p->src.v6.sin6_port = inet->inet_sport; + p->src.v6.sin6_addr = inet6_sk(sk)->saddr; + + p->dst.v6.sin6_family = AF_INET6; + p->dst.v6.sin6_port = inet->inet_dport; + p->dst.v6.sin6_addr = sk->sk_v6_daddr; +#endif break; default: BUG(); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 4b85e6f636c9..af07b5b23ebf 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -374,9 +374,8 @@ void tcp_retransmit_timer(struct sock *sk) } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"), - &np->daddr, + &sk->sk_v6_daddr, ntohs(inet->inet_dport), inet->inet_num, tp->snd_una, tp->snd_nxt); } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4966b124dc2e..a2cb07cd3850 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -364,7 +364,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) inet->inet_rcv_saddr = v4addr; inet->inet_saddr = v4addr; - np->rcv_saddr = addr->sin6_addr; + sk->sk_v6_rcv_saddr = addr->sin6_addr; if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; @@ -461,14 +461,14 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, peer == 1) return -ENOTCONN; sin->sin6_port = inet->inet_dport; - sin->sin6_addr = np->daddr; + sin->sin6_addr = sk->sk_v6_daddr; if (np->sndflow) sin->sin6_flowinfo = np->flow_label; } else { - if (ipv6_addr_any(&np->rcv_saddr)) + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) sin->sin6_addr = np->saddr; else - sin->sin6_addr = np->rcv_saddr; + sin->sin6_addr = sk->sk_v6_rcv_saddr; sin->sin6_port = inet->inet_sport; } @@ -655,7 +655,7 @@ int inet6_sk_rebuild_header(struct sock *sk) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = sk->sk_protocol; - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; fl6.saddr = np->saddr; fl6.flowlabel = np->flow_label; fl6.flowi6_oif = sk->sk_bound_dev_if; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 48b6bd2a9a14..a454b0ff57c7 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -107,16 +107,16 @@ ipv4_connected: if (err) goto out; - ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr); + ipv6_addr_set_v4mapped(inet->inet_daddr, &sk->sk_v6_daddr); if (ipv6_addr_any(&np->saddr) || ipv6_mapped_addr_any(&np->saddr)) ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - if (ipv6_addr_any(&np->rcv_saddr) || - ipv6_mapped_addr_any(&np->rcv_saddr)) { + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) || + ipv6_mapped_addr_any(&sk->sk_v6_rcv_saddr)) { ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, - &np->rcv_saddr); + &sk->sk_v6_rcv_saddr); if (sk->sk_prot->rehash) sk->sk_prot->rehash(sk); } @@ -145,7 +145,7 @@ ipv4_connected: } } - np->daddr = *daddr; + sk->sk_v6_daddr = *daddr; np->flow_label = fl6.flowlabel; inet->inet_dport = usin->sin6_port; @@ -156,7 +156,7 @@ ipv4_connected: */ fl6.flowi6_proto = sk->sk_protocol; - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; fl6.saddr = np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; @@ -183,16 +183,16 @@ ipv4_connected: if (ipv6_addr_any(&np->saddr)) np->saddr = fl6.saddr; - if (ipv6_addr_any(&np->rcv_saddr)) { - np->rcv_saddr = fl6.saddr; + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + sk->sk_v6_rcv_saddr = fl6.saddr; inet->inet_rcv_saddr = LOOPBACK4_IPV6; if (sk->sk_prot->rehash) sk->sk_prot->rehash(sk); } ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl6.daddr, &np->daddr) ? - &np->daddr : NULL, + ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? + &sk->sk_v6_daddr : NULL, #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_equal(&fl6.saddr, &np->saddr) ? &np->saddr : @@ -883,11 +883,10 @@ EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl); void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp, __u16 destp, int bucket) { - struct ipv6_pinfo *np = inet6_sk(sp); const struct in6_addr *dest, *src; - dest = &np->daddr; - src = &np->rcv_saddr; + dest = &sp->sk_v6_daddr; + src = &sp->sk_v6_rcv_saddr; seq_printf(seq, "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n", diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index e4311cbc8b4e..b7400b480e74 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -165,11 +165,10 @@ EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add); void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) { - struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; sin6->sin6_family = AF_INET6; - sin6->sin6_addr = np->daddr; + sin6->sin6_addr = sk->sk_v6_daddr; sin6->sin6_port = inet_sk(sk)->inet_dport; /* We do not store received flowlabel for TCP */ sin6->sin6_flowinfo = 0; @@ -203,7 +202,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_proto = sk->sk_protocol; - fl6->daddr = np->daddr; + fl6->daddr = sk->sk_v6_daddr; fl6->saddr = np->saddr; fl6->flowlabel = np->flow_label; IP6_ECN_flow_xmit(sk, fl6->flowlabel); @@ -245,7 +244,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) skb_dst_set_noref(skb, dst); /* Restore final destination back after routing done */ - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); rcu_read_unlock(); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 46440777e1c5..842d833dfc18 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -89,30 +89,16 @@ begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) continue; - if (sk->sk_state == TCP_TIME_WAIT) { - if (!INET6_TW_MATCH(sk, net, saddr, daddr, ports, dif)) - continue; - } else { - if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif)) - continue; - } + if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif)) + continue; if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) goto out; - if (sk->sk_state == TCP_TIME_WAIT) { - if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr, - ports, dif))) { - sock_gen_put(sk); - goto begin; - } - } else { - if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, - ports, dif))) { - sock_put(sk); - goto begin; - } - goto found; + if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) { + sock_gen_put(sk); + goto begin; } + goto found; } if (get_nulls_value(node) != slot) goto begin; @@ -133,11 +119,10 @@ static inline int compute_score(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum && sk->sk_family == PF_INET6) { - const struct ipv6_pinfo *np = inet6_sk(sk); score = 1; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; score++; } @@ -229,9 +214,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - const struct in6_addr *daddr = &np->rcv_saddr; - const struct in6_addr *saddr = &np->daddr; + const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr; + const struct in6_addr *saddr = &sk->sk_v6_daddr; const int dif = sk->sk_bound_dev_if; const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); struct net *net = sock_net(sk); @@ -250,23 +234,19 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, if (sk2->sk_hash != hash) continue; - if (sk2->sk_state == TCP_TIME_WAIT) { - if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr, - ports, dif))) { + if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) { + if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); if (twsk_unique(sk, sk2, twp)) - goto unique; - else - goto not_unique; + break; } - } - if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) goto not_unique; + } } -unique: /* Must record num and sport now. Otherwise we will see - * in hash table socket with a funny identity. */ + * in hash table socket with a funny identity. + */ inet->inet_num = lport; inet->inet_sport = htons(lport); sk->sk_hash = hash; @@ -299,9 +279,9 @@ not_unique: static inline u32 inet6_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32, - np->daddr.s6_addr32, + + return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, inet->inet_dport); } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d1e2e8ef29c5..4919a8e6063e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -174,7 +174,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } if (ipv6_only_sock(sk) || - !ipv6_addr_v4mapped(&np->daddr)) { + !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) { retv = -EADDRNOTAVAIL; break; } @@ -1011,7 +1011,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : np->sticky_pktinfo.ipi6_ifindex; - src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr; + src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr; put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxhlim) { @@ -1026,7 +1026,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : np->sticky_pktinfo.ipi6_ifindex; - src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr; + src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : + np->sticky_pktinfo.ipi6_addr; put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxohlim) { diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index d6e4dd8b58df..54b75ead5a69 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -297,9 +297,9 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 }; struct nf_conn *ct; - tuple.src.u3.in6 = inet6->rcv_saddr; + tuple.src.u3.in6 = sk->sk_v6_rcv_saddr; tuple.src.u.tcp.port = inet->inet_sport; - tuple.dst.u3.in6 = inet6->daddr; + tuple.dst.u3.in6 = sk->sk_v6_daddr; tuple.dst.u.tcp.port = inet->inet_dport; tuple.dst.protonum = sk->sk_protocol; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 18f19df4189f..8815e31a87fe 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -116,7 +116,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; } if (!iif) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a4ed2416399e..3c00842b0079 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -77,20 +77,19 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, sk_for_each_from(sk) if (inet_sk(sk)->inet_num == num) { - struct ipv6_pinfo *np = inet6_sk(sk); if (!net_eq(sock_net(sk), net)) continue; - if (!ipv6_addr_any(&np->daddr) && - !ipv6_addr_equal(&np->daddr, rmt_addr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr) && + !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) continue; if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) continue; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)) goto found; if (is_multicast && inet6_mc_check(sk, loc_addr, rmt_addr)) @@ -302,7 +301,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) } inet->inet_rcv_saddr = inet->inet_saddr = v4addr; - np->rcv_saddr = addr->sin6_addr; + sk->sk_v6_rcv_saddr = addr->sin6_addr; if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; err = 0; @@ -804,8 +803,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, * sk->sk_dst_cache. */ if (sk->sk_state == TCP_ESTABLISHED && - ipv6_addr_equal(daddr, &np->daddr)) - daddr = &np->daddr; + ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) + daddr = &sk->sk_v6_daddr; if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && @@ -816,7 +815,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, return -EDESTADDRREQ; proto = inet->inet_num; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; fl6.flowlabel = np->flow_label; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 528e61afaf5e..541dfc40c7b3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -192,13 +192,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } if (tp->rx_opt.ts_recent_stamp && - !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) { + !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; tp->write_seq = 0; } - np->daddr = usin->sin6_addr; + sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; /* @@ -237,17 +237,17 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } else { ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, - &np->rcv_saddr); + &sk->sk_v6_rcv_saddr); } return err; } - if (!ipv6_addr_any(&np->rcv_saddr)) - saddr = &np->rcv_saddr; + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) + saddr = &sk->sk_v6_rcv_saddr; fl6.flowi6_proto = IPPROTO_TCP; - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; fl6.saddr = saddr ? *saddr : np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; @@ -266,7 +266,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (saddr == NULL) { saddr = &fl6.saddr; - np->rcv_saddr = *saddr; + sk->sk_v6_rcv_saddr = *saddr; } /* set the source address */ @@ -279,7 +279,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, rt = (struct rt6_info *) dst; if (tcp_death_row.sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && - ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) + ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr)) tcp_fetch_timewait_stamp(sk, dst); icsk->icsk_ext_hdr_len = 0; @@ -298,7 +298,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (!tp->write_seq && likely(!tp->repair)) tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, - np->daddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, inet->inet_sport, inet->inet_dport); @@ -515,7 +515,7 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk, struct sock *addr_sk) { - return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr); + return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr); } static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk, @@ -621,7 +621,7 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, if (sk) { saddr = &inet6_sk(sk)->saddr; - daddr = &inet6_sk(sk)->daddr; + daddr = &sk->sk_v6_daddr; } else if (req) { saddr = &inet6_rsk(req)->loc_addr; daddr = &inet6_rsk(req)->rmt_addr; @@ -1116,11 +1116,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr); + ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr); ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); - newnp->rcv_saddr = newnp->saddr; + newsk->sk_v6_rcv_saddr = newnp->saddr; inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; newsk->sk_backlog_rcv = tcp_v4_do_rcv; @@ -1185,9 +1185,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - newnp->daddr = treq->rmt_addr; + newsk->sk_v6_daddr = treq->rmt_addr; newnp->saddr = treq->loc_addr; - newnp->rcv_saddr = treq->loc_addr; + newsk->sk_v6_rcv_saddr = treq->loc_addr; newsk->sk_bound_dev_if = treq->iif; /* Now IPv6 options... @@ -1244,13 +1244,13 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ - if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) { + if ((key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr)) != NULL) { /* We're using one, so create a matching key * on the newsk structure. If we fail to get * memory, then we end up not copying the key * across. Shucks. */ - tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr, + tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr, AF_INET6, key->key, key->keylen, sk_gfp_atomic(sk, GFP_ATOMIC)); } @@ -1758,10 +1758,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) const struct inet_sock *inet = inet_sk(sp); const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); - const struct ipv6_pinfo *np = inet6_sk(sp); - dest = &np->daddr; - src = &np->rcv_saddr; + dest = &sp->sk_v6_daddr; + src = &sp->sk_v6_rcv_saddr; destp = ntohs(inet->inet_dport); srcp = ntohs(inet->inet_sport); @@ -1810,11 +1809,10 @@ static void get_timewait6_sock(struct seq_file *seq, { const struct in6_addr *dest, *src; __u16 destp, srcp; - const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); s32 delta = tw->tw_ttd - inet_tw_time_stamp(); - dest = &tw6->tw_v6_daddr; - src = &tw6->tw_v6_rcv_saddr; + dest = &tw->tw_v6_daddr; + src = &tw->tw_v6_rcv_saddr; destp = ntohs(tw->tw_dport); srcp = ntohs(tw->tw_sport); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 37532478e3ba..b496de19a341 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -55,11 +55,10 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { - const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); - int addr_type = ipv6_addr_type(sk_rcv_saddr6); + int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; /* if both are mapped, treat as IPv4 */ @@ -77,7 +76,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) return 1; if (sk2_rcv_saddr6 && - ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) + ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6)) return 1; return 0; @@ -105,7 +104,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) unsigned int hash2_nulladdr = udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum); unsigned int hash2_partial = - udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0); + udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0); /* precompute partial secondary hash */ udp_sk(sk)->udp_portaddr_hash = hash2_partial; @@ -115,7 +114,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) static void udp_v6_rehash(struct sock *sk) { u16 new_hash = udp6_portaddr_hash(sock_net(sk), - &inet6_sk(sk)->rcv_saddr, + &sk->sk_v6_rcv_saddr, inet_sk(sk)->inet_num); udp_lib_rehash(sk, new_hash); @@ -131,7 +130,6 @@ static inline int compute_score(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); score = 0; @@ -140,13 +138,13 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score++; } - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; score++; } - if (!ipv6_addr_any(&np->daddr)) { - if (!ipv6_addr_equal(&np->daddr, saddr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) return -1; score++; } @@ -169,10 +167,9 @@ static inline int compute_score2(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; score = 0; if (inet->inet_dport) { @@ -180,8 +177,8 @@ static inline int compute_score2(struct sock *sk, struct net *net, return -1; score++; } - if (!ipv6_addr_any(&np->daddr)) { - if (!ipv6_addr_equal(&np->daddr, saddr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) return -1; score++; } @@ -549,7 +546,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; - if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) { + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); } @@ -690,20 +687,19 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, if (udp_sk(s)->udp_port_hash == num && s->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(s); if (inet->inet_dport) { if (inet->inet_dport != rmt_port) continue; } - if (!ipv6_addr_any(&np->daddr) && - !ipv6_addr_equal(&np->daddr, rmt_addr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr) && + !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) continue; if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif) continue; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr)) + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)) continue; } if (!inet6_mc_check(s, loc_addr, rmt_addr)) @@ -1063,7 +1059,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, } else if (!up->pending) { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; } else daddr = NULL; @@ -1133,8 +1129,8 @@ do_udp_sendmsg: * sk->sk_dst_cache. */ if (sk->sk_state == TCP_ESTABLISHED && - ipv6_addr_equal(daddr, &np->daddr)) - daddr = &np->daddr; + ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) + daddr = &sk->sk_v6_daddr; if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && @@ -1145,7 +1141,7 @@ do_udp_sendmsg: return -EDESTADDRREQ; fl6.fl6_dport = inet->inet_dport; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; fl6.flowlabel = np->flow_label; connected = 1; } @@ -1261,8 +1257,8 @@ do_append_data: if (dst) { if (connected) { ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl6.daddr, &np->daddr) ? - &np->daddr : NULL, + ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? + &sk->sk_v6_daddr : NULL, #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_equal(&fl6.saddr, &np->saddr) ? &np->saddr : diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index b076e8309bc2..9af77d9c0ec9 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1181,7 +1181,7 @@ static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb, !(skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { __wsum csum = skb_checksum(skb, 0, udp_len, 0); skb->ip_summed = CHECKSUM_UNNECESSARY; - uh->check = csum_ipv6_magic(&np->saddr, &np->daddr, udp_len, + uh->check = csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr, udp_len, IPPROTO_UDP, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; @@ -1189,7 +1189,7 @@ static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb, skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, + uh->check = ~csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr, udp_len, IPPROTO_UDP, 0); } } @@ -1713,13 +1713,13 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 struct ipv6_pinfo *np = inet6_sk(sk); if (ipv6_addr_v4mapped(&np->saddr) && - ipv6_addr_v4mapped(&np->daddr)) { + ipv6_addr_v4mapped(&sk->sk_v6_daddr)) { struct inet_sock *inet = inet_sk(sk); tunnel->v4mapped = true; inet->inet_saddr = np->saddr.s6_addr32[3]; - inet->inet_rcv_saddr = np->rcv_saddr.s6_addr32[3]; - inet->inet_daddr = np->daddr.s6_addr32[3]; + inet->inet_rcv_saddr = sk->sk_v6_rcv_saddr.s6_addr32[3]; + inet->inet_daddr = sk->sk_v6_daddr.s6_addr32[3]; } else { tunnel->v4mapped = false; } diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 072d7202e182..2d6760a2ae34 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -127,9 +127,10 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) #if IS_ENABLED(CONFIG_IPV6) if (tunnel->sock->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(tunnel->sock); + const struct ipv6_pinfo *np = inet6_sk(tunnel->sock); + seq_printf(m, " from %pI6c to %pI6c\n", - &np->saddr, &np->daddr); + &np->saddr, &tunnel->sock->sk_v6_daddr); } else #endif seq_printf(m, " from %pI4 to %pI4\n", diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index b8a6039314e8..cfd65304be60 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -63,7 +63,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, struct sock *sk; sk_for_each_bound(sk, &l2tp_ip6_bind_table) { - struct in6_addr *addr = inet6_rcv_saddr(sk); + const struct in6_addr *addr = inet6_rcv_saddr(sk); struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk); if (l2tp == NULL) @@ -331,7 +331,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) rcu_read_unlock(); inet->inet_rcv_saddr = inet->inet_saddr = v4addr; - np->rcv_saddr = addr->l2tp_addr; + sk->sk_v6_rcv_saddr = addr->l2tp_addr; np->saddr = addr->l2tp_addr; l2tp_ip6_sk(sk)->conn_id = addr->l2tp_conn_id; @@ -421,14 +421,14 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, if (!lsk->peer_conn_id) return -ENOTCONN; lsa->l2tp_conn_id = lsk->peer_conn_id; - lsa->l2tp_addr = np->daddr; + lsa->l2tp_addr = sk->sk_v6_daddr; if (np->sndflow) lsa->l2tp_flowinfo = np->flow_label; } else { - if (ipv6_addr_any(&np->rcv_saddr)) + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) lsa->l2tp_addr = np->saddr; else - lsa->l2tp_addr = np->rcv_saddr; + lsa->l2tp_addr = sk->sk_v6_rcv_saddr; lsa->l2tp_conn_id = lsk->conn_id; } @@ -537,8 +537,8 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, * sk->sk_dst_cache. */ if (sk->sk_state == TCP_ESTABLISHED && - ipv6_addr_equal(daddr, &np->daddr)) - daddr = &np->daddr; + ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) + daddr = &sk->sk_v6_daddr; if (addr_len >= sizeof(struct sockaddr_in6) && lsa->l2tp_scope_id && @@ -548,7 +548,7 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; fl6.flowlabel = np->flow_label; } diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 0825ff26e113..be446d517bc9 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -306,8 +306,8 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla if (np) { if (nla_put(skb, L2TP_ATTR_IP6_SADDR, sizeof(np->saddr), &np->saddr) || - nla_put(skb, L2TP_ATTR_IP6_DADDR, sizeof(np->daddr), - &np->daddr)) + nla_put(skb, L2TP_ATTR_IP6_DADDR, sizeof(sk->sk_v6_daddr), + &sk->sk_v6_daddr)) goto nla_put_failure; } else #endif diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 5ebee2ded9e9..f0a7adaef2ea 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -906,8 +906,8 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, #if IS_ENABLED(CONFIG_IPV6) } else if ((tunnel->version == 2) && (tunnel->sock->sk_family == AF_INET6)) { - struct ipv6_pinfo *np = inet6_sk(tunnel->sock); struct sockaddr_pppol2tpin6 sp; + len = sizeof(sp); memset(&sp, 0, len); sp.sa_family = AF_PPPOX; @@ -920,13 +920,13 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, sp.pppol2tp.d_session = session->peer_session_id; sp.pppol2tp.addr.sin6_family = AF_INET6; sp.pppol2tp.addr.sin6_port = inet->inet_dport; - memcpy(&sp.pppol2tp.addr.sin6_addr, &np->daddr, - sizeof(np->daddr)); + memcpy(&sp.pppol2tp.addr.sin6_addr, &tunnel->sock->sk_v6_daddr, + sizeof(tunnel->sock->sk_v6_daddr)); memcpy(uaddr, &sp, len); } else if ((tunnel->version == 3) && (tunnel->sock->sk_family == AF_INET6)) { - struct ipv6_pinfo *np = inet6_sk(tunnel->sock); struct sockaddr_pppol2tpv3in6 sp; + len = sizeof(sp); memset(&sp, 0, len); sp.sa_family = AF_PPPOX; @@ -939,8 +939,8 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, sp.pppol2tp.d_session = session->peer_session_id; sp.pppol2tp.addr.sin6_family = AF_INET6; sp.pppol2tp.addr.sin6_port = inet->inet_dport; - memcpy(&sp.pppol2tp.addr.sin6_addr, &np->daddr, - sizeof(np->daddr)); + memcpy(&sp.pppol2tp.addr.sin6_addr, &tunnel->sock->sk_v6_daddr, + sizeof(tunnel->sock->sk_v6_daddr)); memcpy(uaddr, &sp, len); #endif } else if (tunnel->version == 3) { diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 5d8a3a3cd5a7..ef8a926752a9 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -200,7 +200,7 @@ nf_tproxy_get_sock_v6(struct net *net, const u8 protocol, in->ifindex); if (sk) { int connected = (sk->sk_state == TCP_ESTABLISHED); - int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr); + int wildcard = ipv6_addr_any(&sk->sk_v6_rcv_saddr); /* NOTE: we return listeners even if bound to * 0.0.0.0, those are filtered out in diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 06df2b9110f5..3dd0e374bc2b 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -370,7 +370,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) */ wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && sk->sk_state != TCP_TIME_WAIT && - ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)); + ipv6_addr_any(&sk->sk_v6_rcv_saddr)); /* Ignore non-transparent sockets, if XT_SOCKET_TRANSPARENT is used */ diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e7b2d4fe2b6a..f6334aa19151 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -426,20 +426,20 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk) { addr->v6.sin6_family = AF_INET6; addr->v6.sin6_port = 0; - addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr; + addr->v6.sin6_addr = sk->sk_v6_rcv_saddr; } /* Initialize sk->sk_rcv_saddr from sctp_addr. */ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk) { if (addr->sa.sa_family == AF_INET && sctp_sk(sk)->v4mapped) { - inet6_sk(sk)->rcv_saddr.s6_addr32[0] = 0; - inet6_sk(sk)->rcv_saddr.s6_addr32[1] = 0; - inet6_sk(sk)->rcv_saddr.s6_addr32[2] = htonl(0x0000ffff); - inet6_sk(sk)->rcv_saddr.s6_addr32[3] = + sk->sk_v6_rcv_saddr.s6_addr32[0] = 0; + sk->sk_v6_rcv_saddr.s6_addr32[1] = 0; + sk->sk_v6_rcv_saddr.s6_addr32[2] = htonl(0x0000ffff); + sk->sk_v6_rcv_saddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; } else { - inet6_sk(sk)->rcv_saddr = addr->v6.sin6_addr; + sk->sk_v6_rcv_saddr = addr->v6.sin6_addr; } } @@ -447,12 +447,12 @@ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk) static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk) { if (addr->sa.sa_family == AF_INET && sctp_sk(sk)->v4mapped) { - inet6_sk(sk)->daddr.s6_addr32[0] = 0; - inet6_sk(sk)->daddr.s6_addr32[1] = 0; - inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff); - inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; + sk->sk_v6_daddr.s6_addr32[0] = 0; + sk->sk_v6_daddr.s6_addr32[1] = 0; + sk->sk_v6_daddr.s6_addr32[2] = htonl(0x0000ffff); + sk->sk_v6_daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; } else { - inet6_sk(sk)->daddr = addr->v6.sin6_addr; + sk->sk_v6_daddr = addr->v6.sin6_addr; } } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 9c9caaa5e0d3..0045c7cf1e9e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -294,7 +294,7 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) case PF_INET6: len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n", proto_name, - &inet6_sk(sk)->rcv_saddr, + &sk->sk_v6_rcv_saddr, inet_sk(sk)->inet_num); break; default: diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 8d8d97dbb389..80554fcf9fcc 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -304,12 +304,11 @@ static void dump_common_audit_data(struct audit_buffer *ab, } case AF_INET6: { struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *inet6 = inet6_sk(sk); - print_ipv6_addr(ab, &inet6->rcv_saddr, + print_ipv6_addr(ab, &sk->sk_v6_rcv_saddr, inet->inet_sport, "laddr", "lport"); - print_ipv6_addr(ab, &inet6->daddr, + print_ipv6_addr(ab, &sk->sk_v6_daddr, inet->inet_dport, "faddr", "fport"); break; -- cgit v1.2.3-55-g7522 From 795aa6ef6a1aba99050735eadd0c2341b789b53b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Oct 2013 09:21:55 +0200 Subject: netfilter: pass hook ops to hookfn Pass the hook ops to the hookfn to allow for generic hook functions. This change is required by nf_tables. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 3 +- net/bridge/br_netfilter.c | 22 +++++++++----- net/bridge/netfilter/ebtable_filter.c | 16 ++++++---- net/bridge/netfilter/ebtable_nat.c | 16 ++++++---- net/decnet/netfilter/dn_rtmsg.c | 2 +- net/ipv4/netfilter/arptable_filter.c | 5 +-- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- net/ipv4/netfilter/ipt_SYNPROXY.c | 2 +- net/ipv4/netfilter/iptable_filter.c | 7 +++-- net/ipv4/netfilter/iptable_mangle.c | 10 +++--- net/ipv4/netfilter/iptable_nat.c | 26 ++++++++-------- net/ipv4/netfilter/iptable_raw.c | 6 ++-- net/ipv4/netfilter/iptable_security.c | 7 +++-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 12 ++++---- net/ipv4/netfilter/nf_defrag_ipv4.c | 6 ++-- net/ipv6/netfilter/ip6t_SYNPROXY.c | 2 +- net/ipv6/netfilter/ip6table_filter.c | 5 +-- net/ipv6/netfilter/ip6table_mangle.c | 10 +++--- net/ipv6/netfilter/ip6table_nat.c | 27 +++++++++-------- net/ipv6/netfilter/ip6table_raw.c | 5 +-- net/ipv6/netfilter/ip6table_security.c | 5 +-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 14 +++++---- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 6 ++-- net/netfilter/core.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 42 +++++++++++++------------- security/selinux/hooks.c | 10 +++--- 26 files changed, 148 insertions(+), 122 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 61223c52414f..fef7e67f7101 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -42,7 +42,8 @@ int netfilter_init(void); struct sk_buff; -typedef unsigned int nf_hookfn(unsigned int hooknum, +struct nf_hook_ops; +typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index f87736270eaa..878f008afefa 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -619,7 +619,7 @@ bad: /* Replicate the checks that IPv6 does on packet reception and pass the packet * to ip6tables, which doesn't support NAT, so things are fairly simple. */ -static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, +static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -669,7 +669,8 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, * receiving device) to make netfilter happy, the REDIRECT * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. */ -static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -691,7 +692,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, return NF_ACCEPT; nf_bridge_pull_encap_header_rcsum(skb); - return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); + return br_nf_pre_routing_ipv6(ops, skb, in, out, okfn); } if (!brnf_call_iptables && !br->nf_call_iptables) @@ -727,7 +728,8 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, * took place when the packet entered the bridge), but we * register an IPv4 PRE_ROUTING 'sabotage' hook that will * prevent this from happening. */ -static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -765,7 +767,8 @@ static int br_nf_forward_finish(struct sk_buff *skb) * but we are still able to filter on the 'real' indev/outdev * because of the physdev module. For ARP, indev and outdev are the * bridge ports. */ -static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -818,7 +821,8 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, return NF_STOLEN; } -static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -878,7 +882,8 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) #endif /* PF_BRIDGE/POST_ROUTING ********************************************/ -static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -923,7 +928,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, /* IP/SABOTAGE *****************************************************/ /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING * for the second time. */ -static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb, +static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 94b2b700cff8..bb2da7b706e7 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -60,17 +60,21 @@ static const struct ebt_table frame_filter = }; static unsigned int -ebt_in_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, int (*okfn)(struct sk_buff *)) +ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_filter); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(in)->xt.frame_filter); } static unsigned int -ebt_out_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, int (*okfn)(struct sk_buff *)) +ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_filter); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(out)->xt.frame_filter); } static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 322555acdd40..bd238f1f105b 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -60,17 +60,21 @@ static struct ebt_table frame_nat = }; static unsigned int -ebt_nat_in(unsigned int hook, struct sk_buff *skb, const struct net_device *in - , const struct net_device *out, int (*okfn)(struct sk_buff *)) +ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_nat); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(in)->xt.frame_nat); } static unsigned int -ebt_nat_out(unsigned int hook, struct sk_buff *skb, const struct net_device *in - , const struct net_device *out, int (*okfn)(struct sk_buff *)) +ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_nat); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(out)->xt.frame_nat); } static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 2a7efe388344..e83015cecfa7 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -87,7 +87,7 @@ static void dnrmg_send_peer(struct sk_buff *skb) } -static unsigned int dnrmg_hook(unsigned int hook, +static unsigned int dnrmg_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index a865f6f94013..802ddecb30b8 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -27,13 +27,14 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c */ static unsigned int -arptable_filter_hook(unsigned int hook, struct sk_buff *skb, +arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? in : out); - return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter); + return arpt_do_table(skb, ops->hooknum, in, out, + net->ipv4.arptable_filter); } static struct nf_hook_ops *arpfilter_ops __read_mostly; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 0b732efd32e2..a2e2b61cd7da 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -483,7 +483,7 @@ static void arp_print(struct arp_payload *payload) #endif static unsigned int -arp_mangle(unsigned int hook, +arp_mangle(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index b6346bf2fde3..01cffeaa0085 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -297,7 +297,7 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static unsigned int ipv4_synproxy_hook(unsigned int hooknum, +static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 50af5b45c050..e08a74a243a8 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -33,20 +33,21 @@ static const struct xt_table packet_filter = { }; static unsigned int -iptable_filter_hook(unsigned int hook, struct sk_buff *skb, +iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net; - if (hook == NF_INET_LOCAL_OUT && + if (ops->hooknum == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* root is playing with raw sockets. */ return NF_ACCEPT; net = dev_net((in != NULL) ? in : out); - return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter); + return ipt_do_table(skb, ops->hooknum, in, out, + net->ipv4.iptable_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 0d8cd82e0fad..6a5079c34bb3 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -79,19 +79,19 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) /* The work comes in here from netfilter.c. */ static unsigned int -iptable_mangle_hook(unsigned int hook, +iptable_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (hook == NF_INET_LOCAL_OUT) + if (ops->hooknum == NF_INET_LOCAL_OUT) return ipt_mangle_out(skb, out); - if (hook == NF_INET_POST_ROUTING) - return ipt_do_table(skb, hook, in, out, + if (ops->hooknum == NF_INET_POST_ROUTING) + return ipt_do_table(skb, ops->hooknum, in, out, dev_net(out)->ipv4.iptable_mangle); /* PREROUTING/INPUT/FORWARD: */ - return ipt_do_table(skb, hook, in, out, + return ipt_do_table(skb, ops->hooknum, in, out, dev_net(in)->ipv4.iptable_mangle); } diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 683bfaffed65..ee2886126e3d 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -61,7 +61,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, } static unsigned int -nf_nat_ipv4_fn(unsigned int hooknum, +nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -71,7 +71,7 @@ nf_nat_ipv4_fn(unsigned int hooknum, enum ip_conntrack_info ctinfo; struct nf_conn_nat *nat; /* maniptype == SRC for postrouting. */ - enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); /* We never see fragments: conntrack defrags on pre-routing * and local-out, and nf_nat_out protects post-routing. @@ -108,7 +108,7 @@ nf_nat_ipv4_fn(unsigned int hooknum, case IP_CT_RELATED_REPLY: if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, - hooknum)) + ops->hooknum)) return NF_DROP; else return NF_ACCEPT; @@ -121,14 +121,14 @@ nf_nat_ipv4_fn(unsigned int hooknum, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = nf_nat_rule_find(skb, hooknum, in, out, ct); + ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) goto oif_changed; } break; @@ -137,11 +137,11 @@ nf_nat_ipv4_fn(unsigned int hooknum, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) goto oif_changed; } - return nf_nat_packet(ct, ctinfo, hooknum, skb); + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); oif_changed: nf_ct_kill_acct(ct, ctinfo, skb); @@ -149,7 +149,7 @@ oif_changed: } static unsigned int -nf_nat_ipv4_in(unsigned int hooknum, +nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -158,7 +158,7 @@ nf_nat_ipv4_in(unsigned int hooknum, unsigned int ret; __be32 daddr = ip_hdr(skb)->daddr; - ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && daddr != ip_hdr(skb)->daddr) skb_dst_drop(skb); @@ -167,7 +167,7 @@ nf_nat_ipv4_in(unsigned int hooknum, } static unsigned int -nf_nat_ipv4_out(unsigned int hooknum, +nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -185,7 +185,7 @@ nf_nat_ipv4_out(unsigned int hooknum, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && @@ -207,7 +207,7 @@ nf_nat_ipv4_out(unsigned int hooknum, } static unsigned int -nf_nat_ipv4_local_fn(unsigned int hooknum, +nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -223,7 +223,7 @@ nf_nat_ipv4_local_fn(unsigned int hooknum, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 1f82aea11df6..b2f7e8f98316 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -20,20 +20,20 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int -iptable_raw_hook(unsigned int hook, struct sk_buff *skb, +iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net; - if (hook == NF_INET_LOCAL_OUT && + if (ops->hooknum == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* root is playing with raw sockets. */ return NF_ACCEPT; net = dev_net((in != NULL) ? in : out); - return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw); + return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.iptable_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index f867a8d38bf7..c86647ed2078 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -37,21 +37,22 @@ static const struct xt_table security_table = { }; static unsigned int -iptable_security_hook(unsigned int hook, struct sk_buff *skb, +iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net; - if (hook == NF_INET_LOCAL_OUT && + if (ops->hooknum == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* Somebody is playing with raw sockets. */ return NF_ACCEPT; net = dev_net((in != NULL) ? in : out); - return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security); + return ipt_do_table(skb, ops->hooknum, in, out, + net->ipv4.iptable_security); } static struct nf_hook_ops *sectbl_ops __read_mostly; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 86f5b34a4ed1..ecd8bec411c9 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -92,7 +92,7 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv4_helper(unsigned int hooknum, +static unsigned int ipv4_helper(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -121,7 +121,7 @@ static unsigned int ipv4_helper(unsigned int hooknum, ct, ctinfo); } -static unsigned int ipv4_confirm(unsigned int hooknum, +static unsigned int ipv4_confirm(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -147,16 +147,16 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int ipv4_conntrack_in(unsigned int hooknum, +static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(in), PF_INET, ops->hooknum, skb); } -static unsigned int ipv4_conntrack_local(unsigned int hooknum, +static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -166,7 +166,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum, if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(out), PF_INET, ops->hooknum, skb); } /* Connection tracking may drop packets, but never alters them, so diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 742815518b0f..12e13bd82b5b 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -60,7 +60,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, return IP_DEFRAG_CONNTRACK_OUT + zone; } -static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, +static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -83,7 +83,9 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, #endif /* Gather fragments. */ if (ip_is_fragment(ip_hdr(skb))) { - enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb); + enum ip_defrag_users user = + nf_ct_defrag_user(ops->hooknum, skb); + if (nf_ct_ipv4_gather_frags(skb, user)) return NF_STOLEN; } diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 2748b042da72..bf9f612c1bc2 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -312,7 +312,7 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static unsigned int ipv6_synproxy_hook(unsigned int hooknum, +static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 29b44b14c5ea..ca7f6c128086 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -32,13 +32,14 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_filter_hook(unsigned int hook, struct sk_buff *skb, +ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? in : out); - return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_filter); + return ip6t_do_table(skb, ops->hooknum, in, out, + net->ipv6.ip6table_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index c705907ae6ab..307bbb782d14 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -76,17 +76,17 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb, +ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (hook == NF_INET_LOCAL_OUT) + if (ops->hooknum == NF_INET_LOCAL_OUT) return ip6t_mangle_out(skb, out); - if (hook == NF_INET_POST_ROUTING) - return ip6t_do_table(skb, hook, in, out, + if (ops->hooknum == NF_INET_POST_ROUTING) + return ip6t_do_table(skb, ops->hooknum, in, out, dev_net(out)->ipv6.ip6table_mangle); /* INPUT/FORWARD */ - return ip6t_do_table(skb, hook, in, out, + return ip6t_do_table(skb, ops->hooknum, in, out, dev_net(in)->ipv6.ip6table_mangle); } diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 9b076d2d3a7b..84c7f33d0cf8 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -63,7 +63,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, } static unsigned int -nf_nat_ipv6_fn(unsigned int hooknum, +nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -72,7 +72,7 @@ nf_nat_ipv6_fn(unsigned int hooknum, struct nf_conn *ct; enum ip_conntrack_info ctinfo; struct nf_conn_nat *nat; - enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); __be16 frag_off; int hdrlen; u8 nexthdr; @@ -111,7 +111,8 @@ nf_nat_ipv6_fn(unsigned int hooknum, if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, - hooknum, hdrlen)) + ops->hooknum, + hdrlen)) return NF_DROP; else return NF_ACCEPT; @@ -124,14 +125,14 @@ nf_nat_ipv6_fn(unsigned int hooknum, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = nf_nat_rule_find(skb, hooknum, in, out, ct); + ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) goto oif_changed; } break; @@ -140,11 +141,11 @@ nf_nat_ipv6_fn(unsigned int hooknum, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) goto oif_changed; } - return nf_nat_packet(ct, ctinfo, hooknum, skb); + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); oif_changed: nf_ct_kill_acct(ct, ctinfo, skb); @@ -152,7 +153,7 @@ oif_changed: } static unsigned int -nf_nat_ipv6_in(unsigned int hooknum, +nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -161,7 +162,7 @@ nf_nat_ipv6_in(unsigned int hooknum, unsigned int ret; struct in6_addr daddr = ipv6_hdr(skb)->daddr; - ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) skb_dst_drop(skb); @@ -170,7 +171,7 @@ nf_nat_ipv6_in(unsigned int hooknum, } static unsigned int -nf_nat_ipv6_out(unsigned int hooknum, +nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -187,7 +188,7 @@ nf_nat_ipv6_out(unsigned int hooknum, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && @@ -209,7 +210,7 @@ nf_nat_ipv6_out(unsigned int hooknum, } static unsigned int -nf_nat_ipv6_local_fn(unsigned int hooknum, +nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -224,7 +225,7 @@ nf_nat_ipv6_local_fn(unsigned int hooknum, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 9a626d86720f..5274740acecc 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -19,13 +19,14 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_raw_hook(unsigned int hook, struct sk_buff *skb, +ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? in : out); - return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_raw); + return ip6t_do_table(skb, ops->hooknum, in, out, + net->ipv6.ip6table_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index ce88d1d7e525..ab3b0219ecfa 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -36,14 +36,15 @@ static const struct xt_table security_table = { }; static unsigned int -ip6table_security_hook(unsigned int hook, struct sk_buff *skb, +ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? in : out); - return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_security); + return ip6t_do_table(skb, ops->hooknum, in, out, + net->ipv6.ip6table_security); } static struct nf_hook_ops *sectbl_ops __read_mostly; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 54b75ead5a69..486545eb42ce 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -95,7 +95,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv6_helper(unsigned int hooknum, +static unsigned int ipv6_helper(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -133,7 +133,7 @@ static unsigned int ipv6_helper(unsigned int hooknum, return helper->help(skb, protoff, ct, ctinfo); } -static unsigned int ipv6_confirm(unsigned int hooknum, +static unsigned int ipv6_confirm(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -219,16 +219,17 @@ static unsigned int __ipv6_conntrack_in(struct net *net, return nf_conntrack_in(net, PF_INET6, hooknum, skb); } -static unsigned int ipv6_conntrack_in(unsigned int hooknum, +static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn); + return __ipv6_conntrack_in(dev_net(in), ops->hooknum, skb, in, out, + okfn); } -static unsigned int ipv6_conntrack_local(unsigned int hooknum, +static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -239,7 +240,8 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn); + return __ipv6_conntrack_in(dev_net(out), ops->hooknum, skb, in, out, + okfn); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index aacd121fe8c5..ec483aa3f60f 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -52,7 +52,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, } -static unsigned int ipv6_defrag(unsigned int hooknum, +static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -66,7 +66,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, return NF_ACCEPT; #endif - reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); + reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb)); /* queued */ if (reasm == NULL) return NF_STOLEN; @@ -75,7 +75,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, if (reasm == skb) return NF_ACCEPT; - nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, + nf_ct_frag6_output(ops->hooknum, reasm, (struct net_device *)in, (struct net_device *)out, okfn); return NF_STOLEN; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 593b16ea45e0..1fbab0cdd302 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -146,7 +146,7 @@ unsigned int nf_iterate(struct list_head *head, /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ repeat: - verdict = (*elemp)->hook(hook, skb, indev, outdev, okfn); + verdict = (*elemp)->hook(*elemp, skb, indev, outdev, okfn); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 74fd00c27210..34fda62f40f6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1239,11 +1239,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET); + return ip_vs_out(ops->hooknum, skb, AF_INET); } /* @@ -1251,11 +1251,11 @@ ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET); + return ip_vs_out(ops->hooknum, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1266,11 +1266,11 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET6); + return ip_vs_out(ops->hooknum, skb, AF_INET6); } /* @@ -1278,11 +1278,11 @@ ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET6); + return ip_vs_out(ops->hooknum, skb, AF_INET6); } #endif @@ -1733,12 +1733,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET); + return ip_vs_in(ops->hooknum, skb, AF_INET); } /* @@ -1746,11 +1746,11 @@ ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET); + return ip_vs_in(ops->hooknum, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1760,7 +1760,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, * Copy info from first fragment, to the rest of them. */ static unsigned int -ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_preroute_frag6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -1792,12 +1792,12 @@ ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb, * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET6); + return ip_vs_in(ops->hooknum, skb, AF_INET6); } /* @@ -1805,11 +1805,11 @@ ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET6); + return ip_vs_in(ops->hooknum, skb, AF_INET6); } #endif @@ -1825,7 +1825,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb, * and send them to ip_vs_in_icmp. */ static unsigned int -ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, +ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { @@ -1842,12 +1842,12 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp(skb, &r, hooknum); + return ip_vs_in_icmp(skb, &r, ops->hooknum); } #ifdef CONFIG_IP_VS_IPV6 static unsigned int -ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { @@ -1866,7 +1866,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr); + return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr); } #endif diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 568c7699abf1..3f224d7795f5 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4668,7 +4668,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, return NF_ACCEPT; } -static unsigned int selinux_ipv4_forward(unsigned int hooknum, +static unsigned int selinux_ipv4_forward(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -4678,7 +4678,7 @@ static unsigned int selinux_ipv4_forward(unsigned int hooknum, } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static unsigned int selinux_ipv6_forward(unsigned int hooknum, +static unsigned int selinux_ipv6_forward(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -4710,7 +4710,7 @@ static unsigned int selinux_ip_output(struct sk_buff *skb, return NF_ACCEPT; } -static unsigned int selinux_ipv4_output(unsigned int hooknum, +static unsigned int selinux_ipv4_output(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -4837,7 +4837,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, return NF_ACCEPT; } -static unsigned int selinux_ipv4_postroute(unsigned int hooknum, +static unsigned int selinux_ipv4_postroute(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -4847,7 +4847,7 @@ static unsigned int selinux_ipv4_postroute(unsigned int hooknum, } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static unsigned int selinux_ipv6_postroute(unsigned int hooknum, +static unsigned int selinux_ipv6_postroute(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, -- cgit v1.2.3-55-g7522 From f59cb0453cd885736daa11ae2445982c5ab2fc83 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Oct 2013 10:57:04 +0200 Subject: netfilter: nf_nat: move alloc_null_binding to nf_nat_core.c Similar to nat_decode_session, alloc_null_binding is needed for both ip_tables and nf_tables, so move it to nf_nat_core.c. This change is required by nf_tables. This is an adapted version of the original patch from Patrick McHardy. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_nat.h | 3 +++ net/netfilter/nf_nat_core.c | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index c29b4e545f87..07eaaf604092 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -45,6 +45,9 @@ unsigned int nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype); +extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct, + unsigned int hooknum); + /* Is this tuple already taken? (not by us)*/ int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 6f0f4f7f68a5..63a815402211 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -432,6 +432,26 @@ nf_nat_setup_info(struct nf_conn *ct, } EXPORT_SYMBOL(nf_nat_setup_info); +unsigned int +nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) +{ + /* Force range to this IP; let proto decide mapping for + * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). + * Use reply in case it's already been mangled (eg local packet). + */ + union nf_inet_addr ip = + (HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 : + ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3); + struct nf_nat_range range = { + .flags = NF_NAT_RANGE_MAP_IPS, + .min_addr = ip, + .max_addr = ip, + }; + return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); +} +EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding); + /* Do packet manipulations according to nf_nat_setup_info. */ unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, -- cgit v1.2.3-55-g7522 From 96518518cc417bb0a8c80b9fb736202e28acdf96 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 14 Oct 2013 11:00:02 +0200 Subject: netfilter: add nftables This patch adds nftables which is the intended successor of iptables. This packet filtering framework reuses the existing netfilter hooks, the connection tracking system, the NAT subsystem, the transparent proxying engine, the logging infrastructure and the userspace packet queueing facilities. In a nutshell, nftables provides a pseudo-state machine with 4 general purpose registers of 128 bits and 1 specific purpose register to store verdicts. This pseudo-machine comes with an extensible instruction set, a.k.a. "expressions" in the nftables jargon. The expressions included in this patch provide the basic functionality, they are: * bitwise: to perform bitwise operations. * byteorder: to change from host/network endianess. * cmp: to compare data with the content of the registers. * counter: to enable counters on rules. * ct: to store conntrack keys into register. * exthdr: to match IPv6 extension headers. * immediate: to load data into registers. * limit: to limit matching based on packet rate. * log: to log packets. * meta: to match metainformation that usually comes with the skbuff. * nat: to perform Network Address Translation. * payload: to fetch data from the packet payload and store it into registers. * reject (IPv4 only): to explicitly close connection, eg. TCP RST. Using this instruction-set, the userspace utility 'nft' can transform the rules expressed in human-readable text representation (using a new syntax, inspired by tcpdump) to nftables bytecode. nftables also inherits the table, chain and rule objects from iptables, but in a more configurable way, and it also includes the original datatype-agnostic set infrastructure with mapping support. This set infrastructure is enhanced in the follow up patch (netfilter: nf_tables: add netlink set API). This patch includes the following components: * the netlink API: net/netfilter/nf_tables_api.c and include/uapi/netfilter/nf_tables.h * the packet filter core: net/netfilter/nf_tables_core.c * the expressions (described above): net/netfilter/nft_*.c * the filter tables: arp, IPv4, IPv6 and bridge: net/ipv4/netfilter/nf_tables_ipv4.c net/ipv6/netfilter/nf_tables_ipv6.c net/ipv4/netfilter/nf_tables_arp.c net/bridge/netfilter/nf_tables_bridge.c * the NAT table (IPv4 only): net/ipv4/netfilter/nf_table_nat_ipv4.c * the route table (similar to mangle): net/ipv4/netfilter/nf_table_route_ipv4.c net/ipv6/netfilter/nf_table_route_ipv6.c * internal definitions under: include/net/netfilter/nf_tables.h include/net/netfilter/nf_tables_core.h * It also includes an skeleton expression: net/netfilter/nft_expr_template.c and the preliminary implementation of the meta target net/netfilter/nft_meta_target.c It also includes a change in struct nf_hook_ops to add a new pointer to store private data to the hook, that is used to store the rule list per chain. This patch is based on the patch from Patrick McHardy, plus merged accumulated cleanups, fixes and small enhancements to the nftables code that has been done since 2009, which are: From Patrick McHardy: * nf_tables: adjust netlink handler function signatures * nf_tables: only retry table lookup after successful table module load * nf_tables: fix event notification echo and avoid unnecessary messages * nft_ct: add l3proto support * nf_tables: pass expression context to nft_validate_data_load() * nf_tables: remove redundant definition * nft_ct: fix maxattr initialization * nf_tables: fix invalid event type in nf_tables_getrule() * nf_tables: simplify nft_data_init() usage * nf_tables: build in more core modules * nf_tables: fix double lookup expression unregistation * nf_tables: move expression initialization to nf_tables_core.c * nf_tables: build in payload module * nf_tables: use NFPROTO constants * nf_tables: rename pid variables to portid * nf_tables: save 48 bits per rule * nf_tables: introduce chain rename * nf_tables: check for duplicate names on chain rename * nf_tables: remove ability to specify handles for new rules * nf_tables: return error for rule change request * nf_tables: return error for NLM_F_REPLACE without rule handle * nf_tables: include NLM_F_APPEND/NLM_F_REPLACE flags in rule notification * nf_tables: fix NLM_F_MULTI usage in netlink notifications * nf_tables: include NLM_F_APPEND in rule dumps From Pablo Neira Ayuso: * nf_tables: fix stack overflow in nf_tables_newrule * nf_tables: nft_ct: fix compilation warning * nf_tables: nft_ct: fix crash with invalid packets * nft_log: group and qthreshold are 2^16 * nf_tables: nft_meta: fix socket uid,gid handling * nft_counter: allow to restore counters * nf_tables: fix module autoload * nf_tables: allow to remove all rules placed in one chain * nf_tables: use 64-bits rule handle instead of 16-bits * nf_tables: fix chain after rule deletion * nf_tables: improve deletion performance * nf_tables: add missing code in route chain type * nf_tables: rise maximum number of expressions from 12 to 128 * nf_tables: don't delete table if in use * nf_tables: fix basechain release From Tomasz Bursztyka: * nf_tables: Add support for changing users chain's name * nf_tables: Change chain's name to be fixed sized * nf_tables: Add support for replacing a rule by another one * nf_tables: Update uapi nftables netlink header documentation From Florian Westphal: * nft_log: group is u16, snaplen u32 From Phil Oester: * nf_tables: operational limit match Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 11 +- include/net/netfilter/nf_tables.h | 301 ++++ include/net/netfilter/nf_tables_core.h | 25 + include/uapi/linux/netfilter/Kbuild | 1 + include/uapi/linux/netfilter/nf_conntrack_common.h | 4 + include/uapi/linux/netfilter/nf_tables.h | 582 +++++++ include/uapi/linux/netfilter/nfnetlink.h | 5 +- net/bridge/netfilter/Kconfig | 3 + net/bridge/netfilter/Makefile | 2 + net/bridge/netfilter/nf_tables_bridge.c | 37 + net/ipv4/netfilter/Kconfig | 16 + net/ipv4/netfilter/Makefile | 5 + net/ipv4/netfilter/nf_table_nat_ipv4.c | 409 +++++ net/ipv4/netfilter/nf_table_route_ipv4.c | 97 ++ net/ipv4/netfilter/nf_tables_ipv4.c | 59 + net/ipv4/netfilter/nft_reject_ipv4.c | 117 ++ net/ipv6/netfilter/Kconfig | 8 + net/ipv6/netfilter/Makefile | 4 + net/ipv6/netfilter/nf_table_route_ipv6.c | 93 ++ net/ipv6/netfilter/nf_tables_ipv6.c | 57 + net/netfilter/Kconfig | 37 + net/netfilter/Makefile | 16 + net/netfilter/nf_tables_api.c | 1760 ++++++++++++++++++++ net/netfilter/nf_tables_core.c | 152 ++ net/netfilter/nft_bitwise.c | 140 ++ net/netfilter/nft_byteorder.c | 167 ++ net/netfilter/nft_cmp.c | 146 ++ net/netfilter/nft_counter.c | 107 ++ net/netfilter/nft_ct.c | 252 +++ net/netfilter/nft_expr_template.c | 88 + net/netfilter/nft_exthdr.c | 127 ++ net/netfilter/nft_hash.c | 348 ++++ net/netfilter/nft_immediate.c | 113 ++ net/netfilter/nft_limit.c | 113 ++ net/netfilter/nft_log.c | 140 ++ net/netfilter/nft_meta.c | 222 +++ net/netfilter/nft_meta_target.c | 117 ++ net/netfilter/nft_payload.c | 137 ++ net/netfilter/nft_set.c | 381 +++++ 39 files changed, 6393 insertions(+), 6 deletions(-) create mode 100644 include/net/netfilter/nf_tables.h create mode 100644 include/net/netfilter/nf_tables_core.h create mode 100644 include/uapi/linux/netfilter/nf_tables.h create mode 100644 net/bridge/netfilter/nf_tables_bridge.c create mode 100644 net/ipv4/netfilter/nf_table_nat_ipv4.c create mode 100644 net/ipv4/netfilter/nf_table_route_ipv4.c create mode 100644 net/ipv4/netfilter/nf_tables_ipv4.c create mode 100644 net/ipv4/netfilter/nft_reject_ipv4.c create mode 100644 net/ipv6/netfilter/nf_table_route_ipv6.c create mode 100644 net/ipv6/netfilter/nf_tables_ipv6.c create mode 100644 net/netfilter/nf_tables_api.c create mode 100644 net/netfilter/nf_tables_core.c create mode 100644 net/netfilter/nft_bitwise.c create mode 100644 net/netfilter/nft_byteorder.c create mode 100644 net/netfilter/nft_cmp.c create mode 100644 net/netfilter/nft_counter.c create mode 100644 net/netfilter/nft_ct.c create mode 100644 net/netfilter/nft_expr_template.c create mode 100644 net/netfilter/nft_exthdr.c create mode 100644 net/netfilter/nft_hash.c create mode 100644 net/netfilter/nft_immediate.c create mode 100644 net/netfilter/nft_limit.c create mode 100644 net/netfilter/nft_log.c create mode 100644 net/netfilter/nft_meta.c create mode 100644 net/netfilter/nft_meta_target.c create mode 100644 net/netfilter/nft_payload.c create mode 100644 net/netfilter/nft_set.c (limited to 'net/netfilter') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index fef7e67f7101..2077489f9887 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -53,12 +53,13 @@ struct nf_hook_ops { struct list_head list; /* User fills in from here down. */ - nf_hookfn *hook; - struct module *owner; - u_int8_t pf; - unsigned int hooknum; + nf_hookfn *hook; + struct module *owner; + void *priv; + u_int8_t pf; + unsigned int hooknum; /* Hooks are ordered in ascending priority. */ - int priority; + int priority; }; struct nf_sockopt_ops { diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h new file mode 100644 index 000000000000..d26dfa345f49 --- /dev/null +++ b/include/net/netfilter/nf_tables.h @@ -0,0 +1,301 @@ +#ifndef _NET_NF_TABLES_H +#define _NET_NF_TABLES_H + +#include +#include +#include +#include + +struct nft_pktinfo { + struct sk_buff *skb; + const struct net_device *in; + const struct net_device *out; + u8 hooknum; + u8 nhoff; + u8 thoff; +}; + +struct nft_data { + union { + u32 data[4]; + struct { + u32 verdict; + struct nft_chain *chain; + }; + }; +} __attribute__((aligned(__alignof__(u64)))); + +static inline int nft_data_cmp(const struct nft_data *d1, + const struct nft_data *d2, + unsigned int len) +{ + return memcmp(d1->data, d2->data, len); +} + +static inline void nft_data_copy(struct nft_data *dst, + const struct nft_data *src) +{ + BUILD_BUG_ON(__alignof__(*dst) != __alignof__(u64)); + *(u64 *)&dst->data[0] = *(u64 *)&src->data[0]; + *(u64 *)&dst->data[2] = *(u64 *)&src->data[2]; +} + +static inline void nft_data_debug(const struct nft_data *data) +{ + pr_debug("data[0]=%x data[1]=%x data[2]=%x data[3]=%x\n", + data->data[0], data->data[1], + data->data[2], data->data[3]); +} + +/** + * struct nft_ctx - nf_tables rule context + * + * @afi: address family info + * @table: the table the chain is contained in + * @chain: the chain the rule is contained in + */ +struct nft_ctx { + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; +}; + +enum nft_data_types { + NFT_DATA_VALUE, + NFT_DATA_VERDICT, +}; + +struct nft_data_desc { + enum nft_data_types type; + unsigned int len; +}; + +extern int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla); +extern void nft_data_uninit(const struct nft_data *data, + enum nft_data_types type); +extern int nft_data_dump(struct sk_buff *skb, int attr, + const struct nft_data *data, + enum nft_data_types type, unsigned int len); + +static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg) +{ + return reg == NFT_REG_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE; +} + +extern int nft_validate_input_register(enum nft_registers reg); +extern int nft_validate_output_register(enum nft_registers reg); +extern int nft_validate_data_load(const struct nft_ctx *ctx, + enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type); + +/** + * struct nft_expr_ops - nf_tables expression operations + * + * @eval: Expression evaluation function + * @init: initialization function + * @destroy: destruction function + * @dump: function to dump parameters + * @list: used internally + * @name: Identifier + * @owner: module reference + * @policy: netlink attribute policy + * @maxattr: highest netlink attribute number + * @size: full expression size, including private data size + */ +struct nft_expr; +struct nft_expr_ops { + void (*eval)(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt); + int (*init)(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]); + void (*destroy)(const struct nft_expr *expr); + int (*dump)(struct sk_buff *skb, + const struct nft_expr *expr); + + struct list_head list; + const char *name; + struct module *owner; + const struct nla_policy *policy; + unsigned int maxattr; + unsigned int size; +}; + +#define NFT_EXPR_SIZE(size) (sizeof(struct nft_expr) + \ + ALIGN(size, __alignof__(struct nft_expr))) + +/** + * struct nft_expr - nf_tables expression + * + * @ops: expression ops + * @data: expression private data + */ +struct nft_expr { + const struct nft_expr_ops *ops; + unsigned char data[]; +}; + +static inline void *nft_expr_priv(const struct nft_expr *expr) +{ + return (void *)expr->data; +} + +/** + * struct nft_rule - nf_tables rule + * + * @list: used internally + * @rcu_head: used internally for rcu + * @handle: rule handle + * @dlen: length of expression data + * @data: expression data + */ +struct nft_rule { + struct list_head list; + struct rcu_head rcu_head; + u64 handle:48, + dlen:16; + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_expr)))); +}; + +static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) +{ + return (struct nft_expr *)&rule->data[0]; +} + +static inline struct nft_expr *nft_expr_next(const struct nft_expr *expr) +{ + return ((void *)expr) + expr->ops->size; +} + +static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule) +{ + return (struct nft_expr *)&rule->data[rule->dlen]; +} + +/* + * The last pointer isn't really necessary, but the compiler isn't able to + * determine that the result of nft_expr_last() is always the same since it + * can't assume that the dlen value wasn't changed within calls in the loop. + */ +#define nft_rule_for_each_expr(expr, last, rule) \ + for ((expr) = nft_expr_first(rule), (last) = nft_expr_last(rule); \ + (expr) != (last); \ + (expr) = nft_expr_next(expr)) + +enum nft_chain_flags { + NFT_BASE_CHAIN = 0x1, + NFT_CHAIN_BUILTIN = 0x2, +}; + +/** + * struct nft_chain - nf_tables chain + * + * @rules: list of rules in the chain + * @list: used internally + * @rcu_head: used internally + * @handle: chain handle + * @flags: bitmask of enum nft_chain_flags + * @use: number of jump references to this chain + * @level: length of longest path to this chain + * @name: name of the chain + */ +struct nft_chain { + struct list_head rules; + struct list_head list; + struct rcu_head rcu_head; + u64 handle; + u8 flags; + u16 use; + u16 level; + char name[NFT_CHAIN_MAXNAMELEN]; +}; + +/** + * struct nft_base_chain - nf_tables base chain + * + * @ops: netfilter hook ops + * @chain: the chain + */ +struct nft_base_chain { + struct nf_hook_ops ops; + struct nft_chain chain; +}; + +static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain) +{ + return container_of(chain, struct nft_base_chain, chain); +} + +extern unsigned int nft_do_chain(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)); + +enum nft_table_flags { + NFT_TABLE_BUILTIN = 0x1, +}; + +/** + * struct nft_table - nf_tables table + * + * @list: used internally + * @chains: chains in the table + * @sets: sets in the table + * @hgenerator: handle generator state + * @use: number of chain references to this table + * @flags: table flag (see enum nft_table_flags) + * @name: name of the table + */ +struct nft_table { + struct list_head list; + struct list_head chains; + struct list_head sets; + u64 hgenerator; + u32 use; + u16 flags; + char name[]; +}; + +/** + * struct nft_af_info - nf_tables address family info + * + * @list: used internally + * @family: address family + * @nhooks: number of hooks in this family + * @owner: module owner + * @tables: used internally + * @hooks: hookfn overrides for packet validation + */ +struct nft_af_info { + struct list_head list; + int family; + unsigned int nhooks; + struct module *owner; + struct list_head tables; + nf_hookfn *hooks[NF_MAX_HOOKS]; +}; + +extern int nft_register_afinfo(struct nft_af_info *); +extern void nft_unregister_afinfo(struct nft_af_info *); + +extern int nft_register_table(struct nft_table *, int family); +extern void nft_unregister_table(struct nft_table *, int family); + +extern int nft_register_expr(struct nft_expr_ops *); +extern void nft_unregister_expr(struct nft_expr_ops *); + +#define MODULE_ALIAS_NFT_FAMILY(family) \ + MODULE_ALIAS("nft-afinfo-" __stringify(family)) + +#define MODULE_ALIAS_NFT_TABLE(family, name) \ + MODULE_ALIAS("nft-table-" __stringify(family) "-" name) + +#define MODULE_ALIAS_NFT_EXPR(name) \ + MODULE_ALIAS("nft-expr-" name) + +#endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h new file mode 100644 index 000000000000..283396c916e0 --- /dev/null +++ b/include/net/netfilter/nf_tables_core.h @@ -0,0 +1,25 @@ +#ifndef _NET_NF_TABLES_CORE_H +#define _NET_NF_TABLES_CORE_H + +extern int nf_tables_core_module_init(void); +extern void nf_tables_core_module_exit(void); + +extern int nft_immediate_module_init(void); +extern void nft_immediate_module_exit(void); + +extern int nft_cmp_module_init(void); +extern void nft_cmp_module_exit(void); + +extern int nft_lookup_module_init(void); +extern void nft_lookup_module_exit(void); + +extern int nft_bitwise_module_init(void); +extern void nft_bitwise_module_exit(void); + +extern int nft_byteorder_module_init(void); +extern void nft_byteorder_module_exit(void); + +extern int nft_payload_module_init(void); +extern void nft_payload_module_exit(void); + +#endif /* _NET_NF_TABLES_CORE_H */ diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 174915420d3f..6ce0b7f566a7 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -5,6 +5,7 @@ header-y += nf_conntrack_ftp.h header-y += nf_conntrack_sctp.h header-y += nf_conntrack_tcp.h header-y += nf_conntrack_tuple_common.h +header-y += nf_tables.h header-y += nf_nat.h header-y += nfnetlink.h header-y += nfnetlink_acct.h diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 8dd803818ebe..319f47128db8 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -25,6 +25,10 @@ enum ip_conntrack_info { IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1 }; +#define NF_CT_STATE_INVALID_BIT (1 << 0) +#define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1)) +#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_NUMBER + 1)) + /* Bitset representing status of connection. */ enum ip_conntrack_status { /* It's an expected connection: bit 0 set. This bit never changed */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h new file mode 100644 index 000000000000..ec6d84a8ed1e --- /dev/null +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -0,0 +1,582 @@ +#ifndef _LINUX_NF_TABLES_H +#define _LINUX_NF_TABLES_H + +#define NFT_CHAIN_MAXNAMELEN 32 + +enum nft_registers { + NFT_REG_VERDICT, + NFT_REG_1, + NFT_REG_2, + NFT_REG_3, + NFT_REG_4, + __NFT_REG_MAX +}; +#define NFT_REG_MAX (__NFT_REG_MAX - 1) + +/** + * enum nft_verdicts - nf_tables internal verdicts + * + * @NFT_CONTINUE: continue evaluation of the current rule + * @NFT_BREAK: terminate evaluation of the current rule + * @NFT_JUMP: push the current chain on the jump stack and jump to a chain + * @NFT_GOTO: jump to a chain without pushing the current chain on the jump stack + * @NFT_RETURN: return to the topmost chain on the jump stack + * + * The nf_tables verdicts share their numeric space with the netfilter verdicts. + */ +enum nft_verdicts { + NFT_CONTINUE = -1, + NFT_BREAK = -2, + NFT_JUMP = -3, + NFT_GOTO = -4, + NFT_RETURN = -5, +}; + +/** + * enum nf_tables_msg_types - nf_tables netlink message types + * + * @NFT_MSG_NEWTABLE: create a new table (enum nft_table_attributes) + * @NFT_MSG_GETTABLE: get a table (enum nft_table_attributes) + * @NFT_MSG_DELTABLE: delete a table (enum nft_table_attributes) + * @NFT_MSG_NEWCHAIN: create a new chain (enum nft_chain_attributes) + * @NFT_MSG_GETCHAIN: get a chain (enum nft_chain_attributes) + * @NFT_MSG_DELCHAIN: delete a chain (enum nft_chain_attributes) + * @NFT_MSG_NEWRULE: create a new rule (enum nft_rule_attributes) + * @NFT_MSG_GETRULE: get a rule (enum nft_rule_attributes) + * @NFT_MSG_DELRULE: delete a rule (enum nft_rule_attributes) + */ +enum nf_tables_msg_types { + NFT_MSG_NEWTABLE, + NFT_MSG_GETTABLE, + NFT_MSG_DELTABLE, + NFT_MSG_NEWCHAIN, + NFT_MSG_GETCHAIN, + NFT_MSG_DELCHAIN, + NFT_MSG_NEWRULE, + NFT_MSG_GETRULE, + NFT_MSG_DELRULE, + NFT_MSG_MAX, +}; + +enum nft_list_attributes { + NFTA_LIST_UNPEC, + NFTA_LIST_ELEM, + __NFTA_LIST_MAX +}; +#define NFTA_LIST_MAX (__NFTA_LIST_MAX - 1) + +/** + * enum nft_hook_attributes - nf_tables netfilter hook netlink attributes + * + * @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32) + * @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32) + */ +enum nft_hook_attributes { + NFTA_HOOK_UNSPEC, + NFTA_HOOK_HOOKNUM, + NFTA_HOOK_PRIORITY, + __NFTA_HOOK_MAX +}; +#define NFTA_HOOK_MAX (__NFTA_HOOK_MAX - 1) + +/** + * enum nft_table_attributes - nf_tables table netlink attributes + * + * @NFTA_TABLE_NAME: name of the table (NLA_STRING) + */ +enum nft_table_attributes { + NFTA_TABLE_UNSPEC, + NFTA_TABLE_NAME, + __NFTA_TABLE_MAX +}; +#define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) + +/** + * enum nft_chain_attributes - nf_tables chain netlink attributes + * + * @NFTA_CHAIN_TABLE: name of the table containing the chain (NLA_STRING) + * @NFTA_CHAIN_HANDLE: numeric handle of the chain (NLA_U64) + * @NFTA_CHAIN_NAME: name of the chain (NLA_STRING) + * @NFTA_CHAIN_HOOK: hook specification for basechains (NLA_NESTED: nft_hook_attributes) + */ +enum nft_chain_attributes { + NFTA_CHAIN_UNSPEC, + NFTA_CHAIN_TABLE, + NFTA_CHAIN_HANDLE, + NFTA_CHAIN_NAME, + NFTA_CHAIN_HOOK, + __NFTA_CHAIN_MAX +}; +#define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) + +/** + * enum nft_rule_attributes - nf_tables rule netlink attributes + * + * @NFTA_RULE_TABLE: name of the table containing the rule (NLA_STRING) + * @NFTA_RULE_CHAIN: name of the chain containing the rule (NLA_STRING) + * @NFTA_RULE_HANDLE: numeric handle of the rule (NLA_U64) + * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes) + */ +enum nft_rule_attributes { + NFTA_RULE_UNSPEC, + NFTA_RULE_TABLE, + NFTA_RULE_CHAIN, + NFTA_RULE_HANDLE, + NFTA_RULE_EXPRESSIONS, + __NFTA_RULE_MAX +}; +#define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) + +enum nft_data_attributes { + NFTA_DATA_UNSPEC, + NFTA_DATA_VALUE, + NFTA_DATA_VERDICT, + __NFTA_DATA_MAX +}; +#define NFTA_DATA_MAX (__NFTA_DATA_MAX - 1) + +/** + * enum nft_verdict_attributes - nf_tables verdict netlink attributes + * + * @NFTA_VERDICT_CODE: nf_tables verdict (NLA_U32: enum nft_verdicts) + * @NFTA_VERDICT_CHAIN: jump target chain name (NLA_STRING) + */ +enum nft_verdict_attributes { + NFTA_VERDICT_UNSPEC, + NFTA_VERDICT_CODE, + NFTA_VERDICT_CHAIN, + __NFTA_VERDICT_MAX +}; +#define NFTA_VERDICT_MAX (__NFTA_VERDICT_MAX - 1) + +/** + * enum nft_expr_attributes - nf_tables expression netlink attributes + * + * @NFTA_EXPR_NAME: name of the expression type (NLA_STRING) + * @NFTA_EXPR_DATA: type specific data (NLA_NESTED) + */ +enum nft_expr_attributes { + NFTA_EXPR_UNSPEC, + NFTA_EXPR_NAME, + NFTA_EXPR_DATA, + __NFTA_EXPR_MAX +}; +#define NFTA_EXPR_MAX (__NFTA_EXPR_MAX - 1) + +/** + * enum nft_immediate_attributes - nf_tables immediate expression netlink attributes + * + * @NFTA_IMMEDIATE_DREG: destination register to load data into (NLA_U32) + * @NFTA_IMMEDIATE_DATA: data to load (NLA_NESTED: nft_data_attributes) + */ +enum nft_immediate_attributes { + NFTA_IMMEDIATE_UNSPEC, + NFTA_IMMEDIATE_DREG, + NFTA_IMMEDIATE_DATA, + __NFTA_IMMEDIATE_MAX +}; +#define NFTA_IMMEDIATE_MAX (__NFTA_IMMEDIATE_MAX - 1) + +/** + * enum nft_bitwise_attributes - nf_tables bitwise expression netlink attributes + * + * @NFTA_BITWISE_SREG: source register (NLA_U32: nft_registers) + * @NFTA_BITWISE_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_BITWISE_LEN: length of operands (NLA_U32) + * @NFTA_BITWISE_MASK: mask value (NLA_NESTED: nft_data_attributes) + * @NFTA_BITWISE_XOR: xor value (NLA_NESTED: nft_data_attributes) + * + * The bitwise expression performs the following operation: + * + * dreg = (sreg & mask) ^ xor + * + * which allow to express all bitwise operations: + * + * mask xor + * NOT: 1 1 + * OR: 0 x + * XOR: 1 x + * AND: x 0 + */ +enum nft_bitwise_attributes { + NFTA_BITWISE_UNSPEC, + NFTA_BITWISE_SREG, + NFTA_BITWISE_DREG, + NFTA_BITWISE_LEN, + NFTA_BITWISE_MASK, + NFTA_BITWISE_XOR, + __NFTA_BITWISE_MAX +}; +#define NFTA_BITWISE_MAX (__NFTA_BITWISE_MAX - 1) + +/** + * enum nft_byteorder_ops - nf_tables byteorder operators + * + * @NFT_BYTEORDER_NTOH: network to host operator + * @NFT_BYTEORDER_HTON: host to network opertaor + */ +enum nft_byteorder_ops { + NFT_BYTEORDER_NTOH, + NFT_BYTEORDER_HTON, +}; + +/** + * enum nft_byteorder_attributes - nf_tables byteorder expression netlink attributes + * + * @NFTA_BYTEORDER_SREG: source register (NLA_U32: nft_registers) + * @NFTA_BYTEORDER_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_BYTEORDER_OP: operator (NLA_U32: enum nft_byteorder_ops) + * @NFTA_BYTEORDER_LEN: length of the data (NLA_U32) + * @NFTA_BYTEORDER_SIZE: data size in bytes (NLA_U32: 2 or 4) + */ +enum nft_byteorder_attributes { + NFTA_BYTEORDER_UNSPEC, + NFTA_BYTEORDER_SREG, + NFTA_BYTEORDER_DREG, + NFTA_BYTEORDER_OP, + NFTA_BYTEORDER_LEN, + NFTA_BYTEORDER_SIZE, + __NFTA_BYTEORDER_MAX +}; +#define NFTA_BYTEORDER_MAX (__NFTA_BYTEORDER_MAX - 1) + +/** + * enum nft_cmp_ops - nf_tables relational operator + * + * @NFT_CMP_EQ: equal + * @NFT_CMP_NEQ: not equal + * @NFT_CMP_LT: less than + * @NFT_CMP_LTE: less than or equal to + * @NFT_CMP_GT: greater than + * @NFT_CMP_GTE: greater than or equal to + */ +enum nft_cmp_ops { + NFT_CMP_EQ, + NFT_CMP_NEQ, + NFT_CMP_LT, + NFT_CMP_LTE, + NFT_CMP_GT, + NFT_CMP_GTE, +}; + +/** + * enum nft_cmp_attributes - nf_tables cmp expression netlink attributes + * + * @NFTA_CMP_SREG: source register of data to compare (NLA_U32: nft_registers) + * @NFTA_CMP_OP: cmp operation (NLA_U32: nft_cmp_ops) + * @NFTA_CMP_DATA: data to compare against (NLA_NESTED: nft_data_attributes) + */ +enum nft_cmp_attributes { + NFTA_CMP_UNSPEC, + NFTA_CMP_SREG, + NFTA_CMP_OP, + NFTA_CMP_DATA, + __NFTA_CMP_MAX +}; +#define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) + +enum nft_set_elem_flags { + NFT_SE_INTERVAL_END = 0x1, +}; + +enum nft_set_elem_attributes { + NFTA_SE_UNSPEC, + NFTA_SE_KEY, + NFTA_SE_DATA, + NFTA_SE_FLAGS, + __NFTA_SE_MAX +}; +#define NFTA_SE_MAX (__NFTA_SE_MAX - 1) + +enum nft_set_flags { + NFT_SET_INTERVAL = 0x1, + NFT_SET_MAP = 0x2, +}; + +enum nft_set_attributes { + NFTA_SET_UNSPEC, + NFTA_SET_FLAGS, + NFTA_SET_SREG, + NFTA_SET_DREG, + NFTA_SET_KLEN, + NFTA_SET_DLEN, + NFTA_SET_ELEMENTS, + __NFTA_SET_MAX +}; +#define NFTA_SET_MAX (__NFTA_SET_MAX - 1) + +enum nft_hash_flags { + NFT_HASH_MAP = 0x1, +}; + +enum nft_hash_elem_attributes { + NFTA_HE_UNSPEC, + NFTA_HE_KEY, + NFTA_HE_DATA, + __NFTA_HE_MAX +}; +#define NFTA_HE_MAX (__NFTA_HE_MAX - 1) + +enum nft_hash_attributes { + NFTA_HASH_UNSPEC, + NFTA_HASH_FLAGS, + NFTA_HASH_SREG, + NFTA_HASH_DREG, + NFTA_HASH_KLEN, + NFTA_HASH_ELEMENTS, + __NFTA_HASH_MAX +}; +#define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) + +/** + * enum nft_payload_bases - nf_tables payload expression offset bases + * + * @NFT_PAYLOAD_LL_HEADER: link layer header + * @NFT_PAYLOAD_NETWORK_HEADER: network header + * @NFT_PAYLOAD_TRANSPORT_HEADER: transport header + */ +enum nft_payload_bases { + NFT_PAYLOAD_LL_HEADER, + NFT_PAYLOAD_NETWORK_HEADER, + NFT_PAYLOAD_TRANSPORT_HEADER, +}; + +/** + * enum nft_payload_attributes - nf_tables payload expression netlink attributes + * + * @NFTA_PAYLOAD_DREG: destination register to load data into (NLA_U32: nft_registers) + * @NFTA_PAYLOAD_BASE: payload base (NLA_U32: nft_payload_bases) + * @NFTA_PAYLOAD_OFFSET: payload offset relative to base (NLA_U32) + * @NFTA_PAYLOAD_LEN: payload length (NLA_U32) + */ +enum nft_payload_attributes { + NFTA_PAYLOAD_UNSPEC, + NFTA_PAYLOAD_DREG, + NFTA_PAYLOAD_BASE, + NFTA_PAYLOAD_OFFSET, + NFTA_PAYLOAD_LEN, + __NFTA_PAYLOAD_MAX +}; +#define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1) + +/** + * enum nft_exthdr_attributes - nf_tables IPv6 extension header expression netlink attributes + * + * @NFTA_EXTHDR_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_EXTHDR_TYPE: extension header type (NLA_U8) + * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32) + * @NFTA_EXTHDR_LEN: extension header length (NLA_U32) + */ +enum nft_exthdr_attributes { + NFTA_EXTHDR_UNSPEC, + NFTA_EXTHDR_DREG, + NFTA_EXTHDR_TYPE, + NFTA_EXTHDR_OFFSET, + NFTA_EXTHDR_LEN, + __NFTA_EXTHDR_MAX +}; +#define NFTA_EXTHDR_MAX (__NFTA_EXTHDR_MAX - 1) + +/** + * enum nft_meta_keys - nf_tables meta expression keys + * + * @NFT_META_LEN: packet length (skb->len) + * @NFT_META_PROTOCOL: packet ethertype protocol (skb->protocol), invalid in OUTPUT + * @NFT_META_PRIORITY: packet priority (skb->priority) + * @NFT_META_MARK: packet mark (skb->mark) + * @NFT_META_IIF: packet input interface index (dev->ifindex) + * @NFT_META_OIF: packet output interface index (dev->ifindex) + * @NFT_META_IIFNAME: packet input interface name (dev->name) + * @NFT_META_OIFNAME: packet output interface name (dev->name) + * @NFT_META_IIFTYPE: packet input interface type (dev->type) + * @NFT_META_OIFTYPE: packet output interface type (dev->type) + * @NFT_META_SKUID: originating socket UID (fsuid) + * @NFT_META_SKGID: originating socket GID (fsgid) + * @NFT_META_NFTRACE: packet nftrace bit + * @NFT_META_RTCLASSID: realm value of packet's route (skb->dst->tclassid) + * @NFT_META_SECMARK: packet secmark (skb->secmark) + */ +enum nft_meta_keys { + NFT_META_LEN, + NFT_META_PROTOCOL, + NFT_META_PRIORITY, + NFT_META_MARK, + NFT_META_IIF, + NFT_META_OIF, + NFT_META_IIFNAME, + NFT_META_OIFNAME, + NFT_META_IIFTYPE, + NFT_META_OIFTYPE, + NFT_META_SKUID, + NFT_META_SKGID, + NFT_META_NFTRACE, + NFT_META_RTCLASSID, + NFT_META_SECMARK, +}; + +/** + * enum nft_meta_attributes - nf_tables meta expression netlink attributes + * + * @NFTA_META_DREG: destination register (NLA_U32) + * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys) + */ +enum nft_meta_attributes { + NFTA_META_UNSPEC, + NFTA_META_DREG, + NFTA_META_KEY, + __NFTA_META_MAX +}; +#define NFTA_META_MAX (__NFTA_META_MAX - 1) + +/** + * enum nft_ct_keys - nf_tables ct expression keys + * + * @NFT_CT_STATE: conntrack state (bitmask of enum ip_conntrack_info) + * @NFT_CT_DIRECTION: conntrack direction (enum ip_conntrack_dir) + * @NFT_CT_STATUS: conntrack status (bitmask of enum ip_conntrack_status) + * @NFT_CT_MARK: conntrack mark value + * @NFT_CT_SECMARK: conntrack secmark value + * @NFT_CT_EXPIRATION: relative conntrack expiration time in ms + * @NFT_CT_HELPER: connection tracking helper assigned to conntrack + * @NFT_CT_L3PROTOCOL: conntrack layer 3 protocol + * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address) + * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address) + * @NFT_CT_PROTOCOL: conntrack layer 4 protocol + * @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source + * @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination + */ +enum nft_ct_keys { + NFT_CT_STATE, + NFT_CT_DIRECTION, + NFT_CT_STATUS, + NFT_CT_MARK, + NFT_CT_SECMARK, + NFT_CT_EXPIRATION, + NFT_CT_HELPER, + NFT_CT_L3PROTOCOL, + NFT_CT_SRC, + NFT_CT_DST, + NFT_CT_PROTOCOL, + NFT_CT_PROTO_SRC, + NFT_CT_PROTO_DST, +}; + +/** + * enum nft_ct_attributes - nf_tables ct expression netlink attributes + * + * @NFTA_CT_DREG: destination register (NLA_U32) + * @NFTA_CT_KEY: conntrack data item to load (NLA_U32: nft_ct_keys) + * @NFTA_CT_DIRECTION: direction in case of directional keys (NLA_U8) + */ +enum nft_ct_attributes { + NFTA_CT_UNSPEC, + NFTA_CT_DREG, + NFTA_CT_KEY, + NFTA_CT_DIRECTION, + __NFTA_CT_MAX +}; +#define NFTA_CT_MAX (__NFTA_CT_MAX - 1) + +/** + * enum nft_limit_attributes - nf_tables limit expression netlink attributes + * + * @NFTA_LIMIT_RATE: refill rate (NLA_U64) + * @NFTA_LIMIT_UNIT: refill unit (NLA_U64) + */ +enum nft_limit_attributes { + NFTA_LIMIT_UNSPEC, + NFTA_LIMIT_RATE, + NFTA_LIMIT_UNIT, + __NFTA_LIMIT_MAX +}; +#define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1) + +/** + * enum nft_counter_attributes - nf_tables counter expression netlink attributes + * + * @NFTA_COUNTER_BYTES: number of bytes (NLA_U64) + * @NFTA_COUNTER_PACKETS: number of packets (NLA_U64) + */ +enum nft_counter_attributes { + NFTA_COUNTER_UNSPEC, + NFTA_COUNTER_BYTES, + NFTA_COUNTER_PACKETS, + __NFTA_COUNTER_MAX +}; +#define NFTA_COUNTER_MAX (__NFTA_COUNTER_MAX - 1) + +/** + * enum nft_log_attributes - nf_tables log expression netlink attributes + * + * @NFTA_LOG_GROUP: netlink group to send messages to (NLA_U32) + * @NFTA_LOG_PREFIX: prefix to prepend to log messages (NLA_STRING) + * @NFTA_LOG_SNAPLEN: length of payload to include in netlink message (NLA_U32) + * @NFTA_LOG_QTHRESHOLD: queue threshold (NLA_U32) + */ +enum nft_log_attributes { + NFTA_LOG_UNSPEC, + NFTA_LOG_GROUP, + NFTA_LOG_PREFIX, + NFTA_LOG_SNAPLEN, + NFTA_LOG_QTHRESHOLD, + __NFTA_LOG_MAX +}; +#define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1) + +/** + * enum nft_reject_types - nf_tables reject expression reject types + * + * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable + * @NFT_REJECT_TCP_RST: reject using TCP RST + */ +enum nft_reject_types { + NFT_REJECT_ICMP_UNREACH, + NFT_REJECT_TCP_RST, +}; + +/** + * enum nft_reject_attributes - nf_tables reject expression netlink attributes + * + * @NFTA_REJECT_TYPE: packet type to use (NLA_U32: nft_reject_types) + * @NFTA_REJECT_ICMP_CODE: ICMP code to use (NLA_U8) + */ +enum nft_reject_attributes { + NFTA_REJECT_UNSPEC, + NFTA_REJECT_TYPE, + NFTA_REJECT_ICMP_CODE, + __NFTA_REJECT_MAX +}; +#define NFTA_REJECT_MAX (__NFTA_REJECT_MAX - 1) + +/** + * enum nft_nat_types - nf_tables nat expression NAT types + * + * @NFT_NAT_SNAT: source NAT + * @NFT_NAT_DNAT: destination NAT + */ +enum nft_nat_types { + NFT_NAT_SNAT, + NFT_NAT_DNAT, +}; + +/** + * enum nft_nat_attributes - nf_tables nat expression netlink attributes + * + * @NFTA_NAT_TYPE: NAT type (NLA_U32: nft_nat_types) + * @NFTA_NAT_ADDR_MIN: source register of address range start (NLA_U32: nft_registers) + * @NFTA_NAT_ADDR_MAX: source register of address range end (NLA_U32: nft_registers) + * @NFTA_NAT_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) + * @NFTA_NAT_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) + */ +enum nft_nat_attributes { + NFTA_NAT_UNSPEC, + NFTA_NAT_TYPE, + NFTA_NAT_ADDR_MIN, + NFTA_NAT_ADDR_MAX, + NFTA_NAT_PROTO_MIN, + NFTA_NAT_PROTO_MAX, + __NFTA_NAT_MAX +}; +#define NFTA_NAT_MAX (__NFTA_NAT_MAX - 1) + +#endif /* _LINUX_NF_TABLES_H */ diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 4a4efafad5f4..d276c3bd55b8 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -18,6 +18,8 @@ enum nfnetlink_groups { #define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_DESTROY, #define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY + NFNLGRP_NFTABLES, +#define NFNLGRP_NFTABLES NFNLGRP_NFTABLES __NFNLGRP_MAX, }; #define NFNLGRP_MAX (__NFNLGRP_MAX - 1) @@ -51,6 +53,7 @@ struct nfgenmsg { #define NFNL_SUBSYS_ACCT 7 #define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8 #define NFNL_SUBSYS_CTHELPER 9 -#define NFNL_SUBSYS_COUNT 10 +#define NFNL_SUBSYS_NFTABLES 10 +#define NFNL_SUBSYS_COUNT 11 #endif /* _UAPI_NFNETLINK_H */ diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index a9aff9c7d027..68f8128147be 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -1,6 +1,9 @@ # # Bridge netfilter configuration # +# +config NF_TABLES_BRIDGE + tristate "Ethernet Bridge nf_tables support" menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index 0718699540b0..ea7629f58b3d 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -2,6 +2,8 @@ # Makefile for the netfilter modules for Link Layer filtering on a bridge. # +obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o + obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o # tables diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c new file mode 100644 index 000000000000..bc5c21c911c0 --- /dev/null +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include + +static struct nft_af_info nft_af_bridge __read_mostly = { + .family = NFPROTO_BRIDGE, + .nhooks = NF_BR_NUMHOOKS, + .owner = THIS_MODULE, +}; + +static int __init nf_tables_bridge_init(void) +{ + return nft_register_afinfo(&nft_af_bridge); +} + +static void __exit nf_tables_bridge_exit(void) +{ + nft_unregister_afinfo(&nft_af_bridge); +} + +module_init(nf_tables_bridge_init); +module_exit(nf_tables_bridge_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_FAMILY(AF_BRIDGE); diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 1657e39b291f..eb1d56ece361 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -36,6 +36,22 @@ config NF_CONNTRACK_PROC_COMPAT If unsure, say Y. +config NF_TABLES_IPV4 + depends on NF_TABLES + tristate "IPv4 nf_tables support" + +config NFT_REJECT_IPV4 + depends on NF_TABLES_IPV4 + tristate "nf_tables IPv4 reject support" + +config NF_TABLE_ROUTE_IPV4 + depends on NF_TABLES_IPV4 + tristate "IPv4 nf_tables route table support" + +config NF_TABLE_NAT_IPV4 + depends on NF_TABLES_IPV4 + tristate "IPv4 nf_tables nat table support" + config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 3622b248b6dd..b2f01cd2cd65 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -27,6 +27,11 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o # NAT protocols (nf_nat) obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o +obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o +obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o +obj-$(CONFIG_NF_TABLE_ROUTE_IPV4) += nf_table_route_ipv4.o +obj-$(CONFIG_NF_TABLE_NAT_IPV4) += nf_table_nat_ipv4.o + # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o diff --git a/net/ipv4/netfilter/nf_table_nat_ipv4.c b/net/ipv4/netfilter/nf_table_nat_ipv4.c new file mode 100644 index 000000000000..2a6f184c10bd --- /dev/null +++ b/net/ipv4/netfilter/nf_table_nat_ipv4.c @@ -0,0 +1,409 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_nat { + enum nft_registers sreg_addr_min:8; + enum nft_registers sreg_addr_max:8; + enum nft_registers sreg_proto_min:8; + enum nft_registers sreg_proto_max:8; + enum nf_nat_manip_type type; +}; + +static void nft_nat_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo); + struct nf_nat_range range; + + memset(&range, 0, sizeof(range)); + if (priv->sreg_addr_min) { + range.min_addr.ip = data[priv->sreg_addr_min].data[0]; + range.max_addr.ip = data[priv->sreg_addr_max].data[0]; + range.flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (priv->sreg_proto_min) { + range.min_proto.all = data[priv->sreg_proto_min].data[0]; + range.max_proto.all = data[priv->sreg_proto_max].data[0]; + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + data[NFT_REG_VERDICT].verdict = + nf_nat_setup_info(ct, &range, priv->type); +} + +static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = { + [NFTA_NAT_ADDR_MIN] = { .type = NLA_U32 }, + [NFTA_NAT_ADDR_MAX] = { .type = NLA_U32 }, + [NFTA_NAT_PROTO_MIN] = { .type = NLA_U32 }, + [NFTA_NAT_PROTO_MAX] = { .type = NLA_U32 }, + [NFTA_NAT_TYPE] = { .type = NLA_U32 }, +}; + +static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_nat *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_NAT_TYPE] == NULL) + return -EINVAL; + + switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) { + case NFT_NAT_SNAT: + priv->type = NF_NAT_MANIP_SRC; + break; + case NFT_NAT_DNAT: + priv->type = NF_NAT_MANIP_DST; + break; + default: + return -EINVAL; + } + + if (tb[NFTA_NAT_ADDR_MIN]) { + priv->sreg_addr_min = ntohl(nla_get_be32(tb[NFTA_NAT_ADDR_MIN])); + err = nft_validate_input_register(priv->sreg_addr_min); + if (err < 0) + return err; + } + + if (tb[NFTA_NAT_ADDR_MAX]) { + priv->sreg_addr_max = ntohl(nla_get_be32(tb[NFTA_NAT_ADDR_MAX])); + err = nft_validate_input_register(priv->sreg_addr_max); + if (err < 0) + return err; + } else + priv->sreg_addr_max = priv->sreg_addr_min; + + if (tb[NFTA_NAT_PROTO_MIN]) { + priv->sreg_proto_min = ntohl(nla_get_be32(tb[NFTA_NAT_PROTO_MIN])); + err = nft_validate_input_register(priv->sreg_proto_min); + if (err < 0) + return err; + } + + if (tb[NFTA_NAT_PROTO_MAX]) { + priv->sreg_proto_max = ntohl(nla_get_be32(tb[NFTA_NAT_PROTO_MAX])); + err = nft_validate_input_register(priv->sreg_proto_max); + if (err < 0) + return err; + } else + priv->sreg_proto_max = priv->sreg_proto_min; + + return 0; +} + +static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + + switch (priv->type) { + case NF_NAT_MANIP_SRC: + if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT))) + goto nla_put_failure; + break; + case NF_NAT_MANIP_DST: + if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT))) + goto nla_put_failure; + break; + } + + if (nla_put_be32(skb, NFTA_NAT_ADDR_MIN, htonl(priv->sreg_addr_min))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NAT_ADDR_MAX, htonl(priv->sreg_addr_max))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NAT_PROTO_MIN, htonl(priv->sreg_proto_min))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NAT_PROTO_MAX, htonl(priv->sreg_proto_max))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_nat_ops __read_mostly = { + .name = "nat", + .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), + .owner = THIS_MODULE, + .eval = nft_nat_eval, + .init = nft_nat_init, + .dump = nft_nat_dump, + .policy = nft_nat_policy, + .maxattr = NFTA_NAT_MAX, +}; + +/* + * NAT table + */ + +static unsigned int nf_nat_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_nat *nat; + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + unsigned int ret; + + if (ct == NULL || nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); + + nat = nfct_nat(ct); + if (nat == NULL) { + /* Conntrack module was loaded late, can't add extension. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) + return NF_ACCEPT; + } + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED + IP_CT_IS_REPLY: + if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { + if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, + ops->hooknum)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall through */ + case IP_CT_NEW: + if (nf_nat_initialized(ct, maniptype)) + break; + + ret = nft_do_chain(ops, skb, in, out, okfn); + if (ret != NF_ACCEPT) + return ret; + if (!nf_nat_initialized(ct, maniptype)) { + ret = nf_nat_alloc_null_binding(ct, ops->hooknum); + if (ret != NF_ACCEPT) + return ret; + } + default: + break; + } + + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); +} + +static unsigned int nf_nat_prerouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + __be32 daddr = ip_hdr(skb)->daddr; + unsigned int ret; + + ret = nf_nat_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + ip_hdr(skb)->daddr != daddr) { + skb_dst_drop(skb); + } + return ret; +} + +static unsigned int nf_nat_postrouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo __maybe_unused; + const struct nf_conn *ct __maybe_unused; + unsigned int ret; + + ret = nf_nat_fn(ops, skb, in, out, okfn); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.src.u3.ip != + ct->tuplehash[!dir].tuple.dst.u3.ip || + ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all) + return nf_xfrm_me_harder(skb, AF_INET) == 0 ? + ret : NF_DROP; + } +#endif + return ret; +} + +static unsigned int nf_nat_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct; + unsigned int ret; + + ret = nf_nat_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.dst.u3.ip != + ct->tuplehash[!dir].tuple.src.u3.ip) { + if (ip_route_me_harder(skb, RTN_UNSPEC)) + ret = NF_DROP; + } +#ifdef CONFIG_XFRM + else if (ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) + if (nf_xfrm_me_harder(skb, AF_INET)) + ret = NF_DROP; +#endif + } + return ret; +} + +static struct nft_base_chain nf_chain_nat_prerouting __read_mostly = { + .chain = { + .name = "PREROUTING", + .rules = LIST_HEAD_INIT(nf_chain_nat_prerouting.chain.rules), + .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, + }, + .ops = { + .hook = nf_nat_prerouting, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_NAT_DST, + .priv = &nf_chain_nat_prerouting.chain, + }, +}; + +static struct nft_base_chain nf_chain_nat_postrouting __read_mostly = { + .chain = { + .name = "POSTROUTING", + .rules = LIST_HEAD_INIT(nf_chain_nat_postrouting.chain.rules), + .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, + }, + .ops = { + .hook = nf_nat_postrouting, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_NAT_SRC, + .priv = &nf_chain_nat_postrouting.chain, + }, +}; + +static struct nft_base_chain nf_chain_nat_output __read_mostly = { + .chain = { + .name = "OUTPUT", + .rules = LIST_HEAD_INIT(nf_chain_nat_output.chain.rules), + .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, + }, + .ops = { + .hook = nf_nat_output, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_NAT_DST, + .priv = &nf_chain_nat_output.chain, + }, +}; + +static struct nft_base_chain nf_chain_nat_input __read_mostly = { + .chain = { + .name = "INPUT", + .rules = LIST_HEAD_INIT(nf_chain_nat_input.chain.rules), + .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, + }, + .ops = { + .hook = nf_nat_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_NAT_SRC, + .priv = &nf_chain_nat_input.chain, + }, +}; + + +static struct nft_table nf_table_nat_ipv4 __read_mostly = { + .name = "nat", + .chains = LIST_HEAD_INIT(nf_table_nat_ipv4.chains), +}; + +static int __init nf_table_nat_init(void) +{ + int err; + + list_add_tail(&nf_chain_nat_prerouting.chain.list, + &nf_table_nat_ipv4.chains); + list_add_tail(&nf_chain_nat_postrouting.chain.list, + &nf_table_nat_ipv4.chains); + list_add_tail(&nf_chain_nat_output.chain.list, + &nf_table_nat_ipv4.chains); + list_add_tail(&nf_chain_nat_input.chain.list, + &nf_table_nat_ipv4.chains); + + err = nft_register_table(&nf_table_nat_ipv4, NFPROTO_IPV4); + if (err < 0) + goto err1; + + err = nft_register_expr(&nft_nat_ops); + if (err < 0) + goto err2; + + return 0; + +err2: + nft_unregister_table(&nf_table_nat_ipv4, NFPROTO_IPV4); +err1: + return err; +} + +static void __exit nf_table_nat_exit(void) +{ + nft_unregister_expr(&nft_nat_ops); + nft_unregister_table(&nf_table_nat_ipv4, AF_INET); +} + +module_init(nf_table_nat_init); +module_exit(nf_table_nat_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_TABLE(AF_INET, "nat"); +MODULE_ALIAS_NFT_EXPR("nat"); diff --git a/net/ipv4/netfilter/nf_table_route_ipv4.c b/net/ipv4/netfilter/nf_table_route_ipv4.c new file mode 100644 index 000000000000..4f257a1ed661 --- /dev/null +++ b/net/ipv4/netfilter/nf_table_route_ipv4.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + u32 mark; + __be32 saddr, daddr; + u_int8_t tos; + const struct iphdr *iph; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + return NF_ACCEPT; + + mark = skb->mark; + iph = ip_hdr(skb); + saddr = iph->saddr; + daddr = iph->daddr; + tos = iph->tos; + + ret = nft_do_chain(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_QUEUE) { + iph = ip_hdr(skb); + + if (iph->saddr != saddr || + iph->daddr != daddr || + skb->mark != mark || + iph->tos != tos) + if (ip_route_me_harder(skb, RTN_UNSPEC)) + ret = NF_DROP; + } + return ret; +} + +static struct nft_base_chain nf_chain_route_output __read_mostly = { + .chain = { + .name = "OUTPUT", + .rules = LIST_HEAD_INIT(nf_chain_route_output.chain.rules), + .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, + }, + .ops = { + .hook = nf_route_table_hook, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_MANGLE, + .priv = &nf_chain_route_output.chain, + }, +}; + +static struct nft_table nf_table_route_ipv4 __read_mostly = { + .name = "route", + .chains = LIST_HEAD_INIT(nf_table_route_ipv4.chains), +}; + +static int __init nf_table_route_init(void) +{ + list_add_tail(&nf_chain_route_output.chain.list, + &nf_table_route_ipv4.chains); + return nft_register_table(&nf_table_route_ipv4, NFPROTO_IPV4); +} + +static void __exit nf_table_route_exit(void) +{ + nft_unregister_table(&nf_table_route_ipv4, NFPROTO_IPV4); +} + +module_init(nf_table_route_init); +module_exit(nf_table_route_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_TABLE(AF_INET, "route"); diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c new file mode 100644 index 000000000000..63d0a3bf53d3 --- /dev/null +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include + +static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + if (unlikely(skb->len < sizeof(struct iphdr) || + ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) { + if (net_ratelimit()) + pr_info("nf_tables_ipv4: ignoring short SOCK_RAW " + "packet\n"); + return NF_ACCEPT; + } + + return nft_do_chain(ops, skb, in, out, okfn); +} + +static struct nft_af_info nft_af_ipv4 __read_mostly = { + .family = NFPROTO_IPV4, + .nhooks = NF_INET_NUMHOOKS, + .owner = THIS_MODULE, + .hooks = { + [NF_INET_LOCAL_OUT] = nft_ipv4_output, + }, +}; + +static int __init nf_tables_ipv4_init(void) +{ + return nft_register_afinfo(&nft_af_ipv4); +} + +static void __exit nf_tables_ipv4_exit(void) +{ + nft_unregister_afinfo(&nft_af_ipv4); +} + +module_init(nf_tables_ipv4_init); +module_exit(nf_tables_ipv4_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_FAMILY(AF_INET); diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c new file mode 100644 index 000000000000..b4ee8d3bb1e4 --- /dev/null +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_reject { + enum nft_reject_types type:8; + u8 icmp_code; +}; + +static void nft_reject_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_reject *priv = nft_expr_priv(expr); + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + icmp_send(pkt->skb, ICMP_DEST_UNREACH, priv->icmp_code, 0); + break; + case NFT_REJECT_TCP_RST: + break; + } + + data[NFT_REG_VERDICT].verdict = NF_DROP; +} + +static const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = { + [NFTA_REJECT_TYPE] = { .type = NLA_U32 }, + [NFTA_REJECT_ICMP_CODE] = { .type = NLA_U8 }, +}; + +static int nft_reject_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_reject *priv = nft_expr_priv(expr); + + if (tb[NFTA_REJECT_TYPE] == NULL) + return -EINVAL; + + priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE])); + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + if (tb[NFTA_REJECT_ICMP_CODE] == NULL) + return -EINVAL; + priv->icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]); + case NFT_REJECT_TCP_RST: + break; + default: + return -EINVAL; + } + + return 0; +} + +static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_reject *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_REJECT_TYPE, priv->type)) + goto nla_put_failure; + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) + goto nla_put_failure; + break; + } + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops reject_ops __read_mostly = { + .name = "reject", + .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), + .owner = THIS_MODULE, + .eval = nft_reject_eval, + .init = nft_reject_init, + .dump = nft_reject_dump, + .policy = nft_reject_policy, + .maxattr = NFTA_REJECT_MAX, +}; + +static int __init nft_reject_module_init(void) +{ + return nft_register_expr(&reject_ops); +} + +static void __exit nft_reject_module_exit(void) +{ + nft_unregister_expr(&reject_ops); +} + +module_init(nft_reject_module_init); +module_exit(nft_reject_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("reject"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index a7f842b29b67..5677e38eeca3 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,6 +25,14 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. +config NF_TABLES_IPV6 + depends on NF_TABLES + tristate "IPv6 nf_tables support" + +config NF_TABLE_ROUTE_IPV6 + depends on NF_TABLES_IPV6 + tristate "IPv6 nf_tables route table support" + config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 2b53738f798c..956af4492d10 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -23,6 +23,10 @@ obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o +# nf_tables +obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o +obj-$(CONFIG_NF_TABLE_ROUTE_IPV6) += nf_table_route_ipv6.o + # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o diff --git a/net/ipv6/netfilter/nf_table_route_ipv6.c b/net/ipv6/netfilter/nf_table_route_ipv6.c new file mode 100644 index 000000000000..48ac65c7b398 --- /dev/null +++ b/net/ipv6/netfilter/nf_table_route_ipv6.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + struct in6_addr saddr, daddr; + u_int8_t hop_limit; + u32 mark, flowlabel; + + /* save source/dest address, mark, hoplimit, flowlabel, priority */ + memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); + memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); + mark = skb->mark; + hop_limit = ipv6_hdr(skb)->hop_limit; + + /* flowlabel and prio (includes version, which shouldn't change either */ + flowlabel = *((u32 *)ipv6_hdr(skb)); + + ret = nft_do_chain(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_QUEUE && + (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || + memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || + skb->mark != mark || + ipv6_hdr(skb)->hop_limit != hop_limit || + flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) + return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; + + return ret; +} + +static struct nft_base_chain nf_chain_route_output __read_mostly = { + .chain = { + .name = "OUTPUT", + .rules = LIST_HEAD_INIT(nf_chain_route_output.chain.rules), + .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, + }, + .ops = { + .hook = nf_route_table_hook, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP6_PRI_MANGLE, + .priv = &nf_chain_route_output.chain, + }, +}; + +static struct nft_table nf_table_route_ipv6 __read_mostly = { + .name = "route", + .chains = LIST_HEAD_INIT(nf_table_route_ipv6.chains), +}; + +static int __init nf_table_route_init(void) +{ + list_add_tail(&nf_chain_route_output.chain.list, + &nf_table_route_ipv6.chains); + return nft_register_table(&nf_table_route_ipv6, NFPROTO_IPV6); +} + +static void __exit nf_table_route_exit(void) +{ + nft_unregister_table(&nf_table_route_ipv6, NFPROTO_IPV6); +} + +module_init(nf_table_route_init); +module_exit(nf_table_route_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_TABLE(AF_INET6, "route"); diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c new file mode 100644 index 000000000000..e0717cea4913 --- /dev/null +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include + +static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + if (unlikely(skb->len < sizeof(struct ipv6hdr))) { + if (net_ratelimit()) + pr_info("nf_tables_ipv6: ignoring short SOCK_RAW " + "packet\n"); + return NF_ACCEPT; + } + + return nft_do_chain(ops, skb, in, out, okfn); +} + +static struct nft_af_info nft_af_ipv6 __read_mostly = { + .family = NFPROTO_IPV6, + .nhooks = NF_INET_NUMHOOKS, + .owner = THIS_MODULE, + .hooks = { + [NF_INET_LOCAL_OUT] = nft_ipv6_output, + }, +}; + +static int __init nf_tables_ipv6_init(void) +{ + return nft_register_afinfo(&nft_af_ipv6); +} + +static void __exit nf_tables_ipv6_exit(void) +{ + nft_unregister_afinfo(&nft_af_ipv6); +} + +module_init(nf_tables_ipv6_init); +module_exit(nf_tables_ipv6_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_FAMILY(AF_INET6); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 6e839b6dff2b..c271e1af93b5 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -413,6 +413,43 @@ config NETFILTER_SYNPROXY endif # NF_CONNTRACK +config NF_TABLES + depends on NETFILTER_NETLINK + tristate "Netfilter nf_tables support" + +config NFT_EXTHDR + depends on NF_TABLES + tristate "Netfilter nf_tables IPv6 exthdr module" + +config NFT_META + depends on NF_TABLES + tristate "Netfilter nf_tables meta module" + +config NFT_CT + depends on NF_TABLES + depends on NF_CONNTRACK + tristate "Netfilter nf_tables conntrack module" + +config NFT_SET + depends on NF_TABLES + tristate "Netfilter nf_tables set module" + +config NFT_HASH + depends on NF_TABLES + tristate "Netfilter nf_tables hash module" + +config NFT_COUNTER + depends on NF_TABLES + tristate "Netfilter nf_tables counter module" + +config NFT_LOG + depends on NF_TABLES + tristate "Netfilter nf_tables log module" + +config NFT_LIMIT + depends on NF_TABLES + tristate "Netfilter nf_tables limit module" + config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index c3a0a12907f6..1ca3f3932826 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -64,6 +64,22 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o # SYNPROXY obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o +# nf_tables +nf_tables-objs += nf_tables_core.o nf_tables_api.o +nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o +nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o + +obj-$(CONFIG_NF_TABLES) += nf_tables.o +obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o +obj-$(CONFIG_NFT_META) += nft_meta.o +obj-$(CONFIG_NFT_CT) += nft_ct.o +obj-$(CONFIG_NFT_LIMIT) += nft_limit.o +#nf_tables-objs += nft_meta_target.o +obj-$(CONFIG_NFT_SET) += nft_set.o +obj-$(CONFIG_NFT_HASH) += nft_hash.o +obj-$(CONFIG_NFT_COUNTER) += nft_counter.o +obj-$(CONFIG_NFT_LOG) += nft_log.o + # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c new file mode 100644 index 000000000000..7d59c89c6c75 --- /dev/null +++ b/net/netfilter/nf_tables_api.c @@ -0,0 +1,1760 @@ +/* + * Copyright (c) 2007, 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static LIST_HEAD(nf_tables_afinfo); +static LIST_HEAD(nf_tables_expressions); + +/** + * nft_register_afinfo - register nf_tables address family info + * + * @afi: address family info to register + * + * Register the address family for use with nf_tables. Returns zero on + * success or a negative errno code otherwise. + */ +int nft_register_afinfo(struct nft_af_info *afi) +{ + INIT_LIST_HEAD(&afi->tables); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_add_tail(&afi->list, &nf_tables_afinfo); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return 0; +} +EXPORT_SYMBOL_GPL(nft_register_afinfo); + +/** + * nft_unregister_afinfo - unregister nf_tables address family info + * + * @afi: address family info to unregister + * + * Unregister the address family for use with nf_tables. + */ +void nft_unregister_afinfo(struct nft_af_info *afi) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del(&afi->list); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_afinfo); + +static struct nft_af_info *nft_afinfo_lookup(int family) +{ + struct nft_af_info *afi; + + list_for_each_entry(afi, &nf_tables_afinfo, list) { + if (afi->family == family) + return afi; + } + return NULL; +} + +static struct nft_af_info *nf_tables_afinfo_lookup(int family, bool autoload) +{ + struct nft_af_info *afi; + + afi = nft_afinfo_lookup(family); + if (afi != NULL) + return afi; +#ifdef CONFIG_MODULES + if (autoload) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-afinfo-%u", family); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + afi = nft_afinfo_lookup(family); + if (afi != NULL) + return ERR_PTR(-EAGAIN); + } +#endif + return ERR_PTR(-EAFNOSUPPORT); +} + +/* + * Tables + */ + +static struct nft_table *nft_table_lookup(const struct nft_af_info *afi, + const struct nlattr *nla) +{ + struct nft_table *table; + + list_for_each_entry(table, &afi->tables, list) { + if (!nla_strcmp(nla, table->name)) + return table; + } + return NULL; +} + +static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi, + const struct nlattr *nla, + bool autoload) +{ + struct nft_table *table; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + table = nft_table_lookup(afi, nla); + if (table != NULL) + return table; + +#ifdef CONFIG_MODULES + if (autoload) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-table-%u-%*.s", afi->family, + nla_len(nla)-1, (const char *)nla_data(nla)); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (nft_table_lookup(afi, nla)) + return ERR_PTR(-EAGAIN); + } +#endif + return ERR_PTR(-ENOENT); +} + +static inline u64 nf_tables_alloc_handle(struct nft_table *table) +{ + return ++table->hgenerator; +} + +static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { + [NFTA_TABLE_NAME] = { .type = NLA_STRING }, +}; + +static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq, + int event, u32 flags, int family, + const struct nft_table *table) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_TABLE_NAME, table->name)) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_table_notify(const struct sk_buff *oskb, + const struct nlmsghdr *nlh, + const struct nft_table *table, + int event, int family) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + u32 seq = nlh ? nlh->nlmsg_seq : 0; + struct net *net = oskb ? sock_net(oskb->sk) : &init_net; + bool report; + int err; + + report = nlh ? nlmsg_report(nlh) : false; + if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_table_info(skb, portid, seq, event, 0, + family, table); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static int nf_tables_dump_tables(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + unsigned int idx = 0, s_idx = cb->args[0]; + int family = nfmsg->nfgen_family; + + list_for_each_entry(afi, &nf_tables_afinfo, list) { + if (family != NFPROTO_UNSPEC && family != afi->family) + continue; + + list_for_each_entry(table, &afi->tables, list) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (nf_tables_fill_table_info(skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWTABLE, + NLM_F_MULTI, + afi->family, table) < 0) + goto done; +cont: + idx++; + } + } +done: + cb->args[0] = idx; + return skb->len; +} + +static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + struct sk_buff *skb2; + int family = nfmsg->nfgen_family; + int err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_tables, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0, + family, table); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + +err: + kfree_skb(skb2); + return err; +} + +static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nlattr *name; + struct nft_af_info *afi; + struct nft_table *table; + int family = nfmsg->nfgen_family; + + afi = nf_tables_afinfo_lookup(family, true); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + name = nla[NFTA_TABLE_NAME]; + table = nf_tables_table_lookup(afi, name, false); + if (IS_ERR(table)) { + if (PTR_ERR(table) != -ENOENT) + return PTR_ERR(table); + table = NULL; + } + + if (table != NULL) { + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + return 0; + } + + table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL); + if (table == NULL) + return -ENOMEM; + + nla_strlcpy(table->name, name, nla_len(name)); + INIT_LIST_HEAD(&table->chains); + + list_add_tail(&table->list, &afi->tables); + nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); + return 0; +} + +static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + struct nft_af_info *afi; + struct nft_table *table; + int family = nfmsg->nfgen_family; + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + if (table->flags & NFT_TABLE_BUILTIN) + return -EOPNOTSUPP; + + if (table->use) + return -EBUSY; + + list_del(&table->list); + nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family); + kfree(table); + return 0; +} + +static struct nft_table *__nf_tables_table_lookup(const struct nft_af_info *afi, + const char *name) +{ + struct nft_table *table; + + list_for_each_entry(table, &afi->tables, list) { + if (!strcmp(name, table->name)) + return table; + } + + return ERR_PTR(-ENOENT); +} + +static int nf_tables_chain_notify(const struct sk_buff *oskb, + const struct nlmsghdr *nlh, + const struct nft_table *table, + const struct nft_chain *chain, + int event, int family); + +/** + * nft_register_table - register a built-in table + * + * @table: the table to register + * @family: protocol family to register table with + * + * Register a built-in table for use with nf_tables. Returns zero on + * success or a negative errno code otherwise. + */ +int nft_register_table(struct nft_table *table, int family) +{ + struct nft_af_info *afi; + struct nft_table *t; + struct nft_chain *chain; + int err; + + nfnl_lock(NFNL_SUBSYS_NFTABLES); +again: + afi = nf_tables_afinfo_lookup(family, true); + if (IS_ERR(afi)) { + err = PTR_ERR(afi); + if (err == -EAGAIN) + goto again; + goto err; + } + + t = __nf_tables_table_lookup(afi, table->name); + if (IS_ERR(t)) { + err = PTR_ERR(t); + if (err != -ENOENT) + goto err; + t = NULL; + } + + if (t != NULL) { + err = -EEXIST; + goto err; + } + + table->flags |= NFT_TABLE_BUILTIN; + list_add_tail(&table->list, &afi->tables); + nf_tables_table_notify(NULL, NULL, table, NFT_MSG_NEWTABLE, family); + list_for_each_entry(chain, &table->chains, list) + nf_tables_chain_notify(NULL, NULL, table, chain, + NFT_MSG_NEWCHAIN, family); + err = 0; +err: + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return err; +} +EXPORT_SYMBOL_GPL(nft_register_table); + +/** + * nft_unregister_table - unregister a built-in table + * + * @table: the table to unregister + * @family: protocol family to unregister table with + * + * Unregister a built-in table for use with nf_tables. + */ +void nft_unregister_table(struct nft_table *table, int family) +{ + struct nft_chain *chain; + + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del(&table->list); + list_for_each_entry(chain, &table->chains, list) + nf_tables_chain_notify(NULL, NULL, table, chain, + NFT_MSG_DELCHAIN, family); + nf_tables_table_notify(NULL, NULL, table, NFT_MSG_DELTABLE, family); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_table); + +/* + * Chains + */ + +static struct nft_chain * +nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle) +{ + struct nft_chain *chain; + + list_for_each_entry(chain, &table->chains, list) { + if (chain->handle == handle) + return chain; + } + + return ERR_PTR(-ENOENT); +} + +static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table, + const struct nlattr *nla) +{ + struct nft_chain *chain; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + list_for_each_entry(chain, &table->chains, list) { + if (!nla_strcmp(nla, chain->name)) + return chain; + } + + return ERR_PTR(-ENOENT); +} + +static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { + [NFTA_CHAIN_TABLE] = { .type = NLA_STRING }, + [NFTA_CHAIN_HANDLE] = { .type = NLA_U64 }, + [NFTA_CHAIN_NAME] = { .type = NLA_STRING, + .len = NFT_CHAIN_MAXNAMELEN - 1 }, + [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { + [NFTA_HOOK_HOOKNUM] = { .type = NLA_U32 }, + [NFTA_HOOK_PRIORITY] = { .type = NLA_U32 }, +}; + +static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, + int event, u32 flags, int family, + const struct nft_table *table, + const struct nft_chain *chain) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name)) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle))) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name)) + goto nla_put_failure; + + if (chain->flags & NFT_BASE_CHAIN) { + const struct nf_hook_ops *ops = &nft_base_chain(chain)->ops; + struct nlattr *nest = nla_nest_start(skb, NFTA_CHAIN_HOOK); + if (nest == NULL) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority))) + goto nla_put_failure; + nla_nest_end(skb, nest); + } + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_chain_notify(const struct sk_buff *oskb, + const struct nlmsghdr *nlh, + const struct nft_table *table, + const struct nft_chain *chain, + int event, int family) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + struct net *net = oskb ? sock_net(oskb->sk) : &init_net; + u32 seq = nlh ? nlh->nlmsg_seq : 0; + bool report; + int err; + + report = nlh ? nlmsg_report(nlh) : false; + if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family, + table, chain); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static int nf_tables_dump_chains(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + unsigned int idx = 0, s_idx = cb->args[0]; + int family = nfmsg->nfgen_family; + + list_for_each_entry(afi, &nf_tables_afinfo, list) { + if (family != NFPROTO_UNSPEC && family != afi->family) + continue; + + list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry(chain, &table->chains, list) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWCHAIN, + NLM_F_MULTI, + afi->family, table, chain) < 0) + goto done; +cont: + idx++; + } + } + } +done: + cb->args[0] = idx; + return skb->len; +} + + +static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + struct sk_buff *skb2; + int family = nfmsg->nfgen_family; + int err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_chains, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + err = nf_tables_fill_chain_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0, + family, table, chain); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + +err: + kfree_skb(skb2); + return err; +} + +static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nlattr * uninitialized_var(name); + const struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; + struct nft_base_chain *basechain; + struct nlattr *ha[NFTA_HOOK_MAX + 1]; + int family = nfmsg->nfgen_family; + u64 handle = 0; + int err; + bool create; + + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; + + afi = nf_tables_afinfo_lookup(family, true); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], create); + if (IS_ERR(table)) + return PTR_ERR(table); + + if (table->use == UINT_MAX) + return -EOVERFLOW; + + chain = NULL; + name = nla[NFTA_CHAIN_NAME]; + + if (nla[NFTA_CHAIN_HANDLE]) { + handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE])); + chain = nf_tables_chain_lookup_byhandle(table, handle); + if (IS_ERR(chain)) + return PTR_ERR(chain); + } else { + chain = nf_tables_chain_lookup(table, name); + if (IS_ERR(chain)) { + if (PTR_ERR(chain) != -ENOENT) + return PTR_ERR(chain); + chain = NULL; + } + } + + if (chain != NULL) { + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + + if (nla[NFTA_CHAIN_HANDLE] && name && + !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]))) + return -EEXIST; + + if (nla[NFTA_CHAIN_HANDLE] && name) + nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); + + goto notify; + } + + if (nla[NFTA_CHAIN_HOOK]) { + struct nf_hook_ops *ops; + + err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], + nft_hook_policy); + if (err < 0) + return err; + if (ha[NFTA_HOOK_HOOKNUM] == NULL || + ha[NFTA_HOOK_PRIORITY] == NULL) + return -EINVAL; + if (ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])) >= afi->nhooks) + return -EINVAL; + + basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); + if (basechain == NULL) + return -ENOMEM; + chain = &basechain->chain; + + ops = &basechain->ops; + ops->pf = family; + ops->owner = afi->owner; + ops->hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); + ops->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); + ops->priv = chain; + ops->hook = nft_do_chain; + if (afi->hooks[ops->hooknum]) + ops->hook = afi->hooks[ops->hooknum]; + + chain->flags |= NFT_BASE_CHAIN; + } else { + chain = kzalloc(sizeof(*chain), GFP_KERNEL); + if (chain == NULL) + return -ENOMEM; + } + + INIT_LIST_HEAD(&chain->rules); + chain->handle = nf_tables_alloc_handle(table); + nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); + + list_add_tail(&chain->list, &table->chains); + table->use++; +notify: + nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN, + family); + return 0; +} + +static void nf_tables_rcu_chain_destroy(struct rcu_head *head) +{ + struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head); + + BUG_ON(chain->use > 0); + + if (chain->flags & NFT_BASE_CHAIN) + kfree(nft_base_chain(chain)); + else + kfree(chain); +} + +static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; + int family = nfmsg->nfgen_family; + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + if (chain->flags & NFT_CHAIN_BUILTIN) + return -EOPNOTSUPP; + + if (!list_empty(&chain->rules)) + return -EBUSY; + + list_del(&chain->list); + table->use--; + + if (chain->flags & NFT_BASE_CHAIN) + nf_unregister_hook(&nft_base_chain(chain)->ops); + + nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN, + family); + + /* Make sure all rule references are gone before this is released */ + call_rcu(&chain->rcu_head, nf_tables_rcu_chain_destroy); + return 0; +} + +static void nft_ctx_init(struct nft_ctx *ctx, + const struct nft_af_info *afi, + const struct nft_table *table, + const struct nft_chain *chain) +{ + ctx->afi = afi; + ctx->table = table; + ctx->chain = chain; +} + +/* + * Expressions + */ + +/** + * nft_register_expr - register nf_tables expr operations + * @ops: expr operations + * + * Registers the expr operations for use with nf_tables. Returns zero on + * success or a negative errno code otherwise. + */ +int nft_register_expr(struct nft_expr_ops *ops) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_add_tail(&ops->list, &nf_tables_expressions); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return 0; +} +EXPORT_SYMBOL_GPL(nft_register_expr); + +/** + * nft_unregister_expr - unregister nf_tables expr operations + * @ops: expr operations + * + * Unregisters the expr operations for use with nf_tables. + */ +void nft_unregister_expr(struct nft_expr_ops *ops) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del(&ops->list); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_expr); + +static const struct nft_expr_ops *__nft_expr_ops_get(struct nlattr *nla) +{ + const struct nft_expr_ops *ops; + + list_for_each_entry(ops, &nf_tables_expressions, list) { + if (!nla_strcmp(nla, ops->name)) + return ops; + } + return NULL; +} + +static const struct nft_expr_ops *nft_expr_ops_get(struct nlattr *nla) +{ + const struct nft_expr_ops *ops; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + ops = __nft_expr_ops_get(nla); + if (ops != NULL && try_module_get(ops->owner)) + return ops; + +#ifdef CONFIG_MODULES + if (ops == NULL) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-expr-%.*s", + nla_len(nla), (char *)nla_data(nla)); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (__nft_expr_ops_get(nla)) + return ERR_PTR(-EAGAIN); + } +#endif + return ERR_PTR(-ENOENT); +} + +static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = { + [NFTA_EXPR_NAME] = { .type = NLA_STRING }, + [NFTA_EXPR_DATA] = { .type = NLA_NESTED }, +}; + +static int nf_tables_fill_expr_info(struct sk_buff *skb, + const struct nft_expr *expr) +{ + if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->name)) + goto nla_put_failure; + + if (expr->ops->dump) { + struct nlattr *data = nla_nest_start(skb, NFTA_EXPR_DATA); + if (data == NULL) + goto nla_put_failure; + if (expr->ops->dump(skb, expr) < 0) + goto nla_put_failure; + nla_nest_end(skb, data); + } + + return skb->len; + +nla_put_failure: + return -1; +}; + +struct nft_expr_info { + const struct nft_expr_ops *ops; + struct nlattr *tb[NFTA_EXPR_MAX + 1]; +}; + +static int nf_tables_expr_parse(const struct nlattr *nla, + struct nft_expr_info *info) +{ + const struct nft_expr_ops *ops; + int err; + + err = nla_parse_nested(info->tb, NFTA_EXPR_MAX, nla, nft_expr_policy); + if (err < 0) + return err; + + ops = nft_expr_ops_get(info->tb[NFTA_EXPR_NAME]); + if (IS_ERR(ops)) + return PTR_ERR(ops); + info->ops = ops; + return 0; +} + +static int nf_tables_newexpr(const struct nft_ctx *ctx, + struct nft_expr_info *info, + struct nft_expr *expr) +{ + const struct nft_expr_ops *ops = info->ops; + int err; + + expr->ops = ops; + if (ops->init) { + struct nlattr *ma[ops->maxattr + 1]; + + if (info->tb[NFTA_EXPR_DATA]) { + err = nla_parse_nested(ma, ops->maxattr, + info->tb[NFTA_EXPR_DATA], + ops->policy); + if (err < 0) + goto err1; + } else + memset(ma, 0, sizeof(ma[0]) * (ops->maxattr + 1)); + + err = ops->init(ctx, expr, (const struct nlattr **)ma); + if (err < 0) + goto err1; + } + + info->ops = NULL; + return 0; + +err1: + expr->ops = NULL; + return err; +} + +static void nf_tables_expr_destroy(struct nft_expr *expr) +{ + if (expr->ops->destroy) + expr->ops->destroy(expr); + module_put(expr->ops->owner); +} + +/* + * Rules + */ + +static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain, + u64 handle) +{ + struct nft_rule *rule; + + // FIXME: this sucks + list_for_each_entry(rule, &chain->rules, list) { + if (handle == rule->handle) + return rule; + } + + return ERR_PTR(-ENOENT); +} + +static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain, + const struct nlattr *nla) +{ + if (nla == NULL) + return ERR_PTR(-EINVAL); + + return __nf_tables_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla))); +} + +static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { + [NFTA_RULE_TABLE] = { .type = NLA_STRING }, + [NFTA_RULE_CHAIN] = { .type = NLA_STRING, + .len = NFT_CHAIN_MAXNAMELEN - 1 }, + [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, + [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, +}; + +static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, + int event, u32 flags, int family, + const struct nft_table *table, + const struct nft_chain *chain, + const struct nft_rule *rule) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + const struct nft_expr *expr, *next; + struct nlattr *list; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), + flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_RULE_TABLE, table->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name)) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle))) + goto nla_put_failure; + + list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS); + if (list == NULL) + goto nla_put_failure; + nft_rule_for_each_expr(expr, next, rule) { + struct nlattr *elem = nla_nest_start(skb, NFTA_LIST_ELEM); + if (elem == NULL) + goto nla_put_failure; + if (nf_tables_fill_expr_info(skb, expr) < 0) + goto nla_put_failure; + nla_nest_end(skb, elem); + } + nla_nest_end(skb, list); + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_rule_notify(const struct sk_buff *oskb, + const struct nlmsghdr *nlh, + const struct nft_table *table, + const struct nft_chain *chain, + const struct nft_rule *rule, + int event, u32 flags, int family) +{ + struct sk_buff *skb; + u32 portid = NETLINK_CB(oskb).portid; + struct net *net = oskb ? sock_net(oskb->sk) : &init_net; + u32 seq = nlh->nlmsg_seq; + bool report; + int err; + + report = nlmsg_report(nlh); + if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_rule_info(skb, portid, seq, event, flags, + family, table, chain, rule); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static int nf_tables_dump_rules(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + const struct nft_rule *rule; + unsigned int idx = 0, s_idx = cb->args[0]; + int family = nfmsg->nfgen_family; + + list_for_each_entry(afi, &nf_tables_afinfo, list) { + if (family != NFPROTO_UNSPEC && family != afi->family) + continue; + + list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry(chain, &table->chains, list) { + list_for_each_entry(rule, &chain->rules, list) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWRULE, + NLM_F_MULTI | NLM_F_APPEND, + afi->family, table, chain, rule) < 0) + goto done; +cont: + idx++; + } + } + } + } +done: + cb->args[0] = idx; + return skb->len; +} + +static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + const struct nft_rule *rule; + struct sk_buff *skb2; + int family = nfmsg->nfgen_family; + int err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_rules, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); + if (IS_ERR(rule)) + return PTR_ERR(rule); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + err = nf_tables_fill_rule_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, + family, table, chain, rule); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + +err: + kfree_skb(skb2); + return err; +} + +static void nf_tables_rcu_rule_destroy(struct rcu_head *head) +{ + struct nft_rule *rule = container_of(head, struct nft_rule, rcu_head); + struct nft_expr *expr; + + /* + * Careful: some expressions might not be initialized in case this + * is called on error from nf_tables_newrule(). + */ + expr = nft_expr_first(rule); + while (expr->ops && expr != nft_expr_last(rule)) { + nf_tables_expr_destroy(expr); + expr = nft_expr_next(expr); + } + kfree(rule); +} + +static void nf_tables_rule_destroy(struct nft_rule *rule) +{ + call_rcu(&rule->rcu_head, nf_tables_rcu_rule_destroy); +} + +#define NFT_RULE_MAXEXPRS 128 + +static struct nft_expr_info *info; + +static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; + struct nft_rule *rule, *old_rule = NULL; + struct nft_expr *expr; + struct nft_ctx ctx; + struct nlattr *tmp; + unsigned int size, i, n; + int err, rem; + bool create; + u64 handle; + + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; + + afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, create); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], create); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + if (nla[NFTA_RULE_HANDLE]) { + handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE])); + rule = __nf_tables_rule_lookup(chain, handle); + if (IS_ERR(rule)) + return PTR_ERR(rule); + + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + old_rule = rule; + else + return -EOPNOTSUPP; + } else { + if (!create || nlh->nlmsg_flags & NLM_F_REPLACE) + return -EINVAL; + handle = nf_tables_alloc_handle(table); + } + + n = 0; + size = 0; + if (nla[NFTA_RULE_EXPRESSIONS]) { + nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) { + err = -EINVAL; + if (nla_type(tmp) != NFTA_LIST_ELEM) + goto err1; + if (n == NFT_RULE_MAXEXPRS) + goto err1; + err = nf_tables_expr_parse(tmp, &info[n]); + if (err < 0) + goto err1; + size += info[n].ops->size; + n++; + } + } + + err = -ENOMEM; + rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL); + if (rule == NULL) + goto err1; + + rule->handle = handle; + rule->dlen = size; + + nft_ctx_init(&ctx, afi, table, chain); + expr = nft_expr_first(rule); + for (i = 0; i < n; i++) { + err = nf_tables_newexpr(&ctx, &info[i], expr); + if (err < 0) + goto err2; + expr = nft_expr_next(expr); + } + + /* Register hook when first rule is inserted into a base chain */ + if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN) { + err = nf_register_hook(&nft_base_chain(chain)->ops); + if (err < 0) + goto err2; + } + + if (nlh->nlmsg_flags & NLM_F_REPLACE) { + list_replace_rcu(&old_rule->list, &rule->list); + nf_tables_rule_destroy(old_rule); + } else if (nlh->nlmsg_flags & NLM_F_APPEND) + list_add_tail_rcu(&rule->list, &chain->rules); + else + list_add_rcu(&rule->list, &chain->rules); + + nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE, + nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE), + nfmsg->nfgen_family); + return 0; + +err2: + nf_tables_rule_destroy(rule); +err1: + for (i = 0; i < n; i++) { + if (info[i].ops != NULL) + module_put(info[i].ops->owner); + } + return err; +} + +static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + struct nft_chain *chain; + struct nft_rule *rule, *tmp; + int family = nfmsg->nfgen_family; + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + if (nla[NFTA_RULE_HANDLE]) { + rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); + if (IS_ERR(rule)) + return PTR_ERR(rule); + + /* List removal must be visible before destroying expressions */ + list_del_rcu(&rule->list); + + nf_tables_rule_notify(skb, nlh, table, chain, rule, + NFT_MSG_DELRULE, 0, family); + nf_tables_rule_destroy(rule); + } else { + /* Remove all rules in this chain */ + list_for_each_entry_safe(rule, tmp, &chain->rules, list) { + list_del_rcu(&rule->list); + + nf_tables_rule_notify(skb, nlh, table, chain, rule, + NFT_MSG_DELRULE, 0, family); + nf_tables_rule_destroy(rule); + } + } + + /* Unregister hook when last rule from base chain is deleted */ + if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN) + nf_unregister_hook(&nft_base_chain(chain)->ops); + + return 0; +} + +static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { + [NFT_MSG_NEWTABLE] = { + .call = nf_tables_newtable, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_table_policy, + }, + [NFT_MSG_GETTABLE] = { + .call = nf_tables_gettable, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_table_policy, + }, + [NFT_MSG_DELTABLE] = { + .call = nf_tables_deltable, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_table_policy, + }, + [NFT_MSG_NEWCHAIN] = { + .call = nf_tables_newchain, + .attr_count = NFTA_CHAIN_MAX, + .policy = nft_chain_policy, + }, + [NFT_MSG_GETCHAIN] = { + .call = nf_tables_getchain, + .attr_count = NFTA_CHAIN_MAX, + .policy = nft_chain_policy, + }, + [NFT_MSG_DELCHAIN] = { + .call = nf_tables_delchain, + .attr_count = NFTA_CHAIN_MAX, + .policy = nft_chain_policy, + }, + [NFT_MSG_NEWRULE] = { + .call = nf_tables_newrule, + .attr_count = NFTA_RULE_MAX, + .policy = nft_rule_policy, + }, + [NFT_MSG_GETRULE] = { + .call = nf_tables_getrule, + .attr_count = NFTA_RULE_MAX, + .policy = nft_rule_policy, + }, + [NFT_MSG_DELRULE] = { + .call = nf_tables_delrule, + .attr_count = NFTA_RULE_MAX, + .policy = nft_rule_policy, + }, +}; + +static const struct nfnetlink_subsystem nf_tables_subsys = { + .name = "nf_tables", + .subsys_id = NFNL_SUBSYS_NFTABLES, + .cb_count = NFT_MSG_MAX, + .cb = nf_tables_cb, +}; + +/** + * nft_validate_input_register - validate an expressions' input register + * + * @reg: the register number + * + * Validate that the input register is one of the general purpose + * registers. + */ +int nft_validate_input_register(enum nft_registers reg) +{ + if (reg <= NFT_REG_VERDICT) + return -EINVAL; + if (reg > NFT_REG_MAX) + return -ERANGE; + return 0; +} +EXPORT_SYMBOL_GPL(nft_validate_input_register); + +/** + * nft_validate_output_register - validate an expressions' output register + * + * @reg: the register number + * + * Validate that the output register is one of the general purpose + * registers or the verdict register. + */ +int nft_validate_output_register(enum nft_registers reg) +{ + if (reg < NFT_REG_VERDICT) + return -EINVAL; + if (reg > NFT_REG_MAX) + return -ERANGE; + return 0; +} +EXPORT_SYMBOL_GPL(nft_validate_output_register); + +/** + * nft_validate_data_load - validate an expressions' data load + * + * @ctx: context of the expression performing the load + * @reg: the destination register number + * @data: the data to load + * @type: the data type + * + * Validate that a data load uses the appropriate data type for + * the destination register. A value of NULL for the data means + * that its runtime gathered data, which is always of type + * NFT_DATA_VALUE. + */ +int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type) +{ + switch (reg) { + case NFT_REG_VERDICT: + if (data == NULL || type != NFT_DATA_VERDICT) + return -EINVAL; + // FIXME: do loop detection + return 0; + default: + if (data != NULL && type != NFT_DATA_VALUE) + return -EINVAL; + return 0; + } +} +EXPORT_SYMBOL_GPL(nft_validate_data_load); + +static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = { + [NFTA_VERDICT_CODE] = { .type = NLA_U32 }, + [NFTA_VERDICT_CHAIN] = { .type = NLA_STRING, + .len = NFT_CHAIN_MAXNAMELEN - 1 }, +}; + +static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla) +{ + struct nlattr *tb[NFTA_VERDICT_MAX + 1]; + struct nft_chain *chain; + int err; + + err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy); + if (err < 0) + return err; + + if (!tb[NFTA_VERDICT_CODE]) + return -EINVAL; + data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); + + switch (data->verdict) { + case NF_ACCEPT: + case NF_DROP: + case NF_QUEUE: + case NFT_CONTINUE: + case NFT_BREAK: + case NFT_RETURN: + desc->len = sizeof(data->verdict); + break; + case NFT_JUMP: + case NFT_GOTO: + if (!tb[NFTA_VERDICT_CHAIN]) + return -EINVAL; + chain = nf_tables_chain_lookup(ctx->table, + tb[NFTA_VERDICT_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + if (chain->flags & NFT_BASE_CHAIN) + return -EOPNOTSUPP; + + if (ctx->chain->level + 1 > chain->level) { + if (ctx->chain->level + 1 == 16) + return -EMLINK; + chain->level = ctx->chain->level + 1; + } + chain->use++; + data->chain = chain; + desc->len = sizeof(data); + break; + default: + return -EINVAL; + } + + desc->type = NFT_DATA_VERDICT; + return 0; +} + +static void nft_verdict_uninit(const struct nft_data *data) +{ + switch (data->verdict) { + case NFT_JUMP: + case NFT_GOTO: + data->chain->use--; + break; + } +} + +static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_DATA_VERDICT); + if (!nest) + goto nla_put_failure; + + if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict))) + goto nla_put_failure; + + switch (data->verdict) { + case NFT_JUMP: + case NFT_GOTO: + if (nla_put_string(skb, NFTA_VERDICT_CHAIN, data->chain->name)) + goto nla_put_failure; + } + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -1; +} + +static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla) +{ + unsigned int len; + + len = nla_len(nla); + if (len == 0) + return -EINVAL; + if (len > sizeof(data->data)) + return -EOVERFLOW; + + nla_memcpy(data->data, nla, sizeof(data->data)); + desc->type = NFT_DATA_VALUE; + desc->len = len; + return 0; +} + +static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data, + unsigned int len) +{ + return nla_put(skb, NFTA_DATA_VALUE, len, data->data); +} + +static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { + [NFTA_DATA_VALUE] = { .type = NLA_BINARY, + .len = FIELD_SIZEOF(struct nft_data, data) }, + [NFTA_DATA_VERDICT] = { .type = NLA_NESTED }, +}; + +/** + * nft_data_init - parse nf_tables data netlink attributes + * + * @ctx: context of the expression using the data + * @data: destination struct nft_data + * @desc: data description + * @nla: netlink attribute containing data + * + * Parse the netlink data attributes and initialize a struct nft_data. + * The type and length of data are returned in the data description. + * + * The caller can indicate that it only wants to accept data of type + * NFT_DATA_VALUE by passing NULL for the ctx argument. + */ +int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla) +{ + struct nlattr *tb[NFTA_DATA_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy); + if (err < 0) + return err; + + if (tb[NFTA_DATA_VALUE]) + return nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]); + if (tb[NFTA_DATA_VERDICT] && ctx != NULL) + return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]); + return -EINVAL; +} +EXPORT_SYMBOL_GPL(nft_data_init); + +/** + * nft_data_uninit - release a nft_data item + * + * @data: struct nft_data to release + * @type: type of data + * + * Release a nft_data item. NFT_DATA_VALUE types can be silently discarded, + * all others need to be released by calling this function. + */ +void nft_data_uninit(const struct nft_data *data, enum nft_data_types type) +{ + switch (type) { + case NFT_DATA_VALUE: + return; + case NFT_DATA_VERDICT: + return nft_verdict_uninit(data); + default: + WARN_ON(1); + } +} +EXPORT_SYMBOL_GPL(nft_data_uninit); + +int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, + enum nft_data_types type, unsigned int len) +{ + struct nlattr *nest; + int err; + + nest = nla_nest_start(skb, attr); + if (nest == NULL) + return -1; + + switch (type) { + case NFT_DATA_VALUE: + err = nft_value_dump(skb, data, len); + break; + case NFT_DATA_VERDICT: + err = nft_verdict_dump(skb, data); + break; + default: + err = -EINVAL; + WARN_ON(1); + } + + nla_nest_end(skb, nest); + return err; +} +EXPORT_SYMBOL_GPL(nft_data_dump); + +static int __init nf_tables_module_init(void) +{ + int err; + + info = kmalloc(sizeof(struct nft_expr_info) * NFT_RULE_MAXEXPRS, + GFP_KERNEL); + if (info == NULL) { + err = -ENOMEM; + goto err1; + } + + err = nf_tables_core_module_init(); + if (err < 0) + goto err2; + + err = nfnetlink_subsys_register(&nf_tables_subsys); + if (err < 0) + goto err3; + + pr_info("nf_tables: (c) 2007-2009 Patrick McHardy \n"); + return 0; +err3: + nf_tables_core_module_exit(); +err2: + kfree(info); +err1: + return err; +} + +static void __exit nf_tables_module_exit(void) +{ + nfnetlink_subsys_unregister(&nf_tables_subsys); + nf_tables_core_module_exit(); + kfree(info); +} + +module_init(nf_tables_module_init); +module_exit(nf_tables_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c new file mode 100644 index 000000000000..bc7fb85d4002 --- /dev/null +++ b/net/netfilter/nf_tables_core.c @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NFT_JUMP_STACK_SIZE 16 + +unsigned int nft_do_chain(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + const struct nft_chain *chain = ops->priv; + const struct nft_rule *rule; + const struct nft_expr *expr, *last; + struct nft_data data[NFT_REG_MAX + 1]; + const struct nft_pktinfo pkt = { + .skb = skb, + .in = in, + .out = out, + .hooknum = ops->hooknum, + }; + unsigned int stackptr = 0; + struct { + const struct nft_chain *chain; + const struct nft_rule *rule; + } jumpstack[NFT_JUMP_STACK_SIZE]; + +do_chain: + rule = list_entry(&chain->rules, struct nft_rule, list); +next_rule: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + list_for_each_entry_continue_rcu(rule, &chain->rules, list) { + nft_rule_for_each_expr(expr, last, rule) { + expr->ops->eval(expr, data, &pkt); + if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE) + break; + } + + switch (data[NFT_REG_VERDICT].verdict) { + case NFT_BREAK: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + /* fall through */ + case NFT_CONTINUE: + continue; + } + break; + } + + switch (data[NFT_REG_VERDICT].verdict) { + case NF_ACCEPT: + case NF_DROP: + case NF_QUEUE: + return data[NFT_REG_VERDICT].verdict; + case NFT_JUMP: + BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE); + jumpstack[stackptr].chain = chain; + jumpstack[stackptr].rule = rule; + stackptr++; + /* fall through */ + case NFT_GOTO: + chain = data[NFT_REG_VERDICT].chain; + goto do_chain; + case NFT_RETURN: + case NFT_CONTINUE: + break; + default: + WARN_ON(1); + } + + if (stackptr > 0) { + stackptr--; + chain = jumpstack[stackptr].chain; + rule = jumpstack[stackptr].rule; + goto next_rule; + } + + return NF_ACCEPT; +} +EXPORT_SYMBOL_GPL(nft_do_chain); + +int __init nf_tables_core_module_init(void) +{ + int err; + + err = nft_immediate_module_init(); + if (err < 0) + goto err1; + + err = nft_cmp_module_init(); + if (err < 0) + goto err2; + + err = nft_lookup_module_init(); + if (err < 0) + goto err3; + + err = nft_bitwise_module_init(); + if (err < 0) + goto err4; + + err = nft_byteorder_module_init(); + if (err < 0) + goto err5; + + err = nft_payload_module_init(); + if (err < 0) + goto err6; + + return 0; + +err6: + nft_byteorder_module_exit(); +err5: + nft_bitwise_module_exit(); +err4: + nft_lookup_module_exit(); +err3: + nft_cmp_module_exit(); +err2: + nft_immediate_module_exit(); +err1: + return err; +} + +void nf_tables_core_module_exit(void) +{ + nft_payload_module_exit(); + nft_byteorder_module_exit(); + nft_bitwise_module_exit(); + nft_lookup_module_exit(); + nft_cmp_module_exit(); + nft_immediate_module_exit(); +} diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c new file mode 100644 index 000000000000..0f7501506367 --- /dev/null +++ b/net/netfilter/nft_bitwise.c @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_bitwise { + enum nft_registers sreg:8; + enum nft_registers dreg:8; + u8 len; + struct nft_data mask; + struct nft_data xor; +}; + +static void nft_bitwise_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_bitwise *priv = nft_expr_priv(expr); + const struct nft_data *src = &data[priv->sreg]; + struct nft_data *dst = &data[priv->dreg]; + unsigned int i; + + for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) { + dst->data[i] = (src->data[i] & priv->mask.data[i]) ^ + priv->xor.data[i]; + } +} + +static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = { + [NFTA_BITWISE_SREG] = { .type = NLA_U32 }, + [NFTA_BITWISE_DREG] = { .type = NLA_U32 }, + [NFTA_BITWISE_LEN] = { .type = NLA_U32 }, + [NFTA_BITWISE_MASK] = { .type = NLA_NESTED }, + [NFTA_BITWISE_XOR] = { .type = NLA_NESTED }, +}; + +static int nft_bitwise_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_bitwise *priv = nft_expr_priv(expr); + struct nft_data_desc d1, d2; + int err; + + if (tb[NFTA_BITWISE_SREG] == NULL || + tb[NFTA_BITWISE_DREG] == NULL || + tb[NFTA_BITWISE_LEN] == NULL || + tb[NFTA_BITWISE_MASK] == NULL || + tb[NFTA_BITWISE_XOR] == NULL) + return -EINVAL; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + return err; + + priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN])); + + err = nft_data_init(NULL, &priv->mask, &d1, tb[NFTA_BITWISE_MASK]); + if (err < 0) + return err; + if (d1.len != priv->len) + return -EINVAL; + + err = nft_data_init(NULL, &priv->xor, &d2, tb[NFTA_BITWISE_XOR]); + if (err < 0) + return err; + if (d2.len != priv->len) + return -EINVAL; + + return 0; +} + +static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_bitwise *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_BITWISE_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BITWISE_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(priv->len))) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_BITWISE_MASK, &priv->mask, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_BITWISE_XOR, &priv->xor, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_bitwise_ops __read_mostly = { + .name = "bitwise", + .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), + .owner = THIS_MODULE, + .eval = nft_bitwise_eval, + .init = nft_bitwise_init, + .dump = nft_bitwise_dump, + .policy = nft_bitwise_policy, + .maxattr = NFTA_BITWISE_MAX, +}; + +int __init nft_bitwise_module_init(void) +{ + return nft_register_expr(&nft_bitwise_ops); +} + +void nft_bitwise_module_exit(void) +{ + nft_unregister_expr(&nft_bitwise_ops); +} diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c new file mode 100644 index 000000000000..8b0657a4d17b --- /dev/null +++ b/net/netfilter/nft_byteorder.c @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_byteorder { + enum nft_registers sreg:8; + enum nft_registers dreg:8; + enum nft_byteorder_ops op:8; + u8 len; + u8 size; +}; + +static void nft_byteorder_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_byteorder *priv = nft_expr_priv(expr); + struct nft_data *src = &data[priv->sreg], *dst = &data[priv->dreg]; + union { u32 u32; u16 u16; } *s, *d; + unsigned int i; + + s = (void *)src->data; + d = (void *)dst->data; + + switch (priv->size) { + case 4: + switch (priv->op) { + case NFT_BYTEORDER_NTOH: + for (i = 0; i < priv->len / 4; i++) + d[i].u32 = ntohl((__force __be32)s[i].u32); + break; + case NFT_BYTEORDER_HTON: + for (i = 0; i < priv->len / 4; i++) + d[i].u32 = (__force __u32)htonl(s[i].u32); + break; + } + break; + case 2: + switch (priv->op) { + case NFT_BYTEORDER_NTOH: + for (i = 0; i < priv->len / 2; i++) + d[i].u16 = ntohs((__force __be16)s[i].u16); + break; + case NFT_BYTEORDER_HTON: + for (i = 0; i < priv->len / 2; i++) + d[i].u16 = (__force __u16)htons(s[i].u16); + break; + } + break; + } +} + +static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = { + [NFTA_BYTEORDER_SREG] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_DREG] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_OP] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_LEN] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_SIZE] = { .type = NLA_U32 }, +}; + +static int nft_byteorder_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_byteorder *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_BYTEORDER_SREG] == NULL || + tb[NFTA_BYTEORDER_DREG] == NULL || + tb[NFTA_BYTEORDER_LEN] == NULL || + tb[NFTA_BYTEORDER_SIZE] == NULL || + tb[NFTA_BYTEORDER_OP] == NULL) + return -EINVAL; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + return err; + + priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP])); + switch (priv->op) { + case NFT_BYTEORDER_NTOH: + case NFT_BYTEORDER_HTON: + break; + default: + return -EINVAL; + } + + priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN])); + if (priv->len == 0 || priv->len > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE])); + switch (priv->size) { + case 2: + case 4: + break; + default: + return -EINVAL; + } + + return 0; +} + +static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_byteorder *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_BYTEORDER_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->len))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(priv->size))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_byteorder_ops __read_mostly = { + .name = "byteorder", + .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), + .owner = THIS_MODULE, + .eval = nft_byteorder_eval, + .init = nft_byteorder_init, + .dump = nft_byteorder_dump, + .policy = nft_byteorder_policy, + .maxattr = NFTA_BYTEORDER_MAX, +}; + +int __init nft_byteorder_module_init(void) +{ + return nft_register_expr(&nft_byteorder_ops); +} + +void nft_byteorder_module_exit(void) +{ + nft_unregister_expr(&nft_byteorder_ops); +} diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c new file mode 100644 index 000000000000..e734d670120a --- /dev/null +++ b/net/netfilter/nft_cmp.c @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_cmp_expr { + struct nft_data data; + enum nft_registers sreg:8; + u8 len; + enum nft_cmp_ops op:8; +}; + +static void nft_cmp_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_cmp_expr *priv = nft_expr_priv(expr); + int d; + + d = nft_data_cmp(&data[priv->sreg], &priv->data, priv->len); + switch (priv->op) { + case NFT_CMP_EQ: + if (d != 0) + goto mismatch; + break; + case NFT_CMP_NEQ: + if (d == 0) + goto mismatch; + break; + case NFT_CMP_LT: + if (d == 0) + goto mismatch; + case NFT_CMP_LTE: + if (d > 0) + goto mismatch; + break; + case NFT_CMP_GT: + if (d == 0) + goto mismatch; + case NFT_CMP_GTE: + if (d < 0) + goto mismatch; + break; + } + return; + +mismatch: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = { + [NFTA_CMP_SREG] = { .type = NLA_U32 }, + [NFTA_CMP_OP] = { .type = NLA_U32 }, + [NFTA_CMP_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_cmp_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc; + int err; + + if (tb[NFTA_CMP_SREG] == NULL || + tb[NFTA_CMP_OP] == NULL || + tb[NFTA_CMP_DATA] == NULL) + return -EINVAL; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); + switch (priv->op) { + case NFT_CMP_EQ: + case NFT_CMP_NEQ: + case NFT_CMP_LT: + case NFT_CMP_LTE: + case NFT_CMP_GT: + case NFT_CMP_GTE: + break; + default: + return -EINVAL; + } + + err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]); + if (err < 0) + return err; + + priv->len = desc.len; + return 0; +} + +static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_cmp_expr *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_CMP_OP, htonl(priv->op))) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_cmp_ops __read_mostly = { + .name = "cmp", + .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)), + .owner = THIS_MODULE, + .eval = nft_cmp_eval, + .init = nft_cmp_init, + .dump = nft_cmp_dump, + .policy = nft_cmp_policy, + .maxattr = NFTA_CMP_MAX, +}; + +int __init nft_cmp_module_init(void) +{ + return nft_register_expr(&nft_cmp_ops); +} + +void nft_cmp_module_exit(void) +{ + nft_unregister_expr(&nft_cmp_ops); +} diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c new file mode 100644 index 000000000000..33c5d36819bb --- /dev/null +++ b/net/netfilter/nft_counter.c @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_counter { + seqlock_t lock; + u64 bytes; + u64 packets; +}; + +static void nft_counter_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_counter *priv = nft_expr_priv(expr); + + write_seqlock_bh(&priv->lock); + priv->bytes += pkt->skb->len; + priv->packets++; + write_sequnlock_bh(&priv->lock); +} + +static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_counter *priv = nft_expr_priv(expr); + unsigned int seq; + u64 bytes; + u64 packets; + + do { + seq = read_seqbegin(&priv->lock); + bytes = priv->bytes; + packets = priv->packets; + } while (read_seqretry(&priv->lock, seq)); + + if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(bytes))) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(packets))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { + [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 }, + [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, +}; + +static int nft_counter_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_counter *priv = nft_expr_priv(expr); + + if (tb[NFTA_COUNTER_PACKETS]) + priv->packets = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + if (tb[NFTA_COUNTER_BYTES]) + priv->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); + + seqlock_init(&priv->lock); + return 0; +} + +static struct nft_expr_ops nft_counter_ops __read_mostly = { + .name = "counter", + .size = NFT_EXPR_SIZE(sizeof(struct nft_counter)), + .policy = nft_counter_policy, + .maxattr = NFTA_COUNTER_MAX, + .owner = THIS_MODULE, + .eval = nft_counter_eval, + .init = nft_counter_init, + .dump = nft_counter_dump, +}; + +static int __init nft_counter_module_init(void) +{ + return nft_register_expr(&nft_counter_ops); +} + +static void __exit nft_counter_module_exit(void) +{ + nft_unregister_expr(&nft_counter_ops); +} + +module_init(nft_counter_module_init); +module_exit(nft_counter_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("counter"); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c new file mode 100644 index 000000000000..a1756d678226 --- /dev/null +++ b/net/netfilter/nft_ct.c @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_ct { + enum nft_ct_keys key:8; + enum ip_conntrack_dir dir:8; + enum nft_registers dreg:8; + uint8_t family; +}; + +static void nft_ct_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_ct *priv = nft_expr_priv(expr); + struct nft_data *dest = &data[priv->dreg]; + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct; + const struct nf_conn_help *help; + const struct nf_conntrack_tuple *tuple; + const struct nf_conntrack_helper *helper; + long diff; + unsigned int state; + + ct = nf_ct_get(pkt->skb, &ctinfo); + + switch (priv->key) { + case NFT_CT_STATE: + if (ct == NULL) + state = NF_CT_STATE_INVALID_BIT; + else if (nf_ct_is_untracked(ct)) + state = NF_CT_STATE_UNTRACKED_BIT; + else + state = NF_CT_STATE_BIT(ctinfo); + dest->data[0] = state; + return; + } + + if (ct == NULL) + goto err; + + switch (priv->key) { + case NFT_CT_DIRECTION: + dest->data[0] = CTINFO2DIR(ctinfo); + return; + case NFT_CT_STATUS: + dest->data[0] = ct->status; + return; +#ifdef CONFIG_NF_CONNTRACK_MARK + case NFT_CT_MARK: + dest->data[0] = ct->mark; + return; +#endif +#ifdef CONFIG_NF_CONNTRACK_SECMARK + case NFT_CT_SECMARK: + dest->data[0] = ct->secmark; + return; +#endif + case NFT_CT_EXPIRATION: + diff = (long)jiffies - (long)ct->timeout.expires; + if (diff < 0) + diff = 0; + dest->data[0] = jiffies_to_msecs(diff); + return; + case NFT_CT_HELPER: + if (ct->master == NULL) + goto err; + help = nfct_help(ct->master); + if (help == NULL) + goto err; + helper = rcu_dereference(help->helper); + if (helper == NULL) + goto err; + if (strlen(helper->name) >= sizeof(dest->data)) + goto err; + strncpy((char *)dest->data, helper->name, sizeof(dest->data)); + return; + } + + tuple = &ct->tuplehash[priv->dir].tuple; + switch (priv->key) { + case NFT_CT_L3PROTOCOL: + dest->data[0] = nf_ct_l3num(ct); + return; + case NFT_CT_SRC: + memcpy(dest->data, tuple->src.u3.all, + nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); + return; + case NFT_CT_DST: + memcpy(dest->data, tuple->dst.u3.all, + nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); + return; + case NFT_CT_PROTOCOL: + dest->data[0] = nf_ct_protonum(ct); + return; + case NFT_CT_PROTO_SRC: + dest->data[0] = (__force __u16)tuple->src.u.all; + return; + case NFT_CT_PROTO_DST: + dest->data[0] = (__force __u16)tuple->dst.u.all; + return; + } + return; +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = { + [NFTA_CT_DREG] = { .type = NLA_U32 }, + [NFTA_CT_KEY] = { .type = NLA_U32 }, + [NFTA_CT_DIRECTION] = { .type = NLA_U8 }, +}; + +static int nft_ct_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_ct *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_CT_DREG] == NULL || + tb[NFTA_CT_KEY] == NULL) + return -EINVAL; + + priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); + if (tb[NFTA_CT_DIRECTION] != NULL) { + priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]); + switch (priv->dir) { + case IP_CT_DIR_ORIGINAL: + case IP_CT_DIR_REPLY: + break; + default: + return -EINVAL; + } + } + + switch (priv->key) { + case NFT_CT_STATE: + case NFT_CT_DIRECTION: + case NFT_CT_STATUS: +#ifdef CONFIG_NF_CONNTRACK_MARK + case NFT_CT_MARK: +#endif +#ifdef CONFIG_NF_CONNTRACK_SECMARK + case NFT_CT_SECMARK: +#endif + case NFT_CT_EXPIRATION: + case NFT_CT_HELPER: + if (tb[NFTA_CT_DIRECTION] != NULL) + return -EINVAL; + break; + case NFT_CT_PROTOCOL: + case NFT_CT_SRC: + case NFT_CT_DST: + case NFT_CT_PROTO_SRC: + case NFT_CT_PROTO_DST: + if (tb[NFTA_CT_DIRECTION] == NULL) + return -EINVAL; + break; + default: + return -EOPNOTSUPP; + } + + err = nf_ct_l3proto_try_module_get(ctx->afi->family); + if (err < 0) + return err; + priv->family = ctx->afi->family; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + goto err1; + + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + goto err1; + return 0; + +err1: + nf_ct_l3proto_module_put(ctx->afi->family); + return err; +} + +static void nft_ct_destroy(const struct nft_expr *expr) +{ + struct nft_ct *priv = nft_expr_priv(expr); + + nf_ct_l3proto_module_put(priv->family); +} + +static int nft_ct_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_ct *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_CT_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key))) + goto nla_put_failure; + if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_ct_ops __read_mostly = { + .name = "ct", + .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), + .owner = THIS_MODULE, + .eval = nft_ct_eval, + .init = nft_ct_init, + .destroy = nft_ct_destroy, + .dump = nft_ct_dump, + .policy = nft_ct_policy, + .maxattr = NFTA_CT_MAX, +}; + +static int __init nft_ct_module_init(void) +{ + return nft_register_expr(&nft_ct_ops); +} + +static void __exit nft_ct_module_exit(void) +{ + nft_unregister_expr(&nft_ct_ops); +} + +module_init(nft_ct_module_init); +module_exit(nft_ct_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("ct"); diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c new file mode 100644 index 000000000000..9fc8eb308193 --- /dev/null +++ b/net/netfilter/nft_expr_template.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include + +struct nft_template { + +}; + +static void nft_template_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_template *priv = nft_expr_priv(expr); + +} + +static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = { + [NFTA_TEMPLATE_ATTR] = { .type = NLA_U32 }, +}; + +static int nft_template_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr *tb[]) +{ + struct nft_template *priv = nft_expr_priv(expr); + + return 0; +} + +static void nft_template_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_template *priv = nft_expr_priv(expr); + +} + +static int nft_template_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_template *priv = nft_expr_priv(expr); + + NLA_PUT_BE32(skb, NFTA_TEMPLATE_ATTR, priv->field); + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops template_ops __read_mostly = { + .name = "template", + .size = NFT_EXPR_SIZE(sizeof(struct nft_template)), + .owner = THIS_MODULE, + .eval = nft_template_eval, + .init = nft_template_init, + .destroy = nft_template_destroy, + .dump = nft_template_dump, + .policy = nft_template_policy, + .maxattr = NFTA_TEMPLATE_MAX, +}; + +static int __init nft_template_module_init(void) +{ + return nft_register_expr(&template_ops); +} + +static void __exit nft_template_module_exit(void) +{ + nft_unregister_expr(&template_ops); +} + +module_init(nft_template_module_init); +module_exit(nft_template_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("template"); diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c new file mode 100644 index 000000000000..21c6a6b7b662 --- /dev/null +++ b/net/netfilter/nft_exthdr.c @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +// FIXME: +#include + +struct nft_exthdr { + u8 type; + u8 offset; + u8 len; + enum nft_registers dreg:8; +}; + +static void nft_exthdr_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_exthdr *priv = nft_expr_priv(expr); + struct nft_data *dest = &data[priv->dreg]; + unsigned int offset; + int err; + + err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL); + if (err < 0) + goto err; + offset += priv->offset; + + if (skb_copy_bits(pkt->skb, offset, dest->data, priv->len) < 0) + goto err; + return; +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { + [NFTA_EXTHDR_DREG] = { .type = NLA_U32 }, + [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 }, + [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, + [NFTA_EXTHDR_LEN] = { .type = NLA_U32 }, +}; + +static int nft_exthdr_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_exthdr *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_EXTHDR_DREG] == NULL || + tb[NFTA_EXTHDR_TYPE] == NULL || + tb[NFTA_EXTHDR_OFFSET] == NULL || + tb[NFTA_EXTHDR_LEN] == NULL) + return -EINVAL; + + priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); + priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET])); + priv->len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN])); + if (priv->len == 0 || + priv->len > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_EXTHDR_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); +} + +static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_exthdr *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_EXTHDR_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops exthdr_ops __read_mostly = { + .name = "exthdr", + .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), + .owner = THIS_MODULE, + .eval = nft_exthdr_eval, + .init = nft_exthdr_init, + .dump = nft_exthdr_dump, + .policy = nft_exthdr_policy, + .maxattr = NFTA_EXTHDR_MAX, +}; + +static int __init nft_exthdr_module_init(void) +{ + return nft_register_expr(&exthdr_ops); +} + +static void __exit nft_exthdr_module_exit(void) +{ + nft_unregister_expr(&exthdr_ops); +} + +module_init(nft_exthdr_module_init); +module_exit(nft_exthdr_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("exthdr"); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c new file mode 100644 index 000000000000..67cc502881f1 --- /dev/null +++ b/net/netfilter/nft_hash.c @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_hash { + struct hlist_head *hash; + unsigned int hsize; + enum nft_registers sreg:8; + enum nft_registers dreg:8; + u8 klen; + u8 dlen; + u16 flags; +}; + +struct nft_hash_elem { + struct hlist_node hnode; + struct nft_data key; + struct nft_data data[]; +}; + +static u32 nft_hash_rnd __read_mostly; +static bool nft_hash_rnd_initted __read_mostly; + +static unsigned int nft_hash_data(const struct nft_data *data, + unsigned int hsize, unsigned int len) +{ + unsigned int h; + + // FIXME: can we reasonably guarantee the upper bits are fixed? + h = jhash2(data->data, len >> 2, nft_hash_rnd); + return ((u64)h * hsize) >> 32; +} + +static void nft_hash_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_hash *priv = nft_expr_priv(expr); + const struct nft_hash_elem *elem; + const struct nft_data *key = &data[priv->sreg]; + unsigned int h; + + h = nft_hash_data(key, priv->hsize, priv->klen); + hlist_for_each_entry(elem, &priv->hash[h], hnode) { + if (nft_data_cmp(&elem->key, key, priv->klen)) + continue; + if (priv->flags & NFT_HASH_MAP) + nft_data_copy(&data[priv->dreg], elem->data); + return; + } + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static void nft_hash_elem_destroy(const struct nft_expr *expr, + struct nft_hash_elem *elem) +{ + const struct nft_hash *priv = nft_expr_priv(expr); + + nft_data_uninit(&elem->key, NFT_DATA_VALUE); + if (priv->flags & NFT_HASH_MAP) + nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg)); + kfree(elem); +} + +static const struct nla_policy nft_he_policy[NFTA_HE_MAX + 1] = { + [NFTA_HE_KEY] = { .type = NLA_NESTED }, + [NFTA_HE_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_hash_elem_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr *nla, + struct nft_hash_elem **new) +{ + struct nft_hash *priv = nft_expr_priv(expr); + struct nlattr *tb[NFTA_HE_MAX + 1]; + struct nft_hash_elem *elem; + struct nft_data_desc d1, d2; + unsigned int size; + int err; + + err = nla_parse_nested(tb, NFTA_HE_MAX, nla, nft_he_policy); + if (err < 0) + return err; + + if (tb[NFTA_HE_KEY] == NULL) + return -EINVAL; + size = sizeof(*elem); + + if (priv->flags & NFT_HASH_MAP) { + if (tb[NFTA_HE_DATA] == NULL) + return -EINVAL; + size += sizeof(elem->data[0]); + } else { + if (tb[NFTA_HE_DATA] != NULL) + return -EINVAL; + } + + elem = kzalloc(size, GFP_KERNEL); + if (elem == NULL) + return -ENOMEM; + + err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_HE_KEY]); + if (err < 0) + goto err1; + err = -EINVAL; + if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen) + goto err2; + + if (tb[NFTA_HE_DATA] != NULL) { + err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_HE_DATA]); + if (err < 0) + goto err2; + err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type); + if (err < 0) + goto err3; + } + + *new = elem; + return 0; + +err3: + nft_data_uninit(elem->data, d2.type); +err2: + nft_data_uninit(&elem->key, d1.type); +err1: + kfree(elem); + return err; +} + +static int nft_hash_elem_dump(struct sk_buff *skb, const struct nft_expr *expr, + const struct nft_hash_elem *elem) + +{ + const struct nft_hash *priv = nft_expr_priv(expr); + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_LIST_ELEM); + if (nest == NULL) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_HE_KEY, &elem->key, + NFT_DATA_VALUE, priv->klen) < 0) + goto nla_put_failure; + + if (priv->flags & NFT_HASH_MAP) { + if (nft_data_dump(skb, NFTA_HE_DATA, elem->data, + NFT_DATA_VALUE, priv->dlen) < 0) + goto nla_put_failure; + } + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -1; +} + +static void nft_hash_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + const struct nft_hash *priv = nft_expr_priv(expr); + const struct hlist_node *next; + struct nft_hash_elem *elem; + unsigned int i; + + for (i = 0; i < priv->hsize; i++) { + hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) { + hlist_del(&elem->hnode); + nft_hash_elem_destroy(expr, elem); + } + } + kfree(priv->hash); +} + +static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { + [NFTA_HASH_FLAGS] = { .type = NLA_U32 }, + [NFTA_HASH_SREG] = { .type = NLA_U32 }, + [NFTA_HASH_DREG] = { .type = NLA_U32 }, + [NFTA_HASH_KLEN] = { .type = NLA_U32 }, + [NFTA_HASH_ELEMENTS] = { .type = NLA_NESTED }, +}; + +static int nft_hash_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_hash *priv = nft_expr_priv(expr); + struct nft_hash_elem *elem, *uninitialized_var(new); + const struct nlattr *nla; + unsigned int cnt, i; + unsigned int h; + int err, rem; + + if (unlikely(!nft_hash_rnd_initted)) { + get_random_bytes(&nft_hash_rnd, 4); + nft_hash_rnd_initted = true; + } + + if (tb[NFTA_HASH_SREG] == NULL || + tb[NFTA_HASH_KLEN] == NULL || + tb[NFTA_HASH_ELEMENTS] == NULL) + return -EINVAL; + + if (tb[NFTA_HASH_FLAGS] != NULL) { + priv->flags = ntohl(nla_get_be32(tb[NFTA_HASH_FLAGS])); + if (priv->flags & ~NFT_HASH_MAP) + return -EINVAL; + } + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_HASH_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + if (tb[NFTA_HASH_DREG] != NULL) { + if (!(priv->flags & NFT_HASH_MAP)) + return -EINVAL; + priv->dreg = ntohl(nla_get_be32(tb[NFTA_HASH_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + } + + priv->klen = ntohl(nla_get_be32(tb[NFTA_HASH_KLEN])); + if (priv->klen == 0) + return -EINVAL; + + cnt = 0; + nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) { + if (nla_type(nla) != NFTA_LIST_ELEM) + return -EINVAL; + cnt++; + } + + /* Aim for a load factor of 0.75 */ + cnt = cnt * 4 / 3; + + priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL); + if (priv->hash == NULL) + return -ENOMEM; + priv->hsize = cnt; + + for (i = 0; i < cnt; i++) + INIT_HLIST_HEAD(&priv->hash[i]); + + err = -ENOMEM; + nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) { + err = nft_hash_elem_init(ctx, expr, nla, &new); + if (err < 0) + goto err1; + + h = nft_hash_data(&new->key, priv->hsize, priv->klen); + hlist_for_each_entry(elem, &priv->hash[h], hnode) { + if (nft_data_cmp(&elem->key, &new->key, priv->klen)) + continue; + nft_hash_elem_destroy(expr, new); + err = -EEXIST; + goto err1; + } + hlist_add_head(&new->hnode, &priv->hash[h]); + } + return 0; + +err1: + nft_hash_destroy(ctx, expr); + return err; +} + +static int nft_hash_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_hash *priv = nft_expr_priv(expr); + const struct nft_hash_elem *elem; + struct nlattr *list; + unsigned int i; + + if (priv->flags) + if (nla_put_be32(skb, NFTA_HASH_FLAGS, htonl(priv->flags))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (priv->flags & NFT_HASH_MAP) + if (nla_put_be32(skb, NFTA_HASH_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_KLEN, htonl(priv->klen))) + goto nla_put_failure; + + list = nla_nest_start(skb, NFTA_HASH_ELEMENTS); + if (list == NULL) + goto nla_put_failure; + + for (i = 0; i < priv->hsize; i++) { + hlist_for_each_entry(elem, &priv->hash[i], hnode) { + if (nft_hash_elem_dump(skb, expr, elem) < 0) + goto nla_put_failure; + } + } + + nla_nest_end(skb, list); + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_hash_ops __read_mostly = { + .name = "hash", + .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)), + .owner = THIS_MODULE, + .eval = nft_hash_eval, + .init = nft_hash_init, + .destroy = nft_hash_destroy, + .dump = nft_hash_dump, + .policy = nft_hash_policy, + .maxattr = NFTA_HASH_MAX, +}; + +static int __init nft_hash_module_init(void) +{ + return nft_register_expr(&nft_hash_ops); +} + +static void __exit nft_hash_module_exit(void) +{ + nft_unregister_expr(&nft_hash_ops); +} + +module_init(nft_hash_module_init); +module_exit(nft_hash_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("hash"); diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c new file mode 100644 index 000000000000..3bf42c3cc49a --- /dev/null +++ b/net/netfilter/nft_immediate.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_immediate_expr { + struct nft_data data; + enum nft_registers dreg:8; + u8 dlen; +}; + +static void nft_immediate_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + nft_data_copy(&data[priv->dreg], &priv->data); +} + +static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = { + [NFTA_IMMEDIATE_DREG] = { .type = NLA_U32 }, + [NFTA_IMMEDIATE_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_immediate_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_immediate_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc; + int err; + + if (tb[NFTA_IMMEDIATE_DREG] == NULL || + tb[NFTA_IMMEDIATE_DATA] == NULL) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_IMMEDIATE_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]); + if (err < 0) + return err; + priv->dlen = desc.len; + + err = nft_validate_data_load(ctx, priv->dreg, &priv->data, desc.type); + if (err < 0) + goto err1; + + return 0; + +err1: + nft_data_uninit(&priv->data, desc.type); + return err; +} + +static void nft_immediate_destroy(const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg)); +} + +static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_IMMEDIATE_DREG, htonl(priv->dreg))) + goto nla_put_failure; + + return nft_data_dump(skb, NFTA_IMMEDIATE_DATA, &priv->data, + nft_dreg_to_type(priv->dreg), priv->dlen); + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_imm_ops __read_mostly = { + .name = "immediate", + .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), + .owner = THIS_MODULE, + .eval = nft_immediate_eval, + .init = nft_immediate_init, + .destroy = nft_immediate_destroy, + .dump = nft_immediate_dump, + .policy = nft_immediate_policy, + .maxattr = NFTA_IMMEDIATE_MAX, +}; + +int __init nft_immediate_module_init(void) +{ + return nft_register_expr(&nft_imm_ops); +} + +void nft_immediate_module_exit(void) +{ + nft_unregister_expr(&nft_imm_ops); +} diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c new file mode 100644 index 000000000000..e0e3fc8aebc3 --- /dev/null +++ b/net/netfilter/nft_limit.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_SPINLOCK(limit_lock); + +struct nft_limit { + u64 tokens; + u64 rate; + u64 unit; + unsigned long stamp; +}; + +static void nft_limit_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_limit *priv = nft_expr_priv(expr); + + spin_lock_bh(&limit_lock); + if (time_after_eq(jiffies, priv->stamp)) { + priv->tokens = priv->rate; + priv->stamp = jiffies + priv->unit * HZ; + } + + if (priv->tokens >= 1) { + priv->tokens--; + spin_unlock_bh(&limit_lock); + return; + } + spin_unlock_bh(&limit_lock); + + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = { + [NFTA_LIMIT_RATE] = { .type = NLA_U64 }, + [NFTA_LIMIT_UNIT] = { .type = NLA_U64 }, +}; + +static int nft_limit_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_limit *priv = nft_expr_priv(expr); + + if (tb[NFTA_LIMIT_RATE] == NULL || + tb[NFTA_LIMIT_UNIT] == NULL) + return -EINVAL; + + priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); + priv->unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT])); + priv->stamp = jiffies + priv->unit * HZ; + priv->tokens = priv->rate; + return 0; +} + +static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_limit *priv = nft_expr_priv(expr); + + if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate))) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(priv->unit))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_limit_ops __read_mostly = { + .name = "limit", + .size = NFT_EXPR_SIZE(sizeof(struct nft_limit)), + .owner = THIS_MODULE, + .eval = nft_limit_eval, + .init = nft_limit_init, + .dump = nft_limit_dump, + .policy = nft_limit_policy, + .maxattr = NFTA_LIMIT_MAX, +}; + +static int __init nft_limit_module_init(void) +{ + return nft_register_expr(&nft_limit_ops); +} + +static void __exit nft_limit_module_exit(void) +{ + nft_unregister_expr(&nft_limit_ops); +} + +module_init(nft_limit_module_init); +module_exit(nft_limit_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("limit"); diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c new file mode 100644 index 000000000000..da495c3b1e7e --- /dev/null +++ b/net/netfilter/nft_log.c @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const char *nft_log_null_prefix = ""; + +struct nft_log { + struct nf_loginfo loginfo; + char *prefix; + int family; +}; + +static void nft_log_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_log *priv = nft_expr_priv(expr); + struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); + + nf_log_packet(net, priv->family, pkt->hooknum, pkt->skb, pkt->in, + pkt->out, &priv->loginfo, "%s", priv->prefix); +} + +static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { + [NFTA_LOG_GROUP] = { .type = NLA_U16 }, + [NFTA_LOG_PREFIX] = { .type = NLA_STRING }, + [NFTA_LOG_SNAPLEN] = { .type = NLA_U32 }, + [NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 }, +}; + +static int nft_log_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_log *priv = nft_expr_priv(expr); + struct nf_loginfo *li = &priv->loginfo; + const struct nlattr *nla; + + priv->family = ctx->afi->family; + + nla = tb[NFTA_LOG_PREFIX]; + if (nla != NULL) { + priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL); + if (priv->prefix == NULL) + return -ENOMEM; + nla_strlcpy(priv->prefix, nla, nla_len(nla) + 1); + } else + priv->prefix = (char *)nft_log_null_prefix; + + li->type = NF_LOG_TYPE_ULOG; + if (tb[NFTA_LOG_GROUP] != NULL) + li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP])); + + if (tb[NFTA_LOG_SNAPLEN] != NULL) + li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN])); + if (tb[NFTA_LOG_QTHRESHOLD] != NULL) { + li->u.ulog.qthreshold = + ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD])); + } + + return 0; +} + +static void nft_log_destroy(const struct nft_expr *expr) +{ + struct nft_log *priv = nft_expr_priv(expr); + + if (priv->prefix != nft_log_null_prefix) + kfree(priv->prefix); +} + +static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_log *priv = nft_expr_priv(expr); + const struct nf_loginfo *li = &priv->loginfo; + + if (priv->prefix != nft_log_null_prefix) + if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix)) + goto nla_put_failure; + if (li->u.ulog.group) + if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group))) + goto nla_put_failure; + if (li->u.ulog.copy_len) + if (nla_put_be32(skb, NFTA_LOG_SNAPLEN, + htonl(li->u.ulog.copy_len))) + goto nla_put_failure; + if (li->u.ulog.qthreshold) + if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD, + htons(li->u.ulog.qthreshold))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_log_ops __read_mostly = { + .name = "log", + .size = NFT_EXPR_SIZE(sizeof(struct nft_log)), + .owner = THIS_MODULE, + .eval = nft_log_eval, + .init = nft_log_init, + .destroy = nft_log_destroy, + .dump = nft_log_dump, + .policy = nft_log_policy, + .maxattr = NFTA_LOG_MAX, +}; + +static int __init nft_log_module_init(void) +{ + return nft_register_expr(&nft_log_ops); +} + +static void __exit nft_log_module_exit(void) +{ + nft_unregister_expr(&nft_log_ops); +} + +module_init(nft_log_module_init); +module_exit(nft_log_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("log"); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c new file mode 100644 index 000000000000..96735aa2f039 --- /dev/null +++ b/net/netfilter/nft_meta.c @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for TCP_TIME_WAIT */ +#include + +struct nft_meta { + enum nft_meta_keys key:8; + enum nft_registers dreg:8; +}; + +static void nft_meta_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + const struct net_device *in = pkt->in, *out = pkt->out; + struct nft_data *dest = &data[priv->dreg]; + + switch (priv->key) { + case NFT_META_LEN: + dest->data[0] = skb->len; + break; + case NFT_META_PROTOCOL: + *(__be16 *)dest->data = skb->protocol; + break; + case NFT_META_PRIORITY: + dest->data[0] = skb->priority; + break; + case NFT_META_MARK: + dest->data[0] = skb->mark; + break; + case NFT_META_IIF: + if (in == NULL) + goto err; + dest->data[0] = in->ifindex; + break; + case NFT_META_OIF: + if (out == NULL) + goto err; + dest->data[0] = out->ifindex; + break; + case NFT_META_IIFNAME: + if (in == NULL) + goto err; + strncpy((char *)dest->data, in->name, sizeof(dest->data)); + break; + case NFT_META_OIFNAME: + if (out == NULL) + goto err; + strncpy((char *)dest->data, out->name, sizeof(dest->data)); + break; + case NFT_META_IIFTYPE: + if (in == NULL) + goto err; + *(u16 *)dest->data = in->type; + break; + case NFT_META_OIFTYPE: + if (out == NULL) + goto err; + *(u16 *)dest->data = out->type; + break; + case NFT_META_SKUID: + if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT) + goto err; + + read_lock_bh(&skb->sk->sk_callback_lock); + if (skb->sk->sk_socket == NULL || + skb->sk->sk_socket->file == NULL) { + read_unlock_bh(&skb->sk->sk_callback_lock); + goto err; + } + + dest->data[0] = + from_kuid_munged(&init_user_ns, + skb->sk->sk_socket->file->f_cred->fsuid); + read_unlock_bh(&skb->sk->sk_callback_lock); + break; + case NFT_META_SKGID: + if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT) + goto err; + + read_lock_bh(&skb->sk->sk_callback_lock); + if (skb->sk->sk_socket == NULL || + skb->sk->sk_socket->file == NULL) { + read_unlock_bh(&skb->sk->sk_callback_lock); + goto err; + } + dest->data[0] = + from_kgid_munged(&init_user_ns, + skb->sk->sk_socket->file->f_cred->fsgid); + read_unlock_bh(&skb->sk->sk_callback_lock); + break; +#ifdef CONFIG_NET_CLS_ROUTE + case NFT_META_RTCLASSID: { + const struct dst_entry *dst = skb_dst(skb); + + if (dst == NULL) + goto err; + dest->data[0] = dst->tclassid; + break; + } +#endif +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: + dest->data[0] = skb->secmark; + break; +#endif + default: + WARN_ON(1); + goto err; + } + return; + +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { + [NFTA_META_DREG] = { .type = NLA_U32 }, + [NFTA_META_KEY] = { .type = NLA_U32 }, +}; + +static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_META_DREG] == NULL || + tb[NFTA_META_KEY] == NULL) + return -EINVAL; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { + case NFT_META_LEN: + case NFT_META_PROTOCOL: + case NFT_META_PRIORITY: + case NFT_META_MARK: + case NFT_META_IIF: + case NFT_META_OIF: + case NFT_META_IIFNAME: + case NFT_META_OIFNAME: + case NFT_META_IIFTYPE: + case NFT_META_OIFTYPE: + case NFT_META_SKUID: + case NFT_META_SKGID: +#ifdef CONFIG_NET_CLS_ROUTE + case NFT_META_RTCLASSID: +#endif +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: +#endif + break; + default: + return -EOPNOTSUPP; + } + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); +} + +static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_meta_ops __read_mostly = { + .name = "meta", + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .owner = THIS_MODULE, + .eval = nft_meta_eval, + .init = nft_meta_init, + .dump = nft_meta_dump, + .policy = nft_meta_policy, + .maxattr = NFTA_META_MAX, +}; + +static int __init nft_meta_module_init(void) +{ + return nft_register_expr(&nft_meta_ops); +} + +static void __exit nft_meta_module_exit(void) +{ + nft_unregister_expr(&nft_meta_ops); +} + +module_init(nft_meta_module_init); +module_exit(nft_meta_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("meta"); diff --git a/net/netfilter/nft_meta_target.c b/net/netfilter/nft_meta_target.c new file mode 100644 index 000000000000..71177df75ffb --- /dev/null +++ b/net/netfilter/nft_meta_target.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_meta { + enum nft_meta_keys key; +}; + +static void nft_meta_eval(const struct nft_expr *expr, + struct nft_data *nfres, + struct nft_data *data, + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *meta = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + u32 val = data->data[0]; + + switch (meta->key) { + case NFT_META_MARK: + skb->mark = val; + break; + case NFT_META_PRIORITY: + skb->priority = val; + break; + case NFT_META_NFTRACE: + skb->nf_trace = val; + break; +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: + skb->secmark = val; + break; +#endif + default: + WARN_ON(1); + } +} + +static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { + [NFTA_META_KEY] = { .type = NLA_U32 }, +}; + +static int nft_meta_init(const struct nft_expr *expr, struct nlattr *tb[]) +{ + struct nft_meta *meta = nft_expr_priv(expr); + + if (tb[NFTA_META_KEY] == NULL) + return -EINVAL; + + meta->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (meta->key) { + case NFT_META_MARK: + case NFT_META_PRIORITY: + case NFT_META_NFTRACE: +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: +#endif + break; + default: + return -EINVAL; + } + + return 0; +} + +static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_meta *meta = nft_expr_priv(expr); + + NLA_PUT_BE32(skb, NFTA_META_KEY, htonl(meta->key)); + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops meta_target __read_mostly = { + .name = "meta", + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .owner = THIS_MODULE, + .eval = nft_meta_eval, + .init = nft_meta_init, + .dump = nft_meta_dump, + .policy = nft_meta_policy, + .maxattr = NFTA_META_MAX, +}; + +static int __init nft_meta_target_init(void) +{ + return nft_register_expr(&meta_target); +} + +static void __exit nft_meta_target_exit(void) +{ + nft_unregister_expr(&meta_target); +} + +module_init(nft_meta_target_init); +module_exit(nft_meta_target_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("meta"); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c new file mode 100644 index 000000000000..329f134b3f89 --- /dev/null +++ b/net/netfilter/nft_payload.c @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_payload { + enum nft_payload_bases base:8; + u8 offset; + u8 len; + enum nft_registers dreg:8; +}; + +static void nft_payload_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_payload *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + struct nft_data *dest = &data[priv->dreg]; + int offset; + + switch (priv->base) { + case NFT_PAYLOAD_LL_HEADER: + if (!skb_mac_header_was_set(skb)) + goto err; + offset = skb_mac_header(skb) - skb->data; + break; + case NFT_PAYLOAD_NETWORK_HEADER: + offset = skb_network_offset(skb); + break; + case NFT_PAYLOAD_TRANSPORT_HEADER: + offset = skb_transport_offset(skb); + break; + default: + BUG(); + } + offset += priv->offset; + + if (skb_copy_bits(skb, offset, dest->data, priv->len) < 0) + goto err; + return; +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { + [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, +}; + +static int nft_payload_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_payload *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_PAYLOAD_DREG] == NULL || + tb[NFTA_PAYLOAD_BASE] == NULL || + tb[NFTA_PAYLOAD_OFFSET] == NULL || + tb[NFTA_PAYLOAD_LEN] == NULL) + return -EINVAL; + + priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + switch (priv->base) { + case NFT_PAYLOAD_LL_HEADER: + case NFT_PAYLOAD_NETWORK_HEADER: + case NFT_PAYLOAD_TRANSPORT_HEADER: + break; + default: + return -EOPNOTSUPP; + } + + priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); + priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + if (priv->len == 0 || + priv->len > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); +} + +static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_payload *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_PAYLOAD_DREG, htonl(priv->dreg)) || + nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) || + nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) || + nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_payload_ops __read_mostly = { + .name = "payload", + .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)), + .owner = THIS_MODULE, + .eval = nft_payload_eval, + .init = nft_payload_init, + .dump = nft_payload_dump, + .policy = nft_payload_policy, + .maxattr = NFTA_PAYLOAD_MAX, +}; + +int __init nft_payload_module_init(void) +{ + return nft_register_expr(&nft_payload_ops); +} + +void nft_payload_module_exit(void) +{ + nft_unregister_expr(&nft_payload_ops); +} diff --git a/net/netfilter/nft_set.c b/net/netfilter/nft_set.c new file mode 100644 index 000000000000..7b7c8354c327 --- /dev/null +++ b/net/netfilter/nft_set.c @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_set { + struct rb_root root; + enum nft_registers sreg:8; + enum nft_registers dreg:8; + u8 klen; + u8 dlen; + u16 flags; +}; + +struct nft_set_elem { + struct rb_node node; + enum nft_set_elem_flags flags; + struct nft_data key; + struct nft_data data[]; +}; + +static void nft_set_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_set *priv = nft_expr_priv(expr); + const struct rb_node *parent = priv->root.rb_node; + const struct nft_set_elem *elem, *interval = NULL; + const struct nft_data *key = &data[priv->sreg]; + int d; + + while (parent != NULL) { + elem = rb_entry(parent, struct nft_set_elem, node); + + d = nft_data_cmp(&elem->key, key, priv->klen); + if (d < 0) { + parent = parent->rb_left; + interval = elem; + } else if (d > 0) + parent = parent->rb_right; + else { +found: + if (elem->flags & NFT_SE_INTERVAL_END) + goto out; + if (priv->flags & NFT_SET_MAP) + nft_data_copy(&data[priv->dreg], elem->data); + return; + } + } + + if (priv->flags & NFT_SET_INTERVAL && interval != NULL) { + elem = interval; + goto found; + } +out: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static void nft_set_elem_destroy(const struct nft_expr *expr, + struct nft_set_elem *elem) +{ + const struct nft_set *priv = nft_expr_priv(expr); + + nft_data_uninit(&elem->key, NFT_DATA_VALUE); + if (priv->flags & NFT_SET_MAP) + nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg)); + kfree(elem); +} + +static const struct nla_policy nft_se_policy[NFTA_SE_MAX + 1] = { + [NFTA_SE_KEY] = { .type = NLA_NESTED }, + [NFTA_SE_DATA] = { .type = NLA_NESTED }, + [NFTA_SE_FLAGS] = { .type = NLA_U32 }, +}; + +static int nft_set_elem_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr *nla, + struct nft_set_elem **new) +{ + struct nft_set *priv = nft_expr_priv(expr); + struct nlattr *tb[NFTA_SE_MAX + 1]; + struct nft_set_elem *elem; + struct nft_data_desc d1, d2; + enum nft_set_elem_flags flags = 0; + unsigned int size; + int err; + + err = nla_parse_nested(tb, NFTA_SE_MAX, nla, nft_se_policy); + if (err < 0) + return err; + + if (tb[NFTA_SE_KEY] == NULL) + return -EINVAL; + + if (tb[NFTA_SE_FLAGS] != NULL) { + flags = ntohl(nla_get_be32(tb[NFTA_SE_FLAGS])); + if (flags & ~NFT_SE_INTERVAL_END) + return -EINVAL; + } + + size = sizeof(*elem); + if (priv->flags & NFT_SET_MAP) { + if (tb[NFTA_SE_DATA] == NULL && !(flags & NFT_SE_INTERVAL_END)) + return -EINVAL; + size += sizeof(elem->data[0]); + } else { + if (tb[NFTA_SE_DATA] != NULL) + return -EINVAL; + } + + elem = kzalloc(size, GFP_KERNEL); + if (elem == NULL) + return -ENOMEM; + elem->flags = flags; + + err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_SE_KEY]); + if (err < 0) + goto err1; + err = -EINVAL; + if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen) + goto err2; + + if (tb[NFTA_SE_DATA] != NULL) { + err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_SE_DATA]); + if (err < 0) + goto err2; + err = -EINVAL; + if (priv->dreg != NFT_REG_VERDICT && d2.len != priv->dlen) + goto err2; + err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type); + if (err < 0) + goto err3; + } + + *new = elem; + return 0; + +err3: + nft_data_uninit(elem->data, d2.type); +err2: + nft_data_uninit(&elem->key, d1.type); +err1: + kfree(elem); + return err; +} + +static int nft_set_elem_dump(struct sk_buff *skb, const struct nft_expr *expr, + const struct nft_set_elem *elem) + +{ + const struct nft_set *priv = nft_expr_priv(expr); + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_LIST_ELEM); + if (nest == NULL) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_SE_KEY, &elem->key, + NFT_DATA_VALUE, priv->klen) < 0) + goto nla_put_failure; + + if (priv->flags & NFT_SET_MAP && !(elem->flags & NFT_SE_INTERVAL_END)) { + if (nft_data_dump(skb, NFTA_SE_DATA, elem->data, + nft_dreg_to_type(priv->dreg), priv->dlen) < 0) + goto nla_put_failure; + } + + if (elem->flags){ + if (nla_put_be32(skb, NFTA_SE_FLAGS, htonl(elem->flags))) + goto nla_put_failure; + } + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -1; +} + +static void nft_set_destroy(const struct nft_expr *expr) +{ + struct nft_set *priv = nft_expr_priv(expr); + struct nft_set_elem *elem; + struct rb_node *node; + + while ((node = priv->root.rb_node) != NULL) { + rb_erase(node, &priv->root); + elem = rb_entry(node, struct nft_set_elem, node); + nft_set_elem_destroy(expr, elem); + } +} + +static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { + [NFTA_SET_FLAGS] = { .type = NLA_U32 }, + [NFTA_SET_SREG] = { .type = NLA_U32 }, + [NFTA_SET_DREG] = { .type = NLA_U32 }, + [NFTA_SET_KLEN] = { .type = NLA_U32 }, + [NFTA_SET_DLEN] = { .type = NLA_U32 }, + [NFTA_SET_ELEMENTS] = { .type = NLA_NESTED }, +}; + +static int nft_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_set *priv = nft_expr_priv(expr); + struct nft_set_elem *elem, *uninitialized_var(new); + struct rb_node *parent, **p; + const struct nlattr *nla; + int err, rem, d; + + if (tb[NFTA_SET_SREG] == NULL || + tb[NFTA_SET_KLEN] == NULL || + tb[NFTA_SET_ELEMENTS] == NULL) + return -EINVAL; + + priv->root = RB_ROOT; + + if (tb[NFTA_SET_FLAGS] != NULL) { + priv->flags = ntohl(nla_get_be32(tb[NFTA_SET_FLAGS])); + if (priv->flags & ~(NFT_SET_INTERVAL | NFT_SET_MAP)) + return -EINVAL; + } + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_SET_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + if (tb[NFTA_SET_DREG] != NULL) { + if (!(priv->flags & NFT_SET_MAP)) + return -EINVAL; + if (tb[NFTA_SET_DLEN] == NULL) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_SET_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + if (priv->dreg == NFT_REG_VERDICT) + priv->dlen = FIELD_SIZEOF(struct nft_data, data); + else { + priv->dlen = ntohl(nla_get_be32(tb[NFTA_SET_DLEN])); + if (priv->dlen == 0 || + priv->dlen > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + } + } else { + if (priv->flags & NFT_SET_MAP) + return -EINVAL; + if (tb[NFTA_SET_DLEN] != NULL) + return -EINVAL; + } + + priv->klen = ntohl(nla_get_be32(tb[NFTA_SET_KLEN])); + if (priv->klen == 0 || + priv->klen > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + nla_for_each_nested(nla, tb[NFTA_SET_ELEMENTS], rem) { + err = -EINVAL; + if (nla_type(nla) != NFTA_LIST_ELEM) + goto err1; + + err = nft_set_elem_init(ctx, expr, nla, &new); + if (err < 0) + goto err1; + + parent = NULL; + p = &priv->root.rb_node; + while (*p != NULL) { + parent = *p; + elem = rb_entry(parent, struct nft_set_elem, node); + d = nft_data_cmp(&elem->key, &new->key, priv->klen); + if (d < 0) + p = &parent->rb_left; + else if (d > 0) + p = &parent->rb_right; + else { + err = -EEXIST; + goto err2; + } + } + rb_link_node(&new->node, parent, p); + rb_insert_color(&new->node, &priv->root); + } + + return 0; + +err2: + nft_set_elem_destroy(expr, new); +err1: + nft_set_destroy(expr); + return err; +} + +static int nft_set_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_set *priv = nft_expr_priv(expr); + const struct nft_set_elem *elem; + struct rb_node *node; + struct nlattr *list; + + if (priv->flags) { + if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(priv->flags))) + goto nla_put_failure; + } + + if (nla_put_be32(skb, NFTA_SET_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_SET_KLEN, htonl(priv->klen))) + goto nla_put_failure; + + if (priv->flags & NFT_SET_MAP) { + if (nla_put_be32(skb, NFTA_SET_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_SET_DLEN, htonl(priv->dlen))) + goto nla_put_failure; + } + + list = nla_nest_start(skb, NFTA_SET_ELEMENTS); + if (list == NULL) + goto nla_put_failure; + + for (node = rb_first(&priv->root); node; node = rb_next(node)) { + elem = rb_entry(node, struct nft_set_elem, node); + if (nft_set_elem_dump(skb, expr, elem) < 0) + goto nla_put_failure; + } + + nla_nest_end(skb, list); + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_set_ops __read_mostly = { + .name = "set", + .size = NFT_EXPR_SIZE(sizeof(struct nft_set)), + .owner = THIS_MODULE, + .eval = nft_set_eval, + .init = nft_set_init, + .destroy = nft_set_destroy, + .dump = nft_set_dump, + .policy = nft_set_policy, + .maxattr = NFTA_SET_MAX, +}; + +static int __init nft_set_module_init(void) +{ + return nft_register_expr(&nft_set_ops); +} + +static void __exit nft_set_module_exit(void) +{ + nft_unregister_expr(&nft_set_ops); +} + +module_init(nft_set_module_init); +module_exit(nft_set_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("set"); -- cgit v1.2.3-55-g7522 From 20a69341f2d00cd042e81c82289fba8a13c05a25 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 11 Oct 2013 12:06:22 +0200 Subject: netfilter: nf_tables: add netlink set API This patch adds the new netlink API for maintaining nf_tables sets independently of the ruleset. The API supports the following operations: - creation of sets - deletion of sets - querying of specific sets - dumping of all sets - addition of set elements - removal of set elements - dumping of all set elements Sets are identified by name, each table defines an individual namespace. The name of a set may be allocated automatically, this is mostly useful in combination with the NFT_SET_ANONYMOUS flag, which destroys a set automatically once the last reference has been released. Sets can be marked constant, meaning they're not allowed to change while linked to a rule. This allows to perform lockless operation for set types that would otherwise require locking. Additionally, if the implementation supports it, sets can (as before) be used as maps, associating a data value with each key (or range), by specifying the NFT_SET_MAP flag and can be used for interval queries by specifying the NFT_SET_INTERVAL flag. Set elements are added and removed incrementally. All element operations support batching, reducing netlink message and set lookup overhead. The old "set" and "hash" expressions are replaced by a generic "lookup" expression, which binds to the specified set. Userspace is not aware of the actual set implementation used by the kernel anymore, all configuration options are generic. Currently the implementation selection logic is largely missing and the kernel will simply use the first registered implementation supporting the requested operation. Eventually, the plan is to have userspace supply a description of the data characteristics and select the implementation based on expected performance and memory use. This patch includes the new 'lookup' expression to look up for element matching in the set. This patch includes kernel-doc descriptions for this set API and it also includes the following fixes. From Patrick McHardy: * netfilter: nf_tables: fix set element data type in dumps * netfilter: nf_tables: fix indentation of struct nft_set_elem comments * netfilter: nf_tables: fix oops in nft_validate_data_load() * netfilter: nf_tables: fix oops while listing sets of built-in tables * netfilter: nf_tables: destroy anonymous sets immediately if binding fails * netfilter: nf_tables: propagate context to set iter callback * netfilter: nf_tables: add loop detection From Pablo Neira Ayuso: * netfilter: nf_tables: allow to dump all existing sets * netfilter: nf_tables: fix wrong type for flags variable in newelem Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 149 ++++- include/uapi/linux/netfilter/nf_tables.h | 191 ++++-- net/netfilter/Kconfig | 6 +- net/netfilter/Makefile | 2 +- net/netfilter/nf_tables_api.c | 1078 +++++++++++++++++++++++++++++- net/netfilter/nf_tables_core.c | 2 - net/netfilter/nft_hash.c | 329 +++------ net/netfilter/nft_immediate.c | 11 + net/netfilter/nft_lookup.c | 135 ++++ net/netfilter/nft_rbtree.c | 247 +++++++ net/netfilter/nft_set.c | 381 ----------- 11 files changed, 1854 insertions(+), 677 deletions(-) create mode 100644 net/netfilter/nft_lookup.c create mode 100644 net/netfilter/nft_rbtree.c delete mode 100644 net/netfilter/nft_set.c (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d26dfa345f49..677dd79380ed 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -6,6 +6,8 @@ #include #include +#define NFT_JUMP_STACK_SIZE 16 + struct nft_pktinfo { struct sk_buff *skb; const struct net_device *in; @@ -48,23 +50,22 @@ static inline void nft_data_debug(const struct nft_data *data) } /** - * struct nft_ctx - nf_tables rule context + * struct nft_ctx - nf_tables rule/set context * + * @skb: netlink skb + * @nlh: netlink message header * @afi: address family info * @table: the table the chain is contained in * @chain: the chain the rule is contained in */ struct nft_ctx { + const struct sk_buff *skb; + const struct nlmsghdr *nlh; const struct nft_af_info *afi; const struct nft_table *table; const struct nft_chain *chain; }; -enum nft_data_types { - NFT_DATA_VALUE, - NFT_DATA_VERDICT, -}; - struct nft_data_desc { enum nft_data_types type; unsigned int len; @@ -83,6 +84,11 @@ static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg) return reg == NFT_REG_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE; } +static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) +{ + return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1; +} + extern int nft_validate_input_register(enum nft_registers reg); extern int nft_validate_output_register(enum nft_registers reg); extern int nft_validate_data_load(const struct nft_ctx *ctx, @@ -90,6 +96,132 @@ extern int nft_validate_data_load(const struct nft_ctx *ctx, const struct nft_data *data, enum nft_data_types type); +/** + * struct nft_set_elem - generic representation of set elements + * + * @cookie: implementation specific element cookie + * @key: element key + * @data: element data (maps only) + * @flags: element flags (end of interval) + * + * The cookie can be used to store a handle to the element for subsequent + * removal. + */ +struct nft_set_elem { + void *cookie; + struct nft_data key; + struct nft_data data; + u32 flags; +}; + +struct nft_set; +struct nft_set_iter { + unsigned int count; + unsigned int skip; + int err; + int (*fn)(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_iter *iter, + const struct nft_set_elem *elem); +}; + +/** + * struct nft_set_ops - nf_tables set operations + * + * @lookup: look up an element within the set + * @insert: insert new element into set + * @remove: remove element from set + * @walk: iterate over all set elemeennts + * @privsize: function to return size of set private data + * @init: initialize private data of new set instance + * @destroy: destroy private data of set instance + * @list: nf_tables_set_ops list node + * @owner: module reference + * @features: features supported by the implementation + */ +struct nft_set_ops { + bool (*lookup)(const struct nft_set *set, + const struct nft_data *key, + struct nft_data *data); + int (*get)(const struct nft_set *set, + struct nft_set_elem *elem); + int (*insert)(const struct nft_set *set, + const struct nft_set_elem *elem); + void (*remove)(const struct nft_set *set, + const struct nft_set_elem *elem); + void (*walk)(const struct nft_ctx *ctx, + const struct nft_set *set, + struct nft_set_iter *iter); + + unsigned int (*privsize)(const struct nlattr * const nla[]); + int (*init)(const struct nft_set *set, + const struct nlattr * const nla[]); + void (*destroy)(const struct nft_set *set); + + struct list_head list; + struct module *owner; + u32 features; +}; + +extern int nft_register_set(struct nft_set_ops *ops); +extern void nft_unregister_set(struct nft_set_ops *ops); + +/** + * struct nft_set - nf_tables set instance + * + * @list: table set list node + * @bindings: list of set bindings + * @name: name of the set + * @ktype: key type (numeric type defined by userspace, not used in the kernel) + * @dtype: data type (verdict or numeric type defined by userspace) + * @ops: set ops + * @flags: set flags + * @klen: key length + * @dlen: data length + * @data: private set data + */ +struct nft_set { + struct list_head list; + struct list_head bindings; + char name[IFNAMSIZ]; + u32 ktype; + u32 dtype; + /* runtime data below here */ + const struct nft_set_ops *ops ____cacheline_aligned; + u16 flags; + u8 klen; + u8 dlen; + unsigned char data[] + __attribute__((aligned(__alignof__(u64)))); +}; + +static inline void *nft_set_priv(const struct nft_set *set) +{ + return (void *)set->data; +} + +extern struct nft_set *nf_tables_set_lookup(const struct nft_table *table, + const struct nlattr *nla); + +/** + * struct nft_set_binding - nf_tables set binding + * + * @list: set bindings list node + * @chain: chain containing the rule bound to the set + * + * A set binding contains all information necessary for validation + * of new elements added to a bound set. + */ +struct nft_set_binding { + struct list_head list; + const struct nft_chain *chain; +}; + +extern int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding); +extern void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding); + /** * struct nft_expr_ops - nf_tables expression operations * @@ -115,7 +247,7 @@ struct nft_expr_ops { void (*destroy)(const struct nft_expr *expr); int (*dump)(struct sk_buff *skb, const struct nft_expr *expr); - + const struct nft_data * (*get_verdict)(const struct nft_expr *expr); struct list_head list; const char *name; struct module *owner; @@ -298,4 +430,7 @@ extern void nft_unregister_expr(struct nft_expr_ops *); #define MODULE_ALIAS_NFT_EXPR(name) \ MODULE_ALIAS("nft-expr-" name) +#define MODULE_ALIAS_NFT_SET() \ + MODULE_ALIAS("nft-set") + #endif /* _NET_NF_TABLES_H */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index ec6d84a8ed1e..9e924014efe3 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -44,6 +44,12 @@ enum nft_verdicts { * @NFT_MSG_NEWRULE: create a new rule (enum nft_rule_attributes) * @NFT_MSG_GETRULE: get a rule (enum nft_rule_attributes) * @NFT_MSG_DELRULE: delete a rule (enum nft_rule_attributes) + * @NFT_MSG_NEWSET: create a new set (enum nft_set_attributes) + * @NFT_MSG_GETSET: get a set (enum nft_set_attributes) + * @NFT_MSG_DELSET: delete a set (enum nft_set_attributes) + * @NFT_MSG_NEWSETELEM: create a new set element (enum nft_set_elem_attributes) + * @NFT_MSG_GETSETELEM: get a set element (enum nft_set_elem_attributes) + * @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -55,9 +61,20 @@ enum nf_tables_msg_types { NFT_MSG_NEWRULE, NFT_MSG_GETRULE, NFT_MSG_DELRULE, + NFT_MSG_NEWSET, + NFT_MSG_GETSET, + NFT_MSG_DELSET, + NFT_MSG_NEWSETELEM, + NFT_MSG_GETSETELEM, + NFT_MSG_DELSETELEM, NFT_MSG_MAX, }; +/** + * enum nft_list_attributes - nf_tables generic list netlink attributes + * + * @NFTA_LIST_ELEM: list element (NLA_NESTED) + */ enum nft_list_attributes { NFTA_LIST_UNPEC, NFTA_LIST_ELEM, @@ -127,6 +144,113 @@ enum nft_rule_attributes { }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) +/** + * enum nft_set_flags - nf_tables set flags + * + * @NFT_SET_ANONYMOUS: name allocation, automatic cleanup on unlink + * @NFT_SET_CONSTANT: set contents may not change while bound + * @NFT_SET_INTERVAL: set contains intervals + * @NFT_SET_MAP: set is used as a dictionary + */ +enum nft_set_flags { + NFT_SET_ANONYMOUS = 0x1, + NFT_SET_CONSTANT = 0x2, + NFT_SET_INTERVAL = 0x4, + NFT_SET_MAP = 0x8, +}; + +/** + * enum nft_set_attributes - nf_tables set netlink attributes + * + * @NFTA_SET_TABLE: table name (NLA_STRING) + * @NFTA_SET_NAME: set name (NLA_STRING) + * @NFTA_SET_FLAGS: bitmask of enum nft_set_flags (NLA_U32) + * @NFTA_SET_KEY_TYPE: key data type, informational purpose only (NLA_U32) + * @NFTA_SET_KEY_LEN: key data length (NLA_U32) + * @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32) + * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32) + */ +enum nft_set_attributes { + NFTA_SET_UNSPEC, + NFTA_SET_TABLE, + NFTA_SET_NAME, + NFTA_SET_FLAGS, + NFTA_SET_KEY_TYPE, + NFTA_SET_KEY_LEN, + NFTA_SET_DATA_TYPE, + NFTA_SET_DATA_LEN, + __NFTA_SET_MAX +}; +#define NFTA_SET_MAX (__NFTA_SET_MAX - 1) + +/** + * enum nft_set_elem_flags - nf_tables set element flags + * + * @NFT_SET_ELEM_INTERVAL_END: element ends the previous interval + */ +enum nft_set_elem_flags { + NFT_SET_ELEM_INTERVAL_END = 0x1, +}; + +/** + * enum nft_set_elem_attributes - nf_tables set element netlink attributes + * + * @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data) + * @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes) + * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32) + */ +enum nft_set_elem_attributes { + NFTA_SET_ELEM_UNSPEC, + NFTA_SET_ELEM_KEY, + NFTA_SET_ELEM_DATA, + NFTA_SET_ELEM_FLAGS, + __NFTA_SET_ELEM_MAX +}; +#define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1) + +/** + * enum nft_set_elem_list_attributes - nf_tables set element list netlink attributes + * + * @NFTA_SET_ELEM_LIST_TABLE: table of the set to be changed (NLA_STRING) + * @NFTA_SET_ELEM_LIST_SET: name of the set to be changed (NLA_STRING) + * @NFTA_SET_ELEM_LIST_ELEMENTS: list of set elements (NLA_NESTED: nft_set_elem_attributes) + */ +enum nft_set_elem_list_attributes { + NFTA_SET_ELEM_LIST_UNSPEC, + NFTA_SET_ELEM_LIST_TABLE, + NFTA_SET_ELEM_LIST_SET, + NFTA_SET_ELEM_LIST_ELEMENTS, + __NFTA_SET_ELEM_LIST_MAX +}; +#define NFTA_SET_ELEM_LIST_MAX (__NFTA_SET_ELEM_LIST_MAX - 1) + +/** + * enum nft_data_types - nf_tables data types + * + * @NFT_DATA_VALUE: generic data + * @NFT_DATA_VERDICT: netfilter verdict + * + * The type of data is usually determined by the kernel directly and is not + * explicitly specified by userspace. The only difference are sets, where + * userspace specifies the key and mapping data types. + * + * The values 0xffffff00-0xffffffff are reserved for internally used types. + * The remaining range can be freely used by userspace to encode types, all + * values are equivalent to NFT_DATA_VALUE. + */ +enum nft_data_types { + NFT_DATA_VALUE, + NFT_DATA_VERDICT = 0xffffff00U, +}; + +#define NFT_DATA_RESERVED_MASK 0xffffff00U + +/** + * enum nft_data_attributes - nf_tables data netlink attributes + * + * @NFTA_DATA_VALUE: generic data (NLA_BINARY) + * @NFTA_DATA_VERDICT: nf_tables verdict (NLA_NESTED: nft_verdict_attributes) + */ enum nft_data_attributes { NFTA_DATA_UNSPEC, NFTA_DATA_VALUE, @@ -275,58 +399,21 @@ enum nft_cmp_attributes { }; #define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) -enum nft_set_elem_flags { - NFT_SE_INTERVAL_END = 0x1, -}; - -enum nft_set_elem_attributes { - NFTA_SE_UNSPEC, - NFTA_SE_KEY, - NFTA_SE_DATA, - NFTA_SE_FLAGS, - __NFTA_SE_MAX -}; -#define NFTA_SE_MAX (__NFTA_SE_MAX - 1) - -enum nft_set_flags { - NFT_SET_INTERVAL = 0x1, - NFT_SET_MAP = 0x2, -}; - -enum nft_set_attributes { - NFTA_SET_UNSPEC, - NFTA_SET_FLAGS, - NFTA_SET_SREG, - NFTA_SET_DREG, - NFTA_SET_KLEN, - NFTA_SET_DLEN, - NFTA_SET_ELEMENTS, - __NFTA_SET_MAX -}; -#define NFTA_SET_MAX (__NFTA_SET_MAX - 1) - -enum nft_hash_flags { - NFT_HASH_MAP = 0x1, -}; - -enum nft_hash_elem_attributes { - NFTA_HE_UNSPEC, - NFTA_HE_KEY, - NFTA_HE_DATA, - __NFTA_HE_MAX -}; -#define NFTA_HE_MAX (__NFTA_HE_MAX - 1) - -enum nft_hash_attributes { - NFTA_HASH_UNSPEC, - NFTA_HASH_FLAGS, - NFTA_HASH_SREG, - NFTA_HASH_DREG, - NFTA_HASH_KLEN, - NFTA_HASH_ELEMENTS, - __NFTA_HASH_MAX -}; -#define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) +/** + * enum nft_lookup_attributes - nf_tables set lookup expression netlink attributes + * + * @NFTA_LOOKUP_SET: name of the set where to look for (NLA_STRING) + * @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers) + * @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers) + */ +enum nft_lookup_attributes { + NFTA_LOOKUP_UNSPEC, + NFTA_LOOKUP_SET, + NFTA_LOOKUP_SREG, + NFTA_LOOKUP_DREG, + __NFTA_LOOKUP_MAX +}; +#define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1) /** * enum nft_payload_bases - nf_tables payload expression offset bases diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c271e1af93b5..aa184a46bbf3 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -430,13 +430,13 @@ config NFT_CT depends on NF_CONNTRACK tristate "Netfilter nf_tables conntrack module" -config NFT_SET +config NFT_RBTREE depends on NF_TABLES - tristate "Netfilter nf_tables set module" + tristate "Netfilter nf_tables rbtree set module" config NFT_HASH depends on NF_TABLES - tristate "Netfilter nf_tables hash module" + tristate "Netfilter nf_tables hash set module" config NFT_COUNTER depends on NF_TABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 1ca3f3932826..b6b78754e4cc 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -75,7 +75,7 @@ obj-$(CONFIG_NFT_META) += nft_meta.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o #nf_tables-objs += nft_meta_target.o -obj-$(CONFIG_NFT_SET) += nft_set.o +obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o obj-$(CONFIG_NFT_HASH) += nft_hash.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o obj-$(CONFIG_NFT_LOG) += nft_log.o diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7d59c89c6c75..5092c817c222 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, 2008 Patrick McHardy + * Copyright (c) 2007-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -315,6 +315,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, nla_strlcpy(table->name, name, nla_len(name)); INIT_LIST_HEAD(&table->chains); + INIT_LIST_HEAD(&table->sets); list_add_tail(&table->list, &afi->tables); nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); @@ -409,6 +410,7 @@ again: } table->flags |= NFT_TABLE_BUILTIN; + INIT_LIST_HEAD(&table->sets); list_add_tail(&table->list, &afi->tables); nf_tables_table_notify(NULL, NULL, table, NFT_MSG_NEWTABLE, family); list_for_each_entry(chain, &table->chains, list) @@ -820,10 +822,14 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, } static void nft_ctx_init(struct nft_ctx *ctx, + const struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nft_af_info *afi, const struct nft_table *table, const struct nft_chain *chain) { + ctx->skb = skb; + ctx->nlh = nlh; ctx->afi = afi; ctx->table = table; ctx->chain = chain; @@ -1301,7 +1307,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, rule->handle = handle; rule->dlen = size; - nft_ctx_init(&ctx, afi, table, chain); + nft_ctx_init(&ctx, skb, nlh, afi, table, chain); expr = nft_expr_first(rule); for (i = 0; i < n; i++) { err = nf_tables_newexpr(&ctx, &info[i], expr); @@ -1392,6 +1398,939 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, return 0; } +/* + * Sets + */ + +static LIST_HEAD(nf_tables_set_ops); + +int nft_register_set(struct nft_set_ops *ops) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_add_tail(&ops->list, &nf_tables_set_ops); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return 0; +} +EXPORT_SYMBOL_GPL(nft_register_set); + +void nft_unregister_set(struct nft_set_ops *ops) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del(&ops->list); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_set); + +static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[]) +{ + const struct nft_set_ops *ops; + u32 features; + +#ifdef CONFIG_MODULES + if (list_empty(&nf_tables_set_ops)) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-set"); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (!list_empty(&nf_tables_set_ops)) + return ERR_PTR(-EAGAIN); + } +#endif + features = 0; + if (nla[NFTA_SET_FLAGS] != NULL) { + features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); + features &= NFT_SET_INTERVAL | NFT_SET_MAP; + } + + // FIXME: implement selection properly + list_for_each_entry(ops, &nf_tables_set_ops, list) { + if ((ops->features & features) != features) + continue; + if (!try_module_get(ops->owner)) + continue; + return ops; + } + + return ERR_PTR(-EOPNOTSUPP); +} + +static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { + [NFTA_SET_TABLE] = { .type = NLA_STRING }, + [NFTA_SET_NAME] = { .type = NLA_STRING }, + [NFTA_SET_FLAGS] = { .type = NLA_U32 }, + [NFTA_SET_KEY_TYPE] = { .type = NLA_U32 }, + [NFTA_SET_KEY_LEN] = { .type = NLA_U32 }, + [NFTA_SET_DATA_TYPE] = { .type = NLA_U32 }, + [NFTA_SET_DATA_LEN] = { .type = NLA_U32 }, +}; + +static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, + const struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table = NULL; + + afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + if (nla[NFTA_SET_TABLE] != NULL) { + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + } + + nft_ctx_init(ctx, skb, nlh, afi, table, NULL); + return 0; +} + +struct nft_set *nf_tables_set_lookup(const struct nft_table *table, + const struct nlattr *nla) +{ + struct nft_set *set; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + list_for_each_entry(set, &table->sets, list) { + if (!nla_strcmp(nla, set->name)) + return set; + } + return ERR_PTR(-ENOENT); +} + +static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, + const char *name) +{ + const struct nft_set *i; + const char *p; + unsigned long *inuse; + unsigned int n = 0; + + p = strnchr(name, IFNAMSIZ, '%'); + if (p != NULL) { + if (p[1] != 'd' || strchr(p + 2, '%')) + return -EINVAL; + + inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (inuse == NULL) + return -ENOMEM; + + list_for_each_entry(i, &ctx->table->sets, list) { + if (!sscanf(i->name, name, &n)) + continue; + if (n < 0 || n > BITS_PER_LONG * PAGE_SIZE) + continue; + set_bit(n, inuse); + } + + n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE); + free_page((unsigned long)inuse); + } + + snprintf(set->name, sizeof(set->name), name, n); + list_for_each_entry(i, &ctx->table->sets, list) { + if (!strcmp(set->name, i->name)) + return -ENFILE; + } + return 0; +} + +static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, + const struct nft_set *set, u16 event, u16 flags) +{ + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + u32 portid = NETLINK_CB(ctx->skb).portid; + u32 seq = ctx->nlh->nlmsg_seq; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), + flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = ctx->afi->family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_SET_NAME, set->name)) + goto nla_put_failure; + if (set->flags != 0) + if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags))) + goto nla_put_failure; + + if (nla_put_be32(skb, NFTA_SET_KEY_TYPE, htonl(set->ktype))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_SET_KEY_LEN, htonl(set->klen))) + goto nla_put_failure; + if (set->flags & NFT_SET_MAP) { + if (nla_put_be32(skb, NFTA_SET_DATA_TYPE, htonl(set->dtype))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen))) + goto nla_put_failure; + } + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_set_notify(const struct nft_ctx *ctx, + const struct nft_set *set, + int event) +{ + struct sk_buff *skb; + u32 portid = NETLINK_CB(ctx->skb).portid; + struct net *net = sock_net(ctx->skb->sk); + bool report; + int err; + + report = nlmsg_report(ctx->nlh); + if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_set(skb, ctx, set, event, 0); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nft_set *set; + unsigned int idx = 0, s_idx = cb->args[0]; + + if (cb->args[1]) + return skb->len; + + list_for_each_entry(set, &ctx->table->sets, list) { + if (idx < s_idx) + goto cont; + if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET, + NLM_F_MULTI) < 0) { + cb->args[0] = idx; + goto done; + } +cont: + idx++; + } + cb->args[1] = 1; +done: + return skb->len; +} + +static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nft_set *set; + unsigned int idx = 0, s_idx = cb->args[0]; + struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; + + if (cb->args[1]) + return skb->len; + + list_for_each_entry(table, &ctx->afi->tables, list) { + if (cur_table && cur_table != table) + continue; + + ctx->table = table; + list_for_each_entry(set, &ctx->table->sets, list) { + if (idx < s_idx) + goto cont; + if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET, + NLM_F_MULTI) < 0) { + cb->args[0] = idx; + cb->args[2] = (unsigned long) table; + goto done; + } +cont: + idx++; + } + } + cb->args[1] = 1; +done: + return skb->len; +} + +static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + struct nlattr *nla[NFTA_SET_MAX + 1]; + struct nft_ctx ctx; + int err, ret; + + err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_MAX, + nft_set_policy); + if (err < 0) + return err; + + err = nft_ctx_init_from_setattr(&ctx, cb->skb, cb->nlh, (void *)nla); + if (err < 0) + return err; + + if (ctx.table == NULL) + ret = nf_tables_dump_sets_all(&ctx, skb, cb); + else + ret = nf_tables_dump_sets_table(&ctx, skb, cb); + + return ret; +} + +static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nft_set *set; + struct nft_ctx ctx; + struct sk_buff *skb2; + int err; + + /* Verify existance before starting dump */ + err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_sets, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); + if (IS_ERR(set)) + return PTR_ERR(set); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + err = nf_tables_fill_set(skb2, &ctx, set, NFT_MSG_NEWSET, 0); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + +err: + kfree_skb(skb2); + return err; +} + +static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_set_ops *ops; + const struct nft_af_info *afi; + struct nft_table *table; + struct nft_set *set; + struct nft_ctx ctx; + char name[IFNAMSIZ]; + unsigned int size; + bool create; + u32 ktype, klen, dlen, dtype, flags; + int err; + + if (nla[NFTA_SET_TABLE] == NULL || + nla[NFTA_SET_NAME] == NULL || + nla[NFTA_SET_KEY_LEN] == NULL) + return -EINVAL; + + ktype = NFT_DATA_VALUE; + if (nla[NFTA_SET_KEY_TYPE] != NULL) { + ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); + if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK) + return -EINVAL; + } + + klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); + if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + flags = 0; + if (nla[NFTA_SET_FLAGS] != NULL) { + flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); + if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT | + NFT_SET_INTERVAL | NFT_SET_MAP)) + return -EINVAL; + } + + dtype = 0; + dlen = 0; + if (nla[NFTA_SET_DATA_TYPE] != NULL) { + if (!(flags & NFT_SET_MAP)) + return -EINVAL; + + dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE])); + if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK && + dtype != NFT_DATA_VERDICT) + return -EINVAL; + + if (dtype != NFT_DATA_VERDICT) { + if (nla[NFTA_SET_DATA_LEN] == NULL) + return -EINVAL; + dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); + if (dlen == 0 || + dlen > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + } else + dlen = sizeof(struct nft_data); + } else if (flags & NFT_SET_MAP) + return -EINVAL; + + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; + + afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, create); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], create); + if (IS_ERR(table)) + return PTR_ERR(table); + + nft_ctx_init(&ctx, skb, nlh, afi, table, NULL); + + set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]); + if (IS_ERR(set)) { + if (PTR_ERR(set) != -ENOENT) + return PTR_ERR(set); + set = NULL; + } + + if (set != NULL) { + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + return 0; + } + + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + return -ENOENT; + + ops = nft_select_set_ops(nla); + if (IS_ERR(ops)) + return PTR_ERR(ops); + + size = 0; + if (ops->privsize != NULL) + size = ops->privsize(nla); + + err = -ENOMEM; + set = kzalloc(sizeof(*set) + size, GFP_KERNEL); + if (set == NULL) + goto err1; + + nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name)); + err = nf_tables_set_alloc_name(&ctx, set, name); + if (err < 0) + goto err2; + + INIT_LIST_HEAD(&set->bindings); + set->ops = ops; + set->ktype = ktype; + set->klen = klen; + set->dtype = dtype; + set->dlen = dlen; + set->flags = flags; + + err = ops->init(set, nla); + if (err < 0) + goto err2; + + list_add_tail(&set->list, &table->sets); + nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET); + return 0; + +err2: + kfree(set); +err1: + module_put(ops->owner); + return err; +} + +static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) +{ + list_del(&set->list); + if (!(set->flags & NFT_SET_ANONYMOUS)) + nf_tables_set_notify(ctx, set, NFT_MSG_DELSET); + + set->ops->destroy(set); + module_put(set->ops->owner); + kfree(set); +} + +static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + struct nft_set *set; + struct nft_ctx ctx; + int err; + + if (nla[NFTA_SET_TABLE] == NULL) + return -EINVAL; + + err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); + if (IS_ERR(set)) + return PTR_ERR(set); + if (!list_empty(&set->bindings)) + return -EBUSY; + + nf_tables_set_destroy(&ctx, set); + return 0; +} + +static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_iter *iter, + const struct nft_set_elem *elem) +{ + enum nft_registers dreg; + + dreg = nft_type_to_reg(set->dtype); + return nft_validate_data_load(ctx, dreg, &elem->data, set->dtype); +} + +int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding) +{ + struct nft_set_binding *i; + struct nft_set_iter iter; + + if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS) + return -EBUSY; + + if (set->flags & NFT_SET_MAP) { + /* If the set is already bound to the same chain all + * jumps are already validated for that chain. + */ + list_for_each_entry(i, &set->bindings, list) { + if (i->chain == binding->chain) + goto bind; + } + + iter.skip = 0; + iter.count = 0; + iter.err = 0; + iter.fn = nf_tables_bind_check_setelem; + + set->ops->walk(ctx, set, &iter); + if (iter.err < 0) { + /* Destroy anonymous sets if binding fails */ + if (set->flags & NFT_SET_ANONYMOUS) + nf_tables_set_destroy(ctx, set); + + return iter.err; + } + } +bind: + binding->chain = ctx->chain; + list_add_tail(&binding->list, &set->bindings); + return 0; +} + +void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding) +{ + list_del(&binding->list); + + if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS) + nf_tables_set_destroy(ctx, set); +} + +/* + * Set elements + */ + +static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { + [NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED }, + [NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED }, + [NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 }, +}; + +static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = { + [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING }, + [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING }, + [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED }, +}; + +static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, + const struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + + afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + nft_ctx_init(ctx, skb, nlh, afi, table, NULL); + return 0; +} + +static int nf_tables_fill_setelem(struct sk_buff *skb, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + unsigned char *b = skb_tail_pointer(skb); + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_LIST_ELEM); + if (nest == NULL) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, &elem->key, NFT_DATA_VALUE, + set->klen) < 0) + goto nla_put_failure; + + if (set->flags & NFT_SET_MAP && + !(elem->flags & NFT_SET_ELEM_INTERVAL_END) && + nft_data_dump(skb, NFTA_SET_ELEM_DATA, &elem->data, + set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE, + set->dlen) < 0) + goto nla_put_failure; + + if (elem->flags != 0) + if (nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(elem->flags))) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nlmsg_trim(skb, b); + return -EMSGSIZE; +} + +struct nft_set_dump_args { + const struct netlink_callback *cb; + struct nft_set_iter iter; + struct sk_buff *skb; +}; + +static int nf_tables_dump_setelem(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_iter *iter, + const struct nft_set_elem *elem) +{ + struct nft_set_dump_args *args; + + args = container_of(iter, struct nft_set_dump_args, iter); + return nf_tables_fill_setelem(args->skb, set, elem); +} + +static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct nft_set *set; + struct nft_set_dump_args args; + struct nft_ctx ctx; + struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1]; + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + struct nlattr *nest; + u32 portid, seq; + int event, err; + + nfmsg = nlmsg_data(cb->nlh); + err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_ELEM_LIST_MAX, + nft_set_elem_list_policy); + if (err < 0) + return err; + + err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + + event = NFT_MSG_NEWSETELEM; + event |= NFNL_SUBSYS_NFTABLES << 8; + portid = NETLINK_CB(cb->skb).portid; + seq = cb->nlh->nlmsg_seq; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), + NLM_F_MULTI); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = NFPROTO_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name)) + goto nla_put_failure; + + nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS); + if (nest == NULL) + goto nla_put_failure; + + args.cb = cb; + args.skb = skb; + args.iter.skip = cb->args[0]; + args.iter.count = 0; + args.iter.err = 0; + args.iter.fn = nf_tables_dump_setelem; + set->ops->walk(&ctx, set, &args.iter); + + nla_nest_end(skb, nest); + nlmsg_end(skb, nlh); + + if (args.iter.err && args.iter.err != -EMSGSIZE) + return args.iter.err; + if (args.iter.count == cb->args[0]) + return 0; + + cb->args[0] = args.iter.count; + return skb->len; + +nla_put_failure: + return -ENOSPC; +} + +static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nft_set *set; + struct nft_ctx ctx; + int err; + + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_set, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + return -EOPNOTSUPP; +} + +static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, + const struct nlattr *attr) +{ + struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; + struct nft_data_desc d1, d2; + struct nft_set_elem elem; + struct nft_set_binding *binding; + enum nft_registers dreg; + int err; + + err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, + nft_set_elem_policy); + if (err < 0) + return err; + + if (nla[NFTA_SET_ELEM_KEY] == NULL) + return -EINVAL; + + elem.flags = 0; + if (nla[NFTA_SET_ELEM_FLAGS] != NULL) { + elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); + if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; + } + + if (set->flags & NFT_SET_MAP) { + if (nla[NFTA_SET_ELEM_DATA] == NULL && + !(elem.flags & NFT_SET_ELEM_INTERVAL_END)) + return -EINVAL; + } else { + if (nla[NFTA_SET_ELEM_DATA] != NULL) + return -EINVAL; + } + + err = nft_data_init(ctx, &elem.key, &d1, nla[NFTA_SET_ELEM_KEY]); + if (err < 0) + goto err1; + err = -EINVAL; + if (d1.type != NFT_DATA_VALUE || d1.len != set->klen) + goto err2; + + err = -EEXIST; + if (set->ops->get(set, &elem) == 0) + goto err2; + + if (nla[NFTA_SET_ELEM_DATA] != NULL) { + err = nft_data_init(ctx, &elem.data, &d2, nla[NFTA_SET_ELEM_DATA]); + if (err < 0) + goto err2; + + err = -EINVAL; + if (set->dtype != NFT_DATA_VERDICT && d2.len != set->dlen) + goto err3; + + dreg = nft_type_to_reg(set->dtype); + list_for_each_entry(binding, &set->bindings, list) { + struct nft_ctx bind_ctx = { + .afi = ctx->afi, + .table = ctx->table, + .chain = binding->chain, + }; + + err = nft_validate_data_load(&bind_ctx, dreg, + &elem.data, d2.type); + if (err < 0) + goto err3; + } + } + + err = set->ops->insert(set, &elem); + if (err < 0) + goto err3; + + return 0; + +err3: + if (nla[NFTA_SET_ELEM_DATA] != NULL) + nft_data_uninit(&elem.data, d2.type); +err2: + nft_data_uninit(&elem.key, d1.type); +err1: + return err; +} + +static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nlattr *attr; + struct nft_set *set; + struct nft_ctx ctx; + int rem, err; + + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + return -EBUSY; + + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + err = nft_add_set_elem(&ctx, set, attr); + if (err < 0) + return err; + } + return 0; +} + +static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set, + const struct nlattr *attr) +{ + struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; + struct nft_data_desc desc; + struct nft_set_elem elem; + int err; + + err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, + nft_set_elem_policy); + if (err < 0) + goto err1; + + err = -EINVAL; + if (nla[NFTA_SET_ELEM_KEY] == NULL) + goto err1; + + err = nft_data_init(ctx, &elem.key, &desc, nla[NFTA_SET_ELEM_KEY]); + if (err < 0) + goto err1; + + err = -EINVAL; + if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) + goto err2; + + err = set->ops->get(set, &elem); + if (err < 0) + goto err2; + + set->ops->remove(set, &elem); + + nft_data_uninit(&elem.key, NFT_DATA_VALUE); + if (set->flags & NFT_SET_MAP) + nft_data_uninit(&elem.data, set->dtype); + +err2: + nft_data_uninit(&elem.key, desc.type); +err1: + return err; +} + +static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nlattr *attr; + struct nft_set *set; + struct nft_ctx ctx; + int rem, err; + + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + return -EBUSY; + + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + err = nft_del_setelem(&ctx, set, attr); + if (err < 0) + return err; + } + return 0; +} + static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { [NFT_MSG_NEWTABLE] = { .call = nf_tables_newtable, @@ -1438,6 +2377,36 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, + [NFT_MSG_NEWSET] = { + .call = nf_tables_newset, + .attr_count = NFTA_SET_MAX, + .policy = nft_set_policy, + }, + [NFT_MSG_GETSET] = { + .call = nf_tables_getset, + .attr_count = NFTA_SET_MAX, + .policy = nft_set_policy, + }, + [NFT_MSG_DELSET] = { + .call = nf_tables_delset, + .attr_count = NFTA_SET_MAX, + .policy = nft_set_policy, + }, + [NFT_MSG_NEWSETELEM] = { + .call = nf_tables_newsetelem, + .attr_count = NFTA_SET_ELEM_LIST_MAX, + .policy = nft_set_elem_list_policy, + }, + [NFT_MSG_GETSETELEM] = { + .call = nf_tables_getsetelem, + .attr_count = NFTA_SET_ELEM_LIST_MAX, + .policy = nft_set_elem_list_policy, + }, + [NFT_MSG_DELSETELEM] = { + .call = nf_tables_delsetelem, + .attr_count = NFTA_SET_ELEM_LIST_MAX, + .policy = nft_set_elem_list_policy, + }, }; static const struct nfnetlink_subsystem nf_tables_subsys = { @@ -1447,6 +2416,90 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { .cb = nf_tables_cb, }; +/* + * Loop detection - walk through the ruleset beginning at the destination chain + * of a new jump until either the source chain is reached (loop) or all + * reachable chains have been traversed. + * + * The loop check is performed whenever a new jump verdict is added to an + * expression or verdict map or a verdict map is bound to a new chain. + */ + +static int nf_tables_check_loops(const struct nft_ctx *ctx, + const struct nft_chain *chain); + +static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_iter *iter, + const struct nft_set_elem *elem) +{ + switch (elem->data.verdict) { + case NFT_JUMP: + case NFT_GOTO: + return nf_tables_check_loops(ctx, elem->data.chain); + default: + return 0; + } +} + +static int nf_tables_check_loops(const struct nft_ctx *ctx, + const struct nft_chain *chain) +{ + const struct nft_rule *rule; + const struct nft_expr *expr, *last; + const struct nft_data *data; + const struct nft_set *set; + struct nft_set_binding *binding; + struct nft_set_iter iter; + int err; + + if (ctx->chain == chain) + return -ELOOP; + + list_for_each_entry(rule, &chain->rules, list) { + nft_rule_for_each_expr(expr, last, rule) { + if (!expr->ops->get_verdict) + continue; + + data = expr->ops->get_verdict(expr); + if (data == NULL) + break; + + switch (data->verdict) { + case NFT_JUMP: + case NFT_GOTO: + err = nf_tables_check_loops(ctx, data->chain); + if (err < 0) + return err; + default: + break; + } + } + } + + list_for_each_entry(set, &ctx->table->sets, list) { + if (!(set->flags & NFT_SET_MAP) || + set->dtype != NFT_DATA_VERDICT) + continue; + + list_for_each_entry(binding, &set->bindings, list) { + if (binding->chain != chain) + continue; + + iter.skip = 0; + iter.count = 0; + iter.err = 0; + iter.fn = nf_tables_loop_check_setelem; + + set->ops->walk(ctx, set, &iter); + if (iter.err < 0) + return iter.err; + } + } + + return 0; +} + /** * nft_validate_input_register - validate an expressions' input register * @@ -1500,11 +2553,25 @@ int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg, const struct nft_data *data, enum nft_data_types type) { + int err; + switch (reg) { case NFT_REG_VERDICT: if (data == NULL || type != NFT_DATA_VERDICT) return -EINVAL; - // FIXME: do loop detection + + if (data->verdict == NFT_GOTO || data->verdict == NFT_JUMP) { + err = nf_tables_check_loops(ctx, data->chain); + if (err < 0) + return err; + + if (ctx->chain->level + 1 > data->chain->level) { + if (ctx->chain->level + 1 == NFT_JUMP_STACK_SIZE) + return -EMLINK; + data->chain->level = ctx->chain->level + 1; + } + } + return 0; default: if (data != NULL && type != NFT_DATA_VALUE) @@ -1555,11 +2622,6 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, if (chain->flags & NFT_BASE_CHAIN) return -EOPNOTSUPP; - if (ctx->chain->level + 1 > chain->level) { - if (ctx->chain->level + 1 == 16) - return -EMLINK; - chain->level = ctx->chain->level + 1; - } chain->use++; data->chain = chain; desc->len = sizeof(data); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index bc7fb85d4002..fd0ecd3255c1 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -20,8 +20,6 @@ #include #include -#define NFT_JUMP_STACK_SIZE 16 - unsigned int nft_do_chain(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 67cc502881f1..3d3f8fce10a5 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -21,11 +21,6 @@ struct nft_hash { struct hlist_head *hash; unsigned int hsize; - enum nft_registers sreg:8; - enum nft_registers dreg:8; - u8 klen; - u8 dlen; - u16 flags; }; struct nft_hash_elem { @@ -42,213 +37,140 @@ static unsigned int nft_hash_data(const struct nft_data *data, { unsigned int h; - // FIXME: can we reasonably guarantee the upper bits are fixed? - h = jhash2(data->data, len >> 2, nft_hash_rnd); + h = jhash(data->data, len, nft_hash_rnd); return ((u64)h * hsize) >> 32; } -static void nft_hash_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +static bool nft_hash_lookup(const struct nft_set *set, + const struct nft_data *key, + struct nft_data *data) { - const struct nft_hash *priv = nft_expr_priv(expr); - const struct nft_hash_elem *elem; - const struct nft_data *key = &data[priv->sreg]; + const struct nft_hash *priv = nft_set_priv(set); + const struct nft_hash_elem *he; unsigned int h; - h = nft_hash_data(key, priv->hsize, priv->klen); - hlist_for_each_entry(elem, &priv->hash[h], hnode) { - if (nft_data_cmp(&elem->key, key, priv->klen)) + h = nft_hash_data(key, priv->hsize, set->klen); + hlist_for_each_entry(he, &priv->hash[h], hnode) { + if (nft_data_cmp(&he->key, key, set->klen)) continue; - if (priv->flags & NFT_HASH_MAP) - nft_data_copy(&data[priv->dreg], elem->data); - return; + if (set->flags & NFT_SET_MAP) + nft_data_copy(data, he->data); + return true; } - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + return false; } -static void nft_hash_elem_destroy(const struct nft_expr *expr, - struct nft_hash_elem *elem) +static void nft_hash_elem_destroy(const struct nft_set *set, + struct nft_hash_elem *he) { - const struct nft_hash *priv = nft_expr_priv(expr); - - nft_data_uninit(&elem->key, NFT_DATA_VALUE); - if (priv->flags & NFT_HASH_MAP) - nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg)); - kfree(elem); + nft_data_uninit(&he->key, NFT_DATA_VALUE); + if (set->flags & NFT_SET_MAP) + nft_data_uninit(he->data, set->dtype); + kfree(he); } -static const struct nla_policy nft_he_policy[NFTA_HE_MAX + 1] = { - [NFTA_HE_KEY] = { .type = NLA_NESTED }, - [NFTA_HE_DATA] = { .type = NLA_NESTED }, -}; - -static int nft_hash_elem_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr *nla, - struct nft_hash_elem **new) +static int nft_hash_insert(const struct nft_set *set, + const struct nft_set_elem *elem) { - struct nft_hash *priv = nft_expr_priv(expr); - struct nlattr *tb[NFTA_HE_MAX + 1]; - struct nft_hash_elem *elem; - struct nft_data_desc d1, d2; - unsigned int size; - int err; + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + unsigned int size, h; - err = nla_parse_nested(tb, NFTA_HE_MAX, nla, nft_he_policy); - if (err < 0) - return err; - - if (tb[NFTA_HE_KEY] == NULL) + if (elem->flags != 0) return -EINVAL; - size = sizeof(*elem); - - if (priv->flags & NFT_HASH_MAP) { - if (tb[NFTA_HE_DATA] == NULL) - return -EINVAL; - size += sizeof(elem->data[0]); - } else { - if (tb[NFTA_HE_DATA] != NULL) - return -EINVAL; - } - elem = kzalloc(size, GFP_KERNEL); - if (elem == NULL) + size = sizeof(*he); + if (set->flags & NFT_SET_MAP) + size += sizeof(he->data[0]); + + he = kzalloc(size, GFP_KERNEL); + if (he == NULL) return -ENOMEM; - err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_HE_KEY]); - if (err < 0) - goto err1; - err = -EINVAL; - if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen) - goto err2; - - if (tb[NFTA_HE_DATA] != NULL) { - err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_HE_DATA]); - if (err < 0) - goto err2; - err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type); - if (err < 0) - goto err3; - } + nft_data_copy(&he->key, &elem->key); + if (set->flags & NFT_SET_MAP) + nft_data_copy(he->data, &elem->data); - *new = elem; + h = nft_hash_data(&he->key, priv->hsize, set->klen); + hlist_add_head_rcu(&he->hnode, &priv->hash[h]); return 0; - -err3: - nft_data_uninit(elem->data, d2.type); -err2: - nft_data_uninit(&elem->key, d1.type); -err1: - kfree(elem); - return err; } -static int nft_hash_elem_dump(struct sk_buff *skb, const struct nft_expr *expr, - const struct nft_hash_elem *elem) - +static void nft_hash_remove(const struct nft_set *set, + const struct nft_set_elem *elem) { - const struct nft_hash *priv = nft_expr_priv(expr); - struct nlattr *nest; + struct nft_hash_elem *he = elem->cookie; - nest = nla_nest_start(skb, NFTA_LIST_ELEM); - if (nest == NULL) - goto nla_put_failure; - - if (nft_data_dump(skb, NFTA_HE_KEY, &elem->key, - NFT_DATA_VALUE, priv->klen) < 0) - goto nla_put_failure; + hlist_del_rcu(&he->hnode); + kfree(he); +} - if (priv->flags & NFT_HASH_MAP) { - if (nft_data_dump(skb, NFTA_HE_DATA, elem->data, - NFT_DATA_VALUE, priv->dlen) < 0) - goto nla_put_failure; - } +static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) +{ + const struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + unsigned int h; - nla_nest_end(skb, nest); - return 0; + h = nft_hash_data(&elem->key, priv->hsize, set->klen); + hlist_for_each_entry(he, &priv->hash[h], hnode) { + if (nft_data_cmp(&he->key, &elem->key, set->klen)) + continue; -nla_put_failure: - return -1; + elem->cookie = he; + elem->flags = 0; + if (set->flags & NFT_SET_MAP) + nft_data_copy(&elem->data, he->data); + return 0; + } + return -ENOENT; } -static void nft_hash_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) +static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, + struct nft_set_iter *iter) { - const struct nft_hash *priv = nft_expr_priv(expr); - const struct hlist_node *next; - struct nft_hash_elem *elem; + const struct nft_hash *priv = nft_set_priv(set); + const struct nft_hash_elem *he; + struct nft_set_elem elem; unsigned int i; for (i = 0; i < priv->hsize; i++) { - hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) { - hlist_del(&elem->hnode); - nft_hash_elem_destroy(expr, elem); + hlist_for_each_entry(he, &priv->hash[i], hnode) { + if (iter->count < iter->skip) + goto cont; + + memcpy(&elem.key, &he->key, sizeof(elem.key)); + if (set->flags & NFT_SET_MAP) + memcpy(&elem.data, he->data, sizeof(elem.data)); + elem.flags = 0; + + iter->err = iter->fn(ctx, set, iter, &elem); + if (iter->err < 0) + return; +cont: + iter->count++; } } - kfree(priv->hash); } -static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { - [NFTA_HASH_FLAGS] = { .type = NLA_U32 }, - [NFTA_HASH_SREG] = { .type = NLA_U32 }, - [NFTA_HASH_DREG] = { .type = NLA_U32 }, - [NFTA_HASH_KLEN] = { .type = NLA_U32 }, - [NFTA_HASH_ELEMENTS] = { .type = NLA_NESTED }, -}; +static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) +{ + return sizeof(struct nft_hash); +} -static int nft_hash_init(const struct nft_ctx *ctx, const struct nft_expr *expr, +static int nft_hash_init(const struct nft_set *set, const struct nlattr * const tb[]) { - struct nft_hash *priv = nft_expr_priv(expr); - struct nft_hash_elem *elem, *uninitialized_var(new); - const struct nlattr *nla; + struct nft_hash *priv = nft_set_priv(set); unsigned int cnt, i; - unsigned int h; - int err, rem; if (unlikely(!nft_hash_rnd_initted)) { get_random_bytes(&nft_hash_rnd, 4); nft_hash_rnd_initted = true; } - if (tb[NFTA_HASH_SREG] == NULL || - tb[NFTA_HASH_KLEN] == NULL || - tb[NFTA_HASH_ELEMENTS] == NULL) - return -EINVAL; - - if (tb[NFTA_HASH_FLAGS] != NULL) { - priv->flags = ntohl(nla_get_be32(tb[NFTA_HASH_FLAGS])); - if (priv->flags & ~NFT_HASH_MAP) - return -EINVAL; - } - - priv->sreg = ntohl(nla_get_be32(tb[NFTA_HASH_SREG])); - err = nft_validate_input_register(priv->sreg); - if (err < 0) - return err; - - if (tb[NFTA_HASH_DREG] != NULL) { - if (!(priv->flags & NFT_HASH_MAP)) - return -EINVAL; - priv->dreg = ntohl(nla_get_be32(tb[NFTA_HASH_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - } - - priv->klen = ntohl(nla_get_be32(tb[NFTA_HASH_KLEN])); - if (priv->klen == 0) - return -EINVAL; - - cnt = 0; - nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) { - if (nla_type(nla) != NFTA_LIST_ELEM) - return -EINVAL; - cnt++; - } - /* Aim for a load factor of 0.75 */ + // FIXME: temporarily broken until we have set descriptions + cnt = 100; cnt = cnt * 4 / 3; priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL); @@ -259,85 +181,46 @@ static int nft_hash_init(const struct nft_ctx *ctx, const struct nft_expr *expr, for (i = 0; i < cnt; i++) INIT_HLIST_HEAD(&priv->hash[i]); - err = -ENOMEM; - nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) { - err = nft_hash_elem_init(ctx, expr, nla, &new); - if (err < 0) - goto err1; - - h = nft_hash_data(&new->key, priv->hsize, priv->klen); - hlist_for_each_entry(elem, &priv->hash[h], hnode) { - if (nft_data_cmp(&elem->key, &new->key, priv->klen)) - continue; - nft_hash_elem_destroy(expr, new); - err = -EEXIST; - goto err1; - } - hlist_add_head(&new->hnode, &priv->hash[h]); - } return 0; - -err1: - nft_hash_destroy(ctx, expr); - return err; } -static int nft_hash_dump(struct sk_buff *skb, const struct nft_expr *expr) +static void nft_hash_destroy(const struct nft_set *set) { - const struct nft_hash *priv = nft_expr_priv(expr); - const struct nft_hash_elem *elem; - struct nlattr *list; + const struct nft_hash *priv = nft_set_priv(set); + const struct hlist_node *next; + struct nft_hash_elem *elem; unsigned int i; - if (priv->flags) - if (nla_put_be32(skb, NFTA_HASH_FLAGS, htonl(priv->flags))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_HASH_SREG, htonl(priv->sreg))) - goto nla_put_failure; - if (priv->flags & NFT_HASH_MAP) - if (nla_put_be32(skb, NFTA_HASH_DREG, htonl(priv->dreg))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_HASH_KLEN, htonl(priv->klen))) - goto nla_put_failure; - - list = nla_nest_start(skb, NFTA_HASH_ELEMENTS); - if (list == NULL) - goto nla_put_failure; - for (i = 0; i < priv->hsize; i++) { - hlist_for_each_entry(elem, &priv->hash[i], hnode) { - if (nft_hash_elem_dump(skb, expr, elem) < 0) - goto nla_put_failure; + hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) { + hlist_del(&elem->hnode); + nft_hash_elem_destroy(set, elem); } } - - nla_nest_end(skb, list); - return 0; - -nla_put_failure: - return -1; + kfree(priv->hash); } -static struct nft_expr_ops nft_hash_ops __read_mostly = { - .name = "hash", - .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)), - .owner = THIS_MODULE, - .eval = nft_hash_eval, +static struct nft_set_ops nft_hash_ops __read_mostly = { + .privsize = nft_hash_privsize, .init = nft_hash_init, .destroy = nft_hash_destroy, - .dump = nft_hash_dump, - .policy = nft_hash_policy, - .maxattr = NFTA_HASH_MAX, + .get = nft_hash_get, + .insert = nft_hash_insert, + .remove = nft_hash_remove, + .lookup = nft_hash_lookup, + .walk = nft_hash_walk, + .features = NFT_SET_MAP, + .owner = THIS_MODULE, }; static int __init nft_hash_module_init(void) { - return nft_register_expr(&nft_hash_ops); + return nft_register_set(&nft_hash_ops); } static void __exit nft_hash_module_exit(void) { - nft_unregister_expr(&nft_hash_ops); + nft_unregister_set(&nft_hash_ops); } module_init(nft_hash_module_init); @@ -345,4 +228,4 @@ module_exit(nft_hash_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_EXPR("hash"); +MODULE_ALIAS_NFT_SET(); diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 3bf42c3cc49a..78334bf37007 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -90,6 +90,16 @@ nla_put_failure: return -1; } +static const struct nft_data *nft_immediate_get_verdict(const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + if (priv->dreg == NFT_REG_VERDICT) + return &priv->data; + else + return NULL; +} + static struct nft_expr_ops nft_imm_ops __read_mostly = { .name = "immediate", .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), @@ -98,6 +108,7 @@ static struct nft_expr_ops nft_imm_ops __read_mostly = { .init = nft_immediate_init, .destroy = nft_immediate_destroy, .dump = nft_immediate_dump, + .get_verdict = nft_immediate_get_verdict, .policy = nft_immediate_policy, .maxattr = NFTA_IMMEDIATE_MAX, }; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c new file mode 100644 index 000000000000..4962d2173678 --- /dev/null +++ b/net/netfilter/nft_lookup.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2009 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_lookup { + struct nft_set *set; + enum nft_registers sreg:8; + enum nft_registers dreg:8; + struct nft_set_binding binding; +}; + +static void nft_lookup_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_lookup *priv = nft_expr_priv(expr); + const struct nft_set *set = priv->set; + + if (set->ops->lookup(set, &data[priv->sreg], &data[priv->dreg])) + return; + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { + [NFTA_LOOKUP_SET] = { .type = NLA_STRING }, + [NFTA_LOOKUP_SREG] = { .type = NLA_U32 }, + [NFTA_LOOKUP_DREG] = { .type = NLA_U32 }, +}; + +static int nft_lookup_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_lookup *priv = nft_expr_priv(expr); + struct nft_set *set; + int err; + + if (tb[NFTA_LOOKUP_SET] == NULL || + tb[NFTA_LOOKUP_SREG] == NULL) + return -EINVAL; + + set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + if (tb[NFTA_LOOKUP_DREG] != NULL) { + if (!(set->flags & NFT_SET_MAP)) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + if (priv->dreg == NFT_REG_VERDICT) { + if (set->dtype != NFT_DATA_VERDICT) + return -EINVAL; + } else if (set->dtype == NFT_DATA_VERDICT) + return -EINVAL; + } else if (set->flags & NFT_SET_MAP) + return -EINVAL; + + err = nf_tables_bind_set(ctx, set, &priv->binding); + if (err < 0) + return err; + + priv->set = set; + return 0; +} + +static void nft_lookup_destroy(const struct nft_expr *expr) +{ + struct nft_lookup *priv = nft_expr_priv(expr); + + nf_tables_unbind_set(NULL, priv->set, &priv->binding); +} + +static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_lookup *priv = nft_expr_priv(expr); + + if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_LOOKUP_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (priv->set->flags & NFT_SET_MAP) + if (nla_put_be32(skb, NFTA_LOOKUP_DREG, htonl(priv->dreg))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_lookup_ops __read_mostly = { + .name = "lookup", + .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), + .owner = THIS_MODULE, + .eval = nft_lookup_eval, + .init = nft_lookup_init, + .destroy = nft_lookup_destroy, + .dump = nft_lookup_dump, + .policy = nft_lookup_policy, + .maxattr = NFTA_LOOKUP_MAX, +}; + +int __init nft_lookup_module_init(void) +{ + return nft_register_expr(&nft_lookup_ops); +} + +void nft_lookup_module_exit(void) +{ + nft_unregister_expr(&nft_lookup_ops); +} diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c new file mode 100644 index 000000000000..ca0c1b231bfe --- /dev/null +++ b/net/netfilter/nft_rbtree.c @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_rbtree { + struct rb_root root; +}; + +struct nft_rbtree_elem { + struct rb_node node; + u16 flags; + struct nft_data key; + struct nft_data data[]; +}; + +static bool nft_rbtree_lookup(const struct nft_set *set, + const struct nft_data *key, + struct nft_data *data) +{ + const struct nft_rbtree *priv = nft_set_priv(set); + const struct nft_rbtree_elem *rbe, *interval = NULL; + const struct rb_node *parent = priv->root.rb_node; + int d; + + while (parent != NULL) { + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + + d = nft_data_cmp(&rbe->key, key, set->klen); + if (d < 0) { + parent = parent->rb_left; + interval = rbe; + } else if (d > 0) + parent = parent->rb_right; + else { +found: + if (rbe->flags & NFT_SET_ELEM_INTERVAL_END) + goto out; + if (set->flags & NFT_SET_MAP) + nft_data_copy(data, rbe->data); + return true; + } + } + + if (set->flags & NFT_SET_INTERVAL && interval != NULL) { + rbe = interval; + goto found; + } +out: + return false; +} + +static void nft_rbtree_elem_destroy(const struct nft_set *set, + struct nft_rbtree_elem *rbe) +{ + nft_data_uninit(&rbe->key, NFT_DATA_VALUE); + if (set->flags & NFT_SET_MAP) + nft_data_uninit(rbe->data, set->dtype); + kfree(rbe); +} + +static int __nft_rbtree_insert(const struct nft_set *set, + struct nft_rbtree_elem *new) +{ + struct nft_rbtree *priv = nft_set_priv(set); + struct nft_rbtree_elem *rbe; + struct rb_node *parent, **p; + int d; + + parent = NULL; + p = &priv->root.rb_node; + while (*p != NULL) { + parent = *p; + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + d = nft_data_cmp(&rbe->key, &new->key, set->klen); + if (d < 0) + p = &parent->rb_left; + else if (d > 0) + p = &parent->rb_right; + else + return -EEXIST; + } + rb_link_node(&new->node, parent, p); + rb_insert_color(&new->node, &priv->root); + return 0; +} + +static int nft_rbtree_insert(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_rbtree_elem *rbe; + unsigned int size; + int err; + + size = sizeof(*rbe); + if (set->flags & NFT_SET_MAP) + size += sizeof(rbe->data[0]); + + rbe = kzalloc(size, GFP_KERNEL); + if (rbe == NULL) + return -ENOMEM; + + rbe->flags = elem->flags; + nft_data_copy(&rbe->key, &elem->key); + if (set->flags & NFT_SET_MAP) + nft_data_copy(rbe->data, &elem->data); + + err = __nft_rbtree_insert(set, rbe); + if (err < 0) + kfree(rbe); + return err; +} + +static void nft_rbtree_remove(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_rbtree *priv = nft_set_priv(set); + struct nft_rbtree_elem *rbe = elem->cookie; + + rb_erase(&rbe->node, &priv->root); + kfree(rbe); +} + +static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) +{ + const struct nft_rbtree *priv = nft_set_priv(set); + const struct rb_node *parent = priv->root.rb_node; + struct nft_rbtree_elem *rbe; + int d; + + while (parent != NULL) { + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + + d = nft_data_cmp(&rbe->key, &elem->key, set->klen); + if (d < 0) + parent = parent->rb_left; + else if (d > 0) + parent = parent->rb_right; + else { + elem->cookie = rbe; + if (set->flags & NFT_SET_MAP) + nft_data_copy(&elem->data, rbe->data); + elem->flags = rbe->flags; + return 0; + } + } + return -ENOENT; +} + +static void nft_rbtree_walk(const struct nft_ctx *ctx, + const struct nft_set *set, + struct nft_set_iter *iter) +{ + const struct nft_rbtree *priv = nft_set_priv(set); + const struct nft_rbtree_elem *rbe; + struct nft_set_elem elem; + struct rb_node *node; + + for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { + if (iter->count < iter->skip) + goto cont; + + rbe = rb_entry(node, struct nft_rbtree_elem, node); + nft_data_copy(&elem.key, &rbe->key); + if (set->flags & NFT_SET_MAP) + nft_data_copy(&elem.data, rbe->data); + elem.flags = rbe->flags; + + iter->err = iter->fn(ctx, set, iter, &elem); + if (iter->err < 0) + return; +cont: + iter->count++; + } +} + +static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[]) +{ + return sizeof(struct nft_rbtree); +} + +static int nft_rbtree_init(const struct nft_set *set, + const struct nlattr * const nla[]) +{ + struct nft_rbtree *priv = nft_set_priv(set); + + priv->root = RB_ROOT; + return 0; +} + +static void nft_rbtree_destroy(const struct nft_set *set) +{ + struct nft_rbtree *priv = nft_set_priv(set); + struct nft_rbtree_elem *rbe; + struct rb_node *node; + + while ((node = priv->root.rb_node) != NULL) { + rb_erase(node, &priv->root); + rbe = rb_entry(node, struct nft_rbtree_elem, node); + nft_rbtree_elem_destroy(set, rbe); + } +} + +static struct nft_set_ops nft_rbtree_ops __read_mostly = { + .privsize = nft_rbtree_privsize, + .init = nft_rbtree_init, + .destroy = nft_rbtree_destroy, + .insert = nft_rbtree_insert, + .remove = nft_rbtree_remove, + .get = nft_rbtree_get, + .lookup = nft_rbtree_lookup, + .walk = nft_rbtree_walk, + .features = NFT_SET_INTERVAL | NFT_SET_MAP, + .owner = THIS_MODULE, +}; + +static int __init nft_rbtree_module_init(void) +{ + return nft_register_set(&nft_rbtree_ops); +} + +static void __exit nft_rbtree_module_exit(void) +{ + nft_unregister_set(&nft_rbtree_ops); +} + +module_init(nft_rbtree_module_init); +module_exit(nft_rbtree_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_SET(); diff --git a/net/netfilter/nft_set.c b/net/netfilter/nft_set.c deleted file mode 100644 index 7b7c8354c327..000000000000 --- a/net/netfilter/nft_set.c +++ /dev/null @@ -1,381 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct nft_set { - struct rb_root root; - enum nft_registers sreg:8; - enum nft_registers dreg:8; - u8 klen; - u8 dlen; - u16 flags; -}; - -struct nft_set_elem { - struct rb_node node; - enum nft_set_elem_flags flags; - struct nft_data key; - struct nft_data data[]; -}; - -static void nft_set_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) -{ - const struct nft_set *priv = nft_expr_priv(expr); - const struct rb_node *parent = priv->root.rb_node; - const struct nft_set_elem *elem, *interval = NULL; - const struct nft_data *key = &data[priv->sreg]; - int d; - - while (parent != NULL) { - elem = rb_entry(parent, struct nft_set_elem, node); - - d = nft_data_cmp(&elem->key, key, priv->klen); - if (d < 0) { - parent = parent->rb_left; - interval = elem; - } else if (d > 0) - parent = parent->rb_right; - else { -found: - if (elem->flags & NFT_SE_INTERVAL_END) - goto out; - if (priv->flags & NFT_SET_MAP) - nft_data_copy(&data[priv->dreg], elem->data); - return; - } - } - - if (priv->flags & NFT_SET_INTERVAL && interval != NULL) { - elem = interval; - goto found; - } -out: - data[NFT_REG_VERDICT].verdict = NFT_BREAK; -} - -static void nft_set_elem_destroy(const struct nft_expr *expr, - struct nft_set_elem *elem) -{ - const struct nft_set *priv = nft_expr_priv(expr); - - nft_data_uninit(&elem->key, NFT_DATA_VALUE); - if (priv->flags & NFT_SET_MAP) - nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg)); - kfree(elem); -} - -static const struct nla_policy nft_se_policy[NFTA_SE_MAX + 1] = { - [NFTA_SE_KEY] = { .type = NLA_NESTED }, - [NFTA_SE_DATA] = { .type = NLA_NESTED }, - [NFTA_SE_FLAGS] = { .type = NLA_U32 }, -}; - -static int nft_set_elem_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr *nla, - struct nft_set_elem **new) -{ - struct nft_set *priv = nft_expr_priv(expr); - struct nlattr *tb[NFTA_SE_MAX + 1]; - struct nft_set_elem *elem; - struct nft_data_desc d1, d2; - enum nft_set_elem_flags flags = 0; - unsigned int size; - int err; - - err = nla_parse_nested(tb, NFTA_SE_MAX, nla, nft_se_policy); - if (err < 0) - return err; - - if (tb[NFTA_SE_KEY] == NULL) - return -EINVAL; - - if (tb[NFTA_SE_FLAGS] != NULL) { - flags = ntohl(nla_get_be32(tb[NFTA_SE_FLAGS])); - if (flags & ~NFT_SE_INTERVAL_END) - return -EINVAL; - } - - size = sizeof(*elem); - if (priv->flags & NFT_SET_MAP) { - if (tb[NFTA_SE_DATA] == NULL && !(flags & NFT_SE_INTERVAL_END)) - return -EINVAL; - size += sizeof(elem->data[0]); - } else { - if (tb[NFTA_SE_DATA] != NULL) - return -EINVAL; - } - - elem = kzalloc(size, GFP_KERNEL); - if (elem == NULL) - return -ENOMEM; - elem->flags = flags; - - err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_SE_KEY]); - if (err < 0) - goto err1; - err = -EINVAL; - if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen) - goto err2; - - if (tb[NFTA_SE_DATA] != NULL) { - err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_SE_DATA]); - if (err < 0) - goto err2; - err = -EINVAL; - if (priv->dreg != NFT_REG_VERDICT && d2.len != priv->dlen) - goto err2; - err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type); - if (err < 0) - goto err3; - } - - *new = elem; - return 0; - -err3: - nft_data_uninit(elem->data, d2.type); -err2: - nft_data_uninit(&elem->key, d1.type); -err1: - kfree(elem); - return err; -} - -static int nft_set_elem_dump(struct sk_buff *skb, const struct nft_expr *expr, - const struct nft_set_elem *elem) - -{ - const struct nft_set *priv = nft_expr_priv(expr); - struct nlattr *nest; - - nest = nla_nest_start(skb, NFTA_LIST_ELEM); - if (nest == NULL) - goto nla_put_failure; - - if (nft_data_dump(skb, NFTA_SE_KEY, &elem->key, - NFT_DATA_VALUE, priv->klen) < 0) - goto nla_put_failure; - - if (priv->flags & NFT_SET_MAP && !(elem->flags & NFT_SE_INTERVAL_END)) { - if (nft_data_dump(skb, NFTA_SE_DATA, elem->data, - nft_dreg_to_type(priv->dreg), priv->dlen) < 0) - goto nla_put_failure; - } - - if (elem->flags){ - if (nla_put_be32(skb, NFTA_SE_FLAGS, htonl(elem->flags))) - goto nla_put_failure; - } - - nla_nest_end(skb, nest); - return 0; - -nla_put_failure: - return -1; -} - -static void nft_set_destroy(const struct nft_expr *expr) -{ - struct nft_set *priv = nft_expr_priv(expr); - struct nft_set_elem *elem; - struct rb_node *node; - - while ((node = priv->root.rb_node) != NULL) { - rb_erase(node, &priv->root); - elem = rb_entry(node, struct nft_set_elem, node); - nft_set_elem_destroy(expr, elem); - } -} - -static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { - [NFTA_SET_FLAGS] = { .type = NLA_U32 }, - [NFTA_SET_SREG] = { .type = NLA_U32 }, - [NFTA_SET_DREG] = { .type = NLA_U32 }, - [NFTA_SET_KLEN] = { .type = NLA_U32 }, - [NFTA_SET_DLEN] = { .type = NLA_U32 }, - [NFTA_SET_ELEMENTS] = { .type = NLA_NESTED }, -}; - -static int nft_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_set *priv = nft_expr_priv(expr); - struct nft_set_elem *elem, *uninitialized_var(new); - struct rb_node *parent, **p; - const struct nlattr *nla; - int err, rem, d; - - if (tb[NFTA_SET_SREG] == NULL || - tb[NFTA_SET_KLEN] == NULL || - tb[NFTA_SET_ELEMENTS] == NULL) - return -EINVAL; - - priv->root = RB_ROOT; - - if (tb[NFTA_SET_FLAGS] != NULL) { - priv->flags = ntohl(nla_get_be32(tb[NFTA_SET_FLAGS])); - if (priv->flags & ~(NFT_SET_INTERVAL | NFT_SET_MAP)) - return -EINVAL; - } - - priv->sreg = ntohl(nla_get_be32(tb[NFTA_SET_SREG])); - err = nft_validate_input_register(priv->sreg); - if (err < 0) - return err; - - if (tb[NFTA_SET_DREG] != NULL) { - if (!(priv->flags & NFT_SET_MAP)) - return -EINVAL; - if (tb[NFTA_SET_DLEN] == NULL) - return -EINVAL; - - priv->dreg = ntohl(nla_get_be32(tb[NFTA_SET_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - - if (priv->dreg == NFT_REG_VERDICT) - priv->dlen = FIELD_SIZEOF(struct nft_data, data); - else { - priv->dlen = ntohl(nla_get_be32(tb[NFTA_SET_DLEN])); - if (priv->dlen == 0 || - priv->dlen > FIELD_SIZEOF(struct nft_data, data)) - return -EINVAL; - } - } else { - if (priv->flags & NFT_SET_MAP) - return -EINVAL; - if (tb[NFTA_SET_DLEN] != NULL) - return -EINVAL; - } - - priv->klen = ntohl(nla_get_be32(tb[NFTA_SET_KLEN])); - if (priv->klen == 0 || - priv->klen > FIELD_SIZEOF(struct nft_data, data)) - return -EINVAL; - - nla_for_each_nested(nla, tb[NFTA_SET_ELEMENTS], rem) { - err = -EINVAL; - if (nla_type(nla) != NFTA_LIST_ELEM) - goto err1; - - err = nft_set_elem_init(ctx, expr, nla, &new); - if (err < 0) - goto err1; - - parent = NULL; - p = &priv->root.rb_node; - while (*p != NULL) { - parent = *p; - elem = rb_entry(parent, struct nft_set_elem, node); - d = nft_data_cmp(&elem->key, &new->key, priv->klen); - if (d < 0) - p = &parent->rb_left; - else if (d > 0) - p = &parent->rb_right; - else { - err = -EEXIST; - goto err2; - } - } - rb_link_node(&new->node, parent, p); - rb_insert_color(&new->node, &priv->root); - } - - return 0; - -err2: - nft_set_elem_destroy(expr, new); -err1: - nft_set_destroy(expr); - return err; -} - -static int nft_set_dump(struct sk_buff *skb, const struct nft_expr *expr) -{ - struct nft_set *priv = nft_expr_priv(expr); - const struct nft_set_elem *elem; - struct rb_node *node; - struct nlattr *list; - - if (priv->flags) { - if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(priv->flags))) - goto nla_put_failure; - } - - if (nla_put_be32(skb, NFTA_SET_SREG, htonl(priv->sreg))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_SET_KLEN, htonl(priv->klen))) - goto nla_put_failure; - - if (priv->flags & NFT_SET_MAP) { - if (nla_put_be32(skb, NFTA_SET_DREG, htonl(priv->dreg))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_SET_DLEN, htonl(priv->dlen))) - goto nla_put_failure; - } - - list = nla_nest_start(skb, NFTA_SET_ELEMENTS); - if (list == NULL) - goto nla_put_failure; - - for (node = rb_first(&priv->root); node; node = rb_next(node)) { - elem = rb_entry(node, struct nft_set_elem, node); - if (nft_set_elem_dump(skb, expr, elem) < 0) - goto nla_put_failure; - } - - nla_nest_end(skb, list); - return 0; - -nla_put_failure: - return -1; -} - -static struct nft_expr_ops nft_set_ops __read_mostly = { - .name = "set", - .size = NFT_EXPR_SIZE(sizeof(struct nft_set)), - .owner = THIS_MODULE, - .eval = nft_set_eval, - .init = nft_set_init, - .destroy = nft_set_destroy, - .dump = nft_set_dump, - .policy = nft_set_policy, - .maxattr = NFTA_SET_MAX, -}; - -static int __init nft_set_module_init(void) -{ - return nft_register_expr(&nft_set_ops); -} - -static void __exit nft_set_module_exit(void) -{ - nft_unregister_expr(&nft_set_ops); -} - -module_init(nft_set_module_init); -module_exit(nft_set_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_EXPR("set"); -- cgit v1.2.3-55-g7522 From ef1f7df9170dbd875ce198ba84e6ab80f6fc139e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Oct 2013 11:41:20 +0200 Subject: netfilter: nf_tables: expression ops overloading Split the expression ops into two parts and support overloading of the runtime expression ops based on the requested function through a ->select_ops() callback. This can be used to provide optimized implementations, for instance for loading small aligned amounts of data from the packet or inlining frequently used operations into the main evaluation loop. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 42 +++++++++----- net/ipv4/netfilter/nf_table_nat_ipv4.c | 18 ++++-- net/ipv4/netfilter/nft_reject_ipv4.c | 18 ++++-- net/netfilter/nf_tables_api.c | 101 +++++++++++++++++++-------------- net/netfilter/nft_bitwise.c | 18 ++++-- net/netfilter/nft_byteorder.c | 18 ++++-- net/netfilter/nft_cmp.c | 18 ++++-- net/netfilter/nft_counter.c | 22 ++++--- net/netfilter/nft_ct.c | 18 ++++-- net/netfilter/nft_expr_template.c | 20 ++++--- net/netfilter/nft_exthdr.c | 16 ++++-- net/netfilter/nft_immediate.c | 18 ++++-- net/netfilter/nft_limit.c | 18 ++++-- net/netfilter/nft_log.c | 18 ++++-- net/netfilter/nft_lookup.c | 16 ++++-- net/netfilter/nft_meta.c | 18 ++++-- net/netfilter/nft_payload.c | 18 ++++-- 17 files changed, 267 insertions(+), 148 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 677dd79380ed..66d0359702c6 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -222,25 +222,45 @@ extern int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, extern void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); + /** - * struct nft_expr_ops - nf_tables expression operations + * struct nft_expr_type - nf_tables expression type * - * @eval: Expression evaluation function - * @init: initialization function - * @destroy: destruction function - * @dump: function to dump parameters + * @select_ops: function to select nft_expr_ops + * @ops: default ops, used when no select_ops functions is present * @list: used internally * @name: Identifier * @owner: module reference * @policy: netlink attribute policy * @maxattr: highest netlink attribute number + */ +struct nft_expr_type { + const struct nft_expr_ops *(*select_ops)(const struct nlattr * const tb[]); + const struct nft_expr_ops *ops; + struct list_head list; + const char *name; + struct module *owner; + const struct nla_policy *policy; + unsigned int maxattr; +}; + +/** + * struct nft_expr_ops - nf_tables expression operations + * + * @eval: Expression evaluation function * @size: full expression size, including private data size + * @init: initialization function + * @destroy: destruction function + * @dump: function to dump parameters + * @type: expression type */ struct nft_expr; struct nft_expr_ops { void (*eval)(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1], const struct nft_pktinfo *pkt); + unsigned int size; + int (*init)(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); @@ -248,14 +268,10 @@ struct nft_expr_ops { int (*dump)(struct sk_buff *skb, const struct nft_expr *expr); const struct nft_data * (*get_verdict)(const struct nft_expr *expr); - struct list_head list; - const char *name; - struct module *owner; - const struct nla_policy *policy; - unsigned int maxattr; - unsigned int size; + const struct nft_expr_type *type; }; +#define NFT_EXPR_MAXATTR 16 #define NFT_EXPR_SIZE(size) (sizeof(struct nft_expr) + \ ALIGN(size, __alignof__(struct nft_expr))) @@ -418,8 +434,8 @@ extern void nft_unregister_afinfo(struct nft_af_info *); extern int nft_register_table(struct nft_table *, int family); extern void nft_unregister_table(struct nft_table *, int family); -extern int nft_register_expr(struct nft_expr_ops *); -extern void nft_unregister_expr(struct nft_expr_ops *); +extern int nft_register_expr(struct nft_expr_type *); +extern void nft_unregister_expr(struct nft_expr_type *); #define MODULE_ALIAS_NFT_FAMILY(family) \ MODULE_ALIAS("nft-afinfo-" __stringify(family)) diff --git a/net/ipv4/netfilter/nf_table_nat_ipv4.c b/net/ipv4/netfilter/nf_table_nat_ipv4.c index 2a6f184c10bd..2ecce39077a3 100644 --- a/net/ipv4/netfilter/nf_table_nat_ipv4.c +++ b/net/ipv4/netfilter/nf_table_nat_ipv4.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -149,15 +149,21 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_nat_ops __read_mostly = { - .name = "nat", +static struct nft_expr_type nft_nat_type; +static const struct nft_expr_ops nft_nat_ops = { + .type = &nft_nat_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), - .owner = THIS_MODULE, .eval = nft_nat_eval, .init = nft_nat_init, .dump = nft_nat_dump, +}; + +static struct nft_expr_type nft_nat_type __read_mostly = { + .name = "nat", + .ops = &nft_nat_ops, .policy = nft_nat_policy, .maxattr = NFTA_NAT_MAX, + .owner = THIS_MODULE, }; /* @@ -382,7 +388,7 @@ static int __init nf_table_nat_init(void) if (err < 0) goto err1; - err = nft_register_expr(&nft_nat_ops); + err = nft_register_expr(&nft_nat_type); if (err < 0) goto err2; @@ -396,7 +402,7 @@ err1: static void __exit nf_table_nat_exit(void) { - nft_unregister_expr(&nft_nat_ops); + nft_unregister_expr(&nft_nat_type); nft_unregister_table(&nf_table_nat_ipv4, AF_INET); } diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index b4ee8d3bb1e4..fff5ba1a33b7 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -88,25 +88,31 @@ nla_put_failure: return -1; } -static struct nft_expr_ops reject_ops __read_mostly = { - .name = "reject", +static struct nft_expr_type nft_reject_type; +static const struct nft_expr_ops nft_reject_ops = { + .type = &nft_reject_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), - .owner = THIS_MODULE, .eval = nft_reject_eval, .init = nft_reject_init, .dump = nft_reject_dump, +}; + +static struct nft_expr_type nft_reject_type __read_mostly = { + .name = "reject", + .ops = &nft_reject_ops, .policy = nft_reject_policy, .maxattr = NFTA_REJECT_MAX, + .owner = THIS_MODULE, }; static int __init nft_reject_module_init(void) { - return nft_register_expr(&reject_ops); + return nft_register_expr(&nft_reject_type); } static void __exit nft_reject_module_exit(void) { - nft_unregister_expr(&reject_ops); + nft_unregister_expr(&nft_reject_type); } module_init(nft_reject_module_init); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5092c817c222..6dac9a3c9c40 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -840,64 +840,64 @@ static void nft_ctx_init(struct nft_ctx *ctx, */ /** - * nft_register_expr - register nf_tables expr operations - * @ops: expr operations + * nft_register_expr - register nf_tables expr type + * @ops: expr type * - * Registers the expr operations for use with nf_tables. Returns zero on + * Registers the expr type for use with nf_tables. Returns zero on * success or a negative errno code otherwise. */ -int nft_register_expr(struct nft_expr_ops *ops) +int nft_register_expr(struct nft_expr_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail(&ops->list, &nf_tables_expressions); + list_add_tail(&type->list, &nf_tables_expressions); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } EXPORT_SYMBOL_GPL(nft_register_expr); /** - * nft_unregister_expr - unregister nf_tables expr operations - * @ops: expr operations + * nft_unregister_expr - unregister nf_tables expr type + * @ops: expr type * - * Unregisters the expr operations for use with nf_tables. + * Unregisters the expr typefor use with nf_tables. */ -void nft_unregister_expr(struct nft_expr_ops *ops) +void nft_unregister_expr(struct nft_expr_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del(&ops->list); + list_del(&type->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_expr); -static const struct nft_expr_ops *__nft_expr_ops_get(struct nlattr *nla) +static const struct nft_expr_type *__nft_expr_type_get(struct nlattr *nla) { - const struct nft_expr_ops *ops; + const struct nft_expr_type *type; - list_for_each_entry(ops, &nf_tables_expressions, list) { - if (!nla_strcmp(nla, ops->name)) - return ops; + list_for_each_entry(type, &nf_tables_expressions, list) { + if (!nla_strcmp(nla, type->name)) + return type; } return NULL; } -static const struct nft_expr_ops *nft_expr_ops_get(struct nlattr *nla) +static const struct nft_expr_type *nft_expr_type_get(struct nlattr *nla) { - const struct nft_expr_ops *ops; + const struct nft_expr_type *type; if (nla == NULL) return ERR_PTR(-EINVAL); - ops = __nft_expr_ops_get(nla); - if (ops != NULL && try_module_get(ops->owner)) - return ops; + type = __nft_expr_type_get(nla); + if (type != NULL && try_module_get(type->owner)) + return type; #ifdef CONFIG_MODULES - if (ops == NULL) { + if (type == NULL) { nfnl_unlock(NFNL_SUBSYS_NFTABLES); request_module("nft-expr-%.*s", nla_len(nla), (char *)nla_data(nla)); nfnl_lock(NFNL_SUBSYS_NFTABLES); - if (__nft_expr_ops_get(nla)) + if (__nft_expr_type_get(nla)) return ERR_PTR(-EAGAIN); } #endif @@ -912,7 +912,7 @@ static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = { static int nf_tables_fill_expr_info(struct sk_buff *skb, const struct nft_expr *expr) { - if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->name)) + if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->type->name)) goto nla_put_failure; if (expr->ops->dump) { @@ -932,28 +932,52 @@ nla_put_failure: struct nft_expr_info { const struct nft_expr_ops *ops; - struct nlattr *tb[NFTA_EXPR_MAX + 1]; + struct nlattr *tb[NFT_EXPR_MAXATTR + 1]; }; static int nf_tables_expr_parse(const struct nlattr *nla, struct nft_expr_info *info) { + const struct nft_expr_type *type; const struct nft_expr_ops *ops; + struct nlattr *tb[NFTA_EXPR_MAX + 1]; int err; - err = nla_parse_nested(info->tb, NFTA_EXPR_MAX, nla, nft_expr_policy); + err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy); if (err < 0) return err; - ops = nft_expr_ops_get(info->tb[NFTA_EXPR_NAME]); - if (IS_ERR(ops)) - return PTR_ERR(ops); + type = nft_expr_type_get(tb[NFTA_EXPR_NAME]); + if (IS_ERR(type)) + return PTR_ERR(type); + + if (tb[NFTA_EXPR_DATA]) { + err = nla_parse_nested(info->tb, type->maxattr, + tb[NFTA_EXPR_DATA], type->policy); + if (err < 0) + goto err1; + } else + memset(info->tb, 0, sizeof(info->tb[0]) * (type->maxattr + 1)); + + if (type->select_ops != NULL) { + ops = type->select_ops((const struct nlattr * const *)info->tb); + if (IS_ERR(ops)) { + err = PTR_ERR(ops); + goto err1; + } + } else + ops = type->ops; + info->ops = ops; return 0; + +err1: + module_put(type->owner); + return err; } static int nf_tables_newexpr(const struct nft_ctx *ctx, - struct nft_expr_info *info, + const struct nft_expr_info *info, struct nft_expr *expr) { const struct nft_expr_ops *ops = info->ops; @@ -961,23 +985,11 @@ static int nf_tables_newexpr(const struct nft_ctx *ctx, expr->ops = ops; if (ops->init) { - struct nlattr *ma[ops->maxattr + 1]; - - if (info->tb[NFTA_EXPR_DATA]) { - err = nla_parse_nested(ma, ops->maxattr, - info->tb[NFTA_EXPR_DATA], - ops->policy); - if (err < 0) - goto err1; - } else - memset(ma, 0, sizeof(ma[0]) * (ops->maxattr + 1)); - - err = ops->init(ctx, expr, (const struct nlattr **)ma); + err = ops->init(ctx, expr, (const struct nlattr **)info->tb); if (err < 0) goto err1; } - info->ops = NULL; return 0; err1: @@ -989,7 +1001,7 @@ static void nf_tables_expr_destroy(struct nft_expr *expr) { if (expr->ops->destroy) expr->ops->destroy(expr); - module_put(expr->ops->owner); + module_put(expr->ops->type->owner); } /* @@ -1313,6 +1325,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, err = nf_tables_newexpr(&ctx, &info[i], expr); if (err < 0) goto err2; + info[i].ops = NULL; expr = nft_expr_next(expr); } @@ -1341,7 +1354,7 @@ err2: err1: for (i = 0; i < n; i++) { if (info[i].ops != NULL) - module_put(info[i].ops->owner); + module_put(info[i].ops->type->owner); } return err; } diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 0f7501506367..4fb6ee2c1106 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -118,23 +118,29 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_bitwise_ops __read_mostly = { - .name = "bitwise", +static struct nft_expr_type nft_bitwise_type; +static const struct nft_expr_ops nft_bitwise_ops = { + .type = &nft_bitwise_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), - .owner = THIS_MODULE, .eval = nft_bitwise_eval, .init = nft_bitwise_init, .dump = nft_bitwise_dump, +}; + +static struct nft_expr_type nft_bitwise_type __read_mostly = { + .name = "bitwise", + .ops = &nft_bitwise_ops, .policy = nft_bitwise_policy, .maxattr = NFTA_BITWISE_MAX, + .owner = THIS_MODULE, }; int __init nft_bitwise_module_init(void) { - return nft_register_expr(&nft_bitwise_ops); + return nft_register_expr(&nft_bitwise_type); } void nft_bitwise_module_exit(void) { - nft_unregister_expr(&nft_bitwise_ops); + nft_unregister_expr(&nft_bitwise_type); } diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 8b0657a4d17b..c39ed8d29df1 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -145,23 +145,29 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_byteorder_ops __read_mostly = { - .name = "byteorder", +static struct nft_expr_type nft_byteorder_type; +static const struct nft_expr_ops nft_byteorder_ops = { + .type = &nft_byteorder_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), - .owner = THIS_MODULE, .eval = nft_byteorder_eval, .init = nft_byteorder_init, .dump = nft_byteorder_dump, +}; + +static struct nft_expr_type nft_byteorder_type __read_mostly = { + .name = "byteorder", + .ops = &nft_byteorder_ops, .policy = nft_byteorder_policy, .maxattr = NFTA_BYTEORDER_MAX, + .owner = THIS_MODULE, }; int __init nft_byteorder_module_init(void) { - return nft_register_expr(&nft_byteorder_ops); + return nft_register_expr(&nft_byteorder_type); } void nft_byteorder_module_exit(void) { - nft_unregister_expr(&nft_byteorder_ops); + nft_unregister_expr(&nft_byteorder_type); } diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index e734d670120a..2c9d5fef2e63 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -124,23 +124,29 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_cmp_ops __read_mostly = { - .name = "cmp", +static struct nft_expr_type nft_cmp_type; +static const struct nft_expr_ops nft_cmp_ops = { + .type = &nft_cmp_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)), - .owner = THIS_MODULE, .eval = nft_cmp_eval, .init = nft_cmp_init, .dump = nft_cmp_dump, +}; + +static struct nft_expr_type nft_cmp_type __read_mostly = { + .name = "cmp", + .ops = &nft_cmp_ops, .policy = nft_cmp_policy, .maxattr = NFTA_CMP_MAX, + .owner = THIS_MODULE, }; int __init nft_cmp_module_init(void) { - return nft_register_expr(&nft_cmp_ops); + return nft_register_expr(&nft_cmp_type); } void nft_cmp_module_exit(void) { - nft_unregister_expr(&nft_cmp_ops); + nft_unregister_expr(&nft_cmp_type); } diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 33c5d36819bb..c89ee486ce54 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -78,25 +78,31 @@ static int nft_counter_init(const struct nft_ctx *ctx, return 0; } -static struct nft_expr_ops nft_counter_ops __read_mostly = { - .name = "counter", +static struct nft_expr_type nft_counter_type; +static const struct nft_expr_ops nft_counter_ops = { + .type = &nft_counter_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_counter)), - .policy = nft_counter_policy, - .maxattr = NFTA_COUNTER_MAX, - .owner = THIS_MODULE, .eval = nft_counter_eval, .init = nft_counter_init, .dump = nft_counter_dump, }; +static struct nft_expr_type nft_counter_type __read_mostly = { + .name = "counter", + .ops = &nft_counter_ops, + .policy = nft_counter_policy, + .maxattr = NFTA_COUNTER_MAX, + .owner = THIS_MODULE, +}; + static int __init nft_counter_module_init(void) { - return nft_register_expr(&nft_counter_ops); + return nft_register_expr(&nft_counter_type); } static void __exit nft_counter_module_exit(void) { - nft_unregister_expr(&nft_counter_ops); + nft_unregister_expr(&nft_counter_type); } module_init(nft_counter_module_init); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index a1756d678226..955f4e6e7089 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -222,26 +222,32 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_ct_ops __read_mostly = { - .name = "ct", +static struct nft_expr_type nft_ct_type; +static const struct nft_expr_ops nft_ct_ops = { + .type = &nft_ct_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), - .owner = THIS_MODULE, .eval = nft_ct_eval, .init = nft_ct_init, .destroy = nft_ct_destroy, .dump = nft_ct_dump, +}; + +static struct nft_expr_type nft_ct_type __read_mostly = { + .name = "ct", + .ops = &nft_ct_ops, .policy = nft_ct_policy, .maxattr = NFTA_CT_MAX, + .owner = THIS_MODULE, }; static int __init nft_ct_module_init(void) { - return nft_register_expr(&nft_ct_ops); + return nft_register_expr(&nft_ct_type); } static void __exit nft_ct_module_exit(void) { - nft_unregister_expr(&nft_ct_ops); + nft_unregister_expr(&nft_ct_type); } module_init(nft_ct_module_init); diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c index 9fc8eb308193..b6eed4d5a096 100644 --- a/net/netfilter/nft_expr_template.c +++ b/net/netfilter/nft_expr_template.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -33,7 +33,7 @@ static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = { static int nft_template_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr *tb[]) + const struct nlattr * const tb[]) { struct nft_template *priv = nft_expr_priv(expr); @@ -58,26 +58,32 @@ nla_put_failure: return -1; } -static struct nft_expr_ops template_ops __read_mostly = { - .name = "template", +static struct nft_expr_type nft_template_type; +static const struct nft_expr_ops nft_template_ops = { + .type = &nft_template_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_template)), - .owner = THIS_MODULE, .eval = nft_template_eval, .init = nft_template_init, .destroy = nft_template_destroy, .dump = nft_template_dump, +}; + +static struct nft_expr_type nft_template_type __read_mostly = { + .name = "template", + .ops = &nft_template_ops, .policy = nft_template_policy, .maxattr = NFTA_TEMPLATE_MAX, + .owner = THIS_MODULE, }; static int __init nft_template_module_init(void) { - return nft_register_expr(&template_ops); + return nft_register_expr(&nft_template_type); } static void __exit nft_template_module_exit(void) { - nft_unregister_expr(&template_ops); + nft_unregister_expr(&nft_template_type); } module_init(nft_template_module_init); diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 21c6a6b7b662..8e0bb75e7c51 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -98,25 +98,31 @@ nla_put_failure: return -1; } -static struct nft_expr_ops exthdr_ops __read_mostly = { - .name = "exthdr", +static struct nft_expr_type nft_exthdr_type; +static const struct nft_expr_ops nft_exthdr_ops = { + .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), - .owner = THIS_MODULE, .eval = nft_exthdr_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, +}; + +static struct nft_expr_type nft_exthdr_type __read_mostly = { + .name = "exthdr", + .ops = &nft_exthdr_ops, .policy = nft_exthdr_policy, .maxattr = NFTA_EXTHDR_MAX, + .owner = THIS_MODULE, }; static int __init nft_exthdr_module_init(void) { - return nft_register_expr(&exthdr_ops); + return nft_register_expr(&nft_exthdr_type); } static void __exit nft_exthdr_module_exit(void) { - nft_unregister_expr(&exthdr_ops); + nft_unregister_expr(&nft_exthdr_type); } module_init(nft_exthdr_module_init); diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 78334bf37007..1bfeeaf865b6 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -100,25 +100,31 @@ static const struct nft_data *nft_immediate_get_verdict(const struct nft_expr *e return NULL; } -static struct nft_expr_ops nft_imm_ops __read_mostly = { - .name = "immediate", +static struct nft_expr_type nft_imm_type; +static const struct nft_expr_ops nft_imm_ops = { + .type = &nft_imm_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), - .owner = THIS_MODULE, .eval = nft_immediate_eval, .init = nft_immediate_init, .destroy = nft_immediate_destroy, .dump = nft_immediate_dump, .get_verdict = nft_immediate_get_verdict, +}; + +static struct nft_expr_type nft_imm_type __read_mostly = { + .name = "immediate", + .ops = &nft_imm_ops, .policy = nft_immediate_policy, .maxattr = NFTA_IMMEDIATE_MAX, + .owner = THIS_MODULE, }; int __init nft_immediate_module_init(void) { - return nft_register_expr(&nft_imm_ops); + return nft_register_expr(&nft_imm_type); } void nft_immediate_module_exit(void) { - nft_unregister_expr(&nft_imm_ops); + nft_unregister_expr(&nft_imm_type); } diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index e0e3fc8aebc3..85da5bd02f64 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -84,25 +84,31 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_limit_ops __read_mostly = { - .name = "limit", +static struct nft_expr_type nft_limit_type; +static const struct nft_expr_ops nft_limit_ops = { + .type = &nft_limit_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_limit)), - .owner = THIS_MODULE, .eval = nft_limit_eval, .init = nft_limit_init, .dump = nft_limit_dump, +}; + +static struct nft_expr_type nft_limit_type __read_mostly = { + .name = "limit", + .ops = &nft_limit_ops, .policy = nft_limit_policy, .maxattr = NFTA_LIMIT_MAX, + .owner = THIS_MODULE, }; static int __init nft_limit_module_init(void) { - return nft_register_expr(&nft_limit_ops); + return nft_register_expr(&nft_limit_type); } static void __exit nft_limit_module_exit(void) { - nft_unregister_expr(&nft_limit_ops); + nft_unregister_expr(&nft_limit_type); } module_init(nft_limit_module_init); diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index da495c3b1e7e..57cad072a13e 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -110,26 +110,32 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_log_ops __read_mostly = { - .name = "log", +static struct nft_expr_type nft_log_type; +static const struct nft_expr_ops nft_log_ops = { + .type = &nft_log_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_log)), - .owner = THIS_MODULE, .eval = nft_log_eval, .init = nft_log_init, .destroy = nft_log_destroy, .dump = nft_log_dump, +}; + +static struct nft_expr_type nft_log_type __read_mostly = { + .name = "log", + .ops = &nft_log_ops, .policy = nft_log_policy, .maxattr = NFTA_LOG_MAX, + .owner = THIS_MODULE, }; static int __init nft_log_module_init(void) { - return nft_register_expr(&nft_log_ops); + return nft_register_expr(&nft_log_type); } static void __exit nft_log_module_exit(void) { - nft_unregister_expr(&nft_log_ops); + nft_unregister_expr(&nft_log_type); } module_init(nft_log_module_init); diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 4962d2173678..8a6116b75b5a 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -112,24 +112,30 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_lookup_ops __read_mostly = { - .name = "lookup", +static struct nft_expr_type nft_lookup_type; +static const struct nft_expr_ops nft_lookup_ops = { + .type = &nft_lookup_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), - .owner = THIS_MODULE, .eval = nft_lookup_eval, .init = nft_lookup_init, .destroy = nft_lookup_destroy, .dump = nft_lookup_dump, +}; + +static struct nft_expr_type nft_lookup_type __read_mostly = { + .name = "lookup", + .ops = &nft_lookup_ops, .policy = nft_lookup_policy, .maxattr = NFTA_LOOKUP_MAX, + .owner = THIS_MODULE, }; int __init nft_lookup_module_init(void) { - return nft_register_expr(&nft_lookup_ops); + return nft_register_expr(&nft_lookup_type); } void nft_lookup_module_exit(void) { - nft_unregister_expr(&nft_lookup_ops); + nft_unregister_expr(&nft_lookup_type); } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 96735aa2f039..8c28220a90b3 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -193,25 +193,31 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_meta_ops __read_mostly = { - .name = "meta", +static struct nft_expr_type nft_meta_type; +static const struct nft_expr_ops nft_meta_ops = { + .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), - .owner = THIS_MODULE, .eval = nft_meta_eval, .init = nft_meta_init, .dump = nft_meta_dump, +}; + +static struct nft_expr_type nft_meta_type __read_mostly = { + .name = "meta", + .ops = &nft_meta_ops, .policy = nft_meta_policy, .maxattr = NFTA_META_MAX, + .owner = THIS_MODULE, }; static int __init nft_meta_module_init(void) { - return nft_register_expr(&nft_meta_ops); + return nft_register_expr(&nft_meta_type); } static void __exit nft_meta_module_exit(void) { - nft_unregister_expr(&nft_meta_ops); + nft_unregister_expr(&nft_meta_type); } module_init(nft_meta_module_init); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 329f134b3f89..d99db6e37fb1 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -115,23 +115,29 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_payload_ops __read_mostly = { - .name = "payload", +static struct nft_expr_type nft_payload_type; +static const struct nft_expr_ops nft_payload_ops = { + .type = &nft_payload_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)), - .owner = THIS_MODULE, .eval = nft_payload_eval, .init = nft_payload_init, .dump = nft_payload_dump, +}; + +static struct nft_expr_type nft_payload_type __read_mostly = { + .name = "payload", + .ops = &nft_payload_ops, .policy = nft_payload_policy, .maxattr = NFTA_PAYLOAD_MAX, + .owner = THIS_MODULE, }; int __init nft_payload_module_init(void) { - return nft_register_expr(&nft_payload_ops); + return nft_register_expr(&nft_payload_type); } void nft_payload_module_exit(void) { - nft_unregister_expr(&nft_payload_ops); + nft_unregister_expr(&nft_payload_type); } -- cgit v1.2.3-55-g7522 From cb7dbfd0390c9e244339f3270fe8649568241812 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Oct 2013 23:35:40 +0200 Subject: netfilter: nf_tables: add optimized data comparison for small values Add an optimized version of nft_data_cmp() that only handles values of to 4 bytes length. This patch includes original Patrick McHardy's patch entitled (nf_tables: inline nft_cmp_fast_eval() into main evaluation loop). Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 8 +++ net/netfilter/nf_tables_core.c | 18 ++++- net/netfilter/nft_cmp.c | 116 ++++++++++++++++++++++++++------- 3 files changed, 118 insertions(+), 24 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 283396c916e0..3df6a9be3bdd 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -7,6 +7,14 @@ extern void nf_tables_core_module_exit(void); extern int nft_immediate_module_init(void); extern void nft_immediate_module_exit(void); +struct nft_cmp_fast_expr { + u32 data; + enum nft_registers sreg:8; + u8 len; +}; + +extern const struct nft_expr_ops nft_cmp_fast_ops; + extern int nft_cmp_module_init(void); extern void nft_cmp_module_exit(void); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index fd0ecd3255c1..24000182c8e7 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -20,6 +20,18 @@ #include #include +static void nft_cmp_fast_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1]) +{ + const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); + u32 mask; + + mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - priv->len); + if ((data[priv->sreg].data[0] & mask) == priv->data) + return; + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + unsigned int nft_do_chain(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, @@ -48,7 +60,11 @@ next_rule: data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; list_for_each_entry_continue_rcu(rule, &chain->rules, list) { nft_rule_for_each_expr(expr, last, rule) { - expr->ops->eval(expr, data, &pkt); + if (expr->ops == &nft_cmp_fast_ops) + nft_cmp_fast_eval(expr, data); + else + expr->ops->eval(expr, data, &pkt); + if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE) break; } diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 2c9d5fef2e63..37134f3e84fb 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -75,32 +75,11 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct nft_data_desc desc; int err; - if (tb[NFTA_CMP_SREG] == NULL || - tb[NFTA_CMP_OP] == NULL || - tb[NFTA_CMP_DATA] == NULL) - return -EINVAL; - priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); - err = nft_validate_input_register(priv->sreg); - if (err < 0) - return err; - priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); - switch (priv->op) { - case NFT_CMP_EQ: - case NFT_CMP_NEQ: - case NFT_CMP_LT: - case NFT_CMP_LTE: - case NFT_CMP_GT: - case NFT_CMP_GTE: - break; - default: - return -EINVAL; - } err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]); - if (err < 0) - return err; + BUG_ON(err < 0); priv->len = desc.len; return 0; @@ -133,9 +112,100 @@ static const struct nft_expr_ops nft_cmp_ops = { .dump = nft_cmp_dump, }; +static int nft_cmp_fast_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc; + struct nft_data data; + u32 mask; + int err; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); + + err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); + BUG_ON(err < 0); + desc.len *= BITS_PER_BYTE; + + mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - desc.len); + priv->data = data.data[0] & mask; + priv->len = desc.len; + return 0; +} + +static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); + struct nft_data data; + + if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_CMP_OP, htonl(NFT_CMP_EQ))) + goto nla_put_failure; + + data.data[0] = priv->data; + if (nft_data_dump(skb, NFTA_CMP_DATA, &data, + NFT_DATA_VALUE, priv->len / BITS_PER_BYTE) < 0) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +const struct nft_expr_ops nft_cmp_fast_ops = { + .type = &nft_cmp_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_fast_expr)), + .eval = NULL, /* inlined */ + .init = nft_cmp_fast_init, + .dump = nft_cmp_fast_dump, +}; + +static const struct nft_expr_ops *nft_cmp_select_ops(const struct nlattr * const tb[]) +{ + struct nft_data_desc desc; + struct nft_data data; + enum nft_registers sreg; + enum nft_cmp_ops op; + int err; + + if (tb[NFTA_CMP_SREG] == NULL || + tb[NFTA_CMP_OP] == NULL || + tb[NFTA_CMP_DATA] == NULL) + return ERR_PTR(-EINVAL); + + sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); + err = nft_validate_input_register(sreg); + if (err < 0) + return ERR_PTR(err); + + op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); + switch (op) { + case NFT_CMP_EQ: + case NFT_CMP_NEQ: + case NFT_CMP_LT: + case NFT_CMP_LTE: + case NFT_CMP_GT: + case NFT_CMP_GTE: + break; + default: + return ERR_PTR(-EINVAL); + } + + err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); + if (err < 0) + return ERR_PTR(err); + + if (desc.len <= sizeof(u32) && op == NFT_CMP_EQ) + return &nft_cmp_fast_ops; + else + return &nft_cmp_ops; +} + static struct nft_expr_type nft_cmp_type __read_mostly = { .name = "cmp", - .ops = &nft_cmp_ops, + .select_ops = nft_cmp_select_ops, .policy = nft_cmp_policy, .maxattr = NFTA_CMP_MAX, .owner = THIS_MODULE, -- cgit v1.2.3-55-g7522 From c29b72e02573b8fe5e6cae5d192a6a4772e7bbd6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Oct 2013 11:06:41 +0200 Subject: netfilter: nft_payload: add optimized payload implementation for small loads Add an optimized payload expression implementation for small (up to 4 bytes) aligned data loads from the linear packet area. This patch also includes original Patrick McHardy's entitled (nf_tables: inline nft_payload_fast_eval() into main evaluation loop). Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 9 +++++ net/netfilter/nf_tables_core.c | 31 ++++++++++++++- net/netfilter/nft_payload.c | 69 +++++++++++++++++++++------------- 3 files changed, 81 insertions(+), 28 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 3df6a9be3bdd..fe7b16206a4e 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -27,6 +27,15 @@ extern void nft_bitwise_module_exit(void); extern int nft_byteorder_module_init(void); extern void nft_byteorder_module_exit(void); +struct nft_payload { + enum nft_payload_bases base:8; + u8 offset; + u8 len; + enum nft_registers dreg:8; +}; + +extern const struct nft_expr_ops nft_payload_fast_ops; + extern int nft_payload_module_init(void); extern void nft_payload_module_exit(void); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 24000182c8e7..9aede59ed2d7 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -32,6 +32,34 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr, data[NFT_REG_VERDICT].verdict = NFT_BREAK; } +static bool nft_payload_fast_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_payload *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + struct nft_data *dest = &data[priv->dreg]; + unsigned char *ptr; + + if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) + ptr = skb_network_header(skb); + else + ptr = skb_transport_header(skb); + + ptr += priv->offset; + + if (unlikely(ptr + priv->len >= skb_tail_pointer(skb))) + return false; + + if (priv->len == 2) + *(u16 *)dest->data = *(u16 *)ptr; + else if (priv->len == 4) + *(u32 *)dest->data = *(u32 *)ptr; + else + *(u8 *)dest->data = *(u8 *)ptr; + return true; +} + unsigned int nft_do_chain(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, @@ -62,7 +90,8 @@ next_rule: nft_rule_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, data); - else + else if (expr->ops != &nft_payload_fast_ops || + !nft_payload_fast_eval(expr, data, &pkt)) expr->ops->eval(expr, data, &pkt); if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index d99db6e37fb1..7cf13f7e1e94 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -17,13 +17,6 @@ #include #include -struct nft_payload { - enum nft_payload_bases base:8; - u8 offset; - u8 len; - enum nft_registers dreg:8; -}; - static void nft_payload_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1], const struct nft_pktinfo *pkt) @@ -71,27 +64,9 @@ static int nft_payload_init(const struct nft_ctx *ctx, struct nft_payload *priv = nft_expr_priv(expr); int err; - if (tb[NFTA_PAYLOAD_DREG] == NULL || - tb[NFTA_PAYLOAD_BASE] == NULL || - tb[NFTA_PAYLOAD_OFFSET] == NULL || - tb[NFTA_PAYLOAD_LEN] == NULL) - return -EINVAL; - - priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); - switch (priv->base) { - case NFT_PAYLOAD_LL_HEADER: - case NFT_PAYLOAD_NETWORK_HEADER: - case NFT_PAYLOAD_TRANSPORT_HEADER: - break; - default: - return -EOPNOTSUPP; - } - + priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); - if (priv->len == 0 || - priv->len > FIELD_SIZEOF(struct nft_data, data)) - return -EINVAL; priv->dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG])); err = nft_validate_output_register(priv->dreg); @@ -124,9 +99,49 @@ static const struct nft_expr_ops nft_payload_ops = { .dump = nft_payload_dump, }; +const struct nft_expr_ops nft_payload_fast_ops = { + .type = &nft_payload_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)), + .eval = nft_payload_eval, + .init = nft_payload_init, + .dump = nft_payload_dump, +}; + +static const struct nft_expr_ops *nft_payload_select_ops(const struct nlattr * const tb[]) +{ + enum nft_payload_bases base; + unsigned int offset, len; + + if (tb[NFTA_PAYLOAD_DREG] == NULL || + tb[NFTA_PAYLOAD_BASE] == NULL || + tb[NFTA_PAYLOAD_OFFSET] == NULL || + tb[NFTA_PAYLOAD_LEN] == NULL) + return ERR_PTR(-EINVAL); + + base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + switch (base) { + case NFT_PAYLOAD_LL_HEADER: + case NFT_PAYLOAD_NETWORK_HEADER: + case NFT_PAYLOAD_TRANSPORT_HEADER: + break; + default: + return ERR_PTR(-EOPNOTSUPP); + } + + offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); + len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + if (len == 0 || len > FIELD_SIZEOF(struct nft_data, data)) + return ERR_PTR(-EINVAL); + + if (len <= 4 && IS_ALIGNED(offset, len) && base != NFT_PAYLOAD_LL_HEADER) + return &nft_payload_fast_ops; + else + return &nft_payload_ops; +} + static struct nft_expr_type nft_payload_type __read_mostly = { .name = "payload", - .ops = &nft_payload_ops, + .select_ops = nft_payload_select_ops, .policy = nft_payload_policy, .maxattr = NFTA_PAYLOAD_MAX, .owner = THIS_MODULE, -- cgit v1.2.3-55-g7522 From 9370761c56b66aa5c65e069a7b010111a025018d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 10 Oct 2013 23:21:26 +0200 Subject: netfilter: nf_tables: convert built-in tables/chains to chain types This patch converts built-in tables/chains to chain types that allows you to deploy customized table and chain configurations from userspace. After this patch, you have to specify the chain type when creating a new chain: add chain ip filter output { type filter hook input priority 0; } ^^^^ ------ The existing chain types after this patch are: filter, route and nat. Note that tables are just containers of chains with no specific semantics, which is a significant change with regards to iptables. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 31 ++- include/uapi/linux/netfilter/nf_tables.h | 2 + net/ipv4/netfilter/Kconfig | 8 +- net/ipv4/netfilter/Makefile | 4 +- net/ipv4/netfilter/nf_table_nat_ipv4.c | 415 ------------------------------ net/ipv4/netfilter/nf_table_route_ipv4.c | 97 ------- net/ipv4/netfilter/nf_tables_ipv4.c | 21 ++ net/ipv4/netfilter/nft_chain_nat_ipv4.c | 353 +++++++++++++++++++++++++ net/ipv4/netfilter/nft_chain_route_ipv4.c | 86 +++++++ net/ipv6/netfilter/Kconfig | 4 +- net/ipv6/netfilter/Makefile | 2 +- net/ipv6/netfilter/nf_table_route_ipv6.c | 93 ------- net/ipv6/netfilter/nf_tables_ipv6.c | 22 +- net/ipv6/netfilter/nft_chain_route_ipv6.c | 82 ++++++ net/netfilter/nf_tables_api.c | 197 +++++++------- 15 files changed, 682 insertions(+), 735 deletions(-) delete mode 100644 net/ipv4/netfilter/nf_table_nat_ipv4.c delete mode 100644 net/ipv4/netfilter/nf_table_route_ipv4.c create mode 100644 net/ipv4/netfilter/nft_chain_nat_ipv4.c create mode 100644 net/ipv4/netfilter/nft_chain_route_ipv4.c delete mode 100644 net/ipv6/netfilter/nf_table_route_ipv6.c create mode 100644 net/ipv6/netfilter/nft_chain_route_ipv6.c (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 66d0359702c6..8403f7f52e81 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -336,7 +336,6 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule) enum nft_chain_flags { NFT_BASE_CHAIN = 0x1, - NFT_CHAIN_BUILTIN = 0x2, }; /** @@ -362,14 +361,23 @@ struct nft_chain { char name[NFT_CHAIN_MAXNAMELEN]; }; +enum nft_chain_type { + NFT_CHAIN_T_DEFAULT = 0, + NFT_CHAIN_T_ROUTE, + NFT_CHAIN_T_NAT, + NFT_CHAIN_T_MAX +}; + /** * struct nft_base_chain - nf_tables base chain * * @ops: netfilter hook ops + * @type: chain type * @chain: the chain */ struct nft_base_chain { struct nf_hook_ops ops; + enum nft_chain_type type; struct nft_chain chain; }; @@ -384,10 +392,6 @@ extern unsigned int nft_do_chain(const struct nf_hook_ops *ops, const struct net_device *out, int (*okfn)(struct sk_buff *)); -enum nft_table_flags { - NFT_TABLE_BUILTIN = 0x1, -}; - /** * struct nft_table - nf_tables table * @@ -431,8 +435,17 @@ struct nft_af_info { extern int nft_register_afinfo(struct nft_af_info *); extern void nft_unregister_afinfo(struct nft_af_info *); -extern int nft_register_table(struct nft_table *, int family); -extern void nft_unregister_table(struct nft_table *, int family); +struct nf_chain_type { + unsigned int hook_mask; + const char *name; + enum nft_chain_type type; + nf_hookfn *fn[NF_MAX_HOOKS]; + struct module *me; + int family; +}; + +extern int nft_register_chain_type(struct nf_chain_type *); +extern void nft_unregister_chain_type(struct nf_chain_type *); extern int nft_register_expr(struct nft_expr_type *); extern void nft_unregister_expr(struct nft_expr_type *); @@ -440,8 +453,8 @@ extern void nft_unregister_expr(struct nft_expr_type *); #define MODULE_ALIAS_NFT_FAMILY(family) \ MODULE_ALIAS("nft-afinfo-" __stringify(family)) -#define MODULE_ALIAS_NFT_TABLE(family, name) \ - MODULE_ALIAS("nft-table-" __stringify(family) "-" name) +#define MODULE_ALIAS_NFT_CHAIN(family, name) \ + MODULE_ALIAS("nft-chain-" __stringify(family) "-" name) #define MODULE_ALIAS_NFT_EXPR(name) \ MODULE_ALIAS("nft-expr-" name) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 9e924014efe3..779cf951c8de 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -115,6 +115,7 @@ enum nft_table_attributes { * @NFTA_CHAIN_HANDLE: numeric handle of the chain (NLA_U64) * @NFTA_CHAIN_NAME: name of the chain (NLA_STRING) * @NFTA_CHAIN_HOOK: hook specification for basechains (NLA_NESTED: nft_hook_attributes) + * @NFTA_CHAIN_TYPE: type name of the string (NLA_NUL_STRING) */ enum nft_chain_attributes { NFTA_CHAIN_UNSPEC, @@ -122,6 +123,7 @@ enum nft_chain_attributes { NFTA_CHAIN_HANDLE, NFTA_CHAIN_NAME, NFTA_CHAIN_HOOK, + NFTA_CHAIN_TYPE, __NFTA_CHAIN_MAX }; #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index eb1d56ece361..ae65fe98bfbe 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -44,13 +44,13 @@ config NFT_REJECT_IPV4 depends on NF_TABLES_IPV4 tristate "nf_tables IPv4 reject support" -config NF_TABLE_ROUTE_IPV4 +config NFT_CHAIN_ROUTE_IPV4 depends on NF_TABLES_IPV4 - tristate "IPv4 nf_tables route table support" + tristate "IPv4 nf_tables route chain support" -config NF_TABLE_NAT_IPV4 +config NFT_CHAIN_NAT_IPV4 depends on NF_TABLES_IPV4 - tristate "IPv4 nf_tables nat table support" + tristate "IPv4 nf_tables nat chain support" config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index b2f01cd2cd65..91e0bd71a6d3 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -29,8 +29,8 @@ obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o -obj-$(CONFIG_NF_TABLE_ROUTE_IPV4) += nf_table_route_ipv4.o -obj-$(CONFIG_NF_TABLE_NAT_IPV4) += nf_table_nat_ipv4.o +obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o +obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o diff --git a/net/ipv4/netfilter/nf_table_nat_ipv4.c b/net/ipv4/netfilter/nf_table_nat_ipv4.c deleted file mode 100644 index 2ecce39077a3..000000000000 --- a/net/ipv4/netfilter/nf_table_nat_ipv4.c +++ /dev/null @@ -1,415 +0,0 @@ -/* - * Copyright (c) 2008-2009 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct nft_nat { - enum nft_registers sreg_addr_min:8; - enum nft_registers sreg_addr_max:8; - enum nft_registers sreg_proto_min:8; - enum nft_registers sreg_proto_max:8; - enum nf_nat_manip_type type; -}; - -static void nft_nat_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) -{ - const struct nft_nat *priv = nft_expr_priv(expr); - enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo); - struct nf_nat_range range; - - memset(&range, 0, sizeof(range)); - if (priv->sreg_addr_min) { - range.min_addr.ip = data[priv->sreg_addr_min].data[0]; - range.max_addr.ip = data[priv->sreg_addr_max].data[0]; - range.flags |= NF_NAT_RANGE_MAP_IPS; - } - - if (priv->sreg_proto_min) { - range.min_proto.all = data[priv->sreg_proto_min].data[0]; - range.max_proto.all = data[priv->sreg_proto_max].data[0]; - range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - - data[NFT_REG_VERDICT].verdict = - nf_nat_setup_info(ct, &range, priv->type); -} - -static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = { - [NFTA_NAT_ADDR_MIN] = { .type = NLA_U32 }, - [NFTA_NAT_ADDR_MAX] = { .type = NLA_U32 }, - [NFTA_NAT_PROTO_MIN] = { .type = NLA_U32 }, - [NFTA_NAT_PROTO_MAX] = { .type = NLA_U32 }, - [NFTA_NAT_TYPE] = { .type = NLA_U32 }, -}; - -static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_nat *priv = nft_expr_priv(expr); - int err; - - if (tb[NFTA_NAT_TYPE] == NULL) - return -EINVAL; - - switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) { - case NFT_NAT_SNAT: - priv->type = NF_NAT_MANIP_SRC; - break; - case NFT_NAT_DNAT: - priv->type = NF_NAT_MANIP_DST; - break; - default: - return -EINVAL; - } - - if (tb[NFTA_NAT_ADDR_MIN]) { - priv->sreg_addr_min = ntohl(nla_get_be32(tb[NFTA_NAT_ADDR_MIN])); - err = nft_validate_input_register(priv->sreg_addr_min); - if (err < 0) - return err; - } - - if (tb[NFTA_NAT_ADDR_MAX]) { - priv->sreg_addr_max = ntohl(nla_get_be32(tb[NFTA_NAT_ADDR_MAX])); - err = nft_validate_input_register(priv->sreg_addr_max); - if (err < 0) - return err; - } else - priv->sreg_addr_max = priv->sreg_addr_min; - - if (tb[NFTA_NAT_PROTO_MIN]) { - priv->sreg_proto_min = ntohl(nla_get_be32(tb[NFTA_NAT_PROTO_MIN])); - err = nft_validate_input_register(priv->sreg_proto_min); - if (err < 0) - return err; - } - - if (tb[NFTA_NAT_PROTO_MAX]) { - priv->sreg_proto_max = ntohl(nla_get_be32(tb[NFTA_NAT_PROTO_MAX])); - err = nft_validate_input_register(priv->sreg_proto_max); - if (err < 0) - return err; - } else - priv->sreg_proto_max = priv->sreg_proto_min; - - return 0; -} - -static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) -{ - const struct nft_nat *priv = nft_expr_priv(expr); - - switch (priv->type) { - case NF_NAT_MANIP_SRC: - if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT))) - goto nla_put_failure; - break; - case NF_NAT_MANIP_DST: - if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT))) - goto nla_put_failure; - break; - } - - if (nla_put_be32(skb, NFTA_NAT_ADDR_MIN, htonl(priv->sreg_addr_min))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_NAT_ADDR_MAX, htonl(priv->sreg_addr_max))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_NAT_PROTO_MIN, htonl(priv->sreg_proto_min))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_NAT_PROTO_MAX, htonl(priv->sreg_proto_max))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -1; -} - -static struct nft_expr_type nft_nat_type; -static const struct nft_expr_ops nft_nat_ops = { - .type = &nft_nat_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), - .eval = nft_nat_eval, - .init = nft_nat_init, - .dump = nft_nat_dump, -}; - -static struct nft_expr_type nft_nat_type __read_mostly = { - .name = "nat", - .ops = &nft_nat_ops, - .policy = nft_nat_policy, - .maxattr = NFTA_NAT_MAX, - .owner = THIS_MODULE, -}; - -/* - * NAT table - */ - -static unsigned int nf_nat_fn(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_nat *nat; - enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); - unsigned int ret; - - if (ct == NULL || nf_ct_is_untracked(ct)) - return NF_ACCEPT; - - NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); - - nat = nfct_nat(ct); - if (nat == NULL) { - /* Conntrack module was loaded late, can't add extension. */ - if (nf_ct_is_confirmed(ct)) - return NF_ACCEPT; - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) - return NF_ACCEPT; - } - - switch (ctinfo) { - case IP_CT_RELATED: - case IP_CT_RELATED + IP_CT_IS_REPLY: - if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { - if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, - ops->hooknum)) - return NF_DROP; - else - return NF_ACCEPT; - } - /* Fall through */ - case IP_CT_NEW: - if (nf_nat_initialized(ct, maniptype)) - break; - - ret = nft_do_chain(ops, skb, in, out, okfn); - if (ret != NF_ACCEPT) - return ret; - if (!nf_nat_initialized(ct, maniptype)) { - ret = nf_nat_alloc_null_binding(ct, ops->hooknum); - if (ret != NF_ACCEPT) - return ret; - } - default: - break; - } - - return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); -} - -static unsigned int nf_nat_prerouting(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - __be32 daddr = ip_hdr(skb)->daddr; - unsigned int ret; - - ret = nf_nat_fn(ops, skb, in, out, okfn); - if (ret != NF_DROP && ret != NF_STOLEN && - ip_hdr(skb)->daddr != daddr) { - skb_dst_drop(skb); - } - return ret; -} - -static unsigned int nf_nat_postrouting(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - enum ip_conntrack_info ctinfo __maybe_unused; - const struct nf_conn *ct __maybe_unused; - unsigned int ret; - - ret = nf_nat_fn(ops, skb, in, out, okfn); -#ifdef CONFIG_XFRM - if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if (ct->tuplehash[dir].tuple.src.u3.ip != - ct->tuplehash[!dir].tuple.dst.u3.ip || - ct->tuplehash[dir].tuple.src.u.all != - ct->tuplehash[!dir].tuple.dst.u.all) - return nf_xfrm_me_harder(skb, AF_INET) == 0 ? - ret : NF_DROP; - } -#endif - return ret; -} - -static unsigned int nf_nat_output(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - enum ip_conntrack_info ctinfo; - const struct nf_conn *ct; - unsigned int ret; - - ret = nf_nat_fn(ops, skb, in, out, okfn); - if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if (ct->tuplehash[dir].tuple.dst.u3.ip != - ct->tuplehash[!dir].tuple.src.u3.ip) { - if (ip_route_me_harder(skb, RTN_UNSPEC)) - ret = NF_DROP; - } -#ifdef CONFIG_XFRM - else if (ct->tuplehash[dir].tuple.dst.u.all != - ct->tuplehash[!dir].tuple.src.u.all) - if (nf_xfrm_me_harder(skb, AF_INET)) - ret = NF_DROP; -#endif - } - return ret; -} - -static struct nft_base_chain nf_chain_nat_prerouting __read_mostly = { - .chain = { - .name = "PREROUTING", - .rules = LIST_HEAD_INIT(nf_chain_nat_prerouting.chain.rules), - .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, - }, - .ops = { - .hook = nf_nat_prerouting, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_NAT_DST, - .priv = &nf_chain_nat_prerouting.chain, - }, -}; - -static struct nft_base_chain nf_chain_nat_postrouting __read_mostly = { - .chain = { - .name = "POSTROUTING", - .rules = LIST_HEAD_INIT(nf_chain_nat_postrouting.chain.rules), - .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, - }, - .ops = { - .hook = nf_nat_postrouting, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_NAT_SRC, - .priv = &nf_chain_nat_postrouting.chain, - }, -}; - -static struct nft_base_chain nf_chain_nat_output __read_mostly = { - .chain = { - .name = "OUTPUT", - .rules = LIST_HEAD_INIT(nf_chain_nat_output.chain.rules), - .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, - }, - .ops = { - .hook = nf_nat_output, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_NAT_DST, - .priv = &nf_chain_nat_output.chain, - }, -}; - -static struct nft_base_chain nf_chain_nat_input __read_mostly = { - .chain = { - .name = "INPUT", - .rules = LIST_HEAD_INIT(nf_chain_nat_input.chain.rules), - .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, - }, - .ops = { - .hook = nf_nat_fn, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_NAT_SRC, - .priv = &nf_chain_nat_input.chain, - }, -}; - - -static struct nft_table nf_table_nat_ipv4 __read_mostly = { - .name = "nat", - .chains = LIST_HEAD_INIT(nf_table_nat_ipv4.chains), -}; - -static int __init nf_table_nat_init(void) -{ - int err; - - list_add_tail(&nf_chain_nat_prerouting.chain.list, - &nf_table_nat_ipv4.chains); - list_add_tail(&nf_chain_nat_postrouting.chain.list, - &nf_table_nat_ipv4.chains); - list_add_tail(&nf_chain_nat_output.chain.list, - &nf_table_nat_ipv4.chains); - list_add_tail(&nf_chain_nat_input.chain.list, - &nf_table_nat_ipv4.chains); - - err = nft_register_table(&nf_table_nat_ipv4, NFPROTO_IPV4); - if (err < 0) - goto err1; - - err = nft_register_expr(&nft_nat_type); - if (err < 0) - goto err2; - - return 0; - -err2: - nft_unregister_table(&nf_table_nat_ipv4, NFPROTO_IPV4); -err1: - return err; -} - -static void __exit nf_table_nat_exit(void) -{ - nft_unregister_expr(&nft_nat_type); - nft_unregister_table(&nf_table_nat_ipv4, AF_INET); -} - -module_init(nf_table_nat_init); -module_exit(nf_table_nat_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_TABLE(AF_INET, "nat"); -MODULE_ALIAS_NFT_EXPR("nat"); diff --git a/net/ipv4/netfilter/nf_table_route_ipv4.c b/net/ipv4/netfilter/nf_table_route_ipv4.c deleted file mode 100644 index 4f257a1ed661..000000000000 --- a/net/ipv4/netfilter/nf_table_route_ipv4.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - unsigned int ret; - u32 mark; - __be32 saddr, daddr; - u_int8_t tos; - const struct iphdr *iph; - - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - return NF_ACCEPT; - - mark = skb->mark; - iph = ip_hdr(skb); - saddr = iph->saddr; - daddr = iph->daddr; - tos = iph->tos; - - ret = nft_do_chain(ops, skb, in, out, okfn); - if (ret != NF_DROP && ret != NF_QUEUE) { - iph = ip_hdr(skb); - - if (iph->saddr != saddr || - iph->daddr != daddr || - skb->mark != mark || - iph->tos != tos) - if (ip_route_me_harder(skb, RTN_UNSPEC)) - ret = NF_DROP; - } - return ret; -} - -static struct nft_base_chain nf_chain_route_output __read_mostly = { - .chain = { - .name = "OUTPUT", - .rules = LIST_HEAD_INIT(nf_chain_route_output.chain.rules), - .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, - }, - .ops = { - .hook = nf_route_table_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_MANGLE, - .priv = &nf_chain_route_output.chain, - }, -}; - -static struct nft_table nf_table_route_ipv4 __read_mostly = { - .name = "route", - .chains = LIST_HEAD_INIT(nf_table_route_ipv4.chains), -}; - -static int __init nf_table_route_init(void) -{ - list_add_tail(&nf_chain_route_output.chain.list, - &nf_table_route_ipv4.chains); - return nft_register_table(&nf_table_route_ipv4, NFPROTO_IPV4); -} - -static void __exit nf_table_route_exit(void) -{ - nft_unregister_table(&nf_table_route_ipv4, NFPROTO_IPV4); -} - -module_init(nf_table_route_init); -module_exit(nf_table_route_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_TABLE(AF_INET, "route"); diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 63d0a3bf53d3..23525c4c0192 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2012-2013 Pablo Neira Ayuso * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -41,14 +42,34 @@ static struct nft_af_info nft_af_ipv4 __read_mostly = { }, }; +static struct nf_chain_type filter_ipv4 = { + .family = NFPROTO_IPV4, + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .hook_mask = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING), + .fn = { + [NF_INET_LOCAL_IN] = nft_do_chain, + [NF_INET_LOCAL_OUT] = nft_do_chain, + [NF_INET_FORWARD] = nft_do_chain, + [NF_INET_PRE_ROUTING] = nft_do_chain, + [NF_INET_POST_ROUTING] = nft_do_chain, + }, +}; + static int __init nf_tables_ipv4_init(void) { + nft_register_chain_type(&filter_ipv4); return nft_register_afinfo(&nft_af_ipv4); } static void __exit nf_tables_ipv4_exit(void) { nft_unregister_afinfo(&nft_af_ipv4); + nft_unregister_chain_type(&filter_ipv4); } module_init(nf_tables_ipv4_init); diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c new file mode 100644 index 000000000000..cd286306be85 --- /dev/null +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy + * Copyright (c) 2012 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_nat { + enum nft_registers sreg_addr_min:8; + enum nft_registers sreg_addr_max:8; + enum nft_registers sreg_proto_min:8; + enum nft_registers sreg_proto_max:8; + enum nf_nat_manip_type type; +}; + +static void nft_nat_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo); + struct nf_nat_range range; + + memset(&range, 0, sizeof(range)); + if (priv->sreg_addr_min) { + range.min_addr.ip = data[priv->sreg_addr_min].data[0]; + range.max_addr.ip = data[priv->sreg_addr_max].data[0]; + range.flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (priv->sreg_proto_min) { + range.min_proto.all = data[priv->sreg_proto_min].data[0]; + range.max_proto.all = data[priv->sreg_proto_max].data[0]; + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + data[NFT_REG_VERDICT].verdict = + nf_nat_setup_info(ct, &range, priv->type); +} + +static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = { + [NFTA_NAT_ADDR_MIN] = { .type = NLA_U32 }, + [NFTA_NAT_ADDR_MAX] = { .type = NLA_U32 }, + [NFTA_NAT_PROTO_MIN] = { .type = NLA_U32 }, + [NFTA_NAT_PROTO_MAX] = { .type = NLA_U32 }, + [NFTA_NAT_TYPE] = { .type = NLA_U32 }, +}; + +static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_nat *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_NAT_TYPE] == NULL) + return -EINVAL; + + switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) { + case NFT_NAT_SNAT: + priv->type = NF_NAT_MANIP_SRC; + break; + case NFT_NAT_DNAT: + priv->type = NF_NAT_MANIP_DST; + break; + default: + return -EINVAL; + } + + if (tb[NFTA_NAT_ADDR_MIN]) { + priv->sreg_addr_min = ntohl(nla_get_be32(tb[NFTA_NAT_ADDR_MIN])); + err = nft_validate_input_register(priv->sreg_addr_min); + if (err < 0) + return err; + } + + if (tb[NFTA_NAT_ADDR_MAX]) { + priv->sreg_addr_max = ntohl(nla_get_be32(tb[NFTA_NAT_ADDR_MAX])); + err = nft_validate_input_register(priv->sreg_addr_max); + if (err < 0) + return err; + } else + priv->sreg_addr_max = priv->sreg_addr_min; + + if (tb[NFTA_NAT_PROTO_MIN]) { + priv->sreg_proto_min = ntohl(nla_get_be32(tb[NFTA_NAT_PROTO_MIN])); + err = nft_validate_input_register(priv->sreg_proto_min); + if (err < 0) + return err; + } + + if (tb[NFTA_NAT_PROTO_MAX]) { + priv->sreg_proto_max = ntohl(nla_get_be32(tb[NFTA_NAT_PROTO_MAX])); + err = nft_validate_input_register(priv->sreg_proto_max); + if (err < 0) + return err; + } else + priv->sreg_proto_max = priv->sreg_proto_min; + + return 0; +} + +static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + + switch (priv->type) { + case NF_NAT_MANIP_SRC: + if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT))) + goto nla_put_failure; + break; + case NF_NAT_MANIP_DST: + if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT))) + goto nla_put_failure; + break; + } + + if (nla_put_be32(skb, NFTA_NAT_ADDR_MIN, htonl(priv->sreg_addr_min))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NAT_ADDR_MAX, htonl(priv->sreg_addr_max))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NAT_PROTO_MIN, htonl(priv->sreg_proto_min))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NAT_PROTO_MAX, htonl(priv->sreg_proto_max))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_nat_type; +static const struct nft_expr_ops nft_nat_ops = { + .type = &nft_nat_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), + .eval = nft_nat_eval, + .init = nft_nat_init, + .dump = nft_nat_dump, +}; + +static struct nft_expr_type nft_nat_type __read_mostly = { + .name = "nat", + .ops = &nft_nat_ops, + .policy = nft_nat_policy, + .maxattr = NFTA_NAT_MAX, + .owner = THIS_MODULE, +}; + +/* + * NAT chains + */ + +static unsigned int nf_nat_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_nat *nat; + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + unsigned int ret; + + if (ct == NULL || nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); + + nat = nfct_nat(ct); + if (nat == NULL) { + /* Conntrack module was loaded late, can't add extension. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) + return NF_ACCEPT; + } + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED + IP_CT_IS_REPLY: + if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { + if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, + ops->hooknum)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall through */ + case IP_CT_NEW: + if (nf_nat_initialized(ct, maniptype)) + break; + + ret = nft_do_chain(ops, skb, in, out, okfn); + if (ret != NF_ACCEPT) + return ret; + if (!nf_nat_initialized(ct, maniptype)) { + ret = nf_nat_alloc_null_binding(ct, ops->hooknum); + if (ret != NF_ACCEPT) + return ret; + } + default: + break; + } + + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); +} + +static unsigned int nf_nat_prerouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + __be32 daddr = ip_hdr(skb)->daddr; + unsigned int ret; + + ret = nf_nat_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + ip_hdr(skb)->daddr != daddr) { + skb_dst_drop(skb); + } + return ret; +} + +static unsigned int nf_nat_postrouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo __maybe_unused; + const struct nf_conn *ct __maybe_unused; + unsigned int ret; + + ret = nf_nat_fn(ops, skb, in, out, okfn); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.src.u3.ip != + ct->tuplehash[!dir].tuple.dst.u3.ip || + ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all) + return nf_xfrm_me_harder(skb, AF_INET) == 0 ? + ret : NF_DROP; + } +#endif + return ret; +} + +static unsigned int nf_nat_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct; + unsigned int ret; + + ret = nf_nat_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.dst.u3.ip != + ct->tuplehash[!dir].tuple.src.u3.ip) { + if (ip_route_me_harder(skb, RTN_UNSPEC)) + ret = NF_DROP; + } +#ifdef CONFIG_XFRM + else if (ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) + if (nf_xfrm_me_harder(skb, AF_INET)) + ret = NF_DROP; +#endif + } + return ret; +} + +struct nf_chain_type nft_chain_nat_ipv4 = { + .family = NFPROTO_IPV4, + .name = "nat", + .type = NFT_CHAIN_T_NAT, + .hook_mask = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .fn = { + [NF_INET_PRE_ROUTING] = nf_nat_prerouting, + [NF_INET_POST_ROUTING] = nf_nat_postrouting, + [NF_INET_LOCAL_OUT] = nf_nat_output, + [NF_INET_LOCAL_IN] = nf_nat_fn, + }, + .me = THIS_MODULE, +}; + +static int __init nft_chain_nat_init(void) +{ + int err; + + err = nft_register_chain_type(&nft_chain_nat_ipv4); + if (err < 0) + return err; + + err = nft_register_expr(&nft_nat_type); + if (err < 0) + goto err; + + return 0; + +err: + nft_unregister_chain_type(&nft_chain_nat_ipv4); + return err; +} + +static void __exit nft_chain_nat_exit(void) +{ + nft_unregister_expr(&nft_nat_type); + nft_unregister_chain_type(&nft_chain_nat_ipv4); +} + +module_init(nft_chain_nat_init); +module_exit(nft_chain_nat_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat"); +MODULE_ALIAS_NFT_EXPR("nat"); diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c new file mode 100644 index 000000000000..6b84e097b8fc --- /dev/null +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2012 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + u32 mark; + __be32 saddr, daddr; + u_int8_t tos; + const struct iphdr *iph; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + return NF_ACCEPT; + + mark = skb->mark; + iph = ip_hdr(skb); + saddr = iph->saddr; + daddr = iph->daddr; + tos = iph->tos; + + ret = nft_do_chain(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_QUEUE) { + iph = ip_hdr(skb); + + if (iph->saddr != saddr || + iph->daddr != daddr || + skb->mark != mark || + iph->tos != tos) + if (ip_route_me_harder(skb, RTN_UNSPEC)) + ret = NF_DROP; + } + return ret; +} + +static struct nf_chain_type nft_chain_route_ipv4 = { + .family = NFPROTO_IPV4, + .name = "route", + .type = NFT_CHAIN_T_ROUTE, + .hook_mask = (1 << NF_INET_LOCAL_OUT), + .fn = { + [NF_INET_LOCAL_OUT] = nf_route_table_hook, + }, + .me = THIS_MODULE, +}; + +static int __init nft_chain_route_init(void) +{ + return nft_register_chain_type(&nft_chain_route_ipv4); +} + +static void __exit nft_chain_route_exit(void) +{ + nft_unregister_chain_type(&nft_chain_route_ipv4); +} + +module_init(nft_chain_route_init); +module_exit(nft_chain_route_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_CHAIN(AF_INET, "route"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 5677e38eeca3..23833064b7b5 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -29,9 +29,9 @@ config NF_TABLES_IPV6 depends on NF_TABLES tristate "IPv6 nf_tables support" -config NF_TABLE_ROUTE_IPV6 +config NFT_CHAIN_ROUTE_IPV6 depends on NF_TABLES_IPV6 - tristate "IPv6 nf_tables route table support" + tristate "IPv6 nf_tables route chain support" config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 956af4492d10..be4913aa524d 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -25,7 +25,7 @@ obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o # nf_tables obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o -obj-$(CONFIG_NF_TABLE_ROUTE_IPV6) += nf_table_route_ipv6.o +obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/nf_table_route_ipv6.c b/net/ipv6/netfilter/nf_table_route_ipv6.c deleted file mode 100644 index 48ac65c7b398..000000000000 --- a/net/ipv6/netfilter/nf_table_route_ipv6.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - unsigned int ret; - struct in6_addr saddr, daddr; - u_int8_t hop_limit; - u32 mark, flowlabel; - - /* save source/dest address, mark, hoplimit, flowlabel, priority */ - memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); - memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); - mark = skb->mark; - hop_limit = ipv6_hdr(skb)->hop_limit; - - /* flowlabel and prio (includes version, which shouldn't change either */ - flowlabel = *((u32 *)ipv6_hdr(skb)); - - ret = nft_do_chain(ops, skb, in, out, okfn); - if (ret != NF_DROP && ret != NF_QUEUE && - (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || - memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || - skb->mark != mark || - ipv6_hdr(skb)->hop_limit != hop_limit || - flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) - return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; - - return ret; -} - -static struct nft_base_chain nf_chain_route_output __read_mostly = { - .chain = { - .name = "OUTPUT", - .rules = LIST_HEAD_INIT(nf_chain_route_output.chain.rules), - .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, - }, - .ops = { - .hook = nf_route_table_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP6_PRI_MANGLE, - .priv = &nf_chain_route_output.chain, - }, -}; - -static struct nft_table nf_table_route_ipv6 __read_mostly = { - .name = "route", - .chains = LIST_HEAD_INIT(nf_table_route_ipv6.chains), -}; - -static int __init nf_table_route_init(void) -{ - list_add_tail(&nf_chain_route_output.chain.list, - &nf_table_route_ipv6.chains); - return nft_register_table(&nf_table_route_ipv6, NFPROTO_IPV6); -} - -static void __exit nf_table_route_exit(void) -{ - nft_unregister_table(&nf_table_route_ipv6, NFPROTO_IPV6); -} - -module_init(nf_table_route_init); -module_exit(nf_table_route_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_TABLE(AF_INET6, "route"); diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index e0717cea4913..3631d6238e6f 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2012-2013 Pablo Neira Ayuso * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -39,14 +40,33 @@ static struct nft_af_info nft_af_ipv6 __read_mostly = { }, }; +static struct nf_chain_type filter_ipv6 = { + .family = NFPROTO_IPV6, + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .hook_mask = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING), + .fn = { + [NF_INET_LOCAL_IN] = nft_do_chain, + [NF_INET_LOCAL_OUT] = nft_do_chain, + [NF_INET_FORWARD] = nft_do_chain, + [NF_INET_PRE_ROUTING] = nft_do_chain, + [NF_INET_POST_ROUTING] = nft_do_chain, + }, +}; + static int __init nf_tables_ipv6_init(void) { + nft_register_chain_type(&filter_ipv6); return nft_register_afinfo(&nft_af_ipv6); } - static void __exit nf_tables_ipv6_exit(void) { nft_unregister_afinfo(&nft_af_ipv6); + nft_unregister_chain_type(&filter_ipv6); } module_init(nf_tables_ipv6_init); diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c new file mode 100644 index 000000000000..4cdc992fa067 --- /dev/null +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2012 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + struct in6_addr saddr, daddr; + u_int8_t hop_limit; + u32 mark, flowlabel; + + /* save source/dest address, mark, hoplimit, flowlabel, priority */ + memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); + memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); + mark = skb->mark; + hop_limit = ipv6_hdr(skb)->hop_limit; + + /* flowlabel and prio (includes version, which shouldn't change either */ + flowlabel = *((u32 *)ipv6_hdr(skb)); + + ret = nft_do_chain(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_QUEUE && + (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || + memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || + skb->mark != mark || + ipv6_hdr(skb)->hop_limit != hop_limit || + flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) + return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; + + return ret; +} + +static struct nf_chain_type nft_chain_route_ipv6 = { + .family = NFPROTO_IPV6, + .name = "route", + .type = NFT_CHAIN_T_ROUTE, + .hook_mask = (1 << NF_INET_LOCAL_OUT), + .fn = { + [NF_INET_LOCAL_OUT] = nf_route_table_hook, + }, + .me = THIS_MODULE, +}; + +static int __init nft_chain_route_init(void) +{ + return nft_register_chain_type(&nft_chain_route_ipv6); +} + +static void __exit nft_chain_route_exit(void) +{ + nft_unregister_chain_type(&nft_chain_route_ipv6); +} + +module_init(nft_chain_route_init); +module_exit(nft_chain_route_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_CHAIN(AF_INET6, "route"); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6dac9a3c9c40..9c2d8d5af843 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -104,8 +104,7 @@ static struct nft_table *nft_table_lookup(const struct nft_af_info *afi, } static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi, - const struct nlattr *nla, - bool autoload) + const struct nlattr *nla) { struct nft_table *table; @@ -116,16 +115,6 @@ static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi, if (table != NULL) return table; -#ifdef CONFIG_MODULES - if (autoload) { - nfnl_unlock(NFNL_SUBSYS_NFTABLES); - request_module("nft-table-%u-%*.s", afi->family, - nla_len(nla)-1, (const char *)nla_data(nla)); - nfnl_lock(NFNL_SUBSYS_NFTABLES); - if (nft_table_lookup(afi, nla)) - return ERR_PTR(-EAGAIN); - } -#endif return ERR_PTR(-ENOENT); } @@ -134,6 +123,39 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table) return ++table->hgenerator; } +static struct nf_chain_type *chain_type[AF_MAX][NFT_CHAIN_T_MAX]; + +static int __nf_tables_chain_type_lookup(int family, const struct nlattr *nla) +{ + int i; + + for (i=0; iname)) + return i; + } + return -1; +} + +static int nf_tables_chain_type_lookup(const struct nft_af_info *afi, + const struct nlattr *nla, + bool autoload) +{ + int type; + + type = __nf_tables_chain_type_lookup(afi->family, nla); +#ifdef CONFIG_MODULES + if (type < 0 && autoload) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-chain-%u-%*.s", afi->family, + nla_len(nla)-1, (const char *)nla_data(nla)); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + type = __nf_tables_chain_type_lookup(afi->family, nla); + } +#endif + return type; +} + static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = NLA_STRING }, }; @@ -258,7 +280,7 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false); + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); if (IS_ERR(table)) return PTR_ERR(table); @@ -294,7 +316,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, return PTR_ERR(afi); name = nla[NFTA_TABLE_NAME]; - table = nf_tables_table_lookup(afi, name, false); + table = nf_tables_table_lookup(afi, name); if (IS_ERR(table)) { if (PTR_ERR(table) != -ENOENT) return PTR_ERR(table); @@ -335,13 +357,10 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false); + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_BUILTIN) - return -EOPNOTSUPP; - if (table->use) return -EBUSY; @@ -351,99 +370,34 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, return 0; } -static struct nft_table *__nf_tables_table_lookup(const struct nft_af_info *afi, - const char *name) +int nft_register_chain_type(struct nf_chain_type *ctype) { - struct nft_table *table; - - list_for_each_entry(table, &afi->tables, list) { - if (!strcmp(name, table->name)) - return table; - } - - return ERR_PTR(-ENOENT); -} - -static int nf_tables_chain_notify(const struct sk_buff *oskb, - const struct nlmsghdr *nlh, - const struct nft_table *table, - const struct nft_chain *chain, - int event, int family); - -/** - * nft_register_table - register a built-in table - * - * @table: the table to register - * @family: protocol family to register table with - * - * Register a built-in table for use with nf_tables. Returns zero on - * success or a negative errno code otherwise. - */ -int nft_register_table(struct nft_table *table, int family) -{ - struct nft_af_info *afi; - struct nft_table *t; - struct nft_chain *chain; - int err; + int err = 0; nfnl_lock(NFNL_SUBSYS_NFTABLES); -again: - afi = nf_tables_afinfo_lookup(family, true); - if (IS_ERR(afi)) { - err = PTR_ERR(afi); - if (err == -EAGAIN) - goto again; - goto err; - } - - t = __nf_tables_table_lookup(afi, table->name); - if (IS_ERR(t)) { - err = PTR_ERR(t); - if (err != -ENOENT) - goto err; - t = NULL; + if (chain_type[ctype->family][ctype->type] != NULL) { + err = -EBUSY; + goto out; } - if (t != NULL) { - err = -EEXIST; - goto err; - } + if (!try_module_get(ctype->me)) + goto out; - table->flags |= NFT_TABLE_BUILTIN; - INIT_LIST_HEAD(&table->sets); - list_add_tail(&table->list, &afi->tables); - nf_tables_table_notify(NULL, NULL, table, NFT_MSG_NEWTABLE, family); - list_for_each_entry(chain, &table->chains, list) - nf_tables_chain_notify(NULL, NULL, table, chain, - NFT_MSG_NEWCHAIN, family); - err = 0; -err: + chain_type[ctype->family][ctype->type] = ctype; +out: nfnl_unlock(NFNL_SUBSYS_NFTABLES); return err; } -EXPORT_SYMBOL_GPL(nft_register_table); +EXPORT_SYMBOL_GPL(nft_register_chain_type); -/** - * nft_unregister_table - unregister a built-in table - * - * @table: the table to unregister - * @family: protocol family to unregister table with - * - * Unregister a built-in table for use with nf_tables. - */ -void nft_unregister_table(struct nft_table *table, int family) +void nft_unregister_chain_type(struct nf_chain_type *ctype) { - struct nft_chain *chain; - nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del(&table->list); - list_for_each_entry(chain, &table->chains, list) - nf_tables_chain_notify(NULL, NULL, table, chain, - NFT_MSG_DELCHAIN, family); - nf_tables_table_notify(NULL, NULL, table, NFT_MSG_DELTABLE, family); + chain_type[ctype->family][ctype->type] = NULL; + module_put(ctype->me); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } -EXPORT_SYMBOL_GPL(nft_unregister_table); +EXPORT_SYMBOL_GPL(nft_unregister_chain_type); /* * Chains @@ -484,6 +438,7 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { [NFTA_CHAIN_NAME] = { .type = NLA_STRING, .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED }, + [NFTA_CHAIN_TYPE] = { .type = NLA_NUL_STRING }, }; static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { @@ -526,6 +481,10 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority))) goto nla_put_failure; nla_nest_end(skb, nest); + + if (nla_put_string(skb, NFTA_CHAIN_TYPE, + chain_type[ops->pf][nft_base_chain(chain)->type]->name)) + goto nla_put_failure; } return nlmsg_end(skb, nlh); @@ -633,7 +592,7 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false); + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -680,7 +639,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], create); + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -722,6 +681,17 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (nla[NFTA_CHAIN_HOOK]) { struct nf_hook_ops *ops; + nf_hookfn *hookfn; + u32 hooknum; + int type = NFT_CHAIN_T_DEFAULT; + + if (nla[NFTA_CHAIN_TYPE]) { + type = nf_tables_chain_type_lookup(afi, + nla[NFTA_CHAIN_TYPE], + create); + if (type < 0) + return -ENOENT; + } err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], nft_hook_policy); @@ -730,12 +700,20 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (ha[NFTA_HOOK_HOOKNUM] == NULL || ha[NFTA_HOOK_PRIORITY] == NULL) return -EINVAL; - if (ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])) >= afi->nhooks) + + hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); + if (hooknum >= afi->nhooks) return -EINVAL; + hookfn = chain_type[family][type]->fn[hooknum]; + if (hookfn == NULL) + return -EOPNOTSUPP; + basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); if (basechain == NULL) return -ENOMEM; + + basechain->type = type; chain = &basechain->chain; ops = &basechain->ops; @@ -744,7 +722,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, ops->hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); ops->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); ops->priv = chain; - ops->hook = nft_do_chain; + ops->hook = hookfn; if (afi->hooks[ops->hooknum]) ops->hook = afi->hooks[ops->hooknum]; @@ -793,7 +771,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false); + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -801,9 +779,6 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(chain)) return PTR_ERR(chain); - if (chain->flags & NFT_CHAIN_BUILTIN) - return -EOPNOTSUPP; - if (!list_empty(&chain->rules)) return -EBUSY; @@ -1190,7 +1165,7 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false); + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -1268,7 +1243,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], create); + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -1374,7 +1349,7 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false); + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -1490,7 +1465,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, return PTR_ERR(afi); if (nla[NFTA_SET_TABLE] != NULL) { - table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], false); + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); } @@ -1820,7 +1795,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], create); + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -2008,7 +1983,7 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE], false); + table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); -- cgit v1.2.3-55-g7522 From 0ca743a5599199152a31a7146b83213c786c2eb2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Oct 2013 00:06:06 +0200 Subject: netfilter: nf_tables: add compatibility layer for x_tables This patch adds the x_tables compatibility layer. This allows you to use existing x_tables matches and targets from nf_tables. This compatibility later allows us to use existing matches/targets for features that are still missing in nf_tables. We can progressively replace them with native nf_tables extensions. It also provides the userspace compatibility software that allows you to express the rule-set using the iptables syntax but using the nf_tables kernel components. In order to get this compatibility layer working, I've done the following things: * add NFNL_SUBSYS_NFT_COMPAT: this new nfnetlink subsystem is used to query the x_tables match/target revision, so we don't need to use the native x_table getsockopt interface. * emulate xt structures: this required extending the struct nft_pktinfo to include the fragment offset, which is already obtained from ip[6]_tables and that is used by some matches/targets. * add support for default policy to base chains, required to emulate x_tables. * add NFTA_CHAIN_USE attribute to obtain the number of references to chains, required by x_tables emulation. * add chain packet/byte counters using per-cpu. * support 32-64 bits compat. For historical reasons, this patch includes the following patches that were posted in the netfilter-devel mailing list. From Pablo Neira Ayuso: * nf_tables: add default policy to base chains * netfilter: nf_tables: add NFTA_CHAIN_USE attribute * nf_tables: nft_compat: private data of target and matches in contiguous area * nf_tables: validate hooks for compat match/target * nf_tables: nft_compat: release cached matches/targets * nf_tables: x_tables support as a compile time option * nf_tables: fix alias for xtables over nftables module * nf_tables: add packet and byte counters per chain * nf_tables: fix per-chain counter stats if no counters are passed * nf_tables: don't bump chain stats * nf_tables: add protocol and flags for xtables over nf_tables * nf_tables: add ip[6]t_entry emulation * nf_tables: move specific layer 3 compat code to nf_tables_ipv[4|6] * nf_tables: support 32bits-64bits x_tables compat * nf_tables: fix compilation if CONFIG_COMPAT is disabled From Patrick McHardy: * nf_tables: move policy to struct nft_base_chain * nf_tables: send notifications for base chain policy changes From Alexander Primak: * nf_tables: remove the duplicate NF_INET_LOCAL_OUT From Nicolas Dichtel: * nf_tables: fix compilation when nf-netlink is a module Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 44 +- include/net/netfilter/nf_tables_ipv4.h | 23 + include/net/netfilter/nf_tables_ipv6.h | 30 + include/uapi/linux/netfilter/Kbuild | 1 + include/uapi/linux/netfilter/nf_tables.h | 32 + include/uapi/linux/netfilter/nf_tables_compat.h | 38 ++ include/uapi/linux/netfilter/nfnetlink.h | 3 +- net/ipv4/netfilter/nf_tables_ipv4.c | 32 +- net/ipv4/netfilter/nft_chain_nat_ipv4.c | 6 +- net/ipv4/netfilter/nft_chain_route_ipv4.c | 6 +- net/ipv6/netfilter/nf_tables_ipv6.c | 33 +- net/ipv6/netfilter/nft_chain_route_ipv6.c | 8 +- net/netfilter/Kconfig | 9 + net/netfilter/Makefile | 1 + net/netfilter/nf_tables_api.c | 220 ++++++- net/netfilter/nf_tables_core.c | 46 +- net/netfilter/nft_cmp.c | 3 +- net/netfilter/nft_compat.c | 768 ++++++++++++++++++++++++ net/netfilter/nft_immediate.c | 12 +- net/netfilter/nft_payload.c | 4 +- 20 files changed, 1241 insertions(+), 78 deletions(-) create mode 100644 include/net/netfilter/nf_tables_ipv4.h create mode 100644 include/net/netfilter/nf_tables_ipv6.h create mode 100644 include/uapi/linux/netfilter/nf_tables_compat.h create mode 100644 net/netfilter/nft_compat.c (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 8403f7f52e81..a68f45f0fe2e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -15,8 +16,23 @@ struct nft_pktinfo { u8 hooknum; u8 nhoff; u8 thoff; + /* for x_tables compatibility */ + struct xt_action_param xt; }; +static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, + const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out) +{ + pkt->skb = skb; + pkt->in = pkt->xt.in = in; + pkt->out = pkt->xt.out = out; + pkt->hooknum = pkt->xt.hooknum = ops->hooknum; + pkt->xt.family = ops->pf; +} + struct nft_data { union { u32 data[4]; @@ -57,6 +73,7 @@ static inline void nft_data_debug(const struct nft_data *data) * @afi: address family info * @table: the table the chain is contained in * @chain: the chain the rule is contained in + * @nla: netlink attributes */ struct nft_ctx { const struct sk_buff *skb; @@ -64,6 +81,7 @@ struct nft_ctx { const struct nft_af_info *afi; const struct nft_table *table; const struct nft_chain *chain; + const struct nlattr * const *nla; }; struct nft_data_desc { @@ -235,7 +253,8 @@ extern void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, * @maxattr: highest netlink attribute number */ struct nft_expr_type { - const struct nft_expr_ops *(*select_ops)(const struct nlattr * const tb[]); + const struct nft_expr_ops *(*select_ops)(const struct nft_ctx *, + const struct nlattr * const tb[]); const struct nft_expr_ops *ops; struct list_head list; const char *name; @@ -253,6 +272,8 @@ struct nft_expr_type { * @destroy: destruction function * @dump: function to dump parameters * @type: expression type + * @validate: validate expression, called during loop detection + * @data: extra data to attach to this expression operation */ struct nft_expr; struct nft_expr_ops { @@ -267,8 +288,11 @@ struct nft_expr_ops { void (*destroy)(const struct nft_expr *expr); int (*dump)(struct sk_buff *skb, const struct nft_expr *expr); - const struct nft_data * (*get_verdict)(const struct nft_expr *expr); + int (*validate)(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data); const struct nft_expr_type *type; + void *data; }; #define NFT_EXPR_MAXATTR 16 @@ -368,16 +392,25 @@ enum nft_chain_type { NFT_CHAIN_T_MAX }; +struct nft_stats { + u64 bytes; + u64 pkts; +}; + /** * struct nft_base_chain - nf_tables base chain * * @ops: netfilter hook ops * @type: chain type + * @policy: default policy + * @stats: per-cpu chain stats * @chain: the chain */ struct nft_base_chain { struct nf_hook_ops ops; enum nft_chain_type type; + u8 policy; + struct nft_stats __percpu *stats; struct nft_chain chain; }; @@ -386,11 +419,8 @@ static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chai return container_of(chain, struct nft_base_chain, chain); } -extern unsigned int nft_do_chain(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)); +extern unsigned int nft_do_chain_pktinfo(struct nft_pktinfo *pkt, + const struct nf_hook_ops *ops); /** * struct nft_table - nf_tables table diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h new file mode 100644 index 000000000000..1be1c2c197ee --- /dev/null +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -0,0 +1,23 @@ +#ifndef _NF_TABLES_IPV4_H_ +#define _NF_TABLES_IPV4_H_ + +#include +#include + +static inline void +nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, + const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out) +{ + struct iphdr *ip; + + nft_set_pktinfo(pkt, ops, skb, in, out); + + pkt->xt.thoff = ip_hdrlen(pkt->skb); + ip = ip_hdr(pkt->skb); + pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET; +} + +#endif diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h new file mode 100644 index 000000000000..4a9b88a65963 --- /dev/null +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -0,0 +1,30 @@ +#ifndef _NF_TABLES_IPV6_H_ +#define _NF_TABLES_IPV6_H_ + +#include +#include + +static inline int +nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, + const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out) +{ + int protohdr, thoff = 0; + unsigned short frag_off; + + nft_set_pktinfo(pkt, ops, skb, in, out); + + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); + /* If malformed, drop it */ + if (protohdr < 0) + return -1; + + pkt->xt.thoff = thoff; + pkt->xt.fragoff = frag_off; + + return 0; +} + +#endif diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 6ce0b7f566a7..17c3af2c4bb9 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -6,6 +6,7 @@ header-y += nf_conntrack_sctp.h header-y += nf_conntrack_tcp.h header-y += nf_conntrack_tuple_common.h header-y += nf_tables.h +header-y += nf_tables_compat.h header-y += nf_nat.h header-y += nfnetlink.h header-y += nfnetlink_acct.h diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 779cf951c8de..1563875e6942 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -115,7 +115,10 @@ enum nft_table_attributes { * @NFTA_CHAIN_HANDLE: numeric handle of the chain (NLA_U64) * @NFTA_CHAIN_NAME: name of the chain (NLA_STRING) * @NFTA_CHAIN_HOOK: hook specification for basechains (NLA_NESTED: nft_hook_attributes) + * @NFTA_CHAIN_POLICY: numeric policy of the chain (NLA_U32) + * @NFTA_CHAIN_USE: number of references to this chain (NLA_U32) * @NFTA_CHAIN_TYPE: type name of the string (NLA_NUL_STRING) + * @NFTA_CHAIN_COUNTERS: counter specification of the chain (NLA_NESTED: nft_counter_attributes) */ enum nft_chain_attributes { NFTA_CHAIN_UNSPEC, @@ -123,7 +126,10 @@ enum nft_chain_attributes { NFTA_CHAIN_HANDLE, NFTA_CHAIN_NAME, NFTA_CHAIN_HOOK, + NFTA_CHAIN_POLICY, + NFTA_CHAIN_USE, NFTA_CHAIN_TYPE, + NFTA_CHAIN_COUNTERS, __NFTA_CHAIN_MAX }; #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) @@ -135,6 +141,7 @@ enum nft_chain_attributes { * @NFTA_RULE_CHAIN: name of the chain containing the rule (NLA_STRING) * @NFTA_RULE_HANDLE: numeric handle of the rule (NLA_U64) * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes) + * @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes) */ enum nft_rule_attributes { NFTA_RULE_UNSPEC, @@ -142,10 +149,35 @@ enum nft_rule_attributes { NFTA_RULE_CHAIN, NFTA_RULE_HANDLE, NFTA_RULE_EXPRESSIONS, + NFTA_RULE_COMPAT, __NFTA_RULE_MAX }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) +/** + * enum nft_rule_compat_flags - nf_tables rule compat flags + * + * @NFT_RULE_COMPAT_F_INV: invert the check result + */ +enum nft_rule_compat_flags { + NFT_RULE_COMPAT_F_INV = (1 << 1), + NFT_RULE_COMPAT_F_MASK = NFT_RULE_COMPAT_F_INV, +}; + +/** + * enum nft_rule_compat_attributes - nf_tables rule compat attributes + * + * @NFTA_RULE_COMPAT_PROTO: numerice value of handled protocol (NLA_U32) + * @NFTA_RULE_COMPAT_FLAGS: bitmask of enum nft_rule_compat_flags (NLA_U32) + */ +enum nft_rule_compat_attributes { + NFTA_RULE_COMPAT_UNSPEC, + NFTA_RULE_COMPAT_PROTO, + NFTA_RULE_COMPAT_FLAGS, + __NFTA_RULE_COMPAT_MAX +}; +#define NFTA_RULE_COMPAT_MAX (__NFTA_RULE_COMPAT_MAX - 1) + /** * enum nft_set_flags - nf_tables set flags * diff --git a/include/uapi/linux/netfilter/nf_tables_compat.h b/include/uapi/linux/netfilter/nf_tables_compat.h new file mode 100644 index 000000000000..8310f5f76551 --- /dev/null +++ b/include/uapi/linux/netfilter/nf_tables_compat.h @@ -0,0 +1,38 @@ +#ifndef _NFT_COMPAT_NFNETLINK_H_ +#define _NFT_COMPAT_NFNETLINK_H_ + +enum nft_target_attributes { + NFTA_TARGET_UNSPEC, + NFTA_TARGET_NAME, + NFTA_TARGET_REV, + NFTA_TARGET_INFO, + __NFTA_TARGET_MAX +}; +#define NFTA_TARGET_MAX (__NFTA_TARGET_MAX - 1) + +enum nft_match_attributes { + NFTA_MATCH_UNSPEC, + NFTA_MATCH_NAME, + NFTA_MATCH_REV, + NFTA_MATCH_INFO, + __NFTA_MATCH_MAX +}; +#define NFTA_MATCH_MAX (__NFTA_MATCH_MAX - 1) + +#define NFT_COMPAT_NAME_MAX 32 + +enum { + NFNL_MSG_COMPAT_GET, + NFNL_MSG_COMPAT_MAX +}; + +enum { + NFTA_COMPAT_UNSPEC = 0, + NFTA_COMPAT_NAME, + NFTA_COMPAT_REV, + NFTA_COMPAT_TYPE, + __NFTA_COMPAT_MAX, +}; +#define NFTA_COMPAT_MAX (__NFTA_COMPAT_MAX - 1) + +#endif diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index d276c3bd55b8..288959404d54 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -54,6 +54,7 @@ struct nfgenmsg { #define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8 #define NFNL_SUBSYS_CTHELPER 9 #define NFNL_SUBSYS_NFTABLES 10 -#define NFNL_SUBSYS_COUNT 11 +#define NFNL_SUBSYS_NFT_COMPAT 11 +#define NFNL_SUBSYS_COUNT 12 #endif /* _UAPI_NFNETLINK_H */ diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 23525c4c0192..c61cffb9b760 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, struct sk_buff *skb, @@ -22,6 +24,8 @@ static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct nft_pktinfo pkt; + if (unlikely(skb->len < sizeof(struct iphdr) || ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) { if (net_ratelimit()) @@ -29,8 +33,9 @@ static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, "packet\n"); return NF_ACCEPT; } + nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); - return nft_do_chain(ops, skb, in, out, okfn); + return nft_do_chain_pktinfo(&pkt, ops); } static struct nft_af_info nft_af_ipv4 __read_mostly = { @@ -42,6 +47,21 @@ static struct nft_af_info nft_af_ipv4 __read_mostly = { }, }; + +static unsigned int +nft_do_chain_ipv4(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + + return nft_do_chain_pktinfo(&pkt, ops); +} + static struct nf_chain_type filter_ipv4 = { .family = NFPROTO_IPV4, .name = "filter", @@ -52,11 +72,11 @@ static struct nf_chain_type filter_ipv4 = { (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), .fn = { - [NF_INET_LOCAL_IN] = nft_do_chain, - [NF_INET_LOCAL_OUT] = nft_do_chain, - [NF_INET_FORWARD] = nft_do_chain, - [NF_INET_PRE_ROUTING] = nft_do_chain, - [NF_INET_POST_ROUTING] = nft_do_chain, + [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, + [NF_INET_LOCAL_OUT] = nft_ipv4_output, + [NF_INET_FORWARD] = nft_do_chain_ipv4, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, }, }; diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index cd286306be85..e09c201adf84 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -181,6 +182,7 @@ static unsigned int nf_nat_fn(const struct nf_hook_ops *ops, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); struct nf_conn_nat *nat; enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + struct nft_pktinfo pkt; unsigned int ret; if (ct == NULL || nf_ct_is_untracked(ct)) @@ -213,7 +215,9 @@ static unsigned int nf_nat_fn(const struct nf_hook_ops *ops, if (nf_nat_initialized(ct, maniptype)) break; - ret = nft_do_chain(ops, skb, in, out, okfn); + nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + + ret = nft_do_chain_pktinfo(&pkt, ops); if (ret != NF_ACCEPT) return ret; if (!nf_nat_initialized(ct, maniptype)) { diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 6b84e097b8fc..4e6bf9a3d7aa 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -27,6 +28,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, int (*okfn)(struct sk_buff *)) { unsigned int ret; + struct nft_pktinfo pkt; u32 mark; __be32 saddr, daddr; u_int8_t tos; @@ -37,13 +39,15 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; + nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + mark = skb->mark; iph = ip_hdr(skb); saddr = iph->saddr; daddr = iph->daddr; tos = iph->tos; - ret = nft_do_chain(ops, skb, in, out, okfn); + ret = nft_do_chain_pktinfo(&pkt, ops); if (ret != NF_DROP && ret != NF_QUEUE) { iph = ip_hdr(skb); diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 3631d6238e6f..42f905a808a3 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -14,6 +14,7 @@ #include #include #include +#include static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, struct sk_buff *skb, @@ -21,14 +22,18 @@ static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct nft_pktinfo pkt; + if (unlikely(skb->len < sizeof(struct ipv6hdr))) { if (net_ratelimit()) pr_info("nf_tables_ipv6: ignoring short SOCK_RAW " "packet\n"); return NF_ACCEPT; } + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + return NF_DROP; - return nft_do_chain(ops, skb, in, out, okfn); + return nft_do_chain_pktinfo(&pkt, ops); } static struct nft_af_info nft_af_ipv6 __read_mostly = { @@ -40,6 +45,22 @@ static struct nft_af_info nft_af_ipv6 __read_mostly = { }, }; +static unsigned int +nft_do_chain_ipv6(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + /* malformed packet, drop it */ + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + return NF_DROP; + + return nft_do_chain_pktinfo(&pkt, ops); +} + static struct nf_chain_type filter_ipv6 = { .family = NFPROTO_IPV6, .name = "filter", @@ -50,11 +71,11 @@ static struct nf_chain_type filter_ipv6 = { (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), .fn = { - [NF_INET_LOCAL_IN] = nft_do_chain, - [NF_INET_LOCAL_OUT] = nft_do_chain, - [NF_INET_FORWARD] = nft_do_chain, - [NF_INET_PRE_ROUTING] = nft_do_chain, - [NF_INET_POST_ROUTING] = nft_do_chain, + [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, + [NF_INET_LOCAL_OUT] = nft_ipv6_output, + [NF_INET_FORWARD] = nft_do_chain_ipv6, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, }, }; diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 4cdc992fa067..3fe40f0456ad 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -19,6 +19,7 @@ #include #include #include +#include #include static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, @@ -28,10 +29,15 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, int (*okfn)(struct sk_buff *)) { unsigned int ret; + struct nft_pktinfo pkt; struct in6_addr saddr, daddr; u_int8_t hop_limit; u32 mark, flowlabel; + /* malformed packet, drop it */ + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + return NF_DROP; + /* save source/dest address, mark, hoplimit, flowlabel, priority */ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); @@ -41,7 +47,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u32 *)ipv6_hdr(skb)); - ret = nft_do_chain(ops, skb, in, out, okfn); + ret = nft_do_chain_pktinfo(&pkt, ops); if (ret != NF_DROP && ret != NF_QUEUE && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index aa184a46bbf3..49e362707379 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -450,6 +450,15 @@ config NFT_LIMIT depends on NF_TABLES tristate "Netfilter nf_tables limit module" +config NFT_COMPAT + depends on NF_TABLES + depends on NETFILTER_XTABLES + tristate "Netfilter x_tables over nf_tables module" + help + This is required if you intend to use any of existing + x_tables match/target extensions over the nf_tables + framework. + config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index b6b78754e4cc..a6781450b6fb 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -70,6 +70,7 @@ nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o obj-$(CONFIG_NF_TABLES) += nf_tables.o +obj-$(CONFIG_NFT_COMPAT) += nft_compat.o obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o obj-$(CONFIG_NFT_META) += nft_meta.o obj-$(CONFIG_NFT_CT) += nft_ct.o diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9c2d8d5af843..61e017b349cb 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -438,7 +438,9 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { [NFTA_CHAIN_NAME] = { .type = NLA_STRING, .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED }, + [NFTA_CHAIN_POLICY] = { .type = NLA_U32 }, [NFTA_CHAIN_TYPE] = { .type = NLA_NUL_STRING }, + [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED }, }; static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { @@ -446,6 +448,33 @@ static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { [NFTA_HOOK_PRIORITY] = { .type = NLA_U32 }, }; +static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) +{ + struct nft_stats *cpu_stats, total; + struct nlattr *nest; + int cpu; + + memset(&total, 0, sizeof(total)); + for_each_possible_cpu(cpu) { + cpu_stats = per_cpu_ptr(stats, cpu); + total.pkts += cpu_stats->pkts; + total.bytes += cpu_stats->bytes; + } + nest = nla_nest_start(skb, NFTA_CHAIN_COUNTERS); + if (nest == NULL) + goto nla_put_failure; + + if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.pkts)) || + nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes))) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -ENOSPC; +} + static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, int event, u32 flags, int family, const struct nft_table *table, @@ -472,8 +501,11 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, goto nla_put_failure; if (chain->flags & NFT_BASE_CHAIN) { - const struct nf_hook_ops *ops = &nft_base_chain(chain)->ops; - struct nlattr *nest = nla_nest_start(skb, NFTA_CHAIN_HOOK); + const struct nft_base_chain *basechain = nft_base_chain(chain); + const struct nf_hook_ops *ops = &basechain->ops; + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_CHAIN_HOOK); if (nest == NULL) goto nla_put_failure; if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum))) @@ -482,11 +514,21 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, goto nla_put_failure; nla_nest_end(skb, nest); + if (nla_put_be32(skb, NFTA_CHAIN_POLICY, + htonl(basechain->policy))) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_CHAIN_TYPE, chain_type[ops->pf][nft_base_chain(chain)->type]->name)) goto nla_put_failure; + + if (nft_dump_stats(skb, nft_base_chain(chain)->stats)) + goto nla_put_failure; } + if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use))) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -617,6 +659,67 @@ err: return err; } +static int +nf_tables_chain_policy(struct nft_base_chain *chain, const struct nlattr *attr) +{ + switch (ntohl(nla_get_be32(attr))) { + case NF_DROP: + chain->policy = NF_DROP; + break; + case NF_ACCEPT: + chain->policy = NF_ACCEPT; + break; + default: + return -EINVAL; + } + return 0; +} + +static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { + [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 }, + [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, +}; + +static int +nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr) +{ + struct nlattr *tb[NFTA_COUNTER_MAX+1]; + struct nft_stats __percpu *newstats; + struct nft_stats *stats; + int err; + + err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy); + if (err < 0) + return err; + + if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS]) + return -EINVAL; + + newstats = alloc_percpu(struct nft_stats); + if (newstats == NULL) + return -ENOMEM; + + /* Restore old counters on this cpu, no problem. Per-cpu statistics + * are not exposed to userspace. + */ + stats = this_cpu_ptr(newstats); + stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); + stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + + if (chain->stats) { + /* nfnl_lock is held, add some nfnl function for this, later */ + struct nft_stats __percpu *oldstats = + rcu_dereference_protected(chain->stats, 1); + + rcu_assign_pointer(chain->stats, newstats); + synchronize_rcu(); + free_percpu(oldstats); + } else + rcu_assign_pointer(chain->stats, newstats); + + return 0; +} + static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -626,7 +729,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, const struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; - struct nft_base_chain *basechain; + struct nft_base_chain *basechain = NULL; struct nlattr *ha[NFTA_HOOK_MAX + 1]; int family = nfmsg->nfgen_family; u64 handle = 0; @@ -673,6 +776,26 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]))) return -EEXIST; + if (nla[NFTA_CHAIN_POLICY]) { + if (!(chain->flags & NFT_BASE_CHAIN)) + return -EOPNOTSUPP; + + err = nf_tables_chain_policy(nft_base_chain(chain), + nla[NFTA_CHAIN_POLICY]); + if (err < 0) + return err; + } + + if (nla[NFTA_CHAIN_COUNTERS]) { + if (!(chain->flags & NFT_BASE_CHAIN)) + return -EOPNOTSUPP; + + err = nf_tables_counters(nft_base_chain(chain), + nla[NFTA_CHAIN_COUNTERS]); + if (err < 0) + return err; + } + if (nla[NFTA_CHAIN_HANDLE] && name) nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); @@ -727,6 +850,36 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, ops->hook = afi->hooks[ops->hooknum]; chain->flags |= NFT_BASE_CHAIN; + + if (nla[NFTA_CHAIN_POLICY]) { + err = nf_tables_chain_policy(basechain, + nla[NFTA_CHAIN_POLICY]); + if (err < 0) { + free_percpu(basechain->stats); + kfree(basechain); + return err; + } + } else + basechain->policy = NF_ACCEPT; + + if (nla[NFTA_CHAIN_COUNTERS]) { + err = nf_tables_counters(basechain, + nla[NFTA_CHAIN_COUNTERS]); + if (err < 0) { + free_percpu(basechain->stats); + kfree(basechain); + return err; + } + } else { + struct nft_stats __percpu *newstats; + + newstats = alloc_percpu(struct nft_stats); + if (newstats == NULL) + return -ENOMEM; + + rcu_assign_pointer(nft_base_chain(chain)->stats, + newstats); + } } else { chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (chain == NULL) @@ -739,6 +892,15 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, list_add_tail(&chain->list, &table->chains); table->use++; + + if (chain->flags & NFT_BASE_CHAIN) { + err = nf_register_hook(&nft_base_chain(chain)->ops); + if (err < 0) { + free_percpu(basechain->stats); + kfree(basechain); + return err; + } + } notify: nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN, family); @@ -751,9 +913,10 @@ static void nf_tables_rcu_chain_destroy(struct rcu_head *head) BUG_ON(chain->use > 0); - if (chain->flags & NFT_BASE_CHAIN) + if (chain->flags & NFT_BASE_CHAIN) { + free_percpu(nft_base_chain(chain)->stats); kfree(nft_base_chain(chain)); - else + } else kfree(chain); } @@ -801,13 +964,15 @@ static void nft_ctx_init(struct nft_ctx *ctx, const struct nlmsghdr *nlh, const struct nft_af_info *afi, const struct nft_table *table, - const struct nft_chain *chain) + const struct nft_chain *chain, + const struct nlattr * const *nla) { ctx->skb = skb; ctx->nlh = nlh; ctx->afi = afi; ctx->table = table; ctx->chain = chain; + ctx->nla = nla; } /* @@ -910,7 +1075,8 @@ struct nft_expr_info { struct nlattr *tb[NFT_EXPR_MAXATTR + 1]; }; -static int nf_tables_expr_parse(const struct nlattr *nla, +static int nf_tables_expr_parse(const struct nft_ctx *ctx, + const struct nlattr *nla, struct nft_expr_info *info) { const struct nft_expr_type *type; @@ -935,7 +1101,8 @@ static int nf_tables_expr_parse(const struct nlattr *nla, memset(info->tb, 0, sizeof(info->tb[0]) * (type->maxattr + 1)); if (type->select_ops != NULL) { - ops = type->select_ops((const struct nlattr * const *)info->tb); + ops = type->select_ops(ctx, + (const struct nlattr * const *)info->tb); if (IS_ERR(ops)) { err = PTR_ERR(ops); goto err1; @@ -1012,6 +1179,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, + [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, }; static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, @@ -1269,6 +1437,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, handle = nf_tables_alloc_handle(table); } + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + n = 0; size = 0; if (nla[NFTA_RULE_EXPRESSIONS]) { @@ -1278,7 +1448,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, goto err1; if (n == NFT_RULE_MAXEXPRS) goto err1; - err = nf_tables_expr_parse(tmp, &info[n]); + err = nf_tables_expr_parse(&ctx, tmp, &info[n]); if (err < 0) goto err1; size += info[n].ops->size; @@ -1294,7 +1464,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, rule->handle = handle; rule->dlen = size; - nft_ctx_init(&ctx, skb, nlh, afi, table, chain); expr = nft_expr_first(rule); for (i = 0; i < n; i++) { err = nf_tables_newexpr(&ctx, &info[i], expr); @@ -1304,13 +1473,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, expr = nft_expr_next(expr); } - /* Register hook when first rule is inserted into a base chain */ - if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN) { - err = nf_register_hook(&nft_base_chain(chain)->ops); - if (err < 0) - goto err2; - } - if (nlh->nlmsg_flags & NLM_F_REPLACE) { list_replace_rcu(&old_rule->list, &rule->list); nf_tables_rule_destroy(old_rule); @@ -1379,10 +1541,6 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, } } - /* Unregister hook when last rule from base chain is deleted */ - if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN) - nf_unregister_hook(&nft_base_chain(chain)->ops); - return 0; } @@ -1470,7 +1628,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, return PTR_ERR(table); } - nft_ctx_init(ctx, skb, nlh, afi, table, NULL); + nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla); return 0; } @@ -1799,7 +1957,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(table)) return PTR_ERR(table); - nft_ctx_init(&ctx, skb, nlh, afi, table, NULL); + nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) { @@ -1987,7 +2145,7 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, if (IS_ERR(table)) return PTR_ERR(table); - nft_ctx_init(ctx, skb, nlh, afi, table, NULL); + nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla); return 0; } @@ -2435,23 +2593,27 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, { const struct nft_rule *rule; const struct nft_expr *expr, *last; - const struct nft_data *data; const struct nft_set *set; struct nft_set_binding *binding; struct nft_set_iter iter; - int err; if (ctx->chain == chain) return -ELOOP; list_for_each_entry(rule, &chain->rules, list) { nft_rule_for_each_expr(expr, last, rule) { - if (!expr->ops->get_verdict) + const struct nft_data *data = NULL; + int err; + + if (!expr->ops->validate) continue; - data = expr->ops->get_verdict(expr); + err = expr->ops->validate(ctx, expr, &data); + if (err < 0) + return err; + if (data == NULL) - break; + continue; switch (data->verdict) { case NFT_JUMP: diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 9aede59ed2d7..e51a45c12128 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -60,27 +60,34 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, return true; } -unsigned int nft_do_chain(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +struct nft_jumpstack { + const struct nft_chain *chain; + const struct nft_rule *rule; +}; + +static inline void +nft_chain_stats(const struct nft_chain *this, const struct nft_pktinfo *pkt, + struct nft_jumpstack *jumpstack, unsigned int stackptr) +{ + struct nft_stats __percpu *stats; + const struct nft_chain *chain = stackptr ? jumpstack[0].chain : this; + + rcu_read_lock_bh(); + stats = rcu_dereference(nft_base_chain(chain)->stats); + __this_cpu_inc(stats->pkts); + __this_cpu_add(stats->bytes, pkt->skb->len); + rcu_read_unlock_bh(); +} + +unsigned int +nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { const struct nft_chain *chain = ops->priv; const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_data data[NFT_REG_MAX + 1]; - const struct nft_pktinfo pkt = { - .skb = skb, - .in = in, - .out = out, - .hooknum = ops->hooknum, - }; unsigned int stackptr = 0; - struct { - const struct nft_chain *chain; - const struct nft_rule *rule; - } jumpstack[NFT_JUMP_STACK_SIZE]; + struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; do_chain: rule = list_entry(&chain->rules, struct nft_rule, list); @@ -91,8 +98,8 @@ next_rule: if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, data); else if (expr->ops != &nft_payload_fast_ops || - !nft_payload_fast_eval(expr, data, &pkt)) - expr->ops->eval(expr, data, &pkt); + !nft_payload_fast_eval(expr, data, pkt)) + expr->ops->eval(expr, data, pkt); if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE) break; @@ -135,10 +142,11 @@ next_rule: rule = jumpstack[stackptr].rule; goto next_rule; } + nft_chain_stats(chain, pkt, jumpstack, stackptr); - return NF_ACCEPT; + return nft_base_chain(chain)->policy; } -EXPORT_SYMBOL_GPL(nft_do_chain); +EXPORT_SYMBOL_GPL(nft_do_chain_pktinfo); int __init nf_tables_core_module_init(void) { diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 37134f3e84fb..954925db414d 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -162,7 +162,8 @@ const struct nft_expr_ops nft_cmp_fast_ops = { .dump = nft_cmp_fast_dump, }; -static const struct nft_expr_ops *nft_cmp_select_ops(const struct nlattr * const tb[]) +static const struct nft_expr_ops * +nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_data_desc desc; struct nft_data data; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c new file mode 100644 index 000000000000..4811f762e060 --- /dev/null +++ b/net/netfilter/nft_compat.c @@ -0,0 +1,768 @@ +/* + * (C) 2012-2013 by Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This software has been sponsored by Sophos Astaro + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for set_fs */ +#include + +union nft_entry { + struct ipt_entry e4; + struct ip6t_entry e6; +}; + +static inline void +nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info) +{ + par->target = xt; + par->targinfo = xt_info; + par->hotdrop = false; +} + +static void nft_target_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + void *info = nft_expr_priv(expr); + struct xt_target *target = expr->ops->data; + struct sk_buff *skb = pkt->skb; + int ret; + + nft_compat_set_par((struct xt_action_param *)&pkt->xt, target, info); + + ret = target->target(skb, &pkt->xt); + + if (pkt->xt.hotdrop) + ret = NF_DROP; + + switch(ret) { + case XT_CONTINUE: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + break; + default: + data[NFT_REG_VERDICT].verdict = ret; + break; + } + return; +} + +static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = { + [NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING }, + [NFTA_TARGET_REV] = { .type = NLA_U32 }, + [NFTA_TARGET_INFO] = { .type = NLA_BINARY }, +}; + +static void +nft_target_set_tgchk_param(struct xt_tgchk_param *par, + const struct nft_ctx *ctx, + struct xt_target *target, void *info, + union nft_entry *entry, u8 proto, bool inv) +{ + par->net = &init_net; + par->table = ctx->table->name; + switch (ctx->afi->family) { + case AF_INET: + entry->e4.ip.proto = proto; + entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; + break; + case AF_INET6: + entry->e6.ipv6.proto = proto; + entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; + break; + } + par->entryinfo = entry; + par->target = target; + par->targinfo = info; + if (ctx->chain->flags & NFT_BASE_CHAIN) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); + const struct nf_hook_ops *ops = &basechain->ops; + + par->hook_mask = 1 << ops->hooknum; + } + par->family = ctx->afi->family; +} + +static void target_compat_from_user(struct xt_target *t, void *in, void *out) +{ +#ifdef CONFIG_COMPAT + if (t->compat_from_user) { + int pad; + + t->compat_from_user(out, in); + pad = XT_ALIGN(t->targetsize) - t->targetsize; + if (pad > 0) + memset(out + t->targetsize, 0, pad); + } else +#endif + memcpy(out, in, XT_ALIGN(t->targetsize)); +} + +static inline int nft_compat_target_offset(struct xt_target *target) +{ +#ifdef CONFIG_COMPAT + return xt_compat_target_offset(target); +#else + return 0; +#endif +} + +static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] = { + [NFTA_RULE_COMPAT_PROTO] = { .type = NLA_U32 }, + [NFTA_RULE_COMPAT_FLAGS] = { .type = NLA_U32 }, +}; + +static u8 nft_parse_compat(const struct nlattr *attr, bool *inv) +{ + struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; + u32 flags; + int err; + + err = nla_parse_nested(tb, NFTA_RULE_COMPAT_MAX, attr, + nft_rule_compat_policy); + if (err < 0) + return err; + + if (!tb[NFTA_RULE_COMPAT_PROTO] || !tb[NFTA_RULE_COMPAT_FLAGS]) + return -EINVAL; + + flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS])); + if (flags & ~NFT_RULE_COMPAT_F_MASK) + return -EINVAL; + if (flags & NFT_RULE_COMPAT_F_INV) + *inv = true; + + return ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); +} + +static int +nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + void *info = nft_expr_priv(expr); + struct xt_target *target = expr->ops->data; + struct xt_tgchk_param par; + size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO])); + u8 proto = 0; + bool inv = false; + union nft_entry e = {}; + int ret; + + target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info); + + if (ctx->nla[NFTA_RULE_COMPAT]) + proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv); + + nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); + + ret = xt_check_target(&par, size, proto, inv); + if (ret < 0) + goto err; + + /* The standard target cannot be used */ + if (target->target == NULL) { + ret = -EINVAL; + goto err; + } + + return 0; +err: + module_put(target->me); + return ret; +} + +static void +nft_target_destroy(const struct nft_expr *expr) +{ + struct xt_target *target = expr->ops->data; + + module_put(target->me); +} + +static int +target_dump_info(struct sk_buff *skb, const struct xt_target *t, const void *in) +{ + int ret; + +#ifdef CONFIG_COMPAT + if (t->compat_to_user) { + mm_segment_t old_fs; + void *out; + + out = kmalloc(XT_ALIGN(t->targetsize), GFP_ATOMIC); + if (out == NULL) + return -ENOMEM; + + /* We want to reuse existing compat_to_user */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + t->compat_to_user(out, in); + set_fs(old_fs); + ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), out); + kfree(out); + } else +#endif + ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), in); + + return ret; +} + +static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct xt_target *target = expr->ops->data; + void *info = nft_expr_priv(expr); + + if (nla_put_string(skb, NFTA_TARGET_NAME, target->name) || + nla_put_be32(skb, NFTA_TARGET_REV, htonl(target->revision)) || + target_dump_info(skb, target, info)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static int nft_target_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + struct xt_target *target = expr->ops->data; + unsigned int hook_mask = 0; + + if (ctx->chain->flags & NFT_BASE_CHAIN) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); + const struct nf_hook_ops *ops = &basechain->ops; + + hook_mask = 1 << ops->hooknum; + if (hook_mask & target->hooks) + return 0; + + /* This target is being called from an invalid chain */ + return -EINVAL; + } + return 0; +} + +static void nft_match_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + void *info = nft_expr_priv(expr); + struct xt_match *match = expr->ops->data; + struct sk_buff *skb = pkt->skb; + bool ret; + + nft_compat_set_par((struct xt_action_param *)&pkt->xt, match, info); + + ret = match->match(skb, (struct xt_action_param *)&pkt->xt); + + if (pkt->xt.hotdrop) { + data[NFT_REG_VERDICT].verdict = NF_DROP; + return; + } + + switch(ret) { + case true: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + break; + case false: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; + break; + } +} + +static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = { + [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING }, + [NFTA_MATCH_REV] = { .type = NLA_U32 }, + [NFTA_MATCH_INFO] = { .type = NLA_BINARY }, +}; + +/* struct xt_mtchk_param and xt_tgchk_param look very similar */ +static void +nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, + struct xt_match *match, void *info, + union nft_entry *entry, u8 proto, bool inv) +{ + par->net = &init_net; + par->table = ctx->table->name; + switch (ctx->afi->family) { + case AF_INET: + entry->e4.ip.proto = proto; + entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; + break; + case AF_INET6: + entry->e6.ipv6.proto = proto; + entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; + break; + } + par->entryinfo = entry; + par->match = match; + par->matchinfo = info; + if (ctx->chain->flags & NFT_BASE_CHAIN) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); + const struct nf_hook_ops *ops = &basechain->ops; + + par->hook_mask = 1 << ops->hooknum; + } + par->family = ctx->afi->family; +} + +static void match_compat_from_user(struct xt_match *m, void *in, void *out) +{ +#ifdef CONFIG_COMPAT + if (m->compat_from_user) { + int pad; + + m->compat_from_user(out, in); + pad = XT_ALIGN(m->matchsize) - m->matchsize; + if (pad > 0) + memset(out + m->matchsize, 0, pad); + } else +#endif + memcpy(out, in, XT_ALIGN(m->matchsize)); +} + +static int +nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + void *info = nft_expr_priv(expr); + struct xt_match *match = expr->ops->data; + struct xt_mtchk_param par; + size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO])); + u8 proto = 0; + bool inv = false; + union nft_entry e = {}; + int ret; + + match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info); + + if (ctx->nla[NFTA_RULE_COMPAT]) + proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv); + + nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); + + ret = xt_check_match(&par, size, proto, inv); + if (ret < 0) + goto err; + + return 0; +err: + module_put(match->me); + return ret; +} + +static void +nft_match_destroy(const struct nft_expr *expr) +{ + struct xt_match *match = expr->ops->data; + + module_put(match->me); +} + +static int +match_dump_info(struct sk_buff *skb, const struct xt_match *m, const void *in) +{ + int ret; + +#ifdef CONFIG_COMPAT + if (m->compat_to_user) { + mm_segment_t old_fs; + void *out; + + out = kmalloc(XT_ALIGN(m->matchsize), GFP_ATOMIC); + if (out == NULL) + return -ENOMEM; + + /* We want to reuse existing compat_to_user */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + m->compat_to_user(out, in); + set_fs(old_fs); + ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), out); + kfree(out); + } else +#endif + ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), in); + + return ret; +} + +static inline int nft_compat_match_offset(struct xt_match *match) +{ +#ifdef CONFIG_COMPAT + return xt_compat_match_offset(match); +#else + return 0; +#endif +} + +static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + void *info = nft_expr_priv(expr); + struct xt_match *match = expr->ops->data; + + if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) || + nla_put_be32(skb, NFTA_MATCH_REV, htonl(match->revision)) || + match_dump_info(skb, match, info)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static int nft_match_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + struct xt_match *match = expr->ops->data; + unsigned int hook_mask = 0; + + if (ctx->chain->flags & NFT_BASE_CHAIN) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); + const struct nf_hook_ops *ops = &basechain->ops; + + hook_mask = 1 << ops->hooknum; + if (hook_mask & match->hooks) + return 0; + + /* This match is being called from an invalid chain */ + return -EINVAL; + } + return 0; +} + +static int +nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, + int event, u16 family, const char *name, + int rev, int target) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = portid ? NLM_F_MULTI : 0; + + event |= NFNL_SUBSYS_NFT_COMPAT << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_COMPAT_NAME, name) || + nla_put_be32(skb, NFTA_COMPAT_REV, htonl(rev)) || + nla_put_be32(skb, NFTA_COMPAT_TYPE, htonl(target))) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return skb->len; + +nlmsg_failure: +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int +nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + int ret = 0, target; + struct nfgenmsg *nfmsg; + const char *fmt; + const char *name; + u32 rev; + struct sk_buff *skb2; + + if (tb[NFTA_COMPAT_NAME] == NULL || + tb[NFTA_COMPAT_REV] == NULL || + tb[NFTA_COMPAT_TYPE] == NULL) + return -EINVAL; + + name = nla_data(tb[NFTA_COMPAT_NAME]); + rev = ntohl(nla_get_be32(tb[NFTA_COMPAT_REV])); + target = ntohl(nla_get_be32(tb[NFTA_COMPAT_TYPE])); + + nfmsg = nlmsg_data(nlh); + + switch(nfmsg->nfgen_family) { + case AF_INET: + fmt = "ipt_%s"; + break; + case AF_INET6: + fmt = "ip6t_%s"; + break; + default: + pr_err("nft_compat: unsupported protocol %d\n", + nfmsg->nfgen_family); + return -EINVAL; + } + + try_then_request_module(xt_find_revision(nfmsg->nfgen_family, name, + rev, target, &ret), + fmt, name); + + if (ret < 0) + return ret; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + /* include the best revision for this extension in the message */ + if (nfnl_compat_fill_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + NFNL_MSG_COMPAT_GET, + nfmsg->nfgen_family, + name, ret, target) <= 0) { + kfree_skb(skb2); + return -ENOSPC; + } + + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, + MSG_DONTWAIT); + if (ret > 0) + ret = 0; + + return ret == -EAGAIN ? -ENOBUFS : ret; +} + +static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = { + [NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING, + .len = NFT_COMPAT_NAME_MAX-1 }, + [NFTA_COMPAT_REV] = { .type = NLA_U32 }, + [NFTA_COMPAT_TYPE] = { .type = NLA_U32 }, +}; + +static const struct nfnl_callback nfnl_nft_compat_cb[NFNL_MSG_COMPAT_MAX] = { + [NFNL_MSG_COMPAT_GET] = { .call = nfnl_compat_get, + .attr_count = NFTA_COMPAT_MAX, + .policy = nfnl_compat_policy_get }, +}; + +static const struct nfnetlink_subsystem nfnl_compat_subsys = { + .name = "nft-compat", + .subsys_id = NFNL_SUBSYS_NFT_COMPAT, + .cb_count = NFNL_MSG_COMPAT_MAX, + .cb = nfnl_nft_compat_cb, +}; + +static LIST_HEAD(nft_match_list); + +struct nft_xt { + struct list_head head; + struct nft_expr_ops ops; +}; + +static struct nft_expr_type nft_match_type; + +static const struct nft_expr_ops * +nft_match_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + struct nft_xt *nft_match; + struct xt_match *match; + char *mt_name; + __u32 rev, family; + + if (tb[NFTA_MATCH_NAME] == NULL || + tb[NFTA_MATCH_REV] == NULL || + tb[NFTA_MATCH_INFO] == NULL) + return ERR_PTR(-EINVAL); + + mt_name = nla_data(tb[NFTA_MATCH_NAME]); + rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV])); + family = ctx->afi->family; + + /* Re-use the existing match if it's already loaded. */ + list_for_each_entry(nft_match, &nft_match_list, head) { + struct xt_match *match = nft_match->ops.data; + + if (strcmp(match->name, mt_name) == 0 && + match->revision == rev && match->family == family) + return &nft_match->ops; + } + + match = xt_request_find_match(family, mt_name, rev); + if (IS_ERR(match)) + return ERR_PTR(-ENOENT); + + /* This is the first time we use this match, allocate operations */ + nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL); + if (nft_match == NULL) + return ERR_PTR(-ENOMEM); + + nft_match->ops.type = &nft_match_type; + nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize) + + nft_compat_match_offset(match)); + nft_match->ops.eval = nft_match_eval; + nft_match->ops.init = nft_match_init; + nft_match->ops.destroy = nft_match_destroy; + nft_match->ops.dump = nft_match_dump; + nft_match->ops.validate = nft_match_validate; + nft_match->ops.data = match; + + list_add(&nft_match->head, &nft_match_list); + + return &nft_match->ops; +} + +static void nft_match_release(void) +{ + struct nft_xt *nft_match; + + list_for_each_entry(nft_match, &nft_match_list, head) + kfree(nft_match); +} + +static struct nft_expr_type nft_match_type __read_mostly = { + .name = "match", + .select_ops = nft_match_select_ops, + .policy = nft_match_policy, + .maxattr = NFTA_MATCH_MAX, + .owner = THIS_MODULE, +}; + +static LIST_HEAD(nft_target_list); + +static struct nft_expr_type nft_target_type; + +static const struct nft_expr_ops * +nft_target_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + struct nft_xt *nft_target; + struct xt_target *target; + char *tg_name; + __u32 rev, family; + + if (tb[NFTA_TARGET_NAME] == NULL || + tb[NFTA_TARGET_REV] == NULL || + tb[NFTA_TARGET_INFO] == NULL) + return ERR_PTR(-EINVAL); + + tg_name = nla_data(tb[NFTA_TARGET_NAME]); + rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV])); + family = ctx->afi->family; + + /* Re-use the existing target if it's already loaded. */ + list_for_each_entry(nft_target, &nft_match_list, head) { + struct xt_target *target = nft_target->ops.data; + + if (strcmp(target->name, tg_name) == 0 && + target->revision == rev && target->family == family) + return &nft_target->ops; + } + + target = xt_request_find_target(family, tg_name, rev); + if (IS_ERR(target)) + return ERR_PTR(-ENOENT); + + /* This is the first time we use this target, allocate operations */ + nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL); + if (nft_target == NULL) + return ERR_PTR(-ENOMEM); + + nft_target->ops.type = &nft_target_type; + nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize) + + nft_compat_target_offset(target)); + nft_target->ops.eval = nft_target_eval; + nft_target->ops.init = nft_target_init; + nft_target->ops.destroy = nft_target_destroy; + nft_target->ops.dump = nft_target_dump; + nft_target->ops.validate = nft_target_validate; + nft_target->ops.data = target; + + list_add(&nft_target->head, &nft_target_list); + + return &nft_target->ops; +} + +static void nft_target_release(void) +{ + struct nft_xt *nft_target; + + list_for_each_entry(nft_target, &nft_target_list, head) + kfree(nft_target); +} + +static struct nft_expr_type nft_target_type __read_mostly = { + .name = "target", + .select_ops = nft_target_select_ops, + .policy = nft_target_policy, + .maxattr = NFTA_TARGET_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_compat_module_init(void) +{ + int ret; + + ret = nft_register_expr(&nft_match_type); + if (ret < 0) + return ret; + + ret = nft_register_expr(&nft_target_type); + if (ret < 0) + goto err_match; + + ret = nfnetlink_subsys_register(&nfnl_compat_subsys); + if (ret < 0) { + pr_err("nft_compat: cannot register with nfnetlink.\n"); + goto err_target; + } + + pr_info("nf_tables_compat: (c) 2012 Pablo Neira Ayuso \n"); + + return ret; + +err_target: + nft_unregister_expr(&nft_target_type); +err_match: + nft_unregister_expr(&nft_match_type); + return ret; +} + +static void __exit nft_compat_module_exit(void) +{ + nfnetlink_subsys_unregister(&nfnl_compat_subsys); + nft_unregister_expr(&nft_target_type); + nft_unregister_expr(&nft_match_type); + nft_match_release(); + nft_target_release(); +} + +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT); + +module_init(nft_compat_module_init); +module_exit(nft_compat_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_ALIAS_NFT_EXPR("match"); +MODULE_ALIAS_NFT_EXPR("target"); diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 1bfeeaf865b6..f169501f1ad4 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -90,14 +90,16 @@ nla_put_failure: return -1; } -static const struct nft_data *nft_immediate_get_verdict(const struct nft_expr *expr) +static int nft_immediate_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); if (priv->dreg == NFT_REG_VERDICT) - return &priv->data; - else - return NULL; + *data = &priv->data; + + return 0; } static struct nft_expr_type nft_imm_type; @@ -108,7 +110,7 @@ static const struct nft_expr_ops nft_imm_ops = { .init = nft_immediate_init, .destroy = nft_immediate_destroy, .dump = nft_immediate_dump, - .get_verdict = nft_immediate_get_verdict, + .validate = nft_immediate_validate, }; static struct nft_expr_type nft_imm_type __read_mostly = { diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 7cf13f7e1e94..bc8bdb2c1ba7 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -107,7 +107,9 @@ const struct nft_expr_ops nft_payload_fast_ops = { .dump = nft_payload_dump, }; -static const struct nft_expr_ops *nft_payload_select_ops(const struct nlattr * const tb[]) +static const struct nft_expr_ops * +nft_payload_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) { enum nft_payload_bases base; unsigned int offset, len; -- cgit v1.2.3-55-g7522 From c54032e05bfcbb261f47aaadf8476e864e8712f4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 11 Oct 2013 10:00:22 +0200 Subject: netfilter: nf_tables: nft_payload: fix transport header base We cannot use skb->transport_header since it's unset, use pkt->xt.thoff instead. Now possible using information made available through the x_tables compatibility layer. Reported-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 2 +- net/netfilter/nft_payload.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index e51a45c12128..3c13007d80df 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -44,7 +44,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) ptr = skb_network_header(skb); else - ptr = skb_transport_header(skb); + ptr = skb_network_header(skb) + pkt->xt.thoff; ptr += priv->offset; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index bc8bdb2c1ba7..a2aeb318678f 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -36,7 +36,7 @@ static void nft_payload_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: - offset = skb_transport_offset(skb); + offset = pkt->xt.thoff; break; default: BUG(); -- cgit v1.2.3-55-g7522 From 9ddf63235749a9efa1fad2eeb74be2ee9b580f8d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 10 Oct 2013 13:26:33 +0200 Subject: netfilter: nf_tables: add support for dormant tables This patch allows you to temporarily disable an entire table. You can change the state of a dormant table via NFT_MSG_NEWTABLE messages. Using this operation you can wake up a table, so their chains are registered. This provides atomicity at chain level. Thus, the rule-set of one chain is applied at once, avoiding any possible intermediate state in every chain. Still, the chains that belongs to a table are registered consecutively. This also allows you to have inactive tables in the kernel. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 11 ++++ net/netfilter/nf_tables_api.c | 97 +++++++++++++++++++++++++++++--- 2 files changed, 101 insertions(+), 7 deletions(-) (limited to 'net/netfilter') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 1563875e6942..a9c4bce1988f 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -96,14 +96,25 @@ enum nft_hook_attributes { }; #define NFTA_HOOK_MAX (__NFTA_HOOK_MAX - 1) +/** + * enum nft_table_flags - nf_tables table flags + * + * @NFT_TABLE_F_DORMANT: this table is not active + */ +enum nft_table_flags { + NFT_TABLE_F_DORMANT = 0x1, +}; + /** * enum nft_table_attributes - nf_tables table netlink attributes * * @NFTA_TABLE_NAME: name of the table (NLA_STRING) + * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32) */ enum nft_table_attributes { NFTA_TABLE_UNSPEC, NFTA_TABLE_NAME, + NFTA_TABLE_FLAGS, __NFTA_TABLE_MAX }; #define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 61e017b349cb..a4dd7ce5ec3e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -158,6 +158,7 @@ static int nf_tables_chain_type_lookup(const struct nft_af_info *afi, static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = NLA_STRING }, + [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, }; static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq, @@ -177,7 +178,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; - if (nla_put_string(skb, NFTA_TABLE_NAME, table->name)) + if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || + nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags))) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -301,6 +303,74 @@ err: return err; } +static int nf_tables_table_enable(struct nft_table *table) +{ + struct nft_chain *chain; + int err, i = 0; + + list_for_each_entry(chain, &table->chains, list) { + err = nf_register_hook(&nft_base_chain(chain)->ops); + if (err < 0) + goto err; + + i++; + } + return 0; +err: + list_for_each_entry(chain, &table->chains, list) { + if (i-- <= 0) + break; + + nf_unregister_hook(&nft_base_chain(chain)->ops); + } + return err; +} + +static int nf_tables_table_disable(struct nft_table *table) +{ + struct nft_chain *chain; + + list_for_each_entry(chain, &table->chains, list) + nf_unregister_hook(&nft_base_chain(chain)->ops); + + return 0; +} + +static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[], + struct nft_af_info *afi, struct nft_table *table) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + int family = nfmsg->nfgen_family, ret = 0; + + if (nla[NFTA_TABLE_FLAGS]) { + __be32 flags; + + flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS])); + if (flags & ~NFT_TABLE_F_DORMANT) + return -EINVAL; + + if ((flags & NFT_TABLE_F_DORMANT) && + !(table->flags & NFT_TABLE_F_DORMANT)) { + ret = nf_tables_table_disable(table); + if (ret >= 0) + table->flags |= NFT_TABLE_F_DORMANT; + } else if (!(flags & NFT_TABLE_F_DORMANT) && + table->flags & NFT_TABLE_F_DORMANT) { + ret = nf_tables_table_enable(table); + if (ret >= 0) + table->flags &= ~NFT_TABLE_F_DORMANT; + } + if (ret < 0) + goto err; + } + + nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); +err: + return ret; +} + static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -328,7 +398,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - return 0; + return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table); } table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL); @@ -339,6 +409,18 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); + if (nla[NFTA_TABLE_FLAGS]) { + __be32 flags; + + flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS])); + if (flags & ~NFT_TABLE_F_DORMANT) { + kfree(table); + return -EINVAL; + } + + table->flags |= flags; + } + list_add_tail(&table->list, &afi->tables); nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); return 0; @@ -890,10 +972,8 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, chain->handle = nf_tables_alloc_handle(table); nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); - list_add_tail(&chain->list, &table->chains); - table->use++; - - if (chain->flags & NFT_BASE_CHAIN) { + if (!(table->flags & NFT_TABLE_F_DORMANT) && + chain->flags & NFT_BASE_CHAIN) { err = nf_register_hook(&nft_base_chain(chain)->ops); if (err < 0) { free_percpu(basechain->stats); @@ -901,6 +981,8 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, return err; } } + list_add_tail(&chain->list, &table->chains); + table->use++; notify: nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN, family); @@ -948,7 +1030,8 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, list_del(&chain->list); table->use--; - if (chain->flags & NFT_BASE_CHAIN) + if (!(table->flags & NFT_TABLE_F_DORMANT) && + chain->flags & NFT_BASE_CHAIN) nf_unregister_hook(&nft_base_chain(chain)->ops); nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN, -- cgit v1.2.3-55-g7522 From eb31628e37a0a4e01fffd79dcc7f815d2357f53a Mon Sep 17 00:00:00 2001 From: Tomasz Bursztyka Date: Thu, 10 Oct 2013 13:39:19 +0200 Subject: netfilter: nf_tables: Add support for IPv6 NAT This patch generalizes the NAT expression to support both IPv4 and IPv6 using the existing IPv4/IPv6 NAT infrastructure. This also adds the NAT chain type for IPv6. This patch collapses the following patches that were posted to the netfilter-devel mailing list, from Tomasz: * nf_tables: Change NFTA_NAT_ attributes to better semantic significance * nf_tables: Split IPv4 NAT into NAT expression and IPv4 NAT chain * nf_tables: Add support for IPv6 NAT expression * nf_tables: Add support for IPv6 NAT chain * nf_tables: Fix up build issue on IPv6 NAT support And, from Pablo Neira Ayuso: * fix missing dependencies in nft_chain_nat Signed-off-by: Tomasz Bursztyka Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 18 +-- net/ipv4/netfilter/Kconfig | 1 + net/ipv4/netfilter/nft_chain_nat_ipv4.c | 156 +--------------------- net/ipv6/netfilter/Kconfig | 5 + net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/nft_chain_nat_ipv6.c | 211 +++++++++++++++++++++++++++++ net/netfilter/Kconfig | 6 + net/netfilter/Makefile | 1 + net/netfilter/nft_nat.c | 220 +++++++++++++++++++++++++++++++ 9 files changed, 457 insertions(+), 162 deletions(-) create mode 100644 net/ipv6/netfilter/nft_chain_nat_ipv6.c create mode 100644 net/netfilter/nft_nat.c (limited to 'net/netfilter') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index a9c4bce1988f..7d4a1992f89c 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -695,18 +695,20 @@ enum nft_nat_types { * enum nft_nat_attributes - nf_tables nat expression netlink attributes * * @NFTA_NAT_TYPE: NAT type (NLA_U32: nft_nat_types) - * @NFTA_NAT_ADDR_MIN: source register of address range start (NLA_U32: nft_registers) - * @NFTA_NAT_ADDR_MAX: source register of address range end (NLA_U32: nft_registers) - * @NFTA_NAT_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) - * @NFTA_NAT_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) + * @NFTA_NAT_FAMILY: NAT family (NLA_U32) + * @NFTA_NAT_REG_ADDR_MIN: source register of address range start (NLA_U32: nft_registers) + * @NFTA_NAT_REG_ADDR_MAX: source register of address range end (NLA_U32: nft_registers) + * @NFTA_NAT_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) + * @NFTA_NAT_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) */ enum nft_nat_attributes { NFTA_NAT_UNSPEC, NFTA_NAT_TYPE, - NFTA_NAT_ADDR_MIN, - NFTA_NAT_ADDR_MAX, - NFTA_NAT_PROTO_MIN, - NFTA_NAT_PROTO_MAX, + NFTA_NAT_FAMILY, + NFTA_NAT_REG_ADDR_MIN, + NFTA_NAT_REG_ADDR_MAX, + NFTA_NAT_REG_PROTO_MIN, + NFTA_NAT_REG_PROTO_MAX, __NFTA_NAT_MAX }; #define NFTA_NAT_MAX (__NFTA_NAT_MAX - 1) diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index ae65fe98bfbe..1f37ef67f1ac 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -50,6 +50,7 @@ config NFT_CHAIN_ROUTE_IPV4 config NFT_CHAIN_NAT_IPV4 depends on NF_TABLES_IPV4 + depends on NF_NAT_IPV4 && NFT_NAT tristate "IPv4 nf_tables nat chain support" config IP_NF_IPTABLES diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index e09c201adf84..cf2c792cd971 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2008-2009 Patrick McHardy * Copyright (c) 2012 Pablo Neira Ayuso + * Copyright (c) 2012 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -14,10 +15,8 @@ #include #include #include -#include #include #include -#include #include #include #include @@ -27,147 +26,6 @@ #include #include -struct nft_nat { - enum nft_registers sreg_addr_min:8; - enum nft_registers sreg_addr_max:8; - enum nft_registers sreg_proto_min:8; - enum nft_registers sreg_proto_max:8; - enum nf_nat_manip_type type; -}; - -static void nft_nat_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) -{ - const struct nft_nat *priv = nft_expr_priv(expr); - enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo); - struct nf_nat_range range; - - memset(&range, 0, sizeof(range)); - if (priv->sreg_addr_min) { - range.min_addr.ip = data[priv->sreg_addr_min].data[0]; - range.max_addr.ip = data[priv->sreg_addr_max].data[0]; - range.flags |= NF_NAT_RANGE_MAP_IPS; - } - - if (priv->sreg_proto_min) { - range.min_proto.all = data[priv->sreg_proto_min].data[0]; - range.max_proto.all = data[priv->sreg_proto_max].data[0]; - range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - - data[NFT_REG_VERDICT].verdict = - nf_nat_setup_info(ct, &range, priv->type); -} - -static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = { - [NFTA_NAT_ADDR_MIN] = { .type = NLA_U32 }, - [NFTA_NAT_ADDR_MAX] = { .type = NLA_U32 }, - [NFTA_NAT_PROTO_MIN] = { .type = NLA_U32 }, - [NFTA_NAT_PROTO_MAX] = { .type = NLA_U32 }, - [NFTA_NAT_TYPE] = { .type = NLA_U32 }, -}; - -static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_nat *priv = nft_expr_priv(expr); - int err; - - if (tb[NFTA_NAT_TYPE] == NULL) - return -EINVAL; - - switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) { - case NFT_NAT_SNAT: - priv->type = NF_NAT_MANIP_SRC; - break; - case NFT_NAT_DNAT: - priv->type = NF_NAT_MANIP_DST; - break; - default: - return -EINVAL; - } - - if (tb[NFTA_NAT_ADDR_MIN]) { - priv->sreg_addr_min = ntohl(nla_get_be32(tb[NFTA_NAT_ADDR_MIN])); - err = nft_validate_input_register(priv->sreg_addr_min); - if (err < 0) - return err; - } - - if (tb[NFTA_NAT_ADDR_MAX]) { - priv->sreg_addr_max = ntohl(nla_get_be32(tb[NFTA_NAT_ADDR_MAX])); - err = nft_validate_input_register(priv->sreg_addr_max); - if (err < 0) - return err; - } else - priv->sreg_addr_max = priv->sreg_addr_min; - - if (tb[NFTA_NAT_PROTO_MIN]) { - priv->sreg_proto_min = ntohl(nla_get_be32(tb[NFTA_NAT_PROTO_MIN])); - err = nft_validate_input_register(priv->sreg_proto_min); - if (err < 0) - return err; - } - - if (tb[NFTA_NAT_PROTO_MAX]) { - priv->sreg_proto_max = ntohl(nla_get_be32(tb[NFTA_NAT_PROTO_MAX])); - err = nft_validate_input_register(priv->sreg_proto_max); - if (err < 0) - return err; - } else - priv->sreg_proto_max = priv->sreg_proto_min; - - return 0; -} - -static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) -{ - const struct nft_nat *priv = nft_expr_priv(expr); - - switch (priv->type) { - case NF_NAT_MANIP_SRC: - if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT))) - goto nla_put_failure; - break; - case NF_NAT_MANIP_DST: - if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT))) - goto nla_put_failure; - break; - } - - if (nla_put_be32(skb, NFTA_NAT_ADDR_MIN, htonl(priv->sreg_addr_min))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_NAT_ADDR_MAX, htonl(priv->sreg_addr_max))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_NAT_PROTO_MIN, htonl(priv->sreg_proto_min))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_NAT_PROTO_MAX, htonl(priv->sreg_proto_max))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -1; -} - -static struct nft_expr_type nft_nat_type; -static const struct nft_expr_ops nft_nat_ops = { - .type = &nft_nat_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), - .eval = nft_nat_eval, - .init = nft_nat_init, - .dump = nft_nat_dump, -}; - -static struct nft_expr_type nft_nat_type __read_mostly = { - .name = "nat", - .ops = &nft_nat_ops, - .policy = nft_nat_policy, - .maxattr = NFTA_NAT_MAX, - .owner = THIS_MODULE, -}; - /* * NAT chains */ @@ -306,7 +164,7 @@ static unsigned int nf_nat_output(const struct nf_hook_ops *ops, return ret; } -struct nf_chain_type nft_chain_nat_ipv4 = { +static struct nf_chain_type nft_chain_nat_ipv4 = { .family = NFPROTO_IPV4, .name = "nat", .type = NFT_CHAIN_T_NAT, @@ -331,20 +189,11 @@ static int __init nft_chain_nat_init(void) if (err < 0) return err; - err = nft_register_expr(&nft_nat_type); - if (err < 0) - goto err; - return 0; - -err: - nft_unregister_chain_type(&nft_chain_nat_ipv4); - return err; } static void __exit nft_chain_nat_exit(void) { - nft_unregister_expr(&nft_nat_type); nft_unregister_chain_type(&nft_chain_nat_ipv4); } @@ -354,4 +203,3 @@ module_exit(nft_chain_nat_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat"); -MODULE_ALIAS_NFT_EXPR("nat"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 23833064b7b5..7702f9e90a04 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -33,6 +33,11 @@ config NFT_CHAIN_ROUTE_IPV6 depends on NF_TABLES_IPV6 tristate "IPv6 nf_tables route chain support" +config NFT_CHAIN_NAT_IPV6 + depends on NF_TABLES_IPV6 + depends on NF_NAT_IPV6 && NFT_NAT + tristate "IPv6 nf_tables nat chain support" + config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index be4913aa524d..d1b4928f34f7 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o # nf_tables obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o +obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c new file mode 100644 index 000000000000..e86dcd70dc76 --- /dev/null +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2011 Patrick McHardy + * Copyright (c) 2012 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * IPv6 NAT chains + */ + +static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_nat *nat; + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + __be16 frag_off; + int hdrlen; + u8 nexthdr; + struct nft_pktinfo pkt; + unsigned int ret; + + if (ct == NULL || nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + nat = nfct_nat(ct); + if (nat == NULL) { + /* Conntrack module was loaded late, can't add extension. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) + return NF_ACCEPT; + } + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED + IP_CT_IS_REPLY: + nexthdr = ipv6_hdr(skb)->nexthdr; + hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), + &nexthdr, &frag_off); + + if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { + if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, + ops->hooknum, + hdrlen)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall through */ + case IP_CT_NEW: + if (nf_nat_initialized(ct, maniptype)) + break; + + nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out); + + ret = nft_do_chain_pktinfo(&pkt, ops); + if (ret != NF_ACCEPT) + return ret; + if (!nf_nat_initialized(ct, maniptype)) { + ret = nf_nat_alloc_null_binding(ct, ops->hooknum); + if (ret != NF_ACCEPT) + return ret; + } + default: + break; + } + + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); +} + +static unsigned int nf_nat_ipv6_prerouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct in6_addr daddr = ipv6_hdr(skb)->daddr; + unsigned int ret; + + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) + skb_dst_drop(skb); + + return ret; +} + +static unsigned int nf_nat_ipv6_postrouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo __maybe_unused; + const struct nf_conn *ct __maybe_unused; + unsigned int ret; + + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3) || + (ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all)) + if (nf_xfrm_me_harder(skb, AF_INET6) < 0) + ret = NF_DROP; + } +#endif + return ret; +} + +static unsigned int nf_nat_ipv6_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct; + unsigned int ret; + + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, + &ct->tuplehash[!dir].tuple.src.u3)) { + if (ip6_route_me_harder(skb)) + ret = NF_DROP; + } +#ifdef CONFIG_XFRM + else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) + if (nf_xfrm_me_harder(skb, AF_INET6)) + ret = NF_DROP; +#endif + } + return ret; +} + +static struct nf_chain_type nft_chain_nat_ipv6 = { + .family = NFPROTO_IPV6, + .name = "nat", + .type = NFT_CHAIN_T_NAT, + .hook_mask = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .fn = { + [NF_INET_PRE_ROUTING] = nf_nat_ipv6_prerouting, + [NF_INET_POST_ROUTING] = nf_nat_ipv6_postrouting, + [NF_INET_LOCAL_OUT] = nf_nat_ipv6_output, + [NF_INET_LOCAL_IN] = nf_nat_ipv6_fn, + }, + .me = THIS_MODULE, +}; + +static int __init nft_chain_nat_ipv6_init(void) +{ + int err; + + err = nft_register_chain_type(&nft_chain_nat_ipv6); + if (err < 0) + return err; + + return 0; +} + +static void __exit nft_chain_nat_ipv6_exit(void) +{ + nft_unregister_chain_type(&nft_chain_nat_ipv6); +} + +module_init(nft_chain_nat_ipv6_init); +module_exit(nft_chain_nat_ipv6_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Tomasz Bursztyka "); +MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 49e362707379..48acec17e27a 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -450,6 +450,12 @@ config NFT_LIMIT depends on NF_TABLES tristate "Netfilter nf_tables limit module" +config NFT_NAT + depends on NF_TABLES + depends on NF_CONNTRACK + depends on NF_NAT + tristate "Netfilter nf_tables nat module" + config NFT_COMPAT depends on NF_TABLES depends on NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index a6781450b6fb..394483b2c193 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -75,6 +75,7 @@ obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o obj-$(CONFIG_NFT_META) += nft_meta.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o +obj-$(CONFIG_NFT_NAT) += nft_nat.o #nf_tables-objs += nft_meta_target.o obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o obj-$(CONFIG_NFT_HASH) += nft_hash.o diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c new file mode 100644 index 000000000000..b0b87b2d2411 --- /dev/null +++ b/net/netfilter/nft_nat.c @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy + * Copyright (c) 2012 Pablo Neira Ayuso + * Copyright (c) 2012 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_nat { + enum nft_registers sreg_addr_min:8; + enum nft_registers sreg_addr_max:8; + enum nft_registers sreg_proto_min:8; + enum nft_registers sreg_proto_max:8; + int family; + enum nf_nat_manip_type type; +}; + +static void nft_nat_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo); + struct nf_nat_range range; + + memset(&range, 0, sizeof(range)); + if (priv->sreg_addr_min) { + if (priv->family == AF_INET) { + range.min_addr.ip = data[priv->sreg_addr_min].data[0]; + range.max_addr.ip = data[priv->sreg_addr_max].data[0]; + + } else { + memcpy(range.min_addr.ip6, + data[priv->sreg_addr_min].data, + sizeof(struct nft_data)); + memcpy(range.max_addr.ip6, + data[priv->sreg_addr_max].data, + sizeof(struct nft_data)); + } + range.flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (priv->sreg_proto_min) { + range.min_proto.all = data[priv->sreg_proto_min].data[0]; + range.max_proto.all = data[priv->sreg_proto_max].data[0]; + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + data[NFT_REG_VERDICT].verdict = + nf_nat_setup_info(ct, &range, priv->type); +} + +static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = { + [NFTA_NAT_TYPE] = { .type = NLA_U32 }, + [NFTA_NAT_FAMILY] = { .type = NLA_U32 }, + [NFTA_NAT_REG_ADDR_MIN] = { .type = NLA_U32 }, + [NFTA_NAT_REG_ADDR_MAX] = { .type = NLA_U32 }, + [NFTA_NAT_REG_PROTO_MIN] = { .type = NLA_U32 }, + [NFTA_NAT_REG_PROTO_MAX] = { .type = NLA_U32 }, +}; + +static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_nat *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_NAT_TYPE] == NULL) + return -EINVAL; + + switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) { + case NFT_NAT_SNAT: + priv->type = NF_NAT_MANIP_SRC; + break; + case NFT_NAT_DNAT: + priv->type = NF_NAT_MANIP_DST; + break; + default: + return -EINVAL; + } + + if (tb[NFTA_NAT_FAMILY] == NULL) + return -EINVAL; + + priv->family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY])); + if (priv->family != AF_INET && priv->family != AF_INET6) + return -EINVAL; + + if (tb[NFTA_NAT_REG_ADDR_MIN]) { + priv->sreg_addr_min = ntohl(nla_get_be32( + tb[NFTA_NAT_REG_ADDR_MIN])); + err = nft_validate_input_register(priv->sreg_addr_min); + if (err < 0) + return err; + } + + if (tb[NFTA_NAT_REG_ADDR_MAX]) { + priv->sreg_addr_max = ntohl(nla_get_be32( + tb[NFTA_NAT_REG_ADDR_MAX])); + err = nft_validate_input_register(priv->sreg_addr_max); + if (err < 0) + return err; + } else + priv->sreg_addr_max = priv->sreg_addr_min; + + if (tb[NFTA_NAT_REG_PROTO_MIN]) { + priv->sreg_proto_min = ntohl(nla_get_be32( + tb[NFTA_NAT_REG_PROTO_MIN])); + err = nft_validate_input_register(priv->sreg_proto_min); + if (err < 0) + return err; + } + + if (tb[NFTA_NAT_REG_PROTO_MAX]) { + priv->sreg_proto_max = ntohl(nla_get_be32( + tb[NFTA_NAT_REG_PROTO_MAX])); + err = nft_validate_input_register(priv->sreg_proto_max); + if (err < 0) + return err; + } else + priv->sreg_proto_max = priv->sreg_proto_min; + + return 0; +} + +static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + + switch (priv->type) { + case NF_NAT_MANIP_SRC: + if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT))) + goto nla_put_failure; + break; + case NF_NAT_MANIP_DST: + if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT))) + goto nla_put_failure; + break; + } + + if (nla_put_be32(skb, NFTA_NAT_FAMILY, htonl(priv->family))) + goto nla_put_failure; + if (nla_put_be32(skb, + NFTA_NAT_REG_ADDR_MIN, htonl(priv->sreg_addr_min))) + goto nla_put_failure; + if (nla_put_be32(skb, + NFTA_NAT_REG_ADDR_MAX, htonl(priv->sreg_addr_max))) + goto nla_put_failure; + if (nla_put_be32(skb, + NFTA_NAT_REG_PROTO_MIN, htonl(priv->sreg_proto_min))) + goto nla_put_failure; + if (nla_put_be32(skb, + NFTA_NAT_REG_PROTO_MAX, htonl(priv->sreg_proto_max))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_nat_type; +static const struct nft_expr_ops nft_nat_ops = { + .type = &nft_nat_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), + .eval = nft_nat_eval, + .init = nft_nat_init, + .dump = nft_nat_dump, +}; + +static struct nft_expr_type nft_nat_type __read_mostly = { + .name = "nat", + .ops = &nft_nat_ops, + .policy = nft_nat_policy, + .maxattr = NFTA_NAT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_nat_module_init(void) +{ + int err; + + err = nft_register_expr(&nft_nat_type); + if (err < 0) + return err; + + return 0; +} + +static void __exit nft_nat_module_exit(void) +{ + nft_unregister_expr(&nft_nat_type); +} + +module_init(nft_nat_module_init); +module_exit(nft_nat_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Tomasz Bursztyka "); +MODULE_ALIAS_NFT_EXPR("nat"); -- cgit v1.2.3-55-g7522 From 99633ab29b2131b68089a6c7f60458390860e044 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 10 Oct 2013 23:28:33 +0200 Subject: netfilter: nf_tables: complete net namespace support Register family per netnamespace to ensure that sets are only visible in its approapriate namespace. Signed-off-by: Pablo Neira Ayuso --- include/net/net_namespace.h | 4 ++ include/net/netfilter/nf_tables.h | 4 +- include/net/netns/nftables.h | 15 ++++++ net/bridge/netfilter/nf_tables_bridge.c | 32 ++++++++++++- net/ipv4/netfilter/nf_tables_ipv4.c | 32 ++++++++++++- net/ipv6/netfilter/nf_tables_ipv6.c | 33 ++++++++++++- net/netfilter/nf_tables_api.c | 83 ++++++++++++++++++++++----------- 7 files changed, 168 insertions(+), 35 deletions(-) create mode 100644 include/net/netns/nftables.h (limited to 'net/netfilter') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index bcc4a8ed4450..da68c9a90ac5 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -22,6 +22,7 @@ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include #endif +#include #include struct user_namespace; @@ -101,6 +102,9 @@ struct net { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; #endif +#if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE) + struct netns_nftables nft; +#endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct netns_nf_frag nf_frag; #endif diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a68f45f0fe2e..d3272e943aac 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -68,6 +68,7 @@ static inline void nft_data_debug(const struct nft_data *data) /** * struct nft_ctx - nf_tables rule/set context * + * @net: net namespace * @skb: netlink skb * @nlh: netlink message header * @afi: address family info @@ -76,6 +77,7 @@ static inline void nft_data_debug(const struct nft_data *data) * @nla: netlink attributes */ struct nft_ctx { + struct net *net; const struct sk_buff *skb; const struct nlmsghdr *nlh; const struct nft_af_info *afi; @@ -462,7 +464,7 @@ struct nft_af_info { nf_hookfn *hooks[NF_MAX_HOOKS]; }; -extern int nft_register_afinfo(struct nft_af_info *); +extern int nft_register_afinfo(struct net *, struct nft_af_info *); extern void nft_unregister_afinfo(struct nft_af_info *); struct nf_chain_type { diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h new file mode 100644 index 000000000000..a98b1c5d9913 --- /dev/null +++ b/include/net/netns/nftables.h @@ -0,0 +1,15 @@ +#ifndef _NETNS_NFTABLES_H_ +#define _NETNS_NFTABLES_H_ + +#include + +struct nft_af_info; + +struct netns_nftables { + struct list_head af_info; + struct nft_af_info *ipv4; + struct nft_af_info *ipv6; + struct nft_af_info *bridge; +}; + +#endif diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index bc5c21c911c0..e8cb016fa34d 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -19,14 +19,42 @@ static struct nft_af_info nft_af_bridge __read_mostly = { .owner = THIS_MODULE, }; +static int nf_tables_bridge_init_net(struct net *net) +{ + net->nft.bridge = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.bridge == NULL) + return -ENOMEM; + + memcpy(net->nft.bridge, &nft_af_bridge, sizeof(nft_af_bridge)); + + if (nft_register_afinfo(net, net->nft.bridge) < 0) + goto err; + + return 0; +err: + kfree(net->nft.bridge); + return -ENOMEM; +} + +static void nf_tables_bridge_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.bridge); + kfree(net->nft.bridge); +} + +static struct pernet_operations nf_tables_bridge_net_ops = { + .init = nf_tables_bridge_init_net, + .exit = nf_tables_bridge_exit_net, +}; + static int __init nf_tables_bridge_init(void) { - return nft_register_afinfo(&nft_af_bridge); + return register_pernet_subsys(&nf_tables_bridge_net_ops); } static void __exit nf_tables_bridge_exit(void) { - nft_unregister_afinfo(&nft_af_bridge); + return unregister_pernet_subsys(&nf_tables_bridge_net_ops); } module_init(nf_tables_bridge_init); diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index c61cffb9b760..8f7536be1322 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,33 @@ static struct nft_af_info nft_af_ipv4 __read_mostly = { }, }; +static int nf_tables_ipv4_init_net(struct net *net) +{ + net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.ipv4 == NULL) + return -ENOMEM; + + memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4)); + + if (nft_register_afinfo(net, net->nft.ipv4) < 0) + goto err; + + return 0; +err: + kfree(net->nft.ipv4); + return -ENOMEM; +} + +static void nf_tables_ipv4_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.ipv4); + kfree(net->nft.ipv4); +} + +static struct pernet_operations nf_tables_ipv4_net_ops = { + .init = nf_tables_ipv4_init_net, + .exit = nf_tables_ipv4_exit_net, +}; static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops, @@ -83,12 +111,12 @@ static struct nf_chain_type filter_ipv4 = { static int __init nf_tables_ipv4_init(void) { nft_register_chain_type(&filter_ipv4); - return nft_register_afinfo(&nft_af_ipv4); + return register_pernet_subsys(&nf_tables_ipv4_net_ops); } static void __exit nf_tables_ipv4_exit(void) { - nft_unregister_afinfo(&nft_af_ipv4); + unregister_pernet_subsys(&nf_tables_ipv4_net_ops); nft_unregister_chain_type(&filter_ipv4); } diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 42f905a808a3..d77db8a13505 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -45,6 +45,34 @@ static struct nft_af_info nft_af_ipv6 __read_mostly = { }, }; +static int nf_tables_ipv6_init_net(struct net *net) +{ + net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.ipv6 == NULL) + return -ENOMEM; + + memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6)); + + if (nft_register_afinfo(net, net->nft.ipv6) < 0) + goto err; + + return 0; +err: + kfree(net->nft.ipv6); + return -ENOMEM; +} + +static void nf_tables_ipv6_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.ipv6); + kfree(net->nft.ipv6); +} + +static struct pernet_operations nf_tables_ipv6_net_ops = { + .init = nf_tables_ipv6_init_net, + .exit = nf_tables_ipv6_exit_net, +}; + static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, @@ -82,11 +110,12 @@ static struct nf_chain_type filter_ipv6 = { static int __init nf_tables_ipv6_init(void) { nft_register_chain_type(&filter_ipv6); - return nft_register_afinfo(&nft_af_ipv6); + return register_pernet_subsys(&nf_tables_ipv6_net_ops); } + static void __exit nf_tables_ipv6_exit(void) { - nft_unregister_afinfo(&nft_af_ipv6); + unregister_pernet_subsys(&nf_tables_ipv6_net_ops); nft_unregister_chain_type(&filter_ipv6); } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a4dd7ce5ec3e..e1ee85047ec1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -18,9 +18,9 @@ #include #include #include +#include #include -static LIST_HEAD(nf_tables_afinfo); static LIST_HEAD(nf_tables_expressions); /** @@ -31,11 +31,11 @@ static LIST_HEAD(nf_tables_expressions); * Register the address family for use with nf_tables. Returns zero on * success or a negative errno code otherwise. */ -int nft_register_afinfo(struct nft_af_info *afi) +int nft_register_afinfo(struct net *net, struct nft_af_info *afi) { INIT_LIST_HEAD(&afi->tables); nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail(&afi->list, &nf_tables_afinfo); + list_add_tail(&afi->list, &net->nft.af_info); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } @@ -56,22 +56,23 @@ void nft_unregister_afinfo(struct nft_af_info *afi) } EXPORT_SYMBOL_GPL(nft_unregister_afinfo); -static struct nft_af_info *nft_afinfo_lookup(int family) +static struct nft_af_info *nft_afinfo_lookup(struct net *net, int family) { struct nft_af_info *afi; - list_for_each_entry(afi, &nf_tables_afinfo, list) { + list_for_each_entry(afi, &net->nft.af_info, list) { if (afi->family == family) return afi; } return NULL; } -static struct nft_af_info *nf_tables_afinfo_lookup(int family, bool autoload) +static struct nft_af_info * +nf_tables_afinfo_lookup(struct net *net, int family, bool autoload) { struct nft_af_info *afi; - afi = nft_afinfo_lookup(family); + afi = nft_afinfo_lookup(net, family); if (afi != NULL) return afi; #ifdef CONFIG_MODULES @@ -79,7 +80,7 @@ static struct nft_af_info *nf_tables_afinfo_lookup(int family, bool autoload) nfnl_unlock(NFNL_SUBSYS_NFTABLES); request_module("nft-afinfo-%u", family); nfnl_lock(NFNL_SUBSYS_NFTABLES); - afi = nft_afinfo_lookup(family); + afi = nft_afinfo_lookup(net, family); if (afi != NULL) return ERR_PTR(-EAGAIN); } @@ -232,9 +233,10 @@ static int nf_tables_dump_tables(struct sk_buff *skb, const struct nft_af_info *afi; const struct nft_table *table; unsigned int idx = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - list_for_each_entry(afi, &nf_tables_afinfo, list) { + list_for_each_entry(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; @@ -268,6 +270,7 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, const struct nft_af_info *afi; const struct nft_table *table; struct sk_buff *skb2; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; int err; @@ -278,7 +281,7 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, return netlink_dump_start(nlsk, skb, nlh, &c); } - afi = nf_tables_afinfo_lookup(family, false); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -379,9 +382,10 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, const struct nlattr *name; struct nft_af_info *afi; struct nft_table *table; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - afi = nf_tables_afinfo_lookup(family, true); + afi = nf_tables_afinfo_lookup(net, family, true); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -433,9 +437,10 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_af_info *afi; struct nft_table *table; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - afi = nf_tables_afinfo_lookup(family, false); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -663,9 +668,10 @@ static int nf_tables_dump_chains(struct sk_buff *skb, const struct nft_table *table; const struct nft_chain *chain; unsigned int idx = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - list_for_each_entry(afi, &nf_tables_afinfo, list) { + list_for_each_entry(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; @@ -702,6 +708,7 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, const struct nft_table *table; const struct nft_chain *chain; struct sk_buff *skb2; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; int err; @@ -712,7 +719,7 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, return netlink_dump_start(nlsk, skb, nlh, &c); } - afi = nf_tables_afinfo_lookup(family, false); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -813,6 +820,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, struct nft_chain *chain; struct nft_base_chain *basechain = NULL; struct nlattr *ha[NFTA_HOOK_MAX + 1]; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; u64 handle = 0; int err; @@ -820,7 +828,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; - afi = nf_tables_afinfo_lookup(family, true); + afi = nf_tables_afinfo_lookup(net, family, true); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1010,9 +1018,10 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, const struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - afi = nf_tables_afinfo_lookup(family, false); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1050,6 +1059,7 @@ static void nft_ctx_init(struct nft_ctx *ctx, const struct nft_chain *chain, const struct nlattr * const *nla) { + ctx->net = sock_net(skb->sk); ctx->skb = skb; ctx->nlh = nlh; ctx->afi = afi; @@ -1361,9 +1371,10 @@ static int nf_tables_dump_rules(struct sk_buff *skb, const struct nft_chain *chain; const struct nft_rule *rule; unsigned int idx = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - list_for_each_entry(afi, &nf_tables_afinfo, list) { + list_for_each_entry(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; @@ -1402,6 +1413,7 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, const struct nft_chain *chain; const struct nft_rule *rule; struct sk_buff *skb2; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; int err; @@ -1412,7 +1424,7 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, return netlink_dump_start(nlsk, skb, nlh, &c); } - afi = nf_tables_afinfo_lookup(family, false); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1477,6 +1489,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; + struct net *net = sock_net(skb->sk); struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *old_rule = NULL; @@ -1490,7 +1503,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; - afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, create); + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1585,12 +1598,13 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; + struct net *net = sock_net(skb->sk); const struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *tmp; int family = nfmsg->nfgen_family; - afi = nf_tables_afinfo_lookup(family, false); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1697,11 +1711,12 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + struct net *net = sock_net(skb->sk); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; const struct nft_table *table = NULL; - afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, false); + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1818,12 +1833,11 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx, { struct sk_buff *skb; u32 portid = NETLINK_CB(ctx->skb).portid; - struct net *net = sock_net(ctx->skb->sk); bool report; int err; report = nlmsg_report(ctx->nlh); - if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + if (!report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return 0; err = -ENOBUFS; @@ -1837,11 +1851,11 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx, goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, report, GFP_KERNEL); err: if (err < 0) - nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err); return err; } @@ -1974,6 +1988,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_set_ops *ops; const struct nft_af_info *afi; + struct net *net = sock_net(skb->sk); struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; @@ -2032,7 +2047,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; - afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, create); + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -2219,8 +2234,9 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; const struct nft_table *table; + struct net *net = sock_net(skb->sk); - afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, false); + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -3011,6 +3027,16 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, } EXPORT_SYMBOL_GPL(nft_data_dump); +static int nf_tables_init_net(struct net *net) +{ + INIT_LIST_HEAD(&net->nft.af_info); + return 0; +} + +static struct pernet_operations nf_tables_net_ops = { + .init = nf_tables_init_net, +}; + static int __init nf_tables_module_init(void) { int err; @@ -3031,7 +3057,7 @@ static int __init nf_tables_module_init(void) goto err3; pr_info("nf_tables: (c) 2007-2009 Patrick McHardy \n"); - return 0; + return register_pernet_subsys(&nf_tables_net_ops); err3: nf_tables_core_module_exit(); err2: @@ -3042,6 +3068,7 @@ err1: static void __exit nf_tables_module_exit(void) { + unregister_pernet_subsys(&nf_tables_net_ops); nfnetlink_subsys_unregister(&nf_tables_subsys); nf_tables_core_module_exit(); kfree(info); -- cgit v1.2.3-55-g7522 From 5e94846686d027a4c8ecc5d9d52b18036d3e8f7a Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Thu, 10 Oct 2013 13:41:44 +0200 Subject: netfilter: nf_tables: add insert operation This patch adds a new rule attribute NFTA_RULE_POSITION which is used to store the position of a rule relatively to the others. By providing the create command and specifying the position, the rule is inserted after the rule with the handle equal to the provided position. Regarding notification, the position attribute specifies the handle of the previous rule to make sure we don't point to any stale rule in notifications coming from the commit path. This patch includes the following fix from Pablo: * nf_tables: fix rule deletion event reporting Signed-off-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 38 +++++++++++++++++++++++++++----- 2 files changed, 34 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 7d4a1992f89c..fbfd229a8e99 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -153,6 +153,7 @@ enum nft_chain_attributes { * @NFTA_RULE_HANDLE: numeric handle of the rule (NLA_U64) * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes) * @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes) + * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64) */ enum nft_rule_attributes { NFTA_RULE_UNSPEC, @@ -161,6 +162,7 @@ enum nft_rule_attributes { NFTA_RULE_HANDLE, NFTA_RULE_EXPRESSIONS, NFTA_RULE_COMPAT, + NFTA_RULE_POSITION, __NFTA_RULE_MAX }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e1ee85047ec1..0f140663ec71 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1273,6 +1273,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, + [NFTA_RULE_POSITION] = { .type = NLA_U64 }, }; static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, @@ -1285,9 +1286,10 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, struct nfgenmsg *nfmsg; const struct nft_expr *expr, *next; struct nlattr *list; + const struct nft_rule *prule; + int type = event | NFNL_SUBSYS_NFTABLES << 8; - event |= NFNL_SUBSYS_NFTABLES << 8; - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), + nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags); if (nlh == NULL) goto nla_put_failure; @@ -1304,6 +1306,13 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle))) goto nla_put_failure; + if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) { + prule = list_entry(rule->list.prev, struct nft_rule, list); + if (nla_put_be64(skb, NFTA_RULE_POSITION, + cpu_to_be64(prule->handle))) + goto nla_put_failure; + } + list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS); if (list == NULL) goto nla_put_failure; @@ -1499,7 +1508,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, unsigned int size, i, n; int err, rem; bool create; - u64 handle; + u64 handle, pos_handle; create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; @@ -1533,6 +1542,16 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, handle = nf_tables_alloc_handle(table); } + if (nla[NFTA_RULE_POSITION]) { + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + return -EOPNOTSUPP; + + pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION])); + old_rule = __nf_tables_rule_lookup(chain, pos_handle); + if (IS_ERR(old_rule)) + return PTR_ERR(old_rule); + } + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); n = 0; @@ -1573,9 +1592,16 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, list_replace_rcu(&old_rule->list, &rule->list); nf_tables_rule_destroy(old_rule); } else if (nlh->nlmsg_flags & NLM_F_APPEND) - list_add_tail_rcu(&rule->list, &chain->rules); - else - list_add_rcu(&rule->list, &chain->rules); + if (old_rule) + list_add_rcu(&rule->list, &old_rule->list); + else + list_add_tail_rcu(&rule->list, &chain->rules); + else { + if (old_rule) + list_add_tail_rcu(&rule->list, &old_rule->list); + else + list_add_rcu(&rule->list, &chain->rules); + } nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE, nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE), -- cgit v1.2.3-55-g7522 From 0628b123c96d126e617beb3b4fd63b874d0e4f17 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Oct 2013 11:05:33 +0200 Subject: netfilter: nfnetlink: add batch support and use it from nf_tables This patch adds a batch support to nfnetlink. Basically, it adds two new control messages: * NFNL_MSG_BATCH_BEGIN, that indicates the beginning of a batch, the nfgenmsg->res_id indicates the nfnetlink subsystem ID. * NFNL_MSG_BATCH_END, that results in the invocation of the ss->commit callback function. If not specified or an error ocurred in the batch, the ss->abort function is invoked instead. The end message represents the commit operation in nftables, the lack of end message results in an abort. This patch also adds the .call_batch function that is only called from the batch receival path. This patch adds atomic rule updates and dumps based on bitmask generations. This allows to atomically commit a set of rule-set updates incrementally without altering the internal state of existing nf_tables expressions/matches/targets. The idea consists of using a generation cursor of 1 bit and a bitmask of 2 bits per rule. Assuming the gencursor is 0, then the genmask (expressed as a bitmask) can be interpreted as: 00 active in the present, will be active in the next generation. 01 inactive in the present, will be active in the next generation. 10 active in the present, will be deleted in the next generation. ^ gencursor Once you invoke the transition to the next generation, the global gencursor is updated: 00 active in the present, will be active in the next generation. 01 active in the present, needs to zero its future, it becomes 00. 10 inactive in the present, delete now. ^ gencursor If a dump is in progress and nf_tables enters a new generation, the dump will stop and return -EBUSY to let userspace know that it has to retry again. In order to invalidate dumps, a global genctr counter is increased everytime nf_tables enters a new generation. This new operation can be used from the user-space utility that controls the firewall, eg. nft -f restore The rule updates contained in `file' will be applied atomically. cat file ----- add filter INPUT ip saddr 1.1.1.1 counter accept #1 del filter INPUT ip daddr 2.2.2.2 counter drop #2 -EOF- Note that the rule 1 will be inactive until the transition to the next generation, the rule 2 will be evicted in the next generation. There is a penalty during the rule update due to the branch misprediction in the packet matching framework. But that should be quickly resolved once the iteration over the commit list that contain rules that require updates is finished. Event notification happens once the rule-set update has been committed. So we skip notifications is case the rule-set update is aborted, which can happen in case that the rule-set is tested to apply correctly. This patch squashed the following patches from Pablo: * nf_tables: atomic rule updates and dumps * nf_tables: get rid of per rule list_head for commits * nf_tables: use per netns commit list * nfnetlink: add batch support and use it from nf_tables * nf_tables: all rule updates are transactional * nf_tables: attach replacement rule after stale one * nf_tables: do not allow deletion/replacement of stale rules * nf_tables: remove unused NFTA_RULE_FLAGS Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 5 + include/net/netfilter/nf_tables.h | 25 +++- include/net/netns/nftables.h | 3 + include/uapi/linux/netfilter/nfnetlink.h | 4 + net/netfilter/nf_tables_api.c | 202 ++++++++++++++++++++++++++++--- net/netfilter/nf_tables_core.c | 10 ++ net/netfilter/nfnetlink.c | 175 +++++++++++++++++++++++++- 7 files changed, 401 insertions(+), 23 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 4f68cd7141d2..28c74367e900 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -14,6 +14,9 @@ struct nfnl_callback { int (*call_rcu)(struct sock *nl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]); + int (*call_batch)(struct sock *nl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]); const struct nla_policy *policy; /* netlink attribute policy */ const u_int16_t attr_count; /* number of nlattr's */ }; @@ -23,6 +26,8 @@ struct nfnetlink_subsystem { __u8 subsys_id; /* nfnetlink subsystem ID */ __u8 cb_count; /* number of callbacks */ const struct nfnl_callback *cb; /* callback for individual types */ + int (*commit)(struct sk_buff *skb); + int (*abort)(struct sk_buff *skb); }; int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d3272e943aac..975ad3c573c7 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -323,18 +323,39 @@ static inline void *nft_expr_priv(const struct nft_expr *expr) * @list: used internally * @rcu_head: used internally for rcu * @handle: rule handle + * @genmask: generation mask * @dlen: length of expression data * @data: expression data */ struct nft_rule { struct list_head list; struct rcu_head rcu_head; - u64 handle:48, + u64 handle:46, + genmask:2, dlen:16; unsigned char data[] __attribute__((aligned(__alignof__(struct nft_expr)))); }; +/** + * struct nft_rule_trans - nf_tables rule update in transaction + * + * @list: used internally + * @rule: rule that needs to be updated + * @chain: chain that this rule belongs to + * @table: table for which this chain applies + * @nlh: netlink header of the message that contain this update + * @family: family expressesed as AF_* + */ +struct nft_rule_trans { + struct list_head list; + struct nft_rule *rule; + const struct nft_chain *chain; + const struct nft_table *table; + const struct nlmsghdr *nlh; + u8 family; +}; + static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) { return (struct nft_expr *)&rule->data[0]; @@ -370,6 +391,7 @@ enum nft_chain_flags { * @rules: list of rules in the chain * @list: used internally * @rcu_head: used internally + * @net: net namespace that this chain belongs to * @handle: chain handle * @flags: bitmask of enum nft_chain_flags * @use: number of jump references to this chain @@ -380,6 +402,7 @@ struct nft_chain { struct list_head rules; struct list_head list; struct rcu_head rcu_head; + struct net *net; u64 handle; u8 flags; u16 use; diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index a98b1c5d9913..08a4248a12b5 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -7,9 +7,12 @@ struct nft_af_info; struct netns_nftables { struct list_head af_info; + struct list_head commit_list; struct nft_af_info *ipv4; struct nft_af_info *ipv6; struct nft_af_info *bridge; + u8 gencursor; + u8 genctr; }; #endif diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 288959404d54..596ddd45253c 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -57,4 +57,8 @@ struct nfgenmsg { #define NFNL_SUBSYS_NFT_COMPAT 11 #define NFNL_SUBSYS_COUNT 12 +/* Reserved control nfnetlink messages */ +#define NFNL_MSG_BATCH_BEGIN NLMSG_MIN_TYPE +#define NFNL_MSG_BATCH_END NLMSG_MIN_TYPE+1 + #endif /* _UAPI_NFNETLINK_H */ diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0f140663ec71..79e1418a6043 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -978,6 +978,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&chain->rules); chain->handle = nf_tables_alloc_handle(table); + chain->net = net; nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); if (!(table->flags & NFT_TABLE_F_DORMANT) && @@ -1371,6 +1372,41 @@ err: return err; } +static inline bool +nft_rule_is_active(struct net *net, const struct nft_rule *rule) +{ + return (rule->genmask & (1 << net->nft.gencursor)) == 0; +} + +static inline int gencursor_next(struct net *net) +{ + return net->nft.gencursor+1 == 1 ? 1 : 0; +} + +static inline int +nft_rule_is_active_next(struct net *net, const struct nft_rule *rule) +{ + return (rule->genmask & (1 << gencursor_next(net))) == 0; +} + +static inline void +nft_rule_activate_next(struct net *net, struct nft_rule *rule) +{ + /* Now inactive, will be active in the future */ + rule->genmask = (1 << net->nft.gencursor); +} + +static inline void +nft_rule_disactivate_next(struct net *net, struct nft_rule *rule) +{ + rule->genmask = (1 << gencursor_next(net)); +} + +static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) +{ + rule->genmask = 0; +} + static int nf_tables_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { @@ -1382,6 +1418,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb, unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + u8 genctr = ACCESS_ONCE(net->nft.genctr); + u8 gencursor = ACCESS_ONCE(net->nft.gencursor); list_for_each_entry(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) @@ -1390,6 +1428,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb, list_for_each_entry(table, &afi->tables, list) { list_for_each_entry(chain, &table->chains, list) { list_for_each_entry(rule, &chain->rules, list) { + if (!nft_rule_is_active(net, rule)) + goto cont; if (idx < s_idx) goto cont; if (idx > s_idx) @@ -1408,6 +1448,10 @@ cont: } } done: + /* Invalidate this dump, a transition to the new generation happened */ + if (gencursor != net->nft.gencursor || genctr != net->nft.genctr) + return -EBUSY; + cb->args[0] = idx; return skb->len; } @@ -1492,6 +1536,25 @@ static void nf_tables_rule_destroy(struct nft_rule *rule) static struct nft_expr_info *info; +static struct nft_rule_trans * +nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx) +{ + struct nft_rule_trans *rupd; + + rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL); + if (rupd == NULL) + return NULL; + + rupd->chain = ctx->chain; + rupd->table = ctx->table; + rupd->rule = rule; + rupd->family = ctx->afi->family; + rupd->nlh = ctx->nlh; + list_add_tail(&rupd->list, &ctx->net->nft.commit_list); + + return rupd; +} + static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1502,6 +1565,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *old_rule = NULL; + struct nft_rule_trans *repl = NULL; struct nft_expr *expr; struct nft_ctx ctx; struct nlattr *tmp; @@ -1576,6 +1640,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, if (rule == NULL) goto err1; + nft_rule_activate_next(net, rule); + rule->handle = handle; rule->dlen = size; @@ -1589,8 +1655,18 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, } if (nlh->nlmsg_flags & NLM_F_REPLACE) { - list_replace_rcu(&old_rule->list, &rule->list); - nf_tables_rule_destroy(old_rule); + if (nft_rule_is_active_next(net, old_rule)) { + repl = nf_tables_trans_add(old_rule, &ctx); + if (repl == NULL) { + err = -ENOMEM; + goto err2; + } + nft_rule_disactivate_next(net, old_rule); + list_add_tail(&rule->list, &old_rule->list); + } else { + err = -ENOENT; + goto err2; + } } else if (nlh->nlmsg_flags & NLM_F_APPEND) if (old_rule) list_add_rcu(&rule->list, &old_rule->list); @@ -1603,11 +1679,20 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, list_add_rcu(&rule->list, &chain->rules); } - nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE, - nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE), - nfmsg->nfgen_family); + if (nf_tables_trans_add(rule, &ctx) == NULL) { + err = -ENOMEM; + goto err3; + } return 0; +err3: + list_del_rcu(&rule->list); + if (repl) { + list_del_rcu(&repl->rule->list); + list_del(&repl->list); + nft_rule_clear(net, repl->rule); + kfree(repl); + } err2: nf_tables_rule_destroy(rule); err1: @@ -1618,6 +1703,19 @@ err1: return err; } +static int +nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) +{ + /* You cannot delete the same rule twice */ + if (nft_rule_is_active_next(ctx->net, rule)) { + if (nf_tables_trans_add(rule, ctx) == NULL) + return -ENOMEM; + nft_rule_disactivate_next(ctx->net, rule); + return 0; + } + return -ENOENT; +} + static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1628,7 +1726,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *tmp; - int family = nfmsg->nfgen_family; + int family = nfmsg->nfgen_family, err = 0; + struct nft_ctx ctx; afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) @@ -1642,31 +1741,95 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(chain)) return PTR_ERR(chain); + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + if (nla[NFTA_RULE_HANDLE]) { rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) return PTR_ERR(rule); - /* List removal must be visible before destroying expressions */ - list_del_rcu(&rule->list); - - nf_tables_rule_notify(skb, nlh, table, chain, rule, - NFT_MSG_DELRULE, 0, family); - nf_tables_rule_destroy(rule); + err = nf_tables_delrule_one(&ctx, rule); } else { /* Remove all rules in this chain */ list_for_each_entry_safe(rule, tmp, &chain->rules, list) { - list_del_rcu(&rule->list); + err = nf_tables_delrule_one(&ctx, rule); + if (err < 0) + break; + } + } + + return err; +} + +static int nf_tables_commit(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct nft_rule_trans *rupd, *tmp; - nf_tables_rule_notify(skb, nlh, table, chain, rule, - NFT_MSG_DELRULE, 0, family); - nf_tables_rule_destroy(rule); + /* Bump generation counter, invalidate any dump in progress */ + net->nft.genctr++; + + /* A new generation has just started */ + net->nft.gencursor = gencursor_next(net); + + /* Make sure all packets have left the previous generation before + * purging old rules. + */ + synchronize_rcu(); + + list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { + /* Delete this rule from the dirty list */ + list_del(&rupd->list); + + /* This rule was inactive in the past and just became active. + * Clear the next bit of the genmask since its meaning has + * changed, now it is the future. + */ + if (nft_rule_is_active(net, rupd->rule)) { + nft_rule_clear(net, rupd->rule); + nf_tables_rule_notify(skb, rupd->nlh, rupd->table, + rupd->chain, rupd->rule, + NFT_MSG_NEWRULE, 0, + rupd->family); + kfree(rupd); + continue; } + + /* This rule is in the past, get rid of it */ + list_del_rcu(&rupd->rule->list); + nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain, + rupd->rule, NFT_MSG_DELRULE, 0, + rupd->family); + nf_tables_rule_destroy(rupd->rule); + kfree(rupd); } return 0; } +static int nf_tables_abort(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct nft_rule_trans *rupd, *tmp; + + list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { + /* Delete all rules from the dirty list */ + list_del(&rupd->list); + + if (!nft_rule_is_active_next(net, rupd->rule)) { + nft_rule_clear(net, rupd->rule); + kfree(rupd); + continue; + } + + /* This rule is inactive, get rid of it */ + list_del_rcu(&rupd->rule->list); + nf_tables_rule_destroy(rupd->rule); + kfree(rupd); + } + return 0; +} + /* * Sets */ @@ -2634,7 +2797,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_chain_policy, }, [NFT_MSG_NEWRULE] = { - .call = nf_tables_newrule, + .call_batch = nf_tables_newrule, .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, @@ -2644,7 +2807,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_rule_policy, }, [NFT_MSG_DELRULE] = { - .call = nf_tables_delrule, + .call_batch = nf_tables_delrule, .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, @@ -2685,6 +2848,8 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { .subsys_id = NFNL_SUBSYS_NFTABLES, .cb_count = NFT_MSG_MAX, .cb = nf_tables_cb, + .commit = nf_tables_commit, + .abort = nf_tables_abort, }; /* @@ -3056,6 +3221,7 @@ EXPORT_SYMBOL_GPL(nft_data_dump); static int nf_tables_init_net(struct net *net) { INIT_LIST_HEAD(&net->nft.af_info); + INIT_LIST_HEAD(&net->nft.commit_list); return 0; } diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 3c13007d80df..d581ef660248 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -88,12 +88,22 @@ nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) struct nft_data data[NFT_REG_MAX + 1]; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; + /* + * Cache cursor to avoid problems in case that the cursor is updated + * while traversing the ruleset. + */ + unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor); do_chain: rule = list_entry(&chain->rules, struct nft_rule, list); next_rule: data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; list_for_each_entry_continue_rcu(rule, &chain->rules, list) { + + /* This rule is not active, skip. */ + if (unlikely(rule->genmask & (1 << gencursor))) + continue; + nft_rule_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, data); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 572d87dc116f..027f16af51a0 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -147,9 +147,6 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) const struct nfnetlink_subsystem *ss; int type, err; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - /* All the messages must at least contain nfgenmsg */ if (nlmsg_len(nlh) < sizeof(struct nfgenmsg)) return 0; @@ -217,9 +214,179 @@ replay: } } +static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, + u_int16_t subsys_id) +{ + struct sk_buff *nskb, *oskb = skb; + struct net *net = sock_net(skb->sk); + const struct nfnetlink_subsystem *ss; + const struct nfnl_callback *nc; + bool success = true, done = false; + int err; + + if (subsys_id >= NFNL_SUBSYS_COUNT) + return netlink_ack(skb, nlh, -EINVAL); +replay: + nskb = netlink_skb_clone(oskb, GFP_KERNEL); + if (!nskb) + return netlink_ack(oskb, nlh, -ENOMEM); + + nskb->sk = oskb->sk; + skb = nskb; + + nfnl_lock(subsys_id); + ss = rcu_dereference_protected(table[subsys_id].subsys, + lockdep_is_held(&table[subsys_id].mutex)); + if (!ss) { +#ifdef CONFIG_MODULES + nfnl_unlock(subsys_id); + request_module("nfnetlink-subsys-%d", subsys_id); + nfnl_lock(subsys_id); + ss = rcu_dereference_protected(table[subsys_id].subsys, + lockdep_is_held(&table[subsys_id].mutex)); + if (!ss) +#endif + { + nfnl_unlock(subsys_id); + kfree_skb(nskb); + return netlink_ack(skb, nlh, -EOPNOTSUPP); + } + } + + if (!ss->commit || !ss->abort) { + nfnl_unlock(subsys_id); + kfree_skb(nskb); + return netlink_ack(skb, nlh, -EOPNOTSUPP); + } + + while (skb->len >= nlmsg_total_size(0)) { + int msglen, type; + + nlh = nlmsg_hdr(skb); + err = 0; + + if (nlh->nlmsg_len < NLMSG_HDRLEN) { + err = -EINVAL; + goto ack; + } + + /* Only requests are handled by the kernel */ + if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) { + err = -EINVAL; + goto ack; + } + + type = nlh->nlmsg_type; + if (type == NFNL_MSG_BATCH_BEGIN) { + /* Malformed: Batch begin twice */ + success = false; + goto done; + } else if (type == NFNL_MSG_BATCH_END) { + done = true; + goto done; + } else if (type < NLMSG_MIN_TYPE) { + err = -EINVAL; + goto ack; + } + + /* We only accept a batch with messages for the same + * subsystem. + */ + if (NFNL_SUBSYS_ID(type) != subsys_id) { + err = -EINVAL; + goto ack; + } + + nc = nfnetlink_find_client(type, ss); + if (!nc) { + err = -EINVAL; + goto ack; + } + + { + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); + u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); + struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; + struct nlattr *attr = (void *)nlh + min_len; + int attrlen = nlh->nlmsg_len - min_len; + + err = nla_parse(cda, ss->cb[cb_id].attr_count, + attr, attrlen, ss->cb[cb_id].policy); + if (err < 0) + goto ack; + + if (nc->call_batch) { + err = nc->call_batch(net->nfnl, skb, nlh, + (const struct nlattr **)cda); + } + + /* The lock was released to autoload some module, we + * have to abort and start from scratch using the + * original skb. + */ + if (err == -EAGAIN) { + ss->abort(skb); + nfnl_unlock(subsys_id); + kfree_skb(nskb); + goto replay; + } + } +ack: + if (nlh->nlmsg_flags & NLM_F_ACK || err) { + /* We don't stop processing the batch on errors, thus, + * userspace gets all the errors that the batch + * triggers. + */ + netlink_ack(skb, nlh, err); + if (err) + success = false; + } + + msglen = NLMSG_ALIGN(nlh->nlmsg_len); + if (msglen > skb->len) + msglen = skb->len; + skb_pull(skb, msglen); + } +done: + if (success && done) + ss->commit(skb); + else + ss->abort(skb); + + nfnl_unlock(subsys_id); + kfree_skb(nskb); +} + static void nfnetlink_rcv(struct sk_buff *skb) { - netlink_rcv_skb(skb, &nfnetlink_rcv_msg); + struct nlmsghdr *nlh = nlmsg_hdr(skb); + struct net *net = sock_net(skb->sk); + int msglen; + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return netlink_ack(skb, nlh, -EPERM); + + if (nlh->nlmsg_len < NLMSG_HDRLEN || + skb->len < nlh->nlmsg_len) + return; + + if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) { + struct nfgenmsg *nfgenmsg; + + msglen = NLMSG_ALIGN(nlh->nlmsg_len); + if (msglen > skb->len) + msglen = skb->len; + + if (nlh->nlmsg_len < NLMSG_HDRLEN || + skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) + return; + + nfgenmsg = nlmsg_data(nlh); + skb_pull(skb, msglen); + nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id); + } else { + netlink_rcv_skb(skb, &nfnetlink_rcv_msg); + } } #ifdef CONFIG_MODULES -- cgit v1.2.3-55-g7522 From b5bc89bfa0b46de37754610f46c0ef4e2280edb4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 10 Oct 2013 16:49:19 +0200 Subject: netfilter: nf_tables: add trace support This patch adds support for tracing the packet travel through the ruleset, in a similar fashion to x_tables. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 1 + net/netfilter/nf_tables_core.c | 57 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 975ad3c573c7..54c4a5cafb64 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -392,6 +392,7 @@ enum nft_chain_flags { * @list: used internally * @rcu_head: used internally * @net: net namespace that this chain belongs to + * @table: table that this chain belongs to * @handle: chain handle * @flags: bitmask of enum nft_chain_flags * @use: number of jump references to this chain @@ -403,6 +404,7 @@ struct nft_chain { struct list_head list; struct rcu_head rcu_head; struct net *net; + struct nft_table *table; u64 handle; u8 flags; u16 use; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 79e1418a6043..dcddc49c0e08 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -979,6 +979,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&chain->rules); chain->handle = nf_tables_alloc_handle(table); chain->net = net; + chain->table = table; nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); if (!(table->flags & NFT_TABLE_F_DORMANT) && diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index d581ef660248..cb9e685caae1 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -19,6 +19,7 @@ #include #include #include +#include static void nft_cmp_fast_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1]) @@ -63,6 +64,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, struct nft_jumpstack { const struct nft_chain *chain; const struct nft_rule *rule; + int rulenum; }; static inline void @@ -79,6 +81,40 @@ nft_chain_stats(const struct nft_chain *this, const struct nft_pktinfo *pkt, rcu_read_unlock_bh(); } +enum nft_trace { + NFT_TRACE_RULE, + NFT_TRACE_RETURN, + NFT_TRACE_POLICY, +}; + +static const char *const comments[] = { + [NFT_TRACE_RULE] = "rule", + [NFT_TRACE_RETURN] = "return", + [NFT_TRACE_POLICY] = "policy", +}; + +static struct nf_loginfo trace_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 4, + .logflags = NF_LOG_MASK, + }, + }, +}; + +static inline void nft_trace_packet(const struct nft_pktinfo *pkt, + const struct nft_chain *chain, + int rulenum, enum nft_trace type) +{ + struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); + + nf_log_packet(net, pkt->xt.family, pkt->hooknum, pkt->skb, pkt->in, + pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", + chain->table->name, chain->name, comments[type], + rulenum); +} + unsigned int nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { @@ -88,6 +124,7 @@ nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) struct nft_data data[NFT_REG_MAX + 1]; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; + int rulenum = 0; /* * Cache cursor to avoid problems in case that the cursor is updated * while traversing the ruleset. @@ -104,6 +141,8 @@ next_rule: if (unlikely(rule->genmask & (1 << gencursor))) continue; + rulenum++; + nft_rule_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, data); @@ -129,17 +168,28 @@ next_rule: case NF_ACCEPT: case NF_DROP: case NF_QUEUE: + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + return data[NFT_REG_VERDICT].verdict; case NFT_JUMP: + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE); jumpstack[stackptr].chain = chain; jumpstack[stackptr].rule = rule; + jumpstack[stackptr].rulenum = rulenum; stackptr++; /* fall through */ case NFT_GOTO: chain = data[NFT_REG_VERDICT].chain; goto do_chain; case NFT_RETURN: + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); + + /* fall through */ case NFT_CONTINUE: break; default: @@ -147,13 +197,20 @@ next_rule: } if (stackptr > 0) { + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN); + stackptr--; chain = jumpstack[stackptr].chain; rule = jumpstack[stackptr].rule; + rulenum = jumpstack[stackptr].rulenum; goto next_rule; } nft_chain_stats(chain, pkt, jumpstack, stackptr); + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_POLICY); + return nft_base_chain(chain)->policy; } EXPORT_SYMBOL_GPL(nft_do_chain_pktinfo); -- cgit v1.2.3-55-g7522 From 9e4e948a3edafd2b7f4dc14c395e146ffd0d9611 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 9 Oct 2013 09:24:27 +0300 Subject: ipvs: avoid rcu_barrier during netns cleanup commit 578bc3ef1e473a ("ipvs: reorganize dest trash") added rcu_barrier() on cleanup to wait dest users and schedulers like LBLC and LBLCR to put their last dest reference. Using rcu_barrier with many namespaces is problematic. Trying to fix it by freeing dest with kfree_rcu is not a solution, RCU callbacks can run in parallel and execution order is random. Fix it by creating new function ip_vs_dest_put_and_free() which is heavier than ip_vs_dest_put(). We will use it just for schedulers like LBLC, LBLCR that can delay their dest release. By default, dests reference is above 0 if they are present in service and it is 0 when deleted but still in trash list. Change the dest trash code to use ip_vs_dest_put_and_free(), so that refcnt -1 can be used for freeing. As result, such checks remain in slow path and the rcu_barrier() from netns cleanup can be removed. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 6 ++++++ net/netfilter/ipvs/ip_vs_ctl.c | 6 +----- net/netfilter/ipvs/ip_vs_lblc.c | 2 +- net/netfilter/ipvs/ip_vs_lblcr.c | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 1c2e1b9f6b86..cd7275f9c463 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1442,6 +1442,12 @@ static inline void ip_vs_dest_put(struct ip_vs_dest *dest) atomic_dec(&dest->refcnt); } +static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest) +{ + if (atomic_dec_return(&dest->refcnt) < 0) + kfree(dest); +} + /* * IPVS sync daemon data and function prototypes * (from ip_vs_sync.c) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a3df9bddc4f7..62786a495cea 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -704,7 +704,7 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest) __ip_vs_dst_cache_reset(dest); __ip_vs_svc_put(svc, false); free_percpu(dest->stats.cpustats); - kfree(dest); + ip_vs_dest_put_and_free(dest); } /* @@ -3820,10 +3820,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); - /* Some dest can be in grace period even before cleanup, we have to - * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called. - */ - rcu_barrier(); ip_vs_trash_cleanup(net); ip_vs_stop_estimator(net, &ipvs->tot_stats); ip_vs_control_net_cleanup_sysctl(net); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index eff13c94498e..ca056a331e60 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -136,7 +136,7 @@ static void ip_vs_lblc_rcu_free(struct rcu_head *head) struct ip_vs_lblc_entry, rcu_head); - ip_vs_dest_put(en->dest); + ip_vs_dest_put_and_free(en->dest); kfree(en); } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 0b8550089a2e..3f21a2f47de1 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -130,7 +130,7 @@ static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head) struct ip_vs_dest_set_elem *e; e = container_of(head, struct ip_vs_dest_set_elem, rcu_head); - ip_vs_dest_put(e->dest); + ip_vs_dest_put_and_free(e->dest); kfree(e); } -- cgit v1.2.3-55-g7522 From 1255ce5f10dbb4646c8d43b8d59faab48ae4a6b2 Mon Sep 17 00:00:00 2001 From: Alexander Frolkin Date: Fri, 27 Sep 2013 11:06:23 +0100 Subject: ipvs: improved SH fallback strategy Improve the SH fallback realserver selection strategy. With sh and sh-fallback, if a realserver is down, this attempts to distribute the traffic that would have gone to that server evenly among the remaining servers. Signed-off-by: Alexander Frolkin Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_sh.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 3588faebe529..cc65b2f42cd4 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -115,27 +115,46 @@ ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s, } -/* As ip_vs_sh_get, but with fallback if selected server is unavailable */ +/* As ip_vs_sh_get, but with fallback if selected server is unavailable + * + * The fallback strategy loops around the table starting from a "random" + * point (in fact, it is chosen to be the original hash value to make the + * algorithm deterministic) to find a new server. + */ static inline struct ip_vs_dest * ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s, const union nf_inet_addr *addr, __be16 port) { - unsigned int offset; - unsigned int hash; + unsigned int offset, roffset; + unsigned int hash, ihash; struct ip_vs_dest *dest; + /* first try the dest it's supposed to go to */ + ihash = ip_vs_sh_hashkey(svc->af, addr, port, 0); + dest = rcu_dereference(s->buckets[ihash].dest); + if (!dest) + return NULL; + if (!is_unavailable(dest)) + return dest; + + IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting", + IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); + + /* if the original dest is unavailable, loop around the table + * starting from ihash to find a new dest + */ for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) { - hash = ip_vs_sh_hashkey(svc->af, addr, port, offset); + roffset = (offset + ihash) % IP_VS_SH_TAB_SIZE; + hash = ip_vs_sh_hashkey(svc->af, addr, port, roffset); dest = rcu_dereference(s->buckets[hash].dest); if (!dest) break; - if (is_unavailable(dest)) - IP_VS_DBG_BUF(6, "SH: selected unavailable server " - "%s:%d (offset %d)", - IP_VS_DBG_ADDR(svc->af, &dest->addr), - ntohs(dest->port), offset); - else + if (!is_unavailable(dest)) return dest; + IP_VS_DBG_BUF(6, "SH: selected unavailable " + "server %s:%d (offset %d), reselecting", + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port), roffset); } return NULL; -- cgit v1.2.3-55-g7522 From 1a8bf6eeef9fe417f90e5338a2fd7fba69c6d0e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Oct 2013 09:03:25 -0700 Subject: netfilter: xt_socket: use sock_gen_put() TCP listener refactoring, part 7 : Use sock_gen_put() instead of xt_socket_put_sk() for future SYN_RECV support. Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_socket.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 3dd0e374bc2b..1ba67931eb1b 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -35,15 +35,6 @@ #include #endif -static void -xt_socket_put_sk(struct sock *sk) -{ - if (sk->sk_state == TCP_TIME_WAIT) - inet_twsk_put(inet_twsk(sk)); - else - sock_put(sk); -} - static int extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol, @@ -216,7 +207,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, inet_twsk(sk)->tw_transparent)); if (sk != skb->sk) - xt_socket_put_sk(sk); + sock_gen_put(sk); if (wildcard || !transparent) sk = NULL; @@ -381,7 +372,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) inet_twsk(sk)->tw_transparent)); if (sk != skb->sk) - xt_socket_put_sk(sk); + sock_gen_put(sk); if (wildcard || !transparent) sk = NULL; -- cgit v1.2.3-55-g7522 From c1b1203d65955c179fec617ff17a21273f33a414 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 18 Oct 2013 13:48:25 -0700 Subject: net: misc: Remove extern from function prototypes There are a mix of function prototypes with and without extern in the kernel sources. Standardize on not using extern for function prototypes. Function prototypes don't need to be written with extern. extern is assumed by the compiler. Its use is as unnecessary as using auto to declare automatic/local variables in a block. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/irda/irnet/irnet.h | 15 ++--- net/l2tp/l2tp_core.h | 57 +++++++++------- net/mac80211/rate.h | 12 ++-- net/netfilter/nf_internals.h | 28 ++++---- net/rds/rds.h | 2 +- net/rxrpc/ar-internal.h | 150 ++++++++++++++++++++----------------------- net/tipc/core.h | 28 ++++---- net/wimax/wimax-internal.h | 18 +++--- net/wireless/core.h | 6 +- net/wireless/sysfs.h | 4 +- net/xfrm/xfrm_hash.h | 4 +- 11 files changed, 157 insertions(+), 167 deletions(-) (limited to 'net/netfilter') diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index 564eb0b8afa3..8d65bb9477fc 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -509,16 +509,11 @@ typedef struct irnet_ctrl_channel */ /* -------------------------- IRDA PART -------------------------- */ -extern int - irda_irnet_create(irnet_socket *); /* Initialise a IrNET socket */ -extern int - irda_irnet_connect(irnet_socket *); /* Try to connect over IrDA */ -extern void - irda_irnet_destroy(irnet_socket *); /* Teardown a IrNET socket */ -extern int - irda_irnet_init(void); /* Initialise IrDA part of IrNET */ -extern void - irda_irnet_cleanup(void); /* Teardown IrDA part of IrNET */ +int irda_irnet_create(irnet_socket *); /* Initialise an IrNET socket */ +int irda_irnet_connect(irnet_socket *); /* Try to connect over IrDA */ +void irda_irnet_destroy(irnet_socket *); /* Teardown an IrNET socket */ +int irda_irnet_init(void); /* Initialise IrDA part of IrNET */ +void irda_irnet_cleanup(void); /* Teardown IrDA part of IrNET */ /**************************** VARIABLES ****************************/ diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 6f251cbc2ed7..1ee9f6965d68 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -238,29 +238,40 @@ out: return tunnel; } -extern struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel); -extern void l2tp_tunnel_sock_put(struct sock *sk); -extern struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); -extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); -extern struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname); -extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); -extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); - -extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); -extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); -extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); -extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); -extern void __l2tp_session_unhash(struct l2tp_session *session); -extern int l2tp_session_delete(struct l2tp_session *session); -extern void l2tp_session_free(struct l2tp_session *session); -extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); -extern int l2tp_session_queue_purge(struct l2tp_session *session); -extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); - -extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); - -extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops); -extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); +struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel); +void l2tp_tunnel_sock_put(struct sock *sk); +struct l2tp_session *l2tp_session_find(struct net *net, + struct l2tp_tunnel *tunnel, + u32 session_id); +struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); +struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname); +struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); +struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); + +int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, + u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, + struct l2tp_tunnel **tunnelp); +void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); +int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); +struct l2tp_session *l2tp_session_create(int priv_size, + struct l2tp_tunnel *tunnel, + u32 session_id, u32 peer_session_id, + struct l2tp_session_cfg *cfg); +void __l2tp_session_unhash(struct l2tp_session *session); +int l2tp_session_delete(struct l2tp_session *session); +void l2tp_session_free(struct l2tp_session *session); +void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, + unsigned char *ptr, unsigned char *optr, u16 hdrflags, + int length, int (*payload_hook)(struct sk_buff *skb)); +int l2tp_session_queue_purge(struct l2tp_session *session); +int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); + +int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, + int hdr_len); + +int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, + const struct l2tp_nl_cmd_ops *ops); +void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); /* Session reference counts. Incremented when code obtains a reference * to a session. diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 5dedc56c94db..505bc0dea074 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -144,8 +144,8 @@ void rate_control_deinitialize(struct ieee80211_local *local); /* Rate control algorithms */ #ifdef CONFIG_MAC80211_RC_PID -extern int rc80211_pid_init(void); -extern void rc80211_pid_exit(void); +int rc80211_pid_init(void); +void rc80211_pid_exit(void); #else static inline int rc80211_pid_init(void) { @@ -157,8 +157,8 @@ static inline void rc80211_pid_exit(void) #endif #ifdef CONFIG_MAC80211_RC_MINSTREL -extern int rc80211_minstrel_init(void); -extern void rc80211_minstrel_exit(void); +int rc80211_minstrel_init(void); +void rc80211_minstrel_exit(void); #else static inline int rc80211_minstrel_init(void) { @@ -170,8 +170,8 @@ static inline void rc80211_minstrel_exit(void) #endif #ifdef CONFIG_MAC80211_RC_MINSTREL_HT -extern int rc80211_minstrel_ht_init(void); -extern void rc80211_minstrel_ht_exit(void); +int rc80211_minstrel_ht_init(void); +void rc80211_minstrel_ht_exit(void); #else static inline int rc80211_minstrel_ht_init(void) { diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 3deec997be89..61a3c927e63c 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -13,26 +13,20 @@ /* core.c */ -extern unsigned int nf_iterate(struct list_head *head, - struct sk_buff *skb, - unsigned int hook, - const struct net_device *indev, - const struct net_device *outdev, - struct nf_hook_ops **elemp, - int (*okfn)(struct sk_buff *), - int hook_thresh); +unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, + unsigned int hook, const struct net_device *indev, + const struct net_device *outdev, + struct nf_hook_ops **elemp, + int (*okfn)(struct sk_buff *), int hook_thresh); /* nf_queue.c */ -extern int nf_queue(struct sk_buff *skb, - struct nf_hook_ops *elem, - u_int8_t pf, unsigned int hook, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), - unsigned int queuenum); -extern int __init netfilter_queue_init(void); +int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, u_int8_t pf, + unsigned int hook, struct net_device *indev, + struct net_device *outdev, int (*okfn)(struct sk_buff *), + unsigned int queuenum); +int __init netfilter_queue_init(void); /* nf_log.c */ -extern int __init netfilter_log_init(void); +int __init netfilter_log_init(void); #endif diff --git a/net/rds/rds.h b/net/rds/rds.h index ec1d731ecff0..48f8ffc60f8f 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -749,7 +749,7 @@ void rds_atomic_send_complete(struct rds_message *rm, int wc_status); int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg); -extern void __rds_put_mr_final(struct rds_mr *mr); +void __rds_put_mr_final(struct rds_mr *mr); static inline void rds_mr_put(struct rds_mr *mr) { if (atomic_dec_and_test(&mr->r_refcount)) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index a693aca2ae2e..5f43675ee1df 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -426,17 +426,16 @@ extern struct workqueue_struct *rxrpc_workqueue; /* * ar-accept.c */ -extern void rxrpc_accept_incoming_calls(struct work_struct *); -extern struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, - unsigned long); -extern int rxrpc_reject_call(struct rxrpc_sock *); +void rxrpc_accept_incoming_calls(struct work_struct *); +struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long); +int rxrpc_reject_call(struct rxrpc_sock *); /* * ar-ack.c */ -extern void __rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool); -extern void rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool); -extern void rxrpc_process_call(struct work_struct *); +void __rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool); +void rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool); +void rxrpc_process_call(struct work_struct *); /* * ar-call.c @@ -445,19 +444,18 @@ extern struct kmem_cache *rxrpc_call_jar; extern struct list_head rxrpc_calls; extern rwlock_t rxrpc_call_lock; -extern struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *, - struct rxrpc_transport *, - struct rxrpc_conn_bundle *, - unsigned long, int, gfp_t); -extern struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *, - struct rxrpc_connection *, - struct rxrpc_header *, gfp_t); -extern struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *, - unsigned long); -extern void rxrpc_release_call(struct rxrpc_call *); -extern void rxrpc_release_calls_on_socket(struct rxrpc_sock *); -extern void __rxrpc_put_call(struct rxrpc_call *); -extern void __exit rxrpc_destroy_all_calls(void); +struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *, + struct rxrpc_transport *, + struct rxrpc_conn_bundle *, + unsigned long, int, gfp_t); +struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *, + struct rxrpc_connection *, + struct rxrpc_header *, gfp_t); +struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *, unsigned long); +void rxrpc_release_call(struct rxrpc_call *); +void rxrpc_release_calls_on_socket(struct rxrpc_sock *); +void __rxrpc_put_call(struct rxrpc_call *); +void __exit rxrpc_destroy_all_calls(void); /* * ar-connection.c @@ -465,19 +463,16 @@ extern void __exit rxrpc_destroy_all_calls(void); extern struct list_head rxrpc_connections; extern rwlock_t rxrpc_connection_lock; -extern struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *, - struct rxrpc_transport *, - struct key *, - __be16, gfp_t); -extern void rxrpc_put_bundle(struct rxrpc_transport *, - struct rxrpc_conn_bundle *); -extern int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_transport *, - struct rxrpc_conn_bundle *, struct rxrpc_call *, - gfp_t); -extern void rxrpc_put_connection(struct rxrpc_connection *); -extern void __exit rxrpc_destroy_all_connections(void); -extern struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *, - struct rxrpc_header *); +struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *, + struct rxrpc_transport *, + struct key *, __be16, gfp_t); +void rxrpc_put_bundle(struct rxrpc_transport *, struct rxrpc_conn_bundle *); +int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_transport *, + struct rxrpc_conn_bundle *, struct rxrpc_call *, gfp_t); +void rxrpc_put_connection(struct rxrpc_connection *); +void __exit rxrpc_destroy_all_connections(void); +struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *, + struct rxrpc_header *); extern struct rxrpc_connection * rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_header *, gfp_t); @@ -485,15 +480,15 @@ rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_header *, /* * ar-connevent.c */ -extern void rxrpc_process_connection(struct work_struct *); -extern void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *); -extern void rxrpc_reject_packets(struct work_struct *); +void rxrpc_process_connection(struct work_struct *); +void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *); +void rxrpc_reject_packets(struct work_struct *); /* * ar-error.c */ -extern void rxrpc_UDP_error_report(struct sock *); -extern void rxrpc_UDP_error_handler(struct work_struct *); +void rxrpc_UDP_error_report(struct sock *); +void rxrpc_UDP_error_handler(struct work_struct *); /* * ar-input.c @@ -501,18 +496,17 @@ extern void rxrpc_UDP_error_handler(struct work_struct *); extern unsigned long rxrpc_ack_timeout; extern const char *rxrpc_pkts[]; -extern void rxrpc_data_ready(struct sock *, int); -extern int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool, - bool); -extern void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *); +void rxrpc_data_ready(struct sock *, int); +int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool, bool); +void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *); /* * ar-local.c */ extern rwlock_t rxrpc_local_lock; -extern struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *); -extern void rxrpc_put_local(struct rxrpc_local *); -extern void __exit rxrpc_destroy_all_locals(void); +struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *); +void rxrpc_put_local(struct rxrpc_local *); +void __exit rxrpc_destroy_all_locals(void); /* * ar-key.c @@ -520,31 +514,29 @@ extern void __exit rxrpc_destroy_all_locals(void); extern struct key_type key_type_rxrpc; extern struct key_type key_type_rxrpc_s; -extern int rxrpc_request_key(struct rxrpc_sock *, char __user *, int); -extern int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int); -extern int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, - time_t, u32); +int rxrpc_request_key(struct rxrpc_sock *, char __user *, int); +int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int); +int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t, + u32); /* * ar-output.c */ extern int rxrpc_resend_timeout; -extern int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *); -extern int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *, - struct rxrpc_transport *, struct msghdr *, - size_t); -extern int rxrpc_server_sendmsg(struct kiocb *, struct rxrpc_sock *, - struct msghdr *, size_t); +int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *); +int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *, + struct rxrpc_transport *, struct msghdr *, size_t); +int rxrpc_server_sendmsg(struct kiocb *, struct rxrpc_sock *, struct msghdr *, + size_t); /* * ar-peer.c */ -extern struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *, gfp_t); -extern void rxrpc_put_peer(struct rxrpc_peer *); -extern struct rxrpc_peer *rxrpc_find_peer(struct rxrpc_local *, - __be32, __be16); -extern void __exit rxrpc_destroy_all_peers(void); +struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *, gfp_t); +void rxrpc_put_peer(struct rxrpc_peer *); +struct rxrpc_peer *rxrpc_find_peer(struct rxrpc_local *, __be32, __be16); +void __exit rxrpc_destroy_all_peers(void); /* * ar-proc.c @@ -556,38 +548,36 @@ extern const struct file_operations rxrpc_connection_seq_fops; /* * ar-recvmsg.c */ -extern void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *); -extern int rxrpc_recvmsg(struct kiocb *, struct socket *, struct msghdr *, - size_t, int); +void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *); +int rxrpc_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, + int); /* * ar-security.c */ -extern int rxrpc_register_security(struct rxrpc_security *); -extern void rxrpc_unregister_security(struct rxrpc_security *); -extern int rxrpc_init_client_conn_security(struct rxrpc_connection *); -extern int rxrpc_init_server_conn_security(struct rxrpc_connection *); -extern int rxrpc_secure_packet(const struct rxrpc_call *, struct sk_buff *, - size_t, void *); -extern int rxrpc_verify_packet(const struct rxrpc_call *, struct sk_buff *, - u32 *); -extern void rxrpc_clear_conn_security(struct rxrpc_connection *); +int rxrpc_register_security(struct rxrpc_security *); +void rxrpc_unregister_security(struct rxrpc_security *); +int rxrpc_init_client_conn_security(struct rxrpc_connection *); +int rxrpc_init_server_conn_security(struct rxrpc_connection *); +int rxrpc_secure_packet(const struct rxrpc_call *, struct sk_buff *, size_t, + void *); +int rxrpc_verify_packet(const struct rxrpc_call *, struct sk_buff *, u32 *); +void rxrpc_clear_conn_security(struct rxrpc_connection *); /* * ar-skbuff.c */ -extern void rxrpc_packet_destructor(struct sk_buff *); +void rxrpc_packet_destructor(struct sk_buff *); /* * ar-transport.c */ -extern struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *, - struct rxrpc_peer *, - gfp_t); -extern void rxrpc_put_transport(struct rxrpc_transport *); -extern void __exit rxrpc_destroy_all_transports(void); -extern struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *, - struct rxrpc_peer *); +struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *, + struct rxrpc_peer *, gfp_t); +void rxrpc_put_transport(struct rxrpc_transport *); +void __exit rxrpc_destroy_all_transports(void); +struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *, + struct rxrpc_peer *); /* * debug tracing diff --git a/net/tipc/core.h b/net/tipc/core.h index be72f8cebc53..94895d4e86ab 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -90,21 +90,21 @@ extern int tipc_random __read_mostly; /* * Routines available to privileged subsystems */ -extern int tipc_core_start_net(unsigned long); -extern int tipc_handler_start(void); -extern void tipc_handler_stop(void); -extern int tipc_netlink_start(void); -extern void tipc_netlink_stop(void); -extern int tipc_socket_init(void); -extern void tipc_socket_stop(void); -extern int tipc_sock_create_local(int type, struct socket **res); -extern void tipc_sock_release_local(struct socket *sock); -extern int tipc_sock_accept_local(struct socket *sock, - struct socket **newsock, int flags); +int tipc_core_start_net(unsigned long); +int tipc_handler_start(void); +void tipc_handler_stop(void); +int tipc_netlink_start(void); +void tipc_netlink_stop(void); +int tipc_socket_init(void); +void tipc_socket_stop(void); +int tipc_sock_create_local(int type, struct socket **res); +void tipc_sock_release_local(struct socket *sock); +int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, + int flags); #ifdef CONFIG_SYSCTL -extern int tipc_register_sysctl(void); -extern void tipc_unregister_sysctl(void); +int tipc_register_sysctl(void); +void tipc_unregister_sysctl(void); #else #define tipc_register_sysctl() 0 #define tipc_unregister_sysctl() @@ -201,6 +201,6 @@ static inline struct tipc_msg *buf_msg(struct sk_buff *skb) return (struct tipc_msg *)skb->data; } -extern struct sk_buff *tipc_buf_acquire(u32 size); +struct sk_buff *tipc_buf_acquire(u32 size); #endif diff --git a/net/wimax/wimax-internal.h b/net/wimax/wimax-internal.h index 1e743d214856..5dcd9c067bf0 100644 --- a/net/wimax/wimax-internal.h +++ b/net/wimax/wimax-internal.h @@ -63,11 +63,11 @@ void __wimax_state_set(struct wimax_dev *wimax_dev, enum wimax_st state) { wimax_dev->state = state; } -extern void __wimax_state_change(struct wimax_dev *, enum wimax_st); +void __wimax_state_change(struct wimax_dev *, enum wimax_st); #ifdef CONFIG_DEBUG_FS -extern int wimax_debugfs_add(struct wimax_dev *); -extern void wimax_debugfs_rm(struct wimax_dev *); +int wimax_debugfs_add(struct wimax_dev *); +void wimax_debugfs_rm(struct wimax_dev *); #else static inline int wimax_debugfs_add(struct wimax_dev *wimax_dev) { @@ -76,13 +76,13 @@ static inline int wimax_debugfs_add(struct wimax_dev *wimax_dev) static inline void wimax_debugfs_rm(struct wimax_dev *wimax_dev) {} #endif -extern void wimax_id_table_add(struct wimax_dev *); -extern struct wimax_dev *wimax_dev_get_by_genl_info(struct genl_info *, int); -extern void wimax_id_table_rm(struct wimax_dev *); -extern void wimax_id_table_release(void); +void wimax_id_table_add(struct wimax_dev *); +struct wimax_dev *wimax_dev_get_by_genl_info(struct genl_info *, int); +void wimax_id_table_rm(struct wimax_dev *); +void wimax_id_table_release(void); -extern int wimax_rfkill_add(struct wimax_dev *); -extern void wimax_rfkill_rm(struct wimax_dev *); +int wimax_rfkill_add(struct wimax_dev *); +void wimax_rfkill_rm(struct wimax_dev *); extern struct genl_family wimax_gnl_family; extern struct genl_multicast_group wimax_gnl_mcg; diff --git a/net/wireless/core.h b/net/wireless/core.h index b43efac4efca..74beff1e926f 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -234,10 +234,10 @@ struct cfg80211_beacon_registration { }; /* free object */ -extern void cfg80211_dev_free(struct cfg80211_registered_device *rdev); +void cfg80211_dev_free(struct cfg80211_registered_device *rdev); -extern int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, - char *newname); +int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, + char *newname); void ieee80211_set_bitrate_flags(struct wiphy *wiphy); diff --git a/net/wireless/sysfs.h b/net/wireless/sysfs.h index 65acbebd3711..b533ed71daff 100644 --- a/net/wireless/sysfs.h +++ b/net/wireless/sysfs.h @@ -1,8 +1,8 @@ #ifndef __WIRELESS_SYSFS_H #define __WIRELESS_SYSFS_H -extern int wiphy_sysfs_init(void); -extern void wiphy_sysfs_exit(void); +int wiphy_sysfs_init(void); +void wiphy_sysfs_exit(void); extern struct class ieee80211_class; diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index 716502ada53b..0622d319e1f2 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -130,7 +130,7 @@ static inline unsigned int __addr_hash(const xfrm_address_t *daddr, return h & hmask; } -extern struct hlist_head *xfrm_hash_alloc(unsigned int sz); -extern void xfrm_hash_free(struct hlist_head *n, unsigned int sz); +struct hlist_head *xfrm_hash_alloc(unsigned int sz); +void xfrm_hash_free(struct hlist_head *n, unsigned int sz); #endif /* _XFRM_HASH_H */ -- cgit v1.2.3-55-g7522 From 93302880d8a3e5dc6b7da3f9825beb839152c940 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 18 Oct 2013 11:41:55 +0200 Subject: netfilter: ipset: Use netlink callback dump args only Instead of cb->data, use callback dump args only and introduce symbolic names instead of plain numbers at accessing the argument members. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 10 +++++ net/netfilter/ipset/ip_set_bitmap_gen.h | 11 +++--- net/netfilter/ipset/ip_set_core.c | 70 ++++++++++++++++----------------- net/netfilter/ipset/ip_set_hash_gen.h | 20 +++++----- net/netfilter/ipset/ip_set_list_set.c | 11 +++--- 5 files changed, 68 insertions(+), 54 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 7967516adc0d..c7174b816674 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -316,6 +316,16 @@ ip_set_init_counter(struct ip_set_counter *counter, atomic64_set(&(counter)->packets, (long long)(ext->packets)); } +/* Netlink CB args */ +enum { + IPSET_CB_NET = 0, + IPSET_CB_DUMP, + IPSET_CB_INDEX, + IPSET_CB_ARG0, + IPSET_CB_ARG1, + IPSET_CB_ARG2, +}; + /* register and unregister set references */ extern ip_set_id_t ip_set_get_byname(struct net *net, const char *name, struct ip_set **set); diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index a13e15be7911..f2c7d83dc23f 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -198,13 +198,14 @@ mtype_list(const struct ip_set *set, struct mtype *map = set->data; struct nlattr *adt, *nested; void *x; - u32 id, first = cb->args[2]; + u32 id, first = cb->args[IPSET_CB_ARG0]; adt = ipset_nest_start(skb, IPSET_ATTR_ADT); if (!adt) return -EMSGSIZE; - for (; cb->args[2] < map->elements; cb->args[2]++) { - id = cb->args[2]; + for (; cb->args[IPSET_CB_ARG0] < map->elements; + cb->args[IPSET_CB_ARG0]++) { + id = cb->args[IPSET_CB_ARG0]; x = get_ext(set, map, id); if (!test_bit(id, map->members) || (SET_WITH_TIMEOUT(set) && @@ -231,14 +232,14 @@ mtype_list(const struct ip_set *set, ipset_nest_end(skb, adt); /* Set listing finished */ - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return 0; nla_put_failure: nla_nest_cancel(skb, nested); if (unlikely(id == first)) { - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return -EMSGSIZE; } ipset_nest_end(skb, adt); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index dc9284bdd2dd..bac7e01df67f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1182,10 +1182,12 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, static int ip_set_dump_done(struct netlink_callback *cb) { - struct ip_set_net *inst = (struct ip_set_net *)cb->data; - if (cb->args[2]) { - pr_debug("release set %s\n", nfnl_set(inst, cb->args[1])->name); - __ip_set_put_byindex(inst, (ip_set_id_t) cb->args[1]); + struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET]; + if (cb->args[IPSET_CB_ARG0]) { + pr_debug("release set %s\n", + nfnl_set(inst, cb->args[IPSET_CB_INDEX])->name); + __ip_set_put_byindex(inst, + (ip_set_id_t) cb->args[IPSET_CB_INDEX]); } return 0; } @@ -1203,7 +1205,7 @@ dump_attrs(struct nlmsghdr *nlh) } static int -dump_init(struct netlink_callback *cb) +dump_init(struct netlink_callback *cb, struct ip_set_net *inst) { struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); @@ -1211,15 +1213,15 @@ dump_init(struct netlink_callback *cb) struct nlattr *attr = (void *)nlh + min_len; u32 dump_type; ip_set_id_t index; - struct ip_set_net *inst = (struct ip_set_net *)cb->data; /* Second pass, so parser can't fail */ nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, ip_set_setname_policy); - /* cb->args[0] : dump single set/all sets - * [1] : set index - * [..]: type specific + /* cb->args[IPSET_CB_NET]: net namespace + * [IPSET_CB_DUMP]: dump single set/all sets + * [IPSET_CB_INDEX]: set index + * [IPSET_CB_ARG0]: type specific */ if (cda[IPSET_ATTR_SETNAME]) { @@ -1231,7 +1233,7 @@ dump_init(struct netlink_callback *cb) return -ENOENT; dump_type = DUMP_ONE; - cb->args[1] = index; + cb->args[IPSET_CB_INDEX] = index; } else dump_type = DUMP_ALL; @@ -1239,7 +1241,8 @@ dump_init(struct netlink_callback *cb) u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]); dump_type |= (f << 16); } - cb->args[0] = dump_type; + cb->args[IPSET_CB_NET] = (unsigned long)inst; + cb->args[IPSET_CB_DUMP] = dump_type; return 0; } @@ -1251,12 +1254,12 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) struct ip_set *set = NULL; struct nlmsghdr *nlh = NULL; unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0; + struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk)); u32 dump_type, dump_flags; int ret = 0; - struct ip_set_net *inst = (struct ip_set_net *)cb->data; - if (!cb->args[0]) { - ret = dump_init(cb); + if (!cb->args[IPSET_CB_DUMP]) { + ret = dump_init(cb, inst); if (ret < 0) { nlh = nlmsg_hdr(cb->skb); /* We have to create and send the error message @@ -1267,17 +1270,18 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) } } - if (cb->args[1] >= inst->ip_set_max) + if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max) goto out; - dump_type = DUMP_TYPE(cb->args[0]); - dump_flags = DUMP_FLAGS(cb->args[0]); - max = dump_type == DUMP_ONE ? cb->args[1] + 1 : inst->ip_set_max; + dump_type = DUMP_TYPE(cb->args[IPSET_CB_DUMP]); + dump_flags = DUMP_FLAGS(cb->args[IPSET_CB_DUMP]); + max = dump_type == DUMP_ONE ? cb->args[IPSET_CB_INDEX] + 1 + : inst->ip_set_max; dump_last: - pr_debug("args[0]: %u %u args[1]: %ld\n", - dump_type, dump_flags, cb->args[1]); - for (; cb->args[1] < max; cb->args[1]++) { - index = (ip_set_id_t) cb->args[1]; + pr_debug("dump type, flag: %u %u index: %ld\n", + dump_type, dump_flags, cb->args[IPSET_CB_INDEX]); + for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) { + index = (ip_set_id_t) cb->args[IPSET_CB_INDEX]; set = nfnl_set(inst, index); if (set == NULL) { if (dump_type == DUMP_ONE) { @@ -1294,7 +1298,7 @@ dump_last: !!(set->type->features & IPSET_DUMP_LAST))) continue; pr_debug("List set: %s\n", set->name); - if (!cb->args[2]) { + if (!cb->args[IPSET_CB_ARG0]) { /* Start listing: make sure set won't be destroyed */ pr_debug("reference set\n"); __ip_set_get(set); @@ -1311,7 +1315,7 @@ dump_last: goto nla_put_failure; if (dump_flags & IPSET_FLAG_LIST_SETNAME) goto next_set; - switch (cb->args[2]) { + switch (cb->args[IPSET_CB_ARG0]) { case 0: /* Core header data */ if (nla_put_string(skb, IPSET_ATTR_TYPENAME, @@ -1331,7 +1335,7 @@ dump_last: read_lock_bh(&set->lock); ret = set->variant->list(set, skb, cb); read_unlock_bh(&set->lock); - if (!cb->args[2]) + if (!cb->args[IPSET_CB_ARG0]) /* Set is done, proceed with next one */ goto next_set; goto release_refcount; @@ -1340,8 +1344,8 @@ dump_last: /* If we dump all sets, continue with dumping last ones */ if (dump_type == DUMP_ALL) { dump_type = DUMP_LAST; - cb->args[0] = dump_type | (dump_flags << 16); - cb->args[1] = 0; + cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16); + cb->args[IPSET_CB_INDEX] = 0; goto dump_last; } goto out; @@ -1350,15 +1354,15 @@ nla_put_failure: ret = -EFAULT; next_set: if (dump_type == DUMP_ONE) - cb->args[1] = IPSET_INVALID_ID; + cb->args[IPSET_CB_INDEX] = IPSET_INVALID_ID; else - cb->args[1]++; + cb->args[IPSET_CB_INDEX]++; release_refcount: /* If there was an error or set is done, release set */ - if (ret || !cb->args[2]) { + if (ret || !cb->args[IPSET_CB_ARG0]) { pr_debug("release set %s\n", nfnl_set(inst, index)->name); __ip_set_put_byindex(inst, index); - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; } out: if (nlh) { @@ -1375,8 +1379,6 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); - if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; @@ -1384,7 +1386,6 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, struct netlink_dump_control c = { .dump = ip_set_dump_start, .done = ip_set_dump_done, - .data = (void *)inst }; return netlink_dump_start(ctnl, skb, nlh, &c); } @@ -1961,7 +1962,6 @@ static int __net_init ip_set_net_init(struct net *net) { struct ip_set_net *inst = ip_set_pernet(net); - struct ip_set **list; inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 6a80dbd30df7..2f80c74c871f 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -931,7 +931,7 @@ mtype_list(const struct ip_set *set, struct nlattr *atd, *nested; const struct hbucket *n; const struct mtype_elem *e; - u32 first = cb->args[2]; + u32 first = cb->args[IPSET_CB_ARG0]; /* We assume that one hash bucket fills into one page */ void *incomplete; int i; @@ -940,20 +940,22 @@ mtype_list(const struct ip_set *set, if (!atd) return -EMSGSIZE; pr_debug("list hash set %s\n", set->name); - for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) { + for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits); + cb->args[IPSET_CB_ARG0]++) { incomplete = skb_tail_pointer(skb); - n = hbucket(t, cb->args[2]); - pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n); + n = hbucket(t, cb->args[IPSET_CB_ARG0]); + pr_debug("cb->arg bucket: %lu, t %p n %p\n", + cb->args[IPSET_CB_ARG0], t, n); for (i = 0; i < n->pos; i++) { e = ahash_data(n, i, set->dsize); if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; pr_debug("list hash %lu hbucket %p i %u, data %p\n", - cb->args[2], n, i, e); + cb->args[IPSET_CB_ARG0], n, i, e); nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { - if (cb->args[2] == first) { + if (cb->args[IPSET_CB_ARG0] == first) { nla_nest_cancel(skb, atd); return -EMSGSIZE; } else @@ -968,16 +970,16 @@ mtype_list(const struct ip_set *set, } ipset_nest_end(skb, atd); /* Set listing finished */ - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return 0; nla_put_failure: nlmsg_trim(skb, incomplete); - if (unlikely(first == cb->args[2])) { + if (unlikely(first == cb->args[IPSET_CB_ARG0])) { pr_warning("Can't list set %s: one bucket does not fit into " "a message. Please report it!\n", set->name); - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return -EMSGSIZE; } ipset_nest_end(skb, atd); diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index ec6f6d15dded..3e2317f3cf68 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -490,14 +490,15 @@ list_set_list(const struct ip_set *set, { const struct list_set *map = set->data; struct nlattr *atd, *nested; - u32 i, first = cb->args[2]; + u32 i, first = cb->args[IPSET_CB_ARG0]; const struct set_elem *e; atd = ipset_nest_start(skb, IPSET_ATTR_ADT); if (!atd) return -EMSGSIZE; - for (; cb->args[2] < map->size; cb->args[2]++) { - i = cb->args[2]; + for (; cb->args[IPSET_CB_ARG0] < map->size; + cb->args[IPSET_CB_ARG0]++) { + i = cb->args[IPSET_CB_ARG0]; e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) goto finish; @@ -522,13 +523,13 @@ list_set_list(const struct ip_set *set, finish: ipset_nest_end(skb, atd); /* Set listing finished */ - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return 0; nla_put_failure: nla_nest_cancel(skb, nested); if (unlikely(i == first)) { - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return -EMSGSIZE; } ipset_nest_end(skb, atd); -- cgit v1.2.3-55-g7522 From 1a869205c75cb222263fa04f200485ffbe9eaadf Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 18 Oct 2013 11:41:57 +0200 Subject: netfilter: ipset: The unnamed union initialization may lead to compilation error The unnamed union should be possible to be initialized directly, but unfortunately it's not so: /usr/src/ipset/kernel/net/netfilter/ipset/ip_set_hash_netnet.c: In function ?hash_netnet4_kadt?: /usr/src/ipset/kernel/net/netfilter/ipset/ip_set_hash_netnet.c:141: error: unknown field ?cidr? specified in initializer Reported-by: Husnu Demir Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_netnet.c | 22 ++++++++++------------ net/netfilter/ipset/ip_set_hash_netportnet.c | 22 ++++++++++------------ 2 files changed, 20 insertions(+), 24 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 426032706ca9..2bc2dec20b00 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -137,12 +137,11 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netnet4_elem e = { - .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK, - .cidr[1] = h->nets[0].cidr[1] ? h->nets[0].cidr[1] : HOST_MASK, - }; + struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; @@ -160,14 +159,14 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netnet4_elem e = { .cidr[0] = HOST_MASK, - .cidr[1] = HOST_MASK }; + struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, last; u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2; u8 cidr, cidr2; int ret; + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || @@ -364,12 +363,11 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netnet6_elem e = { - .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK, - .cidr[1] = h->nets[0].cidr[1] ? h->nets[0].cidr[1] : HOST_MASK - }; + struct hash_netnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK; @@ -386,11 +384,11 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netnet6_elem e = { .cidr[0] = HOST_MASK, - .cidr[1] = HOST_MASK }; + struct hash_netnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 363fab933d48..703d1192a6a2 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -147,12 +147,11 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netportnet4_elem e = { - .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), - .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK), - }; + struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; @@ -174,8 +173,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netportnet4_elem e = { .cidr[0] = HOST_MASK, - .cidr[1] = HOST_MASK }; + struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to; u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; @@ -183,6 +181,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], u8 cidr, cidr2; int ret; + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || @@ -419,12 +418,11 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netportnet6_elem e = { - .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), - .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK), - }; + struct hash_netportnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK; @@ -446,13 +444,13 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netportnet6_elem e = { .cidr[0] = HOST_MASK, - .cidr[1] = HOST_MASK }; + struct hash_netportnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; int ret; + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || -- cgit v1.2.3-55-g7522 From dc476e7c8e7824fd326c79f50d7302faca7a4c2b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 18 Oct 2013 14:03:41 +0200 Subject: netfilter:ipset: Fix memory allocation for bitmap:port At the restructuring of the bitmap types creation in ipset, for the bitmap:port type wrong (too large) memory allocation was copied (netfilter bugzilla id #859). Reported-by: Quentin Armitage Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index e7603c5b53d7..cf99676e69f8 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -254,7 +254,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], return -ENOMEM; map->elements = last_port - first_port + 1; - map->memsize = map->elements * sizeof(unsigned long); + map->memsize = bitmap_bytes(0, map->elements); set->variant = &bitmap_port; set->dsize = ip_set_elem_len(set, tb, 0); if (!init_map_port(set, map, first_port, last_port)) { -- cgit v1.2.3-55-g7522 From 1e56555ee1f3d04d89bdaa3df57ad2da9fbc0999 Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Sun, 27 Oct 2013 19:06:45 +0100 Subject: netfilter: ipset: remove duplicate define This patch removes a duplicate define from net/netfilter/ipset/ip_set_hash_gen.h Signed-off-by: Michael Opdenacker Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_hash_gen.h | 1 - 1 file changed, 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 2f80c74c871f..be6932ad3a86 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -234,7 +234,6 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) #define mtype MTYPE -#define mtype_elem IPSET_TOKEN(MTYPE, _elem) #define mtype_add IPSET_TOKEN(MTYPE, _add) #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) -- cgit v1.2.3-55-g7522 From 6e7cd27c0f77847f1b07a81ae2ed17b937a7531a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 25 Oct 2013 11:05:04 +0200 Subject: net: ipvs: sctp: add missing verdict assignments in sctp_conn_schedule If skb_header_pointer() fails, we need to assign a verdict, that is NF_DROP in this case, otherwise, we would leave the verdict from conn_schedule() uninitialized when returning. Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Acked-by: Neil Horman Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto_sctp.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 23e596e438b3..9ca7aa033284 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -20,13 +20,18 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, sctp_sctphdr_t *sh, _sctph; sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); - if (sh == NULL) + if (sh == NULL) { + *verdict = NF_DROP; return 0; + } sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), sizeof(_schunkh), &_schunkh); - if (sch == NULL) + if (sch == NULL) { + *verdict = NF_DROP; return 0; + } + net = skb_net(skb); ipvs = net_ipvs(net); rcu_read_lock(); -- cgit v1.2.3-55-g7522 From 98c37b6b01812d331db4d49cacd603891d0d53ba Mon Sep 17 00:00:00 2001 From: Tomasz Bursztyka Date: Mon, 28 Oct 2013 12:19:45 +0200 Subject: netfilter: nft_nat: Fix endianness issue reported by sparse This patch fixes this: CHECK net/netfilter/nft_nat.c net/netfilter/nft_nat.c:50:43: warning: incorrect type in assignment (different base types) net/netfilter/nft_nat.c:50:43: expected restricted __be32 [addressable] [usertype] ip net/netfilter/nft_nat.c:50:43: got unsigned int [unsigned] [usertype] net/netfilter/nft_nat.c:51:43: warning: incorrect type in assignment (different base types) net/netfilter/nft_nat.c:51:43: expected restricted __be32 [addressable] [usertype] ip net/netfilter/nft_nat.c:51:43: got unsigned int [unsigned] [usertype] net/netfilter/nft_nat.c:65:37: warning: incorrect type in assignment (different base types) net/netfilter/nft_nat.c:65:37: expected restricted __be16 [addressable] [assigned] [usertype] all net/netfilter/nft_nat.c:65:37: got unsigned int [unsigned] net/netfilter/nft_nat.c:66:37: warning: incorrect type in assignment (different base types) net/netfilter/nft_nat.c:66:37: expected restricted __be16 [addressable] [assigned] [usertype] all net/netfilter/nft_nat.c:66:37: got unsigned int [unsigned] Signed-off-by: Tomasz Bursztyka Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_nat.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index b0b87b2d2411..d3b1ffe26181 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -47,8 +47,10 @@ static void nft_nat_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); if (priv->sreg_addr_min) { if (priv->family == AF_INET) { - range.min_addr.ip = data[priv->sreg_addr_min].data[0]; - range.max_addr.ip = data[priv->sreg_addr_max].data[0]; + range.min_addr.ip = (__force __be32) + data[priv->sreg_addr_min].data[0]; + range.max_addr.ip = (__force __be32) + data[priv->sreg_addr_max].data[0]; } else { memcpy(range.min_addr.ip6, @@ -62,8 +64,10 @@ static void nft_nat_eval(const struct nft_expr *expr, } if (priv->sreg_proto_min) { - range.min_proto.all = data[priv->sreg_proto_min].data[0]; - range.max_proto.all = data[priv->sreg_proto_max].data[0]; + range.min_proto.all = (__force __be16) + data[priv->sreg_proto_min].data[0]; + range.max_proto.all = (__force __be16) + data[priv->sreg_proto_max].data[0]; range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } -- cgit v1.2.3-55-g7522 From 97203abe6bc41ee020f37c902bd1a761157f22c1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 28 Oct 2013 10:56:20 +0100 Subject: net: ipvs: sctp: do not recalc sctp csum when ports didn't change Unlike UDP or TCP, we do not take the pseudo-header into account in SCTP checksums. So in case port mapping is the very same, we do not need to recalculate the whole SCTP checksum in software, which is very expensive. Also, similarly as in TCP, take into account when a private helper mangled the packet. In that case, we also need to recalculate the checksum even if ports might be same. Thanks for feedback regarding skb->ip_summed checks from Julian Anastasov; here's a discussion on these checks for snat and dnat: * For snat_handler(), we can see CHECKSUM_PARTIAL from virtual devices, and from LOCAL_OUT, otherwise it should be CHECKSUM_UNNECESSARY. In general, in snat it is more complex. skb contains the original route and ip_vs_route_me_harder() can change the route after snat_handler. So, for locally generated replies from local server we can not preserve the CHECKSUM_PARTIAL mode. It is an chicken or egg dilemma: snat_handler needs the device after rerouting (to check for NETIF_F_SCTP_CSUM), while ip_route_me_harder() wants the snat_handler() to put the new saddr for proper rerouting. * For dnat_handler(), we should not see CHECKSUM_COMPLETE for SCTP, in fact the small set of drivers that support SCTP offloading return CHECKSUM_UNNECESSARY on correctly received SCTP csum. We can see CHECKSUM_PARTIAL from local stack or received from virtual drivers. The idea is that SCTP decides to avoid csum calculation if hardware supports offloading. IPVS can change the device after rerouting to real server but we can preserve the CHECKSUM_PARTIAL mode if the new device supports offloading too. This works because skb dst is changed before dnat_handler and we see the new device. So, checks in the 'if' part will decide whether it is ok to keep CHECKSUM_PARTIAL for the output. If the packet was with CHECKSUM_NONE, hence we deal with unknown checksum. As we recalculate the sum for IP header in all cases, it should be safe to use CHECKSUM_UNNECESSARY. We can forward wrong checksum in this case (without cp->app). In case of CHECKSUM_UNNECESSARY, the csum was valid on receive. Signed-off-by: Daniel Borkmann Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto_sctp.c | 39 +++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 9ca7aa033284..2f7ea7564044 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -81,6 +81,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, { sctp_sctphdr_t *sctph; unsigned int sctphoff = iph->len; + bool payload_csum = false; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -92,19 +93,31 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, return 0; if (unlikely(cp->app != NULL)) { + int ret; + /* Some checks before mangling */ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, skb)) + ret = ip_vs_app_pkt_out(cp, skb); + if (ret == 0) return 0; + /* ret=2: csum update is needed after payload mangling */ + if (ret == 2) + payload_csum = true; } sctph = (void *) skb_network_header(skb) + sctphoff; - sctph->source = cp->vport; - sctp_nat_csum(skb, sctph, sctphoff); + /* Only update csum if we really have to */ + if (sctph->source != cp->vport || payload_csum || + skb->ip_summed == CHECKSUM_PARTIAL) { + sctph->source = cp->vport; + sctp_nat_csum(skb, sctph, sctphoff); + } else { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } return 1; } @@ -115,6 +128,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, { sctp_sctphdr_t *sctph; unsigned int sctphoff = iph->len; + bool payload_csum = false; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -126,19 +140,32 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, return 0; if (unlikely(cp->app != NULL)) { + int ret; + /* Some checks before mangling */ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_in(cp, skb)) + ret = ip_vs_app_pkt_in(cp, skb); + if (ret == 0) return 0; + /* ret=2: csum update is needed after payload mangling */ + if (ret == 2) + payload_csum = true; } sctph = (void *) skb_network_header(skb) + sctphoff; - sctph->dest = cp->dport; - sctp_nat_csum(skb, sctph, sctphoff); + /* Only update csum if we really have to */ + if (sctph->dest != cp->dport || payload_csum || + (skb->ip_summed == CHECKSUM_PARTIAL && + !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) { + sctph->dest = cp->dport; + sctp_nat_csum(skb, sctph, sctphoff); + } else if (skb->ip_summed != CHECKSUM_PARTIAL) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } return 1; } -- cgit v1.2.3-55-g7522 From f7b13e4330ef3c20e62ac4908cc96c1c318056c2 Mon Sep 17 00:00:00 2001 From: Holger Eitzenberger Date: Thu, 26 Sep 2013 17:31:51 +0200 Subject: netfilter: introduce nf_conn_acct structure Encapsulate counters for both directions into nf_conn_acct. During that process also consistently name pointers to the extend 'acct', not 'counters'. This patch is a cleanup. Signed-off-by: Holger Eitzenberger Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_acct.h | 10 +++++++--- include/net/netfilter/nf_conntrack_extend.h | 2 +- net/netfilter/nf_conntrack_acct.c | 12 +++++++----- net/netfilter/nf_conntrack_core.c | 16 ++++++++++------ net/netfilter/nf_conntrack_netlink.c | 16 +++++++++------- net/netfilter/xt_connbytes.c | 6 ++++-- 6 files changed, 38 insertions(+), 24 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h index fef44edf49c1..79d8d16732b4 100644 --- a/include/net/netfilter/nf_conntrack_acct.h +++ b/include/net/netfilter/nf_conntrack_acct.h @@ -19,17 +19,21 @@ struct nf_conn_counter { atomic64_t bytes; }; +struct nf_conn_acct { + struct nf_conn_counter counter[IP_CT_DIR_MAX]; +}; + static inline -struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct) +struct nf_conn_acct *nf_conn_acct_find(const struct nf_conn *ct) { return nf_ct_ext_find(ct, NF_CT_EXT_ACCT); } static inline -struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) +struct nf_conn_acct *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) { struct net *net = nf_ct_net(ct); - struct nf_conn_counter *acct; + struct nf_conn_acct *acct; if (!net->ct.sysctl_acct) return NULL; diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 86372ae0ee84..956b175523ff 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -36,7 +36,7 @@ enum nf_ct_ext_id { #define NF_CT_EXT_HELPER_TYPE struct nf_conn_help #define NF_CT_EXT_NAT_TYPE struct nf_conn_nat #define NF_CT_EXT_SEQADJ_TYPE struct nf_conn_seqadj -#define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter +#define NF_CT_EXT_ACCT_TYPE struct nf_conn_acct #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone #define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 2d3030ab5b61..a4b5e2a435ac 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -39,21 +39,23 @@ static struct ctl_table acct_sysctl_table[] = { unsigned int seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) { - struct nf_conn_counter *acct; + struct nf_conn_acct *acct; + struct nf_conn_counter *counter; acct = nf_conn_acct_find(ct); if (!acct) return 0; + counter = acct->counter; return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)atomic64_read(&acct[dir].packets), - (unsigned long long)atomic64_read(&acct[dir].bytes)); + (unsigned long long)atomic64_read(&counter[dir].packets), + (unsigned long long)atomic64_read(&counter[dir].bytes)); }; EXPORT_SYMBOL_GPL(seq_print_acct); static struct nf_ct_ext_type acct_extend __read_mostly = { - .len = sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]), - .align = __alignof__(struct nf_conn_counter[IP_CT_DIR_MAX]), + .len = sizeof(struct nf_conn_acct), + .align = __alignof__(struct nf_conn_acct), .id = NF_CT_EXT_ACCT, }; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5d892febd64c..e22d950c60b3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1109,12 +1109,14 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, acct: if (do_acct) { - struct nf_conn_counter *acct; + struct nf_conn_acct *acct; acct = nf_conn_acct_find(ct); if (acct) { - atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets); - atomic64_add(skb->len, &acct[CTINFO2DIR(ctinfo)].bytes); + struct nf_conn_counter *counter = acct->counter; + + atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); + atomic64_add(skb->len, &counter[CTINFO2DIR(ctinfo)].bytes); } } } @@ -1126,13 +1128,15 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, int do_acct) { if (do_acct) { - struct nf_conn_counter *acct; + struct nf_conn_acct *acct; acct = nf_conn_acct_find(ct); if (acct) { - atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets); + struct nf_conn_counter *counter = acct->counter; + + atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); atomic64_add(skb->len - skb_network_offset(skb), - &acct[CTINFO2DIR(ctinfo)].bytes); + &counter[CTINFO2DIR(ctinfo)].bytes); } } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index eea936b70d15..ddc3777d8340 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -237,19 +237,21 @@ static int ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, enum ip_conntrack_dir dir, int type) { - struct nf_conn_counter *acct; + struct nf_conn_acct *acct; + struct nf_conn_counter *counter; u64 pkts, bytes; acct = nf_conn_acct_find(ct); if (!acct) return 0; + counter = acct->counter; if (type == IPCTNL_MSG_CT_GET_CTRZERO) { - pkts = atomic64_xchg(&acct[dir].packets, 0); - bytes = atomic64_xchg(&acct[dir].bytes, 0); + pkts = atomic64_xchg(&counter[dir].packets, 0); + bytes = atomic64_xchg(&counter[dir].bytes, 0); } else { - pkts = atomic64_read(&acct[dir].packets); - bytes = atomic64_read(&acct[dir].bytes); + pkts = atomic64_read(&counter[dir].packets); + bytes = atomic64_read(&counter[dir].bytes); } return dump_counters(skb, pkts, bytes, dir); } @@ -530,7 +532,7 @@ ctnetlink_proto_size(const struct nf_conn *ct) } static inline size_t -ctnetlink_counters_size(const struct nf_conn *ct) +ctnetlink_acct_size(const struct nf_conn *ct) { if (!nf_ct_ext_exist(ct, NF_CT_EXT_ACCT)) return 0; @@ -579,7 +581,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */ + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ - + ctnetlink_counters_size(ct) + + ctnetlink_acct_size(ct) + ctnetlink_timestamp_size(ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ + nla_total_size(0) /* CTA_PROTOINFO */ diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index e595e07a759b..1e634615ab9d 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -26,16 +26,18 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par) u_int64_t what = 0; /* initialize to make gcc happy */ u_int64_t bytes = 0; u_int64_t pkts = 0; + const struct nf_conn_acct *acct; const struct nf_conn_counter *counters; ct = nf_ct_get(skb, &ctinfo); if (!ct) return false; - counters = nf_conn_acct_find(ct); - if (!counters) + acct = nf_conn_acct_find(ct); + if (!acct) return false; + counters = acct->counter; switch (sinfo->what) { case XT_CONNBYTES_PKTS: switch (sinfo->direction) { -- cgit v1.2.3-55-g7522 From 4542fa4727f5f83faf9e1f28f35be0b9a2317aec Mon Sep 17 00:00:00 2001 From: Holger Eitzenberger Date: Thu, 26 Sep 2013 17:31:52 +0200 Subject: netfilter: ctnetlink: account both directions in one step With the intent to dump other accounting data later. This patch is a cleanup. Signed-off-by: Holger Eitzenberger Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 49 ++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 25 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ddc3777d8340..08870b859046 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -211,13 +211,23 @@ nla_put_failure: } static int -dump_counters(struct sk_buff *skb, u64 pkts, u64 bytes, - enum ip_conntrack_dir dir) +dump_counters(struct sk_buff *skb, struct nf_conn_acct *acct, + enum ip_conntrack_dir dir, int type) { - enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; + enum ctattr_type attr = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; + struct nf_conn_counter *counter = acct->counter; struct nlattr *nest_count; + u64 pkts, bytes; - nest_count = nla_nest_start(skb, type | NLA_F_NESTED); + if (type == IPCTNL_MSG_CT_GET_CTRZERO) { + pkts = atomic64_xchg(&counter[dir].packets, 0); + bytes = atomic64_xchg(&counter[dir].bytes, 0); + } else { + pkts = atomic64_read(&counter[dir].packets); + bytes = atomic64_read(&counter[dir].bytes); + } + + nest_count = nla_nest_start(skb, attr | NLA_F_NESTED); if (!nest_count) goto nla_put_failure; @@ -234,26 +244,19 @@ nla_put_failure: } static int -ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, - enum ip_conntrack_dir dir, int type) +ctnetlink_dump_acct(struct sk_buff *skb, const struct nf_conn *ct, int type) { - struct nf_conn_acct *acct; - struct nf_conn_counter *counter; - u64 pkts, bytes; + struct nf_conn_acct *acct = nf_conn_acct_find(ct); - acct = nf_conn_acct_find(ct); if (!acct) return 0; - counter = acct->counter; - if (type == IPCTNL_MSG_CT_GET_CTRZERO) { - pkts = atomic64_xchg(&counter[dir].packets, 0); - bytes = atomic64_xchg(&counter[dir].bytes, 0); - } else { - pkts = atomic64_read(&counter[dir].packets); - bytes = atomic64_read(&counter[dir].bytes); - } - return dump_counters(skb, pkts, bytes, dir); + if (dump_counters(skb, acct, IP_CT_DIR_ORIGINAL, type) < 0) + return -1; + if (dump_counters(skb, acct, IP_CT_DIR_REPLY, type) < 0) + return -1; + + return 0; } static int @@ -490,8 +493,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, if (ctnetlink_dump_status(skb, ct) < 0 || ctnetlink_dump_timeout(skb, ct) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL, type) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY, type) < 0 || + ctnetlink_dump_acct(skb, ct, type) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0 || ctnetlink_dump_protoinfo(skb, ct) < 0 || ctnetlink_dump_helpinfo(skb, ct) < 0 || @@ -675,10 +677,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto nla_put_failure; if (events & (1 << IPCT_DESTROY)) { - if (ctnetlink_dump_counters(skb, ct, - IP_CT_DIR_ORIGINAL, type) < 0 || - ctnetlink_dump_counters(skb, ct, - IP_CT_DIR_REPLY, type) < 0 || + if (ctnetlink_dump_acct(skb, ct, type) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0) goto nla_put_failure; } else { -- cgit v1.2.3-55-g7522 From c359c4157cf0d852387aff2f2d83fef039aadc2c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 4 Nov 2013 15:58:56 +0300 Subject: netfilter: nft_compat: use _safe version of list_for_each We need to use the _safe version of list_for_each_entry() here otherwise we have a use after free bug. Signed-off-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 4811f762e060..a82667c64729 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -634,9 +634,9 @@ nft_match_select_ops(const struct nft_ctx *ctx, static void nft_match_release(void) { - struct nft_xt *nft_match; + struct nft_xt *nft_match, *tmp; - list_for_each_entry(nft_match, &nft_match_list, head) + list_for_each_entry_safe(nft_match, tmp, &nft_match_list, head) kfree(nft_match); } @@ -705,9 +705,9 @@ nft_target_select_ops(const struct nft_ctx *ctx, static void nft_target_release(void) { - struct nft_xt *nft_target; + struct nft_xt *nft_target, *tmp; - list_for_each_entry(nft_target, &nft_target_list, head) + list_for_each_entry_safe(nft_target, tmp, &nft_target_list, head) kfree(nft_target); } -- cgit v1.2.3-55-g7522 From cdbe7c2d6d485459801838c244c409b938e32df5 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 7 Nov 2013 19:59:19 +0100 Subject: nfnetlink: do not ack malformed messages Commit 0628b123c96d ("netfilter: nfnetlink: add batch support and use it from nf_tables") introduced a bug leading to various crashes in netlink_ack when netlink message with invalid nlmsg_len was sent by an unprivileged user. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- net/netfilter/nfnetlink.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 027f16af51a0..046aa13b4fea 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -363,13 +363,15 @@ static void nfnetlink_rcv(struct sk_buff *skb) struct net *net = sock_net(skb->sk); int msglen; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return netlink_ack(skb, nlh, -EPERM); - if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) return; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + netlink_ack(skb, nlh, -EPERM); + return; + } + if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) { struct nfgenmsg *nfgenmsg; -- cgit v1.2.3-55-g7522 From 6aafeef03b9d9ecf255f3a80ed85ee070260e1ae Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 6 Nov 2013 17:52:20 +0100 Subject: netfilter: push reasm skb through instead of original frag skbs Pushing original fragments through causes several problems. For example for matching, frags may not be matched correctly. Take following example: On HOSTA do: ip6tables -I INPUT -p icmpv6 -j DROP ip6tables -I INPUT -p icmpv6 -m icmp6 --icmpv6-type 128 -j ACCEPT and on HOSTB you do: ping6 HOSTA -s2000 (MTU is 1500) Incoming echo requests will be filtered out on HOSTA. This issue does not occur with smaller packets than MTU (where fragmentation does not happen) As was discussed previously, the only correct solution seems to be to use reassembled skb instead of separete frags. Doing this has positive side effects in reducing sk_buff by one pointer (nfct_reasm) and also the reams dances in ipvs and conntrack can be removed. Future plan is to remove net/ipv6/netfilter/nf_conntrack_reasm.c entirely and use code in net/ipv6/reassembly.c instead. Signed-off-by: Jiri Pirko Acked-by: Julian Anastasov Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/linux/skbuff.h | 32 --------------- include/net/ip_vs.h | 32 +-------------- include/net/netfilter/ipv6/nf_defrag_ipv6.h | 4 +- net/core/skbuff.c | 3 -- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 56 +------------------------- net/ipv6/netfilter/nf_conntrack_reasm.c | 19 +-------- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 7 +++- net/netfilter/ipvs/ip_vs_core.c | 55 +------------------------ net/netfilter/ipvs/ip_vs_pe_sip.c | 8 +--- 9 files changed, 13 insertions(+), 203 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 036ec7d8a83a..215b5ea1cb30 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -337,11 +337,6 @@ typedef unsigned int sk_buff_data_t; typedef unsigned char *sk_buff_data_t; #endif -#if defined(CONFIG_NF_DEFRAG_IPV4) || defined(CONFIG_NF_DEFRAG_IPV4_MODULE) || \ - defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE) -#define NET_SKBUFF_NF_DEFRAG_NEEDED 1 -#endif - /** * struct sk_buff - socket buffer * @next: Next buffer in list @@ -374,7 +369,6 @@ typedef unsigned char *sk_buff_data_t; * @protocol: Packet protocol from driver * @destructor: Destruct function * @nfct: Associated connection, if any - * @nfct_reasm: netfilter conntrack re-assembly pointer * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index @@ -463,9 +457,6 @@ struct sk_buff { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct nf_conntrack *nfct; #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED - struct sk_buff *nfct_reasm; -#endif #ifdef CONFIG_BRIDGE_NETFILTER struct nf_bridge_info *nf_bridge; #endif @@ -2595,18 +2586,6 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct) atomic_inc(&nfct->use); } #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED -static inline void nf_conntrack_get_reasm(struct sk_buff *skb) -{ - if (skb) - atomic_inc(&skb->users); -} -static inline void nf_conntrack_put_reasm(struct sk_buff *skb) -{ - if (skb) - kfree_skb(skb); -} -#endif #ifdef CONFIG_BRIDGE_NETFILTER static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) { @@ -2625,10 +2604,6 @@ static inline void nf_reset(struct sk_buff *skb) nf_conntrack_put(skb->nfct); skb->nfct = NULL; #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED - nf_conntrack_put_reasm(skb->nfct_reasm); - skb->nfct_reasm = NULL; -#endif #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(skb->nf_bridge); skb->nf_bridge = NULL; @@ -2650,10 +2625,6 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) nf_conntrack_get(src->nfct); dst->nfctinfo = src->nfctinfo; #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED - dst->nfct_reasm = src->nfct_reasm; - nf_conntrack_get_reasm(src->nfct_reasm); -#endif #ifdef CONFIG_BRIDGE_NETFILTER dst->nf_bridge = src->nf_bridge; nf_bridge_get(src->nf_bridge); @@ -2665,9 +2636,6 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(dst->nfct); #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED - nf_conntrack_put_reasm(dst->nfct_reasm); -#endif #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(dst->nf_bridge); #endif diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index cd7275f9c463..5679d927562b 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -109,7 +109,6 @@ extern int ip_vs_conn_tab_size; struct ip_vs_iphdr { __u32 len; /* IPv4 simply where L4 starts IPv6 where L4 Transport Header starts */ - __u32 thoff_reasm; /* Transport Header Offset in nfct_reasm skb */ __u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/ __s16 protocol; __s32 flags; @@ -117,34 +116,12 @@ struct ip_vs_iphdr { union nf_inet_addr daddr; }; -/* Dependency to module: nf_defrag_ipv6 */ -#if defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE) -static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb) -{ - return skb->nfct_reasm; -} -static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset, - int len, void *buffer, - const struct ip_vs_iphdr *ipvsh) -{ - if (unlikely(ipvsh->fragoffs && skb_nfct_reasm(skb))) - return skb_header_pointer(skb_nfct_reasm(skb), - ipvsh->thoff_reasm, len, buffer); - - return skb_header_pointer(skb, offset, len, buffer); -} -#else -static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb) -{ - return NULL; -} static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset, int len, void *buffer, const struct ip_vs_iphdr *ipvsh) { return skb_header_pointer(skb, offset, len, buffer); } -#endif static inline void ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr) @@ -171,19 +148,12 @@ ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr) (struct ipv6hdr *)skb_network_header(skb); iphdr->saddr.in6 = iph->saddr; iphdr->daddr.in6 = iph->daddr; - /* ipv6_find_hdr() updates len, flags, thoff_reasm */ - iphdr->thoff_reasm = 0; + /* ipv6_find_hdr() updates len, flags */ iphdr->len = 0; iphdr->flags = 0; iphdr->protocol = ipv6_find_hdr(skb, &iphdr->len, -1, &iphdr->fragoffs, &iphdr->flags); - /* get proto from re-assembled packet and it's offset */ - if (skb_nfct_reasm(skb)) - iphdr->protocol = ipv6_find_hdr(skb_nfct_reasm(skb), - &iphdr->thoff_reasm, - -1, NULL, NULL); - } else #endif { diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h index 5613412e7dc2..27666d8a0bd0 100644 --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -6,9 +6,7 @@ void nf_defrag_ipv6_enable(void); int nf_ct_frag6_init(void); void nf_ct_frag6_cleanup(void); struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user); -void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, - struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *)); +void nf_ct_frag6_consume_orig(struct sk_buff *skb); struct inet_frags_ctl; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8c5197fe55a4..8cec1e6b844d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -592,9 +592,6 @@ static void skb_release_head_state(struct sk_buff *skb) #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_conntrack_put(skb->nfct); #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED - nf_conntrack_put_reasm(skb->nfct_reasm); -#endif #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(skb->nf_bridge); #endif diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 486545eb42ce..4cbc6b290dd5 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -169,64 +169,13 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int __ipv6_conntrack_in(struct net *net, - unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct sk_buff *reasm = skb->nfct_reasm; - const struct nf_conn_help *help; - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - - /* This packet is fragmented and has reassembled packet. */ - if (reasm) { - /* Reassembled packet isn't parsed yet ? */ - if (!reasm->nfct) { - unsigned int ret; - - ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm); - if (ret != NF_ACCEPT) - return ret; - } - - /* Conntrack helpers need the entire reassembled packet in the - * POST_ROUTING hook. In case of unconfirmed connections NAT - * might reassign a helper, so the entire packet is also - * required. - */ - ct = nf_ct_get(reasm, &ctinfo); - if (ct != NULL && !nf_ct_is_untracked(ct)) { - help = nfct_help(ct); - if ((help && help->helper) || !nf_ct_is_confirmed(ct)) { - nf_conntrack_get_reasm(reasm); - NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, - (struct net_device *)in, - (struct net_device *)out, - okfn, NF_IP6_PRI_CONNTRACK + 1); - return NF_DROP_ERR(-ECANCELED); - } - } - - nf_conntrack_get(reasm->nfct); - skb->nfct = reasm->nfct; - skb->nfctinfo = reasm->nfctinfo; - return NF_ACCEPT; - } - - return nf_conntrack_in(net, PF_INET6, hooknum, skb); -} - static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return __ipv6_conntrack_in(dev_net(in), ops->hooknum, skb, in, out, - okfn); + return nf_conntrack_in(dev_net(in), PF_INET6, ops->hooknum, skb); } static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, @@ -240,8 +189,7 @@ static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return __ipv6_conntrack_in(dev_net(out), ops->hooknum, skb, in, out, - okfn); + return nf_conntrack_in(dev_net(out), PF_INET6, ops->hooknum, skb); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 4a258263d8ec..767ab8da8218 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -633,31 +633,16 @@ ret_orig: return skb; } -void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, - struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *)) +void nf_ct_frag6_consume_orig(struct sk_buff *skb) { struct sk_buff *s, *s2; - unsigned int ret = 0; for (s = NFCT_FRAG6_CB(skb)->orig; s;) { - nf_conntrack_put_reasm(s->nfct_reasm); - nf_conntrack_get_reasm(skb); - s->nfct_reasm = skb; - s2 = s->next; s->next = NULL; - - if (ret != -ECANCELED) - ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, - in, out, okfn, - NF_IP6_PRI_CONNTRACK_DEFRAG + 1); - else - kfree_skb(s); - + consume_skb(s); s = s2; } - nf_conntrack_put_reasm(skb); } static int nf_ct_net_init(struct net *net) diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index ec483aa3f60f..7b9a748c6bac 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -75,8 +75,11 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, if (reasm == skb) return NF_ACCEPT; - nf_ct_frag6_output(ops->hooknum, reasm, (struct net_device *)in, - (struct net_device *)out, okfn); + nf_ct_frag6_consume_orig(reasm); + + NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, reasm, + (struct net_device *) in, (struct net_device *) out, + okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); return NF_STOLEN; } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 34fda62f40f6..4f26ee46b51f 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1139,12 +1139,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iph_skb(af, skb, &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (!iph.fragoffs && skb_nfct_reasm(skb)) { - struct sk_buff *reasm = skb_nfct_reasm(skb); - /* Save fw mark for coming frags */ - reasm->ipvs_property = 1; - reasm->mark = skb->mark; - } if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; int verdict = ip_vs_out_icmp_v6(skb, &related, @@ -1614,12 +1608,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (!iph.fragoffs && skb_nfct_reasm(skb)) { - struct sk_buff *reasm = skb_nfct_reasm(skb); - /* Save fw mark for coming frags. */ - reasm->ipvs_property = 1; - reasm->mark = skb->mark; - } if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum, @@ -1671,9 +1659,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) /* sorry, all this trouble for a no-hit :) */ IP_VS_DBG_PKT(12, af, pp, skb, 0, "ip_vs_in: packet continues traversal as normal"); - if (iph.fragoffs && !skb_nfct_reasm(skb)) { + if (iph.fragoffs) { /* Fragment that couldn't be mapped to a conn entry - * and don't have any pointer to a reasm skb * is missing module nf_defrag_ipv6 */ IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); @@ -1755,38 +1742,6 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, #ifdef CONFIG_IP_VS_IPV6 -/* - * AF_INET6 fragment handling - * Copy info from first fragment, to the rest of them. - */ -static unsigned int -ip_vs_preroute_frag6(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct sk_buff *reasm = skb_nfct_reasm(skb); - struct net *net; - - /* Skip if not a "replay" from nf_ct_frag6_output or first fragment. - * ipvs_property is set when checking first fragment - * in ip_vs_in() and ip_vs_out(). - */ - if (reasm) - IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property); - if (!reasm || !reasm->ipvs_property) - return NF_ACCEPT; - - net = skb_net(skb); - if (!net_ipvs(net)->enable) - return NF_ACCEPT; - - /* Copy stored fw mark, saved in ip_vs_{in,out} */ - skb->mark = reasm->mark; - - return NF_ACCEPT; -} - /* * AF_INET6 handler in NF_INET_LOCAL_IN chain * Schedule and forward packets from remote clients @@ -1924,14 +1879,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .priority = 100, }, #ifdef CONFIG_IP_VS_IPV6 - /* After mangle & nat fetch 2:nd fragment and following */ - { - .hook = ip_vs_preroute_frag6, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP6_PRI_NAT_DST + 1, - }, /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply6, diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 9ef22bdce9f1..bed5f7042529 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -65,7 +65,6 @@ static int get_callid(const char *dptr, unsigned int dataoff, static int ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) { - struct sk_buff *reasm = skb_nfct_reasm(skb); struct ip_vs_iphdr iph; unsigned int dataoff, datalen, matchoff, matchlen; const char *dptr; @@ -79,15 +78,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) /* todo: IPv6 fragments: * I think this only should be done for the first fragment. /HS */ - if (reasm) { - skb = reasm; - dataoff = iph.thoff_reasm + sizeof(struct udphdr); - } else - dataoff = iph.len + sizeof(struct udphdr); + dataoff = iph.len + sizeof(struct udphdr); if (dataoff >= skb->len) return -EINVAL; - /* todo: Check if this will mess-up the reasm skb !!! /HS */ retc = skb_linearize(skb); if (retc < 0) return retc; -- cgit v1.2.3-55-g7522