summaryrefslogtreecommitdiffstats
path: root/net/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/Kconfig12
-rw-r--r--net/netfilter/Makefile7
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c67
-rw-r--r--net/netfilter/ipvs/ip_vs_proto.c19
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c18
-rw-r--r--net/netfilter/nf_conncount.c386
-rw-r--r--net/netfilter/nf_conntrack_core.c252
-rw-r--r--net/netfilter/nf_conntrack_expect.c1
-rw-r--r--net/netfilter/nf_conntrack_helper.c6
-rw-r--r--net/netfilter/nf_conntrack_l3proto_generic.c66
-rw-r--r--net/netfilter/nf_conntrack_netlink.c98
-rw-r--r--net/netfilter/nf_conntrack_proto.c845
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c44
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c32
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c24
-rw-r--r--net/netfilter/nf_conntrack_proto_icmp.c388
-rw-r--r--net/netfilter/nf_conntrack_proto_icmpv6.c387
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c46
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c52
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c55
-rw-r--r--net/netfilter/nf_conntrack_standalone.c28
-rw-r--r--net/netfilter/nf_flow_table_core.c13
-rw-r--r--net/netfilter/nf_nat_core.c8
-rw-r--r--net/netfilter/nf_osf.c252
-rw-r--r--net/netfilter/nf_tables_api.c194
-rw-r--r--net/netfilter/nfnetlink.c23
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c13
-rw-r--r--net/netfilter/nft_chain_filter.c4
-rw-r--r--net/netfilter/nft_connlimit.c36
-rw-r--r--net/netfilter/nft_ct.c2
-rw-r--r--net/netfilter/nft_dynset.c2
-rw-r--r--net/netfilter/nft_socket.c17
-rw-r--r--net/netfilter/utils.c131
-rw-r--r--net/netfilter/xt_CT.c2
-rw-r--r--net/netfilter/xt_TEE.c4
-rw-r--r--net/netfilter/xt_TPROXY.c9
39 files changed, 2556 insertions, 993 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index f0a1c536ef15..6f6c959aeb8f 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -49,6 +49,8 @@ config NETFILTER_NETLINK_LOG
config NF_CONNTRACK
tristate "Netfilter connection tracking support"
default m if NETFILTER_ADVANCED=n
+ select NF_DEFRAG_IPV4
+ select NF_DEFRAG_IPV6 if IPV6 != n
help
Connection tracking keeps a record of what packets have passed
through your machine, in order to figure out how they are related
@@ -615,7 +617,7 @@ config NFT_SOCKET
tristate "Netfilter nf_tables socket match support"
depends on IPV6 || IPV6=n
select NF_SOCKET_IPV4
- select NF_SOCKET_IPV6 if IPV6
+ select NF_SOCKET_IPV6 if NF_TABLES_IPV6
help
This option allows matching for the presence or absence of a
corresponding socket and its attributes.
@@ -881,7 +883,7 @@ config NETFILTER_XT_TARGET_LOG
tristate "LOG target support"
select NF_LOG_COMMON
select NF_LOG_IPV4
- select NF_LOG_IPV6 if IPV6
+ select NF_LOG_IPV6 if IP6_NF_IPTABLES
default m if NETFILTER_ADVANCED=n
help
This option adds a `LOG' target, which allows you to create rules in
@@ -973,7 +975,7 @@ config NETFILTER_XT_TARGET_TEE
depends on IPV6 || IPV6=n
depends on !NF_CONNTRACK || NF_CONNTRACK
select NF_DUP_IPV4
- select NF_DUP_IPV6 if IPV6
+ select NF_DUP_IPV6 if IP6_NF_IPTABLES
---help---
This option adds a "TEE" target with which a packet can be cloned and
this clone be rerouted to another nexthop.
@@ -1481,8 +1483,8 @@ config NETFILTER_XT_MATCH_SOCKET
depends on NETFILTER_ADVANCED
depends on IPV6 || IPV6=n
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
- depends on NF_SOCKET_IPV4
- depends on NF_SOCKET_IPV6
+ select NF_SOCKET_IPV4
+ select NF_SOCKET_IPV6 if IP6_NF_IPTABLES
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 8a76dced974d..dd26e4961f43 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,7 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o utils.o
-nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
+nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o \
+ nf_conntrack_proto.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o \
+ nf_conntrack_proto_icmp.o \
+ nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
+
+nf_conntrack-$(subst m,y,$(CONFIG_IPV6)) += nf_conntrack_proto_icmpv6.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 99e0aa350dc5..0edc62910ebf 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -825,12 +825,23 @@ static void ip_vs_conn_expire(struct timer_list *t)
/* Unlink conn if not referenced anymore */
if (likely(ip_vs_conn_unlink(cp))) {
+ struct ip_vs_conn *ct = cp->control;
+
/* delete the timer if it is activated by other users */
del_timer(&cp->timer);
/* does anybody control me? */
- if (cp->control)
+ if (ct) {
ip_vs_control_del(cp);
+ /* Drop CTL or non-assured TPL if not used anymore */
+ if (!cp->timeout && !atomic_read(&ct->n_control) &&
+ (!(ct->flags & IP_VS_CONN_F_TEMPLATE) ||
+ !(ct->state & IP_VS_CTPL_S_ASSURED))) {
+ IP_VS_DBG(4, "drop controlling connection\n");
+ ct->timeout = 0;
+ ip_vs_conn_expire_now(ct);
+ }
+ }
if ((cp->flags & IP_VS_CONN_F_NFCT) &&
!(cp->flags & IP_VS_CONN_F_ONE_PACKET)) {
@@ -872,6 +883,10 @@ static void ip_vs_conn_expire(struct timer_list *t)
/* Modify timer, so that it expires as soon as possible.
* Can be called without reference only if under RCU lock.
+ * We can have such chain of conns linked with ->control: DATA->CTL->TPL
+ * - DATA (eg. FTP) and TPL (persistence) can be present depending on setup
+ * - cp->timeout=0 indicates all conns from chain should be dropped but
+ * TPL is not dropped if in assured state
*/
void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
{
@@ -1107,7 +1122,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
dbuf, ntohs(cp->dport),
- ip_vs_state_name(cp->protocol, cp->state),
+ ip_vs_state_name(cp),
(cp->timer.expires-jiffies)/HZ, pe_data);
else
#endif
@@ -1118,7 +1133,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
dbuf, ntohs(cp->dport),
- ip_vs_state_name(cp->protocol, cp->state),
+ ip_vs_state_name(cp),
(cp->timer.expires-jiffies)/HZ, pe_data);
}
return 0;
@@ -1169,7 +1184,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
dbuf, ntohs(cp->dport),
- ip_vs_state_name(cp->protocol, cp->state),
+ ip_vs_state_name(cp),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
else
@@ -1181,7 +1196,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
dbuf, ntohs(cp->dport),
- ip_vs_state_name(cp->protocol, cp->state),
+ ip_vs_state_name(cp),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
}
@@ -1197,8 +1212,11 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
#endif
-/*
- * Randomly drop connection entries before running out of memory
+/* Randomly drop connection entries before running out of memory
+ * Can be used for DATA and CTL conns. For TPL conns there are exceptions:
+ * - traffic for services in OPS mode increases ct->in_pkts, so it is supported
+ * - traffic for services not in OPS mode does not increase ct->in_pkts in
+ * all cases, so it is not supported
*/
static inline int todrop_entry(struct ip_vs_conn *cp)
{
@@ -1242,7 +1260,7 @@ static inline bool ip_vs_conn_ops_mode(struct ip_vs_conn *cp)
void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
{
int idx;
- struct ip_vs_conn *cp, *cp_c;
+ struct ip_vs_conn *cp;
rcu_read_lock();
/*
@@ -1254,13 +1272,15 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
if (cp->ipvs != ipvs)
continue;
+ if (atomic_read(&cp->n_control))
+ continue;
if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
- if (atomic_read(&cp->n_control) ||
- !ip_vs_conn_ops_mode(cp))
- continue;
- else
- /* connection template of OPS */
+ /* connection template of OPS */
+ if (ip_vs_conn_ops_mode(cp))
goto try_drop;
+ if (!(cp->state & IP_VS_CTPL_S_ASSURED))
+ goto drop;
+ continue;
}
if (cp->protocol == IPPROTO_TCP) {
switch(cp->state) {
@@ -1294,15 +1314,10 @@ try_drop:
continue;
}
- IP_VS_DBG(4, "del connection\n");
+drop:
+ IP_VS_DBG(4, "drop connection\n");
+ cp->timeout = 0;
ip_vs_conn_expire_now(cp);
- cp_c = cp->control;
- /* cp->control is valid only with reference to cp */
- if (cp_c && __ip_vs_conn_get(cp)) {
- IP_VS_DBG(4, "del conn template\n");
- ip_vs_conn_expire_now(cp_c);
- __ip_vs_conn_put(cp);
- }
}
cond_resched_rcu();
}
@@ -1325,15 +1340,19 @@ flush_again:
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
if (cp->ipvs != ipvs)
continue;
- IP_VS_DBG(4, "del connection\n");
- ip_vs_conn_expire_now(cp);
+ /* As timers are expired in LIFO order, restart
+ * the timer of controlling connection first, so
+ * that it is expired after us.
+ */
cp_c = cp->control;
/* cp->control is valid only with reference to cp */
if (cp_c && __ip_vs_conn_get(cp)) {
- IP_VS_DBG(4, "del conn template\n");
+ IP_VS_DBG(4, "del controlling connection\n");
ip_vs_conn_expire_now(cp_c);
__ip_vs_conn_put(cp);
}
+ IP_VS_DBG(4, "del connection\n");
+ ip_vs_conn_expire_now(cp);
}
cond_resched_rcu();
}
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index ca880a3ad033..54ee84adf0bd 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -42,6 +42,11 @@
static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
+/* States for conn templates: NONE or words separated with ",", max 15 chars */
+static const char *ip_vs_ctpl_state_name_table[IP_VS_CTPL_S_LAST] = {
+ [IP_VS_CTPL_S_NONE] = "NONE",
+ [IP_VS_CTPL_S_ASSURED] = "ASSURED",
+};
/*
* register an ipvs protocol
@@ -193,12 +198,20 @@ ip_vs_create_timeout_table(int *table, int size)
}
-const char * ip_vs_state_name(__u16 proto, int state)
+const char *ip_vs_state_name(const struct ip_vs_conn *cp)
{
- struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
+ unsigned int state = cp->state;
+ struct ip_vs_protocol *pp;
+
+ if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
+ if (state >= IP_VS_CTPL_S_LAST)
+ return "ERR!";
+ return ip_vs_ctpl_state_name_table[state] ? : "?";
+ }
+ pp = ip_vs_proto_get(cp->protocol);
if (pp == NULL || pp->state_name == NULL)
- return (IPPROTO_IP == proto) ? "NONE" : "ERR!";
+ return (cp->protocol == IPPROTO_IP) ? "NONE" : "ERR!";
return pp->state_name(state);
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 3250c4a1111e..b0cd7d08f2a7 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -461,6 +461,8 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
cp->flags &= ~IP_VS_CONN_F_INACTIVE;
}
}
+ if (next_state == IP_VS_SCTP_S_ESTABLISHED)
+ ip_vs_control_assure_ct(cp);
}
if (likely(pd))
cp->timeout = pd->timeout_table[cp->state = next_state];
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 80d10ad12a15..1770fc6ce960 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -569,6 +569,8 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
cp->flags &= ~IP_VS_CONN_F_INACTIVE;
}
}
+ if (new_state == IP_VS_TCP_S_ESTABLISHED)
+ ip_vs_control_assure_ct(cp);
}
if (likely(pd))
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index e0ef11c3691e..0f53c49025f8 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -460,6 +460,8 @@ udp_state_transition(struct ip_vs_conn *cp, int direction,
}
cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
+ if (direction == IP_VS_DIR_OUTPUT)
+ ip_vs_control_assure_ct(cp);
}
static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 001501e25625..d4020c5e831d 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1003,12 +1003,9 @@ static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer
continue;
}
} else {
- /* protocol in templates is not used for state/timeout */
- if (state > 0) {
- IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
- state);
- state = 0;
- }
+ if (state >= IP_VS_CTPL_S_LAST)
+ IP_VS_DBG(7, "BACKUP v0, Invalid tpl state %u\n",
+ state);
}
ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol,
@@ -1166,12 +1163,9 @@ static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *m
goto out;
}
} else {
- /* protocol in templates is not used for state/timeout */
- if (state > 0) {
- IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
- state);
- state = 0;
- }
+ if (state >= IP_VS_CTPL_S_LAST)
+ IP_VS_DBG(7, "BACKUP, Invalid tpl state %u\n",
+ state);
}
if (ip_vs_conn_fill_param_sync(ipvs, af, s, &param, pe_data,
pe_data_len, pe_name, pe_name_len)) {
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 510039862aa9..02ca7df793f5 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -44,17 +44,19 @@
/* we will save the tuples of all connections we care about */
struct nf_conncount_tuple {
- struct hlist_node node;
+ struct list_head node;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_zone zone;
int cpu;
u32 jiffies32;
+ struct rcu_head rcu_head;
};
struct nf_conncount_rb {
struct rb_node node;
- struct hlist_head hhead; /* connections/hosts in same subnet */
+ struct nf_conncount_list list;
u32 key[MAX_KEYLEN];
+ struct rcu_head rcu_head;
};
static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp;
@@ -62,6 +64,10 @@ static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_i
struct nf_conncount_data {
unsigned int keylen;
struct rb_root root[CONNCOUNT_SLOTS];
+ struct net *net;
+ struct work_struct gc_work;
+ unsigned long pending_trees[BITS_TO_LONGS(CONNCOUNT_SLOTS)];
+ unsigned int gc_tree;
};
static u_int32_t conncount_rnd __read_mostly;
@@ -82,26 +88,70 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
return memcmp(a, b, klen * sizeof(u32));
}
-bool nf_conncount_add(struct hlist_head *head,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone)
+enum nf_conncount_list_add
+nf_conncount_add(struct nf_conncount_list *list,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
{
struct nf_conncount_tuple *conn;
+ if (WARN_ON_ONCE(list->count > INT_MAX))
+ return NF_CONNCOUNT_ERR;
+
conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
if (conn == NULL)
- return false;
+ return NF_CONNCOUNT_ERR;
+
conn->tuple = *tuple;
conn->zone = *zone;
conn->cpu = raw_smp_processor_id();
conn->jiffies32 = (u32)jiffies;
- hlist_add_head(&conn->node, head);
- return true;
+ spin_lock(&list->list_lock);
+ if (list->dead == true) {
+ kmem_cache_free(conncount_conn_cachep, conn);
+ spin_unlock(&list->list_lock);
+ return NF_CONNCOUNT_SKIP;
+ }
+ list_add_tail(&conn->node, &list->head);
+ list->count++;
+ spin_unlock(&list->list_lock);
+ return NF_CONNCOUNT_ADDED;
}
EXPORT_SYMBOL_GPL(nf_conncount_add);
+static void __conn_free(struct rcu_head *h)
+{
+ struct nf_conncount_tuple *conn;
+
+ conn = container_of(h, struct nf_conncount_tuple, rcu_head);
+ kmem_cache_free(conncount_conn_cachep, conn);
+}
+
+static bool conn_free(struct nf_conncount_list *list,
+ struct nf_conncount_tuple *conn)
+{
+ bool free_entry = false;
+
+ spin_lock(&list->list_lock);
+
+ if (list->count == 0) {
+ spin_unlock(&list->list_lock);
+ return free_entry;
+ }
+
+ list->count--;
+ list_del_rcu(&conn->node);
+ if (list->count == 0)
+ free_entry = true;
+
+ spin_unlock(&list->list_lock);
+ call_rcu(&conn->rcu_head, __conn_free);
+ return free_entry;
+}
+
static const struct nf_conntrack_tuple_hash *
-find_or_evict(struct net *net, struct nf_conncount_tuple *conn)
+find_or_evict(struct net *net, struct nf_conncount_list *list,
+ struct nf_conncount_tuple *conn, bool *free_entry)
{
const struct nf_conntrack_tuple_hash *found;
unsigned long a, b;
@@ -121,34 +171,37 @@ find_or_evict(struct net *net, struct nf_conncount_tuple *conn)
*/
age = a - b;
if (conn->cpu == cpu || age >= 2) {
- hlist_del(&conn->node);
- kmem_cache_free(conncount_conn_cachep, conn);
+ *free_entry = conn_free(list, conn);
return ERR_PTR(-ENOENT);
}
return ERR_PTR(-EAGAIN);
}
-unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone,
- bool *addit)
+void nf_conncount_lookup(struct net *net,
+ struct nf_conncount_list *list,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone,
+ bool *addit)
{
const struct nf_conntrack_tuple_hash *found;
- struct nf_conncount_tuple *conn;
+ struct nf_conncount_tuple *conn, *conn_n;
struct nf_conn *found_ct;
- struct hlist_node *n;
- unsigned int length = 0;
+ unsigned int collect = 0;
+ bool free_entry = false;
+ /* best effort only */
*addit = tuple ? true : false;
/* check the saved connections */
- hlist_for_each_entry_safe(conn, n, head, node) {
- found = find_or_evict(net, conn);
+ list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+ if (collect > CONNCOUNT_GC_MAX_NODES)
+ break;
+
+ found = find_or_evict(net, list, conn, &free_entry);
if (IS_ERR(found)) {
/* Not found, but might be about to be confirmed */
if (PTR_ERR(found) == -EAGAIN) {
- length++;
if (!tuple)
continue;
@@ -156,7 +209,8 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
nf_ct_zone_id(zone, zone->dir))
*addit = false;
- }
+ } else if (PTR_ERR(found) == -ENOENT)
+ collect++;
continue;
}
@@ -165,9 +219,10 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) &&
nf_ct_zone_equal(found_ct, zone, zone->dir)) {
/*
- * Just to be sure we have it only once in the list.
* We should not see tuples twice unless someone hooks
* this into a table without "-p tcp --syn".
+ *
+ * Attempt to avoid a re-add in this case.
*/
*addit = false;
} else if (already_closed(found_ct)) {
@@ -176,19 +231,75 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
* closed already -> ditch it
*/
nf_ct_put(found_ct);
- hlist_del(&conn->node);
- kmem_cache_free(conncount_conn_cachep, conn);
+ conn_free(list, conn);
+ collect++;
continue;
}
nf_ct_put(found_ct);
- length++;
}
-
- return length;
}
EXPORT_SYMBOL_GPL(nf_conncount_lookup);
+void nf_conncount_list_init(struct nf_conncount_list *list)
+{
+ spin_lock_init(&list->list_lock);
+ INIT_LIST_HEAD(&list->head);
+ list->count = 1;
+ list->dead = false;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_list_init);
+
+/* Return true if the list is empty */
+bool nf_conncount_gc_list(struct net *net,
+ struct nf_conncount_list *list)
+{
+ const struct nf_conntrack_tuple_hash *found;
+ struct nf_conncount_tuple *conn, *conn_n;
+ struct nf_conn *found_ct;
+ unsigned int collected = 0;
+ bool free_entry = false;
+
+ list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+ found = find_or_evict(net, list, conn, &free_entry);
+ if (IS_ERR(found)) {
+ if (PTR_ERR(found) == -ENOENT) {
+ if (free_entry)
+ return true;
+ collected++;
+ }
+ continue;
+ }
+
+ found_ct = nf_ct_tuplehash_to_ctrack(found);
+ if (already_closed(found_ct)) {
+ /*
+ * we do not care about connections which are
+ * closed already -> ditch it
+ */
+ nf_ct_put(found_ct);
+ if (conn_free(list, conn))
+ return true;
+ collected++;
+ continue;
+ }
+
+ nf_ct_put(found_ct);
+ if (collected > CONNCOUNT_GC_MAX_NODES)
+ return false;
+ }
+ return false;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_gc_list);
+
+static void __tree_nodes_free(struct rcu_head *h)
+{
+ struct nf_conncount_rb *rbconn;
+
+ rbconn = container_of(h, struct nf_conncount_rb, rcu_head);
+ kmem_cache_free(conncount_rb_cachep, rbconn);
+}
+
static void tree_nodes_free(struct rb_root *root,
struct nf_conncount_rb *gc_nodes[],
unsigned int gc_count)
@@ -197,32 +308,46 @@ static void tree_nodes_free(struct rb_root *root,
while (gc_count) {
rbconn = gc_nodes[--gc_count];
- rb_erase(&rbconn->node, root);
- kmem_cache_free(conncount_rb_cachep, rbconn);
+ spin_lock(&rbconn->list.list_lock);
+ if (rbconn->list.count == 0 && rbconn->list.dead == false) {
+ rbconn->list.dead = true;
+ rb_erase(&rbconn->node, root);
+ call_rcu(&rbconn->rcu_head, __tree_nodes_free);
+ }
+ spin_unlock(&rbconn->list.list_lock);
}
}
+static void schedule_gc_worker(struct nf_conncount_data *data, int tree)
+{
+ set_bit(tree, data->pending_trees);
+ schedule_work(&data->gc_work);
+}
+
static unsigned int
-count_tree(struct net *net, struct rb_root *root,
- const u32 *key, u8 keylen,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone)
+insert_tree(struct net *net,
+ struct nf_conncount_data *data,
+ struct rb_root *root,
+ unsigned int hash,
+ const u32 *key,
+ u8 keylen,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
{
+ enum nf_conncount_list_add ret;
struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
struct rb_node **rbnode, *parent;
struct nf_conncount_rb *rbconn;
struct nf_conncount_tuple *conn;
- unsigned int gc_count;
- bool no_gc = false;
+ unsigned int count = 0, gc_count = 0;
+ bool node_found = false;
+
+ spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
- restart:
- gc_count = 0;
parent = NULL;
rbnode = &(root->rb_node);
while (*rbnode) {
int diff;
- bool addit;
-
rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node);
parent = *rbnode;
@@ -232,33 +357,30 @@ count_tree(struct net *net, struct rb_root *root,
} else if (diff > 0) {
rbnode = &((*rbnode)->rb_right);
} else {
- /* same source network -> be counted! */
- unsigned int count;
-
- count = nf_conncount_lookup(net, &rbconn->hhead, tuple,
- zone, &addit);
-
- tree_nodes_free(root, gc_nodes, gc_count);
- if (!addit)
- return count;
-
- if (!nf_conncount_add(&rbconn->hhead, tuple, zone))
- return 0; /* hotdrop */
-
- return count + 1;
+ /* unlikely: other cpu added node already */
+ node_found = true;
+ ret = nf_conncount_add(&rbconn->list, tuple, zone);
+ if (ret == NF_CONNCOUNT_ERR) {
+ count = 0; /* hotdrop */
+ } else if (ret == NF_CONNCOUNT_ADDED) {
+ count = rbconn->list.count;
+ } else {
+ /* NF_CONNCOUNT_SKIP, rbconn is already
+ * reclaimed by gc, insert a new tree node
+ */
+ node_found = false;
+ }
+ break;
}
- if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
+ if (gc_count >= ARRAY_SIZE(gc_nodes))
continue;
- /* only used for GC on hhead, retval and 'addit' ignored */
- nf_conncount_lookup(net, &rbconn->hhead, tuple, zone, &addit);
- if (hlist_empty(&rbconn->hhead))
+ if (nf_conncount_gc_list(net, &rbconn->list))
gc_nodes[gc_count++] = rbconn;
}
if (gc_count) {
- no_gc = true;
tree_nodes_free(root, gc_nodes, gc_count);
/* tree_node_free before new allocation permits
* allocator to re-use newly free'd object.
@@ -266,58 +388,146 @@ count_tree(struct net *net, struct rb_root *root,
* This is a rare event; in most cases we will find
* existing node to re-use. (or gc_count is 0).
*/
- goto restart;
+
+ if (gc_count >= ARRAY_SIZE(gc_nodes))
+ schedule_gc_worker(data, hash);
}
- if (!tuple)
- return 0;
+ if (node_found)
+ goto out_unlock;
- /* no match, need to insert new node */
+ /* expected case: match, insert new node */
rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
if (rbconn == NULL)
- return 0;
+ goto out_unlock;
conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
if (conn == NULL) {
kmem_cache_free(conncount_rb_cachep, rbconn);
- return 0;
+ goto out_unlock;
}
conn->tuple = *tuple;
conn->zone = *zone;
memcpy(rbconn->key, key, sizeof(u32) * keylen);
- INIT_HLIST_HEAD(&rbconn->hhead);
- hlist_add_head(&conn->node, &rbconn->hhead);
+ nf_conncount_list_init(&rbconn->list);
+ list_add(&conn->node, &rbconn->list.head);
+ count = 1;
rb_link_node(&rbconn->node, parent, rbnode);
rb_insert_color(&rbconn->node, root);
- return 1;
+out_unlock:
+ spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
+ return count;
}
-/* Count and return number of conntrack entries in 'net' with particular 'key'.
- * If 'tuple' is not null, insert it into the accounting data structure.
- */
-unsigned int nf_conncount_count(struct net *net,
- struct nf_conncount_data *data,
- const u32 *key,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone)
+static unsigned int
+count_tree(struct net *net,
+ struct nf_conncount_data *data,
+ const u32 *key,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
{
+ enum nf_conncount_list_add ret;
struct rb_root *root;
- int count;
- u32 hash;
+ struct rb_node *parent;
+ struct nf_conncount_rb *rbconn;
+ unsigned int hash;
+ u8 keylen = data->keylen;
hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS;
root = &data->root[hash];
- spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
+ parent = rcu_dereference_raw(root->rb_node);
+ while (parent) {
+ int diff;
+ bool addit;
- count = count_tree(net, root, key, data->keylen, tuple, zone);
+ rbconn = rb_entry(parent, struct nf_conncount_rb, node);
- spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
+ diff = key_diff(key, rbconn->key, keylen);
+ if (diff < 0) {
+ parent = rcu_dereference_raw(parent->rb_left);
+ } else if (diff > 0) {
+ parent = rcu_dereference_raw(parent->rb_right);
+ } else {
+ /* same source network -> be counted! */
+ nf_conncount_lookup(net, &rbconn->list, tuple, zone,
+ &addit);
- return count;
+ if (!addit)
+ return rbconn->list.count;
+
+ ret = nf_conncount_add(&rbconn->list, tuple, zone);
+ if (ret == NF_CONNCOUNT_ERR) {
+ return 0; /* hotdrop */
+ } else if (ret == NF_CONNCOUNT_ADDED) {
+ return rbconn->list.count;
+ } else {
+ /* NF_CONNCOUNT_SKIP, rbconn is already
+ * reclaimed by gc, insert a new tree node
+ */
+ break;
+ }
+ }
+ }
+
+ if (!tuple)
+ return 0;
+
+ return insert_tree(net, data, root, hash, key, keylen, tuple, zone);
+}
+
+static void tree_gc_worker(struct work_struct *work)
+{
+ struct nf_conncount_data *data = container_of(work, struct nf_conncount_data, gc_work);
+ struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES], *rbconn;
+ struct rb_root *root;
+ struct rb_node *node;
+ unsigned int tree, next_tree, gc_count = 0;
+
+ tree = data->gc_tree % CONNCOUNT_LOCK_SLOTS;
+ root = &data->root[tree];
+
+ rcu_read_lock();
+ for (node = rb_first(root); node != NULL; node = rb_next(node)) {
+ rbconn = rb_entry(node, struct nf_conncount_rb, node);
+ if (nf_conncount_gc_list(data->net, &rbconn->list))
+ gc_nodes[gc_count++] = rbconn;
+ }
+ rcu_read_unlock();
+
+ spin_lock_bh(&nf_conncount_locks[tree]);
+
+ if (gc_count) {
+ tree_nodes_free(root, gc_nodes, gc_count);
+ }
+
+ clear_bit(tree, data->pending_trees);
+
+ next_tree = (tree + 1) % CONNCOUNT_SLOTS;
+ next_tree = find_next_bit(data->pending_trees, next_tree, CONNCOUNT_SLOTS);
+
+ if (next_tree < CONNCOUNT_SLOTS) {
+ data->gc_tree = next_tree;
+ schedule_work(work);
+ }
+
+ spin_unlock_bh(&nf_conncount_locks[tree]);
+}
+
+/* Count and return number of conntrack entries in 'net' with particular 'key'.
+ * If 'tuple' is not null, insert it into the accounting data structure.
+ * Call with RCU read lock.
+ */
+unsigned int nf_conncount_count(struct net *net,
+ struct nf_conncount_data *data,
+ const u32 *key,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
+{
+ return count_tree(net, data, key, tuple, zone);
}
EXPORT_SYMBOL_GPL(nf_conncount_count);
@@ -348,17 +558,18 @@ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family
data->root[i] = RB_ROOT;
data->keylen = keylen / sizeof(u32);
+ data->net = net;
+ INIT_WORK(&data->gc_work, tree_gc_worker);
return data;
}
EXPORT_SYMBOL_GPL(nf_conncount_init);
-void nf_conncount_cache_free(struct hlist_head *hhead)
+void nf_conncount_cache_free(struct nf_conncount_list *list)
{
- struct nf_conncount_tuple *conn;
- struct hlist_node *n;
+ struct nf_conncount_tuple *conn, *conn_n;
- hlist_for_each_entry_safe(conn, n, hhead, node)
+ list_for_each_entry_safe(conn, conn_n, &list->head, node)
kmem_cache_free(conncount_conn_cachep, conn);
}
EXPORT_SYMBOL_GPL(nf_conncount_cache_free);
@@ -373,7 +584,7 @@ static void destroy_tree(struct rb_root *r)
rb_erase(node, r);
- nf_conncount_cache_free(&rbconn->hhead);
+ nf_conncount_cache_free(&rbconn->list);
kmem_cache_free(conncount_rb_cachep, rbconn);
}
@@ -384,6 +595,7 @@ void nf_conncount_destroy(struct net *net, unsigned int family,
{
unsigned int i;
+ cancel_work_sync(&data->gc_work);
nf_ct_netns_put(net, family);
for (i = 0; i < ARRAY_SIZE(data->root); ++i)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 805500197c22..8a113ca1eea2 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -37,7 +37,6 @@
#include <linux/rculist_nulls.h>
#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
@@ -55,6 +54,7 @@
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netns/hash.h>
+#include <net/ip.h>
#include "nf_internals.h"
@@ -222,7 +222,7 @@ static u32 hash_conntrack(const struct net *net,
return scale_hash(hash_conntrack_raw(tuple, net));
}
-bool
+static bool
nf_ct_get_tuple(const struct sk_buff *skb,
unsigned int nhoff,
unsigned int dataoff,
@@ -230,37 +230,151 @@ nf_ct_get_tuple(const struct sk_buff *skb,
u_int8_t protonum,
struct net *net,
struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
+ unsigned int size;
+ const __be32 *ap;
+ __be32 _addrs[8];
+ struct {
+ __be16 sport;
+ __be16 dport;
+ } _inet_hdr, *inet_hdr;
+
memset(tuple, 0, sizeof(*tuple));
tuple->src.l3num = l3num;
- if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
+ switch (l3num) {
+ case NFPROTO_IPV4:
+ nhoff += offsetof(struct iphdr, saddr);
+ size = 2 * sizeof(__be32);
+ break;
+ case NFPROTO_IPV6:
+ nhoff += offsetof(struct ipv6hdr, saddr);
+ size = sizeof(_addrs);
+ break;
+ default:
+ return true;
+ }
+
+ ap = skb_header_pointer(skb, nhoff, size, _addrs);
+ if (!ap)
return false;
+ switch (l3num) {
+ case NFPROTO_IPV4:
+ tuple->src.u3.ip = ap[0];
+ tuple->dst.u3.ip = ap[1];
+ break;
+ case NFPROTO_IPV6:
+ memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
+ memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
+ break;
+ }
+
tuple->dst.protonum = protonum;
tuple->dst.dir = IP_CT_DIR_ORIGINAL;
- return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
+ if (unlikely(l4proto->pkt_to_tuple))
+ return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
+
+ /* Actually only need first 4 bytes to get ports. */
+ inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr);
+ if (!inet_hdr)
+ return false;
+
+ tuple->src.u.udp.port = inet_hdr->sport;
+ tuple->dst.u.udp.port = inet_hdr->dport;
+ return true;
+}
+
+static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+ u_int8_t *protonum)
+{
+ int dataoff = -1;
+ const struct iphdr *iph;
+ struct iphdr _iph;
+
+ iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+ if (!iph)
+ return -1;
+
+ /* Conntrack defragments packets, we might still see fragments
+ * inside ICMP packets though.
+ */
+ if (iph->frag_off & htons(IP_OFFSET))
+ return -1;
+
+ dataoff = nhoff + (iph->ihl << 2);
+ *protonum = iph->protocol;
+
+ /* Check bogus IP headers */
+ if (dataoff > skb->len) {
+ pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n",
+ nhoff, iph->ihl << 2, skb->len);
+ return -1;
+ }
+ return dataoff;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+ u8 *protonum)
+{
+ int protoff = -1;
+ unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
+ __be16 frag_off;
+ u8 nexthdr;
+
+ if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
+ &nexthdr, sizeof(nexthdr)) != 0) {
+ pr_debug("can't get nexthdr\n");
+ return -1;
+ }
+ protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
+ /*
+ * (protoff == skb->len) means the packet has not data, just
+ * IPv6 and possibly extensions headers, but it is tracked anyway
+ */
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+ pr_debug("can't find proto in pkt\n");
+ return -1;
+ }
+
+ *protonum = nexthdr;
+ return protoff;
+}
+#endif
+
+static int get_l4proto(const struct sk_buff *skb,
+ unsigned int nhoff, u8 pf, u8 *l4num)
+{
+ switch (pf) {
+ case NFPROTO_IPV4:
+ return ipv4_get_l4proto(skb, nhoff, l4num);
+#if IS_ENABLED(CONFIG_IPV6)
+ case NFPROTO_IPV6:
+ return ipv6_get_l4proto(skb, nhoff, l4num);
+#endif
+ default:
+ *l4num = 0;
+ break;
+ }
+ return -1;
}
-EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
u_int16_t l3num,
struct net *net, struct nf_conntrack_tuple *tuple)
{
- const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
- unsigned int protoff;
- u_int8_t protonum;
+ u8 protonum;
+ int protoff;
int ret;
rcu_read_lock();
- l3proto = __nf_ct_l3proto_find(l3num);
- ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum);
- if (ret != NF_ACCEPT) {
+ protoff = get_l4proto(skb, nhoff, l3num, &protonum);
+ if (protoff <= 0) {
rcu_read_unlock();
return false;
}
@@ -268,7 +382,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
l4proto = __nf_ct_l4proto_find(l3num, protonum);
ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
- l3proto, l4proto);
+ l4proto);
rcu_read_unlock();
return ret;
@@ -278,19 +392,35 @@ EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
bool
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig,
- const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
memset(inverse, 0, sizeof(*inverse));
inverse->src.l3num = orig->src.l3num;
- if (l3proto->invert_tuple(inverse, orig) == 0)
- return false;
+
+ switch (orig->src.l3num) {
+ case NFPROTO_IPV4:
+ inverse->src.u3.ip = orig->dst.u3.ip;
+ inverse->dst.u3.ip = orig->src.u3.ip;
+ break;
+ case NFPROTO_IPV6:
+ inverse->src.u3.in6 = orig->dst.u3.in6;
+ inverse->dst.u3.in6 = orig->src.u3.in6;
+ break;
+ default:
+ break;
+ }
inverse->dst.dir = !orig->dst.dir;
inverse->dst.protonum = orig->dst.protonum;
- return l4proto->invert_tuple(inverse, orig);
+
+ if (unlikely(l4proto->invert_tuple))
+ return l4proto->invert_tuple(inverse, orig);
+
+ inverse->src.u.all = orig->dst.u.all;
+ inverse->dst.u.all = orig->src.u.all;
+ return true;
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
@@ -502,6 +632,18 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
net_eq(net, nf_ct_net(ct));
}
+static inline bool
+nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2)
+{
+ return nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ &ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple) &&
+ nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple,
+ &ct2->tuplehash[IP_CT_DIR_REPLY].tuple) &&
+ nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL) &&
+ nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_REPLY) &&
+ net_eq(nf_ct_net(ct1), nf_ct_net(ct2));
+}
+
/* caller must hold rcu readlock and none of the nf_conntrack_locks */
static void nf_ct_gc_expired(struct nf_conn *ct)
{
@@ -695,19 +837,21 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
/* This is the conntrack entry already in hashes that won race. */
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
const struct nf_conntrack_l4proto *l4proto;
+ enum ip_conntrack_info oldinfo;
+ struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->allow_clash &&
- ((ct->status & IPS_NAT_DONE_MASK) == 0) &&
!nf_ct_is_dying(ct) &&
atomic_inc_not_zero(&ct->ct_general.use)) {
- enum ip_conntrack_info oldinfo;
- struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
-
- nf_ct_acct_merge(ct, ctinfo, loser_ct);
- nf_conntrack_put(&loser_ct->ct_general);
- nf_ct_set(skb, ct, oldinfo);
- return NF_ACCEPT;
+ if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
+ nf_ct_match(ct, loser_ct)) {
+ nf_ct_acct_merge(ct, ctinfo, loser_ct);
+ nf_conntrack_put(&loser_ct->ct_general);
+ nf_ct_set(skb, ct, oldinfo);
+ return NF_ACCEPT;
+ }
+ nf_ct_put(ct);
}
NF_CT_STAT_INC(net, drop);
return NF_DROP;
@@ -1195,7 +1339,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto,
struct sk_buff *skb,
unsigned int dataoff, u32 hash)
@@ -1208,9 +1351,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_zone *zone;
struct nf_conn_timeout *timeout_ext;
struct nf_conntrack_zone tmp;
- unsigned int *timeouts;
- if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
+ if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) {
pr_debug("Can't invert tuple.\n");
return NULL;
}
@@ -1227,15 +1369,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
}
timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
- if (timeout_ext) {
- timeouts = nf_ct_timeout_data(timeout_ext);
- if (unlikely(!timeouts))
- timeouts = l4proto->get_timeouts(net);
- } else {
- timeouts = l4proto->get_timeouts(net);
- }
- if (!l4proto->new(ct, skb, dataoff, timeouts)) {
+ if (!l4proto->new(ct, skb, dataoff)) {
nf_conntrack_free(ct);
pr_debug("can't track with proto module\n");
return NULL;
@@ -1266,8 +1401,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
/* exp->master safe, refcnt bumped in nf_ct_find_expectation */
ct->master = exp->master;
if (exp->helper) {
- help = nf_ct_helper_ext_add(ct, exp->helper,
- GFP_ATOMIC);
+ help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
if (help)
rcu_assign_pointer(help->helper, exp->helper);
}
@@ -1307,7 +1441,6 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
- const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
const struct nf_conntrack_zone *zone;
@@ -1319,8 +1452,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
u32 hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
- dataoff, l3num, protonum, net, &tuple, l3proto,
- l4proto)) {
+ dataoff, l3num, protonum, net, &tuple, l4proto)) {
pr_debug("Can't get tuple\n");
return 0;
}
@@ -1330,7 +1462,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
hash = hash_conntrack_raw(&tuple, net);
h = __nf_conntrack_find_get(net, zone, &tuple, hash);
if (!h) {
- h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
+ h = init_conntrack(net, tmpl, &tuple, l4proto,
skb, dataoff, hash);
if (!h)
return 0;
@@ -1363,14 +1495,11 @@ unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
struct sk_buff *skb)
{
- const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
struct nf_conn *ct, *tmpl;
enum ip_conntrack_info ctinfo;
- unsigned int *timeouts;
- unsigned int dataoff;
u_int8_t protonum;
- int ret;
+ int dataoff, ret;
tmpl = nf_ct_get(skb, &ctinfo);
if (tmpl || ctinfo == IP_CT_UNTRACKED) {
@@ -1384,14 +1513,12 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
}
/* rcu_read_lock()ed by nf_hook_thresh */
- l3proto = __nf_ct_l3proto_find(pf);
- ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
- &dataoff, &protonum);
- if (ret <= 0) {
+ dataoff = get_l4proto(skb, skb_network_offset(skb), pf, &protonum);
+ if (dataoff <= 0) {
pr_debug("not prepared to track yet or error occurred\n");
NF_CT_STAT_INC_ATOMIC(net, error);
NF_CT_STAT_INC_ATOMIC(net, invalid);
- ret = -ret;
+ ret = NF_ACCEPT;
goto out;
}
@@ -1413,8 +1540,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
goto out;
}
repeat:
- ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
- l3proto, l4proto);
+ ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l4proto);
if (ret < 0) {
/* Too stressed to deal. */
NF_CT_STAT_INC_ATOMIC(net, drop);
@@ -1430,10 +1556,7 @@ repeat:
goto out;
}
- /* Decide what timeout policy we want to apply to this flow. */
- timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
-
- ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts);
+ ret = l4proto->packet(ct, skb, dataoff, ctinfo);
if (ret <= 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
@@ -1471,7 +1594,6 @@ bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
rcu_read_lock();
ret = nf_ct_invert_tuple(inverse, orig,
- __nf_ct_l3proto_find(orig->src.l3num),
__nf_ct_l4proto_find(orig->src.l3num,
orig->dst.protonum));
rcu_read_unlock();
@@ -1609,14 +1731,14 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
{
- const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
enum ip_conntrack_info ctinfo;
struct nf_nat_hook *nat_hook;
- unsigned int dataoff, status;
+ unsigned int status;
struct nf_conn *ct;
+ int dataoff;
u16 l3num;
u8 l4num;
@@ -1625,16 +1747,15 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
return 0;
l3num = nf_ct_l3num(ct);
- l3proto = nf_ct_l3proto_find_get(l3num);
- if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
- &l4num) <= 0)
+ dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num);
+ if (dataoff <= 0)
return -1;
l4proto = nf_ct_l4proto_find_get(l3num, l4num);
if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
- l4num, net, &tuple, l3proto, l4proto))
+ l4num, net, &tuple, l4proto))
return -1;
if (ct->status & IPS_SRC_NAT) {
@@ -2089,9 +2210,6 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
}
EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
-module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
- &nf_conntrack_htable_size, 0600);
-
static __always_inline unsigned int total_extension_size(void)
{
/* remember to add new extensions below */
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 853b23206bb7..3f586ba23d92 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -610,7 +610,6 @@ static int exp_seq_show(struct seq_file *s, void *v)
expect->tuple.src.l3num,
expect->tuple.dst.protonum);
print_tuple(s, &expect->tuple,
- __nf_ct_l3proto_find(expect->tuple.src.l3num),
__nf_ct_l4proto_find(expect->tuple.src.l3num,
expect->tuple.dst.protonum));
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index a75b11c39312..d557a425289d 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -24,7 +24,6 @@
#include <linux/rtnetlink.h>
#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -193,8 +192,7 @@ void nf_conntrack_helper_put(struct nf_conntrack_helper *helper)
EXPORT_SYMBOL_GPL(nf_conntrack_helper_put);
struct nf_conn_help *
-nf_ct_helper_ext_add(struct nf_conn *ct,
- struct nf_conntrack_helper *helper, gfp_t gfp)
+nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
{
struct nf_conn_help *help;
@@ -263,7 +261,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
}
if (help == NULL) {
- help = nf_ct_helper_ext_add(ct, helper, flags);
+ help = nf_ct_helper_ext_add(ct, flags);
if (help == NULL)
return -ENOMEM;
} else {
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
deleted file mode 100644
index 397e6911214f..000000000000
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
- *
- * Based largely upon the original ip_conntrack code which
- * had the following copyright information:
- *
- * (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Author:
- * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- */
-
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/icmp.h>
-#include <linux/sysctl.h>
-#include <net/ip.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
-
-static bool generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
- struct nf_conntrack_tuple *tuple)
-{
- memset(&tuple->src.u3, 0, sizeof(tuple->src.u3));
- memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3));
-
- return true;
-}
-
-static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_tuple *orig)
-{
- memset(&tuple->src.u3, 0, sizeof(tuple->src.u3));
- memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3));
-
- return true;
-}
-
-static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
- unsigned int *dataoff, u_int8_t *protonum)
-{
- /* Never track !!! */
- return -NF_ACCEPT;
-}
-
-
-struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
- .l3proto = PF_UNSPEC,
- .pkt_to_tuple = generic_pkt_to_tuple,
- .invert_tuple = generic_invert_tuple,
- .get_l4proto = generic_get_l4proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 20a2e37c76d1..f981bfa8db72 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -38,7 +38,6 @@
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_acct.h>
@@ -81,9 +80,26 @@ nla_put_failure:
return -1;
}
+static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *tuple)
+{
+ if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) ||
+ nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int ipv6_tuple_to_nlattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *tuple)
+{
+ if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) ||
+ nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6))
+ return -EMSGSIZE;
+ return 0;
+}
+
static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_l3proto *l3proto)
+ const struct nf_conntrack_tuple *tuple)
{
int ret = 0;
struct nlattr *nest_parms;
@@ -92,8 +108,14 @@ static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
if (!nest_parms)
goto nla_put_failure;
- if (likely(l3proto->tuple_to_nlattr))
- ret = l3proto->tuple_to_nlattr(skb, tuple);
+ switch (tuple->src.l3num) {
+ case NFPROTO_IPV4:
+ ret = ipv4_tuple_to_nlattr(skb, tuple);
+ break;
+ case NFPROTO_IPV6:
+ ret = ipv6_tuple_to_nlattr(skb, tuple);
+ break;
+ }
nla_nest_end(skb, nest_parms);
@@ -106,13 +128,11 @@ nla_put_failure:
static int ctnetlink_dump_tuples(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
- const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
int ret;
rcu_read_lock();
- l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
- ret = ctnetlink_dump_tuples_ip(skb, tuple, l3proto);
+ ret = ctnetlink_dump_tuples_ip(skb, tuple);
if (ret >= 0) {
l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
@@ -556,15 +576,20 @@ nla_put_failure:
return -1;
}
+static const struct nla_policy cta_ip_nla_policy[CTA_IP_MAX + 1] = {
+ [CTA_IP_V4_SRC] = { .type = NLA_U32 },
+ [CTA_IP_V4_DST] = { .type = NLA_U32 },
+ [CTA_IP_V6_SRC] = { .len = sizeof(__be32) * 4 },
+ [CTA_IP_V6_DST] = { .len = sizeof(__be32) * 4 },
+};
+
#if defined(CONFIG_NETFILTER_NETLINK_GLUE_CT) || defined(CONFIG_NF_CONNTRACK_EVENTS)
static size_t ctnetlink_proto_size(const struct nf_conn *ct)
{
- const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
size_t len, len4 = 0;
- l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
- len = l3proto->nla_size;
+ len = nla_policy_len(cta_ip_nla_policy, CTA_IP_MAX + 1);
len *= 3u; /* ORIG, REPLY, MASTER */
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
@@ -936,29 +961,54 @@ out:
return skb->len;
}
+static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
+ struct nf_conntrack_tuple *t)
+{
+ if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
+ return -EINVAL;
+
+ t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]);
+ t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]);
+
+ return 0;
+}
+
+static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
+ struct nf_conntrack_tuple *t)
+{
+ if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST])
+ return -EINVAL;
+
+ t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]);
+ t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]);
+
+ return 0;
+}
+
static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
struct nf_conntrack_tuple *tuple)
{
struct nlattr *tb[CTA_IP_MAX+1];
- struct nf_conntrack_l3proto *l3proto;
int ret = 0;
ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL, NULL);
if (ret < 0)
return ret;
- rcu_read_lock();
- l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
+ ret = nla_validate_nested(attr, CTA_IP_MAX,
+ cta_ip_nla_policy, NULL);
+ if (ret)
+ return ret;
- if (likely(l3proto->nlattr_to_tuple)) {
- ret = nla_validate_nested(attr, CTA_IP_MAX,
- l3proto->nla_policy, NULL);
- if (ret == 0)
- ret = l3proto->nlattr_to_tuple(tb, tuple);
+ switch (tuple->src.l3num) {
+ case NFPROTO_IPV4:
+ ret = ipv4_nlattr_to_tuple(tb, tuple);
+ break;
+ case NFPROTO_IPV6:
+ ret = ipv6_nlattr_to_tuple(tb, tuple);
+ break;
}
- rcu_read_unlock();
-
return ret;
}
@@ -1897,7 +1947,7 @@ ctnetlink_create_conntrack(struct net *net,
} else {
struct nf_conn_help *help;
- help = nf_ct_helper_ext_add(ct, helper, GFP_ATOMIC);
+ help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
if (help == NULL) {
err = -ENOMEM;
goto err2;
@@ -2581,7 +2631,6 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple_mask *mask)
{
- const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple m;
struct nlattr *nest_parms;
@@ -2597,8 +2646,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
goto nla_put_failure;
rcu_read_lock();
- l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
- ret = ctnetlink_dump_tuples_ip(skb, &m, l3proto);
+ ret = ctnetlink_dump_tuples_ip(skb, &m);
if (ret >= 0) {
l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
tuple->dst.protonum);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index d88841fbc560..803607a90102 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -1,14 +1,4 @@
-/* L3/L4 protocol support for nf_conntrack. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
- * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/netfilter.h>
@@ -24,14 +14,36 @@
#include <linux/netdevice.h>
#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_log.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <net/ipv6.h>
+#include <net/inet_frag.h>
+
+extern unsigned int nf_conntrack_net_id;
+
static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;
-struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly;
-EXPORT_SYMBOL_GPL(nf_ct_l3protos);
static DEFINE_MUTEX(nf_ct_proto_mutex);
@@ -122,137 +134,6 @@ __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
}
EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
-/* this is guaranteed to always return a valid protocol helper, since
- * it falls back to generic_protocol */
-const struct nf_conntrack_l3proto *
-nf_ct_l3proto_find_get(u_int16_t l3proto)
-{
- struct nf_conntrack_l3proto *p;
-
- rcu_read_lock();
- p = __nf_ct_l3proto_find(l3proto);
- if (!try_module_get(p->me))
- p = &nf_conntrack_l3proto_generic;
- rcu_read_unlock();
-
- return p;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
-
-int
-nf_ct_l3proto_try_module_get(unsigned short l3proto)
-{
- const struct nf_conntrack_l3proto *p;
- int ret;
-
-retry: p = nf_ct_l3proto_find_get(l3proto);
- if (p == &nf_conntrack_l3proto_generic) {
- ret = request_module("nf_conntrack-%d", l3proto);
- if (!ret)
- goto retry;
-
- return -EPROTOTYPE;
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_try_module_get);
-
-void nf_ct_l3proto_module_put(unsigned short l3proto)
-{
- struct nf_conntrack_l3proto *p;
-
- /* rcu_read_lock not necessary since the caller holds a reference, but
- * taken anyways to avoid lockdep warnings in __nf_ct_l3proto_find()
- */
- rcu_read_lock();
- p = __nf_ct_l3proto_find(l3proto);
- module_put(p->me);
- rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
-
-static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
-{
- const struct nf_conntrack_l3proto *l3proto;
- int ret;
-
- might_sleep();
-
- ret = nf_ct_l3proto_try_module_get(nfproto);
- if (ret < 0)
- return ret;
-
- /* we already have a reference, can't fail */
- rcu_read_lock();
- l3proto = __nf_ct_l3proto_find(nfproto);
- rcu_read_unlock();
-
- if (!l3proto->net_ns_get)
- return 0;
-
- ret = l3proto->net_ns_get(net);
- if (ret < 0)
- nf_ct_l3proto_module_put(nfproto);
-
- return ret;
-}
-
-int nf_ct_netns_get(struct net *net, u8 nfproto)
-{
- int err;
-
- if (nfproto == NFPROTO_INET) {
- err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
- if (err < 0)
- goto err1;
- err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
- if (err < 0)
- goto err2;
- } else {
- err = nf_ct_netns_do_get(net, nfproto);
- if (err < 0)
- goto err1;
- }
- return 0;
-
-err2:
- nf_ct_netns_put(net, NFPROTO_IPV4);
-err1:
- return err;
-}
-EXPORT_SYMBOL_GPL(nf_ct_netns_get);
-
-static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
-{
- const struct nf_conntrack_l3proto *l3proto;
-
- might_sleep();
-
- /* same as nf_conntrack_netns_get(), reference assumed */
- rcu_read_lock();
- l3proto = __nf_ct_l3proto_find(nfproto);
- rcu_read_unlock();
-
- if (WARN_ON(!l3proto))
- return;
-
- if (l3proto->net_ns_put)
- l3proto->net_ns_put(net);
-
- nf_ct_l3proto_module_put(nfproto);
-}
-
-void nf_ct_netns_put(struct net *net, uint8_t nfproto)
-{
- if (nfproto == NFPROTO_INET) {
- nf_ct_netns_do_put(net, NFPROTO_IPV4);
- nf_ct_netns_do_put(net, NFPROTO_IPV6);
- } else
- nf_ct_netns_do_put(net, nfproto);
-}
-EXPORT_SYMBOL_GPL(nf_ct_netns_put);
-
const struct nf_conntrack_l4proto *
nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
{
@@ -274,11 +155,6 @@ void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p)
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
-static int kill_l3proto(struct nf_conn *i, void *data)
-{
- return nf_ct_l3num(i) == ((const struct nf_conntrack_l3proto *)data)->l3proto;
-}
-
static int kill_l4proto(struct nf_conn *i, void *data)
{
const struct nf_conntrack_l4proto *l4proto;
@@ -287,52 +163,6 @@ static int kill_l4proto(struct nf_conn *i, void *data)
nf_ct_l3num(i) == l4proto->l3proto;
}
-int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto)
-{
- int ret = 0;
- struct nf_conntrack_l3proto *old;
-
- if (proto->l3proto >= NFPROTO_NUMPROTO)
- return -EBUSY;
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- if (proto->tuple_to_nlattr && proto->nla_size == 0)
- return -EINVAL;
-#endif
- mutex_lock(&nf_ct_proto_mutex);
- old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
- lockdep_is_held(&nf_ct_proto_mutex));
- if (old != &nf_conntrack_l3proto_generic) {
- ret = -EBUSY;
- goto out_unlock;
- }
-
- rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
-
-out_unlock:
- mutex_unlock(&nf_ct_proto_mutex);
- return ret;
-
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
-
-void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto)
-{
- BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO);
-
- mutex_lock(&nf_ct_proto_mutex);
- BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
- lockdep_is_held(&nf_ct_proto_mutex)
- ) != proto);
- rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
- &nf_conntrack_l3proto_generic);
- mutex_unlock(&nf_ct_proto_mutex);
-
- synchronize_rcu();
- /* Remove all contrack entries for this protocol */
- nf_ct_iterate_destroy(kill_l3proto, (void*)proto);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
-
static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
const struct nf_conntrack_l4proto *l4proto)
{
@@ -499,8 +329,23 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
-int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
- unsigned int num_proto)
+static void
+nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
+ unsigned int num_proto)
+{
+ mutex_lock(&nf_ct_proto_mutex);
+ while (num_proto-- != 0)
+ __nf_ct_l4proto_unregister_one(l4proto[num_proto]);
+ mutex_unlock(&nf_ct_proto_mutex);
+
+ synchronize_net();
+ /* Remove all contrack entries for this protocol */
+ nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
+}
+
+static int
+nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
+ unsigned int num_proto)
{
int ret = -EINVAL, ver;
unsigned int i;
@@ -518,7 +363,6 @@ int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
}
return ret;
}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
int nf_ct_l4proto_pernet_register(struct net *net,
const struct nf_conntrack_l4proto *const l4proto[],
@@ -542,20 +386,6 @@ int nf_ct_l4proto_pernet_register(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
-void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
- unsigned int num_proto)
-{
- mutex_lock(&nf_ct_proto_mutex);
- while (num_proto-- != 0)
- __nf_ct_l4proto_unregister_one(l4proto[num_proto]);
- mutex_unlock(&nf_ct_proto_mutex);
-
- synchronize_net();
- /* Remove all contrack entries for this protocol */
- nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
-
void nf_ct_l4proto_pernet_unregister(struct net *net,
const struct nf_conntrack_l4proto *const l4proto[],
unsigned int num_proto)
@@ -565,6 +395,563 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
+static unsigned int ipv4_helper(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn_help *help;
+ const struct nf_conntrack_helper *helper;
+
+ /* This is where we call the helper: as the packet goes out. */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ return NF_ACCEPT;
+
+ help = nfct_help(ct);
+ if (!help)
+ return NF_ACCEPT;
+
+ /* rcu_read_lock()ed by nf_hook_thresh */
+ helper = rcu_dereference(help->helper);
+ if (!helper)
+ return NF_ACCEPT;
+
+ return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
+ ct, ctinfo);
+}
+
+static unsigned int ipv4_confirm(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ goto out;
+
+ /* adjust seqs for loopback traffic only in outgoing direction */
+ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+ !nf_is_loopback_packet(skb)) {
+ if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
+ NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+ return NF_DROP;
+ }
+ }
+out:
+ /* We've seen it coming out the other side: confirm it */
+ return nf_conntrack_confirm(skb);
+}
+
+static unsigned int ipv4_conntrack_in(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
+}
+
+static unsigned int ipv4_conntrack_local(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *tmpl;
+
+ tmpl = nf_ct_get(skb, &ctinfo);
+ if (tmpl && nf_ct_is_template(tmpl)) {
+ /* when skipping ct, clear templates to avoid fooling
+ * later targets/matches
+ */
+ skb->_nfct = 0;
+ nf_ct_put(tmpl);
+ }
+ return NF_ACCEPT;
+ }
+
+ return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
+}
+
+/* Connection tracking may drop packets, but never alters them, so
+ * make it the first hook.
+ */
+static const struct nf_hook_ops ipv4_conntrack_ops[] = {
+ {
+ .hook = ipv4_conntrack_in,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK,
+ },
+ {
+ .hook = ipv4_conntrack_local,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_CONNTRACK,
+ },
+ {
+ .hook = ipv4_helper,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_HELPER,
+ },
+ {
+ .hook = ipv4_confirm,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+ },
+ {
+ .hook = ipv4_helper,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_HELPER,
+ },
+ {
+ .hook = ipv4_confirm,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+ },
+};
+
+/* Fast function for those who don't want to parse /proc (and I don't
+ * blame them).
+ * Reversing the socket's dst/src point of view gives us the reply
+ * mapping.
+ */
+static int
+getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct nf_conntrack_tuple_hash *h;
+ struct nf_conntrack_tuple tuple;
+
+ memset(&tuple, 0, sizeof(tuple));
+
+ lock_sock(sk);
+ tuple.src.u3.ip = inet->inet_rcv_saddr;
+ tuple.src.u.tcp.port = inet->inet_sport;
+ tuple.dst.u3.ip = inet->inet_daddr;
+ tuple.dst.u.tcp.port = inet->inet_dport;
+ tuple.src.l3num = PF_INET;
+ tuple.dst.protonum = sk->sk_protocol;
+ release_sock(sk);
+
+ /* We only do TCP and SCTP at the moment: is there a better way? */
+ if (tuple.dst.protonum != IPPROTO_TCP &&
+ tuple.dst.protonum != IPPROTO_SCTP) {
+ pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
+ return -ENOPROTOOPT;
+ }
+
+ if ((unsigned int)*len < sizeof(struct sockaddr_in)) {
+ pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
+ *len, sizeof(struct sockaddr_in));
+ return -EINVAL;
+ }
+
+ h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
+ if (h) {
+ struct sockaddr_in sin;
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ sin.sin_family = AF_INET;
+ sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple.dst.u.tcp.port;
+ sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple.dst.u3.ip;
+ memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
+
+ pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
+ &sin.sin_addr.s_addr, ntohs(sin.sin_port));
+ nf_ct_put(ct);
+ if (copy_to_user(user, &sin, sizeof(sin)) != 0)
+ return -EFAULT;
+ else
+ return 0;
+ }
+ pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
+ &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
+ &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
+ return -ENOENT;
+}
+
+static struct nf_sockopt_ops so_getorigdst = {
+ .pf = PF_INET,
+ .get_optmin = SO_ORIGINAL_DST,
+ .get_optmax = SO_ORIGINAL_DST + 1,
+ .get = getorigdst,
+ .owner = THIS_MODULE,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int
+ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+ struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
+ const struct ipv6_pinfo *inet6 = inet6_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct nf_conntrack_tuple_hash *h;
+ struct sockaddr_in6 sin6;
+ struct nf_conn *ct;
+ __be32 flow_label;
+ int bound_dev_if;
+
+ lock_sock(sk);
+ tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
+ tuple.src.u.tcp.port = inet->inet_sport;
+ tuple.dst.u3.in6 = sk->sk_v6_daddr;
+ tuple.dst.u.tcp.port = inet->inet_dport;
+ tuple.dst.protonum = sk->sk_protocol;
+ bound_dev_if = sk->sk_bound_dev_if;
+ flow_label = inet6->flow_label;
+ release_sock(sk);
+
+ if (tuple.dst.protonum != IPPROTO_TCP &&
+ tuple.dst.protonum != IPPROTO_SCTP)
+ return -ENOPROTOOPT;
+
+ if (*len < 0 || (unsigned int)*len < sizeof(sin6))
+ return -EINVAL;
+
+ h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
+ if (!h) {
+ pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
+ &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
+ &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
+ return -ENOENT;
+ }
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
+ sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK;
+ memcpy(&sin6.sin6_addr,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
+ sizeof(sin6.sin6_addr));
+
+ nf_ct_put(ct);
+ sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if);
+ return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
+}
+
+static struct nf_sockopt_ops so_getorigdst6 = {
+ .pf = NFPROTO_IPV6,
+ .get_optmin = IP6T_SO_ORIGINAL_DST,
+ .get_optmax = IP6T_SO_ORIGINAL_DST + 1,
+ .get = ipv6_getorigdst,
+ .owner = THIS_MODULE,
+};
+
+static unsigned int ipv6_confirm(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned char pnum = ipv6_hdr(skb)->nexthdr;
+ int protoff;
+ __be16 frag_off;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ goto out;
+
+ protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
+ &frag_off);
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+ pr_debug("proto header not found\n");
+ goto out;
+ }
+
+ /* adjust seqs for loopback traffic only in outgoing direction */
+ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+ !nf_is_loopback_packet(skb)) {
+ if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
+ NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+ return NF_DROP;
+ }
+ }
+out:
+ /* We've seen it coming out the other side: confirm it */
+ return nf_conntrack_confirm(skb);
+}
+
+static unsigned int ipv6_conntrack_in(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
+}
+
+static unsigned int ipv6_conntrack_local(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
+}
+
+static unsigned int ipv6_helper(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_conn *ct;
+ const struct nf_conn_help *help;
+ const struct nf_conntrack_helper *helper;
+ enum ip_conntrack_info ctinfo;
+ __be16 frag_off;
+ int protoff;
+ u8 nexthdr;
+
+ /* This is where we call the helper: as the packet goes out. */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ return NF_ACCEPT;
+
+ help = nfct_help(ct);
+ if (!help)
+ return NF_ACCEPT;
+ /* rcu_read_lock()ed by nf_hook_thresh */
+ helper = rcu_dereference(help->helper);
+ if (!helper)
+ return NF_ACCEPT;
+
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+ &frag_off);
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+ pr_debug("proto header not found\n");
+ return NF_ACCEPT;
+ }
+
+ return helper->help(skb, protoff, ct, ctinfo);
+}
+
+static const struct nf_hook_ops ipv6_conntrack_ops[] = {
+ {
+ .hook = ipv6_conntrack_in,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP6_PRI_CONNTRACK,
+ },
+ {
+ .hook = ipv6_conntrack_local,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP6_PRI_CONNTRACK,
+ },
+ {
+ .hook = ipv6_helper,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP6_PRI_CONNTRACK_HELPER,
+ },
+ {
+ .hook = ipv6_confirm,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP6_PRI_LAST,
+ },
+ {
+ .hook = ipv6_helper,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP6_PRI_CONNTRACK_HELPER,
+ },
+ {
+ .hook = ipv6_confirm,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP6_PRI_LAST - 1,
+ },
+};
+#endif
+
+static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
+{
+ struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ int err = 0;
+
+ mutex_lock(&nf_ct_proto_mutex);
+
+ switch (nfproto) {
+ case NFPROTO_IPV4:
+ cnet->users4++;
+ if (cnet->users4 > 1)
+ goto out_unlock;
+ err = nf_defrag_ipv4_enable(net);
+ if (err) {
+ cnet->users4 = 0;
+ goto out_unlock;
+ }
+
+ err = nf_register_net_hooks(net, ipv4_conntrack_ops,
+ ARRAY_SIZE(ipv4_conntrack_ops));
+ if (err)
+ cnet->users4 = 0;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case NFPROTO_IPV6:
+ cnet->users6++;
+ if (cnet->users6 > 1)
+ goto out_unlock;
+ err = nf_defrag_ipv6_enable(net);
+ if (err < 0) {
+ cnet->users6 = 0;
+ goto out_unlock;
+ }
+
+ err = nf_register_net_hooks(net, ipv6_conntrack_ops,
+ ARRAY_SIZE(ipv6_conntrack_ops));
+ if (err)
+ cnet->users6 = 0;
+ break;
+#endif
+ default:
+ err = -EPROTO;
+ break;
+ }
+ out_unlock:
+ mutex_unlock(&nf_ct_proto_mutex);
+ return err;
+}
+
+static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
+{
+ struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+
+ mutex_lock(&nf_ct_proto_mutex);
+ switch (nfproto) {
+ case NFPROTO_IPV4:
+ if (cnet->users4 && (--cnet->users4 == 0))
+ nf_unregister_net_hooks(net, ipv4_conntrack_ops,
+ ARRAY_SIZE(ipv4_conntrack_ops));
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case NFPROTO_IPV6:
+ if (cnet->users6 && (--cnet->users6 == 0))
+ nf_unregister_net_hooks(net, ipv6_conntrack_ops,
+ ARRAY_SIZE(ipv6_conntrack_ops));
+ break;
+#endif
+ }
+
+ mutex_unlock(&nf_ct_proto_mutex);
+}
+
+int nf_ct_netns_get(struct net *net, u8 nfproto)
+{
+ int err;
+
+ if (nfproto == NFPROTO_INET) {
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
+ if (err < 0)
+ goto err1;
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
+ if (err < 0)
+ goto err2;
+ } else {
+ err = nf_ct_netns_do_get(net, nfproto);
+ if (err < 0)
+ goto err1;
+ }
+ return 0;
+
+err2:
+ nf_ct_netns_put(net, NFPROTO_IPV4);
+err1:
+ return err;
+}
+EXPORT_SYMBOL_GPL(nf_ct_netns_get);
+
+void nf_ct_netns_put(struct net *net, uint8_t nfproto)
+{
+ if (nfproto == NFPROTO_INET) {
+ nf_ct_netns_do_put(net, NFPROTO_IPV4);
+ nf_ct_netns_do_put(net, NFPROTO_IPV6);
+ } else {
+ nf_ct_netns_do_put(net, nfproto);
+ }
+}
+EXPORT_SYMBOL_GPL(nf_ct_netns_put);
+
+static const struct nf_conntrack_l4proto * const builtin_l4proto[] = {
+ &nf_conntrack_l4proto_tcp4,
+ &nf_conntrack_l4proto_udp4,
+ &nf_conntrack_l4proto_icmp,
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ &nf_conntrack_l4proto_dccp4,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ &nf_conntrack_l4proto_sctp4,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+ &nf_conntrack_l4proto_udplite4,
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
+ &nf_conntrack_l4proto_tcp6,
+ &nf_conntrack_l4proto_udp6,
+ &nf_conntrack_l4proto_icmpv6,
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ &nf_conntrack_l4proto_dccp6,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ &nf_conntrack_l4proto_sctp6,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+ &nf_conntrack_l4proto_udplite6,
+#endif
+#endif /* CONFIG_IPV6 */
+};
+
+int nf_conntrack_proto_init(void)
+{
+ int ret = 0;
+
+ ret = nf_register_sockopt(&so_getorigdst);
+ if (ret < 0)
+ return ret;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ ret = nf_register_sockopt(&so_getorigdst6);
+ if (ret < 0)
+ goto cleanup_sockopt;
+#endif
+ ret = nf_ct_l4proto_register(builtin_l4proto,
+ ARRAY_SIZE(builtin_l4proto));
+ if (ret < 0)
+ goto cleanup_sockopt2;
+
+ return ret;
+cleanup_sockopt2:
+ nf_unregister_sockopt(&so_getorigdst);
+#if IS_ENABLED(CONFIG_IPV6)
+cleanup_sockopt:
+ nf_unregister_sockopt(&so_getorigdst6);
+#endif
+ return ret;
+}
+
+void nf_conntrack_proto_fini(void)
+{
+ unsigned int i;
+
+ nf_ct_l4proto_unregister(builtin_l4proto,
+ ARRAY_SIZE(builtin_l4proto));
+ nf_unregister_sockopt(&so_getorigdst);
+#if IS_ENABLED(CONFIG_IPV6)
+ nf_unregister_sockopt(&so_getorigdst6);
+#endif
+
+ /* free l3proto protocol tables */
+ for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
+ kfree(nf_ct_protos[i]);
+}
+
int nf_conntrack_proto_pernet_init(struct net *net)
{
int err;
@@ -581,6 +968,14 @@ int nf_conntrack_proto_pernet_init(struct net *net)
if (err < 0)
return err;
+ err = nf_ct_l4proto_pernet_register(net, builtin_l4proto,
+ ARRAY_SIZE(builtin_l4proto));
+ if (err < 0) {
+ nf_ct_l4proto_unregister_sysctl(net, pn,
+ &nf_conntrack_l4proto_generic);
+ return err;
+ }
+
pn->users++;
return 0;
}
@@ -590,25 +985,19 @@ void nf_conntrack_proto_pernet_fini(struct net *net)
struct nf_proto_net *pn = nf_ct_l4proto_net(net,
&nf_conntrack_l4proto_generic);
+ nf_ct_l4proto_pernet_unregister(net, builtin_l4proto,
+ ARRAY_SIZE(builtin_l4proto));
pn->users--;
nf_ct_l4proto_unregister_sysctl(net,
pn,
&nf_conntrack_l4proto_generic);
}
-int nf_conntrack_proto_init(void)
-{
- unsigned int i;
- for (i = 0; i < NFPROTO_NUMPROTO; i++)
- rcu_assign_pointer(nf_ct_l3protos[i],
- &nf_conntrack_l3proto_generic);
- return 0;
-}
-void nf_conntrack_proto_fini(void)
-{
- unsigned int i;
- /* free l3proto protocol tables */
- for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
- kfree(nf_ct_protos[i]);
-}
+module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
+ &nf_conntrack_htable_size, 0600);
+
+MODULE_ALIAS("ip_conntrack");
+MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
+MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index abe647d5b8c6..f476d116c816 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -23,6 +23,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_log.h>
/* Timeouts are based on values from RFC4340:
@@ -388,31 +389,8 @@ static inline struct nf_dccp_net *dccp_pernet(struct net *net)
return &net->ct.nf_ct_proto.dccp;
}
-static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
- struct net *net, struct nf_conntrack_tuple *tuple)
-{
- struct dccp_hdr _hdr, *dh;
-
- /* Actually only need first 4 bytes to get ports. */
- dh = skb_header_pointer(skb, dataoff, 4, &_hdr);
- if (dh == NULL)
- return false;
-
- tuple->src.u.dccp.port = dh->dccph_sport;
- tuple->dst.u.dccp.port = dh->dccph_dport;
- return true;
-}
-
-static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv,
- const struct nf_conntrack_tuple *tuple)
-{
- inv->src.u.dccp.port = tuple->dst.u.dccp.port;
- inv->dst.u.dccp.port = tuple->src.u.dccp.port;
- return true;
-}
-
static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
- unsigned int dataoff, unsigned int *timeouts)
+ unsigned int dataoff)
{
struct net *net = nf_ct_net(ct);
struct nf_dccp_net *dn;
@@ -460,19 +438,14 @@ static u64 dccp_ack_seq(const struct dccp_hdr *dh)
ntohl(dhack->dccph_ack_nr_low);
}
-static unsigned int *dccp_get_timeouts(struct net *net)
-{
- return dccp_pernet(net)->dccp_timeout;
-}
-
static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
- unsigned int dataoff, enum ip_conntrack_info ctinfo,
- unsigned int *timeouts)
+ unsigned int dataoff, enum ip_conntrack_info ctinfo)
{
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
struct dccp_hdr _dh, *dh;
u_int8_t type, old_state, new_state;
enum ct_dccp_roles role;
+ unsigned int *timeouts;
dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
BUG_ON(dh == NULL);
@@ -546,6 +519,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
if (new_state != old_state)
nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
+ timeouts = nf_ct_timeout_lookup(ct);
+ if (!timeouts)
+ timeouts = dccp_pernet(nf_ct_net(ct))->dccp_timeout;
nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
return NF_ACCEPT;
@@ -864,11 +840,8 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = {
.l3proto = AF_INET,
.l4proto = IPPROTO_DCCP,
- .pkt_to_tuple = dccp_pkt_to_tuple,
- .invert_tuple = dccp_invert_tuple,
.new = dccp_new,
.packet = dccp_packet,
- .get_timeouts = dccp_get_timeouts,
.error = dccp_error,
.can_early_drop = dccp_can_early_drop,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
@@ -900,11 +873,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
.l3proto = AF_INET6,
.l4proto = IPPROTO_DCCP,
- .pkt_to_tuple = dccp_pkt_to_tuple,
- .invert_tuple = dccp_invert_tuple,
.new = dccp_new,
.packet = dccp_packet,
- .get_timeouts = dccp_get_timeouts,
.error = dccp_error,
.can_early_drop = dccp_can_early_drop,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 6c6896d21cd7..ac4a0b296dcd 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -11,6 +11,7 @@
#include <linux/timer.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
static const unsigned int nf_ct_generic_timeout = 600*HZ;
@@ -41,34 +42,24 @@ static bool generic_pkt_to_tuple(const struct sk_buff *skb,
return true;
}
-static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_tuple *orig)
-{
- tuple->src.u.all = 0;
- tuple->dst.u.all = 0;
-
- return true;
-}
-
-static unsigned int *generic_get_timeouts(struct net *net)
-{
- return &(generic_pernet(net)->timeout);
-}
-
/* Returns verdict for packet, or -1 for invalid. */
static int generic_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
- enum ip_conntrack_info ctinfo,
- unsigned int *timeout)
+ enum ip_conntrack_info ctinfo)
{
+ const unsigned int *timeout = nf_ct_timeout_lookup(ct);
+
+ if (!timeout)
+ timeout = &generic_pernet(nf_ct_net(ct))->timeout;
+
nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
- unsigned int dataoff, unsigned int *timeouts)
+ unsigned int dataoff)
{
bool ret;
@@ -87,8 +78,11 @@ static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
static int generic_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
- unsigned int *timeout = data;
struct nf_generic_net *gn = generic_pernet(net);
+ unsigned int *timeout = data;
+
+ if (!timeout)
+ timeout = &gn->timeout;
if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT])
*timeout =
@@ -168,9 +162,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
.l3proto = PF_UNSPEC,
.l4proto = 255,
.pkt_to_tuple = generic_pkt_to_tuple,
- .invert_tuple = generic_invert_tuple,
.packet = generic_packet,
- .get_timeouts = generic_get_timeouts,
.new = generic_new,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
.ctnl_timeout = {
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index d049ea5a3770..d1632252bf5b 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -39,6 +39,7 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
#include <linux/netfilter/nf_conntrack_proto_gre.h>
#include <linux/netfilter/nf_conntrack_pptp.h>
@@ -179,15 +180,6 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy);
/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
-/* invert gre part of tuple */
-static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_tuple *orig)
-{
- tuple->dst.u.gre.key = orig->src.u.gre.key;
- tuple->src.u.gre.key = orig->dst.u.gre.key;
- return true;
-}
-
/* gre hdr info to tuple */
static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
struct net *net, struct nf_conntrack_tuple *tuple)
@@ -243,8 +235,7 @@ static unsigned int *gre_get_timeouts(struct net *net)
static int gre_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
- enum ip_conntrack_info ctinfo,
- unsigned int *timeouts)
+ enum ip_conntrack_info ctinfo)
{
/* If we've seen traffic both ways, this is a GRE connection.
* Extend timeout. */
@@ -263,8 +254,13 @@ static int gre_packet(struct nf_conn *ct,
/* Called when a new connection for this protocol found. */
static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb,
- unsigned int dataoff, unsigned int *timeouts)
+ unsigned int dataoff)
{
+ unsigned int *timeouts = nf_ct_timeout_lookup(ct);
+
+ if (!timeouts)
+ timeouts = gre_get_timeouts(nf_ct_net(ct));
+
pr_debug(": ");
nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
@@ -300,6 +296,8 @@ static int gre_timeout_nlattr_to_obj(struct nlattr *tb[],
unsigned int *timeouts = data;
struct netns_proto_gre *net_gre = gre_pernet(net);
+ if (!timeouts)
+ timeouts = gre_get_timeouts(net);
/* set default timeouts for GRE. */
timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED];
timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED];
@@ -356,11 +354,9 @@ static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
.l3proto = AF_INET,
.l4proto = IPPROTO_GRE,
.pkt_to_tuple = gre_pkt_to_tuple,
- .invert_tuple = gre_invert_tuple,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = gre_print_conntrack,
#endif
- .get_timeouts = gre_get_timeouts,
.packet = gre_packet,
.new = gre_new,
.destroy = gre_destroy,
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
new file mode 100644
index 000000000000..036670b38282
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -0,0 +1,388 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/in.h>
+#include <linux/icmp.h>
+#include <linux/seq_file.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_log.h>
+
+static const unsigned int nf_ct_icmp_timeout = 30*HZ;
+
+static inline struct nf_icmp_net *icmp_pernet(struct net *net)
+{
+ return &net->ct.nf_ct_proto.icmp;
+}
+
+static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+ struct net *net, struct nf_conntrack_tuple *tuple)
+{
+ const struct icmphdr *hp;
+ struct icmphdr _hdr;
+
+ hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return false;
+
+ tuple->dst.u.icmp.type = hp->type;
+ tuple->src.u.icmp.id = hp->un.echo.id;
+ tuple->dst.u.icmp.code = hp->code;
+
+ return true;
+}
+
+/* Add 1; spaces filled with 0. */
+static const u_int8_t invmap[] = {
+ [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
+ [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
+ [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
+ [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
+ [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
+ [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
+ [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
+ [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
+};
+
+static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
+{
+ if (orig->dst.u.icmp.type >= sizeof(invmap) ||
+ !invmap[orig->dst.u.icmp.type])
+ return false;
+
+ tuple->src.u.icmp.id = orig->src.u.icmp.id;
+ tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
+ tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+ return true;
+}
+
+static unsigned int *icmp_get_timeouts(struct net *net)
+{
+ return &icmp_pernet(net)->timeout;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int icmp_packet(struct nf_conn *ct,
+ const struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo)
+{
+ /* Do not immediately delete the connection after the first
+ successful reply to avoid excessive conntrackd traffic
+ and also to handle correctly ICMP echo reply duplicates. */
+ unsigned int *timeout = nf_ct_timeout_lookup(ct);
+
+ if (!timeout)
+ timeout = icmp_get_timeouts(nf_ct_net(ct));
+
+ nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+
+ return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
+ unsigned int dataoff)
+{
+ static const u_int8_t valid_new[] = {
+ [ICMP_ECHO] = 1,
+ [ICMP_TIMESTAMP] = 1,
+ [ICMP_INFO_REQUEST] = 1,
+ [ICMP_ADDRESS] = 1
+ };
+
+ if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) ||
+ !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
+ /* Can't create a new ICMP `conn' with this. */
+ pr_debug("icmp: can't create new conn with type %u\n",
+ ct->tuplehash[0].tuple.dst.u.icmp.type);
+ nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
+ return false;
+ }
+ return true;
+}
+
+/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
+static int
+icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
+ unsigned int hooknum)
+{
+ struct nf_conntrack_tuple innertuple, origtuple;
+ const struct nf_conntrack_l4proto *innerproto;
+ const struct nf_conntrack_tuple_hash *h;
+ const struct nf_conntrack_zone *zone;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conntrack_zone tmp;
+
+ WARN_ON(skb_nfct(skb));
+ zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
+
+ /* Are they talking about one of our connections? */
+ if (!nf_ct_get_tuplepr(skb,
+ skb_network_offset(skb) + ip_hdrlen(skb)
+ + sizeof(struct icmphdr),
+ PF_INET, net, &origtuple)) {
+ pr_debug("icmp_error_message: failed to get tuple\n");
+ return -NF_ACCEPT;
+ }
+
+ /* rcu_read_lock()ed by nf_hook_thresh */
+ innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum);
+
+ /* Ordinarily, we'd expect the inverted tupleproto, but it's
+ been preserved inside the ICMP. */
+ if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
+ pr_debug("icmp_error_message: no match\n");
+ return -NF_ACCEPT;
+ }
+
+ ctinfo = IP_CT_RELATED;
+
+ h = nf_conntrack_find_get(net, zone, &innertuple);
+ if (!h) {
+ pr_debug("icmp_error_message: no match\n");
+ return -NF_ACCEPT;
+ }
+
+ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+ ctinfo += IP_CT_IS_REPLY;
+
+ /* Update skb to refer to this connection */
+ nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
+ return NF_ACCEPT;
+}
+
+static void icmp_error_log(const struct sk_buff *skb, struct net *net,
+ u8 pf, const char *msg)
+{
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg);
+}
+
+/* Small and modified version of icmp_rcv */
+static int
+icmp_error(struct net *net, struct nf_conn *tmpl,
+ struct sk_buff *skb, unsigned int dataoff,
+ u8 pf, unsigned int hooknum)
+{
+ const struct icmphdr *icmph;
+ struct icmphdr _ih;
+
+ /* Not enough header? */
+ icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
+ if (icmph == NULL) {
+ icmp_error_log(skb, net, pf, "short packet");
+ return -NF_ACCEPT;
+ }
+
+ /* See ip_conntrack_proto_tcp.c */
+ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
+ nf_ip_checksum(skb, hooknum, dataoff, 0)) {
+ icmp_error_log(skb, net, pf, "bad hw icmp checksum");
+ return -NF_ACCEPT;
+ }
+
+ /*
+ * 18 is the highest 'known' ICMP type. Anything else is a mystery
+ *
+ * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
+ * discarded.
+ */
+ if (icmph->type > NR_ICMP_TYPES) {
+ icmp_error_log(skb, net, pf, "invalid icmp type");
+ return -NF_ACCEPT;
+ }
+
+ /* Need to track icmp error message? */
+ if (icmph->type != ICMP_DEST_UNREACH &&
+ icmph->type != ICMP_SOURCE_QUENCH &&
+ icmph->type != ICMP_TIME_EXCEEDED &&
+ icmph->type != ICMP_PARAMETERPROB &&
+ icmph->type != ICMP_REDIRECT)
+ return NF_ACCEPT;
+
+ return icmp_error_message(net, tmpl, skb, hooknum);
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int icmp_tuple_to_nlattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *t)
+{
+ if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) ||
+ nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) ||
+ nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = {
+ [CTA_PROTO_ICMP_TYPE] = { .type = NLA_U8 },
+ [CTA_PROTO_ICMP_CODE] = { .type = NLA_U8 },
+ [CTA_PROTO_ICMP_ID] = { .type = NLA_U16 },
+};
+
+static int icmp_nlattr_to_tuple(struct nlattr *tb[],
+ struct nf_conntrack_tuple *tuple)
+{
+ if (!tb[CTA_PROTO_ICMP_TYPE] ||
+ !tb[CTA_PROTO_ICMP_CODE] ||
+ !tb[CTA_PROTO_ICMP_ID])
+ return -EINVAL;
+
+ tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
+ tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
+ tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
+
+ if (tuple->dst.u.icmp.type >= sizeof(invmap) ||
+ !invmap[tuple->dst.u.icmp.type])
+ return -EINVAL;
+
+ return 0;
+}
+
+static unsigned int icmp_nlattr_tuple_size(void)
+{
+ static unsigned int size __read_mostly;
+
+ if (!size)
+ size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
+
+ return size;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
+ struct net *net, void *data)
+{
+ unsigned int *timeout = data;
+ struct nf_icmp_net *in = icmp_pernet(net);
+
+ if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
+ if (!timeout)
+ timeout = &in->timeout;
+ *timeout =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ;
+ } else if (timeout) {
+ /* Set default ICMP timeout. */
+ *timeout = in->timeout;
+ }
+ return 0;
+}
+
+static int
+icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+ const unsigned int *timeout = data;
+
+ if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static const struct nla_policy
+icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = {
+ [CTA_TIMEOUT_ICMP_TIMEOUT] = { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table icmp_sysctl_table[] = {
+ {
+ .procname = "nf_conntrack_icmp_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ { }
+};
+#endif /* CONFIG_SYSCTL */
+
+static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
+ pn->ctl_table = kmemdup(icmp_sysctl_table,
+ sizeof(icmp_sysctl_table),
+ GFP_KERNEL);
+ if (!pn->ctl_table)
+ return -ENOMEM;
+
+ pn->ctl_table[0].data = &in->timeout;
+#endif
+ return 0;
+}
+
+static int icmp_init_net(struct net *net, u_int16_t proto)
+{
+ struct nf_icmp_net *in = icmp_pernet(net);
+ struct nf_proto_net *pn = &in->pn;
+
+ in->timeout = nf_ct_icmp_timeout;
+
+ return icmp_kmemdup_sysctl_table(pn, in);
+}
+
+static struct nf_proto_net *icmp_get_net_proto(struct net *net)
+{
+ return &net->ct.nf_ct_proto.icmp.pn;
+}
+
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
+{
+ .l3proto = PF_INET,
+ .l4proto = IPPROTO_ICMP,
+ .pkt_to_tuple = icmp_pkt_to_tuple,
+ .invert_tuple = icmp_invert_tuple,
+ .packet = icmp_packet,
+ .new = icmp_new,
+ .error = icmp_error,
+ .destroy = NULL,
+ .me = NULL,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .tuple_to_nlattr = icmp_tuple_to_nlattr,
+ .nlattr_tuple_size = icmp_nlattr_tuple_size,
+ .nlattr_to_tuple = icmp_nlattr_to_tuple,
+ .nla_policy = icmp_nla_policy,
+#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+ .ctnl_timeout = {
+ .nlattr_to_obj = icmp_timeout_nlattr_to_obj,
+ .obj_to_nlattr = icmp_timeout_obj_to_nlattr,
+ .nlattr_max = CTA_TIMEOUT_ICMP_MAX,
+ .obj_size = sizeof(unsigned int),
+ .nla_policy = icmp_timeout_nla_policy,
+ },
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+ .init_net = icmp_init_net,
+ .get_net_proto = icmp_get_net_proto,
+};
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
new file mode 100644
index 000000000000..bed07b998a10
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright (C)2003,2004 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Author:
+ * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <linux/seq_file.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
+#include <net/netfilter/nf_log.h>
+
+static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
+
+static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
+{
+ return &net->ct.nf_ct_proto.icmpv6;
+}
+
+static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct net *net,
+ struct nf_conntrack_tuple *tuple)
+{
+ const struct icmp6hdr *hp;
+ struct icmp6hdr _hdr;
+
+ hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return false;
+ tuple->dst.u.icmp.type = hp->icmp6_type;
+ tuple->src.u.icmp.id = hp->icmp6_identifier;
+ tuple->dst.u.icmp.code = hp->icmp6_code;
+
+ return true;
+}
+
+/* Add 1; spaces filled with 0. */
+static const u_int8_t invmap[] = {
+ [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1,
+ [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1,
+ [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_REPLY + 1,
+ [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY + 1
+};
+
+static const u_int8_t noct_valid_new[] = {
+ [ICMPV6_MGM_QUERY - 130] = 1,
+ [ICMPV6_MGM_REPORT - 130] = 1,
+ [ICMPV6_MGM_REDUCTION - 130] = 1,
+ [NDISC_ROUTER_SOLICITATION - 130] = 1,
+ [NDISC_ROUTER_ADVERTISEMENT - 130] = 1,
+ [NDISC_NEIGHBOUR_SOLICITATION - 130] = 1,
+ [NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1,
+ [ICMPV6_MLD2_REPORT - 130] = 1
+};
+
+static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
+{
+ int type = orig->dst.u.icmp.type - 128;
+ if (type < 0 || type >= sizeof(invmap) || !invmap[type])
+ return false;
+
+ tuple->src.u.icmp.id = orig->src.u.icmp.id;
+ tuple->dst.u.icmp.type = invmap[type] - 1;
+ tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+ return true;
+}
+
+static unsigned int *icmpv6_get_timeouts(struct net *net)
+{
+ return &icmpv6_pernet(net)->timeout;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int icmpv6_packet(struct nf_conn *ct,
+ const struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo)
+{
+ unsigned int *timeout = nf_ct_timeout_lookup(ct);
+
+ if (!timeout)
+ timeout = icmpv6_get_timeouts(nf_ct_net(ct));
+
+ /* Do not immediately delete the connection after the first
+ successful reply to avoid excessive conntrackd traffic
+ and also to handle correctly ICMP echo reply duplicates. */
+ nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+
+ return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
+ unsigned int dataoff)
+{
+ static const u_int8_t valid_new[] = {
+ [ICMPV6_ECHO_REQUEST - 128] = 1,
+ [ICMPV6_NI_QUERY - 128] = 1
+ };
+ int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
+
+ if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
+ /* Can't create a new ICMPv6 `conn' with this. */
+ pr_debug("icmpv6: can't create new conn with type %u\n",
+ type + 128);
+ nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
+ return false;
+ }
+ return true;
+}
+
+static int
+icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
+ struct sk_buff *skb,
+ unsigned int icmp6off)
+{
+ struct nf_conntrack_tuple intuple, origtuple;
+ const struct nf_conntrack_tuple_hash *h;
+ const struct nf_conntrack_l4proto *inproto;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conntrack_zone tmp;
+
+ WARN_ON(skb_nfct(skb));
+
+ /* Are they talking about one of our connections? */
+ if (!nf_ct_get_tuplepr(skb,
+ skb_network_offset(skb)
+ + sizeof(struct ipv6hdr)
+ + sizeof(struct icmp6hdr),
+ PF_INET6, net, &origtuple)) {
+ pr_debug("icmpv6_error: Can't get tuple\n");
+ return -NF_ACCEPT;
+ }
+
+ /* rcu_read_lock()ed by nf_hook_thresh */
+ inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum);
+
+ /* Ordinarily, we'd expect the inverted tupleproto, but it's
+ been preserved inside the ICMP. */
+ if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) {
+ pr_debug("icmpv6_error: Can't invert tuple\n");
+ return -NF_ACCEPT;
+ }
+
+ ctinfo = IP_CT_RELATED;
+
+ h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp),
+ &intuple);
+ if (!h) {
+ pr_debug("icmpv6_error: no match\n");
+ return -NF_ACCEPT;
+ } else {
+ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+ ctinfo += IP_CT_IS_REPLY;
+ }
+
+ /* Update skb to refer to this connection */
+ nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
+ return NF_ACCEPT;
+}
+
+static void icmpv6_error_log(const struct sk_buff *skb, struct net *net,
+ u8 pf, const char *msg)
+{
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg);
+}
+
+static int
+icmpv6_error(struct net *net, struct nf_conn *tmpl,
+ struct sk_buff *skb, unsigned int dataoff,
+ u8 pf, unsigned int hooknum)
+{
+ const struct icmp6hdr *icmp6h;
+ struct icmp6hdr _ih;
+ int type;
+
+ icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
+ if (icmp6h == NULL) {
+ icmpv6_error_log(skb, net, pf, "short packet");
+ return -NF_ACCEPT;
+ }
+
+ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
+ nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
+ icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed");
+ return -NF_ACCEPT;
+ }
+
+ type = icmp6h->icmp6_type - 130;
+ if (type >= 0 && type < sizeof(noct_valid_new) &&
+ noct_valid_new[type]) {
+ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+ return NF_ACCEPT;
+ }
+
+ /* is not error message ? */
+ if (icmp6h->icmp6_type >= 128)
+ return NF_ACCEPT;
+
+ return icmpv6_error_message(net, tmpl, skb, dataoff);
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+static int icmpv6_tuple_to_nlattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *t)
+{
+ if (nla_put_be16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id) ||
+ nla_put_u8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type) ||
+ nla_put_u8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = {
+ [CTA_PROTO_ICMPV6_TYPE] = { .type = NLA_U8 },
+ [CTA_PROTO_ICMPV6_CODE] = { .type = NLA_U8 },
+ [CTA_PROTO_ICMPV6_ID] = { .type = NLA_U16 },
+};
+
+static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
+ struct nf_conntrack_tuple *tuple)
+{
+ if (!tb[CTA_PROTO_ICMPV6_TYPE] ||
+ !tb[CTA_PROTO_ICMPV6_CODE] ||
+ !tb[CTA_PROTO_ICMPV6_ID])
+ return -EINVAL;
+
+ tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
+ tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
+ tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
+
+ if (tuple->dst.u.icmp.type < 128 ||
+ tuple->dst.u.icmp.type - 128 >= sizeof(invmap) ||
+ !invmap[tuple->dst.u.icmp.type - 128])
+ return -EINVAL;
+
+ return 0;
+}
+
+static unsigned int icmpv6_nlattr_tuple_size(void)
+{
+ static unsigned int size __read_mostly;
+
+ if (!size)
+ size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
+
+ return size;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
+ struct net *net, void *data)
+{
+ unsigned int *timeout = data;
+ struct nf_icmp_net *in = icmpv6_pernet(net);
+
+ if (!timeout)
+ timeout = icmpv6_get_timeouts(net);
+ if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) {
+ *timeout =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ;
+ } else {
+ /* Set default ICMPv6 timeout. */
+ *timeout = in->timeout;
+ }
+ return 0;
+}
+
+static int
+icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+ const unsigned int *timeout = data;
+
+ if (nla_put_be32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static const struct nla_policy
+icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = {
+ [CTA_TIMEOUT_ICMPV6_TIMEOUT] = { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table icmpv6_sysctl_table[] = {
+ {
+ .procname = "nf_conntrack_icmpv6_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ { }
+};
+#endif /* CONFIG_SYSCTL */
+
+static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
+ pn->ctl_table = kmemdup(icmpv6_sysctl_table,
+ sizeof(icmpv6_sysctl_table),
+ GFP_KERNEL);
+ if (!pn->ctl_table)
+ return -ENOMEM;
+
+ pn->ctl_table[0].data = &in->timeout;
+#endif
+ return 0;
+}
+
+static int icmpv6_init_net(struct net *net, u_int16_t proto)
+{
+ struct nf_icmp_net *in = icmpv6_pernet(net);
+ struct nf_proto_net *pn = &in->pn;
+
+ in->timeout = nf_ct_icmpv6_timeout;
+
+ return icmpv6_kmemdup_sysctl_table(pn, in);
+}
+
+static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
+{
+ return &net->ct.nf_ct_proto.icmpv6.pn;
+}
+
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
+{
+ .l3proto = PF_INET6,
+ .l4proto = IPPROTO_ICMPV6,
+ .pkt_to_tuple = icmpv6_pkt_to_tuple,
+ .invert_tuple = icmpv6_invert_tuple,
+ .packet = icmpv6_packet,
+ .new = icmpv6_new,
+ .error = icmpv6_error,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .tuple_to_nlattr = icmpv6_tuple_to_nlattr,
+ .nlattr_tuple_size = icmpv6_nlattr_tuple_size,
+ .nlattr_to_tuple = icmpv6_nlattr_to_tuple,
+ .nla_policy = icmpv6_nla_policy,
+#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+ .ctnl_timeout = {
+ .nlattr_to_obj = icmpv6_timeout_nlattr_to_obj,
+ .obj_to_nlattr = icmpv6_timeout_obj_to_nlattr,
+ .nlattr_max = CTA_TIMEOUT_ICMP_MAX,
+ .obj_size = sizeof(unsigned int),
+ .nla_policy = icmpv6_timeout_nla_policy,
+ },
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+ .init_net = icmpv6_init_net,
+ .get_net_proto = icmpv6_get_net_proto,
+};
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index fb9a35d16069..8d1e085fc14a 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -28,6 +28,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR
@@ -150,30 +151,6 @@ static inline struct nf_sctp_net *sctp_pernet(struct net *net)
return &net->ct.nf_ct_proto.sctp;
}
-static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
- struct net *net, struct nf_conntrack_tuple *tuple)
-{
- const struct sctphdr *hp;
- struct sctphdr _hdr;
-
- /* Actually only need first 4 bytes to get ports. */
- hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
- if (hp == NULL)
- return false;
-
- tuple->src.u.sctp.port = hp->source;
- tuple->dst.u.sctp.port = hp->dest;
- return true;
-}
-
-static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_tuple *orig)
-{
- tuple->src.u.sctp.port = orig->dst.u.sctp.port;
- tuple->dst.u.sctp.port = orig->src.u.sctp.port;
- return true;
-}
-
#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@@ -296,17 +273,11 @@ static int sctp_new_state(enum ip_conntrack_dir dir,
return sctp_conntracks[dir][i][cur_state];
}
-static unsigned int *sctp_get_timeouts(struct net *net)
-{
- return sctp_pernet(net)->timeouts;
-}
-
/* Returns verdict for packet, or -NF_ACCEPT for invalid. */
static int sctp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
- enum ip_conntrack_info ctinfo,
- unsigned int *timeouts)
+ enum ip_conntrack_info ctinfo)
{
enum sctp_conntrack new_state, old_state;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
@@ -315,6 +286,7 @@ static int sctp_packet(struct nf_conn *ct,
const struct sctp_chunkhdr *sch;
struct sctp_chunkhdr _sch;
u_int32_t offset, count;
+ unsigned int *timeouts;
unsigned long map[256 / sizeof(unsigned long)] = { 0 };
sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
@@ -403,6 +375,10 @@ static int sctp_packet(struct nf_conn *ct,
}
spin_unlock_bh(&ct->lock);
+ timeouts = nf_ct_timeout_lookup(ct);
+ if (!timeouts)
+ timeouts = sctp_pernet(nf_ct_net(ct))->timeouts;
+
nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED &&
@@ -423,7 +399,7 @@ out:
/* Called when a new connection for this protocol found. */
static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
- unsigned int dataoff, unsigned int *timeouts)
+ unsigned int dataoff)
{
enum sctp_conntrack new_state;
const struct sctphdr *sh;
@@ -780,13 +756,10 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net)
const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = {
.l3proto = PF_INET,
.l4proto = IPPROTO_SCTP,
- .pkt_to_tuple = sctp_pkt_to_tuple,
- .invert_tuple = sctp_invert_tuple,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
#endif
.packet = sctp_packet,
- .get_timeouts = sctp_get_timeouts,
.new = sctp_new,
.error = sctp_error,
.can_early_drop = sctp_can_early_drop,
@@ -817,13 +790,10 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
.l3proto = PF_INET6,
.l4proto = IPPROTO_SCTP,
- .pkt_to_tuple = sctp_pkt_to_tuple,
- .invert_tuple = sctp_invert_tuple,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
#endif
.packet = sctp_packet,
- .get_timeouts = sctp_get_timeouts,
.new = sctp_new,
.error = sctp_error,
.can_early_drop = sctp_can_early_drop,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 8e67910185a0..d80d322b9d8b 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -29,6 +29,7 @@
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_log.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
@@ -276,31 +277,6 @@ static inline struct nf_tcp_net *tcp_pernet(struct net *net)
return &net->ct.nf_ct_proto.tcp;
}
-static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
- struct net *net, struct nf_conntrack_tuple *tuple)
-{
- const struct tcphdr *hp;
- struct tcphdr _hdr;
-
- /* Actually only need first 4 bytes to get ports. */
- hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
- if (hp == NULL)
- return false;
-
- tuple->src.u.tcp.port = hp->source;
- tuple->dst.u.tcp.port = hp->dest;
-
- return true;
-}
-
-static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_tuple *orig)
-{
- tuple->src.u.tcp.port = orig->dst.u.tcp.port;
- tuple->dst.u.tcp.port = orig->src.u.tcp.port;
- return true;
-}
-
#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@@ -793,27 +769,21 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
return NF_ACCEPT;
}
-static unsigned int *tcp_get_timeouts(struct net *net)
-{
- return tcp_pernet(net)->timeouts;
-}
-
/* Returns verdict for packet, or -1 for invalid. */
static int tcp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
- enum ip_conntrack_info ctinfo,
- unsigned int *timeouts)
+ enum ip_conntrack_info ctinfo)
{
struct net *net = nf_ct_net(ct);
struct nf_tcp_net *tn = tcp_pernet(net);
struct nf_conntrack_tuple *tuple;
enum tcp_conntrack new_state, old_state;
+ unsigned int index, *timeouts;
enum ip_conntrack_dir dir;
const struct tcphdr *th;
struct tcphdr _tcph;
unsigned long timeout;
- unsigned int index;
th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
BUG_ON(th == NULL);
@@ -1046,6 +1016,10 @@ static int tcp_packet(struct nf_conn *ct,
&& new_state == TCP_CONNTRACK_FIN_WAIT)
ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
+ timeouts = nf_ct_timeout_lookup(ct);
+ if (!timeouts)
+ timeouts = tn->timeouts;
+
if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
timeout = timeouts[TCP_CONNTRACK_RETRANS];
@@ -1095,7 +1069,7 @@ static int tcp_packet(struct nf_conn *ct,
/* Called when a new connection for this protocol found. */
static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
- unsigned int dataoff, unsigned int *timeouts)
+ unsigned int dataoff)
{
enum tcp_conntrack new_state;
const struct tcphdr *th;
@@ -1313,10 +1287,12 @@ static unsigned int tcp_nlattr_tuple_size(void)
static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
- unsigned int *timeouts = data;
struct nf_tcp_net *tn = tcp_pernet(net);
+ unsigned int *timeouts = data;
int i;
+ if (!timeouts)
+ timeouts = tn->timeouts;
/* set default TCP timeouts. */
for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
timeouts[i] = tn->timeouts[i];
@@ -1559,13 +1535,10 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_TCP,
- .pkt_to_tuple = tcp_pkt_to_tuple,
- .invert_tuple = tcp_invert_tuple,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = tcp_print_conntrack,
#endif
.packet = tcp_packet,
- .get_timeouts = tcp_get_timeouts,
.new = tcp_new,
.error = tcp_error,
.can_early_drop = tcp_can_early_drop,
@@ -1597,13 +1570,10 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_TCP,
- .pkt_to_tuple = tcp_pkt_to_tuple,
- .invert_tuple = tcp_invert_tuple,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = tcp_print_conntrack,
#endif
.packet = tcp_packet,
- .get_timeouts = tcp_get_timeouts,
.new = tcp_new,
.error = tcp_error,
.can_early_drop = tcp_can_early_drop,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index fe7243970aa4..7a1b8988a931 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -22,6 +22,7 @@
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_log.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
@@ -36,33 +37,6 @@ static inline struct nf_udp_net *udp_pernet(struct net *net)
return &net->ct.nf_ct_proto.udp;
}
-static bool udp_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct net *net,
- struct nf_conntrack_tuple *tuple)
-{
- const struct udphdr *hp;
- struct udphdr _hdr;
-
- /* Actually only need first 4 bytes to get ports. */
- hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
- if (hp == NULL)
- return false;
-
- tuple->src.u.udp.port = hp->source;
- tuple->dst.u.udp.port = hp->dest;
-
- return true;
-}
-
-static bool udp_invert_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_tuple *orig)
-{
- tuple->src.u.udp.port = orig->dst.u.udp.port;
- tuple->dst.u.udp.port = orig->src.u.udp.port;
- return true;
-}
-
static unsigned int *udp_get_timeouts(struct net *net)
{
return udp_pernet(net)->timeouts;
@@ -72,9 +46,14 @@ static unsigned int *udp_get_timeouts(struct net *net)
static int udp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
- enum ip_conntrack_info ctinfo,
- unsigned int *timeouts)
+ enum ip_conntrack_info ctinfo)
{
+ unsigned int *timeouts;
+
+ timeouts = nf_ct_timeout_lookup(ct);
+ if (!timeouts)
+ timeouts = udp_get_timeouts(nf_ct_net(ct));
+
/* If we've seen traffic both ways, this is some kind of UDP
stream. Extend timeout. */
if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
@@ -92,7 +71,7 @@ static int udp_packet(struct nf_conn *ct,
/* Called when a new connection for this protocol found. */
static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
- unsigned int dataoff, unsigned int *timeouts)
+ unsigned int dataoff)
{
return true;
}
@@ -203,6 +182,9 @@ static int udp_timeout_nlattr_to_obj(struct nlattr *tb[],
unsigned int *timeouts = data;
struct nf_udp_net *un = udp_pernet(net);
+ if (!timeouts)
+ timeouts = un->timeouts;
+
/* set default timeouts for UDP. */
timeouts[UDP_CT_UNREPLIED] = un->timeouts[UDP_CT_UNREPLIED];
timeouts[UDP_CT_REPLIED] = un->timeouts[UDP_CT_REPLIED];
@@ -301,10 +283,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 =
.l3proto = PF_INET,
.l4proto = IPPROTO_UDP,
.allow_clash = true,
- .pkt_to_tuple = udp_pkt_to_tuple,
- .invert_tuple = udp_invert_tuple,
.packet = udp_packet,
- .get_timeouts = udp_get_timeouts,
.new = udp_new,
.error = udp_error,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -333,10 +312,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 =
.l3proto = PF_INET,
.l4proto = IPPROTO_UDPLITE,
.allow_clash = true,
- .pkt_to_tuple = udp_pkt_to_tuple,
- .invert_tuple = udp_invert_tuple,
.packet = udp_packet,
- .get_timeouts = udp_get_timeouts,
.new = udp_new,
.error = udplite_error,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -365,10 +341,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDP,
.allow_clash = true,
- .pkt_to_tuple = udp_pkt_to_tuple,
- .invert_tuple = udp_invert_tuple,
.packet = udp_packet,
- .get_timeouts = udp_get_timeouts,
.new = udp_new,
.error = udp_error,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -397,10 +370,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDPLITE,
.allow_clash = true,
- .pkt_to_tuple = udp_pkt_to_tuple,
- .invert_tuple = udp_invert_tuple,
.packet = udp_packet,
- .get_timeouts = udp_get_timeouts,
.new = udp_new,
.error = udplite_error,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -423,3 +393,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6);
#endif
+#include <net/netfilter/nf_conntrack_timeout.h>
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b642c0b2495c..13279f683da9 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -1,12 +1,4 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/slab.h>
@@ -24,7 +16,6 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
@@ -33,15 +24,14 @@
#include <net/netfilter/nf_conntrack_timestamp.h>
#include <linux/rculist_nulls.h>
-MODULE_LICENSE("GPL");
+unsigned int nf_conntrack_net_id __read_mostly;
#ifdef CONFIG_NF_CONNTRACK_PROCFS
void
print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
- switch (l3proto->l3proto) {
+ switch (tuple->src.l3num) {
case NFPROTO_IPV4:
seq_printf(s, "src=%pI4 dst=%pI4 ",
&tuple->src.u3.ip, &tuple->dst.u3.ip);
@@ -282,7 +272,6 @@ static int ct_seq_show(struct seq_file *s, void *v)
{
struct nf_conntrack_tuple_hash *hash = v;
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
- const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
struct net *net = seq_file_net(s);
int ret = 0;
@@ -303,14 +292,12 @@ static int ct_seq_show(struct seq_file *s, void *v)
if (!net_eq(nf_ct_net(ct), net))
goto release;
- l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
- WARN_ON(!l3proto);
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
WARN_ON(!l4proto);
ret = -ENOSPC;
seq_printf(s, "%-8s %u %-8s %u ",
- l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
+ l3proto_name(nf_ct_l3num(ct)), nf_ct_l3num(ct),
l4proto_name(l4proto->l4proto), nf_ct_protonum(ct));
if (!test_bit(IPS_OFFLOAD_BIT, &ct->status))
@@ -320,7 +307,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
l4proto->print_conntrack(s, ct);
print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- l3proto, l4proto);
+ l4proto);
ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG);
@@ -333,8 +320,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
seq_puts(s, "[UNREPLIED] ");
- print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
- l3proto, l4proto);
+ print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, l4proto);
ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL);
@@ -680,6 +666,8 @@ static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
static struct pernet_operations nf_conntrack_net_ops = {
.init = nf_conntrack_pernet_init,
.exit_batch = nf_conntrack_pernet_exit,
+ .id = &nf_conntrack_net_id,
+ .size = sizeof(struct nf_conntrack_net),
};
static int __init nf_conntrack_standalone_init(void)
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index eb0d1658ac05..d8125616edc7 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -107,11 +107,12 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
tcp->seen[1].td_maxwin = 0;
}
+#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
+#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
+
static void flow_offload_fixup_ct_state(struct nf_conn *ct)
{
const struct nf_conntrack_l4proto *l4proto;
- struct net *net = nf_ct_net(ct);
- unsigned int *timeouts;
unsigned int timeout;
int l4num;
@@ -123,14 +124,10 @@ static void flow_offload_fixup_ct_state(struct nf_conn *ct)
if (!l4proto)
return;
- timeouts = l4proto->get_timeouts(net);
- if (!timeouts)
- return;
-
if (l4num == IPPROTO_TCP)
- timeout = timeouts[TCP_CONNTRACK_ESTABLISHED];
+ timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
else if (l4num == IPPROTO_UDP)
- timeout = timeouts[UDP_CT_REPLIED];
+ timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
else
return;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 86df2a1666fd..6366f0c0b8c1 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -28,7 +28,6 @@
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_nat.h>
@@ -743,12 +742,6 @@ EXPORT_SYMBOL_GPL(nf_nat_l4proto_unregister);
int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
{
- int err;
-
- err = nf_ct_l3proto_try_module_get(l3proto->l3proto);
- if (err < 0)
- return err;
-
mutex_lock(&nf_nat_proto_mutex);
RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP],
&nf_nat_l4proto_tcp);
@@ -781,7 +774,6 @@ void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *l3proto)
synchronize_rcu();
nf_nat_l3proto_clean(l3proto->l3proto);
- nf_ct_l3proto_module_put(l3proto->l3proto);
}
EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
diff --git a/net/netfilter/nf_osf.c b/net/netfilter/nf_osf.c
index 5ba5c7bef2f9..b44d62d5d9a9 100644
--- a/net/netfilter/nf_osf.c
+++ b/net/netfilter/nf_osf.c
@@ -21,15 +21,14 @@
#include <linux/netfilter/nf_osf.h>
static inline int nf_osf_ttl(const struct sk_buff *skb,
- const struct nf_osf_info *info,
- unsigned char f_ttl)
+ int ttl_check, unsigned char f_ttl)
{
const struct iphdr *ip = ip_hdr(skb);
- if (info->flags & NF_OSF_TTL) {
- if (info->ttl == NF_OSF_TTL_TRUE)
+ if (ttl_check != -1) {
+ if (ttl_check == NF_OSF_TTL_TRUE)
return ip->ttl == f_ttl;
- if (info->ttl == NF_OSF_TTL_NOCHECK)
+ if (ttl_check == NF_OSF_TTL_NOCHECK)
return 1;
else if (ip->ttl <= f_ttl)
return 1;
@@ -52,140 +51,175 @@ static inline int nf_osf_ttl(const struct sk_buff *skb,
return ip->ttl == f_ttl;
}
-bool
-nf_osf_match(const struct sk_buff *skb, u_int8_t family,
- int hooknum, struct net_device *in, struct net_device *out,
- const struct nf_osf_info *info, struct net *net,
- const struct list_head *nf_osf_fingers)
+struct nf_osf_hdr_ctx {
+ bool df;
+ u16 window;
+ u16 totlen;
+ const unsigned char *optp;
+ unsigned int optsize;
+};
+
+static bool nf_osf_match_one(const struct sk_buff *skb,
+ const struct nf_osf_user_finger *f,
+ int ttl_check,
+ struct nf_osf_hdr_ctx *ctx)
{
- const unsigned char *optp = NULL, *_optp = NULL;
- unsigned int optsize = 0, check_WSS = 0;
- int fmatch = FMATCH_WRONG, fcount = 0;
- const struct iphdr *ip = ip_hdr(skb);
- const struct nf_osf_user_finger *f;
- unsigned char opts[MAX_IPOPTLEN];
- const struct nf_osf_finger *kf;
- u16 window, totlen, mss = 0;
- const struct tcphdr *tcp;
- struct tcphdr _tcph;
- bool df;
+ unsigned int check_WSS = 0;
+ int fmatch = FMATCH_WRONG;
+ int foptsize, optnum;
+ u16 mss = 0;
- tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
- if (!tcp)
+ if (ctx->totlen != f->ss || !nf_osf_ttl(skb, ttl_check, f->ttl))
return false;
- if (!tcp->syn)
+ /*
+ * Should not happen if userspace parser was written correctly.
+ */
+ if (f->wss.wc >= OSF_WSS_MAX)
return false;
- totlen = ntohs(ip->tot_len);
- df = ntohs(ip->frag_off) & IP_DF;
- window = ntohs(tcp->window);
+ /* Check options */
- if (tcp->doff * 4 > sizeof(struct tcphdr)) {
- optsize = tcp->doff * 4 - sizeof(struct tcphdr);
+ foptsize = 0;
+ for (optnum = 0; optnum < f->opt_num; ++optnum)
+ foptsize += f->opt[optnum].length;
- _optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) +
- sizeof(struct tcphdr), optsize, opts);
- }
+ if (foptsize > MAX_IPOPTLEN ||
+ ctx->optsize > MAX_IPOPTLEN ||
+ ctx->optsize != foptsize)
+ return false;
- list_for_each_entry_rcu(kf, &nf_osf_fingers[df], finger_entry) {
- int foptsize, optnum;
+ check_WSS = f->wss.wc;
- f = &kf->finger;
+ for (optnum = 0; optnum < f->opt_num; ++optnum) {
+ if (f->opt[optnum].kind == *ctx->optp) {
+ __u32 len = f->opt[optnum].length;
+ const __u8 *optend = ctx->optp + len;
- if (!(info->flags & NF_OSF_LOG) && strcmp(info->genre, f->genre))
- continue;
+ fmatch = FMATCH_OK;
+
+ switch (*ctx->optp) {
+ case OSFOPT_MSS:
+ mss = ctx->optp[3];
+ mss <<= 8;
+ mss |= ctx->optp[2];
+
+ mss = ntohs((__force __be16)mss);
+ break;
+ case OSFOPT_TS:
+ break;
+ }
+
+ ctx->optp = optend;
+ } else
+ fmatch = FMATCH_OPT_WRONG;
+
+ if (fmatch != FMATCH_OK)
+ break;
+ }
- optp = _optp;
+ if (fmatch != FMATCH_OPT_WRONG) {
fmatch = FMATCH_WRONG;
- if (totlen != f->ss || !nf_osf_ttl(skb, info, f->ttl))
- continue;
+ switch (check_WSS) {
+ case OSF_WSS_PLAIN:
+ if (f->wss.val == 0 || ctx->window == f->wss.val)
+ fmatch = FMATCH_OK;
+ break;
+ case OSF_WSS_MSS:
+ /*
+ * Some smart modems decrease mangle MSS to
+ * SMART_MSS_2, so we check standard, decreased
+ * and the one provided in the fingerprint MSS
+ * values.
+ */
+#define SMART_MSS_1 1460
+#define SMART_MSS_2 1448
+ if (ctx->window == f->wss.val * mss ||
+ ctx->window == f->wss.val * SMART_MSS_1 ||
+ ctx->window == f->wss.val * SMART_MSS_2)
+ fmatch = FMATCH_OK;
+ break;
+ case OSF_WSS_MTU:
+ if (ctx->window == f->wss.val * (mss + 40) ||
+ ctx->window == f->wss.val * (SMART_MSS_1 + 40) ||
+ ctx->window == f->wss.val * (SMART_MSS_2 + 40))
+ fmatch = FMATCH_OK;
+ break;
+ case OSF_WSS_MODULO:
+ if ((ctx->window % f->wss.val) == 0)
+ fmatch = FMATCH_OK;
+ break;
+ }
+ }
- /*
- * Should not happen if userspace parser was written correctly.
- */
- if (f->wss.wc >= OSF_WSS_MAX)
- continue;
+ return fmatch == FMATCH_OK;
+}
- /* Check options */
+static const struct tcphdr *nf_osf_hdr_ctx_init(struct nf_osf_hdr_ctx *ctx,
+ const struct sk_buff *skb,
+ const struct iphdr *ip,
+ unsigned char *opts)
+{
+ const struct tcphdr *tcp;
+ struct tcphdr _tcph;
- foptsize = 0;
- for (optnum = 0; optnum < f->opt_num; ++optnum)
- foptsize += f->opt[optnum].length;
+ tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
+ if (!tcp)
+ return NULL;
- if (foptsize > MAX_IPOPTLEN ||
- optsize > MAX_IPOPTLEN ||
- optsize != foptsize)
- continue;
+ if (!tcp->syn)
+ return NULL;
- check_WSS = f->wss.wc;
+ ctx->totlen = ntohs(ip->tot_len);
+ ctx->df = ntohs(ip->frag_off) & IP_DF;
+ ctx->window = ntohs(tcp->window);
- for (optnum = 0; optnum < f->opt_num; ++optnum) {
- if (f->opt[optnum].kind == (*optp)) {
- __u32 len = f->opt[optnum].length;
- const __u8 *optend = optp + len;
+ if (tcp->doff * 4 > sizeof(struct tcphdr)) {
+ ctx->optsize = tcp->doff * 4 - sizeof(struct tcphdr);
- fmatch = FMATCH_OK;
+ ctx->optp = skb_header_pointer(skb, ip_hdrlen(skb) +
+ sizeof(struct tcphdr), ctx->optsize, opts);
+ }
- switch (*optp) {
- case OSFOPT_MSS:
- mss = optp[3];
- mss <<= 8;
- mss |= optp[2];
+ return tcp;
+}
- mss = ntohs((__force __be16)mss);
- break;
- case OSFOPT_TS:
- break;
- }
+bool
+nf_osf_match(const struct sk_buff *skb, u_int8_t family,
+ int hooknum, struct net_device *in, struct net_device *out,
+ const struct nf_osf_info *info, struct net *net,
+ const struct list_head *nf_osf_fingers)
+{
+ const struct iphdr *ip = ip_hdr(skb);
+ const struct nf_osf_user_finger *f;
+ unsigned char opts[MAX_IPOPTLEN];
+ const struct nf_osf_finger *kf;
+ int fcount = 0, ttl_check;
+ int fmatch = FMATCH_WRONG;
+ struct nf_osf_hdr_ctx ctx;
+ const struct tcphdr *tcp;
- optp = optend;
- } else
- fmatch = FMATCH_OPT_WRONG;
+ memset(&ctx, 0, sizeof(ctx));
- if (fmatch != FMATCH_OK)
- break;
- }
+ tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts);
+ if (!tcp)
+ return false;
- if (fmatch != FMATCH_OPT_WRONG) {
- fmatch = FMATCH_WRONG;
+ ttl_check = (info->flags & NF_OSF_TTL) ? info->ttl : -1;
- switch (check_WSS) {
- case OSF_WSS_PLAIN:
- if (f->wss.val == 0 || window == f->wss.val)
- fmatch = FMATCH_OK;
- break;
- case OSF_WSS_MSS:
- /*
- * Some smart modems decrease mangle MSS to
- * SMART_MSS_2, so we check standard, decreased
- * and the one provided in the fingerprint MSS
- * values.
- */
-#define SMART_MSS_1 1460
-#define SMART_MSS_2 1448
- if (window == f->wss.val * mss ||
- window == f->wss.val * SMART_MSS_1 ||
- window == f->wss.val * SMART_MSS_2)
- fmatch = FMATCH_OK;
- break;
- case OSF_WSS_MTU:
- if (window == f->wss.val * (mss + 40) ||
- window == f->wss.val * (SMART_MSS_1 + 40) ||
- window == f->wss.val * (SMART_MSS_2 + 40))
- fmatch = FMATCH_OK;
- break;
- case OSF_WSS_MODULO:
- if ((window % f->wss.val) == 0)
- fmatch = FMATCH_OK;
- break;
- }
- }
+ list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) {
- if (fmatch != FMATCH_OK)
+ f = &kf->finger;
+
+ if (!(info->flags & NF_OSF_LOG) && strcmp(info->genre, f->genre))
+ continue;
+
+ if (!nf_osf_match_one(skb, f, ttl_check, &ctx))
continue;
+ fmatch = FMATCH_OK;
+
fcount++;
if (info->flags & NF_OSF_LOG)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3f211e1025c1..c0fb2bcd30fe 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -455,20 +455,59 @@ __nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
return NULL;
}
+/*
+ * Loading a module requires dropping mutex that guards the
+ * transaction.
+ * We first need to abort any pending transactions as once
+ * mutex is unlocked a different client could start a new
+ * transaction. It must not see any 'future generation'
+ * changes * as these changes will never happen.
+ */
+#ifdef CONFIG_MODULES
+static int __nf_tables_abort(struct net *net);
+
+static void nft_request_module(struct net *net, const char *fmt, ...)
+{
+ char module_name[MODULE_NAME_LEN];
+ va_list args;
+ int ret;
+
+ __nf_tables_abort(net);
+
+ va_start(args, fmt);
+ ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
+ va_end(args);
+ if (WARN(ret >= MODULE_NAME_LEN, "truncated: '%s' (len %d)", module_name, ret))
+ return;
+
+ mutex_unlock(&net->nft.commit_mutex);
+ request_module("%s", module_name);
+ mutex_lock(&net->nft.commit_mutex);
+}
+#endif
+
+static void lockdep_nfnl_nft_mutex_not_held(void)
+{
+#ifdef CONFIG_PROVE_LOCKING
+ WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+#endif
+}
+
static const struct nft_chain_type *
-nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family, bool autoload)
+nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
+ u8 family, bool autoload)
{
const struct nft_chain_type *type;
type = __nf_tables_chain_type_lookup(nla, family);
if (type != NULL)
return type;
+
+ lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (autoload) {
- nfnl_unlock(NFNL_SUBSYS_NFTABLES);
- request_module("nft-chain-%u-%.*s", family,
- nla_len(nla), (const char *)nla_data(nla));
- nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ nft_request_module(net, "nft-chain-%u-%.*s", family,
+ nla_len(nla), (const char *)nla_data(nla));
type = __nf_tables_chain_type_lookup(nla, family);
if (type != NULL)
return ERR_PTR(-EAGAIN);
@@ -772,6 +811,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
struct nft_ctx ctx;
int err;
+ lockdep_assert_held(&net->nft.commit_mutex);
attr = nla[NFTA_TABLE_NAME];
table = nft_table_lookup(net, attr, family, genmask);
if (IS_ERR(table)) {
@@ -1012,7 +1052,17 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
return ERR_PTR(-ENOENT);
}
-static struct nft_chain *nft_chain_lookup(struct nft_table *table,
+static bool lockdep_commit_lock_is_held(struct net *net)
+{
+#ifdef CONFIG_PROVE_LOCKING
+ return lockdep_is_held(&net->nft.commit_mutex);
+#else
+ return true;
+#endif
+}
+
+static struct nft_chain *nft_chain_lookup(struct net *net,
+ struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
char search[NFT_CHAIN_MAXNAMELEN + 1];
@@ -1025,7 +1075,7 @@ static struct nft_chain *nft_chain_lookup(struct nft_table *table,
nla_strlcpy(search, nla, sizeof(search));
WARN_ON(!rcu_read_lock_held() &&
- !lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+ !lockdep_commit_lock_is_held(net));
chain = ERR_PTR(-ENOENT);
rcu_read_lock();
@@ -1265,7 +1315,7 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
return PTR_ERR(table);
}
- chain = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+ chain = nft_chain_lookup(net, table, nla[NFTA_CHAIN_NAME], genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
return PTR_ERR(chain);
@@ -1398,6 +1448,9 @@ static int nft_chain_parse_hook(struct net *net,
struct net_device *dev;
int err;
+ lockdep_assert_held(&net->nft.commit_mutex);
+ lockdep_nfnl_nft_mutex_not_held();
+
err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
nft_hook_policy, NULL);
if (err < 0)
@@ -1412,7 +1465,7 @@ static int nft_chain_parse_hook(struct net *net,
type = chain_type[family][NFT_CHAIN_T_DEFAULT];
if (nla[NFTA_CHAIN_TYPE]) {
- type = nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE],
+ type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE],
family, create);
if (IS_ERR(type))
return PTR_ERR(type);
@@ -1632,7 +1685,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
nla[NFTA_CHAIN_NAME]) {
struct nft_chain *chain2;
- chain2 = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+ chain2 = nft_chain_lookup(ctx->net, table,
+ nla[NFTA_CHAIN_NAME], genmask);
if (!IS_ERR(chain2))
return -EEXIST;
}
@@ -1694,6 +1748,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+ lockdep_assert_held(&net->nft.commit_mutex);
+
table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
@@ -1712,7 +1768,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
}
attr = nla[NFTA_CHAIN_HANDLE];
} else {
- chain = nft_chain_lookup(table, attr, genmask);
+ chain = nft_chain_lookup(net, table, attr, genmask);
if (IS_ERR(chain)) {
if (PTR_ERR(chain) != -ENOENT) {
NL_SET_BAD_ATTR(extack, attr);
@@ -1790,7 +1846,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
chain = nft_chain_lookup_byhandle(table, handle, genmask);
} else {
attr = nla[NFTA_CHAIN_NAME];
- chain = nft_chain_lookup(table, attr, genmask);
+ chain = nft_chain_lookup(net, table, attr, genmask);
}
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, attr);
@@ -1875,7 +1931,8 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family,
return NULL;
}
-static const struct nft_expr_type *nft_expr_type_get(u8 family,
+static const struct nft_expr_type *nft_expr_type_get(struct net *net,
+ u8 family,
struct nlattr *nla)
{
const struct nft_expr_type *type;
@@ -1887,19 +1944,16 @@ static const struct nft_expr_type *nft_expr_type_get(u8 family,
if (type != NULL && try_module_get(type->owner))
return type;
+ lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (type == NULL) {
- nfnl_unlock(NFNL_SUBSYS_NFTABLES);
- request_module("nft-expr-%u-%.*s", family,
- nla_len(nla), (char *)nla_data(nla));
- nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ nft_request_module(net, "nft-expr-%u-%.*s", family,
+ nla_len(nla), (char *)nla_data(nla));
if (__nft_expr_type_get(family, nla))
return ERR_PTR(-EAGAIN);
- nfnl_unlock(NFNL_SUBSYS_NFTABLES);
- request_module("nft-expr-%.*s",
- nla_len(nla), (char *)nla_data(nla));
- nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ nft_request_module(net, "nft-expr-%.*s",
+ nla_len(nla), (char *)nla_data(nla));
if (__nft_expr_type_get(family, nla))
return ERR_PTR(-EAGAIN);
}
@@ -1968,7 +2022,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
if (err < 0)
return err;
- type = nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]);
+ type = nft_expr_type_get(ctx->net, ctx->family, tb[NFTA_EXPR_NAME]);
if (IS_ERR(type))
return PTR_ERR(type);
@@ -2325,7 +2379,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
return PTR_ERR(table);
}
- chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
+ chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
return PTR_ERR(chain);
@@ -2359,6 +2413,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
{
struct nft_expr *expr;
+ lockdep_assert_held(&ctx->net->nft.commit_mutex);
/*
* Careful: some expressions might not be initialized in case this
* is called on error from nf_tables_newrule().
@@ -2427,8 +2482,6 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
#define NFT_RULE_MAXEXPRS 128
-static struct nft_expr_info *info;
-
static int nf_tables_newrule(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -2436,6 +2489,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
+ struct nft_expr_info *info = NULL;
int family = nfmsg->nfgen_family;
struct nft_table *table;
struct nft_chain *chain;
@@ -2450,6 +2504,8 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
bool create;
u64 handle, pos_handle;
+ lockdep_assert_held(&net->nft.commit_mutex);
+
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
@@ -2458,7 +2514,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
return PTR_ERR(table);
}
- chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
+ chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
return PTR_ERR(chain);
@@ -2506,6 +2562,12 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
n = 0;
size = 0;
if (nla[NFTA_RULE_EXPRESSIONS]) {
+ info = kvmalloc_array(NFT_RULE_MAXEXPRS,
+ sizeof(struct nft_expr_info),
+ GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
err = -EINVAL;
if (nla_type(tmp) != NFTA_LIST_ELEM)
@@ -2598,6 +2660,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
list_add_rcu(&rule->list, &chain->rules);
}
}
+ kvfree(info);
chain->use++;
if (net->nft.validate_state == NFT_VALIDATE_DO)
@@ -2611,6 +2674,7 @@ err1:
if (info[i].ops != NULL)
module_put(info[i].ops->type->owner);
}
+ kvfree(info);
return err;
}
@@ -2650,7 +2714,8 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
}
if (nla[NFTA_RULE_CHAIN]) {
- chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
+ chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN],
+ genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
return PTR_ERR(chain);
@@ -2742,11 +2807,11 @@ nft_select_set_ops(const struct nft_ctx *ctx,
const struct nft_set_type *type;
u32 flags = 0;
+ lockdep_assert_held(&ctx->net->nft.commit_mutex);
+ lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (list_empty(&nf_tables_set_types)) {
- nfnl_unlock(NFNL_SUBSYS_NFTABLES);
- request_module("nft-set");
- nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ nft_request_module(ctx->net, "nft-set");
if (!list_empty(&nf_tables_set_types))
return ERR_PTR(-EAGAIN);
}
@@ -4779,7 +4844,8 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
return NULL;
}
-static const struct nft_object_type *nft_obj_type_get(u32 objtype)
+static const struct nft_object_type *
+nft_obj_type_get(struct net *net, u32 objtype)
{
const struct nft_object_type *type;
@@ -4787,11 +4853,10 @@ static const struct nft_object_type *nft_obj_type_get(u32 objtype)
if (type != NULL && try_module_get(type->owner))
return type;
+ lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (type == NULL) {
- nfnl_unlock(NFNL_SUBSYS_NFTABLES);
- request_module("nft-obj-%u", objtype);
- nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ nft_request_module(net, "nft-obj-%u", objtype);
if (__nft_obj_type_get(objtype))
return ERR_PTR(-EAGAIN);
}
@@ -4843,7 +4908,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
- type = nft_obj_type_get(objtype);
+ type = nft_obj_type_get(net, objtype);
if (IS_ERR(type))
return PTR_ERR(type);
@@ -5339,7 +5404,8 @@ static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
return NULL;
}
-static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family)
+static const struct nf_flowtable_type *
+nft_flowtable_type_get(struct net *net, u8 family)
{
const struct nf_flowtable_type *type;
@@ -5347,11 +5413,10 @@ static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family)
if (type != NULL && try_module_get(type->owner))
return type;
+ lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (type == NULL) {
- nfnl_unlock(NFNL_SUBSYS_NFTABLES);
- request_module("nf-flowtable-%u", family);
- nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ nft_request_module(net, "nf-flowtable-%u", family);
if (__nft_flowtable_type_get(family))
return ERR_PTR(-EAGAIN);
}
@@ -5431,7 +5496,7 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
goto err1;
}
- type = nft_flowtable_type_get(family);
+ type = nft_flowtable_type_get(net, family);
if (IS_ERR(type)) {
err = PTR_ERR(type);
goto err2;
@@ -6202,9 +6267,9 @@ static void nf_tables_commit_chain_active(struct net *net, struct nft_chain *cha
next_genbit = nft_gencursor_next(net);
g0 = rcu_dereference_protected(chain->rules_gen_0,
- lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+ lockdep_commit_lock_is_held(net));
g1 = rcu_dereference_protected(chain->rules_gen_1,
- lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+ lockdep_commit_lock_is_held(net));
/* No changes to this chain? */
if (chain->rules_next == NULL) {
@@ -6412,6 +6477,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_commit_release(net);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+ mutex_unlock(&net->nft.commit_mutex);
return 0;
}
@@ -6563,12 +6629,25 @@ static void nf_tables_cleanup(struct net *net)
static int nf_tables_abort(struct net *net, struct sk_buff *skb)
{
- return __nf_tables_abort(net);
+ int ret = __nf_tables_abort(net);
+
+ mutex_unlock(&net->nft.commit_mutex);
+
+ return ret;
}
static bool nf_tables_valid_genid(struct net *net, u32 genid)
{
- return net->nft.base_seq == genid;
+ bool genid_ok;
+
+ mutex_lock(&net->nft.commit_mutex);
+
+ genid_ok = genid == 0 || net->nft.base_seq == genid;
+ if (!genid_ok)
+ mutex_unlock(&net->nft.commit_mutex);
+
+ /* else, commit mutex has to be released by commit or abort function */
+ return genid_ok;
}
static const struct nfnetlink_subsystem nf_tables_subsys = {
@@ -6580,6 +6659,7 @@ static const struct nfnetlink_subsystem nf_tables_subsys = {
.abort = nf_tables_abort,
.cleanup = nf_tables_cleanup,
.valid_genid = nf_tables_valid_genid,
+ .owner = THIS_MODULE,
};
int nft_chain_validate_dependency(const struct nft_chain *chain,
@@ -6906,8 +6986,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
case NFT_GOTO:
if (!tb[NFTA_VERDICT_CHAIN])
return -EINVAL;
- chain = nft_chain_lookup(ctx->table, tb[NFTA_VERDICT_CHAIN],
- genmask);
+ chain = nft_chain_lookup(ctx->net, ctx->table,
+ tb[NFTA_VERDICT_CHAIN], genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
if (nft_is_base_chain(chain))
@@ -7152,6 +7232,7 @@ static int __net_init nf_tables_init_net(struct net *net)
{
INIT_LIST_HEAD(&net->nft.tables);
INIT_LIST_HEAD(&net->nft.commit_list);
+ mutex_init(&net->nft.commit_mutex);
net->nft.base_seq = 1;
net->nft.validate_state = NFT_VALIDATE_SKIP;
@@ -7160,11 +7241,11 @@ static int __net_init nf_tables_init_net(struct net *net)
static void __net_exit nf_tables_exit_net(struct net *net)
{
- nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ mutex_lock(&net->nft.commit_mutex);
if (!list_empty(&net->nft.commit_list))
__nf_tables_abort(net);
__nft_release_tables(net);
- nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ mutex_unlock(&net->nft.commit_mutex);
WARN_ON_ONCE(!list_empty(&net->nft.tables));
}
@@ -7179,29 +7260,19 @@ static int __init nf_tables_module_init(void)
nft_chain_filter_init();
- info = kmalloc_array(NFT_RULE_MAXEXPRS, sizeof(struct nft_expr_info),
- GFP_KERNEL);
- if (info == NULL) {
- err = -ENOMEM;
- goto err1;
- }
-
err = nf_tables_core_module_init();
if (err < 0)
- goto err2;
+ return err;
err = nfnetlink_subsys_register(&nf_tables_subsys);
if (err < 0)
- goto err3;
+ goto err;
register_netdevice_notifier(&nf_tables_flowtable_notifier);
return register_pernet_subsys(&nf_tables_net_ops);
-err3:
+err:
nf_tables_core_module_exit();
-err2:
- kfree(info);
-err1:
return err;
}
@@ -7213,7 +7284,6 @@ static void __exit nf_tables_module_exit(void)
unregister_pernet_subsys(&nf_tables_net_ops);
rcu_barrier();
nf_tables_core_module_exit();
- kfree(info);
}
module_init(nf_tables_module_init);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index e1b6be29848d..916913454624 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -331,18 +331,27 @@ replay:
}
}
- if (!ss->commit || !ss->abort) {
+ if (!ss->valid_genid || !ss->commit || !ss->abort) {
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
return kfree_skb(skb);
}
- if (genid && ss->valid_genid && !ss->valid_genid(net, genid)) {
+ if (!try_module_get(ss->owner)) {
+ nfnl_unlock(subsys_id);
+ netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
+ return kfree_skb(skb);
+ }
+
+ if (!ss->valid_genid(net, genid)) {
+ module_put(ss->owner);
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -ERESTART, NULL);
return kfree_skb(skb);
}
+ nfnl_unlock(subsys_id);
+
while (skb->len >= nlmsg_total_size(0)) {
int msglen, type;
@@ -464,14 +473,10 @@ ack:
}
done:
if (status & NFNL_BATCH_REPLAY) {
- const struct nfnetlink_subsystem *ss2;
-
- ss2 = nfnl_dereference_protected(subsys_id);
- if (ss2 == ss)
- ss->abort(net, oskb);
+ ss->abort(net, oskb);
nfnl_err_reset(&err_list);
- nfnl_unlock(subsys_id);
kfree_skb(skb);
+ module_put(ss->owner);
goto replay;
} else if (status == NFNL_BATCH_DONE) {
err = ss->commit(net, oskb);
@@ -489,8 +494,8 @@ done:
ss->cleanup(net);
nfnl_err_deliver(&err_list, oskb);
- nfnl_unlock(subsys_id);
kfree_skb(skb);
+ module_put(ss->owner);
}
static const struct nla_policy nfnl_batch_policy[NFNL_BATCH_MAX + 1] = {
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 9ee5fa551fa6..d9d952fad3e0 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -26,7 +26,6 @@
#include <net/sock.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_timeout.h>
@@ -47,7 +46,7 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
};
static int
-ctnl_timeout_parse_policy(void *timeouts,
+ctnl_timeout_parse_policy(void *timeout,
const struct nf_conntrack_l4proto *l4proto,
struct net *net, const struct nlattr *attr)
{
@@ -68,7 +67,7 @@ ctnl_timeout_parse_policy(void *timeouts,
if (ret < 0)
goto err;
- ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
+ ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeout);
err:
kfree(tb);
@@ -373,7 +372,6 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
struct netlink_ext_ack *extack)
{
const struct nf_conntrack_l4proto *l4proto;
- unsigned int *timeouts;
__u16 l3num;
__u8 l4num;
int ret;
@@ -393,9 +391,7 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
goto err;
}
- timeouts = l4proto->get_timeouts(net);
-
- ret = ctnl_timeout_parse_policy(timeouts, l4proto, net,
+ ret = ctnl_timeout_parse_policy(NULL, l4proto, net,
cda[CTA_TIMEOUT_DATA]);
if (ret < 0)
goto err;
@@ -432,7 +428,6 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
struct nlattr *nest_parms;
- unsigned int *timeouts = l4proto->get_timeouts(net);
int ret;
nest_parms = nla_nest_start(skb,
@@ -440,7 +435,7 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
if (!nest_parms)
goto nla_put_failure;
- ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts);
+ ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
if (ret < 0)
goto nla_put_failure;
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index d21834bed805..ea5b7c4944f6 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -322,7 +322,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
if (!ctx.net)
return NOTIFY_DONE;
- nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ mutex_lock(&ctx.net->nft.commit_mutex);
list_for_each_entry(table, &ctx.net->nft.tables, list) {
if (table->family != NFPROTO_NETDEV)
continue;
@@ -337,7 +337,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
nft_netdev_event(event, dev, &ctx);
}
}
- nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ mutex_unlock(&ctx.net->nft.commit_mutex);
put_net(ctx.net);
return NOTIFY_DONE;
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index a832c59f0a9c..b90d96ba4a12 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -14,10 +14,9 @@
#include <net/netfilter/nf_conntrack_zones.h>
struct nft_connlimit {
- spinlock_t lock;
- struct hlist_head hhead;
- u32 limit;
- bool invert;
+ struct nf_conncount_list list;
+ u32 limit;
+ bool invert;
};
static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
@@ -45,21 +44,19 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
return;
}
- spin_lock_bh(&priv->lock);
- count = nf_conncount_lookup(nft_net(pkt), &priv->hhead, tuple_ptr, zone,
- &addit);
+ nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone,
+ &addit);
+ count = priv->list.count;
if (!addit)
goto out;
- if (!nf_conncount_add(&priv->hhead, tuple_ptr, zone)) {
+ if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) {
regs->verdict.code = NF_DROP;
- spin_unlock_bh(&priv->lock);
return;
}
count++;
out:
- spin_unlock_bh(&priv->lock);
if ((count > priv->limit) ^ priv->invert) {
regs->verdict.code = NFT_BREAK;
@@ -87,8 +84,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
invert = true;
}
- spin_lock_init(&priv->lock);
- INIT_HLIST_HEAD(&priv->hhead);
+ nf_conncount_list_init(&priv->list);
priv->limit = limit;
priv->invert = invert;
@@ -99,7 +95,7 @@ static void nft_connlimit_do_destroy(const struct nft_ctx *ctx,
struct nft_connlimit *priv)
{
nf_ct_netns_put(ctx->net, ctx->family);
- nf_conncount_cache_free(&priv->hhead);
+ nf_conncount_cache_free(&priv->list);
}
static int nft_connlimit_do_dump(struct sk_buff *skb,
@@ -212,8 +208,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
struct nft_connlimit *priv_dst = nft_expr_priv(dst);
struct nft_connlimit *priv_src = nft_expr_priv(src);
- spin_lock_init(&priv_dst->lock);
- INIT_HLIST_HEAD(&priv_dst->hhead);
+ nf_conncount_list_init(&priv_dst->list);
priv_dst->limit = priv_src->limit;
priv_dst->invert = priv_src->invert;
@@ -225,21 +220,14 @@ static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx,
{
struct nft_connlimit *priv = nft_expr_priv(expr);
- nf_conncount_cache_free(&priv->hhead);
+ nf_conncount_cache_free(&priv->list);
}
static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
{
struct nft_connlimit *priv = nft_expr_priv(expr);
- bool addit, ret;
- spin_lock_bh(&priv->lock);
- nf_conncount_lookup(net, &priv->hhead, NULL, &nf_ct_zone_dflt, &addit);
-
- ret = hlist_empty(&priv->hhead);
- spin_unlock_bh(&priv->lock);
-
- return ret;
+ return nf_conncount_gc_list(net, &priv->list);
}
static struct nft_expr_type nft_connlimit_type;
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 1435ffc5f57e..3bc82ee5464d 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -870,7 +870,7 @@ static void nft_ct_helper_obj_eval(struct nft_object *obj,
if (test_bit(IPS_HELPER_BIT, &ct->status))
return;
- help = nf_ct_helper_ext_add(ct, to_assign, GFP_ATOMIC);
+ help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
if (help) {
rcu_assign_pointer(help->helper, to_assign);
set_bit(IPS_HELPER_BIT, &ct->status);
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 27d7e4598ab6..81184c244d1a 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -118,6 +118,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
u64 timeout;
int err;
+ lockdep_assert_held(&ctx->net->nft.commit_mutex);
+
if (tb[NFTA_DYNSET_SET_NAME] == NULL ||
tb[NFTA_DYNSET_OP] == NULL ||
tb[NFTA_DYNSET_SREG_KEY] == NULL)
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 998c2b546f6d..d7f3776dfd71 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -31,7 +31,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
case NFPROTO_IPV4:
sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt));
break;
-#if IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
case NFPROTO_IPV6:
sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt));
break;
@@ -43,7 +43,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
}
if (!sk) {
- nft_reg_store8(dest, 0);
+ regs->verdict.code = NFT_BREAK;
return;
}
@@ -54,6 +54,14 @@ static void nft_socket_eval(const struct nft_expr *expr,
case NFT_SOCKET_TRANSPARENT:
nft_reg_store8(dest, inet_sk_transparent(sk));
break;
+ case NFT_SOCKET_MARK:
+ if (sk_fullsock(sk)) {
+ *dest = sk->sk_mark;
+ } else {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+ break;
default:
WARN_ON(1);
regs->verdict.code = NFT_BREAK;
@@ -77,7 +85,7 @@ static int nft_socket_init(const struct nft_ctx *ctx,
switch(ctx->family) {
case NFPROTO_IPV4:
-#if IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
case NFPROTO_IPV6:
#endif
case NFPROTO_INET:
@@ -91,6 +99,9 @@ static int nft_socket_init(const struct nft_ctx *ctx,
case NFT_SOCKET_TRANSPARENT:
len = sizeof(u8);
break;
+ case NFT_SOCKET_MARK:
+ len = sizeof(u32);
+ break;
default:
return -EOPNOTSUPP;
}
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
index 0b660c568156..e8da9a9bba73 100644
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -1,14 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_queue.h>
+#include <net/ip6_checksum.h>
+
+#ifdef CONFIG_INET
+__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u8 protocol)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ __sum16 csum = 0;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
+ break;
+ if ((protocol == 0 && !csum_fold(skb->csum)) ||
+ !csum_tcpudp_magic(iph->saddr, iph->daddr,
+ skb->len - dataoff, protocol,
+ skb->csum)) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
+ }
+ /* fall through */
+ case CHECKSUM_NONE:
+ if (protocol == 0)
+ skb->csum = 0;
+ else
+ skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
+ skb->len - dataoff,
+ protocol, 0);
+ csum = __skb_checksum_complete(skb);
+ }
+ return csum;
+}
+EXPORT_SYMBOL(nf_ip_checksum);
+#endif
+
+static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u8 protocol)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ __sum16 csum = 0;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ if (len == skb->len - dataoff)
+ return nf_ip_checksum(skb, hook, dataoff, protocol);
+ /* fall through */
+ case CHECKSUM_NONE:
+ skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol,
+ skb->len - dataoff, 0);
+ skb->ip_summed = CHECKSUM_NONE;
+ return __skb_checksum_complete_head(skb, dataoff + len);
+ }
+ return csum;
+}
+
+__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u8 protocol)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ __sum16 csum = 0;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
+ break;
+ if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+ skb->len - dataoff, protocol,
+ csum_sub(skb->csum,
+ skb_checksum(skb, 0,
+ dataoff, 0)))) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
+ }
+ /* fall through */
+ case CHECKSUM_NONE:
+ skb->csum = ~csum_unfold(
+ csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+ skb->len - dataoff,
+ protocol,
+ csum_sub(0,
+ skb_checksum(skb, 0,
+ dataoff, 0))));
+ csum = __skb_checksum_complete(skb);
+ }
+ return csum;
+}
+EXPORT_SYMBOL(nf_ip6_checksum);
+
+static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u8 protocol)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ __wsum hsum;
+ __sum16 csum = 0;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ if (len == skb->len - dataoff)
+ return nf_ip6_checksum(skb, hook, dataoff, protocol);
+ /* fall through */
+ case CHECKSUM_NONE:
+ hsum = skb_checksum(skb, 0, dataoff, 0);
+ skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
+ &ip6h->daddr,
+ skb->len - dataoff,
+ protocol,
+ csum_sub(0, hsum)));
+ skb->ip_summed = CHECKSUM_NONE;
+ return __skb_checksum_complete_head(skb, dataoff + len);
+ }
+ return csum;
+};
__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
- unsigned int dataoff, u_int8_t protocol,
+ unsigned int dataoff, u8 protocol,
unsigned short family)
{
- const struct nf_ipv6_ops *v6ops;
__sum16 csum = 0;
switch (family) {
@@ -16,9 +130,7 @@ __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
csum = nf_ip_checksum(skb, hook, dataoff, protocol);
break;
case AF_INET6:
- v6ops = rcu_dereference(nf_ipv6_ops);
- if (v6ops)
- csum = v6ops->checksum(skb, hook, dataoff, protocol);
+ csum = nf_ip6_checksum(skb, hook, dataoff, protocol);
break;
}
@@ -28,9 +140,8 @@ EXPORT_SYMBOL_GPL(nf_checksum);
__sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, unsigned int len,
- u_int8_t protocol, unsigned short family)
+ u8 protocol, unsigned short family)
{
- const struct nf_ipv6_ops *v6ops;
__sum16 csum = 0;
switch (family) {
@@ -39,10 +150,8 @@ __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
protocol);
break;
case AF_INET6:
- v6ops = rcu_dereference(nf_ipv6_ops);
- if (v6ops)
- csum = v6ops->checksum_partial(skb, hook, dataoff, len,
- protocol);
+ csum = nf_ip6_checksum_partial(skb, hook, dataoff, len,
+ protocol);
break;
}
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 03b9a50ec93b..7ba454e9e3fa 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -93,7 +93,7 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name,
return -ENOENT;
}
- help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL);
+ help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
if (help == NULL) {
nf_conntrack_helper_put(helper);
return -ENOMEM;
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 475957cfcf50..0d0d68c989df 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -38,7 +38,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
-#if IS_ENABLED(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static unsigned int
tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
@@ -141,7 +141,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
.destroy = tee_tg_destroy,
.me = THIS_MODULE,
},
-#if IS_ENABLED(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
{
.name = "TEE",
.revision = 1,
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index d76550a8b642..ad7420cdc439 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -36,15 +36,6 @@
#include <net/netfilter/nf_tproxy.h>
#include <linux/netfilter/xt_TPROXY.h>
-/* assign a socket to the skb -- consumes sk */
-static void
-nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
-{
- skb_orphan(skb);
- skb->sk = sk;
- skb->destructor = sock_edemux;
-}
-
static unsigned int
tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
u_int32_t mark_mask, u_int32_t mark_value)