39 files changed, 2556 insertions, 993 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index f0a1c536ef15..6f6c959aeb8f 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -49,6 +49,8 @@ config NETFILTER_NETLINK_LOG
 config NF_CONNTRACK
 	tristate "Netfilter connection tracking support"
 	default m if NETFILTER_ADVANCED=n
+	select NF_DEFRAG_IPV4
+	select NF_DEFRAG_IPV6 if IPV6 != n
 	help
 	  Connection tracking keeps a record of what packets have passed
 	  through your machine, in order to figure out how they are related
@@ -615,7 +617,7 @@ config NFT_SOCKET
 	tristate "Netfilter nf_tables socket match support"
 	depends on IPV6 || IPV6=n
 	select NF_SOCKET_IPV4
-	select NF_SOCKET_IPV6 if IPV6
+	select NF_SOCKET_IPV6 if NF_TABLES_IPV6
 	help
 	  This option allows matching for the presence or absence of a
 	  corresponding socket and its attributes.
@@ -881,7 +883,7 @@ config NETFILTER_XT_TARGET_LOG
 	tristate "LOG target support"
 	select NF_LOG_COMMON
 	select NF_LOG_IPV4
-	select NF_LOG_IPV6 if IPV6
+	select NF_LOG_IPV6 if IP6_NF_IPTABLES
 	default m if NETFILTER_ADVANCED=n
 	help
 	  This option adds a `LOG' target, which allows you to create rules in
@@ -973,7 +975,7 @@ config NETFILTER_XT_TARGET_TEE
 	depends on IPV6 || IPV6=n
 	depends on !NF_CONNTRACK || NF_CONNTRACK
 	select NF_DUP_IPV4
-	select NF_DUP_IPV6 if IPV6
+	select NF_DUP_IPV6 if IP6_NF_IPTABLES
 	---help---
 	This option adds a "TEE" target with which a packet can be cloned and
 	this clone be rerouted to another nexthop.
@@ -1481,8 +1483,8 @@ config NETFILTER_XT_MATCH_SOCKET
 	depends on NETFILTER_ADVANCED
 	depends on IPV6 || IPV6=n
 	depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
-	depends on NF_SOCKET_IPV4
-	depends on NF_SOCKET_IPV6
+	select NF_SOCKET_IPV4
+	select NF_SOCKET_IPV6 if IP6_NF_IPTABLES
 	select NF_DEFRAG_IPV4
 	select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
 	help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 8a76dced974d..dd26e4961f43 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,7 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0
 netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o utils.o
 
-nf_conntrack-y	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
+nf_conntrack-y	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o \
+		   nf_conntrack_proto.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o \
+		   nf_conntrack_proto_icmp.o \
+		   nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
+
+nf_conntrack-$(subst m,y,$(CONFIG_IPV6)) += nf_conntrack_proto_icmpv6.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 99e0aa350dc5..0edc62910ebf 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -825,12 +825,23 @@ static void ip_vs_conn_expire(struct timer_list *t)
 
 	/* Unlink conn if not referenced anymore */
 	if (likely(ip_vs_conn_unlink(cp))) {
+		struct ip_vs_conn *ct = cp->control;
+
 		/* delete the timer if it is activated by other users */
 		del_timer(&cp->timer);
 
 		/* does anybody control me? */
-		if (cp->control)
+		if (ct) {
 			ip_vs_control_del(cp);
+			/* Drop CTL or non-assured TPL if not used anymore */
+			if (!cp->timeout && !atomic_read(&ct->n_control) &&
+			    (!(ct->flags & IP_VS_CONN_F_TEMPLATE) ||
+			     !(ct->state & IP_VS_CTPL_S_ASSURED))) {
+				IP_VS_DBG(4, "drop controlling connection\n");
+				ct->timeout = 0;
+				ip_vs_conn_expire_now(ct);
+			}
+		}
 
 		if ((cp->flags & IP_VS_CONN_F_NFCT) &&
 		    !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) {
@@ -872,6 +883,10 @@ static void ip_vs_conn_expire(struct timer_list *t)
 
 /* Modify timer, so that it expires as soon as possible.
  * Can be called without reference only if under RCU lock.
+ * We can have such chain of conns linked with ->control: DATA->CTL->TPL
+ * - DATA (eg. FTP) and TPL (persistence) can be present depending on setup
+ * - cp->timeout=0 indicates all conns from chain should be dropped but
+ * TPL is not dropped if in assured state
  */
 void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
 {
@@ -1107,7 +1122,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
 				&cp->caddr.in6, ntohs(cp->cport),
 				&cp->vaddr.in6, ntohs(cp->vport),
 				dbuf, ntohs(cp->dport),
-				ip_vs_state_name(cp->protocol, cp->state),
+				ip_vs_state_name(cp),
 				(cp->timer.expires-jiffies)/HZ, pe_data);
 		else
 #endif
@@ -1118,7 +1133,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
 				ntohl(cp->caddr.ip), ntohs(cp->cport),
 				ntohl(cp->vaddr.ip), ntohs(cp->vport),
 				dbuf, ntohs(cp->dport),
-				ip_vs_state_name(cp->protocol, cp->state),
+				ip_vs_state_name(cp),
 				(cp->timer.expires-jiffies)/HZ, pe_data);
 	}
 	return 0;
@@ -1169,7 +1184,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
 				&cp->caddr.in6, ntohs(cp->cport),
 				&cp->vaddr.in6, ntohs(cp->vport),
 				dbuf, ntohs(cp->dport),
-				ip_vs_state_name(cp->protocol, cp->state),
+				ip_vs_state_name(cp),
 				ip_vs_origin_name(cp->flags),
 				(cp->timer.expires-jiffies)/HZ);
 		else
@@ -1181,7 +1196,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
 				ntohl(cp->caddr.ip), ntohs(cp->cport),
 				ntohl(cp->vaddr.ip), ntohs(cp->vport),
 				dbuf, ntohs(cp->dport),
-				ip_vs_state_name(cp->protocol, cp->state),
+				ip_vs_state_name(cp),
 				ip_vs_origin_name(cp->flags),
 				(cp->timer.expires-jiffies)/HZ);
 	}
@@ -1197,8 +1212,11 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
 #endif
 
 
-/*
- *      Randomly drop connection entries before running out of memory
+/* Randomly drop connection entries before running out of memory
+ * Can be used for DATA and CTL conns. For TPL conns there are exceptions:
+ * - traffic for services in OPS mode increases ct->in_pkts, so it is supported
+ * - traffic for services not in OPS mode does not increase ct->in_pkts in
+ * all cases, so it is not supported
  */
 static inline int todrop_entry(struct ip_vs_conn *cp)
 {
@@ -1242,7 +1260,7 @@ static inline bool ip_vs_conn_ops_mode(struct ip_vs_conn *cp)
 void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
 {
 	int idx;
-	struct ip_vs_conn *cp, *cp_c;
+	struct ip_vs_conn *cp;
 
 	rcu_read_lock();
 	/*
@@ -1254,13 +1272,15 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
 		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
 			if (cp->ipvs != ipvs)
 				continue;
+			if (atomic_read(&cp->n_control))
+				continue;
 			if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
-				if (atomic_read(&cp->n_control) ||
-				    !ip_vs_conn_ops_mode(cp))
-					continue;
-				else
-					/* connection template of OPS */
+				/* connection template of OPS */
+				if (ip_vs_conn_ops_mode(cp))
 					goto try_drop;
+				if (!(cp->state & IP_VS_CTPL_S_ASSURED))
+					goto drop;
+				continue;
 			}
 			if (cp->protocol == IPPROTO_TCP) {
 				switch(cp->state) {
@@ -1294,15 +1314,10 @@ try_drop:
 					continue;
 			}
 
-			IP_VS_DBG(4, "del connection\n");
+drop:
+			IP_VS_DBG(4, "drop connection\n");
+			cp->timeout = 0;
 			ip_vs_conn_expire_now(cp);
-			cp_c = cp->control;
-			/* cp->control is valid only with reference to cp */
-			if (cp_c && __ip_vs_conn_get(cp)) {
-				IP_VS_DBG(4, "del conn template\n");
-				ip_vs_conn_expire_now(cp_c);
-				__ip_vs_conn_put(cp);
-			}
 		}
 		cond_resched_rcu();
 	}
@@ -1325,15 +1340,19 @@ flush_again:
 		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
 			if (cp->ipvs != ipvs)
 				continue;
-			IP_VS_DBG(4, "del connection\n");
-			ip_vs_conn_expire_now(cp);
+			/* As timers are expired in LIFO order, restart
+			 * the timer of controlling connection first, so
+			 * that it is expired after us.
+			 */
 			cp_c = cp->control;
 			/* cp->control is valid only with reference to cp */
 			if (cp_c && __ip_vs_conn_get(cp)) {
-				IP_VS_DBG(4, "del conn template\n");
+				IP_VS_DBG(4, "del controlling connection\n");
 				ip_vs_conn_expire_now(cp_c);
 				__ip_vs_conn_put(cp);
 			}
+			IP_VS_DBG(4, "del connection\n");
+			ip_vs_conn_expire_now(cp);
 		}
 		cond_resched_rcu();
 	}
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index ca880a3ad033..54ee84adf0bd 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -42,6 +42,11 @@
 
 static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
 
+/* States for conn templates: NONE or words separated with ",", max 15 chars */
+static const char *ip_vs_ctpl_state_name_table[IP_VS_CTPL_S_LAST] = {
+	[IP_VS_CTPL_S_NONE]			= "NONE",
+	[IP_VS_CTPL_S_ASSURED]			= "ASSURED",
+};
 
 /*
  *	register an ipvs protocol
@@ -193,12 +198,20 @@ ip_vs_create_timeout_table(int *table, int size)
 }
 
 
-const char * ip_vs_state_name(__u16 proto, int state)
+const char *ip_vs_state_name(const struct ip_vs_conn *cp)
 {
-	struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
+	unsigned int state = cp->state;
+	struct ip_vs_protocol *pp;
+
+	if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
 
+		if (state >= IP_VS_CTPL_S_LAST)
+			return "ERR!";
+		return ip_vs_ctpl_state_name_table[state] ? : "?";
+	}
+	pp = ip_vs_proto_get(cp->protocol);
 	if (pp == NULL || pp->state_name == NULL)
-		return (IPPROTO_IP == proto) ? "NONE" : "ERR!";
+		return (cp->protocol == IPPROTO_IP) ? "NONE" : "ERR!";
 	return pp->state_name(state);
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 3250c4a1111e..b0cd7d08f2a7 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -461,6 +461,8 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
 			}
 		}
+		if (next_state == IP_VS_SCTP_S_ESTABLISHED)
+			ip_vs_control_assure_ct(cp);
 	}
 	if (likely(pd))
 		cp->timeout = pd->timeout_table[cp->state = next_state];
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 80d10ad12a15..1770fc6ce960 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -569,6 +569,8 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
 			}
 		}
+		if (new_state == IP_VS_TCP_S_ESTABLISHED)
+			ip_vs_control_assure_ct(cp);
 	}
 
 	if (likely(pd))
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index e0ef11c3691e..0f53c49025f8 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -460,6 +460,8 @@ udp_state_transition(struct ip_vs_conn *cp, int direction,
 	}
 
 	cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
+	if (direction == IP_VS_DIR_OUTPUT)
+		ip_vs_control_assure_ct(cp);
 }
 
 static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 001501e25625..d4020c5e831d 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1003,12 +1003,9 @@ static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer
 				continue;
 			}
 		} else {
-			/* protocol in templates is not used for state/timeout */
-			if (state > 0) {
-				IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
-					state);
-				state = 0;
-			}
+			if (state >= IP_VS_CTPL_S_LAST)
+				IP_VS_DBG(7, "BACKUP v0, Invalid tpl state %u\n",
+					  state);
 		}
 
 		ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol,
@@ -1166,12 +1163,9 @@ static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *m
 			goto out;
 		}
 	} else {
-		/* protocol in templates is not used for state/timeout */
-		if (state > 0) {
-			IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
-				state);
-			state = 0;
-		}
+		if (state >= IP_VS_CTPL_S_LAST)
+			IP_VS_DBG(7, "BACKUP, Invalid tpl state %u\n",
+				  state);
 	}
 	if (ip_vs_conn_fill_param_sync(ipvs, af, s, &param, pe_data,
 				       pe_data_len, pe_name, pe_name_len)) {
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 510039862aa9..02ca7df793f5 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -44,17 +44,19 @@
 
 /* we will save the tuples of all connections we care about */
 struct nf_conncount_tuple {
-	struct hlist_node		node;
+	struct list_head		node;
 	struct nf_conntrack_tuple	tuple;
 	struct nf_conntrack_zone	zone;
 	int				cpu;
 	u32				jiffies32;
+	struct rcu_head			rcu_head;
 };
 
 struct nf_conncount_rb {
 	struct rb_node node;
-	struct hlist_head hhead; /* connections/hosts in same subnet */
+	struct nf_conncount_list list;
 	u32 key[MAX_KEYLEN];
+	struct rcu_head rcu_head;
 };
 
 static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp;
@@ -62,6 +64,10 @@ static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_i
 struct nf_conncount_data {
 	unsigned int keylen;
 	struct rb_root root[CONNCOUNT_SLOTS];
+	struct net *net;
+	struct work_struct gc_work;
+	unsigned long pending_trees[BITS_TO_LONGS(CONNCOUNT_SLOTS)];
+	unsigned int gc_tree;
 };
 
 static u_int32_t conncount_rnd __read_mostly;
@@ -82,26 +88,70 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
 	return memcmp(a, b, klen * sizeof(u32));
 }
 
-bool nf_conncount_add(struct hlist_head *head,
-		      const struct nf_conntrack_tuple *tuple,
-		      const struct nf_conntrack_zone *zone)
+enum nf_conncount_list_add
+nf_conncount_add(struct nf_conncount_list *list,
+		 const struct nf_conntrack_tuple *tuple,
+		 const struct nf_conntrack_zone *zone)
 {
 	struct nf_conncount_tuple *conn;
 
+	if (WARN_ON_ONCE(list->count > INT_MAX))
+		return NF_CONNCOUNT_ERR;
+
 	conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
 	if (conn == NULL)
-		return false;
+		return NF_CONNCOUNT_ERR;
+
 	conn->tuple = *tuple;
 	conn->zone = *zone;
 	conn->cpu = raw_smp_processor_id();
 	conn->jiffies32 = (u32)jiffies;
-	hlist_add_head(&conn->node, head);
-	return true;
+	spin_lock(&list->list_lock);
+	if (list->dead == true) {
+		kmem_cache_free(conncount_conn_cachep, conn);
+		spin_unlock(&list->list_lock);
+		return NF_CONNCOUNT_SKIP;
+	}
+	list_add_tail(&conn->node, &list->head);
+	list->count++;
+	spin_unlock(&list->list_lock);
+	return NF_CONNCOUNT_ADDED;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_add);
 
+static void __conn_free(struct rcu_head *h)
+{
+	struct nf_conncount_tuple *conn;
+
+	conn = container_of(h, struct nf_conncount_tuple, rcu_head);
+	kmem_cache_free(conncount_conn_cachep, conn);
+}
+
+static bool conn_free(struct nf_conncount_list *list,
+		      struct nf_conncount_tuple *conn)
+{
+	bool free_entry = false;
+
+	spin_lock(&list->list_lock);
+
+	if (list->count == 0) {
+		spin_unlock(&list->list_lock);
+                return free_entry;
+	}
+
+	list->count--;
+	list_del_rcu(&conn->node);
+	if (list->count == 0)
+		free_entry = true;
+
+	spin_unlock(&list->list_lock);
+	call_rcu(&conn->rcu_head, __conn_free);
+	return free_entry;
+}
+
 static const struct nf_conntrack_tuple_hash *
-find_or_evict(struct net *net, struct nf_conncount_tuple *conn)
+find_or_evict(struct net *net, struct nf_conncount_list *list,
+	      struct nf_conncount_tuple *conn, bool *free_entry)
 {
 	const struct nf_conntrack_tuple_hash *found;
 	unsigned long a, b;
@@ -121,34 +171,37 @@ find_or_evict(struct net *net, struct nf_conncount_tuple *conn)
 	 */
 	age = a - b;
 	if (conn->cpu == cpu || age >= 2) {
-		hlist_del(&conn->node);
-		kmem_cache_free(conncount_conn_cachep, conn);
+		*free_entry = conn_free(list, conn);
 		return ERR_PTR(-ENOENT);
 	}
 
 	return ERR_PTR(-EAGAIN);
 }
 
-unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
-				 const struct nf_conntrack_tuple *tuple,
-				 const struct nf_conntrack_zone *zone,
-				 bool *addit)
+void nf_conncount_lookup(struct net *net,
+			 struct nf_conncount_list *list,
+			 const struct nf_conntrack_tuple *tuple,
+			 const struct nf_conntrack_zone *zone,
+			 bool *addit)
 {
 	const struct nf_conntrack_tuple_hash *found;
-	struct nf_conncount_tuple *conn;
+	struct nf_conncount_tuple *conn, *conn_n;
 	struct nf_conn *found_ct;
-	struct hlist_node *n;
-	unsigned int length = 0;
+	unsigned int collect = 0;
+	bool free_entry = false;
 
+	/* best effort only */
 	*addit = tuple ? true : false;
 
 	/* check the saved connections */
-	hlist_for_each_entry_safe(conn, n, head, node) {
-		found = find_or_evict(net, conn);
+	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+		if (collect > CONNCOUNT_GC_MAX_NODES)
+			break;
+
+		found = find_or_evict(net, list, conn, &free_entry);
 		if (IS_ERR(found)) {
 			/* Not found, but might be about to be confirmed */
 			if (PTR_ERR(found) == -EAGAIN) {
-				length++;
 				if (!tuple)
 					continue;
 
@@ -156,7 +209,8 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
 				    nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
 				    nf_ct_zone_id(zone, zone->dir))
 					*addit = false;
-			}
+			} else if (PTR_ERR(found) == -ENOENT)
+				collect++;
 			continue;
 		}
 
@@ -165,9 +219,10 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
 		if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) &&
 		    nf_ct_zone_equal(found_ct, zone, zone->dir)) {
 			/*
-			 * Just to be sure we have it only once in the list.
 			 * We should not see tuples twice unless someone hooks
 			 * this into a table without "-p tcp --syn".
+			 *
+			 * Attempt to avoid a re-add in this case.
 			 */
 			*addit = false;
 		} else if (already_closed(found_ct)) {
@@ -176,19 +231,75 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
 			 * closed already -> ditch it
 			 */
 			nf_ct_put(found_ct);
-			hlist_del(&conn->node);
-			kmem_cache_free(conncount_conn_cachep, conn);
+			conn_free(list, conn);
+			collect++;
 			continue;
 		}
 
 		nf_ct_put(found_ct);
-		length++;
 	}
-
-	return length;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_lookup);
 
+void nf_conncount_list_init(struct nf_conncount_list *list)
+{
+	spin_lock_init(&list->list_lock);
+	INIT_LIST_HEAD(&list->head);
+	list->count = 1;
+	list->dead = false;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_list_init);
+
+/* Return true if the list is empty */
+bool nf_conncount_gc_list(struct net *net,
+			  struct nf_conncount_list *list)
+{
+	const struct nf_conntrack_tuple_hash *found;
+	struct nf_conncount_tuple *conn, *conn_n;
+	struct nf_conn *found_ct;
+	unsigned int collected = 0;
+	bool free_entry = false;
+
+	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+		found = find_or_evict(net, list, conn, &free_entry);
+		if (IS_ERR(found)) {
+			if (PTR_ERR(found) == -ENOENT)  {
+				if (free_entry)
+					return true;
+				collected++;
+			}
+			continue;
+		}
+
+		found_ct = nf_ct_tuplehash_to_ctrack(found);
+		if (already_closed(found_ct)) {
+			/*
+			 * we do not care about connections which are
+			 * closed already -> ditch it
+			 */
+			nf_ct_put(found_ct);
+			if (conn_free(list, conn))
+				return true;
+			collected++;
+			continue;
+		}
+
+		nf_ct_put(found_ct);
+		if (collected > CONNCOUNT_GC_MAX_NODES)
+			return false;
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_gc_list);
+
+static void __tree_nodes_free(struct rcu_head *h)
+{
+	struct nf_conncount_rb *rbconn;
+
+	rbconn = container_of(h, struct nf_conncount_rb, rcu_head);
+	kmem_cache_free(conncount_rb_cachep, rbconn);
+}
+
 static void tree_nodes_free(struct rb_root *root,
 			    struct nf_conncount_rb *gc_nodes[],
 			    unsigned int gc_count)
@@ -197,32 +308,46 @@ static void tree_nodes_free(struct rb_root *root,
 
 	while (gc_count) {
 		rbconn = gc_nodes[--gc_count];
-		rb_erase(&rbconn->node, root);
-		kmem_cache_free(conncount_rb_cachep, rbconn);
+		spin_lock(&rbconn->list.list_lock);
+		if (rbconn->list.count == 0 && rbconn->list.dead == false) {
+			rbconn->list.dead = true;
+			rb_erase(&rbconn->node, root);
+			call_rcu(&rbconn->rcu_head, __tree_nodes_free);
+		}
+		spin_unlock(&rbconn->list.list_lock);
 	}
 }
 
+static void schedule_gc_worker(struct nf_conncount_data *data, int tree)
+{
+	set_bit(tree, data->pending_trees);
+	schedule_work(&data->gc_work);
+}
+
 static unsigned int
-count_tree(struct net *net, struct rb_root *root,
-	   const u32 *key, u8 keylen,
-	   const struct nf_conntrack_tuple *tuple,
-	   const struct nf_conntrack_zone *zone)
+insert_tree(struct net *net,
+	    struct nf_conncount_data *data,
+	    struct rb_root *root,
+	    unsigned int hash,
+	    const u32 *key,
+	    u8 keylen,
+	    const struct nf_conntrack_tuple *tuple,
+	    const struct nf_conntrack_zone *zone)
 {
+	enum nf_conncount_list_add ret;
 	struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
 	struct rb_node **rbnode, *parent;
 	struct nf_conncount_rb *rbconn;
 	struct nf_conncount_tuple *conn;
-	unsigned int gc_count;
-	bool no_gc = false;
+	unsigned int count = 0, gc_count = 0;
+	bool node_found = false;
+
+	spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
 
- restart:
-	gc_count = 0;
 	parent = NULL;
 	rbnode = &(root->rb_node);
 	while (*rbnode) {
 		int diff;
-		bool addit;
-
 		rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node);
 
 		parent = *rbnode;
@@ -232,33 +357,30 @@ count_tree(struct net *net, struct rb_root *root,
 		} else if (diff > 0) {
 			rbnode = &((*rbnode)->rb_right);
 		} else {
-			/* same source network -> be counted! */
-			unsigned int count;
-
-			count = nf_conncount_lookup(net, &rbconn->hhead, tuple,
-						    zone, &addit);
-
-			tree_nodes_free(root, gc_nodes, gc_count);
-			if (!addit)
-				return count;
-
-			if (!nf_conncount_add(&rbconn->hhead, tuple, zone))
-				return 0; /* hotdrop */
-
-			return count + 1;
+			/* unlikely: other cpu added node already */
+			node_found = true;
+			ret = nf_conncount_add(&rbconn->list, tuple, zone);
+			if (ret == NF_CONNCOUNT_ERR) {
+				count = 0; /* hotdrop */
+			} else if (ret == NF_CONNCOUNT_ADDED) {
+				count = rbconn->list.count;
+			} else {
+				/* NF_CONNCOUNT_SKIP, rbconn is already
+				 * reclaimed by gc, insert a new tree node
+				 */
+				node_found = false;
+			}
+			break;
 		}
 
-		if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
+		if (gc_count >= ARRAY_SIZE(gc_nodes))
 			continue;
 
-		/* only used for GC on hhead, retval and 'addit' ignored */
-		nf_conncount_lookup(net, &rbconn->hhead, tuple, zone, &addit);
-		if (hlist_empty(&rbconn->hhead))
+		if (nf_conncount_gc_list(net, &rbconn->list))
 			gc_nodes[gc_count++] = rbconn;
 	}
 
 	if (gc_count) {
-		no_gc = true;
 		tree_nodes_free(root, gc_nodes, gc_count);
 		/* tree_node_free before new allocation permits
 		 * allocator to re-use newly free'd object.
@@ -266,58 +388,146 @@ count_tree(struct net *net, struct rb_root *root,
 		 * This is a rare event; in most cases we will find
 		 * existing node to re-use. (or gc_count is 0).
 		 */
-		goto restart;
+
+		if (gc_count >= ARRAY_SIZE(gc_nodes))
+			schedule_gc_worker(data, hash);
 	}
 
-	if (!tuple)
-		return 0;
+	if (node_found)
+		goto out_unlock;
 
-	/* no match, need to insert new node */
+	/* expected case: match, insert new node */
 	rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
 	if (rbconn == NULL)
-		return 0;
+		goto out_unlock;
 
 	conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
 	if (conn == NULL) {
 		kmem_cache_free(conncount_rb_cachep, rbconn);
-		return 0;
+		goto out_unlock;
 	}
 
 	conn->tuple = *tuple;
 	conn->zone = *zone;
 	memcpy(rbconn->key, key, sizeof(u32) * keylen);
 
-	INIT_HLIST_HEAD(&rbconn->hhead);
-	hlist_add_head(&conn->node, &rbconn->hhead);
+	nf_conncount_list_init(&rbconn->list);
+	list_add(&conn->node, &rbconn->list.head);
+	count = 1;
 
 	rb_link_node(&rbconn->node, parent, rbnode);
 	rb_insert_color(&rbconn->node, root);
-	return 1;
+out_unlock:
+	spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
+	return count;
 }
 
-/* Count and return number of conntrack entries in 'net' with particular 'key'.
- * If 'tuple' is not null, insert it into the accounting data structure.
- */
-unsigned int nf_conncount_count(struct net *net,
-				struct nf_conncount_data *data,
-				const u32 *key,
-				const struct nf_conntrack_tuple *tuple,
-				const struct nf_conntrack_zone *zone)
+static unsigned int
+count_tree(struct net *net,
+	   struct nf_conncount_data *data,
+	   const u32 *key,
+	   const struct nf_conntrack_tuple *tuple,
+	   const struct nf_conntrack_zone *zone)
 {
+	enum nf_conncount_list_add ret;
 	struct rb_root *root;
-	int count;
-	u32 hash;
+	struct rb_node *parent;
+	struct nf_conncount_rb *rbconn;
+	unsigned int hash;
+	u8 keylen = data->keylen;
 
 	hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS;
 	root = &data->root[hash];
 
-	spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
+	parent = rcu_dereference_raw(root->rb_node);
+	while (parent) {
+		int diff;
+		bool addit;
 
-	count = count_tree(net, root, key, data->keylen, tuple, zone);
+		rbconn = rb_entry(parent, struct nf_conncount_rb, node);
 
-	spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
+		diff = key_diff(key, rbconn->key, keylen);
+		if (diff < 0) {
+			parent = rcu_dereference_raw(parent->rb_left);
+		} else if (diff > 0) {
+			parent = rcu_dereference_raw(parent->rb_right);
+		} else {
+			/* same source network -> be counted! */
+			nf_conncount_lookup(net, &rbconn->list, tuple, zone,
+					    &addit);
 
-	return count;
+			if (!addit)
+				return rbconn->list.count;
+
+			ret = nf_conncount_add(&rbconn->list, tuple, zone);
+			if (ret == NF_CONNCOUNT_ERR) {
+				return 0; /* hotdrop */
+			} else if (ret == NF_CONNCOUNT_ADDED) {
+				return rbconn->list.count;
+			} else {
+				/* NF_CONNCOUNT_SKIP, rbconn is already
+				 * reclaimed by gc, insert a new tree node
+				 */
+				break;
+			}
+		}
+	}
+
+	if (!tuple)
+		return 0;
+
+	return insert_tree(net, data, root, hash, key, keylen, tuple, zone);
+}
+
+static void tree_gc_worker(struct work_struct *work)
+{
+	struct nf_conncount_data *data = container_of(work, struct nf_conncount_data, gc_work);
+	struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES], *rbconn;
+	struct rb_root *root;
+	struct rb_node *node;
+	unsigned int tree, next_tree, gc_count = 0;
+
+	tree = data->gc_tree % CONNCOUNT_LOCK_SLOTS;
+	root = &data->root[tree];
+
+	rcu_read_lock();
+	for (node = rb_first(root); node != NULL; node = rb_next(node)) {
+		rbconn = rb_entry(node, struct nf_conncount_rb, node);
+		if (nf_conncount_gc_list(data->net, &rbconn->list))
+			gc_nodes[gc_count++] = rbconn;
+	}
+	rcu_read_unlock();
+
+	spin_lock_bh(&nf_conncount_locks[tree]);
+
+	if (gc_count) {
+		tree_nodes_free(root, gc_nodes, gc_count);
+	}
+
+	clear_bit(tree, data->pending_trees);
+
+	next_tree = (tree + 1) % CONNCOUNT_SLOTS;
+	next_tree = find_next_bit(data->pending_trees, next_tree, CONNCOUNT_SLOTS);
+
+	if (next_tree < CONNCOUNT_SLOTS) {
+		data->gc_tree = next_tree;
+		schedule_work(work);
+	}
+
+	spin_unlock_bh(&nf_conncount_locks[tree]);
+}
+
+/* Count and return number of conntrack entries in 'net' with particular 'key'.
+ * If 'tuple' is not null, insert it into the accounting data structure.
+ * Call with RCU read lock.
+ */
+unsigned int nf_conncount_count(struct net *net,
+				struct nf_conncount_data *data,
+				const u32 *key,
+				const struct nf_conntrack_tuple *tuple,
+				const struct nf_conntrack_zone *zone)
+{
+	return count_tree(net, data, key, tuple, zone);
 }
 EXPORT_SYMBOL_GPL(nf_conncount_count);
 
@@ -348,17 +558,18 @@ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family
 		data->root[i] = RB_ROOT;
 
 	data->keylen = keylen / sizeof(u32);
+	data->net = net;
+	INIT_WORK(&data->gc_work, tree_gc_worker);
 
 	return data;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_init);
 
-void nf_conncount_cache_free(struct hlist_head *hhead)
+void nf_conncount_cache_free(struct nf_conncount_list *list)
 {
-	struct nf_conncount_tuple *conn;
-	struct hlist_node *n;
+	struct nf_conncount_tuple *conn, *conn_n;
 
-	hlist_for_each_entry_safe(conn, n, hhead, node)
+	list_for_each_entry_safe(conn, conn_n, &list->head, node)
 		kmem_cache_free(conncount_conn_cachep, conn);
 }
 EXPORT_SYMBOL_GPL(nf_conncount_cache_free);
@@ -373,7 +584,7 @@ static void destroy_tree(struct rb_root *r)
 
 		rb_erase(node, r);
 
-		nf_conncount_cache_free(&rbconn->hhead);
+		nf_conncount_cache_free(&rbconn->list);
 
 		kmem_cache_free(conncount_rb_cachep, rbconn);
 	}
@@ -384,6 +595,7 @@ void nf_conncount_destroy(struct net *net, unsigned int family,
 {
 	unsigned int i;
 
+	cancel_work_sync(&data->gc_work);
 	nf_ct_netns_put(net, family);
 
 	for (i = 0; i < ARRAY_SIZE(data->root); ++i)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 805500197c22..8a113ca1eea2 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -37,7 +37,6 @@
 #include <linux/rculist_nulls.h>
 
 #include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
@@ -55,6 +54,7 @@
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netns/hash.h>
+#include <net/ip.h>
 
 #include "nf_internals.h"
 
@@ -222,7 +222,7 @@ static u32 hash_conntrack(const struct net *net,
 	return scale_hash(hash_conntrack_raw(tuple, net));
 }
 
-bool
+static bool
 nf_ct_get_tuple(const struct sk_buff *skb,
 		unsigned int nhoff,
 		unsigned int dataoff,
@@ -230,37 +230,151 @@ nf_ct_get_tuple(const struct sk_buff *skb,
 		u_int8_t protonum,
 		struct net *net,
 		struct nf_conntrack_tuple *tuple,
-		const struct nf_conntrack_l3proto *l3proto,
 		const struct nf_conntrack_l4proto *l4proto)
 {
+	unsigned int size;
+	const __be32 *ap;
+	__be32 _addrs[8];
+	struct {
+		__be16 sport;
+		__be16 dport;
+	} _inet_hdr, *inet_hdr;
+
 	memset(tuple, 0, sizeof(*tuple));
 
 	tuple->src.l3num = l3num;
-	if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
+	switch (l3num) {
+	case NFPROTO_IPV4:
+		nhoff += offsetof(struct iphdr, saddr);
+		size = 2 * sizeof(__be32);
+		break;
+	case NFPROTO_IPV6:
+		nhoff += offsetof(struct ipv6hdr, saddr);
+		size = sizeof(_addrs);
+		break;
+	default:
+		return true;
+	}
+
+	ap = skb_header_pointer(skb, nhoff, size, _addrs);
+	if (!ap)
 		return false;
 
+	switch (l3num) {
+	case NFPROTO_IPV4:
+		tuple->src.u3.ip = ap[0];
+		tuple->dst.u3.ip = ap[1];
+		break;
+	case NFPROTO_IPV6:
+		memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
+		memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
+		break;
+	}
+
 	tuple->dst.protonum = protonum;
 	tuple->dst.dir = IP_CT_DIR_ORIGINAL;
 
-	return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
+	if (unlikely(l4proto->pkt_to_tuple))
+		return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
+
+	/* Actually only need first 4 bytes to get ports. */
+	inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr);
+	if (!inet_hdr)
+		return false;
+
+	tuple->src.u.udp.port = inet_hdr->sport;
+	tuple->dst.u.udp.port = inet_hdr->dport;
+	return true;
+}
+
+static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+			    u_int8_t *protonum)
+{
+	int dataoff = -1;
+	const struct iphdr *iph;
+	struct iphdr _iph;
+
+	iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+	if (!iph)
+		return -1;
+
+	/* Conntrack defragments packets, we might still see fragments
+	 * inside ICMP packets though.
+	 */
+	if (iph->frag_off & htons(IP_OFFSET))
+		return -1;
+
+	dataoff = nhoff + (iph->ihl << 2);
+	*protonum = iph->protocol;
+
+	/* Check bogus IP headers */
+	if (dataoff > skb->len) {
+		pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n",
+			 nhoff, iph->ihl << 2, skb->len);
+		return -1;
+	}
+	return dataoff;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+			    u8 *protonum)
+{
+	int protoff = -1;
+	unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
+	__be16 frag_off;
+	u8 nexthdr;
+
+	if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
+			  &nexthdr, sizeof(nexthdr)) != 0) {
+		pr_debug("can't get nexthdr\n");
+		return -1;
+	}
+	protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
+	/*
+	 * (protoff == skb->len) means the packet has not data, just
+	 * IPv6 and possibly extensions headers, but it is tracked anyway
+	 */
+	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+		pr_debug("can't find proto in pkt\n");
+		return -1;
+	}
+
+	*protonum = nexthdr;
+	return protoff;
+}
+#endif
+
+static int get_l4proto(const struct sk_buff *skb,
+		       unsigned int nhoff, u8 pf, u8 *l4num)
+{
+	switch (pf) {
+	case NFPROTO_IPV4:
+		return ipv4_get_l4proto(skb, nhoff, l4num);
+#if IS_ENABLED(CONFIG_IPV6)
+	case NFPROTO_IPV6:
+		return ipv6_get_l4proto(skb, nhoff, l4num);
+#endif
+	default:
+		*l4num = 0;
+		break;
+	}
+	return -1;
 }
-EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
 
 bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
 		       u_int16_t l3num,
 		       struct net *net, struct nf_conntrack_tuple *tuple)
 {
-	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
-	unsigned int protoff;
-	u_int8_t protonum;
+	u8 protonum;
+	int protoff;
 	int ret;
 
 	rcu_read_lock();
 
-	l3proto = __nf_ct_l3proto_find(l3num);
-	ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum);
-	if (ret != NF_ACCEPT) {
+	protoff = get_l4proto(skb, nhoff, l3num, &protonum);
+	if (protoff <= 0) {
 		rcu_read_unlock();
 		return false;
 	}
@@ -268,7 +382,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
 	l4proto = __nf_ct_l4proto_find(l3num, protonum);
 
 	ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
-			      l3proto, l4proto);
+			      l4proto);
 
 	rcu_read_unlock();
 	return ret;
@@ -278,19 +392,35 @@ EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
 bool
 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 		   const struct nf_conntrack_tuple *orig,
-		   const struct nf_conntrack_l3proto *l3proto,
 		   const struct nf_conntrack_l4proto *l4proto)
 {
 	memset(inverse, 0, sizeof(*inverse));
 
 	inverse->src.l3num = orig->src.l3num;
-	if (l3proto->invert_tuple(inverse, orig) == 0)
-		return false;
+
+	switch (orig->src.l3num) {
+	case NFPROTO_IPV4:
+		inverse->src.u3.ip = orig->dst.u3.ip;
+		inverse->dst.u3.ip = orig->src.u3.ip;
+		break;
+	case NFPROTO_IPV6:
+		inverse->src.u3.in6 = orig->dst.u3.in6;
+		inverse->dst.u3.in6 = orig->src.u3.in6;
+		break;
+	default:
+		break;
+	}
 
 	inverse->dst.dir = !orig->dst.dir;
 
 	inverse->dst.protonum = orig->dst.protonum;
-	return l4proto->invert_tuple(inverse, orig);
+
+	if (unlikely(l4proto->invert_tuple))
+		return l4proto->invert_tuple(inverse, orig);
+
+	inverse->src.u.all = orig->dst.u.all;
+	inverse->dst.u.all = orig->src.u.all;
+	return true;
 }
 EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
 
@@ -502,6 +632,18 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
 	       net_eq(net, nf_ct_net(ct));
 }
 
+static inline bool
+nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2)
+{
+	return nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+				 &ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple) &&
+	       nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple,
+				 &ct2->tuplehash[IP_CT_DIR_REPLY].tuple) &&
+	       nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL) &&
+	       nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_REPLY) &&
+	       net_eq(nf_ct_net(ct1), nf_ct_net(ct2));
+}
+
 /* caller must hold rcu readlock and none of the nf_conntrack_locks */
 static void nf_ct_gc_expired(struct nf_conn *ct)
 {
@@ -695,19 +837,21 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
 	/* This is the conntrack entry already in hashes that won race. */
 	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
 	const struct nf_conntrack_l4proto *l4proto;
+	enum ip_conntrack_info oldinfo;
+	struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
 
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
 	if (l4proto->allow_clash &&
-	    ((ct->status & IPS_NAT_DONE_MASK) == 0) &&
 	    !nf_ct_is_dying(ct) &&
 	    atomic_inc_not_zero(&ct->ct_general.use)) {
-		enum ip_conntrack_info oldinfo;
-		struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
-
-		nf_ct_acct_merge(ct, ctinfo, loser_ct);
-		nf_conntrack_put(&loser_ct->ct_general);
-		nf_ct_set(skb, ct, oldinfo);
-		return NF_ACCEPT;
+		if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
+		    nf_ct_match(ct, loser_ct)) {
+			nf_ct_acct_merge(ct, ctinfo, loser_ct);
+			nf_conntrack_put(&loser_ct->ct_general);
+			nf_ct_set(skb, ct, oldinfo);
+			return NF_ACCEPT;
+		}
+		nf_ct_put(ct);
 	}
 	NF_CT_STAT_INC(net, drop);
 	return NF_DROP;
@@ -1195,7 +1339,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
 static noinline struct nf_conntrack_tuple_hash *
 init_conntrack(struct net *net, struct nf_conn *tmpl,
 	       const struct nf_conntrack_tuple *tuple,
-	       const struct nf_conntrack_l3proto *l3proto,
 	       const struct nf_conntrack_l4proto *l4proto,
 	       struct sk_buff *skb,
 	       unsigned int dataoff, u32 hash)
@@ -1208,9 +1351,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 	const struct nf_conntrack_zone *zone;
 	struct nf_conn_timeout *timeout_ext;
 	struct nf_conntrack_zone tmp;
-	unsigned int *timeouts;
 
-	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
+	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) {
 		pr_debug("Can't invert tuple.\n");
 		return NULL;
 	}
@@ -1227,15 +1369,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 	}
 
 	timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
-	if (timeout_ext) {
-		timeouts = nf_ct_timeout_data(timeout_ext);
-		if (unlikely(!timeouts))
-			timeouts = l4proto->get_timeouts(net);
-	} else {
-		timeouts = l4proto->get_timeouts(net);
-	}
 
-	if (!l4proto->new(ct, skb, dataoff, timeouts)) {
+	if (!l4proto->new(ct, skb, dataoff)) {
 		nf_conntrack_free(ct);
 		pr_debug("can't track with proto module\n");
 		return NULL;
@@ -1266,8 +1401,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 			/* exp->master safe, refcnt bumped in nf_ct_find_expectation */
 			ct->master = exp->master;
 			if (exp->helper) {
-				help = nf_ct_helper_ext_add(ct, exp->helper,
-							    GFP_ATOMIC);
+				help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
 				if (help)
 					rcu_assign_pointer(help->helper, exp->helper);
 			}
@@ -1307,7 +1441,6 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 		  unsigned int dataoff,
 		  u_int16_t l3num,
 		  u_int8_t protonum,
-		  const struct nf_conntrack_l3proto *l3proto,
 		  const struct nf_conntrack_l4proto *l4proto)
 {
 	const struct nf_conntrack_zone *zone;
@@ -1319,8 +1452,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 	u32 hash;
 
 	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
-			     dataoff, l3num, protonum, net, &tuple, l3proto,
-			     l4proto)) {
+			     dataoff, l3num, protonum, net, &tuple, l4proto)) {
 		pr_debug("Can't get tuple\n");
 		return 0;
 	}
@@ -1330,7 +1462,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 	hash = hash_conntrack_raw(&tuple, net);
 	h = __nf_conntrack_find_get(net, zone, &tuple, hash);
 	if (!h) {
-		h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
+		h = init_conntrack(net, tmpl, &tuple, l4proto,
 				   skb, dataoff, hash);
 		if (!h)
 			return 0;
@@ -1363,14 +1495,11 @@ unsigned int
 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 		struct sk_buff *skb)
 {
-	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conn *ct, *tmpl;
 	enum ip_conntrack_info ctinfo;
-	unsigned int *timeouts;
-	unsigned int dataoff;
 	u_int8_t protonum;
-	int ret;
+	int dataoff, ret;
 
 	tmpl = nf_ct_get(skb, &ctinfo);
 	if (tmpl || ctinfo == IP_CT_UNTRACKED) {
@@ -1384,14 +1513,12 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 	}
 
 	/* rcu_read_lock()ed by nf_hook_thresh */
-	l3proto = __nf_ct_l3proto_find(pf);
-	ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
-				   &dataoff, &protonum);
-	if (ret <= 0) {
+	dataoff = get_l4proto(skb, skb_network_offset(skb), pf, &protonum);
+	if (dataoff <= 0) {
 		pr_debug("not prepared to track yet or error occurred\n");
 		NF_CT_STAT_INC_ATOMIC(net, error);
 		NF_CT_STAT_INC_ATOMIC(net, invalid);
-		ret = -ret;
+		ret = NF_ACCEPT;
 		goto out;
 	}
 
@@ -1413,8 +1540,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 			goto out;
 	}
 repeat:
-	ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
-				l3proto, l4proto);
+	ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l4proto);
 	if (ret < 0) {
 		/* Too stressed to deal. */
 		NF_CT_STAT_INC_ATOMIC(net, drop);
@@ -1430,10 +1556,7 @@ repeat:
 		goto out;
 	}
 
-	/* Decide what timeout policy we want to apply to this flow. */
-	timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
-
-	ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts);
+	ret = l4proto->packet(ct, skb, dataoff, ctinfo);
 	if (ret <= 0) {
 		/* Invalid: inverse of the return code tells
 		 * the netfilter core what to do */
@@ -1471,7 +1594,6 @@ bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
 
 	rcu_read_lock();
 	ret = nf_ct_invert_tuple(inverse, orig,
-				 __nf_ct_l3proto_find(orig->src.l3num),
 				 __nf_ct_l4proto_find(orig->src.l3num,
 						      orig->dst.protonum));
 	rcu_read_unlock();
@@ -1609,14 +1731,14 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
 
 static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
 {
-	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_tuple tuple;
 	enum ip_conntrack_info ctinfo;
 	struct nf_nat_hook *nat_hook;
-	unsigned int dataoff, status;
+	unsigned int status;
 	struct nf_conn *ct;
+	int dataoff;
 	u16 l3num;
 	u8 l4num;
 
@@ -1625,16 +1747,15 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
 		return 0;
 
 	l3num = nf_ct_l3num(ct);
-	l3proto = nf_ct_l3proto_find_get(l3num);
 
-	if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
-				 &l4num) <= 0)
+	dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num);
+	if (dataoff <= 0)
 		return -1;
 
 	l4proto = nf_ct_l4proto_find_get(l3num, l4num);
 
 	if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
-			     l4num, net, &tuple, l3proto, l4proto))
+			     l4num, net, &tuple, l4proto))
 		return -1;
 
 	if (ct->status & IPS_SRC_NAT) {
@@ -2089,9 +2210,6 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
 
-module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
-		  &nf_conntrack_htable_size, 0600);
-
 static __always_inline unsigned int total_extension_size(void)
 {
 	/* remember to add new extensions below */
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 853b23206bb7..3f586ba23d92 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -610,7 +610,6 @@ static int exp_seq_show(struct seq_file *s, void *v)
 		   expect->tuple.src.l3num,
 		   expect->tuple.dst.protonum);
 	print_tuple(s, &expect->tuple,
-		    __nf_ct_l3proto_find(expect->tuple.src.l3num),
 		    __nf_ct_l4proto_find(expect->tuple.src.l3num,
 				       expect->tuple.dst.protonum));
 
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index a75b11c39312..d557a425289d 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -24,7 +24,6 @@
 #include <linux/rtnetlink.h>
 
 #include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_core.h>
@@ -193,8 +192,7 @@ void nf_conntrack_helper_put(struct nf_conntrack_helper *helper)
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_put);
 
 struct nf_conn_help *
-nf_ct_helper_ext_add(struct nf_conn *ct,
-		     struct nf_conntrack_helper *helper, gfp_t gfp)
+nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
 {
 	struct nf_conn_help *help;
 
@@ -263,7 +261,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
 	}
 
 	if (help == NULL) {
-		help = nf_ct_helper_ext_add(ct, helper, flags);
+		help = nf_ct_helper_ext_add(ct, flags);
 		if (help == NULL)
 			return -ENOMEM;
 	} else {
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
deleted file mode 100644
index 397e6911214f..000000000000
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
- *
- * Based largely upon the original ip_conntrack code which
- * had the following copyright information:
- *
- * (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Author:
- *	Yasuyuki Kozakai @USAGI	<yasuyuki.kozakai@toshiba.co.jp>
- */
-
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/icmp.h>
-#include <linux/sysctl.h>
-#include <net/ip.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
-
-static bool generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
-				 struct nf_conntrack_tuple *tuple)
-{
-	memset(&tuple->src.u3, 0, sizeof(tuple->src.u3));
-	memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3));
-
-	return true;
-}
-
-static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
-				 const struct nf_conntrack_tuple *orig)
-{
-	memset(&tuple->src.u3, 0, sizeof(tuple->src.u3));
-	memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3));
-
-	return true;
-}
-
-static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
-			       unsigned int *dataoff, u_int8_t *protonum)
-{
-	/* Never track !!! */
-	return -NF_ACCEPT;
-}
-
-
-struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
-	.l3proto	 = PF_UNSPEC,
-	.pkt_to_tuple	 = generic_pkt_to_tuple,
-	.invert_tuple	 = generic_invert_tuple,
-	.get_l4proto	 = generic_get_l4proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 20a2e37c76d1..f981bfa8db72 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -38,7 +38,6 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_acct.h>
@@ -81,9 +80,26 @@ nla_put_failure:
 	return -1;
 }
 
+static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
+				const struct nf_conntrack_tuple *tuple)
+{
+	if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) ||
+	    nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
+		return -EMSGSIZE;
+	return 0;
+}
+
+static int ipv6_tuple_to_nlattr(struct sk_buff *skb,
+				const struct nf_conntrack_tuple *tuple)
+{
+	if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) ||
+	    nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6))
+		return -EMSGSIZE;
+	return 0;
+}
+
 static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
-				    const struct nf_conntrack_tuple *tuple,
-				    const struct nf_conntrack_l3proto *l3proto)
+				    const struct nf_conntrack_tuple *tuple)
 {
 	int ret = 0;
 	struct nlattr *nest_parms;
@@ -92,8 +108,14 @@ static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
 	if (!nest_parms)
 		goto nla_put_failure;
 
-	if (likely(l3proto->tuple_to_nlattr))
-		ret = l3proto->tuple_to_nlattr(skb, tuple);
+	switch (tuple->src.l3num) {
+	case NFPROTO_IPV4:
+		ret = ipv4_tuple_to_nlattr(skb, tuple);
+		break;
+	case NFPROTO_IPV6:
+		ret = ipv6_tuple_to_nlattr(skb, tuple);
+		break;
+	}
 
 	nla_nest_end(skb, nest_parms);
 
@@ -106,13 +128,11 @@ nla_put_failure:
 static int ctnetlink_dump_tuples(struct sk_buff *skb,
 				 const struct nf_conntrack_tuple *tuple)
 {
-	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
 	int ret;
 
 	rcu_read_lock();
-	l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
-	ret = ctnetlink_dump_tuples_ip(skb, tuple, l3proto);
+	ret = ctnetlink_dump_tuples_ip(skb, tuple);
 
 	if (ret >= 0) {
 		l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
@@ -556,15 +576,20 @@ nla_put_failure:
 	return -1;
 }
 
+static const struct nla_policy cta_ip_nla_policy[CTA_IP_MAX + 1] = {
+	[CTA_IP_V4_SRC]	= { .type = NLA_U32 },
+	[CTA_IP_V4_DST]	= { .type = NLA_U32 },
+	[CTA_IP_V6_SRC]	= { .len = sizeof(__be32) * 4 },
+	[CTA_IP_V6_DST]	= { .len = sizeof(__be32) * 4 },
+};
+
 #if defined(CONFIG_NETFILTER_NETLINK_GLUE_CT) || defined(CONFIG_NF_CONNTRACK_EVENTS)
 static size_t ctnetlink_proto_size(const struct nf_conn *ct)
 {
-	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
 	size_t len, len4 = 0;
 
-	l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
-	len = l3proto->nla_size;
+	len = nla_policy_len(cta_ip_nla_policy, CTA_IP_MAX + 1);
 	len *= 3u; /* ORIG, REPLY, MASTER */
 
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
@@ -936,29 +961,54 @@ out:
 	return skb->len;
 }
 
+static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
+				struct nf_conntrack_tuple *t)
+{
+	if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
+		return -EINVAL;
+
+	t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]);
+	t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]);
+
+	return 0;
+}
+
+static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
+				struct nf_conntrack_tuple *t)
+{
+	if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST])
+		return -EINVAL;
+
+	t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]);
+	t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]);
+
+	return 0;
+}
+
 static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
 				    struct nf_conntrack_tuple *tuple)
 {
 	struct nlattr *tb[CTA_IP_MAX+1];
-	struct nf_conntrack_l3proto *l3proto;
 	int ret = 0;
 
 	ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL, NULL);
 	if (ret < 0)
 		return ret;
 
-	rcu_read_lock();
-	l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
+	ret = nla_validate_nested(attr, CTA_IP_MAX,
+				  cta_ip_nla_policy, NULL);
+	if (ret)
+		return ret;
 
-	if (likely(l3proto->nlattr_to_tuple)) {
-		ret = nla_validate_nested(attr, CTA_IP_MAX,
-					  l3proto->nla_policy, NULL);
-		if (ret == 0)
-			ret = l3proto->nlattr_to_tuple(tb, tuple);
+	switch (tuple->src.l3num) {
+	case NFPROTO_IPV4:
+		ret = ipv4_nlattr_to_tuple(tb, tuple);
+		break;
+	case NFPROTO_IPV6:
+		ret = ipv6_nlattr_to_tuple(tb, tuple);
+		break;
 	}
 
-	rcu_read_unlock();
-
 	return ret;
 }
 
@@ -1897,7 +1947,7 @@ ctnetlink_create_conntrack(struct net *net,
 		} else {
 			struct nf_conn_help *help;
 
-			help = nf_ct_helper_ext_add(ct, helper, GFP_ATOMIC);
+			help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
 			if (help == NULL) {
 				err = -ENOMEM;
 				goto err2;
@@ -2581,7 +2631,6 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
 				   const struct nf_conntrack_tuple *tuple,
 				   const struct nf_conntrack_tuple_mask *mask)
 {
-	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple m;
 	struct nlattr *nest_parms;
@@ -2597,8 +2646,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
 		goto nla_put_failure;
 
 	rcu_read_lock();
-	l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
-	ret = ctnetlink_dump_tuples_ip(skb, &m, l3proto);
+	ret = ctnetlink_dump_tuples_ip(skb, &m);
 	if (ret >= 0) {
 		l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
 					       tuple->dst.protonum);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index d88841fbc560..803607a90102 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -1,14 +1,4 @@
-/* L3/L4 protocol support for nf_conntrack. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
- * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0
 
 #include <linux/types.h>
 #include <linux/netfilter.h>
@@ -24,14 +14,36 @@
 #include <linux/netdevice.h>
 
 #include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_log.h>
 
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <net/ipv6.h>
+#include <net/inet_frag.h>
+
+extern unsigned int nf_conntrack_net_id;
+
 static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;
-struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly;
-EXPORT_SYMBOL_GPL(nf_ct_l3protos);
 
 static DEFINE_MUTEX(nf_ct_proto_mutex);
 
@@ -122,137 +134,6 @@ __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
 }
 EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
 
-/* this is guaranteed to always return a valid protocol helper, since
- * it falls back to generic_protocol */
-const struct nf_conntrack_l3proto *
-nf_ct_l3proto_find_get(u_int16_t l3proto)
-{
-	struct nf_conntrack_l3proto *p;
-
-	rcu_read_lock();
-	p = __nf_ct_l3proto_find(l3proto);
-	if (!try_module_get(p->me))
-		p = &nf_conntrack_l3proto_generic;
-	rcu_read_unlock();
-
-	return p;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
-
-int
-nf_ct_l3proto_try_module_get(unsigned short l3proto)
-{
-	const struct nf_conntrack_l3proto *p;
-	int ret;
-
-retry:	p = nf_ct_l3proto_find_get(l3proto);
-	if (p == &nf_conntrack_l3proto_generic) {
-		ret = request_module("nf_conntrack-%d", l3proto);
-		if (!ret)
-			goto retry;
-
-		return -EPROTOTYPE;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_try_module_get);
-
-void nf_ct_l3proto_module_put(unsigned short l3proto)
-{
-	struct nf_conntrack_l3proto *p;
-
-	/* rcu_read_lock not necessary since the caller holds a reference, but
-	 * taken anyways to avoid lockdep warnings in __nf_ct_l3proto_find()
-	 */
-	rcu_read_lock();
-	p = __nf_ct_l3proto_find(l3proto);
-	module_put(p->me);
-	rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
-
-static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
-{
-	const struct nf_conntrack_l3proto *l3proto;
-	int ret;
-
-	might_sleep();
-
-	ret = nf_ct_l3proto_try_module_get(nfproto);
-	if (ret < 0)
-		return ret;
-
-	/* we already have a reference, can't fail */
-	rcu_read_lock();
-	l3proto = __nf_ct_l3proto_find(nfproto);
-	rcu_read_unlock();
-
-	if (!l3proto->net_ns_get)
-		return 0;
-
-	ret = l3proto->net_ns_get(net);
-	if (ret < 0)
-		nf_ct_l3proto_module_put(nfproto);
-
-	return ret;
-}
-
-int nf_ct_netns_get(struct net *net, u8 nfproto)
-{
-	int err;
-
-	if (nfproto == NFPROTO_INET) {
-		err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
-		if (err < 0)
-			goto err1;
-		err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
-		if (err < 0)
-			goto err2;
-	} else {
-		err = nf_ct_netns_do_get(net, nfproto);
-		if (err < 0)
-			goto err1;
-	}
-	return 0;
-
-err2:
-	nf_ct_netns_put(net, NFPROTO_IPV4);
-err1:
-	return err;
-}
-EXPORT_SYMBOL_GPL(nf_ct_netns_get);
-
-static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
-{
-	const struct nf_conntrack_l3proto *l3proto;
-
-	might_sleep();
-
-	/* same as nf_conntrack_netns_get(), reference assumed */
-	rcu_read_lock();
-	l3proto = __nf_ct_l3proto_find(nfproto);
-	rcu_read_unlock();
-
-	if (WARN_ON(!l3proto))
-		return;
-
-	if (l3proto->net_ns_put)
-		l3proto->net_ns_put(net);
-
-	nf_ct_l3proto_module_put(nfproto);
-}
-
-void nf_ct_netns_put(struct net *net, uint8_t nfproto)
-{
-	if (nfproto == NFPROTO_INET) {
-		nf_ct_netns_do_put(net, NFPROTO_IPV4);
-		nf_ct_netns_do_put(net, NFPROTO_IPV6);
-	} else
-		nf_ct_netns_do_put(net, nfproto);
-}
-EXPORT_SYMBOL_GPL(nf_ct_netns_put);
-
 const struct nf_conntrack_l4proto *
 nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
 {
@@ -274,11 +155,6 @@ void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p)
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
 
-static int kill_l3proto(struct nf_conn *i, void *data)
-{
-	return nf_ct_l3num(i) == ((const struct nf_conntrack_l3proto *)data)->l3proto;
-}
-
 static int kill_l4proto(struct nf_conn *i, void *data)
 {
 	const struct nf_conntrack_l4proto *l4proto;
@@ -287,52 +163,6 @@ static int kill_l4proto(struct nf_conn *i, void *data)
 	       nf_ct_l3num(i) == l4proto->l3proto;
 }
 
-int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto)
-{
-	int ret = 0;
-	struct nf_conntrack_l3proto *old;
-
-	if (proto->l3proto >= NFPROTO_NUMPROTO)
-		return -EBUSY;
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	if (proto->tuple_to_nlattr && proto->nla_size == 0)
-		return -EINVAL;
-#endif
-	mutex_lock(&nf_ct_proto_mutex);
-	old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
-					lockdep_is_held(&nf_ct_proto_mutex));
-	if (old != &nf_conntrack_l3proto_generic) {
-		ret = -EBUSY;
-		goto out_unlock;
-	}
-
-	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
-
-out_unlock:
-	mutex_unlock(&nf_ct_proto_mutex);
-	return ret;
-
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
-
-void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto)
-{
-	BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO);
-
-	mutex_lock(&nf_ct_proto_mutex);
-	BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
-					 lockdep_is_held(&nf_ct_proto_mutex)
-					 ) != proto);
-	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
-			   &nf_conntrack_l3proto_generic);
-	mutex_unlock(&nf_ct_proto_mutex);
-
-	synchronize_rcu();
-	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_destroy(kill_l3proto, (void*)proto);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
-
 static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
 				const struct nf_conntrack_l4proto *l4proto)
 {
@@ -499,8 +329,23 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net,
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
 
-int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
-			   unsigned int num_proto)
+static void
+nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
+			 unsigned int num_proto)
+{
+	mutex_lock(&nf_ct_proto_mutex);
+	while (num_proto-- != 0)
+		__nf_ct_l4proto_unregister_one(l4proto[num_proto]);
+	mutex_unlock(&nf_ct_proto_mutex);
+
+	synchronize_net();
+	/* Remove all contrack entries for this protocol */
+	nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
+}
+
+static int
+nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
+		       unsigned int num_proto)
 {
 	int ret = -EINVAL, ver;
 	unsigned int i;
@@ -518,7 +363,6 @@ int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
 	}
 	return ret;
 }
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
 
 int nf_ct_l4proto_pernet_register(struct net *net,
 				  const struct nf_conntrack_l4proto *const l4proto[],
@@ -542,20 +386,6 @@ int nf_ct_l4proto_pernet_register(struct net *net,
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
 
-void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
-			      unsigned int num_proto)
-{
-	mutex_lock(&nf_ct_proto_mutex);
-	while (num_proto-- != 0)
-		__nf_ct_l4proto_unregister_one(l4proto[num_proto]);
-	mutex_unlock(&nf_ct_proto_mutex);
-
-	synchronize_net();
-	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
-
 void nf_ct_l4proto_pernet_unregister(struct net *net,
 				const struct nf_conntrack_l4proto *const l4proto[],
 				unsigned int num_proto)
@@ -565,6 +395,563 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
 
+static unsigned int ipv4_helper(void *priv,
+				struct sk_buff *skb,
+				const struct nf_hook_state *state)
+{
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	const struct nf_conn_help *help;
+	const struct nf_conntrack_helper *helper;
+
+	/* This is where we call the helper: as the packet goes out. */
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+		return NF_ACCEPT;
+
+	help = nfct_help(ct);
+	if (!help)
+		return NF_ACCEPT;
+
+	/* rcu_read_lock()ed by nf_hook_thresh */
+	helper = rcu_dereference(help->helper);
+	if (!helper)
+		return NF_ACCEPT;
+
+	return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
+			    ct, ctinfo);
+}
+
+static unsigned int ipv4_confirm(void *priv,
+				 struct sk_buff *skb,
+				 const struct nf_hook_state *state)
+{
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+		goto out;
+
+	/* adjust seqs for loopback traffic only in outgoing direction */
+	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+	    !nf_is_loopback_packet(skb)) {
+		if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
+			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+			return NF_DROP;
+		}
+	}
+out:
+	/* We've seen it coming out the other side: confirm it */
+	return nf_conntrack_confirm(skb);
+}
+
+static unsigned int ipv4_conntrack_in(void *priv,
+				      struct sk_buff *skb,
+				      const struct nf_hook_state *state)
+{
+	return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
+}
+
+static unsigned int ipv4_conntrack_local(void *priv,
+					 struct sk_buff *skb,
+					 const struct nf_hook_state *state)
+{
+	if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */
+		enum ip_conntrack_info ctinfo;
+		struct nf_conn *tmpl;
+
+		tmpl = nf_ct_get(skb, &ctinfo);
+		if (tmpl && nf_ct_is_template(tmpl)) {
+			/* when skipping ct, clear templates to avoid fooling
+			 * later targets/matches
+			 */
+			skb->_nfct = 0;
+			nf_ct_put(tmpl);
+		}
+		return NF_ACCEPT;
+	}
+
+	return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
+}
+
+/* Connection tracking may drop packets, but never alters them, so
+ * make it the first hook.
+ */
+static const struct nf_hook_ops ipv4_conntrack_ops[] = {
+	{
+		.hook		= ipv4_conntrack_in,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_PRE_ROUTING,
+		.priority	= NF_IP_PRI_CONNTRACK,
+	},
+	{
+		.hook		= ipv4_conntrack_local,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP_PRI_CONNTRACK,
+	},
+	{
+		.hook		= ipv4_helper,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
+	},
+	{
+		.hook		= ipv4_confirm,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
+	},
+	{
+		.hook		= ipv4_helper,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
+	},
+	{
+		.hook		= ipv4_confirm,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
+	},
+};
+
+/* Fast function for those who don't want to parse /proc (and I don't
+ * blame them).
+ * Reversing the socket's dst/src point of view gives us the reply
+ * mapping.
+ */
+static int
+getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct nf_conntrack_tuple_hash *h;
+	struct nf_conntrack_tuple tuple;
+
+	memset(&tuple, 0, sizeof(tuple));
+
+	lock_sock(sk);
+	tuple.src.u3.ip = inet->inet_rcv_saddr;
+	tuple.src.u.tcp.port = inet->inet_sport;
+	tuple.dst.u3.ip = inet->inet_daddr;
+	tuple.dst.u.tcp.port = inet->inet_dport;
+	tuple.src.l3num = PF_INET;
+	tuple.dst.protonum = sk->sk_protocol;
+	release_sock(sk);
+
+	/* We only do TCP and SCTP at the moment: is there a better way? */
+	if (tuple.dst.protonum != IPPROTO_TCP &&
+	    tuple.dst.protonum != IPPROTO_SCTP) {
+		pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
+		return -ENOPROTOOPT;
+	}
+
+	if ((unsigned int)*len < sizeof(struct sockaddr_in)) {
+		pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
+			 *len, sizeof(struct sockaddr_in));
+		return -EINVAL;
+	}
+
+	h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
+	if (h) {
+		struct sockaddr_in sin;
+		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+		sin.sin_family = AF_INET;
+		sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+			.tuple.dst.u.tcp.port;
+		sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+			.tuple.dst.u3.ip;
+		memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
+
+		pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
+			 &sin.sin_addr.s_addr, ntohs(sin.sin_port));
+		nf_ct_put(ct);
+		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
+			return -EFAULT;
+		else
+			return 0;
+	}
+	pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
+		 &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
+		 &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
+	return -ENOENT;
+}
+
+static struct nf_sockopt_ops so_getorigdst = {
+	.pf		= PF_INET,
+	.get_optmin	= SO_ORIGINAL_DST,
+	.get_optmax	= SO_ORIGINAL_DST + 1,
+	.get		= getorigdst,
+	.owner		= THIS_MODULE,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int
+ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+	struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
+	const struct ipv6_pinfo *inet6 = inet6_sk(sk);
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct nf_conntrack_tuple_hash *h;
+	struct sockaddr_in6 sin6;
+	struct nf_conn *ct;
+	__be32 flow_label;
+	int bound_dev_if;
+
+	lock_sock(sk);
+	tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
+	tuple.src.u.tcp.port = inet->inet_sport;
+	tuple.dst.u3.in6 = sk->sk_v6_daddr;
+	tuple.dst.u.tcp.port = inet->inet_dport;
+	tuple.dst.protonum = sk->sk_protocol;
+	bound_dev_if = sk->sk_bound_dev_if;
+	flow_label = inet6->flow_label;
+	release_sock(sk);
+
+	if (tuple.dst.protonum != IPPROTO_TCP &&
+	    tuple.dst.protonum != IPPROTO_SCTP)
+		return -ENOPROTOOPT;
+
+	if (*len < 0 || (unsigned int)*len < sizeof(sin6))
+		return -EINVAL;
+
+	h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
+	if (!h) {
+		pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
+			 &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
+			 &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
+		return -ENOENT;
+	}
+
+	ct = nf_ct_tuplehash_to_ctrack(h);
+
+	sin6.sin6_family = AF_INET6;
+	sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
+	sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK;
+	memcpy(&sin6.sin6_addr,
+	       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
+	       sizeof(sin6.sin6_addr));
+
+	nf_ct_put(ct);
+	sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if);
+	return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
+}
+
+static struct nf_sockopt_ops so_getorigdst6 = {
+	.pf		= NFPROTO_IPV6,
+	.get_optmin	= IP6T_SO_ORIGINAL_DST,
+	.get_optmax	= IP6T_SO_ORIGINAL_DST + 1,
+	.get		= ipv6_getorigdst,
+	.owner		= THIS_MODULE,
+};
+
+static unsigned int ipv6_confirm(void *priv,
+				 struct sk_buff *skb,
+				 const struct nf_hook_state *state)
+{
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	unsigned char pnum = ipv6_hdr(skb)->nexthdr;
+	int protoff;
+	__be16 frag_off;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+		goto out;
+
+	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
+				   &frag_off);
+	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+		pr_debug("proto header not found\n");
+		goto out;
+	}
+
+	/* adjust seqs for loopback traffic only in outgoing direction */
+	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+	    !nf_is_loopback_packet(skb)) {
+		if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
+			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+			return NF_DROP;
+		}
+	}
+out:
+	/* We've seen it coming out the other side: confirm it */
+	return nf_conntrack_confirm(skb);
+}
+
+static unsigned int ipv6_conntrack_in(void *priv,
+				      struct sk_buff *skb,
+				      const struct nf_hook_state *state)
+{
+	return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
+}
+
+static unsigned int ipv6_conntrack_local(void *priv,
+					 struct sk_buff *skb,
+					 const struct nf_hook_state *state)
+{
+	return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
+}
+
+static unsigned int ipv6_helper(void *priv,
+				struct sk_buff *skb,
+				const struct nf_hook_state *state)
+{
+	struct nf_conn *ct;
+	const struct nf_conn_help *help;
+	const struct nf_conntrack_helper *helper;
+	enum ip_conntrack_info ctinfo;
+	__be16 frag_off;
+	int protoff;
+	u8 nexthdr;
+
+	/* This is where we call the helper: as the packet goes out. */
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+		return NF_ACCEPT;
+
+	help = nfct_help(ct);
+	if (!help)
+		return NF_ACCEPT;
+	/* rcu_read_lock()ed by nf_hook_thresh */
+	helper = rcu_dereference(help->helper);
+	if (!helper)
+		return NF_ACCEPT;
+
+	nexthdr = ipv6_hdr(skb)->nexthdr;
+	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+				   &frag_off);
+	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+		pr_debug("proto header not found\n");
+		return NF_ACCEPT;
+	}
+
+	return helper->help(skb, protoff, ct, ctinfo);
+}
+
+static const struct nf_hook_ops ipv6_conntrack_ops[] = {
+	{
+		.hook		= ipv6_conntrack_in,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_PRE_ROUTING,
+		.priority	= NF_IP6_PRI_CONNTRACK,
+	},
+	{
+		.hook		= ipv6_conntrack_local,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP6_PRI_CONNTRACK,
+	},
+	{
+		.hook		= ipv6_helper,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP6_PRI_CONNTRACK_HELPER,
+	},
+	{
+		.hook		= ipv6_confirm,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP6_PRI_LAST,
+	},
+	{
+		.hook		= ipv6_helper,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP6_PRI_CONNTRACK_HELPER,
+	},
+	{
+		.hook		= ipv6_confirm,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP6_PRI_LAST - 1,
+	},
+};
+#endif
+
+static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
+{
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	int err = 0;
+
+	mutex_lock(&nf_ct_proto_mutex);
+
+	switch (nfproto) {
+	case NFPROTO_IPV4:
+		cnet->users4++;
+		if (cnet->users4 > 1)
+			goto out_unlock;
+		err = nf_defrag_ipv4_enable(net);
+		if (err) {
+			cnet->users4 = 0;
+			goto out_unlock;
+		}
+
+		err = nf_register_net_hooks(net, ipv4_conntrack_ops,
+					    ARRAY_SIZE(ipv4_conntrack_ops));
+		if (err)
+			cnet->users4 = 0;
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case NFPROTO_IPV6:
+		cnet->users6++;
+		if (cnet->users6 > 1)
+			goto out_unlock;
+		err = nf_defrag_ipv6_enable(net);
+		if (err < 0) {
+			cnet->users6 = 0;
+			goto out_unlock;
+		}
+
+		err = nf_register_net_hooks(net, ipv6_conntrack_ops,
+					    ARRAY_SIZE(ipv6_conntrack_ops));
+		if (err)
+			cnet->users6 = 0;
+		break;
+#endif
+	default:
+		err = -EPROTO;
+		break;
+	}
+ out_unlock:
+	mutex_unlock(&nf_ct_proto_mutex);
+	return err;
+}
+
+static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
+{
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+
+	mutex_lock(&nf_ct_proto_mutex);
+	switch (nfproto) {
+	case NFPROTO_IPV4:
+		if (cnet->users4 && (--cnet->users4 == 0))
+			nf_unregister_net_hooks(net, ipv4_conntrack_ops,
+						ARRAY_SIZE(ipv4_conntrack_ops));
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case NFPROTO_IPV6:
+		if (cnet->users6 && (--cnet->users6 == 0))
+			nf_unregister_net_hooks(net, ipv6_conntrack_ops,
+						ARRAY_SIZE(ipv6_conntrack_ops));
+		break;
+#endif
+	}
+
+	mutex_unlock(&nf_ct_proto_mutex);
+}
+
+int nf_ct_netns_get(struct net *net, u8 nfproto)
+{
+	int err;
+
+	if (nfproto == NFPROTO_INET) {
+		err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
+		if (err < 0)
+			goto err1;
+		err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
+		if (err < 0)
+			goto err2;
+	} else {
+		err = nf_ct_netns_do_get(net, nfproto);
+		if (err < 0)
+			goto err1;
+	}
+	return 0;
+
+err2:
+	nf_ct_netns_put(net, NFPROTO_IPV4);
+err1:
+	return err;
+}
+EXPORT_SYMBOL_GPL(nf_ct_netns_get);
+
+void nf_ct_netns_put(struct net *net, uint8_t nfproto)
+{
+	if (nfproto == NFPROTO_INET) {
+		nf_ct_netns_do_put(net, NFPROTO_IPV4);
+		nf_ct_netns_do_put(net, NFPROTO_IPV6);
+	} else {
+		nf_ct_netns_do_put(net, nfproto);
+	}
+}
+EXPORT_SYMBOL_GPL(nf_ct_netns_put);
+
+static const struct nf_conntrack_l4proto * const builtin_l4proto[] = {
+	&nf_conntrack_l4proto_tcp4,
+	&nf_conntrack_l4proto_udp4,
+	&nf_conntrack_l4proto_icmp,
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	&nf_conntrack_l4proto_dccp4,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+	&nf_conntrack_l4proto_sctp4,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+	&nf_conntrack_l4proto_udplite4,
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
+	&nf_conntrack_l4proto_tcp6,
+	&nf_conntrack_l4proto_udp6,
+	&nf_conntrack_l4proto_icmpv6,
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	&nf_conntrack_l4proto_dccp6,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+	&nf_conntrack_l4proto_sctp6,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+	&nf_conntrack_l4proto_udplite6,
+#endif
+#endif /* CONFIG_IPV6 */
+};
+
+int nf_conntrack_proto_init(void)
+{
+	int ret = 0;
+
+	ret = nf_register_sockopt(&so_getorigdst);
+	if (ret < 0)
+		return ret;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	ret = nf_register_sockopt(&so_getorigdst6);
+	if (ret < 0)
+		goto cleanup_sockopt;
+#endif
+	ret = nf_ct_l4proto_register(builtin_l4proto,
+				     ARRAY_SIZE(builtin_l4proto));
+	if (ret < 0)
+		goto cleanup_sockopt2;
+
+	return ret;
+cleanup_sockopt2:
+	nf_unregister_sockopt(&so_getorigdst);
+#if IS_ENABLED(CONFIG_IPV6)
+cleanup_sockopt:
+	nf_unregister_sockopt(&so_getorigdst6);
+#endif
+	return ret;
+}
+
+void nf_conntrack_proto_fini(void)
+{
+	unsigned int i;
+
+	nf_ct_l4proto_unregister(builtin_l4proto,
+				 ARRAY_SIZE(builtin_l4proto));
+	nf_unregister_sockopt(&so_getorigdst);
+#if IS_ENABLED(CONFIG_IPV6)
+	nf_unregister_sockopt(&so_getorigdst6);
+#endif
+
+	/* free l3proto protocol tables */
+	for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
+		kfree(nf_ct_protos[i]);
+}
+
 int nf_conntrack_proto_pernet_init(struct net *net)
 {
 	int err;
@@ -581,6 +968,14 @@ int nf_conntrack_proto_pernet_init(struct net *net)
 	if (err < 0)
 		return err;
 
+	err = nf_ct_l4proto_pernet_register(net, builtin_l4proto,
+					    ARRAY_SIZE(builtin_l4proto));
+	if (err < 0) {
+		nf_ct_l4proto_unregister_sysctl(net, pn,
+						&nf_conntrack_l4proto_generic);
+		return err;
+	}
+
 	pn->users++;
 	return 0;
 }
@@ -590,25 +985,19 @@ void nf_conntrack_proto_pernet_fini(struct net *net)
 	struct nf_proto_net *pn = nf_ct_l4proto_net(net,
 					&nf_conntrack_l4proto_generic);
 
+	nf_ct_l4proto_pernet_unregister(net, builtin_l4proto,
+					ARRAY_SIZE(builtin_l4proto));
 	pn->users--;
 	nf_ct_l4proto_unregister_sysctl(net,
 					pn,
 					&nf_conntrack_l4proto_generic);
 }
 
-int nf_conntrack_proto_init(void)
-{
-	unsigned int i;
-	for (i = 0; i < NFPROTO_NUMPROTO; i++)
-		rcu_assign_pointer(nf_ct_l3protos[i],
-				   &nf_conntrack_l3proto_generic);
-	return 0;
-}
 
-void nf_conntrack_proto_fini(void)
-{
-	unsigned int i;
-	/* free l3proto protocol tables */
-	for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
-		kfree(nf_ct_protos[i]);
-}
+module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
+		  &nf_conntrack_htable_size, 0600);
+
+MODULE_ALIAS("ip_conntrack");
+MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
+MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index abe647d5b8c6..f476d116c816 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -23,6 +23,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 #include <net/netfilter/nf_log.h>
 
 /* Timeouts are based on values from RFC4340:
@@ -388,31 +389,8 @@ static inline struct nf_dccp_net *dccp_pernet(struct net *net)
 	return &net->ct.nf_ct_proto.dccp;
 }
 
-static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
-			      struct net *net, struct nf_conntrack_tuple *tuple)
-{
-	struct dccp_hdr _hdr, *dh;
-
-	/* Actually only need first 4 bytes to get ports. */
-	dh = skb_header_pointer(skb, dataoff, 4, &_hdr);
-	if (dh == NULL)
-		return false;
-
-	tuple->src.u.dccp.port = dh->dccph_sport;
-	tuple->dst.u.dccp.port = dh->dccph_dport;
-	return true;
-}
-
-static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv,
-			      const struct nf_conntrack_tuple *tuple)
-{
-	inv->src.u.dccp.port = tuple->dst.u.dccp.port;
-	inv->dst.u.dccp.port = tuple->src.u.dccp.port;
-	return true;
-}
-
 static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
-		     unsigned int dataoff, unsigned int *timeouts)
+		     unsigned int dataoff)
 {
 	struct net *net = nf_ct_net(ct);
 	struct nf_dccp_net *dn;
@@ -460,19 +438,14 @@ static u64 dccp_ack_seq(const struct dccp_hdr *dh)
 		     ntohl(dhack->dccph_ack_nr_low);
 }
 
-static unsigned int *dccp_get_timeouts(struct net *net)
-{
-	return dccp_pernet(net)->dccp_timeout;
-}
-
 static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
-		       unsigned int dataoff, enum ip_conntrack_info ctinfo,
-		       unsigned int *timeouts)
+		       unsigned int dataoff, enum ip_conntrack_info ctinfo)
 {
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	struct dccp_hdr _dh, *dh;
 	u_int8_t type, old_state, new_state;
 	enum ct_dccp_roles role;
+	unsigned int *timeouts;
 
 	dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
 	BUG_ON(dh == NULL);
@@ -546,6 +519,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 	if (new_state != old_state)
 		nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
 
+	timeouts = nf_ct_timeout_lookup(ct);
+	if (!timeouts)
+		timeouts = dccp_pernet(nf_ct_net(ct))->dccp_timeout;
 	nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
 
 	return NF_ACCEPT;
@@ -864,11 +840,8 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = {
 	.l3proto		= AF_INET,
 	.l4proto		= IPPROTO_DCCP,
-	.pkt_to_tuple		= dccp_pkt_to_tuple,
-	.invert_tuple		= dccp_invert_tuple,
 	.new			= dccp_new,
 	.packet			= dccp_packet,
-	.get_timeouts		= dccp_get_timeouts,
 	.error			= dccp_error,
 	.can_early_drop		= dccp_can_early_drop,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
@@ -900,11 +873,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
 	.l3proto		= AF_INET6,
 	.l4proto		= IPPROTO_DCCP,
-	.pkt_to_tuple		= dccp_pkt_to_tuple,
-	.invert_tuple		= dccp_invert_tuple,
 	.new			= dccp_new,
 	.packet			= dccp_packet,
-	.get_timeouts		= dccp_get_timeouts,
 	.error			= dccp_error,
 	.can_early_drop		= dccp_can_early_drop,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 6c6896d21cd7..ac4a0b296dcd 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -11,6 +11,7 @@
 #include <linux/timer.h>
 #include <linux/netfilter.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 
 static const unsigned int nf_ct_generic_timeout = 600*HZ;
 
@@ -41,34 +42,24 @@ static bool generic_pkt_to_tuple(const struct sk_buff *skb,
 	return true;
 }
 
-static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
-				 const struct nf_conntrack_tuple *orig)
-{
-	tuple->src.u.all = 0;
-	tuple->dst.u.all = 0;
-
-	return true;
-}
-
-static unsigned int *generic_get_timeouts(struct net *net)
-{
-	return &(generic_pernet(net)->timeout);
-}
-
 /* Returns verdict for packet, or -1 for invalid. */
 static int generic_packet(struct nf_conn *ct,
 			  const struct sk_buff *skb,
 			  unsigned int dataoff,
-			  enum ip_conntrack_info ctinfo,
-			  unsigned int *timeout)
+			  enum ip_conntrack_info ctinfo)
 {
+	const unsigned int *timeout = nf_ct_timeout_lookup(ct);
+
+	if (!timeout)
+		timeout = &generic_pernet(nf_ct_net(ct))->timeout;
+
 	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
 	return NF_ACCEPT;
 }
 
 /* Called when a new connection for this protocol found. */
 static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
-			unsigned int dataoff, unsigned int *timeouts)
+			unsigned int dataoff)
 {
 	bool ret;
 
@@ -87,8 +78,11 @@ static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
 static int generic_timeout_nlattr_to_obj(struct nlattr *tb[],
 					 struct net *net, void *data)
 {
-	unsigned int *timeout = data;
 	struct nf_generic_net *gn = generic_pernet(net);
+	unsigned int *timeout = data;
+
+	if (!timeout)
+		timeout = &gn->timeout;
 
 	if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT])
 		*timeout =
@@ -168,9 +162,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
 	.l3proto		= PF_UNSPEC,
 	.l4proto		= 255,
 	.pkt_to_tuple		= generic_pkt_to_tuple,
-	.invert_tuple		= generic_invert_tuple,
 	.packet			= generic_packet,
-	.get_timeouts		= generic_get_timeouts,
 	.new			= generic_new,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
 	.ctnl_timeout		= {
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index d049ea5a3770..d1632252bf5b 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -39,6 +39,7 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 #include <linux/netfilter/nf_conntrack_proto_gre.h>
 #include <linux/netfilter/nf_conntrack_pptp.h>
 
@@ -179,15 +180,6 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy);
 
 /* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
 
-/* invert gre part of tuple */
-static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple,
-			     const struct nf_conntrack_tuple *orig)
-{
-	tuple->dst.u.gre.key = orig->src.u.gre.key;
-	tuple->src.u.gre.key = orig->dst.u.gre.key;
-	return true;
-}
-
 /* gre hdr info to tuple */
 static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 			     struct net *net, struct nf_conntrack_tuple *tuple)
@@ -243,8 +235,7 @@ static unsigned int *gre_get_timeouts(struct net *net)
 static int gre_packet(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
-		      enum ip_conntrack_info ctinfo,
-		      unsigned int *timeouts)
+		      enum ip_conntrack_info ctinfo)
 {
 	/* If we've seen traffic both ways, this is a GRE connection.
 	 * Extend timeout. */
@@ -263,8 +254,13 @@ static int gre_packet(struct nf_conn *ct,
 
 /* Called when a new connection for this protocol found. */
 static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb,
-		    unsigned int dataoff, unsigned int *timeouts)
+		    unsigned int dataoff)
 {
+	unsigned int *timeouts = nf_ct_timeout_lookup(ct);
+
+	if (!timeouts)
+		timeouts = gre_get_timeouts(nf_ct_net(ct));
+
 	pr_debug(": ");
 	nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 
@@ -300,6 +296,8 @@ static int gre_timeout_nlattr_to_obj(struct nlattr *tb[],
 	unsigned int *timeouts = data;
 	struct netns_proto_gre *net_gre = gre_pernet(net);
 
+	if (!timeouts)
+		timeouts = gre_get_timeouts(net);
 	/* set default timeouts for GRE. */
 	timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED];
 	timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED];
@@ -356,11 +354,9 @@ static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
 	.l3proto	 = AF_INET,
 	.l4proto	 = IPPROTO_GRE,
 	.pkt_to_tuple	 = gre_pkt_to_tuple,
-	.invert_tuple	 = gre_invert_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack = gre_print_conntrack,
 #endif
-	.get_timeouts    = gre_get_timeouts,
 	.packet		 = gre_packet,
 	.new		 = gre_new,
 	.destroy	 = gre_destroy,
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
new file mode 100644
index 000000000000..036670b38282
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -0,0 +1,388 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/in.h>
+#include <linux/icmp.h>
+#include <linux/seq_file.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_log.h>
+
+static const unsigned int nf_ct_icmp_timeout = 30*HZ;
+
+static inline struct nf_icmp_net *icmp_pernet(struct net *net)
+{
+	return &net->ct.nf_ct_proto.icmp;
+}
+
+static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+			      struct net *net, struct nf_conntrack_tuple *tuple)
+{
+	const struct icmphdr *hp;
+	struct icmphdr _hdr;
+
+	hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+	if (hp == NULL)
+		return false;
+
+	tuple->dst.u.icmp.type = hp->type;
+	tuple->src.u.icmp.id = hp->un.echo.id;
+	tuple->dst.u.icmp.code = hp->code;
+
+	return true;
+}
+
+/* Add 1; spaces filled with 0. */
+static const u_int8_t invmap[] = {
+	[ICMP_ECHO] = ICMP_ECHOREPLY + 1,
+	[ICMP_ECHOREPLY] = ICMP_ECHO + 1,
+	[ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
+	[ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
+	[ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
+	[ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
+	[ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
+	[ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
+};
+
+static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
+			      const struct nf_conntrack_tuple *orig)
+{
+	if (orig->dst.u.icmp.type >= sizeof(invmap) ||
+	    !invmap[orig->dst.u.icmp.type])
+		return false;
+
+	tuple->src.u.icmp.id = orig->src.u.icmp.id;
+	tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
+	tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+	return true;
+}
+
+static unsigned int *icmp_get_timeouts(struct net *net)
+{
+	return &icmp_pernet(net)->timeout;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int icmp_packet(struct nf_conn *ct,
+		       const struct sk_buff *skb,
+		       unsigned int dataoff,
+		       enum ip_conntrack_info ctinfo)
+{
+	/* Do not immediately delete the connection after the first
+	   successful reply to avoid excessive conntrackd traffic
+	   and also to handle correctly ICMP echo reply duplicates. */
+	unsigned int *timeout = nf_ct_timeout_lookup(ct);
+
+	if (!timeout)
+		timeout = icmp_get_timeouts(nf_ct_net(ct));
+
+	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+
+	return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
+		     unsigned int dataoff)
+{
+	static const u_int8_t valid_new[] = {
+		[ICMP_ECHO] = 1,
+		[ICMP_TIMESTAMP] = 1,
+		[ICMP_INFO_REQUEST] = 1,
+		[ICMP_ADDRESS] = 1
+	};
+
+	if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) ||
+	    !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
+		/* Can't create a new ICMP `conn' with this. */
+		pr_debug("icmp: can't create new conn with type %u\n",
+			 ct->tuplehash[0].tuple.dst.u.icmp.type);
+		nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
+		return false;
+	}
+	return true;
+}
+
+/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
+static int
+icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
+		 unsigned int hooknum)
+{
+	struct nf_conntrack_tuple innertuple, origtuple;
+	const struct nf_conntrack_l4proto *innerproto;
+	const struct nf_conntrack_tuple_hash *h;
+	const struct nf_conntrack_zone *zone;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conntrack_zone tmp;
+
+	WARN_ON(skb_nfct(skb));
+	zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
+
+	/* Are they talking about one of our connections? */
+	if (!nf_ct_get_tuplepr(skb,
+			       skb_network_offset(skb) + ip_hdrlen(skb)
+						       + sizeof(struct icmphdr),
+			       PF_INET, net, &origtuple)) {
+		pr_debug("icmp_error_message: failed to get tuple\n");
+		return -NF_ACCEPT;
+	}
+
+	/* rcu_read_lock()ed by nf_hook_thresh */
+	innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum);
+
+	/* Ordinarily, we'd expect the inverted tupleproto, but it's
+	   been preserved inside the ICMP. */
+	if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
+		pr_debug("icmp_error_message: no match\n");
+		return -NF_ACCEPT;
+	}
+
+	ctinfo = IP_CT_RELATED;
+
+	h = nf_conntrack_find_get(net, zone, &innertuple);
+	if (!h) {
+		pr_debug("icmp_error_message: no match\n");
+		return -NF_ACCEPT;
+	}
+
+	if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+		ctinfo += IP_CT_IS_REPLY;
+
+	/* Update skb to refer to this connection */
+	nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
+	return NF_ACCEPT;
+}
+
+static void icmp_error_log(const struct sk_buff *skb, struct net *net,
+			   u8 pf, const char *msg)
+{
+	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg);
+}
+
+/* Small and modified version of icmp_rcv */
+static int
+icmp_error(struct net *net, struct nf_conn *tmpl,
+	   struct sk_buff *skb, unsigned int dataoff,
+	   u8 pf, unsigned int hooknum)
+{
+	const struct icmphdr *icmph;
+	struct icmphdr _ih;
+
+	/* Not enough header? */
+	icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
+	if (icmph == NULL) {
+		icmp_error_log(skb, net, pf, "short packet");
+		return -NF_ACCEPT;
+	}
+
+	/* See ip_conntrack_proto_tcp.c */
+	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
+	    nf_ip_checksum(skb, hooknum, dataoff, 0)) {
+		icmp_error_log(skb, net, pf, "bad hw icmp checksum");
+		return -NF_ACCEPT;
+	}
+
+	/*
+	 *	18 is the highest 'known' ICMP type. Anything else is a mystery
+	 *
+	 *	RFC 1122: 3.2.2  Unknown ICMP messages types MUST be silently
+	 *		  discarded.
+	 */
+	if (icmph->type > NR_ICMP_TYPES) {
+		icmp_error_log(skb, net, pf, "invalid icmp type");
+		return -NF_ACCEPT;
+	}
+
+	/* Need to track icmp error message? */
+	if (icmph->type != ICMP_DEST_UNREACH &&
+	    icmph->type != ICMP_SOURCE_QUENCH &&
+	    icmph->type != ICMP_TIME_EXCEEDED &&
+	    icmph->type != ICMP_PARAMETERPROB &&
+	    icmph->type != ICMP_REDIRECT)
+		return NF_ACCEPT;
+
+	return icmp_error_message(net, tmpl, skb, hooknum);
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int icmp_tuple_to_nlattr(struct sk_buff *skb,
+				const struct nf_conntrack_tuple *t)
+{
+	if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) ||
+	    nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) ||
+	    nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = {
+	[CTA_PROTO_ICMP_TYPE]	= { .type = NLA_U8 },
+	[CTA_PROTO_ICMP_CODE]	= { .type = NLA_U8 },
+	[CTA_PROTO_ICMP_ID]	= { .type = NLA_U16 },
+};
+
+static int icmp_nlattr_to_tuple(struct nlattr *tb[],
+				struct nf_conntrack_tuple *tuple)
+{
+	if (!tb[CTA_PROTO_ICMP_TYPE] ||
+	    !tb[CTA_PROTO_ICMP_CODE] ||
+	    !tb[CTA_PROTO_ICMP_ID])
+		return -EINVAL;
+
+	tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
+	tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
+	tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
+
+	if (tuple->dst.u.icmp.type >= sizeof(invmap) ||
+	    !invmap[tuple->dst.u.icmp.type])
+		return -EINVAL;
+
+	return 0;
+}
+
+static unsigned int icmp_nlattr_tuple_size(void)
+{
+	static unsigned int size __read_mostly;
+
+	if (!size)
+		size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
+
+	return size;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
+				      struct net *net, void *data)
+{
+	unsigned int *timeout = data;
+	struct nf_icmp_net *in = icmp_pernet(net);
+
+	if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
+		if (!timeout)
+			timeout = &in->timeout;
+		*timeout =
+			ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ;
+	} else if (timeout) {
+		/* Set default ICMP timeout. */
+		*timeout = in->timeout;
+	}
+	return 0;
+}
+
+static int
+icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+	const unsigned int *timeout = data;
+
+	if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -ENOSPC;
+}
+
+static const struct nla_policy
+icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = {
+	[CTA_TIMEOUT_ICMP_TIMEOUT]	= { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table icmp_sysctl_table[] = {
+	{
+		.procname	= "nf_conntrack_icmp_timeout",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{ }
+};
+#endif /* CONFIG_SYSCTL */
+
+static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
+				     struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
+	pn->ctl_table = kmemdup(icmp_sysctl_table,
+				sizeof(icmp_sysctl_table),
+				GFP_KERNEL);
+	if (!pn->ctl_table)
+		return -ENOMEM;
+
+	pn->ctl_table[0].data = &in->timeout;
+#endif
+	return 0;
+}
+
+static int icmp_init_net(struct net *net, u_int16_t proto)
+{
+	struct nf_icmp_net *in = icmp_pernet(net);
+	struct nf_proto_net *pn = &in->pn;
+
+	in->timeout = nf_ct_icmp_timeout;
+
+	return icmp_kmemdup_sysctl_table(pn, in);
+}
+
+static struct nf_proto_net *icmp_get_net_proto(struct net *net)
+{
+	return &net->ct.nf_ct_proto.icmp.pn;
+}
+
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
+{
+	.l3proto		= PF_INET,
+	.l4proto		= IPPROTO_ICMP,
+	.pkt_to_tuple		= icmp_pkt_to_tuple,
+	.invert_tuple		= icmp_invert_tuple,
+	.packet			= icmp_packet,
+	.new			= icmp_new,
+	.error			= icmp_error,
+	.destroy		= NULL,
+	.me			= NULL,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+	.tuple_to_nlattr	= icmp_tuple_to_nlattr,
+	.nlattr_tuple_size	= icmp_nlattr_tuple_size,
+	.nlattr_to_tuple	= icmp_nlattr_to_tuple,
+	.nla_policy		= icmp_nla_policy,
+#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+	.ctnl_timeout		= {
+		.nlattr_to_obj	= icmp_timeout_nlattr_to_obj,
+		.obj_to_nlattr	= icmp_timeout_obj_to_nlattr,
+		.nlattr_max	= CTA_TIMEOUT_ICMP_MAX,
+		.obj_size	= sizeof(unsigned int),
+		.nla_policy	= icmp_timeout_nla_policy,
+	},
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+	.init_net		= icmp_init_net,
+	.get_net_proto		= icmp_get_net_proto,
+};
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
new file mode 100644
index 000000000000..bed07b998a10
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright (C)2003,2004 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Author:
+ *	Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <linux/seq_file.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
+#include <net/netfilter/nf_log.h>
+
+static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
+
+static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
+{
+	return &net->ct.nf_ct_proto.icmpv6;
+}
+
+static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
+				unsigned int dataoff,
+				struct net *net,
+				struct nf_conntrack_tuple *tuple)
+{
+	const struct icmp6hdr *hp;
+	struct icmp6hdr _hdr;
+
+	hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+	if (hp == NULL)
+		return false;
+	tuple->dst.u.icmp.type = hp->icmp6_type;
+	tuple->src.u.icmp.id = hp->icmp6_identifier;
+	tuple->dst.u.icmp.code = hp->icmp6_code;
+
+	return true;
+}
+
+/* Add 1; spaces filled with 0. */
+static const u_int8_t invmap[] = {
+	[ICMPV6_ECHO_REQUEST - 128]	= ICMPV6_ECHO_REPLY + 1,
+	[ICMPV6_ECHO_REPLY - 128]	= ICMPV6_ECHO_REQUEST + 1,
+	[ICMPV6_NI_QUERY - 128]		= ICMPV6_NI_REPLY + 1,
+	[ICMPV6_NI_REPLY - 128]		= ICMPV6_NI_QUERY + 1
+};
+
+static const u_int8_t noct_valid_new[] = {
+	[ICMPV6_MGM_QUERY - 130] = 1,
+	[ICMPV6_MGM_REPORT - 130] = 1,
+	[ICMPV6_MGM_REDUCTION - 130] = 1,
+	[NDISC_ROUTER_SOLICITATION - 130] = 1,
+	[NDISC_ROUTER_ADVERTISEMENT - 130] = 1,
+	[NDISC_NEIGHBOUR_SOLICITATION - 130] = 1,
+	[NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1,
+	[ICMPV6_MLD2_REPORT - 130] = 1
+};
+
+static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
+				const struct nf_conntrack_tuple *orig)
+{
+	int type = orig->dst.u.icmp.type - 128;
+	if (type < 0 || type >= sizeof(invmap) || !invmap[type])
+		return false;
+
+	tuple->src.u.icmp.id   = orig->src.u.icmp.id;
+	tuple->dst.u.icmp.type = invmap[type] - 1;
+	tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+	return true;
+}
+
+static unsigned int *icmpv6_get_timeouts(struct net *net)
+{
+	return &icmpv6_pernet(net)->timeout;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int icmpv6_packet(struct nf_conn *ct,
+		       const struct sk_buff *skb,
+		       unsigned int dataoff,
+		       enum ip_conntrack_info ctinfo)
+{
+	unsigned int *timeout = nf_ct_timeout_lookup(ct);
+
+	if (!timeout)
+		timeout = icmpv6_get_timeouts(nf_ct_net(ct));
+
+	/* Do not immediately delete the connection after the first
+	   successful reply to avoid excessive conntrackd traffic
+	   and also to handle correctly ICMP echo reply duplicates. */
+	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+
+	return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
+		       unsigned int dataoff)
+{
+	static const u_int8_t valid_new[] = {
+		[ICMPV6_ECHO_REQUEST - 128] = 1,
+		[ICMPV6_NI_QUERY - 128] = 1
+	};
+	int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
+
+	if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
+		/* Can't create a new ICMPv6 `conn' with this. */
+		pr_debug("icmpv6: can't create new conn with type %u\n",
+			 type + 128);
+		nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
+		return false;
+	}
+	return true;
+}
+
+static int
+icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
+		     struct sk_buff *skb,
+		     unsigned int icmp6off)
+{
+	struct nf_conntrack_tuple intuple, origtuple;
+	const struct nf_conntrack_tuple_hash *h;
+	const struct nf_conntrack_l4proto *inproto;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conntrack_zone tmp;
+
+	WARN_ON(skb_nfct(skb));
+
+	/* Are they talking about one of our connections? */
+	if (!nf_ct_get_tuplepr(skb,
+			       skb_network_offset(skb)
+				+ sizeof(struct ipv6hdr)
+				+ sizeof(struct icmp6hdr),
+			       PF_INET6, net, &origtuple)) {
+		pr_debug("icmpv6_error: Can't get tuple\n");
+		return -NF_ACCEPT;
+	}
+
+	/* rcu_read_lock()ed by nf_hook_thresh */
+	inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum);
+
+	/* Ordinarily, we'd expect the inverted tupleproto, but it's
+	   been preserved inside the ICMP. */
+	if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) {
+		pr_debug("icmpv6_error: Can't invert tuple\n");
+		return -NF_ACCEPT;
+	}
+
+	ctinfo = IP_CT_RELATED;
+
+	h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp),
+				  &intuple);
+	if (!h) {
+		pr_debug("icmpv6_error: no match\n");
+		return -NF_ACCEPT;
+	} else {
+		if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+			ctinfo += IP_CT_IS_REPLY;
+	}
+
+	/* Update skb to refer to this connection */
+	nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
+	return NF_ACCEPT;
+}
+
+static void icmpv6_error_log(const struct sk_buff *skb, struct net *net,
+			     u8 pf, const char *msg)
+{
+	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg);
+}
+
+static int
+icmpv6_error(struct net *net, struct nf_conn *tmpl,
+	     struct sk_buff *skb, unsigned int dataoff,
+	     u8 pf, unsigned int hooknum)
+{
+	const struct icmp6hdr *icmp6h;
+	struct icmp6hdr _ih;
+	int type;
+
+	icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
+	if (icmp6h == NULL) {
+		icmpv6_error_log(skb, net, pf, "short packet");
+		return -NF_ACCEPT;
+	}
+
+	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
+	    nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
+		icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed");
+		return -NF_ACCEPT;
+	}
+
+	type = icmp6h->icmp6_type - 130;
+	if (type >= 0 && type < sizeof(noct_valid_new) &&
+	    noct_valid_new[type]) {
+		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+		return NF_ACCEPT;
+	}
+
+	/* is not error message ? */
+	if (icmp6h->icmp6_type >= 128)
+		return NF_ACCEPT;
+
+	return icmpv6_error_message(net, tmpl, skb, dataoff);
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+static int icmpv6_tuple_to_nlattr(struct sk_buff *skb,
+				  const struct nf_conntrack_tuple *t)
+{
+	if (nla_put_be16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id) ||
+	    nla_put_u8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type) ||
+	    nla_put_u8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = {
+	[CTA_PROTO_ICMPV6_TYPE]	= { .type = NLA_U8 },
+	[CTA_PROTO_ICMPV6_CODE]	= { .type = NLA_U8 },
+	[CTA_PROTO_ICMPV6_ID]	= { .type = NLA_U16 },
+};
+
+static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
+				struct nf_conntrack_tuple *tuple)
+{
+	if (!tb[CTA_PROTO_ICMPV6_TYPE] ||
+	    !tb[CTA_PROTO_ICMPV6_CODE] ||
+	    !tb[CTA_PROTO_ICMPV6_ID])
+		return -EINVAL;
+
+	tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
+	tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
+	tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
+
+	if (tuple->dst.u.icmp.type < 128 ||
+	    tuple->dst.u.icmp.type - 128 >= sizeof(invmap) ||
+	    !invmap[tuple->dst.u.icmp.type - 128])
+		return -EINVAL;
+
+	return 0;
+}
+
+static unsigned int icmpv6_nlattr_tuple_size(void)
+{
+	static unsigned int size __read_mostly;
+
+	if (!size)
+		size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
+
+	return size;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
+					struct net *net, void *data)
+{
+	unsigned int *timeout = data;
+	struct nf_icmp_net *in = icmpv6_pernet(net);
+
+	if (!timeout)
+		timeout = icmpv6_get_timeouts(net);
+	if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) {
+		*timeout =
+		    ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ;
+	} else {
+		/* Set default ICMPv6 timeout. */
+		*timeout = in->timeout;
+	}
+	return 0;
+}
+
+static int
+icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+	const unsigned int *timeout = data;
+
+	if (nla_put_be32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -ENOSPC;
+}
+
+static const struct nla_policy
+icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = {
+	[CTA_TIMEOUT_ICMPV6_TIMEOUT]	= { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table icmpv6_sysctl_table[] = {
+	{
+		.procname	= "nf_conntrack_icmpv6_timeout",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{ }
+};
+#endif /* CONFIG_SYSCTL */
+
+static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
+				       struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
+	pn->ctl_table = kmemdup(icmpv6_sysctl_table,
+				sizeof(icmpv6_sysctl_table),
+				GFP_KERNEL);
+	if (!pn->ctl_table)
+		return -ENOMEM;
+
+	pn->ctl_table[0].data = &in->timeout;
+#endif
+	return 0;
+}
+
+static int icmpv6_init_net(struct net *net, u_int16_t proto)
+{
+	struct nf_icmp_net *in = icmpv6_pernet(net);
+	struct nf_proto_net *pn = &in->pn;
+
+	in->timeout = nf_ct_icmpv6_timeout;
+
+	return icmpv6_kmemdup_sysctl_table(pn, in);
+}
+
+static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
+{
+	return &net->ct.nf_ct_proto.icmpv6.pn;
+}
+
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
+{
+	.l3proto		= PF_INET6,
+	.l4proto		= IPPROTO_ICMPV6,
+	.pkt_to_tuple		= icmpv6_pkt_to_tuple,
+	.invert_tuple		= icmpv6_invert_tuple,
+	.packet			= icmpv6_packet,
+	.new			= icmpv6_new,
+	.error			= icmpv6_error,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+	.tuple_to_nlattr	= icmpv6_tuple_to_nlattr,
+	.nlattr_tuple_size	= icmpv6_nlattr_tuple_size,
+	.nlattr_to_tuple	= icmpv6_nlattr_to_tuple,
+	.nla_policy		= icmpv6_nla_policy,
+#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+	.ctnl_timeout		= {
+		.nlattr_to_obj	= icmpv6_timeout_nlattr_to_obj,
+		.obj_to_nlattr	= icmpv6_timeout_obj_to_nlattr,
+		.nlattr_max	= CTA_TIMEOUT_ICMP_MAX,
+		.obj_size	= sizeof(unsigned int),
+		.nla_policy	= icmpv6_timeout_nla_policy,
+	},
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+	.init_net		= icmpv6_init_net,
+	.get_net_proto		= icmpv6_get_net_proto,
+};
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index fb9a35d16069..8d1e085fc14a 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -28,6 +28,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 
 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
    closely.  They're more complex. --RR
@@ -150,30 +151,6 @@ static inline struct nf_sctp_net *sctp_pernet(struct net *net)
 	return &net->ct.nf_ct_proto.sctp;
 }
 
-static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
-			      struct net *net, struct nf_conntrack_tuple *tuple)
-{
-	const struct sctphdr *hp;
-	struct sctphdr _hdr;
-
-	/* Actually only need first 4 bytes to get ports. */
-	hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
-	if (hp == NULL)
-		return false;
-
-	tuple->src.u.sctp.port = hp->source;
-	tuple->dst.u.sctp.port = hp->dest;
-	return true;
-}
-
-static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
-			      const struct nf_conntrack_tuple *orig)
-{
-	tuple->src.u.sctp.port = orig->dst.u.sctp.port;
-	tuple->dst.u.sctp.port = orig->src.u.sctp.port;
-	return true;
-}
-
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* Print out the private part of the conntrack. */
 static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@@ -296,17 +273,11 @@ static int sctp_new_state(enum ip_conntrack_dir dir,
 	return sctp_conntracks[dir][i][cur_state];
 }
 
-static unsigned int *sctp_get_timeouts(struct net *net)
-{
-	return sctp_pernet(net)->timeouts;
-}
-
 /* Returns verdict for packet, or -NF_ACCEPT for invalid. */
 static int sctp_packet(struct nf_conn *ct,
 		       const struct sk_buff *skb,
 		       unsigned int dataoff,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned int *timeouts)
+		       enum ip_conntrack_info ctinfo)
 {
 	enum sctp_conntrack new_state, old_state;
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
@@ -315,6 +286,7 @@ static int sctp_packet(struct nf_conn *ct,
 	const struct sctp_chunkhdr *sch;
 	struct sctp_chunkhdr _sch;
 	u_int32_t offset, count;
+	unsigned int *timeouts;
 	unsigned long map[256 / sizeof(unsigned long)] = { 0 };
 
 	sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
@@ -403,6 +375,10 @@ static int sctp_packet(struct nf_conn *ct,
 	}
 	spin_unlock_bh(&ct->lock);
 
+	timeouts = nf_ct_timeout_lookup(ct);
+	if (!timeouts)
+		timeouts = sctp_pernet(nf_ct_net(ct))->timeouts;
+
 	nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
 
 	if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED &&
@@ -423,7 +399,7 @@ out:
 
 /* Called when a new connection for this protocol found. */
 static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
-		     unsigned int dataoff, unsigned int *timeouts)
+		     unsigned int dataoff)
 {
 	enum sctp_conntrack new_state;
 	const struct sctphdr *sh;
@@ -780,13 +756,10 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net)
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = {
 	.l3proto		= PF_INET,
 	.l4proto 		= IPPROTO_SCTP,
-	.pkt_to_tuple 		= sctp_pkt_to_tuple,
-	.invert_tuple 		= sctp_invert_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack	= sctp_print_conntrack,
 #endif
 	.packet 		= sctp_packet,
-	.get_timeouts		= sctp_get_timeouts,
 	.new 			= sctp_new,
 	.error			= sctp_error,
 	.can_early_drop		= sctp_can_early_drop,
@@ -817,13 +790,10 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
 	.l3proto		= PF_INET6,
 	.l4proto 		= IPPROTO_SCTP,
-	.pkt_to_tuple 		= sctp_pkt_to_tuple,
-	.invert_tuple 		= sctp_invert_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack	= sctp_print_conntrack,
 #endif
 	.packet 		= sctp_packet,
-	.get_timeouts		= sctp_get_timeouts,
 	.new 			= sctp_new,
 	.error			= sctp_error,
 	.can_early_drop		= sctp_can_early_drop,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 8e67910185a0..d80d322b9d8b 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -29,6 +29,7 @@
 #include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 #include <net/netfilter/nf_log.h>
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
@@ -276,31 +277,6 @@ static inline struct nf_tcp_net *tcp_pernet(struct net *net)
 	return &net->ct.nf_ct_proto.tcp;
 }
 
-static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
-			     struct net *net, struct nf_conntrack_tuple *tuple)
-{
-	const struct tcphdr *hp;
-	struct tcphdr _hdr;
-
-	/* Actually only need first 4 bytes to get ports. */
-	hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
-	if (hp == NULL)
-		return false;
-
-	tuple->src.u.tcp.port = hp->source;
-	tuple->dst.u.tcp.port = hp->dest;
-
-	return true;
-}
-
-static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
-			     const struct nf_conntrack_tuple *orig)
-{
-	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
-	tuple->dst.u.tcp.port = orig->src.u.tcp.port;
-	return true;
-}
-
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* Print out the private part of the conntrack. */
 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@@ -793,27 +769,21 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
 	return NF_ACCEPT;
 }
 
-static unsigned int *tcp_get_timeouts(struct net *net)
-{
-	return tcp_pernet(net)->timeouts;
-}
-
 /* Returns verdict for packet, or -1 for invalid. */
 static int tcp_packet(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
-		      enum ip_conntrack_info ctinfo,
-		      unsigned int *timeouts)
+		      enum ip_conntrack_info ctinfo)
 {
 	struct net *net = nf_ct_net(ct);
 	struct nf_tcp_net *tn = tcp_pernet(net);
 	struct nf_conntrack_tuple *tuple;
 	enum tcp_conntrack new_state, old_state;
+	unsigned int index, *timeouts;
 	enum ip_conntrack_dir dir;
 	const struct tcphdr *th;
 	struct tcphdr _tcph;
 	unsigned long timeout;
-	unsigned int index;
 
 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
 	BUG_ON(th == NULL);
@@ -1046,6 +1016,10 @@ static int tcp_packet(struct nf_conn *ct,
 	    && new_state == TCP_CONNTRACK_FIN_WAIT)
 		ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
 
+	timeouts = nf_ct_timeout_lookup(ct);
+	if (!timeouts)
+		timeouts = tn->timeouts;
+
 	if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
 	    timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
 		timeout = timeouts[TCP_CONNTRACK_RETRANS];
@@ -1095,7 +1069,7 @@ static int tcp_packet(struct nf_conn *ct,
 
 /* Called when a new connection for this protocol found. */
 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
-		    unsigned int dataoff, unsigned int *timeouts)
+		    unsigned int dataoff)
 {
 	enum tcp_conntrack new_state;
 	const struct tcphdr *th;
@@ -1313,10 +1287,12 @@ static unsigned int tcp_nlattr_tuple_size(void)
 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
 				     struct net *net, void *data)
 {
-	unsigned int *timeouts = data;
 	struct nf_tcp_net *tn = tcp_pernet(net);
+	unsigned int *timeouts = data;
 	int i;
 
+	if (!timeouts)
+		timeouts = tn->timeouts;
 	/* set default TCP timeouts. */
 	for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
 		timeouts[i] = tn->timeouts[i];
@@ -1559,13 +1535,10 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
 {
 	.l3proto		= PF_INET,
 	.l4proto 		= IPPROTO_TCP,
-	.pkt_to_tuple 		= tcp_pkt_to_tuple,
-	.invert_tuple 		= tcp_invert_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack 	= tcp_print_conntrack,
 #endif
 	.packet 		= tcp_packet,
-	.get_timeouts		= tcp_get_timeouts,
 	.new 			= tcp_new,
 	.error			= tcp_error,
 	.can_early_drop		= tcp_can_early_drop,
@@ -1597,13 +1570,10 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
 {
 	.l3proto		= PF_INET6,
 	.l4proto 		= IPPROTO_TCP,
-	.pkt_to_tuple 		= tcp_pkt_to_tuple,
-	.invert_tuple 		= tcp_invert_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack 	= tcp_print_conntrack,
 #endif
 	.packet 		= tcp_packet,
-	.get_timeouts		= tcp_get_timeouts,
 	.new 			= tcp_new,
 	.error			= tcp_error,
 	.can_early_drop		= tcp_can_early_drop,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index fe7243970aa4..7a1b8988a931 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -22,6 +22,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 #include <net/netfilter/nf_log.h>
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
@@ -36,33 +37,6 @@ static inline struct nf_udp_net *udp_pernet(struct net *net)
 	return &net->ct.nf_ct_proto.udp;
 }
 
-static bool udp_pkt_to_tuple(const struct sk_buff *skb,
-			     unsigned int dataoff,
-			     struct net *net,
-			     struct nf_conntrack_tuple *tuple)
-{
-	const struct udphdr *hp;
-	struct udphdr _hdr;
-
-	/* Actually only need first 4 bytes to get ports. */
-	hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
-	if (hp == NULL)
-		return false;
-
-	tuple->src.u.udp.port = hp->source;
-	tuple->dst.u.udp.port = hp->dest;
-
-	return true;
-}
-
-static bool udp_invert_tuple(struct nf_conntrack_tuple *tuple,
-			     const struct nf_conntrack_tuple *orig)
-{
-	tuple->src.u.udp.port = orig->dst.u.udp.port;
-	tuple->dst.u.udp.port = orig->src.u.udp.port;
-	return true;
-}
-
 static unsigned int *udp_get_timeouts(struct net *net)
 {
 	return udp_pernet(net)->timeouts;
@@ -72,9 +46,14 @@ static unsigned int *udp_get_timeouts(struct net *net)
 static int udp_packet(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
-		      enum ip_conntrack_info ctinfo,
-		      unsigned int *timeouts)
+		      enum ip_conntrack_info ctinfo)
 {
+	unsigned int *timeouts;
+
+	timeouts = nf_ct_timeout_lookup(ct);
+	if (!timeouts)
+		timeouts = udp_get_timeouts(nf_ct_net(ct));
+
 	/* If we've seen traffic both ways, this is some kind of UDP
 	   stream.  Extend timeout. */
 	if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
@@ -92,7 +71,7 @@ static int udp_packet(struct nf_conn *ct,
 
 /* Called when a new connection for this protocol found. */
 static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
-		    unsigned int dataoff, unsigned int *timeouts)
+		    unsigned int dataoff)
 {
 	return true;
 }
@@ -203,6 +182,9 @@ static int udp_timeout_nlattr_to_obj(struct nlattr *tb[],
 	unsigned int *timeouts = data;
 	struct nf_udp_net *un = udp_pernet(net);
 
+	if (!timeouts)
+		timeouts = un->timeouts;
+
 	/* set default timeouts for UDP. */
 	timeouts[UDP_CT_UNREPLIED] = un->timeouts[UDP_CT_UNREPLIED];
 	timeouts[UDP_CT_REPLIED] = un->timeouts[UDP_CT_REPLIED];
@@ -301,10 +283,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 =
 	.l3proto		= PF_INET,
 	.l4proto		= IPPROTO_UDP,
 	.allow_clash		= true,
-	.pkt_to_tuple		= udp_pkt_to_tuple,
-	.invert_tuple		= udp_invert_tuple,
 	.packet			= udp_packet,
-	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
 	.error			= udp_error,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -333,10 +312,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 =
 	.l3proto		= PF_INET,
 	.l4proto		= IPPROTO_UDPLITE,
 	.allow_clash		= true,
-	.pkt_to_tuple		= udp_pkt_to_tuple,
-	.invert_tuple		= udp_invert_tuple,
 	.packet			= udp_packet,
-	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
 	.error			= udplite_error,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -365,10 +341,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
 	.l3proto		= PF_INET6,
 	.l4proto		= IPPROTO_UDP,
 	.allow_clash		= true,
-	.pkt_to_tuple		= udp_pkt_to_tuple,
-	.invert_tuple		= udp_invert_tuple,
 	.packet			= udp_packet,
-	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
 	.error			= udp_error,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -397,10 +370,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
 	.l3proto		= PF_INET6,
 	.l4proto		= IPPROTO_UDPLITE,
 	.allow_clash		= true,
-	.pkt_to_tuple		= udp_pkt_to_tuple,
-	.invert_tuple		= udp_invert_tuple,
 	.packet			= udp_packet,
-	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
 	.error			= udplite_error,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -423,3 +393,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
 };
 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6);
 #endif
+#include <net/netfilter/nf_conntrack_timeout.h>
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b642c0b2495c..13279f683da9 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -1,12 +1,4 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/types.h>
 #include <linux/netfilter.h>
 #include <linux/slab.h>
@@ -24,7 +16,6 @@
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
@@ -33,15 +24,14 @@
 #include <net/netfilter/nf_conntrack_timestamp.h>
 #include <linux/rculist_nulls.h>
 
-MODULE_LICENSE("GPL");
+unsigned int nf_conntrack_net_id __read_mostly;
 
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 void
 print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
-            const struct nf_conntrack_l3proto *l3proto,
             const struct nf_conntrack_l4proto *l4proto)
 {
-	switch (l3proto->l3proto) {
+	switch (tuple->src.l3num) {
 	case NFPROTO_IPV4:
 		seq_printf(s, "src=%pI4 dst=%pI4 ",
 			   &tuple->src.u3.ip, &tuple->dst.u3.ip);
@@ -282,7 +272,6 @@ static int ct_seq_show(struct seq_file *s, void *v)
 {
 	struct nf_conntrack_tuple_hash *hash = v;
 	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
-	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
 	struct net *net = seq_file_net(s);
 	int ret = 0;
@@ -303,14 +292,12 @@ static int ct_seq_show(struct seq_file *s, void *v)
 	if (!net_eq(nf_ct_net(ct), net))
 		goto release;
 
-	l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
-	WARN_ON(!l3proto);
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
 	WARN_ON(!l4proto);
 
 	ret = -ENOSPC;
 	seq_printf(s, "%-8s %u %-8s %u ",
-		   l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
+		   l3proto_name(nf_ct_l3num(ct)), nf_ct_l3num(ct),
 		   l4proto_name(l4proto->l4proto), nf_ct_protonum(ct));
 
 	if (!test_bit(IPS_OFFLOAD_BIT, &ct->status))
@@ -320,7 +307,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 		l4proto->print_conntrack(s, ct);
 
 	print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-		    l3proto, l4proto);
+		    l4proto);
 
 	ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG);
 
@@ -333,8 +320,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 	if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
 		seq_puts(s, "[UNREPLIED] ");
 
-	print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
-		    l3proto, l4proto);
+	print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, l4proto);
 
 	ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL);
 
@@ -680,6 +666,8 @@ static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
 static struct pernet_operations nf_conntrack_net_ops = {
 	.init		= nf_conntrack_pernet_init,
 	.exit_batch	= nf_conntrack_pernet_exit,
+	.id		= &nf_conntrack_net_id,
+	.size = sizeof(struct nf_conntrack_net),
 };
 
 static int __init nf_conntrack_standalone_init(void)
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index eb0d1658ac05..d8125616edc7 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -107,11 +107,12 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
 	tcp->seen[1].td_maxwin = 0;
 }
 
+#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT	(120 * HZ)
+#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT	(30 * HZ)
+
 static void flow_offload_fixup_ct_state(struct nf_conn *ct)
 {
 	const struct nf_conntrack_l4proto *l4proto;
-	struct net *net = nf_ct_net(ct);
-	unsigned int *timeouts;
 	unsigned int timeout;
 	int l4num;
 
@@ -123,14 +124,10 @@ static void flow_offload_fixup_ct_state(struct nf_conn *ct)
 	if (!l4proto)
 		return;
 
-	timeouts = l4proto->get_timeouts(net);
-	if (!timeouts)
-		return;
-
 	if (l4num == IPPROTO_TCP)
-		timeout = timeouts[TCP_CONNTRACK_ESTABLISHED];
+		timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
 	else if (l4num == IPPROTO_UDP)
-		timeout = timeouts[UDP_CT_REPLIED];
+		timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
 	else
 		return;
 
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 86df2a1666fd..6366f0c0b8c1 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -28,7 +28,6 @@
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <linux/netfilter/nf_nat.h>
 
@@ -743,12 +742,6 @@ EXPORT_SYMBOL_GPL(nf_nat_l4proto_unregister);
 
 int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
 {
-	int err;
-
-	err = nf_ct_l3proto_try_module_get(l3proto->l3proto);
-	if (err < 0)
-		return err;
-
 	mutex_lock(&nf_nat_proto_mutex);
 	RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP],
 			 &nf_nat_l4proto_tcp);
@@ -781,7 +774,6 @@ void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *l3proto)
 	synchronize_rcu();
 
 	nf_nat_l3proto_clean(l3proto->l3proto);
-	nf_ct_l3proto_module_put(l3proto->l3proto);
 }
 EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
 
diff --git a/net/netfilter/nf_osf.c b/net/netfilter/nf_osf.c
index 5ba5c7bef2f9..b44d62d5d9a9 100644
--- a/net/netfilter/nf_osf.c
+++ b/net/netfilter/nf_osf.c
@@ -21,15 +21,14 @@
 #include <linux/netfilter/nf_osf.h>
 
 static inline int nf_osf_ttl(const struct sk_buff *skb,
-			     const struct nf_osf_info *info,
-			     unsigned char f_ttl)
+			     int ttl_check, unsigned char f_ttl)
 {
 	const struct iphdr *ip = ip_hdr(skb);
 
-	if (info->flags & NF_OSF_TTL) {
-		if (info->ttl == NF_OSF_TTL_TRUE)
+	if (ttl_check != -1) {
+		if (ttl_check == NF_OSF_TTL_TRUE)
 			return ip->ttl == f_ttl;
-		if (info->ttl == NF_OSF_TTL_NOCHECK)
+		if (ttl_check == NF_OSF_TTL_NOCHECK)
 			return 1;
 		else if (ip->ttl <= f_ttl)
 			return 1;
@@ -52,140 +51,175 @@ static inline int nf_osf_ttl(const struct sk_buff *skb,
 	return ip->ttl == f_ttl;
 }
 
-bool
-nf_osf_match(const struct sk_buff *skb, u_int8_t family,
-	     int hooknum, struct net_device *in, struct net_device *out,
-	     const struct nf_osf_info *info, struct net *net,
-	     const struct list_head *nf_osf_fingers)
+struct nf_osf_hdr_ctx {
+	bool			df;
+	u16			window;
+	u16			totlen;
+	const unsigned char	*optp;
+	unsigned int		optsize;
+};
+
+static bool nf_osf_match_one(const struct sk_buff *skb,
+			     const struct nf_osf_user_finger *f,
+			     int ttl_check,
+			     struct nf_osf_hdr_ctx *ctx)
 {
-	const unsigned char *optp = NULL, *_optp = NULL;
-	unsigned int optsize = 0, check_WSS = 0;
-	int fmatch = FMATCH_WRONG, fcount = 0;
-	const struct iphdr *ip = ip_hdr(skb);
-	const struct nf_osf_user_finger *f;
-	unsigned char opts[MAX_IPOPTLEN];
-	const struct nf_osf_finger *kf;
-	u16 window, totlen, mss = 0;
-	const struct tcphdr *tcp;
-	struct tcphdr _tcph;
-	bool df;
+	unsigned int check_WSS = 0;
+	int fmatch = FMATCH_WRONG;
+	int foptsize, optnum;
+	u16 mss = 0;
 
-	tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
-	if (!tcp)
+	if (ctx->totlen != f->ss || !nf_osf_ttl(skb, ttl_check, f->ttl))
 		return false;
 
-	if (!tcp->syn)
+	/*
+	 * Should not happen if userspace parser was written correctly.
+	 */
+	if (f->wss.wc >= OSF_WSS_MAX)
 		return false;
 
-	totlen = ntohs(ip->tot_len);
-	df = ntohs(ip->frag_off) & IP_DF;
-	window = ntohs(tcp->window);
+	/* Check options */
 
-	if (tcp->doff * 4 > sizeof(struct tcphdr)) {
-		optsize = tcp->doff * 4 - sizeof(struct tcphdr);
+	foptsize = 0;
+	for (optnum = 0; optnum < f->opt_num; ++optnum)
+		foptsize += f->opt[optnum].length;
 
-		_optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) +
-				sizeof(struct tcphdr), optsize, opts);
-	}
+	if (foptsize > MAX_IPOPTLEN ||
+	    ctx->optsize > MAX_IPOPTLEN ||
+	    ctx->optsize != foptsize)
+		return false;
 
-	list_for_each_entry_rcu(kf, &nf_osf_fingers[df], finger_entry) {
-		int foptsize, optnum;
+	check_WSS = f->wss.wc;
 
-		f = &kf->finger;
+	for (optnum = 0; optnum < f->opt_num; ++optnum) {
+		if (f->opt[optnum].kind == *ctx->optp) {
+			__u32 len = f->opt[optnum].length;
+			const __u8 *optend = ctx->optp + len;
 
-		if (!(info->flags & NF_OSF_LOG) && strcmp(info->genre, f->genre))
-			continue;
+			fmatch = FMATCH_OK;
+
+			switch (*ctx->optp) {
+			case OSFOPT_MSS:
+				mss = ctx->optp[3];
+				mss <<= 8;
+				mss |= ctx->optp[2];
+
+				mss = ntohs((__force __be16)mss);
+				break;
+			case OSFOPT_TS:
+				break;
+			}
+
+			ctx->optp = optend;
+		} else
+			fmatch = FMATCH_OPT_WRONG;
+
+		if (fmatch != FMATCH_OK)
+			break;
+	}
 
-		optp = _optp;
+	if (fmatch != FMATCH_OPT_WRONG) {
 		fmatch = FMATCH_WRONG;
 
-		if (totlen != f->ss || !nf_osf_ttl(skb, info, f->ttl))
-			continue;
+		switch (check_WSS) {
+		case OSF_WSS_PLAIN:
+			if (f->wss.val == 0 || ctx->window == f->wss.val)
+				fmatch = FMATCH_OK;
+			break;
+		case OSF_WSS_MSS:
+			/*
+			 * Some smart modems decrease mangle MSS to
+			 * SMART_MSS_2, so we check standard, decreased
+			 * and the one provided in the fingerprint MSS
+			 * values.
+			 */
+#define SMART_MSS_1	1460
+#define SMART_MSS_2	1448
+			if (ctx->window == f->wss.val * mss ||
+			    ctx->window == f->wss.val * SMART_MSS_1 ||
+			    ctx->window == f->wss.val * SMART_MSS_2)
+				fmatch = FMATCH_OK;
+			break;
+		case OSF_WSS_MTU:
+			if (ctx->window == f->wss.val * (mss + 40) ||
+			    ctx->window == f->wss.val * (SMART_MSS_1 + 40) ||
+			    ctx->window == f->wss.val * (SMART_MSS_2 + 40))
+				fmatch = FMATCH_OK;
+			break;
+		case OSF_WSS_MODULO:
+			if ((ctx->window % f->wss.val) == 0)
+				fmatch = FMATCH_OK;
+			break;
+		}
+	}
 
-		/*
-		 * Should not happen if userspace parser was written correctly.
-		 */
-		if (f->wss.wc >= OSF_WSS_MAX)
-			continue;
+	return fmatch == FMATCH_OK;
+}
 
-		/* Check options */
+static const struct tcphdr *nf_osf_hdr_ctx_init(struct nf_osf_hdr_ctx *ctx,
+						const struct sk_buff *skb,
+						const struct iphdr *ip,
+						unsigned char *opts)
+{
+	const struct tcphdr *tcp;
+	struct tcphdr _tcph;
 
-		foptsize = 0;
-		for (optnum = 0; optnum < f->opt_num; ++optnum)
-			foptsize += f->opt[optnum].length;
+	tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
+	if (!tcp)
+		return NULL;
 
-		if (foptsize > MAX_IPOPTLEN ||
-		    optsize > MAX_IPOPTLEN ||
-		    optsize != foptsize)
-			continue;
+	if (!tcp->syn)
+		return NULL;
 
-		check_WSS = f->wss.wc;
+	ctx->totlen = ntohs(ip->tot_len);
+	ctx->df = ntohs(ip->frag_off) & IP_DF;
+	ctx->window = ntohs(tcp->window);
 
-		for (optnum = 0; optnum < f->opt_num; ++optnum) {
-			if (f->opt[optnum].kind == (*optp)) {
-				__u32 len = f->opt[optnum].length;
-				const __u8 *optend = optp + len;
+	if (tcp->doff * 4 > sizeof(struct tcphdr)) {
+		ctx->optsize = tcp->doff * 4 - sizeof(struct tcphdr);
 
-				fmatch = FMATCH_OK;
+		ctx->optp = skb_header_pointer(skb, ip_hdrlen(skb) +
+				sizeof(struct tcphdr), ctx->optsize, opts);
+	}
 
-				switch (*optp) {
-				case OSFOPT_MSS:
-					mss = optp[3];
-					mss <<= 8;
-					mss |= optp[2];
+	return tcp;
+}
 
-					mss = ntohs((__force __be16)mss);
-					break;
-				case OSFOPT_TS:
-					break;
-				}
+bool
+nf_osf_match(const struct sk_buff *skb, u_int8_t family,
+	     int hooknum, struct net_device *in, struct net_device *out,
+	     const struct nf_osf_info *info, struct net *net,
+	     const struct list_head *nf_osf_fingers)
+{
+	const struct iphdr *ip = ip_hdr(skb);
+	const struct nf_osf_user_finger *f;
+	unsigned char opts[MAX_IPOPTLEN];
+	const struct nf_osf_finger *kf;
+	int fcount = 0, ttl_check;
+	int fmatch = FMATCH_WRONG;
+	struct nf_osf_hdr_ctx ctx;
+	const struct tcphdr *tcp;
 
-				optp = optend;
-			} else
-				fmatch = FMATCH_OPT_WRONG;
+	memset(&ctx, 0, sizeof(ctx));
 
-			if (fmatch != FMATCH_OK)
-				break;
-		}
+	tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts);
+	if (!tcp)
+		return false;
 
-		if (fmatch != FMATCH_OPT_WRONG) {
-			fmatch = FMATCH_WRONG;
+	ttl_check = (info->flags & NF_OSF_TTL) ? info->ttl : -1;
 
-			switch (check_WSS) {
-			case OSF_WSS_PLAIN:
-				if (f->wss.val == 0 || window == f->wss.val)
-					fmatch = FMATCH_OK;
-				break;
-			case OSF_WSS_MSS:
-				/*
-				 * Some smart modems decrease mangle MSS to
-				 * SMART_MSS_2, so we check standard, decreased
-				 * and the one provided in the fingerprint MSS
-				 * values.
-				 */
-#define SMART_MSS_1	1460
-#define SMART_MSS_2	1448
-				if (window == f->wss.val * mss ||
-				    window == f->wss.val * SMART_MSS_1 ||
-				    window == f->wss.val * SMART_MSS_2)
-					fmatch = FMATCH_OK;
-				break;
-			case OSF_WSS_MTU:
-				if (window == f->wss.val * (mss + 40) ||
-				    window == f->wss.val * (SMART_MSS_1 + 40) ||
-				    window == f->wss.val * (SMART_MSS_2 + 40))
-					fmatch = FMATCH_OK;
-				break;
-			case OSF_WSS_MODULO:
-				if ((window % f->wss.val) == 0)
-					fmatch = FMATCH_OK;
-				break;
-			}
-		}
+	list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) {
 
-		if (fmatch != FMATCH_OK)
+		f = &kf->finger;
+
+		if (!(info->flags & NF_OSF_LOG) && strcmp(info->genre, f->genre))
+			continue;
+
+		if (!nf_osf_match_one(skb, f, ttl_check, &ctx))
 			continue;
 
+		fmatch = FMATCH_OK;
+
 		fcount++;
 
 		if (info->flags & NF_OSF_LOG)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3f211e1025c1..c0fb2bcd30fe 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -455,20 +455,59 @@ __nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
 	return NULL;
 }
 
+/*
+ * Loading a module requires dropping mutex that guards the
+ * transaction.
+ * We first need to abort any pending transactions as once
+ * mutex is unlocked a different client could start a new
+ * transaction.  It must not see any 'future generation'
+ * changes * as these changes will never happen.
+ */
+#ifdef CONFIG_MODULES
+static int __nf_tables_abort(struct net *net);
+
+static void nft_request_module(struct net *net, const char *fmt, ...)
+{
+	char module_name[MODULE_NAME_LEN];
+	va_list args;
+	int ret;
+
+	__nf_tables_abort(net);
+
+	va_start(args, fmt);
+	ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
+	va_end(args);
+	if (WARN(ret >= MODULE_NAME_LEN, "truncated: '%s' (len %d)", module_name, ret))
+		return;
+
+	mutex_unlock(&net->nft.commit_mutex);
+	request_module("%s", module_name);
+	mutex_lock(&net->nft.commit_mutex);
+}
+#endif
+
+static void lockdep_nfnl_nft_mutex_not_held(void)
+{
+#ifdef CONFIG_PROVE_LOCKING
+	WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+#endif
+}
+
 static const struct nft_chain_type *
-nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family, bool autoload)
+nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
+			    u8 family, bool autoload)
 {
 	const struct nft_chain_type *type;
 
 	type = __nf_tables_chain_type_lookup(nla, family);
 	if (type != NULL)
 		return type;
+
+	lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
 	if (autoload) {
-		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-		request_module("nft-chain-%u-%.*s", family,
-			       nla_len(nla), (const char *)nla_data(nla));
-		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		nft_request_module(net, "nft-chain-%u-%.*s", family,
+				   nla_len(nla), (const char *)nla_data(nla));
 		type = __nf_tables_chain_type_lookup(nla, family);
 		if (type != NULL)
 			return ERR_PTR(-EAGAIN);
@@ -772,6 +811,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 	struct nft_ctx ctx;
 	int err;
 
+	lockdep_assert_held(&net->nft.commit_mutex);
 	attr = nla[NFTA_TABLE_NAME];
 	table = nft_table_lookup(net, attr, family, genmask);
 	if (IS_ERR(table)) {
@@ -1012,7 +1052,17 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
 	return ERR_PTR(-ENOENT);
 }
 
-static struct nft_chain *nft_chain_lookup(struct nft_table *table,
+static bool lockdep_commit_lock_is_held(struct net *net)
+{
+#ifdef CONFIG_PROVE_LOCKING
+	return lockdep_is_held(&net->nft.commit_mutex);
+#else
+	return true;
+#endif
+}
+
+static struct nft_chain *nft_chain_lookup(struct net *net,
+					  struct nft_table *table,
 					  const struct nlattr *nla, u8 genmask)
 {
 	char search[NFT_CHAIN_MAXNAMELEN + 1];
@@ -1025,7 +1075,7 @@ static struct nft_chain *nft_chain_lookup(struct nft_table *table,
 	nla_strlcpy(search, nla, sizeof(search));
 
 	WARN_ON(!rcu_read_lock_held() &&
-		!lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+		!lockdep_commit_lock_is_held(net));
 
 	chain = ERR_PTR(-ENOENT);
 	rcu_read_lock();
@@ -1265,7 +1315,7 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
 		return PTR_ERR(table);
 	}
 
-	chain = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+	chain = nft_chain_lookup(net, table, nla[NFTA_CHAIN_NAME], genmask);
 	if (IS_ERR(chain)) {
 		NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
 		return PTR_ERR(chain);
@@ -1398,6 +1448,9 @@ static int nft_chain_parse_hook(struct net *net,
 	struct net_device *dev;
 	int err;
 
+	lockdep_assert_held(&net->nft.commit_mutex);
+	lockdep_nfnl_nft_mutex_not_held();
+
 	err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
 			       nft_hook_policy, NULL);
 	if (err < 0)
@@ -1412,7 +1465,7 @@ static int nft_chain_parse_hook(struct net *net,
 
 	type = chain_type[family][NFT_CHAIN_T_DEFAULT];
 	if (nla[NFTA_CHAIN_TYPE]) {
-		type = nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE],
+		type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE],
 						   family, create);
 		if (IS_ERR(type))
 			return PTR_ERR(type);
@@ -1632,7 +1685,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
 	    nla[NFTA_CHAIN_NAME]) {
 		struct nft_chain *chain2;
 
-		chain2 = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+		chain2 = nft_chain_lookup(ctx->net, table,
+					  nla[NFTA_CHAIN_NAME], genmask);
 		if (!IS_ERR(chain2))
 			return -EEXIST;
 	}
@@ -1694,6 +1748,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
+	lockdep_assert_held(&net->nft.commit_mutex);
+
 	table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
 	if (IS_ERR(table)) {
 		NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
@@ -1712,7 +1768,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 		}
 		attr = nla[NFTA_CHAIN_HANDLE];
 	} else {
-		chain = nft_chain_lookup(table, attr, genmask);
+		chain = nft_chain_lookup(net, table, attr, genmask);
 		if (IS_ERR(chain)) {
 			if (PTR_ERR(chain) != -ENOENT) {
 				NL_SET_BAD_ATTR(extack, attr);
@@ -1790,7 +1846,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
 		chain = nft_chain_lookup_byhandle(table, handle, genmask);
 	} else {
 		attr = nla[NFTA_CHAIN_NAME];
-		chain = nft_chain_lookup(table, attr, genmask);
+		chain = nft_chain_lookup(net, table, attr, genmask);
 	}
 	if (IS_ERR(chain)) {
 		NL_SET_BAD_ATTR(extack, attr);
@@ -1875,7 +1931,8 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family,
 	return NULL;
 }
 
-static const struct nft_expr_type *nft_expr_type_get(u8 family,
+static const struct nft_expr_type *nft_expr_type_get(struct net *net,
+						     u8 family,
 						     struct nlattr *nla)
 {
 	const struct nft_expr_type *type;
@@ -1887,19 +1944,16 @@ static const struct nft_expr_type *nft_expr_type_get(u8 family,
 	if (type != NULL && try_module_get(type->owner))
 		return type;
 
+	lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
 	if (type == NULL) {
-		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-		request_module("nft-expr-%u-%.*s", family,
-			       nla_len(nla), (char *)nla_data(nla));
-		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		nft_request_module(net, "nft-expr-%u-%.*s", family,
+				   nla_len(nla), (char *)nla_data(nla));
 		if (__nft_expr_type_get(family, nla))
 			return ERR_PTR(-EAGAIN);
 
-		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-		request_module("nft-expr-%.*s",
-			       nla_len(nla), (char *)nla_data(nla));
-		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		nft_request_module(net, "nft-expr-%.*s",
+				   nla_len(nla), (char *)nla_data(nla));
 		if (__nft_expr_type_get(family, nla))
 			return ERR_PTR(-EAGAIN);
 	}
@@ -1968,7 +2022,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
 	if (err < 0)
 		return err;
 
-	type = nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]);
+	type = nft_expr_type_get(ctx->net, ctx->family, tb[NFTA_EXPR_NAME]);
 	if (IS_ERR(type))
 		return PTR_ERR(type);
 
@@ -2325,7 +2379,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
 		return PTR_ERR(table);
 	}
 
-	chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
+	chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask);
 	if (IS_ERR(chain)) {
 		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
 		return PTR_ERR(chain);
@@ -2359,6 +2413,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
 {
 	struct nft_expr *expr;
 
+	lockdep_assert_held(&ctx->net->nft.commit_mutex);
 	/*
 	 * Careful: some expressions might not be initialized in case this
 	 * is called on error from nf_tables_newrule().
@@ -2427,8 +2482,6 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
 
 #define NFT_RULE_MAXEXPRS	128
 
-static struct nft_expr_info *info;
-
 static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
 			     const struct nlattr * const nla[],
@@ -2436,6 +2489,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
+	struct nft_expr_info *info = NULL;
 	int family = nfmsg->nfgen_family;
 	struct nft_table *table;
 	struct nft_chain *chain;
@@ -2450,6 +2504,8 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 	bool create;
 	u64 handle, pos_handle;
 
+	lockdep_assert_held(&net->nft.commit_mutex);
+
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
 	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
@@ -2458,7 +2514,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 		return PTR_ERR(table);
 	}
 
-	chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
+	chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask);
 	if (IS_ERR(chain)) {
 		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
 		return PTR_ERR(chain);
@@ -2506,6 +2562,12 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 	n = 0;
 	size = 0;
 	if (nla[NFTA_RULE_EXPRESSIONS]) {
+		info = kvmalloc_array(NFT_RULE_MAXEXPRS,
+				      sizeof(struct nft_expr_info),
+				      GFP_KERNEL);
+		if (!info)
+			return -ENOMEM;
+
 		nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
 			err = -EINVAL;
 			if (nla_type(tmp) != NFTA_LIST_ELEM)
@@ -2598,6 +2660,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 				list_add_rcu(&rule->list, &chain->rules);
 		}
 	}
+	kvfree(info);
 	chain->use++;
 
 	if (net->nft.validate_state == NFT_VALIDATE_DO)
@@ -2611,6 +2674,7 @@ err1:
 		if (info[i].ops != NULL)
 			module_put(info[i].ops->type->owner);
 	}
+	kvfree(info);
 	return err;
 }
 
@@ -2650,7 +2714,8 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
 	}
 
 	if (nla[NFTA_RULE_CHAIN]) {
-		chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
+		chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN],
+					 genmask);
 		if (IS_ERR(chain)) {
 			NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
 			return PTR_ERR(chain);
@@ -2742,11 +2807,11 @@ nft_select_set_ops(const struct nft_ctx *ctx,
 	const struct nft_set_type *type;
 	u32 flags = 0;
 
+	lockdep_assert_held(&ctx->net->nft.commit_mutex);
+	lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
 	if (list_empty(&nf_tables_set_types)) {
-		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-		request_module("nft-set");
-		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		nft_request_module(ctx->net, "nft-set");
 		if (!list_empty(&nf_tables_set_types))
 			return ERR_PTR(-EAGAIN);
 	}
@@ -4779,7 +4844,8 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
 	return NULL;
 }
 
-static const struct nft_object_type *nft_obj_type_get(u32 objtype)
+static const struct nft_object_type *
+nft_obj_type_get(struct net *net, u32 objtype)
 {
 	const struct nft_object_type *type;
 
@@ -4787,11 +4853,10 @@ static const struct nft_object_type *nft_obj_type_get(u32 objtype)
 	if (type != NULL && try_module_get(type->owner))
 		return type;
 
+	lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
 	if (type == NULL) {
-		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-		request_module("nft-obj-%u", objtype);
-		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		nft_request_module(net, "nft-obj-%u", objtype);
 		if (__nft_obj_type_get(objtype))
 			return ERR_PTR(-EAGAIN);
 	}
@@ -4843,7 +4908,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 
 	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
 
-	type = nft_obj_type_get(objtype);
+	type = nft_obj_type_get(net, objtype);
 	if (IS_ERR(type))
 		return PTR_ERR(type);
 
@@ -5339,7 +5404,8 @@ static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
 	return NULL;
 }
 
-static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family)
+static const struct nf_flowtable_type *
+nft_flowtable_type_get(struct net *net, u8 family)
 {
 	const struct nf_flowtable_type *type;
 
@@ -5347,11 +5413,10 @@ static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family)
 	if (type != NULL && try_module_get(type->owner))
 		return type;
 
+	lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
 	if (type == NULL) {
-		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-		request_module("nf-flowtable-%u", family);
-		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		nft_request_module(net, "nf-flowtable-%u", family);
 		if (__nft_flowtable_type_get(family))
 			return ERR_PTR(-EAGAIN);
 	}
@@ -5431,7 +5496,7 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 		goto err1;
 	}
 
-	type = nft_flowtable_type_get(family);
+	type = nft_flowtable_type_get(net, family);
 	if (IS_ERR(type)) {
 		err = PTR_ERR(type);
 		goto err2;
@@ -6202,9 +6267,9 @@ static void nf_tables_commit_chain_active(struct net *net, struct nft_chain *cha
 	next_genbit = nft_gencursor_next(net);
 
 	g0 = rcu_dereference_protected(chain->rules_gen_0,
-				       lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+				       lockdep_commit_lock_is_held(net));
 	g1 = rcu_dereference_protected(chain->rules_gen_1,
-				       lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+				       lockdep_commit_lock_is_held(net));
 
 	/* No changes to this chain? */
 	if (chain->rules_next == NULL) {
@@ -6412,6 +6477,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 
 	nf_tables_commit_release(net);
 	nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+	mutex_unlock(&net->nft.commit_mutex);
 
 	return 0;
 }
@@ -6563,12 +6629,25 @@ static void nf_tables_cleanup(struct net *net)
 
 static int nf_tables_abort(struct net *net, struct sk_buff *skb)
 {
-	return __nf_tables_abort(net);
+	int ret = __nf_tables_abort(net);
+
+	mutex_unlock(&net->nft.commit_mutex);
+
+	return ret;
 }
 
 static bool nf_tables_valid_genid(struct net *net, u32 genid)
 {
-	return net->nft.base_seq == genid;
+	bool genid_ok;
+
+	mutex_lock(&net->nft.commit_mutex);
+
+	genid_ok = genid == 0 || net->nft.base_seq == genid;
+	if (!genid_ok)
+		mutex_unlock(&net->nft.commit_mutex);
+
+	/* else, commit mutex has to be released by commit or abort function */
+	return genid_ok;
 }
 
 static const struct nfnetlink_subsystem nf_tables_subsys = {
@@ -6580,6 +6659,7 @@ static const struct nfnetlink_subsystem nf_tables_subsys = {
 	.abort		= nf_tables_abort,
 	.cleanup	= nf_tables_cleanup,
 	.valid_genid	= nf_tables_valid_genid,
+	.owner		= THIS_MODULE,
 };
 
 int nft_chain_validate_dependency(const struct nft_chain *chain,
@@ -6906,8 +6986,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
 	case NFT_GOTO:
 		if (!tb[NFTA_VERDICT_CHAIN])
 			return -EINVAL;
-		chain = nft_chain_lookup(ctx->table, tb[NFTA_VERDICT_CHAIN],
-					 genmask);
+		chain = nft_chain_lookup(ctx->net, ctx->table,
+					 tb[NFTA_VERDICT_CHAIN], genmask);
 		if (IS_ERR(chain))
 			return PTR_ERR(chain);
 		if (nft_is_base_chain(chain))
@@ -7152,6 +7232,7 @@ static int __net_init nf_tables_init_net(struct net *net)
 {
 	INIT_LIST_HEAD(&net->nft.tables);
 	INIT_LIST_HEAD(&net->nft.commit_list);
+	mutex_init(&net->nft.commit_mutex);
 	net->nft.base_seq = 1;
 	net->nft.validate_state = NFT_VALIDATE_SKIP;
 
@@ -7160,11 +7241,11 @@ static int __net_init nf_tables_init_net(struct net *net)
 
 static void __net_exit nf_tables_exit_net(struct net *net)
 {
-	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	mutex_lock(&net->nft.commit_mutex);
 	if (!list_empty(&net->nft.commit_list))
 		__nf_tables_abort(net);
 	__nft_release_tables(net);
-	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	mutex_unlock(&net->nft.commit_mutex);
 	WARN_ON_ONCE(!list_empty(&net->nft.tables));
 }
 
@@ -7179,29 +7260,19 @@ static int __init nf_tables_module_init(void)
 
 	nft_chain_filter_init();
 
-	info = kmalloc_array(NFT_RULE_MAXEXPRS, sizeof(struct nft_expr_info),
-			     GFP_KERNEL);
-	if (info == NULL) {
-		err = -ENOMEM;
-		goto err1;
-	}
-
 	err = nf_tables_core_module_init();
 	if (err < 0)
-		goto err2;
+		return err;
 
 	err = nfnetlink_subsys_register(&nf_tables_subsys);
 	if (err < 0)
-		goto err3;
+		goto err;
 
 	register_netdevice_notifier(&nf_tables_flowtable_notifier);
 
 	return register_pernet_subsys(&nf_tables_net_ops);
-err3:
+err:
 	nf_tables_core_module_exit();
-err2:
-	kfree(info);
-err1:
 	return err;
 }
 
@@ -7213,7 +7284,6 @@ static void __exit nf_tables_module_exit(void)
 	unregister_pernet_subsys(&nf_tables_net_ops);
 	rcu_barrier();
 	nf_tables_core_module_exit();
-	kfree(info);
 }
 
 module_init(nf_tables_module_init);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index e1b6be29848d..916913454624 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -331,18 +331,27 @@ replay:
 		}
 	}
 
-	if (!ss->commit || !ss->abort) {
+	if (!ss->valid_genid || !ss->commit || !ss->abort) {
 		nfnl_unlock(subsys_id);
 		netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
 		return kfree_skb(skb);
 	}
 
-	if (genid && ss->valid_genid && !ss->valid_genid(net, genid)) {
+	if (!try_module_get(ss->owner)) {
+		nfnl_unlock(subsys_id);
+		netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
+		return kfree_skb(skb);
+	}
+
+	if (!ss->valid_genid(net, genid)) {
+		module_put(ss->owner);
 		nfnl_unlock(subsys_id);
 		netlink_ack(oskb, nlh, -ERESTART, NULL);
 		return kfree_skb(skb);
 	}
 
+	nfnl_unlock(subsys_id);
+
 	while (skb->len >= nlmsg_total_size(0)) {
 		int msglen, type;
 
@@ -464,14 +473,10 @@ ack:
 	}
 done:
 	if (status & NFNL_BATCH_REPLAY) {
-		const struct nfnetlink_subsystem *ss2;
-
-		ss2 = nfnl_dereference_protected(subsys_id);
-		if (ss2 == ss)
-			ss->abort(net, oskb);
+		ss->abort(net, oskb);
 		nfnl_err_reset(&err_list);
-		nfnl_unlock(subsys_id);
 		kfree_skb(skb);
+		module_put(ss->owner);
 		goto replay;
 	} else if (status == NFNL_BATCH_DONE) {
 		err = ss->commit(net, oskb);
@@ -489,8 +494,8 @@ done:
 		ss->cleanup(net);
 
 	nfnl_err_deliver(&err_list, oskb);
-	nfnl_unlock(subsys_id);
 	kfree_skb(skb);
+	module_put(ss->owner);
 }
 
 static const struct nla_policy nfnl_batch_policy[NFNL_BATCH_MAX + 1] = {
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 9ee5fa551fa6..d9d952fad3e0 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -26,7 +26,6 @@
 #include <net/sock.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_timeout.h>
@@ -47,7 +46,7 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
 };
 
 static int
-ctnl_timeout_parse_policy(void *timeouts,
+ctnl_timeout_parse_policy(void *timeout,
 			  const struct nf_conntrack_l4proto *l4proto,
 			  struct net *net, const struct nlattr *attr)
 {
@@ -68,7 +67,7 @@ ctnl_timeout_parse_policy(void *timeouts,
 	if (ret < 0)
 		goto err;
 
-	ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
+	ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeout);
 
 err:
 	kfree(tb);
@@ -373,7 +372,6 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
 				 struct netlink_ext_ack *extack)
 {
 	const struct nf_conntrack_l4proto *l4proto;
-	unsigned int *timeouts;
 	__u16 l3num;
 	__u8 l4num;
 	int ret;
@@ -393,9 +391,7 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
 		goto err;
 	}
 
-	timeouts = l4proto->get_timeouts(net);
-
-	ret = ctnl_timeout_parse_policy(timeouts, l4proto, net,
+	ret = ctnl_timeout_parse_policy(NULL, l4proto, net,
 					cda[CTA_TIMEOUT_DATA]);
 	if (ret < 0)
 		goto err;
@@ -432,7 +428,6 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
 
 	if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
 		struct nlattr *nest_parms;
-		unsigned int *timeouts = l4proto->get_timeouts(net);
 		int ret;
 
 		nest_parms = nla_nest_start(skb,
@@ -440,7 +435,7 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
 		if (!nest_parms)
 			goto nla_put_failure;
 
-		ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts);
+		ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
 		if (ret < 0)
 			goto nla_put_failure;
 
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index d21834bed805..ea5b7c4944f6 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -322,7 +322,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
 	if (!ctx.net)
 		return NOTIFY_DONE;
 
-	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	mutex_lock(&ctx.net->nft.commit_mutex);
 	list_for_each_entry(table, &ctx.net->nft.tables, list) {
 		if (table->family != NFPROTO_NETDEV)
 			continue;
@@ -337,7 +337,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
 			nft_netdev_event(event, dev, &ctx);
 		}
 	}
-	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	mutex_unlock(&ctx.net->nft.commit_mutex);
 	put_net(ctx.net);
 
 	return NOTIFY_DONE;
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index a832c59f0a9c..b90d96ba4a12 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -14,10 +14,9 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 
 struct nft_connlimit {
-	spinlock_t		lock;
-	struct hlist_head	hhead;
-	u32			limit;
-	bool			invert;
+	struct nf_conncount_list	list;
+	u32				limit;
+	bool				invert;
 };
 
 static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
@@ -45,21 +44,19 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
 		return;
 	}
 
-	spin_lock_bh(&priv->lock);
-	count = nf_conncount_lookup(nft_net(pkt), &priv->hhead, tuple_ptr, zone,
-				    &addit);
+	nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone,
+			    &addit);
+	count = priv->list.count;
 
 	if (!addit)
 		goto out;
 
-	if (!nf_conncount_add(&priv->hhead, tuple_ptr, zone)) {
+	if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) {
 		regs->verdict.code = NF_DROP;
-		spin_unlock_bh(&priv->lock);
 		return;
 	}
 	count++;
 out:
-	spin_unlock_bh(&priv->lock);
 
 	if ((count > priv->limit) ^ priv->invert) {
 		regs->verdict.code = NFT_BREAK;
@@ -87,8 +84,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
 			invert = true;
 	}
 
-	spin_lock_init(&priv->lock);
-	INIT_HLIST_HEAD(&priv->hhead);
+	nf_conncount_list_init(&priv->list);
 	priv->limit	= limit;
 	priv->invert	= invert;
 
@@ -99,7 +95,7 @@ static void nft_connlimit_do_destroy(const struct nft_ctx *ctx,
 				     struct nft_connlimit *priv)
 {
 	nf_ct_netns_put(ctx->net, ctx->family);
-	nf_conncount_cache_free(&priv->hhead);
+	nf_conncount_cache_free(&priv->list);
 }
 
 static int nft_connlimit_do_dump(struct sk_buff *skb,
@@ -212,8 +208,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
 	struct nft_connlimit *priv_dst = nft_expr_priv(dst);
 	struct nft_connlimit *priv_src = nft_expr_priv(src);
 
-	spin_lock_init(&priv_dst->lock);
-	INIT_HLIST_HEAD(&priv_dst->hhead);
+	nf_conncount_list_init(&priv_dst->list);
 	priv_dst->limit	 = priv_src->limit;
 	priv_dst->invert = priv_src->invert;
 
@@ -225,21 +220,14 @@ static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx,
 {
 	struct nft_connlimit *priv = nft_expr_priv(expr);
 
-	nf_conncount_cache_free(&priv->hhead);
+	nf_conncount_cache_free(&priv->list);
 }
 
 static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
 {
 	struct nft_connlimit *priv = nft_expr_priv(expr);
-	bool addit, ret;
 
-	spin_lock_bh(&priv->lock);
-	nf_conncount_lookup(net, &priv->hhead, NULL, &nf_ct_zone_dflt, &addit);
-
-	ret = hlist_empty(&priv->hhead);
-	spin_unlock_bh(&priv->lock);
-
-	return ret;
+	return nf_conncount_gc_list(net, &priv->list);
 }
 
 static struct nft_expr_type nft_connlimit_type;
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 1435ffc5f57e..3bc82ee5464d 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -870,7 +870,7 @@ static void nft_ct_helper_obj_eval(struct nft_object *obj,
 	if (test_bit(IPS_HELPER_BIT, &ct->status))
 		return;
 
-	help = nf_ct_helper_ext_add(ct, to_assign, GFP_ATOMIC);
+	help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
 	if (help) {
 		rcu_assign_pointer(help->helper, to_assign);
 		set_bit(IPS_HELPER_BIT, &ct->status);
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 27d7e4598ab6..81184c244d1a 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -118,6 +118,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 	u64 timeout;
 	int err;
 
+	lockdep_assert_held(&ctx->net->nft.commit_mutex);
+
 	if (tb[NFTA_DYNSET_SET_NAME] == NULL ||
 	    tb[NFTA_DYNSET_OP] == NULL ||
 	    tb[NFTA_DYNSET_SREG_KEY] == NULL)
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 998c2b546f6d..d7f3776dfd71 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -31,7 +31,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
 		case NFPROTO_IPV4:
 			sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt));
 			break;
-#if IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
 		case NFPROTO_IPV6:
 			sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt));
 			break;
@@ -43,7 +43,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
 		}
 
 	if (!sk) {
-		nft_reg_store8(dest, 0);
+		regs->verdict.code = NFT_BREAK;
 		return;
 	}
 
@@ -54,6 +54,14 @@ static void nft_socket_eval(const struct nft_expr *expr,
 	case NFT_SOCKET_TRANSPARENT:
 		nft_reg_store8(dest, inet_sk_transparent(sk));
 		break;
+	case NFT_SOCKET_MARK:
+		if (sk_fullsock(sk)) {
+			*dest = sk->sk_mark;
+		} else {
+			regs->verdict.code = NFT_BREAK;
+			return;
+		}
+		break;
 	default:
 		WARN_ON(1);
 		regs->verdict.code = NFT_BREAK;
@@ -77,7 +85,7 @@ static int nft_socket_init(const struct nft_ctx *ctx,
 
 	switch(ctx->family) {
 	case NFPROTO_IPV4:
-#if IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
 	case NFPROTO_IPV6:
 #endif
 	case NFPROTO_INET:
@@ -91,6 +99,9 @@ static int nft_socket_init(const struct nft_ctx *ctx,
 	case NFT_SOCKET_TRANSPARENT:
 		len = sizeof(u8);
 		break;
+	case NFT_SOCKET_MARK:
+		len = sizeof(u32);
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
index 0b660c568156..e8da9a9bba73 100644
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -1,14 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/kernel.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_queue.h>
+#include <net/ip6_checksum.h>
+
+#ifdef CONFIG_INET
+__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+		       unsigned int dataoff, u8 protocol)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	__sum16 csum = 0;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
+			break;
+		if ((protocol == 0 && !csum_fold(skb->csum)) ||
+		    !csum_tcpudp_magic(iph->saddr, iph->daddr,
+				       skb->len - dataoff, protocol,
+				       skb->csum)) {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			break;
+		}
+		/* fall through */
+	case CHECKSUM_NONE:
+		if (protocol == 0)
+			skb->csum = 0;
+		else
+			skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
+						       skb->len - dataoff,
+						       protocol, 0);
+		csum = __skb_checksum_complete(skb);
+	}
+	return csum;
+}
+EXPORT_SYMBOL(nf_ip_checksum);
+#endif
+
+static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
+				      unsigned int dataoff, unsigned int len,
+				      u8 protocol)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	__sum16 csum = 0;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		if (len == skb->len - dataoff)
+			return nf_ip_checksum(skb, hook, dataoff, protocol);
+		/* fall through */
+	case CHECKSUM_NONE:
+		skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol,
+					       skb->len - dataoff, 0);
+		skb->ip_summed = CHECKSUM_NONE;
+		return __skb_checksum_complete_head(skb, dataoff + len);
+	}
+	return csum;
+}
+
+__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
+			unsigned int dataoff, u8 protocol)
+{
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	__sum16 csum = 0;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
+			break;
+		if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+				     skb->len - dataoff, protocol,
+				     csum_sub(skb->csum,
+					      skb_checksum(skb, 0,
+							   dataoff, 0)))) {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			break;
+		}
+		/* fall through */
+	case CHECKSUM_NONE:
+		skb->csum = ~csum_unfold(
+				csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+					     skb->len - dataoff,
+					     protocol,
+					     csum_sub(0,
+						      skb_checksum(skb, 0,
+								   dataoff, 0))));
+		csum = __skb_checksum_complete(skb);
+	}
+	return csum;
+}
+EXPORT_SYMBOL(nf_ip6_checksum);
+
+static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
+				       unsigned int dataoff, unsigned int len,
+				       u8 protocol)
+{
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	__wsum hsum;
+	__sum16 csum = 0;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		if (len == skb->len - dataoff)
+			return nf_ip6_checksum(skb, hook, dataoff, protocol);
+		/* fall through */
+	case CHECKSUM_NONE:
+		hsum = skb_checksum(skb, 0, dataoff, 0);
+		skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
+							 &ip6h->daddr,
+							 skb->len - dataoff,
+							 protocol,
+							 csum_sub(0, hsum)));
+		skb->ip_summed = CHECKSUM_NONE;
+		return __skb_checksum_complete_head(skb, dataoff + len);
+	}
+	return csum;
+};
 
 __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
-		    unsigned int dataoff, u_int8_t protocol,
+		    unsigned int dataoff, u8 protocol,
 		    unsigned short family)
 {
-	const struct nf_ipv6_ops *v6ops;
 	__sum16 csum = 0;
 
 	switch (family) {
@@ -16,9 +130,7 @@ __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
 		csum = nf_ip_checksum(skb, hook, dataoff, protocol);
 		break;
 	case AF_INET6:
-		v6ops = rcu_dereference(nf_ipv6_ops);
-		if (v6ops)
-			csum = v6ops->checksum(skb, hook, dataoff, protocol);
+		csum = nf_ip6_checksum(skb, hook, dataoff, protocol);
 		break;
 	}
 
@@ -28,9 +140,8 @@ EXPORT_SYMBOL_GPL(nf_checksum);
 
 __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
 			    unsigned int dataoff, unsigned int len,
-			    u_int8_t protocol, unsigned short family)
+			    u8 protocol, unsigned short family)
 {
-	const struct nf_ipv6_ops *v6ops;
 	__sum16 csum = 0;
 
 	switch (family) {
@@ -39,10 +150,8 @@ __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
 					      protocol);
 		break;
 	case AF_INET6:
-		v6ops = rcu_dereference(nf_ipv6_ops);
-		if (v6ops)
-			csum = v6ops->checksum_partial(skb, hook, dataoff, len,
-						       protocol);
+		csum = nf_ip6_checksum_partial(skb, hook, dataoff, len,
+					       protocol);
 		break;
 	}
 
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 03b9a50ec93b..7ba454e9e3fa 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -93,7 +93,7 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name,
 		return -ENOENT;
 	}
 
-	help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL);
+	help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
 	if (help == NULL) {
 		nf_conntrack_helper_put(helper);
 		return -ENOMEM;
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 475957cfcf50..0d0d68c989df 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -38,7 +38,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 	return XT_CONTINUE;
 }
 
-#if IS_ENABLED(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
 static unsigned int
 tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
@@ -141,7 +141,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
 		.destroy    = tee_tg_destroy,
 		.me         = THIS_MODULE,
 	},
-#if IS_ENABLED(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
 	{
 		.name       = "TEE",
 		.revision   = 1,
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index d76550a8b642..ad7420cdc439 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -36,15 +36,6 @@
 #include <net/netfilter/nf_tproxy.h>
 #include <linux/netfilter/xt_TPROXY.h>
 
-/* assign a socket to the skb -- consumes sk */
-static void
-nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
-{
-	skb_orphan(skb);
-	skb->sk = sk;
-	skb->destructor = sock_edemux;
-}
-
 static unsigned int
 tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
 	   u_int32_t mark_mask, u_int32_t mark_value)