summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/net/netfilter/nf_log.h5
-rw-r--r--include/net/netfilter/nf_socket.h27
-rw-r--r--include/net/netfilter/nft_fib.h31
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h63
-rw-r--r--net/bridge/netfilter/Kconfig1
-rw-r--r--net/bridge/netfilter/nf_log_bridge.c16
-rw-r--r--net/ipv4/netfilter/Kconfig14
-rw-r--r--net/ipv4/netfilter/Makefile3
-rw-r--r--net/ipv4/netfilter/nf_socket_ipv4.c163
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c238
-rw-r--r--net/ipv6/netfilter/Kconfig14
-rw-r--r--net/ipv6/netfilter/Makefile3
-rw-r--r--net/ipv6/netfilter/nf_socket_ipv6.c151
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c275
-rw-r--r--net/netfilter/Kconfig26
-rw-r--r--net/netfilter/Makefile6
-rw-r--r--net/netfilter/nf_log_common.c27
-rw-r--r--net/netfilter/nf_log_netdev.c80
-rw-r--r--net/netfilter/nf_tables_core.c1
-rw-r--r--net/netfilter/nfnetlink_log.c2
-rw-r--r--net/netfilter/nfnetlink_queue.c2
-rw-r--r--net/netfilter/nft_cmp.c3
-rw-r--r--net/netfilter/nft_ct.c50
-rw-r--r--net/netfilter/nft_fib.c159
-rw-r--r--net/netfilter/nft_fib_inet.c82
-rw-r--r--net/netfilter/nft_immediate.c3
-rw-r--r--net/netfilter/nft_meta.c5
-rw-r--r--net/netfilter/nft_numgen.c2
-rw-r--r--net/netfilter/nft_rt.c153
-rw-r--r--net/netfilter/xt_multiport.c40
-rw-r--r--net/netfilter/xt_socket.c305
31 files changed, 1610 insertions, 340 deletions
diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index 309cd267be4f..a559aa41253c 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -109,5 +109,10 @@ void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
const struct net_device *out,
const struct nf_loginfo *loginfo,
const char *prefix);
+void nf_log_l2packet(struct net *net, u_int8_t pf, unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo, const char *prefix);
#endif /* _NF_LOG_H */
diff --git a/include/net/netfilter/nf_socket.h b/include/net/netfilter/nf_socket.h
new file mode 100644
index 000000000000..f2fc39c97d43
--- /dev/null
+++ b/include/net/netfilter/nf_socket.h
@@ -0,0 +1,27 @@
+#ifndef _NF_SOCK_H_
+#define _NF_SOCK_H_
+
+struct net_device;
+struct sk_buff;
+struct sock;
+struct net;
+
+static inline bool nf_sk_is_transparent(struct sock *sk)
+{
+ switch (sk->sk_state) {
+ case TCP_TIME_WAIT:
+ return inet_twsk(sk)->tw_transparent;
+ case TCP_NEW_SYN_RECV:
+ return inet_rsk(inet_reqsk(sk))->no_srccheck;
+ default:
+ return inet_sk(sk)->transparent;
+ }
+}
+
+struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
+ const struct net_device *indev);
+
+struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
+ const struct net_device *indev);
+
+#endif
diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h
new file mode 100644
index 000000000000..cbedda077db2
--- /dev/null
+++ b/include/net/netfilter/nft_fib.h
@@ -0,0 +1,31 @@
+#ifndef _NFT_FIB_H_
+#define _NFT_FIB_H_
+
+struct nft_fib {
+ enum nft_registers dreg:8;
+ u8 result;
+ u32 flags;
+};
+
+extern const struct nla_policy nft_fib_policy[];
+
+int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr);
+int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[]);
+int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nft_data **data);
+
+
+void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt);
+void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt);
+
+void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt);
+void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt);
+
+void nft_fib_store_result(void *reg, enum nft_fib_result r,
+ const struct nft_pktinfo *pkt, int index);
+#endif
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index c6c4477c136b..14e5f619167e 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -759,6 +759,19 @@ enum nft_meta_keys {
};
/**
+ * enum nft_rt_keys - nf_tables routing expression keys
+ *
+ * @NFT_RT_CLASSID: realm value of packet's route (skb->dst->tclassid)
+ * @NFT_RT_NEXTHOP4: routing nexthop for IPv4
+ * @NFT_RT_NEXTHOP6: routing nexthop for IPv6
+ */
+enum nft_rt_keys {
+ NFT_RT_CLASSID,
+ NFT_RT_NEXTHOP4,
+ NFT_RT_NEXTHOP6,
+};
+
+/**
* enum nft_hash_attributes - nf_tables hash expression netlink attributes
*
* @NFTA_HASH_SREG: source register (NLA_U32)
@@ -797,6 +810,20 @@ enum nft_meta_attributes {
#define NFTA_META_MAX (__NFTA_META_MAX - 1)
/**
+ * enum nft_rt_attributes - nf_tables routing expression netlink attributes
+ *
+ * @NFTA_RT_DREG: destination register (NLA_U32)
+ * @NFTA_RT_KEY: routing data item to load (NLA_U32: nft_rt_keys)
+ */
+enum nft_rt_attributes {
+ NFTA_RT_UNSPEC,
+ NFTA_RT_DREG,
+ NFTA_RT_KEY,
+ __NFTA_RT_MAX
+};
+#define NFTA_RT_MAX (__NFTA_RT_MAX - 1)
+
+/**
* enum nft_ct_keys - nf_tables ct expression keys
*
* @NFT_CT_STATE: conntrack state (bitmask of enum ip_conntrack_info)
@@ -1109,6 +1136,42 @@ enum nft_gen_attributes {
};
#define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1)
+/*
+ * enum nft_fib_attributes - nf_tables fib expression netlink attributes
+ *
+ * @NFTA_FIB_DREG: destination register (NLA_U32)
+ * @NFTA_FIB_RESULT: desired result (NLA_U32)
+ * @NFTA_FIB_FLAGS: flowi fields to initialize when querying the FIB (NLA_U32)
+ *
+ * The FIB expression performs a route lookup according
+ * to the packet data.
+ */
+enum nft_fib_attributes {
+ NFTA_FIB_UNSPEC,
+ NFTA_FIB_DREG,
+ NFTA_FIB_RESULT,
+ NFTA_FIB_FLAGS,
+ __NFTA_FIB_MAX
+};
+#define NFTA_FIB_MAX (__NFTA_FIB_MAX - 1)
+
+enum nft_fib_result {
+ NFT_FIB_RESULT_UNSPEC,
+ NFT_FIB_RESULT_OIF,
+ NFT_FIB_RESULT_OIFNAME,
+ NFT_FIB_RESULT_ADDRTYPE,
+ __NFT_FIB_RESULT_MAX
+};
+#define NFT_FIB_RESULT_MAX (__NFT_FIB_RESULT_MAX - 1)
+
+enum nft_fib_flags {
+ NFTA_FIB_F_SADDR = 1 << 0, /* look up src */
+ NFTA_FIB_F_DADDR = 1 << 1, /* look up dst */
+ NFTA_FIB_F_MARK = 1 << 2, /* use skb->mark */
+ NFTA_FIB_F_IIF = 1 << 3, /* restrict to iif */
+ NFTA_FIB_F_OIF = 1 << 4, /* restrict to oif */
+};
+
/**
* enum nft_trace_attributes - nf_tables trace netlink attributes
*
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 9cebf47ac840..e7ef1a1ef3a6 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -22,6 +22,7 @@ config NFT_BRIDGE_REJECT
config NF_LOG_BRIDGE
tristate "Bridge packet logging"
+ select NF_LOG_COMMON
endif # NF_TABLES_BRIDGE
diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c
index 1663df598545..c197b1f844ee 100644
--- a/net/bridge/netfilter/nf_log_bridge.c
+++ b/net/bridge/netfilter/nf_log_bridge.c
@@ -24,21 +24,7 @@ static void nf_log_bridge_packet(struct net *net, u_int8_t pf,
const struct nf_loginfo *loginfo,
const char *prefix)
{
- switch (eth_hdr(skb)->h_proto) {
- case htons(ETH_P_IP):
- nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out,
- loginfo, "%s", prefix);
- break;
- case htons(ETH_P_IPV6):
- nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out,
- loginfo, "%s", prefix);
- break;
- case htons(ETH_P_ARP):
- case htons(ETH_P_RARP):
- nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out,
- loginfo, "%s", prefix);
- break;
- }
+ nf_log_l2packet(net, pf, hooknum, skb, in, out, loginfo, prefix);
}
static struct nf_logger nf_bridge_logger __read_mostly = {
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index d613309e3e5d..c11eb1744ab1 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV4
To compile it as a module, choose M here. If unsure, say N.
+config NF_SOCKET_IPV4
+ tristate "IPv4 socket lookup support"
+ help
+ This option enables the IPv4 socket lookup infrastructure. This is
+ is required by the iptables socket match.
+
if NF_TABLES
config NF_TABLES_IPV4
@@ -54,6 +60,14 @@ config NFT_DUP_IPV4
help
This module enables IPv4 packet duplication support for nf_tables.
+config NFT_FIB_IPV4
+ select NFT_FIB
+ tristate "nf_tables fib / ip route lookup support"
+ help
+ This module enables IPv4 FIB lookups, e.g. for reverse path filtering.
+ It also allows query of the FIB for the route type, e.g. local, unicast,
+ multicast or blackhole.
+
endif # NF_TABLES_IPV4
config NF_TABLES_ARP
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 853328f8fd05..f462fee66ac8 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -14,6 +14,8 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
# defrag
obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
+obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o
+
# logging
obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o
obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o
@@ -34,6 +36,7 @@ obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
+obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c
new file mode 100644
index 000000000000..a83d558e1aae
--- /dev/null
+++ b/net/ipv4/netfilter/nf_socket_ipv4.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2007-2008 BalaBit IT Ltd.
+ * Author: Krisztian Kovacs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+#include <net/sock.h>
+#include <net/inet_sock.h>
+#include <net/netfilter/nf_socket.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+static int
+extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol,
+ __be32 *raddr, __be32 *laddr,
+ __be16 *rport, __be16 *lport)
+{
+ unsigned int outside_hdrlen = ip_hdrlen(skb);
+ struct iphdr *inside_iph, _inside_iph;
+ struct icmphdr *icmph, _icmph;
+ __be16 *ports, _ports[2];
+
+ icmph = skb_header_pointer(skb, outside_hdrlen,
+ sizeof(_icmph), &_icmph);
+ if (icmph == NULL)
+ return 1;
+
+ switch (icmph->type) {
+ case ICMP_DEST_UNREACH:
+ case ICMP_SOURCE_QUENCH:
+ case ICMP_REDIRECT:
+ case ICMP_TIME_EXCEEDED:
+ case ICMP_PARAMETERPROB:
+ break;
+ default:
+ return 1;
+ }
+
+ inside_iph = skb_header_pointer(skb, outside_hdrlen +
+ sizeof(struct icmphdr),
+ sizeof(_inside_iph), &_inside_iph);
+ if (inside_iph == NULL)
+ return 1;
+
+ if (inside_iph->protocol != IPPROTO_TCP &&
+ inside_iph->protocol != IPPROTO_UDP)
+ return 1;
+
+ ports = skb_header_pointer(skb, outside_hdrlen +
+ sizeof(struct icmphdr) +
+ (inside_iph->ihl << 2),
+ sizeof(_ports), &_ports);
+ if (ports == NULL)
+ return 1;
+
+ /* the inside IP packet is the one quoted from our side, thus
+ * its saddr is the local address */
+ *protocol = inside_iph->protocol;
+ *laddr = inside_iph->saddr;
+ *lport = ports[0];
+ *raddr = inside_iph->daddr;
+ *rport = ports[1];
+
+ return 0;
+}
+
+static struct sock *
+nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
+ const u8 protocol,
+ const __be32 saddr, const __be32 daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in)
+{
+ switch (protocol) {
+ case IPPROTO_TCP:
+ return inet_lookup(net, &tcp_hashinfo, skb, doff,
+ saddr, sport, daddr, dport,
+ in->ifindex);
+ case IPPROTO_UDP:
+ return udp4_lib_lookup(net, saddr, sport, daddr, dport,
+ in->ifindex);
+ }
+ return NULL;
+}
+
+struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
+ const struct net_device *indev)
+{
+ __be32 uninitialized_var(daddr), uninitialized_var(saddr);
+ __be16 uninitialized_var(dport), uninitialized_var(sport);
+ const struct iphdr *iph = ip_hdr(skb);
+ struct sk_buff *data_skb = NULL;
+ u8 uninitialized_var(protocol);
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn const *ct;
+#endif
+ int doff = 0;
+
+ if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
+ struct udphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, ip_hdrlen(skb),
+ sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return NULL;
+
+ protocol = iph->protocol;
+ saddr = iph->saddr;
+ sport = hp->source;
+ daddr = iph->daddr;
+ dport = hp->dest;
+ data_skb = (struct sk_buff *)skb;
+ doff = iph->protocol == IPPROTO_TCP ?
+ ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) :
+ ip_hdrlen(skb) + sizeof(*hp);
+
+ } else if (iph->protocol == IPPROTO_ICMP) {
+ if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
+ &sport, &dport))
+ return NULL;
+ } else {
+ return NULL;
+ }
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ /* Do the lookup with the original socket address in
+ * case this is a reply packet of an established
+ * SNAT-ted connection.
+ */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct && !nf_ct_is_untracked(ct) &&
+ ((iph->protocol != IPPROTO_ICMP &&
+ ctinfo == IP_CT_ESTABLISHED_REPLY) ||
+ (iph->protocol == IPPROTO_ICMP &&
+ ctinfo == IP_CT_RELATED_REPLY)) &&
+ (ct->status & IPS_SRC_NAT_DONE)) {
+
+ daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
+ dport = (iph->protocol == IPPROTO_TCP) ?
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port :
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
+ }
+#endif
+
+ return nf_socket_get_sock_v4(net, data_skb, doff, protocol, saddr,
+ daddr, sport, dport, indev);
+}
+EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v4);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
+MODULE_DESCRIPTION("Netfilter IPv4 socket lookup infrastructure");
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
new file mode 100644
index 000000000000..db91fd42db67
--- /dev/null
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -0,0 +1,238 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_fib.h>
+
+#include <net/ip_fib.h>
+#include <net/route.h>
+
+/* don't try to find route from mcast/bcast/zeronet */
+static __be32 get_saddr(__be32 addr)
+{
+ if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) ||
+ ipv4_is_zeronet(addr))
+ return 0;
+ return addr;
+}
+
+static bool fib4_is_local(const struct sk_buff *skb)
+{
+ const struct rtable *rt = skb_rtable(skb);
+
+ return rt && (rt->rt_flags & RTCF_LOCAL);
+}
+
+#define DSCP_BITS 0xfc
+
+void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+ u32 *dst = &regs->data[priv->dreg];
+ const struct net_device *dev = NULL;
+ const struct iphdr *iph;
+ __be32 addr;
+
+ if (priv->flags & NFTA_FIB_F_IIF)
+ dev = pkt->in;
+ else if (priv->flags & NFTA_FIB_F_OIF)
+ dev = pkt->out;
+
+ iph = ip_hdr(pkt->skb);
+ if (priv->flags & NFTA_FIB_F_DADDR)
+ addr = iph->daddr;
+ else
+ addr = iph->saddr;
+
+ *dst = inet_dev_addr_type(pkt->net, dev, addr);
+}
+EXPORT_SYMBOL_GPL(nft_fib4_eval_type);
+
+static int get_ifindex(const struct net_device *dev)
+{
+ return dev ? dev->ifindex : 0;
+}
+
+void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+ u32 *dest = &regs->data[priv->dreg];
+ const struct iphdr *iph;
+ struct fib_result res;
+ struct flowi4 fl4 = {
+ .flowi4_scope = RT_SCOPE_UNIVERSE,
+ .flowi4_iif = LOOPBACK_IFINDEX,
+ };
+ const struct net_device *oif;
+ struct net_device *found;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ int i;
+#endif
+
+ /*
+ * Do not set flowi4_oif, it restricts results (for example, asking
+ * for oif 3 will get RTN_UNICAST result even if the daddr exits
+ * on another interface.
+ *
+ * Search results for the desired outinterface instead.
+ */
+ if (priv->flags & NFTA_FIB_F_OIF)
+ oif = pkt->out;
+ else if (priv->flags & NFTA_FIB_F_IIF)
+ oif = pkt->in;
+ else
+ oif = NULL;
+
+ if (pkt->hook == NF_INET_PRE_ROUTING && fib4_is_local(pkt->skb)) {
+ nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX);
+ return;
+ }
+
+ iph = ip_hdr(pkt->skb);
+ if (ipv4_is_multicast(iph->daddr) &&
+ ipv4_is_zeronet(iph->saddr) &&
+ ipv4_is_local_multicast(iph->daddr)) {
+ nft_fib_store_result(dest, priv->result, pkt,
+ get_ifindex(pkt->skb->dev));
+ return;
+ }
+
+ if (priv->flags & NFTA_FIB_F_MARK)
+ fl4.flowi4_mark = pkt->skb->mark;
+
+ fl4.flowi4_tos = iph->tos & DSCP_BITS;
+
+ if (priv->flags & NFTA_FIB_F_DADDR) {
+ fl4.daddr = iph->daddr;
+ fl4.saddr = get_saddr(iph->saddr);
+ } else {
+ fl4.daddr = iph->saddr;
+ fl4.saddr = get_saddr(iph->daddr);
+ }
+
+ if (fib_lookup(pkt->net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE))
+ return;
+
+ switch (res.type) {
+ case RTN_UNICAST:
+ break;
+ case RTN_LOCAL: /* should not appear here, see fib4_is_local() above */
+ return;
+ default:
+ break;
+ }
+
+ if (!oif) {
+ found = FIB_RES_DEV(res);
+ goto ok;
+ }
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ for (i = 0; i < res.fi->fib_nhs; i++) {
+ struct fib_nh *nh = &res.fi->fib_nh[i];
+
+ if (nh->nh_dev == oif) {
+ found = nh->nh_dev;
+ goto ok;
+ }
+ }
+ return;
+#else
+ found = FIB_RES_DEV(res);
+ if (found != oif)
+ return;
+#endif
+ok:
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ *dest = found->ifindex;
+ break;
+ case NFT_FIB_RESULT_OIFNAME:
+ strncpy((char *)dest, found->name, IFNAMSIZ);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(nft_fib4_eval);
+
+static struct nft_expr_type nft_fib4_type;
+
+static const struct nft_expr_ops nft_fib4_type_ops = {
+ .type = &nft_fib4_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+ .eval = nft_fib4_eval_type,
+ .init = nft_fib_init,
+ .dump = nft_fib_dump,
+ .validate = nft_fib_validate,
+};
+
+static const struct nft_expr_ops nft_fib4_ops = {
+ .type = &nft_fib4_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+ .eval = nft_fib4_eval,
+ .init = nft_fib_init,
+ .dump = nft_fib_dump,
+ .validate = nft_fib_validate,
+};
+
+static const struct nft_expr_ops *
+nft_fib4_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ enum nft_fib_result result;
+
+ if (!tb[NFTA_FIB_RESULT])
+ return ERR_PTR(-EINVAL);
+
+ result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+
+ switch (result) {
+ case NFT_FIB_RESULT_OIF:
+ return &nft_fib4_ops;
+ case NFT_FIB_RESULT_OIFNAME:
+ return &nft_fib4_ops;
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return &nft_fib4_type_ops;
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+}
+
+static struct nft_expr_type nft_fib4_type __read_mostly = {
+ .name = "fib",
+ .select_ops = &nft_fib4_select_ops,
+ .policy = nft_fib_policy,
+ .maxattr = NFTA_FIB_MAX,
+ .family = NFPROTO_IPV4,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_fib4_module_init(void)
+{
+ return nft_register_expr(&nft_fib4_type);
+}
+
+static void __exit nft_fib4_module_exit(void)
+{
+ nft_unregister_expr(&nft_fib4_type);
+}
+
+module_init(nft_fib4_module_init);
+module_exit(nft_fib4_module_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_ALIAS_NFT_AF_EXPR(2, "fib");
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index e10a04c9cdc7..6acb2eecd986 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV6
To compile it as a module, choose M here. If unsure, say N.
+config NF_SOCKET_IPV6
+ tristate "IPv6 socket lookup support"
+ help
+ This option enables the IPv6 socket lookup infrastructure. This
+ is used by the ip6tables socket match.
+
if NF_TABLES
config NF_TABLES_IPV6
@@ -54,6 +60,14 @@ config NFT_DUP_IPV6
help
This module enables IPv6 packet duplication support for nf_tables.
+config NFT_FIB_IPV6
+ tristate "nf_tables fib / ipv6 route lookup support"
+ select NFT_FIB
+ help
+ This module enables IPv6 FIB lookups, e.g. for reverse path filtering.
+ It also allows query of the FIB for the route type, e.g. local, unicast,
+ multicast or blackhole.
+
endif # NF_TABLES_IPV6
endif # NF_TABLES
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index b4f7d0b4e2af..fe180c96040e 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -24,6 +24,8 @@ obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
+obj-$(CONFIG_NF_SOCKET_IPV6) += nf_socket_ipv6.o
+
# logging
obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o
@@ -40,6 +42,7 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o
obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
+obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
new file mode 100644
index 000000000000..ebb2bf84232a
--- /dev/null
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2007-2008 BalaBit IT Ltd.
+ * Author: Krisztian Kovacs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+#include <net/sock.h>
+#include <net/inet_sock.h>
+#include <net/inet6_hashtables.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <net/netfilter/nf_socket.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+static int
+extract_icmp6_fields(const struct sk_buff *skb,
+ unsigned int outside_hdrlen,
+ int *protocol,
+ const struct in6_addr **raddr,
+ const struct in6_addr **laddr,
+ __be16 *rport,
+ __be16 *lport,
+ struct ipv6hdr *ipv6_var)
+{
+ const struct ipv6hdr *inside_iph;
+ struct icmp6hdr *icmph, _icmph;
+ __be16 *ports, _ports[2];
+ u8 inside_nexthdr;
+ __be16 inside_fragoff;
+ int inside_hdrlen;
+
+ icmph = skb_header_pointer(skb, outside_hdrlen,
+ sizeof(_icmph), &_icmph);
+ if (icmph == NULL)
+ return 1;
+
+ if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK)
+ return 1;
+
+ inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph),
+ sizeof(*ipv6_var), ipv6_var);
+ if (inside_iph == NULL)
+ return 1;
+ inside_nexthdr = inside_iph->nexthdr;
+
+ inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) +
+ sizeof(*ipv6_var),
+ &inside_nexthdr, &inside_fragoff);
+ if (inside_hdrlen < 0)
+ return 1; /* hjm: Packet has no/incomplete transport layer headers. */
+
+ if (inside_nexthdr != IPPROTO_TCP &&
+ inside_nexthdr != IPPROTO_UDP)
+ return 1;
+
+ ports = skb_header_pointer(skb, inside_hdrlen,
+ sizeof(_ports), &_ports);
+ if (ports == NULL)
+ return 1;
+
+ /* the inside IP packet is the one quoted from our side, thus
+ * its saddr is the local address */
+ *protocol = inside_nexthdr;
+ *laddr = &inside_iph->saddr;
+ *lport = ports[0];
+ *raddr = &inside_iph->daddr;
+ *rport = ports[1];
+
+ return 0;
+}
+
+static struct sock *
+nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
+ const u8 protocol,
+ const struct in6_addr *saddr, const struct in6_addr *daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in)
+{
+ switch (protocol) {
+ case IPPROTO_TCP:
+ return inet6_lookup(net, &tcp_hashinfo, skb, doff,
+ saddr, sport, daddr, dport,
+ in->ifindex);
+ case IPPROTO_UDP:
+ return udp6_lib_lookup(net, saddr, sport, daddr, dport,
+ in->ifindex);
+ }
+
+ return NULL;
+}
+
+struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
+ const struct net_device *indev)
+{
+ __be16 uninitialized_var(dport), uninitialized_var(sport);
+ const struct in6_addr *daddr = NULL, *saddr = NULL;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct sk_buff *data_skb = NULL;
+ int doff = 0;
+ int thoff = 0, tproto;
+
+ tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
+ if (tproto < 0) {
+ pr_debug("unable to find transport header in IPv6 packet, dropping\n");
+ return NULL;
+ }
+
+ if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
+ struct udphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return NULL;
+
+ saddr = &iph->saddr;
+ sport = hp->source;
+ daddr = &iph->daddr;
+ dport = hp->dest;
+ data_skb = (struct sk_buff *)skb;
+ doff = tproto == IPPROTO_TCP ?
+ thoff + __tcp_hdrlen((struct tcphdr *)hp) :
+ thoff + sizeof(*hp);
+
+ } else if (tproto == IPPROTO_ICMPV6) {
+ struct ipv6hdr ipv6_var;
+
+ if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
+ &sport, &dport, &ipv6_var))
+ return NULL;
+ } else {
+ return NULL;
+ }
+
+ return nf_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr,
+ sport, dport, indev);
+}
+EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v6);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
+MODULE_DESCRIPTION("Netfilter IPv6 socket lookup infrastructure");
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
new file mode 100644
index 000000000000..ff1f1b6b4a4a
--- /dev/null
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -0,0 +1,275 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_fib.h>
+
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+
+static bool fib6_is_local(const struct sk_buff *skb)
+{
+ const struct rt6_info *rt = (const void *)skb_dst(skb);
+
+ return rt && (rt->rt6i_flags & RTF_LOCAL);
+}
+
+static int get_ifindex(const struct net_device *dev)
+{
+ return dev ? dev->ifindex : 0;
+}
+
+static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
+ const struct nft_pktinfo *pkt,
+ const struct net_device *dev)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(pkt->skb);
+ int lookup_flags = 0;
+
+ if (priv->flags & NFTA_FIB_F_DADDR) {
+ fl6->daddr = iph->daddr;
+ fl6->saddr = iph->saddr;
+ } else {
+ fl6->daddr = iph->saddr;
+ fl6->saddr = iph->daddr;
+ }
+
+ if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) {
+ lookup_flags |= RT6_LOOKUP_F_IFACE;
+ fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev);
+ }
+
+ if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST)
+ lookup_flags |= RT6_LOOKUP_F_HAS_SADDR;
+
+ if (priv->flags & NFTA_FIB_F_MARK)
+ fl6->flowi6_mark = pkt->skb->mark;
+
+ fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK;
+
+ return lookup_flags;
+}
+
+static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
+ const struct nft_pktinfo *pkt)
+{
+ const struct net_device *dev = NULL;
+ const struct nf_ipv6_ops *v6ops;
+ const struct nf_afinfo *afinfo;
+ int route_err, addrtype;
+ struct rt6_info *rt;
+ struct flowi6 fl6 = {
+ .flowi6_iif = LOOPBACK_IFINDEX,
+ .flowi6_proto = pkt->tprot,
+ };
+ u32 ret = 0;
+
+ afinfo = nf_get_afinfo(NFPROTO_IPV6);
+ if (!afinfo)
+ return RTN_UNREACHABLE;
+
+ if (priv->flags & NFTA_FIB_F_IIF)
+ dev = pkt->in;
+ else if (priv->flags & NFTA_FIB_F_OIF)
+ dev = pkt->out;
+
+ nft_fib6_flowi_init(&fl6, priv, pkt, dev);
+
+ v6ops = nf_get_ipv6_ops();
+ if (dev && v6ops && v6ops->chk_addr(pkt->net, &fl6.daddr, dev, true))
+ ret = RTN_LOCAL;
+
+ route_err = afinfo->route(pkt->net, (struct dst_entry **)&rt,
+ flowi6_to_flowi(&fl6), false);
+ if (route_err)
+ goto err;
+
+ if (rt->rt6i_flags & RTF_REJECT) {
+ route_err = rt->dst.error;
+ dst_release(&rt->dst);
+ goto err;
+ }
+
+ if (ipv6_anycast_destination((struct dst_entry *)rt, &fl6.daddr))
+ ret = RTN_ANYCAST;
+ else if (!dev && rt->rt6i_flags & RTF_LOCAL)
+ ret = RTN_LOCAL;
+
+ dst_release(&rt->dst);
+
+ if (ret)
+ return ret;
+
+ addrtype = ipv6_addr_type(&fl6.daddr);
+
+ if (addrtype & IPV6_ADDR_MULTICAST)
+ return RTN_MULTICAST;
+ if (addrtype & IPV6_ADDR_UNICAST)
+ return RTN_UNICAST;
+
+ return RTN_UNSPEC;
+ err:
+ switch (route_err) {
+ case -EINVAL:
+ return RTN_BLACKHOLE;
+ case -EACCES:
+ return RTN_PROHIBIT;
+ case -EAGAIN:
+ return RTN_THROW;
+ default:
+ break;
+ }
+
+ return RTN_UNREACHABLE;
+}
+
+void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+ u32 *dest = &regs->data[priv->dreg];
+
+ *dest = __nft_fib6_eval_type(priv, pkt);
+}
+EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
+
+void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+ const struct net_device *oif = NULL;
+ u32 *dest = &regs->data[priv->dreg];
+ struct flowi6 fl6 = {
+ .flowi6_iif = LOOPBACK_IFINDEX,
+ .flowi6_proto = pkt->tprot,
+ };
+ struct rt6_info *rt;
+ int lookup_flags;
+
+ if (priv->flags & NFTA_FIB_F_IIF)
+ oif = pkt->in;
+ else if (priv->flags & NFTA_FIB_F_OIF)
+ oif = pkt->out;
+
+ lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif);
+
+ if (pkt->hook == NF_INET_PRE_ROUTING && fib6_is_local(pkt->skb)) {
+ nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX);
+ return;
+ }
+
+ *dest = 0;
+ again:
+ rt = (void *)ip6_route_lookup(pkt->net, &fl6, lookup_flags);
+ if (rt->dst.error)
+ goto put_rt_err;
+
+ /* Should not see RTF_LOCAL here */
+ if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
+ goto put_rt_err;
+
+ if (oif && oif != rt->rt6i_idev->dev) {
+ /* multipath route? Try again with F_IFACE */
+ if ((lookup_flags & RT6_LOOKUP_F_IFACE) == 0) {
+ lookup_flags |= RT6_LOOKUP_F_IFACE;
+ fl6.flowi6_oif = oif->ifindex;
+ ip6_rt_put(rt);
+ goto again;
+ }
+ }
+
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ *dest = rt->rt6i_idev->dev->ifindex;
+ break;
+ case NFT_FIB_RESULT_OIFNAME:
+ strncpy((char *)dest, rt->rt6i_idev->dev->name, IFNAMSIZ);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ put_rt_err:
+ ip6_rt_put(rt);
+}
+EXPORT_SYMBOL_GPL(nft_fib6_eval);
+
+static struct nft_expr_type nft_fib6_type;
+
+static const struct nft_expr_ops nft_fib6_type_ops = {
+ .type = &nft_fib6_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+ .eval = nft_fib6_eval_type,
+ .init = nft_fib_init,
+ .dump = nft_fib_dump,
+ .validate = nft_fib_validate,
+};
+
+static const struct nft_expr_ops nft_fib6_ops = {
+ .type = &nft_fib6_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+ .eval = nft_fib6_eval,
+ .init = nft_fib_init,
+ .dump = nft_fib_dump,
+ .validate = nft_fib_validate,
+};
+
+static const struct nft_expr_ops *
+nft_fib6_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ enum nft_fib_result result;
+
+ if (!tb[NFTA_FIB_RESULT])
+ return ERR_PTR(-EINVAL);
+
+ result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+
+ switch (result) {
+ case NFT_FIB_RESULT_OIF:
+ return &nft_fib6_ops;
+ case NFT_FIB_RESULT_OIFNAME:
+ return &nft_fib6_ops;
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return &nft_fib6_type_ops;
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+}
+
+static struct nft_expr_type nft_fib6_type __read_mostly = {
+ .name = "fib",
+ .select_ops = &nft_fib6_select_ops,
+ .policy = nft_fib_policy,
+ .maxattr = NFTA_FIB_MAX,
+ .family = NFPROTO_IPV6,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_fib6_module_init(void)
+{
+ return nft_register_expr(&nft_fib6_type);
+}
+
+static void __exit nft_fib6_module_exit(void)
+{
+ nft_unregister_expr(&nft_fib6_type);
+}
+module_init(nft_fib6_module_init);
+module_exit(nft_fib6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_ALIAS_NFT_AF_EXPR(10, "fib");
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e8d56d9a4df2..44410d30d461 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -57,6 +57,10 @@ config NF_CONNTRACK
config NF_LOG_COMMON
tristate
+config NF_LOG_NETDEV
+ tristate "Netdev packet logging"
+ select NF_LOG_COMMON
+
if NF_CONNTRACK
config NF_CONNTRACK_MARK
@@ -474,6 +478,12 @@ config NFT_META
This option adds the "meta" expression that you can use to match and
to set packet metainformation such as the packet mark.
+config NFT_RT
+ tristate "Netfilter nf_tables routing module"
+ help
+ This option adds the "rt" expression that you can use to match
+ packet routing information such as the packet nexthop.
+
config NFT_NUMGEN
tristate "Netfilter nf_tables number generator module"
help
@@ -581,6 +591,19 @@ config NFT_HASH
This option adds the "hash" expression that you can use to perform
a hash operation on registers.
+config NFT_FIB
+ tristate
+
+config NFT_FIB_INET
+ depends on NF_TABLES_INET
+ depends on NFT_FIB_IPV4
+ depends on NFT_FIB_IPV6
+ tristate "Netfilter nf_tables fib inet support"
+ help
+ This option allows using the FIB expression from the inet table.
+ The lookup will be delegated to the IPv4 or IPv6 FIB depending
+ on the protocol of the packet.
+
if NF_TABLES_NETDEV
config NF_DUP_NETDEV
@@ -1409,9 +1432,10 @@ config NETFILTER_XT_MATCH_SOCKET
tristate '"socket" match support'
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
- depends on !NF_CONNTRACK || NF_CONNTRACK
depends on IPV6 || IPV6=n
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
+ depends on NF_SOCKET_IPV4
+ depends on NF_SOCKET_IPV6
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index c23c3c84416f..5bbf767672ec 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -48,6 +48,9 @@ nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
# generic transport layer logging
obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
+# packet logging for netdev family
+obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o
+
obj-$(CONFIG_NF_NAT) += nf_nat.o
obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
@@ -81,6 +84,7 @@ obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o
obj-$(CONFIG_NFT_META) += nft_meta.o
+obj-$(CONFIG_NFT_RT) += nft_rt.o
obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
obj-$(CONFIG_NFT_CT) += nft_ct.o
obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
@@ -96,6 +100,8 @@ obj-$(CONFIG_NFT_LOG) += nft_log.o
obj-$(CONFIG_NFT_MASQ) += nft_masq.o
obj-$(CONFIG_NFT_REDIR) += nft_redir.o
obj-$(CONFIG_NFT_HASH) += nft_hash.o
+obj-$(CONFIG_NFT_FIB) += nft_fib.o
+obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o
# nf_tables netdev
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index 119fe1cb1ea9..ed9b80815fa0 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -175,6 +175,33 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
}
EXPORT_SYMBOL_GPL(nf_log_dump_packet_common);
+/* bridge and netdev logging families share this code. */
+void nf_log_l2packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+ nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out,
+ loginfo, "%s", prefix);
+ break;
+ case htons(ETH_P_IPV6):
+ nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out,
+ loginfo, "%s", prefix);
+ break;
+ case htons(ETH_P_ARP):
+ case htons(ETH_P_RARP):
+ nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out,
+ loginfo, "%s", prefix);
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(nf_log_l2packet);
+
static int __init nf_log_common_init(void)
{
return 0;
diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c
new file mode 100644
index 000000000000..1f645949f3d8
--- /dev/null
+++ b/net/netfilter/nf_log_netdev.c
@@ -0,0 +1,80 @@
+/*
+ * (C) 2016 by Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_log.h>
+
+static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ nf_log_l2packet(net, pf, hooknum, skb, in, out, loginfo, prefix);
+}
+
+static struct nf_logger nf_netdev_logger __read_mostly = {
+ .name = "nf_log_netdev",
+ .type = NF_LOG_TYPE_LOG,
+ .logfn = nf_log_netdev_packet,
+ .me = THIS_MODULE,
+};
+
+static int __net_init nf_log_netdev_net_init(struct net *net)
+{
+ return nf_log_set(net, NFPROTO_NETDEV, &nf_netdev_logger);
+}
+
+static void __net_exit nf_log_netdev_net_exit(struct net *net)
+{
+ nf_log_unset(net, &nf_netdev_logger);
+}
+
+static struct pernet_operations nf_log_netdev_net_ops = {
+ .init = nf_log_netdev_net_init,
+ .exit = nf_log_netdev_net_exit,
+};
+
+static int __init nf_log_netdev_init(void)
+{
+ int ret;
+
+ /* Request to load the real packet loggers. */
+ nf_logger_request_module(NFPROTO_IPV4, NF_LOG_TYPE_LOG);
+ nf_logger_request_module(NFPROTO_IPV6, NF_LOG_TYPE_LOG);
+ nf_logger_request_module(NFPROTO_ARP, NF_LOG_TYPE_LOG);
+
+ ret = register_pernet_subsys(&nf_log_netdev_net_ops);
+ if (ret < 0)
+ return ret;
+
+ nf_log_register(NFPROTO_NETDEV, &nf_netdev_logger);
+ return 0;
+}
+
+static void __exit nf_log_netdev_exit(void)
+{
+ unregister_pernet_subsys(&nf_log_netdev_net_ops);
+ nf_log_unregister(&nf_netdev_logger);
+}
+
+module_init(nf_log_netdev_init);
+module_exit(nf_log_netdev_exit);
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter netdev packet logging");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NF_LOGGER(5, 0); /* NFPROTO_NETDEV */
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 0dd5c695482f..70de32a6d5c0 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -178,6 +178,7 @@ next_rule:
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
+ case NF_STOLEN:
nft_trace_packet(&info, chain, rule,
rulenum, NFT_TRACETYPE_RULE);
return regs.verdict.code;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index eb086a192c5a..7435505037b7 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -330,7 +330,7 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size,
* message. WARNING: has to be <= 128k due to slab restrictions */
n = max(inst_size, pkt_size);
- skb = alloc_skb(n, GFP_ATOMIC);
+ skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN);
if (!skb) {
if (n > pkt_size) {
/* try to allocate only as much as we need for current
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index af832c526048..5379f788a372 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -69,7 +69,7 @@ struct nfqnl_instance {
* Following fields are dirtied for each queued packet,
* keep them in same cache line if possible.
*/
- spinlock_t lock;
+ spinlock_t lock ____cacheline_aligned_in_smp;
unsigned int queue_total;
unsigned int id_sequence; /* 'sequence' of pkt ids */
struct list_head queue_list; /* packets in queue */
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 2e53739812b1..e25b35d70e4d 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -84,9 +84,6 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (err < 0)
return err;
- if (desc.len > U8_MAX)
- return -ERANGE;
-
priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
priv->len = desc.len;
return 0;
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index d7b0d171172a..6837348c8993 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -518,15 +519,61 @@ static struct nft_expr_type nft_ct_type __read_mostly = {
.owner = THIS_MODULE,
};
+static void nft_notrack_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct sk_buff *skb = pkt->skb;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(pkt->skb, &ctinfo);
+ /* Previously seen (loopback or untracked)? Ignore. */
+ if (ct)
+ return;
+
+ ct = nf_ct_untracked_get();
+ atomic_inc(&ct->ct_general.use);
+ skb->nfct = &ct->ct_general;
+ skb->nfctinfo = IP_CT_NEW;
+}
+
+static struct nft_expr_type nft_notrack_type;
+static const struct nft_expr_ops nft_notrack_ops = {
+ .type = &nft_notrack_type,
+ .size = NFT_EXPR_SIZE(0),
+ .eval = nft_notrack_eval,
+};
+
+static struct nft_expr_type nft_notrack_type __read_mostly = {
+ .name = "notrack",
+ .ops = &nft_notrack_ops,
+ .owner = THIS_MODULE,
+};
+
static int __init nft_ct_module_init(void)
{
+ int err;
+
BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE);
- return nft_register_expr(&nft_ct_type);
+ err = nft_register_expr(&nft_ct_type);
+ if (err < 0)
+ return err;
+
+ err = nft_register_expr(&nft_notrack_type);
+ if (err < 0)
+ goto err1;
+
+ return 0;
+err1:
+ nft_unregister_expr(&nft_ct_type);
+ return err;
}
static void __exit nft_ct_module_exit(void)
{
+ nft_unregister_expr(&nft_notrack_type);
nft_unregister_expr(&nft_ct_type);
}
@@ -536,3 +583,4 @@ module_exit(nft_ct_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("ct");
+MODULE_ALIAS_NFT_EXPR("notrack");
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
new file mode 100644
index 000000000000..4944a8b7f7a7
--- /dev/null
+++ b/net/netfilter/nft_fib.c
@@ -0,0 +1,159 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Generic part shared by ipv4 and ipv6 backends.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_fib.h>
+
+const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
+ [NFTA_FIB_DREG] = { .type = NLA_U32 },
+ [NFTA_FIB_RESULT] = { .type = NLA_U32 },
+ [NFTA_FIB_FLAGS] = { .type = NLA_U32 },
+};
+EXPORT_SYMBOL(nft_fib_policy);
+
+#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
+ NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)
+
+int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+ unsigned int hooks;
+
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF: /* fallthrough */
+ case NFT_FIB_RESULT_OIFNAME:
+ hooks = (1 << NF_INET_PRE_ROUTING);
+ break;
+ case NFT_FIB_RESULT_ADDRTYPE:
+ if (priv->flags & NFTA_FIB_F_IIF)
+ hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD);
+ else if (priv->flags & NFTA_FIB_F_OIF)
+ hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_FORWARD);
+ else
+ hooks = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING);
+
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+EXPORT_SYMBOL_GPL(nft_fib_validate);
+
+int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_fib *priv = nft_expr_priv(expr);
+ unsigned int len;
+ int err;
+
+ if (!tb[NFTA_FIB_DREG] || !tb[NFTA_FIB_RESULT] || !tb[NFTA_FIB_FLAGS])
+ return -EINVAL;
+
+ priv->flags = ntohl(nla_get_be32(tb[NFTA_FIB_FLAGS]));
+
+ if (priv->flags == 0 || (priv->flags & ~NFTA_FIB_F_ALL))
+ return -EINVAL;
+
+ if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) ==
+ (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR))
+ return -EINVAL;
+ if ((priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) ==
+ (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF))
+ return -EINVAL;
+ if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == 0)
+ return -EINVAL;
+
+ priv->result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+ priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]);
+
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ if (priv->flags & NFTA_FIB_F_OIF)
+ return -EINVAL;
+ len = sizeof(int);
+ break;
+ case NFT_FIB_RESULT_OIFNAME:
+ if (priv->flags & NFTA_FIB_F_OIF)
+ return -EINVAL;
+ len = IFNAMSIZ;
+ break;
+ case NFT_FIB_RESULT_ADDRTYPE:
+ len = sizeof(u32);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ err = nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, len);
+ if (err < 0)
+ return err;
+
+ return nft_fib_validate(ctx, expr, NULL);
+}
+EXPORT_SYMBOL_GPL(nft_fib_init);
+
+int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_FIB_DREG, priv->dreg))
+ return -1;
+
+ if (nla_put_be32(skb, NFTA_FIB_RESULT, htonl(priv->result)))
+ return -1;
+
+ if (nla_put_be32(skb, NFTA_FIB_FLAGS, htonl(priv->flags)))
+ return -1;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_fib_dump);
+
+void nft_fib_store_result(void *reg, enum nft_fib_result r,
+ const struct nft_pktinfo *pkt, int index)
+{
+ struct net_device *dev;
+ u32 *dreg = reg;
+
+ switch (r) {
+ case NFT_FIB_RESULT_OIF:
+ *dreg = index;
+ break;
+ case NFT_FIB_RESULT_OIFNAME:
+ dev = dev_get_by_index_rcu(pkt->net, index);
+ strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ *dreg = 0;
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(nft_fib_store_result);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c
new file mode 100644
index 000000000000..fe8943b572b7
--- /dev/null
+++ b/net/netfilter/nft_fib_inet.c
@@ -0,0 +1,82 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+#include <net/netfilter/nft_fib.h>
+
+static void nft_fib_inet_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+
+ switch (pkt->pf) {
+ case NFPROTO_IPV4:
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ case NFT_FIB_RESULT_OIFNAME:
+ return nft_fib4_eval(expr, regs, pkt);
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return nft_fib4_eval_type(expr, regs, pkt);
+ }
+ break;
+ case NFPROTO_IPV6:
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ case NFT_FIB_RESULT_OIFNAME:
+ return nft_fib6_eval(expr, regs, pkt);
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return nft_fib6_eval_type(expr, regs, pkt);
+ }
+ break;
+ }
+
+ regs->verdict.code = NF_DROP;
+}
+
+static struct nft_expr_type nft_fib_inet_type;
+static const struct nft_expr_ops nft_fib_inet_ops = {
+ .type = &nft_fib_inet_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+ .eval = nft_fib_inet_eval,
+ .init = nft_fib_init,
+ .dump = nft_fib_dump,
+ .validate = nft_fib_validate,
+};
+
+static struct nft_expr_type nft_fib_inet_type __read_mostly = {
+ .family = NFPROTO_INET,
+ .name = "fib",
+ .ops = &nft_fib_inet_ops,
+ .policy = nft_fib_policy,
+ .maxattr = NFTA_FIB_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_fib_inet_module_init(void)
+{
+ return nft_register_expr(&nft_fib_inet_type);
+}
+
+static void __exit nft_fib_inet_module_exit(void)
+{
+ nft_unregister_expr(&nft_fib_inet_type);
+}
+
+module_init(nft_fib_inet_module_init);
+module_exit(nft_fib_inet_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_ALIAS_NFT_AF_EXPR(1, "fib");
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index d17018ff54e6..4528adea7ede 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -54,9 +54,6 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- if (desc.len > U8_MAX)
- return -ERANGE;
-
priv->dlen = desc.len;
priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]);
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 6c1e0246706e..64994023bf81 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -310,6 +310,11 @@ int nft_meta_set_validate(const struct nft_ctx *ctx,
case NFPROTO_NETDEV:
hooks = 1 << NF_NETDEV_INGRESS;
break;
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ hooks = 1 << NF_INET_PRE_ROUTING;
+ break;
default:
return -EOPNOTSUPP;
}
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 55bc5ab78d4a..a66b36097b8f 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -65,7 +65,7 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
return -EOVERFLOW;
priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
- atomic_set(&priv->counter, 0);
+ atomic_set(&priv->counter, priv->modulus - 1);
return nft_validate_register_store(ctx, priv->dreg, NULL,
NFT_DATA_VALUE, sizeof(u32));
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
new file mode 100644
index 000000000000..9e5ec1f67020
--- /dev/null
+++ b/net/netfilter/nft_rt.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2016 Anders K. Pedersen <akp@cohaesio.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/dst.h>
+#include <net/ip6_route.h>
+#include <net/route.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+struct nft_rt {
+ enum nft_rt_keys key:8;
+ enum nft_registers dreg:8;
+};
+
+void nft_rt_get_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_rt *priv = nft_expr_priv(expr);
+ const struct sk_buff *skb = pkt->skb;
+ u32 *dest = &regs->data[priv->dreg];
+ const struct dst_entry *dst;
+
+ dst = skb_dst(skb);
+ if (!dst)
+ goto err;
+
+ switch (priv->key) {
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ case NFT_RT_CLASSID:
+ *dest = dst->tclassid;
+ break;
+#endif
+ case NFT_RT_NEXTHOP4:
+ if (pkt->pf != NFPROTO_IPV4)
+ goto err;
+
+ *dest = rt_nexthop((const struct rtable *)dst,
+ ip_hdr(skb)->daddr);
+ break;
+ case NFT_RT_NEXTHOP6:
+ if (pkt->pf != NFPROTO_IPV6)
+ goto err;
+
+ memcpy(dest, rt6_nexthop((struct rt6_info *)dst,
+ &ipv6_hdr(skb)->daddr),
+ sizeof(struct in6_addr));
+ break;
+ default:
+ WARN_ON(1);
+ goto err;
+ }
+ return;
+
+err:
+ regs->verdict.code = NFT_BREAK;
+}
+
+const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
+ [NFTA_RT_DREG] = { .type = NLA_U32 },
+ [NFTA_RT_KEY] = { .type = NLA_U32 },
+};
+
+int nft_rt_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_rt *priv = nft_expr_priv(expr);
+ unsigned int len;
+
+ if (tb[NFTA_RT_KEY] == NULL ||
+ tb[NFTA_RT_DREG] == NULL)
+ return -EINVAL;
+
+ priv->key = ntohl(nla_get_be32(tb[NFTA_RT_KEY]));
+ switch (priv->key) {
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ case NFT_RT_CLASSID:
+#endif
+ case NFT_RT_NEXTHOP4:
+ len = sizeof(u32);
+ break;
+ case NFT_RT_NEXTHOP6:
+ len = sizeof(struct in6_addr);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ priv->dreg = nft_parse_register(tb[NFTA_RT_DREG]);
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, len);
+}
+
+int nft_rt_get_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_rt *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_RT_KEY, htonl(priv->key)))
+ goto nla_put_failure;
+ if (nft_dump_register(skb, NFTA_RT_DREG, priv->dreg))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_rt_type;
+static const struct nft_expr_ops nft_rt_get_ops = {
+ .type = &nft_rt_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_rt)),
+ .eval = nft_rt_get_eval,
+ .init = nft_rt_get_init,
+ .dump = nft_rt_get_dump,
+};
+
+static struct nft_expr_type nft_rt_type __read_mostly = {
+ .name = "rt",
+ .ops = &nft_rt_get_ops,
+ .policy = nft_rt_policy,
+ .maxattr = NFTA_RT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_rt_module_init(void)
+{
+ return nft_register_expr(&nft_rt_type);
+}
+
+static void __exit nft_rt_module_exit(void)
+{
+ nft_unregister_expr(&nft_rt_type);
+}
+
+module_init(nft_rt_module_init);
+module_exit(nft_rt_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Anders K. Pedersen <akp@cohaesio.com>");
+MODULE_ALIAS_NFT_EXPR("rt");
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index ac1d3c3d09e7..ec06fb1cb16f 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -42,29 +42,31 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
e = minfo->ports[++i];
pr_debug("src or dst matches with %d-%d?\n", s, e);
- if (minfo->flags == XT_MULTIPORT_SOURCE
- && src >= s && src <= e)
- return true ^ minfo->invert;
- if (minfo->flags == XT_MULTIPORT_DESTINATION
- && dst >= s && dst <= e)
- return true ^ minfo->invert;
- if (minfo->flags == XT_MULTIPORT_EITHER
- && ((dst >= s && dst <= e)
- || (src >= s && src <= e)))
- return true ^ minfo->invert;
+ switch (minfo->flags) {
+ case XT_MULTIPORT_SOURCE:
+ return (src >= s && src <= e) ^ minfo->invert;
+ case XT_MULTIPORT_DESTINATION:
+ return (dst >= s && dst <= e) ^ minfo->invert;
+ case XT_MULTIPORT_EITHER:
+ return ((dst >= s && dst <= e) ||
+ (src >= s && src <= e)) ^ minfo->invert;
+ default:
+ break;
+ }
} else {
/* exact port matching */
pr_debug("src or dst matches with %d?\n", s);
- if (minfo->flags == XT_MULTIPORT_SOURCE
- && src == s)
- return true ^ minfo->invert;
- if (minfo->flags == XT_MULTIPORT_DESTINATION
- && dst == s)
- return true ^ minfo->invert;
- if (minfo->flags == XT_MULTIPORT_EITHER
- && (src == s || dst == s))
- return true ^ minfo->invert;
+ switch (minfo->flags) {
+ case XT_MULTIPORT_SOURCE:
+ return (src == s) ^ minfo->invert;
+ case XT_MULTIPORT_DESTINATION:
+ return (dst == s) ^ minfo->invert;
+ case XT_MULTIPORT_EITHER:
+ return (src == s || dst == s) ^ minfo->invert;
+ default:
+ break;
+ }
}
}
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index b10ade272b50..018c369c9f0d 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -22,76 +22,14 @@
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
-#define XT_SOCKET_HAVE_IPV6 1
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/inet6_hashtables.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif
+#include <net/netfilter/nf_socket.h>
#include <linux/netfilter/xt_socket.h>
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-#define XT_SOCKET_HAVE_CONNTRACK 1
-#include <net/netfilter/nf_conntrack.h>
-#endif
-
-static int
-extract_icmp4_fields(const struct sk_buff *skb,
- u8 *protocol,
- __be32 *raddr,
- __be32 *laddr,
- __be16 *rport,
- __be16 *lport)
-{
- unsigned int outside_hdrlen = ip_hdrlen(skb);
- struct iphdr *inside_iph, _inside_iph;
- struct icmphdr *icmph, _icmph;
- __be16 *ports, _ports[2];
-
- icmph = skb_header_pointer(skb, outside_hdrlen,
- sizeof(_icmph), &_icmph);
- if (icmph == NULL)
- return 1;
-
- switch (icmph->type) {
- case ICMP_DEST_UNREACH:
- case ICMP_SOURCE_QUENCH:
- case ICMP_REDIRECT:
- case ICMP_TIME_EXCEEDED:
- case ICMP_PARAMETERPROB:
- break;
- default:
- return 1;
- }
-
- inside_iph = skb_header_pointer(skb, outside_hdrlen +
- sizeof(struct icmphdr),
- sizeof(_inside_iph), &_inside_iph);
- if (inside_iph == NULL)
- return 1;
-
- if (inside_iph->protocol != IPPROTO_TCP &&
- inside_iph->protocol != IPPROTO_UDP)
- return 1;
-
- ports = skb_header_pointer(skb, outside_hdrlen +
- sizeof(struct icmphdr) +
- (inside_iph->ihl << 2),
- sizeof(_ports), &_ports);
- if (ports == NULL)
- return 1;
-
- /* the inside IP packet is the one quoted from our side, thus
- * its saddr is the local address */
- *protocol = inside_iph->protocol;
- *laddr = inside_iph->saddr;
- *lport = ports[0];
- *raddr = inside_iph->daddr;
- *rport = ports[1];
-
- return 0;
-}
-
/* "socket" match based redirection (no specific rule)
* ===================================================
*
@@ -111,104 +49,6 @@ extract_icmp4_fields(const struct sk_buff *skb,
* then local services could intercept traffic going through the
* box.
*/
-static struct sock *
-xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
- const u8 protocol,
- const __be32 saddr, const __be32 daddr,
- const __be16 sport, const __be16 dport,
- const struct net_device *in)
-{
- switch (protocol) {
- case IPPROTO_TCP:
- return inet_lookup(net, &tcp_hashinfo, skb, doff,
- saddr, sport, daddr, dport,
- in->ifindex);
- case IPPROTO_UDP:
- return udp4_lib_lookup(net, saddr, sport, daddr, dport,
- in->ifindex);
- }
- return NULL;
-}
-
-static bool xt_socket_sk_is_transparent(struct sock *sk)
-{
- switch (sk->sk_state) {
- case TCP_TIME_WAIT:
- return inet_twsk(sk)->tw_transparent;
-
- case TCP_NEW_SYN_RECV:
- return inet_rsk(inet_reqsk(sk))->no_srccheck;
-
- default:
- return inet_sk(sk)->transparent;
- }
-}
-
-static struct sock *xt_socket_lookup_slow_v4(struct net *net,
- const struct sk_buff *skb,
- const struct net_device *indev)
-{
- const struct iphdr *iph = ip_hdr(skb);
- struct sk_buff *data_skb = NULL;
- int doff = 0;
- __be32 uninitialized_var(daddr), uninitialized_var(saddr);
- __be16 uninitialized_var(dport), uninitialized_var(sport);
- u8 uninitialized_var(protocol);
-#ifdef XT_SOCKET_HAVE_CONNTRACK
- struct nf_conn const *ct;
- enum ip_conntrack_info ctinfo;
-#endif
-
- if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
- struct udphdr _hdr, *hp;
-
- hp = skb_header_pointer(skb, ip_hdrlen(skb),
- sizeof(_hdr), &_hdr);
- if (hp == NULL)
- return NULL;
-
- protocol = iph->protocol;
- saddr = iph->saddr;
- sport = hp->source;
- daddr = iph->daddr;
- dport = hp->dest;
- data_skb = (struct sk_buff *)skb;
- doff = iph->protocol == IPPROTO_TCP ?
- ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) :
- ip_hdrlen(skb) + sizeof(*hp);
-
- } else if (iph->protocol == IPPROTO_ICMP) {
- if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
- &sport, &dport))
- return NULL;
- } else {
- return NULL;
- }
-
-#ifdef XT_SOCKET_HAVE_CONNTRACK
- /* Do the lookup with the original socket address in
- * case this is a reply packet of an established
- * SNAT-ted connection.
- */
- ct = nf_ct_get(skb, &ctinfo);
- if (ct && !nf_ct_is_untracked(ct) &&
- ((iph->protocol != IPPROTO_ICMP &&
- ctinfo == IP_CT_ESTABLISHED_REPLY) ||
- (iph->protocol == IPPROTO_ICMP &&
- ctinfo == IP_CT_RELATED_REPLY)) &&
- (ct->status & IPS_SRC_NAT_DONE)) {
-
- daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
- dport = (iph->protocol == IPPROTO_TCP) ?
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port :
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
- }
-#endif
-
- return xt_socket_get_sock_v4(net, data_skb, doff, protocol, saddr,
- daddr, sport, dport, indev);
-}
-
static bool
socket_match(const struct sk_buff *skb, struct xt_action_param *par,
const struct xt_socket_mtinfo1 *info)
@@ -217,7 +57,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
struct sock *sk = skb->sk;
if (!sk)
- sk = xt_socket_lookup_slow_v4(par->net, skb, par->in);
+ sk = nf_sk_lookup_slow_v4(par->net, skb, par->in);
if (sk) {
bool wildcard;
bool transparent = true;
@@ -233,7 +73,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
* if XT_SOCKET_TRANSPARENT is used
*/
if (info->flags & XT_SOCKET_TRANSPARENT)
- transparent = xt_socket_sk_is_transparent(sk);
+ transparent = nf_sk_is_transparent(sk);
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent)
@@ -265,132 +105,7 @@ socket_mt4_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
return socket_match(skb, par, par->matchinfo);
}
-#ifdef XT_SOCKET_HAVE_IPV6
-
-static int
-extract_icmp6_fields(const struct sk_buff *skb,
- unsigned int outside_hdrlen,
- int *protocol,
- const struct in6_addr **raddr,
- const struct in6_addr **laddr,
- __be16 *rport,
- __be16 *lport,
- struct ipv6hdr *ipv6_var)
-{
- const struct ipv6hdr *inside_iph;
- struct icmp6hdr *icmph, _icmph;
- __be16 *ports, _ports[2];
- u8 inside_nexthdr;
- __be16 inside_fragoff;
- int inside_hdrlen;
-
- icmph = skb_header_pointer(skb, outside_hdrlen,
- sizeof(_icmph), &_icmph);
- if (icmph == NULL)
- return 1;
-
- if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK)
- return 1;
-
- inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph),
- sizeof(*ipv6_var), ipv6_var);
- if (inside_iph == NULL)
- return 1;
- inside_nexthdr = inside_iph->nexthdr;
-
- inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) +
- sizeof(*ipv6_var),
- &inside_nexthdr, &inside_fragoff);
- if (inside_hdrlen < 0)
- return 1; /* hjm: Packet has no/incomplete transport layer headers. */
-
- if (inside_nexthdr != IPPROTO_TCP &&
- inside_nexthdr != IPPROTO_UDP)
- return 1;
-
- ports = skb_header_pointer(skb, inside_hdrlen,
- sizeof(_ports), &_ports);
- if (ports == NULL)
- return 1;
-
- /* the inside IP packet is the one quoted from our side, thus
- * its saddr is the local address */
- *protocol = inside_nexthdr;
- *laddr = &inside_iph->saddr;
- *lport = ports[0];
- *raddr = &inside_iph->daddr;
- *rport = ports[1];
-
- return 0;
-}
-
-static struct sock *
-xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
- const u8 protocol,
- const struct in6_addr *saddr, const struct in6_addr *daddr,
- const __be16 sport, const __be16 dport,
- const struct net_device *in)
-{
- switch (protocol) {
- case IPPROTO_TCP:
- return inet6_lookup(net, &tcp_hashinfo, skb, doff,
- saddr, sport, daddr, dport,
- in->ifindex);
- case IPPROTO_UDP:
- return udp6_lib_lookup(net, saddr, sport, daddr, dport,
- in->ifindex);
- }
-
- return NULL;
-}
-
-static struct sock *xt_socket_lookup_slow_v6(struct net *net,
- const struct sk_buff *skb,
- const struct net_device *indev)
-{
- __be16 uninitialized_var(dport), uninitialized_var(sport);
- const struct in6_addr *daddr = NULL, *saddr = NULL;
- struct ipv6hdr *iph = ipv6_hdr(skb);
- struct sk_buff *data_skb = NULL;
- int doff = 0;
- int thoff = 0, tproto;
-
- tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
- if (tproto < 0) {
- pr_debug("unable to find transport header in IPv6 packet, dropping\n");
- return NULL;
- }
-
- if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
- struct udphdr _hdr, *hp;
-
- hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
- if (hp == NULL)
- return NULL;
-
- saddr = &iph->saddr;
- sport = hp->source;
- daddr = &iph->daddr;
- dport = hp->dest;
- data_skb = (struct sk_buff *)skb;
- doff = tproto == IPPROTO_TCP ?
- thoff + __tcp_hdrlen((struct tcphdr *)hp) :
- thoff + sizeof(*hp);
-
- } else if (tproto == IPPROTO_ICMPV6) {
- struct ipv6hdr ipv6_var;
-
- if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
- &sport, &dport, &ipv6_var))
- return NULL;
- } else {
- return NULL;
- }
-
- return xt_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr,
- sport, dport, indev);
-}
-
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static bool
socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
{
@@ -399,7 +114,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
struct sock *sk = skb->sk;
if (!sk)
- sk = xt_socket_lookup_slow_v6(par->net, skb, par->in);
+ sk = nf_sk_lookup_slow_v6(par->net, skb, par->in);
if (sk) {
bool wildcard;
bool transparent = true;
@@ -415,7 +130,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
* if XT_SOCKET_TRANSPARENT is used
*/
if (info->flags & XT_SOCKET_TRANSPARENT)
- transparent = xt_socket_sk_is_transparent(sk);
+ transparent = nf_sk_is_transparent(sk);
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent)
@@ -488,7 +203,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
(1 << NF_INET_LOCAL_IN),
.me = THIS_MODULE,
},
-#ifdef XT_SOCKET_HAVE_IPV6
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
{
.name = "socket",
.revision = 1,
@@ -512,7 +227,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
(1 << NF_INET_LOCAL_IN),
.me = THIS_MODULE,
},
-#ifdef XT_SOCKET_HAVE_IPV6
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
{
.name = "socket",
.revision = 2,
@@ -536,7 +251,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
(1 << NF_INET_LOCAL_IN),
.me = THIS_MODULE,
},
-#ifdef XT_SOCKET_HAVE_IPV6
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
{
.name = "socket",
.revision = 3,
@@ -554,7 +269,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
static int __init socket_mt_init(void)
{
nf_defrag_ipv4_enable();
-#ifdef XT_SOCKET_HAVE_IPV6
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
nf_defrag_ipv6_enable();
#endif