summaryrefslogtreecommitdiffstats
path: root/net/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/core.c22
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c72
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c83
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c155
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c9
-rw-r--r--net/netfilter/nf_conntrack_proto.c126
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c2
-rw-r--r--net/netfilter/nf_conntrack_seqadj.c4
-rw-r--r--net/netfilter/nf_flow_table_core.c1
-rw-r--r--net/netfilter/nf_nat_helper.c4
-rw-r--r--net/netfilter/nf_nat_proto.c24
-rw-r--r--net/netfilter/nf_nat_redirect.c12
-rw-r--r--net/netfilter/nf_nat_sip.c2
-rw-r--r--net/netfilter/nf_synproxy_core.c2
-rw-r--r--net/netfilter/nf_tables_api.c26
-rw-r--r--net/netfilter/nfnetlink_osf.c5
-rw-r--r--net/netfilter/nfnetlink_queue.c2
-rw-r--r--net/netfilter/nft_exthdr.c3
-rw-r--r--net/netfilter/nft_payload.c6
-rw-r--r--net/netfilter/xt_DSCP.c8
-rw-r--r--net/netfilter/xt_HL.c4
-rw-r--r--net/netfilter/xt_TCPMSS.c2
-rw-r--r--net/netfilter/xt_TCPOPTSTRIP.c28
-rw-r--r--net/netfilter/xt_owner.c23
29 files changed, 481 insertions, 164 deletions
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index b96fd3f54705..817a9e5d16e4 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -536,28 +536,6 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
}
EXPORT_SYMBOL(nf_hook_slow);
-
-int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
-{
- if (writable_len > skb->len)
- return 0;
-
- /* Not exclusive use of packet? Must copy. */
- if (!skb_cloned(skb)) {
- if (writable_len <= skb_headlen(skb))
- return 1;
- } else if (skb_clone_writable(skb, writable_len))
- return 1;
-
- if (writable_len <= skb_headlen(skb))
- writable_len = 0;
- else
- writable_len -= skb_headlen(skb);
-
- return !!__pskb_pull_tail(skb, writable_len);
-}
-EXPORT_SYMBOL(skb_make_writable);
-
/* This needs to be compiled in any case to avoid dependencies between the
* nfnetlink_queue code and nf_conntrack.
*/
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index bfd4365a8d73..4515056ef1c2 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -358,7 +358,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
struct tcphdr *th;
__u32 seq;
- if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
+ if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
return 0;
th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
@@ -435,7 +435,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
struct tcphdr *th;
__u32 seq;
- if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
+ if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
return 0;
th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 7138556b206b..e8651fd621ef 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -34,6 +34,7 @@
#include <net/tcp.h>
#include <net/udp.h>
#include <net/icmp.h> /* for icmp_send */
+#include <net/gue.h>
#include <net/route.h>
#include <net/ip6_checksum.h>
#include <net/netns/generic.h> /* net_generic() */
@@ -892,7 +893,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
IPPROTO_SCTP == protocol)
offset += 2 * sizeof(__u16);
- if (!skb_make_writable(skb, offset))
+ if (skb_ensure_writable(skb, offset))
goto out;
#ifdef CONFIG_IP_VS_IPV6
@@ -1282,7 +1283,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet");
- if (!skb_make_writable(skb, iph->len))
+ if (skb_ensure_writable(skb, iph->len))
goto drop;
/* mangle the packet */
@@ -1574,6 +1575,41 @@ ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
return 1;
}
+/* Check the UDP tunnel and return its header length */
+static int ipvs_udp_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ unsigned int offset, __u16 af,
+ const union nf_inet_addr *daddr, __u8 *proto)
+{
+ struct udphdr _udph, *udph;
+ struct ip_vs_dest *dest;
+
+ udph = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
+ if (!udph)
+ goto unk;
+ offset += sizeof(struct udphdr);
+ dest = ip_vs_find_tunnel(ipvs, af, daddr, udph->dest);
+ if (!dest)
+ goto unk;
+ if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ struct guehdr _gueh, *gueh;
+
+ gueh = skb_header_pointer(skb, offset, sizeof(_gueh), &_gueh);
+ if (!gueh)
+ goto unk;
+ if (gueh->control != 0 || gueh->version != 0)
+ goto unk;
+ /* Later we can support also IPPROTO_IPV6 */
+ if (gueh->proto_ctype != IPPROTO_IPIP)
+ goto unk;
+ *proto = gueh->proto_ctype;
+ return sizeof(struct udphdr) + sizeof(struct guehdr) +
+ (gueh->hlen << 2);
+ }
+
+unk:
+ return 0;
+}
+
/*
* Handle ICMP messages in the outside-to-inside direction (incoming).
* Find any that might be relevant, check against existing connections,
@@ -1593,6 +1629,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
struct ip_vs_proto_data *pd;
unsigned int offset, offset2, ihl, verdict;
bool ipip, new_cp = false;
+ union nf_inet_addr *raddr;
*related = 1;
@@ -1631,20 +1668,51 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
+ raddr = (union nf_inet_addr *)&cih->daddr;
/* Special case for errors for IPIP packets */
ipip = false;
if (cih->protocol == IPPROTO_IPIP) {
+ struct ip_vs_dest *dest;
+
if (unlikely(cih->frag_off & htons(IP_OFFSET)))
return NF_ACCEPT;
/* Error for our IPIP must arrive at LOCAL_IN */
if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL))
return NF_ACCEPT;
+ dest = ip_vs_find_tunnel(ipvs, AF_INET, raddr, 0);
+ /* Only for known tunnel */
+ if (!dest || dest->tun_type != IP_VS_CONN_F_TUNNEL_TYPE_IPIP)
+ return NF_ACCEPT;
offset += cih->ihl * 4;
cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
ipip = true;
+ } else if (cih->protocol == IPPROTO_UDP && /* Can be UDP encap */
+ /* Error for our tunnel must arrive at LOCAL_IN */
+ (skb_rtable(skb)->rt_flags & RTCF_LOCAL)) {
+ __u8 iproto;
+ int ulen;
+
+ /* Non-first fragment has no UDP header */
+ if (unlikely(cih->frag_off & htons(IP_OFFSET)))
+ return NF_ACCEPT;
+ offset2 = offset + cih->ihl * 4;
+ ulen = ipvs_udp_decap(ipvs, skb, offset2, AF_INET, raddr,
+ &iproto);
+ if (ulen > 0) {
+ /* Skip IP and UDP tunnel headers */
+ offset = offset2 + ulen;
+ /* Now we should be at the original IP header */
+ cih = skb_header_pointer(skb, offset, sizeof(_ciph),
+ &_ciph);
+ if (cih && cih->version == 4 && cih->ihl >= 5 &&
+ iproto == IPPROTO_IPIP)
+ ipip = true;
+ else
+ return NF_ACCEPT;
+ }
}
pd = ip_vs_proto_data_get(ipvs, cih->protocol);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 776c87ed4813..84384d896e29 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -510,15 +510,36 @@ static inline unsigned int ip_vs_rs_hashkey(int af,
static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
{
unsigned int hash;
+ __be16 port;
if (dest->in_rs_table)
return;
+ switch (IP_VS_DFWD_METHOD(dest)) {
+ case IP_VS_CONN_F_MASQ:
+ port = dest->port;
+ break;
+ case IP_VS_CONN_F_TUNNEL:
+ switch (dest->tun_type) {
+ case IP_VS_CONN_F_TUNNEL_TYPE_GUE:
+ port = dest->tun_port;
+ break;
+ case IP_VS_CONN_F_TUNNEL_TYPE_IPIP:
+ port = 0;
+ break;
+ default:
+ return;
+ }
+ break;
+ default:
+ return;
+ }
+
/*
* Hash by proto,addr,port,
* which are the parameters of the real service.
*/
- hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
+ hash = ip_vs_rs_hashkey(dest->af, &dest->addr, port);
hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
dest->in_rs_table = 1;
@@ -550,7 +571,8 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
if (dest->port == dport &&
dest->af == af &&
ip_vs_addr_equal(af, &dest->addr, daddr) &&
- (dest->protocol == protocol || dest->vfwmark)) {
+ (dest->protocol == protocol || dest->vfwmark) &&
+ IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
/* HIT */
return true;
}
@@ -580,7 +602,37 @@ struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af,
if (dest->port == dport &&
dest->af == af &&
ip_vs_addr_equal(af, &dest->addr, daddr) &&
- (dest->protocol == protocol || dest->vfwmark)) {
+ (dest->protocol == protocol || dest->vfwmark) &&
+ IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
+ /* HIT */
+ return dest;
+ }
+ }
+
+ return NULL;
+}
+
+/* Find real service record by <af,addr,tun_port>.
+ * In case of multiple records with the same <af,addr,tun_port>, only
+ * the first found record is returned.
+ *
+ * To be called under RCU lock.
+ */
+struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af,
+ const union nf_inet_addr *daddr,
+ __be16 tun_port)
+{
+ struct ip_vs_dest *dest;
+ unsigned int hash;
+
+ /* Check for "full" addressed entries */
+ hash = ip_vs_rs_hashkey(af, daddr, tun_port);
+
+ hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
+ if (dest->tun_port == tun_port &&
+ dest->af == af &&
+ ip_vs_addr_equal(af, &dest->addr, daddr) &&
+ IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_TUNNEL) {
/* HIT */
return dest;
}
@@ -826,24 +878,29 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
conn_flags |= IP_VS_CONN_F_INACTIVE;
+ /* Need to rehash? */
+ if ((udest->conn_flags & IP_VS_CONN_F_FWD_MASK) !=
+ IP_VS_DFWD_METHOD(dest) ||
+ udest->tun_type != dest->tun_type ||
+ udest->tun_port != dest->tun_port)
+ ip_vs_rs_unhash(dest);
+
/* set the tunnel info */
dest->tun_type = udest->tun_type;
dest->tun_port = udest->tun_port;
+ dest->tun_flags = udest->tun_flags;
/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
conn_flags |= IP_VS_CONN_F_NOOUTPUT;
} else {
- /*
- * Put the real service in rs_table if not present.
- * For now only for NAT!
- */
- ip_vs_rs_hash(ipvs, dest);
/* FTP-NAT requires conntrack for mangling */
if (svc->port == FTPPORT)
ip_vs_register_conntrack(svc);
}
atomic_set(&dest->conn_flags, conn_flags);
+ /* Put the real service in rs_table if not present. */
+ ip_vs_rs_hash(ipvs, dest);
/* bind the service */
old_svc = rcu_dereference_protected(dest->svc, 1);
@@ -2906,6 +2963,7 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
[IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 },
[IPVS_DEST_ATTR_TUN_TYPE] = { .type = NLA_U8 },
[IPVS_DEST_ATTR_TUN_PORT] = { .type = NLA_U16 },
+ [IPVS_DEST_ATTR_TUN_FLAGS] = { .type = NLA_U16 },
};
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
@@ -3212,6 +3270,8 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
dest->tun_type) ||
nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT,
dest->tun_port) ||
+ nla_put_u16(skb, IPVS_DEST_ATTR_TUN_FLAGS,
+ dest->tun_flags) ||
nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
@@ -3332,7 +3392,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
/* If a full entry was requested, check for the additional fields */
if (full_entry) {
struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
- *nla_l_thresh, *nla_tun_type, *nla_tun_port;
+ *nla_l_thresh, *nla_tun_type, *nla_tun_port,
+ *nla_tun_flags;
nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
@@ -3340,6 +3401,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
nla_tun_type = attrs[IPVS_DEST_ATTR_TUN_TYPE];
nla_tun_port = attrs[IPVS_DEST_ATTR_TUN_PORT];
+ nla_tun_flags = attrs[IPVS_DEST_ATTR_TUN_FLAGS];
if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
return -EINVAL;
@@ -3355,6 +3417,9 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
if (nla_tun_port)
udest->tun_port = nla_get_be16(nla_tun_port);
+
+ if (nla_tun_flags)
+ udest->tun_flags = nla_get_u16(nla_tun_flags);
}
return 0;
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index c244b2545e24..cf925906f59b 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -267,7 +267,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
return 1;
/* Linear packets are much easier to deal with. */
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, skb->len))
return 0;
if (cp->app_data == (void *) IP_VS_FTP_PASV) {
@@ -433,7 +433,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
return 1;
/* Linear packets are much easier to deal with. */
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, skb->len))
return 0;
data = data_start = ip_vs_ftp_data_ptr(skb, ipvsh);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index b58ddb7dffd1..a0921adc31a9 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -101,7 +101,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
#endif
/* csum_check requires unshared skb */
- if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
+ if (skb_ensure_writable(skb, sctphoff + sizeof(*sctph)))
return 0;
if (unlikely(cp->app != NULL)) {
@@ -148,7 +148,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
#endif
/* csum_check requires unshared skb */
- if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
+ if (skb_ensure_writable(skb, sctphoff + sizeof(*sctph)))
return 0;
if (unlikely(cp->app != NULL)) {
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 915ac8206076..000d961b97e4 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -159,7 +159,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
oldlen = skb->len - tcphoff;
/* csum_check requires unshared skb */
- if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
+ if (skb_ensure_writable(skb, tcphoff + sizeof(*tcph)))
return 0;
if (unlikely(cp->app != NULL)) {
@@ -237,7 +237,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
oldlen = skb->len - tcphoff;
/* csum_check requires unshared skb */
- if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
+ if (skb_ensure_writable(skb, tcphoff + sizeof(*tcph)))
return 0;
if (unlikely(cp->app != NULL)) {
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 379140075e95..153d89647c87 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -148,7 +148,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
oldlen = skb->len - udphoff;
/* csum_check requires unshared skb */
- if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
+ if (skb_ensure_writable(skb, udphoff + sizeof(*udph)))
return 0;
if (unlikely(cp->app != NULL)) {
@@ -231,7 +231,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
oldlen = skb->len - udphoff;
/* csum_check requires unshared skb */
- if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
+ if (skb_ensure_writable(skb, udphoff + sizeof(*udph)))
return 0;
if (unlikely(cp->app != NULL)) {
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index e101eda05d55..71fc6d63a67f 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -36,6 +36,7 @@
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/ip_tunnels.h>
+#include <net/ip6_checksum.h>
#include <net/addrconf.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
@@ -275,7 +276,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs,
}
/* don't propagate ttl change to cloned packets */
- if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+ if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
return false;
ipv6_hdr(skb)->hop_limit--;
@@ -290,7 +291,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs,
}
/* don't propagate ttl change to cloned packets */
- if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return false;
/* Decrease ttl */
@@ -381,8 +382,13 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
if (!dest)
goto err_put;
- if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
+ if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
mtu -= sizeof(struct udphdr) + sizeof(struct guehdr);
+ if ((dest->tun_flags &
+ IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL)
+ mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ }
if (mtu < 68) {
IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
goto err_put;
@@ -536,8 +542,13 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
if (!dest)
goto err_put;
- if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
+ if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
mtu -= sizeof(struct udphdr) + sizeof(struct guehdr);
+ if ((dest->tun_flags &
+ IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL)
+ mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ }
if (mtu < IPV6_MIN_MTU) {
IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
IPV6_MIN_MTU);
@@ -792,7 +803,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
}
/* copy-on-write the packet before mangling it */
- if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
@@ -881,7 +892,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
}
/* copy-on-write the packet before mangling it */
- if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+ if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
@@ -1002,17 +1013,56 @@ ipvs_gue_encap(struct net *net, struct sk_buff *skb,
__be16 sport = udp_flow_src_port(net, skb, 0, 0, false);
struct udphdr *udph; /* Our new UDP header */
struct guehdr *gueh; /* Our new GUE header */
+ size_t hdrlen, optlen = 0;
+ void *data;
+ bool need_priv = false;
+
+ if ((cp->dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ need_priv = true;
+ }
- skb_push(skb, sizeof(struct guehdr));
+ hdrlen = sizeof(struct guehdr) + optlen;
+
+ skb_push(skb, hdrlen);
gueh = (struct guehdr *)skb->data;
gueh->control = 0;
gueh->version = 0;
- gueh->hlen = 0;
+ gueh->hlen = optlen >> 2;
gueh->flags = 0;
gueh->proto_ctype = *next_protocol;
+ data = &gueh[1];
+
+ if (need_priv) {
+ __be32 *flags = data;
+ u16 csum_start = skb_checksum_start_offset(skb);
+ __be16 *pd;
+
+ gueh->flags |= GUE_FLAG_PRIV;
+ *flags = 0;
+ data += GUE_LEN_PRIV;
+
+ if (csum_start < hdrlen)
+ return -EINVAL;
+
+ csum_start -= hdrlen;
+ pd = data;
+ pd[0] = htons(csum_start);
+ pd[1] = htons(csum_start + skb->csum_offset);
+
+ if (!skb_is_gso(skb)) {
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->encapsulation = 0;
+ }
+
+ *flags |= GUE_PFLAG_REMCSUM;
+ data += GUE_PLEN_REMCSUM;
+ }
+
skb_push(skb, sizeof(struct udphdr));
skb_reset_transport_header(skb);
@@ -1066,6 +1116,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
unsigned int max_headroom; /* The extra header space needed */
int ret, local;
int tun_type, gso_type;
+ int tun_flags;
EnterFunction(10);
@@ -1088,9 +1139,19 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
tun_type = cp->dest->tun_type;
+ tun_flags = cp->dest->tun_flags;
- if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
- max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr);
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ size_t gue_hdrlen, gue_optlen = 0;
+
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ gue_optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ }
+ gue_hdrlen = sizeof(struct guehdr) + gue_optlen;
+
+ max_headroom += sizeof(struct udphdr) + gue_hdrlen;
+ }
/* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */
dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL;
@@ -1101,8 +1162,17 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error;
gso_type = __tun_gso_type_mask(AF_INET, cp->af);
- if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
- gso_type |= SKB_GSO_UDP_TUNNEL;
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+ (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+ gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+ else
+ gso_type |= SKB_GSO_UDP_TUNNEL;
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ gso_type |= SKB_GSO_TUNNEL_REMCSUM;
+ }
+ }
if (iptunnel_handle_offloads(skb, gso_type))
goto tx_error;
@@ -1111,8 +1181,19 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb_set_inner_ipproto(skb, next_protocol);
- if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
- ipvs_gue_encap(net, skb, cp, &next_protocol);
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ bool check = false;
+
+ if (ipvs_gue_encap(net, skb, cp, &next_protocol))
+ goto tx_error;
+
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+ (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+ check = true;
+
+ udp_set_csum(!check, skb, saddr, cp->daddr.ip, skb->len);
+ }
+
skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
@@ -1170,6 +1251,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
unsigned int max_headroom; /* The extra header space needed */
int ret, local;
int tun_type, gso_type;
+ int tun_flags;
EnterFunction(10);
@@ -1193,9 +1275,19 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
tun_type = cp->dest->tun_type;
+ tun_flags = cp->dest->tun_flags;
- if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
- max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr);
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ size_t gue_hdrlen, gue_optlen = 0;
+
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ gue_optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ }
+ gue_hdrlen = sizeof(struct guehdr) + gue_optlen;
+
+ max_headroom += sizeof(struct udphdr) + gue_hdrlen;
+ }
skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
&next_protocol, &payload_len,
@@ -1204,8 +1296,17 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error;
gso_type = __tun_gso_type_mask(AF_INET6, cp->af);
- if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
- gso_type |= SKB_GSO_UDP_TUNNEL;
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+ (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+ gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+ else
+ gso_type |= SKB_GSO_UDP_TUNNEL;
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ gso_type |= SKB_GSO_TUNNEL_REMCSUM;
+ }
+ }
if (iptunnel_handle_offloads(skb, gso_type))
goto tx_error;
@@ -1214,8 +1315,18 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb_set_inner_ipproto(skb, next_protocol);
- if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
- ipvs_gue_encap(net, skb, cp, &next_protocol);
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ bool check = false;
+
+ if (ipvs_gue_encap(net, skb, cp, &next_protocol))
+ goto tx_error;
+
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+ (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+ check = true;
+
+ udp6_set_csum(!check, skb, &saddr, &cp->daddr.in6, skb->len);
+ }
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
@@ -1400,7 +1511,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
}
/* copy-on-write the packet before mangling it */
- if (!skb_make_writable(skb, offset))
+ if (skb_ensure_writable(skb, offset))
goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
@@ -1489,7 +1600,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
}
/* copy-on-write the packet before mangling it */
- if (!skb_make_writable(skb, offset))
+ if (skb_ensure_writable(skb, offset))
goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index e52fcb9c9a96..921a7b95be68 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -37,12 +37,17 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
in_dev = __in_dev_get_rcu(rt->dst.dev);
if (in_dev != NULL) {
- for_primary_ifa(in_dev) {
+ const struct in_ifaddr *ifa;
+
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
+ if (ifa->ifa_flags & IFA_F_SECONDARY)
+ continue;
+
if (ifa->ifa_broadcast == iph->daddr) {
mask = ifa->ifa_mask;
break;
}
- } endfor_ifa(in_dev);
+ }
}
if (mask == 0)
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 37bb530d848f..a0560d175a7f 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -16,6 +16,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_bridge.h>
#include <net/netfilter/nf_log.h>
#include <linux/ip.h>
@@ -120,10 +121,8 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto)
};
EXPORT_SYMBOL_GPL(nf_ct_l4proto_find);
-static unsigned int nf_confirm(struct sk_buff *skb,
- unsigned int protoff,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
+unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
const struct nf_conn_help *help;
@@ -154,6 +153,7 @@ static unsigned int nf_confirm(struct sk_buff *skb,
/* We've seen it coming out the other side: confirm it */
return nf_conntrack_confirm(skb);
}
+EXPORT_SYMBOL_GPL(nf_confirm);
static unsigned int ipv4_confirm(void *priv,
struct sk_buff *skb,
@@ -442,12 +442,14 @@ static int nf_ct_tcp_fixup(struct nf_conn *ct, void *_nfproto)
return 0;
}
+static struct nf_ct_bridge_info *nf_ct_bridge_info;
+
static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
{
struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
- bool fixup_needed = false;
+ bool fixup_needed = false, retry = true;
int err = 0;
-
+retry:
mutex_lock(&nf_ct_proto_mutex);
switch (nfproto) {
@@ -487,6 +489,32 @@ static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
fixup_needed = true;
break;
#endif
+ case NFPROTO_BRIDGE:
+ if (!nf_ct_bridge_info) {
+ if (!retry) {
+ err = -EPROTO;
+ goto out_unlock;
+ }
+ mutex_unlock(&nf_ct_proto_mutex);
+ request_module("nf_conntrack_bridge");
+ retry = false;
+ goto retry;
+ }
+ if (!try_module_get(nf_ct_bridge_info->me)) {
+ err = -EPROTO;
+ goto out_unlock;
+ }
+ cnet->users_bridge++;
+ if (cnet->users_bridge > 1)
+ goto out_unlock;
+
+ err = nf_register_net_hooks(net, nf_ct_bridge_info->ops,
+ nf_ct_bridge_info->ops_size);
+ if (err)
+ cnet->users_bridge = 0;
+ else
+ fixup_needed = true;
+ break;
default:
err = -EPROTO;
break;
@@ -519,47 +547,99 @@ static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
ARRAY_SIZE(ipv6_conntrack_ops));
break;
#endif
+ case NFPROTO_BRIDGE:
+ if (!nf_ct_bridge_info)
+ break;
+ if (cnet->users_bridge && (--cnet->users_bridge == 0))
+ nf_unregister_net_hooks(net, nf_ct_bridge_info->ops,
+ nf_ct_bridge_info->ops_size);
+
+ module_put(nf_ct_bridge_info->me);
+ break;
}
-
mutex_unlock(&nf_ct_proto_mutex);
}
-int nf_ct_netns_get(struct net *net, u8 nfproto)
+static int nf_ct_netns_inet_get(struct net *net)
{
int err;
- if (nfproto == NFPROTO_INET) {
- err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
- if (err < 0)
- goto err1;
- err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
- if (err < 0)
- goto err2;
- } else {
- err = nf_ct_netns_do_get(net, nfproto);
- if (err < 0)
- goto err1;
- }
- return 0;
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
+ if (err < 0)
+ goto err1;
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
+ if (err < 0)
+ goto err2;
+ return err;
err2:
nf_ct_netns_put(net, NFPROTO_IPV4);
err1:
return err;
}
+
+int nf_ct_netns_get(struct net *net, u8 nfproto)
+{
+ int err;
+
+ switch (nfproto) {
+ case NFPROTO_INET:
+ err = nf_ct_netns_inet_get(net);
+ break;
+ case NFPROTO_BRIDGE:
+ err = nf_ct_netns_do_get(net, NFPROTO_BRIDGE);
+ if (err < 0)
+ return err;
+
+ err = nf_ct_netns_inet_get(net);
+ if (err < 0) {
+ nf_ct_netns_put(net, NFPROTO_BRIDGE);
+ return err;
+ }
+ break;
+ default:
+ err = nf_ct_netns_do_get(net, nfproto);
+ break;
+ }
+ return err;
+}
EXPORT_SYMBOL_GPL(nf_ct_netns_get);
void nf_ct_netns_put(struct net *net, uint8_t nfproto)
{
- if (nfproto == NFPROTO_INET) {
+ switch (nfproto) {
+ case NFPROTO_BRIDGE:
+ nf_ct_netns_do_put(net, NFPROTO_BRIDGE);
+ /* fall through */
+ case NFPROTO_INET:
nf_ct_netns_do_put(net, NFPROTO_IPV4);
nf_ct_netns_do_put(net, NFPROTO_IPV6);
- } else {
+ break;
+ default:
nf_ct_netns_do_put(net, nfproto);
+ break;
}
}
EXPORT_SYMBOL_GPL(nf_ct_netns_put);
+void nf_ct_bridge_register(struct nf_ct_bridge_info *info)
+{
+ WARN_ON(nf_ct_bridge_info);
+ mutex_lock(&nf_ct_proto_mutex);
+ nf_ct_bridge_info = info;
+ mutex_unlock(&nf_ct_proto_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_ct_bridge_register);
+
+void nf_ct_bridge_unregister(struct nf_ct_bridge_info *info)
+{
+ WARN_ON(!nf_ct_bridge_info);
+ mutex_lock(&nf_ct_proto_mutex);
+ nf_ct_bridge_info = NULL;
+ mutex_unlock(&nf_ct_proto_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_ct_bridge_unregister);
+
int nf_conntrack_proto_init(void)
{
int ret;
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 5b8dde266412..07c5208a4ea0 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -339,7 +339,7 @@ static bool sctp_error(struct sk_buff *skb,
if (state->hook == NF_INET_PRE_ROUTING &&
state->net->ct.sysctl_checksum &&
skb->ip_summed == CHECKSUM_NONE) {
- if (!skb_make_writable(skb, dataoff + sizeof(struct sctphdr))) {
+ if (skb_ensure_writable(skb, dataoff + sizeof(*sh))) {
logmsg = "nf_ct_sctp: failed to read header ";
goto out_invalid;
}
diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
index dc21a43cd145..3066449f8bd8 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -126,7 +126,7 @@ static unsigned int nf_ct_sack_adjust(struct sk_buff *skb,
optoff = protoff + sizeof(struct tcphdr);
optend = protoff + tcph->doff * 4;
- if (!skb_make_writable(skb, optend))
+ if (skb_ensure_writable(skb, optend))
return 0;
tcph = (void *)skb->data + protoff;
@@ -176,7 +176,7 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
this_way = &seqadj->seq[dir];
other_way = &seqadj->seq[!dir];
- if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
+ if (skb_ensure_writable(skb, protoff + sizeof(*tcph)))
return 0;
tcph = (void *)skb->data + protoff;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 948b4ebbe3fb..e3d797252a98 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -53,7 +53,6 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
ft->dst_port = ctt->dst.u.tcp.port;
ft->iifidx = other_dst->dev->ifindex;
- ft->oifidx = dst->dev->ifindex;
ft->dst_cache = dst;
}
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 53aeb12b70fb..e85423fd7b3c 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -98,7 +98,7 @@ bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
struct tcphdr *tcph;
int oldlen, datalen;
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, skb->len))
return false;
if (rep_len > match_len &&
@@ -148,7 +148,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
struct udphdr *udph;
int datalen, oldlen;
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, skb->len))
return false;
if (rep_len > match_len &&
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index 84f5c90a7f21..04a6c1ac2526 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -73,7 +73,7 @@ static bool udp_manip_pkt(struct sk_buff *skb,
struct udphdr *hdr;
bool do_csum;
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct udphdr *)(skb->data + hdroff);
@@ -91,7 +91,7 @@ static bool udplite_manip_pkt(struct sk_buff *skb,
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
struct udphdr *hdr;
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct udphdr *)(skb->data + hdroff);
@@ -117,7 +117,7 @@ sctp_manip_pkt(struct sk_buff *skb,
if (skb->len >= hdroff + sizeof(*hdr))
hdrsize = sizeof(*hdr);
- if (!skb_make_writable(skb, hdroff + hdrsize))
+ if (skb_ensure_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct sctphdr *)(skb->data + hdroff);
@@ -158,7 +158,7 @@ tcp_manip_pkt(struct sk_buff *skb,
if (skb->len >= hdroff + sizeof(struct tcphdr))
hdrsize = sizeof(struct tcphdr);
- if (!skb_make_writable(skb, hdroff + hdrsize))
+ if (skb_ensure_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct tcphdr *)(skb->data + hdroff);
@@ -198,7 +198,7 @@ dccp_manip_pkt(struct sk_buff *skb,
if (skb->len >= hdroff + sizeof(struct dccp_hdr))
hdrsize = sizeof(struct dccp_hdr);
- if (!skb_make_writable(skb, hdroff + hdrsize))
+ if (skb_ensure_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct dccp_hdr *)(skb->data + hdroff);
@@ -232,7 +232,7 @@ icmp_manip_pkt(struct sk_buff *skb,
{
struct icmphdr *hdr;
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct icmphdr *)(skb->data + hdroff);
@@ -250,7 +250,7 @@ icmpv6_manip_pkt(struct sk_buff *skb,
{
struct icmp6hdr *hdr;
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct icmp6hdr *)(skb->data + hdroff);
@@ -278,7 +278,7 @@ gre_manip_pkt(struct sk_buff *skb,
/* pgreh includes two optional 32bit fields which are not required
* to be there. That's where the magic '8' comes from */
- if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
+ if (skb_ensure_writable(skb, hdroff + sizeof(*pgreh) - 8))
return false;
greh = (void *)skb->data + hdroff;
@@ -350,7 +350,7 @@ static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
struct iphdr *iph;
unsigned int hdroff;
- if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
+ if (skb_ensure_writable(skb, iphdroff + sizeof(*iph)))
return false;
iph = (void *)skb->data + iphdroff;
@@ -381,7 +381,7 @@ static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
int hdroff;
u8 nexthdr;
- if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
+ if (skb_ensure_writable(skb, iphdroff + sizeof(*ipv6h)))
return false;
ipv6h = (void *)skb->data + iphdroff;
@@ -565,7 +565,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
- if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+ if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
return 0;
if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
return 0;
@@ -787,7 +787,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
- if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+ if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
return 0;
if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
return 0;
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index 78a9e6454ff3..8598e80968e0 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -47,15 +47,17 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
if (hooknum == NF_INET_LOCAL_OUT) {
newdst = htonl(0x7F000001);
} else {
- struct in_device *indev;
- struct in_ifaddr *ifa;
+ const struct in_device *indev;
newdst = 0;
indev = __in_dev_get_rcu(skb->dev);
- if (indev && indev->ifa_list) {
- ifa = indev->ifa_list;
- newdst = ifa->ifa_local;
+ if (indev) {
+ const struct in_ifaddr *ifa;
+
+ ifa = rcu_dereference(indev->ifa_list);
+ if (ifa)
+ newdst = ifa->ifa_local;
}
if (!newdst)
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index 464387b3600f..07805bf4d62a 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -285,7 +285,7 @@ next:
if (dir == IP_CT_DIR_REPLY && ct_sip_info->forced_dport) {
struct udphdr *uh;
- if (!skb_make_writable(skb, skb->len)) {
+ if (skb_ensure_writable(skb, skb->len)) {
nf_ct_helper_log(skb, ct, "cannot mangle packet");
return NF_DROP;
}
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 8ff4d22f10b2..3d58a9e93e5a 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -196,7 +196,7 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb,
optoff = protoff + sizeof(struct tcphdr);
optend = protoff + th->doff * 4;
- if (!skb_make_writable(skb, optend))
+ if (skb_ensure_writable(skb, optend))
return 0;
while (optoff < optend) {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4b5159936034..d444405211c5 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1449,25 +1449,18 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
return newstats;
}
-static void nft_chain_stats_replace(struct net *net,
- struct nft_base_chain *chain,
- struct nft_stats __percpu *newstats)
+static void nft_chain_stats_replace(struct nft_trans *trans)
{
- struct nft_stats __percpu *oldstats;
+ struct nft_base_chain *chain = nft_base_chain(trans->ctx.chain);
- if (newstats == NULL)
+ if (!nft_trans_chain_stats(trans))
return;
- if (rcu_access_pointer(chain->stats)) {
- oldstats = rcu_dereference_protected(chain->stats,
- lockdep_commit_lock_is_held(net));
- rcu_assign_pointer(chain->stats, newstats);
- synchronize_rcu();
- free_percpu(oldstats);
- } else {
- rcu_assign_pointer(chain->stats, newstats);
+ rcu_swap_protected(chain->stats, nft_trans_chain_stats(trans),
+ lockdep_commit_lock_is_held(trans->ctx.net));
+
+ if (!nft_trans_chain_stats(trans))
static_branch_inc(&nft_counters_enabled);
- }
}
static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
@@ -6362,9 +6355,9 @@ static void nft_chain_commit_update(struct nft_trans *trans)
if (!nft_is_base_chain(trans->ctx.chain))
return;
+ nft_chain_stats_replace(trans);
+
basechain = nft_base_chain(trans->ctx.chain);
- nft_chain_stats_replace(trans->ctx.net, basechain,
- nft_trans_chain_stats(trans));
switch (nft_trans_chain_policy(trans)) {
case NF_DROP:
@@ -6381,6 +6374,7 @@ static void nft_commit_release(struct nft_trans *trans)
nf_tables_table_destroy(&trans->ctx);
break;
case NFT_MSG_NEWCHAIN:
+ free_percpu(nft_trans_chain_stats(trans));
kfree(nft_trans_chain_name(trans));
break;
case NFT_MSG_DELCHAIN:
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index f42326b40d6f..9f5dea0064ea 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -33,6 +33,7 @@ static inline int nf_osf_ttl(const struct sk_buff *skb,
{
struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
const struct iphdr *ip = ip_hdr(skb);
+ const struct in_ifaddr *ifa;
int ret = 0;
if (ttl_check == NF_OSF_TTL_TRUE)
@@ -42,15 +43,13 @@ static inline int nf_osf_ttl(const struct sk_buff *skb,
else if (ip->ttl <= f_ttl)
return 1;
- for_ifa(in_dev) {
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
if (inet_ifa_match(ip->saddr, ifa)) {
ret = (ip->ttl == f_ttl);
break;
}
}
- endfor_ifa(in_dev);
-
return ret;
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 27dac47b29c2..831f57008d78 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -863,7 +863,7 @@ nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
}
skb_put(e->skb, diff);
}
- if (!skb_make_writable(e->skb, data_len))
+ if (skb_ensure_writable(e->skb, data_len))
return -ENOMEM;
skb_copy_to_linear_data(e->skb, data, data_len);
e->skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index a940c9fd9045..45c8a6c07783 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -156,7 +156,8 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
return;
- if (!skb_make_writable(pkt->skb, pkt->xt.thoff + i + priv->len))
+ if (skb_ensure_writable(pkt->skb,
+ pkt->xt.thoff + i + priv->len))
return;
tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 54e15de4b79a..1465b7d6d2b0 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -243,7 +243,7 @@ static int nft_payload_l4csum_update(const struct nft_pktinfo *pkt,
tsum));
}
- if (!skb_make_writable(skb, l4csum_offset + sizeof(sum)) ||
+ if (skb_ensure_writable(skb, l4csum_offset + sizeof(sum)) ||
skb_store_bits(skb, l4csum_offset, &sum, sizeof(sum)) < 0)
return -1;
@@ -259,7 +259,7 @@ static int nft_payload_csum_inet(struct sk_buff *skb, const u32 *src,
return -1;
nft_csum_replace(&sum, fsum, tsum);
- if (!skb_make_writable(skb, csum_offset + sizeof(sum)) ||
+ if (skb_ensure_writable(skb, csum_offset + sizeof(sum)) ||
skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
return -1;
@@ -312,7 +312,7 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
goto err;
}
- if (!skb_make_writable(skb, max(offset + priv->len, 0)) ||
+ if (skb_ensure_writable(skb, max(offset + priv->len, 0)) ||
skb_store_bits(skb, offset, src, priv->len) < 0)
goto err;
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 098ed851b7a7..30d554d6c213 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -34,7 +34,7 @@ dscp_tg(struct sk_buff *skb, const struct xt_action_param *par)
u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
if (dscp != dinfo->dscp) {
- if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return NF_DROP;
ipv4_change_dsfield(ip_hdr(skb),
@@ -52,7 +52,7 @@ dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par)
u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
if (dscp != dinfo->dscp) {
- if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+ if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
return NF_DROP;
ipv6_change_dsfield(ipv6_hdr(skb),
@@ -82,7 +82,7 @@ tos_tg(struct sk_buff *skb, const struct xt_action_param *par)
nv = (orig & ~info->tos_mask) ^ info->tos_value;
if (orig != nv) {
- if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return NF_DROP;
iph = ip_hdr(skb);
ipv4_change_dsfield(iph, 0, nv);
@@ -102,7 +102,7 @@ tos_tg6(struct sk_buff *skb, const struct xt_action_param *par)
nv = (orig & ~info->tos_mask) ^ info->tos_value;
if (orig != nv) {
- if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return NF_DROP;
iph = ipv6_hdr(skb);
ipv6_change_dsfield(iph, 0, nv);
diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c
index 4653b071bed4..a37b8824221f 100644
--- a/net/netfilter/xt_HL.c
+++ b/net/netfilter/xt_HL.c
@@ -32,7 +32,7 @@ ttl_tg(struct sk_buff *skb, const struct xt_action_param *par)
const struct ipt_TTL_info *info = par->targinfo;
int new_ttl;
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, sizeof(*iph)))
return NF_DROP;
iph = ip_hdr(skb);
@@ -72,7 +72,7 @@ hl_tg6(struct sk_buff *skb, const struct xt_action_param *par)
const struct ip6t_HL_info *info = par->targinfo;
int new_hl;
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, sizeof(*ip6h)))
return NF_DROP;
ip6h = ipv6_hdr(skb);
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 98efb202f8b4..3e24443ab81c 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -89,7 +89,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
if (par->fragoff != 0)
return 0;
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, skb->len))
return -1;
len = skb->len - tcphoff;
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index eb92bffff11c..5a274813076a 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -31,33 +31,33 @@ static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset)
static unsigned int
tcpoptstrip_mangle_packet(struct sk_buff *skb,
const struct xt_action_param *par,
- unsigned int tcphoff, unsigned int minlen)
+ unsigned int tcphoff)
{
const struct xt_tcpoptstrip_target_info *info = par->targinfo;
+ struct tcphdr *tcph, _th;
unsigned int optl, i, j;
- struct tcphdr *tcph;
u_int16_t n, o;
u_int8_t *opt;
- int len, tcp_hdrlen;
+ int tcp_hdrlen;
/* This is a fragment, no TCP header is available */
if (par->fragoff != 0)
return XT_CONTINUE;
- if (!skb_make_writable(skb, skb->len))
+ tcph = skb_header_pointer(skb, tcphoff, sizeof(_th), &_th);
+ if (!tcph)
return NF_DROP;
- len = skb->len - tcphoff;
- if (len < (int)sizeof(struct tcphdr))
- return NF_DROP;
-
- tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
tcp_hdrlen = tcph->doff * 4;
+ if (tcp_hdrlen < sizeof(struct tcphdr))
+ return NF_DROP;
- if (len < tcp_hdrlen)
+ if (skb_ensure_writable(skb, tcphoff + tcp_hdrlen))
return NF_DROP;
- opt = (u_int8_t *)tcph;
+ /* must reload tcph, might have been moved */
+ tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
+ opt = (u8 *)tcph;
/*
* Walk through all TCP options - if we find some option to remove,
@@ -91,8 +91,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb,
static unsigned int
tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
- return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb),
- sizeof(struct iphdr) + sizeof(struct tcphdr));
+ return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb));
}
#if IS_ENABLED(CONFIG_IP6_NF_MANGLE)
@@ -109,8 +108,7 @@ tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par)
if (tcphoff < 0)
return NF_DROP;
- return tcpoptstrip_mangle_packet(skb, par, tcphoff,
- sizeof(*ipv6h) + sizeof(struct tcphdr));
+ return tcpoptstrip_mangle_packet(skb, par, tcphoff);
}
#endif
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 46686fb73784..a8784502aca6 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -91,11 +91,28 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
}
if (info->match & XT_OWNER_GID) {
+ unsigned int i, match = false;
kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);
- if ((gid_gte(filp->f_cred->fsgid, gid_min) &&
- gid_lte(filp->f_cred->fsgid, gid_max)) ^
- !(info->invert & XT_OWNER_GID))
+ struct group_info *gi = filp->f_cred->group_info;
+
+ if (gid_gte(filp->f_cred->fsgid, gid_min) &&
+ gid_lte(filp->f_cred->fsgid, gid_max))
+ match = true;
+
+ if (!match && (info->match & XT_OWNER_SUPPL_GROUPS) && gi) {
+ for (i = 0; i < gi->ngroups; ++i) {
+ kgid_t group = gi->gid[i];
+
+ if (gid_gte(group, gid_min) &&
+ gid_lte(group, gid_max)) {
+ match = true;
+ break;
+ }
+ }
+ }
+
+ if (match ^ !(info->invert & XT_OWNER_GID))
return false;
}