diff options
Diffstat (limited to 'net/netfilter/ipvs/ip_vs_xmit.c')
-rw-r--r-- | net/netfilter/ipvs/ip_vs_xmit.c | 215 |
1 files changed, 193 insertions, 22 deletions
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index e101eda05d55..9c464d24beec 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -29,6 +29,7 @@ #include <linux/tcp.h> /* for tcphdr */ #include <net/ip.h> #include <net/gue.h> +#include <net/gre.h> #include <net/tcp.h> /* for csum_tcpudp_magic */ #include <net/udp.h> #include <net/icmp.h> /* for icmp_send */ @@ -36,6 +37,7 @@ #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/ip_tunnels.h> +#include <net/ip6_checksum.h> #include <net/addrconf.h> #include <linux/icmpv6.h> #include <linux/netfilter.h> @@ -275,7 +277,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs, } /* don't propagate ttl change to cloned packets */ - if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) + if (skb_ensure_writable(skb, sizeof(struct ipv6hdr))) return false; ipv6_hdr(skb)->hop_limit--; @@ -290,7 +292,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs, } /* don't propagate ttl change to cloned packets */ - if (!skb_make_writable(skb, sizeof(struct iphdr))) + if (skb_ensure_writable(skb, sizeof(struct iphdr))) return false; /* Decrease ttl */ @@ -381,8 +383,19 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (!dest) goto err_put; - if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { mtu -= sizeof(struct udphdr) + sizeof(struct guehdr); + if ((dest->tun_flags & + IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) && + skb->ip_summed == CHECKSUM_PARTIAL) + mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV; + } else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + __be16 tflags = 0; + + if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) + tflags |= TUNNEL_CSUM; + mtu -= gre_calc_hlen(tflags); + } if (mtu < 68) { IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); goto err_put; @@ -536,8 +549,19 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); if (!dest) goto err_put; - if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { mtu -= sizeof(struct udphdr) + sizeof(struct guehdr); + if ((dest->tun_flags & + IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) && + skb->ip_summed == CHECKSUM_PARTIAL) + mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV; + } else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + __be16 tflags = 0; + + if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) + tflags |= TUNNEL_CSUM; + mtu -= gre_calc_hlen(tflags); + } if (mtu < IPV6_MIN_MTU) { IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, IPV6_MIN_MTU); @@ -792,7 +816,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } /* copy-on-write the packet before mangling it */ - if (!skb_make_writable(skb, sizeof(struct iphdr))) + if (skb_ensure_writable(skb, sizeof(struct iphdr))) goto tx_error; if (skb_cow(skb, rt->dst.dev->hard_header_len)) @@ -881,7 +905,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } /* copy-on-write the packet before mangling it */ - if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) + if (skb_ensure_writable(skb, sizeof(struct ipv6hdr))) goto tx_error; if (skb_cow(skb, rt->dst.dev->hard_header_len)) @@ -1002,17 +1026,56 @@ ipvs_gue_encap(struct net *net, struct sk_buff *skb, __be16 sport = udp_flow_src_port(net, skb, 0, 0, false); struct udphdr *udph; /* Our new UDP header */ struct guehdr *gueh; /* Our new GUE header */ + size_t hdrlen, optlen = 0; + void *data; + bool need_priv = false; + + if ((cp->dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) && + skb->ip_summed == CHECKSUM_PARTIAL) { + optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV; + need_priv = true; + } - skb_push(skb, sizeof(struct guehdr)); + hdrlen = sizeof(struct guehdr) + optlen; + + skb_push(skb, hdrlen); gueh = (struct guehdr *)skb->data; gueh->control = 0; gueh->version = 0; - gueh->hlen = 0; + gueh->hlen = optlen >> 2; gueh->flags = 0; gueh->proto_ctype = *next_protocol; + data = &gueh[1]; + + if (need_priv) { + __be32 *flags = data; + u16 csum_start = skb_checksum_start_offset(skb); + __be16 *pd; + + gueh->flags |= GUE_FLAG_PRIV; + *flags = 0; + data += GUE_LEN_PRIV; + + if (csum_start < hdrlen) + return -EINVAL; + + csum_start -= hdrlen; + pd = data; + pd[0] = htons(csum_start); + pd[1] = htons(csum_start + skb->csum_offset); + + if (!skb_is_gso(skb)) { + skb->ip_summed = CHECKSUM_NONE; + skb->encapsulation = 0; + } + + *flags |= GUE_PFLAG_REMCSUM; + data += GUE_PLEN_REMCSUM; + } + skb_push(skb, sizeof(struct udphdr)); skb_reset_transport_header(skb); @@ -1029,6 +1092,24 @@ ipvs_gue_encap(struct net *net, struct sk_buff *skb, return 0; } +static void +ipvs_gre_encap(struct net *net, struct sk_buff *skb, + struct ip_vs_conn *cp, __u8 *next_protocol) +{ + __be16 proto = *next_protocol == IPPROTO_IPIP ? + htons(ETH_P_IP) : htons(ETH_P_IPV6); + __be16 tflags = 0; + size_t hdrlen; + + if (cp->dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) + tflags |= TUNNEL_CSUM; + + hdrlen = gre_calc_hlen(tflags); + gre_build_header(skb, hdrlen, tflags, proto, 0, 0); + + *next_protocol = IPPROTO_GRE; +} + /* * IP Tunneling transmitter * @@ -1066,6 +1147,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int max_headroom; /* The extra header space needed */ int ret, local; int tun_type, gso_type; + int tun_flags; EnterFunction(10); @@ -1088,9 +1170,28 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); tun_type = cp->dest->tun_type; + tun_flags = cp->dest->tun_flags; + + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { + size_t gue_hdrlen, gue_optlen = 0; + + if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) && + skb->ip_summed == CHECKSUM_PARTIAL) { + gue_optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV; + } + gue_hdrlen = sizeof(struct guehdr) + gue_optlen; + + max_headroom += sizeof(struct udphdr) + gue_hdrlen; + } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + size_t gre_hdrlen; + __be16 tflags = 0; - if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) - max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr); + if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) + tflags |= TUNNEL_CSUM; + gre_hdrlen = gre_calc_hlen(tflags); + + max_headroom += gre_hdrlen; + } /* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */ dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL; @@ -1101,8 +1202,22 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; gso_type = __tun_gso_type_mask(AF_INET, cp->af); - if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) - gso_type |= SKB_GSO_UDP_TUNNEL; + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { + if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) || + (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM)) + gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; + else + gso_type |= SKB_GSO_UDP_TUNNEL; + if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) && + skb->ip_summed == CHECKSUM_PARTIAL) { + gso_type |= SKB_GSO_TUNNEL_REMCSUM; + } + } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) + gso_type |= SKB_GSO_GRE_CSUM; + else + gso_type |= SKB_GSO_GRE; + } if (iptunnel_handle_offloads(skb, gso_type)) goto tx_error; @@ -1111,8 +1226,19 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb_set_inner_ipproto(skb, next_protocol); - if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) - ipvs_gue_encap(net, skb, cp, &next_protocol); + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { + bool check = false; + + if (ipvs_gue_encap(net, skb, cp, &next_protocol)) + goto tx_error; + + if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) || + (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM)) + check = true; + + udp_set_csum(!check, skb, saddr, cp->daddr.ip, skb->len); + } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) + ipvs_gre_encap(net, skb, cp, &next_protocol); skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); @@ -1170,6 +1296,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int max_headroom; /* The extra header space needed */ int ret, local; int tun_type, gso_type; + int tun_flags; EnterFunction(10); @@ -1193,9 +1320,28 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); tun_type = cp->dest->tun_type; + tun_flags = cp->dest->tun_flags; + + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { + size_t gue_hdrlen, gue_optlen = 0; + + if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) && + skb->ip_summed == CHECKSUM_PARTIAL) { + gue_optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV; + } + gue_hdrlen = sizeof(struct guehdr) + gue_optlen; + + max_headroom += sizeof(struct udphdr) + gue_hdrlen; + } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + size_t gre_hdrlen; + __be16 tflags = 0; + + if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) + tflags |= TUNNEL_CSUM; + gre_hdrlen = gre_calc_hlen(tflags); - if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) - max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr); + max_headroom += gre_hdrlen; + } skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom, &next_protocol, &payload_len, @@ -1204,8 +1350,22 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; gso_type = __tun_gso_type_mask(AF_INET6, cp->af); - if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) - gso_type |= SKB_GSO_UDP_TUNNEL; + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { + if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) || + (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM)) + gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; + else + gso_type |= SKB_GSO_UDP_TUNNEL; + if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) && + skb->ip_summed == CHECKSUM_PARTIAL) { + gso_type |= SKB_GSO_TUNNEL_REMCSUM; + } + } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) + gso_type |= SKB_GSO_GRE_CSUM; + else + gso_type |= SKB_GSO_GRE; + } if (iptunnel_handle_offloads(skb, gso_type)) goto tx_error; @@ -1214,8 +1374,19 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb_set_inner_ipproto(skb, next_protocol); - if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) - ipvs_gue_encap(net, skb, cp, &next_protocol); + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { + bool check = false; + + if (ipvs_gue_encap(net, skb, cp, &next_protocol)) + goto tx_error; + + if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) || + (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM)) + check = true; + + udp6_set_csum(!check, skb, &saddr, &cp->daddr.in6, skb->len); + } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) + ipvs_gre_encap(net, skb, cp, &next_protocol); skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); @@ -1400,7 +1571,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } /* copy-on-write the packet before mangling it */ - if (!skb_make_writable(skb, offset)) + if (skb_ensure_writable(skb, offset)) goto tx_error; if (skb_cow(skb, rt->dst.dev->hard_header_len)) @@ -1489,7 +1660,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } /* copy-on-write the packet before mangling it */ - if (!skb_make_writable(skb, offset)) + if (skb_ensure_writable(skb, offset)) goto tx_error; if (skb_cow(skb, rt->dst.dev->hard_header_len)) |