summaryrefslogtreecommitdiffstats
path: root/net/netfilter/ipvs/ip_vs_core.c
diff options
context:
space:
mode:
authorDavid S. Miller2012-08-23 03:48:21 +0200
committerDavid S. Miller2012-08-23 03:48:52 +0200
commitbf277b0ccea7d2422b85e232017ce3fddbe9c49c (patch)
tree252232255b8a6024eccf2fea668bb89de5ae3073 /net/netfilter/ipvs/ip_vs_core.c
parentMerge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsh... (diff)
parentnetfilter: remove unnecessary goto statement for error recovery (diff)
downloadkernel-qcow2-linux-bf277b0ccea7d2422b85e232017ce3fddbe9c49c.tar.gz
kernel-qcow2-linux-bf277b0ccea7d2422b85e232017ce3fddbe9c49c.tar.xz
kernel-qcow2-linux-bf277b0ccea7d2422b85e232017ce3fddbe9c49c.zip
Merge git://1984.lsi.us.es/nf-next
Pablo Neira Ayuso says: ==================== This is the first batch of Netfilter and IPVS updates for your net-next tree. Mostly cleanups for the Netfilter side. They are: * Remove unnecessary RTNL locking now that we have support for namespace in nf_conntrack, from Patrick McHardy. * Cleanup to eliminate unnecessary goto in the initialization path of several Netfilter tables, from Jean Sacren. * Another cleanup from Wu Fengguang, this time to PTR_RET instead of if IS_ERR then return PTR_ERR. * Use list_for_each_entry_continue_rcu in nf_iterate, from Michael Wang. * Add pmtu_disc sysctl option to disable PMTU in their tunneling transmitter, from Julian Anastasov. * Generalize application protocol registration in IPVS and modify IPVS FTP helper to use it, from Julian Anastasov. * update Kconfig. The IPVS FTP helper depends on the Netfilter FTP helper for NAT support, from Julian Anastasov. * Add logic to update PMTU for IPIP packets in IPVS, again from Julian Anastasov. * A couple of sparse warning fixes for IPVS and Netfilter from Claudiu Ghioc and Patrick McHardy respectively. Patrick's IPv6 NAT changes will follow after this batch, I need to flush this batch first before refreshing my tree. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/netfilter/ipvs/ip_vs_core.c')
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c76
1 files changed, 73 insertions, 3 deletions
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b54eccef40b5..58918e20f9d5 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1303,7 +1303,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
- unsigned int offset, ihl, verdict;
+ unsigned int offset, offset2, ihl, verdict;
+ bool ipip;
*related = 1;
@@ -1345,6 +1346,21 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
net = skb_net(skb);
+ /* Special case for errors for IPIP packets */
+ ipip = false;
+ if (cih->protocol == IPPROTO_IPIP) {
+ if (unlikely(cih->frag_off & htons(IP_OFFSET)))
+ return NF_ACCEPT;
+ /* Error for our IPIP must arrive at LOCAL_IN */
+ if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL))
+ return NF_ACCEPT;
+ offset += cih->ihl * 4;
+ cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+ if (cih == NULL)
+ return NF_ACCEPT; /* The packet looks wrong, ignore */
+ ipip = true;
+ }
+
pd = ip_vs_proto_data_get(net, cih->protocol);
if (!pd)
return NF_ACCEPT;
@@ -1358,11 +1374,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
"Checking incoming ICMP for");
+ offset2 = offset;
offset += cih->ihl * 4;
ip_vs_fill_iphdr(AF_INET, cih, &ciph);
- /* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);
+ /* The embedded headers contain source and dest in reverse order.
+ * For IPIP this is error for request, not for reply.
+ */
+ cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, ipip ? 0 : 1);
if (!cp)
return NF_ACCEPT;
@@ -1376,6 +1395,57 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
goto out;
}
+ if (ipip) {
+ __be32 info = ic->un.gateway;
+
+ /* Update the MTU */
+ if (ic->type == ICMP_DEST_UNREACH &&
+ ic->code == ICMP_FRAG_NEEDED) {
+ struct ip_vs_dest *dest = cp->dest;
+ u32 mtu = ntohs(ic->un.frag.mtu);
+
+ /* Strip outer IP and ICMP, go to IPIP header */
+ __skb_pull(skb, ihl + sizeof(_icmph));
+ offset2 -= ihl + sizeof(_icmph);
+ skb_reset_network_header(skb);
+ IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n",
+ &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu);
+ rcu_read_lock();
+ ipv4_update_pmtu(skb, dev_net(skb->dev),
+ mtu, 0, 0, 0, 0);
+ rcu_read_unlock();
+ /* Client uses PMTUD? */
+ if (!(cih->frag_off & htons(IP_DF)))
+ goto ignore_ipip;
+ /* Prefer the resulting PMTU */
+ if (dest) {
+ spin_lock(&dest->dst_lock);
+ if (dest->dst_cache)
+ mtu = dst_mtu(dest->dst_cache);
+ spin_unlock(&dest->dst_lock);
+ }
+ if (mtu > 68 + sizeof(struct iphdr))
+ mtu -= sizeof(struct iphdr);
+ info = htonl(mtu);
+ }
+ /* Strip outer IP, ICMP and IPIP, go to IP header of
+ * original request.
+ */
+ __skb_pull(skb, offset2);
+ skb_reset_network_header(skb);
+ IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n",
+ &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
+ ic->type, ic->code, ntohl(info));
+ icmp_send(skb, ic->type, ic->code, info);
+ /* ICMP can be shorter but anyways, account it */
+ ip_vs_out_stats(cp, skb);
+
+ignore_ipip:
+ consume_skb(skb);
+ verdict = NF_STOLEN;
+ goto out;
+ }
+
/* do the statistics and put it back */
ip_vs_in_stats(cp, skb);
if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)