From a36e185e8c85523413c1ae3e03a0bdde5501f403 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 8 Nov 2018 12:19:14 +0100 Subject: udp: Handle ICMP errors for tunnels with same destination port on both endpoints For both IPv4 and IPv6, if we can't match errors to a socket, try tunnels before ignoring them. Look up a socket with the original source and destination ports as found in the UDP packet inside the ICMP payload, this will work for tunnels that force the same destination port for both endpoints, i.e. VXLAN and GENEVE. Actually, lwtunnels could break this assumption if they are configured by an external control plane to have different destination ports on the endpoints: in this case, we won't be able to trace ICMP messages back to them. For IPv6 redirect messages, call ip6_redirect() directly with the output interface argument set to the interface we received the packet from (as it's the very interface we should build the exception on), otherwise the new nexthop will be rejected. There's no such need for IPv4. Tunnels can now export an encap_err_lookup() operation that indicates a match. Pass the packet to the lookup function, and if the tunnel driver reports a matching association, continue with regular ICMP error handling. v2: - Added newline between network and transport header sets in __udp{4,6}_lib_err_encap() (David Miller) - Removed redundant skb_reset_network_header(skb); in __udp4_lib_err_encap() - Removed redundant reassignment of iph in __udp4_lib_err_encap() (Sabrina Dubroca) - Edited comment to __udp{4,6}_lib_err_encap() to reflect the fact this won't work with lwtunnels configured to use asymmetric ports. By the way, it's VXLAN, not VxLAN (Jiri Benc) Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/ipv4/udp.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 70 insertions(+), 9 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3488650b90ac..ce759b61f6cd 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -583,6 +583,62 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, return true; } +DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); +void udp_encap_enable(void) +{ + static_branch_enable(&udp_encap_needed_key); +} +EXPORT_SYMBOL(udp_encap_enable); + +/* Try to match ICMP errors to UDP tunnels by looking up a socket without + * reversing source and destination port: this will match tunnels that force the + * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that + * lwtunnels might actually break this assumption by being configured with + * different destination ports on endpoints, in this case we won't be able to + * trace ICMP messages back to them. + * + * Then ask the tunnel implementation to match the error against a valid + * association. + * + * Return the socket if we have a match. + */ +static struct sock *__udp4_lib_err_encap(struct net *net, + const struct iphdr *iph, + struct udphdr *uh, + struct udp_table *udptable, + struct sk_buff *skb) +{ + int (*lookup)(struct sock *sk, struct sk_buff *skb); + int network_offset, transport_offset; + struct udp_sock *up; + struct sock *sk; + + sk = __udp4_lib_lookup(net, iph->daddr, uh->source, + iph->saddr, uh->dest, skb->dev->ifindex, 0, + udptable, NULL); + if (!sk) + return NULL; + + network_offset = skb_network_offset(skb); + transport_offset = skb_transport_offset(skb); + + /* Network header needs to point to the outer IPv4 header inside ICMP */ + skb_reset_network_header(skb); + + /* Transport header needs to point to the UDP header */ + skb_set_transport_header(skb, iph->ihl << 2); + + up = udp_sk(sk); + lookup = READ_ONCE(up->encap_err_lookup); + if (!lookup || lookup(sk, skb)) + sk = NULL; + + skb_set_transport_header(skb, transport_offset); + skb_set_network_header(skb, network_offset); + + return sk; +} + /* * This routine is called by the ICMP module when it gets some * sort of error condition. If err < 0 then the socket should @@ -601,6 +657,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; + bool tunnel = false; struct sock *sk; int harderr; int err; @@ -610,8 +667,15 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) iph->saddr, uh->source, skb->dev->ifindex, inet_sdif(skb), udptable, NULL); if (!sk) { - __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); - return; /* No socket for error */ + /* No socket for error: try tunnels before discarding */ + if (static_branch_unlikely(&udp_encap_needed_key)) + sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb); + + if (!sk) { + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); + return; + } + tunnel = true; } err = 0; @@ -654,6 +718,10 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) * RFC1122: OK. Passes ICMP errors back to application, as per * 4.1.3.3. */ + if (tunnel) { + /* ...not for tunnels though: we don't have a sending socket */ + goto out; + } if (!inet->recverr) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; @@ -1891,13 +1959,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return 0; } -DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); -void udp_encap_enable(void) -{ - static_branch_enable(&udp_encap_needed_key); -} -EXPORT_SYMBOL(udp_encap_enable); - /* returns: * -1: error * 0: success -- cgit v1.2.3-55-g7522 From 32bbd8793f24b0d5beb1cdb33c45c75ad1140e4b Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 8 Nov 2018 12:19:21 +0100 Subject: net: Convert protocol error handlers from void to int We'll need this to handle ICMP errors for tunnels without a sending socket (i.e. FoU and GUE). There, we might have to look up different types of IP tunnels, registered as network protocols, before we get a match, so we want this for the error handlers of IPPROTO_IPIP and IPPROTO_IPV6 in both inet_protos and inet6_protos. These error codes will be used in the next patch. For consistency, return sensible error codes in protocol error handlers whenever handlers can't handle errors because, even if valid, they don't match a protocol or any of its states. This has no effect on existing error handling paths. Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/net/icmp.h | 2 +- include/net/protocol.h | 9 +++++++-- include/net/sctp/sctp.h | 2 +- include/net/tcp.h | 2 +- include/net/udp.h | 2 +- net/dccp/ipv4.c | 13 ++++++++----- net/dccp/ipv6.c | 13 ++++++++----- net/ipv4/gre_demux.c | 9 +++++++-- net/ipv4/icmp.c | 6 ++++-- net/ipv4/ip_gre.c | 48 ++++++++++++++++++++++++----------------------- net/ipv4/ipip.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 22 ++++++++++++---------- net/ipv4/tunnel4.c | 18 ++++++++++++------ net/ipv4/udp.c | 10 +++++----- net/ipv4/udp_impl.h | 2 +- net/ipv4/udplite.c | 4 ++-- net/ipv4/xfrm4_protocol.c | 18 ++++++++++++------ net/ipv6/icmp.c | 4 +++- net/ipv6/ip6_gre.c | 18 ++++++++++-------- net/ipv6/tcp_ipv6.c | 13 ++++++++----- net/ipv6/tunnel6.c | 12 ++++++++---- net/ipv6/udp.c | 18 +++++++++--------- net/ipv6/udp_impl.h | 4 ++-- net/ipv6/udplite.c | 5 +++-- net/ipv6/xfrm6_protocol.c | 18 ++++++++++++------ net/sctp/input.c | 5 +++-- net/sctp/ipv6.c | 7 +++++-- 27 files changed, 177 insertions(+), 121 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/include/net/icmp.h b/include/net/icmp.h index 3ef2743a8eec..6ac3a5bd0117 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -41,7 +41,7 @@ struct net; void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info); int icmp_rcv(struct sk_buff *skb); -void icmp_err(struct sk_buff *skb, u32 info); +int icmp_err(struct sk_buff *skb, u32 info); int icmp_init(void); void icmp_out_count(struct net *net, unsigned char type); diff --git a/include/net/protocol.h b/include/net/protocol.h index 4fc75f7ae23b..92b3eaad6088 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -42,7 +42,10 @@ struct net_protocol { int (*early_demux)(struct sk_buff *skb); int (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); - void (*err_handler)(struct sk_buff *skb, u32 info); + + /* This returns an error if we weren't able to handle the error. */ + int (*err_handler)(struct sk_buff *skb, u32 info); + unsigned int no_policy:1, netns_ok:1, /* does the protocol do more stringent @@ -58,10 +61,12 @@ struct inet6_protocol { void (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); - void (*err_handler)(struct sk_buff *skb, + /* This returns an error if we weren't able to handle the error. */ + int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info); + unsigned int flags; /* INET6_PROTO_xxx */ }; diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 8c2caa370e0f..9a3b48a35e90 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -151,7 +151,7 @@ int sctp_primitive_RECONF(struct net *net, struct sctp_association *asoc, * sctp/input.c */ int sctp_rcv(struct sk_buff *skb); -void sctp_v4_err(struct sk_buff *skb, u32 info); +int sctp_v4_err(struct sk_buff *skb, u32 info); void sctp_hash_endpoint(struct sctp_endpoint *); void sctp_unhash_endpoint(struct sctp_endpoint *); struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *, diff --git a/include/net/tcp.h b/include/net/tcp.h index a18914d20486..4743836bed2e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -313,7 +313,7 @@ extern struct proto tcp_prot; void tcp_tasklet_init(void); -void tcp_v4_err(struct sk_buff *skb, u32); +int tcp_v4_err(struct sk_buff *skb, u32); void tcp_shutdown(struct sock *sk, int how); diff --git a/include/net/udp.h b/include/net/udp.h index eccca2325ee6..fd6d948755c8 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -283,7 +283,7 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, const struct sock *)); -void udp_err(struct sk_buff *, u32); +int udp_err(struct sk_buff *, u32); int udp_abort(struct sock *sk, int err); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udp_push_pending_frames(struct sock *sk); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 8e08cea6f178..26a21d97b6b0 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -231,7 +231,7 @@ EXPORT_SYMBOL(dccp_req_err); * check at all. A more general error queue to queue errors for later handling * is probably better. */ -static void dccp_v4_err(struct sk_buff *skb, u32 info) +static int dccp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (struct iphdr *)skb->data; const u8 offset = iph->ihl << 2; @@ -259,16 +259,18 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) inet_iif(skb), 0); if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); - return; + return -ENOENT; } if (sk->sk_state == DCCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); - return; + return 0; } seq = dccp_hdr_seq(dh); - if (sk->sk_state == DCCP_NEW_SYN_RECV) - return dccp_req_err(sk, seq); + if (sk->sk_state == DCCP_NEW_SYN_RECV) { + dccp_req_err(sk, seq); + return 0; + } bh_lock_sock(sk); /* If too many ICMPs get dropped on busy @@ -357,6 +359,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) out: bh_unlock_sock(sk); sock_put(sk); + return 0; } static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6344f1b18a6a..d5740bad5b18 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -68,7 +68,7 @@ static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb) } -static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; @@ -96,16 +96,18 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); - return; + return -ENOENT; } if (sk->sk_state == DCCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); - return; + return 0; } seq = dccp_hdr_seq(dh); - if (sk->sk_state == DCCP_NEW_SYN_RECV) - return dccp_req_err(sk, seq); + if (sk->sk_state == DCCP_NEW_SYN_RECV) { + dccp_req_err(sk, seq); + return 0; + } bh_lock_sock(sk); if (sock_owned_by_user(sk)) @@ -183,6 +185,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, out: bh_unlock_sock(sk); sock_put(sk); + return 0; } diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 7efe740c06eb..a4bf22ee3aed 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -151,20 +151,25 @@ drop: return NET_RX_DROP; } -static void gre_err(struct sk_buff *skb, u32 info) +static int gre_err(struct sk_buff *skb, u32 info) { const struct gre_protocol *proto; const struct iphdr *iph = (const struct iphdr *)skb->data; u8 ver = skb->data[(iph->ihl<<2) + 1]&0x7f; + int err = 0; if (ver >= GREPROTO_MAX) - return; + return -EINVAL; rcu_read_lock(); proto = rcu_dereference(gre_proto[ver]); if (proto && proto->err_handler) proto->err_handler(skb, info); + else + err = -EPROTONOSUPPORT; rcu_read_unlock(); + + return err; } static const struct net_protocol net_gre_protocol = { diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index d832beed6e3a..065997f414e6 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1079,7 +1079,7 @@ error: goto drop; } -void icmp_err(struct sk_buff *skb, u32 info) +int icmp_err(struct sk_buff *skb, u32 info) { struct iphdr *iph = (struct iphdr *)skb->data; int offset = iph->ihl<<2; @@ -1094,13 +1094,15 @@ void icmp_err(struct sk_buff *skb, u32 info) */ if (icmph->type != ICMP_ECHOREPLY) { ping_err(skb, offset, info); - return; + return 0; } if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) ipv4_update_pmtu(skb, net, info, 0, IPPROTO_ICMP); else if (type == ICMP_REDIRECT) ipv4_redirect(skb, net, 0, IPPROTO_ICMP); + + return 0; } /* diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2c67af644e64..76a9a5f7a40e 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -121,8 +121,8 @@ static unsigned int ipgre_net_id __read_mostly; static unsigned int gre_tap_net_id __read_mostly; static unsigned int erspan_net_id __read_mostly; -static void ipgre_err(struct sk_buff *skb, u32 info, - const struct tnl_ptk_info *tpi) +static int ipgre_err(struct sk_buff *skb, u32 info, + const struct tnl_ptk_info *tpi) { /* All the routers (except for Linux) return only @@ -146,17 +146,32 @@ static void ipgre_err(struct sk_buff *skb, u32 info, unsigned int data_len = 0; struct ip_tunnel *t; + if (tpi->proto == htons(ETH_P_TEB)) + itn = net_generic(net, gre_tap_net_id); + else if (tpi->proto == htons(ETH_P_ERSPAN) || + tpi->proto == htons(ETH_P_ERSPAN2)) + itn = net_generic(net, erspan_net_id); + else + itn = net_generic(net, ipgre_net_id); + + iph = (const struct iphdr *)(icmp_hdr(skb) + 1); + t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, + iph->daddr, iph->saddr, tpi->key); + + if (!t) + return -ENOENT; + switch (type) { default: case ICMP_PARAMETERPROB: - return; + return 0; case ICMP_DEST_UNREACH: switch (code) { case ICMP_SR_FAILED: case ICMP_PORT_UNREACH: /* Impossible event. */ - return; + return 0; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, @@ -168,7 +183,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info, case ICMP_TIME_EXCEEDED: if (code != ICMP_EXC_TTL) - return; + return 0; data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ break; @@ -176,40 +191,27 @@ static void ipgre_err(struct sk_buff *skb, u32 info, break; } - if (tpi->proto == htons(ETH_P_TEB)) - itn = net_generic(net, gre_tap_net_id); - else if (tpi->proto == htons(ETH_P_ERSPAN) || - tpi->proto == htons(ETH_P_ERSPAN2)) - itn = net_generic(net, erspan_net_id); - else - itn = net_generic(net, ipgre_net_id); - - iph = (const struct iphdr *)(icmp_hdr(skb) + 1); - t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, - iph->daddr, iph->saddr, tpi->key); - - if (!t) - return; - #if IS_ENABLED(CONFIG_IPV6) if (tpi->proto == htons(ETH_P_IPV6) && !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, type, data_len)) - return; + return 0; #endif if (t->parms.iph.daddr == 0 || ipv4_is_multicast(t->parms.iph.daddr)) - return; + return 0; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) - return; + return 0; if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) t->err_count++; else t->err_count = 1; t->err_time = jiffies; + + return 0; } static void gre_err(struct sk_buff *skb, u32 info) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index e65287c27e3d..57c5dd283a2c 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -140,6 +140,13 @@ static int ipip_err(struct sk_buff *skb, u32 info) struct ip_tunnel *t; int err = 0; + t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->daddr, iph->saddr, 0); + if (!t) { + err = -ENOENT; + goto out; + } + switch (type) { case ICMP_DEST_UNREACH: switch (code) { @@ -167,13 +174,6 @@ static int ipip_err(struct sk_buff *skb, u32 info) goto out; } - t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, - iph->daddr, iph->saddr, 0); - if (!t) { - err = -ENOENT; - goto out; - } - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, net, info, t->parms.link, iph->protocol); goto out; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index de47038afdf0..a336787d75e5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -423,7 +423,7 @@ EXPORT_SYMBOL(tcp_req_err); * */ -void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) +int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) { const struct iphdr *iph = (const struct iphdr *)icmp_skb->data; struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); @@ -446,20 +446,21 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) inet_iif(icmp_skb), 0); if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); - return; + return -ENOENT; } if (sk->sk_state == TCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); - return; + return 0; } seq = ntohl(th->seq); - if (sk->sk_state == TCP_NEW_SYN_RECV) - return tcp_req_err(sk, seq, - type == ICMP_PARAMETERPROB || - type == ICMP_TIME_EXCEEDED || - (type == ICMP_DEST_UNREACH && - (code == ICMP_NET_UNREACH || - code == ICMP_HOST_UNREACH))); + if (sk->sk_state == TCP_NEW_SYN_RECV) { + tcp_req_err(sk, seq, type == ICMP_PARAMETERPROB || + type == ICMP_TIME_EXCEEDED || + (type == ICMP_DEST_UNREACH && + (code == ICMP_NET_UNREACH || + code == ICMP_HOST_UNREACH))); + return 0; + } bh_lock_sock(sk); /* If too many ICMPs get dropped on busy @@ -613,6 +614,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) out: bh_unlock_sock(sk); sock_put(sk); + return 0; } void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr) diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index c0630013c1ae..33bf8e9c8663 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -149,34 +149,40 @@ drop: } #endif -static void tunnel4_err(struct sk_buff *skb, u32 info) +static int tunnel4_err(struct sk_buff *skb, u32 info) { struct xfrm_tunnel *handler; for_each_tunnel_rcu(tunnel4_handlers, handler) if (!handler->err_handler(skb, info)) - break; + return 0; + + return -ENOENT; } #if IS_ENABLED(CONFIG_IPV6) -static void tunnel64_err(struct sk_buff *skb, u32 info) +static int tunnel64_err(struct sk_buff *skb, u32 info) { struct xfrm_tunnel *handler; for_each_tunnel_rcu(tunnel64_handlers, handler) if (!handler->err_handler(skb, info)) - break; + return 0; + + return -ENOENT; } #endif #if IS_ENABLED(CONFIG_MPLS) -static void tunnelmpls4_err(struct sk_buff *skb, u32 info) +static int tunnelmpls4_err(struct sk_buff *skb, u32 info) { struct xfrm_tunnel *handler; for_each_tunnel_rcu(tunnelmpls4_handlers, handler) if (!handler->err_handler(skb, info)) - break; + return 0; + + return -ENOENT; } #endif diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ce759b61f6cd..a505ee5eb92c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -650,7 +650,7 @@ static struct sock *__udp4_lib_err_encap(struct net *net, * to find the appropriate port. */ -void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) +int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) { struct inet_sock *inet; const struct iphdr *iph = (const struct iphdr *)skb->data; @@ -673,7 +673,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); - return; + return -ENOENT; } tunnel = true; } @@ -731,12 +731,12 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) sk->sk_err = err; sk->sk_error_report(sk); out: - return; + return 0; } -void udp_err(struct sk_buff *skb, u32 info) +int udp_err(struct sk_buff *skb, u32 info) { - __udp4_lib_err(skb, info, &udp_table); + return __udp4_lib_err(skb, info, &udp_table); } /* diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index e7d18b140287..322672655419 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -7,7 +7,7 @@ #include int __udp4_lib_rcv(struct sk_buff *, struct udp_table *, int); -void __udp4_lib_err(struct sk_buff *, u32, struct udp_table *); +int __udp4_lib_err(struct sk_buff *, u32, struct udp_table *); int udp_v4_get_port(struct sock *sk, unsigned short snum); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 8545457752fb..39c7f17d916f 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -25,9 +25,9 @@ static int udplite_rcv(struct sk_buff *skb) return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); } -static void udplite_err(struct sk_buff *skb, u32 info) +static int udplite_err(struct sk_buff *skb, u32 info) { - __udp4_lib_err(skb, info, &udplite_table); + return __udp4_lib_err(skb, info, &udplite_table); } static const struct net_protocol udplite_protocol = { diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c index 8dd0e6ab8606..35c54865dc42 100644 --- a/net/ipv4/xfrm4_protocol.c +++ b/net/ipv4/xfrm4_protocol.c @@ -106,13 +106,15 @@ static int xfrm4_esp_rcv(struct sk_buff *skb) return 0; } -static void xfrm4_esp_err(struct sk_buff *skb, u32 info) +static int xfrm4_esp_err(struct sk_buff *skb, u32 info) { struct xfrm4_protocol *handler; for_each_protocol_rcu(esp4_handlers, handler) if (!handler->err_handler(skb, info)) - break; + return 0; + + return -ENOENT; } static int xfrm4_ah_rcv(struct sk_buff *skb) @@ -132,13 +134,15 @@ static int xfrm4_ah_rcv(struct sk_buff *skb) return 0; } -static void xfrm4_ah_err(struct sk_buff *skb, u32 info) +static int xfrm4_ah_err(struct sk_buff *skb, u32 info) { struct xfrm4_protocol *handler; for_each_protocol_rcu(ah4_handlers, handler) if (!handler->err_handler(skb, info)) - break; + return 0; + + return -ENOENT; } static int xfrm4_ipcomp_rcv(struct sk_buff *skb) @@ -158,13 +162,15 @@ static int xfrm4_ipcomp_rcv(struct sk_buff *skb) return 0; } -static void xfrm4_ipcomp_err(struct sk_buff *skb, u32 info) +static int xfrm4_ipcomp_err(struct sk_buff *skb, u32 info) { struct xfrm4_protocol *handler; for_each_protocol_rcu(ipcomp4_handlers, handler) if (!handler->err_handler(skb, info)) - break; + return 0; + + return -ENOENT; } static const struct net_protocol esp4_protocol = { diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index c9c53ade55c3..5d7aa2c2770c 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -84,7 +84,7 @@ static inline struct sock *icmpv6_sk(struct net *net) return net->ipv6.icmp_sk[smp_processor_id()]; } -static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */ @@ -100,6 +100,8 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!(type & ICMPV6_INFOMSG_MASK)) if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) ping_err(skb, offset, ntohl(info)); + + return 0; } static int icmpv6_rcv(struct sk_buff *skb); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 515adbdba1d2..81b69bcee714 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -423,7 +423,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) } -static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); @@ -433,13 +433,13 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IPV6), offset) < 0) - return; + return -EINVAL; ipv6h = (const struct ipv6hdr *)skb->data; t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, tpi.key, tpi.proto); if (!t) - return; + return -ENOENT; switch (type) { struct ipv6_tlv_tnl_enc_lim *tel; @@ -449,14 +449,14 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, t->parms.name); if (code != ICMPV6_PORT_UNREACH) break; - return; + return 0; case ICMPV6_TIME_EXCEED: if (code == ICMPV6_EXC_HOPLIMIT) { net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", t->parms.name); break; } - return; + return 0; case ICMPV6_PARAMPROB: teli = 0; if (code == ICMPV6_HDR_FIELD) @@ -472,14 +472,14 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n", t->parms.name); } - return; + return 0; case ICMPV6_PKT_TOOBIG: ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); - return; + return 0; case NDISC_REDIRECT: ip6_redirect(skb, net, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); - return; + return 0; } if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO)) @@ -487,6 +487,8 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, else t->err_count = 1; t->err_time = jiffies; + + return 0; } static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 03e6b7a2bc53..a3f559162521 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -349,7 +349,7 @@ static void tcp_v6_mtu_reduced(struct sock *sk) } } -static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; @@ -371,17 +371,19 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); - return; + return -ENOENT; } if (sk->sk_state == TCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); - return; + return 0; } seq = ntohl(th->seq); fatal = icmpv6_err_convert(type, code, &err); - if (sk->sk_state == TCP_NEW_SYN_RECV) - return tcp_req_err(sk, seq, fatal); + if (sk->sk_state == TCP_NEW_SYN_RECV) { + tcp_req_err(sk, seq, fatal); + return 0; + } bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) @@ -467,6 +469,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, out: bh_unlock_sock(sk); sock_put(sk); + return 0; } diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index dae25cad05cd..1991dede7367 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -134,24 +134,28 @@ drop: return 0; } -static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct xfrm6_tunnel *handler; for_each_tunnel_rcu(tunnel6_handlers, handler) if (!handler->err_handler(skb, opt, type, code, offset, info)) - break; + return 0; + + return -ENOENT; } -static void tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct xfrm6_tunnel *handler; for_each_tunnel_rcu(tunnel46_handlers, handler) if (!handler->err_handler(skb, opt, type, code, offset, info)) - break; + return 0; + + return -ENOENT; } static const struct inet6_protocol tunnel6_protocol = { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1216c920f945..61316ec48b51 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -517,9 +517,9 @@ static struct sock *__udp6_lib_err_encap(struct net *net, return sk; } -void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info, - struct udp_table *udptable) +int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info, + struct udp_table *udptable) { struct ipv6_pinfo *np; const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; @@ -544,7 +544,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); - return; + return -ENOENT; } tunnel = true; } @@ -583,7 +583,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk->sk_err = err; sk->sk_error_report(sk); out: - return; + return 0; } static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) @@ -614,11 +614,11 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return 0; } -static __inline__ void udpv6_err(struct sk_buff *skb, - struct inet6_skb_parm *opt, u8 type, - u8 code, int offset, __be32 info) +static __inline__ int udpv6_err(struct sk_buff *skb, + struct inet6_skb_parm *opt, u8 type, + u8 code, int offset, __be32 info) { - __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); + return __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); } static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 7903e21c178b..5730e6503cb4 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -9,8 +9,8 @@ #include int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int); -void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int, - __be32, struct udp_table *); +int __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int, + __be32, struct udp_table *); int udp_v6_get_port(struct sock *sk, unsigned short snum); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 5000ad6878e6..a125aebc29e5 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -20,11 +20,12 @@ static int udplitev6_rcv(struct sk_buff *skb) return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); } -static void udplitev6_err(struct sk_buff *skb, +static int udplitev6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { - __udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table); + return __udp6_lib_err(skb, opt, type, code, offset, info, + &udplite_table); } static const struct inet6_protocol udplitev6_protocol = { diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index b2dc8ce49378..cc979b702c89 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -80,14 +80,16 @@ static int xfrm6_esp_rcv(struct sk_buff *skb) return 0; } -static void xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct xfrm6_protocol *handler; for_each_protocol_rcu(esp6_handlers, handler) if (!handler->err_handler(skb, opt, type, code, offset, info)) - break; + return 0; + + return -ENOENT; } static int xfrm6_ah_rcv(struct sk_buff *skb) @@ -107,14 +109,16 @@ static int xfrm6_ah_rcv(struct sk_buff *skb) return 0; } -static void xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct xfrm6_protocol *handler; for_each_protocol_rcu(ah6_handlers, handler) if (!handler->err_handler(skb, opt, type, code, offset, info)) - break; + return 0; + + return -ENOENT; } static int xfrm6_ipcomp_rcv(struct sk_buff *skb) @@ -134,14 +138,16 @@ static int xfrm6_ipcomp_rcv(struct sk_buff *skb) return 0; } -static void xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct xfrm6_protocol *handler; for_each_protocol_rcu(ipcomp6_handlers, handler) if (!handler->err_handler(skb, opt, type, code, offset, info)) - break; + return 0; + + return -ENOENT; } static const struct inet6_protocol esp6_protocol = { diff --git a/net/sctp/input.c b/net/sctp/input.c index 5c36a99882ed..7ab08a5b36dc 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -574,7 +574,7 @@ void sctp_err_finish(struct sock *sk, struct sctp_transport *t) * is probably better. * */ -void sctp_v4_err(struct sk_buff *skb, __u32 info) +int sctp_v4_err(struct sk_buff *skb, __u32 info) { const struct iphdr *iph = (const struct iphdr *)skb->data; const int ihlen = iph->ihl * 4; @@ -599,7 +599,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) skb->transport_header = savesctp; if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); - return; + return -ENOENT; } /* Warning: The sock lock is held. Remember to call * sctp_err_finish! @@ -653,6 +653,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) out_unlock: sctp_err_finish(sk, transport); + return 0; } /* diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index fc6c5e4bffa5..6e27c62646e9 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -138,7 +138,7 @@ static struct notifier_block sctp_inet6addr_notifier = { }; /* ICMP error handler. */ -static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct inet6_dev *idev; @@ -147,7 +147,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct sctp_transport *transport; struct ipv6_pinfo *np; __u16 saveip, savesctp; - int err; + int err, ret = 0; struct net *net = dev_net(skb->dev); idev = in6_dev_get(skb->dev); @@ -163,6 +163,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, skb->transport_header = savesctp; if (!sk) { __ICMP6_INC_STATS(net, idev, ICMP6_MIB_INERRORS); + ret = -ENOENT; goto out; } @@ -202,6 +203,8 @@ out_unlock: out: if (likely(idev != NULL)) in6_dev_put(idev); + + return ret; } static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) -- cgit v1.2.3-55-g7522 From e7cc082455cb49ea937a3ec4ab3d001b0b5f137b Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 8 Nov 2018 12:19:22 +0100 Subject: udp: Support for error handlers of tunnels with arbitrary destination port ICMP error handling is currently not possible for UDP tunnels not employing a receiving socket with local destination port matching the remote one, because we have no way to look them up. Add an err_handler tunnel encapsulation operation that can be exported by tunnels in order to pass the error to the protocol implementing the encapsulation. We can't easily use a lookup function as we did for VXLAN and GENEVE, as protocol error handlers, which would be in turn called by implementations of this new operation, handle the errors themselves, together with the tunnel lookup. Without a socket, we can't be sure which encapsulation error handler is the appropriate one: encapsulation handlers (the ones for FoU and GUE introduced in the next patch, e.g.) will need to check the new error codes returned by protocol handlers to figure out if errors match the given encapsulation, and, in turn, report this error back, so that we can try all of them in __udp{4,6}_lib_err_encap_no_sk() until we have a match. v2: - Name all arguments in err_handler prototypes (David Miller) Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 2 ++ include/net/ip_tunnels.h | 1 + net/ipv4/udp.c | 70 ++++++++++++++++++++++++++++++++------------ net/ipv6/udp.c | 75 +++++++++++++++++++++++++++++++++++++----------- 4 files changed, 113 insertions(+), 35 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 236e40ba06bf..69b4bcf880c9 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -69,6 +69,8 @@ struct ip6_tnl_encap_ops { size_t (*encap_hlen)(struct ip_tunnel_encap *e); int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi6 *fl6); + int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info); }; #ifdef CONFIG_INET diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index b0d022ff6ea1..db6b2218a2ad 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -311,6 +311,7 @@ struct ip_tunnel_encap_ops { size_t (*encap_hlen)(struct ip_tunnel_encap *e); int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi4 *fl4); + int (*err_handler)(struct sk_buff *skb, u32 info); }; #define MAX_IPTUN_ENCAP_OPS 8 diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a505ee5eb92c..6f8890c5bc7e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -105,6 +105,7 @@ #include #include #include +#include #include #include #include @@ -590,6 +591,26 @@ void udp_encap_enable(void) } EXPORT_SYMBOL(udp_encap_enable); +/* Handler for tunnels with arbitrary destination ports: no socket lookup, go + * through error handlers in encapsulations looking for a match. + */ +static int __udp4_lib_err_encap_no_sk(struct sk_buff *skb, u32 info) +{ + int i; + + for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) { + int (*handler)(struct sk_buff *skb, u32 info); + + if (!iptun_encaps[i]) + continue; + handler = rcu_dereference(iptun_encaps[i]->err_handler); + if (handler && !handler(skb, info)) + return 0; + } + + return -ENOENT; +} + /* Try to match ICMP errors to UDP tunnels by looking up a socket without * reversing source and destination port: this will match tunnels that force the * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that @@ -597,28 +618,25 @@ EXPORT_SYMBOL(udp_encap_enable); * different destination ports on endpoints, in this case we won't be able to * trace ICMP messages back to them. * + * If this doesn't match any socket, probe tunnels with arbitrary destination + * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port + * we've sent packets to won't necessarily match the local destination port. + * * Then ask the tunnel implementation to match the error against a valid * association. * - * Return the socket if we have a match. + * Return an error if we can't find a match, the socket if we need further + * processing, zero otherwise. */ static struct sock *__udp4_lib_err_encap(struct net *net, const struct iphdr *iph, struct udphdr *uh, struct udp_table *udptable, - struct sk_buff *skb) + struct sk_buff *skb, u32 info) { - int (*lookup)(struct sock *sk, struct sk_buff *skb); int network_offset, transport_offset; - struct udp_sock *up; struct sock *sk; - sk = __udp4_lib_lookup(net, iph->daddr, uh->source, - iph->saddr, uh->dest, skb->dev->ifindex, 0, - udptable, NULL); - if (!sk) - return NULL; - network_offset = skb_network_offset(skb); transport_offset = skb_transport_offset(skb); @@ -628,10 +646,20 @@ static struct sock *__udp4_lib_err_encap(struct net *net, /* Transport header needs to point to the UDP header */ skb_set_transport_header(skb, iph->ihl << 2); - up = udp_sk(sk); - lookup = READ_ONCE(up->encap_err_lookup); - if (!lookup || lookup(sk, skb)) - sk = NULL; + sk = __udp4_lib_lookup(net, iph->daddr, uh->source, + iph->saddr, uh->dest, skb->dev->ifindex, 0, + udptable, NULL); + if (sk) { + int (*lookup)(struct sock *sk, struct sk_buff *skb); + struct udp_sock *up = udp_sk(sk); + + lookup = READ_ONCE(up->encap_err_lookup); + if (!lookup || lookup(sk, skb)) + sk = NULL; + } + + if (!sk) + sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info)); skb_set_transport_header(skb, transport_offset); skb_set_network_header(skb, network_offset); @@ -668,13 +696,19 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) inet_sdif(skb), udptable, NULL); if (!sk) { /* No socket for error: try tunnels before discarding */ - if (static_branch_unlikely(&udp_encap_needed_key)) - sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb); + sk = ERR_PTR(-ENOENT); + if (static_branch_unlikely(&udp_encap_needed_key)) { + sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb, + info); + if (!sk) + return 0; + } - if (!sk) { + if (IS_ERR(sk)) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); - return -ENOENT; + return PTR_ERR(sk); } + tunnel = true; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 61316ec48b51..0c0cb1611aef 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -469,6 +470,29 @@ void udpv6_encap_enable(void) } EXPORT_SYMBOL(udpv6_encap_enable); +/* Handler for tunnels with arbitrary destination ports: no socket lookup, go + * through error handlers in encapsulations looking for a match. + */ +static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb, + struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, u32 info) +{ + int i; + + for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) { + int (*handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, u32 info); + + if (!ip6tun_encaps[i]) + continue; + handler = rcu_dereference(ip6tun_encaps[i]->err_handler); + if (handler && !handler(skb, opt, type, code, offset, info)) + return 0; + } + + return -ENOENT; +} + /* Try to match ICMP errors to UDP tunnels by looking up a socket without * reversing source and destination port: this will match tunnels that force the * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that @@ -476,28 +500,27 @@ EXPORT_SYMBOL(udpv6_encap_enable); * different destination ports on endpoints, in this case we won't be able to * trace ICMP messages back to them. * + * If this doesn't match any socket, probe tunnels with arbitrary destination + * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port + * we've sent packets to won't necessarily match the local destination port. + * * Then ask the tunnel implementation to match the error against a valid * association. * - * Return the socket if we have a match. + * Return an error if we can't find a match, the socket if we need further + * processing, zero otherwise. */ static struct sock *__udp6_lib_err_encap(struct net *net, const struct ipv6hdr *hdr, int offset, struct udphdr *uh, struct udp_table *udptable, - struct sk_buff *skb) + struct sk_buff *skb, + struct inet6_skb_parm *opt, + u8 type, u8 code, __be32 info) { - int (*lookup)(struct sock *sk, struct sk_buff *skb); int network_offset, transport_offset; - struct udp_sock *up; struct sock *sk; - sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source, - &hdr->saddr, uh->dest, - inet6_iif(skb), 0, udptable, skb); - if (!sk) - return NULL; - network_offset = skb_network_offset(skb); transport_offset = skb_transport_offset(skb); @@ -507,13 +530,26 @@ static struct sock *__udp6_lib_err_encap(struct net *net, /* Transport header needs to point to the UDP header */ skb_set_transport_header(skb, offset); - up = udp_sk(sk); - lookup = READ_ONCE(up->encap_err_lookup); - if (!lookup || lookup(sk, skb)) - sk = NULL; + sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source, + &hdr->saddr, uh->dest, + inet6_iif(skb), 0, udptable, skb); + if (sk) { + int (*lookup)(struct sock *sk, struct sk_buff *skb); + struct udp_sock *up = udp_sk(sk); + + lookup = READ_ONCE(up->encap_err_lookup); + if (!lookup || lookup(sk, skb)) + sk = NULL; + } + + if (!sk) { + sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code, + offset, info)); + } skb_set_transport_header(skb, transport_offset); skb_set_network_header(skb, network_offset); + return sk; } @@ -536,16 +572,21 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, inet6_iif(skb), inet6_sdif(skb), udptable, skb); if (!sk) { /* No socket for error: try tunnels before discarding */ + sk = ERR_PTR(-ENOENT); if (static_branch_unlikely(&udpv6_encap_needed_key)) { sk = __udp6_lib_err_encap(net, hdr, offset, uh, - udptable, skb); + udptable, skb, + opt, type, code, info); + if (!sk) + return 0; } - if (!sk) { + if (IS_ERR(sk)) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); - return -ENOENT; + return PTR_ERR(sk); } + tunnel = true; } -- cgit v1.2.3-55-g7522