summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorDavid S. Miller2018-11-09 02:13:09 +0100
committerDavid S. Miller2018-11-09 02:13:09 +0100
commit20da4ef91cb9335d7dcef6f1546dc31c455081f2 (patch)
treef43bfbaffc23b1ede86bd005dd6904cf95d95b49 /drivers
parentnet: hns3: fix spelling mistake, "assertting" -> "asserting" (diff)
parentselftests: pmtu: Introduce FoU and GUE PMTU exceptions tests (diff)
downloadkernel-qcow2-linux-20da4ef91cb9335d7dcef6f1546dc31c455081f2.tar.gz
kernel-qcow2-linux-20da4ef91cb9335d7dcef6f1546dc31c455081f2.tar.xz
kernel-qcow2-linux-20da4ef91cb9335d7dcef6f1546dc31c455081f2.zip
Merge branch 'ICMP-error-handling-for-UDP-tunnels'
Stefano Brivio says: ==================== ICMP error handling for UDP tunnels This series introduces ICMP error handling for UDP tunnels and encapsulations and related selftests. We need to handle ICMP errors to support PMTU discovery and route redirection -- this support is entirely missing right now: - patch 1/11 adds a socket lookup for UDP tunnels that use, by design, the same destination port on both endpoints -- i.e. VXLAN and GENEVE - patches 2/11 to 7/11 are specific to VxLAN and GENEVE - patches 8/11 and 9/11 add infrastructure for lookup of encapsulations where sent packets cannot be matched via receiving socket lookup, i.e. FoU and GUE - patches 10/11 and 11/11 are specific to FoU and GUE v2: changes are listed in the single patches ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/net/geneve.c107
-rw-r--r--drivers/net/vxlan.c58
2 files changed, 156 insertions, 9 deletions
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index fbfc13d81f66..7c53e06b31c3 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -70,6 +70,7 @@ struct geneve_dev {
bool collect_md;
bool use_udp6_rx_checksums;
bool ttl_inherit;
+ enum ifla_geneve_df df;
};
struct geneve_sock {
@@ -387,6 +388,57 @@ drop:
return 0;
}
+/* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */
+static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb)
+{
+ struct genevehdr *geneveh;
+ struct geneve_sock *gs;
+ u8 zero_vni[3] = { 0 };
+ u8 *vni = zero_vni;
+
+ if (skb->len < GENEVE_BASE_HLEN)
+ return -EINVAL;
+
+ geneveh = geneve_hdr(skb);
+ if (geneveh->ver != GENEVE_VER)
+ return -EINVAL;
+
+ if (geneveh->proto_type != htons(ETH_P_TEB))
+ return -EINVAL;
+
+ gs = rcu_dereference_sk_user_data(sk);
+ if (!gs)
+ return -ENOENT;
+
+ if (geneve_get_sk_family(gs) == AF_INET) {
+ struct iphdr *iph = ip_hdr(skb);
+ __be32 addr4 = 0;
+
+ if (!gs->collect_md) {
+ vni = geneve_hdr(skb)->vni;
+ addr4 = iph->daddr;
+ }
+
+ return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT;
+ }
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (geneve_get_sk_family(gs) == AF_INET6) {
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct in6_addr addr6 = { 0 };
+
+ if (!gs->collect_md) {
+ vni = geneve_hdr(skb)->vni;
+ addr6 = ip6h->daddr;
+ }
+
+ return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT;
+ }
+#endif
+
+ return -EPFNOSUPPORT;
+}
+
static struct socket *geneve_create_sock(struct net *net, bool ipv6,
__be16 port, bool ipv6_rx_csum)
{
@@ -544,6 +596,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
tunnel_cfg.gro_receive = geneve_gro_receive;
tunnel_cfg.gro_complete = geneve_gro_complete;
tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
+ tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup;
tunnel_cfg.encap_destroy = NULL;
setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
list_add(&gs->list, &gn->sock_list);
@@ -823,8 +876,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
struct rtable *rt;
struct flowi4 fl4;
__u8 tos, ttl;
+ __be16 df = 0;
__be16 sport;
- __be16 df;
int err;
rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info);
@@ -838,6 +891,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (geneve->collect_md) {
tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
ttl = key->ttl;
+
+ df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
} else {
tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb);
if (geneve->ttl_inherit)
@@ -845,8 +900,22 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
else
ttl = key->ttl;
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
+
+ if (geneve->df == GENEVE_DF_SET) {
+ df = htons(IP_DF);
+ } else if (geneve->df == GENEVE_DF_INHERIT) {
+ struct ethhdr *eth = eth_hdr(skb);
+
+ if (ntohs(eth->h_proto) == ETH_P_IPV6) {
+ df = htons(IP_DF);
+ } else if (ntohs(eth->h_proto) == ETH_P_IP) {
+ struct iphdr *iph = ip_hdr(skb);
+
+ if (iph->frag_off & htons(IP_DF))
+ df = htons(IP_DF);
+ }
+ }
}
- df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr));
if (unlikely(err))
@@ -1093,6 +1162,7 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 },
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
[IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 },
+ [IFLA_GENEVE_DF] = { .type = NLA_U8 },
};
static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1128,6 +1198,16 @@ static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
}
}
+ if (data[IFLA_GENEVE_DF]) {
+ enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]);
+
+ if (df < 0 || df > GENEVE_DF_MAX) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_GENEVE_DF],
+ "Invalid DF attribute");
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -1173,7 +1253,7 @@ static int geneve_configure(struct net *net, struct net_device *dev,
struct netlink_ext_ack *extack,
const struct ip_tunnel_info *info,
bool metadata, bool ipv6_rx_csum,
- bool ttl_inherit)
+ bool ttl_inherit, enum ifla_geneve_df df)
{
struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_dev *t, *geneve = netdev_priv(dev);
@@ -1223,6 +1303,7 @@ static int geneve_configure(struct net *net, struct net_device *dev,
geneve->collect_md = metadata;
geneve->use_udp6_rx_checksums = ipv6_rx_csum;
geneve->ttl_inherit = ttl_inherit;
+ geneve->df = df;
err = register_netdevice(dev);
if (err)
@@ -1242,7 +1323,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack,
struct ip_tunnel_info *info, bool *metadata,
bool *use_udp6_rx_checksums, bool *ttl_inherit,
- bool changelink)
+ enum ifla_geneve_df *df, bool changelink)
{
int attrtype;
@@ -1330,6 +1411,9 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
if (data[IFLA_GENEVE_TOS])
info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
+ if (data[IFLA_GENEVE_DF])
+ *df = nla_get_u8(data[IFLA_GENEVE_DF]);
+
if (data[IFLA_GENEVE_LABEL]) {
info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
IPV6_FLOWLABEL_MASK;
@@ -1448,6 +1532,7 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
+ enum ifla_geneve_df df = GENEVE_DF_UNSET;
bool use_udp6_rx_checksums = false;
struct ip_tunnel_info info;
bool ttl_inherit = false;
@@ -1456,12 +1541,12 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
init_tnl_info(&info, GENEVE_UDP_PORT);
err = geneve_nl2info(tb, data, extack, &info, &metadata,
- &use_udp6_rx_checksums, &ttl_inherit, false);
+ &use_udp6_rx_checksums, &ttl_inherit, &df, false);
if (err)
return err;
err = geneve_configure(net, dev, extack, &info, metadata,
- use_udp6_rx_checksums, ttl_inherit);
+ use_udp6_rx_checksums, ttl_inherit, df);
if (err)
return err;
@@ -1524,6 +1609,7 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_info info;
bool metadata;
bool use_udp6_rx_checksums;
+ enum ifla_geneve_df df;
bool ttl_inherit;
int err;
@@ -1539,7 +1625,7 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
use_udp6_rx_checksums = geneve->use_udp6_rx_checksums;
ttl_inherit = geneve->ttl_inherit;
err = geneve_nl2info(tb, data, extack, &info, &metadata,
- &use_udp6_rx_checksums, &ttl_inherit, true);
+ &use_udp6_rx_checksums, &ttl_inherit, &df, true);
if (err)
return err;
@@ -1572,6 +1658,7 @@ static size_t geneve_get_size(const struct net_device *dev)
nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */
+ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_DF */
nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */
nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */
nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */
@@ -1620,6 +1707,9 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label))
goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->df))
+ goto nla_put_failure;
+
if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst))
goto nla_put_failure;
@@ -1671,7 +1761,8 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
return dev;
init_tnl_info(&info, dst_port);
- err = geneve_configure(net, dev, NULL, &info, true, true, false);
+ err = geneve_configure(net, dev, NULL, &info,
+ true, true, false, GENEVE_DF_UNSET);
if (err) {
free_netdev(dev);
return ERR_PTR(err);
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index ae969f806d56..c3e65e78f015 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1552,6 +1552,34 @@ drop:
return 0;
}
+/* Callback from net/ipv{4,6}/udp.c to check that we have a VNI for errors */
+static int vxlan_err_lookup(struct sock *sk, struct sk_buff *skb)
+{
+ struct vxlan_dev *vxlan;
+ struct vxlan_sock *vs;
+ struct vxlanhdr *hdr;
+ __be32 vni;
+
+ if (skb->len < VXLAN_HLEN)
+ return -EINVAL;
+
+ hdr = vxlan_hdr(skb);
+
+ if (!(hdr->vx_flags & VXLAN_HF_VNI))
+ return -EINVAL;
+
+ vs = rcu_dereference_sk_user_data(sk);
+ if (!vs)
+ return -ENOENT;
+
+ vni = vxlan_vni(hdr->vx_vni);
+ vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni);
+ if (!vxlan)
+ return -ENOENT;
+
+ return 0;
+}
+
static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
@@ -2250,13 +2278,24 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
- /* Bypass encapsulation if the destination is local */
if (!info) {
+ /* Bypass encapsulation if the destination is local */
err = encap_bypass_if_local(skb, dev, vxlan, dst,
dst_port, ifindex, vni,
&rt->dst, rt->rt_flags);
if (err)
goto out_unlock;
+
+ if (vxlan->cfg.df == VXLAN_DF_SET) {
+ df = htons(IP_DF);
+ } else if (vxlan->cfg.df == VXLAN_DF_INHERIT) {
+ struct ethhdr *eth = eth_hdr(skb);
+
+ if (ntohs(eth->h_proto) == ETH_P_IPV6 ||
+ (ntohs(eth->h_proto) == ETH_P_IP &&
+ old_iph->frag_off & htons(IP_DF)))
+ df = htons(IP_DF);
+ }
} else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) {
df = htons(IP_DF);
}
@@ -2809,6 +2848,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_GPE] = { .type = NLA_FLAG, },
[IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG },
[IFLA_VXLAN_TTL_INHERIT] = { .type = NLA_FLAG },
+ [IFLA_VXLAN_DF] = { .type = NLA_U8 },
};
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -2865,6 +2905,16 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
}
}
+ if (data[IFLA_VXLAN_DF]) {
+ enum ifla_vxlan_df df = nla_get_u8(data[IFLA_VXLAN_DF]);
+
+ if (df < 0 || df > VXLAN_DF_MAX) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_DF],
+ "Invalid DF attribute");
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -2948,6 +2998,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
tunnel_cfg.sk_user_data = vs;
tunnel_cfg.encap_type = 1;
tunnel_cfg.encap_rcv = vxlan_rcv;
+ tunnel_cfg.encap_err_lookup = vxlan_err_lookup;
tunnel_cfg.encap_destroy = NULL;
tunnel_cfg.gro_receive = vxlan_gro_receive;
tunnel_cfg.gro_complete = vxlan_gro_complete;
@@ -3509,6 +3560,9 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
conf->mtu = nla_get_u32(tb[IFLA_MTU]);
}
+ if (data[IFLA_VXLAN_DF])
+ conf->df = nla_get_u8(data[IFLA_VXLAN_DF]);
+
return 0;
}
@@ -3601,6 +3655,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL_INHERIT */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */
+ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_DF */
nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */
@@ -3667,6 +3722,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_VXLAN_TTL_INHERIT,
!!(vxlan->cfg.flags & VXLAN_F_TTL_INHERIT)) ||
nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
+ nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) ||
nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
nla_put_u8(skb, IFLA_VXLAN_LEARNING,
!!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||