diff options
Diffstat (limited to 'drivers/net/vxlan.c')
-rw-r--r-- | drivers/net/vxlan.c | 216 |
1 files changed, 84 insertions, 132 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 8ff30c3bdfce..6e65832051d6 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -11,32 +11,18 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> -#include <linux/types.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/slab.h> -#include <linux/skbuff.h> -#include <linux/rculist.h> -#include <linux/netdevice.h> -#include <linux/in.h> -#include <linux/ip.h> #include <linux/udp.h> #include <linux/igmp.h> -#include <linux/etherdevice.h> #include <linux/if_ether.h> -#include <linux/if_vlan.h> -#include <linux/hash.h> #include <linux/ethtool.h> #include <net/arp.h> #include <net/ndisc.h> #include <net/ip.h> -#include <net/ip_tunnels.h> #include <net/icmp.h> -#include <net/udp.h> -#include <net/udp_tunnel.h> #include <net/rtnetlink.h> -#include <net/route.h> -#include <net/dsfield.h> #include <net/inet_ecn.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -44,12 +30,9 @@ #include <net/protocol.h> #if IS_ENABLED(CONFIG_IPV6) -#include <net/ipv6.h> -#include <net/addrconf.h> #include <net/ip6_tunnel.h> #include <net/ip6_checksum.h> #endif -#include <net/dst_metadata.h> #define VXLAN_VERSION "0.1" @@ -619,42 +602,6 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr)); } -/* Notify netdevs that UDP port started listening */ -static void vxlan_notify_add_rx_port(struct vxlan_sock *vs) -{ - struct net_device *dev; - struct sock *sk = vs->sock->sk; - struct net *net = sock_net(sk); - sa_family_t sa_family = vxlan_get_sk_family(vs); - __be16 port = inet_sk(sk)->inet_sport; - - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { - if (dev->netdev_ops->ndo_add_vxlan_port) - dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family, - port); - } - rcu_read_unlock(); -} - -/* Notify netdevs that UDP port is no more listening */ -static void vxlan_notify_del_rx_port(struct vxlan_sock *vs) -{ - struct net_device *dev; - struct sock *sk = vs->sock->sk; - struct net *net = sock_net(sk); - sa_family_t sa_family = vxlan_get_sk_family(vs); - __be16 port = inet_sk(sk)->inet_sport; - - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { - if (dev->netdev_ops->ndo_del_vxlan_port) - dev->netdev_ops->ndo_del_vxlan_port(dev, sa_family, - port); - } - rcu_read_unlock(); -} - /* Add new entry to forwarding table -- assumes lock held */ static int vxlan_fdb_create(struct vxlan_dev *vxlan, const u8 *mac, union vxlan_addr *ip, @@ -1050,7 +997,10 @@ static bool __vxlan_sock_release_prep(struct vxlan_sock *vs) vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id); spin_lock(&vn->sock_lock); hlist_del_rcu(&vs->hlist); - vxlan_notify_del_rx_port(vs); + udp_tunnel_notify_del_rx_port(vs->sock, + (vs->flags & VXLAN_F_GPE) ? + UDP_TUNNEL_TYPE_VXLAN_GPE : + UDP_TUNNEL_TYPE_VXLAN); spin_unlock(&vn->sock_lock); return true; @@ -1861,7 +1811,7 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, fl4.flowi4_mark = skb->mark; fl4.flowi4_proto = IPPROTO_UDP; fl4.daddr = daddr; - fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr; + fl4.saddr = *saddr; rt = ip_route_output_key(vxlan->net, &fl4); if (!IS_ERR(rt)) { @@ -1897,7 +1847,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.daddr = *daddr; - fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr; + fl6.saddr = *saddr; fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label); fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = IPPROTO_UDP; @@ -1970,7 +1920,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct rtable *rt = NULL; const struct iphdr *old_iph; union vxlan_addr *dst; - union vxlan_addr remote_ip; + union vxlan_addr remote_ip, local_ip; + union vxlan_addr *src; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; __be16 src_port = 0, dst_port; @@ -1988,6 +1939,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port; vni = rdst->remote_vni; dst = &rdst->remote_ip; + src = &vxlan->cfg.saddr; dst_cache = &rdst->dst_cache; } else { if (!info) { @@ -1998,11 +1950,15 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port; vni = vxlan_tun_id_to_vni(info->key.tun_id); remote_ip.sa.sa_family = ip_tunnel_info_af(info); - if (remote_ip.sa.sa_family == AF_INET) + if (remote_ip.sa.sa_family == AF_INET) { remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst; - else + local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src; + } else { remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst; + local_ip.sin6.sin6_addr = info->key.u.ipv6.src; + } dst = &remote_ip; + src = &local_ip; dst_cache = &info->dst_cache; } @@ -2042,15 +1998,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } if (dst->sa.sa_family == AF_INET) { - __be32 saddr; - if (!vxlan->vn4_sock) goto drop; sk = vxlan->vn4_sock->sock->sk; rt = vxlan_get_route(vxlan, skb, rdst ? rdst->remote_ifindex : 0, tos, - dst->sin.sin_addr.s_addr, &saddr, + dst->sin.sin_addr.s_addr, + &src->sin.sin_addr.s_addr, dst_cache, info); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to %pI4\n", @@ -2067,7 +2022,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } /* Bypass encapsulation if the destination is local */ - if (rt->rt_flags & RTCF_LOCAL && + if (!info && rt->rt_flags & RTCF_LOCAL && !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) { struct vxlan_dev *dst_vxlan; @@ -2093,13 +2048,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (err < 0) goto xmit_tx_error; - udp_tunnel_xmit_skb(rt, sk, skb, saddr, + udp_tunnel_xmit_skb(rt, sk, skb, src->sin.sin_addr.s_addr, dst->sin.sin_addr.s_addr, tos, ttl, df, src_port, dst_port, xnet, !udp_sum); #if IS_ENABLED(CONFIG_IPV6) } else { struct dst_entry *ndst; - struct in6_addr saddr; u32 rt6i_flags; if (!vxlan->vn6_sock) @@ -2108,7 +2062,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ndst = vxlan6_get_route(vxlan, skb, rdst ? rdst->remote_ifindex : 0, tos, - label, &dst->sin6.sin6_addr, &saddr, + label, &dst->sin6.sin6_addr, + &src->sin6.sin6_addr, dst_cache, info); if (IS_ERR(ndst)) { netdev_dbg(dev, "no route to %pI6\n", @@ -2127,7 +2082,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, /* Bypass encapsulation if the destination is local */ rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags; - if (rt6i_flags & RTF_LOCAL && + if (!info && rt6i_flags & RTF_LOCAL && !(rt6i_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) { struct vxlan_dev *dst_vxlan; @@ -2154,7 +2109,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, return; } udp_tunnel6_xmit_skb(ndst, sk, skb, dev, - &saddr, &dst->sin6.sin6_addr, tos, ttl, + &src->sin6.sin6_addr, + &dst->sin6.sin6_addr, tos, ttl, label, src_port, dst_port, !udp_sum); #endif } @@ -2525,30 +2481,24 @@ static struct device_type vxlan_type = { .name = "vxlan", }; -/* Calls the ndo_add_vxlan_port of the caller in order to +/* Calls the ndo_udp_tunnel_add of the caller in order to * supply the listening VXLAN udp ports. Callers are expected - * to implement the ndo_add_vxlan_port. + * to implement the ndo_udp_tunnel_add. */ static void vxlan_push_rx_ports(struct net_device *dev) { struct vxlan_sock *vs; struct net *net = dev_net(dev); struct vxlan_net *vn = net_generic(net, vxlan_net_id); - sa_family_t sa_family; - __be16 port; unsigned int i; - if (!dev->netdev_ops->ndo_add_vxlan_port) - return; - spin_lock(&vn->sock_lock); for (i = 0; i < PORT_HASH_SIZE; ++i) { - hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) { - port = inet_sk(vs->sock->sk)->inet_sport; - sa_family = vxlan_get_sk_family(vs); - dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family, - port); - } + hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) + udp_tunnel_push_rx_port(dev, vs->sock, + (vs->flags & VXLAN_F_GPE) ? + UDP_TUNNEL_TYPE_VXLAN_GPE : + UDP_TUNNEL_TYPE_VXLAN); } spin_unlock(&vn->sock_lock); } @@ -2750,7 +2700,10 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6, spin_lock(&vn->sock_lock); hlist_add_head_rcu(&vs->hlist, vs_head(net, port)); - vxlan_notify_add_rx_port(vs); + udp_tunnel_notify_add_rx_port(sock, + (vs->flags & VXLAN_F_GPE) ? + UDP_TUNNEL_TYPE_VXLAN_GPE : + UDP_TUNNEL_TYPE_VXLAN); spin_unlock(&vn->sock_lock); /* Mark socket as an encapsulation socket. */ @@ -2829,14 +2782,15 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, struct net_device *lowerdev = NULL; if (conf->flags & VXLAN_F_GPE) { - if (conf->flags & ~VXLAN_F_ALLOWED_GPE) - return -EINVAL; /* For now, allow GPE only together with COLLECT_METADATA. * This can be relaxed later; in such case, the other side * of the PtP link will have to be provided. */ - if (!(conf->flags & VXLAN_F_COLLECT_METADATA)) + if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) || + !(conf->flags & VXLAN_F_COLLECT_METADATA)) { + pr_info("unsupported combination of extensions\n"); return -EINVAL; + } vxlan_raw_setup(dev); } else { @@ -2889,6 +2843,9 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); needed_headroom = lowerdev->hard_header_len; + } else if (vxlan_addr_multicast(&dst->remote_ip)) { + pr_info("multicast destination requires interface to be specified\n"); + return -EINVAL; } if (conf->mtu) { @@ -2921,8 +2878,10 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, tmp->cfg.saddr.sa.sa_family == AF_INET6) == use_ipv6 && tmp->cfg.dst_port == vxlan->cfg.dst_port && (tmp->flags & VXLAN_F_RCV_FLAGS) == - (vxlan->flags & VXLAN_F_RCV_FLAGS)) - return -EEXIST; + (vxlan->flags & VXLAN_F_RCV_FLAGS)) { + pr_info("duplicate VNI %u\n", be32_to_cpu(conf->vni)); + return -EEXIST; + } } dev->ethtool_ops = &vxlan_ethtool_ops; @@ -2952,35 +2911,10 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, return 0; } -struct net_device *vxlan_dev_create(struct net *net, const char *name, - u8 name_assign_type, struct vxlan_config *conf) -{ - struct nlattr *tb[IFLA_MAX+1]; - struct net_device *dev; - int err; - - memset(&tb, 0, sizeof(tb)); - - dev = rtnl_create_link(net, name, name_assign_type, - &vxlan_link_ops, tb); - if (IS_ERR(dev)) - return dev; - - err = vxlan_dev_configure(net, dev, conf); - if (err < 0) { - free_netdev(dev); - return ERR_PTR(err); - } - - return dev; -} -EXPORT_SYMBOL_GPL(vxlan_dev_create); - static int vxlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct vxlan_config conf; - int err; memset(&conf, 0, sizeof(conf)); @@ -3086,26 +3020,10 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) conf.flags |= VXLAN_F_REMCSUM_NOPARTIAL; - err = vxlan_dev_configure(src_net, dev, &conf); - switch (err) { - case -ENODEV: - pr_info("ifindex %d does not exist\n", conf.remote_ifindex); - break; - - case -EPERM: - pr_info("IPv6 is disabled via sysctl\n"); - break; - - case -EEXIST: - pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni)); - break; + if (tb[IFLA_MTU]) + conf.mtu = nla_get_u32(tb[IFLA_MTU]); - case -EINVAL: - pr_info("unsupported combination of extensions\n"); - break; - } - - return err; + return vxlan_dev_configure(src_net, dev, &conf); } static void vxlan_dellink(struct net_device *dev, struct list_head *head) @@ -3265,6 +3183,40 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = { .get_link_net = vxlan_get_link_net, }; +struct net_device *vxlan_dev_create(struct net *net, const char *name, + u8 name_assign_type, + struct vxlan_config *conf) +{ + struct nlattr *tb[IFLA_MAX + 1]; + struct net_device *dev; + int err; + + memset(&tb, 0, sizeof(tb)); + + dev = rtnl_create_link(net, name, name_assign_type, + &vxlan_link_ops, tb); + if (IS_ERR(dev)) + return dev; + + err = vxlan_dev_configure(net, dev, conf); + if (err < 0) { + free_netdev(dev); + return ERR_PTR(err); + } + + err = rtnl_configure_link(dev, NULL); + if (err < 0) { + LIST_HEAD(list_kill); + + vxlan_dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + return ERR_PTR(err); + } + + return dev; +} +EXPORT_SYMBOL_GPL(vxlan_dev_create); + static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn, struct net_device *dev) { @@ -3295,7 +3247,7 @@ static int vxlan_netdevice_event(struct notifier_block *unused, if (event == NETDEV_UNREGISTER) vxlan_handle_lowerdev_unregister(vn, dev); - else if (event == NETDEV_OFFLOAD_PUSH_VXLAN) + else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO) vxlan_push_rx_ports(dev); return NOTIFY_DONE; |