From ad1242d8a063ceb8c6e1b9c1a63b73ec94fa0295 Mon Sep 17 00:00:00 2001 From: Mauricio Vasquez B Date: Mon, 3 Sep 2018 18:05:27 +0200 Subject: selftests/bpf: add missing executables to .gitignore Signed-off-by: Mauricio Vasquez B Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/.gitignore | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools/testing/selftests/bpf') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 49938d72cf63..4d789c1e5167 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -19,3 +19,7 @@ test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user +test_skb_cgroup_id_user +test_socket_cookie +test_cgroup_storage +test_select_reuseport -- cgit v1.2.3-55-g7522 From f5bd3948eb07e76fcd73d0b8ab7b3265be226038 Mon Sep 17 00:00:00 2001 From: Mauricio Vasquez B Date: Mon, 3 Sep 2018 19:01:59 +0200 Subject: selftests/bpf/test_progs: do not check errno == 0 The errno man page states: "The value in errno is significant only when the return value of the call indicated an error..." so it is not correct to check it: errno can be non-zero even if the function succeeded. Checking it causes false positives when errno was set by an earlier call. 
Signed-off-by: Mauricio Vasquez B Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_progs.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'tools/testing/selftests/bpf') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 0ef68204c84b..63a671803ed6 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -112,13 +112,13 @@ static void test_pkt_access(void) err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4), NULL, NULL, &retval, &duration); - CHECK(err || errno || retval, "ipv4", + CHECK(err || retval, "ipv4", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6), NULL, NULL, &retval, &duration); - CHECK(err || errno || retval, "ipv6", + CHECK(err || retval, "ipv6", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); bpf_object__close(obj); @@ -153,14 +153,14 @@ static void test_xdp(void) err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - CHECK(err || errno || retval != XDP_TX || size != 74 || + CHECK(err || retval != XDP_TX || size != 74 || iph->protocol != IPPROTO_IPIP, "ipv4", "err %d errno %d retval %d size %d\n", err, errno, retval, size); err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || errno || retval != XDP_TX || size != 114 || + CHECK(err || retval != XDP_TX || size != 114 || iph6->nexthdr != IPPROTO_IPV6, "ipv6", "err %d errno %d retval %d size %d\n", err, errno, retval, size); @@ -185,13 +185,13 @@ static void test_xdp_adjust_tail(void) err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - CHECK(err || errno || retval != XDP_DROP, + CHECK(err || retval != XDP_DROP, "ipv4", "err %d errno %d retval %d size %d\n", err, errno, retval, 
size); err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || errno || retval != XDP_TX || size != 54, + CHECK(err || retval != XDP_TX || size != 54, "ipv6", "err %d errno %d retval %d size %d\n", err, errno, retval, size); bpf_object__close(obj); @@ -254,14 +254,14 @@ static void test_l4lb(const char *file) err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 || + CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 || *magic != MAGIC_VAL, "ipv4", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 || + CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 || *magic != MAGIC_VAL, "ipv6", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); @@ -343,14 +343,14 @@ static void test_xdp_noinline(void) err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - CHECK(err || errno || retval != 1 || size != 54 || + CHECK(err || retval != 1 || size != 54 || *magic != MAGIC_VAL, "ipv4", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || errno || retval != 1 || size != 74 || + CHECK(err || retval != 1 || size != 74 || *magic != MAGIC_VAL, "ipv6", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); -- cgit v1.2.3-55-g7522 From 9c98b13cc3bb5d90ee2ec047d591272b382468fd Mon Sep 17 00:00:00 2001 From: Petar Penkov Date: Fri, 14 Sep 2018 07:46:21 -0700 Subject: flow_dissector: implements eBPF parser This eBPF program extracts basic/control/ip address/ports 
keys from incoming packets. It supports recursive parsing for IP encapsulation, and VLAN, along with IPv4/IPv6 and extension headers. This program is meant to show how flow dissection and key extraction can be done in eBPF. Link: http://vger.kernel.org/netconf2017_files/rx_hardening_and_udp_gso.pdf Signed-off-by: Petar Penkov Signed-off-by: Willem de Bruijn Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/bpf_flow.c | 373 +++++++++++++++++++++++++++++++++ 2 files changed, 374 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/bpf_flow.c (limited to 'tools/testing/selftests/bpf') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index fff7fb1285fc..e65f50f9185e 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -35,7 +35,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \ test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ - test_skb_cgroup_id_kern.o + test_skb_cgroup_id_kern.o bpf_flow.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c new file mode 100644 index 000000000000..5fb809d95867 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_flow.c @@ -0,0 +1,373 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; +#define PROG(F) SEC(#F) int bpf_func_##F + +/* These are the identifiers of the BPF programs that will be used in tail + * calls. 
Name is limited to 16 characters, with the terminating character and + * bpf_func_ above, we have only 6 to work with, anything after will be cropped. + */ +enum { + IP, + IPV6, + IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */ + IPV6FR, /* Fragmentation IPv6 Extension Header */ + MPLS, + VLAN, +}; + +#define IP_MF 0x2000 +#define IP_OFFSET 0x1FFF +#define IP6_MF 0x0001 +#define IP6_OFFSET 0xFFF8 + +struct vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; + +struct gre_hdr { + __be16 flags; + __be16 proto; +}; + +struct frag_hdr { + __u8 nexthdr; + __u8 reserved; + __be16 frag_off; + __be32 identification; +}; + +struct bpf_map_def SEC("maps") jmp_table = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 8 +}; + +static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb, + __u16 hdr_size, + void *buffer) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + __u16 nhoff = skb->flow_keys->nhoff; + __u8 *hdr; + + /* Verifies this variable offset does not overflow */ + if (nhoff > (USHRT_MAX - hdr_size)) + return NULL; + + hdr = data + nhoff; + if (hdr + hdr_size <= data_end) + return hdr; + + if (bpf_skb_load_bytes(skb, nhoff, buffer, hdr_size)) + return NULL; + + return buffer; +} + +/* Dispatches on ETHERTYPE */ +static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + + keys->n_proto = proto; + switch (proto) { + case bpf_htons(ETH_P_IP): + bpf_tail_call(skb, &jmp_table, IP); + break; + case bpf_htons(ETH_P_IPV6): + bpf_tail_call(skb, &jmp_table, IPV6); + break; + case bpf_htons(ETH_P_MPLS_MC): + case bpf_htons(ETH_P_MPLS_UC): + bpf_tail_call(skb, &jmp_table, MPLS); + break; + case bpf_htons(ETH_P_8021Q): + case bpf_htons(ETH_P_8021AD): + bpf_tail_call(skb, &jmp_table, VLAN); + break; + default: + /* Protocol not supported */ + 
return BPF_DROP; + } + + return BPF_DROP; +} + +SEC("dissect") +int dissect(struct __sk_buff *skb) +{ + if (!skb->vlan_present) + return parse_eth_proto(skb, skb->protocol); + else + return parse_eth_proto(skb, skb->vlan_proto); +} + +/* Parses on IPPROTO_* */ +static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + void *data_end = (void *)(long)skb->data_end; + struct icmphdr *icmp, _icmp; + struct gre_hdr *gre, _gre; + struct ethhdr *eth, _eth; + struct tcphdr *tcp, _tcp; + struct udphdr *udp, _udp; + + keys->ip_proto = proto; + switch (proto) { + case IPPROTO_ICMP: + icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp); + if (!icmp) + return BPF_DROP; + return BPF_OK; + case IPPROTO_IPIP: + keys->is_encap = true; + return parse_eth_proto(skb, bpf_htons(ETH_P_IP)); + case IPPROTO_IPV6: + keys->is_encap = true; + return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6)); + case IPPROTO_GRE: + gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre); + if (!gre) + return BPF_DROP; + + if (bpf_htons(gre->flags & GRE_VERSION)) + /* Only inspect standard GRE packets with version 0 */ + return BPF_OK; + + keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */ + if (GRE_IS_CSUM(gre->flags)) + keys->nhoff += 4; /* Step over chksum and Padding */ + if (GRE_IS_KEY(gre->flags)) + keys->nhoff += 4; /* Step over key */ + if (GRE_IS_SEQ(gre->flags)) + keys->nhoff += 4; /* Step over sequence number */ + + keys->is_encap = true; + + if (gre->proto == bpf_htons(ETH_P_TEB)) { + eth = bpf_flow_dissect_get_header(skb, sizeof(*eth), + &_eth); + if (!eth) + return BPF_DROP; + + keys->nhoff += sizeof(*eth); + + return parse_eth_proto(skb, eth->h_proto); + } else { + return parse_eth_proto(skb, gre->proto); + } + case IPPROTO_TCP: + tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp); + if (!tcp) + return BPF_DROP; + + if (tcp->doff < 5) + return BPF_DROP; + + if ((__u8 *)tcp + (tcp->doff 
<< 2) > data_end) + return BPF_DROP; + + keys->thoff = keys->nhoff; + keys->sport = tcp->source; + keys->dport = tcp->dest; + return BPF_OK; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp); + if (!udp) + return BPF_DROP; + + keys->thoff = keys->nhoff; + keys->sport = udp->source; + keys->dport = udp->dest; + return BPF_OK; + default: + return BPF_DROP; + } + + return BPF_DROP; +} + +static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + + keys->ip_proto = nexthdr; + switch (nexthdr) { + case IPPROTO_HOPOPTS: + case IPPROTO_DSTOPTS: + bpf_tail_call(skb, &jmp_table, IPV6OP); + break; + case IPPROTO_FRAGMENT: + bpf_tail_call(skb, &jmp_table, IPV6FR); + break; + default: + return parse_ip_proto(skb, nexthdr); + } + + return BPF_DROP; +} + +PROG(IP)(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + struct bpf_flow_keys *keys = skb->flow_keys; + void *data = (void *)(long)skb->data; + struct iphdr *iph, _iph; + bool done = false; + + iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph); + if (!iph) + return BPF_DROP; + + /* IP header cannot be smaller than 20 bytes */ + if (iph->ihl < 5) + return BPF_DROP; + + keys->addr_proto = ETH_P_IP; + keys->ipv4_src = iph->saddr; + keys->ipv4_dst = iph->daddr; + + keys->nhoff += iph->ihl << 2; + if (data + keys->nhoff > data_end) + return BPF_DROP; + + if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) { + keys->is_frag = true; + if (iph->frag_off & bpf_htons(IP_OFFSET)) + /* From second fragment on, packets do not have headers + * we can parse. 
+ */ + done = true; + else + keys->is_first_frag = true; + } + + if (done) + return BPF_OK; + + return parse_ip_proto(skb, iph->protocol); +} + +PROG(IPV6)(struct __sk_buff *skb) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + struct ipv6hdr *ip6h, _ip6h; + + ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); + if (!ip6h) + return BPF_DROP; + + keys->addr_proto = ETH_P_IPV6; + memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr)); + + keys->nhoff += sizeof(struct ipv6hdr); + + return parse_ipv6_proto(skb, ip6h->nexthdr); +} + +PROG(IPV6OP)(struct __sk_buff *skb) +{ + struct ipv6_opt_hdr *ip6h, _ip6h; + + ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); + if (!ip6h) + return BPF_DROP; + + /* hlen is in 8-octets and does not include the first 8 bytes + * of the header + */ + skb->flow_keys->nhoff += (1 + ip6h->hdrlen) << 3; + + return parse_ipv6_proto(skb, ip6h->nexthdr); +} + +PROG(IPV6FR)(struct __sk_buff *skb) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + struct frag_hdr *fragh, _fragh; + + fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh); + if (!fragh) + return BPF_DROP; + + keys->nhoff += sizeof(*fragh); + keys->is_frag = true; + if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) + keys->is_first_frag = true; + + return parse_ipv6_proto(skb, fragh->nexthdr); +} + +PROG(MPLS)(struct __sk_buff *skb) +{ + struct mpls_label *mpls, _mpls; + + mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls); + if (!mpls) + return BPF_DROP; + + return BPF_OK; +} + +PROG(VLAN)(struct __sk_buff *skb) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + struct vlan_hdr *vlan, _vlan; + __be16 proto; + + /* Peek back to see if single or double-tagging */ + if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(proto), &proto, + sizeof(proto))) + return BPF_DROP; + + /* Account for double-tagging */ + if (proto == bpf_htons(ETH_P_8021AD)) { + vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); + if (!vlan) + 
return BPF_DROP; + + if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q)) + return BPF_DROP; + + keys->nhoff += sizeof(*vlan); + } + + vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); + if (!vlan) + return BPF_DROP; + + keys->nhoff += sizeof(*vlan); + /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/ + if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) || + vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q)) + return BPF_DROP; + + return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto); +} + +char __license[] SEC("license") = "GPL"; -- cgit v1.2.3-55-g7522 From 50b3ed57dee9cd0e06c59826cec8af14b51bab3e Mon Sep 17 00:00:00 2001 From: Petar Penkov Date: Fri, 14 Sep 2018 07:46:22 -0700 Subject: selftests/bpf: test bpf flow dissection Adds a test that sends different types of packets over multiple tunnels and verifies that valid packets are dissected correctly. To do so, a tc-flower rule is added to drop packets on UDP src port 9, and packets are sent from ports 8, 9, and 10. Only the packets on port 9 should be dropped. Because tc-flower relies on the flow dissector to match flows, correct classification demonstrates correct dissection. Also add support logic to load the BPF program and to inject the test packets. 
Signed-off-by: Petar Penkov Signed-off-by: Willem de Bruijn Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/.gitignore | 2 + tools/testing/selftests/bpf/Makefile | 6 +- tools/testing/selftests/bpf/config | 1 + tools/testing/selftests/bpf/flow_dissector_load.c | 140 ++++ tools/testing/selftests/bpf/test_flow_dissector.c | 782 +++++++++++++++++++++ tools/testing/selftests/bpf/test_flow_dissector.sh | 115 +++ tools/testing/selftests/bpf/with_addr.sh | 54 ++ tools/testing/selftests/bpf/with_tunnels.sh | 36 + 8 files changed, 1134 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/flow_dissector_load.c create mode 100644 tools/testing/selftests/bpf/test_flow_dissector.c create mode 100755 tools/testing/selftests/bpf/test_flow_dissector.sh create mode 100755 tools/testing/selftests/bpf/with_addr.sh create mode 100755 tools/testing/selftests/bpf/with_tunnels.sh (limited to 'tools/testing/selftests/bpf') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 4d789c1e5167..8a60c9b9892d 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -23,3 +23,5 @@ test_skb_cgroup_id_user test_socket_cookie test_cgroup_storage test_select_reuseport +test_flow_dissector +flow_dissector_load diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e65f50f9185e..fd3851d5c079 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -47,10 +47,12 @@ TEST_PROGS := test_kmod.sh \ test_tunnel.sh \ test_lwt_seg6local.sh \ test_lirc_mode2.sh \ - test_skb_cgroup_id.sh + test_skb_cgroup_id.sh \ + test_flow_dissector.sh # Compile but not part of 'make run_tests' -TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user +TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \ + flow_dissector_load test_flow_dissector include ../lib.mk diff 
--git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index b4994a94968b..3655508f95fd 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -18,3 +18,4 @@ CONFIG_CRYPTO_HMAC=m CONFIG_CRYPTO_SHA256=m CONFIG_VXLAN=y CONFIG_GENEVE=y +CONFIG_NET_CLS_FLOWER=m diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c new file mode 100644 index 000000000000..d3273b5b3173 --- /dev/null +++ b/tools/testing/selftests/bpf/flow_dissector_load.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector"; +const char *cfg_map_name = "jmp_table"; +bool cfg_attach = true; +char *cfg_section_name; +char *cfg_path_name; + +static void load_and_attach_program(void) +{ + struct bpf_program *prog, *main_prog; + struct bpf_map *prog_array; + int i, fd, prog_fd, ret; + struct bpf_object *obj; + int prog_array_fd; + + ret = bpf_prog_load(cfg_path_name, BPF_PROG_TYPE_FLOW_DISSECTOR, &obj, + &prog_fd); + if (ret) + error(1, 0, "bpf_prog_load %s", cfg_path_name); + + main_prog = bpf_object__find_program_by_title(obj, cfg_section_name); + if (!main_prog) + error(1, 0, "bpf_object__find_program_by_title %s", + cfg_section_name); + + prog_fd = bpf_program__fd(main_prog); + if (prog_fd < 0) + error(1, 0, "bpf_program__fd"); + + prog_array = bpf_object__find_map_by_name(obj, cfg_map_name); + if (!prog_array) + error(1, 0, "bpf_object__find_map_by_name %s", cfg_map_name); + + prog_array_fd = bpf_map__fd(prog_array); + if (prog_array_fd < 0) + error(1, 0, "bpf_map__fd %s", cfg_map_name); + + i = 0; + bpf_object__for_each_program(prog, obj) { + fd = bpf_program__fd(prog); + if (fd < 0) + error(1, 0, "bpf_program__fd"); + + if (fd != prog_fd) { + printf("%d: %s\n", i, bpf_program__title(prog, false)); + 
bpf_map_update_elem(prog_array_fd, &i, &fd, BPF_ANY); + ++i; + } + } + + ret = bpf_prog_attach(prog_fd, 0 /* Ignore */, BPF_FLOW_DISSECTOR, 0); + if (ret) + error(1, 0, "bpf_prog_attach %s", cfg_path_name); + + ret = bpf_object__pin(obj, cfg_pin_path); + if (ret) + error(1, 0, "bpf_object__pin %s", cfg_pin_path); + +} + +static void detach_program(void) +{ + char command[64]; + int ret; + + ret = bpf_prog_detach(0, BPF_FLOW_DISSECTOR); + if (ret) + error(1, 0, "bpf_prog_detach"); + + /* To unpin, it is necessary and sufficient to just remove this dir */ + sprintf(command, "rm -r %s", cfg_pin_path); + ret = system(command); + if (ret) + error(1, errno, command); +} + +static void parse_opts(int argc, char **argv) +{ + bool attach = false; + bool detach = false; + int c; + + while ((c = getopt(argc, argv, "adp:s:")) != -1) { + switch (c) { + case 'a': + if (detach) + error(1, 0, "attach/detach are exclusive"); + attach = true; + break; + case 'd': + if (attach) + error(1, 0, "attach/detach are exclusive"); + detach = true; + break; + case 'p': + if (cfg_path_name) + error(1, 0, "only one prog name can be given"); + + cfg_path_name = optarg; + break; + case 's': + if (cfg_section_name) + error(1, 0, "only one section can be given"); + + cfg_section_name = optarg; + break; + } + } + + if (detach) + cfg_attach = false; + + if (cfg_attach && !cfg_path_name) + error(1, 0, "must provide a path to the BPF program"); + + if (cfg_attach && !cfg_section_name) + error(1, 0, "must provide a section name"); +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + if (cfg_attach) + load_and_attach_program(); + else + detach_program(); + return 0; +} diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c new file mode 100644 index 000000000000..12b784afba31 --- /dev/null +++ b/tools/testing/selftests/bpf/test_flow_dissector.c @@ -0,0 +1,782 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Inject packets with all 
sorts of encapsulation into the kernel. + * + * IPv4/IPv6 outer layer 3 + * GRE/GUE/BARE outer layer 4, where bare is IPIP/SIT/IPv4-in-IPv6/.. + * IPv4/IPv6 inner layer 3 + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CFG_PORT_INNER 8000 + +/* Add some protocol definitions that do not exist in userspace */ + +struct grehdr { + uint16_t unused; + uint16_t protocol; +} __attribute__((packed)); + +struct guehdr { + union { + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 hlen:5, + control:1, + version:2; +#elif defined (__BIG_ENDIAN_BITFIELD) + __u8 version:2, + control:1, + hlen:5; +#else +#error "Please fix " +#endif + __u8 proto_ctype; + __be16 flags; + }; + __be32 word; + }; +}; + +static uint8_t cfg_dsfield_inner; +static uint8_t cfg_dsfield_outer; +static uint8_t cfg_encap_proto; +static bool cfg_expect_failure = false; +static int cfg_l3_extra = AF_UNSPEC; /* optional SIT prefix */ +static int cfg_l3_inner = AF_UNSPEC; +static int cfg_l3_outer = AF_UNSPEC; +static int cfg_num_pkt = 10; +static int cfg_num_secs = 0; +static char cfg_payload_char = 'a'; +static int cfg_payload_len = 100; +static int cfg_port_gue = 6080; +static bool cfg_only_rx; +static bool cfg_only_tx; +static int cfg_src_port = 9; + +static char buf[ETH_DATA_LEN]; + +#define INIT_ADDR4(name, addr4, port) \ + static struct sockaddr_in name = { \ + .sin_family = AF_INET, \ + .sin_port = __constant_htons(port), \ + .sin_addr.s_addr = __constant_htonl(addr4), \ + }; + +#define INIT_ADDR6(name, addr6, port) \ + static struct sockaddr_in6 name = { \ + .sin6_family = AF_INET6, \ + .sin6_port = __constant_htons(port), \ + .sin6_addr = addr6, \ + }; + +INIT_ADDR4(in_daddr4, INADDR_LOOPBACK, CFG_PORT_INNER) +INIT_ADDR4(in_saddr4, INADDR_LOOPBACK + 2, 0) 
+INIT_ADDR4(out_daddr4, INADDR_LOOPBACK, 0) +INIT_ADDR4(out_saddr4, INADDR_LOOPBACK + 1, 0) +INIT_ADDR4(extra_daddr4, INADDR_LOOPBACK, 0) +INIT_ADDR4(extra_saddr4, INADDR_LOOPBACK + 1, 0) + +INIT_ADDR6(in_daddr6, IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER) +INIT_ADDR6(in_saddr6, IN6ADDR_LOOPBACK_INIT, 0) +INIT_ADDR6(out_daddr6, IN6ADDR_LOOPBACK_INIT, 0) +INIT_ADDR6(out_saddr6, IN6ADDR_LOOPBACK_INIT, 0) +INIT_ADDR6(extra_daddr6, IN6ADDR_LOOPBACK_INIT, 0) +INIT_ADDR6(extra_saddr6, IN6ADDR_LOOPBACK_INIT, 0) + +static unsigned long util_gettime(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static void util_printaddr(const char *msg, struct sockaddr *addr) +{ + unsigned long off = 0; + char nbuf[INET6_ADDRSTRLEN]; + + switch (addr->sa_family) { + case PF_INET: + off = __builtin_offsetof(struct sockaddr_in, sin_addr); + break; + case PF_INET6: + off = __builtin_offsetof(struct sockaddr_in6, sin6_addr); + break; + default: + error(1, 0, "printaddr: unsupported family %u\n", + addr->sa_family); + } + + if (!inet_ntop(addr->sa_family, ((void *) addr) + off, nbuf, + sizeof(nbuf))) + error(1, errno, "inet_ntop"); + + fprintf(stderr, "%s: %s\n", msg, nbuf); +} + +static unsigned long add_csum_hword(const uint16_t *start, int num_u16) +{ + unsigned long sum = 0; + int i; + + for (i = 0; i < num_u16; i++) + sum += start[i]; + + return sum; +} + +static uint16_t build_ip_csum(const uint16_t *start, int num_u16, + unsigned long sum) +{ + sum += add_csum_hword(start, num_u16); + + while (sum >> 16) + sum = (sum & 0xffff) + (sum >> 16); + + return ~sum; +} + +static void build_ipv4_header(void *header, uint8_t proto, + uint32_t src, uint32_t dst, + int payload_len, uint8_t tos) +{ + struct iphdr *iph = header; + + iph->ihl = 5; + iph->version = 4; + iph->tos = tos; + iph->ttl = 8; + iph->tot_len = htons(sizeof(*iph) + payload_len); + iph->id = htons(1337); + iph->protocol = proto; + iph->saddr = src; + iph->daddr = 
dst; + iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0); +} + +static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield) +{ + uint16_t val, *ptr = (uint16_t *)ip6h; + + val = ntohs(*ptr); + val &= 0xF00F; + val |= ((uint16_t) dsfield) << 4; + *ptr = htons(val); +} + +static void build_ipv6_header(void *header, uint8_t proto, + struct sockaddr_in6 *src, + struct sockaddr_in6 *dst, + int payload_len, uint8_t dsfield) +{ + struct ipv6hdr *ip6h = header; + + ip6h->version = 6; + ip6h->payload_len = htons(payload_len); + ip6h->nexthdr = proto; + ip6h->hop_limit = 8; + ipv6_set_dsfield(ip6h, dsfield); + + memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr)); + memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr)); +} + +static uint16_t build_udp_v4_csum(const struct iphdr *iph, + const struct udphdr *udph, + int num_words) +{ + unsigned long pseudo_sum; + int num_u16 = sizeof(iph->saddr); /* halfwords: twice byte len */ + + pseudo_sum = add_csum_hword((void *) &iph->saddr, num_u16); + pseudo_sum += htons(IPPROTO_UDP); + pseudo_sum += udph->len; + return build_ip_csum((void *) udph, num_words, pseudo_sum); +} + +static uint16_t build_udp_v6_csum(const struct ipv6hdr *ip6h, + const struct udphdr *udph, + int num_words) +{ + unsigned long pseudo_sum; + int num_u16 = sizeof(ip6h->saddr); /* halfwords: twice byte len */ + + pseudo_sum = add_csum_hword((void *) &ip6h->saddr, num_u16); + pseudo_sum += htons(ip6h->nexthdr); + pseudo_sum += ip6h->payload_len; + return build_ip_csum((void *) udph, num_words, pseudo_sum); +} + +static void build_udp_header(void *header, int payload_len, + uint16_t dport, int family) +{ + struct udphdr *udph = header; + int len = sizeof(*udph) + payload_len; + + udph->source = htons(cfg_src_port); + udph->dest = htons(dport); + udph->len = htons(len); + udph->check = 0; + if (family == AF_INET) + udph->check = build_udp_v4_csum(header - sizeof(struct iphdr), + udph, len >> 1); + else + udph->check = 
build_udp_v6_csum(header - sizeof(struct ipv6hdr), + udph, len >> 1); +} + +static void build_gue_header(void *header, uint8_t proto) +{ + struct guehdr *gueh = header; + + gueh->proto_ctype = proto; +} + +static void build_gre_header(void *header, uint16_t proto) +{ + struct grehdr *greh = header; + + greh->protocol = htons(proto); +} + +static int l3_length(int family) +{ + if (family == AF_INET) + return sizeof(struct iphdr); + else + return sizeof(struct ipv6hdr); +} + +static int build_packet(void) +{ + int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0; + int el3_len = 0; + + if (cfg_l3_extra) + el3_len = l3_length(cfg_l3_extra); + + /* calculate header offsets */ + if (cfg_encap_proto) { + ol3_len = l3_length(cfg_l3_outer); + + if (cfg_encap_proto == IPPROTO_GRE) + ol4_len = sizeof(struct grehdr); + else if (cfg_encap_proto == IPPROTO_UDP) + ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr); + } + + il3_len = l3_length(cfg_l3_inner); + il4_len = sizeof(struct udphdr); + + if (el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len >= + sizeof(buf)) + error(1, 0, "packet too large\n"); + + /* + * Fill packet from inside out, to calculate correct checksums. + * But create ip before udp headers, as udp uses ip for pseudo-sum. 
+ */ + memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len, + cfg_payload_char, cfg_payload_len); + + /* add zero byte for udp csum padding */ + buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len] = 0; + + switch (cfg_l3_inner) { + case PF_INET: + build_ipv4_header(buf + el3_len + ol3_len + ol4_len, + IPPROTO_UDP, + in_saddr4.sin_addr.s_addr, + in_daddr4.sin_addr.s_addr, + il4_len + cfg_payload_len, + cfg_dsfield_inner); + break; + case PF_INET6: + build_ipv6_header(buf + el3_len + ol3_len + ol4_len, + IPPROTO_UDP, + &in_saddr6, &in_daddr6, + il4_len + cfg_payload_len, + cfg_dsfield_inner); + break; + } + + build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len, + cfg_payload_len, CFG_PORT_INNER, cfg_l3_inner); + + if (!cfg_encap_proto) + return il3_len + il4_len + cfg_payload_len; + + switch (cfg_l3_outer) { + case PF_INET: + build_ipv4_header(buf + el3_len, cfg_encap_proto, + out_saddr4.sin_addr.s_addr, + out_daddr4.sin_addr.s_addr, + ol4_len + il3_len + il4_len + cfg_payload_len, + cfg_dsfield_outer); + break; + case PF_INET6: + build_ipv6_header(buf + el3_len, cfg_encap_proto, + &out_saddr6, &out_daddr6, + ol4_len + il3_len + il4_len + cfg_payload_len, + cfg_dsfield_outer); + break; + } + + switch (cfg_encap_proto) { + case IPPROTO_UDP: + build_gue_header(buf + el3_len + ol3_len + ol4_len - + sizeof(struct guehdr), + cfg_l3_inner == PF_INET ? IPPROTO_IPIP + : IPPROTO_IPV6); + build_udp_header(buf + el3_len + ol3_len, + sizeof(struct guehdr) + il3_len + il4_len + + cfg_payload_len, + cfg_port_gue, cfg_l3_outer); + break; + case IPPROTO_GRE: + build_gre_header(buf + el3_len + ol3_len, + cfg_l3_inner == PF_INET ? ETH_P_IP + : ETH_P_IPV6); + break; + } + + switch (cfg_l3_extra) { + case PF_INET: + build_ipv4_header(buf, + cfg_l3_outer == PF_INET ? 
IPPROTO_IPIP + : IPPROTO_IPV6, + extra_saddr4.sin_addr.s_addr, + extra_daddr4.sin_addr.s_addr, + ol3_len + ol4_len + il3_len + il4_len + + cfg_payload_len, 0); + break; + case PF_INET6: + build_ipv6_header(buf, + cfg_l3_outer == PF_INET ? IPPROTO_IPIP + : IPPROTO_IPV6, + &extra_saddr6, &extra_daddr6, + ol3_len + ol4_len + il3_len + il4_len + + cfg_payload_len, 0); + break; + } + + return el3_len + ol3_len + ol4_len + il3_len + il4_len + + cfg_payload_len; +} + +/* sender transmits encapsulated over RAW or unencap'd over UDP */ +static int setup_tx(void) +{ + int family, fd, ret; + + if (cfg_l3_extra) + family = cfg_l3_extra; + else if (cfg_l3_outer) + family = cfg_l3_outer; + else + family = cfg_l3_inner; + + fd = socket(family, SOCK_RAW, IPPROTO_RAW); + if (fd == -1) + error(1, errno, "socket tx"); + + if (cfg_l3_extra) { + if (cfg_l3_extra == PF_INET) + ret = connect(fd, (void *) &extra_daddr4, + sizeof(extra_daddr4)); + else + ret = connect(fd, (void *) &extra_daddr6, + sizeof(extra_daddr6)); + if (ret) + error(1, errno, "connect tx"); + } else if (cfg_l3_outer) { + /* connect to destination if not encapsulated */ + if (cfg_l3_outer == PF_INET) + ret = connect(fd, (void *) &out_daddr4, + sizeof(out_daddr4)); + else + ret = connect(fd, (void *) &out_daddr6, + sizeof(out_daddr6)); + if (ret) + error(1, errno, "connect tx"); + } else { + /* otherwise using loopback */ + if (cfg_l3_inner == PF_INET) + ret = connect(fd, (void *) &in_daddr4, + sizeof(in_daddr4)); + else + ret = connect(fd, (void *) &in_daddr6, + sizeof(in_daddr6)); + if (ret) + error(1, errno, "connect tx"); + } + + return fd; +} + +/* receiver reads unencapsulated UDP */ +static int setup_rx(void) +{ + int fd, ret; + + fd = socket(cfg_l3_inner, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket rx"); + + if (cfg_l3_inner == PF_INET) + ret = bind(fd, (void *) &in_daddr4, sizeof(in_daddr4)); + else + ret = bind(fd, (void *) &in_daddr6, sizeof(in_daddr6)); + if (ret) + error(1, errno, "bind rx"); 
+ + return fd; +} + +static int do_tx(int fd, const char *pkt, int len) +{ + int ret; + + ret = write(fd, pkt, len); + if (ret == -1) + error(1, errno, "send"); + if (ret != len) + error(1, errno, "send: len (%d < %d)\n", ret, len); + + return 1; +} + +static int do_poll(int fd, short events, int timeout) +{ + struct pollfd pfd; + int ret; + + pfd.fd = fd; + pfd.events = events; + + ret = poll(&pfd, 1, timeout); + if (ret == -1) + error(1, errno, "poll"); + if (ret && !(pfd.revents & POLLIN)) + error(1, errno, "poll: unexpected event 0x%x\n", pfd.revents); + + return ret; +} + +static int do_rx(int fd) +{ + char rbuf; + int ret, num = 0; + + while (1) { + ret = recv(fd, &rbuf, 1, MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + break; + if (ret == -1) + error(1, errno, "recv"); + if (rbuf != cfg_payload_char) + error(1, 0, "recv: payload mismatch"); + num++; + }; + + return num; +} + +static int do_main(void) +{ + unsigned long tstop, treport, tcur; + int fdt = -1, fdr = -1, len, tx = 0, rx = 0; + + if (!cfg_only_tx) + fdr = setup_rx(); + if (!cfg_only_rx) + fdt = setup_tx(); + + len = build_packet(); + + tcur = util_gettime(); + treport = tcur + 1000; + tstop = tcur + (cfg_num_secs * 1000); + + while (1) { + if (!cfg_only_rx) + tx += do_tx(fdt, buf, len); + + if (!cfg_only_tx) + rx += do_rx(fdr); + + if (cfg_num_secs) { + tcur = util_gettime(); + if (tcur >= tstop) + break; + if (tcur >= treport) { + fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx); + tx = 0; + rx = 0; + treport = tcur + 1000; + } + } else { + if (tx == cfg_num_pkt) + break; + } + } + + /* read straggler packets, if any */ + if (rx < tx) { + tstop = util_gettime() + 100; + while (rx < tx) { + tcur = util_gettime(); + if (tcur >= tstop) + break; + + do_poll(fdr, POLLIN, tstop - tcur); + rx += do_rx(fdr); + } + } + + fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx); + + if (fdr != -1 && close(fdr)) + error(1, errno, "close rx"); + if (fdt != -1 && close(fdt)) + error(1, errno, "close tx"); + + /* + 
* success (== 0) only if received all packets + * unless failure is expected, in which case none must arrive. + */ + if (cfg_expect_failure) + return rx != 0; + else + return rx != tx; +} + + +static void __attribute__((noreturn)) usage(const char *filepath) +{ + fprintf(stderr, "Usage: %s [-e gre|gue|bare|none] [-i 4|6] [-l len] " + "[-O 4|6] [-o 4|6] [-n num] [-t secs] [-R] [-T] " + "[-s [-d ] [-S ] [-D ] " + "[-x ] [-X ] [-f ] [-F]\n", + filepath); + exit(1); +} + +static void parse_addr(int family, void *addr, const char *optarg) +{ + int ret; + + ret = inet_pton(family, optarg, addr); + if (ret == -1) + error(1, errno, "inet_pton"); + if (ret == 0) + error(1, 0, "inet_pton: bad string"); +} + +static void parse_addr4(struct sockaddr_in *addr, const char *optarg) +{ + parse_addr(AF_INET, &addr->sin_addr, optarg); +} + +static void parse_addr6(struct sockaddr_in6 *addr, const char *optarg) +{ + parse_addr(AF_INET6, &addr->sin6_addr, optarg); +} + +static int parse_protocol_family(const char *filepath, const char *optarg) +{ + if (!strcmp(optarg, "4")) + return PF_INET; + if (!strcmp(optarg, "6")) + return PF_INET6; + + usage(filepath); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "d:D:e:f:Fhi:l:n:o:O:Rs:S:t:Tx:X:")) != -1) { + switch (c) { + case 'd': + if (cfg_l3_outer == AF_UNSPEC) + error(1, 0, "-d must be preceded by -o"); + if (cfg_l3_outer == AF_INET) + parse_addr4(&out_daddr4, optarg); + else + parse_addr6(&out_daddr6, optarg); + break; + case 'D': + if (cfg_l3_inner == AF_UNSPEC) + error(1, 0, "-D must be preceded by -i"); + if (cfg_l3_inner == AF_INET) + parse_addr4(&in_daddr4, optarg); + else + parse_addr6(&in_daddr6, optarg); + break; + case 'e': + if (!strcmp(optarg, "gre")) + cfg_encap_proto = IPPROTO_GRE; + else if (!strcmp(optarg, "gue")) + cfg_encap_proto = IPPROTO_UDP; + else if (!strcmp(optarg, "bare")) + cfg_encap_proto = IPPROTO_IPIP; + else if (!strcmp(optarg, "none")) + cfg_encap_proto = 
IPPROTO_IP; /* == 0 */ + else + usage(argv[0]); + break; + case 'f': + cfg_src_port = strtol(optarg, NULL, 0); + break; + case 'F': + cfg_expect_failure = true; + break; + case 'h': + usage(argv[0]); + break; + case 'i': + if (!strcmp(optarg, "4")) + cfg_l3_inner = PF_INET; + else if (!strcmp(optarg, "6")) + cfg_l3_inner = PF_INET6; + else + usage(argv[0]); + break; + case 'l': + cfg_payload_len = strtol(optarg, NULL, 0); + break; + case 'n': + cfg_num_pkt = strtol(optarg, NULL, 0); + break; + case 'o': + cfg_l3_outer = parse_protocol_family(argv[0], optarg); + break; + case 'O': + cfg_l3_extra = parse_protocol_family(argv[0], optarg); + break; + case 'R': + cfg_only_rx = true; + break; + case 's': + if (cfg_l3_outer == AF_INET) + parse_addr4(&out_saddr4, optarg); + else + parse_addr6(&out_saddr6, optarg); + break; + case 'S': + if (cfg_l3_inner == AF_INET) + parse_addr4(&in_saddr4, optarg); + else + parse_addr6(&in_saddr6, optarg); + break; + case 't': + cfg_num_secs = strtol(optarg, NULL, 0); + break; + case 'T': + cfg_only_tx = true; + break; + case 'x': + cfg_dsfield_outer = strtol(optarg, NULL, 0); + break; + case 'X': + cfg_dsfield_inner = strtol(optarg, NULL, 0); + break; + } + } + + if (cfg_only_rx && cfg_only_tx) + error(1, 0, "options: cannot combine rx-only and tx-only"); + + if (cfg_encap_proto && cfg_l3_outer == AF_UNSPEC) + error(1, 0, "options: must specify outer with encap"); + else if ((!cfg_encap_proto) && cfg_l3_outer != AF_UNSPEC) + error(1, 0, "options: cannot combine no-encap and outer"); + else if ((!cfg_encap_proto) && cfg_l3_extra != AF_UNSPEC) + error(1, 0, "options: cannot combine no-encap and extra"); + + if (cfg_l3_inner == AF_UNSPEC) + cfg_l3_inner = AF_INET6; + if (cfg_l3_inner == AF_INET6 && cfg_encap_proto == IPPROTO_IPIP) + cfg_encap_proto = IPPROTO_IPV6; + + /* RFC 6040 4.2: + * on decap, if outer encountered congestion (CE == 0x3), + * but inner cannot encode ECN (NoECT == 0x0), then drop packet. 
+ */ + if (((cfg_dsfield_outer & 0x3) == 0x3) && + ((cfg_dsfield_inner & 0x3) == 0x0)) + cfg_expect_failure = true; +} + +static void print_opts(void) +{ + if (cfg_l3_inner == PF_INET6) { + util_printaddr("inner.dest6", (void *) &in_daddr6); + util_printaddr("inner.source6", (void *) &in_saddr6); + } else { + util_printaddr("inner.dest4", (void *) &in_daddr4); + util_printaddr("inner.source4", (void *) &in_saddr4); + } + + if (!cfg_l3_outer) + return; + + fprintf(stderr, "encap proto: %u\n", cfg_encap_proto); + + if (cfg_l3_outer == PF_INET6) { + util_printaddr("outer.dest6", (void *) &out_daddr6); + util_printaddr("outer.source6", (void *) &out_saddr6); + } else { + util_printaddr("outer.dest4", (void *) &out_daddr4); + util_printaddr("outer.source4", (void *) &out_saddr4); + } + + if (!cfg_l3_extra) + return; + + if (cfg_l3_outer == PF_INET6) { + util_printaddr("extra.dest6", (void *) &extra_daddr6); + util_printaddr("extra.source6", (void *) &extra_saddr6); + } else { + util_printaddr("extra.dest4", (void *) &extra_daddr4); + util_printaddr("extra.source4", (void *) &extra_saddr4); + } + +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + print_opts(); + return do_main(); +} diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh new file mode 100755 index 000000000000..c0fb073b5eab --- /dev/null +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -0,0 +1,115 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Load BPF flow dissector and verify it correctly dissects traffic +export TESTNAME=test_flow_dissector +unmount=0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +msg="skip all tests:" +if [ $UID != 0 ]; then + echo $msg please run this as root >&2 + exit $ksft_skip +fi + +# This test needs to be run in a network namespace with in_netns.sh. Check if +# this is the case and run it with in_netns.sh if it is being run in the root +# namespace. 
+if [[ -z $(ip netns identify $$) ]]; then + ../net/in_netns.sh "$0" "$@" + exit $? +fi + +# Determine selftest success via shell exit code +exit_handler() +{ + if (( $? == 0 )); then + echo "selftests: $TESTNAME [PASS]"; + else + echo "selftests: $TESTNAME [FAILED]"; + fi + + set +e + + # Cleanup + tc filter del dev lo ingress pref 1337 2> /dev/null + tc qdisc del dev lo ingress 2> /dev/null + ./flow_dissector_load -d 2> /dev/null + if [ $unmount -ne 0 ]; then + umount bpffs 2> /dev/null + fi +} + +# Exit script immediately (well catched by trap handler) if any +# program/thing exits with a non-zero status. +set -e + +# (Use 'trap -l' to list meaning of numbers) +trap exit_handler 0 2 3 6 9 + +# Mount BPF file system +if /bin/mount | grep /sys/fs/bpf > /dev/null; then + echo "bpffs already mounted" +else + echo "bpffs not mounted. Mounting..." + unmount=1 + /bin/mount bpffs /sys/fs/bpf -t bpf +fi + +# Attach BPF program +./flow_dissector_load -p bpf_flow.o -s dissect + +# Setup +tc qdisc add dev lo ingress + +echo "Testing IPv4..." +# Drops all IP/UDP packets coming from port 9 +tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \ + udp src_port 9 action drop + +# Send 10 IPv4/UDP packets from port 8. Filter should not drop any. +./test_flow_dissector -i 4 -f 8 +# Send 10 IPv4/UDP packets from port 9. Filter should drop all. +./test_flow_dissector -i 4 -f 9 -F +# Send 10 IPv4/UDP packets from port 10. Filter should not drop any. +./test_flow_dissector -i 4 -f 10 + +echo "Testing IPIP..." +# Send 10 IPv4/IPv4/UDP packets from port 8. Filter should not drop any. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 8 +# Send 10 IPv4/IPv4/UDP packets from port 9. Filter should drop all. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 9 -F +# Send 10 IPv4/IPv4/UDP packets from port 10. Filter should not drop any. 
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 10 + +echo "Testing IPv4 + GRE..." +# Send 10 IPv4/GRE/IPv4/UDP packets from port 8. Filter should not drop any. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 8 +# Send 10 IPv4/GRE/IPv4/UDP packets from port 9. Filter should drop all. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 9 -F +# Send 10 IPv4/GRE/IPv4/UDP packets from port 10. Filter should not drop any. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 10 + +tc filter del dev lo ingress pref 1337 + +echo "Testing IPv6..." +# Drops all IPv6/UDP packets coming from port 9 +tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \ + udp src_port 9 action drop + +# Send 10 IPv6/UDP packets from port 8. Filter should not drop any. +./test_flow_dissector -i 6 -f 8 +# Send 10 IPv6/UDP packets from port 9. Filter should drop all. +./test_flow_dissector -i 6 -f 9 -F +# Send 10 IPv6/UDP packets from port 10. Filter should not drop any. 
+./test_flow_dissector -i 6 -f 10 + +exit 0 diff --git a/tools/testing/selftests/bpf/with_addr.sh b/tools/testing/selftests/bpf/with_addr.sh new file mode 100755 index 000000000000..ffcd3953f94c --- /dev/null +++ b/tools/testing/selftests/bpf/with_addr.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# add private ipv4 and ipv6 addresses to loopback + +readonly V6_INNER='100::a/128' +readonly V4_INNER='192.168.0.1/32' + +if getopts ":s" opt; then + readonly SIT_DEV_NAME='sixtofourtest0' + readonly V6_SIT='2::/64' + readonly V4_SIT='172.17.0.1/32' + shift +fi + +fail() { + echo "error: $*" 1>&2 + exit 1 +} + +setup() { + ip -6 addr add "${V6_INNER}" dev lo || fail 'failed to setup v6 address' + ip -4 addr add "${V4_INNER}" dev lo || fail 'failed to setup v4 address' + + if [[ -n "${V6_SIT}" ]]; then + ip link add "${SIT_DEV_NAME}" type sit remote any local any \ + || fail 'failed to add sit' + ip link set dev "${SIT_DEV_NAME}" up \ + || fail 'failed to bring sit device up' + ip -6 addr add "${V6_SIT}" dev "${SIT_DEV_NAME}" \ + || fail 'failed to setup v6 SIT address' + ip -4 addr add "${V4_SIT}" dev "${SIT_DEV_NAME}" \ + || fail 'failed to setup v4 SIT address' + fi + + sleep 2 # avoid race causing bind to fail +} + +cleanup() { + if [[ -n "${V6_SIT}" ]]; then + ip -4 addr del "${V4_SIT}" dev "${SIT_DEV_NAME}" + ip -6 addr del "${V6_SIT}" dev "${SIT_DEV_NAME}" + ip link del "${SIT_DEV_NAME}" + fi + + ip -4 addr del "${V4_INNER}" dev lo + ip -6 addr del "${V6_INNER}" dev lo +} + +trap cleanup EXIT + +setup +"$@" +exit "$?" 
diff --git a/tools/testing/selftests/bpf/with_tunnels.sh b/tools/testing/selftests/bpf/with_tunnels.sh new file mode 100755 index 000000000000..e24949ed3a20 --- /dev/null +++ b/tools/testing/selftests/bpf/with_tunnels.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# setup tunnels for flow dissection test + +readonly SUFFIX="test_$(mktemp -u XXXX)" +CONFIG="remote 127.0.0.2 local 127.0.0.1 dev lo" + +setup() { + ip link add "ipip_${SUFFIX}" type ipip ${CONFIG} + ip link add "gre_${SUFFIX}" type gre ${CONFIG} + ip link add "sit_${SUFFIX}" type sit ${CONFIG} + + echo "tunnels before test:" + ip tunnel show + + ip link set "ipip_${SUFFIX}" up + ip link set "gre_${SUFFIX}" up + ip link set "sit_${SUFFIX}" up +} + + +cleanup() { + ip tunnel del "ipip_${SUFFIX}" + ip tunnel del "gre_${SUFFIX}" + ip tunnel del "sit_${SUFFIX}" + + echo "tunnels after test:" + ip tunnel show +} + +trap cleanup EXIT + +setup +"$@" +exit "$?" -- cgit v1.2.3-55-g7522 From 70e88c758a6b8544b5e0d982e55d1e36f9aa0b85 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 14 Sep 2018 12:09:05 -0700 Subject: selftests/bpf: fix bpf_flow.c build fix the following build error: clang -I. 
-I./include/uapi -I../../../include/uapi -idirafter /usr/local/include -idirafter /data/users/ast/llvm/bld/lib/clang/7.0.0/include -idirafter /usr/include -Wno-compare-distinct-pointer-types \ -O2 -target bpf -emit-llvm -c bpf_flow.c -o - | \ llc -march=bpf -mcpu=generic -filetype=obj -o /data/users/ast/bpf-next/tools/testing/selftests/bpf/bpf_flow.o LLVM ERROR: 'dissect' label emitted multiple times to assembly file make: *** [/data/users/ast/bpf-next/tools/testing/selftests/bpf/bpf_flow.o] Error 1 Fixes: 9c98b13cc3bb ("flow_dissector: implements eBPF parser") Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_flow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing/selftests/bpf') diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c index 5fb809d95867..107350a7821d 100644 --- a/tools/testing/selftests/bpf/bpf_flow.c +++ b/tools/testing/selftests/bpf/bpf_flow.c @@ -117,7 +117,7 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) } SEC("dissect") -int dissect(struct __sk_buff *skb) +int _dissect(struct __sk_buff *skb) { if (!skb->vlan_present) return parse_eth_proto(skb, skb->protocol); -- cgit v1.2.3-55-g7522