From 1e54ad251a93a524f1a2950b1d65bc7437c57a53 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Mon, 25 Jun 2018 14:25:02 +0200 Subject: samples/bpf: Add xdp_sample_pkts example Add an example program showing how to sample packets from XDP using the perf event buffer. The example userspace program just prints the ethernet header for every packet sampled. Reviewed-by: Jakub Kicinski Signed-off-by: Toke Høiland-Jørgensen Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- samples/bpf/Makefile | 4 + samples/bpf/xdp_sample_pkts_kern.c | 66 +++++++++++++++ samples/bpf/xdp_sample_pkts_user.c | 169 +++++++++++++++++++++++++++++++++++++ 3 files changed, 239 insertions(+) create mode 100644 samples/bpf/xdp_sample_pkts_kern.c create mode 100644 samples/bpf/xdp_sample_pkts_user.c (limited to 'samples/bpf') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 1303af10e54d..9ea2f7b64869 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -52,6 +52,7 @@ hostprogs-y += xdp_adjust_tail hostprogs-y += xdpsock hostprogs-y += xdp_fwd hostprogs-y += task_fd_query +hostprogs-y += xdp_sample_pkts # Libbpf dependencies LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a @@ -107,6 +108,7 @@ xdp_adjust_tail-objs := xdp_adjust_tail_user.o xdpsock-objs := bpf_load.o xdpsock_user.o xdp_fwd-objs := bpf_load.o xdp_fwd_user.o task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) +xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -163,6 +165,7 @@ always += xdp_adjust_tail_kern.o always += xdpsock_kern.o always += xdp_fwd_kern.o always += task_fd_query_kern.o +always += xdp_sample_pkts_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -179,6 +182,7 @@ HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/ +HOSTCFLAGS_xdp_sample_pkts_user.o += -I$(srctree)/tools/lib/bpf/ HOST_LOADLIBES += $(LIBBPF) -lelf HOSTLOADLIBES_tracex4 += -lrt diff --git a/samples/bpf/xdp_sample_pkts_kern.c b/samples/bpf/xdp_sample_pkts_kern.c new file mode 100644 index 000000000000..f7ca8b850978 --- /dev/null +++ b/samples/bpf/xdp_sample_pkts_kern.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include "bpf_helpers.h" + +#define SAMPLE_SIZE 64ul +#define MAX_CPUS 128 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +struct bpf_map_def SEC("maps") my_map = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(u32), + .max_entries = MAX_CPUS, +}; + +SEC("xdp_sample") +int xdp_sample_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + + /* Metadata will be in the perf event before the packet data. */ + struct S { + u16 cookie; + u16 pkt_len; + } __packed metadata; + + if (data < data_end) { + /* The XDP perf_event_output handler will use the upper 32 bits + * of the flags argument as a number of bytes to include of the + * packet payload in the event data. If the size is too big, the + * call to bpf_perf_event_output will fail and return -EFAULT. + * + * See bpf_xdp_event_output in net/core/filter.c. + * + * The BPF_F_CURRENT_CPU flag means that the event output fd + * will be indexed by the CPU number in the event map. + */ + u64 flags = BPF_F_CURRENT_CPU; + u16 sample_size; + int ret; + + metadata.cookie = 0xdead; + metadata.pkt_len = (u16)(data_end - data); + sample_size = min(metadata.pkt_len, SAMPLE_SIZE); + flags |= (u64)sample_size << 32; + + ret = bpf_perf_event_output(ctx, &my_map, flags, + &metadata, sizeof(metadata)); + if (ret) + bpf_printk("perf_event_output failed: %d\n", ret); + } + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c new file mode 100644 index 000000000000..8dd87c1eb560 --- /dev/null +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "perf-sys.h" +#include "trace_helpers.h" + +#define MAX_CPUS 128 +static int pmu_fds[MAX_CPUS], if_idx; +static struct perf_event_mmap_page *headers[MAX_CPUS]; +static char *if_name; + +static int do_attach(int idx, int fd, const char *name) +{ + int err; + + err = bpf_set_link_xdp_fd(idx, fd, 0); + if (err < 0) + printf("ERROR: failed to attach program to %s\n", name); + + return err; +} + +static int do_detach(int idx, const char *name) +{ + int err; + + err = bpf_set_link_xdp_fd(idx, -1, 0); + if (err < 0) + printf("ERROR: failed to detach program from %s\n", name); + + return err; +} + +#define SAMPLE_SIZE 64 + +static int print_bpf_output(void *data, int size) +{ + struct { + __u16 cookie; + __u16 pkt_len; + __u8 pkt_data[SAMPLE_SIZE]; + } __packed *e = data; + int i; + + if (e->cookie != 0xdead) { + printf("BUG cookie %x sized %d\n", + e->cookie, size); + return LIBBPF_PERF_EVENT_ERROR; + } + + printf("Pkt len: %-5d bytes. Ethernet hdr: ", e->pkt_len); + for (i = 0; i < 14 && i < e->pkt_len; i++) + printf("%02x ", e->pkt_data[i]); + printf("\n"); + + return LIBBPF_PERF_EVENT_CONT; +} + +static void test_bpf_perf_event(int map_fd, int num) +{ + struct perf_event_attr attr = { + .sample_type = PERF_SAMPLE_RAW, + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_BPF_OUTPUT, + .wakeup_events = 1, /* get an fd notification for every event */ + }; + int i; + + for (i = 0; i < num; i++) { + int key = i; + + pmu_fds[i] = sys_perf_event_open(&attr, -1/*pid*/, i/*cpu*/, + -1/*group_fd*/, 0); + + assert(pmu_fds[i] >= 0); + assert(bpf_map_update_elem(map_fd, &key, + &pmu_fds[i], BPF_ANY) == 0); + ioctl(pmu_fds[i], PERF_EVENT_IOC_ENABLE, 0); + } +} + +static void sig_handler(int signo) +{ + do_detach(if_idx, if_name); + exit(0); +} + +int main(int argc, char **argv) +{ + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + struct bpf_object *obj; + struct bpf_map *map; + int prog_fd, map_fd; + char filename[256]; + int ret, err, i; + int numcpus; + + if (argc < 2) { + printf("Usage: %s \n", argv[0]); + return 1; + } + + numcpus = get_nprocs(); + if (numcpus > MAX_CPUS) + numcpus = MAX_CPUS; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; + + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + return 1; + + if (!prog_fd) { + printf("load_bpf_file: %s\n", strerror(errno)); + return 1; + } + + map = bpf_map__next(NULL, obj); + if (!map) { + printf("finding a map in obj file failed\n"); + return 1; + } + map_fd = bpf_map__fd(map); + + if_idx = if_nametoindex(argv[1]); + if (!if_idx) + if_idx = strtoul(argv[1], NULL, 0); + + if (!if_idx) { + fprintf(stderr, "Invalid ifname\n"); + return 1; + } + if_name = argv[1]; + err = do_attach(if_idx, prog_fd, argv[1]); + if (err) + return err; + + if (signal(SIGINT, sig_handler) || + signal(SIGHUP, sig_handler) || + signal(SIGTERM, sig_handler)) { + perror("signal"); + return 1; + } + + test_bpf_perf_event(map_fd, numcpus); + + for (i = 0; i < numcpus; i++) + if (perf_event_mmap_header(pmu_fds[i], &headers[i]) < 0) + return 1; + + ret = perf_event_poller_multi(pmu_fds, headers, numcpus, + print_bpf_output); + kill(0, SIGINT); + return ret; +} -- cgit v1.2.3-55-g7522 From 0d25c43ab988766ad52ff2930af3bf47d92c20ac Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 25 Jun 2018 16:27:43 +0200 Subject: samples/bpf: extend xdp_rxq_info to read packet payload There is a cost associated with reading the packet data payload that this test ignored. Add option --read to allow enabling reading part of the payload. This sample/tool helps us analyse an issue observed with a NIC mlx5 (ConnectX-5 Ex) and an Intel(R) Xeon(R) CPU E5-1650 v4. With no_touch of data: Running XDP on dev:mlx5p1 (ifindex:8) action:XDP_DROP options:no_touch XDP stats CPU pps issue-pps XDP-RX CPU 0 14,465,157 0 XDP-RX CPU 1 14,464,728 0 XDP-RX CPU 2 14,465,283 0 XDP-RX CPU 3 14,465,282 0 XDP-RX CPU 4 14,464,159 0 XDP-RX CPU 5 14,465,379 0 XDP-RX CPU total 86,789,992 When not touching data, we observe that the CPUs have idle cycles. When reading data the CPUs are 100% busy in softirq. With reading data: Running XDP on dev:mlx5p1 (ifindex:8) action:XDP_DROP options:read XDP stats CPU pps issue-pps XDP-RX CPU 0 9,620,639 0 XDP-RX CPU 1 9,489,843 0 XDP-RX CPU 2 9,407,854 0 XDP-RX CPU 3 9,422,289 0 XDP-RX CPU 4 9,321,959 0 XDP-RX CPU 5 9,395,242 0 XDP-RX CPU total 56,657,828 The effect seen above is a result of cache-misses occuring when more RXQs are being used. Based on perf-event observations, our conclusion is that the CPUs DDIO (Direct Data I/O) choose to deliver packet into main memory, instead of L3-cache. We also found, that this can be mitigated by either using less RXQs or by reducing NICs the RX-ring size. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Toke Høiland-Jørgensen Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- samples/bpf/xdp_rxq_info_kern.c | 19 +++++++++++++++++++ samples/bpf/xdp_rxq_info_user.c | 34 ++++++++++++++++++++++++++++------ 2 files changed, 47 insertions(+), 6 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/xdp_rxq_info_kern.c b/samples/bpf/xdp_rxq_info_kern.c index 3fd209291653..61af6210df2f 100644 --- a/samples/bpf/xdp_rxq_info_kern.c +++ b/samples/bpf/xdp_rxq_info_kern.c @@ -4,6 +4,8 @@ * Example howto extract XDP RX-queue info */ #include +#include +#include #include "bpf_helpers.h" /* Config setup from with userspace @@ -14,6 +16,11 @@ struct config { __u32 action; int ifindex; + __u32 options; +}; +enum cfg_options_flags { + NO_TOUCH = 0x0U, + READ_MEM = 0x1U, }; struct bpf_map_def SEC("maps") config_map = { .type = BPF_MAP_TYPE_ARRAY, @@ -90,6 +97,18 @@ int xdp_prognum0(struct xdp_md *ctx) if (key == MAX_RXQs) rxq_rec->issue++; + /* Default: Don't touch packet data, only count packets */ + if (unlikely(config->options & READ_MEM)) { + struct ethhdr *eth = data; + + if (eth + 1 > data_end) + return XDP_ABORTED; + + /* Avoid compiler removing this: Drop non 802.3 Ethertypes */ + if (ntohs(eth->h_proto) < ETH_P_802_3_MIN) + return XDP_ABORTED; + } + return config->action; } diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index e4e9ba52bff0..435485d4f49e 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -50,6 +50,7 @@ static const struct option long_options[] = { {"sec", required_argument, NULL, 's' }, {"no-separators", no_argument, NULL, 'z' }, {"action", required_argument, NULL, 'a' }, + {"readmem", no_argument, NULL, 'r' }, {0, 0, NULL, 0 } }; @@ -66,6 +67,11 @@ static void int_exit(int sig) struct config { __u32 action; int ifindex; + __u32 options; +}; +enum cfg_options_flags { + NO_TOUCH = 0x0U, + READ_MEM = 0x1U, }; #define XDP_ACTION_MAX (XDP_TX + 1) #define XDP_ACTION_MAX_STRLEN 11 @@ -109,6 +115,16 @@ static void list_xdp_actions(void) printf("\n"); } +static char* options2str(enum cfg_options_flags flag) +{ + if (flag == NO_TOUCH) + return "no_touch"; + if (flag & READ_MEM) + return "read"; + fprintf(stderr, "ERR: Unknown config option flags"); + exit(EXIT_FAIL); +} + static void usage(char *argv[]) { int i; @@ -305,7 +321,7 @@ static __u64 calc_errs_pps(struct datarec *r, static void stats_print(struct stats_record *stats_rec, struct stats_record *stats_prev, - int action) + int action, __u32 cfg_opt) { unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; unsigned int nr_cpus = bpf_num_possible_cpus(); @@ -316,8 +332,8 @@ static void stats_print(struct stats_record *stats_rec, int i; /* Header */ - printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s\n", - ifname, ifindex, action2str(action)); + printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s options:%s\n", + ifname, ifindex, action2str(action), options2str(cfg_opt)); /* stats_global_map */ { @@ -399,7 +415,7 @@ static inline void swap(struct stats_record **a, struct stats_record **b) *b = tmp; } -static void stats_poll(int interval, int action) +static void stats_poll(int interval, int action, __u32 cfg_opt) { struct stats_record *record, *prev; @@ -410,7 +426,7 @@ static void stats_poll(int interval, int action) while (1) { swap(&prev, &record); stats_collect(record); - stats_print(record, prev, action); + stats_print(record, prev, action, cfg_opt); sleep(interval); } @@ -421,6 +437,7 @@ static void stats_poll(int interval, int action) int main(int argc, char **argv) { + __u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */ struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, @@ -435,6 +452,7 @@ int main(int argc, char **argv) int interval = 2; __u32 key = 0; + char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 }; int action = XDP_PASS; /* Default action */ char *action_str = NULL; @@ -496,6 +514,9 @@ int main(int argc, char **argv) action_str = (char *)&action_str_buf; strncpy(action_str, optarg, XDP_ACTION_MAX_STRLEN); break; + case 'r': + cfg_options |= READ_MEM; + break; case 'h': error: default: @@ -522,6 +543,7 @@ int main(int argc, char **argv) } } cfg.action = action; + cfg.options = cfg_options; /* Trick to pretty printf with thousands separators use %' */ if (use_separators) @@ -542,6 +564,6 @@ int main(int argc, char **argv) return EXIT_FAIL_XDP; } - stats_poll(interval, action); + stats_poll(interval, action, cfg_options); return EXIT_OK; } -- cgit v1.2.3-55-g7522 From 509fda105ba8f9a1a5c6f8b79e4c7fc50b35c1e3 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 25 Jun 2018 16:27:48 +0200 Subject: samples/bpf: xdp_rxq_info action XDP_TX must adjust MAC-addrs XDP_TX requires also changing the MAC-addrs, else some hardware may drop the TX packet before reaching the wire. This was observed with driver mlx5. If xdp_rxq_info select --action XDP_TX the swapmac functionality is activated. It is also possible to manually enable via cmdline option --swapmac. This is practical if wanting to measure the overhead of writing/updating payload for other action types. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Toke Høiland-Jørgensen Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- samples/bpf/xdp_rxq_info_kern.c | 26 +++++++++++++++++++++++++- samples/bpf/xdp_rxq_info_user.c | 11 +++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) (limited to 'samples/bpf') diff --git a/samples/bpf/xdp_rxq_info_kern.c b/samples/bpf/xdp_rxq_info_kern.c index 61af6210df2f..222a83eed1cb 100644 --- a/samples/bpf/xdp_rxq_info_kern.c +++ b/samples/bpf/xdp_rxq_info_kern.c @@ -21,6 +21,7 @@ struct config { enum cfg_options_flags { NO_TOUCH = 0x0U, READ_MEM = 0x1U, + SWAP_MAC = 0x2U, }; struct bpf_map_def SEC("maps") config_map = { .type = BPF_MAP_TYPE_ARRAY, @@ -52,6 +53,23 @@ struct bpf_map_def SEC("maps") rx_queue_index_map = { .max_entries = MAX_RXQs + 1, }; +static __always_inline +void swap_src_dst_mac(void *data) +{ + unsigned short *p = data; + unsigned short dst[3]; + + dst[0] = p[0]; + dst[1] = p[1]; + dst[2] = p[2]; + p[0] = p[3]; + p[1] = p[4]; + p[2] = p[5]; + p[3] = dst[0]; + p[4] = dst[1]; + p[5] = dst[2]; +} + SEC("xdp_prog0") int xdp_prognum0(struct xdp_md *ctx) { @@ -98,7 +116,7 @@ int xdp_prognum0(struct xdp_md *ctx) rxq_rec->issue++; /* Default: Don't touch packet data, only count packets */ - if (unlikely(config->options & READ_MEM)) { + if (unlikely(config->options & (READ_MEM|SWAP_MAC))) { struct ethhdr *eth = data; if (eth + 1 > data_end) @@ -107,6 +125,12 @@ int xdp_prognum0(struct xdp_md *ctx) /* Avoid compiler removing this: Drop non 802.3 Ethertypes */ if (ntohs(eth->h_proto) < ETH_P_802_3_MIN) return XDP_ABORTED; + + /* XDP_TX requires changing MAC-addrs, else HW may drop. + * Can also be enabled with --swapmac (for test purposes) + */ + if (unlikely(config->options & SWAP_MAC)) + swap_src_dst_mac(data); } return config->action; diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index 435485d4f49e..248a7eab9531 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -51,6 +51,7 @@ static const struct option long_options[] = { {"no-separators", no_argument, NULL, 'z' }, {"action", required_argument, NULL, 'a' }, {"readmem", no_argument, NULL, 'r' }, + {"swapmac", no_argument, NULL, 'm' }, {0, 0, NULL, 0 } }; @@ -72,6 +73,7 @@ struct config { enum cfg_options_flags { NO_TOUCH = 0x0U, READ_MEM = 0x1U, + SWAP_MAC = 0x2U, }; #define XDP_ACTION_MAX (XDP_TX + 1) #define XDP_ACTION_MAX_STRLEN 11 @@ -119,6 +121,8 @@ static char* options2str(enum cfg_options_flags flag) { if (flag == NO_TOUCH) return "no_touch"; + if (flag & SWAP_MAC) + return "swapmac"; if (flag & READ_MEM) return "read"; fprintf(stderr, "ERR: Unknown config option flags"); @@ -517,6 +521,9 @@ int main(int argc, char **argv) case 'r': cfg_options |= READ_MEM; break; + case 'm': + cfg_options |= SWAP_MAC; + break; case 'h': error: default: @@ -543,6 +550,10 @@ int main(int argc, char **argv) } } cfg.action = action; + + /* XDP_TX requires changing MAC-addrs, else HW may drop */ + if (action == XDP_TX) + cfg_options |= SWAP_MAC; cfg.options = cfg_options; /* Trick to pretty printf with thousands separators use %' */ -- cgit v1.2.3-55-g7522