summaryrefslogtreecommitdiffstats
path: root/samples
diff options
context:
space:
mode:
Diffstat (limited to 'samples')
-rw-r--r--samples/bpf/Makefile3
-rwxr-xr-xsamples/bpf/do_hbm_test.sh22
-rw-r--r--samples/bpf/hbm.c18
-rw-r--r--samples/bpf/hbm_edt_kern.c168
-rw-r--r--samples/bpf/hbm_kern.h40
-rw-r--r--samples/bpf/ibumad_kern.c18
-rw-r--r--samples/bpf/tcp_bpf.readme2
-rw-r--r--samples/bpf/tcp_dumpstats_kern.c68
-rw-r--r--samples/bpf/xdp_adjust_tail_user.c12
-rw-r--r--samples/bpf/xdp_redirect_map_user.c15
-rw-r--r--samples/bpf/xdp_redirect_user.c15
-rw-r--r--samples/bpf/xdp_tx_iptunnel_user.c12
-rw-r--r--samples/bpf/xdpsock_user.c44
13 files changed, 377 insertions, 60 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 0917f8cf4fab..f90daadfbc89 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -154,6 +154,7 @@ always += tcp_iw_kern.o
always += tcp_clamp_kern.o
always += tcp_basertt_kern.o
always += tcp_tos_reflect_kern.o
+always += tcp_dumpstats_kern.o
always += xdp_redirect_kern.o
always += xdp_redirect_map_kern.o
always += xdp_redirect_cpu_kern.o
@@ -168,6 +169,7 @@ always += task_fd_query_kern.o
always += xdp_sample_pkts_kern.o
always += ibumad_kern.o
always += hbm_out_kern.o
+always += hbm_edt_kern.o
KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include
KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/bpf/
@@ -272,6 +274,7 @@ $(src)/*.c: verify_target_bpf $(LIBBPF)
$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
$(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
$(obj)/hbm.o: $(src)/hbm.h
+$(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
# But, there is no easy way to fix it, so just exclude it since it is
diff --git a/samples/bpf/do_hbm_test.sh b/samples/bpf/do_hbm_test.sh
index e48b047d4646..ffe4c0607341 100755
--- a/samples/bpf/do_hbm_test.sh
+++ b/samples/bpf/do_hbm_test.sh
@@ -14,7 +14,7 @@ Usage() {
echo "loads. The output is the goodput in Mbps (unless -D was used)."
echo ""
echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]"
- echo " [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E]"
+ echo " [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E] [--edt]"
echo " [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id >]"
echo " [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]"
echo " [-q=<qdisc>] [-R] [-s=<server>|--server=<server]"
@@ -30,6 +30,7 @@ Usage() {
echo " other detailed information. This information is"
echo " test dependent (i.e. iperf3 or netperf)."
echo " -E enable ECN (not required for dctcp)"
+ echo " --edt use fq's Earliest Departure Time (requires fq)"
echo " -f or --flows number of concurrent flows (default=1)"
echo " -i or --id cgroup id (an integer, default is 1)"
echo " -N use netperf instead of iperf3"
@@ -130,13 +131,12 @@ processArgs () {
details=1
;;
-E)
- ecn=1
+ ecn=1
+ ;;
+ --edt)
+ flags="$flags --edt"
+ qdisc="fq"
;;
- # Support for upcomming fq Early Departure Time egress rate limiting
- #--edt)
- # prog="hbm_out_edt_kern.o"
- # qdisc="fq"
- # ;;
-f=*|--flows=*)
flows="${i#*=}"
;;
@@ -228,8 +228,8 @@ if [ "$netem" -ne "0" ] ; then
tc qdisc del dev lo root > /dev/null 2>&1
tc qdisc add dev lo root netem delay $netem\ms > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
- tc qdisc del dev lo root > /dev/null 2>&1
- tc qdisc add dev lo root $qdisc > /dev/null 2>&1
+ tc qdisc del dev eth0 root > /dev/null 2>&1
+ tc qdisc add dev eth0 root $qdisc > /dev/null 2>&1
fi
n=0
@@ -399,7 +399,9 @@ fi
if [ "$netem" -ne "0" ] ; then
tc qdisc del dev lo root > /dev/null 2>&1
fi
-
+if [ "$qdisc" != "" ] ; then
+ tc qdisc del dev eth0 root > /dev/null 2>&1
+fi
sleep 2
hbmPid=`ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }'`
diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c
index b905b32ff185..e0fbab9bec83 100644
--- a/samples/bpf/hbm.c
+++ b/samples/bpf/hbm.c
@@ -62,6 +62,7 @@ bool loopback_flag;
bool debugFlag;
bool work_conserving_flag;
bool no_cn_flag;
+bool edt_flag;
static void Usage(void);
static void read_trace_pipe2(void);
@@ -372,9 +373,14 @@ static int run_bpf_prog(char *prog, int cg_id)
fprintf(fout, "avg rtt:%d\n",
(int)(qstats.sum_rtt / (qstats.pkts_total + 1)));
// Average credit
- fprintf(fout, "avg credit:%d\n",
- (int)(qstats.sum_credit /
- (1500 * ((int)qstats.pkts_total) + 1)));
+ if (edt_flag)
+ fprintf(fout, "avg credit_ms:%.03f\n",
+ (qstats.sum_credit /
+ (qstats.pkts_total + 1.0)) / 1000000.0);
+ else
+ fprintf(fout, "avg credit:%d\n",
+ (int)(qstats.sum_credit /
+ (1500 * ((int)qstats.pkts_total ) + 1)));
// Return values stats
for (k = 0; k < RET_VAL_COUNT; k++) {
@@ -408,6 +414,7 @@ static void Usage(void)
" Where:\n"
" -o indicates egress direction (default)\n"
" -d print BPF trace debug buffer\n"
+ " --edt use fq's Earliest Departure Time\n"
" -l also limit flows using loopback\n"
" -n <#> to create cgroup \"/hbm#\" and attach prog\n"
" Default is /hbm1\n"
@@ -433,6 +440,7 @@ int main(int argc, char **argv)
char *optstring = "iodln:r:st:wh";
struct option loptions[] = {
{"no_cn", 0, NULL, 1},
+ {"edt", 0, NULL, 2},
{NULL, 0, NULL, 0}
};
@@ -441,6 +449,10 @@ int main(int argc, char **argv)
case 1:
no_cn_flag = true;
break;
+ case 2:
+ prog = "hbm_edt_kern.o";
+ edt_flag = true;
+ break;
case'o':
break;
case 'd':
diff --git a/samples/bpf/hbm_edt_kern.c b/samples/bpf/hbm_edt_kern.c
new file mode 100644
index 000000000000..a65b677acdb0
--- /dev/null
+++ b/samples/bpf/hbm_edt_kern.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Sample Host Bandwidth Manager (HBM) BPF program.
+ *
+ * A cgroup skb BPF egress program to limit cgroup output bandwidth.
+ * It uses a modified virtual token bucket queue to limit average
+ * egress bandwidth. The implementation uses credits instead of tokens.
+ * Negative credits imply that queueing would have happened (this is
+ * a virtual queue, so no queueing is done by it. However, queueing may
+ * occur at the actual qdisc (which is not used for rate limiting).
+ *
+ * This implementation uses 3 thresholds, one to start marking packets and
+ * the other two to drop packets:
+ * CREDIT
+ * - <--------------------------|------------------------> +
+ * | | | 0
+ * | Large pkt |
+ * | drop thresh |
+ * Small pkt drop Mark threshold
+ * thresh
+ *
+ * The effect of marking depends on the type of packet:
+ * a) If the packet is ECN enabled and it is a TCP packet, then the packet
+ * is ECN marked.
+ * b) If the packet is a TCP packet, then we probabilistically call tcp_cwr
+ * to reduce the congestion window. The current implementation uses a linear
+ * distribution (0% probability at marking threshold, 100% probability
+ * at drop threshold).
+ * c) If the packet is not a TCP packet, then it is dropped.
+ *
+ * If the credit is below the drop threshold, the packet is dropped. If it
+ * is a TCP packet, then it also calls tcp_cwr since packets dropped by
+ * by a cgroup skb BPF program do not automatically trigger a call to
+ * tcp_cwr in the current kernel code.
+ *
+ * This BPF program actually uses 2 drop thresholds, one threshold
+ * for larger packets (>= 120 bytes) and another for smaller packets. This
+ * protects smaller packets such as SYNs, ACKs, etc.
+ *
+ * The default bandwidth limit is set at 1Gbps but this can be changed by
+ * a user program through a shared BPF map. In addition, by default this BPF
+ * program does not limit connections using loopback. This behavior can be
+ * overwritten by the user program. There is also an option to calculate
+ * some statistics, such as percent of packets marked or dropped, which
+ * a user program, such as hbm, can access.
+ */
+
+#include "hbm_kern.h"
+
+SEC("cgroup_skb/egress")
+int _hbm_out_cg(struct __sk_buff *skb)
+{
+ long long delta = 0, delta_send;
+ unsigned long long curtime, sendtime;
+ struct hbm_queue_stats *qsp = NULL;
+ unsigned int queue_index = 0;
+ bool congestion_flag = false;
+ bool ecn_ce_flag = false;
+ struct hbm_pkt_info pkti = {};
+ struct hbm_vqueue *qdp;
+ bool drop_flag = false;
+ bool cwr_flag = false;
+ int len = skb->len;
+ int rv = ALLOW_PKT;
+
+ qsp = bpf_map_lookup_elem(&queue_stats, &queue_index);
+
+ // Check if we should ignore loopback traffic
+ if (qsp != NULL && !qsp->loopback && (skb->ifindex == 1))
+ return ALLOW_PKT;
+
+ hbm_get_pkt_info(skb, &pkti);
+
+ // We may want to account for the length of headers in len
+ // calculation, like ETH header + overhead, specially if it
+ // is a gso packet. But I am not doing it right now.
+
+ qdp = bpf_get_local_storage(&queue_state, 0);
+ if (!qdp)
+ return ALLOW_PKT;
+ if (qdp->lasttime == 0)
+ hbm_init_edt_vqueue(qdp, 1024);
+
+ curtime = bpf_ktime_get_ns();
+
+ // Begin critical section
+ bpf_spin_lock(&qdp->lock);
+ delta = qdp->lasttime - curtime;
+ // bound bursts to 100us
+ if (delta < -BURST_SIZE_NS) {
+ // negative delta is a credit that allows bursts
+ qdp->lasttime = curtime - BURST_SIZE_NS;
+ delta = -BURST_SIZE_NS;
+ }
+ sendtime = qdp->lasttime;
+ delta_send = BYTES_TO_NS(len, qdp->rate);
+ __sync_add_and_fetch(&(qdp->lasttime), delta_send);
+ bpf_spin_unlock(&qdp->lock);
+ // End critical section
+
+ // Set EDT of packet
+ skb->tstamp = sendtime;
+
+ // Check if we should update rate
+ if (qsp != NULL && (qsp->rate * 128) != qdp->rate)
+ qdp->rate = qsp->rate * 128;
+
+ // Set flags (drop, congestion, cwr)
+ // last packet will be sent in the future, bound latency
+ if (delta > DROP_THRESH_NS || (delta > LARGE_PKT_DROP_THRESH_NS &&
+ len > LARGE_PKT_THRESH)) {
+ drop_flag = true;
+ if (pkti.is_tcp && pkti.ecn == 0)
+ cwr_flag = true;
+ } else if (delta > MARK_THRESH_NS) {
+ if (pkti.is_tcp)
+ congestion_flag = true;
+ else
+ drop_flag = true;
+ }
+
+ if (congestion_flag) {
+ if (bpf_skb_ecn_set_ce(skb)) {
+ ecn_ce_flag = true;
+ } else {
+ if (pkti.is_tcp) {
+ unsigned int rand = bpf_get_prandom_u32();
+
+ if (delta >= MARK_THRESH_NS +
+ (rand % MARK_REGION_SIZE_NS)) {
+ // Do congestion control
+ cwr_flag = true;
+ }
+ } else if (len > LARGE_PKT_THRESH) {
+ // Problem if too many small packets?
+ drop_flag = true;
+ congestion_flag = false;
+ }
+ }
+ }
+
+ if (pkti.is_tcp && drop_flag && pkti.packets_out <= 1) {
+ drop_flag = false;
+ cwr_flag = true;
+ congestion_flag = false;
+ }
+
+ if (qsp != NULL && qsp->no_cn)
+ cwr_flag = false;
+
+ hbm_update_stats(qsp, len, curtime, congestion_flag, drop_flag,
+ cwr_flag, ecn_ce_flag, &pkti, (int) delta);
+
+ if (drop_flag) {
+ __sync_add_and_fetch(&(qdp->lasttime), -delta_send);
+ rv = DROP_PKT;
+ }
+
+ if (cwr_flag)
+ rv |= CWR;
+ return rv;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/hbm_kern.h b/samples/bpf/hbm_kern.h
index be19cf1d5cd5..aa207a2eebbd 100644
--- a/samples/bpf/hbm_kern.h
+++ b/samples/bpf/hbm_kern.h
@@ -29,6 +29,7 @@
#define DROP_PKT 0
#define ALLOW_PKT 1
#define TCP_ECN_OK 1
+#define CWR 2
#ifndef HBM_DEBUG // Define HBM_DEBUG to enable debugging
#undef bpf_printk
@@ -45,8 +46,18 @@
#define MAX_CREDIT (100 * MAX_BYTES_PER_PACKET)
#define INIT_CREDIT (INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET)
+// Time base accounting for fq's EDT
+#define BURST_SIZE_NS 100000 // 100us
+#define MARK_THRESH_NS 50000 // 50us
+#define DROP_THRESH_NS 500000 // 500us
+// Reserve 20us of queuing for small packets (less than 120 bytes)
+#define LARGE_PKT_DROP_THRESH_NS (DROP_THRESH_NS - 20000)
+#define MARK_REGION_SIZE_NS (LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS)
+
// rate in bytes per ns << 20
#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
+#define BYTES_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
+#define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate))
struct bpf_map_def SEC("maps") queue_state = {
.type = BPF_MAP_TYPE_CGROUP_STORAGE,
@@ -67,6 +78,7 @@ BPF_ANNOTATE_KV_PAIR(queue_stats, int, struct hbm_queue_stats);
struct hbm_pkt_info {
int cwnd;
int rtt;
+ int packets_out;
bool is_ip;
bool is_tcp;
short ecn;
@@ -86,16 +98,20 @@ static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti)
if (tp) {
pkti->cwnd = tp->snd_cwnd;
pkti->rtt = tp->srtt_us >> 3;
+ pkti->packets_out = tp->packets_out;
return 0;
}
}
}
}
+ pkti->cwnd = 0;
+ pkti->rtt = 0;
+ pkti->packets_out = 0;
return 1;
}
-static __always_inline void hbm_get_pkt_info(struct __sk_buff *skb,
- struct hbm_pkt_info *pkti)
+static void hbm_get_pkt_info(struct __sk_buff *skb,
+ struct hbm_pkt_info *pkti)
{
struct iphdr iph;
struct ipv6hdr *ip6h;
@@ -123,10 +139,22 @@ static __always_inline void hbm_get_pkt_info(struct __sk_buff *skb,
static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate)
{
- bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
- qdp->lasttime = bpf_ktime_get_ns();
- qdp->credit = INIT_CREDIT;
- qdp->rate = rate * 128;
+ bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
+ qdp->lasttime = bpf_ktime_get_ns();
+ qdp->credit = INIT_CREDIT;
+ qdp->rate = rate * 128;
+}
+
+static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp,
+ int rate)
+{
+ unsigned long long curtime;
+
+ curtime = bpf_ktime_get_ns();
+ bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
+ qdp->lasttime = curtime - BURST_SIZE_NS; // support initial burst
+ qdp->credit = 0; // not used
+ qdp->rate = rate * 128;
}
static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
diff --git a/samples/bpf/ibumad_kern.c b/samples/bpf/ibumad_kern.c
index 38b2b3f22049..f281df7e0089 100644
--- a/samples/bpf/ibumad_kern.c
+++ b/samples/bpf/ibumad_kern.c
@@ -31,15 +31,9 @@ struct bpf_map_def SEC("maps") write_count = {
};
#undef DEBUG
-#ifdef DEBUG
-#define bpf_debug(fmt, ...) \
-({ \
- char ____fmt[] = fmt; \
- bpf_trace_printk(____fmt, sizeof(____fmt), \
- ##__VA_ARGS__); \
-})
-#else
-#define bpf_debug(fmt, ...)
+#ifndef DEBUG
+#undef bpf_printk
+#define bpf_printk(fmt, ...)
#endif
/* Taken from the current format defined in
@@ -86,7 +80,7 @@ int on_ib_umad_read_recv(struct ib_umad_rw_args *ctx)
u64 zero = 0, *val;
u8 class = ctx->mgmt_class;
- bpf_debug("ib_umad read recv : class 0x%x\n", class);
+ bpf_printk("ib_umad read recv : class 0x%x\n", class);
val = bpf_map_lookup_elem(&read_count, &class);
if (!val) {
@@ -106,7 +100,7 @@ int on_ib_umad_read_send(struct ib_umad_rw_args *ctx)
u64 zero = 0, *val;
u8 class = ctx->mgmt_class;
- bpf_debug("ib_umad read send : class 0x%x\n", class);
+ bpf_printk("ib_umad read send : class 0x%x\n", class);
val = bpf_map_lookup_elem(&read_count, &class);
if (!val) {
@@ -126,7 +120,7 @@ int on_ib_umad_write(struct ib_umad_rw_args *ctx)
u64 zero = 0, *val;
u8 class = ctx->mgmt_class;
- bpf_debug("ib_umad write : class 0x%x\n", class);
+ bpf_printk("ib_umad write : class 0x%x\n", class);
val = bpf_map_lookup_elem(&write_count, &class);
if (!val) {
diff --git a/samples/bpf/tcp_bpf.readme b/samples/bpf/tcp_bpf.readme
index fee746621aec..78e247f62108 100644
--- a/samples/bpf/tcp_bpf.readme
+++ b/samples/bpf/tcp_bpf.readme
@@ -25,4 +25,4 @@ attached to the cgroupv2).
To remove (unattach) a socket_ops BPF program from a cgroupv2:
- bpftool cgroup attach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog
+ bpftool cgroup detach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog
diff --git a/samples/bpf/tcp_dumpstats_kern.c b/samples/bpf/tcp_dumpstats_kern.c
new file mode 100644
index 000000000000..8557913106a0
--- /dev/null
+++ b/samples/bpf/tcp_dumpstats_kern.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Refer to samples/bpf/tcp_bpf.readme for the instructions on
+ * how to run this sample program.
+ */
+#include <linux/bpf.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define INTERVAL 1000000000ULL
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __u32 type;
+ __u32 map_flags;
+ int *key;
+ __u64 *value;
+} bpf_next_dump SEC(".maps") = {
+ .type = BPF_MAP_TYPE_SK_STORAGE,
+ .map_flags = BPF_F_NO_PREALLOC,
+};
+
+SEC("sockops")
+int _sockops(struct bpf_sock_ops *ctx)
+{
+ struct bpf_tcp_sock *tcp_sk;
+ struct bpf_sock *sk;
+ __u64 *next_dump;
+ __u64 now;
+
+ switch (ctx->op) {
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_RTT_CB_FLAG);
+ return 1;
+ case BPF_SOCK_OPS_RTT_CB:
+ break;
+ default:
+ return 1;
+ }
+
+ sk = ctx->sk;
+ if (!sk)
+ return 1;
+
+ next_dump = bpf_sk_storage_get(&bpf_next_dump, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!next_dump)
+ return 1;
+
+ now = bpf_ktime_get_ns();
+ if (now < *next_dump)
+ return 1;
+
+ tcp_sk = bpf_tcp_sock(sk);
+ if (!tcp_sk)
+ return 1;
+
+ *next_dump = now + INTERVAL;
+
+ bpf_printk("dsack_dups=%u delivered=%u\n",
+ tcp_sk->dsack_dups, tcp_sk->delivered);
+ bpf_printk("delivered_ce=%u icsk_retransmits=%u\n",
+ tcp_sk->delivered_ce, tcp_sk->icsk_retransmits);
+
+ return 1;
+}
diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c
index 586ff751aba9..a3596b617c4c 100644
--- a/samples/bpf/xdp_adjust_tail_user.c
+++ b/samples/bpf/xdp_adjust_tail_user.c
@@ -13,6 +13,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <net/if.h>
#include <sys/resource.h>
#include <arpa/inet.h>
#include <netinet/ether.h>
@@ -69,7 +70,7 @@ static void usage(const char *cmd)
printf("Start a XDP prog which send ICMP \"packet too big\" \n"
"messages if ingress packet is bigger then MAX_SIZE bytes\n");
printf("Usage: %s [...]\n", cmd);
- printf(" -i <ifindex> Interface Index\n");
+ printf(" -i <ifname|ifindex> Interface\n");
printf(" -T <stop-after-X-seconds> Default: 0 (forever)\n");
printf(" -S use skb-mode\n");
printf(" -N enforce native mode\n");
@@ -102,7 +103,9 @@ int main(int argc, char **argv)
switch (opt) {
case 'i':
- ifindex = atoi(optarg);
+ ifindex = if_nametoindex(optarg);
+ if (!ifindex)
+ ifindex = atoi(optarg);
break;
case 'T':
kill_after_s = atoi(optarg);
@@ -136,6 +139,11 @@ int main(int argc, char **argv)
return 1;
}
+ if (!ifindex) {
+ fprintf(stderr, "Invalid ifname\n");
+ return 1;
+ }
+
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
prog_load_attr.file = filename;
diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
index 15bb6f67f9c3..f70ee33907fd 100644
--- a/samples/bpf/xdp_redirect_map_user.c
+++ b/samples/bpf/xdp_redirect_map_user.c
@@ -10,6 +10,7 @@
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
+#include <net/if.h>
#include <unistd.h>
#include <libgen.h>
#include <sys/resource.h>
@@ -85,7 +86,7 @@ static void poll_stats(int interval, int ifindex)
static void usage(const char *prog)
{
fprintf(stderr,
- "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n"
+ "usage: %s [OPTS] <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n\n"
"OPTS:\n"
" -S use skb-mode\n"
" -N enforce native mode\n"
@@ -127,7 +128,7 @@ int main(int argc, char **argv)
}
if (optind == argc) {
- printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]);
+ printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
return 1;
}
@@ -136,8 +137,14 @@ int main(int argc, char **argv)
return 1;
}
- ifindex_in = strtoul(argv[optind], NULL, 0);
- ifindex_out = strtoul(argv[optind + 1], NULL, 0);
+ ifindex_in = if_nametoindex(argv[optind]);
+ if (!ifindex_in)
+ ifindex_in = strtoul(argv[optind], NULL, 0);
+
+ ifindex_out = if_nametoindex(argv[optind + 1]);
+ if (!ifindex_out)
+ ifindex_out = strtoul(argv[optind + 1], NULL, 0);
+
printf("input: %d output: %d\n", ifindex_in, ifindex_out);
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index ce71be187205..39de06f3ec25 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -10,6 +10,7 @@
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
+#include <net/if.h>
#include <unistd.h>
#include <libgen.h>
#include <sys/resource.h>
@@ -85,7 +86,7 @@ static void poll_stats(int interval, int ifindex)
static void usage(const char *prog)
{
fprintf(stderr,
- "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n"
+ "usage: %s [OPTS] <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n\n"
"OPTS:\n"
" -S use skb-mode\n"
" -N enforce native mode\n"
@@ -128,7 +129,7 @@ int main(int argc, char **argv)
}
if (optind == argc) {
- printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]);
+ printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
return 1;
}
@@ -137,8 +138,14 @@ int main(int argc, char **argv)
return 1;
}
- ifindex_in = strtoul(argv[optind], NULL, 0);
- ifindex_out = strtoul(argv[optind + 1], NULL, 0);
+ ifindex_in = if_nametoindex(argv[optind]);
+ if (!ifindex_in)
+ ifindex_in = strtoul(argv[optind], NULL, 0);
+
+ ifindex_out = if_nametoindex(argv[optind + 1]);
+ if (!ifindex_out)
+ ifindex_out = strtoul(argv[optind + 1], NULL, 0);
+
printf("input: %d output: %d\n", ifindex_in, ifindex_out);
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
index 394896430712..dfb68582e243 100644
--- a/samples/bpf/xdp_tx_iptunnel_user.c
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -9,6 +9,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <net/if.h>
#include <sys/resource.h>
#include <arpa/inet.h>
#include <netinet/ether.h>
@@ -83,7 +84,7 @@ static void usage(const char *cmd)
"in an IPv4/v6 header and XDP_TX it out. The dst <VIP:PORT>\n"
"is used to select packets to encapsulate\n\n");
printf("Usage: %s [...]\n", cmd);
- printf(" -i <ifindex> Interface Index\n");
+ printf(" -i <ifname|ifindex> Interface\n");
printf(" -a <vip-service-address> IPv4 or IPv6\n");
printf(" -p <vip-service-port> A port range (e.g. 433-444) is also allowed\n");
printf(" -s <source-ip> Used in the IPTunnel header\n");
@@ -181,7 +182,9 @@ int main(int argc, char **argv)
switch (opt) {
case 'i':
- ifindex = atoi(optarg);
+ ifindex = if_nametoindex(optarg);
+ if (!ifindex)
+ ifindex = atoi(optarg);
break;
case 'a':
vip.family = parse_ipstr(optarg, vip.daddr.v6);
@@ -253,6 +256,11 @@ int main(int argc, char **argv)
return 1;
}
+ if (!ifindex) {
+ fprintf(stderr, "Invalid ifname\n");
+ return 1;
+ }
+
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
prog_load_attr.file = filename;
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 0f5eb0d7f2df..93eaaf7239b2 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -68,6 +68,7 @@ static int opt_queue;
static int opt_poll;
static int opt_interval = 1;
static u32 opt_xdp_bind_flags;
+static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
static __u32 prog_id;
struct xsk_umem_info {
@@ -276,6 +277,12 @@ static size_t gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
{
struct xsk_umem_info *umem;
+ struct xsk_umem_config cfg = {
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+ .frame_size = opt_xsk_frame_size,
+ .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
+ };
int ret;
umem = calloc(1, sizeof(*umem));
@@ -283,7 +290,7 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
exit_with_error(errno);
ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
- NULL);
+ &cfg);
if (ret)
exit_with_error(-ret);
@@ -323,11 +330,9 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem)
&idx);
if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
exit_with_error(-ret);
- for (i = 0;
- i < XSK_RING_PROD__DEFAULT_NUM_DESCS *
- XSK_UMEM__DEFAULT_FRAME_SIZE;
- i += XSK_UMEM__DEFAULT_FRAME_SIZE)
- *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) = i;
+ for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
+ *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) =
+ i * opt_xsk_frame_size;
xsk_ring_prod__submit(&xsk->umem->fq,
XSK_RING_PROD__DEFAULT_NUM_DESCS);
@@ -346,6 +351,7 @@ static struct option long_options[] = {
{"interval", required_argument, 0, 'n'},
{"zero-copy", no_argument, 0, 'z'},
{"copy", no_argument, 0, 'c'},
+ {"frame-size", required_argument, 0, 'f'},
{0, 0, 0, 0}
};
@@ -365,8 +371,9 @@ static void usage(const char *prog)
" -n, --interval=n Specify statistics update interval (default 1 sec).\n"
" -z, --zero-copy Force zero-copy mode.\n"
" -c, --copy Force copy mode.\n"
+ " -f, --frame-size=n Set the frame size (must be a power of two, default is %d).\n"
"\n";
- fprintf(stderr, str, prog);
+ fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE);
exit(EXIT_FAILURE);
}
@@ -377,7 +384,7 @@ static void parse_command_line(int argc, char **argv)
opterr = 0;
for (;;) {
- c = getopt_long(argc, argv, "Frtli:q:psSNn:cz", long_options,
+ c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:", long_options,
&option_index);
if (c == -1)
break;
@@ -420,6 +427,9 @@ static void parse_command_line(int argc, char **argv)
case 'F':
opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
+ case 'f':
+ opt_xsk_frame_size = atoi(optarg);
+ break;
default:
usage(basename(argv[0]));
}
@@ -432,6 +442,11 @@ static void parse_command_line(int argc, char **argv)
usage(basename(argv[0]));
}
+ if (opt_xsk_frame_size & (opt_xsk_frame_size - 1)) {
+ fprintf(stderr, "--frame-size=%d is not a power of two\n",
+ opt_xsk_frame_size);
+ usage(basename(argv[0]));
+ }
}
static void kick_tx(struct xsk_socket_info *xsk)
@@ -583,8 +598,7 @@ static void tx_only(struct xsk_socket_info *xsk)
for (i = 0; i < BATCH_SIZE; i++) {
xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr
- = (frame_nb + i) <<
- XSK_UMEM__DEFAULT_FRAME_SHIFT;
+ = (frame_nb + i) * opt_xsk_frame_size;
xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
sizeof(pkt_data) - 1;
}
@@ -661,21 +675,19 @@ int main(int argc, char **argv)
}
ret = posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */
- NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE);
+ NUM_FRAMES * opt_xsk_frame_size);
if (ret)
exit_with_error(ret);
/* Create sockets... */
- umem = xsk_configure_umem(bufs,
- NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE);
+ umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size);
xsks[num_socks++] = xsk_configure_socket(umem);
if (opt_bench == BENCH_TXONLY) {
int i;
- for (i = 0; i < NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE;
- i += XSK_UMEM__DEFAULT_FRAME_SIZE)
- (void)gen_eth_frame(umem, i);
+ for (i = 0; i < NUM_FRAMES; i++)
+ (void)gen_eth_frame(umem, i * opt_xsk_frame_size);
}
signal(SIGINT, int_exit);