summaryrefslogtreecommitdiffstats
path: root/include/uapi/linux/bpf.h
diff options
context:
space:
mode:
authorYonghong Song2017-10-05 18:19:20 +0200
committerDavid S. Miller2017-10-08 00:05:57 +0200
commit908432ca84fc229e906ba164219e9ad0fe56f755 (patch)
tree042a24e92305abbd98d761b695356d5d82760a61 /include/uapi/linux/bpf.h
parentbpf: perf event change needed for subsequent bpf helpers (diff)
downloadkernel-qcow2-linux-908432ca84fc229e906ba164219e9ad0fe56f755.tar.gz
kernel-qcow2-linux-908432ca84fc229e906ba164219e9ad0fe56f755.tar.xz
kernel-qcow2-linux-908432ca84fc229e906ba164219e9ad0fe56f755.zip
bpf: add helper bpf_perf_event_read_value for perf event array map
Hardware pmu counters are limited resources. When there are more pmu based perf events opened than available counters, kernel will multiplex these events so each event gets certain percentage (but not 100%) of the pmu time. In case that multiplexing happens, the number of samples or counter value will not reflect the case compared to no multiplexing. This makes comparison between different runs difficult. Typically, the number of samples or counter value should be normalized before comparing to other experiments. The typical normalization is done like: normalized_num_samples = num_samples * time_enabled / time_running normalized_counter_value = counter_value * time_enabled / time_running where time_enabled is the time enabled for event and time_running is the time running for event since last normalization. This patch adds helper bpf_perf_event_read_value for kprobed based perf event array map, to read perf counter and enabled/running time. The enabled/running time is accumulated since the perf event open. To achieve scaling factor between two bpf invocations, users can can use cpu_id as the key (which is typical for perf array usage model) to remember the previous value and do the calculation inside the bpf program. Signed-off-by: Yonghong Song <yhs@fb.com> Acked-by: Alexei Starovoitov <ast@fb.com> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/uapi/linux/bpf.h')
-rw-r--r--include/uapi/linux/bpf.h21
1 files changed, 19 insertions, 2 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6082faf5fd2a..7b57a212c7d7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -641,6 +641,14 @@ union bpf_attr {
* @xdp_md: pointer to xdp_md
* @delta: An positive/negative integer to be added to xdp_md.data_meta
* Return: 0 on success or negative on error
+ *
+ * int bpf_perf_event_read_value(map, flags, buf, buf_size)
+ * read perf event counter value and perf event enabled/running time
+ * @map: pointer to perf_event_array map
+ * @flags: index of event in the map or bitmask flags
+ * @buf: buf to fill
+ * @buf_size: size of the buf
+ * Return: 0 on success or negative error code
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -697,7 +705,8 @@ union bpf_attr {
FN(redirect_map), \
FN(sk_redirect_map), \
FN(sock_map_update), \
- FN(xdp_adjust_meta),
+ FN(xdp_adjust_meta), \
+ FN(perf_event_read_value),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -741,7 +750,9 @@ enum bpf_func_id {
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2)
-/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
+/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
+ * BPF_FUNC_perf_event_read_value flags.
+ */
#define BPF_F_INDEX_MASK 0xffffffffULL
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
/* BPF_FUNC_perf_event_output for sk_buff input context. */
@@ -934,4 +945,10 @@ enum {
#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */
+struct bpf_perf_event_value {
+ __u64 counter;
+ __u64 enabled;
+ __u64 running;
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */