From ef53e9e14714de2ce26eaae0244c07c426064d69 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Mon, 16 Apr 2018 15:07:13 -0700 Subject: net: Remove unused tcp_set_state tracepoint This tracepoint was replaced by inet_sock_set_state in 563e0bb and not used anywhere in the kernel anymore. Remove it. Signed-off-by: Andrey Ignatov Signed-off-by: David S. Miller --- include/trace/events/tcp.h | 47 ---------------------------------------------- 1 file changed, 47 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 878b2be7ce77..3dd68029d77a 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -166,53 +166,6 @@ DEFINE_EVENT(tcp_event_sk, tcp_destroy_sock, TP_ARGS(sk) ); -TRACE_EVENT(tcp_set_state, - - TP_PROTO(const struct sock *sk, const int oldstate, const int newstate), - - TP_ARGS(sk, oldstate, newstate), - - TP_STRUCT__entry( - __field(const void *, skaddr) - __field(int, oldstate) - __field(int, newstate) - __field(__u16, sport) - __field(__u16, dport) - __array(__u8, saddr, 4) - __array(__u8, daddr, 4) - __array(__u8, saddr_v6, 16) - __array(__u8, daddr_v6, 16) - ), - - TP_fast_assign( - struct inet_sock *inet = inet_sk(sk); - __be32 *p32; - - __entry->skaddr = sk; - __entry->oldstate = oldstate; - __entry->newstate = newstate; - - __entry->sport = ntohs(inet->inet_sport); - __entry->dport = ntohs(inet->inet_dport); - - p32 = (__be32 *) __entry->saddr; - *p32 = inet->inet_saddr; - - p32 = (__be32 *) __entry->daddr; - *p32 = inet->inet_daddr; - - TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, - sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); - ), - - TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s", - __entry->sport, __entry->dport, - __entry->saddr, __entry->daddr, - __entry->saddr_v6, __entry->daddr_v6, - show_tcp_state_name(__entry->oldstate), - show_tcp_state_name(__entry->newstate)) -); - TRACE_EVENT(tcp_retransmit_synack, TP_PROTO(const struct sock *sk, const struct request_sock *req), -- cgit v1.2.3-55-g7522 From 6163849d289be6ff2acd2fb520da303dec3219f0 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Fri, 20 Apr 2018 23:18:26 +0800 Subject: net: introduce a new tracepoint for tcp_rcv_space_adjust tcp_rcv_space_adjust is called every time data is copied to user space, introducing a tcp tracepoint for which could show us when the packet is copied to user. When a tcp packet arrives, tcp_rcv_established() will be called and with the existed tracepoint tcp_probe we could get the time when this packet arrives. Then this packet will be copied to user, and tcp_rcv_space_adjust will be called and with this new introduced tracepoint we could get the time when this packet is copied to user. With these two tracepoints, we could figure out whether the user program processes this packet immediately or there's latency. Hence in the printk message, sk_cookie is printed as a key to relate tcp_rcv_space_adjust with tcp_probe. Maybe we could export sockfd in this new tracepoint as well, then we could relate this new tracepoint with epoll/read/recv* tracepoints, and finally that could show us the whole lifespan of this packet. But we could also implement that with pid as these functions are executed in process context. Signed-off-by: Yafang Shao Signed-off-by: David S. Miller --- include/trace/events/tcp.h | 30 +++++++++++++++++++++--------- net/ipv4/tcp_input.c | 2 ++ 2 files changed, 23 insertions(+), 9 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 3dd68029d77a..c1a5284713b8 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -10,6 +10,7 @@ #include #include #include +#include #define TP_STORE_V4MAPPED(__entry, saddr, daddr) \ do { \ @@ -113,7 +114,7 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_send_reset, */ DECLARE_EVENT_CLASS(tcp_event_sk, - TP_PROTO(const struct sock *sk), + TP_PROTO(struct sock *sk), TP_ARGS(sk), @@ -125,6 +126,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk, __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) + __field(__u64, sock_cookie) ), TP_fast_assign( @@ -144,24 +146,34 @@ DECLARE_EVENT_CLASS(tcp_event_sk, TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); + + __entry->sock_cookie = sock_gen_cookie(sk); ), - TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", + TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock_cookie=%llx", __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, - __entry->saddr_v6, __entry->daddr_v6) + __entry->saddr_v6, __entry->daddr_v6, + __entry->sock_cookie) ); DEFINE_EVENT(tcp_event_sk, tcp_receive_reset, - TP_PROTO(const struct sock *sk), + TP_PROTO(struct sock *sk), TP_ARGS(sk) ); DEFINE_EVENT(tcp_event_sk, tcp_destroy_sock, - TP_PROTO(const struct sock *sk), + TP_PROTO(struct sock *sk), + + TP_ARGS(sk) +); + +DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust, + + TP_PROTO(struct sock *sk), TP_ARGS(sk) ); @@ -232,6 +244,7 @@ TRACE_EVENT(tcp_probe, __field(__u32, snd_wnd) __field(__u32, srtt) __field(__u32, rcv_wnd) + __field(__u64, sock_cookie) ), TP_fast_assign( @@ -256,15 +269,14 @@ TRACE_EVENT(tcp_probe, __entry->rcv_wnd = tp->rcv_wnd; __entry->ssthresh = tcp_current_ssthresh(sk); __entry->srtt = tp->srtt_us >> 3; + __entry->sock_cookie = sock_gen_cookie(sk); ), - TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x " - "snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u " - "rcv_wnd=%u", + TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx", __entry->saddr, __entry->daddr, __entry->mark, __entry->length, __entry->snd_nxt, __entry->snd_una, __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd, - __entry->srtt, __entry->rcv_wnd) + __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie) ); #endif /* _TRACE_TCP_H */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0396fb919b5d..5a17cfc75326 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -582,6 +582,8 @@ void tcp_rcv_space_adjust(struct sock *sk) u32 copied; int time; + trace_tcp_rcv_space_adjust(sk); + tcp_mstamp_refresh(tp); time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time); if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0) -- cgit v1.2.3-55-g7522 From 4d220ed0f8140c478ab7b0a14d96821da639b646 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sat, 28 Apr 2018 19:56:37 -0700 Subject: bpf: remove tracepoints from bpf core tracepoints to bpf core were added as a way to provide introspection to bpf programs and maps, but after some time it became clear that this approach is inadequate, so prog_id, map_id and corresponding get_next_id, get_fd_by_id, get_info_by_fd, prog_query APIs were introduced and fully adopted by bpftool and other applications. The tracepoints in bpf core started to rot and causing syzbot warnings: WARNING: CPU: 0 PID: 3008 at kernel/trace/trace_event_perf.c:274 Kernel panic - not syncing: panic_on_warn set ... perf_trace_bpf_map_keyval+0x260/0xbd0 include/trace/events/bpf.h:228 trace_bpf_map_update_elem include/trace/events/bpf.h:274 [inline] map_update_elem kernel/bpf/syscall.c:597 [inline] SYSC_bpf kernel/bpf/syscall.c:1478 [inline] Hence this patch deletes tracepoints in bpf core. Reported-by: Eric Biggers Reported-by: syzbot Signed-off-by: Alexei Starovoitov Acked-by: David S. Miller Signed-off-by: Daniel Borkmann --- MAINTAINERS | 1 - include/linux/bpf_trace.h | 1 - include/trace/events/bpf.h | 355 --------------------------------------------- kernel/bpf/core.c | 6 - kernel/bpf/inode.c | 16 +- kernel/bpf/syscall.c | 15 +- 6 files changed, 2 insertions(+), 392 deletions(-) delete mode 100644 include/trace/events/bpf.h (limited to 'include/trace/events') diff --git a/MAINTAINERS b/MAINTAINERS index a52800867850..537fd17a211b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2727,7 +2727,6 @@ F: Documentation/networking/filter.txt F: Documentation/bpf/ F: include/linux/bpf* F: include/linux/filter.h -F: include/trace/events/bpf.h F: include/trace/events/xdp.h F: include/uapi/linux/bpf* F: include/uapi/linux/filter.h diff --git a/include/linux/bpf_trace.h b/include/linux/bpf_trace.h index e6fe98ae3794..ddf896abcfb6 100644 --- a/include/linux/bpf_trace.h +++ b/include/linux/bpf_trace.h @@ -2,7 +2,6 @@ #ifndef __LINUX_BPF_TRACE_H__ #define __LINUX_BPF_TRACE_H__ -#include #include #endif /* __LINUX_BPF_TRACE_H__ */ diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h deleted file mode 100644 index 150185647e6b..000000000000 --- a/include/trace/events/bpf.h +++ /dev/null @@ -1,355 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM bpf - -#if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_BPF_H - -/* These are only used within the BPF_SYSCALL code */ -#ifdef CONFIG_BPF_SYSCALL - -#include -#include -#include -#include - -#define __PROG_TYPE_MAP(FN) \ - FN(SOCKET_FILTER) \ - FN(KPROBE) \ - FN(SCHED_CLS) \ - FN(SCHED_ACT) \ - FN(TRACEPOINT) \ - FN(XDP) \ - FN(PERF_EVENT) \ - FN(CGROUP_SKB) \ - FN(CGROUP_SOCK) \ - FN(LWT_IN) \ - FN(LWT_OUT) \ - FN(LWT_XMIT) - -#define __MAP_TYPE_MAP(FN) \ - FN(HASH) \ - FN(ARRAY) \ - FN(PROG_ARRAY) \ - FN(PERF_EVENT_ARRAY) \ - FN(PERCPU_HASH) \ - FN(PERCPU_ARRAY) \ - FN(STACK_TRACE) \ - FN(CGROUP_ARRAY) \ - FN(LRU_HASH) \ - FN(LRU_PERCPU_HASH) \ - FN(LPM_TRIE) - -#define __PROG_TYPE_TP_FN(x) \ - TRACE_DEFINE_ENUM(BPF_PROG_TYPE_##x); -#define __PROG_TYPE_SYM_FN(x) \ - { BPF_PROG_TYPE_##x, #x }, -#define __PROG_TYPE_SYM_TAB \ - __PROG_TYPE_MAP(__PROG_TYPE_SYM_FN) { -1, 0 } -__PROG_TYPE_MAP(__PROG_TYPE_TP_FN) - -#define __MAP_TYPE_TP_FN(x) \ - TRACE_DEFINE_ENUM(BPF_MAP_TYPE_##x); -#define __MAP_TYPE_SYM_FN(x) \ - { BPF_MAP_TYPE_##x, #x }, -#define __MAP_TYPE_SYM_TAB \ - __MAP_TYPE_MAP(__MAP_TYPE_SYM_FN) { -1, 0 } -__MAP_TYPE_MAP(__MAP_TYPE_TP_FN) - -DECLARE_EVENT_CLASS(bpf_prog_event, - - TP_PROTO(const struct bpf_prog *prg), - - TP_ARGS(prg), - - TP_STRUCT__entry( - __array(u8, prog_tag, 8) - __field(u32, type) - ), - - TP_fast_assign( - BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag)); - memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag)); - __entry->type = prg->type; - ), - - TP_printk("prog=%s type=%s", - __print_hex_str(__entry->prog_tag, 8), - __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB)) -); - -DEFINE_EVENT(bpf_prog_event, bpf_prog_get_type, - - TP_PROTO(const struct bpf_prog *prg), - - TP_ARGS(prg) -); - -DEFINE_EVENT(bpf_prog_event, bpf_prog_put_rcu, - - TP_PROTO(const struct bpf_prog *prg), - - TP_ARGS(prg) -); - -TRACE_EVENT(bpf_prog_load, - - TP_PROTO(const struct bpf_prog *prg, int ufd), - - TP_ARGS(prg, ufd), - - TP_STRUCT__entry( - __array(u8, prog_tag, 8) - __field(u32, type) - __field(int, ufd) - ), - - TP_fast_assign( - BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag)); - memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag)); - __entry->type = prg->type; - __entry->ufd = ufd; - ), - - TP_printk("prog=%s type=%s ufd=%d", - __print_hex_str(__entry->prog_tag, 8), - __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB), - __entry->ufd) -); - -TRACE_EVENT(bpf_map_create, - - TP_PROTO(const struct bpf_map *map, int ufd), - - TP_ARGS(map, ufd), - - TP_STRUCT__entry( - __field(u32, type) - __field(u32, size_key) - __field(u32, size_value) - __field(u32, max_entries) - __field(u32, flags) - __field(int, ufd) - ), - - TP_fast_assign( - __entry->type = map->map_type; - __entry->size_key = map->key_size; - __entry->size_value = map->value_size; - __entry->max_entries = map->max_entries; - __entry->flags = map->map_flags; - __entry->ufd = ufd; - ), - - TP_printk("map type=%s ufd=%d key=%u val=%u max=%u flags=%x", - __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), - __entry->ufd, __entry->size_key, __entry->size_value, - __entry->max_entries, __entry->flags) -); - -DECLARE_EVENT_CLASS(bpf_obj_prog, - - TP_PROTO(const struct bpf_prog *prg, int ufd, - const struct filename *pname), - - TP_ARGS(prg, ufd, pname), - - TP_STRUCT__entry( - __array(u8, prog_tag, 8) - __field(int, ufd) - __string(path, pname->name) - ), - - TP_fast_assign( - BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag)); - memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag)); - __assign_str(path, pname->name); - __entry->ufd = ufd; - ), - - TP_printk("prog=%s path=%s ufd=%d", - __print_hex_str(__entry->prog_tag, 8), - __get_str(path), __entry->ufd) -); - -DEFINE_EVENT(bpf_obj_prog, bpf_obj_pin_prog, - - TP_PROTO(const struct bpf_prog *prg, int ufd, - const struct filename *pname), - - TP_ARGS(prg, ufd, pname) -); - -DEFINE_EVENT(bpf_obj_prog, bpf_obj_get_prog, - - TP_PROTO(const struct bpf_prog *prg, int ufd, - const struct filename *pname), - - TP_ARGS(prg, ufd, pname) -); - -DECLARE_EVENT_CLASS(bpf_obj_map, - - TP_PROTO(const struct bpf_map *map, int ufd, - const struct filename *pname), - - TP_ARGS(map, ufd, pname), - - TP_STRUCT__entry( - __field(u32, type) - __field(int, ufd) - __string(path, pname->name) - ), - - TP_fast_assign( - __assign_str(path, pname->name); - __entry->type = map->map_type; - __entry->ufd = ufd; - ), - - TP_printk("map type=%s ufd=%d path=%s", - __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), - __entry->ufd, __get_str(path)) -); - -DEFINE_EVENT(bpf_obj_map, bpf_obj_pin_map, - - TP_PROTO(const struct bpf_map *map, int ufd, - const struct filename *pname), - - TP_ARGS(map, ufd, pname) -); - -DEFINE_EVENT(bpf_obj_map, bpf_obj_get_map, - - TP_PROTO(const struct bpf_map *map, int ufd, - const struct filename *pname), - - TP_ARGS(map, ufd, pname) -); - -DECLARE_EVENT_CLASS(bpf_map_keyval, - - TP_PROTO(const struct bpf_map *map, int ufd, - const void *key, const void *val), - - TP_ARGS(map, ufd, key, val), - - TP_STRUCT__entry( - __field(u32, type) - __field(u32, key_len) - __dynamic_array(u8, key, map->key_size) - __field(bool, key_trunc) - __field(u32, val_len) - __dynamic_array(u8, val, map->value_size) - __field(bool, val_trunc) - __field(int, ufd) - ), - - TP_fast_assign( - memcpy(__get_dynamic_array(key), key, map->key_size); - memcpy(__get_dynamic_array(val), val, map->value_size); - __entry->type = map->map_type; - __entry->key_len = min(map->key_size, 16U); - __entry->key_trunc = map->key_size != __entry->key_len; - __entry->val_len = min(map->value_size, 16U); - __entry->val_trunc = map->value_size != __entry->val_len; - __entry->ufd = ufd; - ), - - TP_printk("map type=%s ufd=%d key=[%s%s] val=[%s%s]", - __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), - __entry->ufd, - __print_hex(__get_dynamic_array(key), __entry->key_len), - __entry->key_trunc ? " ..." : "", - __print_hex(__get_dynamic_array(val), __entry->val_len), - __entry->val_trunc ? " ..." : "") -); - -DEFINE_EVENT(bpf_map_keyval, bpf_map_lookup_elem, - - TP_PROTO(const struct bpf_map *map, int ufd, - const void *key, const void *val), - - TP_ARGS(map, ufd, key, val) -); - -DEFINE_EVENT(bpf_map_keyval, bpf_map_update_elem, - - TP_PROTO(const struct bpf_map *map, int ufd, - const void *key, const void *val), - - TP_ARGS(map, ufd, key, val) -); - -TRACE_EVENT(bpf_map_delete_elem, - - TP_PROTO(const struct bpf_map *map, int ufd, - const void *key), - - TP_ARGS(map, ufd, key), - - TP_STRUCT__entry( - __field(u32, type) - __field(u32, key_len) - __dynamic_array(u8, key, map->key_size) - __field(bool, key_trunc) - __field(int, ufd) - ), - - TP_fast_assign( - memcpy(__get_dynamic_array(key), key, map->key_size); - __entry->type = map->map_type; - __entry->key_len = min(map->key_size, 16U); - __entry->key_trunc = map->key_size != __entry->key_len; - __entry->ufd = ufd; - ), - - TP_printk("map type=%s ufd=%d key=[%s%s]", - __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), - __entry->ufd, - __print_hex(__get_dynamic_array(key), __entry->key_len), - __entry->key_trunc ? " ..." : "") -); - -TRACE_EVENT(bpf_map_next_key, - - TP_PROTO(const struct bpf_map *map, int ufd, - const void *key, const void *key_next), - - TP_ARGS(map, ufd, key, key_next), - - TP_STRUCT__entry( - __field(u32, type) - __field(u32, key_len) - __dynamic_array(u8, key, map->key_size) - __dynamic_array(u8, nxt, map->key_size) - __field(bool, key_trunc) - __field(bool, key_null) - __field(int, ufd) - ), - - TP_fast_assign( - if (key) - memcpy(__get_dynamic_array(key), key, map->key_size); - __entry->key_null = !key; - memcpy(__get_dynamic_array(nxt), key_next, map->key_size); - __entry->type = map->map_type; - __entry->key_len = min(map->key_size, 16U); - __entry->key_trunc = map->key_size != __entry->key_len; - __entry->ufd = ufd; - ), - - TP_printk("map type=%s ufd=%d key=[%s%s] next=[%s%s]", - __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), - __entry->ufd, - __entry->key_null ? "NULL" : __print_hex(__get_dynamic_array(key), - __entry->key_len), - __entry->key_trunc && !__entry->key_null ? " ..." : "", - __print_hex(__get_dynamic_array(nxt), __entry->key_len), - __entry->key_trunc ? " ..." : "") -); -#endif /* CONFIG_BPF_SYSCALL */ -#endif /* _TRACE_BPF_H */ - -#include diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 9349a5db3cf2..90feeba3a1a1 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1845,9 +1845,3 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to, #include EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception); - -/* These are only used within the BPF_SYSCALL code */ -#ifdef CONFIG_BPF_SYSCALL -EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type); -EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu); -#endif diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index a41343009ccc..ed13645bd80c 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -429,13 +429,6 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname) ret = bpf_obj_do_pin(pname, raw, type); if (ret != 0) bpf_any_put(raw, type); - if ((trace_bpf_obj_pin_prog_enabled() || - trace_bpf_obj_pin_map_enabled()) && !ret) { - if (type == BPF_TYPE_PROG) - trace_bpf_obj_pin_prog(raw, ufd, pname); - if (type == BPF_TYPE_MAP) - trace_bpf_obj_pin_map(raw, ufd, pname); - } out: putname(pname); return ret; @@ -502,15 +495,8 @@ int bpf_obj_get_user(const char __user *pathname, int flags) else goto out; - if (ret < 0) { + if (ret < 0) bpf_any_put(raw, type); - } else if (trace_bpf_obj_get_prog_enabled() || - trace_bpf_obj_get_map_enabled()) { - if (type == BPF_TYPE_PROG) - trace_bpf_obj_get_prog(raw, ret, pname); - if (type == BPF_TYPE_MAP) - trace_bpf_obj_get_map(raw, ret, pname); - } out: putname(pname); return ret; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0bd2944eafb9..263e13ede029 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -503,7 +503,6 @@ static int map_create(union bpf_attr *attr) return err; } - trace_bpf_map_create(map, err); return err; free_map: @@ -663,7 +662,6 @@ static int map_lookup_elem(union bpf_attr *attr) if (copy_to_user(uvalue, value, value_size) != 0) goto free_value; - trace_bpf_map_lookup_elem(map, ufd, key, value); err = 0; free_value: @@ -760,8 +758,6 @@ static int map_update_elem(union bpf_attr *attr) __this_cpu_dec(bpf_prog_active); preempt_enable(); out: - if (!err) - trace_bpf_map_update_elem(map, ufd, key, value); free_value: kfree(value); free_key: @@ -814,8 +810,6 @@ static int map_delete_elem(union bpf_attr *attr) __this_cpu_dec(bpf_prog_active); preempt_enable(); out: - if (!err) - trace_bpf_map_delete_elem(map, ufd, key); kfree(key); err_put: fdput(f); @@ -879,7 +873,6 @@ out: if (copy_to_user(unext_key, next_key, map->key_size) != 0) goto free_next_key; - trace_bpf_map_next_key(map, ufd, key, next_key); err = 0; free_next_key: @@ -1027,7 +1020,6 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) if (atomic_dec_and_test(&prog->aux->refcnt)) { int i; - trace_bpf_prog_put_rcu(prog); /* bpf_prog_free_id() must be called first */ bpf_prog_free_id(prog, do_idr_lock); @@ -1194,11 +1186,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd) struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, bool attach_drv) { - struct bpf_prog *prog = __bpf_prog_get(ufd, &type, attach_drv); - - if (!IS_ERR(prog)) - trace_bpf_prog_get_type(prog); - return prog; + return __bpf_prog_get(ufd, &type, attach_drv); } EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); @@ -1373,7 +1361,6 @@ static int bpf_prog_load(union bpf_attr *attr) } bpf_prog_kallsyms_add(prog); - trace_bpf_prog_load(prog, err); return err; free_used_maps: -- cgit v1.2.3-55-g7522 From d4bea421f7322400d804c2284739e42e61f78349 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 May 2018 20:34:24 -0700 Subject: net/ipv6: Update fib6 tracepoint to take fib6_info Similar to IPv4, IPv6 should use the FIB lookup result in the tracepoint. Signed-off-by: David Ahern Acked-by: David S. Miller Signed-off-by: Daniel Borkmann --- include/trace/events/fib6.h | 14 +++++++------- net/ipv6/route.c | 14 ++++++-------- 2 files changed, 13 insertions(+), 15 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h index 7e8d48a81b91..1b8d951e3c12 100644 --- a/include/trace/events/fib6.h +++ b/include/trace/events/fib6.h @@ -12,10 +12,10 @@ TRACE_EVENT(fib6_table_lookup, - TP_PROTO(const struct net *net, const struct rt6_info *rt, + TP_PROTO(const struct net *net, const struct fib6_info *f6i, struct fib6_table *table, const struct flowi6 *flp), - TP_ARGS(net, rt, table, flp), + TP_ARGS(net, f6i, table, flp), TP_STRUCT__entry( __field( u32, tb_id ) @@ -48,20 +48,20 @@ TRACE_EVENT(fib6_table_lookup, in6 = (struct in6_addr *)__entry->dst; *in6 = flp->daddr; - if (rt->rt6i_idev) { - __assign_str(name, rt->rt6i_idev->dev->name); + if (f6i->fib6_nh.nh_dev) { + __assign_str(name, f6i->fib6_nh.nh_dev); } else { __assign_str(name, ""); } - if (rt == net->ipv6.ip6_null_entry) { + if (f6i == net->ipv6.fib6_null_entry) { struct in6_addr in6_zero = {}; in6 = (struct in6_addr *)__entry->gw; *in6 = in6_zero; - } else if (rt) { + } else if (f6i) { in6 = (struct in6_addr *)__entry->gw; - *in6 = rt->rt6i_gateway; + *in6 = f6i->fib6_nh.nh_gw; } ), diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 019d8ba9021e..73f9c29a5878 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1078,6 +1078,8 @@ restart: goto restart; } + trace_fib6_table_lookup(net, f6i, table, fl6); + /* Search through exception table */ rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); if (rt) { @@ -1096,8 +1098,6 @@ restart: rcu_read_unlock(); - trace_fib6_table_lookup(net, rt, table, fl6); - return rt; } @@ -1827,6 +1827,8 @@ redo_rt6_select: } } + trace_fib6_table_lookup(net, f6i, table, fl6); + return f6i; } @@ -1853,7 +1855,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, rt = net->ipv6.ip6_null_entry; rcu_read_unlock(); dst_hold(&rt->dst); - trace_fib6_table_lookup(net, rt, table, fl6); return rt; } @@ -1864,7 +1865,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, dst_use_noref(&rt->dst, jiffies); rcu_read_unlock(); - trace_fib6_table_lookup(net, rt, table, fl6); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && !(f6i->fib6_flags & RTF_GATEWAY))) { @@ -1890,9 +1890,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, dst_hold(&uncached_rt->dst); } - trace_fib6_table_lookup(net, uncached_rt, table, fl6); return uncached_rt; - } else { /* Get a percpu copy */ @@ -1906,7 +1904,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, local_bh_enable(); rcu_read_unlock(); - trace_fib6_table_lookup(net, pcpu_rt, table, fl6); + return pcpu_rt; } } @@ -2491,7 +2489,7 @@ out: rcu_read_unlock(); - trace_fib6_table_lookup(net, ret, table, fl6); + trace_fib6_table_lookup(net, rt, table, fl6); return ret; }; -- cgit v1.2.3-55-g7522 From 67f29e07e131ffa13ea158c259a513f474c7df27 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 24 May 2018 16:45:46 +0200 Subject: bpf: devmap introduce dev_map_enqueue Functionality is the same, but the ndo_xdp_xmit call is now simply invoked from inside the devmap.c code. V2: Fix compile issue reported by kbuild test robot V5: Cleanups requested by Daniel - Newlines before func definition - Use BUILD_BUG_ON checks - Remove unnecessary use return value store in dev_map_enqueue Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 16 +++++++++++++--- include/trace/events/xdp.h | 9 ++++++++- kernel/bpf/devmap.c | 34 ++++++++++++++++++++++++++++------ net/core/filter.c | 15 ++------------- 4 files changed, 51 insertions(+), 23 deletions(-) (limited to 'include/trace/events') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1795eeee846c..23a809da452d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -487,14 +487,16 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); /* Map specifics */ -struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); +struct xdp_buff; + +struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_flush(struct bpf_map *map); +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp); struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); void __cpu_map_insert_ctx(struct bpf_map *map, u32 index); void __cpu_map_flush(struct bpf_map *map); -struct xdp_buff; int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx); @@ -573,6 +575,15 @@ static inline void __dev_map_flush(struct bpf_map *map) { } +struct xdp_buff; +struct bpf_dtab_netdev; + +static inline +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp) +{ + return 0; +} + static inline struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) { @@ -587,7 +598,6 @@ static inline void __cpu_map_flush(struct bpf_map *map) { } -struct xdp_buff; static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx) diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 8989a92c571a..96104610d40e 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -138,11 +138,18 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err, __entry->map_id, __entry->map_index) ); +#ifndef __DEVMAP_OBJ_TYPE +#define __DEVMAP_OBJ_TYPE +struct _bpf_dtab_netdev { + struct net_device *dev; +}; +#endif /* __DEVMAP_OBJ_TYPE */ + #define devmap_ifindex(fwd, map) \ (!fwd ? 0 : \ (!map ? 0 : \ ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \ - ((struct net_device *)fwd)->ifindex : 0))) + ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0))) #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \ diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 565f9ece9115..06c400e7e4ff 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -48,13 +48,15 @@ * calls will fail at this point. */ #include +#include #include +#include #define DEV_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) struct bpf_dtab_netdev { - struct net_device *dev; + struct net_device *dev; /* must be first member, due to tracepoint */ struct bpf_dtab *dtab; unsigned int bit; struct rcu_head rcu; @@ -240,21 +242,38 @@ void __dev_map_flush(struct bpf_map *map) * update happens in parallel here a dev_put wont happen until after reading the * ifindex. */ -struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key) +struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); - struct bpf_dtab_netdev *dev; + struct bpf_dtab_netdev *obj; if (key >= map->max_entries) return NULL; - dev = READ_ONCE(dtab->netdev_map[key]); - return dev ? dev->dev : NULL; + obj = READ_ONCE(dtab->netdev_map[key]); + return obj; +} + +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp) +{ + struct net_device *dev = dst->dev; + struct xdp_frame *xdpf; + + if (!dev->netdev_ops->ndo_xdp_xmit) + return -EOPNOTSUPP; + + xdpf = convert_to_xdp_frame(xdp); + if (unlikely(!xdpf)) + return -EOVERFLOW; + + /* TODO: implement a bulking/enqueue step later */ + return dev->netdev_ops->ndo_xdp_xmit(dev, xdpf); } static void *dev_map_lookup_elem(struct bpf_map *map, void *key) { - struct net_device *dev = __dev_map_lookup_elem(map, *(u32 *)key); + struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key); + struct net_device *dev = dev = obj ? obj->dev : NULL; return dev ? &dev->ifindex : NULL; } @@ -405,6 +424,9 @@ static struct notifier_block dev_map_notifier = { static int __init dev_map_init(void) { + /* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */ + BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) != + offsetof(struct _bpf_dtab_netdev, dev)); register_netdevice_notifier(&dev_map_notifier); return 0; } diff --git a/net/core/filter.c b/net/core/filter.c index aa114c4acb25..c867106d3707 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3065,20 +3065,9 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, switch (map->map_type) { case BPF_MAP_TYPE_DEVMAP: { - struct net_device *dev = fwd; - struct xdp_frame *xdpf; + struct bpf_dtab_netdev *dst = fwd; - if (!dev->netdev_ops->ndo_xdp_xmit) - return -EOPNOTSUPP; - - xdpf = convert_to_xdp_frame(xdp); - if (unlikely(!xdpf)) - return -EOVERFLOW; - - /* TODO: move to inside map code instead, for bulk support - * err = dev_map_enqueue(dev, xdp); - */ - err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf); + err = dev_map_enqueue(dst, xdp); if (err) return err; __dev_map_insert_ctx(map, index); -- cgit v1.2.3-55-g7522 From 38edddb81172e8b8decb057c0cd23271583a5fa0 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 24 May 2018 16:45:57 +0200 Subject: xdp: add tracepoint for devmap like cpumap have Notice how this allow us get XDP statistic without affecting the XDP performance, as tracepoint is no-longer activated on a per packet basis. V5: Spotted by John Fastabend. Fix 'sent' also counted 'drops' in this patch, a later patch corrected this, but it was a mistake in this intermediate step. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 ++++-- include/trace/events/xdp.h | 39 +++++++++++++++++++++++++++++++++++++++ kernel/bpf/devmap.c | 27 +++++++++++++++++++++++---- net/core/filter.c | 2 +- 4 files changed, 67 insertions(+), 7 deletions(-) (limited to 'include/trace/events') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 23a809da452d..bbe297436e5d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -492,7 +492,8 @@ struct xdp_buff; struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_flush(struct bpf_map *map); -int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp); +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, + struct net_device *dev_rx); struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); void __cpu_map_insert_ctx(struct bpf_map *map, u32 index); @@ -579,7 +580,8 @@ struct xdp_buff; struct bpf_dtab_netdev; static inline -int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp) +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, + struct net_device *dev_rx) { return 0; } diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 96104610d40e..2e9ef0650144 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -229,6 +229,45 @@ TRACE_EVENT(xdp_cpumap_enqueue, __entry->to_cpu) ); +TRACE_EVENT(xdp_devmap_xmit, + + TP_PROTO(const struct bpf_map *map, u32 map_index, + int sent, int drops, + const struct net_device *from_dev, + const struct net_device *to_dev), + + TP_ARGS(map, map_index, sent, drops, from_dev, to_dev), + + TP_STRUCT__entry( + __field(int, map_id) + __field(u32, act) + __field(u32, map_index) + __field(int, drops) + __field(int, sent) + __field(int, from_ifindex) + __field(int, to_ifindex) + ), + + TP_fast_assign( + __entry->map_id = map->id; + __entry->act = XDP_REDIRECT; + __entry->map_index = map_index; + __entry->drops = drops; + __entry->sent = sent; + __entry->from_ifindex = from_dev->ifindex; + __entry->to_ifindex = to_dev->ifindex; + ), + + TP_printk("ndo_xdp_xmit" + " map_id=%d map_index=%d action=%s" + " sent=%d drops=%d" + " from_ifindex=%d to_ifindex=%d", + __entry->map_id, __entry->map_index, + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), + __entry->sent, __entry->drops, + __entry->from_ifindex, __entry->to_ifindex) +); + #endif /* _TRACE_XDP_H */ #include diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 15293b9dfb77..ff2f3bf59f2f 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -58,6 +58,7 @@ #define DEV_MAP_BULK_SIZE 16 struct xdp_bulk_queue { struct xdp_frame *q[DEV_MAP_BULK_SIZE]; + struct net_device *dev_rx; unsigned int count; }; @@ -219,6 +220,7 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj, struct xdp_bulk_queue *bq) { struct net_device *dev = obj->dev; + int sent = 0, drops = 0; int i; if (unlikely(!bq->count)) @@ -235,11 +237,18 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj, int err; err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf); - if (err) + if (err) { + drops++; xdp_return_frame(xdpf); + } else { + sent++; + } } bq->count = 0; + trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit, + sent, drops, bq->dev_rx, dev); + bq->dev_rx = NULL; return 0; } @@ -296,18 +305,28 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key) /* Runs under RCU-read-side, plus in softirq under NAPI protection. * Thus, safe percpu variable access. */ -static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf) +static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf, + struct net_device *dev_rx) + { struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq); if (unlikely(bq->count == DEV_MAP_BULK_SIZE)) bq_xmit_all(obj, bq); + /* Ingress dev_rx will be the same for all xdp_frame's in + * bulk_queue, because bq stored per-CPU and must be flushed + * from net_device drivers NAPI func end. + */ + if (!bq->dev_rx) + bq->dev_rx = dev_rx; + bq->q[bq->count++] = xdpf; return 0; } -int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp) +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, + struct net_device *dev_rx) { struct net_device *dev = dst->dev; struct xdp_frame *xdpf; @@ -319,7 +338,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp) if (unlikely(!xdpf)) return -EOVERFLOW; - return bq_enqueue(dst, xdpf); + return bq_enqueue(dst, xdpf, dev_rx); } static void *dev_map_lookup_elem(struct bpf_map *map, void *key) diff --git a/net/core/filter.c b/net/core/filter.c index c867106d3707..36cf2f87d742 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3067,7 +3067,7 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, case BPF_MAP_TYPE_DEVMAP: { struct bpf_dtab_netdev *dst = fwd; - err = dev_map_enqueue(dst, xdp); + err = dev_map_enqueue(dst, xdp, dev_rx); if (err) return err; __dev_map_insert_ctx(map, index); -- cgit v1.2.3-55-g7522 From e74de52e55c092e7113f839e74400ce9dbe12ceb Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 24 May 2018 16:46:17 +0200 Subject: xdp/trace: extend tracepoint in devmap with an err Extending tracepoint xdp:xdp_devmap_xmit in devmap with an err code allow people to easier identify the reason behind the ndo_xdp_xmit call to a given driver is failing. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- include/trace/events/xdp.h | 10 ++++++---- kernel/bpf/devmap.c | 5 +++-- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 2e9ef0650144..1ecf4c67fcf7 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -234,9 +234,9 @@ TRACE_EVENT(xdp_devmap_xmit, TP_PROTO(const struct bpf_map *map, u32 map_index, int sent, int drops, const struct net_device *from_dev, - const struct net_device *to_dev), + const struct net_device *to_dev, int err), - TP_ARGS(map, map_index, sent, drops, from_dev, to_dev), + TP_ARGS(map, map_index, sent, drops, from_dev, to_dev, err), TP_STRUCT__entry( __field(int, map_id) @@ -246,6 +246,7 @@ TRACE_EVENT(xdp_devmap_xmit, __field(int, sent) __field(int, from_ifindex) __field(int, to_ifindex) + __field(int, err) ), TP_fast_assign( @@ -256,16 +257,17 @@ TRACE_EVENT(xdp_devmap_xmit, __entry->sent = sent; __entry->from_ifindex = from_dev->ifindex; __entry->to_ifindex = to_dev->ifindex; + __entry->err = err; ), TP_printk("ndo_xdp_xmit" " map_id=%d map_index=%d action=%s" " sent=%d drops=%d" - " from_ifindex=%d to_ifindex=%d", + " from_ifindex=%d to_ifindex=%d err=%d", __entry->map_id, __entry->map_index, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->sent, __entry->drops, - __entry->from_ifindex, __entry->to_ifindex) + __entry->from_ifindex, __entry->to_ifindex, __entry->err) ); #endif /* _TRACE_XDP_H */ diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 77908311ec98..ae16d0c373ef 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -220,7 +220,7 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj, struct xdp_bulk_queue *bq) { struct net_device *dev = obj->dev; - int sent = 0, drops = 0; + int sent = 0, drops = 0, err = 0; int i; if (unlikely(!bq->count)) @@ -234,6 +234,7 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj, sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q); if (sent < 0) { + err = sent; sent = 0; goto error; } @@ -242,7 +243,7 @@ out: bq->count = 0; trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit, - sent, drops, bq->dev_rx, dev); + sent, drops, bq->dev_rx, dev, err); bq->dev_rx = NULL; return 0; error: -- cgit v1.2.3-55-g7522 From 9f323973c915d402378cb3e1336dd6ed4c45144b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 23 May 2018 17:08:47 -0700 Subject: net/ipv4: Udate fib_table_lookup tracepoint Commit 4a2d73a4fb36 ("ipv4: fib_rules: support match on sport, dport and ip proto") added support for protocol and ports to FIB rules. Update the FIB lookup tracepoint to dump the parameters. In addition, make the IPv4 tracepoint similar to the IPv6 one where the lookup parameters and result are dumped in 1 event. It is much easier to use and understand the outcome of the lookup. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/trace/events/fib.h | 72 +++++++++++++++++++++++++++------------------- net/ipv4/fib_trie.c | 14 +++++---- 2 files changed, 52 insertions(+), 34 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h index 81b7e985bb45..f5a1d4c518d8 100644 --- a/include/trace/events/fib.h +++ b/include/trace/events/fib.h @@ -12,12 +12,14 @@ TRACE_EVENT(fib_table_lookup, - TP_PROTO(u32 tb_id, const struct flowi4 *flp), + TP_PROTO(u32 tb_id, const struct flowi4 *flp, + const struct fib_nh *nh, int err), - TP_ARGS(tb_id, flp), + TP_ARGS(tb_id, flp, nh, err), TP_STRUCT__entry( __field( u32, tb_id ) + __field( int, err ) __field( int, oif ) __field( int, iif ) __field( __u8, tos ) @@ -25,12 +27,19 @@ TRACE_EVENT(fib_table_lookup, __field( __u8, flags ) __array( __u8, src, 4 ) __array( __u8, dst, 4 ) + __array( __u8, gw, 4 ) + __array( __u8, saddr, 4 ) + __field( u16, sport ) + __field( u16, dport ) + __field( u8, proto ) + __dynamic_array(char, name, IFNAMSIZ ) ), TP_fast_assign( __be32 *p32; __entry->tb_id = tb_id; + __entry->err = err; __entry->oif = flp->flowi4_oif; __entry->iif = flp->flowi4_iif; __entry->tos = flp->flowi4_tos; @@ -42,36 +51,41 @@ TRACE_EVENT(fib_table_lookup, p32 = (__be32 *) __entry->dst; *p32 = flp->daddr; - ), - - TP_printk("table %u oif %d iif %d src %pI4 dst %pI4 tos %d scope %d flags %x", - __entry->tb_id, __entry->oif, __entry->iif, - __entry->src, __entry->dst, __entry->tos, __entry->scope, - __entry->flags) -); - -TRACE_EVENT(fib_table_lookup_nh, - - TP_PROTO(const struct fib_nh *nh), - - TP_ARGS(nh), - - TP_STRUCT__entry( - __string( name, nh->nh_dev->name) - __field( int, oif ) - __array( __u8, src, 4 ) - ), - - TP_fast_assign( - __be32 *p32 = (__be32 *) __entry->src; - __assign_str(name, nh->nh_dev ? nh->nh_dev->name : "not set"); - __entry->oif = nh->nh_oif; - *p32 = nh->nh_saddr; + __entry->proto = flp->flowi4_proto; + if (__entry->proto == IPPROTO_TCP || + __entry->proto == IPPROTO_UDP) { + __entry->sport = ntohs(flp->fl4_sport); + __entry->dport = ntohs(flp->fl4_dport); + } else { + __entry->sport = 0; + __entry->dport = 0; + } + + if (nh) { + p32 = (__be32 *) __entry->saddr; + *p32 = nh->nh_saddr; + + p32 = (__be32 *) __entry->gw; + *p32 = nh->nh_gw; + + __assign_str(name, nh->nh_dev ? nh->nh_dev->name : "-"); + } else { + p32 = (__be32 *) __entry->saddr; + *p32 = 0; + + p32 = (__be32 *) __entry->gw; + *p32 = 0; + + __assign_str(name, "-"); + } ), - TP_printk("nexthop dev %s oif %d src %pI4", - __get_str(name), __entry->oif, __entry->src) + TP_printk("table %u oif %d iif %d proto %u %pI4/%u -> %pI4/%u tos %d scope %d flags %x ==> dev %s gw %pI4 src %pI4 err %d", + __entry->tb_id, __entry->oif, __entry->iif, __entry->proto, + __entry->src, __entry->sport, __entry->dst, __entry->dport, + __entry->tos, __entry->scope, __entry->flags, + __get_str(name), __entry->gw, __entry->saddr, __entry->err) ); TRACE_EVENT(fib_validate_source, diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 3dcffd3ce98c..65c340f230ae 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1326,14 +1326,14 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, unsigned long index; t_key cindex; - trace_fib_table_lookup(tb->tb_id, flp); - pn = t->kv; cindex = 0; n = get_child_rcu(pn, cindex); - if (!n) + if (!n) { + trace_fib_table_lookup(tb->tb_id, flp, NULL, -EAGAIN); return -EAGAIN; + } #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->gets); @@ -1416,8 +1416,11 @@ backtrace: * nothing for us to do as we do not have any * further nodes to parse. */ - if (IS_TRIE(pn)) + if (IS_TRIE(pn)) { + trace_fib_table_lookup(tb->tb_id, flp, + NULL, -EAGAIN); return -EAGAIN; + } #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->backtrack); #endif @@ -1459,6 +1462,7 @@ found: #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_passed); #endif + trace_fib_table_lookup(tb->tb_id, flp, NULL, err); return err; } if (fi->fib_flags & RTNH_F_DEAD) @@ -1494,7 +1498,7 @@ found: #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_passed); #endif - trace_fib_table_lookup_nh(nh); + trace_fib_table_lookup(tb->tb_id, flp, nh, err); return err; } -- cgit v1.2.3-55-g7522 From 30d444d30049490398178ca4337ab49156571886 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 23 May 2018 17:08:48 -0700 Subject: net/ipv6: Udate fib6_table_lookup tracepoint Commit bb0ad1987e96 ("ipv6: fib6_rules: support for match on sport, dport and ip proto") added support for protocol and ports to FIB rules. Update the FIB lookup tracepoint to dump the parameters. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/trace/events/fib6.h | 29 ++++++++++++++++++++++------- net/core/net-traces.c | 4 ---- net/ipv6/route.c | 9 +++++++-- 3 files changed, 29 insertions(+), 13 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h index 1b8d951e3c12..b088b54d699c 100644 --- a/include/trace/events/fib6.h +++ b/include/trace/events/fib6.h @@ -19,7 +19,7 @@ TRACE_EVENT(fib6_table_lookup, TP_STRUCT__entry( __field( u32, tb_id ) - + __field( int, err ) __field( int, oif ) __field( int, iif ) __field( __u8, tos ) @@ -27,7 +27,10 @@ TRACE_EVENT(fib6_table_lookup, __field( __u8, flags ) __array( __u8, src, 16 ) __array( __u8, dst, 16 ) - + __field( u16, sport ) + __field( u16, dport ) + __field( u8, proto ) + __field( u8, rt_type ) __dynamic_array( char, name, IFNAMSIZ ) __array( __u8, gw, 16 ) ), @@ -36,6 +39,7 @@ TRACE_EVENT(fib6_table_lookup, struct in6_addr *in6; __entry->tb_id = table->tb6_id; + __entry->err = ip6_rt_type_to_error(f6i->fib6_type); __entry->oif = flp->flowi6_oif; __entry->iif = flp->flowi6_iif; __entry->tos = ip6_tclass(flp->flowlabel); @@ -48,10 +52,20 @@ TRACE_EVENT(fib6_table_lookup, in6 = (struct in6_addr *)__entry->dst; *in6 = flp->daddr; + __entry->proto = flp->flowi6_proto; + if (__entry->proto == IPPROTO_TCP || + __entry->proto == IPPROTO_UDP) { + __entry->sport = ntohs(flp->fl6_sport); + __entry->dport = ntohs(flp->fl6_dport); + } else { + __entry->sport = 0; + __entry->dport = 0; + } + if (f6i->fib6_nh.nh_dev) { __assign_str(name, f6i->fib6_nh.nh_dev); } else { - __assign_str(name, ""); + __assign_str(name, "-"); } if (f6i == net->ipv6.fib6_null_entry) { struct in6_addr in6_zero = {}; @@ -65,10 +79,11 @@ TRACE_EVENT(fib6_table_lookup, } ), - TP_printk("table %3u oif %d iif %d src %pI6c dst %pI6c tos %d scope %d flags %x ==> dev %s gw %pI6c", - __entry->tb_id, __entry->oif, __entry->iif, - __entry->src, __entry->dst, __entry->tos, __entry->scope, - __entry->flags, __get_str(name), __entry->gw) + TP_printk("table %3u oif %d iif %d proto %u %pI6c/%u -> %pI6c/%u tos %d scope %d flags %x ==> dev %s gw %pI6c err %d", + __entry->tb_id, __entry->oif, __entry->iif, __entry->proto, + __entry->src, __entry->sport, __entry->dst, __entry->dport, + __entry->tos, __entry->scope, __entry->flags, + __get_str(name), __entry->gw, __entry->err) ); #endif /* _TRACE_FIB6_H */ diff --git a/net/core/net-traces.c b/net/core/net-traces.c index 380934580fa1..419af6dfe29f 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -35,10 +35,6 @@ #include #include #include -#if IS_ENABLED(CONFIG_IPV6) -#include -EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup); -#endif #if IS_ENABLED(CONFIG_BRIDGE) #include EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0a35ded448a6..22c4de2317d0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -64,14 +64,19 @@ #include #include #include -#include - #include #ifdef CONFIG_SYSCTL #include #endif +static int ip6_rt_type_to_error(u8 fib6_type); + +#define CREATE_TRACE_POINTS +#include +EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup); +#undef CREATE_TRACE_POINTS + enum rt6_nud_state { RT6_NUD_FAIL_HARD = -3, RT6_NUD_FAIL_PROBE = -2, -- cgit v1.2.3-55-g7522 From c949cbbbe5d6ff3dcacf82e8fd26f627285e8a12 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 23 May 2018 17:08:49 -0700 Subject: net/ipv4: Remove tracepoint in fib_validate_source Tracepoint does not add value and the call to fib_lookup follows it which shows the same information and the fib lookup result. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/trace/events/fib.h | 35 ----------------------------------- net/ipv4/fib_frontend.c | 2 -- 2 files changed, 37 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h index f5a1d4c518d8..9763cddd0594 100644 --- a/include/trace/events/fib.h +++ b/include/trace/events/fib.h @@ -87,41 +87,6 @@ TRACE_EVENT(fib_table_lookup, __entry->tos, __entry->scope, __entry->flags, __get_str(name), __entry->gw, __entry->saddr, __entry->err) ); - -TRACE_EVENT(fib_validate_source, - - TP_PROTO(const struct net_device *dev, const struct flowi4 *flp), - - TP_ARGS(dev, flp), - - TP_STRUCT__entry( - __string( name, dev->name ) - __field( int, oif ) - __field( int, iif ) - __field( __u8, tos ) - __array( __u8, src, 4 ) - __array( __u8, dst, 4 ) - ), - - TP_fast_assign( - __be32 *p32; - - __assign_str(name, dev ? dev->name : "not set"); - __entry->oif = flp->flowi4_oif; - __entry->iif = flp->flowi4_iif; - __entry->tos = flp->flowi4_tos; - - p32 = (__be32 *) __entry->src; - *p32 = flp->saddr; - - p32 = (__be32 *) __entry->dst; - *p32 = flp->daddr; - ), - - TP_printk("dev %s oif %d iif %d tos %d src %pI4 dst %pI4", - __get_str(name), __entry->oif, __entry->iif, __entry->tos, - __entry->src, __entry->dst) -); #endif /* _TRACE_FIB_H */ /* This part must be outside protection */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 897ae92dff0f..045c43a27c12 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -354,8 +354,6 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.fl4_dport = 0; } - trace_fib_validate_source(dev, &fl4); - if (fib_lookup(net, &fl4, &res, 0)) goto last_resort; if (res.type != RTN_UNICAST && -- cgit v1.2.3-55-g7522 From 2d68c0745a0e8349ff71b04af5ee020d40f78d90 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Fri, 25 May 2018 18:14:05 +0800 Subject: tcp: use data length instead of skb->len in tcp_probe skb->len is meaningless to user. data length could be more helpful, with which we can easily filter out the packet without payload. Signed-off-by: Yafang Shao Acked-by: Song Liu Signed-off-by: David S. Miller --- include/trace/events/tcp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index c1a5284713b8..703abb6e11fa 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -236,7 +236,7 @@ TRACE_EVENT(tcp_probe, __field(__u16, sport) __field(__u16, dport) __field(__u32, mark) - __field(__u16, length) + __field(__u16, data_len) __field(__u32, snd_nxt) __field(__u32, snd_una) __field(__u32, snd_cwnd) @@ -261,7 +261,7 @@ TRACE_EVENT(tcp_probe, __entry->dport = ntohs(inet->inet_dport); __entry->mark = skb->mark; - __entry->length = skb->len; + __entry->data_len = skb->len - tcp_hdrlen(skb); __entry->snd_nxt = tp->snd_nxt; __entry->snd_una = tp->snd_una; __entry->snd_cwnd = tp->snd_cwnd; @@ -272,9 +272,9 @@ TRACE_EVENT(tcp_probe, __entry->sock_cookie = sock_gen_cookie(sk); ), - TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx", + TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx", __entry->saddr, __entry->daddr, __entry->mark, - __entry->length, __entry->snd_nxt, __entry->snd_una, + __entry->data_len, __entry->snd_nxt, __entry->snd_una, __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd, __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie) ); -- cgit v1.2.3-55-g7522 From 3d97d88e8091f3501e016f6b4ce45a32c4b8f2f6 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Tue, 29 May 2018 23:27:31 +0800 Subject: tcp: minor optimization around tcp_hdr() usage in receive path This is additional to the commit ea1627c20c34 ("tcp: minor optimizations around tcp_hdr() usage"). At this point, skb->data is same with tcp_hdr() as tcp header has not been pulled yet. So use the less expensive one to get the tcp header. Remove the third parameter of tcp_rcv_established() and put it into the function body. Furthermore, the local variables are listed as a reverse christmas tree :) Cc: Eric Dumazet Signed-off-by: Yafang Shao Signed-off-by: David S. Miller --- include/net/tcp.h | 3 +-- include/trace/events/tcp.h | 5 +++-- net/ipv4/tcp_input.c | 6 +++--- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/trace/events') diff --git a/include/net/tcp.h b/include/net/tcp.h index 952d842a604a..029a51b91742 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -334,8 +334,7 @@ void tcp_write_timer_handler(struct sock *sk); void tcp_delack_timer_handler(struct sock *sk); int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); -void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th); +void tcp_rcv_established(struct sock *sk, struct sk_buff *skb); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 703abb6e11fa..ac55b328d61b 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -248,8 +248,9 @@ TRACE_EVENT(tcp_probe, ), TP_fast_assign( - const struct tcp_sock *tp = tcp_sk(sk); + const struct tcphdr *th = (const struct tcphdr *)skb->data; const struct inet_sock *inet = inet_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); @@ -261,7 +262,7 @@ TRACE_EVENT(tcp_probe, __entry->dport = ntohs(inet->inet_dport); __entry->mark = skb->mark; - __entry->data_len = skb->len - tcp_hdrlen(skb); + __entry->data_len = skb->len - __tcp_hdrlen(th); __entry->snd_nxt = tp->snd_nxt; __entry->snd_una = tp->snd_una; __entry->snd_cwnd = tp->snd_cwnd; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1191cac72109..d5ffb573ca4d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5390,11 +5390,11 @@ discard: * the rest is checked inline. Fast processing is turned on in * tcp_data_queue when everything is OK. */ -void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th) +void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) { - unsigned int len = skb->len; + const struct tcphdr *th = (const struct tcphdr *)skb->data; struct tcp_sock *tp = tcp_sk(sk); + unsigned int len = skb->len; /* TCP congestion window tracking */ trace_tcp_probe(sk, skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index adbdb503db0c..749b0ef9f405 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1486,7 +1486,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) sk->sk_rx_dst = NULL; } } - tcp_rcv_established(sk, skb, tcp_hdr(skb)); + tcp_rcv_established(sk, skb); return 0; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7d47c2b550a9..8764a63abd91 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1322,7 +1322,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) } } - tcp_rcv_established(sk, skb, tcp_hdr(skb)); + tcp_rcv_established(sk, skb); if (opt_skb) goto ipv6_pktoptions; return 0; -- cgit v1.2.3-55-g7522 From 1a025028d400b23477341aa7ec2ce55f8b39b554 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 3 Jun 2018 02:17:39 +0100 Subject: rxrpc: Fix handling of call quietly cancelled out on server Sometimes an in-progress call will stop responding on the fileserver when the fileserver quietly cancels the call with an internally marked abort (RX_CALL_DEAD), without sending an ABORT to the client. This causes the client's call to eventually expire from lack of incoming packets directed its way, which currently leads to it being cancelled locally with ETIME. Note that it's not currently clear as to why this happens as it's really hard to reproduce. The rotation policy implement by kAFS, however, doesn't differentiate between ETIME meaning we didn't get any response from the server and ETIME meaning the call got cancelled mid-flow. The latter leads to an oops when fetching data as the rotation partially resets the afs_read descriptor, which can result in a cleared page pointer being dereferenced because that page has already been filled. Handle this by the following means: (1) Set a flag on a call when we receive a packet for it. (2) Store the highest packet serial number so far received for a call (bearing in mind this may wrap). (3) If, when the "not received anything recently" timeout expires on a call, we've received at least one packet for a call and the connection as a whole has received packets more recently than that call, then cancel the call locally with ECONNRESET rather than ETIME. This indicates that the call was definitely in progress on the server. (4) In kAFS, if the rotation algorithm sees ECONNRESET rather than ETIME, don't try the next server, but rather abort the call. This avoids the oops as we don't try to reuse the afs_read struct. Rather, as-yet ungotten pages will be reread at a later data. Also: (5) Add an rxrpc tracepoint to log detection of the call being reset. Without this, I occasionally see an oops like the following: general protection fault: 0000 [#1] SMP PTI ... RIP: 0010:_copy_to_iter+0x204/0x310 RSP: 0018:ffff8800cae0f828 EFLAGS: 00010206 RAX: 0000000000000560 RBX: 0000000000000560 RCX: 0000000000000560 RDX: ffff8800cae0f968 RSI: ffff8800d58b3312 RDI: 0005080000000000 RBP: ffff8800cae0f968 R08: 0000000000000560 R09: ffff8800ca00f400 R10: ffff8800c36f28d4 R11: 00000000000008c4 R12: ffff8800cae0f958 R13: 0000000000000560 R14: ffff8800d58b3312 R15: 0000000000000560 FS: 00007fdaef108080(0000) GS:ffff8800ca680000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fb28a8fa000 CR3: 00000000d2a76002 CR4: 00000000001606e0 Call Trace: skb_copy_datagram_iter+0x14e/0x289 rxrpc_recvmsg_data.isra.0+0x6f3/0xf68 ? trace_buffer_unlock_commit_regs+0x4f/0x89 rxrpc_kernel_recv_data+0x149/0x421 afs_extract_data+0x1e0/0x798 ? afs_wait_for_call_to_complete+0xc9/0x52e afs_deliver_fs_fetch_data+0x33a/0x5ab afs_deliver_to_call+0x1ee/0x5e0 ? afs_wait_for_call_to_complete+0xc9/0x52e afs_wait_for_call_to_complete+0x12b/0x52e ? wake_up_q+0x54/0x54 afs_make_call+0x287/0x462 ? afs_fs_fetch_data+0x3e6/0x3ed ? rcu_read_lock_sched_held+0x5d/0x63 afs_fs_fetch_data+0x3e6/0x3ed afs_fetch_data+0xbb/0x14a afs_readpages+0x317/0x40d __do_page_cache_readahead+0x203/0x2ba ? ondemand_readahead+0x3a7/0x3c1 ondemand_readahead+0x3a7/0x3c1 generic_file_buffered_read+0x18b/0x62f __vfs_read+0xdb/0xfe vfs_read+0xb2/0x137 ksys_read+0x50/0x8c do_syscall_64+0x7d/0x1a0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Note the weird value in RDI which is a result of trying to kmap() a NULL page pointer. Signed-off-by: David Howells Signed-off-by: David S. Miller --- fs/afs/rotate.c | 4 ++++ include/trace/events/rxrpc.h | 32 ++++++++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 2 ++ net/rxrpc/call_event.c | 8 +++++++- net/rxrpc/input.c | 10 ++++++++-- 5 files changed, 53 insertions(+), 3 deletions(-) (limited to 'include/trace/events') diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index e065bc0768e6..1faef56b12bd 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -310,6 +310,10 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) case -ETIME: _debug("no conn"); goto iterate_address; + + case -ECONNRESET: + _debug("call reset"); + goto failed; } restart_from_beginning: diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 077e664ac9a2..4fff00e9da8a 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1459,6 +1459,38 @@ TRACE_EVENT(rxrpc_tx_fail, __print_symbolic(__entry->what, rxrpc_tx_fail_traces)) ); +TRACE_EVENT(rxrpc_call_reset, + TP_PROTO(struct rxrpc_call *call), + + TP_ARGS(call), + + TP_STRUCT__entry( + __field(unsigned int, debug_id ) + __field(u32, cid ) + __field(u32, call_id ) + __field(rxrpc_serial_t, call_serial ) + __field(rxrpc_serial_t, conn_serial ) + __field(rxrpc_seq_t, tx_seq ) + __field(rxrpc_seq_t, rx_seq ) + ), + + TP_fast_assign( + __entry->debug_id = call->debug_id; + __entry->cid = call->cid; + __entry->call_id = call->call_id; + __entry->call_serial = call->rx_serial; + __entry->conn_serial = call->conn->hi_serial; + __entry->tx_seq = call->tx_hard_ack; + __entry->rx_seq = call->ackr_seen; + ), + + TP_printk("c=%08x %08x:%08x r=%08x/%08x tx=%08x rx=%08x", + __entry->debug_id, + __entry->cid, __entry->call_id, + __entry->call_serial, __entry->conn_serial, + __entry->tx_seq, __entry->rx_seq) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 19975d2ca9a2..b2393113a251 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -477,6 +477,7 @@ enum rxrpc_call_flag { RXRPC_CALL_PINGING, /* Ping in process */ RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */ + RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */ }; /* @@ -624,6 +625,7 @@ struct rxrpc_call { */ rxrpc_seq_t rx_top; /* Highest Rx slot allocated. */ rxrpc_seq_t rx_expect_next; /* Expected next packet sequence number */ + rxrpc_serial_t rx_serial; /* Highest serial received for this call */ u8 rx_winsize; /* Size of Rx window */ u8 tx_winsize; /* Maximum size of Tx window */ bool tx_phase; /* T if transmission phase, F if receive phase */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 6e0d788b4dc4..20210418904b 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -392,7 +392,13 @@ recheck_state: /* Process events */ if (test_and_clear_bit(RXRPC_CALL_EV_EXPIRED, &call->events)) { - rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME); + if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) && + (int)call->conn->hi_serial - (int)call->rx_serial > 0) { + trace_rxrpc_call_reset(call); + rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ECONNRESET); + } else { + rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME); + } set_bit(RXRPC_CALL_EV_ABORT, &call->events); goto recheck_state; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index b5fd6381313d..608d078a4981 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1278,8 +1278,14 @@ void rxrpc_data_ready(struct sock *udp_sk) call = NULL; } - if (call && sp->hdr.serviceId != call->service_id) - call->service_id = sp->hdr.serviceId; + if (call) { + if (sp->hdr.serviceId != call->service_id) + call->service_id = sp->hdr.serviceId; + if ((int)sp->hdr.serial - (int)call->rx_serial > 0) + call->rx_serial = sp->hdr.serial; + if (!test_bit(RXRPC_CALL_RX_HEARD, &call->flags)) + set_bit(RXRPC_CALL_RX_HEARD, &call->flags); + } } else { skew = 0; call = NULL; -- cgit v1.2.3-55-g7522