From 327835fb1e838c7098731899ec76aba912edddf1 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 10 Jul 2019 12:43:45 +0200 Subject: MAINTAINERS: update BPF JIT S390 maintainers Ilya Leoshkevich is joining as s390 bpf maintainer. With his background as gcc developer he would be valuable for the team and community as a whole. Ilya, have fun! Since there is now enough eyes on s390 bpf, relieve Christian Borntraeger, so that he could focus on his maintainer tasks for other components. Signed-off-by: Vasily Gorbik Acked-by: Christian Borntraeger Acked-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 618e4979960b..20332586bf2c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3082,9 +3082,9 @@ S: Maintained F: arch/riscv/net/ BPF JIT for S390 +M: Ilya Leoshkevich M: Heiko Carstens M: Vasily Gorbik -M: Christian Borntraeger L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Maintained -- cgit v1.2.3-55-g7522 From b3b50f05dc501cc2cd90349a7bbfd932af0ceb31 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 8 Jul 2019 20:32:44 -0700 Subject: bpf: fix precision bit propagation for BPF_ST instructions When backtracking instructions to propagate precision bit for registers and stack slots, one class of instructions (BPF_ST) weren't handled causing extra stack slots to be propagated into parent state. Parent state might not have that much stack allocated, though, which causes warning on invalid stack slot usage. This patch adds handling of BPF_ST instructions: BPF_MEM | | BPF_ST: *(size *) (dst_reg + off) = imm32 Reported-by: syzbot+4da3ff23081bafe74fc2@syzkaller.appspotmail.com Fixes: b5dc0163d8fd ("bpf: precise scalar_value tracking") Cc: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a2e763703c30..def87e9cc9c7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1519,9 +1519,9 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, return -EFAULT; } *stack_mask |= 1ull << spi; - } else if (class == BPF_STX) { + } else if (class == BPF_STX || class == BPF_ST) { if (*reg_mask & dreg) - /* stx shouldn't be using _scalar_ dst_reg + /* stx & st shouldn't be using _scalar_ dst_reg * to access memory. It means backtracking * encountered a case of pointer subtraction. */ @@ -1540,7 +1540,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, if (!(*stack_mask & (1ull << spi))) return 0; *stack_mask &= ~(1ull << spi); - *reg_mask |= sreg; + if (class == BPF_STX) + *reg_mask |= sreg; } else if (class == BPF_JMP || class == BPF_JMP32) { if (opcode == BPF_CALL) { if (insn->src_reg == BPF_PSEUDO_CALL) @@ -1569,10 +1570,6 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, if (mode == BPF_IND || mode == BPF_ABS) /* to be analyzed */ return -ENOTSUPP; - } else if (class == BPF_ST) { - if (*reg_mask & dreg) - /* likely pointer subtraction */ - return -ENOTSUPP; } return 0; } -- cgit v1.2.3-55-g7522 From 36db2a94f19ae1fcf1e6d8253df0c32f90f92860 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 8 Jul 2019 21:00:07 -0700 Subject: libbpf: fix ptr to u64 conversion warning on 32-bit platforms On 32-bit platforms compiler complains about conversion: libbpf.c: In function ‘perf_event_open_probe’: libbpf.c:4112:17: error: cast from pointer to integer of different size [-Werror=pointer-to-int-cast] attr.config1 = (uint64_t)(void *)name; /* kprobe_func or uprobe_path */ ^ Reported-by: Matt Hart Fixes: b26500274767 ("libbpf: add kprobe/uprobe attach API") Tested-by: Matt Hart Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ed07789b3e62..794dd5064ae8 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4126,8 +4126,8 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, } attr.size = sizeof(attr); attr.type = type; - attr.config1 = (uint64_t)(void *)name; /* kprobe_func or uprobe_path */ - attr.config2 = offset; /* kprobe_addr or probe_offset */ + attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */ + attr.config2 = offset; /* kprobe_addr or probe_offset */ /* pid filter is meaningful only for uprobes */ pfd = syscall(__NR_perf_event_open, &attr, -- cgit v1.2.3-55-g7522 From 675716400da6f15b9d3db04ef74ee74ca9a00af3 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 4 Jul 2019 17:25:03 +0300 Subject: xdp: fix possible cq entry leak Completion queue address reservation could not be undone. In case of bad 'queue_id' or skb allocation failure, reserved entry will be leaked reducing the total capacity of completion queue. Fix that by moving reservation to the point where failure is not possible. Additionally, 'queue_id' checking moved out from the loop since there is no point to check it there. Fixes: 35fcde7f8deb ("xsk: support for Tx") Signed-off-by: Ilya Maximets Acked-by: Björn Töpel Tested-by: William Tu Signed-off-by: Daniel Borkmann --- net/xdp/xsk.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index d4d6f10aa936..b994c32a664a 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -240,6 +240,9 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, mutex_lock(&xs->mutex); + if (xs->queue_id >= xs->dev->real_num_tx_queues) + goto out; + while (xskq_peek_desc(xs->tx, &desc)) { char *buffer; u64 addr; @@ -250,12 +253,6 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, goto out; } - if (xskq_reserve_addr(xs->umem->cq)) - goto out; - - if (xs->queue_id >= xs->dev->real_num_tx_queues) - goto out; - len = desc.len; skb = sock_alloc_send_skb(sk, len, 1, &err); if (unlikely(!skb)) { @@ -267,7 +264,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, addr = desc.addr; buffer = xdp_umem_get_data(xs->umem, addr); err = skb_store_bits(skb, 0, buffer, len); - if (unlikely(err)) { + if (unlikely(err) || xskq_reserve_addr(xs->umem->cq)) { kfree_skb(skb); goto out; } -- cgit v1.2.3-55-g7522 From 5464c3a0e9a037b63d5229cdea08dddc01a98aac Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 8 Jul 2019 14:03:44 +0300 Subject: xdp: fix potential deadlock on socket mutex There are 2 call chains: a) xsk_bind --> xdp_umem_assign_dev b) unregister_netdevice_queue --> xsk_notifier with the following locking order: a) xs->mutex --> rtnl_lock b) rtnl_lock --> xdp.lock --> xs->mutex Different order of taking 'xs->mutex' and 'rtnl_lock' could produce a deadlock here. Fix that by moving the 'rtnl_lock' before 'xs->lock' in the bind call chain (a). Reported-by: syzbot+bf64ec93de836d7f4c2c@syzkaller.appspotmail.com Fixes: 455302d1c9ae ("xdp: fix hang while unregistering device bound to xdp socket") Signed-off-by: Ilya Maximets Acked-by: Jonathan Lemon Signed-off-by: Daniel Borkmann --- net/xdp/xdp_umem.c | 16 ++++++---------- net/xdp/xsk.c | 2 ++ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 20c91f02d3d8..83de74ca729a 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -87,21 +87,20 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, struct netdev_bpf bpf; int err = 0; + ASSERT_RTNL(); + force_zc = flags & XDP_ZEROCOPY; force_copy = flags & XDP_COPY; if (force_zc && force_copy) return -EINVAL; - rtnl_lock(); - if (xdp_get_umem_from_qid(dev, queue_id)) { - err = -EBUSY; - goto out_rtnl_unlock; - } + if (xdp_get_umem_from_qid(dev, queue_id)) + return -EBUSY; err = xdp_reg_umem_at_qid(dev, umem, queue_id); if (err) - goto out_rtnl_unlock; + return err; umem->dev = dev; umem->queue_id = queue_id; @@ -110,7 +109,7 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, if (force_copy) /* For copy-mode, we are done. */ - goto out_rtnl_unlock; + return 0; if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit) { @@ -125,7 +124,6 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, err = dev->netdev_ops->ndo_bpf(dev, &bpf); if (err) goto err_unreg_umem; - rtnl_unlock(); umem->zc = true; return 0; @@ -135,8 +133,6 @@ err_unreg_umem: err = 0; /* fallback to copy mode */ if (err) xdp_clear_umem_at_qid(dev, queue_id); -out_rtnl_unlock: - rtnl_unlock(); return err; } diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index b994c32a664a..59b57d708697 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -430,6 +430,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY)) return -EINVAL; + rtnl_lock(); mutex_lock(&xs->mutex); if (xs->state != XSK_READY) { err = -EBUSY; @@ -515,6 +516,7 @@ out_unlock: xs->state = XSK_BOUND; out_release: mutex_unlock(&xs->mutex); + rtnl_unlock(); return err; } -- cgit v1.2.3-55-g7522 From 59d82657a08d4a9e6e2d99acc525ef748d36016b Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 10 Jul 2019 13:56:54 +0200 Subject: selftests/bpf: fix bpf_target_sparc check bpf_helpers.h fails to compile on sparc: the code should be checking for defined(bpf_target_sparc), but checks simply for bpf_target_sparc. Also change #ifdef bpf_target_powerpc to #if defined() for consistency. Signed-off-by: Ilya Leoshkevich Acked-by: Andrii Nakryiko Acked-by: David S. Miller Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/bpf_helpers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 5a3d92c8bec8..5d63c66bc9d7 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -452,10 +452,10 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #endif -#ifdef bpf_target_powerpc +#if defined(bpf_target_powerpc) #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#elif bpf_target_sparc +#elif defined(bpf_target_sparc) #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP #else -- cgit v1.2.3-55-g7522 From ed4ed4043a127c5ca9a35339bb614693be9037a3 Mon Sep 17 00:00:00 2001 From: Gustavo A. R. Silva Date: Thu, 11 Jul 2019 11:22:33 -0500 Subject: bpf: verifier: avoid fall-through warnings In preparation to enabling -Wimplicit-fallthrough, this patch silences the following warning: kernel/bpf/verifier.c: In function ‘check_return_code’: kernel/bpf/verifier.c:6106:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || ^ kernel/bpf/verifier.c:6109:2: note: here case BPF_PROG_TYPE_CGROUP_SKB: ^~~~ Warning level 3 was used: -Wimplicit-fallthrough=3 Notice that is much clearer to explicitly add breaks in each case statement (that actually contains some code), rather than letting the code to fall through. This patch is part of the ongoing efforts to enable -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Acked-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index def87e9cc9c7..5900cbb966b1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6103,11 +6103,13 @@ static int check_return_code(struct bpf_verifier_env *env) if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG) range = tnum_range(1, 1); + break; case BPF_PROG_TYPE_CGROUP_SKB: if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) { range = tnum_range(0, 3); enforce_attach_type_range = tnum_range(2, 3); } + break; case BPF_PROG_TYPE_CGROUP_SOCK: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_CGROUP_DEVICE: -- cgit v1.2.3-55-g7522 From 216b65fb706e34128a5317d71b300daac9c428c4 Mon Sep 17 00:00:00 2001 From: Daniel T. Lee Date: Fri, 12 Jul 2019 10:35:39 +0900 Subject: tools: bpftool: add raw_tracepoint_writable prog type to header From commit 9df1c28bb752 ("bpf: add writable context for raw tracepoints"), a new type of BPF_PROG, RAW_TRACEPOINT_WRITABLE has been added. Since this BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE is not listed at bpftool's header, it causes a segfault when executing 'bpftool feature'. This commit adds BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE entry to prog_type_name enum, and will eventually fixes the segfault issue. Fixes: 9df1c28bb752 ("bpf: add writable context for raw tracepoints") Signed-off-by: Daniel T. Lee Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/main.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 3ef0d9051e10..7031a4bf87a0 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -74,6 +74,7 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", + [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", }; -- cgit v1.2.3-55-g7522 From 9cae4ace80ef39005da106fbb89c952b27d7b89e Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 11 Jul 2019 11:12:49 +0200 Subject: selftests/bpf: do not ignore clang failures When compiling an eBPF prog fails, make still returns 0, because failing clang command's output is piped to llc and therefore its exit status is ignored. When clang fails, pipe the string "clang failed" to llc. This will make llc fail with an informative error message. This solution was chosen over using pipefail, having separate targets or getting rid of llc invocation due to its simplicity. In addition, pull Kbuild.include in order to get .DELETE_ON_ERROR target, which would cause partial .o files to be removed. Signed-off-by: Ilya Leoshkevich Acked-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 2620406a53ec..8312db1ae7c2 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 +include ../../../../scripts/Kbuild.include LIBDIR := ../../../lib BPFDIR := $(LIBDIR)/bpf @@ -196,8 +197,8 @@ $(ALU32_BUILD_DIR)/test_progs_32: prog_tests/*.c $(ALU32_BUILD_DIR)/%.o: progs/%.c $(ALU32_BUILD_DIR) \ $(ALU32_BUILD_DIR)/test_progs_32 - $(CLANG) $(CLANG_FLAGS) \ - -O2 -target bpf -emit-llvm -c $< -o - | \ + ($(CLANG) $(CLANG_FLAGS) -O2 -target bpf -emit-llvm -c $< -o - || \ + echo "clang failed") | \ $(LLC) -march=bpf -mattr=+alu32 -mcpu=$(CPU) $(LLC_FLAGS) \ -filetype=obj -o $@ ifeq ($(DWARF2BTF),y) @@ -208,16 +209,16 @@ endif # Have one program compiled without "-target bpf" to test whether libbpf loads # it successfully $(OUTPUT)/test_xdp.o: progs/test_xdp.c - $(CLANG) $(CLANG_FLAGS) \ - -O2 -emit-llvm -c $< -o - | \ + ($(CLANG) $(CLANG_FLAGS) -O2 -emit-llvm -c $< -o - || \ + echo "clang failed") | \ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ ifeq ($(DWARF2BTF),y) $(BTF_PAHOLE) -J $@ endif $(OUTPUT)/%.o: progs/%.c - $(CLANG) $(CLANG_FLAGS) \ - -O2 -target bpf -emit-llvm -c $< -o - | \ + ($(CLANG) $(CLANG_FLAGS) -O2 -target bpf -emit-llvm -c $< -o - || \ + echo "clang failed") | \ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ ifeq ($(DWARF2BTF),y) $(BTF_PAHOLE) -J $@ -- cgit v1.2.3-55-g7522 From 748e50c1c13d33dffcdca66f0b1cb3d96d17fe31 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 11 Jul 2019 16:29:27 +0200 Subject: selftests/bpf: compile progs with -D__TARGET_ARCH_$(SRCARCH) This opens up the possibility of accessing registers in an arch-independent way. Signed-off-by: Ilya Leoshkevich Reviewed-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 8312db1ae7c2..277d8605e340 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 include ../../../../scripts/Kbuild.include +include ../../../scripts/Makefile.arch LIBDIR := ../../../lib BPFDIR := $(LIBDIR)/bpf @@ -139,7 +140,8 @@ CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - &1 \ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \ $(CLANG_SYS_INCLUDES) \ - -Wno-compare-distinct-pointer-types + -Wno-compare-distinct-pointer-types \ + -D__TARGET_ARCH_$(SRCARCH) $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline -- cgit v1.2.3-55-g7522 From 05c2dc17dae310f92a0c5979e227133b0e85f2fa Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 11 Jul 2019 16:29:28 +0200 Subject: selftests/bpf: fix s930 -> s390 typo Also check for __s390__ instead of __s390x__, just in case bpf_helpers.h is ever used by 32-bit userspace. Signed-off-by: Ilya Leoshkevich Acked-by: Andrii Nakryiko Reviewed-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/bpf_helpers.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 5d63c66bc9d7..f6e3768660be 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -315,8 +315,8 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #if defined(__TARGET_ARCH_x86) #define bpf_target_x86 #define bpf_target_defined -#elif defined(__TARGET_ARCH_s930x) - #define bpf_target_s930x +#elif defined(__TARGET_ARCH_s390) + #define bpf_target_s390 #define bpf_target_defined #elif defined(__TARGET_ARCH_arm) #define bpf_target_arm @@ -341,8 +341,8 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #ifndef bpf_target_defined #if defined(__x86_64__) #define bpf_target_x86 -#elif defined(__s390x__) - #define bpf_target_s930x +#elif defined(__s390__) + #define bpf_target_s390 #elif defined(__arm__) #define bpf_target_arm #elif defined(__aarch64__) @@ -369,7 +369,7 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->ip) -#elif defined(bpf_target_s390x) +#elif defined(bpf_target_s390) #define PT_REGS_PARM1(x) ((x)->gprs[2]) #define PT_REGS_PARM2(x) ((x)->gprs[3]) -- cgit v1.2.3-55-g7522 From 7cd04535abc95c50ba9bfa48efc76274b63c70f5 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 11 Jul 2019 16:29:29 +0200 Subject: selftests/bpf: make PT_REGS_* work in userspace Right now, on certain architectures, these macros are usable only with kernel headers. This patch makes it possible to use them with userspace headers and, as a consequence, not only in BPF samples, but also in BPF selftests. On s390, provide the forward declaration of struct pt_regs and cast it to user_pt_regs in PT_REGS_* macros. This is necessary, because instead of the full struct pt_regs, s390 exposes only its first member user_pt_regs to userspace, and bpf_helpers.h is used with both userspace (in selftests) and kernel (in samples) headers. It was added in commit 466698e654e8 ("s390/bpf: correct broken uapi for BPF_PROG_TYPE_PERF_EVENT program type"). Ditto on arm64. On x86, provide userspace versions of PT_REGS_* macros. Unlike s390 and arm64, x86 provides struct pt_regs to both userspace and kernel, however, with different member names. Signed-off-by: Ilya Leoshkevich Acked-by: Andrii Nakryiko Reviewed-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/bpf_helpers.h | 75 ++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index f6e3768660be..f804f210244e 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -358,6 +358,7 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #if defined(bpf_target_x86) +#ifdef __KERNEL__ #define PT_REGS_PARM1(x) ((x)->di) #define PT_REGS_PARM2(x) ((x)->si) #define PT_REGS_PARM3(x) ((x)->dx) @@ -368,19 +369,49 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #define PT_REGS_RC(x) ((x)->ax) #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->ip) +#else +#ifdef __i386__ +/* i386 kernel is built with -mregparm=3 */ +#define PT_REGS_PARM1(x) ((x)->eax) +#define PT_REGS_PARM2(x) ((x)->edx) +#define PT_REGS_PARM3(x) ((x)->ecx) +#define PT_REGS_PARM4(x) 0 +#define PT_REGS_PARM5(x) 0 +#define PT_REGS_RET(x) ((x)->esp) +#define PT_REGS_FP(x) ((x)->ebp) +#define PT_REGS_RC(x) ((x)->eax) +#define PT_REGS_SP(x) ((x)->esp) +#define PT_REGS_IP(x) ((x)->eip) +#else +#define PT_REGS_PARM1(x) ((x)->rdi) +#define PT_REGS_PARM2(x) ((x)->rsi) +#define PT_REGS_PARM3(x) ((x)->rdx) +#define PT_REGS_PARM4(x) ((x)->rcx) +#define PT_REGS_PARM5(x) ((x)->r8) +#define PT_REGS_RET(x) ((x)->rsp) +#define PT_REGS_FP(x) ((x)->rbp) +#define PT_REGS_RC(x) ((x)->rax) +#define PT_REGS_SP(x) ((x)->rsp) +#define PT_REGS_IP(x) ((x)->rip) +#endif +#endif #elif defined(bpf_target_s390) -#define PT_REGS_PARM1(x) ((x)->gprs[2]) -#define PT_REGS_PARM2(x) ((x)->gprs[3]) -#define PT_REGS_PARM3(x) ((x)->gprs[4]) -#define PT_REGS_PARM4(x) ((x)->gprs[5]) -#define PT_REGS_PARM5(x) ((x)->gprs[6]) -#define PT_REGS_RET(x) ((x)->gprs[14]) -#define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->gprs[2]) -#define PT_REGS_SP(x) ((x)->gprs[15]) -#define PT_REGS_IP(x) ((x)->psw.addr) +/* s390 provides user_pt_regs instead of struct pt_regs to userspace */ +struct pt_regs; +#define PT_REGS_S390 const volatile user_pt_regs +#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2]) +#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3]) +#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4]) +#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5]) +#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6]) +#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14]) +/* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11]) +#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2]) +#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) +#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) #elif defined(bpf_target_arm) @@ -397,16 +428,20 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #elif defined(bpf_target_arm64) -#define PT_REGS_PARM1(x) ((x)->regs[0]) -#define PT_REGS_PARM2(x) ((x)->regs[1]) -#define PT_REGS_PARM3(x) ((x)->regs[2]) -#define PT_REGS_PARM4(x) ((x)->regs[3]) -#define PT_REGS_PARM5(x) ((x)->regs[4]) -#define PT_REGS_RET(x) ((x)->regs[30]) -#define PT_REGS_FP(x) ((x)->regs[29]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->regs[0]) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->pc) +/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ +struct pt_regs; +#define PT_REGS_ARM64 const volatile struct user_pt_regs +#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0]) +#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1]) +#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2]) +#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3]) +#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4]) +#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30]) +/* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29]) +#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0]) +#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) +#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) #elif defined(bpf_target_mips) -- cgit v1.2.3-55-g7522 From af3c24e0e2ed25177b03b60ada9117bb596e8d95 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 11 Jul 2019 16:29:30 +0200 Subject: selftests/bpf: fix compiling loop{1, 2, 3}.c on s390 Use PT_REGS_RC(ctx) instead of ctx->rax, which is not present on s390. Signed-off-by: Ilya Leoshkevich Reviewed-by: Stanislav Fomichev Tested-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/progs/loop1.c | 2 +- tools/testing/selftests/bpf/progs/loop2.c | 2 +- tools/testing/selftests/bpf/progs/loop3.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/loop1.c b/tools/testing/selftests/bpf/progs/loop1.c index dea395af9ea9..7cdb7f878310 100644 --- a/tools/testing/selftests/bpf/progs/loop1.c +++ b/tools/testing/selftests/bpf/progs/loop1.c @@ -18,7 +18,7 @@ int nested_loops(volatile struct pt_regs* ctx) for (j = 0; j < 300; j++) for (i = 0; i < j; i++) { if (j & 1) - m = ctx->rax; + m = PT_REGS_RC(ctx); else m = j; sum += i * m; diff --git a/tools/testing/selftests/bpf/progs/loop2.c b/tools/testing/selftests/bpf/progs/loop2.c index 0637bd8e8bcf..9b2f808a2863 100644 --- a/tools/testing/selftests/bpf/progs/loop2.c +++ b/tools/testing/selftests/bpf/progs/loop2.c @@ -16,7 +16,7 @@ int while_true(volatile struct pt_regs* ctx) int i = 0; while (true) { - if (ctx->rax & 1) + if (PT_REGS_RC(ctx) & 1) i += 3; else i += 7; diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c index 30a0f6cba080..d727657d51e2 100644 --- a/tools/testing/selftests/bpf/progs/loop3.c +++ b/tools/testing/selftests/bpf/progs/loop3.c @@ -16,7 +16,7 @@ int while_true(volatile struct pt_regs* ctx) __u64 i = 0, sum = 0; do { i++; - sum += ctx->rax; + sum += PT_REGS_RC(ctx); } while (i < 0x100000000ULL); return sum; } -- cgit v1.2.3-55-g7522 From fe4e8db0392a6c2e795eb89ef5fcd86522e66248 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 13 Jul 2019 13:45:47 +0200 Subject: r8169: fix issue with confused RX unit after PHY power-down on RTL8411b On RTL8411b the RX unit gets confused if the PHY is powered-down. This was reported in [0] and confirmed by Realtek. Realtek provided a sequence to fix the RX unit after PHY wakeup. The issue itself seems to have been there longer, the Fixes tag refers to where the fix applies properly. [0] https://bugzilla.redhat.com/show_bug.cgi?id=1692075 Fixes: a99790bf5c7f ("r8169: Reinstate ASPM Support") Tested-by: Ionut Radu Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 137 ++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index efef5453b94f..0637c6752a78 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4667,6 +4667,143 @@ static void rtl_hw_start_8411_2(struct rtl8169_private *tp) /* disable aspm and clock request before access ephy */ rtl_hw_aspm_clkreq_enable(tp, false); rtl_ephy_init(tp, e_info_8411_2); + + /* The following Realtek-provided magic fixes an issue with the RX unit + * getting confused after the PHY having been powered-down. + */ + r8168_mac_ocp_write(tp, 0xFC28, 0x0000); + r8168_mac_ocp_write(tp, 0xFC2A, 0x0000); + r8168_mac_ocp_write(tp, 0xFC2C, 0x0000); + r8168_mac_ocp_write(tp, 0xFC2E, 0x0000); + r8168_mac_ocp_write(tp, 0xFC30, 0x0000); + r8168_mac_ocp_write(tp, 0xFC32, 0x0000); + r8168_mac_ocp_write(tp, 0xFC34, 0x0000); + r8168_mac_ocp_write(tp, 0xFC36, 0x0000); + mdelay(3); + r8168_mac_ocp_write(tp, 0xFC26, 0x0000); + + r8168_mac_ocp_write(tp, 0xF800, 0xE008); + r8168_mac_ocp_write(tp, 0xF802, 0xE00A); + r8168_mac_ocp_write(tp, 0xF804, 0xE00C); + r8168_mac_ocp_write(tp, 0xF806, 0xE00E); + r8168_mac_ocp_write(tp, 0xF808, 0xE027); + r8168_mac_ocp_write(tp, 0xF80A, 0xE04F); + r8168_mac_ocp_write(tp, 0xF80C, 0xE05E); + r8168_mac_ocp_write(tp, 0xF80E, 0xE065); + r8168_mac_ocp_write(tp, 0xF810, 0xC602); + r8168_mac_ocp_write(tp, 0xF812, 0xBE00); + r8168_mac_ocp_write(tp, 0xF814, 0x0000); + r8168_mac_ocp_write(tp, 0xF816, 0xC502); + r8168_mac_ocp_write(tp, 0xF818, 0xBD00); + r8168_mac_ocp_write(tp, 0xF81A, 0x074C); + r8168_mac_ocp_write(tp, 0xF81C, 0xC302); + r8168_mac_ocp_write(tp, 0xF81E, 0xBB00); + r8168_mac_ocp_write(tp, 0xF820, 0x080A); + r8168_mac_ocp_write(tp, 0xF822, 0x6420); + r8168_mac_ocp_write(tp, 0xF824, 0x48C2); + r8168_mac_ocp_write(tp, 0xF826, 0x8C20); + r8168_mac_ocp_write(tp, 0xF828, 0xC516); + r8168_mac_ocp_write(tp, 0xF82A, 0x64A4); + r8168_mac_ocp_write(tp, 0xF82C, 0x49C0); + r8168_mac_ocp_write(tp, 0xF82E, 0xF009); + r8168_mac_ocp_write(tp, 0xF830, 0x74A2); + r8168_mac_ocp_write(tp, 0xF832, 0x8CA5); + r8168_mac_ocp_write(tp, 0xF834, 0x74A0); + r8168_mac_ocp_write(tp, 0xF836, 0xC50E); + r8168_mac_ocp_write(tp, 0xF838, 0x9CA2); + r8168_mac_ocp_write(tp, 0xF83A, 0x1C11); + r8168_mac_ocp_write(tp, 0xF83C, 0x9CA0); + r8168_mac_ocp_write(tp, 0xF83E, 0xE006); + r8168_mac_ocp_write(tp, 0xF840, 0x74F8); + r8168_mac_ocp_write(tp, 0xF842, 0x48C4); + r8168_mac_ocp_write(tp, 0xF844, 0x8CF8); + r8168_mac_ocp_write(tp, 0xF846, 0xC404); + r8168_mac_ocp_write(tp, 0xF848, 0xBC00); + r8168_mac_ocp_write(tp, 0xF84A, 0xC403); + r8168_mac_ocp_write(tp, 0xF84C, 0xBC00); + r8168_mac_ocp_write(tp, 0xF84E, 0x0BF2); + r8168_mac_ocp_write(tp, 0xF850, 0x0C0A); + r8168_mac_ocp_write(tp, 0xF852, 0xE434); + r8168_mac_ocp_write(tp, 0xF854, 0xD3C0); + r8168_mac_ocp_write(tp, 0xF856, 0x49D9); + r8168_mac_ocp_write(tp, 0xF858, 0xF01F); + r8168_mac_ocp_write(tp, 0xF85A, 0xC526); + r8168_mac_ocp_write(tp, 0xF85C, 0x64A5); + r8168_mac_ocp_write(tp, 0xF85E, 0x1400); + r8168_mac_ocp_write(tp, 0xF860, 0xF007); + r8168_mac_ocp_write(tp, 0xF862, 0x0C01); + r8168_mac_ocp_write(tp, 0xF864, 0x8CA5); + r8168_mac_ocp_write(tp, 0xF866, 0x1C15); + r8168_mac_ocp_write(tp, 0xF868, 0xC51B); + r8168_mac_ocp_write(tp, 0xF86A, 0x9CA0); + r8168_mac_ocp_write(tp, 0xF86C, 0xE013); + r8168_mac_ocp_write(tp, 0xF86E, 0xC519); + r8168_mac_ocp_write(tp, 0xF870, 0x74A0); + r8168_mac_ocp_write(tp, 0xF872, 0x48C4); + r8168_mac_ocp_write(tp, 0xF874, 0x8CA0); + r8168_mac_ocp_write(tp, 0xF876, 0xC516); + r8168_mac_ocp_write(tp, 0xF878, 0x74A4); + r8168_mac_ocp_write(tp, 0xF87A, 0x48C8); + r8168_mac_ocp_write(tp, 0xF87C, 0x48CA); + r8168_mac_ocp_write(tp, 0xF87E, 0x9CA4); + r8168_mac_ocp_write(tp, 0xF880, 0xC512); + r8168_mac_ocp_write(tp, 0xF882, 0x1B00); + r8168_mac_ocp_write(tp, 0xF884, 0x9BA0); + r8168_mac_ocp_write(tp, 0xF886, 0x1B1C); + r8168_mac_ocp_write(tp, 0xF888, 0x483F); + r8168_mac_ocp_write(tp, 0xF88A, 0x9BA2); + r8168_mac_ocp_write(tp, 0xF88C, 0x1B04); + r8168_mac_ocp_write(tp, 0xF88E, 0xC508); + r8168_mac_ocp_write(tp, 0xF890, 0x9BA0); + r8168_mac_ocp_write(tp, 0xF892, 0xC505); + r8168_mac_ocp_write(tp, 0xF894, 0xBD00); + r8168_mac_ocp_write(tp, 0xF896, 0xC502); + r8168_mac_ocp_write(tp, 0xF898, 0xBD00); + r8168_mac_ocp_write(tp, 0xF89A, 0x0300); + r8168_mac_ocp_write(tp, 0xF89C, 0x051E); + r8168_mac_ocp_write(tp, 0xF89E, 0xE434); + r8168_mac_ocp_write(tp, 0xF8A0, 0xE018); + r8168_mac_ocp_write(tp, 0xF8A2, 0xE092); + r8168_mac_ocp_write(tp, 0xF8A4, 0xDE20); + r8168_mac_ocp_write(tp, 0xF8A6, 0xD3C0); + r8168_mac_ocp_write(tp, 0xF8A8, 0xC50F); + r8168_mac_ocp_write(tp, 0xF8AA, 0x76A4); + r8168_mac_ocp_write(tp, 0xF8AC, 0x49E3); + r8168_mac_ocp_write(tp, 0xF8AE, 0xF007); + r8168_mac_ocp_write(tp, 0xF8B0, 0x49C0); + r8168_mac_ocp_write(tp, 0xF8B2, 0xF103); + r8168_mac_ocp_write(tp, 0xF8B4, 0xC607); + r8168_mac_ocp_write(tp, 0xF8B6, 0xBE00); + r8168_mac_ocp_write(tp, 0xF8B8, 0xC606); + r8168_mac_ocp_write(tp, 0xF8BA, 0xBE00); + r8168_mac_ocp_write(tp, 0xF8BC, 0xC602); + r8168_mac_ocp_write(tp, 0xF8BE, 0xBE00); + r8168_mac_ocp_write(tp, 0xF8C0, 0x0C4C); + r8168_mac_ocp_write(tp, 0xF8C2, 0x0C28); + r8168_mac_ocp_write(tp, 0xF8C4, 0x0C2C); + r8168_mac_ocp_write(tp, 0xF8C6, 0xDC00); + r8168_mac_ocp_write(tp, 0xF8C8, 0xC707); + r8168_mac_ocp_write(tp, 0xF8CA, 0x1D00); + r8168_mac_ocp_write(tp, 0xF8CC, 0x8DE2); + r8168_mac_ocp_write(tp, 0xF8CE, 0x48C1); + r8168_mac_ocp_write(tp, 0xF8D0, 0xC502); + r8168_mac_ocp_write(tp, 0xF8D2, 0xBD00); + r8168_mac_ocp_write(tp, 0xF8D4, 0x00AA); + r8168_mac_ocp_write(tp, 0xF8D6, 0xE0C0); + r8168_mac_ocp_write(tp, 0xF8D8, 0xC502); + r8168_mac_ocp_write(tp, 0xF8DA, 0xBD00); + r8168_mac_ocp_write(tp, 0xF8DC, 0x0132); + + r8168_mac_ocp_write(tp, 0xFC26, 0x8000); + + r8168_mac_ocp_write(tp, 0xFC2A, 0x0743); + r8168_mac_ocp_write(tp, 0xFC2C, 0x0801); + r8168_mac_ocp_write(tp, 0xFC2E, 0x0BE9); + r8168_mac_ocp_write(tp, 0xFC30, 0x02FD); + r8168_mac_ocp_write(tp, 0xFC32, 0x0C25); + r8168_mac_ocp_write(tp, 0xFC34, 0x00A9); + r8168_mac_ocp_write(tp, 0xFC36, 0x012D); + rtl_hw_aspm_clkreq_enable(tp, true); } -- cgit v1.2.3-55-g7522 From 0307d589c4d6f84349afb7a53717baa2392f6a38 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sat, 13 Jul 2019 16:35:27 +0200 Subject: bonding: add documentation for peer_notif_delay Ability to tweak the interval between peer notifications has been added in 07a4ddec3ce9 ("bonding: add an option to specify a delay between peer notifications") but the documentation was not updated. Signed-off-by: Vincent Bernat Signed-off-by: David S. Miller --- Documentation/networking/bonding.txt | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index d3e5dd26db12..e3abfbd32f71 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -706,9 +706,9 @@ num_unsol_na unsolicited IPv6 Neighbor Advertisements) to be issued after a failover event. As soon as the link is up on the new slave (possibly immediately) a peer notification is sent on the - bonding device and each VLAN sub-device. This is repeated at - each link monitor interval (arp_interval or miimon, whichever - is active) if the number is greater than 1. + bonding device and each VLAN sub-device. This is repeated at + the rate specified by peer_notif_delay if the number is + greater than 1. The valid range is 0 - 255; the default value is 1. These options affect only the active-backup mode. These options were added for @@ -727,6 +727,16 @@ packets_per_slave The valid range is 0 - 65535; the default value is 1. This option has effect only in balance-rr mode. +peer_notif_delay + + Specify the delay, in milliseconds, between each peer + notification (gratuitous ARP and unsolicited IPv6 Neighbor + Advertisement) when they are issued after a failover event. + This delay should be a multiple of the link monitor interval + (arp_interval or miimon, whichever is active). The default + value is 0 which means to match the value of the link monitor + interval. + primary A string (eth0, eth2, etc) specifying which slave is the -- cgit v1.2.3-55-g7522 From 14b4c48bb1ce6ff429a9288c9579cc984193ebaf Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Sun, 14 Jul 2019 15:02:25 +0300 Subject: gve: Remove the exporting of gve_probe The function gve_probe is declared static and marked EXPORT_SYMBOL, which is at best an odd combination. Because the function is not used outside of the drivers/net/ethernet/google/gve/gve_main.c file it is defined in, this commit removes the EXPORT_SYMBOL() marking. Signed-off-by: Denis Efremov Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 24f16e3368cd..e8ee8cac2bbf 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1192,7 +1192,6 @@ abort_with_enabled: pci_disable_device(pdev); return -ENXIO; } -EXPORT_SYMBOL(gve_probe); static void gve_remove(struct pci_dev *pdev) { -- cgit v1.2.3-55-g7522 From 6e3d1bbbba55265d327e44a2d14de70462232853 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Sun, 14 Jul 2019 21:31:22 +0800 Subject: sit: use dst_cache in ipip6_tunnel_xmit Same as other ip tunnel, use dst_cache in xmit action to avoid unnecessary fib lookups. Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- net/ipv6/sit.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 80610899a323..b2ccbc473127 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -900,12 +900,17 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPV6, 0, dst, tiph->saddr, 0, 0, sock_net_uid(tunnel->net, NULL)); - rt = ip_route_output_flow(tunnel->net, &fl4, NULL); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error_icmp; + rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr); + if (!rt) { + rt = ip_route_output_flow(tunnel->net, &fl4, NULL); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; + } + dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr); } + if (rt->rt_type != RTN_UNICAST) { ip_rt_put(rt); dev->stats.tx_carrier_errors++; -- cgit v1.2.3-55-g7522 From 79f5943a87d21a902d5248466acd152f766981ad Mon Sep 17 00:00:00 2001 From: Sergej Benilov Date: Sun, 14 Jul 2019 18:56:27 +0200 Subject: sis900: correct a few typos Correct a few typos in comments and debug text. Signed-off-by: Sergej Benilov Signed-off-by: David S. Miller --- drivers/net/ethernet/sis/sis900.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index aba6eea72f15..6e07f5ebacfc 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -262,7 +262,7 @@ static int sis900_get_mac_addr(struct pci_dev *pci_dev, /* check to see if we have sane EEPROM */ signature = (u16) read_eeprom(ioaddr, EEPROMSignature); if (signature == 0xffff || signature == 0x0000) { - printk (KERN_WARNING "%s: Error EERPOM read %x\n", + printk (KERN_WARNING "%s: Error EEPROM read %x\n", pci_name(pci_dev), signature); return 0; } @@ -359,9 +359,9 @@ static int sis635_get_mac_addr(struct pci_dev *pci_dev, * * SiS962 or SiS963 model, use EEPROM to store MAC address. And EEPROM * is shared by - * LAN and 1394. When access EEPROM, send EEREQ signal to hardware first + * LAN and 1394. When accessing EEPROM, send EEREQ signal to hardware first * and wait for EEGNT. If EEGNT is ON, EEPROM is permitted to be accessed - * by LAN, otherwise is not. After MAC address is read from EEPROM, send + * by LAN, otherwise it is not. After MAC address is read from EEPROM, send * EEDONE signal to refuse EEPROM access by LAN. * The EEPROM map of SiS962 or SiS963 is different to SiS900. * The signature field in SiS962 or SiS963 spec is meaningless. -- cgit v1.2.3-55-g7522 From 76104862cccaeaa84fdd23e39f2610a96296291c Mon Sep 17 00:00:00 2001 From: Tasos Sahanidis Date: Sun, 14 Jul 2019 13:31:11 +0300 Subject: sky2: Disable MSI on P5W DH Deluxe The onboard sky2 NICs send IRQs after S3, resulting in ethernet not working after resume. Maskable MSI and MSI-X are also not supported, so fall back to INTx. Signed-off-by: Tasos Sahanidis Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/sky2.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index fe518c854d1f..f518312ffe69 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4917,6 +4917,13 @@ static const struct dmi_system_id msi_blacklist[] = { DMI_MATCH(DMI_PRODUCT_NAME, "P-79"), }, }, + { + .ident = "ASUS P5W DH Deluxe", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTEK COMPUTER INC"), + DMI_MATCH(DMI_PRODUCT_NAME, "P5W DH Deluxe"), + }, + }, {} }; -- cgit v1.2.3-55-g7522 From ffcb60a54f245528e1d49f957ca2d20d6079577c Mon Sep 17 00:00:00 2001 From: Ihab Zhaika Date: Mon, 8 Jul 2019 18:55:33 +0300 Subject: iwlwifi: add new cards for 9000 and 20000 series add two new PCI ID's for 9000 and 20000 series Cc: stable@vger.kernel.org Signed-off-by: Ihab Zhaika Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index ccc83fd74649..fe645380bd2f 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -604,6 +604,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x42A4, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x2526, 0x6014, iwl9260_2ac_160_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x8014, iwl9260_2ac_160_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x8010, iwl9260_2ac_160_cfg)}, {IWL_PCI_DEVICE(0x2526, 0xA014, iwl9260_2ac_160_cfg)}, @@ -971,6 +972,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x7A70, 0x0310, iwlax211_2ax_cfg_so_gf_a0)}, {IWL_PCI_DEVICE(0x7A70, 0x0510, iwlax211_2ax_cfg_so_gf_a0)}, {IWL_PCI_DEVICE(0x7A70, 0x0A10, iwlax211_2ax_cfg_so_gf_a0)}, + {IWL_PCI_DEVICE(0x7AF0, 0x0090, iwlax211_2ax_cfg_so_gf_a0)}, {IWL_PCI_DEVICE(0x7AF0, 0x0310, iwlax211_2ax_cfg_so_gf_a0)}, {IWL_PCI_DEVICE(0x7AF0, 0x0510, iwlax211_2ax_cfg_so_gf_a0)}, {IWL_PCI_DEVICE(0x7AF0, 0x0A10, iwlax211_2ax_cfg_so_gf_a0)}, -- cgit v1.2.3-55-g7522 From a7d544d63120061f89459585f06ca44d30842a22 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 8 Jul 2019 18:55:34 +0300 Subject: iwlwifi: pcie: add support for qu c-step devices Add support for C-step devices. Currently we don't have a nice way of matching the step and choosing the proper configuration, so we need to switch the config structs one by one. Cc: stable@vger.kernel.org Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 53 +++++++++++++++++++++++++ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 7 ++++ drivers/net/wireless/intel/iwlwifi/iwl-csr.h | 2 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 21 ++++++++++ 4 files changed, 83 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index 93526dfaf791..1f500cddb3a7 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -80,7 +80,9 @@ #define IWL_22000_QU_B_HR_B_FW_PRE "iwlwifi-Qu-b0-hr-b0-" #define IWL_22000_HR_B_FW_PRE "iwlwifi-QuQnj-b0-hr-b0-" #define IWL_22000_HR_A0_FW_PRE "iwlwifi-QuQnj-a0-hr-a0-" +#define IWL_QU_C_HR_B_FW_PRE "iwlwifi-Qu-c0-hr-b0-" #define IWL_QU_B_JF_B_FW_PRE "iwlwifi-Qu-b0-jf-b0-" +#define IWL_QU_C_JF_B_FW_PRE "iwlwifi-Qu-c0-jf-b0-" #define IWL_QUZ_A_HR_B_FW_PRE "iwlwifi-QuZ-a0-hr-b0-" #define IWL_QUZ_A_JF_B_FW_PRE "iwlwifi-QuZ-a0-jf-b0-" #define IWL_QNJ_B_JF_B_FW_PRE "iwlwifi-QuQnj-b0-jf-b0-" @@ -109,6 +111,8 @@ IWL_QUZ_A_HR_B_FW_PRE __stringify(api) ".ucode" #define IWL_QUZ_A_JF_B_MODULE_FIRMWARE(api) \ IWL_QUZ_A_JF_B_FW_PRE __stringify(api) ".ucode" +#define IWL_QU_C_HR_B_MODULE_FIRMWARE(api) \ + IWL_QU_C_HR_B_FW_PRE __stringify(api) ".ucode" #define IWL_QU_B_JF_B_MODULE_FIRMWARE(api) \ IWL_QU_B_JF_B_FW_PRE __stringify(api) ".ucode" #define IWL_QNJ_B_JF_B_MODULE_FIRMWARE(api) \ @@ -256,6 +260,30 @@ const struct iwl_cfg iwl_ax201_cfg_qu_hr = { .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, }; +const struct iwl_cfg iwl_ax101_cfg_qu_c0_hr_b0 = { + .name = "Intel(R) Wi-Fi 6 AX101", + .fw_name_pre = IWL_QU_C_HR_B_FW_PRE, + IWL_DEVICE_22500, + /* + * This device doesn't support receiving BlockAck with a large bitmap + * so we need to restrict the size of transmitted aggregation to the + * HT size; mac80211 would otherwise pick the HE max (256) by default. + */ + .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, +}; + +const struct iwl_cfg iwl_ax201_cfg_qu_c0_hr_b0 = { + .name = "Intel(R) Wi-Fi 6 AX201 160MHz", + .fw_name_pre = IWL_QU_C_HR_B_FW_PRE, + IWL_DEVICE_22500, + /* + * This device doesn't support receiving BlockAck with a large bitmap + * so we need to restrict the size of transmitted aggregation to the + * HT size; mac80211 would otherwise pick the HE max (256) by default. + */ + .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, +}; + const struct iwl_cfg iwl_ax101_cfg_quz_hr = { .name = "Intel(R) Wi-Fi 6 AX101", .fw_name_pre = IWL_QUZ_A_HR_B_FW_PRE, @@ -372,6 +400,30 @@ const struct iwl_cfg iwl9560_2ac_160_cfg_qu_b0_jf_b0 = { IWL_DEVICE_22500, }; +const struct iwl_cfg iwl9461_2ac_cfg_qu_c0_jf_b0 = { + .name = "Intel(R) Wireless-AC 9461", + .fw_name_pre = IWL_QU_C_JF_B_FW_PRE, + IWL_DEVICE_22500, +}; + +const struct iwl_cfg iwl9462_2ac_cfg_qu_c0_jf_b0 = { + .name = "Intel(R) Wireless-AC 9462", + .fw_name_pre = IWL_QU_C_JF_B_FW_PRE, + IWL_DEVICE_22500, +}; + +const struct iwl_cfg iwl9560_2ac_cfg_qu_c0_jf_b0 = { + .name = "Intel(R) Wireless-AC 9560", + .fw_name_pre = IWL_QU_C_JF_B_FW_PRE, + IWL_DEVICE_22500, +}; + +const struct iwl_cfg iwl9560_2ac_160_cfg_qu_c0_jf_b0 = { + .name = "Intel(R) Wireless-AC 9560 160MHz", + .fw_name_pre = IWL_QU_C_JF_B_FW_PRE, + IWL_DEVICE_22500, +}; + const struct iwl_cfg iwl9560_2ac_cfg_qnj_jf_b0 = { .name = "Intel(R) Wireless-AC 9560 160MHz", .fw_name_pre = IWL_QNJ_B_JF_B_FW_PRE, @@ -590,6 +642,7 @@ MODULE_FIRMWARE(IWL_22000_HR_A_F0_QNJ_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_22000_HR_B_F0_QNJ_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_22000_HR_B_QNJ_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_22000_HR_A0_QNJ_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); +MODULE_FIRMWARE(IWL_QU_C_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_QU_B_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_QUZ_A_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_QUZ_A_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index bc267bd2c3b0..1c1bf1b281cd 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -565,10 +565,13 @@ extern const struct iwl_cfg iwl22000_2ac_cfg_hr; extern const struct iwl_cfg iwl22000_2ac_cfg_hr_cdb; extern const struct iwl_cfg iwl22000_2ac_cfg_jf; extern const struct iwl_cfg iwl_ax101_cfg_qu_hr; +extern const struct iwl_cfg iwl_ax101_cfg_qu_c0_hr_b0; extern const struct iwl_cfg iwl_ax101_cfg_quz_hr; extern const struct iwl_cfg iwl22000_2ax_cfg_hr; extern const struct iwl_cfg iwl_ax200_cfg_cc; extern const struct iwl_cfg iwl_ax201_cfg_qu_hr; +extern const struct iwl_cfg iwl_ax201_cfg_qu_hr; +extern const struct iwl_cfg iwl_ax201_cfg_qu_c0_hr_b0; extern const struct iwl_cfg iwl_ax201_cfg_quz_hr; extern const struct iwl_cfg iwl_ax1650i_cfg_quz_hr; extern const struct iwl_cfg iwl_ax1650s_cfg_quz_hr; @@ -580,6 +583,10 @@ extern const struct iwl_cfg iwl9461_2ac_cfg_qu_b0_jf_b0; extern const struct iwl_cfg iwl9462_2ac_cfg_qu_b0_jf_b0; extern const struct iwl_cfg iwl9560_2ac_cfg_qu_b0_jf_b0; extern const struct iwl_cfg iwl9560_2ac_160_cfg_qu_b0_jf_b0; +extern const struct iwl_cfg iwl9461_2ac_cfg_qu_c0_jf_b0; +extern const struct iwl_cfg iwl9462_2ac_cfg_qu_c0_jf_b0; +extern const struct iwl_cfg iwl9560_2ac_cfg_qu_c0_jf_b0; +extern const struct iwl_cfg iwl9560_2ac_160_cfg_qu_c0_jf_b0; extern const struct iwl_cfg killer1550i_2ac_cfg_qu_b0_jf_b0; extern const struct iwl_cfg killer1550s_2ac_cfg_qu_b0_jf_b0; extern const struct iwl_cfg iwl22000_2ax_cfg_jf; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h index 93da96a7247c..cb4c5514a556 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h @@ -328,6 +328,8 @@ enum { #define CSR_HW_REV_TYPE_NONE (0x00001F0) #define CSR_HW_REV_TYPE_QNJ (0x0000360) #define CSR_HW_REV_TYPE_QNJ_B0 (0x0000364) +#define CSR_HW_REV_TYPE_QU_B0 (0x0000334) +#define CSR_HW_REV_TYPE_QU_C0 (0x0000338) #define CSR_HW_REV_TYPE_QUZ (0x0000354) #define CSR_HW_REV_TYPE_HR_CDB (0x0000340) #define CSR_HW_REV_TYPE_SO (0x0000370) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index fe645380bd2f..ea2a03d4bf55 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1039,6 +1039,27 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } iwl_trans->cfg = cfg; } + + /* + * This is a hack to switch from Qu B0 to Qu C0. We need to + * do this for all cfgs that use Qu B0. All this code is in + * urgent need for a refactor, but for now this is the easiest + * thing to do to support Qu C-step. + */ + if (iwl_trans->hw_rev == CSR_HW_REV_TYPE_QU_C0) { + if (iwl_trans->cfg == &iwl_ax101_cfg_qu_hr) + iwl_trans->cfg = &iwl_ax101_cfg_qu_c0_hr_b0; + else if (iwl_trans->cfg == &iwl_ax201_cfg_qu_hr) + iwl_trans->cfg = &iwl_ax201_cfg_qu_c0_hr_b0; + else if (iwl_trans->cfg == &iwl9461_2ac_cfg_qu_b0_jf_b0) + iwl_trans->cfg = &iwl9461_2ac_cfg_qu_c0_jf_b0; + else if (iwl_trans->cfg == &iwl9462_2ac_cfg_qu_b0_jf_b0) + iwl_trans->cfg = &iwl9462_2ac_cfg_qu_c0_jf_b0; + else if (iwl_trans->cfg == &iwl9560_2ac_cfg_qu_b0_jf_b0) + iwl_trans->cfg = &iwl9560_2ac_cfg_qu_c0_jf_b0; + else if (iwl_trans->cfg == &iwl9560_2ac_160_cfg_qu_b0_jf_b0) + iwl_trans->cfg = &iwl9560_2ac_160_cfg_qu_c0_jf_b0; + } #endif pci_set_drvdata(pdev, iwl_trans); -- cgit v1.2.3-55-g7522 From ff414f31ce3750d259549c3b9eac8e855b5348f0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Jul 2019 14:50:06 +0200 Subject: ath10k: work around uninitialized vht_pfr variable As clang points out, the vht_pfr is assigned to a struct member without being initialized in one case: drivers/net/wireless/ath/ath10k/mac.c:7528:7: error: variable 'vht_pfr' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized] if (!ath10k_mac_can_set_bitrate_mask(ar, band, mask, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/wireless/ath/ath10k/mac.c:7551:20: note: uninitialized use occurs here arvif->vht_pfr = vht_pfr; ^~~~~~~ drivers/net/wireless/ath/ath10k/mac.c:7528:3: note: remove the 'if' if its condition is always true if (!ath10k_mac_can_set_bitrate_mask(ar, band, mask, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/wireless/ath/ath10k/mac.c:7483:12: note: initialize the variable 'vht_pfr' to silence this warning u8 vht_pfr; Add an explicit but probably incorrect initialization here. I suspect we want a better fix here, but chose this approach to illustrate the issue. Fixes: 8b97b055dc9d ("ath10k: fix failure to set multiple fixed rate") Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index e43a566eef77..0606416dc971 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -7541,6 +7541,8 @@ static int ath10k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw, &vht_nss, true); update_bitrate_mask = false; + } else { + vht_pfr = 0; } mutex_lock(&ar->conf_mutex); -- cgit v1.2.3-55-g7522 From 41a531ffa4c5aeb062f892227c00fabb3b4a9c91 Mon Sep 17 00:00:00 2001 From: Soeren Moch Date: Mon, 1 Jul 2019 12:53:13 +0200 Subject: rt2x00usb: fix rx queue hang Since commit ed194d136769 ("usb: core: remove local_irq_save() around ->complete() handler") the handler rt2x00usb_interrupt_rxdone() is not running with interrupts disabled anymore. So this completion handler is not guaranteed to run completely before workqueue processing starts for the same queue entry. Be sure to set all other flags in the entry correctly before marking this entry ready for workqueue processing. This way we cannot miss error conditions that need to be signalled from the completion handler to the worker thread. Note that rt2x00usb_work_rxdone() processes all available entries, not only such for which queue_work() was called. This patch is similar to what commit df71c9cfceea ("rt2x00: fix order of entry flags modification") did for TX processing. This fixes a regression on a RT5370 based wifi stick in AP mode, which suddenly stopped data transmission after some period of heavy load. Also stopping the hanging hostapd resulted in the error message "ieee80211 phy0: rt2x00queue_flush_queue: Warning - Queue 14 failed to flush". Other operation modes are probably affected as well, this just was the used testcase. Fixes: ed194d136769 ("usb: core: remove local_irq_save() around ->complete() handler") Cc: stable@vger.kernel.org # 4.20+ Signed-off-by: Soeren Moch Acked-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/ralink/rt2x00/rt2x00usb.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c index 67b81c7221c4..7e3a621b9c0d 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c @@ -372,14 +372,9 @@ static void rt2x00usb_interrupt_rxdone(struct urb *urb) struct queue_entry *entry = (struct queue_entry *)urb->context; struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev; - if (!test_and_clear_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags)) + if (!test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags)) return; - /* - * Report the frame as DMA done - */ - rt2x00lib_dmadone(entry); - /* * Check if the received data is simply too small * to be actually valid, or if the urb is signaling @@ -388,6 +383,11 @@ static void rt2x00usb_interrupt_rxdone(struct urb *urb) if (urb->actual_length < entry->queue->desc_size || urb->status) set_bit(ENTRY_DATA_IO_FAILED, &entry->flags); + /* + * Report the frame as DMA done + */ + rt2x00lib_dmadone(entry); + /* * Schedule the delayed work for reading the RX status * from the device. -- cgit v1.2.3-55-g7522 From 071c37983d99da07797294ea78e9da1a6e287144 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 14 Jul 2019 23:36:11 +0200 Subject: net: neigh: fix multiple neigh timer scheduling Neigh timer can be scheduled multiple times from userspace adding multiple neigh entries and forcing the neigh timer scheduling passing NTF_USE in the netlink requests. This will result in a refcount leak and in the following dump stack: [ 32.465295] NEIGH: BUG, double timer add, state is 8 [ 32.465308] CPU: 0 PID: 416 Comm: double_timer_ad Not tainted 5.2.0+ #65 [ 32.465311] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.12.0-2.fc30 04/01/2014 [ 32.465313] Call Trace: [ 32.465318] dump_stack+0x7c/0xc0 [ 32.465323] __neigh_event_send+0x20c/0x880 [ 32.465326] ? ___neigh_create+0x846/0xfb0 [ 32.465329] ? neigh_lookup+0x2a9/0x410 [ 32.465332] ? neightbl_fill_info.constprop.0+0x800/0x800 [ 32.465334] neigh_add+0x4f8/0x5e0 [ 32.465337] ? neigh_xmit+0x620/0x620 [ 32.465341] ? find_held_lock+0x85/0xa0 [ 32.465345] rtnetlink_rcv_msg+0x204/0x570 [ 32.465348] ? rtnl_dellink+0x450/0x450 [ 32.465351] ? mark_held_locks+0x90/0x90 [ 32.465354] ? match_held_lock+0x1b/0x230 [ 32.465357] netlink_rcv_skb+0xc4/0x1d0 [ 32.465360] ? rtnl_dellink+0x450/0x450 [ 32.465363] ? netlink_ack+0x420/0x420 [ 32.465366] ? netlink_deliver_tap+0x115/0x560 [ 32.465369] ? __alloc_skb+0xc9/0x2f0 [ 32.465372] netlink_unicast+0x270/0x330 [ 32.465375] ? netlink_attachskb+0x2f0/0x2f0 [ 32.465378] netlink_sendmsg+0x34f/0x5a0 [ 32.465381] ? netlink_unicast+0x330/0x330 [ 32.465385] ? move_addr_to_kernel.part.0+0x20/0x20 [ 32.465388] ? netlink_unicast+0x330/0x330 [ 32.465391] sock_sendmsg+0x91/0xa0 [ 32.465394] ___sys_sendmsg+0x407/0x480 [ 32.465397] ? copy_msghdr_from_user+0x200/0x200 [ 32.465401] ? _raw_spin_unlock_irqrestore+0x37/0x40 [ 32.465404] ? lockdep_hardirqs_on+0x17d/0x250 [ 32.465407] ? __wake_up_common_lock+0xcb/0x110 [ 32.465410] ? __wake_up_common+0x230/0x230 [ 32.465413] ? netlink_bind+0x3e1/0x490 [ 32.465416] ? netlink_setsockopt+0x540/0x540 [ 32.465420] ? __fget_light+0x9c/0xf0 [ 32.465423] ? sockfd_lookup_light+0x8c/0xb0 [ 32.465426] __sys_sendmsg+0xa5/0x110 [ 32.465429] ? __ia32_sys_shutdown+0x30/0x30 [ 32.465432] ? __fd_install+0xe1/0x2c0 [ 32.465435] ? lockdep_hardirqs_off+0xb5/0x100 [ 32.465438] ? mark_held_locks+0x24/0x90 [ 32.465441] ? do_syscall_64+0xf/0x270 [ 32.465444] do_syscall_64+0x63/0x270 [ 32.465448] entry_SYSCALL_64_after_hwframe+0x49/0xbe Fix the issue unscheduling neigh_timer if selected entry is in 'IN_TIMER' receiving a netlink request with NTF_USE flag set Reported-by: Marek Majkowski Fixes: 0c5c2d308906 ("neigh: Allow for user space users of the neighbour table") Signed-off-by: Lorenzo Bianconi Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/core/neighbour.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 742cea4ce72e..0dfc97bc8760 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1124,6 +1124,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); + neigh_del_timer(neigh); neigh->nud_state = NUD_INCOMPLETE; neigh->updated = now; next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), @@ -1140,6 +1141,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) } } else if (neigh->nud_state & NUD_STALE) { neigh_dbg(2, "neigh %p is delayed\n", neigh); + neigh_del_timer(neigh); neigh->nud_state = NUD_DELAY; neigh->updated = jiffies; neigh_add_timer(neigh, jiffies + -- cgit v1.2.3-55-g7522 From cef86f1536b02e61e699f53b5f8782067481580d Mon Sep 17 00:00:00 2001 From: Fuqian Huang Date: Mon, 15 Jul 2019 11:17:09 +0800 Subject: atm: idt77252: Remove call to memset after dma_alloc_coherent In commit 518a2f1925c3 ("dma-mapping: zero memory returned from dma_alloc_*"), dma_alloc_coherent has already zeroed the memory. So memset is not needed. Signed-off-by: Fuqian Huang Signed-off-by: David S. Miller --- drivers/atm/idt77252.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 43a14579e80e..df51680e8931 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -1379,7 +1379,6 @@ init_tsq(struct idt77252_dev *card) printk("%s: can't allocate TSQ.\n", card->name); return -1; } - memset(card->tsq.base, 0, TSQSIZE); card->tsq.last = card->tsq.base + TSQ_NUM_ENTRIES - 1; card->tsq.next = card->tsq.last; -- cgit v1.2.3-55-g7522 From 3a5ee3b30126bd7dc5d6299f3a985d4fefcf42c9 Mon Sep 17 00:00:00 2001 From: Fuqian Huang Date: Mon, 15 Jul 2019 11:19:11 +0800 Subject: ethernet: remove redundant memset kvzalloc already zeroes the memory during the allocation. pci_alloc_consistent calls dma_alloc_coherent directly. In commit 518a2f1925c3 ("dma-mapping: zero memory returned from dma_alloc_*"), dma_alloc_coherent has already zeroed the memory. So the memset after these function is not needed. Signed-off-by: Fuqian Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/atlx/atl1.c | 2 -- drivers/net/ethernet/atheros/atlx/atl2.c | 1 - drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 -- drivers/net/ethernet/chelsio/cxgb4/sched.c | 1 - drivers/net/ethernet/freescale/fec_main.c | 2 -- drivers/net/ethernet/jme.c | 5 ----- drivers/net/ethernet/marvell/skge.c | 2 -- drivers/net/ethernet/mellanox/mlx4/eq.c | 2 -- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 1 - drivers/net/ethernet/neterion/s2io.c | 1 - drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c | 3 --- drivers/net/ethernet/ti/tlan.c | 1 - 14 files changed, 27 deletions(-) diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index 7c767ce9aafa..b5c6dc914720 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -1060,8 +1060,6 @@ static s32 atl1_setup_ring_resources(struct atl1_adapter *adapter) goto err_nomem; } - memset(ring_header->desc, 0, ring_header->size); - /* init TPD ring */ tpd_ring->dma = ring_header->dma; offset = (tpd_ring->dma & 0x7) ? (8 - (ring_header->dma & 0x7)) : 0; diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 3a3fb5ce0fee..3aba38322717 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -291,7 +291,6 @@ static s32 atl2_setup_ring_resources(struct atl2_adapter *adapter) &adapter->ring_dma); if (!adapter->ring_vir_addr) return -ENOMEM; - memset(adapter->ring_vir_addr, 0, adapter->ring_size); /* Init TXD Ring */ adapter->txd_dma = adapter->ring_dma ; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3f632028eff0..1069eb01cc55 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2677,8 +2677,6 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp) mapping = txr->tx_push_mapping + sizeof(struct tx_push_bd); txr->data_mapping = cpu_to_le64(mapping); - - memset(txr->tx_push, 0, sizeof(struct tx_push_bd)); } qidx = bp->tc_to_qidx[j]; ring->queue_id = bp->q_info[qidx].queue_id; diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c index ba6c153ee45c..60218dc676a8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sched.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sched.c @@ -207,7 +207,6 @@ static int t4_sched_queue_bind(struct port_info *pi, struct ch_sched_queue *p) goto out_err; /* Bind queue to specified class */ - memset(qe, 0, sizeof(*qe)); qe->cntxt_id = qid; memcpy(&qe->param, p, sizeof(qe->param)); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 9d459ccf251d..e5610a4da539 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3144,8 +3144,6 @@ static int fec_enet_init(struct net_device *ndev) return -ENOMEM; } - memset(cbd_base, 0, bd_size); - /* Get the Ethernet address */ fec_get_mac(ndev); /* make sure MAC we just acquired is programmed into the hw */ diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 76b7b7b85e35..0b668357db4d 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -582,11 +582,6 @@ jme_setup_tx_resources(struct jme_adapter *jme) if (unlikely(!(txring->bufinf))) goto err_free_txring; - /* - * Initialize Transmit Descriptors - */ - memset(txring->alloc, 0, TX_RING_ALLOC_SIZE(jme->tx_ring_size)); - return 0; err_free_txring: diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 35a92fd2cf39..9ac854c2b371 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -2558,8 +2558,6 @@ static int skge_up(struct net_device *dev) goto free_pci_mem; } - memset(skge->mem, 0, skge->mem_size); - err = skge_ring_alloc(&skge->rx_ring, skge->mem, skge->dma); if (err) goto free_pci_mem; diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index a5be27772b8e..c790a5fcea73 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1013,8 +1013,6 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent, dma_list[i] = t; eq->page_list[i].map = t; - - memset(eq->page_list[i].buf, 0, PAGE_SIZE); } eq->eqn = mlx4_bitmap_alloc(&priv->eq_table.bitmap); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 3b04d8927fb1..1f3891fde2eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2450,7 +2450,6 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, MLX5_SET(query_vport_counter_in, in, vport_number, vport->vport); MLX5_SET(query_vport_counter_in, in, other_vport, 1); - memset(out, 0, outlen); err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen); if (err) goto free_out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 957d9b09dc3f..089ae4d48a82 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1134,7 +1134,6 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) } /* create send-to-vport group */ - memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS); @@ -1293,8 +1292,6 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) return -ENOMEM; /* create vport rx group */ - memset(flow_group_in, 0, inlen); - esw_set_flow_group_source_port(esw, flow_group_in); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 051b19388a81..615455a21567 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -847,7 +847,6 @@ static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox, &mem_item->mapaddr); if (!mem_item->buf) return -ENOMEM; - memset(mem_item->buf, 0, mem_item->size); q->elem_info = kcalloc(q->count, sizeof(*q->elem_info), GFP_KERNEL); if (!q->elem_info) { diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 3b2ae1a21678..e0b2bf327905 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -747,7 +747,6 @@ static int init_shared_mem(struct s2io_nic *nic) return -ENOMEM; } mem_allocated += size; - memset(tmp_v_addr, 0, size); size = sizeof(struct rxd_info) * rxd_count[nic->rxd_mode]; diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c index 433052f734ed..5e9f8ee99800 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c @@ -442,10 +442,8 @@ nx_fw_cmd_create_tx_ctx(struct netxen_adapter *adapter) goto out_free_rq; } - memset(rq_addr, 0, rq_size); prq = rq_addr; - memset(rsp_addr, 0, rsp_size); prsp = rsp_addr; prq->host_rsp_dma_addr = cpu_to_le64(rsp_phys_addr); @@ -755,7 +753,6 @@ int netxen_alloc_hw_resources(struct netxen_adapter *adapter) return -ENOMEM; } - memset(addr, 0, sizeof(struct netxen_ring_ctx)); recv_ctx->hwctx = addr; recv_ctx->hwctx->ctx_id = cpu_to_le32(port); recv_ctx->hwctx->cmd_consumer_offset = diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c index b4ab1a5f6cd0..78f0f2d59e22 100644 --- a/drivers/net/ethernet/ti/tlan.c +++ b/drivers/net/ethernet/ti/tlan.c @@ -855,7 +855,6 @@ static int tlan_init(struct net_device *dev) dev->name); return -ENOMEM; } - memset(priv->dma_storage, 0, dma_size); priv->rx_list = (struct tlan_list *) ALIGN((unsigned long)priv->dma_storage, 8); priv->rx_list_dma = ALIGN(priv->dma_storage_dma, 8); -- cgit v1.2.3-55-g7522 From 6a24c8fb0d889b39079215970210f88255b81c80 Mon Sep 17 00:00:00 2001 From: Fuqian Huang Date: Mon, 15 Jul 2019 11:19:21 +0800 Subject: hippi: Remove call to memset after pci_alloc_consistent pci_alloc_consistent calls dma_alloc_coherent directly. In commit 518a2f1925c3 ("dma-mapping: zero memory returned from dma_alloc_*"), dma_alloc_coherent has already zeroed the memory. So memset is not needed. Signed-off-by: Fuqian Huang Signed-off-by: David S. Miller --- drivers/net/hippi/rrunner.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c index 7b9350dbebdd..2a6ec5394966 100644 --- a/drivers/net/hippi/rrunner.c +++ b/drivers/net/hippi/rrunner.c @@ -1196,7 +1196,6 @@ static int rr_open(struct net_device *dev) goto error; } rrpriv->rx_ctrl_dma = dma_addr; - memset(rrpriv->rx_ctrl, 0, 256*sizeof(struct ring_ctrl)); rrpriv->info = pci_alloc_consistent(pdev, sizeof(struct rr_info), &dma_addr); @@ -1205,7 +1204,6 @@ static int rr_open(struct net_device *dev) goto error; } rrpriv->info_dma = dma_addr; - memset(rrpriv->info, 0, sizeof(struct rr_info)); wmb(); spin_lock_irqsave(&rrpriv->lock, flags); -- cgit v1.2.3-55-g7522 From c5ec23bb190267f8b6cb287b761c1f6630c4b694 Mon Sep 17 00:00:00 2001 From: Fuqian Huang Date: Mon, 15 Jul 2019 11:21:18 +0800 Subject: vmxnet3: Remove call to memset after dma_alloc_coherent In commit 518a2f1925c3 ("dma-mapping: zero memory returned from dma_alloc_*"), dma_alloc_coherent has already zeroed the memory. So memset is not needed. Signed-off-by: Fuqian Huang Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 3f48f05dd2a6..2a1918f25e47 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -3430,7 +3430,6 @@ vmxnet3_probe_device(struct pci_dev *pdev, err = -ENOMEM; goto err_ver; } - memset(adapter->coal_conf, 0, sizeof(*adapter->coal_conf)); adapter->coal_conf->coalMode = VMXNET3_COALESCE_DISABLED; adapter->default_coal_mode = true; } -- cgit v1.2.3-55-g7522 From f384e62a82ba5d85408405fdd6aeff89354deaa9 Mon Sep 17 00:00:00 2001 From: Phong Tran Date: Mon, 15 Jul 2019 22:08:14 +0700 Subject: ISDN: hfcsusb: checking idx of ep configuration The syzbot test with random endpoint address which made the idx is overflow in the table of endpoint configuations. this adds the checking for fixing the error report from syzbot KASAN: stack-out-of-bounds Read in hfcsusb_probe [1] The patch tested by syzbot [2] Reported-by: syzbot+8750abbc3a46ef47d509@syzkaller.appspotmail.com [1]: https://syzkaller.appspot.com/bug?id=30a04378dac680c5d521304a00a86156bb913522 [2]: https://groups.google.com/d/msg/syzkaller-bugs/_6HBdge8F3E/OJn7wVNpBAAJ Signed-off-by: Phong Tran Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/hfcsusb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index 4c99739b937e..0e224232f746 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -1955,6 +1955,9 @@ hfcsusb_probe(struct usb_interface *intf, const struct usb_device_id *id) /* get endpoint base */ idx = ((ep_addr & 0x7f) - 1) * 2; + if (idx > 15) + return -EIO; + if (ep_addr & 0x80) idx++; attr = ep->desc.bmAttributes; -- cgit v1.2.3-55-g7522 From d71f895c31952cfd3277f82ebed60fc3f2350b52 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 27 Jun 2019 08:34:41 +0300 Subject: net/mlx5e: Verify encapsulation is supported When mlx5e_attach_encap() calls mlx5e_get_tc_tun() to get the tunnel info data struct, check that returned value is not NULL, as would be in the case of unsupported encapsulation. Fixes: d386939a327d2 ("net/mlx5e: Rearrange tc tunnel code in a modular way") Signed-off-by: Eli Cohen Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2d6436257f9d..018709a4343f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2647,6 +2647,10 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, family = ip_tunnel_info_af(tun_info); key.ip_tun_key = &tun_info->key; key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev); + if (!key.tc_tunnel) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel"); + return -EOPNOTSUPP; + } hash_key = hash_encap_info(&key); -- cgit v1.2.3-55-g7522 From 075973c7d7bb866a5bb37f96027ef99bf7244c83 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 8 Jul 2019 17:02:36 +0300 Subject: net/mlx5e: Rely on filter_dev instead of dissector keys for tunnels Currently, tunnel attributes are parsed and inner header matching is used only when flow dissector specifies match on some of the supported encapsulation fields. When user tries to offload tc filter that doesn't match any encapsulation fields on tunnel device, mlx5 tc layer incorrectly sets to match packet header keys on encap header (outer header) and firmware rejects the rule with syndrome 0x7e1579 when creating new flow group. Change __parse_cls_flower() to determine whether tunnel is used based on fitler_dev tunnel info, instead of determining it indirectly by checking flow dissector enc keys. Fixes: bbd00f7e2349 ("net/mlx5e: Add TC tunnel release action for SRIOV offloads") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 018709a4343f..b95e0ae4d7fd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1522,11 +1522,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) || - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) || - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) || - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) { + if (mlx5e_get_tc_tun(filter_dev)) { if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) return -EOPNOTSUPP; -- cgit v1.2.3-55-g7522 From 3d144578c91a2db417923ba905ce7a84ce0c274b Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 11 Jul 2019 13:03:48 +0300 Subject: net/mlx5e: Allow dissector meta key in tc flower Recently, fl_flow_key->indev_ifindex int field was refactored into flow_dissector_key_meta field. With this, flower classifier also sets FLOW_DISSECTOR_KEY_META flow dissector key. However, mlx5 flower dissector validation code rejects filters that use flow dissector keys that are not supported. Add FLOW_DISSECTOR_KEY_META to the list of allowed dissector keys in __parse_cls_flower() to prevent following error when offloading flower classifier to mlx5: Error: mlx5_core: Unsupported key. Fixes: 8212ed777f40 ("net: sched: cls_flower: use flow_dissector for ingress ifindex") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index b95e0ae4d7fd..cc096f6011d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1499,7 +1499,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, *match_level = MLX5_MATCH_NONE; if (dissector->used_keys & - ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + ~(BIT(FLOW_DISSECTOR_KEY_META) | + BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | BIT(FLOW_DISSECTOR_KEY_VLAN) | -- cgit v1.2.3-55-g7522 From 1acc5d5c5832da9a98b22374a8fae08ffe31b3f8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 12 Jul 2019 10:25:55 -0700 Subject: bpf: fix BTF verifier size resolution logic BTF verifier has a size resolution bug which in some circumstances leads to invalid size resolution for, e.g., TYPEDEF modifier. This happens if we have [1] PTR -> [2] TYPEDEF -> [3] ARRAY, in which case due to being in pointer context ARRAY size won't be resolved (because for pointer it doesn't matter, so it's a sink in pointer context), but it will be permanently remembered as zero for TYPEDEF and TYPEDEF will be marked as RESOLVED. Eventually ARRAY size will be resolved correctly, but TYPEDEF resolved_size won't be updated anymore. This, subsequently, will lead to erroneous map creation failure, if that TYPEDEF is specified as either key or value, as key_size/value_size won't correspond to resolved size of TYPEDEF (kernel will believe it's zero). Note, that if BTF was ordered as [1] ARRAY <- [2] TYPEDEF <- [3] PTR, this won't be a problem, as by the time we get to TYPEDEF, ARRAY's size is already calculated and stored. This bug manifests itself in rejecting BTF-defined maps that use array typedef as a value type: typedef int array_t[16]; struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(value, array_t); /* i.e., array_t *value; */ } test_map SEC(".maps"); The fix consists on not relying on modifier's resolved_size and instead using modifier's resolved_id (type ID for "concrete" type to which modifier eventually resolves) and doing size determination for that resolved type. This allow to preserve existing "early DFS termination" logic for PTR or STRUCT_OR_ARRAY contexts, but still do correct size determination for modifier types. Fixes: eb3f595dab40 ("bpf: btf: Validate type reference") Cc: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- kernel/bpf/btf.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 546ebee39e2a..5fcc7a17eb5a 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -1073,11 +1073,18 @@ const struct btf_type *btf_type_id_size(const struct btf *btf, !btf_type_is_var(size_type))) return NULL; - size = btf->resolved_sizes[size_type_id]; size_type_id = btf->resolved_ids[size_type_id]; size_type = btf_type_by_id(btf, size_type_id); if (btf_type_nosize_or_null(size_type)) return NULL; + else if (btf_type_has_size(size_type)) + size = size_type->size; + else if (btf_type_is_array(size_type)) + size = btf->resolved_sizes[size_type_id]; + else if (btf_type_is_ptr(size_type)) + size = sizeof(void *); + else + return NULL; } *type_id = size_type_id; @@ -1602,7 +1609,6 @@ static int btf_modifier_resolve(struct btf_verifier_env *env, const struct btf_type *next_type; u32 next_type_id = t->type; struct btf *btf = env->btf; - u32 next_type_size = 0; next_type = btf_type_by_id(btf, next_type_id); if (!next_type || btf_type_is_resolve_source_only(next_type)) { @@ -1620,7 +1626,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env, * save us a few type-following when we use it later (e.g. in * pretty print). */ - if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) { + if (!btf_type_id_size(btf, &next_type_id, NULL)) { if (env_type_is_resolved(env, next_type_id)) next_type = btf_type_id_resolve(btf, &next_type_id); @@ -1633,7 +1639,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env, } } - env_stack_pop_resolved(env, next_type_id, next_type_size); + env_stack_pop_resolved(env, next_type_id, 0); return 0; } @@ -1645,7 +1651,6 @@ static int btf_var_resolve(struct btf_verifier_env *env, const struct btf_type *t = v->t; u32 next_type_id = t->type; struct btf *btf = env->btf; - u32 next_type_size; next_type = btf_type_by_id(btf, next_type_id); if (!next_type || btf_type_is_resolve_source_only(next_type)) { @@ -1675,12 +1680,12 @@ static int btf_var_resolve(struct btf_verifier_env *env, * forward types or similar that would resolve to size of * zero is allowed. */ - if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) { + if (!btf_type_id_size(btf, &next_type_id, NULL)) { btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL; } - env_stack_pop_resolved(env, next_type_id, next_type_size); + env_stack_pop_resolved(env, next_type_id, 0); return 0; } -- cgit v1.2.3-55-g7522 From dd13f3ca642986b59a1863101e58cd694e81f888 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 12 Jul 2019 10:25:56 -0700 Subject: selftests/bpf: add trickier size resolution tests Add more BTF tests, validating that size resolution logic is correct in few trickier cases. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_btf.c | 88 ++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 8351cb5f4a20..3d617e806054 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -3417,6 +3417,94 @@ static struct btf_raw_test raw_tests[] = { .value_type_id = 1, .max_entries = 4, }, +/* + * typedef int arr_t[16]; + * struct s { + * arr_t *a; + * }; + */ +{ + .descr = "struct->ptr->typedef->array->int size resolution", + .raw_types = { + BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [1] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_PTR_ENC(3), /* [2] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + BTF_TYPE_ARRAY_ENC(5, 5, 16), /* [4] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0a\0arr_t"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "ptr_mod_chain_size_resolve_map", + .key_size = sizeof(int), + .value_size = sizeof(int) * 16, + .key_type_id = 5 /* int */, + .value_type_id = 3 /* arr_t */, + .max_entries = 4, +}, +/* + * typedef int arr_t[16][8][4]; + * struct s { + * arr_t *a; + * }; + */ +{ + .descr = "struct->ptr->typedef->multi-array->int size resolution", + .raw_types = { + BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [1] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_PTR_ENC(3), /* [2] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + BTF_TYPE_ARRAY_ENC(5, 7, 16), /* [4] */ + BTF_TYPE_ARRAY_ENC(6, 7, 8), /* [5] */ + BTF_TYPE_ARRAY_ENC(7, 7, 4), /* [6] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [7] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0a\0arr_t"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "multi_arr_size_resolve_map", + .key_size = sizeof(int), + .value_size = sizeof(int) * 16 * 8 * 4, + .key_type_id = 7 /* int */, + .value_type_id = 3 /* arr_t */, + .max_entries = 4, +}, +/* + * typedef int int_t; + * typedef int_t arr3_t[4]; + * typedef arr3_t arr2_t[8]; + * typedef arr2_t arr1_t[16]; + * struct s { + * arr1_t *a; + * }; + */ +{ + .descr = "typedef/multi-arr mix size resolution", + .raw_types = { + BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [1] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_PTR_ENC(3), /* [2] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + BTF_TYPE_ARRAY_ENC(5, 10, 16), /* [4] */ + BTF_TYPEDEF_ENC(NAME_TBD, 6), /* [5] */ + BTF_TYPE_ARRAY_ENC(7, 10, 8), /* [6] */ + BTF_TYPEDEF_ENC(NAME_TBD, 8), /* [7] */ + BTF_TYPE_ARRAY_ENC(9, 10, 4), /* [8] */ + BTF_TYPEDEF_ENC(NAME_TBD, 10), /* [9] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [10] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0a\0arr1_t\0arr2_t\0arr3_t\0int_t"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "typedef_arra_mix_size_resolve_map", + .key_size = sizeof(int), + .value_size = sizeof(int) * 16 * 8 * 4, + .key_type_id = 10 /* int */, + .value_type_id = 3 /* arr_t */, + .max_entries = 4, +}, }; /* struct btf_raw_test raw_tests[] */ -- cgit v1.2.3-55-g7522 From 8981e56fa17282598571958ae6a29cbc3209a6cb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 12 Jul 2019 10:25:57 -0700 Subject: selftests/bpf: use typedef'ed arrays as map values Convert few tests that couldn't use typedef'ed arrays due to kernel bug. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c | 3 ++- tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c | 3 +-- tools/testing/selftests/bpf/progs/test_stacktrace_map.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c index d06b47a09097..33254b771384 100644 --- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c +++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c @@ -47,11 +47,12 @@ struct { * issue and avoid complicated C programming massaging. * This is an acceptable workaround since there is one entry here. */ +typedef __u64 raw_stack_trace_t[2 * MAX_STACK_RAWTP]; struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 1); __type(key, __u32); - __u64 (*value)[2 * MAX_STACK_RAWTP]; + __type(value, raw_stack_trace_t); } rawdata_map SEC(".maps"); SEC("tracepoint/raw_syscalls/sys_enter") diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c index bbfc8337b6f0..f5638e26865d 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c @@ -36,8 +36,7 @@ struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 128); __type(key, __u32); - /* there seems to be a bug in kernel not handling typedef properly */ - struct bpf_stack_build_id (*value)[PERF_MAX_STACK_DEPTH]; + __type(value, stack_trace_t); } stack_amap SEC(".maps"); /* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */ diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c index 803c15dc109d..fa0be3e10a10 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c @@ -35,7 +35,7 @@ struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 16384); __type(key, __u32); - __u64 (*value)[PERF_MAX_STACK_DEPTH]; + __type(value, stack_trace_t); } stack_amap SEC(".maps"); /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ -- cgit v1.2.3-55-g7522 From 025c0c0917b78f21e3aaffecda75968d54172095 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 12 Jul 2019 15:41:42 +0200 Subject: selftests/bpf: fix attach_probe on s390 attach_probe test fails, because it cannot install a kprobe on a non-existent sys_nanosleep symbol. Use the correct symbol name for the nanosleep syscall on 64-bit s390. Don't bother adding one for 31-bit mode, since tests are compiled only in 64-bit mode. Fixes: 1e8611bbdfc9 ("selftests/bpf: add kprobe/uprobe selftests") Signed-off-by: Ilya Leoshkevich Acked-by: Vasily Gorbik Acked-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/prog_tests/attach_probe.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index a4686395522c..47af4afc5013 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -23,6 +23,8 @@ ssize_t get_base_addr() { #ifdef __x86_64__ #define SYS_KPROBE_NAME "__x64_sys_nanosleep" +#elif defined(__s390x__) +#define SYS_KPROBE_NAME "__s390x_sys_nanosleep" #else #define SYS_KPROBE_NAME "sys_nanosleep" #endif -- cgit v1.2.3-55-g7522 From e46fc22e60a410d896448835adca95aa3332d25d Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 12 Jul 2019 15:56:31 +0200 Subject: selftests/bpf: make directory prerequisites order-only When directories are used as prerequisites in Makefiles, they can cause a lot of unnecessary rebuilds, because a directory is considered changed whenever a file in this directory is added, removed or modified. If the only thing a target is interested in is the existence of the directory it depends on, which is the case for selftests/bpf, this directory should be specified as an order-only prerequisite: it would still be created in case it does not exist, but it would not trigger a rebuild of a target in case it's considered changed. Signed-off-by: Ilya Leoshkevich Acked-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 277d8605e340..0e003fb6641b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -183,12 +183,12 @@ TEST_CUSTOM_PROGS += $(ALU32_BUILD_DIR)/test_progs_32 $(ALU32_BUILD_DIR): mkdir -p $@ -$(ALU32_BUILD_DIR)/urandom_read: $(OUTPUT)/urandom_read +$(ALU32_BUILD_DIR)/urandom_read: $(OUTPUT)/urandom_read | $(ALU32_BUILD_DIR) cp $< $@ $(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(OUTPUT)/libbpf.a\ - $(ALU32_BUILD_DIR) \ - $(ALU32_BUILD_DIR)/urandom_read + $(ALU32_BUILD_DIR)/urandom_read \ + | $(ALU32_BUILD_DIR) $(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) \ -o $(ALU32_BUILD_DIR)/test_progs_32 \ test_progs.c test_stub.c trace_helpers.c prog_tests/*.c \ @@ -197,8 +197,8 @@ $(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(OUTPUT)/libbpf.a\ $(ALU32_BUILD_DIR)/test_progs_32: $(PROG_TESTS_H) $(ALU32_BUILD_DIR)/test_progs_32: prog_tests/*.c -$(ALU32_BUILD_DIR)/%.o: progs/%.c $(ALU32_BUILD_DIR) \ - $(ALU32_BUILD_DIR)/test_progs_32 +$(ALU32_BUILD_DIR)/%.o: progs/%.c $(ALU32_BUILD_DIR)/test_progs_32 \ + | $(ALU32_BUILD_DIR) ($(CLANG) $(CLANG_FLAGS) -O2 -target bpf -emit-llvm -c $< -o - || \ echo "clang failed") | \ $(LLC) -march=bpf -mattr=+alu32 -mcpu=$(CPU) $(LLC_FLAGS) \ @@ -236,7 +236,7 @@ $(PROG_TESTS_DIR): mkdir -p $@ PROG_TESTS_FILES := $(wildcard prog_tests/*.c) -$(PROG_TESTS_H): $(PROG_TESTS_DIR) $(PROG_TESTS_FILES) +$(PROG_TESTS_H): $(PROG_TESTS_FILES) | $(PROG_TESTS_DIR) $(shell ( cd prog_tests/; \ echo '/* Generated header, do not edit */'; \ echo '#ifdef DECLARE'; \ @@ -257,7 +257,7 @@ MAP_TESTS_H := $(MAP_TESTS_DIR)/tests.h test_maps.c: $(MAP_TESTS_H) $(OUTPUT)/test_maps: CFLAGS += $(TEST_MAPS_CFLAGS) MAP_TESTS_FILES := $(wildcard map_tests/*.c) -$(MAP_TESTS_H): $(MAP_TESTS_DIR) $(MAP_TESTS_FILES) +$(MAP_TESTS_H): $(MAP_TESTS_FILES) | $(MAP_TESTS_DIR) $(shell ( cd map_tests/; \ echo '/* Generated header, do not edit */'; \ echo '#ifdef DECLARE'; \ @@ -279,7 +279,7 @@ $(VERIFIER_TESTS_DIR): mkdir -p $@ VERIFIER_TEST_FILES := $(wildcard verifier/*.c) -$(OUTPUT)/verifier/tests.h: $(VERIFIER_TESTS_DIR) $(VERIFIER_TEST_FILES) +$(OUTPUT)/verifier/tests.h: $(VERIFIER_TEST_FILES) | $(VERIFIER_TESTS_DIR) $(shell ( cd verifier/; \ echo '/* Generated header, do not edit */'; \ echo '#ifdef FILL_ARRAY'; \ -- cgit v1.2.3-55-g7522 From f83a46d4711e789642033eb00fed8e017e34fe7d Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 12 Jul 2019 15:59:50 +0200 Subject: selftests/bpf: put test_stub.o into $(OUTPUT) Add a rule to put test_stub.o in $(OUTPUT) and change the references to it accordingly. This prevents test_stub.o from being created in the source directory. Signed-off-by: Ilya Leoshkevich Acked-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 0e003fb6641b..1296253b3422 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -83,13 +83,16 @@ all: $(TEST_CUSTOM_PROGS) $(OUTPUT)/urandom_read: $(OUTPUT)/%: %.c $(CC) -o $@ $< -Wl,--build-id +$(OUTPUT)/test_stub.o: test_stub.c + $(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) -c -o $@ $< + $(OUTPUT)/test_maps: map_tests/*.c BPFOBJ := $(OUTPUT)/libbpf.a -$(TEST_GEN_PROGS): test_stub.o $(BPFOBJ) +$(TEST_GEN_PROGS): $(OUTPUT)/test_stub.o $(BPFOBJ) -$(TEST_GEN_PROGS_EXTENDED): test_stub.o $(OUTPUT)/libbpf.a +$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(OUTPUT)/libbpf.a $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c $(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c -- cgit v1.2.3-55-g7522 From 81f522f96f788ba978f505d5a03a039f75360b8b Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 15 Jul 2019 11:11:03 +0200 Subject: samples/bpf: build with -D__TARGET_ARCH_$(SRCARCH) While $ARCH can be relatively flexible (see Makefile and tools/scripts/Makefile.arch), $SRCARCH always corresponds to a directory name under arch/. Therefore, build samples with -D__TARGET_ARCH_$(SRCARCH), since that matches the expectations of bpf_helpers.h. Signed-off-by: Ilya Leoshkevich Acked-by: Vasily Gorbik Acked-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- samples/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index f90daadfbc89..1d9be26b4edd 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -284,7 +284,7 @@ $(obj)/%.o: $(src)/%.c $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \ -I$(srctree)/tools/testing/selftests/bpf/ \ -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \ - -D__TARGET_ARCH_$(ARCH) -Wno-compare-distinct-pointer-types \ + -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ -Wno-address-of-packed-member -Wno-tautological-compare \ -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \ -- cgit v1.2.3-55-g7522 From b43995469e5804636a55372e9bbb17ccb22441c5 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 15 Jul 2019 09:39:52 -0700 Subject: bpf: rename bpf_ctx_wide_store_ok to bpf_ctx_wide_access_ok Rename bpf_ctx_wide_store_ok to bpf_ctx_wide_access_ok to indicate that it can be used for both loads and stores. Cc: Yonghong Song Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- include/linux/filter.h | 2 +- net/core/filter.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 6d944369ca87..ff65d22cf336 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -747,7 +747,7 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default) return size <= size_default && (size & (size - 1)) == 0; } -#define bpf_ctx_wide_store_ok(off, size, type, field) \ +#define bpf_ctx_wide_access_ok(off, size, type, field) \ (size == sizeof(__u64) && \ off >= offsetof(type, field) && \ off + sizeof(__u64) <= offsetofend(type, field) && \ diff --git a/net/core/filter.c b/net/core/filter.c index 47f6386fb17a..c5983ddb1a9f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6890,14 +6890,14 @@ static bool sock_addr_is_valid_access(int off, int size, if (!bpf_ctx_narrow_access_ok(off, size, size_default)) return false; } else { - if (bpf_ctx_wide_store_ok(off, size, - struct bpf_sock_addr, - user_ip6)) + if (bpf_ctx_wide_access_ok(off, size, + struct bpf_sock_addr, + user_ip6)) return true; - if (bpf_ctx_wide_store_ok(off, size, - struct bpf_sock_addr, - msg_src_ip6)) + if (bpf_ctx_wide_access_ok(off, size, + struct bpf_sock_addr, + msg_src_ip6)) return true; if (size != size_default) -- cgit v1.2.3-55-g7522 From d4ecfeb15494ec261fef2d25d96eecba66f0b182 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 15 Jul 2019 09:39:53 -0700 Subject: bpf: allow wide aligned loads for bpf_sock_addr user_ip6 and msg_src_ip6 Add explicit check for u64 loads of user_ip6 and msg_src_ip6 and update the comment. Cc: Yonghong Song Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 4 ++-- net/core/filter.c | 12 +++++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6f68438aa4ed..81be929b89fc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3248,7 +3248,7 @@ struct bpf_sock_addr { __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write. * Stored in network byte order. */ - __u32 user_ip6[4]; /* Allows 1,2,4-byte read and 4,8-byte write. + __u32 user_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. * Stored in network byte order. */ __u32 user_port; /* Allows 4-byte read and write. @@ -3260,7 +3260,7 @@ struct bpf_sock_addr { __u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write. * Stored in network byte order. */ - __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read and 4,8-byte write. + __u32 msg_src_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. * Stored in network byte order. */ __bpf_md_ptr(struct bpf_sock *, sk); diff --git a/net/core/filter.c b/net/core/filter.c index c5983ddb1a9f..0f6854ccf894 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6884,9 +6884,19 @@ static bool sock_addr_is_valid_access(int off, int size, case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4): case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0], msg_src_ip6[3]): - /* Only narrow read access allowed for now. */ if (type == BPF_READ) { bpf_ctx_record_field_size(info, size_default); + + if (bpf_ctx_wide_access_ok(off, size, + struct bpf_sock_addr, + user_ip6)) + return true; + + if (bpf_ctx_wide_access_ok(off, size, + struct bpf_sock_addr, + msg_src_ip6)) + return true; + if (!bpf_ctx_narrow_access_ok(off, size, size_default)) return false; } else { -- cgit v1.2.3-55-g7522 From 8b45063c8584b3e6caff0b109dd0f47b35487aba Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 15 Jul 2019 09:39:54 -0700 Subject: selftests/bpf: rename verifier/wide_store.c to verifier/wide_access.c Move the file and rename internal BPF_SOCK_ADDR define to BPF_SOCK_ADDR_STORE. This selftest will be extended in the next commit with the wide loads. Cc: Yonghong Song Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/verifier/wide_access.c | 36 ++++++++++++++++++++++ tools/testing/selftests/bpf/verifier/wide_store.c | 36 ---------------------- 2 files changed, 36 insertions(+), 36 deletions(-) create mode 100644 tools/testing/selftests/bpf/verifier/wide_access.c delete mode 100644 tools/testing/selftests/bpf/verifier/wide_store.c diff --git a/tools/testing/selftests/bpf/verifier/wide_access.c b/tools/testing/selftests/bpf/verifier/wide_access.c new file mode 100644 index 000000000000..3ac97328432f --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/wide_access.c @@ -0,0 +1,36 @@ +#define BPF_SOCK_ADDR_STORE(field, off, res, err) \ +{ \ + "wide store to bpf_sock_addr." #field "[" #off "]", \ + .insns = { \ + BPF_MOV64_IMM(BPF_REG_0, 1), \ + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, \ + offsetof(struct bpf_sock_addr, field[off])), \ + BPF_EXIT_INSN(), \ + }, \ + .result = res, \ + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, \ + .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, \ + .errstr = err, \ +} + +/* user_ip6[0] is u64 aligned */ +BPF_SOCK_ADDR_STORE(user_ip6, 0, ACCEPT, + NULL), +BPF_SOCK_ADDR_STORE(user_ip6, 1, REJECT, + "invalid bpf_context access off=12 size=8"), +BPF_SOCK_ADDR_STORE(user_ip6, 2, ACCEPT, + NULL), +BPF_SOCK_ADDR_STORE(user_ip6, 3, REJECT, + "invalid bpf_context access off=20 size=8"), + +/* msg_src_ip6[0] is _not_ u64 aligned */ +BPF_SOCK_ADDR_STORE(msg_src_ip6, 0, REJECT, + "invalid bpf_context access off=44 size=8"), +BPF_SOCK_ADDR_STORE(msg_src_ip6, 1, ACCEPT, + NULL), +BPF_SOCK_ADDR_STORE(msg_src_ip6, 2, REJECT, + "invalid bpf_context access off=52 size=8"), +BPF_SOCK_ADDR_STORE(msg_src_ip6, 3, REJECT, + "invalid bpf_context access off=56 size=8"), + +#undef BPF_SOCK_ADDR_STORE diff --git a/tools/testing/selftests/bpf/verifier/wide_store.c b/tools/testing/selftests/bpf/verifier/wide_store.c deleted file mode 100644 index 8fe99602ded4..000000000000 --- a/tools/testing/selftests/bpf/verifier/wide_store.c +++ /dev/null @@ -1,36 +0,0 @@ -#define BPF_SOCK_ADDR(field, off, res, err) \ -{ \ - "wide store to bpf_sock_addr." #field "[" #off "]", \ - .insns = { \ - BPF_MOV64_IMM(BPF_REG_0, 1), \ - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, \ - offsetof(struct bpf_sock_addr, field[off])), \ - BPF_EXIT_INSN(), \ - }, \ - .result = res, \ - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, \ - .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, \ - .errstr = err, \ -} - -/* user_ip6[0] is u64 aligned */ -BPF_SOCK_ADDR(user_ip6, 0, ACCEPT, - NULL), -BPF_SOCK_ADDR(user_ip6, 1, REJECT, - "invalid bpf_context access off=12 size=8"), -BPF_SOCK_ADDR(user_ip6, 2, ACCEPT, - NULL), -BPF_SOCK_ADDR(user_ip6, 3, REJECT, - "invalid bpf_context access off=20 size=8"), - -/* msg_src_ip6[0] is _not_ u64 aligned */ -BPF_SOCK_ADDR(msg_src_ip6, 0, REJECT, - "invalid bpf_context access off=44 size=8"), -BPF_SOCK_ADDR(msg_src_ip6, 1, ACCEPT, - NULL), -BPF_SOCK_ADDR(msg_src_ip6, 2, REJECT, - "invalid bpf_context access off=52 size=8"), -BPF_SOCK_ADDR(msg_src_ip6, 3, REJECT, - "invalid bpf_context access off=56 size=8"), - -#undef BPF_SOCK_ADDR -- cgit v1.2.3-55-g7522 From 7dd8d6119d481da1c4619eb7d8cfef33edfbee81 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 15 Jul 2019 09:39:55 -0700 Subject: selftests/bpf: add selftests for wide loads Mirror existing wide store tests with wide loads. The only significant difference is expected error string. Cc: Yonghong Song Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/verifier/wide_access.c | 37 ++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/wide_access.c b/tools/testing/selftests/bpf/verifier/wide_access.c index 3ac97328432f..ccade9312d21 100644 --- a/tools/testing/selftests/bpf/verifier/wide_access.c +++ b/tools/testing/selftests/bpf/verifier/wide_access.c @@ -34,3 +34,40 @@ BPF_SOCK_ADDR_STORE(msg_src_ip6, 3, REJECT, "invalid bpf_context access off=56 size=8"), #undef BPF_SOCK_ADDR_STORE + +#define BPF_SOCK_ADDR_LOAD(field, off, res, err) \ +{ \ + "wide load from bpf_sock_addr." #field "[" #off "]", \ + .insns = { \ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, \ + offsetof(struct bpf_sock_addr, field[off])), \ + BPF_MOV64_IMM(BPF_REG_0, 1), \ + BPF_EXIT_INSN(), \ + }, \ + .result = res, \ + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, \ + .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, \ + .errstr = err, \ +} + +/* user_ip6[0] is u64 aligned */ +BPF_SOCK_ADDR_LOAD(user_ip6, 0, ACCEPT, + NULL), +BPF_SOCK_ADDR_LOAD(user_ip6, 1, REJECT, + "invalid bpf_context access off=12 size=8"), +BPF_SOCK_ADDR_LOAD(user_ip6, 2, ACCEPT, + NULL), +BPF_SOCK_ADDR_LOAD(user_ip6, 3, REJECT, + "invalid bpf_context access off=20 size=8"), + +/* msg_src_ip6[0] is _not_ u64 aligned */ +BPF_SOCK_ADDR_LOAD(msg_src_ip6, 0, REJECT, + "invalid bpf_context access off=44 size=8"), +BPF_SOCK_ADDR_LOAD(msg_src_ip6, 1, ACCEPT, + NULL), +BPF_SOCK_ADDR_LOAD(msg_src_ip6, 2, REJECT, + "invalid bpf_context access off=52 size=8"), +BPF_SOCK_ADDR_LOAD(msg_src_ip6, 3, REJECT, + "invalid bpf_context access off=56 size=8"), + +#undef BPF_SOCK_ADDR_LOAD -- cgit v1.2.3-55-g7522 From 073a4834a81368c8af9cc9e99ff83245600a8f6b Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 15 Jul 2019 09:39:56 -0700 Subject: bpf: sync bpf.h to tools/ Update bpf_sock_addr comments to indicate support for 8-byte reads from user_ip6 and msg_src_ip6. Cc: Yonghong Song Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f506c68b2612..1f61374fcf81 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3245,7 +3245,7 @@ struct bpf_sock_addr { __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write. * Stored in network byte order. */ - __u32 user_ip6[4]; /* Allows 1,2,4-byte read and 4,8-byte write. + __u32 user_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. * Stored in network byte order. */ __u32 user_port; /* Allows 4-byte read and write. @@ -3257,7 +3257,7 @@ struct bpf_sock_addr { __u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write. * Stored in network byte order. */ - __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read and 4,8-byte write. + __u32 msg_src_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. * Stored in network byte order. */ __bpf_md_ptr(struct bpf_sock *, sk); -- cgit v1.2.3-55-g7522 From d5e1db990fcce571a4af2434d6fc2b312d94b45e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 12 Jul 2019 10:44:41 -0700 Subject: selftests/bpf: remove logic duplication in test_verifier test_verifier tests can specify single- and multi-runs tests. Internally logic of handling them is duplicated. Get rid of it by making single run retval/data specification to be a first run spec. Signed-off-by: Andrii Nakryiko Cc: Krzesimir Nowak Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 35 +++++++++++++---------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index b0773291012a..84135d5f4b35 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -86,7 +86,7 @@ struct bpf_test { int fixup_sk_storage_map[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; - uint32_t retval, retval_unpriv, insn_processed; + uint32_t insn_processed; int prog_len; enum { UNDEF, @@ -95,16 +95,20 @@ struct bpf_test { } result, result_unpriv; enum bpf_prog_type prog_type; uint8_t flags; - __u8 data[TEST_DATA_LEN]; void (*fill_helper)(struct bpf_test *self); uint8_t runs; - struct { - uint32_t retval, retval_unpriv; - union { - __u8 data[TEST_DATA_LEN]; - __u64 data64[TEST_DATA_LEN / 8]; - }; - } retvals[MAX_TEST_RUNS]; +#define bpf_testdata_struct_t \ + struct { \ + uint32_t retval, retval_unpriv; \ + union { \ + __u8 data[TEST_DATA_LEN]; \ + __u64 data64[TEST_DATA_LEN / 8]; \ + }; \ + } + union { + bpf_testdata_struct_t; + bpf_testdata_struct_t retvals[MAX_TEST_RUNS]; + }; enum bpf_attach_type expected_attach_type; }; @@ -949,17 +953,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv, uint32_t expected_val; int i; - if (!test->runs) { - expected_val = unpriv && test->retval_unpriv ? - test->retval_unpriv : test->retval; - - err = do_prog_test_run(fd_prog, unpriv, expected_val, - test->data, sizeof(test->data)); - if (err) - run_errs++; - else - run_successes++; - } + if (!test->runs) + test->runs = 1; for (i = 0; i < test->runs; i++) { if (unpriv && test->retvals[i].retval_unpriv) -- cgit v1.2.3-55-g7522 From 3461a0a02141b2612f2916e9250a430de41b0290 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 16 Jul 2019 12:53:53 +0200 Subject: selftests/bpf: fix "alu with different scalars 1" on s390 BPF_LDX_MEM is used to load the least significant byte of the retrieved test_val.index, however, on big-endian machines it ends up retrieving the most significant byte. Change the test to load the whole int in order to make it endianness-independent. Signed-off-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/value_ptr_arith.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index c3de1a2c9dc5..a53d99cebd9f 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -183,7 +183,7 @@ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), BPF_MOV64_IMM(BPF_REG_2, 0), BPF_MOV64_IMM(BPF_REG_3, 0x100000), -- cgit v1.2.3-55-g7522 From 4e59afbbed9638f95adfac8d2f222aca3b2b1bc0 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 16 Jul 2019 12:56:34 +0200 Subject: selftests/bpf: skip nmi test when perf hw events are disabled Some setups (e.g. virtual machines) might run with hardware perf events disabled. If this is the case, skip the test_send_signal_nmi test. Add a separate test involving a software perf event. This allows testing the perf event path regardless of hardware perf event support. Signed-off-by: Ilya Leoshkevich Acked-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/send_signal.c | 33 +++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 67cea1686305..54218ee3c004 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -173,6 +173,18 @@ static int test_send_signal_tracepoint(void) return test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint"); } +static int test_send_signal_perf(void) +{ + struct perf_event_attr attr = { + .sample_period = 1, + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CPU_CLOCK, + }; + + return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, + "perf_sw_event"); +} + static int test_send_signal_nmi(void) { struct perf_event_attr attr = { @@ -181,8 +193,26 @@ static int test_send_signal_nmi(void) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, }; + int pmu_fd; + + /* Some setups (e.g. virtual machines) might run with hardware + * perf events disabled. If this is the case, skip this test. + */ + pmu_fd = syscall(__NR_perf_event_open, &attr, 0 /* pid */, + -1 /* cpu */, -1 /* group_fd */, 0 /* flags */); + if (pmu_fd == -1) { + if (errno == ENOENT) { + printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", + __func__); + return 0; + } + /* Let the test fail with a more informative message */ + } else { + close(pmu_fd); + } - return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, "perf_event"); + return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, + "perf_hw_event"); } void test_send_signal(void) @@ -190,6 +220,7 @@ void test_send_signal(void) int ret = 0; ret |= test_send_signal_tracepoint(); + ret |= test_send_signal_perf(); ret |= test_send_signal_nmi(); if (!ret) printf("test_send_signal:OK\n"); -- cgit v1.2.3-55-g7522 From 763ff0e7d9c72e7094b31e7fb84a859be9325635 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 15 Jul 2019 20:57:03 -0700 Subject: libbpf: fix another GCC8 warning for strncpy Similar issue was fixed in cdfc7f888c2a ("libbpf: fix GCC8 warning for strncpy") already. This one was missed. Fixing now. Cc: Magnus Karlsson Signed-off-by: Andrii Nakryiko Acked-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/xsk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index b33740221b7e..5007b5d4fd2c 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -517,7 +517,8 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, err = -errno; goto out_socket; } - strncpy(xsk->ifname, ifname, IFNAMSIZ); + strncpy(xsk->ifname, ifname, IFNAMSIZ - 1); + xsk->ifname[IFNAMSIZ - 1] = '\0'; err = xsk_set_xdp_socket_config(&xsk->config, usr_config); if (err) -- cgit v1.2.3-55-g7522 From 3cd6e20f54d5003ea895717b67017397056749ce Mon Sep 17 00:00:00 2001 From: Nishka Dasgupta Date: Tue, 16 Jul 2019 11:18:43 +0530 Subject: net: ethernet: ti: cpsw: Add of_node_put() before return and break Each iteration of for_each_available_child_of_node puts the previous node, but in the case of a return or break from the middle of the loop, there is no put, thus causing a memory leak. Hence, for function cpsw_probe_dt, create an extra label err_node_put that puts the last used node and returns ret; modify the return statements in the loop to save the return value in ret and goto this new label. For function cpsw_remove_dt, add an of_node_put before the break. Issue found with Coccinelle. Signed-off-by: Nishka Dasgupta Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index f320f9a0de8b..32a89744972d 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2570,7 +2570,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, ret = PTR_ERR(slave_data->ifphy); dev_err(&pdev->dev, "%d: Error retrieving port phy: %d\n", i, ret); - return ret; + goto err_node_put; } slave_data->slave_node = slave_node; @@ -2589,7 +2589,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, if (ret) { if (ret != -EPROBE_DEFER) dev_err(&pdev->dev, "failed to register fixed-link phy: %d\n", ret); - return ret; + goto err_node_put; } slave_data->phy_node = of_node_get(slave_node); } else if (parp) { @@ -2607,7 +2607,8 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, of_node_put(mdio_node); if (!mdio) { dev_err(&pdev->dev, "Missing mdio platform device\n"); - return -EINVAL; + ret = -EINVAL; + goto err_node_put; } snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), PHY_ID_FMT, mdio->name, phyid); @@ -2622,7 +2623,8 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, if (slave_data->phy_if < 0) { dev_err(&pdev->dev, "Missing or malformed slave[%d] phy-mode property\n", i); - return slave_data->phy_if; + ret = slave_data->phy_if; + goto err_node_put; } no_phy_slave: @@ -2633,7 +2635,7 @@ no_phy_slave: ret = ti_cm_get_macid(&pdev->dev, i, slave_data->mac_addr); if (ret) - return ret; + goto err_node_put; } if (data->dual_emac) { if (of_property_read_u32(slave_node, "dual_emac_res_vlan", @@ -2648,11 +2650,17 @@ no_phy_slave: } i++; - if (i == data->slaves) - break; + if (i == data->slaves) { + ret = 0; + goto err_node_put; + } } return 0; + +err_node_put: + of_node_put(slave_node); + return ret; } static void cpsw_remove_dt(struct platform_device *pdev) @@ -2675,8 +2683,10 @@ static void cpsw_remove_dt(struct platform_device *pdev) of_node_put(slave_data->phy_node); i++; - if (i == data->slaves) + if (i == data->slaves) { + of_node_put(slave_node); break; + } } of_platform_depopulate(&pdev->dev); -- cgit v1.2.3-55-g7522 From 64fc973dee130019b86f2c4634a10d7a3158687e Mon Sep 17 00:00:00 2001 From: Nishka Dasgupta Date: Tue, 16 Jul 2019 11:22:19 +0530 Subject: net: ethernet: mscc: ocelot_board: Add of_node_put() before return Each iteration of for_each_available_child_of_node puts the previous node, but in the case of a return from the middle of the loop, there is no put, thus causing a memory leak. Hence add an of_node_put before the return in two places. Issue found with Coccinelle. Signed-off-by: Nishka Dasgupta Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot_board.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c index 58bde1a9eacb..2451d4a96490 100644 --- a/drivers/net/ethernet/mscc/ocelot_board.c +++ b/drivers/net/ethernet/mscc/ocelot_board.c @@ -291,8 +291,10 @@ static int mscc_ocelot_probe(struct platform_device *pdev) continue; err = ocelot_probe_port(ocelot, port, regs, phy); - if (err) + if (err) { + of_node_put(portnp); return err; + } phy_mode = of_get_phy_mode(portnp); if (phy_mode < 0) @@ -318,6 +320,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev) dev_err(ocelot->dev, "invalid phy mode for port%d, (Q)SGMII only\n", port); + of_node_put(portnp); return -EINVAL; } -- cgit v1.2.3-55-g7522 From cf36dd2f7793e8b2723e3eca518fff1be840a297 Mon Sep 17 00:00:00 2001 From: Nishka Dasgupta Date: Tue, 16 Jul 2019 11:25:04 +0530 Subject: net: ethernet: mediatek: mtk_eth_soc: Add of_node_put() before goto Each iteration of for_each_child_of_node puts the previous node, but in the case of a goto from the middle of the loop, there is no put, thus causing a memory leak. Hence add an of_node_put before the goto. Issue found with Coccinelle. Signed-off-by: Nishka Dasgupta Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index b20b3a5a1ebb..c39d7f4ab1d4 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2548,8 +2548,10 @@ static int mtk_probe(struct platform_device *pdev) continue; err = mtk_add_mac(eth, mac_np); - if (err) + if (err) { + of_node_put(mac_np); goto err_deinit_hw; + } } if (MTK_HAS_CAPS(eth->soc->caps, MTK_SHARED_INT)) { -- cgit v1.2.3-55-g7522 From 7429c6c0d9cb086d8e79f0d2a48ae14851d2115e Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Tue, 16 Jul 2019 17:16:55 +0900 Subject: be2net: Signal that the device cannot transmit during reconfiguration While changing the number of interrupt channels, be2net stops adapter operation (including netif_tx_disable()) but it doesn't signal that it cannot transmit. This may lead dev_watchdog() to falsely trigger during that time. Add the missing call to netif_carrier_off(), following the pattern used in many other drivers. netif_carrier_on() is already taken care of in be_open(). Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 82015c8a5ed7..b7a246b33599 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4697,8 +4697,12 @@ int be_update_queues(struct be_adapter *adapter) struct net_device *netdev = adapter->netdev; int status; - if (netif_running(netdev)) + if (netif_running(netdev)) { + /* device cannot transmit now, avoid dev_watchdog timeouts */ + netif_carrier_off(netdev); + be_close(netdev); + } be_cancel_worker(adapter); -- cgit v1.2.3-55-g7522 From db8051f30fbab7f579d691137f1e23f3bb1ac2eb Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Tue, 16 Jul 2019 11:43:05 -0400 Subject: skbuff: fix compilation warnings in skb_dump() The commit 6413139dfc64 ("skbuff: increase verbosity when dumping skb data") introduced a few compilation warnings. net/core/skbuff.c:766:32: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] level, sk->sk_family, sk->sk_type, sk->sk_protocol); ^~~~~~~~~~~ net/core/skbuff.c:766:45: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] level, sk->sk_family, sk->sk_type, sk->sk_protocol); ^~~~~~~~~~~~~~~ Fix them by using the proper types. Fixes: 6413139dfc64 ("skbuff: increase verbosity when dumping skb data") Signed-off-by: Qian Cai Reviewed-by: Nathan Chancellor Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6f1e31f674a3..0338820ee0ec 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -762,7 +762,7 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt) printk("%sdev name=%s feat=0x%pNF\n", level, dev->name, &dev->features); if (sk) - printk("%ssk family=%hu type=%hu proto=%hu\n", + printk("%ssk family=%hu type=%u proto=%u\n", level, sk->sk_family, sk->sk_type, sk->sk_protocol); if (full_pkt && headroom) -- cgit v1.2.3-55-g7522 From a5b647007e9d794956dbed9339a3354a9fc4d5c3 Mon Sep 17 00:00:00 2001 From: Vedang Patel Date: Tue, 16 Jul 2019 12:52:18 -0700 Subject: fix: taprio: Change type of txtime-delay parameter to u32 During the review of the iproute2 patches for txtime-assist mode, it was pointed out that it does not make sense for the txtime-delay parameter to be negative. So, change the type of the parameter from s32 to u32. Fixes: 4cfd5779bd6e ("taprio: Add support for txtime-assist mode") Reported-by: Stephen Hemminger Signed-off-by: Vedang Patel Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 2 +- net/sched/sch_taprio.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 1f623252abe8..18f185299f47 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -1174,7 +1174,7 @@ enum { TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME, /* s64 */ TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */ TCA_TAPRIO_ATTR_FLAGS, /* u32 */ - TCA_TAPRIO_ATTR_TXTIME_DELAY, /* s32 */ + TCA_TAPRIO_ATTR_TXTIME_DELAY, /* u32 */ __TCA_TAPRIO_ATTR_MAX, }; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 388750ddc57a..c39db507ba3f 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -75,7 +75,7 @@ struct taprio_sched { struct sched_gate_list __rcu *admin_sched; struct hrtimer advance_timer; struct list_head taprio_list; - int txtime_delay; + u32 txtime_delay; }; static ktime_t sched_base_time(const struct sched_gate_list *sched) @@ -1113,7 +1113,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, goto unlock; } - q->txtime_delay = nla_get_s32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]); + q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]); } if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) && @@ -1430,7 +1430,7 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) goto options_error; if (q->txtime_delay && - nla_put_s32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay)) + nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay)) goto options_error; if (oper && dump_schedule(skb, oper)) -- cgit v1.2.3-55-g7522 From 36646b22ce2455c25c76c9023a0e5ca58d62899a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 16 Jul 2019 12:38:36 -0700 Subject: selftests/bpf: fix test_verifier/test_maps make dependencies e46fc22e60a4 ("selftests/bpf: make directory prerequisites order-only") exposed existing problem in Makefile for test_verifier and test_maps tests: their dependency on auto-generated header file with a list of all tests wasn't recorded explicitly. This patch fixes these issues. Fixes: 51a0e301a563 ("bpf: Add BPF_MAP_TYPE_SK_STORAGE test to test_maps") Fixes: 6b7b6995c43e ("selftests: bpf: tests.h should depend on .c files, not the output") Cc: Ilya Leoshkevich Cc: Stanislav Fomichev Cc: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 1296253b3422..9bc68d8abc5f 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -86,8 +86,6 @@ $(OUTPUT)/urandom_read: $(OUTPUT)/%: %.c $(OUTPUT)/test_stub.o: test_stub.c $(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) -c -o $@ $< -$(OUTPUT)/test_maps: map_tests/*.c - BPFOBJ := $(OUTPUT)/libbpf.a $(TEST_GEN_PROGS): $(OUTPUT)/test_stub.o $(BPFOBJ) @@ -257,9 +255,10 @@ MAP_TESTS_DIR = $(OUTPUT)/map_tests $(MAP_TESTS_DIR): mkdir -p $@ MAP_TESTS_H := $(MAP_TESTS_DIR)/tests.h +MAP_TESTS_FILES := $(wildcard map_tests/*.c) test_maps.c: $(MAP_TESTS_H) $(OUTPUT)/test_maps: CFLAGS += $(TEST_MAPS_CFLAGS) -MAP_TESTS_FILES := $(wildcard map_tests/*.c) +$(OUTPUT)/test_maps: test_maps.c $(MAP_TESTS_H) $(MAP_TESTS_FILES) $(MAP_TESTS_H): $(MAP_TESTS_FILES) | $(MAP_TESTS_DIR) $(shell ( cd map_tests/; \ echo '/* Generated header, do not edit */'; \ @@ -276,6 +275,7 @@ $(MAP_TESTS_H): $(MAP_TESTS_FILES) | $(MAP_TESTS_DIR) VERIFIER_TESTS_H := $(OUTPUT)/verifier/tests.h test_verifier.c: $(VERIFIER_TESTS_H) $(OUTPUT)/test_verifier: CFLAGS += $(TEST_VERIFIER_CFLAGS) +$(OUTPUT)/test_verifier: test_verifier.c $(VERIFIER_TESTS_H) VERIFIER_TESTS_DIR = $(OUTPUT)/verifier $(VERIFIER_TESTS_DIR): -- cgit v1.2.3-55-g7522 From 9d1f62a6dcf0de3ab184c1f7b7d82117dcbab090 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 16 Jul 2019 12:38:37 -0700 Subject: selftests/bpf: structure test_{progs, maps, verifier} test runners uniformly It's easier to follow the logic if it's structured the same. There is just slight difference between test_progs/test_maps and test_verifier. test_verifier's verifier/*.c files are not really compilable C files (they are more of include headers), so they can't be specified as explicit dependencies of test_verifier. Cc: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 9bc68d8abc5f..11c9c62c3362 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -176,6 +176,7 @@ endif endif TEST_PROGS_CFLAGS := -I. -I$(OUTPUT) +TEST_MAPS_CFLAGS := -I. -I$(OUTPUT) TEST_VERIFIER_CFLAGS := -I. -I$(OUTPUT) -Iverifier ifneq ($(SUBREG_CODEGEN),) @@ -227,16 +228,14 @@ ifeq ($(DWARF2BTF),y) $(BTF_PAHOLE) -J $@ endif -PROG_TESTS_H := $(OUTPUT)/prog_tests/tests.h -test_progs.c: $(PROG_TESTS_H) -$(OUTPUT)/test_progs: CFLAGS += $(TEST_PROGS_CFLAGS) -$(OUTPUT)/test_progs: prog_tests/*.c - PROG_TESTS_DIR = $(OUTPUT)/prog_tests $(PROG_TESTS_DIR): mkdir -p $@ - +PROG_TESTS_H := $(PROG_TESTS_DIR)/tests.h PROG_TESTS_FILES := $(wildcard prog_tests/*.c) +test_progs.c: $(PROG_TESTS_H) +$(OUTPUT)/test_progs: CFLAGS += $(TEST_PROGS_CFLAGS) +$(OUTPUT)/test_progs: test_progs.c $(PROG_TESTS_H) $(PROG_TESTS_FILES) $(PROG_TESTS_H): $(PROG_TESTS_FILES) | $(PROG_TESTS_DIR) $(shell ( cd prog_tests/; \ echo '/* Generated header, do not edit */'; \ @@ -250,7 +249,6 @@ $(PROG_TESTS_H): $(PROG_TESTS_FILES) | $(PROG_TESTS_DIR) echo '#endif' \ ) > $(PROG_TESTS_H)) -TEST_MAPS_CFLAGS := -I. -I$(OUTPUT) MAP_TESTS_DIR = $(OUTPUT)/map_tests $(MAP_TESTS_DIR): mkdir -p $@ @@ -272,17 +270,15 @@ $(MAP_TESTS_H): $(MAP_TESTS_FILES) | $(MAP_TESTS_DIR) echo '#endif' \ ) > $(MAP_TESTS_H)) -VERIFIER_TESTS_H := $(OUTPUT)/verifier/tests.h -test_verifier.c: $(VERIFIER_TESTS_H) -$(OUTPUT)/test_verifier: CFLAGS += $(TEST_VERIFIER_CFLAGS) -$(OUTPUT)/test_verifier: test_verifier.c $(VERIFIER_TESTS_H) - VERIFIER_TESTS_DIR = $(OUTPUT)/verifier $(VERIFIER_TESTS_DIR): mkdir -p $@ - +VERIFIER_TESTS_H := $(VERIFIER_TESTS_DIR)/tests.h VERIFIER_TEST_FILES := $(wildcard verifier/*.c) -$(OUTPUT)/verifier/tests.h: $(VERIFIER_TEST_FILES) | $(VERIFIER_TESTS_DIR) +test_verifier.c: $(VERIFIER_TESTS_H) +$(OUTPUT)/test_verifier: CFLAGS += $(TEST_VERIFIER_CFLAGS) +$(OUTPUT)/test_verifier: test_verifier.c $(VERIFIER_TESTS_H) +$(VERIFIER_TESTS_H): $(VERIFIER_TEST_FILES) | $(VERIFIER_TESTS_DIR) $(shell ( cd verifier/; \ echo '/* Generated header, do not edit */'; \ echo '#ifdef FILL_ARRAY'; \ -- cgit v1.2.3-55-g7522 From 1cb59a6074e23c6f6513642f752a6d8d38327354 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 16 Jul 2019 14:58:27 +0200 Subject: selftests/bpf: fix perf_buffer on s390 perf_buffer test fails for exactly the same reason test_attach_probe used to fail: different nanosleep syscall kprobe name. Reuse the test_attach_probe fix. Fixes: ee5cf82ce04a ("selftests/bpf: test perf buffer API") Signed-off-by: Ilya Leoshkevich Acked-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/attach_probe.c | 12 ++---------- tools/testing/selftests/bpf/prog_tests/perf_buffer.c | 8 +------- tools/testing/selftests/bpf/test_progs.h | 8 ++++++++ 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 47af4afc5013..5ecc267d98b0 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -21,14 +21,6 @@ ssize_t get_base_addr() { return -EINVAL; } -#ifdef __x86_64__ -#define SYS_KPROBE_NAME "__x64_sys_nanosleep" -#elif defined(__s390x__) -#define SYS_KPROBE_NAME "__s390x_sys_nanosleep" -#else -#define SYS_KPROBE_NAME "sys_nanosleep" -#endif - void test_attach_probe(void) { const char *kprobe_name = "kprobe/sys_nanosleep"; @@ -86,7 +78,7 @@ void test_attach_probe(void) kprobe_link = bpf_program__attach_kprobe(kprobe_prog, false /* retprobe */, - SYS_KPROBE_NAME); + SYS_NANOSLEEP_KPROBE_NAME); if (CHECK(IS_ERR(kprobe_link), "attach_kprobe", "err %ld\n", PTR_ERR(kprobe_link))) { kprobe_link = NULL; @@ -94,7 +86,7 @@ void test_attach_probe(void) } kretprobe_link = bpf_program__attach_kprobe(kretprobe_prog, true /* retprobe */, - SYS_KPROBE_NAME); + SYS_NANOSLEEP_KPROBE_NAME); if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe", "err %ld\n", PTR_ERR(kretprobe_link))) { kretprobe_link = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index 3f1ef95865ff..3003fddc0613 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -5,12 +5,6 @@ #include #include -#ifdef __x86_64__ -#define SYS_KPROBE_NAME "__x64_sys_nanosleep" -#else -#define SYS_KPROBE_NAME "sys_nanosleep" -#endif - static void on_sample(void *ctx, int cpu, void *data, __u32 size) { int cpu_data = *(int *)data, duration = 0; @@ -56,7 +50,7 @@ void test_perf_buffer(void) /* attach kprobe */ link = bpf_program__attach_kprobe(prog, false /* retprobe */, - SYS_KPROBE_NAME); + SYS_NANOSLEEP_KPROBE_NAME); if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link))) goto out_close; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index f095e1d4c657..49e0f7d85643 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -92,3 +92,11 @@ int compare_map_keys(int map1_fd, int map2_fd); int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); int extract_build_id(char *build_id, size_t size); void *spin_lock_thread(void *arg); + +#ifdef __x86_64__ +#define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep" +#elif defined(__s390x__) +#define SYS_NANOSLEEP_KPROBE_NAME "__s390x_sys_nanosleep" +#else +#define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep" +#endif -- cgit v1.2.3-55-g7522 From fdd258d49e88a9e0b49ef04a506a796f1c768a8e Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 15 Jul 2019 14:10:17 +0900 Subject: caif-hsi: fix possible deadlock in cfhsi_exit_module() cfhsi_exit_module() calls unregister_netdev() under rtnl_lock(). but unregister_netdev() internally calls rtnl_lock(). So deadlock would occur. Fixes: c41254006377 ("caif-hsi: Add rtnl support") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/caif/caif_hsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c index b2f10b6ad6e5..bbb2575d4728 100644 --- a/drivers/net/caif/caif_hsi.c +++ b/drivers/net/caif/caif_hsi.c @@ -1455,7 +1455,7 @@ static void __exit cfhsi_exit_module(void) rtnl_lock(); list_for_each_safe(list_node, n, &cfhsi_list) { cfhsi = list_entry(list_node, struct cfhsi, list); - unregister_netdev(cfhsi->ndev); + unregister_netdevice(cfhsi->ndev); } rtnl_unlock(); } -- cgit v1.2.3-55-g7522 From 86fda90ab5888d983f7307442ba62978e3504bd3 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Tue, 16 Jul 2019 07:50:02 +0530 Subject: net: sctp: fix warning "NULL check before some freeing functions is not needed" This patch removes NULL checks before calling kfree. fixes below issues reported by coccicheck net/sctp/sm_make_chunk.c:2586:3-8: WARNING: NULL check before some freeing functions is not needed. net/sctp/sm_make_chunk.c:2652:3-8: WARNING: NULL check before some freeing functions is not needed. net/sctp/sm_make_chunk.c:2667:3-8: WARNING: NULL check before some freeing functions is not needed. net/sctp/sm_make_chunk.c:2684:3-8: WARNING: NULL check before some freeing functions is not needed. Signed-off-by: Hariprasad Kelam Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index ed39396b9bba..36bd8a6e82df 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2582,8 +2582,7 @@ do_addr_param: case SCTP_PARAM_STATE_COOKIE: asoc->peer.cookie_len = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); - if (asoc->peer.cookie) - kfree(asoc->peer.cookie); + kfree(asoc->peer.cookie); asoc->peer.cookie = kmemdup(param.cookie->body, asoc->peer.cookie_len, gfp); if (!asoc->peer.cookie) retval = 0; @@ -2648,8 +2647,7 @@ do_addr_param: goto fall_through; /* Save peer's random parameter */ - if (asoc->peer.peer_random) - kfree(asoc->peer.peer_random); + kfree(asoc->peer.peer_random); asoc->peer.peer_random = kmemdup(param.p, ntohs(param.p->length), gfp); if (!asoc->peer.peer_random) { @@ -2663,8 +2661,7 @@ do_addr_param: goto fall_through; /* Save peer's HMAC list */ - if (asoc->peer.peer_hmacs) - kfree(asoc->peer.peer_hmacs); + kfree(asoc->peer.peer_hmacs); asoc->peer.peer_hmacs = kmemdup(param.p, ntohs(param.p->length), gfp); if (!asoc->peer.peer_hmacs) { @@ -2680,8 +2677,7 @@ do_addr_param: if (!ep->auth_enable) goto fall_through; - if (asoc->peer.peer_chunks) - kfree(asoc->peer.peer_chunks); + kfree(asoc->peer.peer_chunks); asoc->peer.peer_chunks = kmemdup(param.p, ntohs(param.p->length), gfp); if (!asoc->peer.peer_chunks) -- cgit v1.2.3-55-g7522 From f11fe1dae1c46762a9a7b0dd142b6bfebe7783ff Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 16 Jul 2019 15:16:02 +0800 Subject: net/sched: Make NET_ACT_CT depends on NF_NAT If NF_NAT is m and NET_ACT_CT is y, build fails: net/sched/act_ct.o: In function `tcf_ct_act': act_ct.c:(.text+0x21ac): undefined reference to `nf_ct_nat_ext_add' act_ct.c:(.text+0x229a): undefined reference to `nf_nat_icmp_reply_translation' act_ct.c:(.text+0x233a): undefined reference to `nf_nat_setup_info' act_ct.c:(.text+0x234a): undefined reference to `nf_nat_alloc_null_binding' act_ct.c:(.text+0x237c): undefined reference to `nf_nat_packet' Reported-by: Hulk Robot Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct") Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- net/sched/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/Kconfig b/net/sched/Kconfig index dd55b9ac3a66..afd2ba157a13 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -942,7 +942,7 @@ config NET_ACT_TUNNEL_KEY config NET_ACT_CT tristate "connection tracking tc action" - depends on NET_CLS_ACT && NF_CONNTRACK + depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT help Say Y here to allow sending the packets to conntrack module. -- cgit v1.2.3-55-g7522 From 2c7da8e6b041a8df2661def81ac90c9c0c719909 Mon Sep 17 00:00:00 2001 From: Gerd Rausch Date: Tue, 16 Jul 2019 15:28:51 -0700 Subject: net/rds: Give fr_state a chance to transition to FRMR_IS_FREE In the context of FRMR (ib_frmr.c): Memory regions make it onto the "clean_list" via "rds_ib_flush_mr_pool", after the memory region has been posted for invalidation via "rds_ib_post_inv". At that point in time, "fr_state" may still be in state "FRMR_IS_INUSE", since the only place where "fr_state" transitions to "FRMR_IS_FREE" is in "rds_ib_mr_cqe_handler", which is triggered by a tasklet. So in case we notice that "fr_state != FRMR_IS_FREE" (see below), we wait for "fr_inv_done" to trigger with a maximum of 10msec. Then we check again, and only put the memory region onto the drop_list (via "rds_ib_free_frmr") in case the situation remains unchanged. This avoids the problem of memory-regions bouncing between "clean_list" and "drop_list" before they even have a chance to be properly invalidated. Signed-off-by: Gerd Rausch Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_frmr.c | 27 ++++++++++++++++++++++++++- net/rds/ib_mr.h | 1 + 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 32ae26ed58a0..6038138d6e38 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -75,6 +75,7 @@ static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev, pool->max_items_soft = pool->max_items; frmr->fr_state = FRMR_IS_FREE; + init_waitqueue_head(&frmr->fr_inv_done); return ibmr; out_no_cigar: @@ -285,6 +286,7 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) if (frmr->fr_inv) { frmr->fr_state = FRMR_IS_FREE; frmr->fr_inv = false; + wake_up(&frmr->fr_inv_done); } atomic_inc(&ic->i_fastreg_wrs); @@ -345,8 +347,31 @@ struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev, } do { - if (ibmr) + if (ibmr) { + /* Memory regions make it onto the "clean_list" via + * "rds_ib_flush_mr_pool", after the memory region has + * been posted for invalidation via "rds_ib_post_inv". + * + * At that point in time, "fr_state" may still be + * in state "FRMR_IS_INUSE", since the only place where + * "fr_state" transitions to "FRMR_IS_FREE" is in + * is in "rds_ib_mr_cqe_handler", which is + * triggered by a tasklet. + * + * So we wait for "fr_inv_done" to trigger + * and only put memory regions onto the drop_list + * that failed (i.e. not marked "FRMR_IS_FREE"). + * + * This avoids the problem of memory-regions bouncing + * between "clean_list" and "drop_list" before they + * even have a chance to be properly invalidated. + */ + frmr = &ibmr->u.frmr; + wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE); + if (frmr->fr_state == FRMR_IS_FREE) + break; rds_ib_free_frmr(ibmr, true); + } ibmr = rds_ib_alloc_frmr(rds_ibdev, nents); if (IS_ERR(ibmr)) return ibmr; diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h index 5da12c248431..42daccb7b5eb 100644 --- a/net/rds/ib_mr.h +++ b/net/rds/ib_mr.h @@ -57,6 +57,7 @@ struct rds_ib_frmr { struct ib_mr *mr; enum rds_ib_fr_state fr_state; bool fr_inv; + wait_queue_head_t fr_inv_done; struct ib_send_wr fr_wr; unsigned int dma_npages; unsigned int sg_byte_len; -- cgit v1.2.3-55-g7522 From c9467447fc50ec3715d8ec98f4da874fce539235 Mon Sep 17 00:00:00 2001 From: Gerd Rausch Date: Tue, 16 Jul 2019 15:28:57 -0700 Subject: net/rds: Get rid of "wait_clean_list_grace" and add locking Waiting for activity on the "clean_list" to quiesce is no substitute for proper locking. We can have multiple threads competing for "llist_del_first" via "rds_ib_reuse_mr", and a single thread competing for "llist_del_all" and "llist_del_first" via "rds_ib_flush_mr_pool". Since "llist_del_first" depends on "list->first->next" not to change in the midst of the operation, simply waiting for all current calls to "rds_ib_reuse_mr" to quiesce across all CPUs is woefully inadequate: By the time "wait_clean_list_grace" is done iterating over all CPUs to see that there is no concurrent caller to "rds_ib_reuse_mr", a new caller may have just shown up on the first CPU. Furthermore, explicitly calls out the need for locking: * Cases where locking is needed: * If we have multiple consumers with llist_del_first used in one consumer, * and llist_del_first or llist_del_all used in other consumers, * then a lock is needed. Also, while at it, drop the unused "pool" parameter from "list_to_llist_nodes". Signed-off-by: Gerd Rausch Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_mr.h | 1 + net/rds/ib_rdma.c | 56 ++++++++++++++++++------------------------------------- 2 files changed, 19 insertions(+), 38 deletions(-) diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h index 42daccb7b5eb..ab26c20ed66f 100644 --- a/net/rds/ib_mr.h +++ b/net/rds/ib_mr.h @@ -98,6 +98,7 @@ struct rds_ib_mr_pool { struct llist_head free_list; /* unused MRs */ struct llist_head clean_list; /* unused & unmapped MRs */ wait_queue_head_t flush_wait; + spinlock_t clean_lock; /* "clean_list" concurrency */ atomic_t free_pinned; /* memory pinned by free MRs */ unsigned long max_items; diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 0b347f46b2f4..6b047e63a769 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -40,9 +40,6 @@ struct workqueue_struct *rds_ib_mr_wq; -static DEFINE_PER_CPU(unsigned long, clean_list_grace); -#define CLEAN_LIST_BUSY_BIT 0 - static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) { struct rds_ib_device *rds_ibdev; @@ -195,12 +192,11 @@ struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool) { struct rds_ib_mr *ibmr = NULL; struct llist_node *ret; - unsigned long *flag; + unsigned long flags; - preempt_disable(); - flag = this_cpu_ptr(&clean_list_grace); - set_bit(CLEAN_LIST_BUSY_BIT, flag); + spin_lock_irqsave(&pool->clean_lock, flags); ret = llist_del_first(&pool->clean_list); + spin_unlock_irqrestore(&pool->clean_lock, flags); if (ret) { ibmr = llist_entry(ret, struct rds_ib_mr, llnode); if (pool->pool_type == RDS_IB_MR_8K_POOL) @@ -209,23 +205,9 @@ struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool) rds_ib_stats_inc(s_ib_rdma_mr_1m_reused); } - clear_bit(CLEAN_LIST_BUSY_BIT, flag); - preempt_enable(); return ibmr; } -static inline void wait_clean_list_grace(void) -{ - int cpu; - unsigned long *flag; - - for_each_online_cpu(cpu) { - flag = &per_cpu(clean_list_grace, cpu); - while (test_bit(CLEAN_LIST_BUSY_BIT, flag)) - cpu_relax(); - } -} - void rds_ib_sync_mr(void *trans_private, int direction) { struct rds_ib_mr *ibmr = trans_private; @@ -324,8 +306,7 @@ static unsigned int llist_append_to_list(struct llist_head *llist, * of clusters. Each cluster has linked llist nodes of * MR_CLUSTER_SIZE mrs that are ready for reuse. */ -static void list_to_llist_nodes(struct rds_ib_mr_pool *pool, - struct list_head *list, +static void list_to_llist_nodes(struct list_head *list, struct llist_node **nodes_head, struct llist_node **nodes_tail) { @@ -402,8 +383,13 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, */ dirty_to_clean = llist_append_to_list(&pool->drop_list, &unmap_list); dirty_to_clean += llist_append_to_list(&pool->free_list, &unmap_list); - if (free_all) + if (free_all) { + unsigned long flags; + + spin_lock_irqsave(&pool->clean_lock, flags); llist_append_to_list(&pool->clean_list, &unmap_list); + spin_unlock_irqrestore(&pool->clean_lock, flags); + } free_goal = rds_ib_flush_goal(pool, free_all); @@ -416,27 +402,20 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, rds_ib_unreg_fmr(&unmap_list, &nfreed, &unpinned, free_goal); if (!list_empty(&unmap_list)) { - /* we have to make sure that none of the things we're about - * to put on the clean list would race with other cpus trying - * to pull items off. The llist would explode if we managed to - * remove something from the clean list and then add it back again - * while another CPU was spinning on that same item in llist_del_first. - * - * This is pretty unlikely, but just in case wait for an llist grace period - * here before adding anything back into the clean list. - */ - wait_clean_list_grace(); - - list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail); + unsigned long flags; + + list_to_llist_nodes(&unmap_list, &clean_nodes, &clean_tail); if (ibmr_ret) { *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode); clean_nodes = clean_nodes->next; } /* more than one entry in llist nodes */ - if (clean_nodes) + if (clean_nodes) { + spin_lock_irqsave(&pool->clean_lock, flags); llist_add_batch(clean_nodes, clean_tail, &pool->clean_list); - + spin_unlock_irqrestore(&pool->clean_lock, flags); + } } atomic_sub(unpinned, &pool->free_pinned); @@ -610,6 +589,7 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev, init_llist_head(&pool->free_list); init_llist_head(&pool->drop_list); init_llist_head(&pool->clean_list); + spin_lock_init(&pool->clean_lock); mutex_init(&pool->flush_lock); init_waitqueue_head(&pool->flush_wait); INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); -- cgit v1.2.3-55-g7522 From 5f33141d2fc05a2d2134ba0e7b47ce4aa88340f0 Mon Sep 17 00:00:00 2001 From: Gerd Rausch Date: Tue, 16 Jul 2019 15:29:02 -0700 Subject: net/rds: Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition after posting IB_WR_LOCAL_INV In order to: 1) avoid a silly bouncing between "clean_list" and "drop_list" triggered by function "rds_ib_reg_frmr" as it is releases frmr regions whose state is not "FRMR_IS_FREE" right away. 2) prevent an invalid access error in a race from a pending "IB_WR_LOCAL_INV" operation with a teardown ("dma_unmap_sg", "put_page") and de-registration ("ib_dereg_mr") of the corresponding memory region. Signed-off-by: Gerd Rausch Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_frmr.c | 65 ++++++++++++++++++++++++++++++++----------------------- net/rds/ib_mr.h | 2 ++ 2 files changed, 40 insertions(+), 27 deletions(-) diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 6038138d6e38..708c553c3da5 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -76,6 +76,7 @@ static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev, frmr->fr_state = FRMR_IS_FREE; init_waitqueue_head(&frmr->fr_inv_done); + init_waitqueue_head(&frmr->fr_reg_done); return ibmr; out_no_cigar: @@ -124,6 +125,7 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) */ ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++); frmr->fr_state = FRMR_IS_INUSE; + frmr->fr_reg = true; memset(®_wr, 0, sizeof(reg_wr)); reg_wr.wr.wr_id = (unsigned long)(void *)ibmr; @@ -144,7 +146,17 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) if (printk_ratelimit()) pr_warn("RDS/IB: %s returned error(%d)\n", __func__, ret); + goto out; } + + /* Wait for the registration to complete in order to prevent an invalid + * access error resulting from a race between the memory region already + * being accessed while registration is still pending. + */ + wait_event(frmr->fr_reg_done, !frmr->fr_reg); + +out: + return ret; } @@ -262,6 +274,19 @@ static int rds_ib_post_inv(struct rds_ib_mr *ibmr) pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret); goto out; } + + /* Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition in order to + * 1) avoid a silly bouncing between "clean_list" and "drop_list" + * triggered by function "rds_ib_reg_frmr" as it is releases frmr + * regions whose state is not "FRMR_IS_FREE" right away. + * 2) prevents an invalid access error in a race + * from a pending "IB_WR_LOCAL_INV" operation + * with a teardown ("dma_unmap_sg", "put_page") + * and de-registration ("ib_dereg_mr") of the corresponding + * memory region. + */ + wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE); + out: return ret; } @@ -289,6 +314,11 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) wake_up(&frmr->fr_inv_done); } + if (frmr->fr_reg) { + frmr->fr_reg = false; + wake_up(&frmr->fr_reg_done); + } + atomic_inc(&ic->i_fastreg_wrs); } @@ -297,14 +327,18 @@ void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed, { struct rds_ib_mr *ibmr, *next; struct rds_ib_frmr *frmr; - int ret = 0; + int ret = 0, ret2; unsigned int freed = *nfreed; /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */ list_for_each_entry(ibmr, list, unmap_list) { - if (ibmr->sg_dma_len) - ret |= rds_ib_post_inv(ibmr); + if (ibmr->sg_dma_len) { + ret2 = rds_ib_post_inv(ibmr); + if (ret2 && !ret) + ret = ret2; + } } + if (ret) pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret); @@ -347,31 +381,8 @@ struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev, } do { - if (ibmr) { - /* Memory regions make it onto the "clean_list" via - * "rds_ib_flush_mr_pool", after the memory region has - * been posted for invalidation via "rds_ib_post_inv". - * - * At that point in time, "fr_state" may still be - * in state "FRMR_IS_INUSE", since the only place where - * "fr_state" transitions to "FRMR_IS_FREE" is in - * is in "rds_ib_mr_cqe_handler", which is - * triggered by a tasklet. - * - * So we wait for "fr_inv_done" to trigger - * and only put memory regions onto the drop_list - * that failed (i.e. not marked "FRMR_IS_FREE"). - * - * This avoids the problem of memory-regions bouncing - * between "clean_list" and "drop_list" before they - * even have a chance to be properly invalidated. - */ - frmr = &ibmr->u.frmr; - wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE); - if (frmr->fr_state == FRMR_IS_FREE) - break; + if (ibmr) rds_ib_free_frmr(ibmr, true); - } ibmr = rds_ib_alloc_frmr(rds_ibdev, nents); if (IS_ERR(ibmr)) return ibmr; diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h index ab26c20ed66f..9045a8c0edff 100644 --- a/net/rds/ib_mr.h +++ b/net/rds/ib_mr.h @@ -58,6 +58,8 @@ struct rds_ib_frmr { enum rds_ib_fr_state fr_state; bool fr_inv; wait_queue_head_t fr_inv_done; + bool fr_reg; + wait_queue_head_t fr_reg_done; struct ib_send_wr fr_wr; unsigned int dma_npages; unsigned int sg_byte_len; -- cgit v1.2.3-55-g7522 From aea01a2234d26ffa9d9ee01e43705824c0c7b08a Mon Sep 17 00:00:00 2001 From: Gerd Rausch Date: Tue, 16 Jul 2019 15:29:07 -0700 Subject: net/rds: Fix NULL/ERR_PTR inconsistency Make function "rds_ib_try_reuse_ibmr" return NULL in case memory region could not be allocated, since callers simply check if the return value is not NULL. Signed-off-by: Gerd Rausch Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_rdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 6b047e63a769..c8c1e3ae8d84 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -450,7 +450,7 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool) rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted); else rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted); - return ERR_PTR(-EAGAIN); + break; } /* We do have some empty MRs. Flush them out. */ @@ -464,7 +464,7 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool) return ibmr; } - return ibmr; + return NULL; } static void rds_ib_mr_pool_flush_worker(struct work_struct *work) -- cgit v1.2.3-55-g7522 From 9547dff1085d5935d6070377023096821033e30c Mon Sep 17 00:00:00 2001 From: Gerd Rausch Date: Tue, 16 Jul 2019 15:29:12 -0700 Subject: net/rds: Set fr_state only to FRMR_IS_FREE if IB_WR_LOCAL_INV had been successful Fix a bug where fr_state first goes to FRMR_IS_STALE, because of a failure of operation IB_WR_LOCAL_INV, but then gets set back to "FRMR_IS_FREE" uncoditionally, even though the operation failed. Signed-off-by: Gerd Rausch Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_frmr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 708c553c3da5..adaa8e99e5a9 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -309,7 +309,8 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) } if (frmr->fr_inv) { - frmr->fr_state = FRMR_IS_FREE; + if (frmr->fr_state == FRMR_IS_INUSE) + frmr->fr_state = FRMR_IS_FREE; frmr->fr_inv = false; wake_up(&frmr->fr_inv_done); } -- cgit v1.2.3-55-g7522 From 3a2886cca703fde5ee21baea9fedf8b1389c59d7 Mon Sep 17 00:00:00 2001 From: Gerd Rausch Date: Tue, 16 Jul 2019 15:29:17 -0700 Subject: net/rds: Keep track of and wait for FRWR segments in use upon shutdown Since "rds_ib_free_frmr" and "rds_ib_free_frmr_list" simply put the FRMR memory segments on the "drop_list" or "free_list", and it is the job of "rds_ib_flush_mr_pool" to reap those entries by ultimately issuing a "IB_WR_LOCAL_INV" work-request, we need to trigger and then wait for all those memory segments attached to a particular connection to be fully released before we can move on to release the QP, CQ, etc. So we make "rds_ib_conn_path_shutdown" wait for one more atomic_t called "i_fastreg_inuse_count" that keeps track of how many FRWR memory segments are out there marked "FRMR_IS_INUSE" (and also wake_up rds_ib_ring_empty_wait, as they go away). Signed-off-by: Gerd Rausch Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib.h | 1 + net/rds/ib_cm.c | 7 +++++++ net/rds/ib_frmr.c | 43 +++++++++++++++++++++++++++++++++++++------ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/net/rds/ib.h b/net/rds/ib.h index 66c03c7665b2..303c6ee8bdb7 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -156,6 +156,7 @@ struct rds_ib_connection { /* To control the number of wrs from fastreg */ atomic_t i_fastreg_wrs; + atomic_t i_fastreg_inuse_count; /* interrupt handling */ struct tasklet_struct i_send_tasklet; diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 8891822eba4f..1b6fd6c8b12b 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -40,6 +40,7 @@ #include "rds_single_path.h" #include "rds.h" #include "ib.h" +#include "ib_mr.h" /* * Set the selected protocol version @@ -993,6 +994,11 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) ic->i_cm_id, err); } + /* kick off "flush_worker" for all pools in order to reap + * all FRMR registrations that are still marked "FRMR_IS_INUSE" + */ + rds_ib_flush_mrs(); + /* * We want to wait for tx and rx completion to finish * before we tear down the connection, but we have to be @@ -1005,6 +1011,7 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) wait_event(rds_ib_ring_empty_wait, rds_ib_ring_empty(&ic->i_recv_ring) && (atomic_read(&ic->i_signaled_sends) == 0) && + (atomic_read(&ic->i_fastreg_inuse_count) == 0) && (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR)); tasklet_kill(&ic->i_send_tasklet); tasklet_kill(&ic->i_recv_tasklet); diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index adaa8e99e5a9..06ecf9d2d4bf 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -32,6 +32,24 @@ #include "ib_mr.h" +static inline void +rds_transition_frwr_state(struct rds_ib_mr *ibmr, + enum rds_ib_fr_state old_state, + enum rds_ib_fr_state new_state) +{ + if (cmpxchg(&ibmr->u.frmr.fr_state, + old_state, new_state) == old_state && + old_state == FRMR_IS_INUSE) { + /* enforce order of ibmr->u.frmr.fr_state update + * before decrementing i_fastreg_inuse_count + */ + smp_mb__before_atomic(); + atomic_dec(&ibmr->ic->i_fastreg_inuse_count); + if (waitqueue_active(&rds_ib_ring_empty_wait)) + wake_up(&rds_ib_ring_empty_wait); + } +} + static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev, int npages) { @@ -118,13 +136,18 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) if (unlikely(ret != ibmr->sg_len)) return ret < 0 ? ret : -EINVAL; + if (cmpxchg(&frmr->fr_state, + FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE) + return -EBUSY; + + atomic_inc(&ibmr->ic->i_fastreg_inuse_count); + /* Perform a WR for the fast_reg_mr. Each individual page * in the sg list is added to the fast reg page list and placed * inside the fast_reg_mr WR. The key used is a rolling 8bit * counter, which should guarantee uniqueness. */ ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++); - frmr->fr_state = FRMR_IS_INUSE; frmr->fr_reg = true; memset(®_wr, 0, sizeof(reg_wr)); @@ -141,7 +164,8 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) ret = ib_post_send(ibmr->ic->i_cm_id->qp, ®_wr.wr, NULL); if (unlikely(ret)) { /* Failure here can be because of -ENOMEM as well */ - frmr->fr_state = FRMR_IS_STALE; + rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE); + atomic_inc(&ibmr->ic->i_fastreg_wrs); if (printk_ratelimit()) pr_warn("RDS/IB: %s returned error(%d)\n", @@ -268,8 +292,12 @@ static int rds_ib_post_inv(struct rds_ib_mr *ibmr) ret = ib_post_send(i_cm_id->qp, s_wr, NULL); if (unlikely(ret)) { - frmr->fr_state = FRMR_IS_STALE; + rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE); frmr->fr_inv = false; + /* enforce order of frmr->fr_inv update + * before incrementing i_fastreg_wrs + */ + smp_mb__before_atomic(); atomic_inc(&ibmr->ic->i_fastreg_wrs); pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret); goto out; @@ -297,7 +325,7 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) struct rds_ib_frmr *frmr = &ibmr->u.frmr; if (wc->status != IB_WC_SUCCESS) { - frmr->fr_state = FRMR_IS_STALE; + rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE); if (rds_conn_up(ic->conn)) rds_ib_conn_error(ic->conn, "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n", @@ -309,8 +337,7 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) } if (frmr->fr_inv) { - if (frmr->fr_state == FRMR_IS_INUSE) - frmr->fr_state = FRMR_IS_FREE; + rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_FREE); frmr->fr_inv = false; wake_up(&frmr->fr_inv_done); } @@ -320,6 +347,10 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) wake_up(&frmr->fr_reg_done); } + /* enforce order of frmr->{fr_reg,fr_inv} update + * before incrementing i_fastreg_wrs + */ + smp_mb__before_atomic(); atomic_inc(&ic->i_fastreg_wrs); } -- cgit v1.2.3-55-g7522 From aa4948937b7d5b4a8e6553f4938f8431b2fd783a Mon Sep 17 00:00:00 2001 From: Gerd Rausch Date: Tue, 16 Jul 2019 15:29:23 -0700 Subject: net/rds: Initialize ic->i_fastreg_wrs upon allocation Otherwise, if an IB connection is torn down before "rds_ib_setup_qp" is called, the value of "ic->i_fastreg_wrs" is still at zero (as it wasn't initialized by "rds_ib_setup_qp"). Consequently "rds_ib_conn_path_shutdown" will spin forever, waiting for it to go back to "RDS_IB_DEFAULT_FR_WR", which of course will never happen as there are no outstanding work requests. Signed-off-by: Gerd Rausch Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 1b6fd6c8b12b..4de0214da63c 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -527,7 +527,6 @@ static int rds_ib_setup_qp(struct rds_connection *conn) attr.qp_type = IB_QPT_RC; attr.send_cq = ic->i_send_cq; attr.recv_cq = ic->i_recv_cq; - atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR); /* * XXX this can fail if max_*_wr is too large? Are we supposed @@ -1139,6 +1138,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) spin_lock_init(&ic->i_ack_lock); #endif atomic_set(&ic->i_signaled_sends, 0); + atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR); /* * rds_ib_conn_shutdown() waits for these to be emptied so they -- cgit v1.2.3-55-g7522 From 3f05e6886a595c9a29a309c52f45326be917823c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 16 Jul 2019 13:57:30 -0700 Subject: net_sched: unset TCQ_F_CAN_BYPASS when adding filters For qdisc's that support TC filters and set TCQ_F_CAN_BYPASS, notably fq_codel, it makes no sense to let packets bypass the TC filters we setup in any scenario, otherwise our packets steering policy could not be enforced. This can be reproduced easily with the following script: ip li add dev dummy0 type dummy ifconfig dummy0 up tc qd add dev dummy0 root fq_codel tc filter add dev dummy0 parent 8001: protocol arp basic action mirred egress redirect dev lo tc filter add dev dummy0 parent 8001: protocol ip basic action mirred egress redirect dev lo ping -I dummy0 192.168.112.1 Without this patch, packets are sent directly to dummy0 without hitting any of the filters. With this patch, packets are redirected to loopback as expected. This fix is not perfect, it only unsets the flag but does not set it back because we have to save the information somewhere in the qdisc if we really want that. Note, both fq_codel and sfq clear this flag in their ->bind_tcf() but this is clearly not sufficient when we don't use any class ID. Fixes: 23624935e0c4 ("net_sched: TCQ_F_CAN_BYPASS generalization") Cc: Eric Dumazet Signed-off-by: Cong Wang Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/cls_api.c | 1 + net/sched/sch_fq_codel.c | 2 -- net/sched/sch_sfq.c | 2 -- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 278014e26aec..d144233423c5 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2152,6 +2152,7 @@ replay: tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_NEWTFILTER, false, rtnl_held); tfilter_put(tp, fh); + q->flags &= ~TCQ_F_CAN_BYPASS; } errout: diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index e2faf33d282b..d59fbcc745d1 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -596,8 +596,6 @@ static unsigned long fq_codel_find(struct Qdisc *sch, u32 classid) static unsigned long fq_codel_bind(struct Qdisc *sch, unsigned long parent, u32 classid) { - /* we cannot bypass queue discipline anymore */ - sch->flags &= ~TCQ_F_CAN_BYPASS; return 0; } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 420bd8411677..68404a9d2ce4 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -824,8 +824,6 @@ static unsigned long sfq_find(struct Qdisc *sch, u32 classid) static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent, u32 classid) { - /* we cannot bypass queue discipline anymore */ - sch->flags &= ~TCQ_F_CAN_BYPASS; return 0; } -- cgit v1.2.3-55-g7522 From a6574227a81ffd443cd715f8946baddccce66ada Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Wed, 17 Jul 2019 12:46:45 -0700 Subject: net: ag71xx: Add missing header ag71xx uses devm_ioremap_nocache. This fixes usage of an implicit function Fixes: d51b6ce441d3 ("net: ethernet: add ag71xx driver") Signed-off-by: Rosen Penev Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/ag71xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 72a57c6cd254..8f450a03a885 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -35,6 +35,7 @@ #include #include #include +#include /* For our NAPI weight bigger does *NOT* mean better - it means more * D-cache misses and lots more wasted cycles than we'll ever -- cgit v1.2.3-55-g7522 From dedfde2fe1c4ccf27179fcb234e2112d065c39bb Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 17 Jul 2019 23:29:07 +0300 Subject: mlxsw: spectrum_dcb: Configure DSCP map as the last rule is removed Spectrum systems use DSCP rewrite map to update DSCP field in egressing packets to correspond to priority that the packet has. Whether rewriting will take place is determined at the point when the packet ingresses the switch: if the port is in Trust L3 mode, packet priority is determined from the DSCP map at the port, and DSCP rewrite will happen. If the port is in Trust L2 mode, 802.1p is used for packet prioritization, and no DSCP rewrite will happen. The driver determines the port trust mode based on whether any DSCP prioritization rules are in effect at given port. If there are any, trust level is L3, otherwise it's L2. When the last DSCP rule is removed, the port is switched to trust L2. Under that scenario, if DSCP of a packet should be rewritten, it should be rewritten to 0. However, when switching to Trust L2, the driver neglects to also update the DSCP rewrite map. The last DSCP rule thus remains in effect, and packets egressing through this port, if they have the right priority, will have their DSCP set according to this rule. Fix by first configuring the rewrite map, and only then switching to trust L2 and bailing out. Fixes: b2b1dab6884e ("mlxsw: spectrum: Support ieee_setapp, ieee_delapp") Signed-off-by: Petr Machata Reported-by: Alex Veber Tested-by: Alex Veber Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c index b25048c6c761..21296fa7f7fb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -408,14 +408,6 @@ static int mlxsw_sp_port_dcb_app_update(struct mlxsw_sp_port *mlxsw_sp_port) have_dscp = mlxsw_sp_port_dcb_app_prio_dscp_map(mlxsw_sp_port, &prio_map); - if (!have_dscp) { - err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port, - MLXSW_REG_QPTS_TRUST_STATE_PCP); - if (err) - netdev_err(mlxsw_sp_port->dev, "Couldn't switch to trust L2\n"); - return err; - } - mlxsw_sp_port_dcb_app_dscp_prio_map(mlxsw_sp_port, default_prio, &dscp_map); err = mlxsw_sp_port_dcb_app_update_qpdpm(mlxsw_sp_port, @@ -432,6 +424,14 @@ static int mlxsw_sp_port_dcb_app_update(struct mlxsw_sp_port *mlxsw_sp_port) return err; } + if (!have_dscp) { + err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port, + MLXSW_REG_QPTS_TRUST_STATE_PCP); + if (err) + netdev_err(mlxsw_sp_port->dev, "Couldn't switch to trust L2\n"); + return err; + } + err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port, MLXSW_REG_QPTS_TRUST_STATE_DSCP); if (err) { -- cgit v1.2.3-55-g7522 From 577fa14d210073ba1ce6237c659a8820312104ad Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 17 Jul 2019 23:29:08 +0300 Subject: mlxsw: spectrum: Do not process learned records with a dummy FID The switch periodically sends notifications about learned FDB entries. Among other things, the notification includes the FID (Filtering Identifier) and the port on which the MAC was learned. In case the driver does not have the FID defined on the relevant port, the following error will be periodically generated: mlxsw_spectrum2 0000:06:00.0 swp32: Failed to find a matching {Port, VID} following FDB notification This is not supposed to happen under normal conditions, but can happen if an ingress tc filter with a redirect action is installed on a bridged port. The redirect action will cause the packet's FID to be changed to the dummy FID and a learning notification will be emitted with this FID - which is not defined on the bridged port. Fix this by having the driver ignore learning notifications generated with the dummy FID and delete them from the device. Another option is to chain an ignore action after the redirect action which will cause the device to disable learning, but this means that we need to consume another action whenever a redirect action is used. In addition, the scenario described above is merely a corner case. Fixes: cedbb8b25948 ("mlxsw: spectrum_flower: Set dummy FID before forward action") Signed-off-by: Ido Schimmel Reported-by: Alex Kushnarov Acked-by: Jiri Pirko Tested-by: Alex Kushnarov Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 1 + drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c | 10 ++++++++++ drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 6 ++++++ 3 files changed, 17 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a252b080dda9..131f62ce9297 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -830,6 +830,7 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p); /* spectrum_fid.c */ +bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index); bool mlxsw_sp_fid_lag_vid_valid(const struct mlxsw_sp_fid *fid); struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_index(struct mlxsw_sp *mlxsw_sp, u16 fid_index); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index 46baf3b44309..8df3cb21baa6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -126,6 +126,16 @@ static const int *mlxsw_sp_packet_type_sfgc_types[] = { [MLXSW_SP_FLOOD_TYPE_MC] = mlxsw_sp_sfgc_mc_packet_types, }; +bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index) +{ + enum mlxsw_sp_fid_type fid_type = MLXSW_SP_FID_TYPE_DUMMY; + struct mlxsw_sp_fid_family *fid_family; + + fid_family = mlxsw_sp->fid_core->fid_family_arr[fid_type]; + + return fid_family->start_index == fid_index; +} + bool mlxsw_sp_fid_lag_vid_valid(const struct mlxsw_sp_fid *fid) { return fid->fid_family->lag_vid_valid; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 50111f228d77..5ecb45118400 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -2468,6 +2468,9 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, goto just_remove; } + if (mlxsw_sp_fid_is_dummy(mlxsw_sp, fid)) + goto just_remove; + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_fid(mlxsw_sp_port, fid); if (!mlxsw_sp_port_vlan) { netdev_err(mlxsw_sp_port->dev, "Failed to find a matching {Port, VID} following FDB notification\n"); @@ -2527,6 +2530,9 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp, goto just_remove; } + if (mlxsw_sp_fid_is_dummy(mlxsw_sp, fid)) + goto just_remove; + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_fid(mlxsw_sp_port, fid); if (!mlxsw_sp_port_vlan) { netdev_err(mlxsw_sp_port->dev, "Failed to find a matching {Port, VID} following FDB notification\n"); -- cgit v1.2.3-55-g7522 From 66f8209547cc11d8e139d45cb7c937c1bbcce182 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 17 Jul 2019 14:41:58 -0700 Subject: fib: relax source validation check for loopback packets In a rare case where we redirect local packets from veth to lo, these packets fail to pass the source validation when rp_filter is turned on, as the tracing shows: <...>-311708 [040] ..s1 7951180.957825: fib_table_lookup: table 254 oif 0 iif 1 src 10.53.180.130 dst 10.53.180.130 tos 0 scope 0 flags 0 <...>-311708 [040] ..s1 7951180.957826: fib_table_lookup_nh: nexthop dev eth0 oif 4 src 10.53.180.130 So, the fib table lookup returns eth0 as the nexthop even though the packets are local and should be routed to loopback nonetheless, but they can't pass the dev match check in fib_info_nh_uses_dev() without this patch. It should be safe to relax this check for this special case, as normally packets coming out of loopback device still have skb_dst so they won't even hit this slow path. Cc: Julian Anastasov Cc: David Ahern Signed-off-by: Cong Wang Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 317339cd7f03..e8bc939b56dd 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -388,6 +388,11 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fib_combine_itag(itag, &res); dev_match = fib_info_nh_uses_dev(res.fi, dev); + /* This is not common, loopback packets retain skb_dst so normally they + * would not even hit this slow path. + */ + dev_match = dev_match || (res.type == RTN_LOCAL && + dev == net->loopback_dev); if (dev_match) { ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST; return ret; -- cgit v1.2.3-55-g7522 From adb701d6cfa432f5dbdf28839b5e64291a7ed30b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 17 Jul 2019 14:41:59 -0700 Subject: selftests: add a test case for rp_filter Add a test case to simulate the loopback packet case fixed in the previous patch. This test gets passed after the fix: IPv4 rp_filter tests TEST: rp_filter passes local packets [ OK ] TEST: rp_filter passes loopback packets [ OK ] Cc: David Ahern Signed-off-by: Cong Wang Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_tests.sh | 35 +++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 9457aaeae092..4465fc2dae14 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -9,12 +9,13 @@ ret=0 ksft_skip=4 # all tests in this script. Can be overridden with -t option -TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw" +TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter" VERBOSE=0 PAUSE_ON_FAIL=no PAUSE=no IP="ip -netns ns1" +NS_EXEC="ip netns exec ns1" log_test() { @@ -433,6 +434,37 @@ fib_carrier_test() fib_carrier_unicast_test } +fib_rp_filter_test() +{ + echo + echo "IPv4 rp_filter tests" + + setup + + set -e + $IP link set dev lo address 52:54:00:6a:c7:5e + $IP link set dummy0 address 52:54:00:6a:c7:5e + $IP link add dummy1 type dummy + $IP link set dummy1 address 52:54:00:6a:c7:5e + $IP link set dev dummy1 up + $NS_EXEC sysctl -qw net.ipv4.conf.all.rp_filter=1 + $NS_EXEC sysctl -qw net.ipv4.conf.all.accept_local=1 + $NS_EXEC sysctl -qw net.ipv4.conf.all.route_localnet=1 + + $NS_EXEC tc qd add dev dummy1 parent root handle 1: fq_codel + $NS_EXEC tc filter add dev dummy1 parent 1: protocol arp basic action mirred egress redirect dev lo + $NS_EXEC tc filter add dev dummy1 parent 1: protocol ip basic action mirred egress redirect dev lo + set +e + + run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 198.51.100.1" + log_test $? 0 "rp_filter passes local packets" + + run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 127.0.0.1" + log_test $? 0 "rp_filter passes loopback packets" + + cleanup +} + ################################################################################ # Tests on nexthop spec @@ -1557,6 +1589,7 @@ do fib_unreg_test|unregister) fib_unreg_test;; fib_down_test|down) fib_down_test;; fib_carrier_test|carrier) fib_carrier_test;; + fib_rp_filter_test|rp_filter) fib_rp_filter_test;; fib_nexthop_test|nexthop) fib_nexthop_test;; ipv6_route_test|ipv6_rt) ipv6_route_test;; ipv4_route_test|ipv4_rt) ipv4_route_test;; -- cgit v1.2.3-55-g7522 From 866e5fd8a7123444d865340ff21c1673f74cdecd Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Wed, 17 Jul 2019 23:43:44 +0200 Subject: tipc: initialize 'validated' field of received packets The tipc_msg_validate() function leaves a boolean flag 'validated' in the validated buffer's control block, to avoid performing this action more than once. However, at reception of new packets, the position of this field may already have been set by lower layer protocols, so that the packet is erroneously perceived as already validated by TIPC. We fix this by initializing the said field to 'false' before performing the initial validation. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/node.c b/net/tipc/node.c index 324a1f91b394..3a5be1d7e572 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1807,6 +1807,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) __skb_queue_head_init(&xmitq); /* Ensure message is well-formed before touching the header */ + TIPC_SKB_CB(skb)->validated = false; if (unlikely(!tipc_msg_validate(&skb))) goto discard; hdr = buf_msg(skb); -- cgit v1.2.3-55-g7522 From 49d05fe2c9d1b4a27761c9807fec39b8155bef9e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 17 Jul 2019 15:08:43 -0700 Subject: ipv6: rt6_check should return NULL if 'from' is NULL Paul reported that l2tp sessions were broken after the commit referenced in the Fixes tag. Prior to this commit rt6_check returned NULL if the rt6_info 'from' was NULL - ie., the dst_entry was disconnected from a FIB entry. Restore that behavior. Fixes: 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes") Reported-by: Paul Donohue Tested-by: Paul Donohue Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4d2e6b31a8d6..6fe3097b9ab7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2563,7 +2563,7 @@ static struct dst_entry *rt6_check(struct rt6_info *rt, { u32 rt_cookie = 0; - if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) || + if (!from || !fib6_get_cookie_safe(from, &rt_cookie) || rt_cookie != cookie) return NULL; -- cgit v1.2.3-55-g7522 From 35cbef9863640f06107144687bd13151bc2e8ce3 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Wed, 17 Jul 2019 14:58:53 -0700 Subject: net: bcmgenet: use promisc for unsupported filters Currently we silently ignore filters if we cannot meet the filter requirements. This will lead to the MAC dropping packets that are expected to pass. A better solution would be to set the NIC to promisc mode when the required filters cannot be met. Also correct the number of MDF filters supported. It should be 17, not 16. Signed-off-by: Justin Chen Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 57 ++++++++++++-------------- 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 34466b827dde..a2b57807453b 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3083,39 +3083,42 @@ static void bcmgenet_timeout(struct net_device *dev) netif_tx_wake_all_queues(dev); } -#define MAX_MC_COUNT 16 +#define MAX_MDF_FILTER 17 static inline void bcmgenet_set_mdf_addr(struct bcmgenet_priv *priv, unsigned char *addr, - int *i, - int *mc) + int *i) { - u32 reg; - bcmgenet_umac_writel(priv, addr[0] << 8 | addr[1], UMAC_MDF_ADDR + (*i * 4)); bcmgenet_umac_writel(priv, addr[2] << 24 | addr[3] << 16 | addr[4] << 8 | addr[5], UMAC_MDF_ADDR + ((*i + 1) * 4)); - reg = bcmgenet_umac_readl(priv, UMAC_MDF_CTRL); - reg |= (1 << (MAX_MC_COUNT - *mc)); - bcmgenet_umac_writel(priv, reg, UMAC_MDF_CTRL); *i += 2; - (*mc)++; } static void bcmgenet_set_rx_mode(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); struct netdev_hw_addr *ha; - int i, mc; + int i, nfilter; u32 reg; netif_dbg(priv, hw, dev, "%s: %08X\n", __func__, dev->flags); - /* Promiscuous mode */ + /* Number of filters needed */ + nfilter = netdev_uc_count(dev) + netdev_mc_count(dev) + 2; + + /* + * Turn on promicuous mode for three scenarios + * 1. IFF_PROMISC flag is set + * 2. IFF_ALLMULTI flag is set + * 3. The number of filters needed exceeds the number filters + * supported by the hardware. + */ reg = bcmgenet_umac_readl(priv, UMAC_CMD); - if (dev->flags & IFF_PROMISC) { + if ((dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) || + (nfilter > MAX_MDF_FILTER)) { reg |= CMD_PROMISC; bcmgenet_umac_writel(priv, reg, UMAC_CMD); bcmgenet_umac_writel(priv, 0, UMAC_MDF_CTRL); @@ -3125,32 +3128,24 @@ static void bcmgenet_set_rx_mode(struct net_device *dev) bcmgenet_umac_writel(priv, reg, UMAC_CMD); } - /* UniMac doesn't support ALLMULTI */ - if (dev->flags & IFF_ALLMULTI) { - netdev_warn(dev, "ALLMULTI is not supported\n"); - return; - } - /* update MDF filter */ i = 0; - mc = 0; /* Broadcast */ - bcmgenet_set_mdf_addr(priv, dev->broadcast, &i, &mc); + bcmgenet_set_mdf_addr(priv, dev->broadcast, &i); /* my own address.*/ - bcmgenet_set_mdf_addr(priv, dev->dev_addr, &i, &mc); - /* Unicast list*/ - if (netdev_uc_count(dev) > (MAX_MC_COUNT - mc)) - return; + bcmgenet_set_mdf_addr(priv, dev->dev_addr, &i); - if (!netdev_uc_empty(dev)) - netdev_for_each_uc_addr(ha, dev) - bcmgenet_set_mdf_addr(priv, ha->addr, &i, &mc); - /* Multicast */ - if (netdev_mc_empty(dev) || netdev_mc_count(dev) >= (MAX_MC_COUNT - mc)) - return; + /* Unicast */ + netdev_for_each_uc_addr(ha, dev) + bcmgenet_set_mdf_addr(priv, ha->addr, &i); + /* Multicast */ netdev_for_each_mc_addr(ha, dev) - bcmgenet_set_mdf_addr(priv, ha->addr, &i, &mc); + bcmgenet_set_mdf_addr(priv, ha->addr, &i); + + /* Enable filters */ + reg = GENMASK(MAX_MDF_FILTER - 1, MAX_MDF_FILTER - nfilter); + bcmgenet_umac_writel(priv, reg, UMAC_MDF_CTRL); } /* Set the hardware MAC address. */ -- cgit v1.2.3-55-g7522 From 666a3d6e1e6b78df34f59e6c0b8907aa3c8dbb2e Mon Sep 17 00:00:00 2001 From: Su Yanjun Date: Thu, 18 Jul 2019 10:19:23 +0800 Subject: udp: Fix typo in net/ipv4/udp.c Signed-off-by: Su Yanjun Signed-off-by: David S. Miller --- net/ipv4/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c21862ba9c02..d88821c794fb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2170,7 +2170,7 @@ start_lookup: /* Initialize UDP checksum. If exited with zero value (success), * CHECKSUM_UNNECESSARY means, that no more checks are required. - * Otherwise, csum completion requires chacksumming packet body, + * Otherwise, csum completion requires checksumming packet body, * including udp header and folding it to skb->csum. */ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, -- cgit v1.2.3-55-g7522 From 5a860f9184eb45b2df3fb3364a6b7d076545f83d Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Thu, 18 Jul 2019 15:45:42 +0800 Subject: liquidio: Replace vmalloc + memset with vzalloc Use vzalloc and vzalloc_node instead of using vmalloc and vmalloc_node and then zeroing the allocated memory by memset 0. This simplifies the code. Signed-off-by: Chuhong Yuan Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/request_manager.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index fcf20a8f92d9..032224178b64 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -218,15 +218,13 @@ int octeon_setup_iq(struct octeon_device *oct, return 0; } oct->instr_queue[iq_no] = - vmalloc_node(sizeof(struct octeon_instr_queue), numa_node); + vzalloc_node(sizeof(struct octeon_instr_queue), numa_node); if (!oct->instr_queue[iq_no]) oct->instr_queue[iq_no] = - vmalloc(sizeof(struct octeon_instr_queue)); + vzalloc(sizeof(struct octeon_instr_queue)); if (!oct->instr_queue[iq_no]) return 1; - memset(oct->instr_queue[iq_no], 0, - sizeof(struct octeon_instr_queue)); oct->instr_queue[iq_no]->q_index = q_index; oct->instr_queue[iq_no]->app_ctx = app_ctx; -- cgit v1.2.3-55-g7522 From 54851aa90cf27041d64b12f65ac72e9f97bd90fd Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 17 Jul 2019 23:39:33 +0300 Subject: ipv6: Unlink sibling route in case of failure When a route needs to be appended to an existing multipath route, fib6_add_rt2node() first appends it to the siblings list and increments the number of sibling routes on each sibling. Later, the function notifies the route via call_fib6_entry_notifiers(). In case the notification is vetoed, the route is not unlinked from the siblings list, which can result in a use-after-free. Fix this by unlinking the route from the siblings list before returning an error. Audited the rest of the call sites from which the FIB notification chain is called and could not find more problems. Fixes: 2233000cba40 ("net/ipv6: Move call_fib6_entry_notifiers up for route adds") Signed-off-by: Ido Schimmel Reported-by: Alexander Petrovskiy Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 49884f96232b..87f47bc55c5e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1151,8 +1151,24 @@ add: err = call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD, rt, extack); - if (err) + if (err) { + struct fib6_info *sibling, *next_sibling; + + /* If the route has siblings, then it first + * needs to be unlinked from them. + */ + if (!rt->fib6_nsiblings) + return err; + + list_for_each_entry_safe(sibling, next_sibling, + &rt->fib6_siblings, + fib6_siblings) + sibling->fib6_nsiblings--; + rt->fib6_nsiblings = 0; + list_del_init(&rt->fib6_siblings); + rt6_multipath_rebalance(next_sibling); return err; + } } rcu_assign_pointer(rt->fib6_next, iter); -- cgit v1.2.3-55-g7522 From 184528af92a87e334789f6a22af4e392baf6bf60 Mon Sep 17 00:00:00 2001 From: Ilias Apalodimas Date: Thu, 18 Jul 2019 17:38:30 +0300 Subject: MAINTAINERS: update netsec driver Add myself to maintainers since i provided the XDP and page_pool implementation Signed-off-by: Ilias Apalodimas Acked-by: Jassi Brar Acked-by: Ard Biesheuvel Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 211ea3a199bd..64f659d8346c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14789,6 +14789,7 @@ F: Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt SOCIONEXT (SNI) NETSEC NETWORK DRIVER M: Jassi Brar +M: Ilias Apalodimas L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/socionext/netsec.c -- cgit v1.2.3-55-g7522 From 7369c10f81172b55b284944caa2f51f595bbdb84 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Wed, 17 Jul 2019 18:14:57 +0800 Subject: net/mlx5: Replace kfree with kvfree Variable allocated by kvmalloc should not be freed by kfree. Because it may be allocated by vmalloc. So replace kfree with kvfree here. Fixes: 9b1f298236057 ("net/mlx5: Add support for FW fatal reporter dump") Signed-off-by: Chuhong Yuan Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 2fe6923f7ce0..9314777d99e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -597,7 +597,7 @@ mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter, err = devlink_fmsg_arr_pair_nest_end(fmsg); free_data: - kfree(cr_data); + kvfree(cr_data); return err; } -- cgit v1.2.3-55-g7522 From 01a0f9e4496d9f54e06abb71bf9f56c617ef8c24 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 18 Jul 2019 11:13:35 +0200 Subject: selftests/bpf: fix "valid read map access into a read-only array 1" on s390 This test looks up a 32-bit map element and then loads it using a 64-bit load. This does not work on s390, which is a big-endian machine. Since the point of this test doesn't seem to be loading a smaller value using a larger load, simply use a 32-bit load. Signed-off-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/array_access.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c index bcb83196e459..f3c33e128709 100644 --- a/tools/testing/selftests/bpf/verifier/array_access.c +++ b/tools/testing/selftests/bpf/verifier/array_access.c @@ -226,7 +226,7 @@ BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, .fixup_map_array_ro = { 3 }, -- cgit v1.2.3-55-g7522 From 59fd3486c3dd5678bc2fcac75e14466775465c3e Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 17 Jul 2019 14:26:20 +0200 Subject: selftests/bpf: fix test_xdp_noinline on s390 test_xdp_noinline fails on s390 due to a handful of endianness issues. Use ntohs for parsing eth_proto. Replace bswaps with ntohs/htons. Signed-off-by: Ilya Leoshkevich Acked-by: Vasily Gorbik Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/test_xdp_noinline.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index dad8a7e33eaa..e88d7b9d65ab 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -14,6 +14,7 @@ #include #include #include "bpf_helpers.h" +#include "bpf_endian.h" static __u32 rol32(__u32 word, unsigned int shift) { @@ -305,7 +306,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, ip6h->nexthdr = IPPROTO_IPV6; ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0]; ip6h->payload_len = - __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr)); + bpf_htons(pkt_bytes + sizeof(struct ipv6hdr)); ip6h->hop_limit = 4; ip6h->saddr.in6_u.u6_addr32[0] = 1; @@ -322,7 +323,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, struct real_definition *dst, __u32 pkt_bytes) { - __u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]); + __u32 ip_suffix = bpf_ntohs(pckt->flow.port16[0]); struct eth_hdr *new_eth; struct eth_hdr *old_eth; __u16 *next_iph_u16; @@ -352,7 +353,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, iph->protocol = IPPROTO_IPIP; iph->check = 0; iph->tos = 1; - iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr)); + iph->tot_len = bpf_htons(pkt_bytes + sizeof(struct iphdr)); /* don't update iph->daddr, since it will overwrite old eth_proto * and multiple iterations of bpf_prog_run() will fail */ @@ -639,7 +640,7 @@ static int process_l3_headers_v6(struct packet_description *pckt, iph_len = sizeof(struct ipv6hdr); *protocol = ip6h->nexthdr; pckt->flow.proto = *protocol; - *pkt_bytes = __builtin_bswap16(ip6h->payload_len); + *pkt_bytes = bpf_ntohs(ip6h->payload_len); off += iph_len; if (*protocol == 45) { return XDP_DROP; @@ -671,7 +672,7 @@ static int process_l3_headers_v4(struct packet_description *pckt, return XDP_DROP; *protocol = iph->protocol; pckt->flow.proto = *protocol; - *pkt_bytes = __builtin_bswap16(iph->tot_len); + *pkt_bytes = bpf_ntohs(iph->tot_len); off += 20; if (iph->frag_off & 65343) return XDP_DROP; @@ -808,10 +809,10 @@ int balancer_ingress(struct xdp_md *ctx) nh_off = sizeof(struct eth_hdr); if (data + nh_off > data_end) return XDP_DROP; - eth_proto = eth->eth_proto; - if (eth_proto == 8) + eth_proto = bpf_ntohs(eth->eth_proto); + if (eth_proto == ETH_P_IP) return process_packet(data, nh_off, data_end, 0, ctx); - else if (eth_proto == 56710) + else if (eth_proto == ETH_P_IPV6) return process_packet(data, nh_off, data_end, 1, ctx); else return XDP_DROP; -- cgit v1.2.3-55-g7522 From 8ec1e9006908a606b2a67c905f33ee2d3b6be5c2 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Wed, 17 Jul 2019 10:05:11 +0800 Subject: gve: replace kfree with kvfree Variables allocated by kvzalloc should not be freed by kfree. Because they may be allocated by vmalloc. So we replace kfree with kvfree here. Signed-off-by: Chuhong Yuan Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_main.c | 22 +++++++++++----------- drivers/net/ethernet/google/gve/gve_rx.c | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index e8ee8cac2bbf..497298752381 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -232,7 +232,7 @@ abort_with_mgmt_vector: abort_with_msix_enabled: pci_disable_msix(priv->pdev); abort_with_msix_vectors: - kfree(priv->msix_vectors); + kvfree(priv->msix_vectors); priv->msix_vectors = NULL; return err; } @@ -256,7 +256,7 @@ static void gve_free_notify_blocks(struct gve_priv *priv) priv->ntfy_blocks = NULL; free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); pci_disable_msix(priv->pdev); - kfree(priv->msix_vectors); + kvfree(priv->msix_vectors); priv->msix_vectors = NULL; } @@ -445,12 +445,12 @@ static int gve_alloc_rings(struct gve_priv *priv) return 0; free_rx: - kfree(priv->rx); + kvfree(priv->rx); priv->rx = NULL; free_tx_queue: gve_tx_free_rings(priv); free_tx: - kfree(priv->tx); + kvfree(priv->tx); priv->tx = NULL; return err; } @@ -500,7 +500,7 @@ static void gve_free_rings(struct gve_priv *priv) gve_remove_napi(priv, ntfy_idx); } gve_tx_free_rings(priv); - kfree(priv->tx); + kvfree(priv->tx); priv->tx = NULL; } if (priv->rx) { @@ -509,7 +509,7 @@ static void gve_free_rings(struct gve_priv *priv) gve_remove_napi(priv, ntfy_idx); } gve_rx_free_rings(priv); - kfree(priv->rx); + kvfree(priv->rx); priv->rx = NULL; } } @@ -592,9 +592,9 @@ static void gve_free_queue_page_list(struct gve_priv *priv, gve_free_page(&priv->pdev->dev, qpl->pages[i], qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); - kfree(qpl->page_buses); + kvfree(qpl->page_buses); free_pages: - kfree(qpl->pages); + kvfree(qpl->pages); priv->num_registered_pages -= qpl->num_entries; } @@ -635,7 +635,7 @@ static int gve_alloc_qpls(struct gve_priv *priv) free_qpls: for (j = 0; j <= i; j++) gve_free_queue_page_list(priv, j); - kfree(priv->qpls); + kvfree(priv->qpls); return err; } @@ -644,12 +644,12 @@ static void gve_free_qpls(struct gve_priv *priv) int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); int i; - kfree(priv->qpl_cfg.qpl_id_map); + kvfree(priv->qpl_cfg.qpl_id_map); for (i = 0; i < num_qpls; i++) gve_free_queue_page_list(priv, i); - kfree(priv->qpls); + kvfree(priv->qpls); } /* Use this to schedule a reset when the device is capable of continuing diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index c1aeabd1c594..1914b8350da7 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -35,7 +35,7 @@ static void gve_rx_free_ring(struct gve_priv *priv, int idx) gve_unassign_qpl(priv, rx->data.qpl->id); rx->data.qpl = NULL; - kfree(rx->data.page_info); + kvfree(rx->data.page_info); slots = rx->data.mask + 1; bytes = sizeof(*rx->data.data_ring) * slots; @@ -168,7 +168,7 @@ abort_with_q_resources: rx->q_resources, rx->q_resources_bus); rx->q_resources = NULL; abort_filled: - kfree(rx->data.page_info); + kvfree(rx->data.page_info); abort_with_slots: bytes = sizeof(*rx->data.data_ring) * slots; dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus); -- cgit v1.2.3-55-g7522 From 008cfbaa3f9f84efead76d2cea12b4dd05cce67d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 17 Jul 2019 06:29:56 +0000 Subject: net: dsa: sja1105: Fix missing unlock on error in sk_buff() Add the missing unlock before return from function sk_buff() in the error handling case. Fixes: f3097be21bf1 ("net: dsa: sja1105: Add a state machine for RX timestamping") Signed-off-by: Wei Yongjun Reviewed-by: Vladimir Oltean Reviewed-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/tag_sja1105.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 1d96c9d4a8e9..26363d72d25b 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -216,6 +216,7 @@ static struct sk_buff if (!skb) { dev_err_ratelimited(dp->ds->dev, "Failed to copy stampable skb\n"); + spin_unlock(&sp->data->meta_lock); return NULL; } sja1105_transfer_meta(skb, meta); -- cgit v1.2.3-55-g7522 From 9b3d15e6b05e0b916be5fbd915f90300a403098b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 17 Jul 2019 03:07:23 -0400 Subject: bnxt_en: Fix VNIC accounting when enabling aRFS on 57500 chips. Unlike legacy chips, 57500 chips don't need additional VNIC resources for aRFS/ntuple. Fix the code accordingly so that we don't reserve and allocate additional VNICs on 57500 chips. Without this patch, the driver is failing to initialize when it tries to allocate extra VNICs. Fixes: ac33906c67e2 ("bnxt_en: Add support for aRFS on 57500 chips.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1069eb01cc55..7134d2c3eb1c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3075,7 +3075,7 @@ static int bnxt_alloc_vnics(struct bnxt *bp) int num_vnics = 1; #ifdef CONFIG_RFS_ACCEL - if (bp->flags & BNXT_FLAG_RFS) + if ((bp->flags & (BNXT_FLAG_RFS | BNXT_FLAG_CHIP_P5)) == BNXT_FLAG_RFS) num_vnics += bp->rx_nr_rings; #endif @@ -7186,6 +7186,9 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp) #ifdef CONFIG_RFS_ACCEL int i, rc = 0; + if (bp->flags & BNXT_FLAG_CHIP_P5) + return 0; + for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_vnic_info *vnic; u16 vnic_id = i + 1; @@ -9645,7 +9648,7 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, return -ENOMEM; vnics = 1; - if (bp->flags & BNXT_FLAG_RFS) + if ((bp->flags & (BNXT_FLAG_RFS | BNXT_FLAG_CHIP_P5)) == BNXT_FLAG_RFS) vnics += rx_rings; if (bp->flags & BNXT_FLAG_AGG_RINGS) -- cgit v1.2.3-55-g7522 From 7d6053097311643545a8118100175a39bd6fa637 Mon Sep 17 00:00:00 2001 From: Rogan Dawes Date: Wed, 17 Jul 2019 11:14:33 +0200 Subject: usb: qmi_wwan: add D-Link DWM-222 A2 device ID Signed-off-by: Rogan Dawes Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 8b4ad10cf940..69e0a2acfcb0 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1292,6 +1292,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2001, 0x7e16, 3)}, /* D-Link DWM-221 */ {QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */ {QMI_FIXED_INTF(0x2001, 0x7e35, 4)}, /* D-Link DWM-222 */ + {QMI_FIXED_INTF(0x2001, 0x7e3d, 4)}, /* D-Link DWM-222 A2 */ {QMI_FIXED_INTF(0x2020, 0x2031, 4)}, /* Olicard 600 */ {QMI_FIXED_INTF(0x2020, 0x2033, 4)}, /* BroadMobi BM806U */ {QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */ -- cgit v1.2.3-55-g7522 From 6f5fa8d2c05f247ab50440df0f07867ae0fc9050 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 19 Jul 2019 01:21:57 +0000 Subject: ag71xx: fix error return code in ag71xx_probe() Fix to return error code -ENOMEM from the dmam_alloc_coherent() error handling case instead of 0, as done elsewhere in this function. Fixes: d51b6ce441d3 ("net: ethernet: add ag71xx driver") Signed-off-by: Wei Yongjun Reviewed-by: Oleksij Rempel Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/ag71xx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 8f450a03a885..31d27808ddee 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1725,8 +1725,10 @@ static int ag71xx_probe(struct platform_device *pdev) ag->stop_desc = dmam_alloc_coherent(&pdev->dev, sizeof(struct ag71xx_desc), &ag->stop_desc_dma, GFP_KERNEL); - if (!ag->stop_desc) + if (!ag->stop_desc) { + err = -ENOMEM; goto err_free; + } ag->stop_desc->data = 0; ag->stop_desc->ctrl = 0; -- cgit v1.2.3-55-g7522 From 269b7c5ff78264e3728b95828d219e0e0eeaec94 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 19 Jul 2019 01:22:06 +0000 Subject: ag71xx: fix return value check in ag71xx_probe() In case of error, the function of_get_mac_address() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Fixes: d51b6ce441d3 ("net: ethernet: add ag71xx driver") Signed-off-by: Wei Yongjun Reviewed-by: Oleksij Rempel Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/ag71xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 31d27808ddee..8b69d0d7e726 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1735,9 +1735,9 @@ static int ag71xx_probe(struct platform_device *pdev) ag->stop_desc->next = (u32)ag->stop_desc_dma; mac_addr = of_get_mac_address(np); - if (mac_addr) + if (!IS_ERR(mac_addr)) memcpy(ndev->dev_addr, mac_addr, ETH_ALEN); - if (!mac_addr || !is_valid_ether_addr(ndev->dev_addr)) { + if (IS_ERR(mac_addr) || !is_valid_ether_addr(ndev->dev_addr)) { netif_err(ag, probe, ndev, "invalid MAC address, using random address\n"); eth_random_addr(ndev->dev_addr); } -- cgit v1.2.3-55-g7522 From 8d650cdedaabb33e85e9b7c517c0c71fcecc1de9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jul 2019 19:28:14 -0700 Subject: tcp: fix tcp_set_congestion_control() use from bpf hook Neal reported incorrect use of ns_capable() from bpf hook. bpf_setsockopt(...TCP_CONGESTION...) -> tcp_set_congestion_control() -> ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) -> ns_capable_common() -> current_cred() -> rcu_dereference_protected(current->cred, 1) Accessing 'current' in bpf context makes no sense, since packets are processed from softirq context. As Neal stated : The capability check in tcp_set_congestion_control() was written assuming a system call context, and then was reused from a BPF call site. The fix is to add a new parameter to tcp_set_congestion_control(), so that the ns_capable() call is only performed under the right context. Fixes: 91b5b21c7c16 ("bpf: Add support for changing congestion control") Signed-off-by: Eric Dumazet Cc: Lawrence Brakmo Reported-by: Neal Cardwell Acked-by: Neal Cardwell Acked-by: Lawrence Brakmo Signed-off-by: David S. Miller --- include/net/tcp.h | 3 ++- net/core/filter.c | 2 +- net/ipv4/tcp.c | 4 +++- net/ipv4/tcp_cong.c | 6 +++--- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index cca3c59b98bf..f42d300f0cfa 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1064,7 +1064,8 @@ void tcp_get_default_congestion_control(struct net *net, char *name); void tcp_get_available_congestion_control(char *buf, size_t len); void tcp_get_allowed_congestion_control(char *buf, size_t len); int tcp_set_allowed_congestion_control(char *allowed); -int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit); +int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, + bool reinit, bool cap_net_admin); u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); diff --git a/net/core/filter.c b/net/core/filter.c index 0f6854ccf894..4e2a79b2fd77 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4335,7 +4335,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, TCP_CA_NAME_MAX-1)); name[TCP_CA_NAME_MAX-1] = 0; ret = tcp_set_congestion_control(sk, name, false, - reinit); + reinit, true); } else { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7846afacdf0b..776905899ac0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2785,7 +2785,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, name[val] = 0; lock_sock(sk); - err = tcp_set_congestion_control(sk, name, true, true); + err = tcp_set_congestion_control(sk, name, true, true, + ns_capable(sock_net(sk)->user_ns, + CAP_NET_ADMIN)); release_sock(sk); return err; } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index e1862b64a90f..c445a81d144e 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -333,7 +333,8 @@ out: * tcp_reinit_congestion_control (if the current congestion control was * already initialized. */ -int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit) +int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, + bool reinit, bool cap_net_admin) { struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_congestion_ops *ca; @@ -369,8 +370,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, boo } else { err = -EBUSY; } - } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || - ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) { + } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) { err = -EPERM; } else if (!try_module_get(ca->owner)) { err = -EBUSY; -- cgit v1.2.3-55-g7522