From b1e8818cabf407a5a2cec696411b0bdfd7fd12f0 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 15 Jan 2019 17:07:47 -0800 Subject: bpf: btf: support 128 bit integer type Currently, btf only supports up to 64-bit integer. On the other hand, 128bit support for gcc and clang has existed for a long time. For example, both gcc 4.8 and llvm 3.7 supports types "__int128" and "unsigned __int128" for virtually all 64bit architectures including bpf. The requirement for __int128 support comes from two areas: . bpf program may use __int128. For example, some bcc tools (https://github.com/iovisor/bcc/tree/master/tools), mostly tcp v6 related, tcpstates.py, tcpaccept.py, etc., are using __int128 to represent the ipv6 addresses. . linux itself is using __int128 types. Hence supporting __int128 type in BTF is required for vmlinux BTF, which will be used by "compile once and run everywhere" and other projects. For 128bit integer, instead of base-10, hex numbers are pretty printed out as large decimal number is hard to decipher, e.g., for ipv6 addresses. Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- kernel/bpf/btf.c | 104 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 85 insertions(+), 19 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index a2f53642592b..022ef9ca1296 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -157,7 +157,7 @@ * */ -#define BITS_PER_U64 (sizeof(u64) * BITS_PER_BYTE) +#define BITS_PER_U128 (sizeof(u64) * BITS_PER_BYTE * 2) #define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1) #define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK) #define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3) @@ -525,7 +525,7 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) /* * Regular int is not a bit field and it must be either - * u8/u16/u32/u64. + * u8/u16/u32/u64 or __int128. */ static bool btf_type_int_is_regular(const struct btf_type *t) { @@ -538,7 +538,8 @@ static bool btf_type_int_is_regular(const struct btf_type *t) if (BITS_PER_BYTE_MASKED(nr_bits) || BTF_INT_OFFSET(int_data) || (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) && - nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) { + nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64) && + nr_bytes != (2 * sizeof(u64)))) { return false; } @@ -1063,9 +1064,9 @@ static int btf_int_check_member(struct btf_verifier_env *env, nr_copy_bits = BTF_INT_BITS(int_data) + BITS_PER_BYTE_MASKED(struct_bits_off); - if (nr_copy_bits > BITS_PER_U64) { + if (nr_copy_bits > BITS_PER_U128) { btf_verifier_log_member(env, struct_type, member, - "nr_copy_bits exceeds 64"); + "nr_copy_bits exceeds 128"); return -EINVAL; } @@ -1119,9 +1120,9 @@ static int btf_int_check_kflag_member(struct btf_verifier_env *env, bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); nr_copy_bits = nr_bits + BITS_PER_BYTE_MASKED(struct_bits_off); - if (nr_copy_bits > BITS_PER_U64) { + if (nr_copy_bits > BITS_PER_U128) { btf_verifier_log_member(env, struct_type, member, - "nr_copy_bits exceeds 64"); + "nr_copy_bits exceeds 128"); return -EINVAL; } @@ -1168,9 +1169,9 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env, nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data); - if (nr_bits > BITS_PER_U64) { + if (nr_bits > BITS_PER_U128) { btf_verifier_log_type(env, t, "nr_bits exceeds %zu", - BITS_PER_U64); + BITS_PER_U128); return -EINVAL; } @@ -1211,31 +1212,93 @@ static void btf_int_log(struct btf_verifier_env *env, btf_int_encoding_str(BTF_INT_ENCODING(int_data))); } +static void btf_int128_print(struct seq_file *m, void *data) +{ + /* data points to a __int128 number. + * Suppose + * int128_num = *(__int128 *)data; + * The below formulas shows what upper_num and lower_num represents: + * upper_num = int128_num >> 64; + * lower_num = int128_num & 0xffffffffFFFFFFFFULL; + */ + u64 upper_num, lower_num; + +#ifdef __BIG_ENDIAN_BITFIELD + upper_num = *(u64 *)data; + lower_num = *(u64 *)(data + 8); +#else + upper_num = *(u64 *)(data + 8); + lower_num = *(u64 *)data; +#endif + if (upper_num == 0) + seq_printf(m, "0x%llx", lower_num); + else + seq_printf(m, "0x%llx%016llx", upper_num, lower_num); +} + +static void btf_int128_shift(u64 *print_num, u16 left_shift_bits, + u16 right_shift_bits) +{ + u64 upper_num, lower_num; + +#ifdef __BIG_ENDIAN_BITFIELD + upper_num = print_num[0]; + lower_num = print_num[1]; +#else + upper_num = print_num[1]; + lower_num = print_num[0]; +#endif + + /* shake out un-needed bits by shift/or operations */ + if (left_shift_bits >= 64) { + upper_num = lower_num << (left_shift_bits - 64); + lower_num = 0; + } else { + upper_num = (upper_num << left_shift_bits) | + (lower_num >> (64 - left_shift_bits)); + lower_num = lower_num << left_shift_bits; + } + + if (right_shift_bits >= 64) { + lower_num = upper_num >> (right_shift_bits - 64); + upper_num = 0; + } else { + lower_num = (lower_num >> right_shift_bits) | + (upper_num << (64 - right_shift_bits)); + upper_num = upper_num >> right_shift_bits; + } + +#ifdef __BIG_ENDIAN_BITFIELD + print_num[0] = upper_num; + print_num[1] = lower_num; +#else + print_num[0] = lower_num; + print_num[1] = upper_num; +#endif +} + static void btf_bitfield_seq_show(void *data, u8 bits_offset, u8 nr_bits, struct seq_file *m) { u16 left_shift_bits, right_shift_bits; u8 nr_copy_bytes; u8 nr_copy_bits; - u64 print_num; + u64 print_num[2] = {}; nr_copy_bits = nr_bits + bits_offset; nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits); - print_num = 0; - memcpy(&print_num, data, nr_copy_bytes); + memcpy(print_num, data, nr_copy_bytes); #ifdef __BIG_ENDIAN_BITFIELD left_shift_bits = bits_offset; #else - left_shift_bits = BITS_PER_U64 - nr_copy_bits; + left_shift_bits = BITS_PER_U128 - nr_copy_bits; #endif - right_shift_bits = BITS_PER_U64 - nr_bits; - - print_num <<= left_shift_bits; - print_num >>= right_shift_bits; + right_shift_bits = BITS_PER_U128 - nr_bits; - seq_printf(m, "0x%llx", print_num); + btf_int128_shift(print_num, left_shift_bits, right_shift_bits); + btf_int128_print(m, print_num); } @@ -1250,7 +1313,7 @@ static void btf_int_bits_seq_show(const struct btf *btf, /* * bits_offset is at most 7. - * BTF_INT_OFFSET() cannot exceed 64 bits. + * BTF_INT_OFFSET() cannot exceed 128 bits. */ total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data); data += BITS_ROUNDDOWN_BYTES(total_bits_offset); @@ -1274,6 +1337,9 @@ static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t, } switch (nr_bits) { + case 128: + btf_int128_print(m, data); + break; case 64: if (sign) seq_printf(m, "%lld", *(s64 *)data); -- cgit v1.2.3-55-g7522 From d0b2818efbe27e6c2e0c52621c8db18eb5abb5e1 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 16 Jan 2019 10:43:01 -0800 Subject: bpf: fix a (false) compiler warning An older GCC compiler complains: kernel/bpf/verifier.c: In function 'bpf_check': kernel/bpf/verifier.c:4***:13: error: 'prev_offset' may be used uninitialized in this function [-Werror=maybe-uninitialized] } else if (krecord[i].insn_offset <= prev_offset) { ^ kernel/bpf/verifier.c:4***:38: note: 'prev_offset' was declared here u32 i, nfuncs, urec_size, min_size, prev_offset; Although the compiler is wrong here, the patch makes sure that prev_offset is always initialized, just to silence the warning. v2: fix a spelling error in the commit message. Signed-off-by: Peter Oskolkov Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 56674a7c3778..ce87198ecd01 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4997,13 +4997,14 @@ static int check_btf_func(struct bpf_verifier_env *env, const union bpf_attr *attr, union bpf_attr __user *uattr) { - u32 i, nfuncs, urec_size, min_size, prev_offset; + u32 i, nfuncs, urec_size, min_size; u32 krec_size = sizeof(struct bpf_func_info); struct bpf_func_info *krecord; const struct btf_type *type; struct bpf_prog *prog; const struct btf *btf; void __user *urecord; + u32 prev_offset = 0; int ret = 0; nfuncs = attr->func_info_cnt; -- cgit v1.2.3-55-g7522 From 2cbd95a5c4fb855a4177c0343a880cc2091f500d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 22 Jan 2019 22:45:18 -0800 Subject: bpf: change parameters of call/branch offset adjustment In preparation for code removal change parameters to branch and call adjustment functions to be more universal. The current parameters assume we are patching a single instruction with a longer set. A diagram may help reading the change, this is for the patch single case, patching instruction 1 with a replacement of 4: ____ 0 |____| 1 |____| <-- pos ^ 2 | | <-- end old ^ | 3 | | | delta | len 4 |____| | | (patch region) 5 | | <-- end new v v 6 |____| end_old = pos + 1 end_new = pos + delta + 1 If we are before the patch region - curr variable and the target are fully in old coordinates (hence comparing against end_old). If we are after the region curr is in new coordinates (hence the comparison to end_new) but target is in mixed coordinates, so we just check if it falls before end_new, and if so it needs the adjustment. Note that we will not fix up branches which land in removed region in case of removal, which should be okay, as we are only going to remove dead code. Signed-off-by: Jakub Kicinski Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- kernel/bpf/core.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f908b9356025..ad08ba341197 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -307,15 +307,16 @@ int bpf_prog_calc_tag(struct bpf_prog *fp) return 0; } -static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, u32 delta, - u32 curr, const bool probe_pass) +static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old, + s32 end_new, u32 curr, const bool probe_pass) { const s64 imm_min = S32_MIN, imm_max = S32_MAX; + s32 delta = end_new - end_old; s64 imm = insn->imm; - if (curr < pos && curr + imm + 1 > pos) + if (curr < pos && curr + imm + 1 >= end_old) imm += delta; - else if (curr > pos + delta && curr + imm + 1 <= pos + delta) + else if (curr >= end_new && curr + imm + 1 < end_new) imm -= delta; if (imm < imm_min || imm > imm_max) return -ERANGE; @@ -324,15 +325,16 @@ static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, u32 delta, return 0; } -static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta, - u32 curr, const bool probe_pass) +static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old, + s32 end_new, u32 curr, const bool probe_pass) { const s32 off_min = S16_MIN, off_max = S16_MAX; + s32 delta = end_new - end_old; s32 off = insn->off; - if (curr < pos && curr + off + 1 > pos) + if (curr < pos && curr + off + 1 >= end_old) off += delta; - else if (curr > pos + delta && curr + off + 1 <= pos + delta) + else if (curr >= end_new && curr + off + 1 < end_new) off -= delta; if (off < off_min || off > off_max) return -ERANGE; @@ -341,10 +343,10 @@ static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta, return 0; } -static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta, - const bool probe_pass) +static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old, + s32 end_new, const bool probe_pass) { - u32 i, insn_cnt = prog->len + (probe_pass ? delta : 0); + u32 i, insn_cnt = prog->len + (probe_pass ? end_new - end_old : 0); struct bpf_insn *insn = prog->insnsi; int ret = 0; @@ -356,8 +358,8 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta, * do any other adjustments. Therefore skip the patchlet. */ if (probe_pass && i == pos) { - i += delta + 1; - insn++; + i = end_new; + insn = prog->insnsi + end_old; } code = insn->code; if (BPF_CLASS(code) != BPF_JMP || @@ -367,11 +369,11 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta, if (BPF_OP(code) == BPF_CALL) { if (insn->src_reg != BPF_PSEUDO_CALL) continue; - ret = bpf_adj_delta_to_imm(insn, pos, delta, i, - probe_pass); + ret = bpf_adj_delta_to_imm(insn, pos, end_old, + end_new, i, probe_pass); } else { - ret = bpf_adj_delta_to_off(insn, pos, delta, i, - probe_pass); + ret = bpf_adj_delta_to_off(insn, pos, end_old, + end_new, i, probe_pass); } if (ret) break; @@ -421,7 +423,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, * we afterwards may not fail anymore. */ if (insn_adj_cnt > cnt_max && - bpf_adj_branches(prog, off, insn_delta, true)) + bpf_adj_branches(prog, off, off + 1, off + len, true)) return NULL; /* Several new instructions need to be inserted. Make room @@ -453,7 +455,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, * the ship has sailed to reverse to the original state. An * overflow cannot happen at this point. */ - BUG_ON(bpf_adj_branches(prog_adj, off, insn_delta, false)); + BUG_ON(bpf_adj_branches(prog_adj, off, off + 1, off + len, false)); bpf_adj_linfo(prog_adj, off, insn_delta); -- cgit v1.2.3-55-g7522 From e2ae4ca266a1c9a0163738129506dbc63d5cca80 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 22 Jan 2019 22:45:19 -0800 Subject: bpf: verifier: hard wire branches to dead code Loading programs with dead code becomes more and more common, as people begin to patch constants at load time. Turn conditional jumps to unconditional ones, to avoid potential branch misprediction penalty. This optimization is enabled for privileged users only. For branches which just fall through we could just mark them as not seen and have dead code removal take care of them, but that seems less clean. v0.2: - don't call capable(CAP_SYS_ADMIN) twice (Jiong). v3: - fix GCC warning; Signed-off-by: Jakub Kicinski Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 45 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ce87198ecd01..bf1f98e8beb6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6458,6 +6458,40 @@ static void sanitize_dead_code(struct bpf_verifier_env *env) } } +static bool insn_is_cond_jump(u8 code) +{ + u8 op; + + if (BPF_CLASS(code) != BPF_JMP) + return false; + + op = BPF_OP(code); + return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL; +} + +static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) +{ + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); + struct bpf_insn *insn = env->prog->insnsi; + const int insn_cnt = env->prog->len; + int i; + + for (i = 0; i < insn_cnt; i++, insn++) { + if (!insn_is_cond_jump(insn->code)) + continue; + + if (!aux_data[i + 1].seen) + ja.off = insn->off; + else if (!aux_data[i + 1 + insn->off].seen) + ja.off = 0; + else + continue; + + memcpy(insn, &ja, sizeof(ja)); + } +} + /* convert load instructions that access fields of a context type into a * sequence of instructions that access fields of the underlying structure: * struct __sk_buff -> struct sk_buff @@ -7149,6 +7183,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, struct bpf_verifier_env *env; struct bpf_verifier_log *log; int ret = -EINVAL; + bool is_priv; /* no program is valid */ if (ARRAY_SIZE(bpf_verifier_ops) == 0) @@ -7195,6 +7230,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) env->strict_alignment = false; + is_priv = capable(CAP_SYS_ADMIN); + env->allow_ptr_leaks = is_priv; + ret = replace_map_fd_with_map_ptr(env); if (ret < 0) goto skip_full_check; @@ -7212,8 +7250,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, if (!env->explored_states) goto skip_full_check; - env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); - ret = check_subprogs(env); if (ret < 0) goto skip_full_check; @@ -7243,6 +7279,11 @@ skip_full_check: ret = check_max_stack_depth(env); /* instruction rewrites happen after this point */ + if (is_priv) { + if (ret == 0) + opt_hard_wire_dead_code_branches(env); + } + if (ret == 0) sanitize_dead_code(env); -- cgit v1.2.3-55-g7522 From 52875a04f4b26e7ef30a288ea096f7cfec0e93cd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 22 Jan 2019 22:45:20 -0800 Subject: bpf: verifier: remove dead code Instead of overwriting dead code with jmp -1 instructions remove it completely for root. Adjust verifier state and line info appropriately. v2: - adjust func_info (Alexei); - make sure first instruction retains line info (Alexei). v4: (Yonghong) - remove unnecessary if (!insn to remove) checks; - always keep last line info if first live instruction lacks one. v5: (Martin Lau) - improve and clarify comments. Signed-off-by: Jakub Kicinski Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 1 + kernel/bpf/core.c | 12 ++++ kernel/bpf/verifier.c | 176 ++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 186 insertions(+), 3 deletions(-) (limited to 'kernel/bpf') diff --git a/include/linux/filter.h b/include/linux/filter.h index ad106d845b22..be9af6b4a9e4 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -778,6 +778,7 @@ static inline bool bpf_dump_raw_ok(void) struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt); void bpf_clear_redirect_map(struct bpf_map *map); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ad08ba341197..2a81b8af3748 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -462,6 +462,18 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, return prog_adj; } +int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt) +{ + /* Branch offsets can't overflow when program is shrinking, no need + * to call bpf_adj_branches(..., true) here + */ + memmove(prog->insnsi + off, prog->insnsi + off + cnt, + sizeof(struct bpf_insn) * (prog->len - off - cnt)); + prog->len -= cnt; + + return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false)); +} + void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp) { int i; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bf1f98e8beb6..099b2541f87f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6432,6 +6432,150 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of return new_prog; } +static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env, + u32 off, u32 cnt) +{ + int i, j; + + /* find first prog starting at or after off (first to remove) */ + for (i = 0; i < env->subprog_cnt; i++) + if (env->subprog_info[i].start >= off) + break; + /* find first prog starting at or after off + cnt (first to stay) */ + for (j = i; j < env->subprog_cnt; j++) + if (env->subprog_info[j].start >= off + cnt) + break; + /* if j doesn't start exactly at off + cnt, we are just removing + * the front of previous prog + */ + if (env->subprog_info[j].start != off + cnt) + j--; + + if (j > i) { + struct bpf_prog_aux *aux = env->prog->aux; + int move; + + /* move fake 'exit' subprog as well */ + move = env->subprog_cnt + 1 - j; + + memmove(env->subprog_info + i, + env->subprog_info + j, + sizeof(*env->subprog_info) * move); + env->subprog_cnt -= j - i; + + /* remove func_info */ + if (aux->func_info) { + move = aux->func_info_cnt - j; + + memmove(aux->func_info + i, + aux->func_info + j, + sizeof(*aux->func_info) * move); + aux->func_info_cnt -= j - i; + /* func_info->insn_off is set after all code rewrites, + * in adjust_btf_func() - no need to adjust + */ + } + } else { + /* convert i from "first prog to remove" to "first to adjust" */ + if (env->subprog_info[i].start == off) + i++; + } + + /* update fake 'exit' subprog as well */ + for (; i <= env->subprog_cnt; i++) + env->subprog_info[i].start -= cnt; + + return 0; +} + +static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off, + u32 cnt) +{ + struct bpf_prog *prog = env->prog; + u32 i, l_off, l_cnt, nr_linfo; + struct bpf_line_info *linfo; + + nr_linfo = prog->aux->nr_linfo; + if (!nr_linfo) + return 0; + + linfo = prog->aux->linfo; + + /* find first line info to remove, count lines to be removed */ + for (i = 0; i < nr_linfo; i++) + if (linfo[i].insn_off >= off) + break; + + l_off = i; + l_cnt = 0; + for (; i < nr_linfo; i++) + if (linfo[i].insn_off < off + cnt) + l_cnt++; + else + break; + + /* First live insn doesn't match first live linfo, it needs to "inherit" + * last removed linfo. prog is already modified, so prog->len == off + * means no live instructions after (tail of the program was removed). + */ + if (prog->len != off && l_cnt && + (i == nr_linfo || linfo[i].insn_off != off + cnt)) { + l_cnt--; + linfo[--i].insn_off = off + cnt; + } + + /* remove the line info which refer to the removed instructions */ + if (l_cnt) { + memmove(linfo + l_off, linfo + i, + sizeof(*linfo) * (nr_linfo - i)); + + prog->aux->nr_linfo -= l_cnt; + nr_linfo = prog->aux->nr_linfo; + } + + /* pull all linfo[i].insn_off >= off + cnt in by cnt */ + for (i = l_off; i < nr_linfo; i++) + linfo[i].insn_off -= cnt; + + /* fix up all subprogs (incl. 'exit') which start >= off */ + for (i = 0; i <= env->subprog_cnt; i++) + if (env->subprog_info[i].linfo_idx > l_off) { + /* program may have started in the removed region but + * may not be fully removed + */ + if (env->subprog_info[i].linfo_idx >= l_off + l_cnt) + env->subprog_info[i].linfo_idx -= l_cnt; + else + env->subprog_info[i].linfo_idx = l_off; + } + + return 0; +} + +static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) +{ + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + unsigned int orig_prog_len = env->prog->len; + int err; + + err = bpf_remove_insns(env->prog, off, cnt); + if (err) + return err; + + err = adjust_subprog_starts_after_remove(env, off, cnt); + if (err) + return err; + + err = bpf_adj_linfo_after_remove(env, off, cnt); + if (err) + return err; + + memmove(aux_data + off, aux_data + off + cnt, + sizeof(*aux_data) * (orig_prog_len - off - cnt)); + + return 0; +} + /* The verifier does more data flow analysis than llvm and will not * explore branches that are dead at run time. Malicious programs can * have dead code too. Therefore replace all dead at-run-time code @@ -6492,6 +6636,30 @@ static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) } } +static int opt_remove_dead_code(struct bpf_verifier_env *env) +{ + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + int insn_cnt = env->prog->len; + int i, err; + + for (i = 0; i < insn_cnt; i++) { + int j; + + j = 0; + while (i + j < insn_cnt && !aux_data[i + j].seen) + j++; + if (!j) + continue; + + err = verifier_remove_insns(env, i, j); + if (err) + return err; + insn_cnt = env->prog->len; + } + + return 0; +} + /* convert load instructions that access fields of a context type into a * sequence of instructions that access fields of the underlying structure: * struct __sk_buff -> struct sk_buff @@ -7282,11 +7450,13 @@ skip_full_check: if (is_priv) { if (ret == 0) opt_hard_wire_dead_code_branches(env); + if (ret == 0) + ret = opt_remove_dead_code(env); + } else { + if (ret == 0) + sanitize_dead_code(env); } - if (ret == 0) - sanitize_dead_code(env); - if (ret == 0) /* program is valid, convert *(u32*)(ctx + off) accesses */ ret = convert_ctx_accesses(env); -- cgit v1.2.3-55-g7522 From a1b14abc009d9c13be355dbd4a4c4d47816ad3db Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 22 Jan 2019 22:45:21 -0800 Subject: bpf: verifier: remove unconditional branches by 0 Unconditional branches by 0 instructions are basically noops but they can result from earlier optimizations, e.g. a conditional jumps which would never be taken or a conditional jump around dead code. Remove those branches. v0.2: - s/opt_remove_dead_branches/opt_remove_nops/ (Jiong). Signed-off-by: Jakub Kicinski Reviewed-by: Jiong Wang Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 099b2541f87f..f39bca188a5c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6660,6 +6660,27 @@ static int opt_remove_dead_code(struct bpf_verifier_env *env) return 0; } +static int opt_remove_nops(struct bpf_verifier_env *env) +{ + const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; + int i, err; + + for (i = 0; i < insn_cnt; i++) { + if (memcmp(&insn[i], &ja, sizeof(ja))) + continue; + + err = verifier_remove_insns(env, i, 1); + if (err) + return err; + insn_cnt--; + i--; + } + + return 0; +} + /* convert load instructions that access fields of a context type into a * sequence of instructions that access fields of the underlying structure: * struct __sk_buff -> struct sk_buff @@ -7452,6 +7473,8 @@ skip_full_check: opt_hard_wire_dead_code_branches(env); if (ret == 0) ret = opt_remove_dead_code(env); + if (ret == 0) + ret = opt_remove_nops(env); } else { if (ret == 0) sanitize_dead_code(env); -- cgit v1.2.3-55-g7522 From 9e4c24e7ee7dfd3898269519103e823892b730d8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 22 Jan 2019 22:45:23 -0800 Subject: bpf: verifier: record original instruction index The communication between the verifier and advanced JITs is based on instruction indexes. We have to keep them stable throughout the optimizations otherwise referring to a particular instruction gets messy quickly. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel/bpf') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 573cca00a0e6..f3ae00ee5516 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -187,6 +187,7 @@ struct bpf_insn_aux_data { int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ u8 alu_state; /* used in combination with alu_limit */ + unsigned int orig_idx; /* original instruction index */ }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f39bca188a5c..f2c49b4235df 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7371,7 +7371,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, { struct bpf_verifier_env *env; struct bpf_verifier_log *log; - int ret = -EINVAL; + int i, len, ret = -EINVAL; bool is_priv; /* no program is valid */ @@ -7386,12 +7386,14 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, return -ENOMEM; log = &env->log; + len = (*prog)->len; env->insn_aux_data = - vzalloc(array_size(sizeof(struct bpf_insn_aux_data), - (*prog)->len)); + vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len)); ret = -ENOMEM; if (!env->insn_aux_data) goto err_free_env; + for (i = 0; i < len; i++) + env->insn_aux_data[i].orig_idx = i; env->prog = *prog; env->ops = bpf_verifier_ops[env->prog->type]; -- cgit v1.2.3-55-g7522 From 08ca90afba255d05dc3253caa44056e7aecbe8c5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 22 Jan 2019 22:45:24 -0800 Subject: bpf: notify offload JITs about optimizations Let offload JITs know when instructions are replaced and optimized out, so they can update their state appropriately. The optimizations are best effort, if JIT returns an error from any callback verifier will stop notifying it as state may now be out of sync, but the verifier continues making progress. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 7 +++++++ include/linux/bpf_verifier.h | 5 +++++ kernel/bpf/offload.c | 35 +++++++++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 6 ++++++ 4 files changed, 53 insertions(+) (limited to 'kernel/bpf') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e734f163bd0b..3851529062ec 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -268,9 +268,15 @@ struct bpf_verifier_ops { }; struct bpf_prog_offload_ops { + /* verifier basic callbacks */ int (*insn_hook)(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int (*finalize)(struct bpf_verifier_env *env); + /* verifier optimization callbacks (called after .finalize) */ + int (*replace_insn)(struct bpf_verifier_env *env, u32 off, + struct bpf_insn *insn); + int (*remove_insns)(struct bpf_verifier_env *env, u32 off, u32 cnt); + /* program management callbacks */ int (*prepare)(struct bpf_prog *prog); int (*translate)(struct bpf_prog *prog); void (*destroy)(struct bpf_prog *prog); @@ -283,6 +289,7 @@ struct bpf_prog_offload { void *dev_priv; struct list_head offloads; bool dev_state; + bool opt_failed; void *jited_image; u32 jited_len; }; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index f3ae00ee5516..0620e418dde5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -266,5 +266,10 @@ int bpf_prog_offload_verifier_prep(struct bpf_prog *prog); int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int bpf_prog_offload_finalize(struct bpf_verifier_env *env); +void +bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, + struct bpf_insn *insn); +void +bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 54cf2b9c44a4..39dba8c90331 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -173,6 +173,41 @@ int bpf_prog_offload_finalize(struct bpf_verifier_env *env) return ret; } +void +bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, + struct bpf_insn *insn) +{ + const struct bpf_prog_offload_ops *ops; + struct bpf_prog_offload *offload; + int ret = -EOPNOTSUPP; + + down_read(&bpf_devs_lock); + offload = env->prog->aux->offload; + if (offload) { + ops = offload->offdev->ops; + if (!offload->opt_failed && ops->replace_insn) + ret = ops->replace_insn(env, off, insn); + offload->opt_failed |= ret; + } + up_read(&bpf_devs_lock); +} + +void +bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) +{ + struct bpf_prog_offload *offload; + int ret = -EOPNOTSUPP; + + down_read(&bpf_devs_lock); + offload = env->prog->aux->offload; + if (offload) { + if (!offload->opt_failed && offload->offdev->ops->remove_insns) + ret = offload->offdev->ops->remove_insns(env, off, cnt); + offload->opt_failed |= ret; + } + up_read(&bpf_devs_lock); +} + static void __bpf_prog_offload_destroy(struct bpf_prog *prog) { struct bpf_prog_offload *offload = prog->aux->offload; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f2c49b4235df..8cfe39ef770f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6558,6 +6558,9 @@ static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) unsigned int orig_prog_len = env->prog->len; int err; + if (bpf_prog_is_dev_bound(env->prog->aux)) + bpf_prog_offload_remove_insns(env, off, cnt); + err = bpf_remove_insns(env->prog, off, cnt); if (err) return err; @@ -6632,6 +6635,9 @@ static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) else continue; + if (bpf_prog_is_dev_bound(env->prog->aux)) + bpf_prog_offload_replace_insn(env, i, &ja); + memcpy(insn, &ja, sizeof(ja)); } } -- cgit v1.2.3-55-g7522 From a72dafafbd5f11c6ea3a9682d64da1074f28eb67 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Sat, 26 Jan 2019 12:26:00 -0500 Subject: bpf: refactor verifier min/max code for condition jump The current min/max code does both signed and unsigned comparisons against the input argument "val" which is "u64" and there is explicit type casting when the comparison is signed. As we will need slightly more complexer type casting when JMP32 introduced, it is better to host the signed type casting. This makes the code more clean with ignorable runtime overhead. Also, code for J*GE/GT/LT/LE and JEQ/JNE are very similar, this patch combine them. The main purpose for this refactor is to make sure the min/max code will still be readable and with minimum code duplication after JMP32 introduced. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 172 +++++++++++++++++++++++++++++--------------------- 1 file changed, 99 insertions(+), 73 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8cfe39ef770f..eae6cb1fe653 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4033,9 +4033,13 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, */ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) { + s64 sval; + if (__is_pointer_value(false, reg)) return -1; + sval = (s64)val; + switch (opcode) { case BPF_JEQ: if (tnum_is_const(reg->var_off)) @@ -4058,9 +4062,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) return 0; break; case BPF_JSGT: - if (reg->smin_value > (s64)val) + if (reg->smin_value > sval) return 1; - else if (reg->smax_value < (s64)val) + else if (reg->smax_value < sval) return 0; break; case BPF_JLT: @@ -4070,9 +4074,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) return 0; break; case BPF_JSLT: - if (reg->smax_value < (s64)val) + if (reg->smax_value < sval) return 1; - else if (reg->smin_value >= (s64)val) + else if (reg->smin_value >= sval) return 0; break; case BPF_JGE: @@ -4082,9 +4086,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) return 0; break; case BPF_JSGE: - if (reg->smin_value >= (s64)val) + if (reg->smin_value >= sval) return 1; - else if (reg->smax_value < (s64)val) + else if (reg->smax_value < sval) return 0; break; case BPF_JLE: @@ -4094,9 +4098,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) return 0; break; case BPF_JSLE: - if (reg->smax_value <= (s64)val) + if (reg->smax_value <= sval) return 1; - else if (reg->smin_value > (s64)val) + else if (reg->smin_value > sval) return 0; break; } @@ -4113,6 +4117,8 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, u8 opcode) { + s64 sval; + /* If the dst_reg is a pointer, we can't learn anything about its * variable offset from the compare (unless src_reg were a pointer into * the same object, but we don't bother with that. @@ -4122,19 +4128,22 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, if (__is_pointer_value(false, false_reg)) return; + sval = (s64)val; + switch (opcode) { case BPF_JEQ: - /* If this is false then we know nothing Jon Snow, but if it is - * true then we know for sure. - */ - __mark_reg_known(true_reg, val); - break; case BPF_JNE: - /* If this is true we know nothing Jon Snow, but if it is false - * we know the value for sure; + { + struct bpf_reg_state *reg = + opcode == BPF_JEQ ? true_reg : false_reg; + + /* For BPF_JEQ, if this is false we know nothing Jon Snow, but + * if it is true we know the value for sure. Likewise for + * BPF_JNE. */ - __mark_reg_known(false_reg, val); + __mark_reg_known(reg, val); break; + } case BPF_JSET: false_reg->var_off = tnum_and(false_reg->var_off, tnum_const(~val)); @@ -4142,38 +4151,46 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, true_reg->var_off = tnum_or(true_reg->var_off, tnum_const(val)); break; - case BPF_JGT: - false_reg->umax_value = min(false_reg->umax_value, val); - true_reg->umin_value = max(true_reg->umin_value, val + 1); - break; - case BPF_JSGT: - false_reg->smax_value = min_t(s64, false_reg->smax_value, val); - true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1); - break; - case BPF_JLT: - false_reg->umin_value = max(false_reg->umin_value, val); - true_reg->umax_value = min(true_reg->umax_value, val - 1); - break; - case BPF_JSLT: - false_reg->smin_value = max_t(s64, false_reg->smin_value, val); - true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1); - break; case BPF_JGE: - false_reg->umax_value = min(false_reg->umax_value, val - 1); - true_reg->umin_value = max(true_reg->umin_value, val); + case BPF_JGT: + { + u64 false_umax = opcode == BPF_JGT ? val : val - 1; + u64 true_umin = opcode == BPF_JGT ? val + 1 : val; + + false_reg->umax_value = min(false_reg->umax_value, false_umax); + true_reg->umin_value = max(true_reg->umin_value, true_umin); break; + } case BPF_JSGE: - false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1); - true_reg->smin_value = max_t(s64, true_reg->smin_value, val); + case BPF_JSGT: + { + s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1; + s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval; + + false_reg->smax_value = min(false_reg->smax_value, false_smax); + true_reg->smin_value = max(true_reg->smin_value, true_smin); break; + } case BPF_JLE: - false_reg->umin_value = max(false_reg->umin_value, val + 1); - true_reg->umax_value = min(true_reg->umax_value, val); + case BPF_JLT: + { + u64 false_umin = opcode == BPF_JLT ? val : val + 1; + u64 true_umax = opcode == BPF_JLT ? val - 1 : val; + + false_reg->umin_value = max(false_reg->umin_value, false_umin); + true_reg->umax_value = min(true_reg->umax_value, true_umax); break; + } case BPF_JSLE: - false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1); - true_reg->smax_value = min_t(s64, true_reg->smax_value, val); + case BPF_JSLT: + { + s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1; + s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval; + + false_reg->smin_value = max(false_reg->smin_value, false_smin); + true_reg->smax_value = min(true_reg->smax_value, true_smax); break; + } default: break; } @@ -4198,22 +4215,23 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, u8 opcode) { + s64 sval; + if (__is_pointer_value(false, false_reg)) return; + sval = (s64)val; + switch (opcode) { case BPF_JEQ: - /* If this is false then we know nothing Jon Snow, but if it is - * true then we know for sure. - */ - __mark_reg_known(true_reg, val); - break; case BPF_JNE: - /* If this is true we know nothing Jon Snow, but if it is false - * we know the value for sure; - */ - __mark_reg_known(false_reg, val); + { + struct bpf_reg_state *reg = + opcode == BPF_JEQ ? true_reg : false_reg; + + __mark_reg_known(reg, val); break; + } case BPF_JSET: false_reg->var_off = tnum_and(false_reg->var_off, tnum_const(~val)); @@ -4221,38 +4239,46 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, true_reg->var_off = tnum_or(true_reg->var_off, tnum_const(val)); break; - case BPF_JGT: - true_reg->umax_value = min(true_reg->umax_value, val - 1); - false_reg->umin_value = max(false_reg->umin_value, val); - break; - case BPF_JSGT: - true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1); - false_reg->smin_value = max_t(s64, false_reg->smin_value, val); - break; - case BPF_JLT: - true_reg->umin_value = max(true_reg->umin_value, val + 1); - false_reg->umax_value = min(false_reg->umax_value, val); - break; - case BPF_JSLT: - true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1); - false_reg->smax_value = min_t(s64, false_reg->smax_value, val); - break; case BPF_JGE: - true_reg->umax_value = min(true_reg->umax_value, val); - false_reg->umin_value = max(false_reg->umin_value, val + 1); + case BPF_JGT: + { + u64 false_umin = opcode == BPF_JGT ? val : val + 1; + u64 true_umax = opcode == BPF_JGT ? val - 1 : val; + + false_reg->umin_value = max(false_reg->umin_value, false_umin); + true_reg->umax_value = min(true_reg->umax_value, true_umax); break; + } case BPF_JSGE: - true_reg->smax_value = min_t(s64, true_reg->smax_value, val); - false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1); + case BPF_JSGT: + { + s64 false_smin = opcode == BPF_JSGT ? sval : sval + 1; + s64 true_smax = opcode == BPF_JSGT ? sval - 1 : sval; + + false_reg->smin_value = max(false_reg->smin_value, false_smin); + true_reg->smax_value = min(true_reg->smax_value, true_smax); break; + } case BPF_JLE: - true_reg->umin_value = max(true_reg->umin_value, val); - false_reg->umax_value = min(false_reg->umax_value, val - 1); + case BPF_JLT: + { + u64 false_umax = opcode == BPF_JLT ? val : val - 1; + u64 true_umin = opcode == BPF_JLT ? val + 1 : val; + + false_reg->umax_value = min(false_reg->umax_value, false_umax); + true_reg->umin_value = max(true_reg->umin_value, true_umin); break; + } case BPF_JSLE: - true_reg->smin_value = max_t(s64, true_reg->smin_value, val); - false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1); + case BPF_JSLT: + { + s64 false_smax = opcode == BPF_JSLT ? sval : sval - 1; + s64 true_smin = opcode == BPF_JSLT ? sval + 1 : sval; + + false_reg->smax_value = min(false_reg->smax_value, false_smax); + true_reg->smin_value = max(true_reg->smin_value, true_smin); break; + } default: break; } -- cgit v1.2.3-55-g7522 From 092ed0968bb648cd18e8a0430cd0a8a71727315c Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Sat, 26 Jan 2019 12:26:01 -0500 Subject: bpf: verifier support JMP32 This patch teach verifier about the new BPF_JMP32 instruction class. Verifier need to treat it similar as the existing BPF_JMP class. A BPF_JMP32 insn needs to go through all checks that have been done on BPF_JMP. Also, verifier is doing runtime optimizations based on the extra info conditional jump instruction could offer, especially when the comparison is between constant and register that the value range of the register could be improved based on the comparison results. These code are updated accordingly. Acked-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- kernel/bpf/core.c | 3 +- kernel/bpf/verifier.c | 203 ++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 173 insertions(+), 33 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 2a81b8af3748..1e443ba97310 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -362,7 +362,8 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old, insn = prog->insnsi + end_old; } code = insn->code; - if (BPF_CLASS(code) != BPF_JMP || + if ((BPF_CLASS(code) != BPF_JMP && + BPF_CLASS(code) != BPF_JMP32) || BPF_OP(code) == BPF_EXIT) continue; /* Adjust offset of jmps if we cross patch boundaries. */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index eae6cb1fe653..8c1c21cd50b4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1095,7 +1095,7 @@ static int check_subprogs(struct bpf_verifier_env *env) for (i = 0; i < insn_cnt; i++) { u8 code = insn[i].code; - if (BPF_CLASS(code) != BPF_JMP) + if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) goto next; if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL) goto next; @@ -4031,14 +4031,49 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, * 0 - branch will not be taken and fall-through to next insn * -1 - unknown. Example: "if (reg < 5)" is unknown when register value range [0,10] */ -static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) +static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, + bool is_jmp32) { + struct bpf_reg_state reg_lo; s64 sval; if (__is_pointer_value(false, reg)) return -1; - sval = (s64)val; + if (is_jmp32) { + reg_lo = *reg; + reg = ®_lo; + /* For JMP32, only low 32 bits are compared, coerce_reg_to_size + * could truncate high bits and update umin/umax according to + * information of low bits. + */ + coerce_reg_to_size(reg, 4); + /* smin/smax need special handling. For example, after coerce, + * if smin_value is 0x00000000ffffffffLL, the value is -1 when + * used as operand to JMP32. It is a negative number from s32's + * point of view, while it is a positive number when seen as + * s64. The smin/smax are kept as s64, therefore, when used with + * JMP32, they need to be transformed into s32, then sign + * extended back to s64. + * + * Also, smin/smax were copied from umin/umax. If umin/umax has + * different sign bit, then min/max relationship doesn't + * maintain after casting into s32, for this case, set smin/smax + * to safest range. + */ + if ((reg->umax_value ^ reg->umin_value) & + (1ULL << 31)) { + reg->smin_value = S32_MIN; + reg->smax_value = S32_MAX; + } + reg->smin_value = (s64)(s32)reg->smin_value; + reg->smax_value = (s64)(s32)reg->smax_value; + + val = (u32)val; + sval = (s64)(s32)val; + } else { + sval = (s64)val; + } switch (opcode) { case BPF_JEQ: @@ -4108,6 +4143,29 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) return -1; } +/* Generate min value of the high 32-bit from TNUM info. */ +static u64 gen_hi_min(struct tnum var) +{ + return var.value & ~0xffffffffULL; +} + +/* Generate max value of the high 32-bit from TNUM info. */ +static u64 gen_hi_max(struct tnum var) +{ + return (var.value | var.mask) & ~0xffffffffULL; +} + +/* Return true if VAL is compared with a s64 sign extended from s32, and they + * are with the same signedness. + */ +static bool cmp_val_with_extended_s64(s64 sval, struct bpf_reg_state *reg) +{ + return ((s32)sval >= 0 && + reg->smin_value >= 0 && reg->smax_value <= S32_MAX) || + ((s32)sval < 0 && + reg->smax_value <= 0 && reg->smin_value >= S32_MIN); +} + /* Adjusts the register min/max values in the case that the dst_reg is the * variable register that we are working on, and src_reg is a constant or we're * simply doing a BPF_K check. @@ -4115,7 +4173,7 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) */ static void reg_set_min_max(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, - u8 opcode) + u8 opcode, bool is_jmp32) { s64 sval; @@ -4128,7 +4186,8 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, if (__is_pointer_value(false, false_reg)) return; - sval = (s64)val; + val = is_jmp32 ? (u32)val : val; + sval = is_jmp32 ? (s64)(s32)val : (s64)val; switch (opcode) { case BPF_JEQ: @@ -4141,7 +4200,15 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, * if it is true we know the value for sure. Likewise for * BPF_JNE. */ - __mark_reg_known(reg, val); + if (is_jmp32) { + u64 old_v = reg->var_off.value; + u64 hi_mask = ~0xffffffffULL; + + reg->var_off.value = (old_v & hi_mask) | val; + reg->var_off.mask &= hi_mask; + } else { + __mark_reg_known(reg, val); + } break; } case BPF_JSET: @@ -4157,6 +4224,10 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, u64 false_umax = opcode == BPF_JGT ? val : val - 1; u64 true_umin = opcode == BPF_JGT ? val + 1 : val; + if (is_jmp32) { + false_umax += gen_hi_max(false_reg->var_off); + true_umin += gen_hi_min(true_reg->var_off); + } false_reg->umax_value = min(false_reg->umax_value, false_umax); true_reg->umin_value = max(true_reg->umin_value, true_umin); break; @@ -4167,6 +4238,11 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1; s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval; + /* If the full s64 was not sign-extended from s32 then don't + * deduct further info. + */ + if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) + break; false_reg->smax_value = min(false_reg->smax_value, false_smax); true_reg->smin_value = max(true_reg->smin_value, true_smin); break; @@ -4177,6 +4253,10 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, u64 false_umin = opcode == BPF_JLT ? val : val + 1; u64 true_umax = opcode == BPF_JLT ? val - 1 : val; + if (is_jmp32) { + false_umin += gen_hi_min(false_reg->var_off); + true_umax += gen_hi_max(true_reg->var_off); + } false_reg->umin_value = max(false_reg->umin_value, false_umin); true_reg->umax_value = min(true_reg->umax_value, true_umax); break; @@ -4187,6 +4267,8 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1; s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval; + if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) + break; false_reg->smin_value = max(false_reg->smin_value, false_smin); true_reg->smax_value = min(true_reg->smax_value, true_smax); break; @@ -4213,14 +4295,15 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, */ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, - u8 opcode) + u8 opcode, bool is_jmp32) { s64 sval; if (__is_pointer_value(false, false_reg)) return; - sval = (s64)val; + val = is_jmp32 ? (u32)val : val; + sval = is_jmp32 ? (s64)(s32)val : (s64)val; switch (opcode) { case BPF_JEQ: @@ -4229,7 +4312,15 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, struct bpf_reg_state *reg = opcode == BPF_JEQ ? true_reg : false_reg; - __mark_reg_known(reg, val); + if (is_jmp32) { + u64 old_v = reg->var_off.value; + u64 hi_mask = ~0xffffffffULL; + + reg->var_off.value = (old_v & hi_mask) | val; + reg->var_off.mask &= hi_mask; + } else { + __mark_reg_known(reg, val); + } break; } case BPF_JSET: @@ -4245,6 +4336,10 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, u64 false_umin = opcode == BPF_JGT ? val : val + 1; u64 true_umax = opcode == BPF_JGT ? val - 1 : val; + if (is_jmp32) { + false_umin += gen_hi_min(false_reg->var_off); + true_umax += gen_hi_max(true_reg->var_off); + } false_reg->umin_value = max(false_reg->umin_value, false_umin); true_reg->umax_value = min(true_reg->umax_value, true_umax); break; @@ -4255,6 +4350,8 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, s64 false_smin = opcode == BPF_JSGT ? sval : sval + 1; s64 true_smax = opcode == BPF_JSGT ? sval - 1 : sval; + if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) + break; false_reg->smin_value = max(false_reg->smin_value, false_smin); true_reg->smax_value = min(true_reg->smax_value, true_smax); break; @@ -4265,6 +4362,10 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, u64 false_umax = opcode == BPF_JLT ? val : val - 1; u64 true_umin = opcode == BPF_JLT ? val + 1 : val; + if (is_jmp32) { + false_umax += gen_hi_max(false_reg->var_off); + true_umin += gen_hi_min(true_reg->var_off); + } false_reg->umax_value = min(false_reg->umax_value, false_umax); true_reg->umin_value = max(true_reg->umin_value, true_umin); break; @@ -4275,6 +4376,8 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, s64 false_smax = opcode == BPF_JSLT ? sval : sval - 1; s64 true_smin = opcode == BPF_JSLT ? sval + 1 : sval; + if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) + break; false_reg->smax_value = min(false_reg->smax_value, false_smax); true_reg->smin_value = max(true_reg->smin_value, true_smin); break; @@ -4416,6 +4519,10 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, if (BPF_SRC(insn->code) != BPF_X) return false; + /* Pointers are always 64-bit. */ + if (BPF_CLASS(insn->code) == BPF_JMP32) + return false; + switch (BPF_OP(insn->code)) { case BPF_JGT: if ((dst_reg->type == PTR_TO_PACKET && @@ -4508,16 +4615,18 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; struct bpf_reg_state *dst_reg, *other_branch_regs; u8 opcode = BPF_OP(insn->code); + bool is_jmp32; int err; - if (opcode > BPF_JSLE) { - verbose(env, "invalid BPF_JMP opcode %x\n", opcode); + /* Only conditional jumps are expected to reach here. */ + if (opcode == BPF_JA || opcode > BPF_JSLE) { + verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode); return -EINVAL; } if (BPF_SRC(insn->code) == BPF_X) { if (insn->imm != 0) { - verbose(env, "BPF_JMP uses reserved fields\n"); + verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); return -EINVAL; } @@ -4533,7 +4642,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, } } else { if (insn->src_reg != BPF_REG_0) { - verbose(env, "BPF_JMP uses reserved fields\n"); + verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); return -EINVAL; } } @@ -4544,9 +4653,11 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, return err; dst_reg = ®s[insn->dst_reg]; + is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; if (BPF_SRC(insn->code) == BPF_K) { - int pred = is_branch_taken(dst_reg, insn->imm, opcode); + int pred = is_branch_taken(dst_reg, insn->imm, opcode, + is_jmp32); if (pred == 1) { /* only follow the goto, ignore fall-through */ @@ -4574,30 +4685,51 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, * comparable. */ if (BPF_SRC(insn->code) == BPF_X) { + struct bpf_reg_state *src_reg = ®s[insn->src_reg]; + struct bpf_reg_state lo_reg0 = *dst_reg; + struct bpf_reg_state lo_reg1 = *src_reg; + struct bpf_reg_state *src_lo, *dst_lo; + + dst_lo = &lo_reg0; + src_lo = &lo_reg1; + coerce_reg_to_size(dst_lo, 4); + coerce_reg_to_size(src_lo, 4); + if (dst_reg->type == SCALAR_VALUE && - regs[insn->src_reg].type == SCALAR_VALUE) { - if (tnum_is_const(regs[insn->src_reg].var_off)) + src_reg->type == SCALAR_VALUE) { + if (tnum_is_const(src_reg->var_off) || + (is_jmp32 && tnum_is_const(src_lo->var_off))) reg_set_min_max(&other_branch_regs[insn->dst_reg], - dst_reg, regs[insn->src_reg].var_off.value, - opcode); - else if (tnum_is_const(dst_reg->var_off)) + dst_reg, + is_jmp32 + ? src_lo->var_off.value + : src_reg->var_off.value, + opcode, is_jmp32); + else if (tnum_is_const(dst_reg->var_off) || + (is_jmp32 && tnum_is_const(dst_lo->var_off))) reg_set_min_max_inv(&other_branch_regs[insn->src_reg], - ®s[insn->src_reg], - dst_reg->var_off.value, opcode); - else if (opcode == BPF_JEQ || opcode == BPF_JNE) + src_reg, + is_jmp32 + ? dst_lo->var_off.value + : dst_reg->var_off.value, + opcode, is_jmp32); + else if (!is_jmp32 && + (opcode == BPF_JEQ || opcode == BPF_JNE)) /* Comparing for equality, we can combine knowledge */ reg_combine_min_max(&other_branch_regs[insn->src_reg], &other_branch_regs[insn->dst_reg], - ®s[insn->src_reg], - ®s[insn->dst_reg], opcode); + src_reg, dst_reg, opcode); } } else if (dst_reg->type == SCALAR_VALUE) { reg_set_min_max(&other_branch_regs[insn->dst_reg], - dst_reg, insn->imm, opcode); + dst_reg, insn->imm, opcode, is_jmp32); } - /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */ - if (BPF_SRC(insn->code) == BPF_K && + /* detect if R == 0 where R is returned from bpf_map_lookup_elem(). + * NOTE: these optimizations below are related with pointer comparison + * which will never be JMP32. + */ + if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && reg_type_may_be_null(dst_reg->type)) { /* Mark all identical registers in each branch as either @@ -4926,7 +5058,8 @@ peek_stack: goto check_state; t = insn_stack[cur_stack - 1]; - if (BPF_CLASS(insns[t].code) == BPF_JMP) { + if (BPF_CLASS(insns[t].code) == BPF_JMP || + BPF_CLASS(insns[t].code) == BPF_JMP32) { u8 opcode = BPF_OP(insns[t].code); if (opcode == BPF_EXIT) { @@ -6082,7 +6215,7 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; - } else if (class == BPF_JMP) { + } else if (class == BPF_JMP || class == BPF_JMP32) { u8 opcode = BPF_OP(insn->code); if (opcode == BPF_CALL) { @@ -6090,7 +6223,8 @@ static int do_check(struct bpf_verifier_env *env) insn->off != 0 || (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL) || - insn->dst_reg != BPF_REG_0) { + insn->dst_reg != BPF_REG_0 || + class == BPF_JMP32) { verbose(env, "BPF_CALL uses reserved fields\n"); return -EINVAL; } @@ -6106,7 +6240,8 @@ static int do_check(struct bpf_verifier_env *env) if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || insn->src_reg != BPF_REG_0 || - insn->dst_reg != BPF_REG_0) { + insn->dst_reg != BPF_REG_0 || + class == BPF_JMP32) { verbose(env, "BPF_JA uses reserved fields\n"); return -EINVAL; } @@ -6118,7 +6253,8 @@ static int do_check(struct bpf_verifier_env *env) if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || insn->src_reg != BPF_REG_0 || - insn->dst_reg != BPF_REG_0) { + insn->dst_reg != BPF_REG_0 || + class == BPF_JMP32) { verbose(env, "BPF_EXIT uses reserved fields\n"); return -EINVAL; } @@ -6635,6 +6771,9 @@ static bool insn_is_cond_jump(u8 code) { u8 op; + if (BPF_CLASS(code) == BPF_JMP32) + return true; + if (BPF_CLASS(code) != BPF_JMP) return false; -- cgit v1.2.3-55-g7522 From 56cbd82ef0b3dc47a16beeebc8d9a9a9269093dc Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Sat, 26 Jan 2019 12:26:02 -0500 Subject: bpf: disassembler support JMP32 This patch teaches disassembler about JMP32. There are two places to update: - Class 0x6 now used by BPF_JMP32, not "unused". - BPF_JMP32 need to show comparison operands properly. The disassemble format is to add an extra "(32)" before the operands if it is a sub-register. A better disassemble format for both JMP32 and ALU32 just show the register prefix as "w" instead of "r", this is the format using by LLVM assembler. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- kernel/bpf/disasm.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index d6b76377cb6e..de73f55e42fd 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -67,7 +67,7 @@ const char *const bpf_class_string[8] = { [BPF_STX] = "stx", [BPF_ALU] = "alu", [BPF_JMP] = "jmp", - [BPF_RET] = "BUG", + [BPF_JMP32] = "jmp32", [BPF_ALU64] = "alu64", }; @@ -136,23 +136,22 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, else print_bpf_end_insn(verbose, cbs->private_data, insn); } else if (BPF_OP(insn->code) == BPF_NEG) { - verbose(cbs->private_data, "(%02x) r%d = %s-r%d\n", - insn->code, insn->dst_reg, - class == BPF_ALU ? "(u32) " : "", + verbose(cbs->private_data, "(%02x) %c%d = -%c%d\n", + insn->code, class == BPF_ALU ? 'w' : 'r', + insn->dst_reg, class == BPF_ALU ? 'w' : 'r', insn->dst_reg); } else if (BPF_SRC(insn->code) == BPF_X) { - verbose(cbs->private_data, "(%02x) %sr%d %s %sr%d\n", - insn->code, class == BPF_ALU ? "(u32) " : "", + verbose(cbs->private_data, "(%02x) %c%d %s %c%d\n", + insn->code, class == BPF_ALU ? 'w' : 'r', insn->dst_reg, bpf_alu_string[BPF_OP(insn->code) >> 4], - class == BPF_ALU ? "(u32) " : "", + class == BPF_ALU ? 'w' : 'r', insn->src_reg); } else { - verbose(cbs->private_data, "(%02x) %sr%d %s %s%d\n", - insn->code, class == BPF_ALU ? "(u32) " : "", + verbose(cbs->private_data, "(%02x) %c%d %s %d\n", + insn->code, class == BPF_ALU ? 'w' : 'r', insn->dst_reg, bpf_alu_string[BPF_OP(insn->code) >> 4], - class == BPF_ALU ? "(u32) " : "", insn->imm); } } else if (class == BPF_STX) { @@ -220,7 +219,7 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, verbose(cbs->private_data, "BUG_ld_%02x\n", insn->code); return; } - } else if (class == BPF_JMP) { + } else if (class == BPF_JMP32 || class == BPF_JMP) { u8 opcode = BPF_OP(insn->code); if (opcode == BPF_CALL) { @@ -244,13 +243,18 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, } else if (insn->code == (BPF_JMP | BPF_EXIT)) { verbose(cbs->private_data, "(%02x) exit\n", insn->code); } else if (BPF_SRC(insn->code) == BPF_X) { - verbose(cbs->private_data, "(%02x) if r%d %s r%d goto pc%+d\n", - insn->code, insn->dst_reg, + verbose(cbs->private_data, + "(%02x) if %c%d %s %c%d goto pc%+d\n", + insn->code, class == BPF_JMP32 ? 'w' : 'r', + insn->dst_reg, bpf_jmp_string[BPF_OP(insn->code) >> 4], + class == BPF_JMP32 ? 'w' : 'r', insn->src_reg, insn->off); } else { - verbose(cbs->private_data, "(%02x) if r%d %s 0x%x goto pc%+d\n", - insn->code, insn->dst_reg, + verbose(cbs->private_data, + "(%02x) if %c%d %s 0x%x goto pc%+d\n", + insn->code, class == BPF_JMP32 ? 'w' : 'r', + insn->dst_reg, bpf_jmp_string[BPF_OP(insn->code) >> 4], insn->imm, insn->off); } -- cgit v1.2.3-55-g7522 From 503a8865a47752d0ac2ff642f07e96e8b2103178 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Sat, 26 Jan 2019 12:26:04 -0500 Subject: bpf: interpreter support for JMP32 This patch implements interpreting new JMP32 instructions. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- kernel/bpf/core.c | 197 +++++++++++++++++------------------------------------- 1 file changed, 63 insertions(+), 134 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1e443ba97310..bba11c2565ee 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1145,6 +1145,31 @@ EXPORT_SYMBOL_GPL(__bpf_call_base); INSN_2(JMP, CALL), \ /* Exit instruction. */ \ INSN_2(JMP, EXIT), \ + /* 32-bit Jump instructions. */ \ + /* Register based. */ \ + INSN_3(JMP32, JEQ, X), \ + INSN_3(JMP32, JNE, X), \ + INSN_3(JMP32, JGT, X), \ + INSN_3(JMP32, JLT, X), \ + INSN_3(JMP32, JGE, X), \ + INSN_3(JMP32, JLE, X), \ + INSN_3(JMP32, JSGT, X), \ + INSN_3(JMP32, JSLT, X), \ + INSN_3(JMP32, JSGE, X), \ + INSN_3(JMP32, JSLE, X), \ + INSN_3(JMP32, JSET, X), \ + /* Immediate based. */ \ + INSN_3(JMP32, JEQ, K), \ + INSN_3(JMP32, JNE, K), \ + INSN_3(JMP32, JGT, K), \ + INSN_3(JMP32, JLT, K), \ + INSN_3(JMP32, JGE, K), \ + INSN_3(JMP32, JLE, K), \ + INSN_3(JMP32, JSGT, K), \ + INSN_3(JMP32, JSLT, K), \ + INSN_3(JMP32, JSGE, K), \ + INSN_3(JMP32, JSLE, K), \ + INSN_3(JMP32, JSET, K), \ /* Jump instructions. */ \ /* Register based. */ \ INSN_3(JMP, JEQ, X), \ @@ -1405,145 +1430,49 @@ select_insn: out: CONT; } - /* JMP */ JMP_JA: insn += insn->off; CONT; - JMP_JEQ_X: - if (DST == SRC) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JEQ_K: - if (DST == IMM) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JNE_X: - if (DST != SRC) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JNE_K: - if (DST != IMM) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JGT_X: - if (DST > SRC) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JGT_K: - if (DST > IMM) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JLT_X: - if (DST < SRC) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JLT_K: - if (DST < IMM) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JGE_X: - if (DST >= SRC) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JGE_K: - if (DST >= IMM) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JLE_X: - if (DST <= SRC) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JLE_K: - if (DST <= IMM) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSGT_X: - if (((s64) DST) > ((s64) SRC)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSGT_K: - if (((s64) DST) > ((s64) IMM)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSLT_X: - if (((s64) DST) < ((s64) SRC)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSLT_K: - if (((s64) DST) < ((s64) IMM)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSGE_X: - if (((s64) DST) >= ((s64) SRC)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSGE_K: - if (((s64) DST) >= ((s64) IMM)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSLE_X: - if (((s64) DST) <= ((s64) SRC)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSLE_K: - if (((s64) DST) <= ((s64) IMM)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSET_X: - if (DST & SRC) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSET_K: - if (DST & IMM) { - insn += insn->off; - CONT_JMP; - } - CONT; JMP_EXIT: return BPF_R0; - + /* JMP */ +#define COND_JMP(SIGN, OPCODE, CMP_OP) \ + JMP_##OPCODE##_X: \ + if ((SIGN##64) DST CMP_OP (SIGN##64) SRC) { \ + insn += insn->off; \ + CONT_JMP; \ + } \ + CONT; \ + JMP32_##OPCODE##_X: \ + if ((SIGN##32) DST CMP_OP (SIGN##32) SRC) { \ + insn += insn->off; \ + CONT_JMP; \ + } \ + CONT; \ + JMP_##OPCODE##_K: \ + if ((SIGN##64) DST CMP_OP (SIGN##64) IMM) { \ + insn += insn->off; \ + CONT_JMP; \ + } \ + CONT; \ + JMP32_##OPCODE##_K: \ + if ((SIGN##32) DST CMP_OP (SIGN##32) IMM) { \ + insn += insn->off; \ + CONT_JMP; \ + } \ + CONT; + COND_JMP(u, JEQ, ==) + COND_JMP(u, JNE, !=) + COND_JMP(u, JGT, >) + COND_JMP(u, JLT, <) + COND_JMP(u, JGE, >=) + COND_JMP(u, JLE, <=) + COND_JMP(u, JSET, &) + COND_JMP(s, JSGT, >) + COND_JMP(s, JSLT, <) + COND_JMP(s, JSGE, >=) + COND_JMP(s, JSLE, <=) +#undef COND_JMP /* STX and ST and LDX*/ #define LDST(SIZEOP, SIZE) \ STX_MEM_##SIZEOP: \ -- cgit v1.2.3-55-g7522 From a7b76c8857692b0fce063b94ed83da11c396d341 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Sat, 26 Jan 2019 12:26:05 -0500 Subject: bpf: JIT blinds support JMP32 This patch adds JIT blinds support for JMP32. Like BPF_JMP_REG/IMM, JMP32 version are needed for building raw bpf insn. They are added to both include/linux/filter.h and tools/include/linux/filter.h. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 20 ++++++++++++++++++++ kernel/bpf/core.c | 21 +++++++++++++++++++++ tools/include/linux/filter.h | 20 ++++++++++++++++++++ 3 files changed, 61 insertions(+) (limited to 'kernel/bpf') diff --git a/include/linux/filter.h b/include/linux/filter.h index be9af6b4a9e4..e4b473f85b46 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -277,6 +277,26 @@ struct sock_reuseport; .off = OFF, \ .imm = IMM }) +/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + /* Unconditional jumps, goto pc + off16 */ #define BPF_JMP_A(OFF) \ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index bba11c2565ee..a7bcb23bee84 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -949,6 +949,27 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off); break; + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + /* Accommodate for extra offset in case of a backjump. */ + off = from->off; + if (off < 0) + off -= 2; + *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); + *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX, + off); + break; + case BPF_LD | BPF_IMM | BPF_DW: *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm); *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index af55acf73e75..cce0b02c0e28 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -199,6 +199,16 @@ .off = OFF, \ .imm = 0 }) +/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ @@ -209,6 +219,16 @@ .off = OFF, \ .imm = IMM }) +/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + /* Unconditional jumps, goto pc + off16 */ #define BPF_JMP_A(OFF) \ -- cgit v1.2.3-55-g7522