summaryrefslogtreecommitdiffstats
path: root/kernel/bpf/verifier.c
diff options
context:
space:
mode:
authorLinus Torvalds2016-10-05 19:11:24 +0200
committerLinus Torvalds2016-10-05 19:11:24 +0200
commit687ee0ad4e897e29f4b41f7a20c866d74c5e0660 (patch)
treeb31a2af35c24a54823674cdd126993b80daeac67 /kernel/bpf/verifier.c
parentmm: filemap: fix mapping->nrpages double accounting in fuse (diff)
parentMerge branch 'mlxsw-fixes' (diff)
downloadkernel-qcow2-linux-687ee0ad4e897e29f4b41f7a20c866d74c5e0660.tar.gz
kernel-qcow2-linux-687ee0ad4e897e29f4b41f7a20c866d74c5e0660.tar.xz
kernel-qcow2-linux-687ee0ad4e897e29f4b41f7a20c866d74c5e0660.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) BBR TCP congestion control, from Neal Cardwell, Yuchung Cheng and co. at Google. https://lwn.net/Articles/701165/ 2) Do TCP Small Queues for retransmits, from Eric Dumazet. 3) Support collect_md mode for all IPV4 and IPV6 tunnels, from Alexei Starovoitov. 4) Allow cls_flower to classify packets in ip tunnels, from Amir Vadai. 5) Support DSA tagging in older mv88e6xxx switches, from Andrew Lunn. 6) Support GMAC protocol in iwlwifi mwm, from Ayala Beker. 7) Support ndo_poll_controller in mlx5, from Calvin Owens. 8) Move VRF processing to an output hook and allow l3mdev to be loopback, from David Ahern. 9) Support SOCK_DESTROY for UDP sockets. Also from David Ahern. 10) Congestion control in RXRPC, from David Howells. 11) Support geneve RX offload in ixgbe, from Emil Tantilov. 12) When hitting pressure for new incoming TCP data SKBs, perform a partial rathern than a full purge of the OFO queue (which could be huge). From Eric Dumazet. 13) Convert XFRM state and policy lookups to RCU, from Florian Westphal. 14) Support RX network flow classification to igb, from Gangfeng Huang. 15) Hardware offloading of eBPF in nfp driver, from Jakub Kicinski. 16) New skbmod packet action, from Jamal Hadi Salim. 17) Remove some inefficiencies in snmp proc output, from Jia He. 18) Add FIB notifications to properly propagate route changes to hardware which is doing forwarding offloading. From Jiri Pirko. 19) New dsa driver for qca8xxx chips, from John Crispin. 20) Implement RFC7559 ipv6 router solicitation backoff, from Maciej Żenczykowski. 21) Add L3 mode to ipvlan, from Mahesh Bandewar. 22) Support 802.1ad in mlx4, from Moshe Shemesh. 23) Support hardware LRO in mediatek driver, from Nelson Chang. 24) Add TC offloading to mlx5, from Or Gerlitz. 25) Convert various drivers to ethtool ksettings interfaces, from Philippe Reynes. 26) TX max rate limiting for cxgb4, from Rahul Lakkireddy. 27) NAPI support for ath10k, from Rajkumar Manoharan. 28) Support XDP in mlx5, from Rana Shahout and Saeed Mahameed. 29) UDP replicast support in TIPC, from Richard Alpe. 30) Per-queue statistics for qed driver, from Sudarsana Reddy Kalluru. 31) Support BQL in thunderx driver, from Sunil Goutham. 32) TSO support in alx driver, from Tobias Regnery. 33) Add stream parser engine and use it in kcm. 34) Support async DHCP replies in ipconfig module, from Uwe Kleine-König. 35) DSA port fast aging for mv88e6xxx driver, from Vivien Didelot. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1715 commits) mlxsw: switchx2: Fix misuse of hard_header_len mlxsw: spectrum: Fix misuse of hard_header_len net/faraday: Stop NCSI device on shutdown net/ncsi: Introduce ncsi_stop_dev() net/ncsi: Rework the channel monitoring net/ncsi: Allow to extend NCSI request properties net/ncsi: Rework request index allocation net/ncsi: Don't probe on the reserved channel ID (0x1f) net/ncsi: Introduce NCSI_RESERVED_CHANNEL net/ncsi: Avoid unused-value build warning from ia64-linux-gcc net: Add netdev all_adj_list refcnt propagation to fix panic net: phy: Add Edge-rate driver for Microsemi PHYs. vmxnet3: Wake queue from reset work i40e: avoid NULL pointer dereference and recursive errors on early PCI error qed: Add RoCE ll2 & GSI support qed: Add support for memory registeration verbs qed: Add support for QP verbs qed: PD,PKEY and CQ verb support qed: Add support for RoCE hw init qede: Add qedr framework ...
Diffstat (limited to 'kernel/bpf/verifier.c')
-rw-r--r--kernel/bpf/verifier.c919
1 files changed, 670 insertions, 249 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index daea765d72e6..99a7e5b388f2 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
@@ -126,76 +127,16 @@
* are set to NOT_INIT to indicate that they are no longer readable.
*/
-struct reg_state {
- enum bpf_reg_type type;
- union {
- /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
- s64 imm;
-
- /* valid when type == PTR_TO_PACKET* */
- struct {
- u32 id;
- u16 off;
- u16 range;
- };
-
- /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
- * PTR_TO_MAP_VALUE_OR_NULL
- */
- struct bpf_map *map_ptr;
- };
-};
-
-enum bpf_stack_slot_type {
- STACK_INVALID, /* nothing was stored in this stack slot */
- STACK_SPILL, /* register spilled into stack */
- STACK_MISC /* BPF program wrote some data into this slot */
-};
-
-#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
-
-/* state of the program:
- * type of all registers and stack info
- */
-struct verifier_state {
- struct reg_state regs[MAX_BPF_REG];
- u8 stack_slot_type[MAX_BPF_STACK];
- struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
-};
-
-/* linked list of verifier states used to prune search */
-struct verifier_state_list {
- struct verifier_state state;
- struct verifier_state_list *next;
-};
-
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
-struct verifier_stack_elem {
+struct bpf_verifier_stack_elem {
/* verifer state is 'st'
* before processing instruction 'insn_idx'
* and after processing instruction 'prev_insn_idx'
*/
- struct verifier_state st;
+ struct bpf_verifier_state st;
int insn_idx;
int prev_insn_idx;
- struct verifier_stack_elem *next;
-};
-
-#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
-
-/* single container for all structs
- * one verifier_env per bpf_check() call
- */
-struct verifier_env {
- struct bpf_prog *prog; /* eBPF program being verified */
- struct verifier_stack_elem *head; /* stack of verifier states to be processed */
- int stack_size; /* number of states to be processed */
- struct verifier_state cur_state; /* current verifier state */
- struct verifier_state_list **explored_states; /* search pruning optimization */
- struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
- u32 used_map_cnt; /* number of used maps */
- u32 id_gen; /* used to generate unique reg IDs */
- bool allow_ptr_leaks;
+ struct bpf_verifier_stack_elem *next;
};
#define BPF_COMPLEXITY_LIMIT_INSNS 65536
@@ -204,6 +145,7 @@ struct verifier_env {
struct bpf_call_arg_meta {
struct bpf_map *map_ptr;
bool raw_mode;
+ bool pkt_access;
int regno;
int access_size;
};
@@ -240,6 +182,7 @@ static const char * const reg_type_str[] = {
[CONST_PTR_TO_MAP] = "map_ptr",
[PTR_TO_MAP_VALUE] = "map_value",
[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
+ [PTR_TO_MAP_VALUE_ADJ] = "map_value_adj",
[FRAME_PTR] = "fp",
[PTR_TO_STACK] = "fp",
[CONST_IMM] = "imm",
@@ -247,9 +190,9 @@ static const char * const reg_type_str[] = {
[PTR_TO_PACKET_END] = "pkt_end",
};
-static void print_verifier_state(struct verifier_state *state)
+static void print_verifier_state(struct bpf_verifier_state *state)
{
- struct reg_state *reg;
+ struct bpf_reg_state *reg;
enum bpf_reg_type t;
int i;
@@ -267,10 +210,17 @@ static void print_verifier_state(struct verifier_state *state)
else if (t == UNKNOWN_VALUE && reg->imm)
verbose("%lld", reg->imm);
else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE ||
- t == PTR_TO_MAP_VALUE_OR_NULL)
+ t == PTR_TO_MAP_VALUE_OR_NULL ||
+ t == PTR_TO_MAP_VALUE_ADJ)
verbose("(ks=%d,vs=%d)",
reg->map_ptr->key_size,
reg->map_ptr->value_size);
+ if (reg->min_value != BPF_REGISTER_MIN_RANGE)
+ verbose(",min_value=%llu",
+ (unsigned long long)reg->min_value);
+ if (reg->max_value != BPF_REGISTER_MAX_RANGE)
+ verbose(",max_value=%llu",
+ (unsigned long long)reg->max_value);
}
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
if (state->stack_slot_type[i] == STACK_SPILL)
@@ -425,9 +375,9 @@ static void print_bpf_insn(struct bpf_insn *insn)
}
}
-static int pop_stack(struct verifier_env *env, int *prev_insn_idx)
+static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx)
{
- struct verifier_stack_elem *elem;
+ struct bpf_verifier_stack_elem *elem;
int insn_idx;
if (env->head == NULL)
@@ -444,12 +394,12 @@ static int pop_stack(struct verifier_env *env, int *prev_insn_idx)
return insn_idx;
}
-static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx,
- int prev_insn_idx)
+static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
+ int insn_idx, int prev_insn_idx)
{
- struct verifier_stack_elem *elem;
+ struct bpf_verifier_stack_elem *elem;
- elem = kmalloc(sizeof(struct verifier_stack_elem), GFP_KERNEL);
+ elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
if (!elem)
goto err;
@@ -475,13 +425,15 @@ static const int caller_saved[CALLER_SAVED_REGS] = {
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
-static void init_reg_state(struct reg_state *regs)
+static void init_reg_state(struct bpf_reg_state *regs)
{
int i;
for (i = 0; i < MAX_BPF_REG; i++) {
regs[i].type = NOT_INIT;
regs[i].imm = 0;
+ regs[i].min_value = BPF_REGISTER_MIN_RANGE;
+ regs[i].max_value = BPF_REGISTER_MAX_RANGE;
}
/* frame pointer */
@@ -491,20 +443,26 @@ static void init_reg_state(struct reg_state *regs)
regs[BPF_REG_1].type = PTR_TO_CTX;
}
-static void mark_reg_unknown_value(struct reg_state *regs, u32 regno)
+static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
{
BUG_ON(regno >= MAX_BPF_REG);
regs[regno].type = UNKNOWN_VALUE;
regs[regno].imm = 0;
}
+static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
+{
+ regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
+ regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+}
+
enum reg_arg_type {
SRC_OP, /* register is used as source operand */
DST_OP, /* register is used as destination operand */
DST_OP_NO_MARK /* same as above, check only, don't mark */
};
-static int check_reg_arg(struct reg_state *regs, u32 regno,
+static int check_reg_arg(struct bpf_reg_state *regs, u32 regno,
enum reg_arg_type t)
{
if (regno >= MAX_BPF_REG) {
@@ -564,8 +522,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
/* check_stack_read/write functions track spill/fill of registers,
* stack boundary and alignment are checked in check_mem_access()
*/
-static int check_stack_write(struct verifier_state *state, int off, int size,
- int value_regno)
+static int check_stack_write(struct bpf_verifier_state *state, int off,
+ int size, int value_regno)
{
int i;
/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
@@ -590,7 +548,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
} else {
/* regular write of data into stack */
state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
- (struct reg_state) {};
+ (struct bpf_reg_state) {};
for (i = 0; i < size; i++)
state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC;
@@ -598,7 +556,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
return 0;
}
-static int check_stack_read(struct verifier_state *state, int off, int size,
+static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
int value_regno)
{
u8 *slot_type;
@@ -639,7 +597,7 @@ static int check_stack_read(struct verifier_state *state, int off, int size,
}
/* check read/write into map element returned by bpf_map_lookup_elem() */
-static int check_map_access(struct verifier_env *env, u32 regno, int off,
+static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
int size)
{
struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
@@ -654,24 +612,31 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off,
#define MAX_PACKET_OFF 0xffff
-static bool may_write_pkt_data(enum bpf_prog_type type)
+static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
+ const struct bpf_call_arg_meta *meta)
{
- switch (type) {
+ switch (env->prog->type) {
+ case BPF_PROG_TYPE_SCHED_CLS:
+ case BPF_PROG_TYPE_SCHED_ACT:
case BPF_PROG_TYPE_XDP:
+ if (meta)
+ return meta->pkt_access;
+
+ env->seen_direct_write = true;
return true;
default:
return false;
}
}
-static int check_packet_access(struct verifier_env *env, u32 regno, int off,
+static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
int size)
{
- struct reg_state *regs = env->cur_state.regs;
- struct reg_state *reg = &regs[regno];
+ struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *reg = &regs[regno];
off += reg->off;
- if (off < 0 || off + size > reg->range) {
+ if (off < 0 || size <= 0 || off + size > reg->range) {
verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
off, size, regno, reg->id, reg->off, reg->range);
return -EACCES;
@@ -680,9 +645,13 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off,
}
/* check access to 'struct bpf_context' fields */
-static int check_ctx_access(struct verifier_env *env, int off, int size,
+static int check_ctx_access(struct bpf_verifier_env *env, int off, int size,
enum bpf_access_type t, enum bpf_reg_type *reg_type)
{
+ /* for analyzer ctx accesses are already validated and converted */
+ if (env->analyzer_ops)
+ return 0;
+
if (env->prog->aux->ops->is_valid_access &&
env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) {
/* remember the offset of last byte accessed in ctx */
@@ -695,7 +664,7 @@ static int check_ctx_access(struct verifier_env *env, int off, int size,
return -EACCES;
}
-static bool is_pointer_value(struct verifier_env *env, int regno)
+static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
{
if (env->allow_ptr_leaks)
return false;
@@ -709,28 +678,19 @@ static bool is_pointer_value(struct verifier_env *env, int regno)
}
}
-static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg,
- int off, int size)
+static int check_ptr_alignment(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, int off, int size)
{
- if (reg->type != PTR_TO_PACKET) {
+ if (reg->type != PTR_TO_PACKET && reg->type != PTR_TO_MAP_VALUE_ADJ) {
if (off % size != 0) {
- verbose("misaligned access off %d size %d\n", off, size);
+ verbose("misaligned access off %d size %d\n",
+ off, size);
return -EACCES;
} else {
return 0;
}
}
- switch (env->prog->type) {
- case BPF_PROG_TYPE_SCHED_CLS:
- case BPF_PROG_TYPE_SCHED_ACT:
- case BPF_PROG_TYPE_XDP:
- break;
- default:
- verbose("verifier is misconfigured\n");
- return -EACCES;
- }
-
if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
/* misaligned access to packet is ok on x86,arm,arm64 */
return 0;
@@ -741,7 +701,8 @@ static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg,
}
/* skb->data is NET_IP_ALIGN-ed */
- if ((NET_IP_ALIGN + reg->off + off) % size != 0) {
+ if (reg->type == PTR_TO_PACKET &&
+ (NET_IP_ALIGN + reg->off + off) % size != 0) {
verbose("misaligned packet access off %d+%d+%d size %d\n",
NET_IP_ALIGN, reg->off, off, size);
return -EACCES;
@@ -755,12 +716,12 @@ static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg,
* if t==write && value_regno==-1, some unknown value is stored into memory
* if t==read && value_regno==-1, don't care what we read from memory
*/
-static int check_mem_access(struct verifier_env *env, u32 regno, int off,
+static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
int bpf_size, enum bpf_access_type t,
int value_regno)
{
- struct verifier_state *state = &env->cur_state;
- struct reg_state *reg = &state->regs[regno];
+ struct bpf_verifier_state *state = &env->cur_state;
+ struct bpf_reg_state *reg = &state->regs[regno];
int size, err = 0;
if (reg->type == PTR_TO_STACK)
@@ -774,12 +735,52 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
if (err)
return err;
- if (reg->type == PTR_TO_MAP_VALUE) {
+ if (reg->type == PTR_TO_MAP_VALUE ||
+ reg->type == PTR_TO_MAP_VALUE_ADJ) {
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
verbose("R%d leaks addr into map\n", value_regno);
return -EACCES;
}
+
+ /* If we adjusted the register to this map value at all then we
+ * need to change off and size to min_value and max_value
+ * respectively to make sure our theoretical access will be
+ * safe.
+ */
+ if (reg->type == PTR_TO_MAP_VALUE_ADJ) {
+ if (log_level)
+ print_verifier_state(state);
+ env->varlen_map_value_access = true;
+ /* The minimum value is only important with signed
+ * comparisons where we can't assume the floor of a
+ * value is 0. If we are using signed variables for our
+ * index'es we need to make sure that whatever we use
+ * will have a set floor within our range.
+ */
+ if ((s64)reg->min_value < 0) {
+ verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
+ regno);
+ return -EACCES;
+ }
+ err = check_map_access(env, regno, reg->min_value + off,
+ size);
+ if (err) {
+ verbose("R%d min value is outside of the array range\n",
+ regno);
+ return err;
+ }
+
+ /* If we haven't set a max value then we need to bail
+ * since we can't be sure we won't do bad things.
+ */
+ if (reg->max_value == BPF_REGISTER_MAX_RANGE) {
+ verbose("R%d unbounded memory access, make sure to bounds check any array access into a map\n",
+ regno);
+ return -EACCES;
+ }
+ off += reg->max_value;
+ }
err = check_map_access(env, regno, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown_value(state->regs, value_regno);
@@ -795,9 +796,8 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
err = check_ctx_access(env, off, size, t, &reg_type);
if (!err && t == BPF_READ && value_regno >= 0) {
mark_reg_unknown_value(state->regs, value_regno);
- if (env->allow_ptr_leaks)
- /* note that reg.[id|off|range] == 0 */
- state->regs[value_regno].type = reg_type;
+ /* note that reg.[id|off|range] == 0 */
+ state->regs[value_regno].type = reg_type;
}
} else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
@@ -817,7 +817,7 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
err = check_stack_read(state, off, size, value_regno);
}
} else if (state->regs[regno].type == PTR_TO_PACKET) {
- if (t == BPF_WRITE && !may_write_pkt_data(env->prog->type)) {
+ if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL)) {
verbose("cannot write into packet\n");
return -EACCES;
}
@@ -846,9 +846,9 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
return err;
}
-static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
+static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *regs = env->cur_state.regs;
int err;
if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
@@ -882,12 +882,12 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
* bytes from that pointer, make sure that it's within stack boundary
* and all elements of stack are initialized
*/
-static int check_stack_boundary(struct verifier_env *env, int regno,
+static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
int access_size, bool zero_size_allowed,
struct bpf_call_arg_meta *meta)
{
- struct verifier_state *state = &env->cur_state;
- struct reg_state *regs = state->regs;
+ struct bpf_verifier_state *state = &env->cur_state;
+ struct bpf_reg_state *regs = state->regs;
int off, i;
if (regs[regno].type != PTR_TO_STACK) {
@@ -926,18 +926,18 @@ static int check_stack_boundary(struct verifier_env *env, int regno,
return 0;
}
-static int check_func_arg(struct verifier_env *env, u32 regno,
+static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
enum bpf_arg_type arg_type,
struct bpf_call_arg_meta *meta)
{
- struct reg_state *reg = env->cur_state.regs + regno;
- enum bpf_reg_type expected_type;
+ struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
+ enum bpf_reg_type expected_type, type = reg->type;
int err = 0;
if (arg_type == ARG_DONTCARE)
return 0;
- if (reg->type == NOT_INIT) {
+ if (type == NOT_INIT) {
verbose("R%d !read_ok\n", regno);
return -EACCES;
}
@@ -950,16 +950,29 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
return 0;
}
+ if (type == PTR_TO_PACKET && !may_access_direct_pkt_data(env, meta)) {
+ verbose("helper access to the packet is not allowed\n");
+ return -EACCES;
+ }
+
if (arg_type == ARG_PTR_TO_MAP_KEY ||
arg_type == ARG_PTR_TO_MAP_VALUE) {
expected_type = PTR_TO_STACK;
+ if (type != PTR_TO_PACKET && type != expected_type)
+ goto err_type;
} else if (arg_type == ARG_CONST_STACK_SIZE ||
arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
expected_type = CONST_IMM;
+ if (type != expected_type)
+ goto err_type;
} else if (arg_type == ARG_CONST_MAP_PTR) {
expected_type = CONST_PTR_TO_MAP;
+ if (type != expected_type)
+ goto err_type;
} else if (arg_type == ARG_PTR_TO_CTX) {
expected_type = PTR_TO_CTX;
+ if (type != expected_type)
+ goto err_type;
} else if (arg_type == ARG_PTR_TO_STACK ||
arg_type == ARG_PTR_TO_RAW_STACK) {
expected_type = PTR_TO_STACK;
@@ -967,20 +980,16 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
* passed in as argument, it's a CONST_IMM type. Final test
* happens during stack boundary checking.
*/
- if (reg->type == CONST_IMM && reg->imm == 0)
- expected_type = CONST_IMM;
+ if (type == CONST_IMM && reg->imm == 0)
+ /* final test in check_stack_boundary() */;
+ else if (type != PTR_TO_PACKET && type != expected_type)
+ goto err_type;
meta->raw_mode = arg_type == ARG_PTR_TO_RAW_STACK;
} else {
verbose("unsupported arg_type %d\n", arg_type);
return -EFAULT;
}
- if (reg->type != expected_type) {
- verbose("R%d type=%s expected=%s\n", regno,
- reg_type_str[reg->type], reg_type_str[expected_type]);
- return -EACCES;
- }
-
if (arg_type == ARG_CONST_MAP_PTR) {
/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
meta->map_ptr = reg->map_ptr;
@@ -998,8 +1007,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
verbose("invalid map_ptr to access map->key\n");
return -EACCES;
}
- err = check_stack_boundary(env, regno, meta->map_ptr->key_size,
- false, NULL);
+ if (type == PTR_TO_PACKET)
+ err = check_packet_access(env, regno, 0,
+ meta->map_ptr->key_size);
+ else
+ err = check_stack_boundary(env, regno,
+ meta->map_ptr->key_size,
+ false, NULL);
} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
/* bpf_map_xxx(..., map_ptr, ..., value) call:
* check [value, value + map->value_size) validity
@@ -1009,9 +1023,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
verbose("invalid map_ptr to access map->value\n");
return -EACCES;
}
- err = check_stack_boundary(env, regno,
- meta->map_ptr->value_size,
- false, NULL);
+ if (type == PTR_TO_PACKET)
+ err = check_packet_access(env, regno, 0,
+ meta->map_ptr->value_size);
+ else
+ err = check_stack_boundary(env, regno,
+ meta->map_ptr->value_size,
+ false, NULL);
} else if (arg_type == ARG_CONST_STACK_SIZE ||
arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO);
@@ -1025,11 +1043,18 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
verbose("ARG_CONST_STACK_SIZE cannot be first argument\n");
return -EACCES;
}
- err = check_stack_boundary(env, regno - 1, reg->imm,
- zero_size_allowed, meta);
+ if (regs[regno - 1].type == PTR_TO_PACKET)
+ err = check_packet_access(env, regno - 1, 0, reg->imm);
+ else
+ err = check_stack_boundary(env, regno - 1, reg->imm,
+ zero_size_allowed, meta);
}
return err;
+err_type:
+ verbose("R%d type=%s expected=%s\n", regno,
+ reg_type_str[type], reg_type_str[expected_type]);
+ return -EACCES;
}
static int check_map_func_compatibility(struct bpf_map *map, int func_id)
@@ -1053,7 +1078,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
goto error;
break;
case BPF_MAP_TYPE_CGROUP_ARRAY:
- if (func_id != BPF_FUNC_skb_under_cgroup)
+ if (func_id != BPF_FUNC_skb_under_cgroup &&
+ func_id != BPF_FUNC_current_task_under_cgroup)
goto error;
break;
default:
@@ -1075,6 +1101,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
goto error;
break;
+ case BPF_FUNC_current_task_under_cgroup:
case BPF_FUNC_skb_under_cgroup:
if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
goto error;
@@ -1108,10 +1135,10 @@ static int check_raw_mode(const struct bpf_func_proto *fn)
return count > 1 ? -EINVAL : 0;
}
-static void clear_all_pkt_pointers(struct verifier_env *env)
+static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
{
- struct verifier_state *state = &env->cur_state;
- struct reg_state *regs = state->regs, *reg;
+ struct bpf_verifier_state *state = &env->cur_state;
+ struct bpf_reg_state *regs = state->regs, *reg;
int i;
for (i = 0; i < MAX_BPF_REG; i++)
@@ -1131,12 +1158,12 @@ static void clear_all_pkt_pointers(struct verifier_env *env)
}
}
-static int check_call(struct verifier_env *env, int func_id)
+static int check_call(struct bpf_verifier_env *env, int func_id)
{
- struct verifier_state *state = &env->cur_state;
+ struct bpf_verifier_state *state = &env->cur_state;
const struct bpf_func_proto *fn = NULL;
- struct reg_state *regs = state->regs;
- struct reg_state *reg;
+ struct bpf_reg_state *regs = state->regs;
+ struct bpf_reg_state *reg;
struct bpf_call_arg_meta meta;
bool changes_data;
int i, err;
@@ -1164,6 +1191,7 @@ static int check_call(struct verifier_env *env, int func_id)
changes_data = bpf_helper_changes_skb_data(fn->func);
memset(&meta, 0, sizeof(meta));
+ meta.pkt_access = fn->pkt_access;
/* We only support one arg being in raw mode at the moment, which
* is sufficient for the helper functions we have right now.
@@ -1214,6 +1242,7 @@ static int check_call(struct verifier_env *env, int func_id)
regs[BPF_REG_0].type = NOT_INIT;
} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
+ regs[BPF_REG_0].max_value = regs[BPF_REG_0].min_value = 0;
/* remember map_ptr, so that check_map_access()
* can check 'value_size' boundary of memory access
* to map element returned from bpf_map_lookup_elem()
@@ -1238,12 +1267,13 @@ static int check_call(struct verifier_env *env, int func_id)
return 0;
}
-static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn)
+static int check_packet_ptr_add(struct bpf_verifier_env *env,
+ struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs;
- struct reg_state *dst_reg = &regs[insn->dst_reg];
- struct reg_state *src_reg = &regs[insn->src_reg];
- struct reg_state tmp_reg;
+ struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
+ struct bpf_reg_state *src_reg = &regs[insn->src_reg];
+ struct bpf_reg_state tmp_reg;
s32 imm;
if (BPF_SRC(insn->code) == BPF_K) {
@@ -1311,10 +1341,10 @@ add_imm:
return 0;
}
-static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
+static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs;
- struct reg_state *dst_reg = &regs[insn->dst_reg];
+ struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
u8 opcode = BPF_OP(insn->code);
s64 imm_log2;
@@ -1324,7 +1354,7 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
*/
if (BPF_SRC(insn->code) == BPF_X) {
- struct reg_state *src_reg = &regs[insn->src_reg];
+ struct bpf_reg_state *src_reg = &regs[insn->src_reg];
if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 &&
dst_reg->imm && opcode == BPF_ADD) {
@@ -1413,11 +1443,12 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
return 0;
}
-static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn)
+static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
+ struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs;
- struct reg_state *dst_reg = &regs[insn->dst_reg];
- struct reg_state *src_reg = &regs[insn->src_reg];
+ struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
+ struct bpf_reg_state *src_reg = &regs[insn->src_reg];
u8 opcode = BPF_OP(insn->code);
/* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn.
@@ -1433,10 +1464,110 @@ static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn)
return 0;
}
+static void check_reg_overflow(struct bpf_reg_state *reg)
+{
+ if (reg->max_value > BPF_REGISTER_MAX_RANGE)
+ reg->max_value = BPF_REGISTER_MAX_RANGE;
+ if ((s64)reg->min_value < BPF_REGISTER_MIN_RANGE)
+ reg->min_value = BPF_REGISTER_MIN_RANGE;
+}
+
+static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
+ struct bpf_insn *insn)
+{
+ struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
+ u64 min_val = BPF_REGISTER_MIN_RANGE, max_val = BPF_REGISTER_MAX_RANGE;
+ bool min_set = false, max_set = false;
+ u8 opcode = BPF_OP(insn->code);
+
+ dst_reg = &regs[insn->dst_reg];
+ if (BPF_SRC(insn->code) == BPF_X) {
+ check_reg_overflow(&regs[insn->src_reg]);
+ min_val = regs[insn->src_reg].min_value;
+ max_val = regs[insn->src_reg].max_value;
+
+ /* If the source register is a random pointer then the
+ * min_value/max_value values represent the range of the known
+ * accesses into that value, not the actual min/max value of the
+ * register itself. In this case we have to reset the reg range
+ * values so we know it is not safe to look at.
+ */
+ if (regs[insn->src_reg].type != CONST_IMM &&
+ regs[insn->src_reg].type != UNKNOWN_VALUE) {
+ min_val = BPF_REGISTER_MIN_RANGE;
+ max_val = BPF_REGISTER_MAX_RANGE;
+ }
+ } else if (insn->imm < BPF_REGISTER_MAX_RANGE &&
+ (s64)insn->imm > BPF_REGISTER_MIN_RANGE) {
+ min_val = max_val = insn->imm;
+ min_set = max_set = true;
+ }
+
+ /* We don't know anything about what was done to this register, mark it
+ * as unknown.
+ */
+ if (min_val == BPF_REGISTER_MIN_RANGE &&
+ max_val == BPF_REGISTER_MAX_RANGE) {
+ reset_reg_range_values(regs, insn->dst_reg);
+ return;
+ }
+
+ switch (opcode) {
+ case BPF_ADD:
+ dst_reg->min_value += min_val;
+ dst_reg->max_value += max_val;
+ break;
+ case BPF_SUB:
+ dst_reg->min_value -= min_val;
+ dst_reg->max_value -= max_val;
+ break;
+ case BPF_MUL:
+ dst_reg->min_value *= min_val;
+ dst_reg->max_value *= max_val;
+ break;
+ case BPF_AND:
+ /* & is special since it could end up with 0 bits set. */
+ dst_reg->min_value &= min_val;
+ dst_reg->max_value = max_val;
+ break;
+ case BPF_LSH:
+ /* Gotta have special overflow logic here, if we're shifting
+ * more than MAX_RANGE then just assume we have an invalid
+ * range.
+ */
+ if (min_val > ilog2(BPF_REGISTER_MAX_RANGE))
+ dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
+ else
+ dst_reg->min_value <<= min_val;
+
+ if (max_val > ilog2(BPF_REGISTER_MAX_RANGE))
+ dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
+ else
+ dst_reg->max_value <<= max_val;
+ break;
+ case BPF_RSH:
+ dst_reg->min_value >>= min_val;
+ dst_reg->max_value >>= max_val;
+ break;
+ case BPF_MOD:
+ /* % is special since it is an unsigned modulus, so the floor
+ * will always be 0.
+ */
+ dst_reg->min_value = 0;
+ dst_reg->max_value = max_val - 1;
+ break;
+ default:
+ reset_reg_range_values(regs, insn->dst_reg);
+ break;
+ }
+
+ check_reg_overflow(dst_reg);
+}
+
/* check validity of 32-bit and 64-bit arithmetic operations */
-static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
+static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs, *dst_reg;
+ struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
u8 opcode = BPF_OP(insn->code);
int err;
@@ -1496,6 +1627,11 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
if (err)
return err;
+ /* we are setting our register to something new, we need to
+ * reset its range values.
+ */
+ reset_reg_range_values(regs, insn->dst_reg);
+
if (BPF_SRC(insn->code) == BPF_X) {
if (BPF_CLASS(insn->code) == BPF_ALU64) {
/* case: R1 = R2
@@ -1517,6 +1653,8 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
*/
regs[insn->dst_reg].type = CONST_IMM;
regs[insn->dst_reg].imm = insn->imm;
+ regs[insn->dst_reg].max_value = insn->imm;
+ regs[insn->dst_reg].min_value = insn->imm;
}
} else if (opcode > BPF_END) {
@@ -1569,6 +1707,9 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
dst_reg = &regs[insn->dst_reg];
+ /* first we want to adjust our ranges. */
+ adjust_reg_min_max_vals(env, insn);
+
/* pattern match 'bpf_add Rx, imm' instruction */
if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 &&
dst_reg->type == FRAME_PTR && BPF_SRC(insn->code) == BPF_K) {
@@ -1603,28 +1744,58 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
return -EACCES;
}
- /* mark dest operand */
- mark_reg_unknown_value(regs, insn->dst_reg);
+ /* If we did pointer math on a map value then just set it to our
+ * PTR_TO_MAP_VALUE_ADJ type so we can deal with any stores or
+ * loads to this register appropriately, otherwise just mark the
+ * register as unknown.
+ */
+ if (env->allow_ptr_leaks &&
+ (dst_reg->type == PTR_TO_MAP_VALUE ||
+ dst_reg->type == PTR_TO_MAP_VALUE_ADJ))
+ dst_reg->type = PTR_TO_MAP_VALUE_ADJ;
+ else
+ mark_reg_unknown_value(regs, insn->dst_reg);
}
return 0;
}
-static void find_good_pkt_pointers(struct verifier_env *env,
- struct reg_state *dst_reg)
+static void find_good_pkt_pointers(struct bpf_verifier_state *state,
+ struct bpf_reg_state *dst_reg)
{
- struct verifier_state *state = &env->cur_state;
- struct reg_state *regs = state->regs, *reg;
+ struct bpf_reg_state *regs = state->regs, *reg;
int i;
- /* r2 = r3;
- * r2 += 8
- * if (r2 > pkt_end) goto somewhere
- * r2 == dst_reg, pkt_end == src_reg,
- * r2=pkt(id=n,off=8,r=0)
- * r3=pkt(id=n,off=0,r=0)
- * find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
- * so that range of bytes [r3, r3 + 8) is safe to access
+
+ /* LLVM can generate two kind of checks:
+ *
+ * Type 1:
+ *
+ * r2 = r3;
+ * r2 += 8;
+ * if (r2 > pkt_end) goto <handle exception>
+ * <access okay>
+ *
+ * Where:
+ * r2 == dst_reg, pkt_end == src_reg
+ * r2=pkt(id=n,off=8,r=0)
+ * r3=pkt(id=n,off=0,r=0)
+ *
+ * Type 2:
+ *
+ * r2 = r3;
+ * r2 += 8;
+ * if (pkt_end >= r2) goto <access okay>
+ * <handle exception>
+ *
+ * Where:
+ * pkt_end == dst_reg, r2 == src_reg
+ * r2=pkt(id=n,off=8,r=0)
+ * r3=pkt(id=n,off=0,r=0)
+ *
+ * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
+ * so that range of bytes [r3, r3 + 8) is safe to access.
*/
+
for (i = 0; i < MAX_BPF_REG; i++)
if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
regs[i].range = dst_reg->off;
@@ -1638,11 +1809,109 @@ static void find_good_pkt_pointers(struct verifier_env *env,
}
}
-static int check_cond_jmp_op(struct verifier_env *env,
+/* Adjusts the register min/max values in the case that the dst_reg is the
+ * variable register that we are working on, and src_reg is a constant or we're
+ * simply doing a BPF_K check.
+ */
+static void reg_set_min_max(struct bpf_reg_state *true_reg,
+ struct bpf_reg_state *false_reg, u64 val,
+ u8 opcode)
+{
+ switch (opcode) {
+ case BPF_JEQ:
+ /* If this is false then we know nothing Jon Snow, but if it is
+ * true then we know for sure.
+ */
+ true_reg->max_value = true_reg->min_value = val;
+ break;
+ case BPF_JNE:
+ /* If this is true we know nothing Jon Snow, but if it is false
+ * we know the value for sure;
+ */
+ false_reg->max_value = false_reg->min_value = val;
+ break;
+ case BPF_JGT:
+ /* Unsigned comparison, the minimum value is 0. */
+ false_reg->min_value = 0;
+ case BPF_JSGT:
+ /* If this is false then we know the maximum val is val,
+ * otherwise we know the min val is val+1.
+ */
+ false_reg->max_value = val;
+ true_reg->min_value = val + 1;
+ break;
+ case BPF_JGE:
+ /* Unsigned comparison, the minimum value is 0. */
+ false_reg->min_value = 0;
+ case BPF_JSGE:
+ /* If this is false then we know the maximum value is val - 1,
+ * otherwise we know the mimimum value is val.
+ */
+ false_reg->max_value = val - 1;
+ true_reg->min_value = val;
+ break;
+ default:
+ break;
+ }
+
+ check_reg_overflow(false_reg);
+ check_reg_overflow(true_reg);
+}
+
+/* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg
+ * is the variable reg.
+ */
+static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
+ struct bpf_reg_state *false_reg, u64 val,
+ u8 opcode)
+{
+ switch (opcode) {
+ case BPF_JEQ:
+ /* If this is false then we know nothing Jon Snow, but if it is
+ * true then we know for sure.
+ */
+ true_reg->max_value = true_reg->min_value = val;
+ break;
+ case BPF_JNE:
+ /* If this is true we know nothing Jon Snow, but if it is false
+ * we know the value for sure;
+ */
+ false_reg->max_value = false_reg->min_value = val;
+ break;
+ case BPF_JGT:
+ /* Unsigned comparison, the minimum value is 0. */
+ true_reg->min_value = 0;
+ case BPF_JSGT:
+ /*
+ * If this is false, then the val is <= the register, if it is
+ * true the register <= to the val.
+ */
+ false_reg->min_value = val;
+ true_reg->max_value = val - 1;
+ break;
+ case BPF_JGE:
+ /* Unsigned comparison, the minimum value is 0. */
+ true_reg->min_value = 0;
+ case BPF_JSGE:
+ /* If this is false then constant < register, if it is true then
+ * the register < constant.
+ */
+ false_reg->min_value = val + 1;
+ true_reg->max_value = val;
+ break;
+ default:
+ break;
+ }
+
+ check_reg_overflow(false_reg);
+ check_reg_overflow(true_reg);
+}
+
+static int check_cond_jmp_op(struct bpf_verifier_env *env,
struct bpf_insn *insn, int *insn_idx)
{
- struct reg_state *regs = env->cur_state.regs, *dst_reg;
- struct verifier_state *other_branch;
+ struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state;
+ struct bpf_reg_state *regs = this_branch->regs, *dst_reg;
u8 opcode = BPF_OP(insn->code);
int err;
@@ -1704,7 +1973,24 @@ static int check_cond_jmp_op(struct verifier_env *env,
if (!other_branch)
return -EFAULT;
- /* detect if R == 0 where R is returned value from bpf_map_lookup_elem() */
+ /* detect if we are comparing against a constant value so we can adjust
+ * our min/max values for our dst register.
+ */
+ if (BPF_SRC(insn->code) == BPF_X) {
+ if (regs[insn->src_reg].type == CONST_IMM)
+ reg_set_min_max(&other_branch->regs[insn->dst_reg],
+ dst_reg, regs[insn->src_reg].imm,
+ opcode);
+ else if (dst_reg->type == CONST_IMM)
+ reg_set_min_max_inv(&other_branch->regs[insn->src_reg],
+ &regs[insn->src_reg], dst_reg->imm,
+ opcode);
+ } else {
+ reg_set_min_max(&other_branch->regs[insn->dst_reg],
+ dst_reg, insn->imm, opcode);
+ }
+
+ /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
if (BPF_SRC(insn->code) == BPF_K &&
insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
@@ -1723,13 +2009,17 @@ static int check_cond_jmp_op(struct verifier_env *env,
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
dst_reg->type == PTR_TO_PACKET &&
regs[insn->src_reg].type == PTR_TO_PACKET_END) {
- find_good_pkt_pointers(env, dst_reg);
+ find_good_pkt_pointers(this_branch, dst_reg);
+ } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
+ dst_reg->type == PTR_TO_PACKET_END &&
+ regs[insn->src_reg].type == PTR_TO_PACKET) {
+ find_good_pkt_pointers(other_branch, &regs[insn->src_reg]);
} else if (is_pointer_value(env, insn->dst_reg)) {
verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
return -EACCES;
}
if (log_level)
- print_verifier_state(&env->cur_state);
+ print_verifier_state(this_branch);
return 0;
}
@@ -1742,9 +2032,9 @@ static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
}
/* verify BPF_LD_IMM64 instruction */
-static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
+static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *regs = env->cur_state.regs;
int err;
if (BPF_SIZE(insn->code) != BPF_DW) {
@@ -1760,9 +2050,19 @@ static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
if (err)
return err;
- if (insn->src_reg == 0)
- /* generic move 64-bit immediate into a register */
+ if (insn->src_reg == 0) {
+ /* generic move 64-bit immediate into a register,
+ * only analyzer needs to collect the ld_imm value.
+ */
+ u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
+
+ if (!env->analyzer_ops)
+ return 0;
+
+ regs[insn->dst_reg].type = CONST_IMM;
+ regs[insn->dst_reg].imm = imm;
return 0;
+ }
/* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
@@ -1799,11 +2099,11 @@ static bool may_access_skb(enum bpf_prog_type type)
* Output:
* R0 - 8/16/32-bit skb data converted to cpu endianness
*/
-static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
+static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *regs = env->cur_state.regs;
u8 mode = BPF_MODE(insn->code);
- struct reg_state *reg;
+ struct bpf_reg_state *reg;
int i, err;
if (!may_access_skb(env->prog->type)) {
@@ -1889,7 +2189,7 @@ enum {
BRANCH = 2,
};
-#define STATE_LIST_MARK ((struct verifier_state_list *) -1L)
+#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
static int *insn_stack; /* stack of insns to process */
static int cur_stack; /* current stack index */
@@ -1900,7 +2200,7 @@ static int *insn_state;
* w - next instruction
* e - edge
*/
-static int push_insn(int t, int w, int e, struct verifier_env *env)
+static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
{
if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
return 0;
@@ -1941,7 +2241,7 @@ static int push_insn(int t, int w, int e, struct verifier_env *env)
/* non-recursive depth-first-search to detect loops in BPF program
* loop == back-edge in directed graph
*/
-static int check_cfg(struct verifier_env *env)
+static int check_cfg(struct bpf_verifier_env *env)
{
struct bpf_insn *insns = env->prog->insnsi;
int insn_cnt = env->prog->len;
@@ -2050,7 +2350,8 @@ err_free:
/* the following conditions reduce the number of explored insns
* from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
*/
-static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur)
+static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
+ struct bpf_reg_state *cur)
{
if (old->id != cur->id)
return false;
@@ -2125,9 +2426,11 @@ static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur)
* whereas register type in current state is meaningful, it means that
* the current state will reach 'bpf_exit' instruction safely
*/
-static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
+static bool states_equal(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *old,
+ struct bpf_verifier_state *cur)
{
- struct reg_state *rold, *rcur;
+ struct bpf_reg_state *rold, *rcur;
int i;
for (i = 0; i < MAX_BPF_REG; i++) {
@@ -2137,6 +2440,13 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
if (memcmp(rold, rcur, sizeof(*rold)) == 0)
continue;
+ /* If the ranges were not the same, but everything else was and
+ * we didn't do a variable access into a map then we are a-ok.
+ */
+ if (!env->varlen_map_value_access &&
+ rold->type == rcur->type && rold->imm == rcur->imm)
+ continue;
+
if (rold->type == NOT_INIT ||
(rold->type == UNKNOWN_VALUE && rcur->type != NOT_INIT))
continue;
@@ -2167,9 +2477,9 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
* the same, check that stored pointers types
* are the same as well.
* Ex: explored safe path could have stored
- * (struct reg_state) {.type = PTR_TO_STACK, .imm = -8}
+ * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -8}
* but current path has stored:
- * (struct reg_state) {.type = PTR_TO_STACK, .imm = -16}
+ * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -16}
* such verifier states are not equivalent.
* return false to continue verification of this path
*/
@@ -2180,10 +2490,10 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
return true;
}
-static int is_state_visited(struct verifier_env *env, int insn_idx)
+static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
- struct verifier_state_list *new_sl;
- struct verifier_state_list *sl;
+ struct bpf_verifier_state_list *new_sl;
+ struct bpf_verifier_state_list *sl;
sl = env->explored_states[insn_idx];
if (!sl)
@@ -2193,7 +2503,7 @@ static int is_state_visited(struct verifier_env *env, int insn_idx)
return 0;
while (sl != STATE_LIST_MARK) {
- if (states_equal(&sl->state, &env->cur_state))
+ if (states_equal(env, &sl->state, &env->cur_state))
/* reached equivalent register/stack state,
* prune the search
*/
@@ -2207,7 +2517,7 @@ static int is_state_visited(struct verifier_env *env, int insn_idx)
* it will be rejected. Since there are no loops, we won't be
* seeing this 'insn_idx' instruction again on the way to bpf_exit
*/
- new_sl = kmalloc(sizeof(struct verifier_state_list), GFP_USER);
+ new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER);
if (!new_sl)
return -ENOMEM;
@@ -2218,11 +2528,20 @@ static int is_state_visited(struct verifier_env *env, int insn_idx)
return 0;
}
-static int do_check(struct verifier_env *env)
+static int ext_analyzer_insn_hook(struct bpf_verifier_env *env,
+ int insn_idx, int prev_insn_idx)
+{
+ if (!env->analyzer_ops || !env->analyzer_ops->insn_hook)
+ return 0;
+
+ return env->analyzer_ops->insn_hook(env, insn_idx, prev_insn_idx);
+}
+
+static int do_check(struct bpf_verifier_env *env)
{
- struct verifier_state *state = &env->cur_state;
+ struct bpf_verifier_state *state = &env->cur_state;
struct bpf_insn *insns = env->prog->insnsi;
- struct reg_state *regs = state->regs;
+ struct bpf_reg_state *regs = state->regs;
int insn_cnt = env->prog->len;
int insn_idx, prev_insn_idx = 0;
int insn_processed = 0;
@@ -2230,6 +2549,7 @@ static int do_check(struct verifier_env *env)
init_reg_state(regs);
insn_idx = 0;
+ env->varlen_map_value_access = false;
for (;;) {
struct bpf_insn *insn;
u8 class;
@@ -2276,13 +2596,17 @@ static int do_check(struct verifier_env *env)
print_bpf_insn(insn);
}
+ err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx);
+ if (err)
+ return err;
+
if (class == BPF_ALU || class == BPF_ALU64) {
err = check_alu_op(env, insn);
if (err)
return err;
} else if (class == BPF_LDX) {
- enum bpf_reg_type src_reg_type;
+ enum bpf_reg_type *prev_src_type, src_reg_type;
/* check for reserved fields is already done */
@@ -2306,21 +2630,25 @@ static int do_check(struct verifier_env *env)
if (err)
return err;
- if (BPF_SIZE(insn->code) != BPF_W) {
+ reset_reg_range_values(regs, insn->dst_reg);
+ if (BPF_SIZE(insn->code) != BPF_W &&
+ BPF_SIZE(insn->code) != BPF_DW) {
insn_idx++;
continue;
}
- if (insn->imm == 0) {
+ prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
+
+ if (*prev_src_type == NOT_INIT) {
/* saw a valid insn
* dst_reg = *(u32 *)(src_reg + off)
- * use reserved 'imm' field to mark this insn
+ * save type to validate intersecting paths
*/
- insn->imm = src_reg_type;
+ *prev_src_type = src_reg_type;
- } else if (src_reg_type != insn->imm &&
+ } else if (src_reg_type != *prev_src_type &&
(src_reg_type == PTR_TO_CTX ||
- insn->imm == PTR_TO_CTX)) {
+ *prev_src_type == PTR_TO_CTX)) {
/* ABuser program is trying to use the same insn
* dst_reg = *(u32*) (src_reg + off)
* with different pointer types:
@@ -2333,7 +2661,7 @@ static int do_check(struct verifier_env *env)
}
} else if (class == BPF_STX) {
- enum bpf_reg_type dst_reg_type;
+ enum bpf_reg_type *prev_dst_type, dst_reg_type;
if (BPF_MODE(insn->code) == BPF_XADD) {
err = check_xadd(env, insn);
@@ -2361,11 +2689,13 @@ static int do_check(struct verifier_env *env)
if (err)
return err;
- if (insn->imm == 0) {
- insn->imm = dst_reg_type;
- } else if (dst_reg_type != insn->imm &&
+ prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type;
+
+ if (*prev_dst_type == NOT_INIT) {
+ *prev_dst_type = dst_reg_type;
+ } else if (dst_reg_type != *prev_dst_type &&
(dst_reg_type == PTR_TO_CTX ||
- insn->imm == PTR_TO_CTX)) {
+ *prev_dst_type == PTR_TO_CTX)) {
verbose("same insn cannot be used with different pointers\n");
return -EINVAL;
}
@@ -2471,6 +2801,7 @@ process_bpf_exit:
verbose("invalid BPF_LD mode\n");
return -EINVAL;
}
+ reset_reg_range_values(regs, insn->dst_reg);
} else {
verbose("unknown insn class %d\n", class);
return -EINVAL;
@@ -2483,14 +2814,28 @@ process_bpf_exit:
return 0;
}
+static int check_map_prog_compatibility(struct bpf_map *map,
+ struct bpf_prog *prog)
+
+{
+ if (prog->type == BPF_PROG_TYPE_PERF_EVENT &&
+ (map->map_type == BPF_MAP_TYPE_HASH ||
+ map->map_type == BPF_MAP_TYPE_PERCPU_HASH) &&
+ (map->map_flags & BPF_F_NO_PREALLOC)) {
+ verbose("perf_event programs can only use preallocated hash map\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
/* look for pseudo eBPF instructions that access map FDs and
* replace them with actual map pointers
*/
-static int replace_map_fd_with_map_ptr(struct verifier_env *env)
+static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
{
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
- int i, j;
+ int i, j, err;
for (i = 0; i < insn_cnt; i++, insn++) {
if (BPF_CLASS(insn->code) == BPF_LDX &&
@@ -2534,6 +2879,12 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
return PTR_ERR(map);
}
+ err = check_map_prog_compatibility(map, env->prog);
+ if (err) {
+ fdput(f);
+ return err;
+ }
+
/* store map pointer inside BPF_LD_IMM64 instruction */
insn[0].imm = (u32) (unsigned long) map;
insn[1].imm = ((u64) (unsigned long) map) >> 32;
@@ -2577,7 +2928,7 @@ next_insn:
}
/* drop refcnt of maps used by the rejected program */
-static void release_maps(struct verifier_env *env)
+static void release_maps(struct bpf_verifier_env *env)
{
int i;
@@ -2586,7 +2937,7 @@ static void release_maps(struct verifier_env *env)
}
/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
-static void convert_pseudo_ld_imm64(struct verifier_env *env)
+static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
{
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
@@ -2600,62 +2951,74 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)
/* convert load instructions that access fields of 'struct __sk_buff'
* into sequence of instructions that access fields of 'struct sk_buff'
*/
-static int convert_ctx_accesses(struct verifier_env *env)
+static int convert_ctx_accesses(struct bpf_verifier_env *env)
{
- struct bpf_insn *insn = env->prog->insnsi;
- int insn_cnt = env->prog->len;
- struct bpf_insn insn_buf[16];
+ const struct bpf_verifier_ops *ops = env->prog->aux->ops;
+ const int insn_cnt = env->prog->len;
+ struct bpf_insn insn_buf[16], *insn;
struct bpf_prog *new_prog;
enum bpf_access_type type;
- int i;
+ int i, cnt, delta = 0;
- if (!env->prog->aux->ops->convert_ctx_access)
+ if (ops->gen_prologue) {
+ cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
+ env->prog);
+ if (cnt >= ARRAY_SIZE(insn_buf)) {
+ verbose("bpf verifier is misconfigured\n");
+ return -EINVAL;
+ } else if (cnt) {
+ new_prog = bpf_patch_insn_single(env->prog, 0,
+ insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+ env->prog = new_prog;
+ delta += cnt - 1;
+ }
+ }
+
+ if (!ops->convert_ctx_access)
return 0;
- for (i = 0; i < insn_cnt; i++, insn++) {
- u32 insn_delta, cnt;
+ insn = env->prog->insnsi + delta;
- if (insn->code == (BPF_LDX | BPF_MEM | BPF_W))
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
type = BPF_READ;
- else if (insn->code == (BPF_STX | BPF_MEM | BPF_W))
+ else if (insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_DW))
type = BPF_WRITE;
else
continue;
- if (insn->imm != PTR_TO_CTX) {
- /* clear internal mark */
- insn->imm = 0;
+ if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX)
continue;
- }
- cnt = env->prog->aux->ops->
- convert_ctx_access(type, insn->dst_reg, insn->src_reg,
- insn->off, insn_buf, env->prog);
+ cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg,
+ insn->off, insn_buf, env->prog);
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
verbose("bpf verifier is misconfigured\n");
return -EINVAL;
}
- new_prog = bpf_patch_insn_single(env->prog, i, insn_buf, cnt);
+ new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf,
+ cnt);
if (!new_prog)
return -ENOMEM;
- insn_delta = cnt - 1;
+ delta += cnt - 1;
/* keep walking new program and skip insns we just inserted */
env->prog = new_prog;
- insn = new_prog->insnsi + i + insn_delta;
-
- insn_cnt += insn_delta;
- i += insn_delta;
+ insn = new_prog->insnsi + i + delta;
}
return 0;
}
-static void free_states(struct verifier_env *env)
+static void free_states(struct bpf_verifier_env *env)
{
- struct verifier_state_list *sl, *sln;
+ struct bpf_verifier_state_list *sl, *sln;
int i;
if (!env->explored_states)
@@ -2678,19 +3041,24 @@ static void free_states(struct verifier_env *env)
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
{
char __user *log_ubuf = NULL;
- struct verifier_env *env;
+ struct bpf_verifier_env *env;
int ret = -EINVAL;
if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS)
return -E2BIG;
- /* 'struct verifier_env' can be global, but since it's not small,
+ /* 'struct bpf_verifier_env' can be global, but since it's not small,
* allocate/free it every time bpf_check() is called
*/
- env = kzalloc(sizeof(struct verifier_env), GFP_KERNEL);
+ env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
if (!env)
return -ENOMEM;
+ env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
+ (*prog)->len);
+ ret = -ENOMEM;
+ if (!env->insn_aux_data)
+ goto err_free_env;
env->prog = *prog;
/* grab the mutex to protect few globals used by verifier */
@@ -2709,12 +3077,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
/* log_* values have to be sane */
if (log_size < 128 || log_size > UINT_MAX >> 8 ||
log_level == 0 || log_ubuf == NULL)
- goto free_env;
+ goto err_unlock;
ret = -ENOMEM;
log_buf = vmalloc(log_size);
if (!log_buf)
- goto free_env;
+ goto err_unlock;
} else {
log_level = 0;
}
@@ -2724,7 +3092,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
goto skip_full_check;
env->explored_states = kcalloc(env->prog->len,
- sizeof(struct verifier_state_list *),
+ sizeof(struct bpf_verifier_state_list *),
GFP_USER);
ret = -ENOMEM;
if (!env->explored_states)
@@ -2783,14 +3151,67 @@ skip_full_check:
free_log_buf:
if (log_level)
vfree(log_buf);
-free_env:
if (!env->prog->aux->used_maps)
/* if we didn't copy map pointers into bpf_prog_info, release
* them now. Otherwise free_bpf_prog_info() will release them.
*/
release_maps(env);
*prog = env->prog;
+err_unlock:
+ mutex_unlock(&bpf_verifier_lock);
+ vfree(env->insn_aux_data);
+err_free_env:
kfree(env);
+ return ret;
+}
+
+int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
+ void *priv)
+{
+ struct bpf_verifier_env *env;
+ int ret;
+
+ env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
+ if (!env)
+ return -ENOMEM;
+
+ env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
+ prog->len);
+ ret = -ENOMEM;
+ if (!env->insn_aux_data)
+ goto err_free_env;
+ env->prog = prog;
+ env->analyzer_ops = ops;
+ env->analyzer_priv = priv;
+
+ /* grab the mutex to protect few globals used by verifier */
+ mutex_lock(&bpf_verifier_lock);
+
+ log_level = 0;
+
+ env->explored_states = kcalloc(env->prog->len,
+ sizeof(struct bpf_verifier_state_list *),
+ GFP_KERNEL);
+ ret = -ENOMEM;
+ if (!env->explored_states)
+ goto skip_full_check;
+
+ ret = check_cfg(env);
+ if (ret < 0)
+ goto skip_full_check;
+
+ env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
+
+ ret = do_check(env);
+
+skip_full_check:
+ while (pop_stack(env, NULL) >= 0);
+ free_states(env);
+
mutex_unlock(&bpf_verifier_lock);
+ vfree(env->insn_aux_data);
+err_free_env:
+ kfree(env);
return ret;
}
+EXPORT_SYMBOL_GPL(bpf_analyzer);