On Wed, May 09, 2018 at 02:07:02PM -0700, Joe Stringer wrote: > Teach the verifier a little bit about a new type of pointer, a > PTR_TO_SOCKET. This pointer type is accessed from BPF through the > 'struct bpf_sock' structure. > > Signed-off-by: Joe Stringer <j...@wand.net.nz> > --- > include/linux/bpf.h | 19 +++++++++- > include/linux/bpf_verifier.h | 2 ++ > kernel/bpf/verifier.c | 86 > ++++++++++++++++++++++++++++++++++++++------ > net/core/filter.c | 30 +++++++++------- > 4 files changed, 114 insertions(+), 23 deletions(-)
Ack for patches 1-3. In this one few nits: > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index a38e474bf7ee..a03b4b0edcb6 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -136,7 +136,7 @@ enum bpf_arg_type { > /* the following constraints used to prototype bpf_memcmp() and other > * functions that access data on eBPF program stack > */ > - ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map > value) */ > + ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map > value, socket) */ I don't see where in this patch this change happens... > ARG_PTR_TO_MEM_OR_NULL, /* pointer to valid memory or NULL */ > ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be > initialized, > * helper function must fill all bytes or clear > @@ -148,6 +148,7 @@ enum bpf_arg_type { > > ARG_PTR_TO_CTX, /* pointer to context */ > ARG_ANYTHING, /* any (initialized) argument is ok */ > + ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */ > }; > > /* type of values returned from helper functions */ > @@ -155,6 +156,7 @@ enum bpf_return_type { > RET_INTEGER, /* function returns integer */ > RET_VOID, /* function doesn't return anything */ > RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value > or NULL */ > + RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or > NULL */ > }; > > /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF > programs > @@ -205,6 +207,8 @@ enum bpf_reg_type { > PTR_TO_PACKET_META, /* skb->data - meta_len */ > PTR_TO_PACKET, /* reg points to skb->data */ > PTR_TO_PACKET_END, /* skb->data + headlen */ > + PTR_TO_SOCKET, /* reg points to struct bpf_sock */ > + PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ > }; > > /* The information passed from prog-specific *_is_valid_access > @@ -326,6 +330,11 @@ const struct bpf_func_proto > *bpf_get_trace_printk_proto(void); > > typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, > unsigned long off, unsigned long len); > +typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type, > + const struct bpf_insn *src, > + struct bpf_insn *dst, > + struct bpf_prog *prog, > + u32 *target_size); > > u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 > meta_size, > void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); > @@ -729,4 +738,12 @@ extern const struct bpf_func_proto > bpf_sock_map_update_proto; > void bpf_user_rnd_init_once(void); > u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); > > +bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, > + struct bpf_insn_access_aux *info); > +u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, > + const struct bpf_insn *si, > + struct bpf_insn *insn_buf, > + struct bpf_prog *prog, > + u32 *target_size); > + > #endif /* _LINUX_BPF_H */ > diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h > index a613b52ce939..9dcd87f1d322 100644 > --- a/include/linux/bpf_verifier.h > +++ b/include/linux/bpf_verifier.h > @@ -57,6 +57,8 @@ struct bpf_reg_state { > * offset, so they can share range knowledge. > * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we > * came from, when one is tested for != NULL. > + * For PTR_TO_SOCKET this is used to share which pointers retain the > + * same reference to the socket, to determine proper reference freeing. > */ > u32 id; > /* Ordering of fields matters. See states_equal() */ > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index 1b31b805dea4..d38c7c1e9da6 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -80,8 +80,8 @@ static const struct bpf_verifier_ops * const > bpf_verifier_ops[] = { > * (like pointer plus pointer becomes SCALAR_VALUE type) > * > * When verifier sees load or store instructions the type of base register > - * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three > pointer > - * types recognized by check_mem_access() function. > + * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These > are > + * four pointer types recognized by check_mem_access() function. > * > * PTR_TO_MAP_VALUE means that this register is pointing to 'map element > value' > * and the range of [ptr, ptr + map's value_size) is accessible. > @@ -244,6 +244,8 @@ static const char * const reg_type_str[] = { > [PTR_TO_PACKET] = "pkt", > [PTR_TO_PACKET_META] = "pkt_meta", > [PTR_TO_PACKET_END] = "pkt_end", > + [PTR_TO_SOCKET] = "sock", > + [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", > }; > > static void print_liveness(struct bpf_verifier_env *env, > @@ -977,6 +979,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) > case PTR_TO_PACKET_META: > case PTR_TO_PACKET_END: > case CONST_PTR_TO_MAP: > + case PTR_TO_SOCKET: > + case PTR_TO_SOCKET_OR_NULL: > return true; > default: > return false; > @@ -1360,6 +1364,28 @@ static int check_ctx_access(struct bpf_verifier_env > *env, int insn_idx, int off, > return -EACCES; > } > > +static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int > off, > + int size, enum bpf_access_type t) > +{ > + struct bpf_reg_state *regs = cur_regs(env); > + struct bpf_reg_state *reg = ®s[regno]; > + struct bpf_insn_access_aux info; > + > + if (reg->smin_value < 0) { > + verbose(env, "R%d min value is negative, either use unsigned > index or do a if (index >=0) check.\n", > + regno); > + return -EACCES; > + } > + > + if (!bpf_sock_is_valid_access(off, size, t, &info)) { > + verbose(env, "invalid bpf_sock_ops access off=%d size=%d\n", > + off, size); > + return -EACCES; > + } > + > + return 0; > +} > + > static bool __is_pointer_value(bool allow_ptr_leaks, > const struct bpf_reg_state *reg) > { > @@ -1475,6 +1501,9 @@ static int check_ptr_alignment(struct bpf_verifier_env > *env, > */ > strict = true; > break; > + case PTR_TO_SOCKET: > + pointer_desc = "sock "; > + break; > default: > break; > } > @@ -1723,6 +1752,16 @@ static int check_mem_access(struct bpf_verifier_env > *env, int insn_idx, u32 regn > err = check_packet_access(env, regno, off, size, false); > if (!err && t == BPF_READ && value_regno >= 0) > mark_reg_unknown(env, regs, value_regno); > + > + } else if (reg->type == PTR_TO_SOCKET) { > + if (t == BPF_WRITE) { > + verbose(env, "cannot write into socket\n"); > + return -EACCES; > + } > + err = check_sock_access(env, regno, off, size, t); > + if (!err && t == BPF_READ && value_regno >= 0) t == BPF_READ check is unnecessary. > + mark_reg_unknown(env, regs, value_regno); > + > } else { > verbose(env, "R%d invalid mem access '%s'\n", regno, > reg_type_str[reg->type]); > @@ -1941,6 +1980,10 @@ static int check_func_arg(struct bpf_verifier_env > *env, u32 regno, > expected_type = PTR_TO_CTX; > if (type != expected_type) > goto err_type; > + } else if (arg_type == ARG_PTR_TO_SOCKET) { > + expected_type = PTR_TO_SOCKET; > + if (type != expected_type) > + goto err_type; > } else if (arg_type_is_mem_ptr(arg_type)) { > expected_type = PTR_TO_STACK; > /* One exception here. In case function allows for NULL to be > @@ -2477,6 +2520,10 @@ static int check_helper_call(struct bpf_verifier_env > *env, int func_id, int insn > insn_aux->map_ptr = meta.map_ptr; > else if (insn_aux->map_ptr != meta.map_ptr) > insn_aux->map_ptr = BPF_MAP_PTR_POISON; > + } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { > + mark_reg_known_zero(env, regs, BPF_REG_0); > + regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; > + regs[BPF_REG_0].id = ++env->id_gen; > } else { > verbose(env, "unknown return type %d of func %s#%d\n", > fn->ret_type, func_id_name(func_id), func_id); > @@ -2614,6 +2661,8 @@ static int adjust_ptr_min_max_vals(struct > bpf_verifier_env *env, > return -EACCES; > case CONST_PTR_TO_MAP: > case PTR_TO_PACKET_END: > + case PTR_TO_SOCKET: > + case PTR_TO_SOCKET_OR_NULL: > verbose(env, "R%d pointer arithmetic on %s prohibited\n", > dst, reg_type_str[ptr_reg->type]); > return -EACCES; > @@ -3559,6 +3608,8 @@ static void mark_ptr_or_null_reg(struct bpf_reg_state > *reg, u32 id, > } else { > reg->type = PTR_TO_MAP_VALUE; > } > + } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { > + reg->type = PTR_TO_SOCKET; > } > /* We don't need id from this point onwards anymore, thus we > * should better reset it, so that state pruning has chances > @@ -4333,6 +4384,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct > bpf_reg_state *rcur, > case PTR_TO_CTX: > case CONST_PTR_TO_MAP: > case PTR_TO_PACKET_END: > + case PTR_TO_SOCKET: > + case PTR_TO_SOCKET_OR_NULL: > /* Only valid matches are exact, which memcmp() above > * would have accepted > */ > @@ -5188,10 +5241,14 @@ static void sanitize_dead_code(struct > bpf_verifier_env *env) > } > } > > -/* convert load instructions that access fields of 'struct __sk_buff' > - * into sequence of instructions that access fields of 'struct sk_buff' > +/* convert load instructions that access fields of a context type into a > + * sequence of instructions that access fields of the underlying structure: > + * struct __sk_buff -> struct sk_buff > + * struct bpf_sock_ops -> struct sock > */ > -static int convert_ctx_accesses(struct bpf_verifier_env *env) > +static int convert_ctx_accesses(struct bpf_verifier_env *env, > + bpf_convert_ctx_access_t convert_ctx_access, > + enum bpf_reg_type ctx_type) > { > const struct bpf_verifier_ops *ops = env->ops; > int i, cnt, size, ctx_field_size, delta = 0; > @@ -5218,12 +5275,14 @@ static int convert_ctx_accesses(struct > bpf_verifier_env *env) > } > } > > - if (!ops->convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux)) > + if (!convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux)) > return 0; > > insn = env->prog->insnsi + delta; > > for (i = 0; i < insn_cnt; i++, insn++) { > + enum bpf_reg_type ptr_type; > + > if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || > insn->code == (BPF_LDX | BPF_MEM | BPF_H) || > insn->code == (BPF_LDX | BPF_MEM | BPF_W) || > @@ -5237,7 +5296,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env > *env) > else > continue; > > - if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) > + ptr_type = env->insn_aux_data[i + delta].ptr_type; > + if (ptr_type != ctx_type) > continue; > > ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size; > @@ -5269,8 +5329,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env > *env) > } > > target_size = 0; > - cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog, > - &target_size); > + cnt = convert_ctx_access(type, insn, insn_buf, env->prog, > + &target_size); > if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || > (ctx_field_size && !target_size)) { > verbose(env, "bpf verifier is misconfigured\n"); > @@ -5785,7 +5845,13 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr > *attr) > > if (ret == 0) > /* program is valid, convert *(u32*)(ctx + off) accesses */ > - ret = convert_ctx_accesses(env); > + ret = convert_ctx_accesses(env, env->ops->convert_ctx_access, > + PTR_TO_CTX); > + > + if (ret == 0) > + /* Convert *(u32*)(sock_ops + off) accesses */ > + ret = convert_ctx_accesses(env, bpf_sock_convert_ctx_access, > + PTR_TO_SOCKET); Overall looks great. Only this part is missing for PTR_TO_SOCKET: } else if (dst_reg_type != *prev_dst_type && (dst_reg_type == PTR_TO_CTX || *prev_dst_type == PTR_TO_CTX)) { verbose(env, "same insn cannot be used with different pointers\n"); return -EINVAL; similar logic has to be added. Otherwise the following will be accepted: R1 = sock_ptr goto X; ... R1 = some_other_valid_ptr; goto X; ... R2 = *(u32 *)(R1 + 0); this will be rewritten for first branch, but it's wrong for second.