By storing a subprogno in each insn's aux data, we avoid the need to keep the list of subprog starts sorted, or to bsearch() it in find_subprog(). Also, get rid of the weird one-based indexing of subprog numbers: subprog 0 is now the main program, callees are numbered from 1, and a subprog number indexes directly into subprog_info[] and prog->aux->func[].
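For illustration only (not part of the patch): the standalone sketch below contrasts the old sorted-array-plus-bsearch() lookup with the new O(1) aux-data lookup. The simplified types and the helper names find_subprog_bsearch() and find_subprog_aux() are invented for the example; only the logic mirrors the patch.

#include <stdio.h>
#include <stdlib.h>

#define MAX_SUBPROGS 256
#define MAX_INSNS 1024

struct subprog_info { int start; };
struct insn_aux { unsigned short subprogno; };

static struct subprog_info subprog_info[MAX_SUBPROGS];
static struct insn_aux insn_aux[MAX_INSNS];
static int subprog_cnt;

/* old scheme: subprog_info must stay sorted so that every lookup can
 * bsearch() it
 */
static int cmp_subprogs(const void *a, const void *b)
{
	return ((const struct subprog_info *)a)->start -
	       ((const struct subprog_info *)b)->start;
}

static int find_subprog_bsearch(int off)
{
	struct subprog_info key = { .start = off };
	struct subprog_info *p;

	p = bsearch(&key, subprog_info, subprog_cnt,
		    sizeof(subprog_info[0]), cmp_subprogs);
	return p ? (int)(p - subprog_info) : -1;
}

/* new scheme: each insn carries its subprogno, so lookup is a single
 * array index; the start check rejects insns that aren't entry points
 */
static int find_subprog_aux(int insn_idx)
{
	unsigned short n = insn_aux[insn_idx].subprogno;

	return subprog_info[n].start == insn_idx ? n : -1;
}

int main(void)
{
	int i;

	/* subprog 0 is main() at insn 0; one callee starts at insn 5 */
	subprog_info[subprog_cnt++].start = 0;
	subprog_info[subprog_cnt++].start = 5;
	for (i = 0; i < 10; i++)
		insn_aux[i].subprogno = i >= 5 ? 1 : 0;

	/* both report subprog 1 for the callee's entry point */
	printf("bsearch: %d, aux: %d\n",
	       find_subprog_bsearch(5), find_subprog_aux(5));
	return 0;
}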
Signed-off-by: Edward Cree <ec...@solarflare.com>
---
 include/linux/bpf_verifier.h |   3 +-
 kernel/bpf/verifier.c        | 284 ++++++++++++++++++++++++++-----------------
 2 files changed, 177 insertions(+), 110 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 8f70dc181e23..17990dd56e65 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -146,6 +146,7 @@ struct bpf_insn_aux_data {
 		s32 call_imm;		/* saved imm field of call insn */
 	};
 	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
+	u16 subprogno; /* subprog in which this insn resides */
 	bool seen; /* this insn was processed by the verifier */
 };
 
@@ -196,7 +197,7 @@ struct bpf_verifier_env {
 	bool seen_direct_write;
 	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
 	struct bpf_verifier_log log;
-	struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
+	struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS];
 	u32 subprog_cnt;
 };
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index df4d742360d9..587eb445bfa2 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -736,109 +736,171 @@ enum reg_arg_type {
 	DST_OP_NO_MARK	/* same as above, check only, don't mark */
 };
 
-static int cmp_subprogs(const void *a, const void *b)
+static int find_subprog(struct bpf_verifier_env *env, int insn_idx)
 {
-	return ((struct bpf_subprog_info *)a)->start -
-	       ((struct bpf_subprog_info *)b)->start;
-}
-
-static int find_subprog(struct bpf_verifier_env *env, int off)
-{
-	struct bpf_subprog_info *p;
-
-	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
-		    sizeof(env->subprog_info[0]), cmp_subprogs);
-	if (!p)
-		return -ENOENT;
-	return p - env->subprog_info;
+	int insn_cnt = env->prog->len;
+	u32 subprogno;
+	if (insn_idx >= insn_cnt || insn_idx < 0) {
+		verbose(env, "find_subprog of invalid insn_idx %d\n", insn_idx);
+		return -EINVAL;
+	}
+	subprogno = env->insn_aux_data[insn_idx].subprogno;
+	/* validate that we are at start of subprog */
+	if (env->subprog_info[subprogno].start != insn_idx) {
+		verbose(env, "insn_idx %d is in subprog %u but that starts at %d\n",
+			insn_idx, subprogno, env->subprog_info[subprogno].start);
+		return -EINVAL;
+	}
+	return subprogno;
 }
 
 static int add_subprog(struct bpf_verifier_env *env, int off)
 {
 	int insn_cnt = env->prog->len;
+	struct bpf_subprog_info *info;
 	int ret;
 
 	if (off >= insn_cnt || off < 0) {
 		verbose(env, "call to invalid destination\n");
 		return -EINVAL;
 	}
-	ret = find_subprog(env, off);
-	if (ret >= 0)
-		return 0;
-	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
-		verbose(env, "too many subprograms\n");
-		return -E2BIG;
+	ret = env->insn_aux_data[off].subprogno;
+	/* At the time add_subprog is called, only (some) entry points are
+	 * marked with an aux->subprogno; 'body' insns aren't.  Thus, a
+	 * subprogno of 0 means either "not yet marked as an entry point" or
+	 * "entry point of main(), i.e. insn 0".
+	 * However, a call to insn 0 is never legal (it would necessarily imply
+	 * recursion, which check_cfg will catch), therefore we can assume that
+	 * we will only be called with off == 0 once, and in that case we
+	 * should go ahead and add the subprog entry.
+	 */
+	if (!ret) {
+		if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
+			verbose(env, "too many subprograms\n");
+			return -E2BIG;
		}
+		ret = env->subprog_cnt++;
+		info = &env->subprog_info[ret];
+		info->start = off;
+		info->stack_depth = 0;
+		env->insn_aux_data[off].subprogno = ret;
 	}
-	env->subprog_info[env->subprog_cnt++].start = off;
-	sort(env->subprog_info, env->subprog_cnt,
-	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
-	return 0;
+	return ret;
 }
 
 static int check_subprogs(struct bpf_verifier_env *env)
 {
-	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
-	struct bpf_insn *insn = env->prog->insnsi;
-	int insn_cnt = env->prog->len;
+	struct bpf_insn_aux_data *aux = env->insn_aux_data;
+	struct bpf_insn *insns = env->prog->insnsi;
+	int insn_cnt = env->prog->len, i, err;
+	int cur_subprog;
 
-	/* determine subprog starts. The end is one before the next starts */
-	for (i = 0; i < insn_cnt; i++) {
-		if (insn[i].code != (BPF_JMP | BPF_CALL))
-			continue;
-		if (insn[i].src_reg != BPF_PSEUDO_CALL)
-			continue;
-		if (!env->allow_ptr_leaks) {
-			verbose(env, "function calls to other bpf functions are allowed for root only\n");
-			return -EPERM;
-		}
-		if (bpf_prog_is_dev_bound(env->prog->aux)) {
-			verbose(env, "function calls in offloaded programs are not supported yet\n");
-			return -EINVAL;
+	/* Find subprog starts */
+	err = add_subprog(env, 0); /* subprog 0, main(), starts at insn 0 */
+	if (err < 0)
+		return err;
+	for (i = 0; i < insn_cnt; i++)
+		if (insns[i].code == (BPF_JMP | BPF_CALL) &&
+		    insns[i].src_reg == BPF_PSEUDO_CALL) {
+			if (!env->allow_ptr_leaks) {
+				verbose(env, "function calls to other bpf functions are allowed for root only\n");
+				return -EPERM;
+			}
+			if (bpf_prog_is_dev_bound(env->prog->aux)) {
+				verbose(env, "function calls in offloaded programs are not supported yet\n");
+				return -EINVAL;
+			}
+			/* add_subprog marks the callee entry point with the
+			 * new subprogno.
+			 */
+			err = add_subprog(env, i + insns[i].imm + 1);
+			if (err < 0)
+				return err;
 		}
-		ret = add_subprog(env, i + insn[i].imm + 1);
-		if (ret < 0)
-			return ret;
-	}
 
 	if (env->log.level > 1)
 		for (i = 0; i < env->subprog_cnt; i++)
 			verbose(env, "func#%d @%d\n", i,
 				env->subprog_info[i].start);
 
-	/* now check that all jumps are within the same subprog */
-	subprog_start = 0;
-	if (env->subprog_cnt == cur_subprog)
-		subprog_end = insn_cnt;
-	else
-		subprog_end = env->subprog_info[cur_subprog++].start;
+	/* Fill in subprogno on each insn of function body, on the assumption
+	 * that each subprog is a contiguous block of insns.  This will be
+	 * validated by the next step.
+	 */
+	cur_subprog = 0;
+	for (i = 0; i < insn_cnt; i++)
+		if (aux[i].subprogno)
+			cur_subprog = aux[i].subprogno;
+		else
+			aux[i].subprogno = cur_subprog;
+
+	/* Check that control never flows from one subprog to another except by
+	 * a pseudo-call insn.
+	 */
 	for (i = 0; i < insn_cnt; i++) {
-		u8 code = insn[i].code;
-
-		if (BPF_CLASS(code) != BPF_JMP)
-			goto next;
-		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
-			goto next;
-		off = i + insn[i].off + 1;
-		if (off < subprog_start || off >= subprog_end) {
-			verbose(env, "jump out of range from insn %d to %d\n", i, off);
-			return -EINVAL;
+		bool fallthrough = false, jump = false;
+		u8 opcode = BPF_OP(insns[i].code);
+		int target;
+
+		/* Determine where control flow from this insn goes */
+		if (BPF_CLASS(insns[i].code) != BPF_JMP) {
+			fallthrough = true;
+		} else {
+			switch (opcode) {
+			case BPF_EXIT:
+				/* This insn doesn't go anywhere.  If we're in
+				 * a subprog, then the return target is handled
+				 * as a fallthrough on the call insn, so no need
+				 * to worry about it here.
+				 */
+				continue;
+			case BPF_CALL:
+				/* If pseudo-call, already handled marking the
+				 * callee.  Both kinds of call will eventually
+				 * return and pass to the following insn.
+				 */
+				fallthrough = true;
+				break;
+			case BPF_JA:
+				/* unconditional jump; mark target */
+				jump = true;
+				target = i + insns[i].off + 1;
+				break;
+			default:
+				/* conditional jump; branch both ways */
+				fallthrough = true;
+				jump = true;
+				target = i + insns[i].off + 1;
+				break;
+			}
 		}
-next:
-		if (i == subprog_end - 1) {
-			/* to avoid fall-through from one subprog into another
-			 * the last insn of the subprog should be either exit
-			 * or unconditional jump back
-			 */
-			if (code != (BPF_JMP | BPF_EXIT) &&
-			    code != (BPF_JMP | BPF_JA)) {
-				verbose(env, "last insn is not an exit or jmp\n");
+
+		/* Check that control flow doesn't leave the subprog */
+		cur_subprog = aux[i].subprogno;
+		if (fallthrough) {
+			if (i + 1 >= insn_cnt) {
+				verbose(env, "no exit/jump at end of program (insn %d)\n",
+					i);
+				return -EINVAL;
+			}
+			if (aux[i + 1].subprogno != cur_subprog) {
+				verbose(env, "no exit/jump at end of subprog %d (insn %d)\n",
+					cur_subprog, i);
+				return -EINVAL;
+			}
+		}
+		if (jump) {
+			if (target < 0 || target >= insn_cnt) {
+				verbose(env, "jump out of range from insn %d to %d\n",
					i, target);
+				return -EINVAL;
+			}
+			if (aux[target].subprogno != cur_subprog) {
+				verbose(env, "jump from insn %d to %d crosses from subprog %d to %d\n",
+					i, target, cur_subprog,
+					aux[target].subprogno);
 				return -EINVAL;
 			}
-			subprog_start = subprog_end;
-			if (env->subprog_cnt == cur_subprog)
-				subprog_end = insn_cnt;
-			else
-				subprog_end = env->subprog_info[cur_subprog++].start;
 		}
 	}
 	return 0;
@@ -1489,7 +1551,8 @@ static int update_stack_depth(struct bpf_verifier_env *env,
  */
 static int check_max_stack_depth(struct bpf_verifier_env *env)
 {
-	int depth = 0, frame = 0, subprog = 0, i = 0, subprog_end;
+	int depth = 0, frame = 0, subprog = 0, i = 0;
+	struct bpf_insn_aux_data *aux = env->insn_aux_data;
 	struct bpf_insn *insn = env->prog->insnsi;
 	int insn_cnt = env->prog->len;
 	int ret_insn[MAX_CALL_FRAMES];
@@ -1507,11 +1570,7 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
 		return -EACCES;
 	}
 continue_func:
-	if (env->subprog_cnt == subprog)
-		subprog_end = insn_cnt;
-	else
-		subprog_end = env->subprog_info[subprog].start;
-	for (; i < subprog_end; i++) {
+	for (; i < insn_cnt && aux[i].subprogno == subprog; i++) {
 		if (insn[i].code != (BPF_JMP | BPF_CALL))
 			continue;
 		if (insn[i].src_reg != BPF_PSEUDO_CALL)
@@ -1528,7 +1587,6 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
 				  i);
 			return -EFAULT;
 		}
-		subprog++;
 		frame++;
 		if (frame >= MAX_CALL_FRAMES) {
 			WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
@@ -1561,7 +1619,6 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env,
 			  start);
 		return -EFAULT;
 	}
-	subprog++;
 	return env->subprog_info[subprog].stack_depth;
 }
 #endif
@@ -2100,7 +2157,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 	case BPF_FUNC_tail_call:
 		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
 			goto error;
-		if (env->subprog_cnt) {
+		if (env->subprog_cnt > 1) {
 			verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
 			return -EINVAL;
 		}
@@ -2245,6 +2302,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	}
 
 	target_insn = *insn_idx + insn->imm;
+	/* We will increment insn_idx (PC) in do_check() after handling this
+	 * call insn, so the actual start of the function is target + 1.
+	 */
 	subprog = find_subprog(env, target_insn + 1);
 	if (subprog < 0) {
 		verbose(env, "verifier bug. No program starts at insn %d\n",
@@ -2272,7 +2332,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			/* remember the callsite, it will be used by bpf_exit */
 			*insn_idx /* callsite */,
 			state->curframe + 1 /* frameno within this callchain */,
-			subprog + 1 /* subprog number within this prog */);
+			subprog /* subprog number within this prog */);
 
 	/* copy r1 - r5 args that callee can access */
 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
@@ -3831,7 +3891,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		return -EINVAL;
 	}
 
-	if (env->subprog_cnt) {
+	if (env->subprog_cnt > 1) {
 		/* when program has LD_ABS insn JITs and interpreter assume
 		 * that r1 == ctx == skb which is not the case for callees
 		 * that can have arbitrary arguments. It's problematic
@@ -4020,10 +4080,6 @@ static int check_cfg(struct bpf_verifier_env *env)
 	int ret = 0;
 	int i, t;
 
-	ret = check_subprogs(env);
-	if (ret < 0)
-		return ret;
-
 	insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
 	if (!insn_state)
 		return -ENOMEM;
@@ -4862,11 +4918,11 @@ static int do_check(struct bpf_verifier_env *env)
 	verbose(env, "processed %d insns (limit %d), stack depth ",
 		insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
-	for (i = 0; i < env->subprog_cnt + 1; i++) {
+	for (i = 0; i < env->subprog_cnt; i++) {
 		u32 depth = env->subprog_info[i].stack_depth;
 
 		verbose(env, "%d", depth);
-		if (i + 1 < env->subprog_cnt + 1)
+		if (i + 1 < env->subprog_cnt)
 			verbose(env, "+");
 	}
 	verbose(env, "\n");
@@ -5238,12 +5294,12 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 static int jit_subprogs(struct bpf_verifier_env *env)
 {
 	struct bpf_prog *prog = env->prog, **func, *tmp;
-	int i, j, subprog_start, subprog_end = 0, len, subprog;
 	struct bpf_insn *insn;
 	void *old_bpf_func;
+	int i, j, subprog;
 	int err = -ENOMEM;
 
-	if (env->subprog_cnt == 0)
+	if (env->subprog_cnt <= 1)
 		return 0;
 
 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
@@ -5259,7 +5315,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		/* temporarily remember subprog id inside insn instead of
 		 * aux_data, since next loop will split up all insns into funcs
 		 */
-		insn->off = subprog + 1;
+		insn->off = subprog;
 		/* remember original imm in case JIT fails and fallback
 		 * to interpreter will be needed
 		 */
@@ -5268,23 +5324,24 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		insn->imm = 1;
 	}
 
-	func = kzalloc(sizeof(prog) * (env->subprog_cnt + 1), GFP_KERNEL);
+	func = kzalloc(sizeof(prog) * env->subprog_cnt, GFP_KERNEL);
 	if (!func)
 		return -ENOMEM;
 
-	for (i = 0; i <= env->subprog_cnt; i++) {
-		subprog_start = subprog_end;
-		if (env->subprog_cnt == i)
-			subprog_end = prog->len;
-		else
-			subprog_end = env->subprog_info[i].start;
+	for (i = 0; i < env->subprog_cnt; i++) {
+		struct bpf_subprog_info *info = &env->subprog_info[i];
+		int len, end = prog->len;
+
+		if (i + 1 < env->subprog_cnt)
+			end = info[1].start;
+		len = end - info->start;
 
-		len = subprog_end - subprog_start;
 		func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
 		if (!func[i])
 			goto out_free;
-		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
+		memcpy(func[i]->insnsi, prog->insnsi + info->start,
 		       len * sizeof(struct bpf_insn));
+
 		func[i]->type = prog->type;
 		func[i]->len = len;
 		if (bpf_prog_calc_tag(func[i]))
@@ -5307,7 +5364,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	 * now populate all bpf_calls with correct addresses and
 	 * run last pass of JIT
 	 */
-	for (i = 0; i <= env->subprog_cnt; i++) {
+	for (i = 0; i < env->subprog_cnt; i++) {
 		insn = func[i]->insnsi;
 		for (j = 0; j < func[i]->len; j++, insn++) {
 			if (insn->code != (BPF_JMP | BPF_CALL) ||
@@ -5315,12 +5372,16 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 				continue;
 			subprog = insn->off;
 			insn->off = 0;
+			if (subprog < 0 || subprog >= env->subprog_cnt) {
+				err = -EFAULT;
+				goto out_free;
+			}
 			insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
 				func[subprog]->bpf_func -
 				__bpf_call_base;
 		}
 	}
-	for (i = 0; i <= env->subprog_cnt; i++) {
+	for (i = 0; i < env->subprog_cnt; i++) {
 		old_bpf_func = func[i]->bpf_func;
 		tmp = bpf_int_jit_compile(func[i]);
 		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
@@ -5334,7 +5395,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	/* finally lock prog and jit images for all functions and
 	 * populate kallsysm
 	 */
-	for (i = 0; i <= env->subprog_cnt; i++) {
+	for (i = 0; i < env->subprog_cnt; i++) {
 		bpf_prog_lock_ro(func[i]);
 		bpf_prog_kallsyms_add(func[i]);
 	}
@@ -5351,7 +5412,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 			continue;
 		insn->off = env->insn_aux_data[i].call_imm;
 		subprog = find_subprog(env, i + insn->off + 1);
-		addr = (unsigned long)func[subprog + 1]->bpf_func;
+		addr = (unsigned long)func[subprog]->bpf_func;
 		addr &= PAGE_MASK;
 		insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
 			    addr - __bpf_call_base;
@@ -5360,10 +5421,10 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	prog->jited = 1;
 	prog->bpf_func = func[0]->bpf_func;
 	prog->aux->func = func;
-	prog->aux->func_cnt = env->subprog_cnt + 1;
+	prog->aux->func_cnt = env->subprog_cnt;
 	return 0;
 out_free:
-	for (i = 0; i <= env->subprog_cnt; i++)
+	for (i = 0; i < env->subprog_cnt; i++)
 		if (func[i])
 			bpf_jit_free(func[i]);
 	kfree(func);
@@ -5393,6 +5454,7 @@ static int fixup_call_args(struct bpf_verifier_env *env)
 		err = jit_subprogs(env);
 		if (err == 0)
 			return 0;
+		verbose(env, "failed to jit_subprogs %d\n", err);
 	}
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
 	for (i = 0; i < prog->len; i++, insn++) {
@@ -5682,6 +5744,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 
 	env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
 
+	ret = check_subprogs(env);
+	if (ret < 0)
+		goto skip_full_check;
+
 	ret = check_cfg(env);
 	if (ret < 0)
 		goto skip_full_check;
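
Aside for reviewers (not part of the patch): the forward-fill of subprogno over function bodies in check_subprogs() can be demonstrated in isolation. The array contents below are a made-up example, with main() at insn 0 and one callee entry pre-marked at insn 3; the loop body mirrors the one in the patch.

#include <stdio.h>

int main(void)
{
	/* after entry-point marking, only subprog starts are nonzero:
	 * insn 0 is main() (subprog 0), a callee starts at insn 3
	 */
	unsigned short subprogno[6] = { 0, 0, 0, 1, 0, 0 };
	unsigned short cur_subprog = 0;
	int i;

	/* forward-fill: body insns inherit the enclosing subprogno */
	for (i = 0; i < 6; i++) {
		if (subprogno[i])
			cur_subprog = subprogno[i];
		else
			subprogno[i] = cur_subprog;
		printf("insn %d -> subprog %u\n", i, subprogno[i]);
	}
	/* prints subprog 0 for insns 0-2 and subprog 1 for insns 3-5 */
	return 0;
}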