By storing a subprogno in each insn's aux data, we avoid the need to keep the list of subprog starts sorted, or to bsearch() it in find_subprog(). Also, get rid of the weird one-based indexing of subprog numbers: subprog 0 is now the main program, callees are numbered from 1, and a subprog number indexes directly into subprog_info[] and prog->aux->func[].
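For illustration only (not part of the patch): the standalone sketch below contrasts the old sorted-array-plus-bsearch() lookup with the new O(1) aux-data lookup. The simplified types and the helper names find_subprog_bsearch() and find_subprog_aux() are invented for the example; only the logic mirrors the patch.

#include <stdio.h>
#include <stdlib.h>

#define MAX_SUBPROGS 256
#define MAX_INSNS 1024

struct subprog_info { int start; };
struct insn_aux { unsigned short subprogno; };

static struct subprog_info subprog_info[MAX_SUBPROGS];
static struct insn_aux insn_aux[MAX_INSNS];
static int subprog_cnt;

/* old scheme: subprog_info must stay sorted so that every lookup can
 * bsearch() it
 */
static int cmp_subprogs(const void *a, const void *b)
{
	return ((const struct subprog_info *)a)->start -
	       ((const struct subprog_info *)b)->start;
}

static int find_subprog_bsearch(int off)
{
	struct subprog_info key = { .start = off };
	struct subprog_info *p;

	p = bsearch(&key, subprog_info, subprog_cnt,
		    sizeof(subprog_info[0]), cmp_subprogs);
	return p ? (int)(p - subprog_info) : -1;
}

/* new scheme: each insn carries its subprogno, so lookup is a single
 * array index; the start check rejects insns that aren't entry points
 */
static int find_subprog_aux(int insn_idx)
{
	unsigned short n = insn_aux[insn_idx].subprogno;

	return subprog_info[n].start == insn_idx ? n : -1;
}

int main(void)
{
	int i;

	/* subprog 0 is main() at insn 0; one callee starts at insn 5 */
	subprog_info[subprog_cnt++].start = 0;
	subprog_info[subprog_cnt++].start = 5;
	for (i = 0; i < 10; i++)
		insn_aux[i].subprogno = i >= 5 ? 1 : 0;

	/* both report subprog 1 for the callee's entry point */
	printf("bsearch: %d, aux: %d\n",
	       find_subprog_bsearch(5), find_subprog_aux(5));
	return 0;
}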
Signed-off-by: Edward Cree <ec...@solarflare.com>
---
 include/linux/bpf_verifier.h |   3 +-
 kernel/bpf/verifier.c        | 284 ++++++++++++++++++++++++++-----------------
 2 files changed, 177 insertions(+), 110 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 8f70dc181e23..17990dd56e65 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -146,6 +146,7 @@ struct bpf_insn_aux_data {
 		s32 call_imm;		/* saved imm field of call insn */
 	};
 	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
+	u16 subprogno; /* subprog in which this insn resides */
 	bool seen; /* this insn was processed by the verifier */
 };
 
@@ -196,7 +197,7 @@ struct bpf_verifier_env {
 	bool seen_direct_write;
 	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
 	struct bpf_verifier_log log;
-	struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
+	struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS];
 	u32 subprog_cnt;
 };
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index df4d742360d9..587eb445bfa2 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -736,109 +736,171 @@ enum reg_arg_type {
 	DST_OP_NO_MARK	/* same as above, check only, don't mark */
 };
 
-static int cmp_subprogs(const void *a, const void *b)
+static int find_subprog(struct bpf_verifier_env *env, int insn_idx)
 {
-	return ((struct bpf_subprog_info *)a)->start -
-	       ((struct bpf_subprog_info *)b)->start;
-}
-
-static int find_subprog(struct bpf_verifier_env *env, int off)
-{
-	struct bpf_subprog_info *p;
-
-	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
-		    sizeof(env->subprog_info[0]), cmp_subprogs);
-	if (!p)
-		return -ENOENT;
-	return p - env->subprog_info;
+	int insn_cnt = env->prog->len;
+	u32 subprogno;
+	if (insn_idx >= insn_cnt || insn_idx < 0) {
+		verbose(env, "find_subprog of invalid insn_idx %d\n", insn_idx);
+		return -EINVAL;
+	}
+	subprogno = env->insn_aux_data[insn_idx].subprogno;
+	/* validate that we are at start of subprog */
+	if (env->subprog_info[subprogno].start != insn_idx) {
+		verbose(env, "insn_idx %d is in subprog %u but that starts at %d\n",
+			insn_idx, subprogno, env->subprog_info[subprogno].start);
+		return -EINVAL;
+	}
+	return subprogno;
 }
 
 static int add_subprog(struct bpf_verifier_env *env, int off)
 {
 	int insn_cnt = env->prog->len;
+	struct bpf_subprog_info *info;
 	int ret;
 
 	if (off >= insn_cnt || off < 0) {
 		verbose(env, "call to invalid destination\n");
 		return -EINVAL;
 	}
-	ret = find_subprog(env, off);
-	if (ret >= 0)
-		return 0;
-	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
-		verbose(env, "too many subprograms\n");
-		return -E2BIG;
+	ret = env->insn_aux_data[off].subprogno;
+	/* At the time add_subprog is called, only (some) entry points are
+	 * marked with an aux->subprogno; 'body' insns aren't.  Thus, a
+	 * subprogno of 0 means either "not yet marked as an entry point" or
+	 * "entry point of main(), i.e. insn 0".
+	 * However, a call to insn 0 is never legal (it would necessarily imply
+	 * recursion, which check_cfg will catch), therefore we can assume that
+	 * we will only be called with off == 0 once, and in that case we
+	 * should go ahead and add the subprog entry.
+	 */
+	if (!ret) {
+		if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
+			verbose(env, "too many subprograms\n");
+			return -E2BIG;
		}
+		ret = env->subprog_cnt++;
+		info = &env->subprog_info[ret];
+		info->start = off;
+		info->stack_depth = 0;
+		env->insn_aux_data[off].subprogno = ret;
 	}
-	env->subprog_info[env->subprog_cnt++].start = off;
-	sort(env->subprog_info, env->subprog_cnt,
-	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
-	return 0;
+	return ret;
 }
 
 static int check_subprogs(struct bpf_verifier_env *env)
 {
-	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
-	struct bpf_insn *insn = env->prog->insnsi;
-	int insn_cnt = env->prog->len;
+	struct bpf_insn_aux_data *aux = env->insn_aux_data;
+	struct bpf_insn *insns = env->prog->insnsi;
+	int insn_cnt = env->prog->len, i, err;
+	int cur_subprog;
 
-	/* determine subprog starts. The end is one before the next starts */
-	for (i = 0; i < insn_cnt; i++) {
-		if (insn[i].code != (BPF_JMP | BPF_CALL))
-			continue;
-		if (insn[i].src_reg != BPF_PSEUDO_CALL)
-			continue;
-		if (!env->allow_ptr_leaks) {
-			verbose(env, "function calls to other bpf functions are allowed for root only\n");
-			return -EPERM;
-		}
-		if (bpf_prog_is_dev_bound(env->prog->aux)) {
-			verbose(env, "function calls in offloaded programs are not supported yet\n");
-			return -EINVAL;
+	/* Find subprog starts */
+	err = add_subprog(env, 0); /* subprog 0, main(), starts at insn 0 */
+	if (err < 0)
+		return err;
+	for (i = 0; i < insn_cnt; i++)
+		if (insns[i].code == (BPF_JMP | BPF_CALL) &&
+		    insns[i].src_reg == BPF_PSEUDO_CALL) {
+			if (!env->allow_ptr_leaks) {
+				verbose(env, "function calls to other bpf functions are allowed for root only\n");
+				return -EPERM;
+			}
+			if (bpf_prog_is_dev_bound(env->prog->aux)) {
+				verbose(env, "function calls in offloaded programs are not supported yet\n");
+				return -EINVAL;
+			}
+			/* add_subprog marks the callee entry point with the
+			 * new subprogno.
+			 */
+			err = add_subprog(env, i + insns[i].imm + 1);
+			if (err < 0)
+				return err;
 		}
-		ret = add_subprog(env, i + insn[i].imm + 1);
-		if (ret < 0)
-			return ret;
-	}
 
 	if (env->log.level > 1)
 		for (i = 0; i < env->subprog_cnt; i++)
 			verbose(env, "func#%d @%d\n", i,
 				env->subprog_info[i].start);
 
-	/* now check that all jumps are within the same subprog */
-	subprog_start = 0;
-	if (env->subprog_cnt == cur_subprog)
-		subprog_end = insn_cnt;
-	else
-		subprog_end = env->subprog_info[cur_subprog++].start;
+	/* Fill in subprogno on each insn of function body, on the assumption
+	 * that each subprog is a contiguous block of insns.  This will be
+	 * validated by the next step.
+	 */
+	cur_subprog = 0;
+	for (i = 0; i < insn_cnt; i++)
+		if (aux[i].subprogno)
+			cur_subprog = aux[i].subprogno;
+		else
+			aux[i].subprogno = cur_subprog;
+
+	/* Check that control never flows from one subprog to another except by
+	 * a pseudo-call insn.
+	 */
 	for (i = 0; i < insn_cnt; i++) {
-		u8 code = insn[i].code;
-
-		if (BPF_CLASS(code) != BPF_JMP)
-			goto next;
-		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
-			goto next;
-		off = i + insn[i].off + 1;
-		if (off < subprog_start || off >= subprog_end) {
-			verbose(env, "jump out of range from insn %d to %d\n", i, off);
-			return -EINVAL;
+		bool fallthrough = false, jump = false;
+		u8 opcode = BPF_OP(insns[i].code);
+		int target;
+
+		/* Determine where control flow from this insn goes */
+		if (BPF_CLASS(insns[i].code) != BPF_JMP) {
+			fallthrough = true;
+		} else {
+			switch (opcode) {
+			case BPF_EXIT:
+				/* This insn doesn't go anywhere.  If we're in
+				 * a subprog, then the return target is handled
+				 * as a fallthrough on the call insn, so no need
+				 * to worry about it here.
+				 */
+				continue;
+			case BPF_CALL:
+				/* If pseudo-call, already handled marking the
+				 * callee.  Both kinds of call will eventually
+				 * return and pass to the following insn.
+				 */
+				fallthrough = true;
+				break;
+			case BPF_JA:
+				/* unconditional jump; mark target */
+				jump = true;
+				target = i + insns[i].off + 1;
+				break;
+			default:
+				/* conditional jump; branch both ways */
+				fallthrough = true;
+				jump = true;
+				target = i + insns[i].off + 1;
+				break;
+			}
 		}
-next:
-		if (i == subprog_end - 1) {
-			/* to avoid fall-through from one subprog into another
-			 * the last insn of the subprog should be either exit
-			 * or unconditional jump back
-			 */
-			if (code != (BPF_JMP | BPF_EXIT) &&
-			    code != (BPF_JMP | BPF_JA)) {
-				verbose(env, "last insn is not an exit or jmp\n");
+
+		/* Check that control flow doesn't leave the subprog */
+		cur_subprog = aux[i].subprogno;
+		if (fallthrough) {
+			if (i + 1 >= insn_cnt) {
+				verbose(env, "no exit/jump at end of program (insn %d)\n",
+					i);
+				return -EINVAL;
+			}
+			if (aux[i + 1].subprogno != cur_subprog) {
+				verbose(env, "no exit/jump at end of subprog %d (insn %d)\n",
+					cur_subprog, i);
+				return -EINVAL;
+			}
+		}
+		if (jump) {
+			if (target < 0 || target >= insn_cnt) {
+				verbose(env, "jump out of range from insn %d to %d\n",
					i, target);
+				return -EINVAL;
+			}
+			if (aux[target].subprogno != cur_subprog) {
+				verbose(env, "jump from insn %d to %d crosses from subprog %d to %d\n",
+					i, target, cur_subprog,
+					aux[target].subprogno);
 				return -EINVAL;
 			}
-			subprog_start = subprog_end;
-			if (env->subprog_cnt == cur_subprog)
-				subprog_end = insn_cnt;
-			else
-				subprog_end = env->subprog_info[cur_subprog++].start;
 		}
 	}
 	return 0;
@@ -1489,7 +1551,8 @@ static int update_stack_depth(struct bpf_verifier_env *env,
  */
 static int check_max_stack_depth(struct bpf_verifier_env *env)
 {
-	int depth = 0, frame = 0, subprog = 0, i = 0, subprog_end;
+	int depth = 0, frame = 0, subprog = 0, i = 0;
+	struct bpf_insn_aux_data *aux = env->insn_aux_data;
 	struct bpf_insn *insn = env->prog->insnsi;
 	int insn_cnt = env->prog->len;
 	int ret_insn[MAX_CALL_FRAMES];
@@ -1507,11 +1570,7 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
 		return -EACCES;
 	}
 continue_func:
-	if (env->subprog_cnt == subprog)
-		subprog_end = insn_cnt;
-	else
-		subprog_end = env->subprog_info[subprog].start;
-	for (; i < subprog_end; i++) {
+	for (; i < insn_cnt && aux[i].subprogno == subprog; i++) {
 		if (insn[i].code != (BPF_JMP | BPF_CALL))
 			continue;
 		if (insn[i].src_reg != BPF_PSEUDO_CALL)
@@ -1528,7 +1587,6 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
 				  i);
 			return -EFAULT;
 		}
-		subprog++;
 		frame++;
 		if (frame >= MAX_CALL_FRAMES) {
 			WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
@@ -1561,7 +1619,6 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env,
 			  start);
 		return -EFAULT;
 	}
-	subprog++;
 	return env->subprog_info[subprog].stack_depth;
 }
 #endif
@@ -2100,7 +2157,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 	case BPF_FUNC_tail_call:
 		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
 			goto error;
-		if (env->subprog_cnt) {
+		if (env->subprog_cnt > 1) {
 			verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
 			return -EINVAL;
 		}
@@ -2245,6 +2302,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	}
 
 	target_insn = *insn_idx + insn->imm;
+	/* We will increment insn_idx (PC) in do_check() after handling this
+	 * call insn, so the actual start of the function is target + 1.
+	 */
 	subprog = find_subprog(env, target_insn + 1);
 	if (subprog < 0) {
 		verbose(env, "verifier bug. No program starts at insn %d\n",
@@ -2272,7 +2332,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			/* remember the callsite, it will be used by bpf_exit */
 			*insn_idx /* callsite */,
 			state->curframe + 1 /* frameno within this callchain */,
-			subprog + 1 /* subprog number within this prog */);
+			subprog /* subprog number within this prog */);
 
 	/* copy r1 - r5 args that callee can access */
 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
@@ -3831,7 +3891,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		return -EINVAL;
 	}
 
-	if (env->subprog_cnt) {
+	if (env->subprog_cnt > 1) {
 		/* when program has LD_ABS insn JITs and interpreter assume
 		 * that r1 == ctx == skb which is not the case for callees
 		 * that can have arbitrary arguments. It's problematic
@@ -4020,10 +4080,6 @@ static int check_cfg(struct bpf_verifier_env *env)
 	int ret = 0;
 	int i, t;
 
-	ret = check_subprogs(env);
-	if (ret < 0)
-		return ret;
-
 	insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
 	if (!insn_state)
 		return -ENOMEM;
@@ -4862,11 +4918,11 @@ static int do_check(struct bpf_verifier_env *env)
 	verbose(env, "processed %d insns (limit %d), stack depth ",
 		insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
-	for (i = 0; i < env->subprog_cnt + 1; i++) {
+	for (i = 0; i < env->subprog_cnt; i++) {
 		u32 depth = env->subprog_info[i].stack_depth;
 
 		verbose(env, "%d", depth);
-		if (i + 1 < env->subprog_cnt + 1)
+		if (i + 1 < env->subprog_cnt)
 			verbose(env, "+");
 	}
 	verbose(env, "\n");
@@ -5238,12 +5294,12 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 static int jit_subprogs(struct bpf_verifier_env *env)
 {
 	struct bpf_prog *prog = env->prog, **func, *tmp;
-	int i, j, subprog_start, subprog_end = 0, len, subprog;
 	struct bpf_insn *insn;
 	void *old_bpf_func;
+	int i, j, subprog;
 	int err = -ENOMEM;
 
-	if (env->subprog_cnt == 0)
+	if (env->subprog_cnt <= 1)
 		return 0;
 
 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
@@ -5259,7 +5315,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		/* temporarily remember subprog id inside insn instead of
 		 * aux_data, since next loop will split up all insns into funcs
 		 */
-		insn->off = subprog + 1;
+		insn->off = subprog;
 		/* remember original imm in case JIT fails and fallback
 		 * to interpreter will be needed
 		 */
@@ -5268,23 +5324,24 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		insn->imm = 1;
 	}
 
-	func = kzalloc(sizeof(prog) * (env->subprog_cnt + 1), GFP_KERNEL);
+	func = kzalloc(sizeof(prog) * env->subprog_cnt, GFP_KERNEL);
 	if (!func)
 		return -ENOMEM;
 
-	for (i = 0; i <= env->subprog_cnt; i++) {
-		subprog_start = subprog_end;
-		if (env->subprog_cnt == i)
-			subprog_end = prog->len;
-		else
-			subprog_end = env->subprog_info[i].start;
+	for (i = 0; i < env->subprog_cnt; i++) {
+		struct bpf_subprog_info *info = &env->subprog_info[i];
+		int len, end = prog->len;
+
+		if (i + 1 < env->subprog_cnt)
+			end = info[1].start;
+		len = end - info->start;
 
-		len = subprog_end - subprog_start;
 		func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
 		if (!func[i])
 			goto out_free;
-		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
+		memcpy(func[i]->insnsi, prog->insnsi + info->start,
 		       len * sizeof(struct bpf_insn));
+
 		func[i]->type = prog->type;
 		func[i]->len = len;
 		if (bpf_prog_calc_tag(func[i]))
@@ -5307,7 +5364,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	 * now populate all bpf_calls with correct addresses and
 	 * run last pass of JIT
 	 */
-	for (i = 0; i <= env->subprog_cnt; i++) {
+	for (i = 0; i < env->subprog_cnt; i++) {
 		insn = func[i]->insnsi;
 		for (j = 0; j < func[i]->len; j++, insn++) {
 			if (insn->code != (BPF_JMP | BPF_CALL) ||
@@ -5315,12 +5372,16 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 				continue;
 			subprog = insn->off;
 			insn->off = 0;
+			if (subprog < 0 || subprog >= env->subprog_cnt) {
+				err = -EFAULT;
+				goto out_free;
+			}
 			insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
 				func[subprog]->bpf_func -
 				__bpf_call_base;
 		}
 	}
-	for (i = 0; i <= env->subprog_cnt; i++) {
+	for (i = 0; i < env->subprog_cnt; i++) {
 		old_bpf_func = func[i]->bpf_func;
 		tmp = bpf_int_jit_compile(func[i]);
 		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
@@ -5334,7 +5395,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	/* finally lock prog and jit images for all functions and
 	 * populate kallsysm
 	 */
-	for (i = 0; i <= env->subprog_cnt; i++) {
+	for (i = 0; i < env->subprog_cnt; i++) {
 		bpf_prog_lock_ro(func[i]);
 		bpf_prog_kallsyms_add(func[i]);
 	}
@@ -5351,7 +5412,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 			continue;
 		insn->off = env->insn_aux_data[i].call_imm;
 		subprog = find_subprog(env, i + insn->off + 1);
-		addr = (unsigned long)func[subprog + 1]->bpf_func;
+		addr = (unsigned long)func[subprog]->bpf_func;
 		addr &= PAGE_MASK;
 		insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
 			    addr - __bpf_call_base;
@@ -5360,10 +5421,10 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	prog->jited = 1;
 	prog->bpf_func = func[0]->bpf_func;
 	prog->aux->func = func;
-	prog->aux->func_cnt = env->subprog_cnt + 1;
+	prog->aux->func_cnt = env->subprog_cnt;
 	return 0;
 out_free:
-	for (i = 0; i <= env->subprog_cnt; i++)
+	for (i = 0; i < env->subprog_cnt; i++)
 		if (func[i])
 			bpf_jit_free(func[i]);
 	kfree(func);
@@ -5393,6 +5454,7 @@ static int fixup_call_args(struct bpf_verifier_env *env)
 		err = jit_subprogs(env);
 		if (err == 0)
 			return 0;
+		verbose(env, "failed to jit_subprogs %d\n", err);
 	}
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
 	for (i = 0; i < prog->len; i++, insn++) {
@@ -5682,6 +5744,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 
 	env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
 
+	ret = check_subprogs(env);
+	if (ret < 0)
+		goto skip_full_check;
+
 	ret = check_cfg(env);
 	if (ret < 0)
 		goto skip_full_check;
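
Aside for reviewers (not part of the patch): the forward-fill of subprogno over function bodies in check_subprogs() can be demonstrated in isolation. The array contents below are a made-up example, with main() at insn 0 and one callee entry pre-marked at insn 3; the loop body mirrors the one in the patch.

#include <stdio.h>

int main(void)
{
	/* after entry-point marking, only subprog starts are nonzero:
	 * insn 0 is main() (subprog 0), a callee starts at insn 3
	 */
	unsigned short subprogno[6] = { 0, 0, 0, 1, 0, 0 };
	unsigned short cur_subprog = 0;
	int i;

	/* forward-fill: body insns inherit the enclosing subprogno */
	for (i = 0; i < 6; i++) {
		if (subprogno[i])
			cur_subprog = subprogno[i];
		else
			subprogno[i] = cur_subprog;
		printf("insn %d -> subprog %u\n", i, subprogno[i]);
	}
	/* prints subprog 0 for insns 0-2 and subprog 1 for insns 3-5 */
	return 0;
}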