On 12/28/18 7:09 PM, Jakub Kicinski wrote:
> Instead of overwriting dead code with jmp -1 instructions
> remove it completely for root.  Adjust verifier state and
> line info appropriately.
> 
> v2:
>   - adjust func_info (Alexei);
>   - make sure first instruction retains line info (Alexei).
> 
> Signed-off-by: Jakub Kicinski <jakub.kicin...@netronome.com>
> ---
>   include/linux/filter.h |   1 +
>   kernel/bpf/core.c      |  12 +++
>   kernel/bpf/verifier.c  | 197 ++++++++++++++++++++++++++++++++++++++++-
>   3 files changed, 207 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 8c8544b375eb..2cdb50bbf867 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -782,6 +782,7 @@ static inline bool bpf_dump_raw_ok(void)
>   
>   struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
>                                      const struct bpf_insn *patch, u32 len);
> +int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt);
>   
>   void bpf_clear_redirect_map(struct bpf_map *map);
>   
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index a40057b2c556..cc6fa754627c 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -461,6 +461,18 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog 
> *prog, u32 off,
>       return prog_adj;
>   }
>   
> +int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt)
> +{
> +     /* Branch offsets can't overflow when program is shrinking, no need
> +      * to call bpf_adj_branches(..., true) here
> +      */
> +     memmove(prog->insnsi + off, prog->insnsi + off + cnt,
> +             sizeof(struct bpf_insn) * (prog->len - off - cnt));
> +     prog->len -= cnt;
> +
> +     return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false));
> +}
> +
>   void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
>   {
>       int i;
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 30e2cd399b4a..a3880c6dac97 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -6233,6 +6233,171 @@ static struct bpf_prog *bpf_patch_insn_data(struct 
> bpf_verifier_env *env, u32 of
>       return new_prog;
>   }
>   
> +static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
> +                                           u32 off, u32 cnt)
> +{
> +     int i, j;
> +
> +     /* find first prog starting at or after off (first to remove) */
> +     for (i = 0; i < env->subprog_cnt; i++)
> +             if (env->subprog_info[i].start >= off)
> +                     break;
> +     /* find first prog starting at or after off + cnt (first to stay) */
> +     for (j = i; j < env->subprog_cnt; j++)
> +             if (env->subprog_info[j].start >= off + cnt)
> +                     break;
> +     /* if j doesn't start exactly at off + cnt, we are just removing
> +      * the front of previous prog
> +      */
> +     if (env->subprog_info[j].start != off + cnt)
> +             j--;
> +
> +     if (j > i) {
> +             struct bpf_prog_aux *aux = env->prog->aux;
> +             int move;
> +
> +             /* move fake 'exit' subprog as well */
> +             move = env->subprog_cnt + 1 - j;
> +
> +             memmove(env->subprog_info + i,
> +                     env->subprog_info + j,
> +                     sizeof(*env->subprog_info) * move);
> +             env->subprog_cnt -= j - i;
> +
> +             /* remove func_info */
> +             if (aux->func_info) {
> +                     move = aux->func_info_cnt - j;
> +
> +                     memmove(aux->func_info + i,
> +                             aux->func_info + j,
> +                             sizeof(*aux->func_info) * move);
> +                     aux->func_info_cnt -= j - i;
> +             }
> +     } else {
> +             /* convert i from "first prog to remove" to "first to adjust" */
> +             if (env->subprog_info[i].start == off)
> +                     i++;
> +     }
> +
> +     /* update fake 'exit' subprog as well */
> +     for (; i <= env->subprog_cnt; i++)
> +             env->subprog_info[i].start -= cnt;
> +
> +     return 0;
> +}

adjust_subprog_starts_after_remove() and update func_info looks correct 
to me.

> +
> +static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
> +                                   u32 cnt)
> +{
> +     struct bpf_subprog_info *need_first_linfo;
> +     struct bpf_prog *prog = env->prog;
> +     u32 i, l_off, l_cnt, nr_linfo;
> +     struct bpf_line_info *linfo;
> +
> +     nr_linfo = prog->aux->nr_linfo;
> +     if (!nr_linfo || !cnt)
> +             return 0;
> +
> +     linfo = prog->aux->linfo;
> +
> +     /* progs are already adjusted/removed, if program starts on off, it may
> +      * had its start cut off and its line info may need to be preserved
> +      */
> +     for (i = 0; i < env->subprog_cnt; i++)
> +             if (env->subprog_info[i].start >= off)
> +                     break;
> +     if (i < env->subprog_cnt && env->subprog_info[i].start == off)
> +             need_first_linfo = &env->subprog_info[i];
> +     else
> +             need_first_linfo = NULL;
> +
> +     /* find first line info to remove */
> +     for (i = 0; i < nr_linfo; i++)
> +             if (linfo[i].insn_off >= off)
> +                     break;
> +
> +     /* count lines to be removed */
> +     l_off = i;
> +     l_cnt = 0;
> +     for (; i < nr_linfo; i++)
> +             if (linfo[i].insn_off < off + cnt)
> +                     l_cnt++;
> +             else
> +                     break;
> +
> +     /* either we didn't actually cut the start or we can just use line info
> +      * of first instruction if it exists
> +      */
> +     if (i < nr_linfo && linfo[i].insn_off == off + cnt)
> +             need_first_linfo = NULL;
> +     if (need_first_linfo) {
> +             if (WARN_ONCE(!l_cnt,
> +                           "verifier bug - no linfo removed, yet its 
> missing"))
> +                     return -EINVAL;
> +             if (WARN_ONCE(need_first_linfo->linfo_idx < l_off ||
> +                           need_first_linfo->linfo_idx >= l_off + l_cnt,
> +                           "verifier bug - removed prog linfo not in removed 
> range"))
> +                     return -EINVAL;
> +             /* subprog linfo_idx is not adjusted yet, so just find out
> +              * which line it used to be and swap it
> +              */
> +             memmove(linfo + l_off, linfo + need_first_linfo->linfo_idx,
> +                     sizeof(*linfo));
> +             need_first_linfo->linfo_idx = l_off;
> +             linfo[l_off].insn_off = off;
> +
> +             l_off++;
> +             l_cnt--;
> +     }

In this routine, we handle need_first_linfo if the first insn of a 
subprogram is deleted.
I wonder whether we need to handle the last deleted linfo as well. For 
example:
    func1:
      line_info1:
         insn1
         insn2
    func2:
      line_info2:
         insn3
         insn4:

if insn2 and insn3 are deleted, the above algorithm will produce:
     func1:
       line_info1:
        insn1
     func2:
        insn4

func2 is missing the first line info, which should be line_info2 with
adjusted insn offset to the start of func2.

Another example:
    func1:
       line_info1:
          insn1
          insn2
       line_info2:
          insn3
          insn4
If insn2 and insn3 are deleted, we will get
     func1:
       line_info1:
         insn1
         insn4
This is not ideal either. We should attribute insn4 to line_info2.

> +
> +     /* remove the line info which refers to the removed instructions */
> +     if (l_cnt) {
> +             memmove(linfo + l_off, linfo + i,
> +                     sizeof(*linfo) * (nr_linfo - i));
> +
> +             prog->aux->nr_linfo -= l_cnt;
> +             nr_linfo = prog->aux->nr_linfo;
> +     }
> +
> +     /* pull all linfo[i].insn_off >= off + cnt in by cnt */
> +     for (i = l_off; i < nr_linfo; i++)
> +             linfo[i].insn_off -= cnt;
> +
> +     /* fix up all subprogs (incl. 'exit') which start >= off */
> +     for (i = 0; i <= env->subprog_cnt; i++)
> +             if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
> +                     env->subprog_info[i].linfo_idx -= l_cnt;
> +
> +     return 0;
> +}
> +
> +static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 
> cnt)
> +{
> +     struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
> +     unsigned int orig_prog_len = env->prog->len;
> +     int err;
> +
> +     if (!cnt)
> +             return 0;

This check is probably not needed as all call sites (including
patch #4) guarantees non-zero cnt.

> +
> +     err = bpf_remove_insns(env->prog, off, cnt);
> +     if (err)
> +             return err;
> +
> +     err = adjust_subprog_starts_after_remove(env, off, cnt);
> +     if (err)
> +             return err;
> +
> +     err = bpf_adj_linfo_after_remove(env, off, cnt);
> +     if (err)
> +             return err;
> +
> +     memmove(aux_data + off, aux_data + off + cnt,
> +             sizeof(*aux_data) * (orig_prog_len - off - cnt));
> +
> +     return 0;
> +}
> +
>   /* The verifier does more data flow analysis than llvm and will not
>    * explore branches that are dead at run time. Malicious programs can
>    * have dead code too. Therefore replace all dead at-run-time code
> @@ -6293,6 +6458,30 @@ static void opt_hard_wire_dead_code_branches(struct 
> bpf_verifier_env *env)
>       }
>   }
>   
> +static int opt_remove_dead_code(struct bpf_verifier_env *env)
> +{
> +     struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
> +     int insn_cnt = env->prog->len;
> +     int i, err;
> +
> +     for (i = 0; i < insn_cnt; i++) {
> +             int j;
> +
> +             j = 0;
> +             while (i + j < insn_cnt && !aux_data[i + j].seen)
> +                     j++;
> +             if (!j)
> +                     continue;
> +
> +             err = verifier_remove_insns(env, i, j);
> +             if (err)
> +                     return err;
> +             insn_cnt = env->prog->len;
> +     }
> +
> +     return 0;
> +}
> +
>   /* convert load instructions that access fields of a context type into a
>    * sequence of instructions that access fields of the underlying structure:
>    *     struct __sk_buff    -> struct sk_buff
> @@ -7032,11 +7221,13 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr 
> *attr,
>       if (is_priv) {
>               if (ret == 0)
>                       opt_hard_wire_dead_code_branches(env);
> +             if (ret == 0)
> +                     ret = opt_remove_dead_code(env);
> +     } else {
> +             if (ret == 0)
> +                     sanitize_dead_code(env);
>       }
>   
> -     if (ret == 0)
> -             sanitize_dead_code(env);
> -
>       if (ret == 0)
>               /* program is valid, convert *(u32*)(ctx + off) accesses */
>               ret = convert_ctx_accesses(env);
> 

Reply via email to