On Mon, Oct 07, 2019 at 07:20:36PM +0200, Toke Høiland-Jørgensen wrote:
> From: Toke Høiland-Jørgensen <t...@redhat.com>
> 
> This adds support for wrapping eBPF program dispatch in chain calling
> logic. The code injection is controlled by a flag at program load time; if
> the flag is set, the BPF program will carry a flag bit that changes the
> program dispatch logic to wrap it in a chain call loop.
> 
> Ideally, it shouldn't be necessary to set the flag on program load time,
> but rather inject the calls when a chain call program is first loaded. The
> allocation logic sets the whole of struct bpf_prog to be read-only memory,
> so it can't immediately be modified, but conceivably we could just unlock
> the first page of the struct and flip the bit when a chain call program is
> first attached.
> 
> Signed-off-by: Toke Høiland-Jørgensen <t...@redhat.com>
> ---
>  include/linux/bpf.h      |    3 +++
>  include/linux/filter.h   |   34 ++++++++++++++++++++++++++++++++--
>  include/uapi/linux/bpf.h |    6 ++++++
>  kernel/bpf/core.c        |    6 ++++++
>  kernel/bpf/syscall.c     |    4 +++-
>  5 files changed, 50 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 5b9d22338606..13e5f38cf5c6 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -365,6 +365,8 @@ struct bpf_prog_stats {
>       struct u64_stats_sync syncp;
>  };
>  
> +#define BPF_NUM_CHAIN_SLOTS 8
> +
>  struct bpf_prog_aux {
>       atomic_t refcnt;
>       u32 used_map_cnt;
> @@ -383,6 +385,7 @@ struct bpf_prog_aux {
>       struct list_head ksym_lnode;
>       const struct bpf_prog_ops *ops;
>       struct bpf_map **used_maps;
> +     struct bpf_prog *chain_progs[BPF_NUM_CHAIN_SLOTS];
>       struct bpf_prog *prog;
>       struct user_struct *user;
>       u64 load_time; /* ns since boottime */
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 2ce57645f3cd..3d1e4991e61d 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -21,6 +21,7 @@
>  #include <linux/kallsyms.h>
>  #include <linux/if_vlan.h>
>  #include <linux/vmalloc.h>
> +#include <linux/nospec.h>
>  
>  #include <net/sch_generic.h>
>  
> @@ -528,6 +529,7 @@ struct bpf_prog {
>                               is_func:1,      /* program is a bpf function */
>                               kprobe_override:1, /* Do we override a kprobe? */
>                               has_callchain_buf:1, /* callchain buffer allocated? */
> +                             chain_calls:1, /* should this use the chain_call wrapper */
>                               enforce_expected_attach_type:1; /* Enforce expected_attach_type checking at attach time */
>       enum bpf_prog_type      type;           /* Type of BPF program */
>       enum bpf_attach_type    expected_attach_type; /* For some prog types */
> @@ -551,6 +553,30 @@ struct sk_filter {
>       struct bpf_prog *prog;
>  };
>  
> +#define BPF_MAX_CHAIN_CALLS 32
> +static __always_inline unsigned int do_chain_calls(const struct bpf_prog *prog,
> +                                                const void *ctx)
> +{
> +     int i = BPF_MAX_CHAIN_CALLS;
> +     int idx;
> +     u32 ret;
> +
> +     do {
> +             ret = (*(prog)->bpf_func)(ctx, prog->insnsi);

This breaks program stats.

> +
> +             if (ret + 1 >= BPF_NUM_CHAIN_SLOTS) {
> +                     prog = prog->aux->chain_progs[0];
> +                     continue;
> +             }
> +             idx = ret + 1;
> +             idx = array_index_nospec(idx, BPF_NUM_CHAIN_SLOTS);
> +
> +             prog = prog->aux->chain_progs[idx] ?: prog->aux->chain_progs[0];
> +     } while (prog && --i);
> +
> +     return ret;
> +}
> +
>  DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
>  
>  #define BPF_PROG_RUN(prog, ctx)      ({                              \
> @@ -559,14 +585,18 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
>       if (static_branch_unlikely(&bpf_stats_enabled_key)) {   \
>               struct bpf_prog_stats *stats;                   \
>               u64 start = sched_clock();                      \
> -             ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi); \
> +             ret = prog->chain_calls ?                       \
> +                     do_chain_calls(prog, ctx) :                     \
> +                      (*(prog)->bpf_func)(ctx, (prog)->insnsi);      \

I thought you agreed to the 'no performance regressions' rule?

>               stats = this_cpu_ptr(prog->aux->stats);         \
>               u64_stats_update_begin(&stats->syncp);          \
>               stats->cnt++;                                   \
>               stats->nsecs += sched_clock() - start;          \
>               u64_stats_update_end(&stats->syncp);            \
>       } else {                                                \
> -             ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi); \
> +             ret = prog->chain_calls ?                               \
> +                     do_chain_calls(prog, ctx) :                     \
> +                      (*(prog)->bpf_func)(ctx, (prog)->insnsi);      \
>       }                                                       \
>       ret; })
>  
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 77c6be96d676..1ce80a227be3 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -288,6 +288,12 @@ enum bpf_attach_type {
>  /* The verifier internal test flag. Behavior is undefined */
>  #define BPF_F_TEST_STATE_FREQ        (1U << 3)
>  
> +/* Whether to enable chain call logic at program execution. If set, the program
> + * execution logic will check for and jump to chain call programs configured
> + * with the BPF_PROG_CHAIN_* commands to the bpf syscall.
> + */
> +#define BPF_F_CHAIN_CALLS    (1U << 4)
> +
>  /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
>   * two extensions:
>   *
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 66088a9e9b9e..5dfe3585bc5d 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -254,6 +254,12 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
>  void __bpf_prog_free(struct bpf_prog *fp)
>  {
>       if (fp->aux) {
> +             int i;
> +
> +             for (i = 0; i < BPF_NUM_CHAIN_SLOTS; i++)
> +                     if (fp->aux->chain_progs[i])
> +                             bpf_prog_put(fp->aux->chain_progs[i]);
> +
>               free_percpu(fp->aux->stats);
>               kfree(fp->aux);
>       }
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 82eabd4e38ad..b8a203a05881 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -1630,7 +1630,8 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
>       if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
>                                BPF_F_ANY_ALIGNMENT |
>                                BPF_F_TEST_STATE_FREQ |
> -                              BPF_F_TEST_RND_HI32))
> +                              BPF_F_TEST_RND_HI32 |
> +                              BPF_F_CHAIN_CALLS))
>               return -EINVAL;
>  
>       if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
> @@ -1665,6 +1666,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
>               return -ENOMEM;
>  
>       prog->expected_attach_type = attr->expected_attach_type;
> +     prog->chain_calls = !!(attr->prog_flags & BPF_F_CHAIN_CALLS);
>  
>       prog->aux->offload_requested = !!attr->prog_ifindex;
>  
> 

Reply via email to