Chengkaitao <[email protected]> writes:

> From: Kaitao Cheng <[email protected]>
>
> If a user holds ownership of a node in the middle of a list, they
> can remove it from the list directly, without being restricted to
> deleting only from the head or the tail.
>
> When a kfunc has only one bpf_list_node parameter, supplement the
> initialization of the corresponding btf_field. Add a new lock_rec
> member to struct bpf_reference_state for lock holding detection.
>
> This is typically paired with bpf_refcount. After calling
> bpf_list_del, it is generally necessary to drop the reference to
> the list node twice to prevent reference count leaks.
>
> Signed-off-by: Kaitao Cheng <[email protected]>
> ---
>  include/linux/bpf_verifier.h |  4 +++
>  kernel/bpf/btf.c             | 33 +++++++++++++++++++---
>  kernel/bpf/helpers.c         | 17 ++++++++++++
>  kernel/bpf/verifier.c        | 54 ++++++++++++++++++++++++++++++++++--
>  4 files changed, 101 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index ef8e45a362d9..e1358b62d6cc 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -261,6 +261,10 @@ struct bpf_reference_state {
>        * it matches on unlock.
>        */
>       void *ptr;
> +     /* For REF_TYPE_LOCK_*: btf_record of the locked object, used for lock
> +      * checking in kfuncs such as bpf_list_del.
> +      */
> +     struct btf_record *lock_rec;
As far as I understand, this patch introduces a weaker form of
verification: we only check that some lock is held by an object of the
same type as this node's owning container, but there is no guarantee
that it is the same instance. Please confirm whether I'm right.
What would it take to implement instance-based validation instead of
the type-based lock check?
>  };
>  
>  struct bpf_retval_range {
> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index 4872d2a6c42d..8a977c793d56 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -3785,7 +3785,6 @@ static int btf_find_field_one(const struct btf *btf,
>       case BPF_RES_SPIN_LOCK:
>       case BPF_TIMER:
>       case BPF_WORKQUEUE:
> -     case BPF_LIST_NODE:
>       case BPF_RB_NODE:
>       case BPF_REFCOUNT:
>       case BPF_TASK_WORK:
> @@ -3794,6 +3793,27 @@ static int btf_find_field_one(const struct btf *btf,
>               if (ret < 0)
>                       return ret;
>               break;
> +     case BPF_LIST_NODE:
> +             ret = btf_find_struct(btf, var_type, off, sz, field_type,
> +                                   info_cnt ? &info[0] : &tmp);
> +             if (ret < 0)
> +                     return ret;
> +             /* graph_root for verifier: container type and node member name 
> */
> +             if (info_cnt && var_idx >= 0 && (u32)var_idx < 
> btf_type_vlen(var)) {
> +                     u32 id;
> +                     const struct btf_member *member;
> +
> +                     for (id = 1; id < btf_nr_types(btf); id++) {
> +                             if (btf_type_by_id(btf, id) == var) {
> +                                     info[0].graph_root.value_btf_id = id;
> +                                     member = btf_type_member(var) + var_idx;
> +                                     info[0].graph_root.node_name =
> +                                             __btf_name_by_offset(btf, 
> member->name_off);
> +                                     break;
> +                             }
> +                     }
> +             }
> +             break;
>       case BPF_KPTR_UNREF:
>       case BPF_KPTR_REF:
>       case BPF_KPTR_PERCPU:
> @@ -4138,6 +4158,7 @@ struct btf_record *btf_parse_fields(const struct btf 
> *btf, const struct btf_type
>                       if (ret < 0)
>                               goto end;
>                       break;
> +             case BPF_LIST_NODE:
>               case BPF_LIST_HEAD:
>                       ret = btf_parse_list_head(btf, &rec->fields[i], 
> &info_arr[i]);
>                       if (ret < 0)
> @@ -4148,7 +4169,6 @@ struct btf_record *btf_parse_fields(const struct btf 
> *btf, const struct btf_type
>                       if (ret < 0)
>                               goto end;
>                       break;
> -             case BPF_LIST_NODE:
>               case BPF_RB_NODE:
>                       break;
>               default:
> @@ -4192,20 +4212,25 @@ int btf_check_and_fixup_fields(const struct btf *btf, 
> struct btf_record *rec)
>       int i;
>  
>       /* There are three types that signify ownership of some other type:
> -      *  kptr_ref, bpf_list_head, bpf_rb_root.
> +      *  kptr_ref, bpf_list_head/node, bpf_rb_root.
>        * kptr_ref only supports storing kernel types, which can't store
>        * references to program allocated local types.
>        *
>        * Hence we only need to ensure that bpf_{list_head,rb_root} ownership
>        * does not form cycles.
>        */
> -     if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & (BPF_GRAPH_ROOT | 
> BPF_UPTR)))
> +     if (IS_ERR_OR_NULL(rec) || !(rec->field_mask &
> +        (BPF_GRAPH_ROOT | BPF_GRAPH_NODE | BPF_UPTR)))
>               return 0;
> +
>       for (i = 0; i < rec->cnt; i++) {
>               struct btf_struct_meta *meta;
>               const struct btf_type *t;
>               u32 btf_id;
>  
> +             if (rec->fields[i].type & BPF_GRAPH_NODE)
> +                     rec->fields[i].graph_root.value_rec = rec;
> +
>               if (rec->fields[i].type == BPF_UPTR) {
>                       /* The uptr only supports pinning one page and cannot
>                        * point to a kernel struct
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 6eb6c82ed2ee..577af62a9f7a 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -2459,6 +2459,22 @@ __bpf_kfunc struct bpf_list_node 
> *bpf_list_pop_back(struct bpf_list_head *head)
>       return __bpf_list_del(head, true);
>  }
>  
> +__bpf_kfunc struct bpf_list_node *bpf_list_del(struct bpf_list_node *node)
> +{
> +     struct bpf_list_node_kern *knode = (struct bpf_list_node_kern *)node;
> +
> +     if (unlikely(!knode))
> +             return NULL;
> +
> +     if (WARN_ON_ONCE(!READ_ONCE(knode->owner)))
> +             return NULL;
> +
> +     list_del_init(&knode->list_head);
> +     WRITE_ONCE(knode->owner, NULL);
> +
> +     return node;
> +}
> +
>  __bpf_kfunc struct bpf_list_node *bpf_list_front(struct bpf_list_head *head)
>  {
>       struct list_head *h = (struct list_head *)head;
> @@ -4545,6 +4561,7 @@ BTF_ID_FLAGS(func, bpf_list_push_front_impl)
>  BTF_ID_FLAGS(func, bpf_list_push_back_impl)
>  BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
>  BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
> +BTF_ID_FLAGS(func, bpf_list_del, KF_ACQUIRE | KF_RET_NULL)
>  BTF_ID_FLAGS(func, bpf_list_front, KF_RET_NULL)
>  BTF_ID_FLAGS(func, bpf_list_back, KF_RET_NULL)
>  BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index a3390190c26e..8a782772dd36 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -1536,7 +1536,7 @@ static int acquire_reference(struct bpf_verifier_env 
> *env, int insn_idx)
>  }
>  
>  static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, 
> enum ref_state_type type,
> -                           int id, void *ptr)
> +                           int id, void *ptr, struct btf_record *lock_rec)
>  {
>       struct bpf_verifier_state *state = env->cur_state;
>       struct bpf_reference_state *s;
> @@ -1547,6 +1547,7 @@ static int acquire_lock_state(struct bpf_verifier_env 
> *env, int insn_idx, enum r
>       s->type = type;
>       s->id = id;
>       s->ptr = ptr;
> +     s->lock_rec = lock_rec;
>  
>       state->active_locks++;
>       state->active_lock_id = id;
> @@ -1662,6 +1663,23 @@ static struct bpf_reference_state 
> *find_lock_state(struct bpf_verifier_state *st
>       return NULL;
>  }
>  
> +static bool rec_has_list_matching_node_type(struct bpf_verifier_env *env,
> +                                        const struct btf_record *rec,
> +                                        const struct btf *node_btf, u32 
> node_btf_id)
> +{
> +     u32 i;
> +
> +     for (i = 0; i < rec->cnt; i++) {
> +             if (!(rec->fields[i].type & BPF_LIST_HEAD))
> +                     continue;
> +             if (btf_struct_ids_match(&env->log, node_btf, node_btf_id, 0,
> +                                     rec->fields[i].graph_root.btf,
> +                                     rec->fields[i].graph_root.value_btf_id, 
> true))
> +                     return true;
> +     }
> +     return false;
> +}
> +
>  static void update_peak_states(struct bpf_verifier_env *env)
>  {
>       u32 cur_states;
> @@ -8576,7 +8594,8 @@ static int process_spin_lock(struct bpf_verifier_env 
> *env, int regno, int flags)
>                       type = REF_TYPE_RES_LOCK;
>               else
>                       type = REF_TYPE_LOCK;
> -             err = acquire_lock_state(env, env->insn_idx, type, reg->id, 
> ptr);
> +             err = acquire_lock_state(env, env->insn_idx, type, reg->id, ptr,
> +                                      reg_btf_record(reg));
>               if (err < 0) {
>                       verbose(env, "Failed to acquire lock state\n");
>                       return err;
> @@ -12431,6 +12450,7 @@ enum special_kfunc_type {
>       KF_bpf_list_push_back_impl,
>       KF_bpf_list_pop_front,
>       KF_bpf_list_pop_back,
> +     KF_bpf_list_del,
>       KF_bpf_list_front,
>       KF_bpf_list_back,
>       KF_bpf_cast_to_kern_ctx,
> @@ -12491,6 +12511,7 @@ BTF_ID(func, bpf_list_push_front_impl)
>  BTF_ID(func, bpf_list_push_back_impl)
>  BTF_ID(func, bpf_list_pop_front)
>  BTF_ID(func, bpf_list_pop_back)
> +BTF_ID(func, bpf_list_del)
>  BTF_ID(func, bpf_list_front)
>  BTF_ID(func, bpf_list_back)
>  BTF_ID(func, bpf_cast_to_kern_ctx)
> @@ -12966,6 +12987,7 @@ static bool is_bpf_list_api_kfunc(u32 btf_id)
>              btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
>              btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
>              btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
> +            btf_id == special_kfunc_list[KF_bpf_list_del] ||
>              btf_id == special_kfunc_list[KF_bpf_list_front] ||
>              btf_id == special_kfunc_list[KF_bpf_list_back];
>  }
> @@ -13088,7 +13110,8 @@ static bool check_kfunc_is_graph_node_api(struct 
> bpf_verifier_env *env,
>       switch (node_field_type) {
>       case BPF_LIST_NODE:
>               ret = (kfunc_btf_id == 
> special_kfunc_list[KF_bpf_list_push_front_impl] ||
> -                    kfunc_btf_id == 
> special_kfunc_list[KF_bpf_list_push_back_impl]);
> +                    kfunc_btf_id == 
> special_kfunc_list[KF_bpf_list_push_back_impl] ||
> +                    kfunc_btf_id == special_kfunc_list[KF_bpf_list_del]);
>               break;
>       case BPF_RB_NODE:
>               ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] 
> ||
> @@ -13211,6 +13234,9 @@ __process_kf_arg_ptr_to_graph_node(struct 
> bpf_verifier_env *env,
>               return -EINVAL;
>       }
>  
> +     if (!*node_field)
> +             *node_field = field;
> +
>       field = *node_field;
>  
>       et = btf_type_by_id(field->graph_root.btf, 
> field->graph_root.value_btf_id);
> @@ -13237,6 +13263,28 @@ __process_kf_arg_ptr_to_graph_node(struct 
> bpf_verifier_env *env,
>               return -EINVAL;
>       }
>  
> +     /* bpf_list_del: require list head's lock. Use refs[] REF_TYPE_LOCK_MASK
> +      * only. At lock time we stored the locked object's btf_record in ref->
> +      * lock_rec, so we can get the list value type from the ref directly.
> +      */
> +     if (node_field_type == BPF_LIST_NODE &&
> +         meta->func_id == special_kfunc_list[KF_bpf_list_del]) {
> +             struct bpf_verifier_state *cur = env->cur_state;
> +
> +             for (int i = 0; i < cur->acquired_refs; i++) {
> +                     struct bpf_reference_state *s = &cur->refs[i];
> +
> +                     if (!(s->type & REF_TYPE_LOCK_MASK) || !s->lock_rec)
> +                             continue;
> +
> +                     if (rec_has_list_matching_node_type(env, s->lock_rec,
> +                                                     reg->btf, reg->btf_id))
> +                             return 0;
> +             }
> +             verbose(env, "bpf_spin_lock must be held for bpf_list_del\n");
> +             return -EINVAL;
> +     }
> +
>       return 0;
>  }
>  
> -- 
> 2.50.1 (Apple Git-155)

Reply via email to