From: Kaitao Cheng <[email protected]>

Allow a user who holds ownership of a node in the middle of a list to
remove that node directly, instead of being restricted to removing
elements only from the head or the tail of the list.

For a kfunc that takes only a bpf_list_node parameter (with no
accompanying bpf_list_head), complete the initialization of the
corresponding btf_field. Add a new lock_rec member to struct
bpf_reference_state so the verifier can check that the list's
associated lock is held.

bpf_list_del is typically used together with bpf_refcount. After
calling bpf_list_del, the reference to the list node generally needs
to be dropped twice in order to avoid a reference count leak.

Signed-off-by: Kaitao Cheng <[email protected]>
---
 include/linux/bpf_verifier.h |  4 +++
 kernel/bpf/btf.c             | 33 +++++++++++++++++++---
 kernel/bpf/helpers.c         | 17 ++++++++++++
 kernel/bpf/verifier.c        | 54 ++++++++++++++++++++++++++++++++++--
 4 files changed, 101 insertions(+), 7 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index ef8e45a362d9..e1358b62d6cc 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -261,6 +261,10 @@ struct bpf_reference_state {
         * it matches on unlock.
         */
        void *ptr;
+       /* For REF_TYPE_LOCK_*: btf_record of the locked object, used for lock
+        * checking in kfuncs such as bpf_list_del.
+        */
+       struct btf_record *lock_rec;
 };
 
 struct bpf_retval_range {
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 4872d2a6c42d..8a977c793d56 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3785,7 +3785,6 @@ static int btf_find_field_one(const struct btf *btf,
        case BPF_RES_SPIN_LOCK:
        case BPF_TIMER:
        case BPF_WORKQUEUE:
-       case BPF_LIST_NODE:
        case BPF_RB_NODE:
        case BPF_REFCOUNT:
        case BPF_TASK_WORK:
@@ -3794,6 +3793,27 @@ static int btf_find_field_one(const struct btf *btf,
                if (ret < 0)
                        return ret;
                break;
+       case BPF_LIST_NODE:
+               ret = btf_find_struct(btf, var_type, off, sz, field_type,
+                                     info_cnt ? &info[0] : &tmp);
+               if (ret < 0)
+                       return ret;
+               /* graph_root for verifier: container type and node member name 
*/
+               if (info_cnt && var_idx >= 0 && (u32)var_idx < 
btf_type_vlen(var)) {
+                       u32 id;
+                       const struct btf_member *member;
+
+                       for (id = 1; id < btf_nr_types(btf); id++) {
+                               if (btf_type_by_id(btf, id) == var) {
+                                       info[0].graph_root.value_btf_id = id;
+                                       member = btf_type_member(var) + var_idx;
+                                       info[0].graph_root.node_name =
+                                               __btf_name_by_offset(btf, 
member->name_off);
+                                       break;
+                               }
+                       }
+               }
+               break;
        case BPF_KPTR_UNREF:
        case BPF_KPTR_REF:
        case BPF_KPTR_PERCPU:
@@ -4138,6 +4158,7 @@ struct btf_record *btf_parse_fields(const struct btf 
*btf, const struct btf_type
                        if (ret < 0)
                                goto end;
                        break;
+               case BPF_LIST_NODE:
                case BPF_LIST_HEAD:
                        ret = btf_parse_list_head(btf, &rec->fields[i], 
&info_arr[i]);
                        if (ret < 0)
@@ -4148,7 +4169,6 @@ struct btf_record *btf_parse_fields(const struct btf 
*btf, const struct btf_type
                        if (ret < 0)
                                goto end;
                        break;
-               case BPF_LIST_NODE:
                case BPF_RB_NODE:
                        break;
                default:
@@ -4192,20 +4212,25 @@ int btf_check_and_fixup_fields(const struct btf *btf, 
struct btf_record *rec)
        int i;
 
        /* There are three types that signify ownership of some other type:
-        *  kptr_ref, bpf_list_head, bpf_rb_root.
+        *  kptr_ref, bpf_list_head/node, bpf_rb_root.
         * kptr_ref only supports storing kernel types, which can't store
         * references to program allocated local types.
         *
         * Hence we only need to ensure that bpf_{list_head,rb_root} ownership
         * does not form cycles.
         */
-       if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & (BPF_GRAPH_ROOT | 
BPF_UPTR)))
+       if (IS_ERR_OR_NULL(rec) || !(rec->field_mask &
+          (BPF_GRAPH_ROOT | BPF_GRAPH_NODE | BPF_UPTR)))
                return 0;
+
        for (i = 0; i < rec->cnt; i++) {
                struct btf_struct_meta *meta;
                const struct btf_type *t;
                u32 btf_id;
 
+               if (rec->fields[i].type & BPF_GRAPH_NODE)
+                       rec->fields[i].graph_root.value_rec = rec;
+
                if (rec->fields[i].type == BPF_UPTR) {
                        /* The uptr only supports pinning one page and cannot
                         * point to a kernel struct
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 6eb6c82ed2ee..577af62a9f7a 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2459,6 +2459,22 @@ __bpf_kfunc struct bpf_list_node 
*bpf_list_pop_back(struct bpf_list_head *head)
        return __bpf_list_del(head, true);
 }
 
+__bpf_kfunc struct bpf_list_node *bpf_list_del(struct bpf_list_node *node)
+{
+       struct bpf_list_node_kern *knode = (struct bpf_list_node_kern *)node;
+
+       if (unlikely(!knode))
+               return NULL;
+
+       if (WARN_ON_ONCE(!READ_ONCE(knode->owner)))
+               return NULL;
+
+       list_del_init(&knode->list_head);
+       WRITE_ONCE(knode->owner, NULL);
+
+       return node;
+}
+
 __bpf_kfunc struct bpf_list_node *bpf_list_front(struct bpf_list_head *head)
 {
        struct list_head *h = (struct list_head *)head;
@@ -4545,6 +4561,7 @@ BTF_ID_FLAGS(func, bpf_list_push_front_impl)
 BTF_ID_FLAGS(func, bpf_list_push_back_impl)
 BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_list_del, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_front, KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_back, KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a3390190c26e..8a782772dd36 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1536,7 +1536,7 @@ static int acquire_reference(struct bpf_verifier_env 
*env, int insn_idx)
 }
 
 static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum 
ref_state_type type,
-                             int id, void *ptr)
+                             int id, void *ptr, struct btf_record *lock_rec)
 {
        struct bpf_verifier_state *state = env->cur_state;
        struct bpf_reference_state *s;
@@ -1547,6 +1547,7 @@ static int acquire_lock_state(struct bpf_verifier_env 
*env, int insn_idx, enum r
        s->type = type;
        s->id = id;
        s->ptr = ptr;
+       s->lock_rec = lock_rec;
 
        state->active_locks++;
        state->active_lock_id = id;
@@ -1662,6 +1663,23 @@ static struct bpf_reference_state 
*find_lock_state(struct bpf_verifier_state *st
        return NULL;
 }
 
+static bool rec_has_list_matching_node_type(struct bpf_verifier_env *env,
+                                          const struct btf_record *rec,
+                                          const struct btf *node_btf, u32 
node_btf_id)
+{
+       u32 i;
+
+       for (i = 0; i < rec->cnt; i++) {
+               if (!(rec->fields[i].type & BPF_LIST_HEAD))
+                       continue;
+               if (btf_struct_ids_match(&env->log, node_btf, node_btf_id, 0,
+                                       rec->fields[i].graph_root.btf,
+                                       rec->fields[i].graph_root.value_btf_id, 
true))
+                       return true;
+       }
+       return false;
+}
+
 static void update_peak_states(struct bpf_verifier_env *env)
 {
        u32 cur_states;
@@ -8576,7 +8594,8 @@ static int process_spin_lock(struct bpf_verifier_env 
*env, int regno, int flags)
                        type = REF_TYPE_RES_LOCK;
                else
                        type = REF_TYPE_LOCK;
-               err = acquire_lock_state(env, env->insn_idx, type, reg->id, 
ptr);
+               err = acquire_lock_state(env, env->insn_idx, type, reg->id, ptr,
+                                        reg_btf_record(reg));
                if (err < 0) {
                        verbose(env, "Failed to acquire lock state\n");
                        return err;
@@ -12431,6 +12450,7 @@ enum special_kfunc_type {
        KF_bpf_list_push_back_impl,
        KF_bpf_list_pop_front,
        KF_bpf_list_pop_back,
+       KF_bpf_list_del,
        KF_bpf_list_front,
        KF_bpf_list_back,
        KF_bpf_cast_to_kern_ctx,
@@ -12491,6 +12511,7 @@ BTF_ID(func, bpf_list_push_front_impl)
 BTF_ID(func, bpf_list_push_back_impl)
 BTF_ID(func, bpf_list_pop_front)
 BTF_ID(func, bpf_list_pop_back)
+BTF_ID(func, bpf_list_del)
 BTF_ID(func, bpf_list_front)
 BTF_ID(func, bpf_list_back)
 BTF_ID(func, bpf_cast_to_kern_ctx)
@@ -12966,6 +12987,7 @@ static bool is_bpf_list_api_kfunc(u32 btf_id)
               btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
               btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
               btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
+              btf_id == special_kfunc_list[KF_bpf_list_del] ||
               btf_id == special_kfunc_list[KF_bpf_list_front] ||
               btf_id == special_kfunc_list[KF_bpf_list_back];
 }
@@ -13088,7 +13110,8 @@ static bool check_kfunc_is_graph_node_api(struct 
bpf_verifier_env *env,
        switch (node_field_type) {
        case BPF_LIST_NODE:
                ret = (kfunc_btf_id == 
special_kfunc_list[KF_bpf_list_push_front_impl] ||
-                      kfunc_btf_id == 
special_kfunc_list[KF_bpf_list_push_back_impl]);
+                      kfunc_btf_id == 
special_kfunc_list[KF_bpf_list_push_back_impl] ||
+                      kfunc_btf_id == special_kfunc_list[KF_bpf_list_del]);
                break;
        case BPF_RB_NODE:
                ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] 
||
@@ -13211,6 +13234,9 @@ __process_kf_arg_ptr_to_graph_node(struct 
bpf_verifier_env *env,
                return -EINVAL;
        }
 
+       if (!*node_field)
+               *node_field = field;
+
        field = *node_field;
 
        et = btf_type_by_id(field->graph_root.btf, 
field->graph_root.value_btf_id);
@@ -13237,6 +13263,28 @@ __process_kf_arg_ptr_to_graph_node(struct 
bpf_verifier_env *env,
                return -EINVAL;
        }
 
+       /* bpf_list_del: require list head's lock. Use refs[] REF_TYPE_LOCK_MASK
+        * only. At lock time we stored the locked object's btf_record in ref->
+        * lock_rec, so we can get the list value type from the ref directly.
+        */
+       if (node_field_type == BPF_LIST_NODE &&
+           meta->func_id == special_kfunc_list[KF_bpf_list_del]) {
+               struct bpf_verifier_state *cur = env->cur_state;
+
+               for (int i = 0; i < cur->acquired_refs; i++) {
+                       struct bpf_reference_state *s = &cur->refs[i];
+
+                       if (!(s->type & REF_TYPE_LOCK_MASK) || !s->lock_rec)
+                               continue;
+
+                       if (rec_has_list_matching_node_type(env, s->lock_rec,
+                                                       reg->btf, reg->btf_id))
+                               return 0;
+               }
+               verbose(env, "bpf_spin_lock must be held for bpf_list_del\n");
+               return -EINVAL;
+       }
+
        return 0;
 }
 
-- 
2.50.1 (Apple Git-155)


Reply via email to