On 1/22/25 19:05, Andrey Zhadchenko wrote:


On 1/22/25 11:51, Pavel Tikhomirov wrote:
+static struct cbt_info *blk_cbt_find(struct request_queue *q, __u8 *name)
+{
+    struct cbt_info *cbt;
+
+    list_for_each_entry(cbt, &q->cbt_list, list)
+        if (!memcmp(name, cbt->name, CBT_NAME_LENGTH))
+            return cbt;
+
+    return NULL;
+}
+
  static int blk_cbt_snap_create(struct request_queue *q, __u8 *name,
                     struct blk_user_cbt_snap_create __user *arg)
  {
@@ -382,8 +397,7 @@ static int blk_cbt_snap_create(struct request_queue *q, __u8 *name,
          return -EFAULT;
      mutex_lock(&cbt_mutex);
-    cbt = q->cbt;
-
+    cbt = blk_cbt_find(q, name);
      if (!cbt) {
          mutex_unlock(&cbt_mutex);
          return -ENOENT;
@@ -392,11 +406,6 @@ static int blk_cbt_snap_create(struct request_queue *q, __u8 *name,
      BUG_ON(!cbt->map);
      BUG_ON(!cbt->block_max);
-    if (!name || memcmp(name, cbt->name, sizeof(cbt->name))) {
-        mutex_unlock(&cbt_mutex);
-        return -EINVAL;
-    }
-
      if (cbt->snp_map) {
          mutex_unlock(&cbt_mutex);
          return -EBUSY;
...
@@ -667,35 +665,46 @@ static void cbt_release_callback(struct rcu_head *head)
      kfree(cbt);
  }
-void blk_cbt_release(struct request_queue *q)
+static void blk_cbt_del(struct cbt_info *cbt)
  {
-    struct cbt_info *cbt;
-    int in_use = 0;
      unsigned long flags;
+    int in_use;
-    cbt = q->cbt;
-    if (!cbt)
-        return;
      spin_lock_irqsave(&cbt->lock, flags);
      set_bit(CBT_DEAD, &cbt->flags);
-    rcu_assign_pointer(q->cbt, NULL);
+    list_del_rcu(&cbt->list);
      in_use = cbt->count;
      spin_unlock_irqrestore(&cbt->lock, flags);
      if (!in_use)
          call_rcu(&cbt->rcu, &cbt_release_callback);
  }

Why don't we take rcu_read_lock around the list iteration in blk_cbt_find? If we don't take rcu_read_lock there, the cbt returned from blk_cbt_find can already be freed by the time the caller uses it.

rcu_read_lock is not taken on the paths where the mutex is held. cbt_mutex makes user operations mutually exclusive, so there won't be any race there. Maybe I should add a comment then?
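Something like this, with a lockdep assertion so the rule is checkable and not just documented (a sketch, not part of the patch):

static struct cbt_info *blk_cbt_find(struct request_queue *q, __u8 *name)
{
	struct cbt_info *cbt;

	/*
	 * All user-triggered callers serialize on cbt_mutex, so a plain
	 * (non-RCU) list walk is safe on this path and rcu_read_lock()
	 * is not needed here.
	 */
	lockdep_assert_held(&cbt_mutex);

	list_for_each_entry(cbt, &q->cbt_list, list)
		if (!memcmp(name, cbt->name, CBT_NAME_LENGTH))
			return cbt;

	return NULL;
}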


There is no mutex on the cbt_page_alloc path, and there is no mutex on the removal path either.

We definitely need a good explanation of why the RCU usage is correct. If we partially protect the list with a lock, the lock has to cover both ends of the critical section: it is not enough for writers to take it if readers can traverse the list holding neither the lock nor an RCU read-side section.
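For illustration, a reader that does not hold cbt_mutex would have to look roughly like this. This is only a sketch reusing cbt->lock, cbt->count and CBT_DEAD from the quoted hunks; pinning the entry via cbt->count is my assumption about how the existing counter is meant to be used:

static struct cbt_info *blk_cbt_find_get(struct request_queue *q, __u8 *name)
{
	struct cbt_info *cbt, *ret = NULL;
	unsigned long flags;

	rcu_read_lock();
	list_for_each_entry_rcu(cbt, &q->cbt_list, list) {
		if (memcmp(name, cbt->name, CBT_NAME_LENGTH))
			continue;
		spin_lock_irqsave(&cbt->lock, flags);
		/* Skip entries already unlinked by blk_cbt_del(). */
		if (!test_bit(CBT_DEAD, &cbt->flags)) {
			cbt->count++; /* pin before leaving the read section */
			ret = cbt;
		}
		spin_unlock_irqrestore(&cbt->lock, flags);
		if (ret)
			break;
	}
	rcu_read_unlock();

	return ret;
}

The matching put side would then have to invoke cbt_release_callback once the count drops to zero on a CBT_DEAD entry, mirroring what blk_cbt_del already does for the !in_use case.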


I see other places where the _rcu walk is done without rcu_read_lock, e.g. the cbt_page_alloc (!in_rcu) case and blk_cbt_update_size.

Also, the list can probably be corrupted if blk_cbt_del runs concurrently with itself (blk_cbt_release vs cbt_ioc_stop): two list_del_rcu calls on the same entry leave the neighbours' pointers inconsistent. I think the whole loop in blk_cbt_release should be protected against a concurrent cbt_ioc_stop (which may already hold in practice, as blk_cbt_release probably runs at the very end).
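For the self-race specifically, making the unlink idempotent would at least keep the same entry from being unlinked twice, e.g. (sketch only; it does not serialize unlinks of different entries, which would still need list-level locking):

static void blk_cbt_del(struct cbt_info *cbt)
{
	unsigned long flags;
	int in_use;

	spin_lock_irqsave(&cbt->lock, flags);
	/* Only the first caller gets to unlink and (maybe) free. */
	if (test_and_set_bit(CBT_DEAD, &cbt->flags)) {
		spin_unlock_irqrestore(&cbt->lock, flags);
		return;
	}
	list_del_rcu(&cbt->list);
	in_use = cbt->count;
	spin_unlock_irqrestore(&cbt->lock, flags);

	if (!in_use)
		call_rcu(&cbt->rcu, &cbt_release_callback);
}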

I don't think blk_cbt_release can run concurrently with any of the blk_cbt functions, because those operate on a file descriptor. I am sure the kernel won't call blk_cbt_release() while some fd still refers to the device.

--
Best regards, Tikhomirov Pavel
Senior Software Developer, Virtuozzo.

