On 1/22/25 19:05, Andrey Zhadchenko wrote:
On 1/22/25 11:51, Pavel Tikhomirov wrote:
+static struct cbt_info *blk_cbt_find(struct request_queue *q, __u8 *name)
+{
+	struct cbt_info *cbt;
+
+	list_for_each_entry(cbt, &q->cbt_list, list)
+		if (!memcmp(name, cbt->name, CBT_NAME_LENGTH))
+			return cbt;
+
+	return NULL;
+}
+
 static int blk_cbt_snap_create(struct request_queue *q, __u8 *name,
 			       struct blk_user_cbt_snap_create __user *arg)
 {
@@ -382,8 +397,7 @@ static int blk_cbt_snap_create(struct request_queue *q, __u8 *name,
 		return -EFAULT;
 	mutex_lock(&cbt_mutex);
-	cbt = q->cbt;
-
+	cbt = blk_cbt_find(q, name);
 	if (!cbt) {
 		mutex_unlock(&cbt_mutex);
 		return -ENOENT;
@@ -392,11 +406,6 @@ static int blk_cbt_snap_create(struct request_queue *q, __u8 *name,
 	BUG_ON(!cbt->map);
 	BUG_ON(!cbt->block_max);
-	if (!name || memcmp(name, cbt->name, sizeof(cbt->name))) {
-		mutex_unlock(&cbt_mutex);
-		return -EINVAL;
-	}
-
 	if (cbt->snp_map) {
 		mutex_unlock(&cbt_mutex);
 		return -EBUSY;
...
@@ -667,35 +665,46 @@ static void cbt_release_callback(struct rcu_head *head)
 	kfree(cbt);
 }
 
-void blk_cbt_release(struct request_queue *q)
+static void blk_cbt_del(struct cbt_info *cbt)
 {
-	struct cbt_info *cbt;
-	int in_use = 0;
 	unsigned long flags;
+	int in_use;
-	cbt = q->cbt;
-	if (!cbt)
-		return;
 
 	spin_lock_irqsave(&cbt->lock, flags);
 	set_bit(CBT_DEAD, &cbt->flags);
-	rcu_assign_pointer(q->cbt, NULL);
+	list_del_rcu(&cbt->list);
 	in_use = cbt->count;
 	spin_unlock_irqrestore(&cbt->lock, flags);
 	if (!in_use)
 		call_rcu(&cbt->rcu, &cbt_release_callback);
 }
Why don't we take rcu_read_lock around the list iteration in blk_cbt_find?
If we don't take rcu_read_lock there, the cbt returned from blk_cbt_find
may already be freed by the time the caller uses it.
RCU is not taken on the paths where the mutex is held: cbt_mutex makes user
operations exclusive, so there won't be any race. Maybe I should add a
comment then?
There is no mutex in the cbt_page_alloc path, and there is no mutex on the
removal path either.
We definitely need a good explanation of why the RCU usage is correct. If we
only partially protect the list with a lock, that is not enough: the lock has
to be taken on both ends of the critical section. I also see other places
where a _rcu walk is done without rcu_read_lock, e.g. the
cbt_page_alloc (!in_rcu) case and blk_cbt_update_size.
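For those lockless readers the walk itself would have to sit inside a
read-side critical section, something like this (just a sketch of the usual
pattern, not code from the patch; how the entry gets pinned afterwards is a
separate question):

	struct cbt_info *cbt;

	rcu_read_lock();
	list_for_each_entry_rcu(cbt, &q->cbt_list, list) {
		if (!memcmp(name, cbt->name, CBT_NAME_LENGTH)) {
			/*
			 * cbt is only guaranteed to exist until
			 * rcu_read_unlock(), so it has to be used here, or
			 * pinned (e.g. via cbt->count under cbt->lock)
			 * before the read-side section ends.
			 */
			break;
		}
	}
	rcu_read_unlock();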
Also, the list can probably be corrupted if blk_cbt_del runs concurrently
with itself (blk_cbt_release vs cbt_ioc_stop). I think the whole loop in
blk_cbt_release should be protected against a concurrent cbt_ioc_stop
(that may already be true, as blk_cbt_release probably runs at the very end).
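If that loop really can race with the ioctls, it would have to be serialized
with them, roughly along these lines (a purely hypothetical shape, I have not
checked what the patch actually does in blk_cbt_release):

	void blk_cbt_release(struct request_queue *q)
	{
		struct cbt_info *cbt, *tmp;

		/* hypothetical: take cbt_mutex so blk_cbt_del() cannot run
		 * concurrently with cbt_ioc_stop() on the same entry */
		mutex_lock(&cbt_mutex);
		list_for_each_entry_safe(cbt, tmp, &q->cbt_list, list)
			blk_cbt_del(cbt);
		mutex_unlock(&cbt_mutex);
	}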
I don't think blk_cbt_release can run concurrently with any of the blk_cbt_*
functions, because those operate on a file descriptor. I am sure the kernel
won't call blk_cbt_release() while some fd still refers to the device.
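In any case it would be good to have the rules spelled out next to cbt_list,
something along these lines (my wording, to be adjusted to whatever is
actually guaranteed):

	/*
	 * q->cbt_list is modified only with cbt_mutex held (ioctl paths).
	 * Lockless readers must walk it with list_for_each_entry_rcu()
	 * under rcu_read_lock() and must not dereference an entry after
	 * rcu_read_unlock() unless they have pinned it via cbt->count.
	 * blk_cbt_release() runs when no fd refers to the device anymore,
	 * so it cannot race with the ioctls.
	 */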
--
Best regards, Tikhomirov Pavel
Senior Software Developer, Virtuozzo.