On Fri, Mar 1, 2024 at 10:38 AM Yu Kuai <[email protected]> wrote:
>
> Hi,
>
> 在 2024/02/29 23:49, Xiao Ni 写道:
> > This reverts commit ad39c08186f8a0f221337985036ba86731d6aafe.
> >
> > Function stop_sync_thread only wakes up sync task. It also needs to
> > wake up sync thread. This problem will be fixed in the following
> > patch.
>
> I don't think so, unlike mddev->thread, sync_thread will only be
> executed once and must be executed each time it's registered, and caller
> must make sure to wake up registered sync_thread.
Hi Kuai
I'll modify the comments. But shouldn't it also be right to
wake_up(mddev->sync_thread) in function stop_sync_thread? You gave
the same patch yesterday. I know the caller should wake up the sync
thread as well.
To quote your comment: "However, I think the one to register sync_thread
is responsible to wake it up." If I understand correctly, we
can do something like this?
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7937,6 +7937,7 @@ static int raid5_run(struct mddev *mddev)
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
rcu_assign_pointer(mddev->sync_thread,
md_register_thread(md_do_sync, mddev, "reshape"));
+ md_wakeup_thread(mddev->sync_thread);
if (!mddev->sync_thread)
goto abort;
}
At first, I didn't revert
ad39c08186f8a0f221337985036ba86731d6aafe. But with my patch set, it
can cause failures in the lvm2 test suite. And the patch you gave yesterday
is part of my patch01, so I reverted it. Are you OK with it if I change the
comments and add the modification above (waking up the sync thread after
registering reshape)?
Best Regards
Xiao
>
> Thanks,
> Kuai
> >
> > Signed-off-by: Xiao Ni <[email protected]>
> > ---
> > drivers/md/md.c | 5 +----
> > drivers/md/raid10.c | 16 ++++++++++++++--
> > drivers/md/raid5.c | 29 +++++++++++++++++++++++++++--
> > 3 files changed, 42 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/md/md.c b/drivers/md/md.c
> > index 9e41a9aaba8b..db4743ba7f6c 100644
> > --- a/drivers/md/md.c
> > +++ b/drivers/md/md.c
> > @@ -9376,7 +9376,6 @@ static void md_start_sync(struct work_struct *ws)
> > struct mddev *mddev = container_of(ws, struct mddev, sync_work);
> > int spares = 0;
> > bool suspend = false;
> > - char *name;
> >
> > /*
> > * If reshape is still in progress, spares won't be added or removed
> > @@ -9414,10 +9413,8 @@ static void md_start_sync(struct work_struct *ws)
> > if (spares)
> > md_bitmap_write_all(mddev->bitmap);
> >
> > - name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ?
> > - "reshape" : "resync";
> > rcu_assign_pointer(mddev->sync_thread,
> > - md_register_thread(md_do_sync, mddev, name));
> > + md_register_thread(md_do_sync, mddev, "resync"));
> > if (!mddev->sync_thread) {
> > pr_warn("%s: could not start resync thread...\n",
> > mdname(mddev));
> > diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> > index a5f8419e2df1..7412066ea22c 100644
> > --- a/drivers/md/raid10.c
> > +++ b/drivers/md/raid10.c
> > @@ -4175,7 +4175,11 @@ static int raid10_run(struct mddev *mddev)
> > clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
> > clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
> > set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
> > - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> > + set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
> > + rcu_assign_pointer(mddev->sync_thread,
> > + md_register_thread(md_do_sync, mddev, "reshape"));
> > + if (!mddev->sync_thread)
> > + goto out_free_conf;
> > }
> >
> > return 0;
> > @@ -4569,8 +4573,16 @@ static int raid10_start_reshape(struct mddev *mddev)
> > clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
> > clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
> > set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
> > - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> > + set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
> > +
> > + rcu_assign_pointer(mddev->sync_thread,
> > + md_register_thread(md_do_sync, mddev, "reshape"));
> > + if (!mddev->sync_thread) {
> > + ret = -EAGAIN;
> > + goto abort;
> > + }
> > conf->reshape_checkpoint = jiffies;
> > + md_wakeup_thread(mddev->sync_thread);
> > md_new_event();
> > return 0;
> >
> > diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> > index 6a7a32f7fb91..8497880135ee 100644
> > --- a/drivers/md/raid5.c
> > +++ b/drivers/md/raid5.c
> > @@ -7936,7 +7936,11 @@ static int raid5_run(struct mddev *mddev)
> > clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
> > clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
> > set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
> > - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> > + set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
> > + rcu_assign_pointer(mddev->sync_thread,
> > + md_register_thread(md_do_sync, mddev, "reshape"));
> > + if (!mddev->sync_thread)
> > + goto abort;
> > }
> >
> > /* Ok, everything is just fine now */
> > @@ -8502,8 +8506,29 @@ static int raid5_start_reshape(struct mddev *mddev)
> > clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
> > clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
> > set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
> > - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> > + set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
> > + rcu_assign_pointer(mddev->sync_thread,
> > + md_register_thread(md_do_sync, mddev, "reshape"));
> > + if (!mddev->sync_thread) {
> > + mddev->recovery = 0;
> > + spin_lock_irq(&conf->device_lock);
> > + write_seqcount_begin(&conf->gen_lock);
> > + mddev->raid_disks = conf->raid_disks =
> > conf->previous_raid_disks;
> > + mddev->new_chunk_sectors =
> > + conf->chunk_sectors = conf->prev_chunk_sectors;
> > + mddev->new_layout = conf->algorithm = conf->prev_algo;
> > + rdev_for_each(rdev, mddev)
> > + rdev->new_data_offset = rdev->data_offset;
> > + smp_wmb();
> > + conf->generation--;
> > + conf->reshape_progress = MaxSector;
> > + mddev->reshape_position = MaxSector;
> > + write_seqcount_end(&conf->gen_lock);
> > + spin_unlock_irq(&conf->device_lock);
> > + return -EAGAIN;
> > + }
> > conf->reshape_checkpoint = jiffies;
> > + md_wakeup_thread(mddev->sync_thread);
> > md_new_event();
> > return 0;
> > }
> >
>