ping
Paolo - were you going to address these questions?
Or did I miss your reply?
On Tue, Feb 16, 2016 at 03:17:11PM +0800, Fam Zheng wrote:
> On Sun, 02/14 18:17, Paolo Bonzini wrote:
> > This is needed because dataplane will run during block migration as well.
> >
> > The block device migration code is quite liberal in taking the iothread
> > mutex. For simplicity, keep it the same way, even though one could
> > actually choose between the BQL (for regular BlockDriverStates) and
> > the AioContext (for dataplane BlockDriverStates). When the block layer
> > is made fully thread safe, aio_context_acquire shall go away altogether.
> >
> > Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
> > ---
> > migration/block.c | 61 ++++++++++++++++++++++++++++++++++++++++++++-----------
> > 1 file changed, 49 insertions(+), 12 deletions(-)
> >
> > diff --git a/migration/block.c b/migration/block.c
> > index a444058..6dd2327 100644
> > --- a/migration/block.c
> > +++ b/migration/block.c
> > @@ -60,9 +60,15 @@ typedef struct BlkMigDevState {
> > int64_t cur_sector;
> > int64_t cur_dirty;
> >
> > - /* Protected by block migration lock. */
> > + /* Data in the aio_bitmap is protected by block migration lock.
> > + * Allocation and free happen during setup and cleanup respectively.
> > + */
> > unsigned long *aio_bitmap;
> > +
> > + /* Protected by block migration lock. */
> > int64_t completed_sectors;
> > +
> > + /* Protected by iothread lock / AioContext. */
> > BdrvDirtyBitmap *dirty_bitmap;
> > Error *blocker;
> > } BlkMigDevState;
> > @@ -100,7 +106,7 @@ typedef struct BlkMigState {
> > int prev_progress;
> > int bulk_completed;
> >
> > - /* Lock must be taken _inside_ the iothread lock. */
> > + /* Lock must be taken _inside_ the iothread lock and any AioContexts. */
> > QemuMutex lock;
> > } BlkMigState;
> >
> > @@ -264,11 +270,13 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
> >
> > if (bmds->shared_base) {
> > qemu_mutex_lock_iothread();
> > + aio_context_acquire(bdrv_get_aio_context(bs));
> > while (cur_sector < total_sectors &&
> > !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
> > &nr_sectors)) {
> > cur_sector += nr_sectors;
> > }
> > + aio_context_release(bdrv_get_aio_context(bs));
> > qemu_mutex_unlock_iothread();
> > }
> >
> > @@ -302,11 +310,21 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
> > block_mig_state.submitted++;
> > blk_mig_unlock();
> >
> > + /* We do not know if bs is under the main thread (and thus does
> > + * not acquire the AioContext when doing AIO) or rather under
> > + * dataplane. Thus acquire both the iothread mutex and the
> > + * AioContext.
> > + *
> > + * This is ugly and will disappear when we make bdrv_* thread-safe,
> > + * without the need to acquire the AioContext.
> > + */
> > qemu_mutex_lock_iothread();
> > + aio_context_acquire(bdrv_get_aio_context(bmds->bs));
> > blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
> > nr_sectors, blk_mig_read_cb, blk);
> >
> > bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector, nr_sectors);
> > + aio_context_release(bdrv_get_aio_context(bmds->bs));
> > qemu_mutex_unlock_iothread();
> >
> > bmds->cur_sector = cur_sector + nr_sectors;
> > @@ -321,8 +339,9 @@ static int set_dirty_tracking(void)
> > int ret;
> >
> > QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
> > + /* Creating/dropping dirty bitmaps only requires the big QEMU lock. */
>
> Why? I don't think it is safe today. The BDS state is mutated and it can race
> with bdrv_set_dirty() etc. (Also the refresh_total_sectors in bdrv_nb_sectors
> can even do read/write, no?)
>
> > bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE,
> > NULL, NULL);
> > if (!bmds->dirty_bitmap) {
> > ret = -errno;
> > goto fail;
> > @@ -332,11 +352,14 @@ static int set_dirty_tracking(void)
> > return ret;
> > }
> >
> > +/* Called with iothread lock taken. */
> > +
> > static void unset_dirty_tracking(void)
> > {
> > BlkMigDevState *bmds;
> >
> > QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
> > + /* Creating/dropping dirty bitmaps only requires the big QEMU lock. */
>
> Ditto.
>
> > bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
> > }
> > }
> > @@ -597,21 +627,28 @@ static void block_migration_cleanup(void *opaque)
> > {
> > BlkMigDevState *bmds;
> > BlkMigBlock *blk;
> > + AioContext *ctx;
> >
> > bdrv_drain_all();
> >
> > unset_dirty_tracking();
> >
> > - blk_mig_lock();
>
> Why is it okay to skip the blk_mig_lock() for block_mig_state.bmds_list?
>
> > while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
> > QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
> > bdrv_op_unblock_all(bmds->bs, bmds->blocker);
> > error_free(bmds->blocker);
> > +
> > + /* Save ctx, because bmds->bs can disappear during bdrv_unref. */
> > + ctx = bdrv_get_aio_context(bmds->bs);
> > + aio_context_acquire(ctx);
> > bdrv_unref(bmds->bs);
> > + aio_context_release(ctx);
> > +
> > g_free(bmds->aio_bitmap);
> > g_free(bmds);
> > }
> >
> > + blk_mig_lock();
> > while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
> > QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
> > g_free(blk->buf);