On Mon, Apr 10, 2017 at 9:52 PM, Stefan Hajnoczi <stefa...@gmail.com> wrote:
> On Sat, Apr 08, 2017 at 09:17:58PM +0800, 858585 jemmy wrote:
>> On Fri, Apr 7, 2017 at 7:34 PM, Stefan Hajnoczi <stefa...@redhat.com> wrote:
>> > On Fri, Apr 07, 2017 at 09:30:33AM +0800, 858585 jemmy wrote:
>> >> On Thu, Apr 6, 2017 at 10:02 PM, Stefan Hajnoczi <stefa...@redhat.com> wrote:
>> >> > On Wed, Apr 05, 2017 at 05:27:58PM +0800, jemmy858...@gmail.com wrote:
>> >> >
>> >> > A proper solution is to refactor the synchronous code to make it
>> >> > asynchronous.  This might require invoking the system call from a
>> >> > thread pool worker.
>> >> >
>> >>
>> >> Yes, I agree with you, but that is a big change.
>> >> I will try to find out how to optimize this code; it may take a long time.
>> >>
>> >> This patch is not a perfect solution, but it can alleviate the problem.
>> >
>> > Let's try to understand the problem fully first.
>> >
>>
>> When migrating the VM at high speed, I sometimes find that VNC responds
>> slowly.  Not only VNC: the virsh console also responds slowly at times,
>> and the guest OS block I/O performance drops as well.
>>
>> The bug can be reproduced with these commands:
>> virsh migrate-setspeed 165cf436-312f-47e7-90f2-f8aa63f34893 900
>> virsh migrate --live 165cf436-312f-47e7-90f2-f8aa63f34893 \
>>     --copy-storage-inc qemu+ssh://10.59.163.38/system
>>
>> With --copy-storage-all there is no problem:
>> virsh migrate --live 165cf436-312f-47e7-90f2-f8aa63f34893 \
>>     --copy-storage-all qemu+ssh://10.59.163.38/system
>>
>> Comparing --copy-storage-inc with --copy-storage-all, I found the reason:
>> mig_save_device_bulk invokes bdrv_is_allocated, but bdrv_is_allocated is
>> synchronous and may block for a long time.
>>
>> I wrote this code to measure the time spent in bdrv_is_allocated():
>>
>> 279     static int max_time = 0;
>> 280     int tmp;
>>
>> 288     clock_gettime(CLOCK_MONOTONIC_RAW, &ts1);
>> 289     ret = bdrv_is_allocated(blk_bs(bb), cur_sector,
>> 290                             MAX_IS_ALLOCATED_SEARCH, &nr_sectors);
>> 291     clock_gettime(CLOCK_MONOTONIC_RAW, &ts2);
>> 292
>> 293
>> 294     tmp = (ts2.tv_sec - ts1.tv_sec) * 1000000000L
>> 295           + (ts2.tv_nsec - ts1.tv_nsec);
>> 296     if (tmp > max_time) {
>> 297         max_time = tmp;
>> 298         fprintf(stderr, "max_time is %d\n", max_time);
>> 299     }
>>
>> The test results (in nanoseconds) are below:
>>
>> max_time is 37014
>> max_time is 1075534
>> max_time is 17180913
>> max_time is 28586762
>> max_time is 49563584
>> max_time is 103085447
>> max_time is 110836833
>> max_time is 120331438
>>
>> bdrv_is_allocated is called after qemu_mutex_lock_iothread, and the main
>> thread also calls qemu_mutex_lock_iothread, so the main thread may have
>> to wait for a long time.
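(As a rough standalone sketch of the measurement technique above, for Linux:
time a call with CLOCK_MONOTONIC_RAW and report each new maximum in
nanoseconds.  slow_call() here is only a hypothetical stand-in for
bdrv_is_allocated(); the real instrumentation lives inside
mig_save_device_bulk() as quoted above.)

#include <stdio.h>
#include <time.h>

/* Hypothetical stand-in for the blocking call being measured. */
static void slow_call(void)
{
    struct timespec delay = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000 };
    nanosleep(&delay, NULL);    /* pretend to block for ~50 ms */
}

int main(void)
{
    long long max_ns = 0;

    for (int i = 0; i < 8; i++) {
        struct timespec ts1, ts2;
        long long delta;

        clock_gettime(CLOCK_MONOTONIC_RAW, &ts1);
        slow_call();
        clock_gettime(CLOCK_MONOTONIC_RAW, &ts2);

        /* Same arithmetic as above: seconds and nanoseconds -> nanoseconds. */
        delta = (ts2.tv_sec - ts1.tv_sec) * 1000000000LL
                + (ts2.tv_nsec - ts1.tv_nsec);
        if (delta > max_ns) {
            max_ns = delta;
            fprintf(stderr, "max_time is %lld\n", max_ns);
        }
    }
    return 0;
}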
>>
>> The relevant code in mig_save_device_bulk() is:
>>
>>     if (bmds->shared_base) {
>>         qemu_mutex_lock_iothread();
>>         aio_context_acquire(blk_get_aio_context(bb));
>>         /* Skip unallocated sectors; intentionally treats failure as
>>          * an allocated sector */
>>         while (cur_sector < total_sectors &&
>>                !bdrv_is_allocated(blk_bs(bb), cur_sector,
>>                                   MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) {
>>             cur_sector += nr_sectors;
>>         }
>>         aio_context_release(blk_get_aio_context(bb));
>>         qemu_mutex_unlock_iothread();
>>     }
>>
>> The main thread is then blocked in qemu_mutex_lock_iothread():
>>
>> #0  0x00007f107322f264 in __lll_lock_wait () from /lib64/libpthread.so.0
>> #1  0x00007f107322a508 in _L_lock_854 () from /lib64/libpthread.so.0
>> #2  0x00007f107322a3d7 in pthread_mutex_lock () from /lib64/libpthread.so.0
>> #3  0x0000000000949ecb in qemu_mutex_lock (mutex=0xfc51a0) at util/qemu-thread-posix.c:60
>> #4  0x0000000000459e58 in qemu_mutex_lock_iothread () at /root/qemu/cpus.c:1516
>> #5  0x0000000000945322 in os_host_main_loop_wait (timeout=28911939) at util/main-loop.c:258
>> #6  0x00000000009453f2 in main_loop_wait (nonblocking=0) at util/main-loop.c:517
>> #7  0x00000000005c76b4 in main_loop () at vl.c:1898
>> #8  0x00000000005ceb77 in main (argc=49, argv=0x7fff921182b8, envp=0x7fff92118448) at vl.c:4709
>
> The following patch moves bdrv_is_allocated() into bb's AioContext.  It
> will execute without blocking other I/O activity.
>
> Compile-tested only.

I will try this patch.
>
> diff --git a/migration/block.c b/migration/block.c
> index 7734ff7..a5572a4 100644
> --- a/migration/block.c
> +++ b/migration/block.c
> @@ -263,6 +263,29 @@ static void blk_mig_read_cb(void *opaque, int ret)
>      blk_mig_unlock();
>  }
>
> +typedef struct {
> +    int64_t *total_sectors;
> +    int64_t *cur_sector;
> +    BlockBackend *bb;
> +    QemuEvent event;
> +} MigNextAllocatedClusterData;
> +
> +static void coroutine_fn mig_next_allocated_cluster(void *opaque)
> +{
> +    MigNextAllocatedClusterData *data = opaque;
> +    int nr_sectors;
> +
> +    /* Skip unallocated sectors; intentionally treats failure as
> +     * an allocated sector */
> +    while (*data->cur_sector < *data->total_sectors &&
> +           !bdrv_is_allocated(blk_bs(data->bb), *data->cur_sector,
> +                              MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) {
> +        *data->cur_sector += nr_sectors;
> +    }
> +
> +    qemu_event_set(&data->event);
> +}
> +
>  /* Called with no lock taken.  */
>
>  static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
> @@ -274,17 +297,27 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
>      int nr_sectors;
>
>      if (bmds->shared_base) {
> +        /* Searching for the next allocated cluster can block.  Do it in a
> +         * coroutine inside bb's AioContext.  That way we don't need to hold
> +         * the global mutex while blocked.
> +         */
> +        AioContext *bb_ctx;
> +        Coroutine *co;
> +        MigNextAllocatedClusterData data = {
> +            .cur_sector = &cur_sector,
> +            .total_sectors = &total_sectors,
> +            .bb = bb,
> +        };
> +
> +        qemu_event_init(&data.event, false);
> +
>          qemu_mutex_lock_iothread();
> -        aio_context_acquire(blk_get_aio_context(bb));
> -        /* Skip unallocated sectors; intentionally treats failure as
> -         * an allocated sector */
> -        while (cur_sector < total_sectors &&
> -               !bdrv_is_allocated(blk_bs(bb), cur_sector,
> -                                  MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) {
> -            cur_sector += nr_sectors;
> -        }
> -        aio_context_release(blk_get_aio_context(bb));
> +        bb_ctx = blk_get_aio_context(bb);
>          qemu_mutex_unlock_iothread();
> +
> +        co = qemu_coroutine_create(mig_next_allocated_cluster, &data);
> +        aio_co_schedule(bb_ctx, co);
> +        qemu_event_wait(&data.event);
>      }
>
>      if (cur_sector >= total_sectors) {
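For what it's worth, the pattern the patch uses can be distilled into a small
helper.  This is only a sketch, assuming it is built inside the QEMU source
tree and reusing the same APIs the patch itself uses (qemu_coroutine_create,
aio_co_schedule, QemuEvent); run_blocking_in_bb_context() and BlockingWork are
made-up names for illustration, not existing QEMU functions:

#include "qemu/osdep.h"
#include "qemu/coroutine.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "block/aio.h"
#include "sysemu/block-backend.h"

typedef struct {
    void (*fn)(void *opaque);   /* blocking work to run in bb's AioContext */
    void *opaque;
    QemuEvent done;
} BlockingWork;

static void coroutine_fn blocking_work_co(void *opaque)
{
    BlockingWork *work = opaque;

    work->fn(work->opaque);
    qemu_event_set(&work->done);
}

static void run_blocking_in_bb_context(BlockBackend *bb,
                                       void (*fn)(void *), void *opaque)
{
    BlockingWork work = { .fn = fn, .opaque = opaque };
    AioContext *ctx;
    Coroutine *co;

    qemu_event_init(&work.done, false);

    /* Take the global mutex only briefly, to look up bb's AioContext. */
    qemu_mutex_lock_iothread();
    ctx = blk_get_aio_context(bb);
    qemu_mutex_unlock_iothread();

    /* Run the blocking work as a coroutine in bb's AioContext and wait
     * for completion without holding the global mutex. */
    co = qemu_coroutine_create(blocking_work_co, &work);
    aio_co_schedule(ctx, co);
    qemu_event_wait(&work.done);
}

The important property is that qemu_mutex_lock_iothread() is held only long
enough to look up the AioContext; the caller then waits on the QemuEvent
without the global mutex, so the main loop keeps running while the blocking
work proceeds in bb's AioContext.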