* Peter Xu (pet...@redhat.com) wrote: > Accessing from_dst_file is potentially racy in current code base like below: > > if (s->from_dst_file) > do_something(s->from_dst_file); > > Because from_dst_file can be reset right after the check in another > thread (rp_thread). One example is migrate_fd_cancel(). > > Use the same qemu_file_lock to protect it too, just like to_dst_file. > > When it's safe to access without lock, comment it. > > There's one special reference in migration_thread() that can be replaced by > the newly introduced rp_thread_created flag. > > Reported-by: Dr. David Alan Gilbert <dgilb...@redhat.com> > Signed-off-by: Peter Xu <pet...@redhat.com>
Yep, with Eric's comments Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> > --- > migration/migration.c | 32 +++++++++++++++++++++++++------- > migration/migration.h | 8 +++++--- > migration/ram.c | 1 + > 3 files changed, 31 insertions(+), 10 deletions(-) > > diff --git a/migration/migration.c b/migration/migration.c > index 21b94f75a3..fa70400f98 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -1879,10 +1879,12 @@ static void migrate_fd_cancel(MigrationState *s) > QEMUFile *f = migrate_get_current()->to_dst_file; > trace_migrate_fd_cancel(); > > + qemu_mutex_lock(&s->qemu_file_lock); > if (s->rp_state.from_dst_file) { > /* shutdown the rp socket, so causing the rp thread to shutdown */ > qemu_file_shutdown(s->rp_state.from_dst_file); > } > + qemu_mutex_unlock(&s->qemu_file_lock); > > do { > old_state = s->state; > @@ -2686,6 +2688,22 @@ static int migrate_handle_rp_resume_ack(MigrationState > *s, uint32_t value) > return 0; > } > > +/* Release ms->rp_state.from_dst_file in a safe way */ > +static void migration_release_from_dst_file(MigrationState *ms) > +{ > + QEMUFile *file = ms->rp_state.from_dst_file; > + > + qemu_mutex_lock(&ms->qemu_file_lock); > + /* > + * Reset the from_dst_file pointer first before releasing it, as we can't > + * block within lock section > + */ > + ms->rp_state.from_dst_file = NULL; > + qemu_mutex_unlock(&ms->qemu_file_lock); > + > + qemu_fclose(file); > +} > + > /* > * Handles messages sent on the return path towards the source VM > * > @@ -2827,11 +2845,13 @@ out: > * Maybe there is something we can do: it looks like a > * network down issue, and we pause for a recovery. > */ > - qemu_fclose(rp); > - ms->rp_state.from_dst_file = NULL; > + migration_release_from_dst_file(ms); > rp = NULL; > if (postcopy_pause_return_path_thread(ms)) { > - /* Reload rp, reset the rest */ > + /* > + * Reload rp, reset the rest. Referencing it is save since > + * it's reset only by us above, or when migration completes > + */ > rp = ms->rp_state.from_dst_file; > ms->rp_state.error = false; > goto retry; > @@ -2843,8 +2863,7 @@ out: > } > > trace_source_return_path_thread_end(); > - ms->rp_state.from_dst_file = NULL; > - qemu_fclose(rp); > + migration_release_from_dst_file(ms); > rcu_unregister_thread(); > return NULL; > } > @@ -2852,7 +2871,6 @@ out: > static int open_return_path_on_source(MigrationState *ms, > bool create_thread) > { > - > ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file); > if (!ms->rp_state.from_dst_file) { > return -1; > @@ -3746,7 +3764,7 @@ static void *migration_thread(void *opaque) > * If we opened the return path, we need to make sure dst has it > * opened as well. > */ > - if (s->rp_state.from_dst_file) { > + if (s->rp_state.rp_thread_created) { > /* Now tell the dest that it should open its end so it can reply */ > qemu_savevm_send_open_return_path(s->to_dst_file); > > diff --git a/migration/migration.h b/migration/migration.h > index c302879fad..7a5aa8c2fd 100644 > --- a/migration/migration.h > +++ b/migration/migration.h > @@ -154,12 +154,13 @@ struct MigrationState { > QemuThread thread; > QEMUBH *vm_start_bh; > QEMUBH *cleanup_bh; > + /* Protected by qemu_file_lock */ > QEMUFile *to_dst_file; > QIOChannelBuffer *bioc; > /* > - * Protects to_dst_file pointer. We need to make sure we won't > - * yield or hang during the critical section, since this lock will > - * be used in OOB command handler. > + * Protects to_dst_file/from_dst_file pointers. We need to make sure we > + * won't yield or hang during the critical section, since this lock will > be > + * used in OOB command handler. > */ > QemuMutex qemu_file_lock; > > @@ -192,6 +193,7 @@ struct MigrationState { > > /* State related to return path */ > struct { > + /* Protected by qemu_file_lock */ > QEMUFile *from_dst_file; > QemuThread rp_thread; > bool error; > diff --git a/migration/ram.c b/migration/ram.c > index b5fc454b2f..f728f5072f 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -4012,6 +4012,7 @@ static void > ram_dirty_bitmap_reload_notify(MigrationState *s) > int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block) > { > int ret = -EINVAL; > + /* from_dst_file is always valid because we're within rp_thread */ > QEMUFile *file = s->rp_state.from_dst_file; > unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS; > uint64_t local_size = DIV_ROUND_UP(nbits, 8); > -- > 2.31.1 > -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK