nice catch > From: "Dr. David Alan Gilbert" <dgilb...@redhat.com> > > Markus Armbruster spotted that the XBZRLE.lock might get initalised > multiple times in the case of a second attempted migration, and > that's undefined behaviour for pthread_mutex_init. > > This patch adds a flag to stop re-initialisation - finding somewhere > to cleanly destroy it where we can guarantee isn't happening > at the same place we might want to take the lock is tricky. > > It also adds comments to explain the lock use more. > > (I considered rewriting this lockless but can't justify it given > the simplicity of the fix). > > Signed-off-by: Dr. David Alan Gilbert <dgilb...@redhat.com> > --- > arch_init.c | 30 ++++++++++++++++++++++++++---- > 1 file changed, 26 insertions(+), 4 deletions(-) > > diff --git a/arch_init.c b/arch_init.c > index 60c975d..16474b5 100644 > --- a/arch_init.c > +++ b/arch_init.c > @@ -167,10 +167,13 @@ static struct { > /* Cache for XBZRLE, Protected by lock. */ > PageCache *cache; > QemuMutex lock;
QemuMutex *lock; > + bool lock_init; /* True once we've init'd lock */ > } XBZRLE = { > .encoded_buf = NULL, > .current_buf = NULL, > .cache = NULL, .lock = NULL, > + /* .lock is initialised in ram_save_setup */ > + .lock_init = false > }; maybe it is better that we malloc the lock in ram_save_setup dynamic, and free it in migration_end. > /* buffer used for XBZRLE decoding */ > static uint8_t *xbzrle_decoded_buf; > @@ -187,6 +190,11 @@ static void XBZRLE_cache_unlock(void) > qemu_mutex_unlock(&XBZRLE.lock); > } > > +/* called from qmp_migrate_set_cache_size in main thread, possibly while > + * a migration is in progress. > + * A running migration maybe using the cache and might finish during this > + * call, hence changes to the cache are protected by XBZRLE.lock(). > + */ > int64_t xbzrle_cache_resize(int64_t new_size) > { > PageCache *new_cache, *cache_to_free; > @@ -195,9 +203,12 @@ int64_t xbzrle_cache_resize(int64_t new_size) > return -1; > } > > - /* no need to lock, the current thread holds qemu big lock */ > + /* The current thread holds qemu big lock, and we hold it while creating > + * the cache in ram_save_setup, thus it's safe to test if the > + * cache exists yet without it's own lock (which might not have been > + * init'd yet) > + */ > if (XBZRLE.cache != NULL) { > - /* check XBZRLE.cache again later */ > if (pow2floor(new_size) == migrate_xbzrle_cache_size()) { > return pow2floor(new_size); > } > @@ -209,7 +220,10 @@ int64_t xbzrle_cache_resize(int64_t new_size) > } > > XBZRLE_cache_lock(); > - /* the XBZRLE.cache may have be destroyed, check it again */ > + /* the migration might have finished between the check above and us > + * taking the lock, causing XBZRLE.cache to be destroyed > + * check it again > + */ > if (XBZRLE.cache != NULL) { > cache_to_free = XBZRLE.cache; > XBZRLE.cache = new_cache; > @@ -744,7 +758,15 @@ static int ram_save_setup(QEMUFile *f, void *opaque) > DPRINTF("Error creating cache\n"); > return -1; > } > - qemu_mutex_init(&XBZRLE.lock); we malloc the lock and init it. Then free it in migration_end. It is safe because migration_end holds qemu big lock. > + /* mutex's can't be reinit'd without destroying them > + * and we've not got a good place to destroy it that > + * we can guarantee isn't being called when we might want > + * to hold the lock. > + */ > + if (!XBZRLE.lock_init) { > + XBZRLE.lock_init = true; > + qemu_mutex_init(&XBZRLE.lock); > + } > qemu_mutex_unlock_iothread(); > > /* We prefer not to abort if there is no memory */ > -- > 1.8.5.3 > >