On Wed, Aug 31, 2011 at 10:35:59AM -0400, Stefan Berger wrote: > This patch introduces file locking via fcntl() for the block layer so that > concurrent access to files shared by 2 Qemu instances, for example via NFS, > can be serialized. This feature is useful primarily during initial phases of > VM migration where the target machine's TIS driver validates the block > storage (and in a later patch checks for missing AES keys) and terminates > Qemu if the storage is found to be faulty. This then allows migration to > be gracefully terminated and Qemu continues running on the source machine. > > Support for win32 is based on win32 API and has been lightly tested with a > standalone test program locking shared storage from two different machines. > > To enable locking a file multiple times, a counter is used. Actual locking > happens the very first time and unlocking happens when the counter is zero. > > v7: > - fixed compilation error in win32 part > > Signed-off-by: Stefan Berger <stef...@linux.vnet.ibm.com>
Generally, what all other devices do is perform validation as the last step in migration, when device state is restored. On failure, management can decide what to do: retry the migration or restart on the source. Why is TPM special, and why does it need to be treated differently? > --- > > --- > block.c | 41 +++++++++++++++++++++++++++++++++++ > block.h | 8 ++++++ > block/raw-posix.c | 63 > ++++++++++++++++++++++++++++++++++++++++++++++++++++++ > block/raw-win32.c | 52 ++++++++++++++++++++++++++++++++++++++++++++ > block_int.h | 4 +++ > 5 files changed, 168 insertions(+) > > Index: qemu-git/block.c > =================================================================== > --- qemu-git.orig/block.c > +++ qemu-git/block.c > @@ -521,6 +521,8 @@ static int bdrv_open_common(BlockDriverS > goto free_and_fail; > } > > + drv->num_locks = 0; > + > bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR); > > ret = refresh_total_sectors(bs, bs->total_sectors); > @@ -1316,6 +1318,45 @@ void bdrv_get_geometry(BlockDriverState > *nb_sectors_ptr = length; > } > > +/* file locking */ > +static int bdrv_lock_common(BlockDriverState *bs, BDRVLockType lock_type) > +{ > + BlockDriver *drv = bs->drv; > + > + if (!drv) { > + return -ENOMEDIUM; > + } > + > + if (bs->file) { > + drv = bs->file->drv; > + if (drv->bdrv_lock) { > + return drv->bdrv_lock(bs->file, lock_type); > + } > + } > + > + if (drv->bdrv_lock) { > + return drv->bdrv_lock(bs, lock_type); > + } > + > + return -ENOTSUP; > +} > + > + > +int bdrv_lock(BlockDriverState *bs) > +{ > + if (bdrv_is_read_only(bs)) { > + return bdrv_lock_common(bs, BDRV_F_RDLCK); > + } > + > + return bdrv_lock_common(bs, BDRV_F_WRLCK); > +} > + > +void bdrv_unlock(BlockDriverState *bs) > +{ > + bdrv_lock_common(bs, BDRV_F_UNLCK); > +} > + > + > struct partition { > uint8_t boot_ind; /* 0x80 - active */ > uint8_t head; /* starting head */ > Index: qemu-git/block.h > =================================================================== > --- qemu-git.orig/block.h > 
+++ qemu-git/block.h > @@ -43,6 +43,12 @@ typedef struct QEMUSnapshotInfo { > #define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1) > > typedef enum { > + BDRV_F_UNLCK, > + BDRV_F_RDLCK, > + BDRV_F_WRLCK, > +} BDRVLockType; > + > +typedef enum { > BLOCK_ERR_REPORT, BLOCK_ERR_IGNORE, BLOCK_ERR_STOP_ENOSPC, > BLOCK_ERR_STOP_ANY > } BlockErrorAction; > @@ -100,6 +106,8 @@ int bdrv_commit(BlockDriverState *bs); > void bdrv_commit_all(void); > int bdrv_change_backing_file(BlockDriverState *bs, > const char *backing_file, const char *backing_fmt); > +int bdrv_lock(BlockDriverState *bs); > +void bdrv_unlock(BlockDriverState *bs); > void bdrv_register(BlockDriver *bdrv); > > > Index: qemu-git/block/raw-posix.c > =================================================================== > --- qemu-git.orig/block/raw-posix.c > +++ qemu-git/block/raw-posix.c > @@ -803,6 +803,67 @@ static int64_t raw_get_allocated_file_si > return (int64_t)st.st_blocks * 512; > } > > +static int raw_lock(BlockDriverState *bs, BDRVLockType lock_type) > +{ > + BlockDriver *drv = bs->drv; > + BDRVRawState *s = bs->opaque; > + struct flock flock = { > + .l_whence = SEEK_SET, > + .l_start = 0, > + .l_len = 0, > + }; > + int n; > + > + switch (lock_type) { > + case BDRV_F_RDLCK: > + case BDRV_F_WRLCK: > + if (drv->num_locks) { > + drv->num_locks++; > + return 0; > + } > + flock.l_type = (lock_type == BDRV_F_RDLCK) ? 
F_RDLCK : F_WRLCK; > + break; > + > + case BDRV_F_UNLCK: > + if (--drv->num_locks > 0) { > + return 0; > + } > + > + assert(drv->num_locks == 0); > + > + flock.l_type = F_UNLCK; > + break; > + > + default: > + return -EINVAL; > + } > + > + while (1) { > + n = fcntl(s->fd, F_SETLKW, &flock); > + if (n < 0) { > + if (errno == EINTR) { > + continue; > + } > + if (errno == EAGAIN) { > + usleep(10000); > + continue; > + } > + } > + break; > + } > + > + if (n == 0 && > + ((lock_type == BDRV_F_RDLCK) || (lock_type == BDRV_F_WRLCK))) { > + drv->num_locks = 1; > + } > + > + if (n) { > + return -errno; > + } > + > + return 0; > +} > + > static int raw_create(const char *filename, QEMUOptionParameter *options) > { > int fd; > @@ -901,6 +962,8 @@ static BlockDriver bdrv_file = { > .bdrv_get_allocated_file_size > = raw_get_allocated_file_size, > > + .bdrv_lock = raw_lock, > + > .create_options = raw_create_options, > }; > > Index: qemu-git/block_int.h > =================================================================== > --- qemu-git.orig/block_int.h > +++ qemu-git/block_int.h > @@ -146,6 +146,10 @@ struct BlockDriver { > */ > int (*bdrv_has_zero_init)(BlockDriverState *bs); > > + /* File locking */ > + int num_locks; > + int (*bdrv_lock)(BlockDriverState *bs, BDRVLockType lock_type); > + > QLIST_ENTRY(BlockDriver) list; > }; > > Index: qemu-git/block/raw-win32.c > =================================================================== > --- qemu-git.orig/block/raw-win32.c > +++ qemu-git/block/raw-win32.c > @@ -242,6 +242,57 @@ static int64_t raw_get_allocated_file_si > return st.st_size; > } > > +static int raw_lock(BlockDriverState *bs, int lock_type) > +{ > + BlockDriver *drv = bs->drv; > + BDRVRawState *s = bs->opaque; > + OVERLAPPED ov; > + BOOL res; > + DWORD num_bytes; > + > + switch (lock_type) { > + case BDRV_F_RDLCK: > + case BDRV_F_WRLCK: > + if (drv->num_locks) { > + drv->num_locks++; > + return 0; > + } > + > + memset(&ov, 0, sizeof(ov)); > + > + res = 
LockFileEx(s->hfile, LOCKFILE_EXCLUSIVE_LOCK, 0, ~0, ~0, &ov); > + > + if (res == FALSE) { > + res = GetOverlappedResult(s->hfile, &ov, &num_bytes, TRUE); > + } > + > + if (res == TRUE) { > + drv->num_locks = 1; > + } > + > + break; > + > + case BDRV_F_UNLCK: > + if (--drv->num_locks > 0) { > + return 0; > + } > + > + assert(drv->num_locks >= 0); > + > + res = UnlockFile(s->hfile, 0, 0, ~0, ~0); > + break; > + > + default: > + return -EINVAL; > + } > + > + if (res == FALSE) { > + return -EIO; > + } > + > + return 0; > +} > + > static int raw_create(const char *filename, QEMUOptionParameter *options) > { > int fd; > @@ -289,6 +340,7 @@ static BlockDriver bdrv_file = { > .bdrv_get_allocated_file_size > = raw_get_allocated_file_size, > > + .bdrv_lock = raw_lock, > .create_options = raw_create_options, > }; > >