On 09/05/2012 11:30 AM, Kevin Wolf wrote: > Am 30.08.2012 20:47, schrieb Jeff Cody: >> This is derived from the Supriya Kannery's reopen patches. >> >> This contains the raw-posix driver changes for the bdrv_reopen_* >> functions. All changes are staged into a temporary scratch buffer >> during the prepare() stage, and copied over to the live structure >> during commit(). Upon abort(), all changes are abandoned, and the >> live structures are unmodified. >> >> The _prepare() will create an extra fd - either by means of a dup, >> if possible, or opening a new fd if not (for instance, access >> control changes). Upon _commit(), the original fd is closed and >> the new fd is used. Upon _abort(), the duplicate/new fd is closed. >> >> Signed-off-by: Jeff Cody <jc...@redhat.com> >> --- >> block/raw-posix.c | 153 >> +++++++++++++++++++++++++++++++++++++++++++++++++----- >> 1 file changed, 139 insertions(+), 14 deletions(-) >> >> diff --git a/block/raw-posix.c b/block/raw-posix.c >> index 6be20b1..48086d7 100644 >> --- a/block/raw-posix.c >> +++ b/block/raw-posix.c >> @@ -140,6 +140,15 @@ typedef struct BDRVRawState { >> #endif >> } BDRVRawState; >> >> +typedef struct BDRVRawReopenState { >> + BDRVReopenState reopen_state; >> + int fd; >> + int open_flags; >> + uint8_t *aligned_buf; >> + unsigned aligned_buf_size; >> + BDRVRawState *stash_s; >> +} BDRVRawReopenState; >> + >> static int fd_open(BlockDriverState *bs); >> static int64_t raw_getlength(BlockDriverState *bs); >> >> @@ -185,6 +194,28 @@ static int raw_normalize_devicepath(const char >> **filename) >> } >> #endif >> >> +static void raw_parse_flags(int bdrv_flags, int *open_flags) >> +{ >> + assert(open_flags != NULL); >> + >> + *open_flags |= O_BINARY; >> + *open_flags &= ~O_ACCMODE; >> + if (bdrv_flags & BDRV_O_RDWR) { >> + *open_flags |= O_RDWR; >> + } else { >> + *open_flags |= O_RDONLY; >> + } >> + >> + /* Use O_DSYNC for write-through caching, no flags for write-back >> caching, >> + * and O_DIRECT for no 
caching. */ >> + if ((bdrv_flags & BDRV_O_NOCACHE)) { >> + *open_flags |= O_DIRECT; >> + } >> + if (!(bdrv_flags & BDRV_O_CACHE_WB)) { >> + *open_flags |= O_DSYNC; >> + } >> +} > > The code motion would ideally be a separate patch. >
OK >> + >> static int raw_open_common(BlockDriverState *bs, const char *filename, >> int bdrv_flags, int open_flags) >> { >> @@ -196,20 +227,8 @@ static int raw_open_common(BlockDriverState *bs, const >> char *filename, >> return ret; >> } >> >> - s->open_flags = open_flags | O_BINARY; >> - s->open_flags &= ~O_ACCMODE; >> - if (bdrv_flags & BDRV_O_RDWR) { >> - s->open_flags |= O_RDWR; >> - } else { >> - s->open_flags |= O_RDONLY; >> - } >> - >> - /* Use O_DSYNC for write-through caching, no flags for write-back >> caching, >> - * and O_DIRECT for no caching. */ >> - if ((bdrv_flags & BDRV_O_NOCACHE)) >> - s->open_flags |= O_DIRECT; >> - if (!(bdrv_flags & BDRV_O_CACHE_WB)) >> - s->open_flags |= O_DSYNC; >> + s->open_flags = open_flags; >> + raw_parse_flags(bdrv_flags, &s->open_flags); >> >> s->fd = -1; >> fd = qemu_open(filename, s->open_flags, 0644); >> @@ -283,6 +302,109 @@ static int raw_open(BlockDriverState *bs, const char >> *filename, int flags) >> return raw_open_common(bs, filename, flags, 0); >> } >> >> +static int raw_reopen_prepare(BDRVReopenState *state, Error **errp) >> +{ >> + BDRVRawState *s; >> + BDRVRawReopenState *raw_s; >> + int ret = 0; >> + >> + assert(state != NULL); >> + assert(state->bs != NULL); >> + >> + s = state->bs->opaque; >> + >> + state->opaque = g_malloc0(sizeof(BDRVRawReopenState)); >> + raw_s = state->opaque; >> + >> + raw_parse_flags(state->flags, &raw_s->open_flags); >> + >> + /* >> + * If we didn't have BDRV_O_NOCACHE set before, we may not have >> allocated >> + * aligned_buf >> + */ >> + if ((state->flags & BDRV_O_NOCACHE)) { >> + /* >> + * Allocate a buffer for read/modify/write cycles. Choose the size >> + * pessimistically as we don't know the block size yet. 
>> + */ >> + raw_s->aligned_buf_size = 32 * MAX_BLOCKSIZE; >> + raw_s->aligned_buf = qemu_memalign(MAX_BLOCKSIZE, >> + raw_s->aligned_buf_size); >> + >> + if (raw_s->aligned_buf == NULL) { >> + ret = -1; >> + goto error; >> + } > > Even though it's pretty small, I think I would factor this out into a > small static helper to make sure it's kept in sync with raw_open_common(). > OK, and as with your suggestion above, I'll put that in a separate code motion patch. >> + } >> + >> + int fcntl_flags = O_APPEND | O_ASYNC | O_NONBLOCK; >> +#ifdef O_NOATIME >> + fcntl_flags |= O_NOATIME; >> +#endif >> + if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & >> ~fcntl_flags)) { >> + /* dup the original fd */ >> + /* TODO: use qemu fcntl wrapper */ >> + raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0); >> + if (raw_s->fd == -1) { >> + ret = -1; >> + goto error; >> + } >> + ret = fcntl_setfl(raw_s->fd, raw_s->open_flags); >> + } else { >> + raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags, 0644); >> + if (raw_s->fd == -1) { >> + ret = -1; >> + } > > Ignoring this part for now, with qemu_dup_flags() it's going to look a > bit different. In particular, I'm hoping that we don't get a second > fcntl_flags enumeration here, but can just fall back to qemu_open() > whenever qemu_dup_flags() fails. That will require modification to qemu_dup_flags()... I believe qemu_dup_flags() silently filters out fcntl-incompatible flags. Maybe it would be best to create a small helper function in osdep.c that fetches the fcntl_flags. Then qemu_dup_flags() and this function would use the same helper to fetch fcntl_flags. The result of that would determine whether we call qemu_dup_flags() or qemu_open(). Although, I do think it makes sense to always try qemu_open() if qemu_dup_flags() fails for some reason. > > If we do need to keep fcntl_flags here, we'll probably want to add > O_DIRECT to it. 
> >> + } >> +error: >> + return ret; >> +} >> + >> + >> +static void raw_reopen_commit(BDRVReopenState *state) >> +{ >> + BDRVRawReopenState *raw_s = state->opaque; >> + BDRVRawState *s = state->bs->opaque; >> + >> + if (raw_s->aligned_buf != NULL) { >> + if (s->aligned_buf) { >> + qemu_vfree(s->aligned_buf); >> + } >> + s->aligned_buf = raw_s->aligned_buf; >> + s->aligned_buf_size = raw_s->aligned_buf_size; >> + } >> + >> + s->open_flags = raw_s->open_flags; >> + >> + close(s->fd); >> + s->fd = raw_s->fd; >> + >> + g_free(state->opaque); >> + state->opaque = NULL; >> +} > > I think s->use_aio must be changed as well, it depends on > BDRV_O_NOCACHE. Maybe we even need to do it in prepare, laio_init() may > need to be called. Yes, good catch, thanks. > > Kevin >