On Wed, 2017-09-13 at 10:44 -0600, Adam Wolfe Gordon wrote: > Register a watcher with rbd so that we get notified when an image is > resized. Propagate resizes to parent block devices so that guest devices > get resized without user intervention. > > Signed-off-by: Adam Wolfe Gordon <a...@digitalocean.com> > --- > Hello! > > We are using this patch in production at DigitalOcean and have tested it > fairly > extensively. However, we use the same block device configuration everywhere, > so > I'd be interested to get an answer to the question I've left in the code: > > > /* NOTE: This assumes there's only one layer between us and the > > block-backend. Is this always true? */ > > If that isn't true, this patch will need a bit of work to traverse up the > block > device tree and find the block-backend. > > Of course, any other feedback would be welcome too - this is my first foray > into > the qemu code. > > Regards, > Adam > > block/rbd.c | 80 > +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 80 insertions(+) > > diff --git a/block/rbd.c b/block/rbd.c > index 144f350e1f..1c9fcbec1f 100644 > --- a/block/rbd.c > +++ b/block/rbd.c > @@ -96,6 +96,8 @@ typedef struct BDRVRBDState { > rbd_image_t image; > char *image_name; > char *snap; > + uint64_t watcher_handle; > + uint64_t size_bytes; > } BDRVRBDState; > > static char *qemu_rbd_next_tok(char *src, char delim, char **p) > @@ -540,6 +542,67 @@ out: > return rados_str; > } > > +/* BH for when rbd notifies us of an update. */ > +static void qemu_rbd_update_bh(void *arg) > +{ > + BlockDriverState *parent, *bs = arg; > + BDRVRBDState *s = bs->opaque; > + bool was_variable_length; > + uint64_t new_size_bytes; > + int64_t new_parent_len; > + BdrvChild *c; > + int r; > + > + r = rbd_get_size(s->image, &new_size_bytes); > + if (r < 0) { > + error_report("error reading size for %s: %s", s->name, strerror(-r)); > + return; > + } > + > + /* Avoid no-op resizes on non-resize notifications. */ > + if (new_size_bytes == s->size_bytes) { > + error_printf("skipping non-resize rbd cb\n");
Image update callbacks can be invoked for any number of reasons, not just resize events. Therefore, you probably don't want to have an error message printed out here. > + return; > + } > + > + /* NOTE: This assumes there's only one layer between us and the > + block-backend. Is this always true? */ I don't think that can be assumed to be true. > + parent = bs->inherits_from; > + if (parent == NULL) { > + error_report("bs %s does not have parent", > bdrv_get_device_or_node_name(bs)); > + return; > + } > + > + /* Force parents to re-read our size. */ Can you just invoke blk_truncate instead? > + was_variable_length = bs->drv->has_variable_length; > + bs->drv->has_variable_length = true; > + new_parent_len = bdrv_getlength(parent); > + if (new_parent_len < 0) { > + error_report("getlength failed on parent %s", > bdrv_get_device_or_node_name(parent)); > + bs->drv->has_variable_length = was_variable_length; > + return; > + } > + bs->drv->has_variable_length = was_variable_length; > + > + /* Notify block backends that that we have resized. > + Copied from bdrv_parent_cb_resize. */ > + QLIST_FOREACH(c, &parent->parents, next_parent) { > + if (c->role->resize) { > + c->role->resize(c); > + } > + } > + > + s->size_bytes = new_size_bytes; > +} > + > +/* Called from non-qemu thread - careful! */ > +static void qemu_rbd_update_cb(void *arg) > +{ > + BlockDriverState *bs = arg; > + > + aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), qemu_rbd_update_bh, > bs); > +} > + > static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, > Error **errp) > { > @@ -672,9 +735,25 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict > *options, int flags, > } > } > > + r = rbd_update_watch(s->image, &s->watcher_handle, qemu_rbd_update_cb, > bs); This API method was only added in the Ceph Jewel release just over a year ago. This should probably gracefully degrade if compiled against an older version of the librbd API. > + if (r < 0) { > + error_setg_errno(errp, -r, "error registering watcher on %s", > s->name); > + goto failed_watch; > + } > + > + r = rbd_get_size(s->image, &s->size_bytes); > + if (r < 0) { > + error_setg_errno(errp, -r, "error reading size for %s", s->name); > + goto failed_sz; > + } > + > qemu_opts_del(opts); > return 0; > > +failed_sz: > + rbd_update_unwatch(s->image, s->watcher_handle); > +failed_watch: > + rbd_close(s->image); > failed_open: > rados_ioctx_destroy(s->io_ctx); > failed_shutdown: > @@ -712,6 +791,7 @@ static void qemu_rbd_close(BlockDriverState *bs) > { > BDRVRBDState *s = bs->opaque; > > + rbd_update_unwatch(s->image, s->watcher_handle); > rbd_close(s->image); > rados_ioctx_destroy(s->io_ctx); > g_free(s->snap);