On Mon, Aug 16, 2021 at 09:53:27AM +0300, Denis Plotnikov wrote: > > On 12.08.2021 11:04, Denis Plotnikov wrote: > > > > On 09.08.2021 13:48, Denis Plotnikov wrote: > > > On vhost-user-blk migration, qemu normally sends a number of commands > > > to enable logging if VHOST_USER_PROTOCOL_F_LOG_SHMFD is negotiated. > > > Qemu sends VHOST_USER_SET_FEATURES to enable buffers logging and > > > VHOST_USER_SET_VRING_ADDR per each started ring to enable "used ring" > > > data logging. > > > The issue is that qemu doesn't wait for reply from the vhost daemon > > > for these commands which may result in races between qemu expectation > > > of logging starting and actual login starting in vhost daemon. > > > > > > The race can appear as follows: on migration setup, qemu enables > > > dirty page > > > logging by sending VHOST_USER_SET_FEATURES. The command doesn't > > > arrive to a > > > vhost-user-blk daemon immediately and the daemon needs some time to > > > turn the > > > logging on internally. If qemu doesn't wait for reply, after sending the > > > command, qemu may start migrateing memory pages to a destination. At > > > this time, > > > the logging may not be actually turned on in the daemon but some > > > guest pages, > > > which the daemon is about to write to, may have already been transferred > > > without logging to the destination. Since the logging wasn't turned on, > > > those pages won't be transferred again as dirty. So we may end up with > > > corrupted data on the destination. > > > The same scenario is applicable for "used ring" data logging, which is > > > turned on with VHOST_USER_SET_VRING_ADDR command. > > > > > > To resolve this issue, this patch makes qemu wait for the command result > > > explicitly if VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated and > > > logging enabled. > > > > > > Signed-off-by: Denis Plotnikov <den-plotni...@yandex-team.ru>
This looks reasonable. This change is too scary for 6.1 so I think it should wait for 6.2. Thanks! > > > --- > > > v3 -> v4: > > > * join acked reply and get_features in enforce_reply [mst] > > > * typos, rewording, cosmetic changes [mst] > > > > > > v2 -> v3: > > > * send VHOST_USER_GET_FEATURES to flush out outstanding messages > > > [mst] > > > > > > v1 -> v2: > > > * send reply only when logging is enabled [mst] > > > > > > v0 -> v1: > > > * send reply for SET_VRING_ADDR, SET_FEATURES only [mst] > > > --- > > > hw/virtio/vhost-user.c | 139 +++++++++++++++++++++++++++++------------ > > > 1 file changed, 98 insertions(+), 41 deletions(-) > > > > > > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c > > > index ee57abe04526..5bb9254acd21 100644 > > > --- a/hw/virtio/vhost-user.c > > > +++ b/hw/virtio/vhost-user.c > > > @@ -1095,23 +1095,6 @@ static int vhost_user_set_mem_table(struct > > > vhost_dev *dev, > > > return 0; > > > } > > > -static int vhost_user_set_vring_addr(struct vhost_dev *dev, > > > - struct vhost_vring_addr *addr) > > > -{ > > > - VhostUserMsg msg = { > > > - .hdr.request = VHOST_USER_SET_VRING_ADDR, > > > - .hdr.flags = VHOST_USER_VERSION, > > > - .payload.addr = *addr, > > > - .hdr.size = sizeof(msg.payload.addr), > > > - }; > > > - > > > - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { > > > - return -1; > > > - } > > > - > > > - return 0; > > > -} > > > - > > > static int vhost_user_set_vring_endian(struct vhost_dev *dev, > > > struct vhost_vring_state *ring) > > > { > > > @@ -1288,72 +1271,146 @@ static int vhost_user_set_vring_call(struct > > > vhost_dev *dev, > > > return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); > > > } > > > -static int vhost_user_set_u64(struct vhost_dev *dev, int request, > > > uint64_t u64) > > > + > > > +static int vhost_user_get_u64(struct vhost_dev *dev, int request, > > > uint64_t *u64) > > > { > > > VhostUserMsg msg = { > > > .hdr.request = request, > > > .hdr.flags = VHOST_USER_VERSION, > > > - .payload.u64 = u64, > > > - .hdr.size = sizeof(msg.payload.u64), > > > }; > > > + if (vhost_user_one_time_request(request) && dev->vq_index != 0) { > > > + return 0; > > > + } > > > + > > > if (vhost_user_write(dev, &msg, NULL, 0) < 0) { > > > return -1; > > > } > > > + if (vhost_user_read(dev, &msg) < 0) { > > > + return -1; > > > + } > > > + > > > + if (msg.hdr.request != request) { > > > + error_report("Received unexpected msg type. Expected %d > > > received %d", > > > + request, msg.hdr.request); > > > + return -1; > > > + } > > > + > > > + if (msg.hdr.size != sizeof(msg.payload.u64)) { > > > + error_report("Received bad msg size."); > > > + return -1; > > > + } > > > + > > > + *u64 = msg.payload.u64; > > > + > > > return 0; > > > } > > > -static int vhost_user_set_features(struct vhost_dev *dev, > > > - uint64_t features) > > > +static int vhost_user_get_features(struct vhost_dev *dev, uint64_t > > > *features) > > > { > > > - return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); > > > + return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); > > > } > > > -static int vhost_user_set_protocol_features(struct vhost_dev *dev, > > > - uint64_t features) > > > +static int enforce_reply(struct vhost_dev *dev, > > > + const VhostUserMsg *msg) > > > { > > > - return vhost_user_set_u64(dev, > > > VHOST_USER_SET_PROTOCOL_FEATURES, features); > > > + uint64_t dummy; > > > + > > > + if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { > > > + return process_message_reply(dev, msg); > > > + } > > > + > > > + /* > > > + * We need to wait for a reply but the backend does not > > > + * support replies for the command we just sent. > > > + * Send VHOST_USER_GET_FEATURES which makes all backends > > > + * send a reply. > > > + */ > > > + return vhost_user_get_features(dev, &dummy); > > > } > > > -static int vhost_user_get_u64(struct vhost_dev *dev, int request, > > > uint64_t *u64) > > > +static int vhost_user_set_vring_addr(struct vhost_dev *dev, > > > + struct vhost_vring_addr *addr) > > > { > > > VhostUserMsg msg = { > > > - .hdr.request = request, > > > + .hdr.request = VHOST_USER_SET_VRING_ADDR, > > > .hdr.flags = VHOST_USER_VERSION, > > > + .payload.addr = *addr, > > > + .hdr.size = sizeof(msg.payload.addr), > > > }; > > > - if (vhost_user_one_time_request(request) && dev->vq_index != 0) { > > > - return 0; > > > + bool reply_supported = virtio_has_feature(dev->protocol_features, > > > + VHOST_USER_PROTOCOL_F_REPLY_ACK); > > > + > > > + /* > > > + * wait for a reply if logging is enabled to make sure > > > + * backend is actually logging changes > > > + */ > > > + bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); > > > + > > > + if (reply_supported && wait_for_reply) { > > > + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; > > > } > > > if (vhost_user_write(dev, &msg, NULL, 0) < 0) { > > > return -1; > > > } > > > - if (vhost_user_read(dev, &msg) < 0) { > > > - return -1; > > > + if (wait_for_reply) { > > > + return enforce_reply(dev, &msg); > > > } > > > - if (msg.hdr.request != request) { > > > - error_report("Received unexpected msg type. Expected %d > > > received %d", > > > - request, msg.hdr.request); > > > - return -1; > > > + return 0; > > > +} > > > + > > > +static int vhost_user_set_u64(struct vhost_dev *dev, int request, > > > uint64_t u64, > > > + bool wait_for_reply) > > > +{ > > > + VhostUserMsg msg = { > > > + .hdr.request = request, > > > + .hdr.flags = VHOST_USER_VERSION, > > > + .payload.u64 = u64, > > > + .hdr.size = sizeof(msg.payload.u64), > > > + }; > > > + > > > + if (wait_for_reply) { > > > + bool reply_supported = > > > virtio_has_feature(dev->protocol_features, > > > + VHOST_USER_PROTOCOL_F_REPLY_ACK); > > > + if (reply_supported) { > > > + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; > > > + } > > > } > > > - if (msg.hdr.size != sizeof(msg.payload.u64)) { > > > - error_report("Received bad msg size."); > > > + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { > > > return -1; > > > } > > > - *u64 = msg.payload.u64; > > > + if (wait_for_reply) { > > > + return enforce_reply(dev, &msg); > > > + } > > > return 0; > > > } > > > -static int vhost_user_get_features(struct vhost_dev *dev, > > > uint64_t *features) > > > +static int vhost_user_set_features(struct vhost_dev *dev, > > > + uint64_t features) > > > { > > > - return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); > > > + /* > > > + * wait for a reply if logging is enabled to make sure > > > + * backend is actually logging changes > > > + */ > > > + bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); > > > + > > > + return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features, > > > + log_enabled); > > > +} > > > + > > > +static int vhost_user_set_protocol_features(struct vhost_dev *dev, > > > + uint64_t features) > > > +{ > > > + return vhost_user_set_u64(dev, > > > VHOST_USER_SET_PROTOCOL_FEATURES, features, > > > + false); > > > } > > > static int vhost_user_set_owner(struct vhost_dev *dev)