On Thu, Aug 24, 2017 at 08:27:14PM +0100, Dr. David Alan Gilbert (git) wrote: > From: "Dr. David Alan Gilbert" <dgilb...@redhat.com> > > We need a better way, but at the moment we need the address of the > mappings sent back to qemu so it can interpret the messages on the > userfaultfd it reads. > > Note: We don't ask for the default 'ack' reply since we've got our own. > > Signed-off-by: Dr. David Alan Gilbert <dgilb...@redhat.com> > --- > contrib/libvhost-user/libvhost-user.c | 15 ++++++++- > docs/interop/vhost-user.txt | 6 ++++ > hw/virtio/trace-events | 1 + > hw/virtio/vhost-user.c | 57 > ++++++++++++++++++++++++++++++++++- > 4 files changed, 77 insertions(+), 2 deletions(-) > > diff --git a/contrib/libvhost-user/libvhost-user.c > b/contrib/libvhost-user/libvhost-user.c > index e6ab059a03..5ec54f7d60 100644 > --- a/contrib/libvhost-user/libvhost-user.c > +++ b/contrib/libvhost-user/libvhost-user.c > @@ -477,13 +477,26 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) > DPRINT("%s: region %d: Registered userfault for %llx + %llx\n", > __func__, i, reg_struct.range.start, > reg_struct.range.len); > /* TODO: Stash 'zero' support flags somewhere */ > - /* TODO: Get address back to QEMU */ > > + /* TODO: We need to find a way for the qemu not to see the > virtual > + * addresses of the clients, so as to keep better separation. > + */ > + /* Return the address to QEMU so that it can translate the ufd > + * fault addresses back. 
> + */ > + msg_region->userspace_addr = (uintptr_t)(mmap_addr + > + > dev_region->mmap_offset); > } > > close(vmsg->fds[i]); > } > > + if (dev->postcopy_listening) { > + /* Need to return the addresses - send the updated message back */ > + vmsg->fd_num = 0; > + return true; > + } > + > return false; > } > > diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt > index 73c3dd74db..b2a548c94d 100644 > --- a/docs/interop/vhost-user.txt > +++ b/docs/interop/vhost-user.txt > @@ -413,12 +413,18 @@ Master message types > Id: 5 > Equivalent ioctl: VHOST_SET_MEM_TABLE > Master payload: memory regions description > + Slave payload: (postcopy only) memory regions description > > Sets the memory map regions on the slave so it can translate the vring > addresses. In the ancillary data there is an array of file descriptors > for each memory mapped region. The size and ordering of the fds matches > the number and ordering of memory regions. > > + When postcopy-listening has been received, SET_MEM_TABLE replies with > + the bases of the memory mapped regions to the master. It must have > mmap'd > + the regions and enabled userfaultfd on them. Note NEED_REPLY_MASK > + is not set in this case. 
> + > * VHOST_USER_SET_LOG_BASE > > Id: 6 > diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events > index f736c7c84f..63fd4a79cf 100644 > --- a/hw/virtio/trace-events > +++ b/hw/virtio/trace-events > @@ -2,6 +2,7 @@ > > # hw/virtio/vhost-user.c > vhost_user_postcopy_listen(void) "" > +vhost_user_set_mem_table_postcopy(uint64_t client_addr, uint64_t qhva, int > reply_i, int region_i) "client:0x%"PRIx64" for hva: 0x%"PRIx64" reply %d > region %d" > > # hw/virtio/virtio.c > virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned > out_num) "elem %p size %zd in_num %u out_num %u" > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c > index 9178271ab2..2e4eb0864a 100644 > --- a/hw/virtio/vhost-user.c > +++ b/hw/virtio/vhost-user.c > @@ -19,6 +19,7 @@ > #include "qemu/sockets.h" > #include "migration/migration.h" > #include "migration/postcopy-ram.h" > +#include "trace.h" > > #include <sys/ioctl.h> > #include <sys/socket.h> > @@ -133,6 +134,7 @@ struct vhost_user { > int slave_fd; > NotifierWithReturn postcopy_notifier; > struct PostCopyFD postcopy_fd; > + uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS]; > }; > > static bool ioeventfd_enabled(void) > @@ -300,11 +302,13 @@ static int vhost_user_set_log_base(struct vhost_dev > *dev, uint64_t base, > static int vhost_user_set_mem_table(struct vhost_dev *dev, > struct vhost_memory *mem) > { > + struct vhost_user *u = dev->opaque; > int fds[VHOST_MEMORY_MAX_NREGIONS]; > int i, fd; > size_t fd_num = 0; > bool reply_supported = virtio_has_feature(dev->protocol_features, > - > VHOST_USER_PROTOCOL_F_REPLY_ACK); > + VHOST_USER_PROTOCOL_F_REPLY_ACK) && > + !u->postcopy_fd.handler;
(indent) > > VhostUserMsg msg = { > .request = VHOST_USER_SET_MEM_TABLE, > @@ -350,6 +354,57 @@ static int vhost_user_set_mem_table(struct vhost_dev > *dev, > return -1; > } > > + if (u->postcopy_fd.handler) {
It seems that once this handler is set, we never clear it. Do we need to unset it somewhere (perhaps in vhost_user_postcopy_end)?
> + VhostUserMsg msg_reply; > + int region_i, reply_i; > + if (vhost_user_read(dev, &msg_reply) < 0) { > + return -1; > + } > + > + if (msg_reply.request != VHOST_USER_SET_MEM_TABLE) { > + error_report("%s: Received unexpected msg type." > + "Expected %d received %d", __func__, > + VHOST_USER_SET_MEM_TABLE, msg_reply.request); > + return -1; > + } > + /* We're using the same structure, just reusing one of the > + * fields, so it should be the same size. > + */ > + if (msg_reply.size != msg.size) { > + error_report("%s: Unexpected size for postcopy reply " > + "%d vs %d", __func__, msg_reply.size, msg.size); > + return -1; > + } > + > + memset(u->postcopy_client_bases, 0, > + sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS); > + > + /* They're in the same order as the regions that were sent > + * but some of the regions were skipped (above) if they > + * didn't have fd's > + */ > + for (reply_i = 0, region_i = 0; > + region_i < dev->mem->nregions; > + region_i++) { > + if (reply_i < fd_num && > + msg_reply.payload.memory.regions[region_i].guest_phys_addr ==
^^^^^^^^ Should the index into msg_reply.payload.memory.regions here be reply_i rather than region_i? (Also, maybe we could use pointers to the regions for better readability.)
> + dev->mem->regions[region_i].guest_phys_addr) { > + u->postcopy_client_bases[region_i] = > + msg_reply.payload.memory.regions[reply_i].userspace_addr; > + trace_vhost_user_set_mem_table_postcopy( > + msg_reply.payload.memory.regions[reply_i].userspace_addr, > + msg.payload.memory.regions[reply_i].userspace_addr, > + reply_i, region_i); > + reply_i++; > + } > + } > + if (reply_i != fd_num) { > + error_report("%s: postcopy reply not fully consumed " > + "%d vs %zd", > + __func__, reply_i, fd_num); > + return -1; > + } > + } > if (reply_supported) { > return process_message_reply(dev, &msg); > } > -- > 2.13.5 > -- Peter Xu