On Mon, Sep 16, 2024 at 7:21 PM Stefan Hajnoczi <stefa...@redhat.com> wrote: > > On Thu, Sep 12, 2024 at 04:53:31PM +0200, Albert Esteve wrote: > > Add SHMEM_MAP/UNMAP requests to vhost-user to > > handle VIRTIO Shared Memory mappings. > > > > This request allows backends to dynamically map > > fds into a VIRTIO Shared Memory Region indentified > > by its `shmid`. Then, the fd memory is advertised > > to the driver as a base addres + offset, so it > > can be read/written (depending on the mmap flags > > requested) while its valid. > > > > The backend can munmap the memory range > > in a given VIRTIO Shared Memory Region (again, > > identified by its `shmid`), to free it. Upon > > receiving this message, the front-end must > > mmap the regions with PROT_NONE to reserve > > the virtual memory space. > > > > The device model needs to create MemoryRegion > > instances for the VIRTIO Shared Memory Regions > > and add them to the `VirtIODevice` instance. > > > > Signed-off-by: Albert Esteve <aest...@redhat.com> > > --- > > hw/virtio/vhost-user.c | 122 ++++++++++++++++++++++ > > hw/virtio/virtio.c | 13 +++ > > include/hw/virtio/virtio.h | 5 + > > subprojects/libvhost-user/libvhost-user.c | 60 +++++++++++ > > subprojects/libvhost-user/libvhost-user.h | 52 +++++++++ > > 5 files changed, 252 insertions(+) > > > > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c > > index 00561daa06..338cc942ec 100644 > > --- a/hw/virtio/vhost-user.c > > +++ b/hw/virtio/vhost-user.c > > @@ -115,6 +115,8 @@ typedef enum VhostUserBackendRequest { > > VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6, > > VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7, > > VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8, > > + VHOST_USER_BACKEND_SHMEM_MAP = 9, > > + VHOST_USER_BACKEND_SHMEM_UNMAP = 10, > > VHOST_USER_BACKEND_MAX > > } VhostUserBackendRequest; > > > > @@ -192,6 +194,24 @@ typedef struct VhostUserShared { > > unsigned char uuid[16]; > > } VhostUserShared; > > > > +/* For the flags field of VhostUserMMap */ > > +#define VHOST_USER_FLAG_MAP_R (1u << 0) > > +#define VHOST_USER_FLAG_MAP_W (1u << 1) > > + > > +typedef struct { > > + /* VIRTIO Shared Memory Region ID */ > > + uint8_t shmid; > > + uint8_t padding[7]; > > + /* File offset */ > > + uint64_t fd_offset; > > + /* Offset within the VIRTIO Shared Memory Region */ > > + uint64_t shm_offset; > > + /* Size of the mapping */ > > + uint64_t len; > > + /* Flags for the mmap operation, from VHOST_USER_FLAG_* */ > > + uint64_t flags; > > +} VhostUserMMap; > > + > > typedef struct { > > VhostUserRequest request; > > > > @@ -224,6 +244,7 @@ typedef union { > > VhostUserInflight inflight; > > VhostUserShared object; > > VhostUserTransferDeviceState transfer_state; > > + VhostUserMMap mmap; > > } VhostUserPayload; > > > > typedef struct VhostUserMsg { > > @@ -1749,6 +1770,100 @@ > > vhost_user_backend_handle_shared_object_lookup(struct vhost_user *u, > > return 0; > > } > > > > +static int > > +vhost_user_backend_handle_shmem_map(struct vhost_dev *dev, > > + VhostUserMMap *vu_mmap, > > + int fd) > > +{ > > + void *addr = 0; > > + MemoryRegion *mr = NULL; > > + > > + if (fd < 0) { > > + error_report("Bad fd for map"); > > + return -EBADF; > > + } > > + > > + if (!dev->vdev->shmem_list || > > + dev->vdev->n_shmem_regions <= vu_mmap->shmid) { > > + error_report("Device only has %d VIRTIO Shared Memory Regions. " > > + "Requested ID: %d", > > + dev->vdev->n_shmem_regions, vu_mmap->shmid); > > + return -EFAULT; > > + } > > + > > + mr = &dev->vdev->shmem_list[vu_mmap->shmid]; > > + > > + if (!mr) { > > + error_report("VIRTIO Shared Memory Region at " > > + "ID %d unitialized", vu_mmap->shmid); > > + return -EFAULT; > > + } > > + > > + if ((vu_mmap->shm_offset + vu_mmap->len) < vu_mmap->len || > > + (vu_mmap->shm_offset + vu_mmap->len) > mr->size) { > > + error_report("Bad offset/len for mmap %" PRIx64 "+%" PRIx64, > > + vu_mmap->shm_offset, vu_mmap->len); > > + return -EFAULT; > > + } > > + > > + void *shmem_ptr = memory_region_get_ram_ptr(mr); > > + > > + addr = mmap(shmem_ptr + vu_mmap->shm_offset, vu_mmap->len, > > + ((vu_mmap->flags & VHOST_USER_FLAG_MAP_R) ? PROT_READ : 0) | > > + ((vu_mmap->flags & VHOST_USER_FLAG_MAP_W) ? PROT_WRITE : 0), > > + MAP_SHARED | MAP_FIXED, fd, vu_mmap->fd_offset); > > + > > + if (addr == MAP_FAILED) { > > + error_report("Failed to mmap mem fd"); > > + return -EFAULT; > > + } > > + > > + return 0; > > +} > > + > > +static int > > +vhost_user_backend_handle_shmem_unmap(struct vhost_dev *dev, > > + VhostUserMMap *vu_mmap) > > +{ > > + void *addr = 0; > > + MemoryRegion *mr = NULL; > > + > > + if (!dev->vdev->shmem_list || > > + dev->vdev->n_shmem_regions <= vu_mmap->shmid) { > > + error_report("Device only has %d VIRTIO Shared Memory Regions. " > > + "Requested ID: %d", > > + dev->vdev->n_shmem_regions, vu_mmap->shmid); > > + return -EFAULT; > > + } > > + > > + mr = &dev->vdev->shmem_list[vu_mmap->shmid]; > > + > > + if (!mr) { > > + error_report("VIRTIO Shared Memory Region at " > > + "ID %d unitialized", vu_mmap->shmid); > > + return -EFAULT; > > + } > > + > > + if ((vu_mmap->shm_offset + vu_mmap->len) < vu_mmap->len || > > + (vu_mmap->shm_offset + vu_mmap->len) > mr->size) { > > + error_report("Bad offset/len for mmap %" PRIx64 "+%" PRIx64, > > + vu_mmap->shm_offset, vu_mmap->len); > > + return -EFAULT; > > + } > > + > > + void *shmem_ptr = memory_region_get_ram_ptr(mr); > > + > > + addr = mmap(shmem_ptr + vu_mmap->shm_offset, vu_mmap->len, > > + PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); > > + > > + if (addr == MAP_FAILED) { > > + error_report("Failed to unmap memory"); > > + return -EFAULT; > > + } > > + > > + return 0; > > +} > > + > > static void close_backend_channel(struct vhost_user *u) > > { > > g_source_destroy(u->backend_src); > > @@ -1817,6 +1932,13 @@ static gboolean backend_read(QIOChannel *ioc, > > GIOCondition condition, > > ret = vhost_user_backend_handle_shared_object_lookup(dev->opaque, > > ioc, > > &hdr, > > &payload); > > break; > > + case VHOST_USER_BACKEND_SHMEM_MAP: > > + ret = vhost_user_backend_handle_shmem_map(dev, &payload.mmap, > > + fd ? fd[0] : -1); > > + break; > > + case VHOST_USER_BACKEND_SHMEM_UNMAP: > > + ret = vhost_user_backend_handle_shmem_unmap(dev, &payload.mmap); > > + break; > > default: > > error_report("Received unexpected msg type: %d.", hdr.request); > > ret = -EINVAL; > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > index 9e10cbc058..ccc4f2cd75 100644 > > --- a/hw/virtio/virtio.c > > +++ b/hw/virtio/virtio.c > > @@ -3059,6 +3059,17 @@ int virtio_save(VirtIODevice *vdev, QEMUFile *f) > > return vmstate_save_state(f, &vmstate_virtio, vdev, NULL); > > } > > > > +MemoryRegion *virtio_new_shmem_region(VirtIODevice *vdev) > > +{ > > + MemoryRegion *mr; > > + ++vdev->n_shmem_regions; > > + vdev->shmem_list = g_renew(MemoryRegion, vdev->shmem_list, > > + vdev->n_shmem_regions); > > + mr = &vdev->shmem_list[vdev->n_shmem_regions - 1]; > > + mr = g_new0(MemoryRegion, 1); > > + return mr; > > +} > > This function looks broken. shmem_list[] is reallocated so old > MemoryRegion pointers will be dangling pointers. And then the > MemoryRegion is allocated again using g_new0() but there is no way to > retrieve that address again via shmem_list[]. > > I expected something like this: > > MemoryRegion *virtio_new_shmem_region(VirtIODevice *vdev) > { > MemoryRegion *mr; > > assert(vdev->n_shmem_regions < INT_MAX); > ++vdev->n_shmem_regions; > vdev->shmem_list = g_renew(MemoryRegion *, vdev->shmem_list, > vdev->n_shmem_regions); > mr = g_new0(MemoryRegion, 1); > vdev->shmem_list[vdev->n_shmem_regions - 1] = mr; > return mr; > } > > > + > > /* A wrapper for use as a VMState .put function */ > > static int virtio_device_put(QEMUFile *f, void *opaque, size_t size, > > const VMStateField *field, JSONWriter > > *vmdesc) > > @@ -3481,6 +3492,8 @@ void virtio_init(VirtIODevice *vdev, uint16_t > > device_id, size_t config_size) > > virtio_vmstate_change, vdev); > > vdev->device_endian = virtio_default_endian(); > > vdev->use_guest_notifier_mask = true; > > + vdev->shmem_list = NULL; > > shmem_list[] and each MemoryRegion needs to be free somewhere. > virtio_device_instance_finalize()? > > > + vdev->n_shmem_regions = 0; > > } > > > > /* > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > index 0fcbc5c0c6..d4a2f664d9 100644 > > --- a/include/hw/virtio/virtio.h > > +++ b/include/hw/virtio/virtio.h > > @@ -167,6 +167,9 @@ struct VirtIODevice > > */ > > EventNotifier config_notifier; > > bool device_iotlb_enabled; > > + /* Shared memory region for vhost-user mappings. */ > > + MemoryRegion *shmem_list; > > + int n_shmem_regions; > > }; > > > > struct VirtioDeviceClass { > > @@ -286,6 +289,8 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); > > > > int virtio_save(VirtIODevice *vdev, QEMUFile *f); > > > > +MemoryRegion *virtio_new_shmem_region(VirtIODevice *vdev); > > + > > extern const VMStateInfo virtio_vmstate_info; > > > > #define VMSTATE_VIRTIO_DEVICE \ > > diff --git a/subprojects/libvhost-user/libvhost-user.c > > b/subprojects/libvhost-user/libvhost-user.c > > index 9c630c2170..496268e12b 100644 > > --- a/subprojects/libvhost-user/libvhost-user.c > > +++ b/subprojects/libvhost-user/libvhost-user.c > > @@ -1592,6 +1592,66 @@ vu_rm_shared_object(VuDev *dev, unsigned char > > uuid[UUID_LEN]) > > return vu_send_message(dev, &msg); > > } > > > > +bool > > +vu_shmem_map(VuDev *dev, uint8_t shmid, uint64_t fd_offset, > > + uint64_t shm_offset, uint64_t len, uint64_t flags) > > +{ > > + VhostUserMsg vmsg = { > > + .request = VHOST_USER_BACKEND_SHMEM_MAP, > > + .size = sizeof(vmsg.payload.mmap), > > + .flags = VHOST_USER_VERSION, > > + .payload.mmap = { > > + .shmid = shmid, > > + .fd_offset = fd_offset, > > + .shm_offset = shm_offset, > > + .len = len, > > + .flags = flags, > > + }, > > + }; > > + > > + if (vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_REPLY_ACK)) { > > + vmsg.flags |= VHOST_USER_NEED_REPLY_MASK; > > + } > > + > > + pthread_mutex_lock(&dev->backend_mutex); > > + if (!vu_message_write(dev, dev->backend_fd, &vmsg)) { > > + pthread_mutex_unlock(&dev->backend_mutex); > > + return false; > > + } > > + > > + /* Also unlocks the backend_mutex */ > > + return vu_process_message_reply(dev, &vmsg); > > +} > > + > > +bool > > +vu_shmem_unmap(VuDev *dev, uint8_t shmid, uint64_t shm_offset, uint64_t > > len) > > +{ > > + VhostUserMsg vmsg = { > > + .request = VHOST_USER_BACKEND_SHMEM_UNMAP, > > + .size = sizeof(vmsg.payload.mmap), > > + .flags = VHOST_USER_VERSION, > > + .payload.mmap = { > > + .shmid = shmid, > > + .fd_offset = 0, > > + .shm_offset = shm_offset, > > + .len = len, > > + }, > > + }; > > + > > + if (vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_REPLY_ACK)) { > > + vmsg.flags |= VHOST_USER_NEED_REPLY_MASK; > > + } > > + > > + pthread_mutex_lock(&dev->backend_mutex); > > + if (!vu_message_write(dev, dev->backend_fd, &vmsg)) { > > + pthread_mutex_unlock(&dev->backend_mutex); > > + return false; > > + } > > + > > + /* Also unlocks the backend_mutex */ > > + return vu_process_message_reply(dev, &vmsg); > > +} > > + > > static bool > > vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg) > > { > > diff --git a/subprojects/libvhost-user/libvhost-user.h > > b/subprojects/libvhost-user/libvhost-user.h > > index deb40e77b3..ea4902e876 100644 > > --- a/subprojects/libvhost-user/libvhost-user.h > > +++ b/subprojects/libvhost-user/libvhost-user.h > > @@ -127,6 +127,8 @@ typedef enum VhostUserBackendRequest { > > VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6, > > VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7, > > VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8, > > + VHOST_USER_BACKEND_SHMEM_MAP = 9, > > + VHOST_USER_BACKEND_SHMEM_UNMAP = 10, > > VHOST_USER_BACKEND_MAX > > } VhostUserBackendRequest; > > > > @@ -186,6 +188,24 @@ typedef struct VhostUserShared { > > unsigned char uuid[UUID_LEN]; > > } VhostUserShared; > > > > +/* For the flags field of VhostUserMMap */ > > +#define VHOST_USER_FLAG_MAP_R (1u << 0) > > +#define VHOST_USER_FLAG_MAP_W (1u << 1) > > + > > +typedef struct { > > + /* VIRTIO Shared Memory Region ID */ > > + uint8_t shmid; > > + uint8_t padding[7]; > > + /* File offset */ > > + uint64_t fd_offset; > > + /* Offset within the VIRTIO Shared Memory Region */ > > + uint64_t shm_offset; > > + /* Size of the mapping */ > > + uint64_t len; > > + /* Flags for the mmap operation, from VHOST_USER_FLAG_* */ > > + uint64_t flags; > > +} VhostUserMMap; > > + > > #if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) > > # define VU_PACKED __attribute__((gcc_struct, packed)) > > #else > > @@ -214,6 +234,7 @@ typedef struct VhostUserMsg { > > VhostUserVringArea area; > > VhostUserInflight inflight; > > VhostUserShared object; > > + VhostUserMMap mmap; > > } payload; > > > > int fds[VHOST_MEMORY_BASELINE_NREGIONS]; > > @@ -597,6 +618,37 @@ bool vu_add_shared_object(VuDev *dev, unsigned char > > uuid[UUID_LEN]); > > */ > > bool vu_rm_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN]); > > > > +/** > > + * vu_shmem_map: > > + * @dev: a VuDev context > > + * @shmid: VIRTIO Shared Memory Region ID > > + * @fd_offset: File offset > > + * @shm_offset: Offset within the VIRTIO Shared Memory Region > > + * @len: Size of the mapping > > + * @flags: Flags for the mmap operation > > + * > > + * Advertises a new mapping to be made in a given VIRTIO Shared Memory > > Region. > > + * > > + * Returns: TRUE on success, FALSE on failure. > > + */ > > +bool vu_shmem_map(VuDev *dev, uint8_t shmid, uint64_t fd_offset, > > + uint64_t shm_offset, uint64_t len, uint64_t flags); > > How is the fd passed to the front-end?
True, I have tested the backend part directly with rust-vmm (https://github.com/rust-vmm/vhost/pull/251), so I have not run this code myself. I'll add an fd argument and try to cover it with a test at least, as we discussed in other threads. > > > + > > +/** > > + * vu_shmem_map: > > "vu_shmem_unmap:" > > > + * @dev: a VuDev context > > + * @shmid: VIRTIO Shared Memory Region ID > > + * @fd_offset: File offset > > + * @len: Size of the mapping > > + * > > + * The front-end un-mmaps a given range in the VIRTIO Shared Memory Region > > + * with the requested `shmid`. > > + * > > + * Returns: TRUE on success, FALSE on failure. > > + */ > > +bool vu_shmem_unmap(VuDev *dev, uint8_t shmid, uint64_t shm_offset, > > + uint64_t len); > > + > > /** > > * vu_queue_set_notification: > > * @dev: a VuDev context > > -- > > 2.45.2 > >