On Thu, Jan 29, 2015 at 11:58:08AM +0800, Linhaifeng wrote: > Hi, Michael S. Tsirkin > > The vhost-user device will not work if there are two NUMA nodes in the VM. > > Should we fix this bug or ignore it?
I suggest we fix this bug. I saw that you responded to self so I assume you will send v2 with the modification you listed. Did I get it right? Also, pls Cc qemu-stable on bugfixes. Thanks! > On 2014/12/18 13:06, Linhaifeng wrote: > > > > > > On 2014/12/17 14:02, haifeng....@huawei.com wrote: > >> From: linhaifeng <haifeng....@huawei.com> > >> > >> If we create VM with two or more numa nodes qemu will create two > >> or more hugepage files but qemu only send one hugepage file fd > >> to vhost-user when VM's memory size is 2G and with two numa nodes. > >> > >> Signed-off-by: linhaifeng <haifeng....@huawei.com> > >> --- > >> hw/virtio/vhost-user.c | 78 > >> ++++++++++++++++++++++++++++++--------------- > >> hw/virtio/vhost.c | 13 ++++++++ > >> linux-headers/linux/vhost.h | 7 ++++ > >> 3 files changed, 73 insertions(+), 25 deletions(-) > >> > >> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c > >> index aefe0bb..439cbba 100644 > >> --- a/hw/virtio/vhost-user.c > >> +++ b/hw/virtio/vhost-user.c > >> @@ -24,6 +24,10 @@ > >> #include <linux/vhost.h> > >> > >> #define VHOST_MEMORY_MAX_NREGIONS 8 > >> +/* FIXME: same as the max number of numa node?*/ > >> +#define HUGEPAGE_MAX_FILES 8 > >> + > >> +#define RAM_SHARED (1 << 1) > >> > >> typedef enum VhostUserRequest { > >> VHOST_USER_NONE = 0, > >> @@ -41,14 +45,15 @@ typedef enum VhostUserRequest { > >> VHOST_USER_SET_VRING_KICK = 12, > >> VHOST_USER_SET_VRING_CALL = 13, > >> VHOST_USER_SET_VRING_ERR = 14, > >> - VHOST_USER_MAX > >> + VHOST_USER_MMAP_HUGEPAGE_FILE = 15, > >> + VHOST_USER_UNMAP_HUGEPAGE_FILE = 16, > >> + VHOST_USER_MAX, > >> } VhostUserRequest; > >> > >> typedef struct VhostUserMemoryRegion { > >> uint64_t guest_phys_addr; > >> uint64_t memory_size; > >> uint64_t userspace_addr; > >> - uint64_t mmap_offset; > >> } VhostUserMemoryRegion; > >> > >> typedef struct VhostUserMemory { > >> @@ -57,6 +62,16 @@ typedef struct VhostUserMemory { > >> VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; 
> >> } VhostUserMemory; > >> > >> +typedef struct HugepageMemoryInfo { > >> + uint64_t base_addr; > >> + uint64_t size; > >> +}HugeMemInfo; > >> + > >> +typedef struct HugepageInfo { > >> + uint32_t num; > >> + HugeMemInfo files[HUGEPAGE_MAX_FILES]; > >> +}HugepageInfo; > >> + > >> typedef struct VhostUserMsg { > >> VhostUserRequest request; > >> > >> @@ -71,6 +86,7 @@ typedef struct VhostUserMsg { > >> struct vhost_vring_state state; > >> struct vhost_vring_addr addr; > >> VhostUserMemory memory; > >> + HugepageInfo huge_info; > >> }; > >> } QEMU_PACKED VhostUserMsg; > >> > >> @@ -104,7 +120,9 @@ static unsigned long int > >> ioctl_to_vhost_user_request[VHOST_USER_MAX] = { > >> VHOST_GET_VRING_BASE, /* VHOST_USER_GET_VRING_BASE */ > >> VHOST_SET_VRING_KICK, /* VHOST_USER_SET_VRING_KICK */ > >> VHOST_SET_VRING_CALL, /* VHOST_USER_SET_VRING_CALL */ > >> - VHOST_SET_VRING_ERR /* VHOST_USER_SET_VRING_ERR */ > >> + VHOST_SET_VRING_ERR, /* VHOST_USER_SET_VRING_ERR */ > >> + VHOST_MMAP_HUGEPAGE_FILE, /* VHOST_USER_MMAP_HUGEPAGE_FILE */ > >> + VHOST_UNMAP_HUGEPAGE_FILE, /* VHOST_USER_UNMAP_HUGEPAGE_FILE */ > >> }; > >> > >> static VhostUserRequest vhost_user_request_translate(unsigned long int > >> request) > >> @@ -190,6 +208,7 @@ static int vhost_user_call(struct vhost_dev *dev, > >> unsigned long int request, > >> int fds[VHOST_MEMORY_MAX_NREGIONS]; > >> int i, fd; > >> size_t fd_num = 0; > >> + RAMBlock *block; > >> > >> assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); > >> > >> @@ -213,37 +232,46 @@ static int vhost_user_call(struct vhost_dev *dev, > >> unsigned long int request, > >> case VHOST_RESET_OWNER: > >> break; > >> > >> - case VHOST_SET_MEM_TABLE: > >> - for (i = 0; i < dev->mem->nregions; ++i) { > >> - struct vhost_memory_region *reg = dev->mem->regions + i; > >> - ram_addr_t ram_addr; > >> + case VHOST_MMAP_HUGEPAGE_FILE: > >> + qemu_mutex_lock_ramlist(); > >> > >> - assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); > >> - 
qemu_ram_addr_from_host((void > >> *)(uintptr_t)reg->userspace_addr, &ram_addr); > >> - fd = qemu_get_ram_fd(ram_addr); > >> - if (fd > 0) { > >> - msg.memory.regions[fd_num].userspace_addr = > >> reg->userspace_addr; > >> - msg.memory.regions[fd_num].memory_size = > >> reg->memory_size; > >> - msg.memory.regions[fd_num].guest_phys_addr = > >> reg->guest_phys_addr; > >> - msg.memory.regions[fd_num].mmap_offset = > >> reg->userspace_addr - > >> - (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr); > >> - assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); > >> - fds[fd_num++] = fd; > >> + /* Get hugepage file informations */ > >> + QTAILQ_FOREACH(block, &ram_list.blocks, next) { > >> + if (block->flags & RAM_SHARED && block->fd > 0) { > >> + msg.huge_info.files[fd_num].size = block->length; > >> + msg.huge_info.files[fd_num].base_addr = block->host; > >> + fds[fd_num++] = block->fd; > >> } > >> } > >> + msg.huge_info.num = fd_num; > >> > >> - msg.memory.nregions = fd_num; > >> + /* Calculate msg size */ > >> + msg.size = sizeof(m.huge_info.num); > >> + msg.size += fd_num * sizeof(HugeMemInfo); > >> + > >> + qemu_mutex_unlock_ramlist(); > >> + break; > >> > >> - if (!fd_num) { > >> - error_report("Failed initializing vhost-user memory map\n" > >> - "consider using -object memory-backend-file > >> share=on\n"); > >> - return -1; > >> + case VHOST_UNMAP_HUGEPAGE_FILE: > >> + /* Tell vhost-user to unmap all hugepage files. 
*/ > >> + break; > >> + > >> + case VHOST_SET_MEM_TABLE: > >> + for (i = 0; i < dev->mem->nregions; i++) { > >> + struct vhost_memory_region *reg = dev->mem->regions + i; > >> + > >> + assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); > >> + > >> + msg.memory.regions[i].userspace_addr = reg->userspace_addr; > >> + msg.memory.regions[i].memory_size = reg->memory_size; > >> + msg.memory.regions[i].guest_phys_addr = reg->guest_phys_addr; > >> + assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); > >> } > >> > >> + msg.memory.nregions = i; > >> msg.size = sizeof(m.memory.nregions); > >> msg.size += sizeof(m.memory.padding); > >> - msg.size += fd_num * sizeof(VhostUserMemoryRegion); > >> - > >> + msg.size += i * sizeof(VhostUserMemoryRegion); > >> break; > >> > >> case VHOST_SET_LOG_FD: > >> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c > >> index 5a12861..b8eb341 100644 > >> --- a/hw/virtio/vhost.c > >> +++ b/hw/virtio/vhost.c > >> @@ -1041,6 +1041,14 @@ int vhost_dev_start(struct vhost_dev *hdev, > >> VirtIODevice *vdev) > >> if (r < 0) { > >> goto fail_features; > >> } > >> + if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { > >> + r = hdev->vhost_ops->vhost_call(hdev, VHOST_MMAP_HUGEPAGE_FILE, > >> + NULL); > >> + if (r < 0) { > >> + r = -errno; > >> + goto fail_mem; > >> + } > >> + } > >> r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem); > >> if (r < 0) { > >> r = -errno; > >> @@ -1101,5 +1109,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, > >> VirtIODevice *vdev) > >> g_free(hdev->log); > >> hdev->log = NULL; > >> hdev->log_size = 0; > >> + > >> + if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { > >> + (void)hdev->vhost_ops->vhost_call(hdev, VHOST_MMAP_HUGEPAGE_FILE, > > > > VHOST_MMAP_HUGEPAGE_FILE -> VHOST_UNMAP_HUGEPAGE_FILE > > > >> + NULL); > >> + } > >> } > >> > >> diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h > >> index c656f61..bb72811 100644 > >> --- 
a/linux-headers/linux/vhost.h > >> +++ b/linux-headers/linux/vhost.h > >> @@ -113,6 +113,13 @@ struct vhost_memory { > >> /* Set eventfd to signal an error */ > >> #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct > >> vhost_vring_file) > >> > >> +/* Tell vhost-user to mmap hugepage file */ > >> +#define VHOST_MMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x23, int) > >> +/* Tell vhost-user to unmap hugepage file */ > >> +#define VHOST_UNMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x24, int) > >> + > >> +#define VHOST_THREAD_ID _IOR(VHOST_VIRTIO, 0x25, struct > >> vhost_vring_thread) > >> + > >> /* VHOST_NET specific defines */ > >> > >> /* Attach virtio net ring to a raw socket, or tap device. > >> > > > > -- > Regards, > Haifeng