On 06/01/2015 02:12 AM, Michael S. Tsirkin wrote: > On Tue, May 26, 2015 at 01:54:09AM -0400, Jason Wang wrote: >> We allocate one vhost log per vhost device. This is sub optimal when: >> >> - Guest has several device with vhost as backend >> - Guest has multiqueue devices >> >> Since each vhost device will allocate and use their private log, this >> could cause very huge unnecessary memory allocation. We can in fact >> avoid extra cost by sharing a single vhost log among all the vhost >> devices. This is feasible since: >> >> - All vhost devices for a vm should see a consistent memory layout >> (memory slots). >> - Vhost log were design to be shared, all access function were >> synchronized. >> >> So this patch tries to implement the sharing through >> >> - Introducing a new vhost_log structure and refcnt it. >> - Using a global pointer of vhost_log structure to keep track the >> current log used. >> - If there's no resize, next vhost device will just use this log and >> increase the refcnt. >> - If resize happens, a new vhost_log structure will be allocated and >> each vhost device will use the new log then drop the refcnt of old log. >> - The old log will be synced and freed when reference count drops to zero. >> >> Tested by doing scp during migration for a 2 queues virtio-net-pci. >> >> Cc: Michael S. Tsirkin <m...@redhat.com> >> Signed-off-by: Jason Wang <jasow...@redhat.com> > Almost there I think. > >> --- >> Changes from V3: >> - only sync the old log on put >> Changes from V2: >> - rebase to HEAD >> - drop unused node field from vhost_log structure >> Changes from V1: >> - Drop the list of vhost log, instead, using a global pointer instead >> --- >> hw/virtio/vhost.c | 78 >> ++++++++++++++++++++++++++++++++++++----------- >> include/hw/virtio/vhost.h | 8 ++++- >> 2 files changed, 68 insertions(+), 18 deletions(-) >> >> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c >> index 54851b7..fef28d9 100644 >> --- a/hw/virtio/vhost.c >> +++ b/hw/virtio/vhost.c >> @@ -22,15 +22,19 @@ >> #include "hw/virtio/virtio-bus.h" >> #include "migration/migration.h" >> >> +static struct vhost_log *vhost_log; >> + >> static void vhost_dev_sync_region(struct vhost_dev *dev, >> MemoryRegionSection *section, >> uint64_t mfirst, uint64_t mlast, >> uint64_t rfirst, uint64_t rlast) >> { >> + vhost_log_chunk_t *log = dev->log->log; >> + >> uint64_t start = MAX(mfirst, rfirst); >> uint64_t end = MIN(mlast, rlast); >> - vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK; >> - vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1; >> + vhost_log_chunk_t *from = log + start / VHOST_LOG_CHUNK; >> + vhost_log_chunk_t *to = log + end / VHOST_LOG_CHUNK + 1; >> uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK; >> >> if (end < start) { >> @@ -280,22 +284,57 @@ static uint64_t vhost_get_log_size(struct vhost_dev >> *dev) >> } >> return log_size; >> } >> +static struct vhost_log *vhost_log_alloc(uint64_t size) >> +{ >> + struct vhost_log *log = g_malloc0(sizeof *log + size * >> sizeof(*(log->log))); >> + >> + log->size = size; >> + log->refcnt = 1; >> + >> + return log; >> +} >> + >> +static struct vhost_log *vhost_log_get(uint64_t size) >> +{ >> + if (!vhost_log || vhost_log->size != size) { >> + vhost_log = vhost_log_alloc(size); >> + } else { >> + ++vhost_log->refcnt; >> + } >> + >> + return vhost_log; >> +} >> + >> +static void vhost_log_put(struct vhost_dev *dev, bool sync) >> +{ >> + struct vhost_log *log = dev->log; >> + >> + if (!log) { >> + return; >> + } >> + >> + --log->refcnt; >> + if (log->refcnt == 0) { >> + /* Sync only the range covered by the old log */ >> + if (dev->log_size && sync) { >> + vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - >> 1); >> + } >> + if (vhost_log == log) { >> + vhost_log = NULL; >> + } >> + g_free(log); >> + } >> +} >> >> static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t >> size) >> { >> - vhost_log_chunk_t *log; >> - uint64_t log_base; >> + struct vhost_log *log = vhost_log_get(size); >> + uint64_t log_base = (uintptr_t)log->log; >> int r; >> >> - log = g_malloc0(size * sizeof *log); >> - log_base = (uintptr_t)log; >> r = dev->vhost_ops->vhost_call(dev, VHOST_SET_LOG_BASE, &log_base); >> assert(r >= 0); >> - /* Sync only the range covered by the old log */ >> - if (dev->log_size) { >> - vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1); >> - } >> - g_free(dev->log); >> + vhost_log_put(dev, true); >> dev->log = log; >> dev->log_size = size; >> } >> @@ -601,7 +640,7 @@ static int vhost_migration_log(MemoryListener *listener, >> int enable) >> if (r < 0) { >> return r; >> } >> - g_free(dev->log); >> + vhost_log_put(dev, false); >> dev->log = NULL; >> dev->log_size = 0; >> } else { >> @@ -1060,10 +1099,12 @@ int vhost_dev_start(struct vhost_dev *hdev, >> VirtIODevice *vdev) >> uint64_t log_base; >> >> hdev->log_size = vhost_get_log_size(hdev); >> - hdev->log = hdev->log_size ? >> - g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL; >> - log_base = (uintptr_t)hdev->log; >> - r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, >> &log_base); >> + if (hdev->log_size) { >> + hdev->log = vhost_log_get(hdev->log_size); >> + } > Why is this conditional on hdev->log_size? > What will the value be for log_size == 0?
This is used to save an unnecessary allocation for a dummy vhost_log structure without any log. >> + log_base = (uintptr_t)hdev->log->log; > especially since we de-reference it here. Yes, this seems unsafe, will change this to log_base = hdev->log_size ? (uintptr_t) hdev->log->log : NULL >> + r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, >> + hdev->log_size ? &log_base : NULL); >> if (r < 0) { >> r = -errno; >> goto fail_log; >> @@ -1072,6 +1113,9 @@ int vhost_dev_start(struct vhost_dev *hdev, >> VirtIODevice *vdev) >> >> return 0; >> fail_log: >> + if (hdev->log_size) { >> + vhost_log_put(hdev, false); >> + } >> fail_vq: >> while (--i >= 0) { >> vhost_virtqueue_stop(hdev, >> @@ -1101,7 +1145,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, >> VirtIODevice *vdev) >> vhost_log_sync_range(hdev, 0, ~0x0ull); >> >> hdev->started = false; >> - g_free(hdev->log); >> + vhost_log_put(hdev, false); >> hdev->log = NULL; >> hdev->log_size = 0; >> } > Why false? We better sync the dirty bitmap since log is getting > cleared. We did a vhost_log_sync_range(hdev, 0, ~0x0ull) before. And we only sync 0 to dev->log_size * VHOST_LOG_CHUNK - 1 in vhost_log_put(). But looks like there's no difference, will remove vhost_log_sync_range() and use true for vhost_log_put() here. > > And if it's always true, we can just drop this flag. There's still other usage, e.g when fail to setting log base in vhost_dev_start() or migration end. In those cases, no need to sync. > >> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h >> index 8f04888..816a2e8 100644 >> --- a/include/hw/virtio/vhost.h >> +++ b/include/hw/virtio/vhost.h >> @@ -28,6 +28,12 @@ typedef unsigned long vhost_log_chunk_t; >> #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS) >> #define VHOST_INVALID_FEATURE_BIT (0xff) >> >> +struct vhost_log { >> + unsigned long long size; >> + int refcnt; >> + vhost_log_chunk_t log[0]; >> +}; >> + >> struct vhost_memory; >> struct vhost_dev { >> MemoryListener memory_listener; >> @@ -43,7 +49,6 @@ struct vhost_dev { >> unsigned long long backend_features; >> bool started; >> bool log_enabled; >> - vhost_log_chunk_t *log; >> unsigned long long log_size; >> Error *migration_blocker; >> bool force; >> @@ -52,6 +57,7 @@ struct vhost_dev { >> hwaddr mem_changed_end_addr; >> const VhostOps *vhost_ops; >> void *opaque; >> + struct vhost_log *log; >> }; >> >> int vhost_dev_init(struct vhost_dev *hdev, void *opaque, >> -- >> 1.8.3.1 >>