On Tue, May 26, 2015 at 01:54:09AM -0400, Jason Wang wrote: > We allocate one vhost log per vhost device. This is sub optimal when: > > - Guest has several device with vhost as backend > - Guest has multiqueue devices > > Since each vhost device will allocate and use their private log, this > could cause very huge unnecessary memory allocation. We can in fact > avoid extra cost by sharing a single vhost log among all the vhost > devices. This is feasible since: > > - All vhost devices for a vm should see a consistent memory layout > (memory slots). > - Vhost log were design to be shared, all access function were > synchronized. > > So this patch tries to implement the sharing through > > - Introducing a new vhost_log structure and refcnt it. > - Using a global pointer of vhost_log structure to keep track the > current log used. > - If there's no resize, next vhost device will just use this log and > increase the refcnt. > - If resize happens, a new vhost_log structure will be allocated and > each vhost device will use the new log then drop the refcnt of old log. > - The old log will be synced and freed when reference count drops to zero. > > Tested by doing scp during migration for a 2 queues virtio-net-pci. > > Cc: Michael S. Tsirkin <m...@redhat.com> > Signed-off-by: Jason Wang <jasow...@redhat.com>
Almost there I think. > --- > Changes from V3: > - only sync the old log on put > Changes from V2: > - rebase to HEAD > - drop unused node field from vhost_log structure > Changes from V1: > - Drop the list of vhost log, instead, using a global pointer instead > --- > hw/virtio/vhost.c | 78 > ++++++++++++++++++++++++++++++++++++----------- > include/hw/virtio/vhost.h | 8 ++++- > 2 files changed, 68 insertions(+), 18 deletions(-) > > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c > index 54851b7..fef28d9 100644 > --- a/hw/virtio/vhost.c > +++ b/hw/virtio/vhost.c > @@ -22,15 +22,19 @@ > #include "hw/virtio/virtio-bus.h" > #include "migration/migration.h" > > +static struct vhost_log *vhost_log; > + > static void vhost_dev_sync_region(struct vhost_dev *dev, > MemoryRegionSection *section, > uint64_t mfirst, uint64_t mlast, > uint64_t rfirst, uint64_t rlast) > { > + vhost_log_chunk_t *log = dev->log->log; > + > uint64_t start = MAX(mfirst, rfirst); > uint64_t end = MIN(mlast, rlast); > - vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK; > - vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1; > + vhost_log_chunk_t *from = log + start / VHOST_LOG_CHUNK; > + vhost_log_chunk_t *to = log + end / VHOST_LOG_CHUNK + 1; > uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK; > > if (end < start) { > @@ -280,22 +284,57 @@ static uint64_t vhost_get_log_size(struct vhost_dev > *dev) > } > return log_size; > } > +static struct vhost_log *vhost_log_alloc(uint64_t size) > +{ > + struct vhost_log *log = g_malloc0(sizeof *log + size * > sizeof(*(log->log))); > + > + log->size = size; > + log->refcnt = 1; > + > + return log; > +} > + > +static struct vhost_log *vhost_log_get(uint64_t size) > +{ > + if (!vhost_log || vhost_log->size != size) { > + vhost_log = vhost_log_alloc(size); > + } else { > + ++vhost_log->refcnt; > + } > + > + return vhost_log; > +} > + > +static void vhost_log_put(struct vhost_dev *dev, bool sync) > +{ > + struct vhost_log *log = dev->log; > + > + if (!log) { > + return; > + } > + > + --log->refcnt; > + if (log->refcnt == 0) { > + /* Sync only the range covered by the old log */ > + if (dev->log_size && sync) { > + vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - > 1); > + } > + if (vhost_log == log) { > + vhost_log = NULL; > + } > + g_free(log); > + } > +} > > static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size) > { > - vhost_log_chunk_t *log; > - uint64_t log_base; > + struct vhost_log *log = vhost_log_get(size); > + uint64_t log_base = (uintptr_t)log->log; > int r; > > - log = g_malloc0(size * sizeof *log); > - log_base = (uintptr_t)log; > r = dev->vhost_ops->vhost_call(dev, VHOST_SET_LOG_BASE, &log_base); > assert(r >= 0); > - /* Sync only the range covered by the old log */ > - if (dev->log_size) { > - vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1); > - } > - g_free(dev->log); > + vhost_log_put(dev, true); > dev->log = log; > dev->log_size = size; > } > @@ -601,7 +640,7 @@ static int vhost_migration_log(MemoryListener *listener, > int enable) > if (r < 0) { > return r; > } > - g_free(dev->log); > + vhost_log_put(dev, false); > dev->log = NULL; > dev->log_size = 0; > } else { > @@ -1060,10 +1099,12 @@ int vhost_dev_start(struct vhost_dev *hdev, > VirtIODevice *vdev) > uint64_t log_base; > > hdev->log_size = vhost_get_log_size(hdev); > - hdev->log = hdev->log_size ? > - g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL; > - log_base = (uintptr_t)hdev->log; > - r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, &log_base); > + if (hdev->log_size) { > + hdev->log = vhost_log_get(hdev->log_size); > + } Why is this conditional on hdev->log_size? What will the value be for log_size == 0? > + log_base = (uintptr_t)hdev->log->log; especially since we de-reference it here. > + r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, > + hdev->log_size ? &log_base : NULL); > if (r < 0) { > r = -errno; > goto fail_log; > @@ -1072,6 +1113,9 @@ int vhost_dev_start(struct vhost_dev *hdev, > VirtIODevice *vdev) > > return 0; > fail_log: > + if (hdev->log_size) { > + vhost_log_put(hdev, false); > + } > fail_vq: > while (--i >= 0) { > vhost_virtqueue_stop(hdev, > @@ -1101,7 +1145,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, > VirtIODevice *vdev) > vhost_log_sync_range(hdev, 0, ~0x0ull); > > hdev->started = false; > - g_free(hdev->log); > + vhost_log_put(hdev, false); > hdev->log = NULL; > hdev->log_size = 0; > } Why false? We better sync the dirty bitmap since log is getting cleared. And if it's always true, we can just drop this flag. > diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h > index 8f04888..816a2e8 100644 > --- a/include/hw/virtio/vhost.h > +++ b/include/hw/virtio/vhost.h > @@ -28,6 +28,12 @@ typedef unsigned long vhost_log_chunk_t; > #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS) > #define VHOST_INVALID_FEATURE_BIT (0xff) > > +struct vhost_log { > + unsigned long long size; > + int refcnt; > + vhost_log_chunk_t log[0]; > +}; > + > struct vhost_memory; > struct vhost_dev { > MemoryListener memory_listener; > @@ -43,7 +49,6 @@ struct vhost_dev { > unsigned long long backend_features; > bool started; > bool log_enabled; > - vhost_log_chunk_t *log; > unsigned long long log_size; > Error *migration_blocker; > bool force; > @@ -52,6 +57,7 @@ struct vhost_dev { > hwaddr mem_changed_end_addr; > const VhostOps *vhost_ops; > void *opaque; > + struct vhost_log *log; > }; > > int vhost_dev_init(struct vhost_dev *hdev, void *opaque, > -- > 1.8.3.1 >