On 06/01/2015 03:26 PM, Michael S. Tsirkin wrote: > On Mon, Jun 01, 2015 at 01:53:35PM +0800, Jason Wang wrote: >> > >> > >> > On 06/01/2015 02:12 AM, Michael S. Tsirkin wrote: >>> > > On Tue, May 26, 2015 at 01:54:09AM -0400, Jason Wang wrote: >>>> > >> We allocate one vhost log per vhost device. This is sub optimal when: >>>> > >> >>>> > >> - Guest has several device with vhost as backend >>>> > >> - Guest has multiqueue devices >>>> > >> >>>> > >> Since each vhost device will allocate and use their private log, this >>>> > >> could cause very huge unnecessary memory allocation. We can in fact >>>> > >> avoid extra cost by sharing a single vhost log among all the vhost >>>> > >> devices. This is feasible since: >>>> > >> >>>> > >> - All vhost devices for a vm should see a consistent memory layout >>>> > >> (memory slots). >>>> > >> - Vhost log were design to be shared, all access function were >>>> > >> synchronized. >>>> > >> >>>> > >> So this patch tries to implement the sharing through >>>> > >> >>>> > >> - Introducing a new vhost_log structure and refcnt it. >>>> > >> - Using a global pointer of vhost_log structure to keep track the >>>> > >> current log used. >>>> > >> - If there's no resize, next vhost device will just use this log and >>>> > >> increase the refcnt. >>>> > >> - If resize happens, a new vhost_log structure will be allocated and >>>> > >> each vhost device will use the new log then drop the refcnt of old >>>> > >> log. >>>> > >> - The old log will be synced and freed when reference count drops to >>>> > >> zero. >>>> > >> >>>> > >> Tested by doing scp during migration for a 2 queues virtio-net-pci. >>>> > >> >>>> > >> Cc: Michael S. Tsirkin <m...@redhat.com> >>>> > >> Signed-off-by: Jason Wang <jasow...@redhat.com> >>> > > Almost there I think. >>> > > >>>> > >> --- >>>> > >> Changes from V3: >>>> > >> - only sync the old log on put >>>> > >> Changes from V2: >>>> > >> - rebase to HEAD >>>> > >> - drop unused node field from vhost_log structure >>>> > >> Changes from V1: >>>> > >> - Drop the list of vhost log, instead, using a global pointer instead >>>> > >> --- >>>> > >> hw/virtio/vhost.c | 78 >>>> > >> ++++++++++++++++++++++++++++++++++++----------- >>>> > >> include/hw/virtio/vhost.h | 8 ++++- >>>> > >> 2 files changed, 68 insertions(+), 18 deletions(-) >>>> > >> >>>> > >> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c >>>> > >> index 54851b7..fef28d9 100644 >>>> > >> --- a/hw/virtio/vhost.c >>>> > >> +++ b/hw/virtio/vhost.c >>>> > >> @@ -22,15 +22,19 @@ >>>> > >> #include "hw/virtio/virtio-bus.h" >>>> > >> #include "migration/migration.h" >>>> > >> >>>> > >> +static struct vhost_log *vhost_log; >>>> > >> + >>>> > >> static void vhost_dev_sync_region(struct vhost_dev *dev, >>>> > >> MemoryRegionSection *section, >>>> > >> uint64_t mfirst, uint64_t mlast, >>>> > >> uint64_t rfirst, uint64_t rlast) >>>> > >> { >>>> > >> + vhost_log_chunk_t *log = dev->log->log; >>>> > >> + >>>> > >> uint64_t start = MAX(mfirst, rfirst); >>>> > >> uint64_t end = MIN(mlast, rlast); >>>> > >> - vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK; >>>> > >> - vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1; >>>> > >> + vhost_log_chunk_t *from = log + start / VHOST_LOG_CHUNK; >>>> > >> + vhost_log_chunk_t *to = log + end / VHOST_LOG_CHUNK + 1; >>>> > >> uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK; >>>> > >> >>>> > >> if (end < start) { >>>> > >> @@ -280,22 +284,57 @@ static uint64_t vhost_get_log_size(struct >>>> > >> vhost_dev *dev) >>>> > >> } >>>> > >> return log_size; >>>> > >> } >>>> > >> +static struct vhost_log *vhost_log_alloc(uint64_t size) >>>> > >> +{ >>>> > >> + struct vhost_log *log = g_malloc0(sizeof *log + size * >>>> > >> sizeof(*(log->log))); >>>> > >> + >>>> > >> + log->size = size; >>>> > >> + log->refcnt = 1; >>>> > >> + >>>> > >> + return log; >>>> > >> +} >>>> > >> + >>>> > >> +static struct vhost_log *vhost_log_get(uint64_t size) >>>> > >> +{ >>>> > >> + if (!vhost_log || vhost_log->size != size) { >>>> > >> + vhost_log = vhost_log_alloc(size); >>>> > >> + } else { >>>> > >> + ++vhost_log->refcnt; >>>> > >> + } >>>> > >> + >>>> > >> + return vhost_log; >>>> > >> +} >>>> > >> + >>>> > >> +static void vhost_log_put(struct vhost_dev *dev, bool sync) >>>> > >> +{ >>>> > >> + struct vhost_log *log = dev->log; >>>> > >> + >>>> > >> + if (!log) { >>>> > >> + return; >>>> > >> + } >>>> > >> + >>>> > >> + --log->refcnt; >>>> > >> + if (log->refcnt == 0) { >>>> > >> + /* Sync only the range covered by the old log */ >>>> > >> + if (dev->log_size && sync) { >>>> > >> + vhost_log_sync_range(dev, 0, dev->log_size * >>>> > >> VHOST_LOG_CHUNK - 1); >>>> > >> + } >>>> > >> + if (vhost_log == log) { >>>> > >> + vhost_log = NULL; >>>> > >> + } >>>> > >> + g_free(log); >>>> > >> + } >>>> > >> +} >>>> > >> >>>> > >> static inline void vhost_dev_log_resize(struct vhost_dev* dev, >>>> > >> uint64_t size) >>>> > >> { >>>> > >> - vhost_log_chunk_t *log; >>>> > >> - uint64_t log_base; >>>> > >> + struct vhost_log *log = vhost_log_get(size); >>>> > >> + uint64_t log_base = (uintptr_t)log->log; >>>> > >> int r; >>>> > >> >>>> > >> - log = g_malloc0(size * sizeof *log); >>>> > >> - log_base = (uintptr_t)log; >>>> > >> r = dev->vhost_ops->vhost_call(dev, VHOST_SET_LOG_BASE, >>>> > >> &log_base); >>>> > >> assert(r >= 0); >>>> > >> - /* Sync only the range covered by the old log */ >>>> > >> - if (dev->log_size) { >>>> > >> - vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK >>>> > >> - 1); >>>> > >> - } >>>> > >> - g_free(dev->log); >>>> > >> + vhost_log_put(dev, true); >>>> > >> dev->log = log; >>>> > >> dev->log_size = size; >>>> > >> } >>>> > >> @@ -601,7 +640,7 @@ static int vhost_migration_log(MemoryListener >>>> > >> *listener, int enable) >>>> > >> if (r < 0) { >>>> > >> return r; >>>> > >> } >>>> > >> - g_free(dev->log); >>>> > >> + vhost_log_put(dev, false); >>>> > >> dev->log = NULL; >>>> > >> dev->log_size = 0; >>>> > >> } else { >>>> > >> @@ -1060,10 +1099,12 @@ int vhost_dev_start(struct vhost_dev *hdev, >>>> > >> VirtIODevice *vdev) >>>> > >> uint64_t log_base; >>>> > >> >>>> > >> hdev->log_size = vhost_get_log_size(hdev); >>>> > >> - hdev->log = hdev->log_size ? >>>> > >> - g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL; >>>> > >> - log_base = (uintptr_t)hdev->log; >>>> > >> - r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, >>>> > >> &log_base); >>>> > >> + if (hdev->log_size) { >>>> > >> + hdev->log = vhost_log_get(hdev->log_size); >>>> > >> + } >>> > > Why is this conditional on hdev->log_size? >>> > > What will the value be for log_size == 0? >> > >> > This is used to save an unnecessary allocation for a dummy vhost_log >> > structure without any log. > Then you need to go over all uses and make sure they > are safe. A dummy vhost_log structure might be easier. > >>>> > >> + log_base = (uintptr_t)hdev->log->log; >>> > > especially since we de-reference it here. >> > >> > Yes, this seems unsafe, will change this to >> > >> > log_base = hdev->log_size ? (uintptr_t) hdev->log->log : NULL >>>> > >> + r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, >>>> > >> + hdev->log_size ? &log_base : >>>> > >> NULL); >>>> > >> if (r < 0) { >>>> > >> r = -errno; >>>> > >> goto fail_log; >>>> > >> @@ -1072,6 +1113,9 @@ int vhost_dev_start(struct vhost_dev *hdev, >>>> > >> VirtIODevice *vdev) >>>> > >> >>>> > >> return 0; >>>> > >> fail_log: >>>> > >> + if (hdev->log_size) { >>>> > >> + vhost_log_put(hdev, false); >>>> > >> + } >>>> > >> fail_vq: >>>> > >> while (--i >= 0) { >>>> > >> vhost_virtqueue_stop(hdev, >>>> > >> @@ -1101,7 +1145,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, >>>> > >> VirtIODevice *vdev) >>>> > >> vhost_log_sync_range(hdev, 0, ~0x0ull); >>>> > >> >>>> > >> hdev->started = false; >>>> > >> - g_free(hdev->log); >>>> > >> + vhost_log_put(hdev, false); >>>> > >> hdev->log = NULL; >>>> > >> hdev->log_size = 0; >>>> > >> } >>> > > Why false? We better sync the dirty bitmap since log is getting >>> > > cleared. >> > >> > We did a vhost_log_sync_range(hdev, 0, ~0x0ull) before. And we only sync >> > 0 to dev->log_size * VHOST_LOG_CHUNK - 1 in vhost_log_put(). But looks >> > like there's no difference, will remove vhost_log_sync_range() and use >> > true for vhost_log_put() here. >>> > > >>> > > And if it's always true, we can just drop this flag. >> > >> > There's still other usage, e.g when fail to setting log base in >> > vhost_dev_start() or migration end. In those cases, no need to sync. > We definitely must sync on migration end. >
Sorry for not being clear. I mean in vhost_log_global_stop which is called by migration_end(). We don't sync there in the past since ram_save_complete() will first synces dirty map and then calls migration_end().