On Mon, 2013-12-02 at 15:23 +0100, Paolo Bonzini wrote: > This avoids useless masking and shifting when a single call to the > MemoryRegion ops will do. It cuts 30 cycles off the common case > of memory dispatch (out of ~150). > > Paolo Bonzini (4): > memory: cache min/max_access_size > memory: streamline common case for memory dispatch > memory: hoist coalesced MMIO flush > memory: small tweaks > > include/exec/memory.h | 2 + > memory.c | 114 > +++++++++++++++++++++++++++++++++++--------------- > 2 files changed, 82 insertions(+), 34 deletions(-) > > -- > 1.8.4.2 I saw a 20% performance boost on tcg (VMEXIT test)! It looks OK to me, Reviewed-by: Marcel Apfelbaum <marce...@redhat.com>
> > From 8324dd46284285cec1de394ce2fc3fb449e776d6 Mon Sep 17 00:00:00 2001 > From: Paolo Bonzini <pbonz...@redhat.com> > Date: Fri, 8 Nov 2013 11:29:41 +0100 > Subject: [PATCH 1/4] memory: cache min/max_access_size > > This will simplify the code in the next patch. > > Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> > --- > include/exec/memory.h | 2 ++ > memory.c | 27 +++++++++++---------------- > 2 files changed, 13 insertions(+), 16 deletions(-) > > diff --git a/include/exec/memory.h b/include/exec/memory.h > index 480dfbf..cf63adb 100644 > --- a/include/exec/memory.h > +++ b/include/exec/memory.h > @@ -163,6 +163,8 @@ struct MemoryRegion { > unsigned ioeventfd_nb; > MemoryRegionIoeventfd *ioeventfds; > NotifierList iommu_notify; > + > + uint8_t min_access_size, max_access_size; > }; > > typedef struct MemoryListener MemoryListener; > diff --git a/memory.c b/memory.c > index 28f6449..56e54aa 100644 > --- a/memory.c > +++ b/memory.c > @@ -443,8 +443,6 @@ static void memory_region_write_accessor(MemoryRegion *mr, > static void access_with_adjusted_size(hwaddr addr, > uint64_t *value, > unsigned size, > - unsigned access_size_min, > - unsigned access_size_max, > void (*access)(MemoryRegion *mr, > hwaddr addr, > uint64_t *value, > @@ -457,15 +455,8 @@ static void access_with_adjusted_size(hwaddr addr, > unsigned access_size; > unsigned i; > > - if (!access_size_min) { > - access_size_min = 1; > - } > - if (!access_size_max) { > - access_size_max = 4; > - } > - > /* FIXME: support unaligned access? */ > - access_size = MAX(MIN(size, access_size_max), access_size_min); > + access_size = MAX(MIN(size, mr->min_access_size), mr->min_access_size); > access_mask = -1ULL >> (64 - access_size * 8); > if (memory_region_big_endian(mr)) { > for (i = 0; i < size; i += access_size) { > @@ -942,11 +933,9 @@ static uint64_t > memory_region_dispatch_read1(MemoryRegion *mr, > > if (mr->ops->read) { > access_with_adjusted_size(addr, &data, size, > - mr->ops->impl.min_access_size, > - mr->ops->impl.max_access_size, > memory_region_read_accessor, mr); > } else { > - access_with_adjusted_size(addr, &data, size, 1, 4, > + access_with_adjusted_size(addr, &data, size, > memory_region_oldmmio_read_accessor, mr); > } > > @@ -982,11 +971,9 @@ static bool memory_region_dispatch_write(MemoryRegion > *mr, > > if (mr->ops->write) { > access_with_adjusted_size(addr, &data, size, > - mr->ops->impl.min_access_size, > - mr->ops->impl.max_access_size, > memory_region_write_accessor, mr); > } else { > - access_with_adjusted_size(addr, &data, size, 1, 4, > + access_with_adjusted_size(addr, &data, size, > memory_region_oldmmio_write_accessor, mr); > } > return false; > @@ -1004,6 +991,14 @@ void memory_region_init_io(MemoryRegion *mr, > mr->opaque = opaque; > mr->terminates = true; > mr->ram_addr = ~(ram_addr_t)0; > + > + if (mr->ops->read) { > + mr->min_access_size = mr->ops->impl.min_access_size ? : 1; > + mr->max_access_size = mr->ops->impl.max_access_size ? : 4; > + } else { > + mr->min_access_size = 1; > + mr->max_access_size = 4; > + } > } > > void memory_region_init_ram(MemoryRegion *mr, > -- > 1.8.4.2 > > > From 2afa3d3451dbfc76b3c6a28af1e7cdd3ab50c7ec Mon Sep 17 00:00:00 2001 > From: Paolo Bonzini <pbonz...@redhat.com> > Date: Fri, 8 Nov 2013 11:39:18 +0100 > Subject: [PATCH 2/4] memory: streamline common case for memory dispatch > > In the common case where there is no combining or splitting, > access_with_adjusted_size is adding a lot of overhead. Call > the MMIO ops directly in that case. > > Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> > --- > memory.c | 68 > ++++++++++++++++++++++++++++++++++++++++++++++++++++------------ > 1 file changed, 56 insertions(+), 12 deletions(-) > > diff --git a/memory.c b/memory.c > index 56e54aa..1ade19c 100644 > --- a/memory.c > +++ b/memory.c > @@ -443,6 +443,7 @@ static void memory_region_write_accessor(MemoryRegion *mr, > static void access_with_adjusted_size(hwaddr addr, > uint64_t *value, > unsigned size, > + unsigned access_size, > void (*access)(MemoryRegion *mr, > hwaddr addr, > uint64_t *value, > @@ -452,11 +453,9 @@ static void access_with_adjusted_size(hwaddr addr, > MemoryRegion *mr) > { > uint64_t access_mask; > - unsigned access_size; > unsigned i; > > /* FIXME: support unaligned access? */ > - access_size = MAX(MIN(size, mr->min_access_size), mr->min_access_size); > access_mask = -1ULL >> (64 - access_size * 8); > if (memory_region_big_endian(mr)) { > for (i = 0; i < size; i += access_size) { > @@ -929,13 +928,32 @@ static uint64_t > memory_region_dispatch_read1(MemoryRegion *mr, > hwaddr addr, > unsigned size) > { > + unsigned access_size; > uint64_t data = 0; > > + if (size < mr->min_access_size) { > + access_size = mr->min_access_size; > + goto adjusted; > + } > + if (size > mr->max_access_size) { > + access_size = mr->max_access_size; > + goto adjusted; > + } > + > + if (mr->ops->read) { > + data = mr->ops->read(mr->opaque, addr, size); > + } else { > + data = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr); > + } > + trace_memory_region_ops_read(mr, addr, data, size); > + return data; > + > +adjusted: > if (mr->ops->read) { > - access_with_adjusted_size(addr, &data, size, > + access_with_adjusted_size(addr, &data, size, access_size, > memory_region_read_accessor, mr); > } else { > - access_with_adjusted_size(addr, &data, size, > + access_with_adjusted_size(addr, &data, size, access_size, > memory_region_oldmmio_read_accessor, mr); > } > > @@ -957,6 +975,39 @@ static bool memory_region_dispatch_read(MemoryRegion *mr, > return false; > } > > +static void memory_region_dispatch_write1(MemoryRegion *mr, > + hwaddr addr, > + uint64_t data, > + unsigned size) > +{ > + unsigned access_size; > + if (size < mr->min_access_size) { > + access_size = mr->min_access_size; > + goto adjusted; > + } > + if (size > mr->max_access_size) { > + access_size = mr->max_access_size; > + goto adjusted; > + } > + > + trace_memory_region_ops_write(mr, addr, data, size); > + if (mr->ops->write) { > + mr->ops->write(mr->opaque, addr, data, size); > + } else { > + mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, data); > + } > + return; > + > +adjusted: > + if (mr->ops->write) { > + access_with_adjusted_size(addr, &data, size, access_size, > + memory_region_write_accessor, mr); > + } else { > + access_with_adjusted_size(addr, &data, size, access_size, > + memory_region_oldmmio_write_accessor, mr); > + } > +} > + > static bool memory_region_dispatch_write(MemoryRegion *mr, > hwaddr addr, > uint64_t data, > @@ -968,14 +1019,7 @@ static bool memory_region_dispatch_write(MemoryRegion > *mr, > } > > adjust_endianness(mr, &data, size); > - > - if (mr->ops->write) { > - access_with_adjusted_size(addr, &data, size, > - memory_region_write_accessor, mr); > - } else { > - access_with_adjusted_size(addr, &data, size, > - memory_region_oldmmio_write_accessor, mr); > - } > + memory_region_dispatch_write1(mr, addr, data, size); > return false; > } > > -- > 1.8.4.2 > > > From c65bf3cd93264eac93bbfe3a5974b8d15a1599b0 Mon Sep 17 00:00:00 2001 > From: Paolo Bonzini <pbonz...@redhat.com> > Date: Fri, 8 Nov 2013 11:49:52 +0100 > Subject: [PATCH 3/4] memory: hoist coalesced MMIO flush > > No need to flush the coalesced MMIO buffer multiple times when combining > multiple accesses into one. > > Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> > --- > memory.c | 14 ++++++++------ > 1 file changed, 8 insertions(+), 6 deletions(-) > > diff --git a/memory.c b/memory.c > index 1ade19c..495e693 100644 > --- a/memory.c > +++ b/memory.c > @@ -401,9 +401,6 @@ static void memory_region_read_accessor(MemoryRegion *mr, > { > uint64_t tmp; > > - if (mr->flush_coalesced_mmio) { > - qemu_flush_coalesced_mmio_buffer(); > - } > tmp = mr->ops->read(mr->opaque, addr, size); > trace_memory_region_ops_read(mr, addr, tmp, size); > *value |= (tmp & mask) << shift; > @@ -432,9 +429,6 @@ static void memory_region_write_accessor(MemoryRegion *mr, > { > uint64_t tmp; > > - if (mr->flush_coalesced_mmio) { > - qemu_flush_coalesced_mmio_buffer(); > - } > tmp = (*value >> shift) & mask; > trace_memory_region_ops_write(mr, addr, tmp, size); > mr->ops->write(mr->opaque, addr, tmp, size); > @@ -965,6 +959,10 @@ static bool memory_region_dispatch_read(MemoryRegion *mr, > uint64_t *pval, > unsigned size) > { > + if (mr->flush_coalesced_mmio) { > + qemu_flush_coalesced_mmio_buffer(); > + } > + > if (!memory_region_access_valid(mr, addr, size, false)) { > *pval = unassigned_mem_read(mr, addr, size); > return true; > @@ -1013,6 +1011,10 @@ static bool memory_region_dispatch_write(MemoryRegion > *mr, > uint64_t data, > unsigned size) > { > + if (mr->flush_coalesced_mmio) { > + qemu_flush_coalesced_mmio_buffer(); > + } > + > if (!memory_region_access_valid(mr, addr, size, true)) { > unassigned_mem_write(mr, addr, data, size); > return true; > -- > 1.8.4.2 > > > From d13d7f894cb4670568a0bd45d592a9321d9d5dab Mon Sep 17 00:00:00 2001 > From: Paolo Bonzini <pbonz...@redhat.com> > Date: Fri, 8 Nov 2013 11:48:11 +0100 > Subject: [PATCH 4/4] memory: small tweaks > > Make adjust_endianness inline, and do not use a ctz instruction > when a shift will do. > > Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> > --- > memory.c | 15 ++++++++++----- > 1 file changed, 10 insertions(+), 5 deletions(-) > > diff --git a/memory.c b/memory.c > index 495e693..d3b0dce 100644 > --- a/memory.c > +++ b/memory.c > @@ -357,7 +357,7 @@ static bool memory_region_wrong_endianness(MemoryRegion > *mr) > #endif > } > > -static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned > size) > +static inline void adjust_endianness(MemoryRegion *mr, uint64_t *data, > unsigned size) > { > if (memory_region_wrong_endianness(mr)) { > switch (size) { > @@ -378,6 +378,11 @@ static void adjust_endianness(MemoryRegion *mr, uint64_t > *data, unsigned size) > } > } > > +static inline int ctz3(unsigned size) > +{ > + return size >> 1; > +} > + > static void memory_region_oldmmio_read_accessor(MemoryRegion *mr, > hwaddr addr, > uint64_t *value, > @@ -387,7 +392,7 @@ static void > memory_region_oldmmio_read_accessor(MemoryRegion *mr, > { > uint64_t tmp; > > - tmp = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr); > + tmp = mr->ops->old_mmio.read[ctz3(size)](mr->opaque, addr); > trace_memory_region_ops_read(mr, addr, tmp, size); > *value |= (tmp & mask) << shift; > } > @@ -417,7 +422,7 @@ static void > memory_region_oldmmio_write_accessor(MemoryRegion *mr, > > tmp = (*value >> shift) & mask; > trace_memory_region_ops_write(mr, addr, tmp, size); > - mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, tmp); > + mr->ops->old_mmio.write[ctz3(size)](mr->opaque, addr, tmp); > } > > static void memory_region_write_accessor(MemoryRegion *mr, > @@ -937,7 +942,7 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion > *mr, > if (mr->ops->read) { > data = mr->ops->read(mr->opaque, addr, size); > } else { > - data = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr); > + data = mr->ops->old_mmio.read[ctz3(size)](mr->opaque, addr); > } > trace_memory_region_ops_read(mr, addr, data, size); > return data; > @@ -992,7 +997,7 @@ static void memory_region_dispatch_write1(MemoryRegion > *mr, > if (mr->ops->write) { > mr->ops->write(mr->opaque, addr, data, size); > } else { > - mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, data); > + mr->ops->old_mmio.write[ctz3(size)](mr->opaque, addr, data); > } > return; >