Under heavy workloads, the lookup will likely hit the same MemoryRegionSection as last time. Caching the result in a pointer, as ram_list.mru_block does, significantly reduces the computation cost of address_space_translate.
During an address space topology update, as->dispatch is reallocated, so the pointer is invalidated automatically.

Perf reports a visible drop in CPU usage.

Before:

  +   2.06%  phys_page_find
  +   0.95%  address_space_translate_internal
  +   0.80%  address_space_translate

After:

  +   0.78%  address_space_translate
  +   0.77%  address_space_translate_internal
  +   0.69%  address_space_lookup_region

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 exec.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/exec.c b/exec.c
index ad8b826..3232861 100644
--- a/exec.c
+++ b/exec.c
@@ -135,6 +135,7 @@ typedef struct PhysPageMap {
 struct AddressSpaceDispatch {
     struct rcu_head rcu;
 
+    MemoryRegionSection *mru_section;
     /* This is a multi-level map on the physical address space.
      * The bottom level has pointers to MemoryRegionSections.
      */
@@ -342,14 +343,26 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                         hwaddr addr,
                                                         bool resolve_subpage)
 {
-    MemoryRegionSection *section;
+    MemoryRegionSection *section = atomic_read(&d->mru_section);
     subpage_t *subpage;
+    bool update;
 
-    section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
+    if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
+        range_covers_byte(section->offset_within_address_space,
+                          section->size.lo, addr)) {
+        update = false;
+    } else {
+        section = phys_page_find(d->phys_map, addr, d->map.nodes,
+                                 d->map.sections);
+        update = true;
+    }
     if (resolve_subpage && section->mr->subpage) {
         subpage = container_of(section->mr, subpage_t, iomem);
         section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
     }
+    if (update) {
+        atomic_set(&d->mru_section, section);
+    }
     return section;
 }
 
-- 
2.4.3
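P.S. For readers outside QEMU, here is a minimal, self-contained sketch of the pattern the patch applies: a lookup routine caches its most recent hit in an atomic pointer and falls back to the slow search on a miss. All names below (Dispatch, Section, slow_find, lookup) are hypothetical stand-ins, not QEMU APIs; in the patch itself the cache lives in AddressSpaceDispatch, the slow path is phys_page_find(), and invalidation comes for free because as->dispatch is reallocated on topology updates.

/*
 * Standalone sketch of MRU caching for a lookup structure.
 * Names are illustrative only; this is not QEMU code.
 */
#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef struct Section {
    uint64_t start;
    uint64_t size;
    const char *name;
} Section;

typedef struct Dispatch {
    Section *sections;
    size_t nr_sections;
    _Atomic(Section *) mru_section;   /* last section returned by lookup() */
} Dispatch;

/* Slow path: a linear scan stands in for the real tree/radix walk. */
static Section *slow_find(Dispatch *d, uint64_t addr)
{
    for (size_t i = 0; i < d->nr_sections; i++) {
        Section *s = &d->sections[i];
        if (addr >= s->start && addr - s->start < s->size) {
            return s;
        }
    }
    return NULL;
}

/* Fast path: try the cached section first, fall back to the slow path. */
static Section *lookup(Dispatch *d, uint64_t addr)
{
    Section *s = atomic_load(&d->mru_section);

    if (s && addr >= s->start && addr - s->start < s->size) {
        return s;                      /* cache hit, no slow search */
    }
    s = slow_find(d, addr);
    if (s) {
        atomic_store(&d->mru_section, s);
    }
    return s;
}

int main(void)
{
    Section secs[] = {
        { 0x0000, 0x1000, "ram"  },
        { 0x1000, 0x0100, "mmio" },
    };
    Dispatch d = { secs, 2, NULL };

    /* Repeated lookups in the same range are served from the MRU cache. */
    printf("%s\n", lookup(&d, 0x10)->name);     /* ram  (slow path)  */
    printf("%s\n", lookup(&d, 0x20)->name);     /* ram  (cache hit)  */
    printf("%s\n", lookup(&d, 0x1010)->name);   /* mmio (slow path)  */
    return 0;
}

In the sketch, replacing the Dispatch wholesale (as QEMU does with as->dispatch) discards the stale mru_section along with the old table, so no explicit invalidation step is needed.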