On 01/12/2013 13:02, Marcel Apfelbaum wrote:
> Every address space has its own nodes and sections, but
> it uses the same global arrays of nodes/sections.
>
> This limits the number of devices that can be attached
> to the guest to 20-30 devices. It happens because:
>  - The sections array is limited to 2^12 entries.
>  - The main memory has at least 100 sections.
>  - Each device address space is actually an alias to
>    main memory, multiplying its number of nodes/sections.
>
> Remove the limitation by using separate arrays of
> nodes and sections for each address space.
>
> Signed-off-by: Marcel Apfelbaum <marce...@redhat.com>
> ---
>  exec.c | 151 ++++++++++++++++++++++++++++-------------------------------------
>  1 file changed, 64 insertions(+), 87 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index 95c4356..db2844c 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -90,13 +90,21 @@ struct PhysPageEntry {
>
>  typedef PhysPageEntry Node[L2_SIZE];
>
> +typedef struct PhysPageMap {
> +    unsigned sections_nb;
> +    unsigned sections_nb_alloc;
> +    unsigned nodes_nb;
> +    unsigned nodes_nb_alloc;
> +    Node *nodes;
> +    MemoryRegionSection *sections;
> +} PhysPageMap;
> +
>  struct AddressSpaceDispatch {
>      /* This is a multi-level map on the physical address space.
>       * The bottom level has pointers to MemoryRegionSections.
>       */
>      PhysPageEntry phys_map;
> -    Node *nodes;
> -    MemoryRegionSection *sections;
> +    PhysPageMap map;
>      AddressSpace *as;
>  };
>
> @@ -113,18 +121,6 @@ typedef struct subpage_t {
>  #define PHYS_SECTION_ROM 2
>  #define PHYS_SECTION_WATCH 3
>
> -typedef struct PhysPageMap {
> -    unsigned sections_nb;
> -    unsigned sections_nb_alloc;
> -    unsigned nodes_nb;
> -    unsigned nodes_nb_alloc;
> -    Node *nodes;
> -    MemoryRegionSection *sections;
> -} PhysPageMap;
> -
> -static PhysPageMap *prev_map;
> -static PhysPageMap next_map;
> -
>  #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
>
>  static void io_mem_init(void);
> @@ -135,35 +131,32 @@ static MemoryRegion io_mem_watch;
>
>  #if !defined(CONFIG_USER_ONLY)
>
> -static void phys_map_node_reserve(unsigned nodes)
> +static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
>  {
> -    if (next_map.nodes_nb + nodes > next_map.nodes_nb_alloc) {
> -        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc * 2,
> -                                       16);
> -        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc,
> -                                      next_map.nodes_nb + nodes);
> -        next_map.nodes = g_renew(Node, next_map.nodes,
> -                                 next_map.nodes_nb_alloc);
> +    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
> +        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
> +        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
> +        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
>      }
>  }
>
> -static uint16_t phys_map_node_alloc(void)
> +static uint16_t phys_map_node_alloc(PhysPageMap *map)
>  {
>      unsigned i;
>      uint16_t ret;
>
> -    ret = next_map.nodes_nb++;
> +    ret = map->nodes_nb++;
>      assert(ret != PHYS_MAP_NODE_NIL);
> -    assert(ret != next_map.nodes_nb_alloc);
> +    assert(ret != map->nodes_nb_alloc);
>      for (i = 0; i < L2_SIZE; ++i) {
> -        next_map.nodes[ret][i].is_leaf = 0;
> -        next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
> +        map->nodes[ret][i].is_leaf = 0;
> +        map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
>      }
>      return ret;
>  }
>
> -static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
> -                                hwaddr *nb, uint16_t leaf,
> +static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
> +                                hwaddr *index, hwaddr *nb, uint16_t leaf,
>                                  int level)
>  {
>      PhysPageEntry *p;
> @@ -171,8 +164,8 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
>      hwaddr step = (hwaddr)1 << (level * L2_BITS);
>
>      if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
> -        lp->ptr = phys_map_node_alloc();
> -        p = next_map.nodes[lp->ptr];
> +        lp->ptr = phys_map_node_alloc(map);
> +        p = map->nodes[lp->ptr];
>          if (level == 0) {
>              for (i = 0; i < L2_SIZE; i++) {
>                  p[i].is_leaf = 1;
> @@ -180,7 +173,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
>              }
>          }
>      } else {
> -        p = next_map.nodes[lp->ptr];
> +        p = map->nodes[lp->ptr];
>      }
>      lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
>
> @@ -191,7 +184,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
>              *index += step;
>              *nb -= step;
>          } else {
> -            phys_page_set_level(lp, index, nb, leaf, level - 1);
> +            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
>          }
>          ++lp;
>      }
> @@ -202,9 +195,9 @@ static void phys_page_set(AddressSpaceDispatch *d,
>                            uint16_t leaf)
>  {
>      /* Wildly overreserve - it doesn't matter much. */
> -    phys_map_node_reserve(3 * P_L2_LEVELS);
> +    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
>
> -    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
> +    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
>  }
>
>  static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index,
> @@ -237,10 +230,10 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
>      subpage_t *subpage;
>
>      section = phys_page_find(d->phys_map, addr >> TARGET_PAGE_BITS,
> -                             d->nodes, d->sections);
> +                             d->map.nodes, d->map.sections);
>      if (resolve_subpage && section->mr->subpage) {
>          subpage = container_of(section->mr, subpage_t, iomem);
> -        section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
> +        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
>      }
>      return section;
>  }
> @@ -708,7 +701,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
>              iotlb |= PHYS_SECTION_ROM;
>          }
>      } else {
> -        iotlb = section - address_space_memory.dispatch->sections;
> +        iotlb = section - address_space_memory.dispatch->map.sections;
>          iotlb += xlat;
>      }
>
> @@ -747,23 +740,23 @@ void phys_mem_set_alloc(void *(*alloc)(size_t))
>      phys_mem_alloc = alloc;
>  }
>
> -static uint16_t phys_section_add(MemoryRegionSection *section)
> +static uint16_t phys_section_add(PhysPageMap *map,
> +                                 MemoryRegionSection *section)
>  {
>      /* The physical section number is ORed with a page-aligned
>       * pointer to produce the iotlb entries. Thus it should
>       * never overflow into the page-aligned value.
>       */
> -    assert(next_map.sections_nb < TARGET_PAGE_SIZE);
> +    assert(map->sections_nb < TARGET_PAGE_SIZE);
>
> -    if (next_map.sections_nb == next_map.sections_nb_alloc) {
> -        next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
> -                                         16);
> -        next_map.sections = g_renew(MemoryRegionSection, next_map.sections,
> -                                    next_map.sections_nb_alloc);
> +    if (map->sections_nb == map->sections_nb_alloc) {
> +        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
> +        map->sections = g_renew(MemoryRegionSection, map->sections,
> +                                map->sections_nb_alloc);
>      }
> -    next_map.sections[next_map.sections_nb] = *section;
> +    map->sections[map->sections_nb] = *section;
>      memory_region_ref(section->mr);
> -    return next_map.sections_nb++;
> +    return map->sections_nb++;
>  }
>
>  static void phys_section_destroy(MemoryRegion *mr)
> @@ -785,7 +778,6 @@ static void phys_sections_free(PhysPageMap *map)
>      }
>      g_free(map->sections);
>      g_free(map->nodes);
> -    g_free(map);
>  }
>
>  static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
> @@ -794,7 +786,7 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
>      hwaddr base = section->offset_within_address_space
>          & TARGET_PAGE_MASK;
>      MemoryRegionSection *existing = phys_page_find(d->phys_map, base >> TARGET_PAGE_BITS,
> -                                                   next_map.nodes, next_map.sections);
> +                                                   d->map.nodes, d->map.sections);
>      MemoryRegionSection subsection = {
>          .offset_within_address_space = base,
>          .size = int128_make64(TARGET_PAGE_SIZE),
> @@ -807,13 +799,14 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
>          subpage = subpage_init(d->as, base);
>          subsection.mr = &subpage->iomem;
>          phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
> -                      phys_section_add(&subsection));
> +                      phys_section_add(&d->map, &subsection));
>      } else {
>          subpage = container_of(existing->mr, subpage_t, iomem);
>      }
>      start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
>      end = start + int128_get64(section->size) - 1;
> -    subpage_register(subpage, start, end, phys_section_add(section));
> +    subpage_register(subpage, start, end,
> +                     phys_section_add(&d->map, section));
>  }
>
>
> @@ -821,7 +814,7 @@ static void register_multipage(AddressSpaceDispatch *d,
>                                 MemoryRegionSection *section)
>  {
>      hwaddr start_addr = section->offset_within_address_space;
> -    uint16_t section_index = phys_section_add(section);
> +    uint16_t section_index = phys_section_add(&d->map, section);
>      uint64_t num_pages = int128_get64(int128_rshift(section->size,
>                                                      TARGET_PAGE_BITS));
>
> @@ -1605,7 +1598,7 @@ static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
>      return mmio;
>  }
>
> -static uint16_t dummy_section(MemoryRegion *mr)
> +static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
>  {
>      MemoryRegionSection section = {
>          .mr = mr,
> @@ -1614,12 +1607,13 @@ static uint16_t dummy_section(MemoryRegion *mr)
>          .size = int128_2_64(),
>      };
>
> -    return phys_section_add(&section);
> +    return phys_section_add(map, &section);
>  }
>
>  MemoryRegion *iotlb_to_region(hwaddr index)
>  {
> -    return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr;
> +    return address_space_memory.dispatch->map.sections[
> +        index & ~TARGET_PAGE_MASK].mr;
>  }
>
>  static void io_mem_init(void)
> @@ -1636,7 +1630,17 @@ static void io_mem_init(void)
>  static void mem_begin(MemoryListener *listener)
>  {
>      AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
> -    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
> +    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
> +    uint16_t n;
> +
> +    n = dummy_section(&d->map, &io_mem_unassigned);
> +    assert(n == PHYS_SECTION_UNASSIGNED);
> +    n = dummy_section(&d->map, &io_mem_notdirty);
> +    assert(n == PHYS_SECTION_NOTDIRTY);
> +    n = dummy_section(&d->map, &io_mem_rom);
> +    assert(n == PHYS_SECTION_ROM);
> +    n = dummy_section(&d->map, &io_mem_watch);
> +    assert(n == PHYS_SECTION_WATCH);
>
>      d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
>      d->as = as;
> @@ -1649,37 +1653,12 @@ static void mem_commit(MemoryListener *listener)
>      AddressSpaceDispatch *cur = as->dispatch;
>      AddressSpaceDispatch *next = as->next_dispatch;
>
> -    next->nodes = next_map.nodes;
> -    next->sections = next_map.sections;
> -
>      as->dispatch = next;
> -    g_free(cur);
> -}
> -
> -static void core_begin(MemoryListener *listener)
> -{
> -    uint16_t n;
>
> -    prev_map = g_new(PhysPageMap, 1);
> -    *prev_map = next_map;
> -
> -    memset(&next_map, 0, sizeof(next_map));
> -    n = dummy_section(&io_mem_unassigned);
> -    assert(n == PHYS_SECTION_UNASSIGNED);
> -    n = dummy_section(&io_mem_notdirty);
> -    assert(n == PHYS_SECTION_NOTDIRTY);
> -    n = dummy_section(&io_mem_rom);
> -    assert(n == PHYS_SECTION_ROM);
> -    n = dummy_section(&io_mem_watch);
> -    assert(n == PHYS_SECTION_WATCH);
> -}
> -
> -/* This listener's commit run after the other AddressSpaceDispatch listeners'.
> - * All AddressSpaceDispatch instances have switched to the next map.
> - */
> -static void core_commit(MemoryListener *listener)
> -{
> -    phys_sections_free(prev_map);
> +    if (cur) {
> +        phys_sections_free(&cur->map);
> +        g_free(cur);
> +    }
>  }
>
>  static void tcg_commit(MemoryListener *listener)
> @@ -1707,8 +1686,6 @@ static void core_log_global_stop(MemoryListener *listener)
>  }
>
>  static MemoryListener core_memory_listener = {
> -    .begin = core_begin,
> -    .commit = core_commit,
>      .log_global_start = core_log_global_start,
>      .log_global_stop = core_log_global_stop,
>      .priority = 1,
>
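The structural change is easy to lose in the churn: the node/section arrays that used to live in the global next_map (and were swapped in core_begin/core_commit) now live inside each AddressSpaceDispatch, so every dispatch allocates, fills and frees its own PhysPageMap instead of all address spaces competing for one 2^12-entry sections array. Below is a minimal, self-contained sketch of that ownership change, not code from the patch: plain C with realloc() standing in for g_renew(), and toy names (ToyMap, ToyDispatch, toy_section_add) invented for illustration.

/* Sketch only: per-dispatch map instead of one global map.  Plain C,
 * realloc() instead of g_renew(), toy types instead of QEMU's; all
 * names below are made up for illustration. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct ToySection { int mr_id; } ToySection;

typedef struct ToyMap {              /* analogue of PhysPageMap */
    unsigned sections_nb;
    unsigned sections_nb_alloc;
    ToySection *sections;
} ToyMap;

typedef struct ToyDispatch {         /* analogue of AddressSpaceDispatch */
    ToyMap map;                      /* owned by this dispatch, not global */
} ToyDispatch;

/* analogue of phys_section_add(): grows the caller's own array */
static unsigned toy_section_add(ToyMap *map, ToySection s)
{
    if (map->sections_nb == map->sections_nb_alloc) {
        map->sections_nb_alloc = map->sections_nb_alloc ? map->sections_nb_alloc * 2 : 16;
        map->sections = realloc(map->sections,
                                map->sections_nb_alloc * sizeof(*map->sections));
        assert(map->sections);
    }
    map->sections[map->sections_nb] = s;
    return map->sections_nb++;
}

/* analogue of phys_sections_free() */
static void toy_sections_free(ToyMap *map)
{
    free(map->sections);
}

int main(void)
{
    ToyDispatch a = { { 0 } }, b = { { 0 } };
    ToySection s = { 42 };

    /* Each dispatch fills its own array; neither consumes entries in the
     * other's, which is what removes the shared 2^12-entry ceiling. */
    unsigned ia = toy_section_add(&a.map, s);
    unsigned ib = toy_section_add(&b.map, s);
    printf("index in a: %u, index in b: %u\n", ia, ib);

    toy_sections_free(&a.map);
    toy_sections_free(&b.map);
    return 0;
}

The sketch mirrors what the quoted mem_begin()/mem_commit() hunks do: each new dispatch starts from an empty map (here zero-initialized, there g_new0 plus the four PHYS_SECTION_* dummy sections), and the old dispatch's map is freed only after as->dispatch has been switched to the new one.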
Simpler than before. Reviewed-by: Paolo Bonzini <pbonz...@redhat.com>