On Tue, 2013-12-10 at 13:49 +0200, Michael S. Tsirkin wrote: > On Sun, Dec 01, 2013 at 02:02:23PM +0200, Marcel Apfelbaum wrote: > > Every address space has its own nodes and sections, but > > it uses the same global arrays of nodes/section. > > > > This limits the number of devices that can be attached > > to the guest to 20-30 devices. It happens because: > > - The sections array is limited to 2^12 entries. > > - The main memory has at least 100 sections. > > - Each device address space is actually an alias to > > main memory, multiplying its number of nodes/sections. > > > > Remove the limitation by using separate arrays of > > nodes and sections for each address space. > > > > Signed-off-by: Marcel Apfelbaum <marce...@redhat.com> > > I applied this on the pci tree, pls verify it's applied correctly.
I saw one issue that can be solved by: diff --git a/exec.c b/exec.c index 07992e9..00526d1 100644 --- a/exec.c +++ b/exec.c @@ -269,7 +269,7 @@ static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb) DECLARE_BITMAP(compacted, nodes_nb); if (d->phys_map.skip) { - phys_page_compact(&d->phys_map, d->nodes, compacted); + phys_page_compact(&d->phys_map, d->map.nodes, compacted); } } Beside this it looks OK, the branch does not compile and I couldn't look into it more... Can you please also merge my other patch "memory.c: bugfix - ref counting mismatch in memory_region_find" ? It is already reviewed, Thanks! Marcel > > > --- > > exec.c | 151 > > ++++++++++++++++++++++++++++------------------------------------- > > 1 file changed, 64 insertions(+), 87 deletions(-) > > > > diff --git a/exec.c b/exec.c > > index 95c4356..db2844c 100644 > > --- a/exec.c > > +++ b/exec.c > > @@ -90,13 +90,21 @@ struct PhysPageEntry { > > > > typedef PhysPageEntry Node[L2_SIZE]; > > > > +typedef struct PhysPageMap { > > + unsigned sections_nb; > > + unsigned sections_nb_alloc; > > + unsigned nodes_nb; > > + unsigned nodes_nb_alloc; > > + Node *nodes; > > + MemoryRegionSection *sections; > > +} PhysPageMap; > > + > > struct AddressSpaceDispatch { > > /* This is a multi-level map on the physical address space. > > * The bottom level has pointers to MemoryRegionSections. > > */ > > PhysPageEntry phys_map; > > - Node *nodes; > > - MemoryRegionSection *sections; > > + PhysPageMap map; > > AddressSpace *as; > > }; > > > > @@ -113,18 +121,6 @@ typedef struct subpage_t { > > #define PHYS_SECTION_ROM 2 > > #define PHYS_SECTION_WATCH 3 > > > > -typedef struct PhysPageMap { > > - unsigned sections_nb; > > - unsigned sections_nb_alloc; > > - unsigned nodes_nb; > > - unsigned nodes_nb_alloc; > > - Node *nodes; > > - MemoryRegionSection *sections; > > -} PhysPageMap; > > - > > -static PhysPageMap *prev_map; > > -static PhysPageMap next_map; > > - > > #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1) > > > > static void io_mem_init(void); > > @@ -135,35 +131,32 @@ static MemoryRegion io_mem_watch; > > > > #if !defined(CONFIG_USER_ONLY) > > > > -static void phys_map_node_reserve(unsigned nodes) > > +static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes) > > { > > - if (next_map.nodes_nb + nodes > next_map.nodes_nb_alloc) { > > - next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc * 2, > > - 16); > > - next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc, > > - next_map.nodes_nb + nodes); > > - next_map.nodes = g_renew(Node, next_map.nodes, > > - next_map.nodes_nb_alloc); > > + if (map->nodes_nb + nodes > map->nodes_nb_alloc) { > > + map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16); > > + map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + > > nodes); > > + map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc); > > } > > } > > > > -static uint16_t phys_map_node_alloc(void) > > +static uint16_t phys_map_node_alloc(PhysPageMap *map) > > { > > unsigned i; > > uint16_t ret; > > > > - ret = next_map.nodes_nb++; > > + ret = map->nodes_nb++; > > assert(ret != PHYS_MAP_NODE_NIL); > > - assert(ret != next_map.nodes_nb_alloc); > > + assert(ret != map->nodes_nb_alloc); > > for (i = 0; i < L2_SIZE; ++i) { > > - next_map.nodes[ret][i].is_leaf = 0; > > - next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL; > > + map->nodes[ret][i].is_leaf = 0; > > + map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL; > > } > > return ret; > > } > > > > -static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index, > > - hwaddr *nb, uint16_t leaf, > > +static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp, > > + hwaddr *index, hwaddr *nb, uint16_t leaf, > > int level) > > { > > PhysPageEntry *p; > > @@ -171,8 +164,8 @@ static void phys_page_set_level(PhysPageEntry *lp, > > hwaddr *index, > > hwaddr step = (hwaddr)1 << (level * L2_BITS); > > > > if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) { > > - lp->ptr = phys_map_node_alloc(); > > - p = next_map.nodes[lp->ptr]; > > + lp->ptr = phys_map_node_alloc(map); > > + p = map->nodes[lp->ptr]; > > if (level == 0) { > > for (i = 0; i < L2_SIZE; i++) { > > p[i].is_leaf = 1; > > @@ -180,7 +173,7 @@ static void phys_page_set_level(PhysPageEntry *lp, > > hwaddr *index, > > } > > } > > } else { > > - p = next_map.nodes[lp->ptr]; > > + p = map->nodes[lp->ptr]; > > } > > lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)]; > > > > @@ -191,7 +184,7 @@ static void phys_page_set_level(PhysPageEntry *lp, > > hwaddr *index, > > *index += step; > > *nb -= step; > > } else { > > - phys_page_set_level(lp, index, nb, leaf, level - 1); > > + phys_page_set_level(map, lp, index, nb, leaf, level - 1); > > } > > ++lp; > > } > > @@ -202,9 +195,9 @@ static void phys_page_set(AddressSpaceDispatch *d, > > uint16_t leaf) > > { > > /* Wildly overreserve - it doesn't matter much. */ > > - phys_map_node_reserve(3 * P_L2_LEVELS); > > + phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS); > > > > - phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1); > > + phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, > > P_L2_LEVELS - 1); > > } > > > > static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index, > > @@ -237,10 +230,10 @@ static MemoryRegionSection > > *address_space_lookup_region(AddressSpaceDispatch *d, > > subpage_t *subpage; > > > > section = phys_page_find(d->phys_map, addr >> TARGET_PAGE_BITS, > > - d->nodes, d->sections); > > + d->map.nodes, d->map.sections); > > if (resolve_subpage && section->mr->subpage) { > > subpage = container_of(section->mr, subpage_t, iomem); > > - section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]]; > > + section = > > &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]]; > > } > > return section; > > } > > @@ -708,7 +701,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState > > *env, > > iotlb |= PHYS_SECTION_ROM; > > } > > } else { > > - iotlb = section - address_space_memory.dispatch->sections; > > + iotlb = section - address_space_memory.dispatch->map.sections; > > iotlb += xlat; > > } > > > > @@ -747,23 +740,23 @@ void phys_mem_set_alloc(void *(*alloc)(size_t)) > > phys_mem_alloc = alloc; > > } > > > > -static uint16_t phys_section_add(MemoryRegionSection *section) > > +static uint16_t phys_section_add(PhysPageMap *map, > > + MemoryRegionSection *section) > > { > > /* The physical section number is ORed with a page-aligned > > * pointer to produce the iotlb entries. Thus it should > > * never overflow into the page-aligned value. > > */ > > - assert(next_map.sections_nb < TARGET_PAGE_SIZE); > > + assert(map->sections_nb < TARGET_PAGE_SIZE); > > > > - if (next_map.sections_nb == next_map.sections_nb_alloc) { > > - next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2, > > - 16); > > - next_map.sections = g_renew(MemoryRegionSection, next_map.sections, > > - next_map.sections_nb_alloc); > > + if (map->sections_nb == map->sections_nb_alloc) { > > + map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16); > > + map->sections = g_renew(MemoryRegionSection, map->sections, > > + map->sections_nb_alloc); > > } > > - next_map.sections[next_map.sections_nb] = *section; > > + map->sections[map->sections_nb] = *section; > > memory_region_ref(section->mr); > > - return next_map.sections_nb++; > > + return map->sections_nb++; > > } > > > > static void phys_section_destroy(MemoryRegion *mr) > > @@ -785,7 +778,6 @@ static void phys_sections_free(PhysPageMap *map) > > } > > g_free(map->sections); > > g_free(map->nodes); > > - g_free(map); > > } > > > > static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection > > *section) > > @@ -794,7 +786,7 @@ static void register_subpage(AddressSpaceDispatch *d, > > MemoryRegionSection *secti > > hwaddr base = section->offset_within_address_space > > & TARGET_PAGE_MASK; > > MemoryRegionSection *existing = phys_page_find(d->phys_map, base >> > > TARGET_PAGE_BITS, > > - next_map.nodes, > > next_map.sections); > > + d->map.nodes, > > d->map.sections); > > MemoryRegionSection subsection = { > > .offset_within_address_space = base, > > .size = int128_make64(TARGET_PAGE_SIZE), > > @@ -807,13 +799,14 @@ static void register_subpage(AddressSpaceDispatch *d, > > MemoryRegionSection *secti > > subpage = subpage_init(d->as, base); > > subsection.mr = &subpage->iomem; > > phys_page_set(d, base >> TARGET_PAGE_BITS, 1, > > - phys_section_add(&subsection)); > > + phys_section_add(&d->map, &subsection)); > > } else { > > subpage = container_of(existing->mr, subpage_t, iomem); > > } > > start = section->offset_within_address_space & ~TARGET_PAGE_MASK; > > end = start + int128_get64(section->size) - 1; > > - subpage_register(subpage, start, end, phys_section_add(section)); > > + subpage_register(subpage, start, end, > > + phys_section_add(&d->map, section)); > > } > > > > > > @@ -821,7 +814,7 @@ static void register_multipage(AddressSpaceDispatch *d, > > MemoryRegionSection *section) > > { > > hwaddr start_addr = section->offset_within_address_space; > > - uint16_t section_index = phys_section_add(section); > > + uint16_t section_index = phys_section_add(&d->map, section); > > uint64_t num_pages = int128_get64(int128_rshift(section->size, > > TARGET_PAGE_BITS)); > > > > @@ -1605,7 +1598,7 @@ static subpage_t *subpage_init(AddressSpace *as, > > hwaddr base) > > return mmio; > > } > > > > -static uint16_t dummy_section(MemoryRegion *mr) > > +static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr) > > { > > MemoryRegionSection section = { > > .mr = mr, > > @@ -1614,12 +1607,13 @@ static uint16_t dummy_section(MemoryRegion *mr) > > .size = int128_2_64(), > > }; > > > > - return phys_section_add(§ion); > > + return phys_section_add(map, §ion); > > } > > > > MemoryRegion *iotlb_to_region(hwaddr index) > > { > > - return address_space_memory.dispatch->sections[index & > > ~TARGET_PAGE_MASK].mr; > > + return address_space_memory.dispatch->map.sections[ > > + index & ~TARGET_PAGE_MASK].mr; > > } > > > > static void io_mem_init(void) > > @@ -1636,7 +1630,17 @@ static void io_mem_init(void) > > static void mem_begin(MemoryListener *listener) > > { > > AddressSpace *as = container_of(listener, AddressSpace, > > dispatch_listener); > > - AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1); > > + AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1); > > + uint16_t n; > > + > > + n = dummy_section(&d->map, &io_mem_unassigned); > > + assert(n == PHYS_SECTION_UNASSIGNED); > > + n = dummy_section(&d->map, &io_mem_notdirty); > > + assert(n == PHYS_SECTION_NOTDIRTY); > > + n = dummy_section(&d->map, &io_mem_rom); > > + assert(n == PHYS_SECTION_ROM); > > + n = dummy_section(&d->map, &io_mem_watch); > > + assert(n == PHYS_SECTION_WATCH); > > > > d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = > > 0 }; > > d->as = as; > > @@ -1649,37 +1653,12 @@ static void mem_commit(MemoryListener *listener) > > AddressSpaceDispatch *cur = as->dispatch; > > AddressSpaceDispatch *next = as->next_dispatch; > > > > - next->nodes = next_map.nodes; > > - next->sections = next_map.sections; > > - > > as->dispatch = next; > > - g_free(cur); > > -} > > - > > -static void core_begin(MemoryListener *listener) > > -{ > > - uint16_t n; > > > > - prev_map = g_new(PhysPageMap, 1); > > - *prev_map = next_map; > > - > > - memset(&next_map, 0, sizeof(next_map)); > > - n = dummy_section(&io_mem_unassigned); > > - assert(n == PHYS_SECTION_UNASSIGNED); > > - n = dummy_section(&io_mem_notdirty); > > - assert(n == PHYS_SECTION_NOTDIRTY); > > - n = dummy_section(&io_mem_rom); > > - assert(n == PHYS_SECTION_ROM); > > - n = dummy_section(&io_mem_watch); > > - assert(n == PHYS_SECTION_WATCH); > > -} > > - > > -/* This listener's commit run after the other AddressSpaceDispatch > > listeners'. > > - * All AddressSpaceDispatch instances have switched to the next map. > > - */ > > -static void core_commit(MemoryListener *listener) > > -{ > > - phys_sections_free(prev_map); > > + if (cur) { > > + phys_sections_free(&cur->map); > > + g_free(cur); > > + } > > } > > > > static void tcg_commit(MemoryListener *listener) > > @@ -1707,8 +1686,6 @@ static void core_log_global_stop(MemoryListener > > *listener) > > } > > > > static MemoryListener core_memory_listener = { > > - .begin = core_begin, > > - .commit = core_commit, > > .log_global_start = core_log_global_start, > > .log_global_stop = core_log_global_stop, > > .priority = 1, > > -- > > 1.8.3.1