Il 29/05/2013 04:11, Liu Ping Fan ha scritto: > From: Liu Ping Fan <pingf...@linux.vnet.ibm.com> > > Using seqlock to load dispatch context atomic. The dispatch > context consist of: cur_map_nodes, cur_sections, cur_roots. > > Also during the dispatch, we should get the terminal, and dup > MemoryRegionSection. So after rcu unlock, the cur dispatch > context can be dropped safely. > > Signed-off-by: Liu Ping Fan <pingf...@linux.vnet.ibm.com> > > -------- > The tcg code related to tlb_set_page() should be fixed too. > But it is a little complicated, separating it for easing review. > --- > exec.c | 239 ++++++++++++++++++++++++++++++++++++++++----------------------- > 1 files changed, 152 insertions(+), 87 deletions(-) > > diff --git a/exec.c b/exec.c > index 315960d..9a5c67f 100644 > --- a/exec.c > +++ b/exec.c > @@ -112,6 +112,8 @@ static Node *next_map_nodes; > static PhysPageEntry *next_roots; > static AllocInfo *next_alloc_info; > > +QemuSeqLock dispatch_table_lock; > + > #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1) > > static void io_mem_init(void); > @@ -199,31 +201,24 @@ static void phys_page_set(AddressSpaceDispatch *d, > P_L2_LEVELS - 1); > } > > -static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr > index, bool cur) > +static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, > + hwaddr index, > + Node *map_base, > + MemoryRegionSection *sections_base, > + PhysPageEntry *root_base) > { > - PhysPageEntry lp; > + PhysPageEntry lp = root_base[d->idx]; > PhysPageEntry *p; > int i; > - Node *map_nodes; > - MemoryRegionSection *sections; > > - if (likely(cur)) { > - map_nodes = cur_map_nodes; > - sections = cur_phys_sections; > - lp = cur_roots[d->idx]; > - } else { > - map_nodes = next_map_nodes; > - sections = next_phys_sections; > - lp = next_roots[d->idx]; > - } > for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) { > if (lp.ptr == PHYS_MAP_NODE_NIL) { > - return §ions[phys_section_unassigned]; > + return 
&sections_base[phys_section_unassigned]; > } > - p = map_nodes[lp.ptr]; > + p = map_base[lp.ptr]; > lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)]; > } > - return &sections[lp.ptr]; > + return &sections_base[lp.ptr]; > } > > bool memory_region_is_unassigned(MemoryRegion *mr) > @@ -233,21 +228,28 @@ bool memory_region_is_unassigned(MemoryRegion *mr) > } > > static MemoryRegionSection *address_space_lookup_region(AddressSpace *as, > - hwaddr addr) > + hwaddr addr, Node *map_base, MemoryRegionSection *sections_base, > + PhysPageEntry *root_base) > + > { > - return phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS, true); > + return phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS, > + map_base, sections_base, root_base); > } > > MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr, > hwaddr *xlat, hwaddr *plen, > - bool is_write) > + bool is_write, > + Node *map_base, > + MemoryRegionSection > *sections_base, > + PhysPageEntry *root_base)
These two functions are always called on the "cur" map. Only phys_page_find can be called (from register_subpage) on the "next" map. Paolo > { > IOMMUTLBEntry iotlb; > - MemoryRegionSection *section, *base = cur_phys_sections; > + MemoryRegionSection *section; > hwaddr len = *plen; > > for (;;) { > - section = address_space_lookup_region(as, addr); > + section = address_space_lookup_region(as, addr, map_base, > + sections_base, root_base); > > /* Compute offset within MemoryRegionSection */ > addr -= section->offset_within_address_space; > @@ -265,7 +267,7 @@ MemoryRegionSection *address_space_translate(AddressSpace > *as, hwaddr addr, > | (addr & iotlb.addr_mask)); > len = MIN(len, (addr | iotlb.addr_mask) - addr + 1); > if (!(iotlb.perm & (1 << is_write))) { > - section = &base[phys_section_unassigned]; > + section = &sections_base[phys_section_unassigned]; > break; > } > > @@ -276,6 +278,42 @@ MemoryRegionSection > *address_space_translate(AddressSpace *as, hwaddr addr, > *xlat = addr; > return section; > } > + > +/* caller should hold rcu read lock */ > +MemoryRegionSection address_space_get_terminal(AddressSpace *as, hwaddr addr, > + hwaddr *xlat, hwaddr *plen, > + bool is_write) > +{ > + MemoryRegionSection *mrs, *ret, *sections_base; > + Node *map_base; > + subpage_t *mmio; > + unsigned int idx, start; > + PhysPageEntry *root_base; > + > + /* Walk through all of the AddressSpace in consistent */ > + do { > + start = seqlock_read_begin(&dispatch_table_lock); > + map_base = cur_map_nodes; > + sections_base = cur_phys_sections; > + /* get all AddressSpaceDispatch root */ > + root_base = cur_roots; > + } while (seqlock_read_check(&dispatch_table_lock, start)); > + > + mrs = address_space_translate(as, addr, xlat, plen, is_write, > + map_base, sections_base, root_base); > + if (!mrs->mr->terminates) { > + mmio = container_of(mrs->mr, subpage_t, mmio); > + idx = SUBPAGE_IDX(addr); > + ret = &sections_base[mmio->sub_section[idx]]; > + *xlat += mmio->base; > + *xlat -= 
mrs->offset_within_address_space; > + *xlat += mrs->offset_within_region; > + } else { > + ret = mrs; > + } > + > + return *ret; > +} > #endif > > void cpu_exec_init_all(void) > @@ -1777,13 +1815,12 @@ static void core_commit(MemoryListener *listener) > info->sections = cur_phys_sections; > info->roots = cur_roots; > > - /* Fix me, in future, will be protected by wr seqlock when in parellel > - * with reader > - */ > + seqlock_write_lock(&dispatch_table_lock); > cur_map_nodes = next_map_nodes; > cur_phys_sections = next_phys_sections; > cur_alloc_info = next_alloc_info; > cur_roots = next_roots; > + seqlock_write_unlock(&dispatch_table_lock); > > /* since each CPU stores ram addresses in its TLB cache, we must > reset the modified entries */ > @@ -1890,8 +1927,13 @@ void address_space_destroy_dispatch(AddressSpace *as) > > static void memory_map_init(void) > { > + QemuMutex *mutex; > + > qemu_mutex_init(&id_lock); > address_space_id_map = g_malloc0(MAX_IDX/BITS_PER_BYTE); > + mutex = g_malloc0(sizeof(QemuMutex)); > + qemu_mutex_init(mutex); > + seqlock_init(&dispatch_table_lock, mutex); > system_memory = g_malloc(sizeof(*system_memory)); > memory_region_init(system_memory, "system", INT64_MAX); > address_space_init(&address_space_memory, system_memory, "memory"); > @@ -2001,59 +2043,68 @@ bool address_space_rw(AddressSpace *as, hwaddr addr, > uint8_t *buf, > uint8_t *ptr; > uint64_t val; > hwaddr addr1; > - MemoryRegionSection *section; > + MemoryRegionSection section; > bool error = false; > > while (len > 0) { > l = len; > - section = address_space_translate(as, addr, &addr1, &l, is_write); > + rcu_read_lock(); > + section = address_space_get_terminal(as, addr, &addr1, &l, is_write); > > if (is_write) { > - if (!memory_access_is_direct(section->mr, is_write)) { > + if (!memory_access_is_direct(section.mr, is_write)) { > l = memory_access_size(l, addr1); > /* XXX: could force cpu_single_env to NULL to avoid > potential bugs */ > + memory_region_ref(section.mr); > 
+ rcu_read_unlock(); > if (l == 4) { > /* 32 bit write access */ > val = ldl_p(buf); > - error |= io_mem_write(section->mr, addr1, val, 4); > + error |= io_mem_write(section.mr, addr1, val, 4); > } else if (l == 2) { > /* 16 bit write access */ > val = lduw_p(buf); > - error |= io_mem_write(section->mr, addr1, val, 2); > + error |= io_mem_write(section.mr, addr1, val, 2); > } else { > /* 8 bit write access */ > val = ldub_p(buf); > - error |= io_mem_write(section->mr, addr1, val, 1); > + error |= io_mem_write(section.mr, addr1, val, 1); > } > + memory_region_unref(section.mr); > } else { > - addr1 += memory_region_get_ram_addr(section->mr); > + addr1 += memory_region_get_ram_addr(section.mr); > /* RAM case */ > ptr = qemu_get_ram_ptr(addr1); > memcpy(ptr, buf, l); > invalidate_and_set_dirty(addr1, l); > + rcu_read_unlock(); > } > } else { > - if (!memory_access_is_direct(section->mr, is_write)) { > + if (!memory_access_is_direct(section.mr, is_write)) { > /* I/O case */ > + memory_region_ref(section.mr); > + rcu_read_unlock(); > l = memory_access_size(l, addr1); > if (l == 4) { > /* 32 bit read access */ > - error |= io_mem_read(section->mr, addr1, &val, 4); > + error |= io_mem_read(section.mr, addr1, &val, 4); > stl_p(buf, val); > } else if (l == 2) { > /* 16 bit read access */ > - error |= io_mem_read(section->mr, addr1, &val, 2); > + error |= io_mem_read(section.mr, addr1, &val, 2); > stw_p(buf, val); > } else { > /* 8 bit read access */ > - error |= io_mem_read(section->mr, addr1, &val, 1); > + error |= io_mem_read(section.mr, addr1, &val, 1); > stb_p(buf, val); > } > + memory_region_unref(section.mr); > } else { > /* RAM case */ > - ptr = qemu_get_ram_ptr(section->mr->ram_addr + addr1); > + ptr = qemu_get_ram_ptr(section.mr->ram_addr + addr1); > memcpy(buf, ptr, l); > + rcu_read_unlock(); > } > } > len -= l; > @@ -2089,18 +2140,19 @@ void cpu_physical_memory_write_rom(hwaddr addr, > hwaddr l; > uint8_t *ptr; > hwaddr addr1; > - MemoryRegionSection *section; > 
+ MemoryRegionSection section; > > + rcu_read_lock(); > while (len > 0) { > l = len; > - section = address_space_translate(&address_space_memory, > + section = address_space_get_terminal(&address_space_memory, > addr, &addr1, &l, true); > > - if (!(memory_region_is_ram(section->mr) || > - memory_region_is_romd(section->mr))) { > + if (!(memory_region_is_ram(section.mr) || > + memory_region_is_romd(section.mr))) { > /* do nothing */ > } else { > - addr1 += memory_region_get_ram_addr(section->mr); > + addr1 += memory_region_get_ram_addr(section.mr); > /* ROM/RAM case */ > ptr = qemu_get_ram_ptr(addr1); > memcpy(ptr, buf, l); > @@ -2110,6 +2162,7 @@ void cpu_physical_memory_write_rom(hwaddr addr, > buf += l; > addr += l; > } > + rcu_read_unlock(); > } > > typedef struct { > @@ -2161,15 +2214,15 @@ static void cpu_notify_map_clients(void) > > bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool > is_write) > { > - MemoryRegionSection *section; > + MemoryRegionSection section; > hwaddr l, xlat; > > while (len > 0) { > l = len; > - section = address_space_translate(as, addr, &xlat, &l, is_write); > - if (!memory_access_is_direct(section->mr, is_write)) { > + section = address_space_get_terminal(as, addr, &xlat, &l, is_write); > + if (!memory_access_is_direct(section.mr, is_write)) { > l = memory_access_size(l, addr); > - if (!memory_region_access_valid(section->mr, xlat, l, is_write)) > { > + if (!memory_region_access_valid(section.mr, xlat, l, is_write)) { > return false; > } > } > @@ -2195,14 +2248,14 @@ void *address_space_map(AddressSpace *as, > hwaddr len = *plen; > hwaddr todo = 0; > hwaddr l, xlat; > - MemoryRegionSection *section; > + MemoryRegionSection section; > ram_addr_t raddr = RAM_ADDR_MAX; > ram_addr_t rlen; > void *ret; > > while (len > 0) { > l = len; > - section = address_space_translate(as, addr, &xlat, &l, is_write); > + section = address_space_get_terminal(as, addr, &xlat, &l, is_write); > > if 
(!memory_access_is_direct(section->mr, is_write)) { > if (todo || bounce.buffer) { > @@ -2211,20 +2264,20 @@ void *address_space_map(AddressSpace *as, > bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, > TARGET_PAGE_SIZE); > bounce.addr = addr; > bounce.len = l; > - bounce.mr = section->mr; > + bounce.mr = section.mr; > if (!is_write) { > address_space_read(as, addr, bounce.buffer, l); > } > > *plen = l; > - memory_region_ref(section->mr); > + memory_region_ref(section.mr); > return bounce.buffer; > } > if (!todo) { > - raddr = memory_region_get_ram_addr(section->mr) + xlat; > - memory_region_ref(section->mr); > + raddr = memory_region_get_ram_addr(section.mr) + xlat; > + memory_region_ref(section.mr); > } else { > - if (memory_region_get_ram_addr(section->mr) + xlat != raddr + > todo) { > + if (memory_region_get_ram_addr(section.mr) + xlat != raddr + > todo) { > break; > } > } > @@ -2297,15 +2350,17 @@ static inline uint32_t ldl_phys_internal(hwaddr addr, > { > uint8_t *ptr; > uint64_t val; > - MemoryRegionSection *section; > + MemoryRegionSection section; > hwaddr l = 4; > hwaddr addr1; > > - section = address_space_translate(&address_space_memory, addr, &addr1, > &l, > - false); > + rcu_read_lock(); > + section = address_space_get_terminal(&address_space_memory, addr, &addr1, > + &l, false); > + rcu_read_unlock(); > if (l < 4 || !memory_access_is_direct(section->mr, false)) { > /* I/O case */ > - io_mem_read(section->mr, addr1, &val, 4); > + io_mem_read(section.mr, addr1, &val, 4); > #if defined(TARGET_WORDS_BIGENDIAN) > if (endian == DEVICE_LITTLE_ENDIAN) { > val = bswap32(val); > @@ -2360,8 +2415,10 @@ static inline uint64_t ldq_phys_internal(hwaddr addr, > hwaddr l = 8; > hwaddr addr1; > > - section = address_space_translate(&address_space_memory, addr, &addr1, > &l, > - false); > + rcu_read_lock(); > + section = address_space_get_terminal(&address_space_memory, addr, &addr1, > + &l, false); > + rcu_read_unlock(); > if (l < 8 || 
!memory_access_is_direct(section->mr, false)) { > /* I/O case */ > io_mem_read(section->mr, addr1, &val, 8); > @@ -2423,15 +2480,17 @@ static inline uint32_t lduw_phys_internal(hwaddr addr, > { > uint8_t *ptr; > uint64_t val; > - MemoryRegionSection *section; > + MemoryRegionSection section; > hwaddr l = 2; > hwaddr addr1; > > - section = address_space_translate(&address_space_memory, addr, &addr1, > &l, > - false); > - if (l < 2 || !memory_access_is_direct(section->mr, false)) { > + rcu_read_lock(); > + section = address_space_get_terminal(&address_space_memory, addr, &addr1, > + &l, false); > + rcu_read_unlock(); > + if (l < 2 || !memory_access_is_direct(section.mr, false)) { > /* I/O case */ > - io_mem_read(section->mr, addr1, &val, 2); > + io_mem_read(section.mr, addr1, &val, 2); > #if defined(TARGET_WORDS_BIGENDIAN) > if (endian == DEVICE_LITTLE_ENDIAN) { > val = bswap16(val); > @@ -2443,7 +2502,7 @@ static inline uint32_t lduw_phys_internal(hwaddr addr, > #endif > } else { > /* RAM case */ > - ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr) > + ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr) > & TARGET_PAGE_MASK) > + addr1); > switch (endian) { > @@ -2482,16 +2541,18 @@ uint32_t lduw_be_phys(hwaddr addr) > void stl_phys_notdirty(hwaddr addr, uint32_t val) > { > uint8_t *ptr; > - MemoryRegionSection *section; > + MemoryRegionSection section; > hwaddr l = 4; > hwaddr addr1; > > - section = address_space_translate(&address_space_memory, addr, &addr1, > &l, > - true); > - if (l < 4 || !memory_access_is_direct(section->mr, true)) { > - io_mem_write(section->mr, addr1, val, 4); > + rcu_read_lock(); > + section = address_space_get_terminal(&address_space_memory, addr, &addr1, > + &l, true); > + rcu_read_unlock(); > + if (l < 4 || !memory_access_is_direct(section.mr, true)) { > + io_mem_write(section.mr, addr1, val, 4); > } else { > - addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK; > + addr1 += 
memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK; > ptr = qemu_get_ram_ptr(addr1); > stl_p(ptr, val); > > @@ -2512,13 +2573,15 @@ static inline void stl_phys_internal(hwaddr addr, > uint32_t val, > enum device_endian endian) > { > uint8_t *ptr; > - MemoryRegionSection *section; > + MemoryRegionSection section; > hwaddr l = 4; > hwaddr addr1; > > - section = address_space_translate(&address_space_memory, addr, &addr1, > &l, > - true); > - if (l < 4 || !memory_access_is_direct(section->mr, true)) { > + rcu_read_lock(); > + section = address_space_get_terminal(&address_space_memory, addr, &addr1, > + &l, true); > + rcu_read_unlock(); > + if (l < 4 || !memory_access_is_direct(section.mr, true)) { > #if defined(TARGET_WORDS_BIGENDIAN) > if (endian == DEVICE_LITTLE_ENDIAN) { > val = bswap32(val); > @@ -2528,10 +2591,10 @@ static inline void stl_phys_internal(hwaddr addr, > uint32_t val, > val = bswap32(val); > } > #endif > - io_mem_write(section->mr, addr1, val, 4); > + io_mem_write(section.mr, addr1, val, 4); > } else { > /* RAM case */ > - addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK; > + addr1 += memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK; > ptr = qemu_get_ram_ptr(addr1); > switch (endian) { > case DEVICE_LITTLE_ENDIAN: > @@ -2575,13 +2638,13 @@ static inline void stw_phys_internal(hwaddr addr, > uint32_t val, > enum device_endian endian) > { > uint8_t *ptr; > - MemoryRegionSection *section; > + MemoryRegionSection section; > hwaddr l = 2; > hwaddr addr1; > > - section = address_space_translate(&address_space_memory, addr, &addr1, > &l, > - true); > - if (l < 2 || !memory_access_is_direct(section->mr, true)) { > + section = address_space_get_terminal(&address_space_memory, addr, &addr1, > + &l, true); > + if (l < 2 || !memory_access_is_direct(section.mr, true)) { > #if defined(TARGET_WORDS_BIGENDIAN) > if (endian == DEVICE_LITTLE_ENDIAN) { > val = bswap16(val); > @@ -2591,10 +2654,10 @@ static inline void 
stw_phys_internal(hwaddr addr, > uint32_t val, > val = bswap16(val); > } > #endif > - io_mem_write(section->mr, addr1, val, 2); > + io_mem_write(section.mr, addr1, val, 2); > } else { > /* RAM case */ > - addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK; > + addr1 += memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK; > ptr = qemu_get_ram_ptr(addr1); > switch (endian) { > case DEVICE_LITTLE_ENDIAN: > @@ -2696,13 +2759,15 @@ bool virtio_is_big_endian(void) > #ifndef CONFIG_USER_ONLY > bool cpu_physical_memory_is_io(hwaddr phys_addr) > { > - MemoryRegionSection *section; > + MemoryRegionSection section; > hwaddr l = 1; > > - section = address_space_translate(&address_space_memory, > + rcu_read_lock(); > + section = address_space_get_terminal(&address_space_memory, > phys_addr, &phys_addr, &l, false); > + rcu_read_unlock(); > > - return !(memory_region_is_ram(section->mr) || > - memory_region_is_romd(section->mr)); > + return !(memory_region_is_ram(section.mr) || > + memory_region_is_romd(section.mr)); > } > #endif >