Call these tlb_flush_range_by_mmuidx*.  Rewrite the tlb_flush_page_bits_by_mmuidx* functions to use the new functions, passing in TARGET_PAGE_SIZE for length.
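To illustrate the intended use, here is a hypothetical caller (the function name and the flushed region are made up for this example; only the tlb_flush_range_by_mmuidx* entry points are added by this patch):

    /*
     * Hypothetical example: flush a 16-page region from mmu index 0
     * on the current vCPU only, matching all address bits.
     */
    static void flush_guest_region_example(CPUState *cpu, target_ulong addr)
    {
        target_ulong base = addr & TARGET_PAGE_MASK;
        target_ulong len = 16 * TARGET_PAGE_SIZE;

        tlb_flush_range_by_mmuidx(cpu, base, len, 1 << 0, TARGET_LONG_BITS);
    }

Because a call with bits == TARGET_LONG_BITS and len <= TARGET_PAGE_SIZE devolves to tlb_flush_page_by_mmuidx, the rewritten single-page entry points keep their existing behavior.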
Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
---
 include/exec/exec-all.h |  44 ++++++++
 accel/tcg/cputlb.c      | 232 ++++++++++++++++++++--------------------
 2 files changed, 159 insertions(+), 117 deletions(-)

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 6b036cae8f..8021adf38f 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -262,6 +262,31 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr,
 void tlb_flush_page_bits_by_mmuidx_all_cpus_synced
     (CPUState *cpu, target_ulong addr, uint16_t idxmap, unsigned bits);
 
+/**
+ * tlb_flush_range_by_mmuidx
+ * @cpu: CPU whose TLB should be flushed
+ * @addr: virtual address of the start of the range to be flushed
+ * @len: length of range to be flushed
+ * @idxmap: bitmap of mmu indexes to flush
+ * @bits: number of significant bits in address
+ *
+ * For each mmuidx in @idxmap, flush all pages within [@addr,@addr+@len),
+ * comparing only the low @bits worth of each virtual page.
+ */
+void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr,
+                               target_ulong len, uint16_t idxmap,
+                               unsigned bits);
+
+/* Similarly, with broadcast and syncing. */
+void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr,
+                                        target_ulong len, uint16_t idxmap,
+                                        unsigned bits);
+void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
+                                               target_ulong addr,
+                                               target_ulong len,
+                                               uint16_t idxmap,
+                                               unsigned bits);
+
 /**
  * tlb_set_page_with_attrs:
  * @cpu: CPU to add this TLB entry for
@@ -365,6 +390,25 @@ tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr,
                                               uint16_t idxmap, unsigned bits)
 {
 }
+static inline void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr,
+                                             target_ulong len, uint16_t idxmap,
+                                             unsigned bits)
+{
+}
+static inline void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu,
+                                                      target_ulong addr,
+                                                      target_ulong len,
+                                                      uint16_t idxmap,
+                                                      unsigned bits)
+{
+}
+static inline void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
+                                                             target_ulong addr,
+                                                             target_ulong len,
+                                                             uint16_t idxmap,
+                                                             unsigned bits)
+{
+}
 #endif
 /**
  * probe_access:
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 84e7d91a5c..c1dc0e2d27 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -707,8 +707,9 @@ void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
     tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
 }
 
-static void tlb_flush_page_bits_locked(CPUArchState *env, int midx,
-                                       target_ulong page, unsigned bits)
+static void tlb_flush_range_locked(CPUArchState *env, int midx,
+                                   target_ulong addr, target_ulong len,
+                                   unsigned bits)
 {
     CPUTLBDesc *d = &env_tlb(env)->d[midx];
     CPUTLBDescFast *f = &env_tlb(env)->f[midx];
@@ -718,20 +719,26 @@ static void tlb_flush_page_bits_locked(CPUArchState *env, int midx,
      * If @bits is smaller than the tlb size, there may be multiple entries
      * within the TLB; otherwise all addresses that match under @mask hit
      * the same TLB entry.
-     *
      * TODO: Perhaps allow bits to be a few bits less than the size.
      * For now, just flush the entire TLB.
+     *
+     * If @len is larger than the tlb size, then it will take longer to
+     * test all of the entries in the TLB than it will to flush it all.
      */
-    if (mask < f->mask) {
+    if (mask < f->mask || len > f->mask) {
         tlb_debug("forcing full flush midx %d ("
-                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
-                  midx, page, mask);
+                  TARGET_FMT_lx "/" TARGET_FMT_lx "+" TARGET_FMT_lx ")\n",
+                  midx, addr, mask, len);
         tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
         return;
     }
 
-    /* Check if we need to flush due to large pages.  */
-    if ((page & d->large_page_mask) == d->large_page_addr) {
+    /*
+     * Check if we need to flush due to large pages.
+     * Because large_page_mask contains all 1's from the msb,
+     * we only need to test the end of the range.
+     */
+    if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) {
         tlb_debug("forcing full flush midx %d ("
                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
                   midx, d->large_page_addr, d->large_page_mask);
@@ -739,85 +746,67 @@ static void tlb_flush_page_bits_locked(CPUArchState *env, int midx,
         return;
     }
 
-    if (tlb_flush_entry_mask_locked(tlb_entry(env, midx, page), page, mask)) {
-        tlb_n_used_entries_dec(env, midx);
+    for (target_ulong i = 0; i < len; i += TARGET_PAGE_SIZE) {
+        target_ulong page = addr + i;
+        CPUTLBEntry *entry = tlb_entry(env, midx, page);
+
+        if (tlb_flush_entry_mask_locked(entry, page, mask)) {
+            tlb_n_used_entries_dec(env, midx);
+        }
+        tlb_flush_vtlb_page_mask_locked(env, midx, page, mask);
     }
-    tlb_flush_vtlb_page_mask_locked(env, midx, page, mask);
 }
 
 typedef struct {
     target_ulong addr;
+    target_ulong len;
     uint16_t idxmap;
     uint16_t bits;
-} TLBFlushPageBitsByMMUIdxData;
+} TLBFlushRangeData;
 
-static void
-tlb_flush_page_bits_by_mmuidx_async_0(CPUState *cpu,
-                                      TLBFlushPageBitsByMMUIdxData d)
+static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
+                                              TLBFlushRangeData d)
 {
     CPUArchState *env = cpu->env_ptr;
     int mmu_idx;
 
     assert_cpu_is_self(cpu);
 
-    tlb_debug("page addr:" TARGET_FMT_lx "/%u mmu_map:0x%x\n",
-              d.addr, d.bits, d.idxmap);
+    tlb_debug("range:" TARGET_FMT_lx "/%u+" TARGET_FMT_lx " mmu_map:0x%x\n",
+              d.addr, d.bits, d.len, d.idxmap);
 
     qemu_spin_lock(&env_tlb(env)->c.lock);
     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
         if ((d.idxmap >> mmu_idx) & 1) {
-            tlb_flush_page_bits_locked(env, mmu_idx, d.addr, d.bits);
+            tlb_flush_range_locked(env, mmu_idx, d.addr, d.len, d.bits);
         }
     }
    qemu_spin_unlock(&env_tlb(env)->c.lock);
 
-    tb_flush_jmp_cache(cpu, d.addr);
-}
-
-static bool encode_pbm_to_runon(run_on_cpu_data *out,
-                                TLBFlushPageBitsByMMUIdxData d)
-{
-    /* We need 6 bits to hold to hold @bits up to 63.  */
-    if (d.idxmap <= MAKE_64BIT_MASK(0, TARGET_PAGE_BITS - 6)) {
-        *out = RUN_ON_CPU_TARGET_PTR(d.addr | (d.idxmap << 6) | d.bits);
-        return true;
+    for (target_ulong i = 0; i < d.len; i += TARGET_PAGE_SIZE) {
+        tb_flush_jmp_cache(cpu, d.addr + i);
     }
-    return false;
 }
 
-static TLBFlushPageBitsByMMUIdxData
-decode_runon_to_pbm(run_on_cpu_data data)
+static void tlb_flush_range_by_mmuidx_async_1(CPUState *cpu,
+                                              run_on_cpu_data data)
 {
-    target_ulong addr_map_bits = (target_ulong) data.target_ptr;
-    return (TLBFlushPageBitsByMMUIdxData){
-        .addr = addr_map_bits & TARGET_PAGE_MASK,
-        .idxmap = (addr_map_bits & ~TARGET_PAGE_MASK) >> 6,
-        .bits = addr_map_bits & 0x3f
-    };
-}
-
-static void tlb_flush_page_bits_by_mmuidx_async_1(CPUState *cpu,
-                                                  run_on_cpu_data runon)
-{
-    tlb_flush_page_bits_by_mmuidx_async_0(cpu, decode_runon_to_pbm(runon));
-}
-
-static void tlb_flush_page_bits_by_mmuidx_async_2(CPUState *cpu,
-                                                  run_on_cpu_data data)
-{
-    TLBFlushPageBitsByMMUIdxData *d = data.host_ptr;
-    tlb_flush_page_bits_by_mmuidx_async_0(cpu, *d);
+    TLBFlushRangeData *d = data.host_ptr;
+    tlb_flush_range_by_mmuidx_async_0(cpu, *d);
     g_free(d);
 }
 
-void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr,
-                                   uint16_t idxmap, unsigned bits)
+void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr,
+                               target_ulong len, uint16_t idxmap,
+                               unsigned bits)
 {
-    TLBFlushPageBitsByMMUIdxData d;
-    run_on_cpu_data runon;
+    TLBFlushRangeData d;
 
-    /* If all bits are significant, this devolves to tlb_flush_page. */
-    if (bits >= TARGET_LONG_BITS) {
+    /*
+     * If all bits are significant, and len is small,
+     * this devolves to tlb_flush_page.
+ */ + if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) { tlb_flush_page_by_mmuidx_all_cpus(src_cpu, addr, idxmap); return; } @@ -868,40 +861,46 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu, /* This should already be page aligned */ d.addr = addr & TARGET_PAGE_MASK; + d.len = len; d.idxmap = idxmap; d.bits = bits; - if (encode_pbm_to_runon(&runon, d)) { - flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); - } else { - CPUState *dst_cpu; - TLBFlushPageBitsByMMUIdxData *p; - - /* Allocate a separate data block for each destination cpu. */ - CPU_FOREACH(dst_cpu) { - if (dst_cpu != src_cpu) { - p = g_new(TLBFlushPageBitsByMMUIdxData, 1); - *p = d; - async_run_on_cpu(dst_cpu, - tlb_flush_page_bits_by_mmuidx_async_2, - RUN_ON_CPU_HOST_PTR(p)); - } + /* Allocate a separate data block for each destination cpu. */ + CPU_FOREACH(dst_cpu) { + if (dst_cpu != src_cpu) { + TLBFlushRangeData *p = g_memdup(&d, sizeof(d)); + async_run_on_cpu(dst_cpu, + tlb_flush_range_by_mmuidx_async_1, + RUN_ON_CPU_HOST_PTR(p)); } } - tlb_flush_page_bits_by_mmuidx_async_0(src_cpu, d); + tlb_flush_range_by_mmuidx_async_0(src_cpu, d); } -void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu, - target_ulong addr, - uint16_t idxmap, - unsigned bits) +void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu, + target_ulong addr, + uint16_t idxmap, unsigned bits) { - TLBFlushPageBitsByMMUIdxData d; - run_on_cpu_data runon; + tlb_flush_range_by_mmuidx_all_cpus(src_cpu, addr, TARGET_PAGE_SIZE, + idxmap, bits); +} - /* If all bits are significant, this devolves to tlb_flush_page. */ - if (bits >= TARGET_LONG_BITS) { +void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu, + target_ulong addr, + target_ulong len, + uint16_t idxmap, + unsigned bits) +{ + TLBFlushRangeData d; + CPUState *dst_cpu; + TLBFlushRangeData *p; + + /* + * If all bits are significant, and len is small, + * this devolves to tlb_flush_page. + */ + if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) { tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap); return; } @@ -913,32 +912,31 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu, /* This should already be page aligned */ d.addr = addr & TARGET_PAGE_MASK; + d.len = len; d.idxmap = idxmap; d.bits = bits; - if (encode_pbm_to_runon(&runon, d)) { - flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); - async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, - runon); - } else { - CPUState *dst_cpu; - TLBFlushPageBitsByMMUIdxData *p; - - /* Allocate a separate data block for each destination cpu. */ - CPU_FOREACH(dst_cpu) { - if (dst_cpu != src_cpu) { - p = g_new(TLBFlushPageBitsByMMUIdxData, 1); - *p = d; - async_run_on_cpu(dst_cpu, tlb_flush_page_bits_by_mmuidx_async_2, - RUN_ON_CPU_HOST_PTR(p)); - } + /* Allocate a separate data block for each destination cpu. 
+    CPU_FOREACH(dst_cpu) {
+        if (dst_cpu != src_cpu) {
+            p = g_memdup(&d, sizeof(d));
+            async_run_on_cpu(dst_cpu, tlb_flush_range_by_mmuidx_async_1,
+                             RUN_ON_CPU_HOST_PTR(p));
         }
-
-        p = g_new(TLBFlushPageBitsByMMUIdxData, 1);
-        *p = d;
-        async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_2,
-                              RUN_ON_CPU_HOST_PTR(p));
     }
+
+    p = g_memdup(&d, sizeof(d));
+    async_safe_run_on_cpu(src_cpu, tlb_flush_range_by_mmuidx_async_1,
+                          RUN_ON_CPU_HOST_PTR(p));
+}
+
+void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
+                                                   target_ulong addr,
+                                                   uint16_t idxmap,
+                                                   unsigned bits)
+{
+    tlb_flush_range_by_mmuidx_all_cpus_synced(src_cpu, addr, TARGET_PAGE_SIZE,
+                                              idxmap, bits);
 }
 
 /* update the TLBs so that writes to code in the virtual page 'addr'
-- 
2.25.1