Only grab tb_lock when new code has to be generated. Note that due to the RCU usage we lose the ability to move recently-found TBs to the beginning of the slot's list. We could in theory try to do something smart about this, but given that each CPU has a private tb_jmp_cache, it might be OK to just leave it alone.
Signed-off-by: Emilio G. Cota <c...@braap.org> --- cpu-exec.c | 21 +++++++++----------- include/exec/exec-all.h | 12 +++++++++--- translate-all.c | 52 ++++++++++++++++++++++++------------------------- 3 files changed, 43 insertions(+), 42 deletions(-) diff --git a/cpu-exec.c b/cpu-exec.c index 826ec25..ff08da8 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -24,6 +24,7 @@ #include "qemu/atomic.h" #include "qemu/timer.h" #include "exec/tb-hash.h" +#include "qemu/rcu_queue.h" #include "qemu/rcu.h" #if !defined(CONFIG_USER_ONLY) @@ -261,7 +262,8 @@ static TranslationBlock *tb_find_physical(CPUState *cpu, uint64_t flags) { CPUArchState *env = (CPUArchState *)cpu->env_ptr; - TranslationBlock *tb, **ptb1; + TBPhysHashSlot *slot; + TranslationBlock *tb; unsigned int h; tb_page_addr_t phys_pc, phys_page1; target_ulong virt_page2; @@ -270,12 +272,9 @@ static TranslationBlock *tb_find_physical(CPUState *cpu, phys_pc = get_page_addr_code(env, pc); phys_page1 = phys_pc & TARGET_PAGE_MASK; h = tb_phys_hash_func(phys_pc); - ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h]; - for(;;) { - tb = atomic_rcu_read(ptb1); - if (!tb) { - return NULL; - } + slot = &tcg_ctx.tb_ctx.tb_phys_hash[h]; + + QLIST_FOREACH_RCU(tb, &slot->list, slot_node) { if (tb->pc == pc && tb->page_addr[0] == phys_page1 && tb->cs_base == cs_base && @@ -288,16 +287,14 @@ static TranslationBlock *tb_find_physical(CPUState *cpu, TARGET_PAGE_SIZE; phys_page2 = get_page_addr_code(env, virt_page2); if (tb->page_addr[1] == phys_page2) { - break; + return tb; } } else { - break; + return tb; } } - ptb1 = &tb->phys_hash_next; } - - return tb; + return NULL; } static TranslationBlock *tb_find_slow(CPUState *cpu, diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 7e4aea7..050e820 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -155,8 +155,8 @@ struct TranslationBlock { #define CF_USE_ICOUNT 0x20000 void *tc_ptr; /* pointer to the translated code */ - /* next matching tb for physical address. 
*/ - struct TranslationBlock *phys_hash_next; + /* list node in slot of physically-indexed hash of translation blocks */ + QLIST_ENTRY(TranslationBlock) slot_node; /* original tb when cflags has CF_NOCACHE */ struct TranslationBlock *orig_tb; /* first and second physical page containing code. The lower bit @@ -183,12 +183,18 @@ struct TranslationBlock { #include "qemu/thread.h" +typedef struct TBPhysHashSlot TBPhysHashSlot; + +struct TBPhysHashSlot { + QLIST_HEAD(, TranslationBlock) list; +}; + typedef struct TBContext TBContext; struct TBContext { TranslationBlock *tbs; - TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE]; + TBPhysHashSlot tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE]; int nb_tbs; /* any access to the tbs or the page table must use this lock */ QemuMutex tb_lock; diff --git a/translate-all.c b/translate-all.c index 94adcd0..df65c83 100644 --- a/translate-all.c +++ b/translate-all.c @@ -60,6 +60,7 @@ #include "exec/cputlb.h" #include "exec/tb-hash.h" #include "translate-all.h" +#include "qemu/rcu_queue.h" #include "qemu/bitmap.h" #include "qemu/timer.h" #include "qemu/aie.h" @@ -721,6 +722,17 @@ static inline void code_gen_alloc(size_t tb_size) qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock); } +static void tb_ctx_init(void) +{ + int i; + + for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) { + TBPhysHashSlot *slot = &tcg_ctx.tb_ctx.tb_phys_hash[i]; + + QLIST_INIT(&slot->list); + } +} + /* Must be called before using the QEMU cpus. 'tb_size' is the size (in bytes) allocated to the translation buffer. Zero means default size. 
*/ @@ -731,6 +743,7 @@ void tcg_exec_init(unsigned long tb_size) tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer; tcg_register_jit(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size); page_init(); + tb_ctx_init(); aie_init(); #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE) /* There's no guest base to take into account, so go ahead and @@ -878,7 +891,7 @@ void tb_flush(CPUState *cpu) } tb_invalidate_all(); - memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash)); + tb_ctx_init(); page_flush_tb(); tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer; @@ -898,7 +911,9 @@ static void tb_invalidate_check(target_ulong address) address &= TARGET_PAGE_MASK; for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) { - for (tb = tb_ctx.tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) { + TBPhysHashSlot *slot = &tcg_ctx.tb_ctx.tb_phys_hash[i]; + + QLIST_FOREACH_RCU(tb, &slot->list, slot_node) { if (!(address + TARGET_PAGE_SIZE <= tb->pc || address >= tb->pc + tb->size)) { printf("ERROR invalidate: address=" TARGET_FMT_lx @@ -919,8 +934,9 @@ static void tb_page_check(void) int i, flags1, flags2; for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) { - for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL; - tb = tb->phys_hash_next) { + TBPhysHashSlot *slot = &tcg_ctx.tb_ctx.tb_phys_hash[i]; + + QLIST_FOREACH_RCU(tb, &slot->list, slot_node) { flags1 = page_get_flags(tb->pc); flags2 = page_get_flags(tb->pc + tb->size - 1); if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) { @@ -933,20 +949,6 @@ static void tb_page_check(void) #endif -static inline void tb_hash_remove(TranslationBlock **ptb, TranslationBlock *tb) -{ - TranslationBlock *tb1; - - for (;;) { - tb1 = *ptb; - if (tb1 == tb) { - *ptb = tb1->phys_hash_next; - break; - } - ptb = &tb1->phys_hash_next; - } -} - static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb) { TranslationBlock *tb1; @@ -1029,16 +1031,13 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t 
page_addr) { CPUState *cpu; PageDesc *p; - unsigned int h, n1; - tb_page_addr_t phys_pc; + unsigned int n1; TranslationBlock *tb1, *tb2; /* Now remove the TB from the hash list, so that tb_find_slow * cannot find it anymore. */ - phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); - h = tb_phys_hash_func(phys_pc); - tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[h], tb); + QLIST_REMOVE_RCU(tb, slot_node); /* remove the TB from the page list */ if (tb->page_addr[0] != page_addr) { @@ -1485,13 +1484,12 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, tb_page_addr_t phys_page2) { unsigned int h; - TranslationBlock **ptb; + TBPhysHashSlot *slot; /* add in the physical hash table */ h = tb_phys_hash_func(phys_pc); - ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h]; - tb->phys_hash_next = *ptb; - atomic_rcu_set(ptb, tb); + slot = &tcg_ctx.tb_ctx.tb_phys_hash[h]; + QLIST_INSERT_HEAD_RCU(&slot->list, tb, slot_node); /* add in the page list */ tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK); -- 1.9.1