qemu-user can hang in a multi-threaded fork. One common reason is that when creating a TB, between fork and exec we manipulate a GTree whose memory allocator (GSlice) is not fork-safe.
Although POSIX does not mandate it, the system's allocator (e.g. tcmalloc, libc malloc) is probably fork-safe. Fix some of these hangs by using QTree, which uses the system's allocator. For more details, see: https://gitlab.com/qemu-project/qemu/-/issues/285 Performance impact on linux-user: - ~2% slowdown in spec06 - 1.05% slowdown in Nbench-int - 4.51% slowdown in Nbench-fp x86_64 spec06 Host: AMD Ryzen 7 PRO 5850U with Radeon Graphics 1.04 +--------------------------------------------------------------------------------------------------+ | | | | qtree-gcc1|.int | | +-+ | | 1.02 |-+...............................................|..................................|...........+-| | | | | | +-+ | | | 1 |-+..........+-+............|............+-+......|..........................+-+.....|...........+-| | **|** | *|** | *+-+ | | | | *+-+* | +-+* | * * +-+ **|** | | | * * +-+ *|** * * **|* * * | *+-+* | +-+ | 0.98 |-+.........*...*....|.....*|.*..........*..*...*.|*...*..*...**|*..........*...*....|....**|**..+-| | * * **|** *| * +-+ * * * |* * * *+-+ +-+ * * **|** *+-+* | | *+-+* * * * | * +-+* | * * * |* * * * * | * * * | * * * | | *+-+* * * *+-+* * * | * * * |* * * * * | * * * | * * * | 0.96 |-+..*...*..*...*..*...*...*..*...*|**...*..*...*.|*...*..*...*..*.....|....*...*..*.|.*..*...*..+-| | * * * * * * * * *| * * * * |* * * * * **|* * * * | * * * | | * * * * * * * * +-+* * * *+-+ * * * * * |* * * * | * * * | | * * * * * * * * * * * * * * * * * * * |* * * * | * * * | 0.94 |-+..*...*..*...*..*...*...*..*...*..*...*..*...*..*...*..*...*..*...*.|*...*...*..*.|.*..*...*..+-| | * * * * * * * * * * * * * * * * * * * |* * * * | * * * | | * * * * * * * * * * * * * * * * * * *+-+ * * * | * * * | 0.92 |-+..*...*..*...*..*...*...*..*...*..*...*..*...*..*...*..*...*..*...*..*...*...*..*.|.*..*...*..+-| | * * * * * * * * * * * * * * * * * * * * * * *+-+* * * | | * * * * * * * * * * * * * * * * * * * * * * * * * * | | * * * * * * * * * * * * * * * * * * * * * * * * * * | 0.9 +--------------------------------------------------------------------------------------------------+ 400.perlben401.bzip2403.gcc429.m445.gob456.hmme45462.libqua464.h26471.omnet473483.xalancbmkgeomean aarch64 NBench Integer Performance Host: AMD Ryzen 7 PRO 5850U with Radeon Graphics 81.2 +----------------------------------------------------------------+ | + + | | *** | 81 |-+ B +-| | **## | 80.8 |-+ ## +-| | ## | | ## | 80.6 |-+ ## +-| | ## | | ## | 80.4 |-+ ## +-| | ## | 80.2 |-+ ##** +-| | B | | + * | 80 +----------------------------------------------------------------+ master-gcc12 qtree-gcc12 QEMU version aarch64 NBench Floating Point Performance Host: AMD Ryzen 7 PRO 5850U with Radeon Graphics 13.8 +----------------------------------------------------------------+ | *B* + | 13.7 |-+ **## +-| | ## | 13.6 |-+ # +-| | ## | 13.5 |-+ # +-| | ## | 13.4 |-+ ## +-| | # | 13.3 |-+ ## +-| | # | 13.2 |-+ ## +-| | ##** | 13.1 |-+ B +-| | + *** | 13 +----------------------------------------------------------------+ master-gcc12 qtree-gcc12 QEMU version Signed-off-by: Emilio Cota <c...@braap.org> --- accel/tcg/tb-maint.c | 17 +++++++++-------- tcg/region.c | 19 ++++++++++--------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c index b3d6529ae2..e6370ddc71 100644 --- a/accel/tcg/tb-maint.c +++ b/accel/tcg/tb-maint.c @@ -19,6 +19,7 @@ #include "qemu/osdep.h" #include "qemu/interval-tree.h" +#include "qemu/qtree.h" #include "exec/cputlb.h" #include "exec/log.h" #include "exec/exec-all.h" @@ -313,7 +314,7 @@ struct page_entry { * See also: page_collection_lock(). */ struct page_collection { - GTree *tree; + QTree *tree; struct page_entry *max; }; @@ -466,7 +467,7 @@ static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr) struct page_entry *pe; PageDesc *pd; - pe = g_tree_lookup(set->tree, &index); + pe = q_tree_lookup(set->tree, &index); if (pe) { return false; } @@ -477,7 +478,7 @@ static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr) } pe = page_entry_new(pd, index); - g_tree_insert(set->tree, &pe->index, pe); + q_tree_insert(set->tree, &pe->index, pe); /* * If this is either (1) the first insertion or (2) a page whose index @@ -524,13 +525,13 @@ static struct page_collection *page_collection_lock(tb_page_addr_t start, end >>= TARGET_PAGE_BITS; g_assert(start <= end); - set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL, + set->tree = q_tree_new_full(tb_page_addr_cmp, NULL, NULL, page_entry_destroy); set->max = NULL; assert_no_pages_locked(); retry: - g_tree_foreach(set->tree, page_entry_lock, NULL); + q_tree_foreach(set->tree, page_entry_lock, NULL); for (index = start; index <= end; index++) { TranslationBlock *tb; @@ -541,7 +542,7 @@ static struct page_collection *page_collection_lock(tb_page_addr_t start, continue; } if (page_trylock_add(set, index << TARGET_PAGE_BITS)) { - g_tree_foreach(set->tree, page_entry_unlock, NULL); + q_tree_foreach(set->tree, page_entry_unlock, NULL); goto retry; } assert_page_locked(pd); @@ -550,7 +551,7 @@ static struct page_collection *page_collection_lock(tb_page_addr_t start, (tb_page_addr1(tb) != -1 && page_trylock_add(set, tb_page_addr1(tb)))) { /* drop all locks, and reacquire in order */ - g_tree_foreach(set->tree, page_entry_unlock, NULL); + q_tree_foreach(set->tree, page_entry_unlock, NULL); goto retry; } } @@ -561,7 +562,7 @@ static struct page_collection *page_collection_lock(tb_page_addr_t start, static void page_collection_unlock(struct page_collection *set) { /* entries are unlocked and freed via page_entry_destroy */ - g_tree_destroy(set->tree); + q_tree_destroy(set->tree); g_free(set); } diff --git a/tcg/region.c b/tcg/region.c index 88d6bb273f..bef4c4756f 100644 --- a/tcg/region.c +++ b/tcg/region.c @@ -28,6 +28,7 @@ #include "qemu/mprotect.h" #include "qemu/memalign.h" #include "qemu/cacheinfo.h" +#include "qemu/qtree.h" #include "qapi/error.h" #include "exec/exec-all.h" #include "tcg/tcg.h" @@ -36,7 +37,7 @@ struct tcg_region_tree { QemuMutex lock; - GTree *tree; + QTree *tree; /* padding to avoid false sharing is computed at run-time */ }; @@ -163,7 +164,7 @@ static void tcg_region_trees_init(void) struct tcg_region_tree *rt = region_trees + i * tree_size; qemu_mutex_init(&rt->lock); - rt->tree = g_tree_new_full(tb_tc_cmp, NULL, NULL, tb_destroy); + rt->tree = q_tree_new_full(tb_tc_cmp, NULL, NULL, tb_destroy); } } @@ -202,7 +203,7 @@ void tcg_tb_insert(TranslationBlock *tb) g_assert(rt != NULL); qemu_mutex_lock(&rt->lock); - g_tree_insert(rt->tree, &tb->tc, tb); + q_tree_insert(rt->tree, &tb->tc, tb); qemu_mutex_unlock(&rt->lock); } @@ -212,7 +213,7 @@ void tcg_tb_remove(TranslationBlock *tb) g_assert(rt != NULL); qemu_mutex_lock(&rt->lock); - g_tree_remove(rt->tree, &tb->tc); + q_tree_remove(rt->tree, &tb->tc); qemu_mutex_unlock(&rt->lock); } @@ -232,7 +233,7 @@ TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr) } qemu_mutex_lock(&rt->lock); - tb = g_tree_lookup(rt->tree, &s); + tb = q_tree_lookup(rt->tree, &s); qemu_mutex_unlock(&rt->lock); return tb; } @@ -267,7 +268,7 @@ void tcg_tb_foreach(GTraverseFunc func, gpointer user_data) for (i = 0; i < region.n; i++) { struct tcg_region_tree *rt = region_trees + i * tree_size; - g_tree_foreach(rt->tree, func, user_data); + q_tree_foreach(rt->tree, func, user_data); } tcg_region_tree_unlock_all(); } @@ -281,7 +282,7 @@ size_t tcg_nb_tbs(void) for (i = 0; i < region.n; i++) { struct tcg_region_tree *rt = region_trees + i * tree_size; - nb_tbs += g_tree_nnodes(rt->tree); + nb_tbs += q_tree_nnodes(rt->tree); } tcg_region_tree_unlock_all(); return nb_tbs; @@ -296,8 +297,8 @@ static void tcg_region_tree_reset_all(void) struct tcg_region_tree *rt = region_trees + i * tree_size; /* Increment the refcount first so that destroy acts as a reset */ - g_tree_ref(rt->tree); - g_tree_destroy(rt->tree); + q_tree_ref(rt->tree); + q_tree_destroy(rt->tree); } tcg_region_tree_unlock_all(); } -- 2.34.1