From c882b155466313fcd85ac330a45a573e608b0d74 Mon Sep 17 00:00:00 2001 From: bauerchen <bauerc...@tencent.com> Date: Tue, 11 Feb 2020 17:10:35 +0800 Subject: [PATCH] Optimize: large guest start-up in mem-prealloc MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit
[desc]: A large-memory VM starts up slowly when using -mem-prealloc, and the current method leaves room for optimization: 1. mmap is used to allocate thread stacks while the page-clearing threads are being created; it takes mm->mmap_sem for writing, but the clearing threads already hold it for reading, and this lock contention makes thread creation very slow. 2. The method of dividing pages among threads is poor: if 64 threads split 160 hugepages, 63 threads clear 2 pages each while 1 thread clears 34 pages, so the overall speed is very slow. To solve the first problem, we add a mutex in the thread function and only start the threads working once all of them have been created. For the second problem, we spread the remainder across the other threads, so that with 160 hugepages and 64 threads, 32 threads clear 3 pages each and 32 threads clear 2 pages each. [test]: 320G 84c VM start time can be reduced to 10s; 680G 84c VM start time can be reduced to 18s. Signed-off-by: bauerchen <bauerc...@tencent.com> Reviewed-by: Pan Rui <ruip...@tencent.com> Reviewed-by: Ivan Ren <ivan...@tencent.com> --- util/oslib-posix.c | 44 ++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 5a291cc..e97369b 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -76,6 +76,10 @@ static MemsetThread *memset_thread; static int memset_num_threads; static bool memset_thread_failed; +static QemuMutex page_mutex; +static QemuCond page_cond; +static volatile bool thread_create_flag; + int qemu_get_thread_id(void) { #if defined(__linux__) @@ -403,6 +407,14 @@ static void *do_touch_pages(void *arg) MemsetThread *memset_args = (MemsetThread *)arg; sigset_t set, oldset; + /*wait for all threads create finished */ + qemu_mutex_lock(&page_mutex); + while(!thread_create_flag){ + qemu_cond_wait(&page_cond, &page_mutex); + } + qemu_mutex_unlock(&page_mutex); + + /* unblock SIGBUS */ sigemptyset(&set); sigaddset(&set, SIGBUS); @@ -448,30 +460,46 @@ 
static inline int get_memset_num_threads(int smp_cpus) return ret; } +static void calc_page_per_thread(size_t numpages, int memset_threads, size_t *pages_per_thread){ + int avg = numpages / memset_threads + 1; + int i = 0; + int last = avg * memset_threads - numpages; + for (i = 0; i < memset_threads; i++) + { + if(memset_threads - i <= last){ + pages_per_thread[i] = avg - 1; + }else + pages_per_thread[i] = avg; + } +} + static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages, int smp_cpus) { - size_t numpages_per_thread; - size_t size_per_thread; + size_t *numpages_per_thread; char *addr = area; int i = 0; memset_thread_failed = false; + thread_create_flag = false; memset_num_threads = get_memset_num_threads(smp_cpus); + numpages_per_thread = g_new0(size_t, memset_num_threads); memset_thread = g_new0(MemsetThread, memset_num_threads); - numpages_per_thread = (numpages / memset_num_threads); - size_per_thread = (hpagesize * numpages_per_thread); + calc_page_per_thread(numpages, memset_num_threads, numpages_per_thread); + for (i = 0; i < memset_num_threads; i++) { memset_thread[i].addr = addr; - memset_thread[i].numpages = (i == (memset_num_threads - 1)) ? - numpages : numpages_per_thread; + memset_thread[i].numpages = numpages_per_thread[i]; memset_thread[i].hpagesize = hpagesize; qemu_thread_create(&memset_thread[i].pgthread, "touch_pages", do_touch_pages, &memset_thread[i], QEMU_THREAD_JOINABLE); - addr += size_per_thread; - numpages -= numpages_per_thread; + addr += numpages_per_thread[i] * hpagesize; + numpages -= numpages_per_thread[i]; } + thread_create_flag = true; + qemu_cond_broadcast(&page_cond); + for (i = 0; i < memset_num_threads; i++) { qemu_thread_join(&memset_thread[i].pgthread); } -- 1.8.3.1