> I've got a world with just patches 1..5 on that's seeing corruptions, but > I've not seen where the problem is. So far the world with 1..4 on hasn't > hit those corruption, but maybe I need to test more. > > Have you tested this set with google stressapptest? > > Let it migrate for a few cycles with stress apptest running, then ctrl-z > the stressapptest program to let the migration complete, then fg it > to collect the results. > > Dave > > * arei.gong...@huawei.com (arei.gong...@huawei.com) wrote: >> From: ChenLiang <chenlian...@huawei.com> >> >> Avoid hot pages being replaced by others to remarkably decrease cache >> misses >> >> Sample results with the test program which quote from xbzrle.txt ran in >> vm:(migrate bandwidth:1GE and xbzrle cache size 8MB) >> >> the test program: >> >> include <stdlib.h> >> include <stdio.h> >> int main() >> { >> char *buf = (char *) calloc(4096, 4096); >> while (1) { >> int i; >> for (i = 0; i < 4096 * 4; i++) { >> buf[i * 4096 / 4]++; >> } >> printf("."); >> } >> } >> >> before this patch: >> virsh qemu-monitor-command test_vm '{"execute": "query-migrate"}' >> {"return":{"expected-downtime":1020,"xbzrle-cache":{"bytes":1108284, >> "cache-size":8388608,"cache-miss-rate":0.987013,"pages":18297,"overflow":8, >> "cache-miss":1228737},"status":"active","setup-time":10,"total-time":52398, >> "ram":{"total":12466991104,"remaining":1695744,"mbps":935.559472, >> "transferred":5780760580,"dirty-sync-counter":271,"duplicate":2878530, >> "dirty-pages-rate":29130,"skipped":0,"normal-bytes":5748592640, >> "normal":1403465}},"id":"libvirt-706"} >> >> 18k pages sent compressed >> cache-miss-rate is 98.7%, totally miss. >> >> after optimizing: >> virsh qemu-monitor-command test_vm '{"execute": "query-migrate"}' >> {"return":{"expected-downtime":2054,"xbzrle-cache":{"bytes":5066763, >> "cache-size":8388608,"cache-miss-rate":0.485924,"pages":194823,"overflow":0, >> "cache-miss":210653},"status":"active","setup-time":11,"total-time":18729, >> "ram":{"total":12466991104,"remaining":3895296,"mbps":937.663549, >> "transferred":1615042219,"dirty-sync-counter":98,"duplicate":2869840, >> "dirty-pages-rate":58781,"skipped":0,"normal-bytes":1588404224, >> "normal":387794}},"id":"libvirt-266"} >> >> 194k pages sent compressed >> The value of cache-miss-rate decrease to 48.59%. >> >> Signed-off-by: ChenLiang <chenlian...@huawei.com> >> Signed-off-by: Gonglei <arei.gong...@huawei.com> >> Reviewed-by: Eric Blake <ebl...@redhat.com> >> --- >> arch_init.c | 8 +++++--- >> docs/xbzrle.txt | 8 ++++++++ >> include/migration/page_cache.h | 10 +++++++--- >> page_cache.c | 23 +++++++++++++++++++---- >> 4 files changed, 39 insertions(+), 10 deletions(-) >> >> diff --git a/arch_init.c b/arch_init.c >> index 15ca4c0..84a4bd3 100644 >> --- a/arch_init.c >> +++ b/arch_init.c >> @@ -343,7 +343,8 @@ static void xbzrle_cache_zero_page(ram_addr_t >> current_addr) >> >> /* We don't care if this fails to allocate a new cache page >> * as long as it updated an old one */ >> - cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE); >> + cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE, >> + bitmap_sync_count); >> } >> >> #define ENCODING_FLAG_XBZRLE 0x1 >> @@ -355,10 +356,11 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t >> **current_data, >> int encoded_len = 0, bytes_sent = -1; >> uint8_t *prev_cached_page; >> >> - if (!cache_is_cached(XBZRLE.cache, current_addr)) { >> + if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) { >> acct_info.xbzrle_cache_miss++; >> if (!last_stage) { >> - if (cache_insert(XBZRLE.cache, current_addr, *current_data) == >> -1) { >> + if (cache_insert(XBZRLE.cache, current_addr, *current_data, >> + bitmap_sync_count) == -1) { >> return -1; >> } else { >> /* update *current_data when the page has been >> diff --git a/docs/xbzrle.txt b/docs/xbzrle.txt >> index cc3a26a..52c8511 100644 >> --- a/docs/xbzrle.txt >> +++ b/docs/xbzrle.txt >> @@ -71,6 +71,14 @@ encoded buffer: >> encoded length 24 >> e9 07 0f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 03 01 67 01 01 69 >> >> +Cache update strategy >> +===================== >> +Keeping the hot pages in the cache is effective for decreased cache >> +misses. XBZRLE uses a counter as the age of each page. The counter will >> +increase after each ram dirty bitmap sync. When a cache conflict is >> +detected, XBZRLE will only evict pages in the cache that are older than >> +a threshold. >> + >> Usage >> ====================== >> 1. Verify the destination QEMU version is able to decode the new format. >> diff --git a/include/migration/page_cache.h b/include/migration/page_cache.h >> index 2d5ce2d..10ed532 100644 >> --- a/include/migration/page_cache.h >> +++ b/include/migration/page_cache.h >> @@ -43,8 +43,10 @@ void cache_fini(PageCache *cache); >> * >> * @cache pointer to the PageCache struct >> * @addr: page addr >> + * @current_age: current bitmap generation >> */ >> -bool cache_is_cached(const PageCache *cache, uint64_t addr); >> +bool cache_is_cached(const PageCache *cache, uint64_t addr, >> + uint64_t current_age); >> >> /** >> * get_cached_data: Get the data cached for an addr >> @@ -60,13 +62,15 @@ uint8_t *get_cached_data(const PageCache *cache, >> uint64_t addr); >> * cache_insert: insert the page into the cache. the page cache >> * will dup the data on insert. the previous value will be overwritten >> * >> - * Returns -1 on error >> + * Returns -1 when the page isn't inserted into cache >> * >> * @cache pointer to the PageCache struct >> * @addr: page address >> * @pdata: pointer to the page >> + * @current_age: current bitmap generation >> */ >> -int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata); >> +int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata, >> + uint64_t current_age); >> >> /** >> * cache_resize: resize the page cache. In case of size reduction the extra >> diff --git a/page_cache.c b/page_cache.c >> index b033681..c78157b 100644 >> --- a/page_cache.c >> +++ b/page_cache.c >> @@ -33,6 +33,9 @@ >> do { } while (0) >> #endif >> >> +/* the page in cache will not be replaced in two cycles */ >> +#define CACHED_PAGE_LIFETIME 2 >> + >> typedef struct CacheItem CacheItem; >> >> struct CacheItem { >> @@ -121,7 +124,8 @@ static size_t cache_get_cache_pos(const PageCache *cache, >> return pos; >> } >> >> -bool cache_is_cached(const PageCache *cache, uint64_t addr) >> +bool cache_is_cached(const PageCache *cache, uint64_t addr, >> + uint64_t current_age) >> { >> size_t pos; >> >> @@ -130,7 +134,12 @@ bool cache_is_cached(const PageCache *cache, uint64_t >> addr) >> >> pos = cache_get_cache_pos(cache, addr); >> >> - return (cache->page_cache[pos].it_addr == addr); >> + if (cache->page_cache[pos].it_addr == addr) { >> + /* update the it_age when the cache hit */ >> + cache->page_cache[pos].it_age = current_age; >> + return true; >> + } >> + return false; >> } >> >> static CacheItem *cache_get_by_addr(const PageCache *cache, uint64_t addr) >> @@ -150,7 +159,8 @@ uint8_t *get_cached_data(const PageCache *cache, >> uint64_t addr) >> return cache_get_by_addr(cache, addr)->it_data; >> } >> >> -int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata) >> +int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata, >> + uint64_t current_age) >> { >> >> CacheItem *it = NULL; >> @@ -161,6 +171,11 @@ int cache_insert(PageCache *cache, uint64_t addr, const >> uint8_t *pdata) >> /* actual update of entry */ >> it = cache_get_by_addr(cache, addr); >> >> + if (it->it_data && >> + it->it_age + CACHED_PAGE_LIFETIME > current_age) { >> + /* the cache page is fresh, don't replace it */
Hi Dave, is it ok? - if (it->it_data && + if (it->it_data && it->it_addr != addr && it->it_age + CACHED_PAGE_LIFETIME > current_age) { ChenLiang >> + return -1; >> + } >> /* allocate page */ >> if (!it->it_data) { >> it->it_data = g_try_malloc(cache->page_size); >> @@ -173,7 +188,7 @@ int cache_insert(PageCache *cache, uint64_t addr, const >> uint8_t *pdata) >> >> memcpy(it->it_data, pdata, cache->page_size); >> >> - it->it_age = ++cache->max_item_age; >> + it->it_age = current_age; >> it->it_addr = addr; >> >> return 0; >> -- >> 1.7.12.4 >> >> >> > -- > Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK >