Instead of scrubbing pages while holding the heap lock, we can mark the buddy's head as being scrubbed and drop the lock temporarily. If someone (most likely alloc_heap_pages()) tries to access this chunk, it signals the scrubber to abort the scrub by setting the head's scrub_state to BUDDY_SCRUB_ABORT, and then waits until the scrubber acknowledges by resetting the state to BUDDY_NOT_SCRUBBING. The scrubber checks this state after processing each page and stops its work as soon as it sees the abort request.
Signed-off-by: Boris Ostrovsky <boris.ostrov...@oracle.com> --- Changes in v7: * Replaced page_info with typeof(head->u.free) in check_and_stop_scrub() * Replaced 1UL with 1U in scrub_free_pages() * Fixed formatting in asm-*/mm.h xen/common/page_alloc.c | 110 +++++++++++++++++++++++++++++++++++++++++++++-- xen/include/asm-arm/mm.h | 28 +++++++----- xen/include/asm-x86/mm.h | 30 ++++++++----- 3 files changed, 143 insertions(+), 25 deletions(-) diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index b886983..726f857 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -687,6 +687,7 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node, { PFN_ORDER(pg) = order; pg->u.free.first_dirty = first_dirty; + pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING; if ( first_dirty != INVALID_DIRTY_IDX ) { @@ -697,6 +698,25 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node, page_list_add(pg, &heap(node, zone, order)); } +static void check_and_stop_scrub(struct page_info *head) +{ + if ( head->u.free.scrub_state == BUDDY_SCRUBBING ) + { + typeof(head->u.free) pgfree; + + head->u.free.scrub_state = BUDDY_SCRUB_ABORT; + spin_lock_kick(); + for ( ; ; ) + { + /* Can't ACCESS_ONCE() a bitfield. */ + pgfree.val = ACCESS_ONCE(head->u.free.val); + if ( pgfree.scrub_state != BUDDY_SCRUB_ABORT ) + break; + cpu_relax(); + } + } +} + static struct page_info *get_free_buddy(unsigned int zone_lo, unsigned int zone_hi, unsigned int order, unsigned int memflags, @@ -741,14 +761,19 @@ static struct page_info *get_free_buddy(unsigned int zone_lo, { if ( (pg = page_list_remove_head(&heap(node, zone, j))) ) { + if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX ) + return pg; /* * We grab single pages (order=0) even if they are * unscrubbed. Given that scrubbing one page is fairly quick * it is not worth breaking higher orders. 
*/ - if ( (order == 0) || use_unscrubbed || - pg->u.free.first_dirty == INVALID_DIRTY_IDX) + if ( (order == 0) || use_unscrubbed ) + { + check_and_stop_scrub(pg); return pg; + } + page_list_add_tail(pg, &heap(node, zone, j)); } } @@ -929,6 +954,7 @@ static int reserve_offlined_page(struct page_info *head) cur_head = head; + check_and_stop_scrub(head); /* * We may break the buddy so let's mark the head as clean. Then, when * merging chunks back into the heap, we will see whether the chunk has @@ -1079,6 +1105,29 @@ static unsigned int node_to_scrub(bool get_node) return closest; } +struct scrub_wait_state { + struct page_info *pg; + unsigned int first_dirty; + bool drop; +}; + +static void scrub_continue(void *data) +{ + struct scrub_wait_state *st = data; + + if ( st->drop ) + return; + + if ( st->pg->u.free.scrub_state == BUDDY_SCRUB_ABORT ) + { + /* There is a waiter for this buddy. Release it. */ + st->drop = true; + st->pg->u.free.first_dirty = st->first_dirty; + smp_wmb(); + st->pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING; + } +} + bool scrub_free_pages(void) { struct page_info *pg; @@ -1101,25 +1150,53 @@ bool scrub_free_pages(void) do { while ( !page_list_empty(&heap(node, zone, order)) ) { - unsigned int i; + unsigned int i, dirty_cnt; + struct scrub_wait_state st; /* Unscrubbed pages are always at the end of the list. */ pg = page_list_last(&heap(node, zone, order)); if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX ) break; + ASSERT(pg->u.free.scrub_state == BUDDY_NOT_SCRUBBING); + pg->u.free.scrub_state = BUDDY_SCRUBBING; + + spin_unlock(&heap_lock); + + dirty_cnt = 0; + for ( i = pg->u.free.first_dirty; i < (1U << order); i++) { if ( test_bit(_PGC_need_scrub, &pg[i].count_info) ) { scrub_one_page(&pg[i]); + /* + * We can modify count_info without holding heap + * lock since we effectively locked this buddy by + * setting its scrub_state. 
+ */ pg[i].count_info &= ~PGC_need_scrub; - node_need_scrub[node]--; + dirty_cnt++; cnt += 100; /* scrubbed pages add heavier weight. */ } else cnt++; + if ( pg->u.free.scrub_state == BUDDY_SCRUB_ABORT ) + { + /* Someone wants this chunk. Drop everything. */ + + pg->u.free.first_dirty = (i == (1U << order) - 1) ? + INVALID_DIRTY_IDX : i + 1; + smp_wmb(); + pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING; + + spin_lock(&heap_lock); + node_need_scrub[node] -= dirty_cnt; + spin_unlock(&heap_lock); + goto out_nolock; + } + /* * Scrub a few (8) pages before becoming eligible for * preemption. But also count non-scrubbing loop iterations @@ -1133,6 +1210,23 @@ bool scrub_free_pages(void) } } + st.pg = pg; + /* + * get_free_buddy() grabs a buddy with first_dirty set to + * INVALID_DIRTY_IDX so we can't set pg's first_dirty here. + * It will be set either below or in the lock callback (in + * scrub_continue()). + */ + st.first_dirty = (i >= (1U << order) - 1) ? + INVALID_DIRTY_IDX : i + 1; + st.drop = false; + spin_lock_cb(&heap_lock, scrub_continue, &st); + + node_need_scrub[node] -= dirty_cnt; + + if ( st.drop ) + goto out; + if ( i >= (1U << order) - 1 ) { page_list_del(pg, &heap(node, zone, order)); @@ -1141,6 +1235,8 @@ bool scrub_free_pages(void) else pg->u.free.first_dirty = i + 1; + pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING; + if ( preempt || (node_need_scrub[node] == 0) ) goto out; } @@ -1149,6 +1245,8 @@ bool scrub_free_pages(void) out: spin_unlock(&heap_lock); + + out_nolock: node_clear(node, node_scrubbing); return node_to_scrub(false) != NUMA_NO_NODE; } @@ -1230,6 +1328,8 @@ static void free_heap_pages( (phys_to_nid(page_to_maddr(predecessor)) != node) ) break; + check_and_stop_scrub(predecessor); + page_list_del(predecessor, &heap(node, zone, order)); /* Keep predecessor's first_dirty if it is already set. 
*/ @@ -1251,6 +1351,8 @@ static void free_heap_pages( (phys_to_nid(page_to_maddr(successor)) != node) ) break; + check_and_stop_scrub(successor); + page_list_del(successor, &heap(node, zone, order)); } diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h index 572337c..d42b070 100644 --- a/xen/include/asm-arm/mm.h +++ b/xen/include/asm-arm/mm.h @@ -42,18 +42,26 @@ struct page_info unsigned long type_info; } inuse; /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */ - struct { - /* Do TLBs need flushing for safety before next page use? */ - bool need_tlbflush:1; - - /* - * Index of the first *possibly* unscrubbed page in the buddy. - * One more bit than maximum possible order to accommodate - * INVALID_DIRTY_IDX. - */ + union { + struct { + /* Do TLBs need flushing for safety before next page use? */ + bool need_tlbflush:1; + + /* + * Index of the first *possibly* unscrubbed page in the buddy. + * One more bit than maximum possible order to accommodate + * INVALID_DIRTY_IDX. + */ #define INVALID_DIRTY_IDX ((1UL << (MAX_ORDER + 1)) - 1) - unsigned long first_dirty:MAX_ORDER + 1; + unsigned long first_dirty:MAX_ORDER + 1; + +#define BUDDY_NOT_SCRUBBING 0 +#define BUDDY_SCRUBBING 1 +#define BUDDY_SCRUB_ABORT 2 + unsigned long scrub_state:2; + }; + unsigned long val; } free; } u; diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index 07dc963..c9cf6c3 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -86,19 +86,27 @@ struct page_info } sh; /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */ - struct { - /* Do TLBs need flushing for safety before next page use? */ - bool need_tlbflush:1; - - /* - * Index of the first *possibly* unscrubbed page in the buddy. - * One more bit than maximum possible order to accommodate - * INVALID_DIRTY_IDX. - */ + union { + struct { + /* Do TLBs need flushing for safety before next page use? 
*/ + bool need_tlbflush:1; + + /* + * Index of the first *possibly* unscrubbed page in the buddy. + * One more bit than maximum possible order to accommodate + * INVALID_DIRTY_IDX. + */ #define INVALID_DIRTY_IDX ((1UL << (MAX_ORDER + 1)) - 1) - unsigned long first_dirty:MAX_ORDER + 1; - } free; + unsigned long first_dirty:MAX_ORDER + 1; +#define BUDDY_NOT_SCRUBBING 0 +#define BUDDY_SCRUBBING 1 +#define BUDDY_SCRUB_ABORT 2 + unsigned long scrub_state:2; + }; + + unsigned long val; + } free; } u; union { -- 1.8.3.1 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org https://lists.xen.org/xen-devel