Instead of scrubbing pages while holding heap lock we can mark
buddy's head as being scrubbed and drop the lock temporarily.
If someone (most likely alloc_heap_pages()) tries to access
this chunk, it will signal the scrubber to abort scrubbing by
setting the head's PAGE_SCRUB_ABORT bit. The scrubber checks this
bit after processing each page and stops its work as soon as it
sees it.

Signed-off-by: Boris Ostrovsky <boris.ostrov...@oracle.com>
---
Changes in v4:
* Drop unnecessary ACCESS_ONCE in scrub_free_pages, add smp_wmb()
  in scrub_continue()
* Keep track of first_dirty in scrub_wait_state

 xen/common/page_alloc.c  | 100 ++++++++++++++++++++++++++++++++++++++++++++---
 xen/include/asm-arm/mm.h |   4 ++
 xen/include/asm-x86/mm.h |   4 ++
 3 files changed, 103 insertions(+), 5 deletions(-)

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 6e505b1..b365305 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -694,6 +694,17 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node,
         page_list_add(pg, &heap(node, zone, order));
 }
 
+static void check_and_stop_scrub(struct page_info *head)
+{
+    if ( head->u.free.scrub_state & PAGE_SCRUBBING )
+    {
+        head->u.free.scrub_state |= PAGE_SCRUB_ABORT;
+        spin_lock_kick();
+        while ( ACCESS_ONCE(head->u.free.scrub_state) & PAGE_SCRUB_ABORT )
+            cpu_relax();
+    }
+}
+
 static struct page_info *get_free_buddy(unsigned int zone_lo,
                                         unsigned int zone_hi,
                                        unsigned int order, unsigned int memflags,
@@ -738,9 +749,15 @@ static struct page_info *get_free_buddy(unsigned int zone_lo,
             {
                 if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
                 {
-                    if ( (order == 0) || use_unscrubbed ||
-                         pg->u.free.first_dirty == INVALID_DIRTY_IDX)
+                    if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
                         return pg;
+
+                    if ( (order == 0) || use_unscrubbed )
+                    {
+                        check_and_stop_scrub(pg);
+                        return pg;
+                    }
+
                     page_list_add_tail(pg, &heap(node, zone, j));
                 }
             }
@@ -925,6 +942,7 @@ static int reserve_offlined_page(struct page_info *head)
 
     cur_head = head;
 
+    check_and_stop_scrub(head);
     /*
      * We may break the buddy so let's mark the head as clean. Then, when
      * merging chunks back into the heap, we will see whether the chunk has
@@ -1069,6 +1087,29 @@ static unsigned int node_to_scrub(bool get_node)
     return closest;
 }
 
+struct scrub_wait_state {
+    struct page_info *pg;
+    unsigned int first_dirty;
+    bool drop;
+};
+
+static void scrub_continue(void *data)
+{
+    struct scrub_wait_state *st = data;
+
+    if ( st->drop )
+        return;
+
+    if ( st->pg->u.free.scrub_state & PAGE_SCRUB_ABORT )
+    {
+        /* There is a waiter for this buddy. Release it. */
+        st->drop = true;
+        st->pg->u.free.first_dirty = st->first_dirty;
+        smp_wmb();
+        st->pg->u.free.scrub_state = 0;
+    }
+}
+
 bool scrub_free_pages(void)
 {
     struct page_info *pg;
@@ -1090,24 +1131,51 @@ bool scrub_free_pages(void)
         do {
             while ( !page_list_empty(&heap(node, zone, order)) )
             {
-                unsigned int i;
+                unsigned int i, dirty_cnt;
+                struct scrub_wait_state st;
 
                 /* Unscrubbed pages are always at the end of the list. */
                 pg = page_list_last(&heap(node, zone, order));
                 if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
                     break;
 
+                ASSERT(!pg->u.free.scrub_state);
+                pg->u.free.scrub_state = PAGE_SCRUBBING;
+
+                spin_unlock(&heap_lock);
+
+                dirty_cnt = 0;
                 for ( i = pg->u.free.first_dirty; i < (1U << order); i++)
                 {
                     cnt++;
                     if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
                     {
                         scrub_one_page(&pg[i]);
+                        /*
+                         * We can modify count_info without holding heap
+                         * lock since we effectively locked this buddy by
+                         * setting its scrub_state.
+                         */
                         pg[i].count_info &= ~PGC_need_scrub;
-                        node_need_scrub[node]--;
+                        dirty_cnt++;
                         cnt += 100; /* scrubbed pages add heavier weight. */
                     }
 
+                    if ( pg->u.free.scrub_state & PAGE_SCRUB_ABORT )
+                    {
+                        /* Someone wants this chunk. Drop everything. */
+
+                        pg->u.free.first_dirty = (i == (1U << order) - 1) ?
+                            INVALID_DIRTY_IDX : i + 1;
+                        smp_wmb();
+                        pg->u.free.scrub_state = 0;
+
+                        spin_lock(&heap_lock);
+                        node_need_scrub[node] -= dirty_cnt;
+                        spin_unlock(&heap_lock);
+                        goto out_nolock;
+                    }
+
                     /*
                      * Scrub a few (8) pages before becoming eligible for
                      * preemtion. But also count non-scrubbing loop iteration
@@ -1121,6 +1189,17 @@ bool scrub_free_pages(void)
                     }
                 }
 
+                st.pg = pg;
+                st.first_dirty = (i >= (1U << order) - 1) ?
+                    INVALID_DIRTY_IDX : i + 1;
+                st.drop = false;
+                spin_lock_cb(&heap_lock, scrub_continue, &st);
+
+                node_need_scrub[node] -= dirty_cnt;
+
+                if ( st.drop )
+                    goto out;
+
                 if ( i == (1U << order) )
                 {
                     page_list_del(pg, &heap(node, zone, order));
@@ -1128,7 +1207,9 @@ bool scrub_free_pages(void)
                 }
                 else
                     pg->u.free.first_dirty = i + 1;
- 
+
+                pg->u.free.scrub_state = 0;
+
                 if ( preempt || (node_need_scrub[node] == 0) )
                     goto out;
             }
@@ -1137,6 +1218,8 @@ bool scrub_free_pages(void)
 
  out:
     spin_unlock(&heap_lock);
+
+ out_nolock:
     node_clear(node, node_scrubbing);
     return softirq_pending(cpu) || (node_to_scrub(false) != NUMA_NO_NODE);
 }
@@ -1175,6 +1258,8 @@ static void free_heap_pages(
         if ( page_state_is(&pg[i], offlined) )
             tainted = 1;
 
+        pg[i].u.free.scrub_state = 0;
+
         /* If a page has no owner it will need no safety TLB flush. */
         pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
         if ( pg[i].u.free.need_tlbflush )
@@ -1218,6 +1303,8 @@ static void free_heap_pages(
                  (phys_to_nid(page_to_maddr(predecessor)) != node) )
                 break;
 
+            check_and_stop_scrub(predecessor);
+
             page_list_del(predecessor, &heap(node, zone, order));
 
             if ( predecessor->u.free.first_dirty != INVALID_DIRTY_IDX )
@@ -1240,6 +1327,9 @@ static void free_heap_pages(
                  (PFN_ORDER(successor) != order) ||
                  (phys_to_nid(page_to_maddr(successor)) != node) )
                 break;
+
+            check_and_stop_scrub(successor);
+
             page_list_del(successor, &heap(node, zone, order));
 
             need_scrub |= (successor->u.free.first_dirty != INVALID_DIRTY_IDX);
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 38d4fba..148d60b 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -46,6 +46,10 @@ struct page_info
             /* Index of the first *possibly* unscrubbed page in the buddy. */
 #define INVALID_DIRTY_IDX -1U
             unsigned int first_dirty;
+#define PAGE_SCRUBBING    (1<<0)
+#define PAGE_SCRUB_ABORT  (1<<1)
+            unsigned char scrub_state;
+
             /* Do TLBs need flushing for safety before next page use? */
             bool_t need_tlbflush;
         } free;
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index e20f161..f46b242 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -90,6 +90,10 @@ struct page_info
             /* Index of the first *possibly* unscrubbed page in the buddy. */
 #define INVALID_DIRTY_IDX -1U
             unsigned int first_dirty;
+#define PAGE_SCRUBBING    (1<<0)
+#define PAGE_SCRUB_ABORT  (1<<1)
+            unsigned char scrub_state;
+
             /* Do TLBs need flushing for safety before next page use? */
             bool_t need_tlbflush;
         } free;
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

Reply via email to