Android phones usually have very little memory, so after running for a
long time, memory becomes heavily fragmented. The kernel stack, which is
allocated by alloc_thread_stack_node(), usually needs 16K of memory, and
that allocation fails frequently.

We do have CONFIG_VMAP_STACK now, but it does not support arm64, and it
may cause some regression because of vmalloc: it needs to find an area
and create page tables dynamically, which takes some time.

I think we can merge a page with its buddy as soon as possible during pcp
alloc/free to reduce fragmentation. A pcp page is a hot page, so freeing
it will cause cache misses. I used perf to test this, and the regression
does not seem to be large, but it may need more testing. Any reply is
welcome.

no patch:
perf stat -e cache-misses make -j50

Kernel: arch/x86/boot/bzImage is ready  (#10)

 Performance counter stats for 'make -j50':

    17,845,292,704      cache-misses

     157.605906725 seconds time elapsed

patched:
perf stat -e cache-misses make -j50

Kernel: arch/x86/boot/bzImage is ready  (#8)

 Performance counter stats for 'make -j50':

    17,876,726,774      cache-misses

     156.293720662 seconds time elapsed

no patch:
make clean, dropcache, then make -j50, CONFIG_VMAP_STACK is off
[root@localhost ~]# cat /proc/buddyinfo
Node 0, zone      DMA      3      0      2      1      3      2      2      1   
   0      1      3
Node 0, zone    DMA32      4      4      1      5      2      4      2      2   
   3      1    447
Node 0, zone   Normal   2389    418    668    707    738    451    246     93   
  42     21  15147
Node 1, zone   Normal   1137    386    583    631    878    311     80     12   
   2      8  15640
Node 2, zone   Normal   1875    230    323    462    729    453    177     67   
  12      9  15749
Node 3, zone   Normal   1675    452    503    898    928    628    256     70   
  25     14  11688
Node 4, zone   Normal   1917    407    306   2706   1722    909    477    218   
  54     34  15682
Node 5, zone   Normal   4330   9785   6265   2612   1404    703    276    113   
  33      7  15730
Node 6, zone   Normal    754    211   1093   1023    748    599    352    193   
 107     43  15672
Node 7, zone   Normal   1092    133    819    807    729    549    254    120   
  52     28  15500
[root@localhost ~]# cat /sys/kernel/debug/extfrag/unusable_index
Node 0, zone      DMA 0.000 0.000 0.000 0.002 0.004 0.016 0.032 0.065 0.097 
0.097 0.226
Node 0, zone    DMA32 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.001 
0.002 0.004
Node 0, zone   Normal 0.000 0.000 0.000 0.000 0.000 0.001 0.002 0.003 0.004 
0.004 0.005
Node 1, zone   Normal 0.000 0.000 0.000 0.000 0.000 0.001 0.002 0.002 0.002 
0.002 0.002
Node 2, zone   Normal 0.000 0.000 0.000 0.000 0.000 0.001 0.002 0.002 0.003 
0.003 0.003
Node 3, zone   Normal 0.000 0.000 0.000 0.000 0.000 0.002 0.003 0.005 0.005 
0.006 0.007
Node 4, zone   Normal 0.000 0.000 0.000 0.000 0.001 0.003 0.005 0.006 0.008 
0.009 0.010
Node 5, zone   Normal 0.000 0.000 0.001 0.003 0.004 0.005 0.007 0.008 0.009 
0.009 0.009
Node 6, zone   Normal 0.000 0.000 0.000 0.000 0.000 0.001 0.002 0.004 0.005 
0.007 0.008
Node 7, zone   Normal 0.000 0.000 0.000 0.000 0.000 0.001 0.002 0.003 0.004 
0.005 0.006

patched:
make clean, dropcache, then make -j50, CONFIG_VMAP_STACK is off
[root@localhost ~]# cat /proc/buddyinfo
Node 0, zone      DMA      1      1      2      1      3      2      2      1   
   0      1      3
Node 0, zone    DMA32      3      3      0      2      2      4      2      2   
   3      1    447
Node 0, zone   Normal   1293   1097    159    564    620    392    242     89   
  49     21  15154
Node 1, zone   Normal   1195    369    155     73    295    260     92     32   
   8     10  15769
Node 2, zone   Normal   1478    434    160    846   1397    590    274    118   
  39     25  15753
Node 3, zone   Normal    892    285    176    625    691    450    226     78   
  33     14  11596
Node 4, zone   Normal    604    217     28    468   1560    690    292    126   
  46     31  15741
Node 5, zone   Normal    888    225    101    263    483    319    196     97   
  30     24  15726
Node 6, zone   Normal   1908   9294   7075   3373   1765    759    243    128   
  21     20  15591
Node 7, zone   Normal   1362   1126   1271    646    558    377    170     84   
  37     35  15602
[root@localhost ~]# cat /sys/kernel/debug/extfrag/unusable_index
Node 0, zone      DMA 0.000 0.000 0.000 0.002 0.004 0.016 0.032 0.065 0.097 
0.097 0.226
Node 0, zone    DMA32 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.001 
0.002 0.004
Node 0, zone   Normal 0.000 0.000 0.000 0.000 0.000 0.001 0.001 0.002 0.003 
0.004 0.005
Node 1, zone   Normal 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.001 0.001 
0.001 0.001
Node 2, zone   Normal 0.000 0.000 0.000 0.000 0.000 0.002 0.003 0.004 0.005 
0.005 0.006
Node 3, zone   Normal 0.000 0.000 0.000 0.000 0.000 0.001 0.002 0.003 0.004 
0.005 0.005
Node 4, zone   Normal 0.000 0.000 0.000 0.000 0.000 0.001 0.003 0.004 0.005 
0.006 0.007
Node 5, zone   Normal 0.000 0.000 0.000 0.000 0.000 0.000 0.001 0.002 0.002 
0.003 0.004
Node 6, zone   Normal 0.000 0.000 0.001 0.003 0.004 0.006 0.007 0.008 0.009 
0.010 0.010
Node 7, zone   Normal 0.000 0.000 0.000 0.000 0.000 0.001 0.002 0.002 0.003 
0.004 0.005


Signed-off-by: Xishi Qiu <qiuxi...@huawei.com>
---
 mm/page_alloc.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8fd42aa..82257e6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2413,6 +2413,8 @@ void free_hot_cold_page(struct page *page, bool cold)
        unsigned long flags;
        unsigned long pfn = page_to_pfn(page);
        int migratetype;
+       unsigned long page_idx = pfn & 1UL;
+       struct page *buddy;
 
        if (!free_pcp_prepare(page))
                return;
@@ -2437,6 +2439,16 @@ void free_hot_cold_page(struct page *page, bool cold)
                migratetype = MIGRATE_MOVABLE;
        }
 
+       if (page_idx)
+               buddy = page - 1;
+       else
+               buddy = page + 1;
+       /* merge immediately if buddy is free */        
+       if (PageBuddy(buddy)) {
+               free_one_page(zone, page, pfn, 0, migratetype);
+               goto out;
+       }
+
        pcp = &this_cpu_ptr(zone->pageset)->pcp;
        if (!cold)
                list_add(&page->lru, &pcp->lists[migratetype]);
@@ -2591,8 +2603,12 @@ struct page *buffered_rmqueue(struct zone 
*preferred_zone,
        if (likely(order == 0)) {
                struct per_cpu_pages *pcp;
                struct list_head *list;
+               unsigned long page_idx;
+               struct page *buddy;
+               int retry = 0;
 
                local_irq_save(flags);
+retry:
                do {
                        pcp = &this_cpu_ptr(zone->pageset)->pcp;
                        list = &pcp->lists[migratetype];
@@ -2612,6 +2628,19 @@ struct page *buffered_rmqueue(struct zone 
*preferred_zone,
                        list_del(&page->lru);
                        pcp->count--;
 
+                       page_idx = page_to_pfn(page) & 1UL;
+                       if (page_idx)
+                               buddy = page - 1;
+                       else
+                               buddy = page + 1;
+                       /* merge immediately if buddy is free */
+                       if (PageBuddy(buddy) && retry < 3) {
+                               free_one_page(page_zone(page), page,
+                                               page_to_pfn(page), 0, 
migratetype);
+                               retry++;
+                               goto retry;
+                       }
+
                } while (check_new_pcp(page));
        } else {
                /*
-- 
1.8.3.1


Reply via email to