[RFC] mm: page allocation for less fragmentation

Gioh Kim Sun, 01 Mar 2015 21:03:08 -0800

My driver allocates more than 30MB of pages at a time via alloc_page() and
maps them into a virtual address range. In total it uses 300~400MB of pages.


If I run a heavy load test for a day, I cannot allocate even order=3 pages
because of external fragmentation.

I thought I needed an anti-fragmentation solution for my driver.
So I looked into the compaction code, but there is no allocation function.

This patch picks a free buddy block and finds the pageblock that contains it.
It then allocates the free pages within that pageblock.
So I expect it to allocate pages with less fragmentation.

I've tested this patch for 48 hours and have not found any problems.
I haven't measured the amount of external fragmentation yet
because that will take several days. I'll start it ASAP.

I just wonder whether anybody has tried page allocation like this.
Am I going in the right direction?

I'll report the results after a long-running test.
This patch is based on 3.16.

Signed-off-by: Gioh Kim <gioh....@lge.com>
---
 mm/compaction.c |   59 +++++++++++++++++++++++++++++
 mm/page_alloc.c |  112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 171 insertions(+)

diff --git a/mm/compaction.c b/mm/compaction.c
index 21bf292..7775bc6 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -16,6 +16,7 @@
 #include <linux/sysfs.h>
 #include <linux/balloon_compaction.h>
 #include <linux/page-isolation.h>
+#include <linux/cpuset.h>
 #include "internal.h"
 
 #ifdef CONFIG_COMPACTION
@@ -1289,3 +1290,61 @@ void compaction_unregister_node(struct node *node)
 #endif /* CONFIG_SYSFS && CONFIG_NUMA */
 
 #endif /* CONFIG_COMPACTION */
+
+/*
+ * Isolate small free buddy blocks from the pfn range [blockpfn, end_pfn).
+ *
+ * @cc:       not used here; accepted so that this definition matches the
+ *            five-argument prototype and call in mm/page_alloc.c
+ * @blockpfn: first pfn to scan
+ * @end_pfn:  pfn one past the last pfn to scan
+ * @count:    stop scanning once at least this many pages are isolated
+ * @freelist: list that receives the isolated order-0 pages via page->lru
+ *
+ * Free blocks whose order is at least pageblock_order/2 are skipped so
+ * that large contiguous areas stay intact.  Every other free block is
+ * split with split_free_page() and all of its pages are put on @freelist.
+ *
+ * No lock is taken here.  rmqueue_compact() calls this with zone->lock
+ * held; presumably any other caller has to do the same so that
+ * PageBuddy() and page_order() stay stable during the scan.
+ *
+ * Returns the number of pages isolated.  This can exceed @count because
+ * the last block that is split is always added in full.
+ */
+unsigned long isolate_unmovable_freepages_block(struct compact_control *cc,
+                                               unsigned long blockpfn,
+                                               unsigned long end_pfn,
+                                               int count,
+                                               struct list_head *freelist)
+{
+       int total_isolated = 0;
+       struct page *cursor;
+
+       cursor = pfn_to_page(blockpfn);
+
+       /* Isolate free pages in a pageblock. */
+       for (; blockpfn < end_pfn; blockpfn++, cursor++) {
+               int isolated, i;
+               unsigned long order;
+               struct page *page = cursor;
+
+               if (!pfn_valid_within(blockpfn))
+                       continue;
+               if (!PageBuddy(page))
+                       continue;
+
+               /* Do not touch large contiguous free blocks; step over them. */
+               order = page_order(page);
+               if (order >= pageblock_order/2) {
+                       blockpfn += (1UL << order) - 1;
+                       cursor += (1UL << order) - 1;
+                       continue;
+               }
+
+               /* Found a free page, break it into order-0 pages */
+               isolated = split_free_page(page);
+
+               total_isolated += isolated;
+               for (i = 0; i < isolated; i++) {
+                       list_add(&page->lru, freelist);
+                       page++;
+               }
+
+               if (total_isolated >= count)
+                       break;
+
+               /*
+                * If a page was split, advance to the end of it.  The loop
+                * header adds the final step.
+                */
+               if (isolated) {
+                       blockpfn += isolated - 1;
+                       cursor += isolated - 1;
+               }
+       }
+
+       return total_isolated;
+}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 86c9a72..c782191 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6646,3 +6646,115 @@ void dump_page(struct page *page, const char *reason)
        dump_page_badflags(page, reason, 0);
 }
 EXPORT_SYMBOL(dump_page);
+
+/*
+ * Defined in mm/compaction.c and called by rmqueue_compact() below.
+ * NOTE(review): this is a local prototype in a .c file, so the compiler
+ * cannot check it against the definition; moving it to a shared header
+ * (e.g. mm/internal.h) would catch any signature drift.
+ */
+unsigned long isolate_unmovable_freepages_block(struct compact_control *cc,
+                                               unsigned long blockpfn,
+                                               unsigned long end_pfn,
+                                               int count,
+                                               struct list_head *freelist);
+
+/*
+ * rmqueue_compact - gather 1 << @order order-0 pages from @zone
+ * @zone:        zone to take pages from
+ * @order:       log2 of the number of order-0 pages wanted
+ * @migratetype: free list (and pageblock migratetype) to take pages from
+ * @freepages:   list that receives the pages, linked through page->lru
+ *
+ * Starting at order 0, take the first block on the @migratetype free
+ * list, work out which pageblock it sits in and isolate free pages from
+ * that pageblock with isolate_unmovable_freepages_block().  This repeats
+ * until enough pages are isolated or every order up to pageblock_order
+ * has been tried.
+ *
+ * Isolation can hand back more pages than requested; the surplus is
+ * freed again before returning.
+ *
+ * Returns min(pages isolated, 1 << @order).  With an initially empty
+ * @freepages that is the number of pages left on the list, which may be
+ * less than requested (including 0).
+ */
+int rmqueue_compact(struct zone *zone, unsigned int order,
+                   int migratetype, struct list_head *freepages)
+{
+       const int wanted = 1 << order;
+       unsigned int current_order = 0;
+       unsigned long block_start_pfn;  /* start of current pageblock */
+       unsigned long block_end_pfn;    /* end of current pageblock */
+       int total_isolated = 0;
+       int request = wanted;
+       int kept = 0;
+       unsigned long flags;
+       struct page *page, *next;
+
+       spin_lock_irqsave(&zone->lock, flags);
+
+       while (current_order <= pageblock_order && request > 0) {
+               struct free_area *area = &zone->free_area[current_order];
+               int isolated;
+
+               if (list_empty(&area->free_list[migratetype])) {
+                       current_order++;
+                       continue;
+               }
+
+               page = list_entry(area->free_list[migratetype].next,
+                                 struct page, lru);
+
+               /*
+                * Check the migratetype of the pageblock again; some pages
+                * can be set to a different migratetype by rmqueue_fallback.
+                * Move on to the next order in that case: retrying the same
+                * order would pick the same list head again and spin forever
+                * with zone->lock held and interrupts disabled.
+                */
+               if (get_pageblock_migratetype(page) != migratetype) {
+                       current_order++;
+                       continue;
+               }
+
+               block_start_pfn = page_to_pfn(page) & ~(pageblock_nr_pages - 1);
+               block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
+                                   zone_end_pfn(zone));
+
+               isolated = isolate_unmovable_freepages_block(NULL,
+                                                             block_start_pfn,
+                                                             block_end_pfn,
+                                                             request,
+                                                             freepages);
+
+               total_isolated += isolated;
+               request -= isolated;
+
+               /*
+                * A buddy block was found but it is too big, or the
+                * pageblock yielded no usable page.  Nothing more can be
+                * taken at this order, so try the next one.
+                */
+               if (isolated == 0)
+                       current_order++;
+       }
+
+       /*
+        * NOTE(review): the statistics count every isolated page, including
+        * the surplus that is freed again below - confirm this is intended.
+        */
+       __mod_zone_page_state(zone, NR_ALLOC_BATCH, -total_isolated);
+       __count_zone_vm_events(PGALLOC, zone, total_isolated);
+
+       spin_unlock_irqrestore(&zone->lock, flags);
+
+       /*
+        * Keep the first 1 << order pages on the list and give back the rest.
+        * NOTE(review): the count covers every entry on @freepages, so this
+        * assumes the caller passed in an empty list.
+        */
+       list_for_each_entry_safe(page, next, freepages, lru) {
+               if (kept >= wanted) {
+                       list_del(&page->lru);
+                       /*
+                        * Do not free pages into the hot-cold list, but
+                        * into the buddy list.  The count is dropped first,
+                        * presumably to undo the reference set when the
+                        * page was split off.
+                        */
+                       atomic_dec(&page->_count);
+                       __free_pages_ok(page, 0);
+               }
+               kept++;
+       }
+
+       /* Run the per-page allocation hooks on the pages being handed out. */
+       list_for_each_entry(page, freepages, lru) {
+               arch_alloc_page(page, 0);
+               kernel_map_pages(page, 1, 1);
+       }
+
+       return total_isolated < wanted ? total_isolated : wanted;
+}
+
+/*
+ * alloc_pages_compact - entry point for the pageblock-local allocation
+ * @gfp_mask:  allocation flags; they select the zonelist, the highest
+ *             usable zone and the migratetype
+ * @order:     log2 of the number of order-0 pages wanted
+ * @freepages: list that receives the allocated pages
+ *
+ * Looks up the first zone of the local node's zonelist that is allowed by
+ * the current cpuset and hands the request to rmqueue_compact() for that
+ * zone only.
+ *
+ * Returns 0 when no zone is found, otherwise whatever rmqueue_compact()
+ * returns.
+ */
+int alloc_pages_compact(gfp_t gfp_mask, unsigned int order,
+                       struct list_head *freepages)
+{
+       enum zone_type high_zoneidx = gfp_zone(gfp_mask);
+       struct zonelist *zonelist = node_zonelist(numa_node_id(), gfp_mask);
+       struct zone *zone;
+       int migratetype;
+
+       /* Only the zone is needed; the returned zoneref is not used. */
+       first_zones_zonelist(zonelist, high_zoneidx,
+                            &cpuset_current_mems_allowed, &zone);
+       if (zone == NULL)
+               return 0;
+
+       migratetype = allocflags_to_migratetype(gfp_mask);
+
+       return rmqueue_compact(zone, order, migratetype, freepages);
+}
+EXPORT_SYMBOL(alloc_pages_compact);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC] mm: page allocation for less fragmentation

Reply via email to