On Thu, Mar 31, 2016 at 10:50:36AM +0200, Vlastimil Babka wrote:
> The goal of direct compaction is to quickly make a high-order page available
> for the pending allocation. The free page scanner can add significant latency
> when searching for migration targets, although to succeed the compaction, the
> only important limit on the target free pages is that they must not come from
> the same order-aligned block as the migrated pages.

If migration fails, the allocated free pages will remain, and they can
interfere with further compaction success: although they don't come from the
previous order-aligned block, they can come from the next order-aligned
block. Don't you need to free the remaining freelist after a migration
attempt fails?

Thanks.

> 
> This patch therefore makes direct async compaction allocate freepages directly
> from freelists. Pages that do come from the same block (which we cannot simply
> exclude from the freelist allocation) are put on separate list and released
> only after migration to allow them to merge.
> 
> In addition to reduced stall, another advantage is that we split larger free
> pages for migration targets only when smaller pages are depleted, while the
> free scanner can split pages up to (order - 1) as it encounters them. However,
> this approach likely sacrifices some of the long-term anti-fragmentation
> features of a thorough compaction, so we limit the direct allocation approach
> to direct async compaction.
> 
> For observational purposes, the patch introduces two new counters to
> /proc/vmstat. compact_free_direct_alloc counts how many pages were allocated
> directly without scanning, and compact_free_direct_miss counts the subset of
> these allocations that were from the wrong range and had to be held on the
> separate list.
> 
> Signed-off-by: Vlastimil Babka <vba...@suse.cz>
> ---
>  include/linux/vm_event_item.h |  1 +
>  mm/compaction.c               | 52 
> ++++++++++++++++++++++++++++++++++++++++++-
>  mm/internal.h                 |  5 +++++
>  mm/page_alloc.c               | 27 ++++++++++++++++++++++
>  mm/vmstat.c                   |  2 ++
>  5 files changed, 86 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
> index ec084321fe09..9ec29406a01e 100644
> --- a/include/linux/vm_event_item.h
> +++ b/include/linux/vm_event_item.h
> @@ -51,6 +51,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
>  #endif
>  #ifdef CONFIG_COMPACTION
>               COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED,
> +             COMPACTFREE_DIRECT_ALLOC, COMPACTFREE_DIRECT_MISS,
>               COMPACTISOLATED,
>               COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
>               KCOMPACTD_WAKE,
> diff --git a/mm/compaction.c b/mm/compaction.c
> index fe94d22d9144..215db281ecaf 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -1083,6 +1083,41 @@ static void isolate_freepages(struct compact_control 
> *cc)
>       cc->free_pfn = isolate_start_pfn;
>  }
>  
> +static void isolate_freepages_direct(struct compact_control *cc)
> +{
> +     unsigned long nr_pages;
> +     unsigned long flags;
> +
> +     nr_pages = cc->nr_migratepages - cc->nr_freepages;
> +
> +     if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc))
> +             return;
> +
> +     while (nr_pages) {
> +             struct page *page;
> +             unsigned long pfn;
> +
> +             page = alloc_pages_zone(cc->zone, 0, MIGRATE_MOVABLE);
> +             if (!page)
> +                     break;
> +             pfn = page_to_pfn(page);
> +
> +             count_compact_event(COMPACTFREE_DIRECT_ALLOC);
> +
> +             /* Is the free page in the block we are migrating from? */
> +             if (pfn >> cc->order == (cc->migrate_pfn - 1) >> cc->order) {
> +                     list_add(&page->lru, &cc->freepages_held);
> +                     count_compact_event(COMPACTFREE_DIRECT_MISS);
> +             } else {
> +                     list_add(&page->lru, &cc->freepages);
> +                     cc->nr_freepages++;
> +                     nr_pages--;
> +             }
> +     }
> +
> +     spin_unlock_irqrestore(&cc->zone->lock, flags);
> +}
> +
>  /*
>   * This is a migrate-callback that "allocates" freepages by taking pages
>   * from the isolated freelists in the block we are migrating to.
> @@ -1099,7 +1134,12 @@ static struct page *compaction_alloc(struct page 
> *migratepage,
>        * contention.
>        */
>       if (list_empty(&cc->freepages)) {
> -             if (!cc->contended)
> +             if (cc->contended)
> +                     return NULL;
> +
> +             if (cc->direct_compaction && (cc->mode == MIGRATE_ASYNC))
> +                     isolate_freepages_direct(cc);
> +             else
>                       isolate_freepages(cc);
>  
>               if (list_empty(&cc->freepages))
> @@ -1475,6 +1515,10 @@ static int compact_zone(struct zone *zone, struct 
> compact_control *cc)
>                                               (cc->mode == MIGRATE_ASYNC)) {
>                               cc->migrate_pfn = block_end_pfn(
>                                               cc->migrate_pfn - 1, cc->order);
> +
> +                             if (!list_empty(&cc->freepages_held))
> +                                     release_freepages(&cc->freepages_held);
> +
>                               /* Draining pcplists is useless in this case */
>                               cc->last_migrated_pfn = 0;
>  
> @@ -1495,6 +1539,8 @@ static int compact_zone(struct zone *zone, struct 
> compact_control *cc)
>                               block_start_pfn(cc->migrate_pfn, cc->order);
>  
>                       if (cc->last_migrated_pfn < current_block_start) {
> +                             if (!list_empty(&cc->freepages_held))
> +                                     release_freepages(&cc->freepages_held);
>                               cpu = get_cpu();
>                               lru_add_drain_cpu(cpu);
>                               drain_local_pages(zone);
> @@ -1525,6 +1571,8 @@ static int compact_zone(struct zone *zone, struct 
> compact_control *cc)
>               if (free_pfn > zone->compact_cached_free_pfn)
>                       zone->compact_cached_free_pfn = free_pfn;
>       }
> +     if (!list_empty(&cc->freepages_held))
> +             release_freepages(&cc->freepages_held);
>  
>       trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
>                               cc->free_pfn, end_pfn, sync, ret);
> @@ -1553,6 +1601,7 @@ static unsigned long compact_zone_order(struct zone 
> *zone, int order,
>       };
>       INIT_LIST_HEAD(&cc.freepages);
>       INIT_LIST_HEAD(&cc.migratepages);
> +     INIT_LIST_HEAD(&cc.freepages_held);
>  
>       ret = compact_zone(zone, &cc);
>  
> @@ -1698,6 +1747,7 @@ static void __compact_pgdat(pg_data_t *pgdat, struct 
> compact_control *cc)
>               cc->zone = zone;
>               INIT_LIST_HEAD(&cc->freepages);
>               INIT_LIST_HEAD(&cc->migratepages);
> +             INIT_LIST_HEAD(&cc->freepages_held);
>  
>               /*
>                * When called via /proc/sys/vm/compact_memory
> diff --git a/mm/internal.h b/mm/internal.h
> index b79abb6721cf..a0c0286a9567 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -145,6 +145,8 @@ static inline struct page *pageblock_pfn_to_page(unsigned 
> long start_pfn,
>  }
>  
>  extern int __isolate_free_page(struct page *page, unsigned int order);
> +extern struct page * alloc_pages_zone(struct zone *zone, unsigned int order,
> +                                                     int migratetype);
>  extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
>                                       unsigned int order);
>  extern void prep_compound_page(struct page *page, unsigned int order);
> @@ -165,6 +167,9 @@ extern int user_min_free_kbytes;
>  struct compact_control {
>       struct list_head freepages;     /* List of free pages to migrate to */
>       struct list_head migratepages;  /* List of pages being migrated */
> +     struct list_head freepages_held;/* List of free pages from the block
> +                                      * that's being migrated
> +                                      */
>       unsigned long nr_freepages;     /* Number of isolated free pages */
>       unsigned long nr_migratepages;  /* Number of pages to migrate */
>       unsigned long free_pfn;         /* isolate_freepages search base */
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 59de90d5d3a3..3ee83fe02274 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2343,6 +2343,33 @@ int split_free_page(struct page *page)
>  }
>  
>  /*
> + * Like split_free_page, but given the zone, it will grab a free page from
> + * the freelists.
> + */
> +struct page *
> +alloc_pages_zone(struct zone *zone, unsigned int order, int migratetype)
> +{
> +     struct page *page;
> +     unsigned long watermark;
> +
> +     watermark = low_wmark_pages(zone) + (1 << order);
> +     if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
> +             return NULL;
> +
> +     page = __rmqueue(zone, order, migratetype);
> +     if (!page)
> +             return NULL;
> +
> +     __mod_zone_freepage_state(zone, -(1 << order),
> +                                       get_pcppage_migratetype(page));
> +
> +     set_page_owner(page, order, __GFP_MOVABLE);
> +     set_page_refcounted(page);
> +
> +     return page;
> +}
> +
> +/*
>   * Allocate a page from the given zone. Use pcplists for order-0 allocations.
>   */
>  static inline
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index 5e4300482897..9e07d11afa0d 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -822,6 +822,8 @@ const char * const vmstat_text[] = {
>  #ifdef CONFIG_COMPACTION
>       "compact_migrate_scanned",
>       "compact_free_scanned",
> +     "compact_free_direct_alloc",
> +     "compact_free_direct_miss",
>       "compact_isolated",
>       "compact_stall",
>       "compact_fail",
> -- 
> 2.7.3
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majord...@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"d...@kvack.org";> em...@kvack.org </a>

Reply via email to