On Mon, 28 Jul 2014, Vlastimil Babka wrote:

> diff --git a/include/linux/compaction.h b/include/linux/compaction.h
> index b2e4c92..60bdf8d 100644
> --- a/include/linux/compaction.h
> +++ b/include/linux/compaction.h
> @@ -13,6 +13,14 @@
>  /* The full zone was compacted */
>  #define COMPACT_COMPLETE     4
>  
> +/* Used to signal whether compaction detected need_resched() or lock contention */
> +/* No contention detected */
> +#define COMPACT_CONTENDED_NONE       0
> +/* Either need_sched() was true or fatal signal pending */
> +#define COMPACT_CONTENDED_SCHED      1
> +/* Zone lock or lru_lock was contended in async compaction */
> +#define COMPACT_CONTENDED_LOCK       2
> +

Make this an enum?
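
Something like this, perhaps, keeping the same values and comments (the
enum tag is just a suggestion):

        enum compact_contended {
                /* No contention detected */
                COMPACT_CONTENDED_NONE,
                /* Either need_resched() was true or fatal signal pending */
                COMPACT_CONTENDED_SCHED,
                /* Zone lock or lru_lock was contended in async compaction */
                COMPACT_CONTENDED_LOCK,
        };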

>  #ifdef CONFIG_COMPACTION
>  extern int sysctl_compact_memory;
>  extern int sysctl_compaction_handler(struct ctl_table *table, int write,
> @@ -24,7 +32,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
>  extern int fragmentation_index(struct zone *zone, unsigned int order);
>  extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
>                       int order, gfp_t gfp_mask, nodemask_t *mask,
> -                     enum migrate_mode mode, bool *contended,
> +                     enum migrate_mode mode, int *contended,
>                       struct zone **candidate_zone);
>  extern void compact_pgdat(pg_data_t *pgdat, int order);
>  extern void reset_isolation_suitable(pg_data_t *pgdat);
> @@ -94,7 +102,7 @@ static inline bool compaction_restarting(struct zone *zone, int order)
>  #else
>  static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
>                       int order, gfp_t gfp_mask, nodemask_t *nodemask,
> -                     enum migrate_mode mode, bool *contended,
> +                     enum migrate_mode mode, int *contended,
>                       struct zone **candidate_zone)
>  {
>       return COMPACT_CONTINUE;
> diff --git a/mm/compaction.c b/mm/compaction.c
> index 76a9775..2b8b6d8 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -223,9 +223,21 @@ static void update_pageblock_skip(struct compact_control *cc,
>  }
>  #endif /* CONFIG_COMPACTION */
>  
> -static inline bool should_release_lock(spinlock_t *lock)
> +static int should_release_lock(spinlock_t *lock)
>  {
> -     return need_resched() || spin_is_contended(lock);
> +     /*
> +      * Sched contention has higher priority here as we may potentially
> +      * have to abort the whole compaction ASAP. Returning with lock contention
> +      * means we will try another zone, and further decisions are
> +      * influenced only when all zones are lock contended. That means
> +      * potentially missing a lock contention is less critical.
> +      */
> +     if (need_resched())
> +             return COMPACT_CONTENDED_SCHED;
> +     else if (spin_is_contended(lock))
> +             return COMPACT_CONTENDED_LOCK;
> +     else
> +             return COMPACT_CONTENDED_NONE;

I would avoid the last else statement and just return 
COMPACT_CONTENDED_NONE.
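
I.e., something like:

        if (need_resched())
                return COMPACT_CONTENDED_SCHED;
        else if (spin_is_contended(lock))
                return COMPACT_CONTENDED_LOCK;

        return COMPACT_CONTENDED_NONE;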

>  }
>  
>  /*
> @@ -240,7 +252,9 @@ static inline bool should_release_lock(spinlock_t *lock)
>  static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
>                                     bool locked, struct compact_control *cc)
>  {
> -     if (should_release_lock(lock)) {
> +     int contended = should_release_lock(lock);
> +
> +     if (contended) {
>               if (locked) {
>                       spin_unlock_irqrestore(lock, *flags);
>                       locked = false;
> @@ -248,7 +262,7 @@ static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
>  
>               /* async aborts if taking too long or contended */
>               if (cc->mode == MIGRATE_ASYNC) {
> -                     cc->contended = true;
> +                     cc->contended = contended;
>                       return false;
>               }
>  
> @@ -274,7 +288,7 @@ static inline bool compact_should_abort(struct compact_control *cc)
>       /* async compaction aborts if contended */
>       if (need_resched()) {
>               if (cc->mode == MIGRATE_ASYNC) {
> -                     cc->contended = true;
> +                     cc->contended = COMPACT_CONTENDED_SCHED;
>                       return true;
>               }
>  
> @@ -1139,7 +1153,7 @@ out:
>  }
>  
>  static unsigned long compact_zone_order(struct zone *zone, int order,
> -             gfp_t gfp_mask, enum migrate_mode mode, bool *contended)
> +             gfp_t gfp_mask, enum migrate_mode mode, int *contended)
>  {
>       unsigned long ret;
>       struct compact_control cc = {
> @@ -1154,11 +1168,11 @@ static unsigned long compact_zone_order(struct zone *zone, int order,
>       INIT_LIST_HEAD(&cc.migratepages);
>  
>       ret = compact_zone(zone, &cc);
> +     *contended = cc.contended;
>  
>       VM_BUG_ON(!list_empty(&cc.freepages));
>       VM_BUG_ON(!list_empty(&cc.migratepages));
>  
> -     *contended = cc.contended;

Not sure why, but ok :)

>       return ret;
>  }
>  
> @@ -1171,14 +1185,15 @@ int sysctl_extfrag_threshold = 500;
>   * @gfp_mask: The GFP mask of the current allocation
>   * @nodemask: The allowed nodes to allocate from
>   * @mode: The migration mode for async, sync light, or sync migration
> - * @contended: Return value that is true if compaction was aborted due to lock contention
> + * @contended: Return value that indicates whether compaction was aborted due to
> + *          need_resched() or lock contention
>   * @candidate_zone: Return the zone where we think allocation should succeed
>   *
>   * This is the main entry point for direct page compaction.
>   */
>  unsigned long try_to_compact_pages(struct zonelist *zonelist,
>                       int order, gfp_t gfp_mask, nodemask_t *nodemask,
> -                     enum migrate_mode mode, bool *contended,
> +                     enum migrate_mode mode, int *contended,
>                       struct zone **candidate_zone)
>  {
>       enum zone_type high_zoneidx = gfp_zone(gfp_mask);
> @@ -1188,6 +1203,9 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
>       struct zone *zone;
>       int rc = COMPACT_DEFERRED;
>       int alloc_flags = 0;
> +     bool all_zones_lock_contended = true; /* init true for &= operation */
> +
> +     *contended = COMPACT_CONTENDED_NONE;
>  
>       /* Check if the GFP flags allow compaction */
>       if (!order || !may_enter_fs || !may_perform_io)
> @@ -1201,13 +1219,20 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
>       for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
>                                                               nodemask) {
>               int status;
> +             int zone_contended;
>  
>               if (compaction_deferred(zone, order))
>                       continue;
>  
>               status = compact_zone_order(zone, order, gfp_mask, mode,
> -                                             contended);
> +                                                     &zone_contended);
>               rc = max(status, rc);
> +             /*
> +              * It takes at least one zone that wasn't lock contended
> +              * to turn all_zones_lock_contended to false.
> +              */
> +             all_zones_lock_contended &=
> +                     (zone_contended == COMPACT_CONTENDED_LOCK);

Eek, does this always work?  COMPACT_CONTENDED_LOCK is 0x2 and 
all_zones_lock_contended is a bool initialized to true.  I'm fairly 
certain you'd get better code generation if you defined 
all_zones_lock_contended as

        int all_zones_lock_contended = COMPACT_CONTENDED_LOCK

from the start and the source code would be more clear.
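
The update in the loop could then be a plain bitwise AND (untested, and
it relies on COMPACT_CONTENDED_SCHED and COMPACT_CONTENDED_LOCK not
sharing any bits, which would deserve a comment):

        all_zones_lock_contended &= zone_contended;

and the check after the loop becomes

        if (rc > COMPACT_SKIPPED &&
                        all_zones_lock_contended == COMPACT_CONTENDED_LOCK)
                *contended = COMPACT_CONTENDED_LOCK;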

>  
>               /* If a normal allocation would succeed, stop compacting */
>               if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0,
> @@ -1220,8 +1245,20 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
>                        * succeeds in this zone.
>                        */
>                       compaction_defer_reset(zone, order, false);
> -                     break;
> -             } else if (mode != MIGRATE_ASYNC) {
> +                     /*
> +                      * It is possible that async compaction aborted due to
> +                      * need_resched() and the watermarks were ok thanks to
> +                      * somebody else freeing memory. The allocation can
> +                      * however still fail so we better signal the
> +                      * need_resched() contention anyway.
> +                      */

Makes sense, because the page allocator still tries to allocate the page 
even when COMPACT_CONTENDED_SCHED is returned, but the comment doesn't 
make that very clear.
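
Perhaps spell that out, something along the lines of:

        /*
         * Async compaction may have aborted due to need_resched(), and
         * the watermarks may only be ok because somebody else freed
         * memory.  The allocation can still fail, so signal the
         * need_resched() contention so that the page allocator can
         * take it into account instead of assuming the allocation
         * will succeed.
         */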

> +                     if (zone_contended == COMPACT_CONTENDED_SCHED)
> +                             *contended = COMPACT_CONTENDED_SCHED;
> +
> +                     goto break_loop;
> +             }
> +
> +             if (mode != MIGRATE_ASYNC) {
>                       /*
>                        * We think that allocation won't succeed in this zone
>                        * so we defer compaction there. If it ends up
> @@ -1229,8 +1266,36 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
>                        */
>                       defer_compaction(zone, order);
>               }
> +
> +             /*
> +              * We might have stopped compacting due to need_resched() in
> +              * async compaction, or due to a fatal signal detected. In that
> +              * case do not try further zones and signal need_resched()
> +              * contention.
> +              */
> +             if ((zone_contended == COMPACT_CONTENDED_SCHED)
> +                                     || fatal_signal_pending(current)) {
> +                     *contended = COMPACT_CONTENDED_SCHED;
> +                     goto break_loop;
> +             }
> +
> +             continue;
> +break_loop:
> +             /*
> +              * We might not have tried all the zones, so be conservative
> +              * and assume they are not all lock contended.
> +              */
> +             all_zones_lock_contended = false;
> +             break;
>       }
>  
> +     /*
> +      * If at least one zone wasn't deferred or skipped, we report if all
> +      * zones that were tried were contended.
> +      */
> +     if (rc > COMPACT_SKIPPED && all_zones_lock_contended)
> +             *contended = COMPACT_CONTENDED_LOCK;
> +
>       return rc;
>  }
>  
> diff --git a/mm/internal.h b/mm/internal.h
> index 5a0738f..4c1d604 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -144,8 +144,8 @@ struct compact_control {
>       int order;                      /* order a direct compactor needs */
>       int migratetype;                /* MOVABLE, RECLAIMABLE etc */
>       struct zone *zone;
> -     bool contended;                 /* True if a lock was contended, or
> -                                      * need_resched() true during async
> +     int contended;                  /* Signal need_resched() or lock
> +                                      * contention detected during
>                                        * compaction
>                                        */
>  };
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index f424752..e3c633b 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2296,7 +2296,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
>       struct zonelist *zonelist, enum zone_type high_zoneidx,
>       nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
>       int classzone_idx, int migratetype, enum migrate_mode mode,
> -     bool *contended_compaction, bool *deferred_compaction,
> +     int *contended_compaction, bool *deferred_compaction,
>       unsigned long *did_some_progress)
>  {
>       struct zone *last_compact_zone = NULL;
> @@ -2547,7 +2547,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>       unsigned long did_some_progress;
>       enum migrate_mode migration_mode = MIGRATE_ASYNC;
>       bool deferred_compaction = false;
> -     bool contended_compaction = false;
> +     int contended_compaction = COMPACT_CONTENDED_NONE;
>  
>       /*
>        * In the slowpath, we sanity check order to avoid ever trying to
> @@ -2660,15 +2660,36 @@ rebalance:
>       if (!(gfp_mask & __GFP_NO_KSWAPD) || (current->flags & PF_KTHREAD))
>               migration_mode = MIGRATE_SYNC_LIGHT;
>  
> -     /*
> -      * If compaction is deferred for high-order allocations, it is because
> -      * sync compaction recently failed. In this is the case and the caller
> -      * requested a movable allocation that does not heavily disrupt the
> -      * system then fail the allocation instead of entering direct reclaim.
> -      */
> -     if ((deferred_compaction || contended_compaction) &&
> -                                             (gfp_mask & __GFP_NO_KSWAPD))
> -             goto nopage;

Hmm, this check will have unfortunately changed in the latest mmotm due to 
mm-thp-restructure-thp-avoidance-of-light-synchronous-migration.patch.

> +     /* Checks for THP-specific high-order allocations */
> +     if (gfp_mask & __GFP_NO_KSWAPD) {
> +             /*
> +              * If compaction is deferred for high-order allocations, it is
> +              * because sync compaction recently failed. If this is the case
> +              * and the caller requested a THP allocation, we do not want
> +              * to heavily disrupt the system, so we fail the allocation
> +              * instead of entering direct reclaim.
> +              */
> +             if (deferred_compaction)
> +                     goto nopage;
> +
> +             /*
> +              * In all zones where compaction was attempted (and not
> +              * deferred or skipped), lock contention has been detected.
> +              * For THP allocation we do not want to disrupt the others
> +              * so we fall back to base pages instead.
> +              */
> +             if (contended_compaction == COMPACT_CONTENDED_LOCK)
> +                     goto nopage;
> +
> +             /*
> +              * If compaction was aborted due to need_resched(), we do not
> +              * want to further increase allocation latency, unless it is
> +              * khugepaged trying to collapse.
> +              */
> +             if (contended_compaction == COMPACT_CONTENDED_SCHED
> +                     && !(current->flags & PF_KTHREAD))
> +                     goto nopage;
> +     }
>  
>       /* Try direct reclaim and then allocating */
>       page = __alloc_pages_direct_reclaim(gfp_mask, order,