On 07/29/2014 02:59 AM, David Rientjes wrote:
On Mon, 28 Jul 2014, Vlastimil Babka wrote:

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index b2e4c92..60bdf8d 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -13,6 +13,14 @@
  /* The full zone was compacted */
  #define COMPACT_COMPLETE      4

+/* Used to signal whether compaction detected need_resched() or lock contention */
+/* No contention detected */
+#define COMPACT_CONTENDED_NONE 0
+/* Either need_resched() was true or fatal signal pending */
+#define COMPACT_CONTENDED_SCHED        1
+/* Zone lock or lru_lock was contended in async compaction */
+#define COMPACT_CONTENDED_LOCK 2
+

Make this an enum?

I tried that originally, but then I would have to define it elsewhere (mm/internal.h, I think) together with compact_control. I didn't think it was worth the extra pollution of a shared header, when the return codes are also #defines and we might still get rid of the need_resched() checking one day.
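For illustration, the enum variant would have looked something like this in mm/internal.h (a hypothetical sketch, not part of the posted patch):

        /* hypothetical: shared with compact_control via mm/internal.h */
        enum compact_contended {
                COMPACT_CONTENDED_NONE,  /* no contention detected */
                COMPACT_CONTENDED_SCHED, /* need_resched() or fatal signal pending */
                COMPACT_CONTENDED_LOCK,  /* zone lock or lru_lock contended in async compaction */
        };

and cc->contended plus the *contended output parameter would then carry that type instead of int.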

@@ -223,9 +223,21 @@ static void update_pageblock_skip(struct compact_control *cc,
  }
  #endif /* CONFIG_COMPACTION */

-static inline bool should_release_lock(spinlock_t *lock)
+static int should_release_lock(spinlock_t *lock)
  {
-       return need_resched() || spin_is_contended(lock);
+       /*
+        * Sched contention has higher priority here as we may potentially
+        * have to abort whole compaction ASAP. Returning with lock contention
+        * means we will try another zone, and further decisions are
+        * influenced only when all zones are lock contended. That means
+        * potentially missing a lock contention is less critical.
+        */
+       if (need_resched())
+               return COMPACT_CONTENDED_SCHED;
+       else if (spin_is_contended(lock))
+               return COMPACT_CONTENDED_LOCK;
+       else
+               return COMPACT_CONTENDED_NONE;

I would avoid the last else statement and just return
COMPACT_CONTENDED_NONE.

OK, checkpatch agrees.
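The function would then read roughly like this (a sketch of the next revision, modulo the exact comment wording):

        static int should_release_lock(spinlock_t *lock)
        {
                /* sched contention has priority, see the comment above */
                if (need_resched())
                        return COMPACT_CONTENDED_SCHED;
                if (spin_is_contended(lock))
                        return COMPACT_CONTENDED_LOCK;

                return COMPACT_CONTENDED_NONE;
        }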

@@ -1154,11 +1168,11 @@ static unsigned long compact_zone_order(struct zone *zone, int order,
        INIT_LIST_HEAD(&cc.migratepages);

        ret = compact_zone(zone, &cc);
+       *contended = cc.contended;

        VM_BUG_ON(!list_empty(&cc.freepages));
        VM_BUG_ON(!list_empty(&cc.migratepages));

-       *contended = cc.contended;

Not sure why, but ok :)

Oversight due to how this patch evolved :)

        return ret;
  }

@@ -1171,14 +1185,15 @@ int sysctl_extfrag_threshold = 500;
   * @gfp_mask: The GFP mask of the current allocation
   * @nodemask: The allowed nodes to allocate from
   * @mode: The migration mode for async, sync light, or sync migration
- * @contended: Return value that is true if compaction was aborted due to lock contention
+ * @contended: Return value that determines if compaction was aborted due to
+ *            need_resched() or lock contention
   * @candidate_zone: Return the zone where we think allocation should succeed
   *
   * This is the main entry point for direct page compaction.
   */
  unsigned long try_to_compact_pages(struct zonelist *zonelist,
                        int order, gfp_t gfp_mask, nodemask_t *nodemask,
-                       enum migrate_mode mode, bool *contended,
+                       enum migrate_mode mode, int *contended,
                        struct zone **candidate_zone)
  {
        enum zone_type high_zoneidx = gfp_zone(gfp_mask);
@@ -1188,6 +1203,9 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
        struct zone *zone;
        int rc = COMPACT_DEFERRED;
        int alloc_flags = 0;
+       bool all_zones_lock_contended = true; /* init true for &= operation */
+
+       *contended = COMPACT_CONTENDED_NONE;

        /* Check if the GFP flags allow compaction */
        if (!order || !may_enter_fs || !may_perform_io)
@@ -1201,13 +1219,20 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
        for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
                                                                nodemask) {
                int status;
+               int zone_contended;

                if (compaction_deferred(zone, order))
                        continue;

                status = compact_zone_order(zone, order, gfp_mask, mode,
-                                               contended);
+                                                       &zone_contended);
                rc = max(status, rc);
+               /*
+                * It takes at least one zone that wasn't lock contended
+                * to turn all_zones_lock_contended to false.
+                */
+               all_zones_lock_contended &=
+                       (zone_contended == COMPACT_CONTENDED_LOCK);

Eek, does this always work?  COMPACT_CONTENDED_LOCK is 0x2 and

(zone_contended == COMPACT_CONTENDED_LOCK) is a bool so it should work?

all_zones_lock_contended is a bool initialized to true.  I'm fairly
certain you'd get better code generation if you defined
all_zones_lock_contended as

        int all_zones_lock_contended = COMPACT_CONTENDED_LOCK

from the start and the source code would be more clear.

Yeah that would look nicer, thanks for the suggestion.
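The accumulation in try_to_compact_pages() would then become something like this (a sketch of the suggested variant; the post-loop handling is not shown in the quoted hunks):

        int all_zones_lock_contended = COMPACT_CONTENDED_LOCK;
        ...
                /*
                 * Works because COMPACT_CONTENDED_NONE (0) and
                 * COMPACT_CONTENDED_SCHED (1) both clear the 0x2 bit, so the
                 * value stays COMPACT_CONTENDED_LOCK only if every compacted
                 * zone reported lock contention.
                 */
                all_zones_lock_contended &= zone_contended;
        ...
        /* after the loop: every compacted zone hit lock contention */
        if (all_zones_lock_contended == COMPACT_CONTENDED_LOCK)
                *contended = COMPACT_CONTENDED_LOCK;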


                /* If a normal allocation would succeed, stop compacting */
                if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0,
@@ -1220,8 +1245,20 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
                         * succeeds in this zone.
                         */
                        compaction_defer_reset(zone, order, false);
-                       break;
-               } else if (mode != MIGRATE_ASYNC) {
+                       /*
+                        * It is possible that async compaction aborted due to
+                        * need_resched() and the watermarks were ok thanks to
+                        * somebody else freeing memory. The allocation can
+                        * however still fail so we better signal the
+                        * need_resched() contention anyway.
+                        */

Makes sense because the page allocator still tries to allocate the page
even if this is returned, but it's not very clear in the comment.

OK
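Something like this then, spelling out that the page allocator still attempts the allocation (sketch only; exact wording and placement may differ in the next revision):

                        /*
                         * It is possible that async compaction aborted due to
                         * need_resched() and the watermarks were ok thanks to
                         * somebody else freeing memory. The allocation can
                         * however still fail, and the page allocator attempts
                         * it even when we report contention, so better signal
                         * the need_resched() contention anyway and let the
                         * caller decide how to proceed.
                         */
                        if (zone_contended == COMPACT_CONTENDED_SCHED)
                                *contended = COMPACT_CONTENDED_SCHED;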

@@ -2660,15 +2660,36 @@ rebalance:
        if (!(gfp_mask & __GFP_NO_KSWAPD) || (current->flags & PF_KTHREAD))
                migration_mode = MIGRATE_SYNC_LIGHT;

-       /*
-        * If compaction is deferred for high-order allocations, it is because
-        * sync compaction recently failed. In this is the case and the caller
-        * requested a movable allocation that does not heavily disrupt the
-        * system then fail the allocation instead of entering direct reclaim.
-        */
-       if ((deferred_compaction || contended_compaction) &&
-                                               (gfp_mask & __GFP_NO_KSWAPD))
-               goto nopage;

Hmm, this check will have unfortunately changed in the latest mmotm due to
mm-thp-restructure-thp-avoidance-of-light-synchronous-migration.patch.

I think you were changing (and moving around) a different check so there would be merge conflicts but no semantic problem.

+       /* Checks for THP-specific high-order allocations */
+       if (gfp_mask & __GFP_NO_KSWAPD) {
+               /*
+                * If compaction is deferred for high-order allocations, it is
+                * because sync compaction recently failed. If this is the case
+                * and the caller requested a THP allocation, we do not want
+                * to heavily disrupt the system, so we fail the allocation
+                * instead of entering direct reclaim.
+                */
+               if (deferred_compaction)
+                       goto nopage;
+
+               /*
+                * In all zones where compaction was attempted (and not
+                * deferred or skipped), lock contention has been detected.
+                * For THP allocation we do not want to disrupt the others
+                * so we fallback to base pages instead.
+                */
+               if (contended_compaction == COMPACT_CONTENDED_LOCK)
+                       goto nopage;
+
+               /*
+                * If compaction was aborted due to need_resched(), we do not
+                * want to further increase allocation latency, unless it is
+                * khugepaged trying to collapse.
+                */
+               if (contended_compaction == COMPACT_CONTENDED_SCHED
+                       && !(current->flags & PF_KTHREAD))
+                       goto nopage;
+       }

        /* Try direct reclaim and then allocating */
        page = __alloc_pages_direct_reclaim(gfp_mask, order,
