Soft limit reclaim has to check the whole reclaim hierarchy while doing
the first pass of the reclaim. This leads to higher system time, which
can be visible especially when there are many groups in the hierarchy.

- TODO put testing results here

This patch adds a per-memcg counter of children in excess. It also
restores MEM_CGROUP_TARGET_SOFTLIMIT into mem_cgroup_event_ratelimit for
proper batching.
When a group crosses its soft limit for the first time, it increments
children_in_excess of every parent up the hierarchy. Similarly, when a
group gets back below the limit, it decrements those counters. Whether
a group currently contributes to its parents' counters is recorded in
the soft_contributed flag.
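
As an illustration of the bookkeeping, here is a minimal userspace
sketch with simplified stand-in types and a hypothetical
update_soft_limit() helper mirroring mem_cgroup_update_soft_limit()
below (not the real kernel structures):

#include <stdbool.h>

struct group {
	struct group *parent;
	bool soft_contributed;	/* already counted in the ancestors? */
	int children_in_excess;	/* groups in excess below this one */
};

/* Userspace model of mem_cgroup_update_soft_limit(). */
static void update_soft_limit(struct group *g, bool in_excess)
{
	int delta = 0;

	if (in_excess && !g->soft_contributed) {
		delta = 1;
		g->soft_contributed = true;
	} else if (!in_excess && g->soft_contributed) {
		delta = -1;
		g->soft_contributed = false;
	}

	/* Every ancestor is updated; the group itself is not counted. */
	for (g = g->parent; delta && g; g = g->parent)
		g->children_in_excess += delta;
}

int main(void)
{
	struct group a = { 0 }, b = { .parent = &a }, c = { .parent = &b };

	update_soft_limit(&c, true);	/* C crosses its soft limit */
	/* a.children_in_excess == 1, b.children_in_excess == 1, c stays 0 */
	update_soft_limit(&c, false);	/* C drops back below the limit */
	/* both counters are back to 0 */
	return 0;
}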

mem_cgroup_soft_reclaim_eligible then uses this information to better
decide whether to skip just the node or its whole subtree. The rule is
simple: skip only the node itself if it has children in excess,
otherwise skip the whole subtree.
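
Assuming the VISIT/SKIP/SKIP_TREE filter values from the earlier
patches in this series, the tail of mem_cgroup_soft_reclaim_eligible()
then boils down to (a group which is itself over the limit, or has an
ancestor over the limit, has already returned VISIT at this point):

	/* Neither this group nor any ancestor is over its soft limit. */
	if (!atomic_read(&memcg->children_in_excess))
		return SKIP_TREE;	/* nothing in excess below, prune */
	return SKIP;			/* skip the node, keep descending */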

Signed-off-by: Michal Hocko <mho...@suse.cz>
---
 mm/memcontrol.c |   51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 981ee12..60b48bc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -136,6 +136,7 @@ static const char * const mem_cgroup_lru_names[] = {
  */
 enum mem_cgroup_events_target {
        MEM_CGROUP_TARGET_THRESH,
+       MEM_CGROUP_TARGET_SOFTLIMIT,
        MEM_CGROUP_TARGET_NUMAINFO,
        MEM_CGROUP_NTARGETS,
 };
@@ -355,6 +356,10 @@ struct mem_cgroup {
        atomic_t        numainfo_updating;
 #endif
 
+       spinlock_t soft_lock;
+       bool soft_contributed;
+       atomic_t children_in_excess;
+
        /*
         * Per cgroup active and inactive list, similar to the
         * per zone LRU lists.
@@ -890,6 +895,9 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
                case MEM_CGROUP_TARGET_THRESH:
                        next = val + THRESHOLDS_EVENTS_TARGET;
                        break;
+               case MEM_CGROUP_TARGET_SOFTLIMIT:
+                       next = val + SOFTLIMIT_EVENTS_TARGET;
+                       break;
                case MEM_CGROUP_TARGET_NUMAINFO:
                        next = val + NUMAINFO_EVENTS_TARGET;
                        break;
@@ -902,6 +910,34 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
        return false;
 }
 
+static void mem_cgroup_update_soft_limit(struct mem_cgroup *memcg)
+{
+       unsigned long long excess = res_counter_soft_limit_excess(&memcg->res);
+       struct mem_cgroup *parent = memcg;
+       int delta = 0;
+
+       spin_lock(&memcg->soft_lock);
+       if (excess) {
+               if (!memcg->soft_contributed) {
+                       delta = 1;
+                       memcg->soft_contributed = true;
+               }
+       } else {
+               if (memcg->soft_contributed) {
+                       delta = -1;
+                       memcg->soft_contributed = false;
+               }
+       }
+
+       /*
+        * Necessary to update all ancestors when hierarchy is used
+        * because their event counter is not touched.
+        */
+       while (delta && (parent = parent_mem_cgroup(parent)))
+               atomic_add(delta, &parent->children_in_excess);
+       spin_unlock(&memcg->soft_lock);
+}
+
 /*
  * Check events in order.
  *
@@ -912,8 +948,11 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
        /* threshold event is triggered in finer grain than soft limit */
        if (unlikely(mem_cgroup_event_ratelimit(memcg,
                                                MEM_CGROUP_TARGET_THRESH))) {
+               bool do_softlimit;
                bool do_numainfo __maybe_unused;
 
+               do_softlimit = mem_cgroup_event_ratelimit(memcg,
+                                               MEM_CGROUP_TARGET_SOFTLIMIT);
 #if MAX_NUMNODES > 1
                do_numainfo = mem_cgroup_event_ratelimit(memcg,
                                                MEM_CGROUP_TARGET_NUMAINFO);
@@ -921,6 +960,8 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
                preempt_enable();
 
                mem_cgroup_threshold(memcg);
+               if (unlikely(do_softlimit))
+                       mem_cgroup_update_soft_limit(memcg);
 #if MAX_NUMNODES > 1
                if (unlikely(do_numainfo))
                        atomic_inc(&memcg->numainfo_events);
@@ -1894,6 +1935,9 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
  * hierarchy if
  *     a) it is over its soft limit
  *     b) any parent up the hierarchy is over its soft limit
+ *
+ * If the given group doesn't have any children over the limit then it
+ * doesn't make any sense to iterate its subtree.
  */
 enum mem_cgroup_filter_t
 mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
@@ -1915,6 +1959,8 @@ mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
                        break;
        }
 
+       if (!atomic_read(&memcg->children_in_excess))
+               return SKIP_TREE;
        return SKIP;
 }
 
@@ -6061,6 +6107,7 @@ mem_cgroup_css_alloc(struct cgroup *cont)
        mutex_init(&memcg->thresholds_lock);
        spin_lock_init(&memcg->move_lock);
        vmpressure_init(&memcg->vmpressure);
+       spin_lock_init(&memcg->soft_lock);
 
        return &memcg->css;
 
@@ -6150,6 +6197,10 @@ static void mem_cgroup_css_offline(struct cgroup *cont)
 
        mem_cgroup_invalidate_reclaim_iterators(memcg);
        mem_cgroup_reparent_charges(memcg);
+       if (memcg->soft_contributed) {
+               while ((memcg = parent_mem_cgroup(memcg)))
+                       atomic_dec(&memcg->children_in_excess);
+       }
        mem_cgroup_destroy_all_caches(memcg);
 }
 
-- 
1.7.10.4
