On Mon 21-10-19 19:56:54, Hillf Danton wrote:
> 
> Currently soft limit reclaim is frozen, see
> Documentation/admin-guide/cgroup-v2.rst for reasons.
> 
> Copying the page lru idea, a memcg lru is added for selecting a victim
> memcg to reclaim pages from under memory pressure. It now works in
> parallel to soft limit reclaim (slr), not only because the latter needs
> some time to reap but also because the coexistence makes it much easier
> to add the lru in a straightforward manner.

This doesn't explain what problem you are trying to fix or what feature
you are trying to add. It also doesn't explain the overall design.

> An lru list paired with a spin lock is added, along with a couple of
> helpers to add a memcg to the lru and to pick a victim from it. The
> current memcg high_work provides the other things it needs.
> 
> V1 is based on 5.4-rc3.
> 
> Changes since v0
> - add MEMCG_LRU in init/Kconfig
> - drop changes in mm/vmscan.c
> - make memcg lru work in parallel to slr
> 
> Cc: Chris Down <ch...@chrisdown.name>
> Cc: Tejun Heo <t...@kernel.org>
> Cc: Roman Gushchin <g...@fb.com>
> Cc: Michal Hocko <mho...@kernel.org>
> Cc: Johannes Weiner <han...@cmpxchg.org>
> Cc: Shakeel Butt <shake...@google.com>
> Cc: Matthew Wilcox <wi...@infradead.org>
> Cc: Minchan Kim <minc...@kernel.org>
> Cc: Mel Gorman <mgor...@suse.de>
> Signed-off-by: Hillf Danton <hdan...@sina.com>
> ---
> 
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -843,6 +843,14 @@ config MEMCG
>       help
>         Provides control over the memory footprint of tasks in a cgroup.
>  
> +config MEMCG_LRU
> +     bool
> +     depends on MEMCG
> +     help
> +       Select victim memcg on lru for page reclaiming.
> +
> +       Say N if unsure.
> +
>  config MEMCG_SWAP
>       bool "Swap controller"
>       depends on MEMCG && SWAP
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -223,6 +223,10 @@ struct mem_cgroup {
>       /* Upper bound of normal memory consumption range */
>       unsigned long high;
>  
> +#ifdef CONFIG_MEMCG_LRU
> +     struct list_head lru_node;
> +#endif
> +
>       /* Range enforcement for interrupt charges */
>       struct work_struct high_work;
>  
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2338,14 +2338,54 @@ static int memcg_hotplug_cpu_dead(unsign
>       return 0;
>  }
>  
> +#ifdef CONFIG_MEMCG_LRU
> +static DEFINE_SPINLOCK(memcg_lru_lock);
> +static LIST_HEAD(memcg_lru); /* a copy of page lru */
> +
> +static void memcg_add_lru(struct mem_cgroup *memcg)
> +{
> +     spin_lock_irq(&memcg_lru_lock);
> +     if (list_empty(&memcg->lru_node))
> +             list_add_tail(&memcg->lru_node, &memcg_lru);
> +     spin_unlock_irq(&memcg_lru_lock);
> +}
> +
> +static struct mem_cgroup *memcg_pick_lru(void)
> +{
> +     struct mem_cgroup *memcg, *next;
> +
> +     spin_lock_irq(&memcg_lru_lock);
> +
> +     list_for_each_entry_safe(memcg, next, &memcg_lru, lru_node) {
> +             list_del_init(&memcg->lru_node);
> +
> +             if (page_counter_read(&memcg->memory) > memcg->high) {
> +                     spin_unlock_irq(&memcg_lru_lock);
> +                     return memcg;
> +             }
> +     }
> +     spin_unlock_irq(&memcg_lru_lock);
> +
> +     return NULL;
> +}
> +#endif
> +
>  static void reclaim_high(struct mem_cgroup *memcg,
>                        unsigned int nr_pages,
>                        gfp_t gfp_mask)
>  {
> +#ifdef CONFIG_MEMCG_LRU
> +     struct mem_cgroup *start = memcg;
> +#endif
>       do {
>               if (page_counter_read(&memcg->memory) <= memcg->high)
>                       continue;
>               memcg_memory_event(memcg, MEMCG_HIGH);
> +             if (IS_ENABLED(CONFIG_MEMCG_LRU))
> +                     if (start != memcg) {
> +                             memcg_add_lru(memcg);
> +                             return;
> +                     }
>               try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
>       } while ((memcg = parent_mem_cgroup(memcg)));
>  }
> @@ -3158,6 +3198,13 @@ unsigned long mem_cgroup_soft_limit_recl
>       unsigned long excess;
>       unsigned long nr_scanned;
>  
> +     if (IS_ENABLED(CONFIG_MEMCG_LRU)) {
> +             struct mem_cgroup *memcg = memcg_pick_lru();
> +             if (memcg)
> +                     schedule_work(&memcg->high_work);
> +             return 0;
> +     }
> +
>       if (order > 0)
>               return 0;
>  
> @@ -5068,6 +5115,8 @@ static struct mem_cgroup *mem_cgroup_all
>       if (memcg_wb_domain_init(memcg, GFP_KERNEL))
>               goto fail;
>  
> +     if (IS_ENABLED(CONFIG_MEMCG_LRU))
> +             INIT_LIST_HEAD(&memcg->lru_node);
>       INIT_WORK(&memcg->high_work, high_work_func);
>       memcg->last_scanned_node = MAX_NUMNODES;
>       INIT_LIST_HEAD(&memcg->oom_notify);
> --
> 

-- 
Michal Hocko
SUSE Labs

Reply via email to