On Mon, Jul 22, 2019 at 03:06:39PM -0700, Andrew Morton wrote:
[snip] 
> > +   *end = *start + count * BITS_PER_BYTE;
> > +   if (*end > max_frame)
> > +           *end = max_frame;
> > +   return 0;
> > +}
> > +
> >
> > ...
> >
> > +static void add_page_idle_list(struct page *page,
> > +                          unsigned long addr, struct mm_walk *walk)
> > +{
> > +   struct page *page_get;
> > +   struct page_node *pn;
> > +   int bit;
> > +   unsigned long frames;
> > +   struct page_idle_proc_priv *priv = walk->private;
> > +   u64 *chunk = (u64 *)priv->buffer;
> > +
> > +   if (priv->write) {
> > +           /* Find whether this page was asked to be marked */
> > +           frames = (addr - priv->start_addr) >> PAGE_SHIFT;
> > +           bit = frames % BITMAP_CHUNK_BITS;
> > +           chunk = &chunk[frames / BITMAP_CHUNK_BITS];
> > +           if (((*chunk >> bit) & 1) == 0)
> > +                   return;
> > +   }
> > +
> > +   page_get = page_idle_get_page(page);
> > +   if (!page_get)
> > +           return;
> > +
> > +   pn = kmalloc(sizeof(*pn), GFP_ATOMIC);
> 
> I'm not liking this GFP_ATOMIC.  If I'm reading the code correctly,
> userspace can ask for an arbitrarily large number of GFP_ATOMIC
> allocations by doing a large read.  This can potentially exhaust page
> reserves which things like networking Rx interrupts need and can make
> this whole feature less reliable.

For the next revision, I will pre-allocate the page nodes so that the
GFP_ATOMIC allocation is no longer needed. The diff on top of this patch is
below. Let me know if you have any comments, thanks.

Btw, I also dropped the idle_page_list_lock by putting the idle_page_list
list_head on the stack instead of the heap.
---8<-----------------------

From: "Joel Fernandes (Google)" <j...@joelfernandes.org>
Subject: [PATCH] mm/page_idle: Avoid need for GFP_ATOMIC

GFP_ATOMIC allocations can deplete the memory reserves needed by other
allocators, such as networking Rx interrupts. Pre-allocate the list of page
nodes up front so that the spinlocked region can just use it.

Suggested-by: Andrew Morton <a...@linux-foundation.org>
Signed-off-by: Joel Fernandes (Google) <j...@joelfernandes.org>
---
 mm/page_idle.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/mm/page_idle.c b/mm/page_idle.c
index 874a60c41fef..b9c790721f16 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -266,6 +266,10 @@ struct page_idle_proc_priv {
        unsigned long start_addr;
        char *buffer;
        int write;
+
+       /* Pre-allocate and provide nodes to add_page_idle_list() */
+       struct page_node *page_nodes;
+       int cur_page_node;
 };
 
 static void add_page_idle_list(struct page *page,
@@ -291,10 +295,7 @@ static void add_page_idle_list(struct page *page,
        if (!page_get)
                return;
 
-       pn = kmalloc(sizeof(*pn), GFP_ATOMIC);
-       if (!pn)
-               return;
-
+       pn = &(priv->page_nodes[priv->cur_page_node++]);
        pn->page = page_get;
        pn->addr = addr;
        list_add(&pn->list, &idle_page_list);
@@ -379,6 +380,15 @@ ssize_t page_idle_proc_generic(struct file *file, char 
__user *ubuff,
        priv.buffer = buffer;
        priv.start_addr = start_addr;
        priv.write = write;
+
+       priv.cur_page_node = 0;
+       priv.page_nodes = kzalloc(sizeof(struct page_node) * (end_frame - 
start_frame),
+                                 GFP_KERNEL);
+       if (!priv.page_nodes) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
        walk.private = &priv;
        walk.mm = mm;
 
@@ -425,6 +435,7 @@ ssize_t page_idle_proc_generic(struct file *file, char 
__user *ubuff,
                ret = copy_to_user(ubuff, buffer, count);
 
        up_read(&mm->mmap_sem);
+       kfree(priv.page_nodes);
 out:
        kfree(buffer);
 out_mmput:
-- 
2.22.0.657.g960e92d24f-goog

Reply via email to