We see high-order allocation warnings:

kernel: order 10 >= 10, gfp 0x40c00
kernel: WARNING: CPU: 5 PID: 182 at mm/page_alloc.c:5630 __alloc_pages+0x1d7/0x3f0
kernel: process_compressed_read+0x6f/0x590 [dm_qcow2]

This is because we have 1M clusters, and in the zstd compression case
the buffer used for decompression is clu_size + sizeof(ZSTD_DCtx) +
ZSTD_BLOCKSIZE_MAX + clu_size + ZSTD_BLOCKSIZE_MAX + 64 = 2520776
bytes, which requires a 4M allocation.
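
To spell the arithmetic out (sizeof(ZSTD_DCtx) is inferred here from
the stated total, so treat the per-term split as approximate):

	  1048576  clu_size
	+  161416  sizeof(ZSTD_DCtx)
	+  131072  ZSTD_BLOCKSIZE_MAX
	+ 1048576  clu_size
	+  131072  ZSTD_BLOCKSIZE_MAX
	+      64
	= 2520776  bytes

kmalloc() hands a request this large to the page allocator, which
rounds it up to a power-of-two number of pages: with 4K pages
get_order(2520776) == 10, i.e. exactly the order-10 (4M) allocation
the warning above complains about.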

This is a really big allocation, especially on the I/O path (GFP_NOIO),
and it has a high probability of failing.

Let's fix it by creating a memory pool for these 4M allocations.
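
For reference, the change below boils down to this pattern (a minimal
standalone sketch with made-up function names and trimmed error
handling, not the exact hunks):

	#include <linux/mempool.h>
	#include <linux/cpumask.h>
	#include <linux/sizes.h>

	static mempool_t *zbuf_pool;

	static int __init zbuf_pool_init(void)
	{
		/* Pre-fill the pool with num_possible_cpus() 4M elements
		 * backed by kvmalloc(), so the read path never has to do
		 * an order-10 page allocation itself.
		 */
		zbuf_pool = mempool_create_kvmalloc_pool(num_possible_cpus(),
							 SZ_4M);
		return zbuf_pool ? 0 : -ENOMEM;
	}

	static void decompress_cluster(void)
	{
		/* GFP_NOIO may block, so once the reserve is exhausted
		 * mempool_alloc() sleeps until an element is returned
		 * instead of failing with NULL.
		 */
		void *buf = mempool_alloc(zbuf_pool, GFP_NOIO);

		/* ... decompress one cluster into buf ... */

		mempool_free(buf, zbuf_pool);
	}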

Note: I'm not fully sure that num_possible_cpus() is a sufficient
minimum size for this pool: if any code path in
process_compressed_read() can schedule, then a single CPU might need
two or more pool elements at the same time. At least for now I don't
see anything there that schedules. Even if the pool does run dry, the
excess process_compressed_read() callers would merely sleep in
mempool_alloc() until buffers are released, so it should not be that
bad.

https://virtuozzo.atlassian.net/browse/VSTOR-94596
Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com>
---
 drivers/md/dm-qcow2-map.c    | 18 ++++++++++++++----
 drivers/md/dm-qcow2-target.c | 18 ++++++++++++++++--
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/drivers/md/dm-qcow2-map.c b/drivers/md/dm-qcow2-map.c
index 6585f3fac6e7b..c60ba9f341c26 100644
--- a/drivers/md/dm-qcow2-map.c
+++ b/drivers/md/dm-qcow2-map.c
@@ -10,6 +10,7 @@
 #include <linux/zlib.h>
 #include <linux/error-injection.h>
 #include <linux/zstd.h>
+#include <linux/sizes.h>
 
 #include "dm.h"
 #include "dm-rq.h"
@@ -3650,6 +3651,8 @@ static int complete_metadata_writeback(struct qcow2 *qcow2)
        return fsync_ret;
 }
 
+extern mempool_t *zbuf_pool;
+
 /* Process completed compressed READs */
 static void process_compressed_read(struct qcow2 *qcow2, struct list_head *read_list,
                                    struct list_head *cow_list)
@@ -3671,7 +3674,10 @@ static void process_compressed_read(struct qcow2 *qcow2, struct list_head *read_
                dctxlen = zlib_inflate_workspacesize();
 
 
-       buf = kmalloc(qcow2->clu_size + dctxlen, GFP_NOIO);
+       if (qcow2->clu_size + dctxlen <= SZ_4M)
+               buf = mempool_alloc(zbuf_pool, GFP_NOIO);
+       else
+               buf = kmalloc(qcow2->clu_size + dctxlen, GFP_NOIO);
        if (!buf) {
                end_qios(read_list, BLK_STS_RESOURCE);
                return;
@@ -3681,8 +3687,7 @@ static void process_compressed_read(struct qcow2 *qcow2, struct list_head *read_
                arg = zstd_init_dstream(qcow2->clu_size, buf + qcow2->clu_size, dctxlen);
                if (!arg) {
                        end_qios(read_list, BLK_STS_RESOURCE);
-                       kfree(buf);
-                       return;
+                       goto err_free;
                }
        } else {
                arg = buf + qcow2->clu_size;
@@ -3716,7 +3721,12 @@ static void process_compressed_read(struct qcow2 *qcow2, struct list_head *read_
                list_add_tail(&qio->link, cow_list);
        }
 
-       kfree(buf);
+err_free:
+       if (qcow2->clu_size + dctxlen <= SZ_4M)
+               mempool_free(buf, zbuf_pool);
+       else
+               kfree(buf);
+       return;
 }
 
 static int prepare_sliced_data_write(struct qcow2 *qcow2, struct qio *qio,
diff --git a/drivers/md/dm-qcow2-target.c b/drivers/md/dm-qcow2-target.c
index 276eab9acc4f3..a323bd2747af6 100644
--- a/drivers/md/dm-qcow2-target.c
+++ b/drivers/md/dm-qcow2-target.c
@@ -7,6 +7,8 @@
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/error-injection.h>
+#include <linux/cpumask.h>
+#include <linux/sizes.h>
 
 #include "dm.h"
 #include "dm-qcow2.h"
@@ -1036,6 +1038,8 @@ static struct target_type qcow2_target = {
        .llseek_hole = qcow2_llseek_hole,
 };
 
+mempool_t *zbuf_pool;
+
 static int __init dm_qcow2_init(void)
 {
        int ret;
@@ -1046,14 +1050,24 @@ static int __init dm_qcow2_init(void)
                return -ENOMEM;
 
        ret = dm_register_target(&qcow2_target);
-       if (ret)
+       if (ret) {
                kmem_cache_destroy(qrq_cache);
+               return ret;
+       }
 
-       return ret;
+       zbuf_pool = mempool_create_kvmalloc_pool(num_possible_cpus(), SZ_4M);
+       if (!zbuf_pool) {
+               dm_unregister_target(&qcow2_target);
+               kmem_cache_destroy(qrq_cache);
+               return -ENOMEM;
+       }
+
+       return 0;
 }
 
 static void __exit dm_qcow2_exit(void)
 {
+       mempool_destroy(zbuf_pool);
        dm_unregister_target(&qcow2_target);
        kmem_cache_destroy(qrq_cache);
 }
-- 
2.47.0
