We see high order allocation warnings:

  kernel: order 10 >= 10, gfp 0x40c00
  kernel: WARNING: CPU: 5 PID: 182 at mm/page_alloc.c:5630 __alloc_pages+0x1d7/0x3f0
  kernel: process_compressed_read+0x6f/0x590 [dm_qcow2]

This is because we have 1M clusters, and in the zstd compression case the
buffer used for decompression is clu_size + sizeof(ZSTD_DCtx) +
ZSTD_BLOCKSIZE_MAX + clu_size + ZSTD_BLOCKSIZE_MAX + 64 = 2520776 bytes,
which requires a 4M (order-10) allocation. That is a really big allocation,
especially on the I/O path (GFP_NOIO), and it has a very high probability
of failing.
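For reference, a rough breakdown of that size (assuming 4K pages; the
sizeof(ZSTD_DCtx) term is inferred from the total above and varies with
the kernel's zstd version):

  clu_size              1048576
  sizeof(ZSTD_DCtx)      161416
  ZSTD_BLOCKSIZE_MAX     131072
  clu_size              1048576
  ZSTD_BLOCKSIZE_MAX     131072
  constant                   64
  -----------------------------
  total                 2520776

2520776 bytes is just over 2M, so kmalloc() hands the request to the page
allocator, and get_order(2520776) = 10 means it needs 2^10 contiguous 4K
pages, i.e. the 4M order-10 allocation reported in the warning above.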
Let's fix it by adding a memory pool for such 4M allocations.

Note: I'm not fully sure that num_possible_cpus() is a sufficient minimum
size for this pool: if any code path in process_compressed_read() can
schedule, then a single CPU might require two or more allocations from the
pool at the same time. At least for now I don't see any scheduling there.
Anyway, the excess process_compressed_read() callers would only have to
sleep and wait for buffers to be released, so it should not be that bad.

https://virtuozzo.atlassian.net/browse/VSTOR-94596

Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com>
---
 drivers/md/dm-qcow2-map.c    | 18 ++++++++++++++----
 drivers/md/dm-qcow2-target.c | 18 ++++++++++++++++--
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/drivers/md/dm-qcow2-map.c b/drivers/md/dm-qcow2-map.c
index 6585f3fac6e7b..c60ba9f341c26 100644
--- a/drivers/md/dm-qcow2-map.c
+++ b/drivers/md/dm-qcow2-map.c
@@ -10,6 +10,7 @@
 #include <linux/zlib.h>
 #include <linux/error-injection.h>
 #include <linux/zstd.h>
+#include <linux/sizes.h>
 
 #include "dm.h"
 #include "dm-rq.h"
@@ -3650,6 +3651,8 @@ static int complete_metadata_writeback(struct qcow2 *qcow2)
 	return fsync_ret;
 }
 
+extern mempool_t *zbuf_pool;
+
 /* Process completed compressed READs */
 static void process_compressed_read(struct qcow2 *qcow2, struct list_head *read_list,
 				    struct list_head *cow_list)
@@ -3671,7 +3674,10 @@ static void process_compressed_read(struct qcow2 *qcow2, struct list_head *read_
 		dctxlen = zlib_inflate_workspacesize();
 
-	buf = kmalloc(qcow2->clu_size + dctxlen, GFP_NOIO);
+	if (qcow2->clu_size + dctxlen <= SZ_4M)
+		buf = mempool_alloc(zbuf_pool, GFP_NOIO);
+	else
+		buf = kmalloc(qcow2->clu_size + dctxlen, GFP_NOIO);
 	if (!buf) {
 		end_qios(read_list, BLK_STS_RESOURCE);
 		return;
@@ -3681,8 +3687,7 @@ static void process_compressed_read(struct qcow2 *qcow2, struct list_head *read_
 		arg = zstd_init_dstream(qcow2->clu_size, buf + qcow2->clu_size, dctxlen);
 		if (!arg) {
 			end_qios(read_list, BLK_STS_RESOURCE);
-			kfree(buf);
-			return;
+			goto err_free;
 		}
 	} else {
 		arg = buf + qcow2->clu_size;
@@ -3716,7 +3721,12 @@ static void process_compressed_read(struct qcow2 *qcow2, struct list_head *read_
 		list_add_tail(&qio->link, cow_list);
 	}
 
-	kfree(buf);
+err_free:
+	if (qcow2->clu_size + dctxlen <= SZ_4M)
+		mempool_free(buf, zbuf_pool);
+	else
+		kfree(buf);
+	return;
 }
 
 static int prepare_sliced_data_write(struct qcow2 *qcow2, struct qio *qio,
diff --git a/drivers/md/dm-qcow2-target.c b/drivers/md/dm-qcow2-target.c
index 276eab9acc4f3..a323bd2747af6 100644
--- a/drivers/md/dm-qcow2-target.c
+++ b/drivers/md/dm-qcow2-target.c
@@ -7,6 +7,8 @@
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/error-injection.h>
+#include <linux/cpumask.h>
+#include <linux/sizes.h>
 
 #include "dm.h"
 #include "dm-qcow2.h"
@@ -1036,6 +1038,8 @@ static struct target_type qcow2_target = {
 	.llseek_hole = qcow2_llseek_hole,
 };
 
+mempool_t *zbuf_pool = NULL;
+
 static int __init dm_qcow2_init(void)
 {
 	int ret;
@@ -1046,14 +1050,24 @@ static int __init dm_qcow2_init(void)
 		return -ENOMEM;
 
 	ret = dm_register_target(&qcow2_target);
-	if (ret)
+	if (ret) {
 		kmem_cache_destroy(qrq_cache);
+		return ret;
+	}
 
-	return ret;
+	zbuf_pool = mempool_create_kvmalloc_pool(num_possible_cpus(), SZ_4M);
+	if (!zbuf_pool) {
+		dm_unregister_target(&qcow2_target);
+		kmem_cache_destroy(qrq_cache);
+		return -ENOMEM;
+	}
+
+	return 0;
 }
 
 static void __exit dm_qcow2_exit(void)
 {
+	mempool_destroy(zbuf_pool);
 	dm_unregister_target(&qcow2_target);
 	kmem_cache_destroy(qrq_cache);
 }
-- 
2.47.0

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel