I've already committed the patch, but it looks like I was too fast — please
verify my review comments below.
On 01.08.2025 10:54, Liu Kui wrote:
Fix page allocation failure in kcalloc by converting the large 1-dimensional
array into a 2-dimensional array of order-0 (4KB) pages. This eliminates the
need for non-order-0 allocations, improving allocation reliability under memory
pressure. The 2D array preserves page lookup by virtual address for the caller,
maintaining functional equivalence.
Related to #VSTOR-112413
https://virtuozzo.atlassian.net/browse/VSTOR-112413
Signed-off-by: Liu Kui <kui....@virtuozzo.com>
---
fs/fuse/kio/pcs/pcs_mr.c | 55 +++++++++++++++++++++++++---------------
fs/fuse/kio/pcs/pcs_mr.h | 21 ++++++++++-----
2 files changed, 50 insertions(+), 26 deletions(-)
diff --git a/fs/fuse/kio/pcs/pcs_mr.c b/fs/fuse/kio/pcs/pcs_mr.c
index cbd3b440dd1b..8e1dbc1121ce 100644
--- a/fs/fuse/kio/pcs/pcs_mr.c
+++ b/fs/fuse/kio/pcs/pcs_mr.c
@@ -13,11 +13,18 @@
void pcs_umem_release(struct pcs_umem *umem)
{
struct mm_struct *mm_s = umem->mm;
+ int i, npages = umem->npages;
- unpin_user_pages(umem->pages, umem->npages);
+ for (i = 0; npages; i++) {
+ int to_free = min_t(int, PCS_PAGES_PER_CHUNK, npages);
+
+ unpin_user_pages(umem->page_chunk[i].pages, to_free);
+ kfree(umem->page_chunk[i].pages);
+ npages -= to_free;
+ }
atomic64_sub(umem->npages, &mm_s->pinned_vm);
mmdrop(mm_s);
- kfree(umem->pages);
+ kfree(umem->page_chunk);
kfree(umem);
}
@@ -27,14 +34,13 @@ void pcs_umem_release(struct pcs_umem *umem)
struct pcs_umem *pcs_umem_get(u64 start, u64 len)
{
struct pcs_umem *umem = NULL;
- struct page **pages;
- int npages;
- u64 fp_va;
struct mm_struct *mm_s;
- int got, ret;
+ u64 fp_va;
+ int npages, nchunks, i, ret;
fp_va = start & PAGE_MASK;
npages = PAGE_ALIGN(start + len - fp_va) >> PAGE_SHIFT;
+ nchunks = (npages >> PCS_PAGE_CHUNK_SHIFT) + 1;
This always adds 1, even when npages is an exact multiple of
PCS_PAGES_PER_CHUNK, so in that case it over-allocates one extra chunk.
Shouldn't this be:
nchunks = DIV_ROUND_UP(npages, PCS_PAGES_PER_CHUNK);
?
umem = kzalloc(sizeof(*umem), GFP_KERNEL);
if (!umem)
@@ -48,25 +54,34 @@ struct pcs_umem *pcs_umem_get(u64 start, u64 len)
mmap_read_lock(mm_s);
umem->fp_addr = fp_va;
- umem->pages = kcalloc(npages, sizeof(struct page *), GFP_KERNEL);
- if (!umem->pages) {
+ umem->page_chunk = kcalloc(nchunks, sizeof(struct pcs_page_chunk *),
GFP_KERNEL);
^^^^^ The element size here is sizeof the *pointer* type, but page_chunk is an
array of struct pcs_page_chunk, so the array is under-allocated (8 bytes per
element instead of sizeof(struct pcs_page_chunk)). Shouldn't this be:
umem->page_chunk = kcalloc(nchunks, sizeof(struct pcs_page_chunk), GFP_KERNEL);
?
+ if (!umem->page_chunk) {
ret = -ENOMEM;
goto out_err;
}
- got = 0;
- while (npages) {
- pages = &umem->pages[got];
- ret = pin_user_pages(fp_va, npages, FOLL_WRITE | FOLL_LONGTERM,
pages, NULL);
- if (ret < 0)
- goto out_err;
+ for (i = 0; npages; i++) {
+ int n = min_t(int, npages, PCS_PAGES_PER_CHUNK);
+ struct page **pages = kcalloc(n, sizeof(struct page *),
GFP_KERNEL);
- WARN_ON(ret == 0);
- umem->npages += ret;
- atomic64_add(ret, &mm_s->pinned_vm);
- fp_va += ret * PAGE_SIZE;
- npages -= ret;
- got += ret;
+ if (!pages) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+ umem->page_chunk[i].pages = pages;
+
+ while (n) {
+ ret = pin_user_pages(fp_va, n, FOLL_WRITE |
FOLL_LONGTERM, pages, NULL);
+ if (ret < 0)
+ goto out_err;
+
+ atomic64_add(ret, &mm_s->pinned_vm);
+ umem->npages += ret;
+ fp_va += ret * PAGE_SIZE;
+ pages += ret;
+ n -= ret;
+ npages -= ret;
+ }
}
mmap_read_unlock(mm_s);
diff --git a/fs/fuse/kio/pcs/pcs_mr.h b/fs/fuse/kio/pcs/pcs_mr.h
index dae9931d9967..64f237f57dec 100644
--- a/fs/fuse/kio/pcs/pcs_mr.h
+++ b/fs/fuse/kio/pcs/pcs_mr.h
@@ -11,6 +11,8 @@
struct pcs_umem;
#define PCS_MAX_MR 0x10000
+#define PCS_PAGE_CHUNK_SHIFT 9
+#define PCS_PAGES_PER_CHUNK (1 << PCS_PAGE_CHUNK_SHIFT)
struct pcs_mr_set {
struct xarray mr_xa; /* array of registered MRs*/
@@ -18,10 +20,15 @@ struct pcs_mr_set {
atomic_t mr_num; /* number of registered MRs*/
};
+struct pcs_page_chunk
+{
+ struct page **pages; /* array of pinned pages */
+};
+
struct pcs_umem {
- u64 fp_addr; /* First page base address */
- int npages; /* number of pinned pages */
- struct page **pages; /* array of pinned pages */
+ struct pcs_page_chunk *page_chunk;
+ int npages; /* number of pinned pages */
+ u64 fp_addr; /* First page base address */
struct mm_struct *mm; /* mm the memory belongs to */
};
@@ -40,10 +47,12 @@ struct pcs_mr {
*/
static inline struct page *pcs_umem_page(struct pcs_umem *umem, u64 addr)
{
- unsigned int idx = (addr - umem->fp_addr) >> PAGE_SHIFT;
+ unsigned int page_idx = (addr - umem->fp_addr) >> PAGE_SHIFT;
+ unsigned int chunk_idx = page_idx >> PCS_PAGE_CHUNK_SHIFT;
+ unsigned int page_in_chunk = page_idx & (PCS_PAGES_PER_CHUNK - 1);
- if (likely(idx < umem->npages))
- return umem->pages[idx];
+ if (likely(page_idx < umem->npages))
+ return umem->page_chunk[chunk_idx].pages[page_in_chunk];
return NULL;
}
-- 2.39.5 (Apple Git-154)
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel