Add three new mTHP statistics to track collapse failures for different
orders when encountering swap PTEs, excessive none PTEs, and shared PTEs:
- collapse_exceed_swap_pte: Increment when mTHP collapse fails due to
encountering a swap PTE.
- collapse_exceed_none_pte: Counts when mTHP collapse fails due to
exceeding the none PTE threshold for the given order
- collapse_exceed_shared_pte: Counts when mTHP collapse fails due to
encountering a shared PTE.
These statistics complement the existing THP_SCAN_EXCEED_* events by
providing per-order granularity for mTHP collapse attempts. The stats are
exposed via sysfs under
`/sys/kernel/mm/transparent_hugepage/hugepages-*/stats/` for each
supported hugepage size.
As we currently do not support collapsing mTHPs that contain a swap or
shared entry, those statistics keep track of how often we are
encountering failed mTHP collapses due to these restrictions.
We will add support for mTHP collapse for anonymous pages next; lets also
track when this happens at the PMD level within the per-mTHP stats.
Reviewed-by: Lorenzo Stoakes <[email protected]>
Acked-by: David Hildenbrand (Arm) <[email protected]>
Signed-off-by: Nico Pache <[email protected]>
---
Documentation/admin-guide/mm/transhuge.rst | 14 ++++++++++++++
include/linux/huge_mm.h | 3 +++
mm/huge_memory.c | 7 +++++++
mm/khugepaged.c | 15 +++++++++++++--
4 files changed, 37 insertions(+), 2 deletions(-)
diff --git a/Documentation/admin-guide/mm/transhuge.rst
b/Documentation/admin-guide/mm/transhuge.rst
index a74844e01f1e..b98e18c80185 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -714,6 +714,20 @@ nr_anon_partially_mapped
an anonymous THP as "partially mapped" and count it here, even though it
is not actually partially mapped anymore.
+collapse_exceed_none_pte
+ The number of collapse attempts that failed due to exceeding the
+ max_ptes_none threshold.
+
+collapse_exceed_swap_pte
+ The number of collapse attempts that failed due to exceeding the
+ max_ptes_swap threshold. For non-PMD orders this occurs if a mTHP range
+ contains at least one swap PTE.
+
+collapse_exceed_shared_pte
+ The number of collapse attempts that failed due to exceeding the
+ max_ptes_shared threshold. For non-PMD orders this occurs if a mTHP
range
+ contains at least one shared PTE.
+
As the system ages, allocating huge pages may be expensive as the
system uses memory compaction to copy data around memory to free a
huge page for use. There are some counters in ``/proc/vmstat`` to help
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 443852423790..148109ebd08a 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -144,6 +144,9 @@ enum mthp_stat_item {
MTHP_STAT_SPLIT_DEFERRED,
MTHP_STAT_NR_ANON,
MTHP_STAT_NR_ANON_PARTIALLY_MAPPED,
+ MTHP_STAT_COLLAPSE_EXCEED_SWAP,
+ MTHP_STAT_COLLAPSE_EXCEED_NONE,
+ MTHP_STAT_COLLAPSE_EXCEED_SHARED,
__MTHP_STAT_COUNT
};
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index eea83da9114a..222e421d9e8e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -717,6 +717,10 @@ DEFINE_MTHP_STAT_ATTR(split_failed,
MTHP_STAT_SPLIT_FAILED);
DEFINE_MTHP_STAT_ATTR(split_deferred, MTHP_STAT_SPLIT_DEFERRED);
DEFINE_MTHP_STAT_ATTR(nr_anon, MTHP_STAT_NR_ANON);
DEFINE_MTHP_STAT_ATTR(nr_anon_partially_mapped,
MTHP_STAT_NR_ANON_PARTIALLY_MAPPED);
+DEFINE_MTHP_STAT_ATTR(collapse_exceed_swap_pte,
MTHP_STAT_COLLAPSE_EXCEED_SWAP);
+DEFINE_MTHP_STAT_ATTR(collapse_exceed_none_pte,
MTHP_STAT_COLLAPSE_EXCEED_NONE);
+DEFINE_MTHP_STAT_ATTR(collapse_exceed_shared_pte,
MTHP_STAT_COLLAPSE_EXCEED_SHARED);
+
static struct attribute *anon_stats_attrs[] = {
&anon_fault_alloc_attr.attr,
@@ -733,6 +737,9 @@ static struct attribute *anon_stats_attrs[] = {
&split_deferred_attr.attr,
&nr_anon_attr.attr,
&nr_anon_partially_mapped_attr.attr,
+ &collapse_exceed_swap_pte_attr.attr,
+ &collapse_exceed_none_pte_attr.attr,
+ &collapse_exceed_shared_pte_attr.attr,
NULL,
};
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 191e529c185c..ac4731addafa 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -651,7 +651,9 @@ static enum scan_result __collapse_huge_page_isolate(struct
vm_area_struct *vma,
if (pte_none_or_zero(pteval)) {
if (++none_or_zero > max_ptes_none) {
result = SCAN_EXCEED_NONE_PTE;
- count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
+ if (is_pmd_order(order))
+
count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
+ count_mthp_stat(order,
MTHP_STAT_COLLAPSE_EXCEED_NONE);
goto out;
}
continue;
@@ -693,7 +695,9 @@ static enum scan_result __collapse_huge_page_isolate(struct
vm_area_struct *vma,
*/
if (++shared > max_ptes_shared) {
result = SCAN_EXCEED_SHARED_PTE;
- count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
+ if (is_pmd_order(order))
+
count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
+ count_mthp_stat(order,
MTHP_STAT_COLLAPSE_EXCEED_SHARED);
goto out;
}
}
@@ -1152,6 +1156,7 @@ static enum scan_result
__collapse_huge_page_swapin(struct mm_struct *mm,
* range.
*/
if (!is_pmd_order(order)) {
+ count_mthp_stat(order, MTHP_STAT_COLLAPSE_EXCEED_SWAP);
pte_unmap(pte);
mmap_read_unlock(mm);
result = SCAN_EXCEED_SWAP_PTE;
@@ -1459,6 +1464,8 @@ static enum scan_result collapse_scan_pmd(struct
mm_struct *mm,
if (++none_or_zero > max_ptes_none) {
result = SCAN_EXCEED_NONE_PTE;
count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
+ count_mthp_stat(HPAGE_PMD_ORDER,
+ MTHP_STAT_COLLAPSE_EXCEED_NONE);
goto out_unmap;
}
continue;
@@ -1467,6 +1474,8 @@ static enum scan_result collapse_scan_pmd(struct
mm_struct *mm,
if (++unmapped > max_ptes_swap) {
result = SCAN_EXCEED_SWAP_PTE;
count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
+ count_mthp_stat(HPAGE_PMD_ORDER,
+ MTHP_STAT_COLLAPSE_EXCEED_SWAP);
goto out_unmap;
}
/*
@@ -1524,6 +1533,8 @@ static enum scan_result collapse_scan_pmd(struct
mm_struct *mm,
if (++shared > max_ptes_shared) {
result = SCAN_EXCEED_SHARED_PTE;
count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
+ count_mthp_stat(HPAGE_PMD_ORDER,
+
MTHP_STAT_COLLAPSE_EXCEED_SHARED);
goto out_unmap;
}
}
--
2.54.0