Hi David,
On 01/02/2020 00:32, David Woodhouse wrote:
From: David Woodhouse <d...@amazon.co.uk>
Bad pages are identified by get_platform_badpages() and with badpage=
on the command line.
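(For illustration: badpage= takes a comma-separated list of page frame
numbers or first-last ranges, e.g. badpage=0x3e762,0x7a500-0x7a5ff; the
values here are made up.)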
The boot allocator currently automatically elides these from the regions
passed to it with init_boot_pages(). The xenheap is then initialised
with the pages which are still marked as free by the boot allocator when
end_boot_allocator() is called.
However, any memory above HYPERVISOR_VIRT_END is passed directly to
init_domheap_pages() later in __start_xen(), and the bad page list is
not consulted.
Fix this by marking those pages as PGC_broken in the frametable at the
time end_boot_allocator() runs, and then making init_heap_pages() skip
over any pages which are so marked.
Signed-off-by: David Woodhouse <d...@amazon.co.uk>
---
xen/common/page_alloc.c | 82 +++++++++++++++++++++++++++++++++++++++--
1 file changed, 79 insertions(+), 3 deletions(-)
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 919a270587..3cf478311b 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1758,6 +1758,18 @@ int query_page_offline(mfn_t mfn, uint32_t *status)
return 0;
}
+static unsigned long contig_avail_pages(struct page_info *pg, unsigned long max_pages)
+{
+ unsigned long i;
+
+ for ( i = 0; i < max_pages; i++ )
+ {
+ if ( pg[i].count_info & PGC_broken )
+ break;
+ }
+ return i;
+}
+
/*
* Hand the specified arbitrary page range to the specified heap zone
* checking the node_id of the previous page. If they differ and the
@@ -1799,18 +1811,23 @@ static void init_heap_pages(
{
unsigned int nid = phys_to_nid(page_to_maddr(pg+i));
+ /* If the (first) page is already marked broken, don't add it. */
+ if ( pg[i].count_info & PGC_broken )
+ continue;
+
if ( unlikely(!avail[nid]) )
{
+ unsigned long contig_nr_pages = contig_avail_pages(pg + i, nr_pages);
unsigned long s = mfn_x(page_to_mfn(pg + i));
- unsigned long e = mfn_x(mfn_add(page_to_mfn(pg + nr_pages - 1), 1));
+ unsigned long e = mfn_x(mfn_add(page_to_mfn(pg + i + contig_nr_pages - 1), 1));
bool use_tail = (nid == phys_to_nid(pfn_to_paddr(e - 1))) &&
!(s & ((1UL << MAX_ORDER) - 1)) &&
(find_first_set_bit(e) <= find_first_set_bit(s));
unsigned long n;
- n = init_node_heap(nid, mfn_x(page_to_mfn(pg + i)), nr_pages - i,
+ n = init_node_heap(nid, mfn_x(page_to_mfn(pg + i)), contig_nr_pages,
&use_tail);
- BUG_ON(i + n > nr_pages);
+ BUG_ON(n > contig_nr_pages);
if ( n && !use_tail )
{
i += n - 1;
@@ -1846,6 +1863,63 @@ static unsigned long avail_heap_pages(
return free_pages;
}
+static void mark_bad_pages(void)
+{
+ unsigned long bad_spfn, bad_epfn;
I would like to avoid adding more 'pfn' in the code. Per the current
terminology (see include/xen/mm.h), this would be bad_smfn, bad_emfn.
Ideally, this should also use mfn_t, but I can live without it for now.
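I.e. roughly (untested sketch, only to illustrate the naming and mfn_t
use; the '-' range and ',' separator parsing stays as in your patch):

    mfn_t bad_smfn, bad_emfn;

    bad_smfn = _mfn(simple_strtoul(p, &p, 0));
    bad_emfn = bad_smfn;

    /* ... '-' range and ',' separator handling as before ... */

    while ( mfn_valid(bad_smfn) && mfn_x(bad_smfn) < mfn_x(bad_emfn) )
    {
        pg = mfn_to_page(bad_smfn);
        pg->count_info |= PGC_broken;
        page_list_add_tail(pg, &page_broken_list);
        bad_smfn = mfn_add(bad_smfn, 1);
    }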
+ const char *p;
+ struct page_info *pg;
+#ifdef CONFIG_X86
+ const struct platform_bad_page *badpage;
+ unsigned int i, j, array_size;
+
+ badpage = get_platform_badpages(&array_size);
+ if ( badpage )
+ {
+ for ( i = 0; i < array_size; i++ )
+ {
+ for ( j = 0; j < 1UL << badpage->order; j++ )
+ {
+ if ( mfn_valid(_mfn(badpage->mfn + j)) )
+ {
+ pg = mfn_to_page(_mfn(badpage->mfn + j));
+ pg->count_info |= PGC_broken;
+ page_list_add_tail(pg, &page_broken_list);
+ }
+ }
+ }
+ }
You are missing the PV shim part here.
But, AFAICT, the only difference from the implementation in
init_boot_pages() is how we treat the pages.
Could we introduce a common helper taking a callback? The callback
would be applied to every range.
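Something along these lines perhaps (all names made up, untested):

/* Hypothetical helper: walk every known-bad range and hand it to 'fn'. */
static void __init for_each_bad_range(void (*fn)(unsigned long smfn,
                                                 unsigned long nr_mfns))
{
#ifdef CONFIG_X86
    const struct platform_bad_page *badpage;
    unsigned int i, array_size;

    badpage = get_platform_badpages(&array_size);
    if ( badpage )
        for ( i = 0; i < array_size; i++, badpage++ )
            fn(badpage->mfn, 1UL << badpage->order);

    /* The PV shim ranges, currently only handled in init_boot_pages(). */
    if ( pv_shim )
    {
        badpage = pv_shim_reserved_pages(&array_size);
        if ( badpage )
            for ( i = 0; i < array_size; i++, badpage++ )
                fn(badpage->mfn, 1UL << badpage->order);
    }
#endif

    /* opt_badpage parsing as today, invoking fn() once per parsed range. */
}

init_boot_pages() would then pass a callback zapping the bootmem region,
and mark_bad_pages() one setting PGC_broken and adding the pages to
page_broken_list.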
+#endif
+
+ /* Check new pages against the bad-page list. */
+ p = opt_badpage;
+ while ( *p != '\0' )
+ {
+ bad_spfn = simple_strtoul(p, &p, 0);
+ bad_epfn = bad_spfn;
+
+ if ( *p == '-' )
+ {
+ p++;
+ bad_epfn = simple_strtoul(p, &p, 0);
+ if ( bad_epfn < bad_spfn )
+ bad_epfn = bad_spfn;
+ }
+
+ if ( *p == ',' )
+ p++;
+ else if ( *p != '\0' )
+ break;
+
+ while ( mfn_valid(_mfn(bad_spfn)) && bad_spfn < bad_epfn )
+ {
+ pg = mfn_to_page(_mfn(bad_spfn));
+ pg->count_info |= PGC_broken;
+ page_list_add_tail(pg, &page_broken_list);
+ bad_spfn++;
+ }
+ }
+}
+
void __init end_boot_allocator(void)
{
unsigned int i;
@@ -1870,6 +1944,8 @@ void __init end_boot_allocator(void)
}
nr_bootmem_regions = 0;
+ mark_bad_pages();
+
if ( !dma_bitsize && (num_online_nodes() > 1) )
dma_bitsize = arch_get_dma_bitsize();
Cheers,
--
Julien Grall