On 31.07.2025 17:58, Oleksii Kurochko wrote:
> --- a/xen/arch/riscv/include/asm/p2m.h
> +++ b/xen/arch/riscv/include/asm/p2m.h
> @@ -79,10 +79,20 @@ typedef enum {
>      p2m_ext_storage,    /* Following types will be stored outside PTE bits: */
>      p2m_grant_map_rw,   /* Read/write grant mapping */
>      p2m_grant_map_ro,   /* Read-only grant mapping */
> +    p2m_map_foreign_rw, /* Read/write RAM pages from foreign domain */
> +    p2m_map_foreign_ro, /* Read-only RAM pages from foreign domain */
>  } p2m_type_t;
>  
>  #define p2m_mmio_direct p2m_mmio_direct_io
>  
> +/*
> + * Bits 8 and 9 are reserved for use by supervisor software;
> + * the implementation shall ignore this field.
> + * We use these bits to store frequently used types, so as to avoid a
> + * get/set of the type in the radix tree.
> + */
> +#define P2M_TYPE_PTE_BITS_MASK  0x300
> +
>  /* We use bitmaps and mask to handle groups of types */
>  #define p2m_to_mask(t_) BIT(t_, UL)
>  
> @@ -93,10 +103,16 @@ typedef enum {
>  #define P2M_GRANT_TYPES (p2m_to_mask(p2m_grant_map_rw) | \
>                           p2m_to_mask(p2m_grant_map_ro))
>  
> +                            /* Foreign mappings types */

Nit: Why so far to the right?
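
I'd expect it to sit flush with the other group comments in this block of
#define-s, e.g. (purely illustrative; the macro below stands in for whatever
name the patch actually uses):

/* Foreign mapping types */
#define P2M_FOREIGN_TYPES (p2m_to_mask(p2m_map_foreign_rw) | \
                           p2m_to_mask(p2m_map_foreign_ro))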

> --- a/xen/arch/riscv/p2m.c
> +++ b/xen/arch/riscv/p2m.c
> @@ -197,6 +197,16 @@ static pte_t *p2m_get_root_pointer(struct p2m_domain *p2m, gfn_t gfn)
>      return __map_domain_page(p2m->root + root_table_indx);
>  }
>  
> +static p2m_type_t p2m_get_type(const pte_t pte)
> +{
> +    p2m_type_t type = MASK_EXTR(pte.pte, P2M_TYPE_PTE_BITS_MASK);
> +
> +    if ( type == p2m_ext_storage )
> +        panic("unimplemented\n");

That is, as per the p2m.h additions you pretend to add support for foreign
types here, but then you don't?
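
If the foreign types are meant to live in the two PTE bits, they would need to
come ahead of p2m_ext_storage in the enum (and then only if a spare encoding
is left among the four values the bits can hold); otherwise p2m_get_type()
would need the external-storage lookup actually implemented. Just to
illustrate the former option (untested sketch):

    p2m_map_foreign_rw, /* Read/write RAM pages from foreign domain */
    p2m_map_foreign_ro, /* Read-only RAM pages from foreign domain */
    p2m_ext_storage,    /* Following types will be stored outside PTE bits: */
    p2m_grant_map_rw,   /* Read/write grant mapping */
    p2m_grant_map_ro,   /* Read-only grant mapping */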

> @@ -248,11 +258,136 @@ static int p2m_next_level(struct p2m_domain *p2m, bool alloc_tbl,
>      return P2M_TABLE_MAP_NONE;
>  }
>  
> +static void p2m_put_foreign_page(struct page_info *pg)
> +{
> +    /*
> +     * It’s safe to call put_page() here because arch_flush_tlb_mask()
> +     * will be invoked if the page is reallocated before the end of
> +     * this loop, which will trigger a flush of the guest TLBs.
> +     */
> +    put_page(pg);
> +}

How can one know the comment is true? arch_flush_tlb_mask() still lives in
stubs.c, and hence what it is eventually going to do (something like Arm's
vs more like x86's) is entirely unknown right now.

> +/* Put any references on the single 4K page referenced by mfn. */
> +static void p2m_put_4k_page(mfn_t mfn, p2m_type_t type)
> +{
> +    /* TODO: Handle other p2m types */
> +
> +    if ( p2m_is_foreign(type) )
> +    {
> +        ASSERT(mfn_valid(mfn));
> +        p2m_put_foreign_page(mfn_to_page(mfn));
> +    }
> +
> +    /*
> +     * Detect the xenheap page and mark the stored GFN as invalid.
> +     * We don't free the underlying page until the guest requests to do so.
> +     * So we only need to tell the page is not mapped anymore in the P2M by
> +     * marking the stored GFN as invalid.
> +     */
> +    if ( p2m_is_ram(type) && is_xen_heap_mfn(mfn) )
> +        page_set_xenheap_gfn(mfn_to_page(mfn), INVALID_GFN);

Isn't this for grants? p2m_is_ram() doesn't cover p2m_grant_map_*.

> +}
> +
> +/* Put any references on the superpage referenced by mfn. */
> +static void p2m_put_2m_superpage(mfn_t mfn, p2m_type_t type)
> +{
> +    struct page_info *pg;
> +    unsigned int i;
> +
> +    ASSERT(mfn_valid(mfn));
> +
> +    pg = mfn_to_page(mfn);
> +
> +    for ( i = 0; i < XEN_PT_ENTRIES; i++, pg++ )
> +        p2m_put_foreign_page(pg);
> +}

In p2m_put_4k_page() you check the type, whereas here you don't.
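
Presumably something along these lines is wanted, mirroring the 4k helper
(untested sketch, only to illustrate the point):

static void p2m_put_2m_superpage(mfn_t mfn, p2m_type_t type)
{
    struct page_info *pg;
    unsigned int i;

    /* Only foreign mappings hold a reference that needs dropping here. */
    if ( !p2m_is_foreign(type) )
        return;

    ASSERT(mfn_valid(mfn));

    pg = mfn_to_page(mfn);

    for ( i = 0; i < XEN_PT_ENTRIES; i++, pg++ )
        p2m_put_foreign_page(pg);
}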

> +/* Put any references on the page referenced by pte. */
> +static void p2m_put_page(const pte_t pte, unsigned int level)
> +{
> +    mfn_t mfn = pte_get_mfn(pte);
> +    p2m_type_t p2m_type = p2m_get_type(pte);
> +
> +    ASSERT(pte_is_valid(pte));
> +
> +    /*
> +     * TODO: Level-2 superpages are not handled here yet. Xen is not
> +     * preemptible, and freeing the memory behind such a large mapping
> +     * could turn into a very long operation; some extra work is needed
> +     * before that can be supported.
> +     */
> +    switch ( level )
> +    {
> +    case 1:
> +        return p2m_put_2m_superpage(mfn, p2m_type);
> +
> +    case 0:
> +        return p2m_put_4k_page(mfn, p2m_type);
> +    }

Yet despite the comment not even an assertion for level 2 and up?
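
At the very least I'd expect the switch() to catch unexpected levels, e.g.
(illustrative only):

    switch ( level )
    {
    case 1:
        return p2m_put_2m_superpage(mfn, p2m_type);

    case 0:
        return p2m_put_4k_page(mfn, p2m_type);

    default:
        ASSERT_UNREACHABLE();
        break;
    }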

>  /* Free pte sub-tree behind an entry */
>  static void p2m_free_subtree(struct p2m_domain *p2m,
>                               pte_t entry, unsigned int level)
>  {
> -    panic("%s: hasn't been implemented yet\n", __func__);
> +    unsigned int i;
> +    pte_t *table;
> +    mfn_t mfn;
> +    struct page_info *pg;
> +
> +    /* Nothing to do if the entry is invalid. */
> +    if ( !pte_is_valid(entry) )
> +        return;
> +
> +    if ( pte_is_superpage(entry, level) || (level == 0) )

Perhaps swap the two conditions around?
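
I.e., presumably so the cheap level check short-circuits first (just spelling
out the suggestion):

    if ( (level == 0) || pte_is_superpage(entry, level) )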

> +    {
> +#ifdef CONFIG_IOREQ_SERVER
> +        /*
> +         * If this gets called then either the entry was replaced by an entry
> +         * with a different base (valid case) or the shattering of a superpage
> +         * has failed (error case).
> +         * So, at worst, the spurious mapcache invalidation might be sent.
> +         */
> +        if ( p2m_is_ram(p2m_get_type(entry)) &&
> +             domain_has_ioreq_server(p2m->domain) )
> +            ioreq_request_mapcache_invalidate(p2m->domain);
> +#endif
> +
> +        p2m_put_page(entry, level);
> +
> +        return;
> +    }
> +
> +    table = map_domain_page(pte_get_mfn(entry));
> +    for ( i = 0; i < XEN_PT_ENTRIES; i++ )
> +        p2m_free_subtree(p2m, table[i], level - 1);

In p2m_put_page() you comment towards concerns for level >= 2; no similar
concerns for the resulting recursion here?

> +    unmap_domain_page(table);
> +
> +    /*
> +     * Make sure all the references in the TLB have been removed before
> +     * freeing the intermediate page table.
> +     * XXX: Should we defer the free of the page table to avoid the
> +     * flush?
> +     */
> +    p2m_tlb_flush_sync(p2m);
> +
> +    mfn = pte_get_mfn(entry);
> +    ASSERT(mfn_valid(mfn));
> +
> +    pg = mfn_to_page(mfn);
> +
> +    page_list_del(pg, &p2m->pages);
> +    p2m_free_page(p2m, pg);

Once again I wonder whether this code path was actually tested: p2m_free_page()
also invokes page_list_del(), and double deletions typically won't end very
well.
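
If p2m_free_page() indeed already unlinks the page from p2m->pages, then
presumably this simply wants to be (assuming that is what the helper does):

    mfn = pte_get_mfn(entry);
    ASSERT(mfn_valid(mfn));

    p2m_free_page(p2m, mfn_to_page(mfn));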

Jan
