On Thu, Jun 4, 2026 at 7:58 PM Jinyu Tang <[email protected]> wrote:
>
> RISC-V KVM has used the hugetlb VMA size directly as the G-stage
> mapping size since stage-2 page table support was added. That is safe
> only if the block covered by the fault is fully contained in the
> memslot and the userspace address has the same offset as the GPA
> within that block.
>
> The THP path already checks those constraints before installing a PMD
> block mapping. The hugetlb path did not, so an unaligned memslot could
> make KVM install a PMD or PUD sized G-stage block that covers memory
> outside the slot or maps the wrong host pages.
>
> Pass the target mapping size into fault_supports_gstage_huge_mapping().
> The same helper can be used for both THP PMD mappings and hugetlb
> PMD/PUD mappings.
>
> Select hugetlb mapping sizes through the same memslot-boundary check,
> falling back from PUD to PMD to PAGE_SIZE. When a smaller hugetlb
> mapping size is selected, fault the GFN aligned to that selected size
> instead of the original VMA size.
>
> Also keep hugetlb mappings out of transparent_hugepage_adjust(). Once
> the hugetlb path has chosen PAGE_SIZE, promoting it again through the
> THP helper would miss the hugetlb fallback decision.
>
> Fixes: 9d05c1fee837 ("RISC-V: KVM: Implement stage2 page table programming")
> Signed-off-by: Jinyu Tang <[email protected]>
> Reviewed-by: Nutty Liu <[email protected]>

LGTM.

Reviewed-by: Anup Patel <[email protected]>

> ---
> v1 -> v2:
>   - Squash the helper parameterization into this hugetlb fix.
>   - Use the ALIGN()/ALIGN_DOWN() form suggested by Nutty Liu and Anup
>     for the memslot boundary check.
>
>  arch/riscv/kvm/mmu.c | 54 ++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 44 insertions(+), 10 deletions(-)
>
> diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
> index 2d3def024..0adf017a2 100644
> --- a/arch/riscv/kvm/mmu.c
> +++ b/arch/riscv/kvm/mmu.c
> @@ -286,7 +286,8 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
>  }
>
>  static bool fault_supports_gstage_huge_mapping(struct kvm_memory_slot *memslot,
> -                                              unsigned long hva)
> +                                              unsigned long hva,
> +                                              unsigned long map_size)
>  {
>         hva_t uaddr_start, uaddr_end;
>         gpa_t gpa_start;
> @@ -300,8 +301,8 @@ static bool fault_supports_gstage_huge_mapping(struct kvm_memory_slot *memslot,
>
>         /*
>          * Pages belonging to memslots that don't have the same alignment
> -        * within a PMD for userspace and GPA cannot be mapped with g-stage
> -        * PMD entries, because we'll end up mapping the wrong pages.
> +        * within a huge page for userspace and GPA cannot be mapped with
> +        * g-stage block entries, because we'll end up mapping the wrong pages.
>          *
>          * Consider a layout like the following:
>          *
> @@ -321,7 +322,7 @@ static bool fault_supports_gstage_huge_mapping(struct kvm_memory_slot *memslot,
>          *   e -> g
>          *   f -> h
>          */
> -       if ((gpa_start & (PMD_SIZE - 1)) != (uaddr_start & (PMD_SIZE - 1)))
> +       if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1)))
>                 return false;
>
>         /*
> @@ -336,7 +337,8 @@ static bool fault_supports_gstage_huge_mapping(struct kvm_memory_slot *memslot,
>          * userspace_addr or the base_gfn, as both are equally aligned (per
>          * the check above) and equally sized.
>          */
> -       return (hva >= ALIGN(uaddr_start, PMD_SIZE)) && (hva < ALIGN_DOWN(uaddr_end, PMD_SIZE));
> +       return (hva >= ALIGN(uaddr_start, map_size)) &&
> +              (hva < ALIGN_DOWN(uaddr_end, map_size));
>  }
>
>  static int get_hva_mapping_size(struct kvm *kvm,
> @@ -404,7 +406,7 @@ static unsigned long transparent_hugepage_adjust(struct kvm *kvm,
>          * sure that the HVA and GPA are sufficiently aligned and that the
>          * block map is contained within the memslot.
>          */
> -       if (fault_supports_gstage_huge_mapping(memslot, hva)) {
> +       if (fault_supports_gstage_huge_mapping(memslot, hva, PMD_SIZE)) {
>                 int sz;
>
>                 sz = get_hva_mapping_size(kvm, hva);
> @@ -421,12 +423,33 @@ static unsigned long transparent_hugepage_adjust(struct kvm *kvm,
>         return PAGE_SIZE;
>  }
>
> +static unsigned long hugetlb_mapping_size(struct kvm_memory_slot *memslot,
> +                                         unsigned long hva,
> +                                         unsigned long map_size)
> +{
> +       switch (map_size) {
> +       case PUD_SIZE:

As pointed out by sashiko-bot, we need a "#ifndef CONFIG_32BIT" guard here.
I have taken care of this while merging the patch.

> +               if (fault_supports_gstage_huge_mapping(memslot, hva, PUD_SIZE))
> +                       return PUD_SIZE;
> +               fallthrough;
> +       case PMD_SIZE:
> +               if (fault_supports_gstage_huge_mapping(memslot, hva, PMD_SIZE))
> +                       return PMD_SIZE;
> +               fallthrough;
> +       case PAGE_SIZE:
> +               return PAGE_SIZE;
> +       default:
> +               return map_size;
> +       }
> +}
> +
>  int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
>                       gpa_t gpa, unsigned long hva, bool is_write,
>                       struct kvm_gstage_mapping *out_map)
>  {
>         int ret;
>         kvm_pfn_t hfn;
> +       bool is_hugetlb;
>         bool writable;
>         short vma_pageshift;
>         gfn_t gfn = gpa >> PAGE_SHIFT;
> @@ -460,16 +483,23 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
>                 return -EFAULT;
>         }
>
> -       if (is_vm_hugetlb_page(vma))
> +       is_hugetlb = is_vm_hugetlb_page(vma);
> +       if (is_hugetlb)
>                 vma_pageshift = huge_page_shift(hstate_vma(vma));
>         else
>                 vma_pageshift = PAGE_SHIFT;
>         vma_pagesize = 1ULL << vma_pageshift;
>         if (logging || (vma->vm_flags & VM_PFNMAP))
>                 vma_pagesize = PAGE_SIZE;
> +       else if (is_hugetlb)
> +               vma_pagesize = hugetlb_mapping_size(memslot, hva, vma_pagesize);
>
> +       /*
> +        * For hugetlb mappings, vma_pagesize might have been reduced from the
> +        * VMA size to a smaller safe mapping size.
> +        */
>         if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE)
> -               gfn = (gpa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT;
> +               gfn = ALIGN_DOWN(gpa, vma_pagesize) >> PAGE_SHIFT;
>
>         /*
>          * Read mmu_invalidate_seq so that KVM can detect if the results of
> @@ -511,8 +541,12 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
>         if (mmu_invalidate_retry(kvm, mmu_seq))
>                 goto out_unlock;
>
> -       /* Check if we are backed by a THP and thus use block mapping if possible */
> -       if (!logging && (vma_pagesize == PAGE_SIZE))
> +       /*
> +        * Check if we are backed by a THP and thus use block mapping if
> +        * possible. Hugetlb mappings already selected their target size above,
> +        * so do not promote them through the THP helper.
> +        */
> +       if (!logging && !is_hugetlb && vma_pagesize == PAGE_SIZE)
>                 vma_pagesize = transparent_hugepage_adjust(kvm, memslot, hva, &hfn, &gpa);
>
>         if (writable) {
> --
> 2.43.0
>

Queued this patch for Linux-7.2

Thanks,
Anup

Reply via email to