On Tue, 2020-10-20 at 09:18 +0300, Kirill A. Shutemov wrote:
> We cannot access protected pages directly. Use ioremap() to
> create a temporary mapping of the page. The mapping is destroyed
> on __kvm_unmap_gfn().
> 
> The new interface gfn_to_pfn_memslot_protected() is used to detect if
> the page is protected.
> 
> ioremap_cache_force() is a hack to bypass IORES_MAP_SYSTEM_RAM check
> in
> the x86 ioremap code. We need a better solution.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shute...@linux.intel.com>
> ---
>  arch/powerpc/kvm/book3s_64_mmu_hv.c    |  2 +-
>  arch/powerpc/kvm/book3s_64_mmu_radix.c |  2 +-
>  arch/x86/include/asm/io.h              |  2 +
>  arch/x86/include/asm/pgtable_types.h   |  1 +
>  arch/x86/kvm/mmu/mmu.c                 |  6 ++-
>  arch/x86/mm/ioremap.c                  | 16 ++++++--
>  include/linux/kvm_host.h               |  3 +-
>  include/linux/kvm_types.h              |  1 +
>  virt/kvm/kvm_main.c                    | 52 +++++++++++++++++++-----
> --
>  9 files changed, 63 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index 38ea396a23d6..8e06cd3f759c 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -590,7 +590,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu
> *vcpu,
>       } else {
>               /* Call KVM generic code to do the slow-path check */
>               pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
> -                                        writing, &write_ok);
> +                                        writing, &write_ok, NULL);
>               if (is_error_noslot_pfn(pfn))
>                       return -EFAULT;
>               page = NULL;
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c
> b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> index 22a677b18695..6fd4e3f9b66a 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> @@ -822,7 +822,7 @@ int kvmppc_book3s_instantiate_page(struct
> kvm_vcpu *vcpu,
>  
>               /* Call KVM generic code to do the slow-path check */
>               pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
> -                                        writing, upgrade_p);
> +                                        writing, upgrade_p, NULL);
>               if (is_error_noslot_pfn(pfn))
>                       return -EFAULT;
>               page = NULL;
> diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
> index c58d52fd7bf2..a3e1bfad1026 100644
> --- a/arch/x86/include/asm/io.h
> +++ b/arch/x86/include/asm/io.h
> @@ -184,6 +184,8 @@ extern void __iomem *ioremap_uc(resource_size_t
> offset, unsigned long size);
>  #define ioremap_uc ioremap_uc
>  extern void __iomem *ioremap_cache(resource_size_t offset, unsigned
> long size);
>  #define ioremap_cache ioremap_cache
> +extern void __iomem *ioremap_cache_force(resource_size_t offset,
> unsigned long size);
> +#define ioremap_cache_force ioremap_cache_force
>  extern void __iomem *ioremap_prot(resource_size_t offset, unsigned
> long size, unsigned long prot_val);
>  #define ioremap_prot ioremap_prot
>  extern void __iomem *ioremap_encrypted(resource_size_t phys_addr,
> unsigned long size);
> diff --git a/arch/x86/include/asm/pgtable_types.h
> b/arch/x86/include/asm/pgtable_types.h
> index 816b31c68550..4c16a9583786 100644
> --- a/arch/x86/include/asm/pgtable_types.h
> +++ b/arch/x86/include/asm/pgtable_types.h
> @@ -147,6 +147,7 @@ enum page_cache_mode {
>       _PAGE_CACHE_MODE_UC       = 3,
>       _PAGE_CACHE_MODE_WT       = 4,
>       _PAGE_CACHE_MODE_WP       = 5,
> +     _PAGE_CACHE_MODE_WB_FORCE = 6,
>  
>       _PAGE_CACHE_MODE_NUM      = 8
>  };
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 71aa3da2a0b7..162cb285b87b 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4058,7 +4058,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu,
> bool prefault, gfn_t gfn,
>       }
>  
>       async = false;
> -     *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write,
> writable);
> +     *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write,
> writable,
> +                                 NULL);
>       if (!async)
>               return false; /* *pfn has correct page already */
>  
> @@ -4072,7 +4073,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu,
> bool prefault, gfn_t gfn,
>                       return true;
>       }
>  
> -     *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write,
> writable);
> +     *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write,
> writable,
> +                                 NULL);
>       return false;
>  }
>  
> diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
> index 9e5ccc56f8e0..4409785e294c 100644
> --- a/arch/x86/mm/ioremap.c
> +++ b/arch/x86/mm/ioremap.c
> @@ -202,9 +202,12 @@ __ioremap_caller(resource_size_t phys_addr,
> unsigned long size,
>       __ioremap_check_mem(phys_addr, size, &io_desc);
>  
>       /*
> -      * Don't allow anybody to remap normal RAM that we're using..
> +      * Don't allow anybody to remap normal RAM that we're using,
> unless
> +      * _PAGE_CACHE_MODE_WB_FORCE is used.
>        */
> -     if (io_desc.flags & IORES_MAP_SYSTEM_RAM) {
> +     if (pcm == _PAGE_CACHE_MODE_WB_FORCE) {
> +         pcm = _PAGE_CACHE_MODE_WB;
> +     } else if (io_desc.flags & IORES_MAP_SYSTEM_RAM) {
>               WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
>                         &phys_addr, &last_addr);
>               return NULL;
> @@ -419,6 +422,13 @@ void __iomem *ioremap_cache(resource_size_t
> phys_addr, unsigned long size)
>  }
>  EXPORT_SYMBOL(ioremap_cache);
>  
> +void __iomem *ioremap_cache_force(resource_size_t phys_addr,
> unsigned long size)
> +{
> +     return __ioremap_caller(phys_addr, size,
> _PAGE_CACHE_MODE_WB_FORCE,
> +                             __builtin_return_address(0), false);
> +}
> +EXPORT_SYMBOL(ioremap_cache_force);
> +
>  void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long
> size,
>                               unsigned long prot_val)
>  {
> @@ -467,7 +477,7 @@ void iounmap(volatile void __iomem *addr)
>       p = find_vm_area((void __force *)addr);
>  
>       if (!p) {
> -             printk(KERN_ERR "iounmap: bad address %p\n", addr);
> +             printk(KERN_ERR "iounmap: bad address %px\n", addr);

Unintentional?

>               dump_stack();
>               return;
>       }
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6655e8da4555..0d5f3885747b 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -238,6 +238,7 @@ struct kvm_host_map {
>       void *hva;
>       kvm_pfn_t pfn;
>       kvm_pfn_t gfn;
> +     bool protected;
>  };
>  
>  /*
> @@ -725,7 +726,7 @@ kvm_pfn_t gfn_to_pfn_memslot(struct
> kvm_memory_slot *slot, gfn_t gfn);
>  kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot,
> gfn_t gfn);
>  kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t
> gfn,
>                              bool atomic, bool *async, bool
> write_fault,
> -                            bool *writable);
> +                            bool *writable, bool *protected);
>  
>  void kvm_release_pfn_clean(kvm_pfn_t pfn);
>  void kvm_release_pfn_dirty(kvm_pfn_t pfn);
> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> index a7580f69dda0..0a8c6426b4f4 100644
> --- a/include/linux/kvm_types.h
> +++ b/include/linux/kvm_types.h
> @@ -58,6 +58,7 @@ struct gfn_to_pfn_cache {
>       gfn_t gfn;
>       kvm_pfn_t pfn;
>       bool dirty;
> +     bool protected;
>  };
>  
>  #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 9b569b78874a..7c2c764c28c5 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1852,9 +1852,10 @@ static bool hva_to_pfn_fast(unsigned long
> addr, bool write_fault,
>   * 1 indicates success, -errno is returned if error is detected.
>   */
>  static int hva_to_pfn_slow(unsigned long addr, bool *async, bool
> write_fault,
> -                        bool *writable, kvm_pfn_t *pfn)
> +                        bool *writable, bool *protected, kvm_pfn_t
> *pfn)
>  {
>       unsigned int flags = FOLL_HWPOISON | FOLL_KVM;
> +     struct vm_area_struct *vma;
>       struct page *page;
>       int npages = 0;
>  
> @@ -1868,9 +1869,15 @@ static int hva_to_pfn_slow(unsigned long addr,
> bool *async, bool write_fault,
>       if (async)
>               flags |= FOLL_NOWAIT;
>  
> -     npages = get_user_pages_unlocked(addr, 1, &page, flags);
> -     if (npages != 1)
> +     mmap_read_lock(current->mm);
> +     npages = get_user_pages(addr, 1, flags, &page, &vma);
> +     if (npages != 1) {
> +             mmap_read_unlock(current->mm);
>               return npages;
> +     }
> +     if (protected)
> +             *protected = vma_is_kvm_protected(vma);
> +     mmap_read_unlock(current->mm);
>  
>       /* map read fault as writable if possible */
>       if (unlikely(!write_fault) && writable) {
> @@ -1961,7 +1968,7 @@ static int hva_to_pfn_remapped(struct
> vm_area_struct *vma,
>   *     whether the mapping is writable.
>   */
>  static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool
> *async,
> -                     bool write_fault, bool *writable)
> +                     bool write_fault, bool *writable, bool
> *protected)
>  {
>       struct vm_area_struct *vma;
>       kvm_pfn_t pfn = 0;
> @@ -1976,7 +1983,8 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr,
> bool atomic, bool *async,
>       if (atomic)
>               return KVM_PFN_ERR_FAULT;
>  
> -     npages = hva_to_pfn_slow(addr, async, write_fault, writable,
> &pfn);
> +     npages = hva_to_pfn_slow(addr, async, write_fault, writable,
> protected,
> +                              &pfn);
>       if (npages == 1)
>               return pfn;
>  
> @@ -2010,7 +2018,7 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr,
> bool atomic, bool *async,
>  
>  kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t
> gfn,
>                              bool atomic, bool *async, bool
> write_fault,
> -                            bool *writable)
> +                            bool *writable, bool *protected)
>  {
>       unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL,
> write_fault);
>  
> @@ -2033,7 +2041,7 @@ kvm_pfn_t __gfn_to_pfn_memslot(struct
> kvm_memory_slot *slot, gfn_t gfn,
>       }
>  
>       return hva_to_pfn(addr, atomic, async, write_fault,
> -                       writable);
> +                       writable, protected);
>  }
>  EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot);
>  
> @@ -2041,19 +2049,26 @@ kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm,
> gfn_t gfn, bool write_fault,
>                     bool *writable)
>  {
>       return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn,
> false, NULL,
> -                                 write_fault, writable);
> +                                 write_fault, writable, NULL);
>  }
>  EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
>  
>  kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t
> gfn)
>  {
> -     return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true,
> NULL);
> +     return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL,
> NULL);
>  }
>  EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot);
>  
> +static kvm_pfn_t gfn_to_pfn_memslot_protected(struct kvm_memory_slot
> *slot,
> +                                           gfn_t gfn, bool
> *protected)
> +{
> +     return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL,
> +                                 protected);
> +}
> +
>  kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot,
> gfn_t gfn)
>  {
> -     return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL);
> +     return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL,
> NULL);
>  }
>  EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
>  
> @@ -2134,7 +2149,7 @@ static void kvm_cache_gfn_to_pfn(struct
> kvm_memory_slot *slot, gfn_t gfn,
>  {
>       kvm_release_pfn(cache->pfn, cache->dirty, cache);
>  
> -     cache->pfn = gfn_to_pfn_memslot(slot, gfn);
> +     cache->pfn = gfn_to_pfn_memslot_protected(slot, gfn, &cache-
> >protected);
>       cache->gfn = gfn;
>       cache->dirty = false;
>       cache->generation = gen;
> @@ -2149,6 +2164,7 @@ static int __kvm_map_gfn(struct kvm_memslots
> *slots, gfn_t gfn,
>       void *hva = NULL;
>       struct page *page = KVM_UNMAPPED_PAGE;
>       struct kvm_memory_slot *slot = __gfn_to_memslot(slots, gfn);
> +     bool protected;
>       u64 gen = slots->generation;
>  
>       if (!map)
> @@ -2162,15 +2178,20 @@ static int __kvm_map_gfn(struct kvm_memslots
> *slots, gfn_t gfn,
>                       kvm_cache_gfn_to_pfn(slot, gfn, cache, gen);
>               }
>               pfn = cache->pfn;
> +             protected = cache->protected;
>       } else {
>               if (atomic)
>                       return -EAGAIN;
> -             pfn = gfn_to_pfn_memslot(slot, gfn);
> +             pfn = gfn_to_pfn_memslot_protected(slot, gfn,
> &protected);
>       }
>       if (is_error_noslot_pfn(pfn))
>               return -EINVAL;
>  
> -     if (pfn_valid(pfn)) {
> +     if (protected) {
> +             if (atomic)
> +                     return -EAGAIN;
> +             hva = ioremap_cache_force(pfn_to_hpa(pfn), PAGE_SIZE);
> +     } else if (pfn_valid(pfn)) {
>               page = pfn_to_page(pfn);
>               if (atomic)
>                       hva = kmap_atomic(page);

I think the page could have got unmapped since the gup via the
hypercall on another CPU. It could be an avenue for the guest to crash
the host.

> @@ -2191,6 +2212,7 @@ static int __kvm_map_gfn(struct kvm_memslots
> *slots, gfn_t gfn,
>       map->hva = hva;
>       map->pfn = pfn;
>       map->gfn = gfn;
> +     map->protected = protected;
>  
>       return 0;
>  }
> @@ -2221,7 +2243,9 @@ static void __kvm_unmap_gfn(struct
> kvm_memory_slot *memslot,
>       if (!map->hva)
>               return;
>  
> -     if (map->page != KVM_UNMAPPED_PAGE) {
> +     if (map->protected) {
> +             iounmap(map->hva);
> +     } else if (map->page != KVM_UNMAPPED_PAGE) {
>               if (atomic)
>                       kunmap_atomic(map->hva);
>               else

Reply via email to