On Wed, Feb 10, 2021 at 05:48:26PM +0100, Jan Beulich wrote:
> I did further consider not allocating any real page at all, but just
> using the address of some unpopulated space (which would require
> announcing this page as reserved to Dom0, so it wouldn't put any PCI
> MMIO BARs there). But I thought this would be too controversial, because
> of the possible risks associated with this.

No, Xen is not capable of allocating a suitable unpopulated page IMO,
so let's not go down that route. Wasting one RAM page seems perfectly
fine to me.

> Perhaps the change to p2m_get_iommu_flags() should be in a separate
> patch.

Maybe; I'm still not fully convinced that's a change we want to make,
TBH.

> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -1007,6 +1007,8 @@ int arch_domain_soft_reset(struct domain
>  
>  void arch_domain_creation_finished(struct domain *d)
>  {
> +    if ( is_hvm_domain(d) )
> +        hvm_domain_creation_finished(d);
>  }
>  
>  #define xen_vcpu_guest_context vcpu_guest_context
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -66,8 +66,7 @@ boolean_param("force-ept", opt_force_ept
>  static void vmx_ctxt_switch_from(struct vcpu *v);
>  static void vmx_ctxt_switch_to(struct vcpu *v);
>  
> -static int  vmx_alloc_vlapic_mapping(struct domain *d);
> -static void vmx_free_vlapic_mapping(struct domain *d);
> +static int alloc_vlapic_mapping(void);
>  static void vmx_install_vlapic_mapping(struct vcpu *v);
>  static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr,
>                                  unsigned int flags);
> @@ -78,6 +77,8 @@ static int vmx_msr_read_intercept(unsign
>  static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content);
>  static void vmx_invlpg(struct vcpu *v, unsigned long linear);
>  
> +static mfn_t __read_mostly apic_access_mfn;
> +
>  /* Values for domain's ->arch.hvm_domain.pi_ops.flags. */
>  #define PI_CSW_FROM (1u << 0)
>  #define PI_CSW_TO   (1u << 1)
> @@ -401,7 +402,6 @@ static int vmx_domain_initialise(struct
>          .to   = vmx_ctxt_switch_to,
>          .tail = vmx_do_resume,
>      };
> -    int rc;
>  
>      d->arch.ctxt_switch = &csw;
>  
> @@ -411,21 +411,16 @@ static int vmx_domain_initialise(struct
>       */
>      d->arch.hvm.vmx.exec_sp = is_hardware_domain(d) || opt_ept_exec_sp;
> 
> -    if ( !has_vlapic(d) )
> -        return 0;
> -
> -    if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 )
> -        return rc;
> -
>      return 0;
>  }
> 
> -static void vmx_domain_relinquish_resources(struct domain *d)
> +static void domain_creation_finished(struct domain *d)
>  {
> -    if ( !has_vlapic(d) )
> -        return;
> 
> -    vmx_free_vlapic_mapping(d);
> +    if ( !mfn_eq(apic_access_mfn, _mfn(0)) &&
> +         set_mmio_p2m_entry(d, gaddr_to_gfn(APIC_DEFAULT_PHYS_BASE),
> +                            apic_access_mfn, PAGE_ORDER_4K) )
> +        domain_crash(d);
>  }
>  
>  static void vmx_init_ipt(struct vcpu *v)
> @@ -2407,7 +2402,7 @@ static struct hvm_function_table __initd
>      .cpu_up_prepare       = vmx_cpu_up_prepare,
>      .cpu_dead             = vmx_cpu_dead,
>      .domain_initialise    = vmx_domain_initialise,
> -    .domain_relinquish_resources = vmx_domain_relinquish_resources,
> +    .domain_creation_finished = domain_creation_finished,
>      .vcpu_initialise      = vmx_vcpu_initialise,
>      .vcpu_destroy         = vmx_vcpu_destroy,
>      .save_cpu_ctxt        = vmx_save_vmcs_ctxt,
> @@ -2653,7 +2648,7 @@ const struct hvm_function_table * __init
>  {
>      set_in_cr4(X86_CR4_VMXE);
>  
> -    if ( vmx_vmcs_init() )
> +    if ( vmx_vmcs_init() || alloc_vlapic_mapping() )
>      {
>          printk("VMX: failed to initialise.\n");
>          return NULL;
> @@ -3208,7 +3203,7 @@ gp_fault:
>      return X86EMUL_EXCEPTION;
>  }
>  
> -static int vmx_alloc_vlapic_mapping(struct domain *d)
> +static int __init alloc_vlapic_mapping(void)
>  {
>      struct page_info *pg;
>      mfn_t mfn;
> @@ -3216,53 +3211,28 @@ static int vmx_alloc_vlapic_mapping(stru
>      if ( !cpu_has_vmx_virtualize_apic_accesses )
>          return 0;
>  
> -    pg = alloc_domheap_page(d, MEMF_no_refcount);
> +    pg = alloc_domheap_page(NULL, 0);
>      if ( !pg )
>          return -ENOMEM;
>  
> -    if ( !get_page_and_type(pg, d, PGT_writable_page) )
> -    {
> -        /*
> -         * The domain can't possibly know about this page yet, so failure
> -         * here is a clear indication of something fishy going on.
> -         */
> -        domain_crash(d);
> -        return -ENODATA;
> -    }
> -
>      mfn = page_to_mfn(pg);
>      clear_domain_page(mfn);
> -    d->arch.hvm.vmx.apic_access_mfn = mfn;
> +    apic_access_mfn = mfn;
>  
> -    return set_mmio_p2m_entry(d, gaddr_to_gfn(APIC_DEFAULT_PHYS_BASE), mfn,
> -                              PAGE_ORDER_4K);
> -}
> -
> -static void vmx_free_vlapic_mapping(struct domain *d)
> -{
> -    mfn_t mfn = d->arch.hvm.vmx.apic_access_mfn;
> -
> -    d->arch.hvm.vmx.apic_access_mfn = _mfn(0);
> -    if ( !mfn_eq(mfn, _mfn(0)) )
> -    {
> -        struct page_info *pg = mfn_to_page(mfn);
> -
> -        put_page_alloc_ref(pg);
> -        put_page_and_type(pg);
> -    }
> +    return 0;
>  }
>  
>  static void vmx_install_vlapic_mapping(struct vcpu *v)
>  {
>      paddr_t virt_page_ma, apic_page_ma;
>  
> -    if ( mfn_eq(v->domain->arch.hvm.vmx.apic_access_mfn, _mfn(0)) )
> +    if ( mfn_eq(apic_access_mfn, _mfn(0)) )

You should check whether the domain has a vlapic here, I think, since
apic_access_mfn is now global and will be set regardless of whether
the domain has a vlapic enabled or not.

Previously apic_access_mfn was only allocated if the domain had vlapic
enabled.

>          return;
>  
>      ASSERT(cpu_has_vmx_virtualize_apic_accesses);
>  
>      virt_page_ma = page_to_maddr(vcpu_vlapic(v)->regs_page);
> -    apic_page_ma = mfn_to_maddr(v->domain->arch.hvm.vmx.apic_access_mfn);
> +    apic_page_ma = mfn_to_maddr(apic_access_mfn);
>  
>      vmx_vmcs_enter(v);
>      __vmwrite(VIRTUAL_APIC_PAGE_ADDR, virt_page_ma);

I would consider setting up the vmcs and adding the page to the p2m in
the same function, and likely calling it from vlapic_init. We could
have a domain_setup_apic hook in hvm_function_table that takes care of
all this.
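
Rough shape of what I mean (hook name and exact call site are only
illustrative, and obviously untested):

    /* New hook in hvm_function_table: */
    void (*domain_setup_apic)(struct domain *d);

    /*
     * VMX implementation doing the p2m part; the per-vCPU VMCS write
     * would then be driven from the same path (e.g. vlapic_init()).
     */
    static void vmx_domain_setup_apic(struct domain *d)
    {
        if ( !has_vlapic(d) || mfn_eq(apic_access_mfn, _mfn(0)) )
            return;

        if ( set_mmio_p2m_entry(d, gaddr_to_gfn(APIC_DEFAULT_PHYS_BASE),
                                apic_access_mfn, PAGE_ORDER_4K) )
            domain_crash(d);
    }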

> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -106,6 +106,7 @@ struct hvm_function_table {
>       * Initialise/destroy HVM domain/vcpu resources
>       */
>      int  (*domain_initialise)(struct domain *d);
> +    void (*domain_creation_finished)(struct domain *d);
>      void (*domain_relinquish_resources)(struct domain *d);
>      void (*domain_destroy)(struct domain *d);
>      int  (*vcpu_initialise)(struct vcpu *v);
> @@ -390,6 +391,12 @@ static inline bool hvm_has_set_descripto
>      return hvm_funcs.set_descriptor_access_exiting;
>  }
>  
> +static inline void hvm_domain_creation_finished(struct domain *d)
> +{
> +    if ( hvm_funcs.domain_creation_finished )
> +        alternative_vcall(hvm_funcs.domain_creation_finished, d);
> +}
> +
>  static inline int
>  hvm_guest_x86_mode(struct vcpu *v)
>  {
> @@ -765,6 +772,11 @@ static inline void hvm_invlpg(const stru
>  {
>      ASSERT_UNREACHABLE();
>  }
> +
> +static inline void hvm_domain_creation_finished(struct domain *d)
> +{
> +    ASSERT_UNREACHABLE();
> +}
>  
>  /*
>   * Shadow code needs further cleanup to eliminate some HVM-only paths. For
> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
> @@ -58,7 +58,6 @@ struct ept_data {
>  #define _VMX_DOMAIN_PML_ENABLED    0
>  #define VMX_DOMAIN_PML_ENABLED     (1ul << _VMX_DOMAIN_PML_ENABLED)
>  struct vmx_domain {
> -    mfn_t apic_access_mfn;
>      /* VMX_DOMAIN_* */
>      unsigned int status;
>  
> --- a/xen/include/asm-x86/p2m.h
> +++ b/xen/include/asm-x86/p2m.h
> @@ -935,6 +935,9 @@ static inline unsigned int p2m_get_iommu
>          flags = IOMMUF_readable;
>          if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) )
>              flags |= IOMMUF_writable;
> +        /* VMX'es APIC access page is global and hence has no owner. */
> +        if ( mfn_valid(mfn) && !page_get_owner(mfn_to_page(mfn)) )
> +            flags = 0;

Is it fine to have this page accessible to devices if the page tables
are shared between the CPU and the IOMMU?

Is it possible for devices to write to it?

I still think both the CPU and the IOMMU page tables should be the
same unless there's a technical reason for them not to be, rather than
us just wanting to hide things from devices.

Thanks, Roger.
