On Mon, Feb 04, 2019 at 12:30:39PM +0100, Cédric Le Goater wrote:
> On 2/4/19 5:45 AM, David Gibson wrote:
> > On Mon, Jan 07, 2019 at 07:43:18PM +0100, Cédric Le Goater wrote:
> >> This will let the guest create a memory mapping to expose the ESB MMIO
> >> regions used to control the interrupt sources, to trigger events, to
> >> EOI or to turn off the sources.
> >>
> >> Signed-off-by: Cédric Le Goater <c...@kaod.org>
> >> ---
> >>  arch/powerpc/include/uapi/asm/kvm.h   |  4 ++
> >>  arch/powerpc/kvm/book3s_xive_native.c | 97 +++++++++++++++++++++++++++
> >>  2 files changed, 101 insertions(+)
> >>
> >> diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
> >> b/arch/powerpc/include/uapi/asm/kvm.h
> >> index 8c876c166ef2..6bb61ba141c2 100644
> >> --- a/arch/powerpc/include/uapi/asm/kvm.h
> >> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> >> @@ -675,4 +675,8 @@ struct kvm_ppc_cpu_char {
> >>  #define  KVM_XICS_PRESENTED               (1ULL << 43)
> >>  #define  KVM_XICS_QUEUED          (1ULL << 44)
> >>  
> >> +/* POWER9 XIVE Native Interrupt Controller */
> >> +#define KVM_DEV_XIVE_GRP_CTRL             1
> >> +#define   KVM_DEV_XIVE_GET_ESB_FD 1
> > 
> > Introducing a new FD for ESB and TIMA seems overkill.  Can't you get
> > to both with an mmap() directly on the xive device fd?  Using the
> > offset to distinguish which one to map, obviously.
> 
> The page offset would define some sort of user API. It seems feasible.
> But I am not sure this would be practical in the future if we need to 
> tune the length.

Um... why not?  I mean, yes, the XIVE supports rather a lot of
interrupts, but we have 64 bits of offset we can play with - we can
leave room for billions of ESB slots and still have room for billions
of VPs.

> The TIMA has two pages that can be exposed at guest level for interrupt 
> management: the OS and the USER pages. That should be OK.
> 
> But we might want to map only portions of the interrupt ESB space, for 
> instance for PCI passthrough, as Paul proposed. I am still looking at that.
> 
> Thanks,
> 
> C.
> 
> >>  #endif /* __LINUX_KVM_POWERPC_H */
> >> diff --git a/arch/powerpc/kvm/book3s_xive_native.c 
> >> b/arch/powerpc/kvm/book3s_xive_native.c
> >> index 115143e76c45..e20081f0c8d4 100644
> >> --- a/arch/powerpc/kvm/book3s_xive_native.c
> >> +++ b/arch/powerpc/kvm/book3s_xive_native.c
> >> @@ -153,6 +153,85 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device 
> >> *dev,
> >>    return rc;
> >>  }
> >>  
> >> +static int xive_native_esb_fault(struct vm_fault *vmf)
> >> +{
> >> +  struct vm_area_struct *vma = vmf->vma;
> >> +  struct kvmppc_xive *xive = vma->vm_file->private_data;
> >> +  struct kvmppc_xive_src_block *sb;
> >> +  struct kvmppc_xive_irq_state *state;
> >> +  struct xive_irq_data *xd;
> >> +  u32 hw_num;
> >> +  u16 src;
> >> +  u64 page;
> >> +  unsigned long irq;
> >> +
> >> +  /*
> >> +   * Linux/KVM uses a two pages ESB setting, one for trigger and
> >> +   * one for EOI
> >> +   */
> >> +  irq = vmf->pgoff / 2;
> >> +
> >> +  sb = kvmppc_xive_find_source(xive, irq, &src);
> >> +  if (!sb) {
> >> +          pr_err("%s: source %lx not found !\n", __func__, irq);
> >> +          return VM_FAULT_SIGBUS;
> >> +  }
> >> +
> >> +  state = &sb->irq_state[src];
> >> +  kvmppc_xive_select_irq(state, &hw_num, &xd);
> >> +
> >> +  arch_spin_lock(&sb->lock);
> >> +
> >> +  /*
> >> +   * first/even page is for trigger
> >> +   * second/odd page is for EOI and management.
> >> +   */
> >> +  page = vmf->pgoff % 2 ? xd->eoi_page : xd->trig_page;
> >> +  arch_spin_unlock(&sb->lock);
> >> +
> >> +  if (!page) {
> >> +          pr_err("%s: acessing invalid ESB page for source %lx !\n",
> >> +                 __func__, irq);
> >> +          return VM_FAULT_SIGBUS;
> >> +  }
> >> +
> >> +  vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
> >> +  return VM_FAULT_NOPAGE;
> >> +}
> >> +
> >> +static const struct vm_operations_struct xive_native_esb_vmops = {
> >> +  .fault = xive_native_esb_fault,
> >> +};
> >> +
> >> +static int xive_native_esb_mmap(struct file *file, struct vm_area_struct 
> >> *vma)
> >> +{
> >> +  /* There are two ESB pages (trigger and EOI) per IRQ */
> >> +  if (vma_pages(vma) + vma->vm_pgoff > KVMPPC_XIVE_NR_IRQS * 2)
> >> +          return -EINVAL;
> >> +
> >> +  vma->vm_flags |= VM_IO | VM_PFNMAP;
> >> +  vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
> >> +  vma->vm_ops = &xive_native_esb_vmops;
> >> +  return 0;
> >> +}
> >> +
> >> +static const struct file_operations xive_native_esb_fops = {
> >> +  .mmap = xive_native_esb_mmap,
> >> +};
> >> +
> >> +static int kvmppc_xive_native_get_esb_fd(struct kvmppc_xive *xive, u64 
> >> addr)
> >> +{
> >> +  u64 __user *ubufp = (u64 __user *) addr;
> >> +  int ret;
> >> +
> >> +  ret = anon_inode_getfd("[xive-esb]", &xive_native_esb_fops, xive,
> >> +                          O_RDWR | O_CLOEXEC);
> >> +  if (ret < 0)
> >> +          return ret;
> >> +
> >> +  return put_user(ret, ubufp);
> >> +}
> >> +
> >>  static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
> >>                                   struct kvm_device_attr *attr)
> >>  {
> >> @@ -162,12 +241,30 @@ static int kvmppc_xive_native_set_attr(struct 
> >> kvm_device *dev,
> >>  static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
> >>                                   struct kvm_device_attr *attr)
> >>  {
> >> +  struct kvmppc_xive *xive = dev->private;
> >> +
> >> +  switch (attr->group) {
> >> +  case KVM_DEV_XIVE_GRP_CTRL:
> >> +          switch (attr->attr) {
> >> +          case KVM_DEV_XIVE_GET_ESB_FD:
> >> +                  return kvmppc_xive_native_get_esb_fd(xive, attr->addr);
> >> +          }
> >> +          break;
> >> +  }
> >>    return -ENXIO;
> >>  }
> >>  
> >>  static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
> >>                                   struct kvm_device_attr *attr)
> >>  {
> >> +  switch (attr->group) {
> >> +  case KVM_DEV_XIVE_GRP_CTRL:
> >> +          switch (attr->attr) {
> >> +          case KVM_DEV_XIVE_GET_ESB_FD:
> >> +                  return 0;
> >> +          }
> >> +          break;
> >> +  }
> >>    return -ENXIO;
> >>  }
> >>  
> > 
> 

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature

Reply via email to