Re: [PATCH 2/3] KVM: Emulate MSI-X table in kernel

Sheng Yang Tue, 22 Feb 2011 22:56:52 -0800

On Wednesday 23 February 2011 08:19:21 Alex Williamson wrote:
> On Sun, 2011-01-30 at 13:11 +0800, Sheng Yang wrote:
> > Then we can support mask bit operation of assigned devices now.
> 
> Looks pretty good overall.  A few comments below.  It seems like we
> should be able to hook this into vfio with a small stub in kvm.  We just
> need to be able to communicate disabling and enabling of individual msix
> vectors.  For brute force, we could do this with a lot of eventfds,
> triggered by kvm and consumed by vfio, two per MSI-X vector.  Not sure
> if there's something smaller that could do it.  Thanks,


Alex, thanks for your comments. See my comments below:

> 
> Alex
> 
> > Signed-off-by: Sheng Yang <sh...@linux.intel.com>
> > ---
> > 
> >  arch/x86/kvm/Makefile    |    2 +-
> >  arch/x86/kvm/x86.c       |    8 +-
> >  include/linux/kvm.h      |   21 ++++
> >  include/linux/kvm_host.h |   25 ++++
> >  virt/kvm/assigned-dev.c  |   44 +++++++
> >  virt/kvm/kvm_main.c      |   38 ++++++-
> >  virt/kvm/msix_mmio.c     |  286
> >  ++++++++++++++++++++++++++++++++++++++++++++++ virt/kvm/msix_mmio.h    
> >  |   25 ++++
> >  8 files changed, 442 insertions(+), 7 deletions(-)
> >  create mode 100644 virt/kvm/msix_mmio.c
> >  create mode 100644 virt/kvm/msix_mmio.h
> > 
> > diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> > index f15501f..3a0d851 100644
> > --- a/arch/x86/kvm/Makefile
> > +++ b/arch/x86/kvm/Makefile
> > @@ -7,7 +7,7 @@ CFLAGS_vmx.o := -I.
> > 
> >  kvm-y                      += $(addprefix ../../../virt/kvm/, kvm_main.o 
> > ioapic.o \
> >  
> >                             coalesced_mmio.o irq_comm.o eventfd.o \
> > 
> > -                           assigned-dev.o)
> > +                           assigned-dev.o msix_mmio.o)
> > 
> >  kvm-$(CONFIG_IOMMU_API)    += $(addprefix ../../../virt/kvm/, iommu.o)
> >  kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/,
> >  async_pf.o)
> > 
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index fa708c9..89bf12c 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -1966,6 +1966,7 @@ int kvm_dev_ioctl_check_extension(long ext)
> > 
> >     case KVM_CAP_X86_ROBUST_SINGLESTEP:
> >     case KVM_CAP_XSAVE:
> > 
> >     case KVM_CAP_ASYNC_PF:
> > +   case KVM_CAP_MSIX_MMIO:
> >             r = 1;
> >             break;
> >     
> >     case KVM_CAP_COALESCED_MMIO:
> > @@ -3807,6 +3808,7 @@ static int emulator_write_emulated_onepage(unsigned
> > long addr,
> > 
> >                                        struct kvm_vcpu *vcpu)
> >  
> >  {
> >  
> >     gpa_t                 gpa;
> > 
> > +   int r;
> > 
> >     gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
> > 
> > @@ -3822,14 +3824,16 @@ static int
> > emulator_write_emulated_onepage(unsigned long addr,
> > 
> >  mmio:
> >     trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
> > 
> > +   r = vcpu_mmio_write(vcpu, gpa, bytes, val);
> > 
> >     /*
> >     
> >      * Is this MMIO handled locally?
> >      */
> > 
> > -   if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
> > +   if (!r)
> > 
> >             return X86EMUL_CONTINUE;
> >     
> >     vcpu->mmio_needed = 1;
> > 
> > -   vcpu->run->exit_reason = KVM_EXIT_MMIO;
> > +   vcpu->run->exit_reason = (r == -ENOTSYNC) ?
> > +           KVM_EXIT_MSIX_ROUTING_UPDATE : KVM_EXIT_MMIO;
> > 
> >     vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
> >     vcpu->run->mmio.len = vcpu->mmio_size = bytes;
> >     vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
> > 
> > diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> > index ea2dc1a..ad9df4b 100644
> > --- a/include/linux/kvm.h
> > +++ b/include/linux/kvm.h
> > @@ -161,6 +161,7 @@ struct kvm_pit_config {
> > 
> >  #define KVM_EXIT_NMI              16
> >  #define KVM_EXIT_INTERNAL_ERROR   17
> >  #define KVM_EXIT_OSI              18
> > 
> > +#define KVM_EXIT_MSIX_ROUTING_UPDATE 19
> > 
> >  /* For KVM_EXIT_INTERNAL_ERROR */
> >  #define KVM_INTERNAL_ERROR_EMULATION 1
> > 
> > @@ -541,6 +542,7 @@ struct kvm_ppc_pvinfo {
> > 
> >  #define KVM_CAP_PPC_GET_PVINFO 57
> >  #define KVM_CAP_PPC_IRQ_LEVEL 58
> >  #define KVM_CAP_ASYNC_PF 59
> > 
> > +#define KVM_CAP_MSIX_MMIO 60
> > 
> >  #ifdef KVM_CAP_IRQ_ROUTING
> > 
> > @@ -672,6 +674,9 @@ struct kvm_clock_data {
> > 
> >  #define KVM_XEN_HVM_CONFIG        _IOW(KVMIO,  0x7a, struct
> >  kvm_xen_hvm_config) #define KVM_SET_CLOCK             _IOW(KVMIO, 
> >  0x7b, struct kvm_clock_data) #define KVM_GET_CLOCK            
> >  _IOR(KVMIO,  0x7c, struct kvm_clock_data)
> > 
> > +/* Available with KVM_CAP_MSIX_MMIO */
> > +#define KVM_REGISTER_MSIX_MMIO    _IOW(KVMIO,  0x7d, struct
> > kvm_msix_mmio_user) +#define KVM_UNREGISTER_MSIX_MMIO  _IOW(KVMIO, 
> > 0x7e, struct kvm_msix_mmio_user)
> > 
> >  /* Available with KVM_CAP_PIT_STATE2 */
> >  #define KVM_GET_PIT2              _IOR(KVMIO,  0x9f, struct
> >  kvm_pit_state2) #define KVM_SET_PIT2              _IOW(KVMIO,  0xa0,
> >  struct kvm_pit_state2)
> > 
> > @@ -795,4 +800,20 @@ struct kvm_assigned_msix_entry {
> > 
> >     __u16 padding[3];
> >  
> >  };
> > 
> > +#define KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV        (1 << 0)
> > +
> > +#define KVM_MSIX_MMIO_TYPE_BASE_TABLE          (1 << 8)
> > +
> > +#define KVM_MSIX_MMIO_TYPE_DEV_MASK            0x00ff
> > +#define KVM_MSIX_MMIO_TYPE_BASE_MASK           0xff00
> > +struct kvm_msix_mmio_user {
> > +   __u32 dev_id;
> > +   __u16 type;
> > +   __u16 max_entries_nr;
> > +   __u64 base_addr;
> > +   __u64 base_va;
> > +   __u64 flags;
> > +   __u64 reserved[4];
> > +};
> > +
> > 
> >  #endif /* __LINUX_KVM_H */
> > 
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index 7d313e0..c10670c 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -233,6 +233,27 @@ struct kvm_memslots {
> > 
> >                                     KVM_PRIVATE_MEM_SLOTS];
> >  
> >  };
> > 
> > +#define KVM_MSIX_MMIO_MAX    32
> 
> I assume this is based on 32 devices/bus, but we could exceed this is we
> add more buses or make use of more functions.  How hard is it to make
> this dynamic?

We can improve this later(maybe with balance tree?). 32 should be more than 
sufficient for now. And this patchset has been in the mailing list for months, 
I 
really want to get it in first...
 
> > +
> > +struct kvm_msix_mmio {
> > +   u32 dev_id;
> > +   u16 type;
> > +   u16 max_entries_nr;
> > +   u64 flags;
> > +   gpa_t table_base_addr;
> > +   hva_t table_base_va;
> > +   gpa_t pba_base_addr;
> > +   hva_t pba_base_va;
> > +};
> > +
> > +struct kvm_msix_mmio_dev {
> > +   struct kvm *kvm;
> > +   struct kvm_io_device table_dev;
> > +   int mmio_nr;
> > +   struct kvm_msix_mmio mmio[KVM_MSIX_MMIO_MAX];
> > +   struct mutex lock;
> > +};
> > +
> > 
> >  struct kvm {
> >  
> >     spinlock_t mmu_lock;
> >     raw_spinlock_t requests_lock;
> > 
> > @@ -281,6 +302,7 @@ struct kvm {
> > 
> >     long mmu_notifier_count;
> >  
> >  #endif
> >  
> >     long tlbs_dirty;
> > 
> > +   struct kvm_msix_mmio_dev msix_mmio_dev;
> > 
> >  };
> >  
> >  /* The guest did something we don't support. */
> > 
> > @@ -553,6 +575,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> > 
> >  int kvm_request_irq_source_id(struct kvm *kvm);
> >  void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
> > 
> > +int kvm_assigned_device_update_msix_mask_bit(struct kvm *kvm,
> > +                   int assigned_dev_id, int entry, bool mask);
> > +
> > 
> >  /* For vcpu->arch.iommu_flags */
> >  #define KVM_IOMMU_CACHE_COHERENCY  0x1
> > 
> > diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
> > index ae72ae6..d1598a6 100644
> > --- a/virt/kvm/assigned-dev.c
> > +++ b/virt/kvm/assigned-dev.c
> > @@ -18,6 +18,7 @@
> > 
> >  #include <linux/interrupt.h>
> >  #include <linux/slab.h>
> >  #include "irq.h"
> > 
> > +#include "msix_mmio.h"
> > 
> >  static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct
> >  list_head *head,
> >  
> >                                                   int assigned_dev_id)
> > 
> > @@ -191,12 +192,25 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
> > 
> >     kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
> >  
> >  }
> > 
> > +static void assigned_device_free_msix_mmio(struct kvm *kvm,
> > +                           struct kvm_assigned_dev_kernel *adev)
> > +{
> > +   struct kvm_msix_mmio mmio;
> > +
> > +   mmio.dev_id = adev->assigned_dev_id;
> > +   mmio.type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV |
> > +               KVM_MSIX_MMIO_TYPE_BASE_TABLE;
> > +   kvm_free_msix_mmio(kvm, &mmio);
> > +}
> > +
> > 
> >  static void kvm_free_assigned_device(struct kvm *kvm,
> >  
> >                                  struct kvm_assigned_dev_kernel
> >                                  *assigned_dev)
> >  
> >  {
> >  
> >     kvm_free_assigned_irq(kvm, assigned_dev);
> > 
> > +   assigned_device_free_msix_mmio(kvm, assigned_dev);
> > +
> > 
> >     __pci_reset_function(assigned_dev->dev);
> >     pci_restore_state(assigned_dev->dev);
> > 
> > @@ -785,3 +799,33 @@ out:
> >     return r;
> >  
> >  }
> > 
> > +/* The caller should hold kvm->lock */
> > +int kvm_assigned_device_update_msix_mask_bit(struct kvm *kvm,
> > +                           int assigned_dev_id, int entry, bool mask)
> > +{
> > +   int r = -EFAULT;
> > +   struct kvm_assigned_dev_kernel *adev;
> > +   int i;
> > +
> > +   if (!irqchip_in_kernel(kvm))
> > +           return r;
> > +
> > +   adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
> > +                                 assigned_dev_id);
> > +   if (!adev)
> > +           goto out;
> > +
> > +   /* For non-MSIX enabled devices, entries_nr == 0 */
> > +   for (i = 0; i < adev->entries_nr; i++)
> > +           if (adev->host_msix_entries[i].entry == entry) {
> > +                   if (mask)
> > +                           disable_irq_nosync(
> > +                                   adev->host_msix_entries[i].vector);
> > +                   else
> > +                           enable_irq(adev->host_msix_entries[i].vector);
> > +                   r = 0;
> > +                   break;
> > +           }
> > +out:
> > +   return r;
> > +}
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index b1b6cbb..b7807c8 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -56,6 +56,7 @@
> > 
> >  #include "coalesced_mmio.h"
> >  #include "async_pf.h"
> > 
> > +#include "msix_mmio.h"
> > 
> >  #define CREATE_TRACE_POINTS
> >  #include <trace/events/kvm.h>
> > 
> > @@ -509,6 +510,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
> > 
> >     struct mm_struct *mm = kvm->mm;
> >     
> >     kvm_arch_sync_events(kvm);
> > 
> > +   kvm_unregister_msix_mmio_dev(kvm);
> > 
> >     spin_lock(&kvm_lock);
> >     list_del(&kvm->vm_list);
> >     spin_unlock(&kvm_lock);
> > 
> > @@ -1877,6 +1879,24 @@ static long kvm_vm_ioctl(struct file *filp,
> > 
> >             mutex_unlock(&kvm->lock);
> >             break;
> >  
> >  #endif
> > 
> > +   case KVM_REGISTER_MSIX_MMIO: {
> > +           struct kvm_msix_mmio_user mmio_user;
> > +
> > +           r = -EFAULT;
> > +           if (copy_from_user(&mmio_user, argp, sizeof mmio_user))
> > +                   goto out;
> > +           r = kvm_vm_ioctl_register_msix_mmio(kvm, &mmio_user);
> > +           break;
> > +   }
> > +   case KVM_UNREGISTER_MSIX_MMIO: {
> > +           struct kvm_msix_mmio_user mmio_user;
> > +
> > +           r = -EFAULT;
> > +           if (copy_from_user(&mmio_user, argp, sizeof mmio_user))
> > +                   goto out;
> > +           r = kvm_vm_ioctl_unregister_msix_mmio(kvm, &mmio_user);
> > +           break;
> > +   }
> > 
> >     default:
> >             r = kvm_arch_vm_ioctl(filp, ioctl, arg);
> >             if (r == -ENOTTY)
> > 
> > @@ -1988,6 +2008,12 @@ static int kvm_dev_ioctl_create_vm(void)
> > 
> >             return r;
> >     
> >     }
> >  
> >  #endif
> > 
> > +   r = kvm_register_msix_mmio_dev(kvm);
> > +   if (r < 0) {
> > +           kvm_put_kvm(kvm);
> > +           return r;
> > +   }
> > +
> > 
> >     r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
> >     if (r < 0)
> >     
> >             kvm_put_kvm(kvm);
> > 
> > @@ -2223,14 +2249,18 @@ static void kvm_io_bus_destroy(struct kvm_io_bus
> > *bus)
> > 
> >  int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
> >  
> >                  int len, const void *val)
> >  
> >  {
> > 
> > -   int i;
> > +   int i, r = -EOPNOTSUPP;
> > 
> >     struct kvm_io_bus *bus;
> >     
> >     bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
> > 
> > -   for (i = 0; i < bus->dev_count; i++)
> > -           if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
> > +   for (i = 0; i < bus->dev_count; i++) {
> > +           r = kvm_iodevice_write(bus->devs[i], addr, len, val);
> > +           if (r == -ENOTSYNC)
> > +                   break;
> > +           else if (!r)
> > 
> >                     return 0;
> > 
> > -   return -EOPNOTSUPP;
> > +   }
> > +   return r;
> > 
> >  }
> >  
> >  /* kvm_io_bus_read - called under kvm->slots_lock */
> > 
> > diff --git a/virt/kvm/msix_mmio.c b/virt/kvm/msix_mmio.c
> > new file mode 100644
> > index 0000000..dde9b62
> > --- /dev/null
> > +++ b/virt/kvm/msix_mmio.c
> > @@ -0,0 +1,286 @@
> > +/*
> > + * MSI-X MMIO emulation
> > + *
> > + * Copyright (c) 2010 Intel Corporation
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > + * the COPYING file in the top-level directory.
> > + *
> > + * Author:
> > + *   Sheng Yang <sheng.y...@intel.com>
> > + */
> > +
> > +#include <linux/kvm_host.h>
> > +#include <linux/kvm.h>
> > +
> > +#include "msix_mmio.h"
> > +#include "iodev.h"
> > +
> > +static int update_msix_mask_bit(struct kvm *kvm, struct kvm_msix_mmio
> > *mmio, +                            int entry, u32 flag)
> > +{
> > +   if (mmio->type & KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV)
> > +           return kvm_assigned_device_update_msix_mask_bit(kvm,
> > +                           mmio->dev_id, entry, flag);
> > +   return -EFAULT;
> > +}
> > +
> > +/* Caller must hold dev->lock */
> > +static int get_mmio_table_index(struct kvm_msix_mmio_dev *dev,
> > +                           gpa_t addr, int len)
> > +{
> > +   gpa_t start, end;
> > +   int i, r = -EINVAL;
> > +
> > +   for (i = 0; i < dev->mmio_nr; i++) {
> > +           start = dev->mmio[i].table_base_addr;
> > +           end = dev->mmio[i].table_base_addr + PCI_MSIX_ENTRY_SIZE *
> > +                   dev->mmio[i].max_entries_nr;
> > +           if (addr >= start && addr + len <= end) {
> > +                   r = i;
> > +                   break;
> > +           }
> 
> Yet another potential user of the weight balanced tree ;)

Yeah, we can improve it later.
> 
> > +   }
> > +
> > +   return r;
> > +}
> > +
> > +static int msix_table_mmio_read(struct kvm_io_device *this, gpa_t addr,
> > int len, +                          void *val)
> > +{
> > +   struct kvm_msix_mmio_dev *mmio_dev =
> > +           container_of(this, struct kvm_msix_mmio_dev, table_dev);
> > +   struct kvm_msix_mmio *mmio;
> > +   int idx, ret = 0, entry, offset, r;
> > +
> > +   mutex_lock(&mmio_dev->lock);
> > +   idx = get_mmio_table_index(mmio_dev, addr, len);
> > +   if (idx < 0) {
> > +           ret = -EOPNOTSUPP;
> > +           goto out;
> > +   }
> > +   if ((addr & 0x3) || (len != 4 && len != 8))
> > +           goto out;
> > +
> > +   offset = addr & 0xf;
> > +   if (offset == PCI_MSIX_ENTRY_VECTOR_CTRL && len == 8)
> > +           goto out;
> > +
> > +   mmio = &mmio_dev->mmio[idx];
> > +   entry = (addr - mmio->table_base_addr) / PCI_MSIX_ENTRY_SIZE;
> > +   r = copy_from_user(val, (void __user *)(mmio->table_base_va +
> > +                   entry * PCI_MSIX_ENTRY_SIZE + offset), len);
> > +   if (r)
> > +           goto out;
> > +out:
> > +   mutex_unlock(&mmio_dev->lock);
> > +   return ret;
> > +}
> > +
> > +static int msix_table_mmio_write(struct kvm_io_device *this, gpa_t addr,
> > +                           int len, const void *val)
> > +{
> > +   struct kvm_msix_mmio_dev *mmio_dev =
> > +           container_of(this, struct kvm_msix_mmio_dev, table_dev);
> > +   struct kvm_msix_mmio *mmio;
> > +   int idx, entry, offset, ret = 0, r = 0;
> > +   gpa_t entry_base;
> > +   u32 old_ctrl, new_ctrl;
> > +   u32 *ctrl_pos;
> > +
> > +   mutex_lock(&mmio_dev->kvm->lock);
> > +   mutex_lock(&mmio_dev->lock);
> > +   idx = get_mmio_table_index(mmio_dev, addr, len);
> > +   if (idx < 0) {
> > +           ret = -EOPNOTSUPP;
> > +           goto out;
> > +   }
> > +   if ((addr & 0x3) || (len != 4 && len != 8))
> > +           goto out;
> > +
> > +   offset = addr & 0xF;
> 
> nit, this 0xf above.

So you like another mask?
> 
> > +   if (offset == PCI_MSIX_ENTRY_VECTOR_CTRL && len == 8)
> > +           goto out;
> > +
> > +   mmio = &mmio_dev->mmio[idx];
> > +   entry = (addr - mmio->table_base_addr) / PCI_MSIX_ENTRY_SIZE;
> > +   entry_base = mmio->table_base_va + entry * PCI_MSIX_ENTRY_SIZE;
> > +   ctrl_pos = (u32 *)(entry_base + PCI_MSIX_ENTRY_VECTOR_CTRL);
> > +
> > +   if (get_user(old_ctrl, ctrl_pos))
> > +           goto out;
> > +
> > +   /* No allow writing to other fields when entry is unmasked */
> > +   if (!(old_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT) &&
> > +       offset != PCI_MSIX_ENTRY_VECTOR_CTRL)
> > +           goto out;
> > +
> > +   if (copy_to_user((void __user *)(entry_base + offset), val, len))
> > +           goto out;
> > +
> > +   if (get_user(new_ctrl, ctrl_pos))
> > +           goto out;
> > +
> > +   if ((offset < PCI_MSIX_ENTRY_VECTOR_CTRL && len == 4) ||
> > +       (offset < PCI_MSIX_ENTRY_DATA && len == 8))
> > +           ret = -ENOTSYNC;
> 
> Couldn't we avoid the above get_user(new_ctrl,...) if we tested this
> first?  I'd also prefer to see a direct goto out here since it's not
> entirely obvious that this first 'if' always falls into the below 'if'.
> I'm not a fan of using an error code as a special non-error return
> either.

In fact when offset==PCI_MSIX_ENTRY_DATA and len ==8, we need to check new_ctrl 
to 
see if they indeed modified ctrl bits.

I would try to make the logic here more clear. 
> 
> > +   if (old_ctrl == new_ctrl)
> > +           goto out;
> > +
> > +   if (!(old_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT) &&
> > +                   (new_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT))
> > +           r = update_msix_mask_bit(mmio_dev->kvm, mmio, entry, 1);
> > +   else if ((old_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT) &&
> > +                   !(new_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT))
> > +           r = update_msix_mask_bit(mmio_dev->kvm, mmio, entry, 0);
> 
> An xor would remove some redundancy here
> 
> if ((old_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT) ^ (new_ctrl &
> PCI_MSIX_ENTRY_CTRL_MASKBIT)) r = update_msix_mask_bit(mmio_dev->kvm,
> mmio, entry, !!(new_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT));

Oh, yes, sure...
> 
> > +   if (r || ret)
> > +           ret = -ENOTSYNC;
> > +out:
> > +   mutex_unlock(&mmio_dev->lock);
> > +   mutex_unlock(&mmio_dev->kvm->lock);
> > +   return ret;
> > +}
> > +
> > +static const struct kvm_io_device_ops msix_mmio_table_ops = {
> > +   .read     = msix_table_mmio_read,
> > +   .write    = msix_table_mmio_write,
> > +};
> > +
> > +int kvm_register_msix_mmio_dev(struct kvm *kvm)
> > +{
> > +   int ret;
> > +
> > +   kvm_iodevice_init(&kvm->msix_mmio_dev.table_dev, &msix_mmio_table_ops);
> > +   mutex_init(&kvm->msix_mmio_dev.lock);
> > +   kvm->msix_mmio_dev.kvm = kvm;
> > +   mutex_lock(&kvm->slots_lock);
> > +   ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
> > +                                 &kvm->msix_mmio_dev.table_dev);
> > +   mutex_unlock(&kvm->slots_lock);
> > +   return ret;
> > +}
> > +
> > +int kvm_unregister_msix_mmio_dev(struct kvm *kvm)
> > +{
> > +   int ret;
> > +
> > +   mutex_lock(&kvm->slots_lock);
> > +   ret = kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
> > +                                 &kvm->msix_mmio_dev.table_dev);
> > +   mutex_unlock(&kvm->slots_lock);
> > +   return ret;
> > +}
> > +
> > +int kvm_vm_ioctl_register_msix_mmio(struct kvm *kvm,
> > +                               struct kvm_msix_mmio_user *mmio_user)
> > +{
> > +   struct kvm_msix_mmio_dev *mmio_dev = &kvm->msix_mmio_dev;
> > +   struct kvm_msix_mmio *mmio = NULL;
> > +   int r = 0, i;
> > +
> > +   mutex_lock(&mmio_dev->lock);
> > +   for (i = 0; i < mmio_dev->mmio_nr; i++) {
> > +           if (mmio_dev->mmio[i].dev_id == mmio_user->dev_id &&
> > +               (mmio_dev->mmio[i].type & KVM_MSIX_MMIO_TYPE_DEV_MASK) ==
> > +               (mmio_user->type & KVM_MSIX_MMIO_TYPE_DEV_MASK)) {
> > +                   mmio = &mmio_dev->mmio[i];
> > +                   if (mmio->max_entries_nr != mmio_user->max_entries_nr) {
> > +                           r = -EINVAL;
> > +                           goto out;
> > +                   }
> > +                   break;
> > +           }
> > +   }
> > +   if (mmio_user->max_entries_nr > KVM_MAX_MSIX_PER_DEV) {
> > +           r = -EINVAL;
> > +           goto out;
> > +   }
> > +   /* All reserved currently */
> > +   if (mmio_user->flags) {
> > +           r = -EINVAL;
> > +           goto out;
> > +   }
> > +
> > +   if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_DEV_MASK) !=
> > +                   KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV) {
> > +           r = -EINVAL;
> > +           goto out;
> > +   }
> > +   if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_BASE_MASK) !=
> > +                   KVM_MSIX_MMIO_TYPE_BASE_TABLE) {
> > +           r = -EINVAL;
> > +           goto out;
> > +   }
> > +
> > +   if (!access_ok(VERIFY_WRITE, mmio_user->base_va,
> > +                   mmio_user->max_entries_nr * PCI_MSIX_ENTRY_SIZE)) {
> > +           r = -EINVAL;
> > +           goto out;
> > +   }
> > +   if (!mmio) {
> > +           if (mmio_dev->mmio_nr == KVM_MSIX_MMIO_MAX) {
> > +                   r = -ENOSPC;
> > +                   goto out;
> > +           }
> > +           mmio = &mmio_dev->mmio[mmio_dev->mmio_nr];
> > +   }
> > +
> > +   mmio_dev->mmio_nr++;
> > +   mmio->max_entries_nr = mmio_user->max_entries_nr;
> > +   mmio->dev_id = mmio_user->dev_id;
> > +   mmio->flags = mmio_user->flags;
> > +
> > +   if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_DEV_MASK) ==
> > +                   KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV)
> > +           mmio->type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV;
> > +   if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_BASE_MASK) ==
> > +                   KVM_MSIX_MMIO_TYPE_BASE_TABLE) {
> > +           mmio->type |= KVM_MSIX_MMIO_TYPE_BASE_TABLE;
> > +           mmio->table_base_addr = mmio_user->base_addr;
> > +           mmio->table_base_va = mmio_user->base_va;
> > +   }
> > +out:
> > +   mutex_unlock(&mmio_dev->lock);
> > +   return r;
> > +}
> > +
> > +int kvm_free_msix_mmio(struct kvm *kvm, struct kvm_msix_mmio *mmio)
> > +{
> > +   struct kvm_msix_mmio_dev *mmio_dev = &kvm->msix_mmio_dev;
> > +   int r = 0, i, j;
> 
> set r = -EINVAL here
> 
> > +   bool found = 0;
> > +
> > +   if (!mmio)
> > +           return 0;
> > +
> > +   mutex_lock(&mmio_dev->lock);
> > +   BUG_ON(mmio_dev->mmio_nr > KVM_MSIX_MMIO_MAX);
> > +   for (i = 0; i < mmio_dev->mmio_nr; i++) {
> > +           if (mmio_dev->mmio[i].dev_id == mmio->dev_id &&
> > +               mmio_dev->mmio[i].type == mmio->type) {
> > +                   found = true;
> 
> set r = 0 here
> 
> > +                   for (j = i; j < mmio_dev->mmio_nr - 1; j++)
> > +                           mmio_dev->mmio[j] = mmio_dev->mmio[j + 1];
> > +                   mmio_dev->mmio[mmio_dev->mmio_nr].max_entries_nr = 0;
> > +                   mmio_dev->mmio[mmio_dev->mmio_nr].dev_id = 0;
> > +                   mmio_dev->mmio[mmio_dev->mmio_nr].type = 0;
> > +                   mmio_dev->mmio_nr--;
> > +                   break;
> > +           }
> > +   }
> > +   if (!found)
> > +           r = -EINVAL;
> 
> then get rid of found

Sure.

--
regards
Yang, Sheng

> 
> > +   mutex_unlock(&mmio_dev->lock);
> > +   return r;
> > +}
> > +
> > +int kvm_vm_ioctl_unregister_msix_mmio(struct kvm *kvm,
> > +                                 struct kvm_msix_mmio_user *mmio_user)
> > +{
> > +   struct kvm_msix_mmio mmio;
> > +
> > +   mmio.dev_id = mmio_user->dev_id;
> > +   mmio.type = mmio_user->type;
> > +
> > +   return kvm_free_msix_mmio(kvm, &mmio);
> > +}
> > +
> > diff --git a/virt/kvm/msix_mmio.h b/virt/kvm/msix_mmio.h
> > new file mode 100644
> > index 0000000..01b6587
> > --- /dev/null
> > +++ b/virt/kvm/msix_mmio.h
> > @@ -0,0 +1,25 @@
> > +#ifndef __KVM_MSIX_MMIO_H__
> > +#define __KVM_MSIX_MMIO_H__
> > +/*
> > + * MSI-X MMIO emulation
> > + *
> > + * Copyright (c) 2010 Intel Corporation
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > + * the COPYING file in the top-level directory.
> > + *
> > + * Author:
> > + *   Sheng Yang <sheng.y...@intel.com>
> > + */
> > +
> > +#include <linux/pci.h>
> > +
> > +int kvm_register_msix_mmio_dev(struct kvm *kvm);
> > +int kvm_unregister_msix_mmio_dev(struct kvm *kvm);
> > +int kvm_vm_ioctl_register_msix_mmio(struct kvm *kvm,
> > +                               struct kvm_msix_mmio_user *mmio_user);
> > +int kvm_vm_ioctl_unregister_msix_mmio(struct kvm *kvm,
> > +                                 struct kvm_msix_mmio_user *mmio_user);
> > +int kvm_free_msix_mmio(struct kvm *kvm, struct kvm_msix_mmio
> > *mmio_user); +
> > +#endif
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/3] KVM: Emulate MSI-X table in kernel

Reply via email to