Re: [PATCH] KVM: Fix warning in emulator_cmpxchg_emulated

2011-12-08 Thread Jan Kiszka
On 2011-12-08 07:17, Sasha Levin wrote:
> Make sure 'exchanged' is initialized. Fixes the following warning:
> 
> arch/x86/kvm/x86.c: In function 'emulator_cmpxchg_emulated':
> arch/x86/kvm/x86.c:3794:7: warning: 'exchanged' may be used uninitialized in 
> this function
> 
> Signed-off-by: Sasha Levin 
> ---
>  arch/x86/kvm/x86.c |2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index eeeaf2e..9338afc 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3791,7 +3791,7 @@ static int emulator_cmpxchg_emulated(struct 
> x86_emulate_ctxt *ctxt,
>   gpa_t gpa;
>   struct page *page;
>   char *kaddr;
> - bool exchanged;
> + bool exchanged = false;
>  
>   /* guests cmpxchg8b have to be emulated atomically */
>   if (bytes > 8 || (bytes & (bytes - 1)))

Isn't BUG() tagged as no-return in the kernel? Because this
initialization is obviously unneeded.

Jan



signature.asc
Description: OpenPGP digital signature


Re: [PATCH] KVM: Fix warning in emulator_cmpxchg_emulated

2011-12-08 Thread Sasha Levin
On Thu, 2011-12-08 at 09:04 +0100, Jan Kiszka wrote:
> On 2011-12-08 07:17, Sasha Levin wrote:
> > Make sure 'exchanged' is initialized. Fixes the following warning:
> > 
> > arch/x86/kvm/x86.c: In function 'emulator_cmpxchg_emulated':
> > arch/x86/kvm/x86.c:3794:7: warning: 'exchanged' may be used uninitialized 
> > in this function
> > 
> > Signed-off-by: Sasha Levin 
> > ---
> >  arch/x86/kvm/x86.c |2 +-
> >  1 files changed, 1 insertions(+), 1 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index eeeaf2e..9338afc 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -3791,7 +3791,7 @@ static int emulator_cmpxchg_emulated(struct 
> > x86_emulate_ctxt *ctxt,
> > gpa_t gpa;
> > struct page *page;
> > char *kaddr;
> > -   bool exchanged;
> > +   bool exchanged = false;
> >  
> > /* guests cmpxchg8b have to be emulated atomically */
> > if (bytes > 8 || (bytes & (bytes - 1)))
> 
> Isn't BUG() tagged as no-return in the kernel? Because this
> initialization is obviously unneeded.

It looks like __builtin_unreachable() requires a gcc newer than mine.

-- 

Sasha.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] virt-test: Add a class NumaNode

2011-12-08 Thread Amos Kong
Dynamically check the host hardware, use a dict to record the pin status,
and pin a process to a single cpu with the 'taskset' command.

Guest memory pinning is already implemented in the framework.
Process pinning needs to be done in the testcases.

Example:
|  numa_node = -1 # last node
|  p = virt_utils.NumaNode(numa_node)
|  vhost_threads = commands.getoutput("ps aux |grep '\[vhost-.*\]'
|  |grep -v grep|awk '{print $2}'")
|  for i in vhost_threads.split():
|  logging.debug("pin vhost_net thread(%s) to host cpu node" % i)
|  p.pin_cpu(i)
|  o = vm.monitor.info("cpus")
|  for i in re.findall("thread_id=(\d+)", o):
|  logging.debug("pin vcpu thread(%s) to host cpu node" % i)
|  p.pin_cpu(i)
|  p.show()

Signed-off-by: Amos Kong 
---
 client/tests/kvm/base.cfg.sample |5 
 client/virt/kvm_vm.py|8 ++
 client/virt/virt_utils.py|   50 ++
 3 files changed, 63 insertions(+), 0 deletions(-)

diff --git a/client/tests/kvm/base.cfg.sample b/client/tests/kvm/base.cfg.sample
index c99add6..0e8555e 100644
--- a/client/tests/kvm/base.cfg.sample
+++ b/client/tests/kvm/base.cfg.sample
@@ -146,6 +146,11 @@ shell_port = 22
 used_cpus = 1
 used_mem = 512
 
+# Numa pin params
+# pin guest memory to 1st numa node
+# pin processes to host cpu of 1st node
+# numa_node = 1
+
 # Port redirections
 redirs = remote_shell
 guest_port_remote_shell = 22
diff --git a/client/virt/kvm_vm.py b/client/virt/kvm_vm.py
index 6747c2b..fa258c3 100644
--- a/client/virt/kvm_vm.py
+++ b/client/virt/kvm_vm.py
@@ -473,6 +473,14 @@ class VM(virt_vm.BaseVM):
 qemu_cmd += "LD_LIBRARY_PATH=%s " % library_path
 if params.get("qemu_audio_drv"):
 qemu_cmd += "QEMU_AUDIO_DRV=%s " % params.get("qemu_audio_drv")
+# Add numa memory cmd to pin guest memory to numa node
+if params.get("numa_node"):
+numa_node = int(params.get("numa_node"))
+if numa_node < 0:
+p = virt_utils.NumaNode(numa_node)
+qemu_cmd += "numactl -m %s " % (int(p.get_node_num()) + 
numa_node)
+else:
+qemu_cmd += "numactl -m %s " % (numa_node - 1)
 # Add the qemu binary
 qemu_cmd += qemu_binary
 # Add the VM's name
diff --git a/client/virt/virt_utils.py b/client/virt/virt_utils.py
index 7759b2d..7ef0c0b 100644
--- a/client/virt/virt_utils.py
+++ b/client/virt/virt_utils.py
@@ -3445,3 +3445,53 @@ def virt_test_assistant(test_name, test_dir, base_dir, 
default_userspace_paths,
 logging.info("Autotest prints the results dir, so you can look at DEBUG "
  "logs if something went wrong")
 logging.info("You can also edit the test config files")
+
+
+class NumaNode():
+""" Numa node to control the processes and shared memory """
+def __init__(self, i=-1):
+self.num = self.get_node_num()
+if i < 0:
+self.cpus = self.get_node_cpus(int(self.num)+i).split()
+else:
+self.cpus = self.get_node_cpus(i-1).split()
+self.dict = {}
+for i in self.cpus:
+self.dict[i] = "free"
+
+def get_node_num(self):
+""" Get the nodes number of host """
+output = commands.getoutput("numactl --hardware")
+return re.findall("available: (\d+) nodes", output)[0]
+
+def get_node_cpus(self, i):
+""" Get cpus of one node """
+output = commands.getoutput("numactl --hardware")
+return re.findall("node %s cpus: (.*)" % i, output)[0]
+
+def free_cpu(self, i):
+""" Release pin flag """
+self.dict[i] = "free"
+
+def _flush_pin(self):
+""" Flush pin dict, remove the record of exited process """
+o = commands.getoutput("ps -eLf |awk '{print $4}'")
+for i in self.cpus:
+if self.dict[i] != "free" and self.dict[i] not in o:
+self.free_cpu(i)
+
+def pin_cpu(self, process):
+""" Pin one process to a single cpu """
+self._flush_pin()
+for i in self.cpus:
+if self.dict[i] == "free":
+self.dict[i] = str(process)
+cmd = "taskset -p %s %s" % (hex(2**int(i)), process)
+logging.debug("NumaNode (%s): " % i + cmd)
+commands.getoutput(cmd)
+return i
+
+def show(self):
+""" Display the record dict """
+for i in self.cpus:
+print "%s: %s" % (i, self.dict[i])

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 07/12] [PATCH] kvm-s390-ucontrol: interface to inject faults on a vcpu page table

2011-12-08 Thread Carsten Otte
This patch allows the user to fault in pages on a virtual cpu's
address space for user controlled virtual machines. Typically this
is superfluous because userspace can just create a mapping and
let the kernel's page fault logic take care of it. There is one
exception: SIE won't start if the lowcore is not present. Normally
the kernel takes care of this [handle_validity() in
arch/s390/kvm/intercept.c] but since the kernel does not handle
intercepts for user controlled virtual machines, userspace needs to
be able to handle this condition.

Signed-off-by: Carsten Otte 
---
---
 Documentation/virtual/kvm/api.txt |   16 
 arch/s390/kvm/kvm-s390.c  |6 ++
 include/linux/kvm.h   |1 +
 3 files changed, 23 insertions(+)

--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1498,6 +1498,22 @@ This ioctl unmaps the memory in the vcpu
 "vcpu_addr" with the length "length". The field "user_addr" is ignored.
 All parameters need to be alligned by 1 megabyte.
 
+4.66 KVM_S390_VCPU_FAULT
+
+Capability: KVM_CAP_UCONTROL
+Architectures: s390
+Type: vcpu ioctl
+Parameters: vcpu absolute address (in)
+Returns: 0 in case of success
+
+This call creates a page table entry on the virtual cpu's address space
+(for user controlled virtual machines) or the virtual machine's address
+space (for regular virtual machines). This only works for minor faults,
+thus it's recommended to access subject memory page via the user page
+table upfront. This is useful to handle validity intercepts for user
+controlled virtual machines to fault in the virtual cpu's lowcore pages
+prior to calling the KVM_RUN ioctl.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -767,6 +767,12 @@ long kvm_arch_vcpu_ioctl(struct file *fi
break;
}
 #endif
+   case KVM_S390_VCPU_FAULT: {
+   r = gmap_fault(arg, vcpu->arch.gmap);
+   if (!IS_ERR_VALUE(r))
+   r = 0;
+   break;
+   }
default:
r = -EINVAL;
}
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -673,6 +673,7 @@ struct kvm_s390_ucas_mapping {
 };
 #define KVM_S390_UCAS_MAP_IOW(KVMIO, 0x50, struct 
kvm_s390_ucas_mapping)
 #define KVM_S390_UCAS_UNMAP  _IOW(KVMIO, 0x51, struct 
kvm_s390_ucas_mapping)
+#define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long)
 
 /* Device model IOC */
 #define KVM_CREATE_IRQCHIP_IO(KVMIO,   0x60)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 00/12] Ucontrol patchset V2

2011-12-08 Thread Carsten Otte
Hi Avi, Hi Marcelo,

I think I've integrated all feedback from last round. The race
between KVM_S390_ENABLE_UCONTROL and creation of vcpus has been
resolved by adding a parameter to KVM_CREATE_VM. The default
KVM_VM_REGULAR (==0) is backward compatible to KVM_CREATE_VM
without parameters, and KVM_VM_S390_UCONTROL enables user controlled
virtual machines for CECSIM. The extra ioctl is gone.
The page table walk in the storage key patch has moved to arch/s390/mm,
and I think this patch should go with the rest of this series via your
tree after Martin reviewed it.

so long,
Carsten

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 11/12] [PATCH] kvm-s390-ucontrol: announce capability for user controlled vms

2011-12-08 Thread Carsten Otte
This patch announces a new capability KVM_CAP_S390_UCONTROL that
indicates that kvm can now support virtual machines that are
controlled by userspace.

Signed-off-by: Carsten Otte 
---
---
 arch/s390/kvm/kvm-s390.c |3 +++
 include/linux/kvm.h  |1 +
 2 files changed, 4 insertions(+)

--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -226,6 +226,9 @@ int kvm_dev_ioctl_check_extension(long e
case KVM_CAP_S390_PSW:
case KVM_CAP_S390_GMAP:
case KVM_CAP_SYNC_MMU:
+#ifdef CONFIG_KVM_UCONTROL
+   case KVM_CAP_S390_UCONTROL:
+#endif
r = 1;
break;
default:
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -574,6 +574,7 @@ struct kvm_s390_keyop {
 #define KVM_CAP_MAX_VCPUS 66   /* returns max vcpus per vm */
 #define KVM_CAP_PPC_PAPR 68
 #define KVM_CAP_S390_GMAP 71
+#define KVM_CAP_S390_UCONTROL 72
 
 #ifdef KVM_CAP_IRQ_ROUTING
 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 02/12] [PATCH] kvm-s390-ucontrol: per vcpu address spaces

2011-12-08 Thread Carsten Otte
This patch introduces two ioctls for virtual cpus, that are only
valid for kernel virtual machines that are controlled by userspace.
Each virtual cpu has its individual address space in this mode of
operation, and each address space is backed by the gmap
implementation just like the address space for regular KVM guests.
KVM_S390_UCAS_MAP allows mapping a part of the user's virtual address
space to the vcpu. Starting offset and length in both the user and
the vcpu address space need to be aligned to 1M.
KVM_S390_UCAS_UNMAP can be used to unmap a range of memory from a
virtual cpu in a similar way.

Signed-off-by: Carsten Otte 
---
---
 Documentation/virtual/kvm/api.txt |   38 
 arch/s390/kvm/kvm-s390.c  |   50 +-
 include/linux/kvm.h   |   10 +++
 3 files changed, 97 insertions(+), 1 deletion(-)

--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1455,6 +1455,44 @@ is supported; 2 if the processor require
 an RMA, or 1 if the processor can use an RMA but doesn't require it,
 because it supports the Virtual RMA (VRMA) facility.
 
+4.64 KVM_S390_UCAS_MAP
+
+Capability: KVM_CAP_UCONTROL
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_ucas_mapping (in)
+Returns: 0 in case of success
+
+The parameter is defined like this:
+   struct kvm_s390_ucas_mapping {
+   __u64 user_addr;
+   __u64 vcpu_addr;
+   __u64 length;
+   };
+
+This ioctl maps the memory at "user_addr" with the length "length" to
+the vcpu's address space starting at "vcpu_addr". All parameters need to
+be alligned by 1 megabyte.
+
+4.65 KVM_S390_UCAS_UNMAP
+
+Capability: KVM_CAP_UCONTROL
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_ucas_mapping (in)
+Returns: 0 in case of success
+
+The parameter is defined like this:
+   struct kvm_s390_ucas_mapping {
+   __u64 user_addr;
+   __u64 vcpu_addr;
+   __u64 length;
+   };
+
+This ioctl unmaps the memory in the vcpu's address space starting at
+"vcpu_addr" with the length "length". The field "user_addr" is ignored.
+All parameters need to be alligned by 1 megabyte.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -239,6 +239,10 @@ void kvm_arch_vcpu_destroy(struct kvm_vc
(__u64) vcpu->arch.sie_block)
vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
smp_mb();
+
+   if (kvm_is_ucontrol(vcpu->kvm))
+   gmap_free(vcpu->arch.gmap);
+
free_page((unsigned long)(vcpu->arch.sie_block));
kvm_vcpu_uninit(vcpu);
kfree(vcpu);
@@ -269,12 +273,20 @@ void kvm_arch_destroy_vm(struct kvm *kvm
kvm_free_vcpus(kvm);
free_page((unsigned long)(kvm->arch.sca));
debug_unregister(kvm->arch.dbf);
-   gmap_free(kvm->arch.gmap);
+   if (!kvm_is_ucontrol(kvm))
+   gmap_free(kvm->arch.gmap);
 }
 
 /* Section: vcpu related */
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
+   if (kvm_is_ucontrol(vcpu->kvm)) {
+   vcpu->arch.gmap = gmap_alloc(current->mm);
+   if (!vcpu->arch.gmap)
+   return -ENOMEM;
+   return 0;
+   }
+
vcpu->arch.gmap = vcpu->kvm->arch.gmap;
return 0;
 }
@@ -693,6 +705,42 @@ long kvm_arch_vcpu_ioctl(struct file *fi
case KVM_S390_INITIAL_RESET:
r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
break;
+#ifdef CONFIG_KVM_UCONTROL
+   case KVM_S390_UCAS_MAP: {
+   struct kvm_s390_ucas_mapping ucasmap;
+
+   if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
+   r = -EFAULT;
+   break;
+   }
+
+   if (!kvm_is_ucontrol(vcpu->kvm)) {
+   r = -EINVAL;
+   break;
+   }
+
+   r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
+ucasmap.vcpu_addr, ucasmap.length);
+   break;
+   }
+   case KVM_S390_UCAS_UNMAP: {
+   struct kvm_s390_ucas_mapping ucasmap;
+
+   if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
+   r = -EFAULT;
+   break;
+   }
+
+   if (!kvm_is_ucontrol(vcpu->kvm)) {
+   r = -EINVAL;
+   break;
+   }
+
+   r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
+   ucasmap.length);
+   break;
+   }
+#endif
default:
r = -EINVAL;
}
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -657,6 +657,16 @@ struct kvm_clock_data {
struct kvm_use

[patch 10/12] [PATCH] kvm-s390: storage key interface

2011-12-08 Thread Carsten Otte
This patch introduces an interface to access the guest visible
storage keys. It supports three operations that model the behavior
that SSKE/ISKE/RRBE instructions would have if they were issued by
the guest. These instructions are all documented in the z architecture
principles of operation book.

Signed-off-by: Carsten Otte 
---
---
 Documentation/virtual/kvm/api.txt |   38 ++
 arch/s390/include/asm/kvm_host.h  |4 +
 arch/s390/include/asm/pgtable.h   |1 
 arch/s390/kvm/kvm-s390.c  |  103 --
 arch/s390/mm/pgtable.c|   70 +++--
 include/linux/kvm.h   |7 ++
 6 files changed, 202 insertions(+), 21 deletions(-)

--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1514,6 +1514,44 @@ table upfront. This is useful to handle
 controlled virtual machines to fault in the virtual cpu's lowcore pages
 prior to calling the KVM_RUN ioctl.
 
+4.67 KVM_S390_KEYOP
+
+Capability: KVM_CAP_UCONTROL
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_keyop (in+out)
+Returns: 0 in case of success
+
+The parameter looks like this:
+   struct kvm_s390_keyop {
+   __u64 user_addr;
+   __u8  key;
+   __u8  operation;
+   };
+
+user_addr  contains the userspace address of a memory page
+keycontains the guest visible storage key as defined by the
+   z Architecture Principles of Operation book, including key
+   value for key controlled storage protection, the fetch
+   protection bit, and the reference and change indicator bits
+operation  indicates the key operation that should be performed
+
+The following operations are supported:
+KVM_S390_KEYOP_SSKE:
+   This operation behaves just like the set storage key extended (SSKE)
+   instruction would, if it were issued by the guest. The storage key
+   provided in "key" is placed in the guest visible storage key.
+KVM_S390_KEYOP_ISKE:
+   This operation behaves just like the insert storage key extended (ISKE)
+   instruction would, if it were issued by the guest. After this call,
+   the guest visible storage key is presented in the "key" field.
+KVM_S390_KEYOP_RRBE:
+   This operation behaves just like the reset referenced bit extended
+   (RRBE) instruction would, if it were issued by the guest. The guest
+   visible reference bit is cleared, and the value presented in the "key"
+   field after this call has the reference bit set to 1 in case the
+   guest view of the reference bit was 1 prior to this call.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -24,6 +24,10 @@
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
 
+#define KVM_S390_KEYOP_SSKE 0x01
+#define KVM_S390_KEYOP_ISKE 0x02
+#define KVM_S390_KEYOP_RRBE 0x03
+
 struct sca_entry {
atomic_t scn;
__u32   reserved;
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1254,6 +1254,7 @@ static inline pte_t mk_swap_pte(unsigned
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern int vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int s390_enable_sie(void);
+extern pte_t *ptep_for_addr(unsigned long addr);
 
 /*
  * No page table caches to initialise
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -112,13 +112,110 @@ void kvm_arch_exit(void)
 {
 }
 
+static long kvm_s390_keyop(struct kvm_s390_keyop *kop)
+{
+   unsigned long addr = kop->user_addr;
+   pte_t *ptep;
+   pgste_t pgste;
+   int r;
+   unsigned long skey;
+   unsigned long bits;
+
+   /* make sure this process is a hypervisor */
+   r = -EINVAL;
+   if (!mm_has_pgste(current->mm))
+   goto out;
+
+   r = -ENXIO;
+   if (addr >= PGDIR_SIZE)
+   goto out;
+
+   spin_lock(¤t->mm->page_table_lock);
+   ptep = ptep_for_addr(addr);
+   if (!ptep)
+   goto out_unlock;
+
+   pgste = pgste_get_lock(ptep);
+
+   switch (kop->operation) {
+   case KVM_S390_KEYOP_SSKE:
+   pgste = pgste_update_all(ptep, pgste);
+   /* set the real key back w/o rc bits */
+   skey = kop->key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
+   if (pte_present(*ptep))
+   page_set_storage_key(pte_val(*ptep), skey, 1);
+   /* put acc+f plus guest refereced and changed into the pgste */
+   pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT | RCP_GR_BIT
+| RCP_GC_BIT);
+   bits = (kop->key & (_PAGE_ACC_BITS | _PAGE_FP_BIT));
+   pgste_val(pgste) |= bits << 56;
+   bits = (kop->key & (_PA

[patch 03/12] [PATCH] kvm-s390-ucontrol: export page faults to user

2011-12-08 Thread Carsten Otte
This patch introduces a new exit reason in the kvm_run structure
named KVM_EXIT_UCONTROL. This exit indicates that a virtual cpu
has recognized a fault on the host page table. The idea is that
userspace can handle this fault by mapping memory at the fault
location into the cpu's address space and then continue to run the
virtual cpu.

Signed-off-by: Carsten Otte 
---
---
 Documentation/virtual/kvm/api.txt |   14 ++
 arch/s390/kvm/kvm-s390.c  |   32 +++-
 arch/s390/kvm/kvm-s390.h  |1 +
 include/linux/kvm.h   |6 ++
 4 files changed, 48 insertions(+), 5 deletions(-)

--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1653,6 +1653,20 @@ s390 specific.
 
 s390 specific.
 
+   /* KVM_EXIT_UCONTROL */
+   struct {
+   __u64 trans_exc_code;
+   __u32 pgm_code;
+   } s390_ucontrol;
+
+s390 specific. A page fault has occurred for a user controlled virtual
+machine (KVM_VM_S390_UCONTROL) on its host page table that cannot be
+resolved by the kernel.
+The program code and the translation exception code that were placed
+in the cpu's lowcore are presented here as defined by the z Architecture
+Principles of Operation Book in the Chapter for Dynamic Address Translation
+(DAT)
+
/* KVM_EXIT_DCR */
struct {
__u32 dcrn;
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -499,8 +499,10 @@ int kvm_arch_vcpu_ioctl_set_mpstate(stru
return -EINVAL; /* not implemented yet */
 }
 
-static void __vcpu_run(struct kvm_vcpu *vcpu)
+static int __vcpu_run(struct kvm_vcpu *vcpu)
 {
+   int rc;
+
memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
 
if (need_resched())
@@ -517,9 +519,15 @@ static void __vcpu_run(struct kvm_vcpu *
local_irq_enable();
VCPU_EVENT(vcpu, 6, "entering sie flags %x",
   atomic_read(&vcpu->arch.sie_block->cpuflags));
-   if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
-   VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
-   kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+   rc = sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs);
+   if (rc) {
+   if (kvm_is_ucontrol(vcpu->kvm)) {
+   rc = SIE_INTERCEPT_UCONTROL;
+   } else {
+   VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+   kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+   rc = 0;
+   }
}
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
   vcpu->arch.sie_block->icptcode);
@@ -528,6 +536,7 @@ static void __vcpu_run(struct kvm_vcpu *
local_irq_enable();
 
memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
+   return rc;
 }
 
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -548,6 +557,7 @@ rerun_vcpu:
case KVM_EXIT_UNKNOWN:
case KVM_EXIT_INTR:
case KVM_EXIT_S390_RESET:
+   case KVM_EXIT_UCONTROL:
break;
default:
BUG();
@@ -559,7 +569,9 @@ rerun_vcpu:
might_fault();
 
do {
-   __vcpu_run(vcpu);
+   rc = __vcpu_run(vcpu);
+   if (rc)
+   break;
rc = kvm_handle_sie_intercept(vcpu);
} while (!signal_pending(current) && !rc);
 
@@ -571,6 +583,16 @@ rerun_vcpu:
rc = -EINTR;
}
 
+#ifdef CONFIG_KVM_UCONTROL
+   if (rc == SIE_INTERCEPT_UCONTROL) {
+   kvm_run->exit_reason = KVM_EXIT_UCONTROL;
+   kvm_run->s390_ucontrol.trans_exc_code =
+   current->thread.gmap_addr;
+   kvm_run->s390_ucontrol.pgm_code = 0x10;
+   rc = 0;
+   }
+#endif
+
if (rc == -EOPNOTSUPP) {
/* intercept cannot be handled in-kernel, prepare kvm-run */
kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -26,6 +26,7 @@ typedef int (*intercept_handler_t)(struc
 
 /* negativ values are error codes, positive values for internal conditions */
 #define SIE_INTERCEPT_RERUNVCPU(1<<0)
+#define SIE_INTERCEPT_UCONTROL (1<<1)
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
 
 #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -162,6 +162,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_INTERNAL_ERROR   17
 #define KVM_EXIT_OSI  18
 #define KVM_EXIT_PAPR_HCALL  19
+#define KVM_EXIT_UCONTROL20
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 #define KVM_INTERNAL_ERROR_EMULATION 1
@@ -249,6 +250,11 @@ struct kvm_run {
 #define KVM_S390_RESE

[patch 08/12] [PATCH] kvm-s390-ucontrol: disable sca

2011-12-08 Thread Carsten Otte
This patch makes sure user controlled virtual machines do not use a
system control area (sca). This is needed in order to create
virtual machines with more cpus than the size of the sca [64].

Signed-off-by: Carsten Otte 
---
Index: linux-2.5-cecsim/arch/s390/kvm/kvm-s390.c
===
--- linux-2.5-cecsim.orig/arch/s390/kvm/kvm-s390.c
+++ linux-2.5-cecsim/arch/s390/kvm/kvm-s390.c
@@ -234,10 +234,13 @@ out_err:
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
-   clear_bit(63 - vcpu->vcpu_id, (unsigned long *) 
&vcpu->kvm->arch.sca->mcn);
-   if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
-   (__u64) vcpu->arch.sie_block)
-   vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
+   if (!kvm_is_ucontrol(vcpu->kvm)) {
+   clear_bit(63 - vcpu->vcpu_id,
+ (unsigned long *) &vcpu->kvm->arch.sca->mcn);
+   if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
+   (__u64) vcpu->arch.sie_block)
+   vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
+   }
smp_mb();
 
if (kvm_is_ucontrol(vcpu->kvm))
@@ -374,12 +377,19 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st
goto out_free_cpu;
 
vcpu->arch.sie_block->icpua = id;
-   BUG_ON(!kvm->arch.sca);
-   if (!kvm->arch.sca->cpu[id].sda)
-   kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
-   vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
-   vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
-   set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
+   if (!kvm_is_ucontrol(kvm)) {
+   if (!kvm->arch.sca) {
+   WARN_ON_ONCE(1);
+   goto out_free_cpu;
+   }
+   if (!kvm->arch.sca->cpu[id].sda)
+   kvm->arch.sca->cpu[id].sda =
+   (__u64) vcpu->arch.sie_block;
+   vcpu->arch.sie_block->scaoh =
+   (__u32)(((__u64)kvm->arch.sca) >> 32);
+   vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
+   set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
+   }
 
spin_lock_init(&vcpu->arch.local_int.lock);
INIT_LIST_HEAD(&vcpu->arch.local_int.list);

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 01/12] [PATCH] kvm-s390: ioctl to switch to user controlled virtual machines

2011-12-08 Thread Carsten Otte
This patch introduces a new config option for user controlled kernel
virtual machines. It introduces an optional parameter to
KVM_CREATE_VM in order to create a user controlled virtual machine.
The parameter is passed to kvm_arch_init_vm for all architectures.
Valid values for the new parameter are KVM_VM_REGULAR (defined to 0
for backward compatibility to old KVM_CREATE_VM) and
KVM_VM_S390_UCONTROL for s390 only.
Note that the user controlled virtual machines require CAP_SYS_ADMIN
privileges.

Signed-off-by: Carsten Otte 
---
---
 Documentation/virtual/kvm/api.txt |7 ++-
 arch/ia64/kvm/kvm-ia64.c  |5 -
 arch/powerpc/kvm/powerpc.c|5 -
 arch/s390/kvm/Kconfig |9 +
 arch/s390/kvm/kvm-s390.c  |   30 +-
 arch/s390/kvm/kvm-s390.h  |   10 ++
 arch/x86/kvm/x86.c|5 -
 include/linux/kvm.h   |3 +++
 include/linux/kvm_host.h  |2 +-
 virt/kvm/kvm_main.c   |   19 +--
 10 files changed, 79 insertions(+), 16 deletions(-)

--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -95,7 +95,7 @@ described as 'basic' will be available.
 Capability: basic
 Architectures: all
 Type: system ioctl
-Parameters: none
+Parameters: machine type identifier (KVM_VM_*)
 Returns: a VM fd that can be used to control the new virtual machine.
 
 The new VM has no virtual cpus and no memory.  An mmap() of a VM fd
@@ -103,6 +103,11 @@ will access the virtual machine's physic
 corresponds to guest physical address zero.  Use of mmap() on a VM fd
 is discouraged if userspace memory allocation (KVM_CAP_USER_MEMORY) is
 available.
+You most certainly want to use KVM_VM_REGULAR as machine type.
+
+In order to create user controlled virtual machines on S390, check
+KVM_CAP_S390_UCONTROL and use KVM_VM_S390_UCONTROL as machine type as
+privileged user (CAP_SYS_ADMIN).
 
 4.3 KVM_GET_MSR_INDEX_LIST
 
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -809,10 +809,13 @@ static void kvm_build_io_pmt(struct kvm
 #define GUEST_PHYSICAL_RR4 0x2739
 #define VMM_INIT_RR0x1660
 
-int kvm_arch_init_vm(struct kvm *kvm)
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
BUG_ON(!kvm);
 
+   if (type != KVM_VM_REGULAR)
+   return -EINVAL;
+
kvm->arch.is_sn2 = ia64_platform_is("sn2");
 
kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -171,8 +171,11 @@ void kvm_arch_check_processor_compat(voi
*(int *)rtn = kvmppc_core_check_processor_compat();
 }
 
-int kvm_arch_init_vm(struct kvm *kvm)
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
+   if (type != KVM_VM_REGULAR)
+   return -EINVAL;
+
return kvmppc_core_init_vm(kvm);
 }
 
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -34,6 +34,15 @@ config KVM
 
  If unsure, say N.
 
+config KVM_UCONTROL
+   bool "Userspace controlled virtual machines"
+   depends on KVM
+   ---help---
+ Allow CAP_SYS_ADMIN users to create KVM virtual machines that are
+ controlled by userspace.
+
+ If unsure, say N.
+
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 source drivers/vhost/Kconfig
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -171,11 +171,28 @@ long kvm_arch_vm_ioctl(struct file *filp
return r;
 }
 
-int kvm_arch_init_vm(struct kvm *kvm)
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
int rc;
char debug_name[16];
 
+   rc = -EINVAL;
+#ifdef CONFIG_KVM_UCONTROL
+   switch (type) {
+   case KVM_VM_REGULAR:
+   break;
+   case KVM_VM_S390_UCONTROL:
+   if (!capable(CAP_SYS_ADMIN))
+   goto out_err;
+   break;
+   default:
+   goto out_err;
+   }
+#else
+   if (type != KVM_VM_REGULAR)
+   goto out_err;
+#endif
+
rc = s390_enable_sie();
if (rc)
goto out_err;
@@ -198,10 +215,13 @@ int kvm_arch_init_vm(struct kvm *kvm)
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
VM_EVENT(kvm, 3, "%s", "vm created");
 
-   kvm->arch.gmap = gmap_alloc(current->mm);
-   if (!kvm->arch.gmap)
-   goto out_nogmap;
-
+   if (type == KVM_VM_REGULAR) {
+   kvm->arch.gmap = gmap_alloc(current->mm);
+   if (!kvm->arch.gmap)
+   goto out_nogmap;
+   } else {
+   kvm->arch.gmap = NULL;
+   }
return 0;
 out_nogmap:
debug_unregister(kvm->arch.dbf);
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -47,6 +47,16 @@ static inline int __cpu_is_stopped(struc
return atomic_read(&vcpu->arch.sie_block->cpuf

[patch 09/12] [PATCH] kvm-s390: fix assumption for KVM_MAX_VCPUS

2011-12-08 Thread Carsten Otte
This patch fixes definition of the idle_mask and the local_int array
in kvm_s390_float_interrupt. Previous definition had 64 cpus max
hardcoded instead of using KVM_MAX_VCPUS.

Signed-off-by: Carsten Otte 
---
Index: linux-2.5-cecsim/arch/s390/include/asm/kvm_host.h
===
--- linux-2.5-cecsim.orig/arch/s390/include/asm/kvm_host.h
+++ linux-2.5-cecsim/arch/s390/include/asm/kvm_host.h
@@ -220,8 +220,9 @@ struct kvm_s390_float_interrupt {
struct list_head list;
atomic_t active;
int next_rr_cpu;
-   unsigned long idle_mask [(64 + sizeof(long) - 1) / sizeof(long)];
-   struct kvm_s390_local_interrupt *local_int[64];
+   unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1)
+   / sizeof(long)];
+   struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS];
 };
 
 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 06/12] [PATCH] kvm-s390-ucontrol: disable in-kernel irq stack

2011-12-08 Thread Carsten Otte
This patch disables the in-kernel interrupt stack for KVM virtual
machines that are controlled by user. Userspace has to take care
of handling interrupts on its own.

Signed-off-by: Carsten Otte 
---
Index: linux-2.5-cecsim/arch/s390/kvm/kvm-s390.c
===
--- linux-2.5-cecsim.orig/arch/s390/kvm/kvm-s390.c
+++ linux-2.5-cecsim/arch/s390/kvm/kvm-s390.c
@@ -511,7 +511,8 @@ static int __vcpu_run(struct kvm_vcpu *v
if (test_thread_flag(TIF_MCCK_PENDING))
s390_handle_mcck();
 
-   kvm_s390_deliver_pending_interrupts(vcpu);
+   if (!kvm_is_ucontrol(vcpu->kvm))
+   kvm_s390_deliver_pending_interrupts(vcpu);
 
vcpu->arch.sie_block->icptcode = 0;
local_irq_disable();

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 04/12] [PATCH] kvm-s390-ucontrol: export SIE control block to user

2011-12-08 Thread Carsten Otte
This patch exports the s390 SIE hardware control block to userspace
via the mapping of the vcpu file descriptor. In order to do so,
a new arch callback named kvm_arch_vcpu_fault is introduced for all
architectures. It allows architecture-specific pages to be mapped.

Signed-off-by: Carsten Otte 
---
---
 Documentation/virtual/kvm/api.txt |5 +
 arch/ia64/kvm/kvm-ia64.c  |5 +
 arch/powerpc/kvm/powerpc.c|5 +
 arch/s390/kvm/kvm-s390.c  |   13 +
 arch/x86/kvm/x86.c|5 +
 include/linux/kvm.h   |1 +
 include/linux/kvm_host.h  |1 +
 virt/kvm/kvm_main.c   |2 +-
 8 files changed, 36 insertions(+), 1 deletion(-)

--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -218,6 +218,11 @@ allocation of vcpu ids.  For example, if
 single-threaded guest vcpus, it should make all vcpu ids be a multiple
 of the number of vcpus per vcore.
 
+For virtual cpus that have been created with S390 user controlled virtual
+machines, the resulting vcpu fd can be memory mapped at page offset
+KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual
+cpu's hardware control block.
+
 4.8 KVM_GET_DIRTY_LOG (vm ioctl)
 
 Capability: basic
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1566,6 +1566,11 @@ out:
return r;
 }
 
+int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+   return VM_FAULT_SIGBUS;
+}
+
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -659,6 +659,11 @@ out:
return r;
 }
 
+int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+   return VM_FAULT_SIGBUS;
+}
+
 static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
 {
u32 inst_lis = 0x3c00;
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -769,6 +769,19 @@ long kvm_arch_vcpu_ioctl(struct file *fi
return r;
 }
 
+int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+#ifdef CONFIG_KVM_UCONTROL
+   if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
+&& (kvm_is_ucontrol(vcpu->kvm))) {
+   vmf->page = virt_to_page(vcpu->arch.sie_block);
+   get_page(vmf->page);
+   return 0;
+   }
+#endif
+   return VM_FAULT_SIGBUS;
+}
+
 /* Section: memory related */
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
   struct kvm_memory_slot *memslot,
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2790,6 +2790,11 @@ out:
return r;
 }
 
+int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+   return VM_FAULT_SIGBUS;
+}
+
 static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
 {
int ret;
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -439,6 +439,7 @@ struct kvm_ppc_pvinfo {
 
 #define KVM_VM_REGULAR   0
 #define KVM_VM_S390_UCONTROL 1
+#define KVM_S390_SIE_PAGE_OFFSET 1
 
 /*
  * ioctls for /dev/kvm fds:
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -449,6 +449,7 @@ long kvm_arch_dev_ioctl(struct file *fil
unsigned int ioctl, unsigned long arg);
 long kvm_arch_vcpu_ioctl(struct file *filp,
 unsigned int ioctl, unsigned long arg);
+int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf);
 
 int kvm_dev_ioctl_check_extension(long ext);
 
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1657,7 +1657,7 @@ static int kvm_vcpu_fault(struct vm_area
page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
 #endif
else
-   return VM_FAULT_SIGBUS;
+   return kvm_arch_vcpu_fault(vcpu, vmf);
get_page(page);
vmf->page = page;
return 0;

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 05/12] [PATCH] kvm-s390-ucontrol: disable in-kernel handling of SIE intercepts

2011-12-08 Thread Carsten Otte
This patch disables in-kernel handling of SIE intercepts for user
controlled virtual machines. All intercepts are passed to userspace
via KVM_EXIT_SIE exit reason just like SIE intercepts that cannot be
handled in-kernel for regular KVM guests.

Signed-off-by: Carsten Otte 
---
Index: linux-2.5-cecsim/arch/s390/kvm/kvm-s390.c
===
--- linux-2.5-cecsim.orig/arch/s390/kvm/kvm-s390.c
+++ linux-2.5-cecsim/arch/s390/kvm/kvm-s390.c
@@ -572,7 +572,10 @@ rerun_vcpu:
rc = __vcpu_run(vcpu);
if (rc)
break;
-   rc = kvm_handle_sie_intercept(vcpu);
+   if (kvm_is_ucontrol(vcpu->kvm))
+   rc = -EOPNOTSUPP;
+   else
+   rc = kvm_handle_sie_intercept(vcpu);
} while (!signal_pending(current) && !rc);
 
if (rc == SIE_INTERCEPT_RERUNVCPU)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 12/12] [PATCH] kvm-s390: Fix return code for unknown ioctl numbers

2011-12-08 Thread Carsten Otte
This patch fixes the return code of kvm_arch_vcpu_ioctl in case
of an unknown ioctl number.

Signed-off-by: Carsten Otte 
---
---
 arch/s390/kvm/kvm-s390.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -884,7 +884,7 @@ long kvm_arch_vcpu_ioctl(struct file *fi
break;
}
default:
-   r = -EINVAL;
+   r = -ENOTTY;
}
return r;
 }

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 01/12] [PATCH] kvm-s390: ioctl to switch to user controlled virtual machines

2011-12-08 Thread Sasha Levin
On Thu, 2011-12-08 at 10:12 +0100, Carsten Otte wrote:
> plain text document attachment (enable-ucontrol.patch)
> This patch introduces a new config option for user controlled kernel
> virtual machines. It introduces an optional parameter to
> KVM_CREATE_VM in order to create a user controlled virtual machine.
> The parameter is passed to kvm_arch_init_vm for all architectures.
> Valid values for the new parameter are KVM_VM_REGULAR (defined to 0
> for backward compatibility to old KVM_CREATE_VM) and
> KVM_VM_S390_UCONTROL for s390 only.

Why is it s390 specific? why isn't it KVM_VM_UCONTROL which is currently
only implemented on s390?

-- 

Sasha.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 01/12] [PATCH] kvm-s390: ioctl to switch to user controlled virtual machines

2011-12-08 Thread Carsten Otte

On 08.12.2011 10:25, Sasha Levin wrote:

Why is it s390 specific? why isn't it KVM_VM_UCONTROL which is currently
only implemented on s390?
Good point. Maybe the subject line for the patch should be fixed along 
with that ;-)


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH RFC V3 2/4] kvm hypervisor : Add a hypercall to KVM hypervisor to support pv-ticketlocks

2011-12-08 Thread Avi Kivity
On 12/07/2011 06:46 PM, Raghavendra K T wrote:
> On 12/07/2011 08:22 PM, Avi Kivity wrote:
>> On 12/07/2011 03:39 PM, Marcelo Tosatti wrote:
 Also I think we can keep the kicked flag in vcpu->requests, no need
 for
 new storage.
>>>
>>> Was going to suggest it but it violates the currently organized
>>> processing of entries at the beginning of vcpu_enter_guest.
>>>
>>> That is, this "kicked" flag is different enough from vcpu->requests
>>> processing that a separate variable seems worthwhile (even more
>>> different with convertion to MP_STATE at KVM_GET_MP_STATE).
>>
>> IMO, it's similar to KVM_REQ_EVENT (which can also cause mpstate to
>> change due to apic re-evaluation).
>>
>
> Ok, So what I understand is we have to either :
> 1. retain current kick flag AS-IS but would have to make it migration
> friendly. [I still have to get more familiar with migration side]
> or
> 2. introduce notion similar to KVM_REQ_PVLOCK_KICK(??) to be part of
> vcpu->requests.
>
> So what would be better? Please let me know.
>

IMO, KVM_REQ.

-- 
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/1 V6] qemu-kvm: fix improper nmi emulation

2011-12-08 Thread Jan Kiszka
On 2011-12-07 11:29, Avi Kivity wrote:
> On 10/17/2011 06:00 PM, Lai Jiangshan wrote:
>> From: Lai Jiangshan 
>>
>> Currently, NMI interrupt is blindly sent to all the vCPUs when NMI
>> button event happens. This doesn't properly emulate real hardware on
>> which NMI button event triggers LINT1. Because of this, NMI is sent to
>> the processor even when LINT1 is maskied in LVT. For example, this
>> causes the problem that kdump initiated by NMI sometimes doesn't work
>> on KVM, because kdump assumes NMI is masked on CPUs other than CPU0.
>>
>> With this patch, inject-nmi request is handled as follows.
>>
>> - When in-kernel irqchip is disabled, deliver LINT1 instead of NMI
>>   interrupt.
>> - When in-kernel irqchip is enabled, get the in-kernel LAPIC states
>>   and test the APIC_LVT_MASKED, if LINT1 is unmasked, and then
>>   delivering the NMI directly. (Suggested by Jan Kiszka)
>>
>> Changed from old version:
>>   re-implement it by the Jan's suggestion.
>>   fix the race found by Jan.
> 
> This patch fell through the cracks, sorry.  Now applied.

Lai, what is the state of a corresponding QEMU upstream patch? I'd like
to build on top of it for my upstream irqchip series.

Thanks,
Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 01/12] [PATCH] kvm-s390: ioctl to switch to user controlled virtual machines

2011-12-08 Thread Avi Kivity
On 12/08/2011 11:25 AM, Sasha Levin wrote:
> On Thu, 2011-12-08 at 10:12 +0100, Carsten Otte wrote:
> > plain text document attachment (enable-ucontrol.patch)
> > This patch introduces a new config option for user controlled kernel
> > virtual machines. It introduces an optional parameter to
> > KVM_CREATE_VM in order to create a user controlled virtual machine.
> > The parameter is passed to kvm_arch_init_vm for all architectures.
> > Valid values for the new parameter are KVM_VM_REGULAR (defined to 0
> > for backward compatibility to old KVM_CREATE_VM) and
> > KVM_VM_S390_UCONTROL for s390 only.
>
> Why is it s390 specific? why isn't it KVM_VM_UCONTROL which is currently
> only implemented on s390?

It's not possible (or at least very difficult) to implement ucontrol on
x86.  For example, to update VMCSs you need privileged instructions.  It
might be doable on svm, but there's no point, really.

-- 
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 01/12] [PATCH] kvm-s390: ioctl to switch to user controlled virtual machines

2011-12-08 Thread Sasha Levin
On Thu, 2011-12-08 at 11:45 +0200, Avi Kivity wrote:
> On 12/08/2011 11:25 AM, Sasha Levin wrote:
> > On Thu, 2011-12-08 at 10:12 +0100, Carsten Otte wrote:
> > > plain text document attachment (enable-ucontrol.patch)
> > > This patch introduces a new config option for user controlled kernel
> > > virtual machines. It introduces an optional parameter to
> > > KVM_CREATE_VM in order to create a user controlled virtual machine.
> > > The parameter is passed to kvm_arch_init_vm for all architectures.
> > > Valid values for the new parameter are KVM_VM_REGULAR (defined to 0
> > > for backward compatibility to old KVM_CREATE_VM) and
> > > KVM_VM_S390_UCONTROL for s390 only.
> >
> > Why is it s390 specific? why isn't it KVM_VM_UCONTROL which is currently
> > only implemented on s390?
> 
> It's not possible (or at least very difficult) to implement ucontrol on
> x86.  For example, to update VMCSs you need privileged instructions.  It
> might be doable on svm, but there's no point, really.

Might not work for x86, but maybe on arm? ppc? or some other random arch
that will be added in the future?

No point in limiting it to s390 from day one.

It also makes code a bit cleaner (kvm_main.c shouldn't have arch names
in the code).

-- 

Sasha.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 01/12] [PATCH] kvm-s390: ioctl to switch to user controlled virtual machines

2011-12-08 Thread Avi Kivity
On 12/08/2011 11:53 AM, Sasha Levin wrote:
> On Thu, 2011-12-08 at 11:45 +0200, Avi Kivity wrote:
> > On 12/08/2011 11:25 AM, Sasha Levin wrote:
> > > On Thu, 2011-12-08 at 10:12 +0100, Carsten Otte wrote:
> > > > plain text document attachment (enable-ucontrol.patch)
> > > > This patch introduces a new config option for user controlled kernel
> > > > virtual machines. It introduces an optional parameter to
> > > > KVM_CREATE_VM in order to create a user controlled virtual machine.
> > > > The parameter is passed to kvm_arch_init_vm for all architectures.
> > > > Valid values for the new parameter are KVM_VM_REGULAR (defined to 0
> > > > for backward compatibility to old KVM_CREATE_VM) and
> > > > KVM_VM_S390_UCONTROL for s390 only.
> > >
> > > Why is it s390 specific? why isn't it KVM_VM_UCONTROL which is currently
> > > only implemented on s390?
> > 
> > It's not possible (or at least very difficult) to implement ucontrol on
> > x86.  For example, to update VMCSs you need privileged instructions.  It
> > might be doable on svm, but there's no point, really.
>
> Might not work for x86, but maybe on arm? ppc? or some other random arch
> that will be added in the future?
>
> No point in limiting it to s390 from day one.

Agree.

> It also makes code a bit cleaner (kvm_main.c shouldn't have arch names
> in the code).

That doesn't bother me.

-- 
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [net-next RFC PATCH 0/5] Series short description

2011-12-08 Thread Jason Wang

On 12/08/2011 01:02 AM, Ben Hutchings wrote:

On Wed, 2011-12-07 at 19:31 +0800, Jason Wang wrote:

On 12/07/2011 03:30 PM, Rusty Russell wrote:

On Mon, 05 Dec 2011 16:58:37 +0800, Jason Wang   wrote:

multiple queue virtio-net: flow steering through host/guest cooperation

Hello all:

This is a rough series that adds the guest/host cooperation of flow
steering support based on Krish Kumar's multiple queue virtio-net
driver patch 3/3 (http://lwn.net/Articles/467283/).

Is there a real (physical) device which does this kind of thing?  How do
they do it?  Can we copy them?

Cheers,
Rusty.

As far as I see, ixgbe and sfc have similar but much more sophisticated
mechanism.

The idea was originally suggested by Ben and it was just borrowed from
those real physical NICs that can dispatch packets based on their
hash. All of these cards can filter the flow based on the hash of the
L2/L3/L4 header, and the stack tells the card which queue this flow
should go to.

Solarflare controllers (sfc driver) have 8192 perfect filters for
TCP/IPv4 and UDP/IPv4 which can be used for flow steering.  (The filters
are organised as a hash table, but matched based on 5-tuples.)  I
implemented the 'accelerated RFS' interface in this driver.

I believe the Intel 82599 controllers (ixgbe driver) have both
hash-based and perfect filter modes and the driver can be configured to
use one or the other.  The driver has its own independent mechanism for
steering RX and TX flows which predates RFS; I don't know whether it
uses hash-based or perfect filters.


As far as I see, their driver predates RFS by binding the TX queue and 
RX queue to the same CPU and adding hash based filter during packet 
transmission.



Most multi-queue controllers could support a kind of hash-based
filtering for TCP/IP by adjusting the RSS indirection table.  However,
this table is usually quite small (64-256 entries).  This means that
hash collisions will be quite common and this can result in reordering.
The same applies to the small table Jason has proposed for virtio-net.



Thanks for the clarification. Considering that the hash is provided by the
host NIC or the host kernel, the collision rate is not fixed. A perfect
filter is more suitable then.

So in the host, a simple hash-to-queue table was introduced in tap/macvtap,
and in the guest, the guest driver tells the host the desired queue of a
flow by changing this table.

I don't think accelerated RFS can work well without the use of perfect
filtering or hash-based filtering with a very low rate of collisions.

Ben.



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 01/12] [PATCH] kvm-s390: ioctl to switch to user controlled virtual machines

2011-12-08 Thread Alexander Graf

On 08.12.2011, at 10:59, Avi Kivity  wrote:

> On 12/08/2011 11:53 AM, Sasha Levin wrote:
>> On Thu, 2011-12-08 at 11:45 +0200, Avi Kivity wrote:
>>> On 12/08/2011 11:25 AM, Sasha Levin wrote:
 On Thu, 2011-12-08 at 10:12 +0100, Carsten Otte wrote:
> plain text document attachment (enable-ucontrol.patch)
> This patch introduces a new config option for user controlled kernel
> virtual machines. It introduces an optional parameter to
> KVM_CREATE_VM in order to create a user controlled virtual machine.
> The parameter is passed to kvm_arch_init_vm for all architectures.
> Valid values for the new parameter are KVM_VM_REGULAR (defined to 0
> for backward compatibility to old KVM_CREATE_VM) and
> KVM_VM_S390_UCONTROL for s390 only.
 
 Why is it s390 specific? why isn't it KVM_VM_UCONTROL which is currently
 only implemented on s390?
>>> 
>>> It's not possible (or at least very difficult) to implement ucontrol on
>>> x86.  For example, to update VMCSs you need privileged instructions.  It
>>> might be doable on svm, but there's no point, really.
>> 
>> Might not work for x86, but maybe on arm? ppc? or some other random arch
>> that will be added in the future?
>> 
>> No point in limiting it to s390 from day one.
> 
> Agree.

I don't think I would want to see full exposure of the vm control block to user 
space on any architecture really.

If the 390 folks like to shoot themselves in the security foot, I'm ok with 
that, but the whole idea of kvm is to abstract these hw details. By giving user 
space direct access to the vm control block, you essentially give user space a 
mkcpl(0) ioctl.

The vm control block in memory is also pretty specific to s390. The only other 
thing that comes close, where mmap'ing something actually gives you control 
over the full vm description is SVM's VMCB. All other archs (not 100% sure on 
arm) need to modify registers from cpl0 code.

So overall I dislike the idea of exposing the SIE block to user space. Imagine 
a system with containers on it where container vms should still be able to run 
kvm vms. From a security pov, this would break it, as you essentially give the 
user full access over the host. Unless CAP_ADMIN is not set in such a scenario 
of course. Then it's only as bad as /dev/mem which any random trojan or virus 
could use to inject itself into the kernel.

If you really have to do this, please

 1) make it s390 only. I don't even want to have to see this ugliness in other 
archs
 2) make it a config option, so sane people can disable it. (which you already 
do, good)


Thanks,

Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/1 V6] qemu-kvm: fix improper nmi emulation

2011-12-08 Thread Jan Kiszka
On 2011-12-08 10:42, Jan Kiszka wrote:
> On 2011-12-07 11:29, Avi Kivity wrote:
>> On 10/17/2011 06:00 PM, Lai Jiangshan wrote:
>>> From: Lai Jiangshan 
>>>
>>> Currently, NMI interrupt is blindly sent to all the vCPUs when NMI
>>> button event happens. This doesn't properly emulate real hardware on
>>> which NMI button event triggers LINT1. Because of this, NMI is sent to
>>> the processor even when LINT1 is maskied in LVT. For example, this
>>> causes the problem that kdump initiated by NMI sometimes doesn't work
>>> on KVM, because kdump assumes NMI is masked on CPUs other than CPU0.
>>>
>>> With this patch, inject-nmi request is handled as follows.
>>>
>>> - When in-kernel irqchip is disabled, deliver LINT1 instead of NMI
>>>   interrupt.
>>> - When in-kernel irqchip is enabled, get the in-kernel LAPIC states
>>>   and test the APIC_LVT_MASKED, if LINT1 is unmasked, and then
>>>   delivering the NMI directly. (Suggested by Jan Kiszka)
>>>
>>> Changed from old version:
>>>   re-implement it by the Jan's suggestion.
>>>   fix the race found by Jan.
>>
>> This patch fell through the cracks, sorry.  Now applied.
> 
> Lai, what is the state of a corresponding QEMU upstream patch? I'd like
> to build on top of it for my upstream irqchip series.

Never mind, I'll include a patch in my series as it requires some
tweaking to the APIC backend concept.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/11] RFC: PCI using capabilities

2011-12-08 Thread Rusty Russell
Here's the patch series I ended up with.  I haven't coded up the QEMU
side yet, so no idea if the new driver works.

Questions:
(1) Do we win from separating ISR, NOTIFY and COMMON?
(2) I used a "u8 bar"; should I use a bir and pack it instead?  BIR
seems a little obscure (no one else in the kernel source seems to
refer to it).

Cheers,
Rusty.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] virtio-ring: Use threshold for switching to indirect descriptors

2011-12-08 Thread Rusty Russell
On Wed, 7 Dec 2011 17:48:17 +0200, "Michael S. Tsirkin"  wrote:
> On Wed, Dec 07, 2011 at 04:02:45PM +0200, Sasha Levin wrote:
> > On Sun, 2011-12-04 at 20:23 +0200, Sasha Levin wrote:
> > 
> > [snip]
> > 
> > Rusty, Michael, does the below looks a reasonable optimization for you?
> 
> OK overall but a bit hard to say for sure as it looks pretty incomplete ...

A static threshold is very hackish; we need to either initialize it to
a proven-good value (since no one will ever change it) or be cleverer.

Thanks,
Rusty.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/11] RFC: PCI using capabilities

2011-12-08 Thread Sasha Levin
Rusty, I can't find the actual patches, could you verify that they were
indeed sent?

On Thu, 2011-12-08 at 20:52 +1030, Rusty Russell wrote:
> Here's the patch series I ended up with.  I haven't coded up the QEMU
> side yet, so no idea if the new driver works.
> 
> Questions:
> (1) Do we win from separating ISR, NOTIFY and COMMON?

By separating ISR, NOTIFY and COMMON we can place ISR and NOTIFY in PIO
and COMMON in MMIO. This gives us the benefit of having the small data
path use fast PIO, while big config path can use MMIO.

> (2) I used a "u8 bar"; should I use a bir and pack it instead?  BIR
> seems a little obscure (noone else in the kernel source seems to
> refer to it).

BIR is a concept from the PCI spec, but it was only used for MSI-X. I
don't expect to see it all around the kernel source.

-- 

Sasha.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] virtio-ring: Use threshold for switching to indirect descriptors

2011-12-08 Thread Sasha Levin
On Thu, 2011-12-08 at 20:14 +1030, Rusty Russell wrote:
> On Wed, 7 Dec 2011 17:48:17 +0200, "Michael S. Tsirkin"  
> wrote:
> > On Wed, Dec 07, 2011 at 04:02:45PM +0200, Sasha Levin wrote:
> > > On Sun, 2011-12-04 at 20:23 +0200, Sasha Levin wrote:
> > > 
> > > [snip]
> > > 
> > > Rusty, Michael, does the below looks a reasonable optimization for you?
> > 
> > OK overall but a bit hard to say for sure as it looks pretty incomplete ...
> 
> A static threshold is very hackish; we need to either initialize it to
> a proven-good value (since noone will ever change it) or be cleverer.

I'd better wait to see how the threshold issue is resolved, and
possibly do it as a dynamic value which depends on the threshold.

I doubt there's one magic value which would work for all.

-- 

Sasha.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC 1/11] virtio: use u32, not bitmap for struct virtio_device's features

2011-12-08 Thread Rusty Russell
It seemed like a good idea, but it's actually a pain when we get more
than 32 feature bits.  Just change it to a u32 for now.
---
 drivers/char/virtio_console.c  |2 +-
 drivers/lguest/lguest_device.c |2 +-
 drivers/s390/kvm/kvm_virtio.c  |2 +-
 drivers/virtio/virtio.c|   10 +-
 drivers/virtio/virtio_mmio.c   |8 ++--
 drivers/virtio/virtio_pci.c|3 +--
 drivers/virtio/virtio_ring.c   |2 +-
 include/linux/virtio.h |3 +--
 include/linux/virtio_config.h  |2 +-
 tools/virtio/linux/virtio.h|   18 ++
 tools/virtio/virtio_test.c |5 ++---
 11 files changed, 18 insertions(+), 39 deletions(-)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -331,7 +331,7 @@ static inline bool use_multiport(struct 
 */
if (!portdev->vdev)
return 0;
-   return portdev->vdev->features[0] & (1 << VIRTIO_CONSOLE_F_MULTIPORT);
+   return portdev->vdev->features & (1 << VIRTIO_CONSOLE_F_MULTIPORT);
 }
 
 static void free_buf(struct port_buffer *buf)
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -144,7 +144,7 @@ static void lg_finalize_features(struct 
memset(out_features, 0, desc->feature_len);
bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
for (i = 0; i < bits; i++) {
-   if (test_bit(i, vdev->features))
+   if (vdev->features & (1 << i))
out_features[i / 8] |= (1 << (i % 8));
}
 
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -105,7 +105,7 @@ static void kvm_finalize_features(struct
memset(out_features, 0, desc->feature_len);
bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
for (i = 0; i < bits; i++) {
-   if (test_bit(i, vdev->features))
+   if (vdev->features & (1 << i))
out_features[i / 8] |= (1 << (i % 8));
}
 }
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -41,9 +41,9 @@ static ssize_t features_show(struct devi
 
/* We actually represent this as a bitstring, as it could be
 * arbitrary length in future. */
-   for (i = 0; i < ARRAY_SIZE(dev->features)*BITS_PER_LONG; i++)
+   for (i = 0; i < sizeof(dev->features)*8; i++)
len += sprintf(buf+len, "%c",
-  test_bit(i, dev->features) ? '1' : '0');
+  dev->features & (1ULL << i) ? '1' : '0');
len += sprintf(buf+len, "\n");
return len;
 }
@@ -122,18 +122,18 @@ static int virtio_dev_probe(struct devic
device_features = dev->config->get_features(dev);
 
/* Features supported by both device and driver into dev->features. */
-   memset(dev->features, 0, sizeof(dev->features));
+   dev->features = 0;
for (i = 0; i < drv->feature_table_size; i++) {
unsigned int f = drv->feature_table[i];
BUG_ON(f >= 32);
if (device_features & (1 << f))
-   set_bit(f, dev->features);
+   dev->features |= (1 << f);
}
 
/* Transport features always preserved to pass to finalize_features. */
for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++)
if (device_features & (1 << i))
-   set_bit(i, dev->features);
+   dev->features |= (1 << i);
 
dev->config->finalize_features(dev);
 
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -112,16 +112,12 @@ static u32 vm_get_features(struct virtio
 static void vm_finalize_features(struct virtio_device *vdev)
 {
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
-   int i;
 
/* Give virtio_ring a chance to accept features. */
vring_transport_features(vdev);
 
-   for (i = 0; i < ARRAY_SIZE(vdev->features); i++) {
-   writel(i, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SEL);
-   writel(vdev->features[i],
-   vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES);
-   }
+   writel(0, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SEL);
+   writel(vdev->features, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES);
 }
 
 static void vm_get(struct virtio_device *vdev, unsigned offset,
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -121,8 +121,7 @@ static void vp_finalize_

Re: [PATCH 0/5 V5] Avoid soft lockup message when KVM is stopped by host

2011-12-08 Thread Amit Shah
On (Mon) 05 Dec 2011 [15:18:59], Eric B Munson wrote:
> When a guest kernel is stopped by the host hypervisor it can look like a soft
> lockup to the guest kernel.  This false warning can mask later soft lockup
> warnings which may be real.  This patch series adds a method for a host
> hypervisor to communicate to a guest kernel that it is being stopped.  The
> final patch in the series has the watchdog check this flag when it goes to
> issue a soft lockup warning and skip the warning if the guest knows it was
> stopped.

Guest S4 would need similar treatment, and I think the code in the two
approaches can be shared.  Just something to consider.

Amit
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 01/12] [PATCH] kvm-s390: ioctl to switch to user controlled virtual machines

2011-12-08 Thread Carsten Otte

On 08.12.2011 11:18, Alexander Graf wrote:

If you really have to do this, please

  1) make it s390 only. I don't even want to have to see this uglyness in other 
archs

It pretty much is. The only interference is a) checking the machine
type in arch_init_vm now that I've introduced that parameter to
CREATE_VM on Avi's request and b) a dummy arch_vcpu_fault function that
I've introduced on Avi's request. Both changes make sense for future
enhancements independent of ucontrol.

so long,
Carsten

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 11/15] kvm: x86: Establish IRQ0 override control

2011-12-08 Thread Jan Kiszka
KVM is forced to disable the IRQ0 override when we run with in-kernel
irqchip but without IRQ routing support of the kernel. Set the fwcfg
value correspondingly. This aligns us with qemu-kvm.

Signed-off-by: Jan Kiszka 
---
 hw/pc.c|3 ++-
 kvm-all.c  |5 +
 kvm-stub.c |5 +
 kvm.h  |2 ++
 sysemu.h   |1 -
 vl.c   |1 -
 6 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index ee6e59b..066edc4 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -39,6 +39,7 @@
 #include "msi.h"
 #include "sysbus.h"
 #include "sysemu.h"
+#include "kvm.h"
 #include "blockdev.h"
 #include "ui/qemu-spice.h"
 #include "memory.h"
@@ -609,7 +610,7 @@ static void *bochs_bios_init(void)
 fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
 fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES, (uint8_t *)acpi_tables,
  acpi_tables_len);
-fw_cfg_add_bytes(fw_cfg, FW_CFG_IRQ0_OVERRIDE, &irq0override, 1);
+fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, kvm_allows_irq0_override());
 
 smbios_table = smbios_get_table(&smbios_len);
 if (smbios_table)
diff --git a/kvm-all.c b/kvm-all.c
index 8958abd..7387dd3 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1271,6 +1271,11 @@ int kvm_has_gsi_routing(void)
 return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
 }
 
+int kvm_allows_irq0_override(void)
+{
+return !kvm_enabled() || !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
+}
+
 void kvm_setup_guest_memory(void *start, size_t size)
 {
 if (!kvm_has_sync_mmu()) {
diff --git a/kvm-stub.c b/kvm-stub.c
index 06064b9..6c2b06b 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -78,6 +78,11 @@ int kvm_has_many_ioeventfds(void)
 return 0;
 }
 
+int kvm_allows_irq0_override(void)
+{
+return 1;
+}
+
 void kvm_setup_guest_memory(void *start, size_t size)
 {
 }
diff --git a/kvm.h b/kvm.h
index 0d6c453..a3c87af 100644
--- a/kvm.h
+++ b/kvm.h
@@ -53,6 +53,8 @@ int kvm_has_xcrs(void);
 int kvm_has_many_ioeventfds(void);
 int kvm_has_gsi_routing(void);
 
+int kvm_allows_irq0_override(void);
+
 #ifdef NEED_CPU_H
 int kvm_init_vcpu(CPUState *env);
 
diff --git a/sysemu.h b/sysemu.h
index 22cd720..3bd896e 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -102,7 +102,6 @@ extern int vga_interface_type;
 extern int graphic_width;
 extern int graphic_height;
 extern int graphic_depth;
-extern uint8_t irq0override;
 extern DisplayType display_type;
 extern const char *keyboard_layout;
 extern int win2k_install_hack;
diff --git a/vl.c b/vl.c
index de5ecef..f9a8caf 100644
--- a/vl.c
+++ b/vl.c
@@ -218,7 +218,6 @@ int no_reboot = 0;
 int no_shutdown = 0;
 int cursor_hide = 1;
 int graphic_rotate = 0;
-uint8_t irq0override = 1;
 const char *watchdog;
 QEMUOptionRom option_rom[MAX_OPTION_ROMS];
 int nb_option_roms;
-- 
1.7.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 02/15] kvm: Move kvmclock into hw/kvm folder

2011-12-08 Thread Jan Kiszka
More KVM-specific devices will come, so let's start with moving the
kvmclock into a dedicated folder.

Signed-off-by: Jan Kiszka 
---
 Makefile.target|4 ++--
 configure  |1 +
 hw/{kvmclock.c => kvm/clock.c} |4 ++--
 hw/{kvmclock.h => kvm/clock.h} |0
 hw/pc_piix.c   |2 +-
 5 files changed, 6 insertions(+), 5 deletions(-)
 rename hw/{kvmclock.c => kvm/clock.c} (98%)
 rename hw/{kvmclock.h => kvm/clock.h} (100%)

diff --git a/Makefile.target b/Makefile.target
index a111521..1d24a30 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -236,7 +236,7 @@ obj-i386-y += vmport.o
 obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
 obj-i386-y += debugcon.o multiboot.o
 obj-i386-y += pc_piix.o
-obj-i386-$(CONFIG_KVM) += kvmclock.o
+obj-i386-$(CONFIG_KVM) += kvm/clock.o
 obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
 
 # shared objects
@@ -428,7 +428,7 @@ qmp-commands-old.h: $(SRC_PATH)/qmp-commands.hx
 
 clean:
rm -f *.o *.a *~ $(PROGS) nwfpe/*.o fpu/*.o
-   rm -f *.d */*.d tcg/*.o ide/*.o 9pfs/*.o
+   rm -f *.d */*.d tcg/*.o ide/*.o 9pfs/*.o kvm/*.o
rm -f hmp-commands.h qmp-commands-old.h gdbstub-xml.c
 ifdef CONFIG_TRACE_SYSTEMTAP
rm -f *.stp
diff --git a/configure b/configure
index ac4840d..12cd9d1 100755
--- a/configure
+++ b/configure
@@ -3338,6 +3338,7 @@ mkdir -p $target_dir/fpu
 mkdir -p $target_dir/tcg
 mkdir -p $target_dir/ide
 mkdir -p $target_dir/9pfs
+mkdir -p $target_dir/kvm
 if test "$target" = "arm-linux-user" -o "$target" = "armeb-linux-user" -o 
"$target" = "arm-bsd-user" -o "$target" = "armeb-bsd-user" ; then
   mkdir -p $target_dir/nwfpe
 fi
diff --git a/hw/kvmclock.c b/hw/kvm/clock.c
similarity index 98%
rename from hw/kvmclock.c
rename to hw/kvm/clock.c
index 5388bc4..5983271 100644
--- a/hw/kvmclock.c
+++ b/hw/kvm/clock.c
@@ -13,9 +13,9 @@
 
 #include "qemu-common.h"
 #include "sysemu.h"
-#include "sysbus.h"
 #include "kvm.h"
-#include "kvmclock.h"
+#include "hw/sysbus.h"
+#include "hw/kvm/clock.h"
 
 #include 
 #include 
diff --git a/hw/kvmclock.h b/hw/kvm/clock.h
similarity index 100%
rename from hw/kvmclock.h
rename to hw/kvm/clock.h
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 970f43c..530fe9c 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -34,7 +34,7 @@
 #include "boards.h"
 #include "ide.h"
 #include "kvm.h"
-#include "kvmclock.h"
+#include "kvm/clock.h"
 #include "sysemu.h"
 #include "sysbus.h"
 #include "arch_init.h"
-- 
1.7.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 03/15] apic: Stop timer on reset

2011-12-08 Thread Jan Kiszka
All LVTs are masked on reset, so the timer becomes ineffective. Letting
it tick nevertheless is harmless, but will at least create a spurious
trace event.

Signed-off-by: Jan Kiszka 
---
 hw/apic.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 9d0f460..4b97b17 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -528,6 +528,8 @@ void apic_init_reset(DeviceState *d)
 s->initial_count_load_time = 0;
 s->next_time = 0;
 s->wait_for_sipi = 1;
+
+qemu_del_timer(s->timer);
 }
 
 static void apic_startup(APICState *s, int vector_num)
-- 
1.7.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 09/15] memory: Introduce memory_region_init_reservation

2011-12-08 Thread Jan Kiszka
Introduce a memory region type that can reserve I/O space. Such regions
are useful for modeling I/O that is only handled outside of QEMU, i.e.
in the context of an accelerator like KVM.

Any access to such a region from QEMU is a bug, but could theoretically
be triggered by guest code (DMA to reserved region). So only warn
about such events once, then ignore them.

Signed-off-by: Jan Kiszka 
---
 memory.c |   36 
 memory.h |   16 
 2 files changed, 52 insertions(+), 0 deletions(-)

diff --git a/memory.c b/memory.c
index adfdf14..71a252a 100644
--- a/memory.c
+++ b/memory.c
@@ -1031,6 +1031,42 @@ void memory_region_init_rom_device(MemoryRegion *mr,
 mr->backend_registered = true;
 }
 
+static uint64_t invalid_read(void *opaque, target_phys_addr_t addr,
+ unsigned size)
+{
+MemoryRegion *mr = opaque;
+
+if (!mr->warning_printed) {
+fprintf(stderr, "Invalid read from memory region %s\n", mr->name);
+mr->warning_printed = true;
+}
+return -1U;
+}
+
+static void invalid_write(void *opaque, target_phys_addr_t addr, uint64_t data,
+  unsigned size)
+{
+MemoryRegion *mr = opaque;
+
+if (!mr->warning_printed) {
+fprintf(stderr, "Invalid write to memory region %s\n", mr->name);
+mr->warning_printed = true;
+}
+}
+
+static const MemoryRegionOps reservation_ops = {
+.read = invalid_read,
+.write = invalid_write,
+.endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+void memory_region_init_reservation(MemoryRegion *mr,
+const char *name,
+uint64_t size)
+{
+memory_region_init_io(mr, &reservation_ops, mr, name, size);
+}
+
 void memory_region_destroy(MemoryRegion *mr)
 {
 assert(QTAILQ_EMPTY(&mr->subregions));
diff --git a/memory.h b/memory.h
index 53bf261..1097eac 100644
--- a/memory.h
+++ b/memory.h
@@ -123,6 +123,7 @@ struct MemoryRegion {
 bool terminates;
 bool readable;
 bool readonly; /* For RAM regions */
+bool warning_printed; /* For reservations */
 MemoryRegion *alias;
 target_phys_addr_t alias_offset;
 unsigned priority;
@@ -250,6 +251,21 @@ void memory_region_init_rom_device(MemoryRegion *mr,
uint64_t size);
 
 /**
+ * memory_region_init_reservation: Initialize a memory region that reserves
+ * I/O space.
+ *
+ * A reservation region primarily serves debugging purposes.  It claims I/O
+ * space that is not supposed to be handled by QEMU itself.  Any access via
+ * the memory API is warned about once on stderr and otherwise ignored.
+ *
+ * @mr: the #MemoryRegion to be initialized
+ * @name: used for debugging; not visible to the user or ABI
+ * @size: size of the region.
+ */
+void memory_region_init_reservation(MemoryRegion *mr,
+const char *name,
+uint64_t size);
+/**
  * memory_region_destroy: Destroy a memory region and relaim all resources.
  *
  * @mr: the region to be destroyed.  May not currently be a subregion
-- 
1.7.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 01/15] msi: Generalize msix_supported to msi_supported

2011-12-08 Thread Jan Kiszka
Rename msix_supported to msi_supported and control MSI and MSI-X
activation this way. That was likely the original intention for this
flag, but MSI support came after MSI-X.

Signed-off-by: Jan Kiszka 
---
 hw/msi.c  |8 
 hw/msi.h  |2 ++
 hw/msix.c |9 -
 hw/msix.h |2 --
 hw/pc.c   |4 ++--
 5 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/hw/msi.c b/hw/msi.c
index f214fcf..5d6ceb6 100644
--- a/hw/msi.c
+++ b/hw/msi.c
@@ -36,6 +36,9 @@
 
 #define PCI_MSI_VECTORS_MAX 32
 
+/* Flag for interrupt controller to declare MSI/MSI-X support */
+bool msi_supported;
+
 /* If we get rid of cap allocator, we won't need this. */
 static inline uint8_t msi_cap_sizeof(uint16_t flags)
 {
@@ -116,6 +119,11 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
 uint16_t flags;
 uint8_t cap_size;
 int config_offset;
+
+if (!msi_supported) {
+return -ENOTSUP;
+}
+
 MSI_DEV_PRINTF(dev,
"init offset: 0x%"PRIx8" vector: %"PRId8
" 64bit %d mask %d\n",
diff --git a/hw/msi.h b/hw/msi.h
index 5766018..3040bb0 100644
--- a/hw/msi.h
+++ b/hw/msi.h
@@ -24,6 +24,8 @@
 #include "qemu-common.h"
 #include "pci.h"
 
+extern bool msi_supported;
+
 bool msi_enabled(const PCIDevice *dev);
 int msi_init(struct PCIDevice *dev, uint8_t offset,
  unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask);
diff --git a/hw/msix.c b/hw/msix.c
index 149eed2..107d4e5 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -12,6 +12,7 @@
  */
 
 #include "hw.h"
+#include "msi.h"
 #include "msix.h"
 #include "pci.h"
 #include "range.h"
@@ -32,9 +33,6 @@
 #define MSIX_MAX_ENTRIES 32
 
 
-/* Flag for interrupt controller to declare MSI-X support */
-int msix_supported;
-
 /* Add MSI-X capability to the config space for the device. */
 /* Given a bar and its size, add MSI-X table on top of it
  * and fill MSI-X capability in the config space.
@@ -235,10 +233,11 @@ int msix_init(struct PCIDevice *dev, unsigned short 
nentries,
   unsigned bar_nr, unsigned bar_size)
 {
 int ret;
+
 /* Nothing to do if MSI is not supported by interrupt controller */
-if (!msix_supported)
+if (!msi_supported) {
 return -ENOTSUP;
-
+}
 if (nentries > MSIX_MAX_ENTRIES)
 return -EINVAL;
 
diff --git a/hw/msix.h b/hw/msix.h
index 7e04336..5aba22b 100644
--- a/hw/msix.h
+++ b/hw/msix.h
@@ -29,6 +29,4 @@ void msix_notify(PCIDevice *dev, unsigned vector);
 
 void msix_reset(PCIDevice *dev);
 
-extern int msix_supported;
-
 #endif
diff --git a/hw/pc.c b/hw/pc.c
index 7c4bfa8..240aaae 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -36,7 +36,7 @@
 #include "elf.h"
 #include "multiboot.h"
 #include "mc146818rtc.h"
-#include "msix.h"
+#include "msi.h"
 #include "sysbus.h"
 #include "sysemu.h"
 #include "blockdev.h"
@@ -896,7 +896,7 @@ static DeviceState *apic_init(void *env, uint8_t apic_id)
 apic_mapped = 1;
 }
 
-msix_supported = 1;
+msi_supported = true;
 
 return dev;
 }
-- 
1.7.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 06/15] apic: Open-code timer save/restore

2011-12-08 Thread Jan Kiszka
To enable migration between accelerated and non-accelerated APIC models,
we will need to handle the timer saving and restoring specially and can
no longer rely on the automatics of VMSTATE_TIMER. Specifically,
the accelerated model will not start any QEMUTimer.

This patch therefore factors out the generic bits into apic_next_timer
and introduces a post-load callback that can be implemented differently
by both models.

Signed-off-by: Jan Kiszka 
---
 hw/apic.c  |   30 --
 hw/apic_common.c   |   51 +--
 hw/apic_internal.h |3 +++
 3 files changed, 64 insertions(+), 20 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index f25be80..ed6411d 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -521,25 +521,9 @@ static uint32_t apic_get_current_count(APICState *s)
 
 static void apic_timer_update(APICState *s, int64_t current_time)
 {
-int64_t next_time, d;
-
-if (!(s->lvt[APIC_LVT_TIMER] & APIC_LVT_MASKED)) {
-d = (current_time - s->initial_count_load_time) >>
-s->count_shift;
-if (s->lvt[APIC_LVT_TIMER] & APIC_LVT_TIMER_PERIODIC) {
-if (!s->initial_count)
-goto no_timer;
-d = ((d / ((uint64_t)s->initial_count + 1)) + 1) * 
((uint64_t)s->initial_count + 1);
-} else {
-if (d >= s->initial_count)
-goto no_timer;
-d = (uint64_t)s->initial_count + 1;
-}
-next_time = s->initial_count_load_time + (d << s->count_shift);
-qemu_mod_timer(s->timer, next_time);
-s->next_time = next_time;
+if (apic_next_timer(s, current_time)) {
+qemu_mod_timer(s->timer, s->next_time);
 } else {
-no_timer:
 qemu_del_timer(s->timer);
 }
 }
@@ -770,12 +754,22 @@ static void apic_backend_init(APICState *s)
 local_apics[s->idx] = s;
 }
 
+static void apic_post_load(APICState *s)
+{
+if (s->timer_expiry != -1) {
+qemu_mod_timer(s->timer, s->timer_expiry);
+} else {
+qemu_del_timer(s->timer);
+}
+}
+
 static APICBackend apic_backend = {
 .name = "QEMU",
 .init = apic_backend_init,
 .set_base = apic_set_base,
 .set_tpr = apic_set_tpr,
 .external_nmi = apic_external_nmi,
+.post_load = apic_post_load,
 };
 
 static void apic_register_devices(void)
diff --git a/hw/apic_common.c b/hw/apic_common.c
index 73241e4..f38ffc1 100644
--- a/hw/apic_common.c
+++ b/hw/apic_common.c
@@ -89,6 +89,39 @@ void apic_deliver_nmi(DeviceState *d)
 s->backend->external_nmi(s);
 }
 
+bool apic_next_timer(APICState *s, int64_t current_time)
+{
+int64_t d;
+
+/* We need to store the timer state separately to support APIC
+ * implementations that maintain a non-QEMU timer, e.g. inside the
+ * host kernel. This open-coded state allows us to migrate between
+ * both models. */
+s->timer_expiry = -1;
+
+if (s->lvt[APIC_LVT_TIMER] & APIC_LVT_MASKED) {
+return false;
+}
+
+d = (current_time - s->initial_count_load_time) >> s->count_shift;
+
+if (s->lvt[APIC_LVT_TIMER] & APIC_LVT_TIMER_PERIODIC) {
+if (!s->initial_count) {
+return false;
+}
+d = ((d / ((uint64_t)s->initial_count + 1)) + 1) *
+((uint64_t)s->initial_count + 1);
+} else {
+if (d >= s->initial_count) {
+return false;
+}
+d = (uint64_t)s->initial_count + 1;
+}
+s->next_time = s->initial_count_load_time + (d << s->count_shift);
+s->timer_expiry = s->next_time;
+return true;
+}
+
 void apic_init_reset(DeviceState *d)
 {
 APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
@@ -116,7 +149,10 @@ void apic_init_reset(DeviceState *d)
 s->next_time = 0;
 s->wait_for_sipi = 1;
 
-qemu_del_timer(s->timer);
+if (s->timer) {
+qemu_del_timer(s->timer);
+}
+s->timer_expiry = -1;
 }
 
 static void apic_reset(DeviceState *d)
@@ -181,12 +217,23 @@ static int apic_load_old(QEMUFile *f, void *opaque, int 
version_id)
 return 0;
 }
 
+static int apic_dispatch_post_load(void *opaque, int version_id)
+{
+APICState *s = opaque;
+
+if (s->backend->post_load) {
+s->backend->post_load(s);
+}
+return 0;
+}
+
 static const VMStateDescription vmstate_apic = {
 .name = "apic",
 .version_id = 3,
 .minimum_version_id = 3,
 .minimum_version_id_old = 1,
 .load_state_old = apic_load_old,
+.post_load = apic_dispatch_post_load,
 .fields  = (VMStateField[]) {
 VMSTATE_UINT32(apicbase, APICState),
 VMSTATE_UINT8(id, APICState),
@@ -206,7 +253,7 @@ static const VMStateDescription vmstate_apic = {
 VMSTATE_UINT32(initial_count, APICState),
 VMSTATE_INT64(initial_count_load_time, APICState),
 VMSTATE_INT64(next_time, APICState),
-VMSTATE_TIMER(timer, APICState),
+VMSTATE_INT64(timer_expiry, APICState), /* open-coded timer state */
 VMSTA

[PATCH v4 07/15] i8259: Introduce backend/frontend infrastructure for KVM reuse

2011-12-08 Thread Jan Kiszka
Analogously to the APIC, we will reuse some parts of the user space
i8259 model for KVM. Again, we create a PIC backend infrastructure and
provide hooks for init, reset, and vmload/save. This also introduces a
common helper to instantiate a single i8259 chip from the cascade-
creating i8259_init function.

Signed-off-by: Jan Kiszka 
---
 Makefile.objs   |2 +-
 hw/i8259.c  |  127 +-
 hw/i8259_common.c   |  173 +++
 hw/i8259_internal.h |   82 
 4 files changed, 271 insertions(+), 113 deletions(-)
 create mode 100644 hw/i8259_common.c
 create mode 100644 hw/i8259_internal.h

diff --git a/Makefile.objs b/Makefile.objs
index d7a6539..72d8ee7 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -221,7 +221,7 @@ hw-obj-$(CONFIG_APPLESMC) += applesmc.o
 hw-obj-$(CONFIG_SMARTCARD) += usb-ccid.o ccid-card-passthru.o
 hw-obj-$(CONFIG_SMARTCARD_NSS) += ccid-card-emulated.o
 hw-obj-$(CONFIG_USB_REDIR) += usb-redir.o
-hw-obj-$(CONFIG_I8259) += i8259.o
+hw-obj-$(CONFIG_I8259) += i8259_common.o i8259.o
 
 # PPC devices
 hw-obj-$(CONFIG_PREP_PCI) += prep_pci.o
diff --git a/hw/i8259.c b/hw/i8259.c
index ab519de..413802c 100644
--- a/hw/i8259.c
+++ b/hw/i8259.c
@@ -26,6 +26,7 @@
 #include "isa.h"
 #include "monitor.h"
 #include "qemu-timer.h"
+#include "i8259_internal.h"
 
 /* debug PIC */
 //#define DEBUG_PIC
@@ -40,33 +41,6 @@
 //#define DEBUG_IRQ_LATENCY
 //#define DEBUG_IRQ_COUNT
 
-struct PicState {
-ISADevice dev;
-uint8_t last_irr; /* edge detection */
-uint8_t irr; /* interrupt request register */
-uint8_t imr; /* interrupt mask register */
-uint8_t isr; /* interrupt service register */
-uint8_t priority_add; /* highest irq priority */
-uint8_t irq_base;
-uint8_t read_reg_select;
-uint8_t poll;
-uint8_t special_mask;
-uint8_t init_state;
-uint8_t auto_eoi;
-uint8_t rotate_on_auto_eoi;
-uint8_t special_fully_nested_mode;
-uint8_t init4; /* true if 4 byte init */
-uint8_t single_mode; /* true if slave pic is not initialized */
-uint8_t elcr; /* PIIX edge/trigger selection*/
-uint8_t elcr_mask;
-qemu_irq int_out[1];
-uint32_t master; /* reflects /SP input pin */
-uint32_t iobase;
-uint32_t elcr_addr;
-MemoryRegion base_io;
-MemoryRegion elcr_io;
-};
-
 #if defined(DEBUG_PIC) || defined(DEBUG_IRQ_COUNT)
 static int irq_level[16];
 #endif
@@ -248,29 +222,12 @@ int pic_read_irq(PicState *s)
 
 static void pic_init_reset(PicState *s)
 {
-s->last_irr = 0;
-s->irr = 0;
-s->imr = 0;
-s->isr = 0;
-s->priority_add = 0;
-s->irq_base = 0;
-s->read_reg_select = 0;
-s->poll = 0;
-s->special_mask = 0;
-s->init_state = 0;
-s->auto_eoi = 0;
-s->rotate_on_auto_eoi = 0;
-s->special_fully_nested_mode = 0;
-s->init4 = 0;
-s->single_mode = 0;
-/* Note: ELCR is not reset */
+pic_reset_internal(s);
 pic_update_irq(s);
 }
 
-static void pic_reset(DeviceState *dev)
+static void pic_reset(PicState *s)
 {
-PicState *s = container_of(dev, PicState, dev.qdev);
-
 pic_init_reset(s);
 s->elcr = 0;
 }
@@ -418,32 +375,6 @@ static uint64_t elcr_ioport_read(void *opaque, 
target_phys_addr_t addr,
 return s->elcr;
 }
 
-static const VMStateDescription vmstate_pic = {
-.name = "i8259",
-.version_id = 1,
-.minimum_version_id = 1,
-.minimum_version_id_old = 1,
-.fields = (VMStateField[]) {
-VMSTATE_UINT8(last_irr, PicState),
-VMSTATE_UINT8(irr, PicState),
-VMSTATE_UINT8(imr, PicState),
-VMSTATE_UINT8(isr, PicState),
-VMSTATE_UINT8(priority_add, PicState),
-VMSTATE_UINT8(irq_base, PicState),
-VMSTATE_UINT8(read_reg_select, PicState),
-VMSTATE_UINT8(poll, PicState),
-VMSTATE_UINT8(special_mask, PicState),
-VMSTATE_UINT8(init_state, PicState),
-VMSTATE_UINT8(auto_eoi, PicState),
-VMSTATE_UINT8(rotate_on_auto_eoi, PicState),
-VMSTATE_UINT8(special_fully_nested_mode, PicState),
-VMSTATE_UINT8(init4, PicState),
-VMSTATE_UINT8(single_mode, PicState),
-VMSTATE_UINT8(elcr, PicState),
-VMSTATE_END_OF_LIST()
-}
-};
-
 static const MemoryRegionOps pic_base_ioport_ops = {
 .read = pic_ioport_read,
 .write = pic_ioport_write,
@@ -462,24 +393,13 @@ static const MemoryRegionOps pic_elcr_ioport_ops = {
 },
 };
 
-static int pic_initfn(ISADevice *dev)
+static void pic_backend_init(PicState *s)
 {
-PicState *s = DO_UPCAST(PicState, dev, dev);
-
 memory_region_init_io(&s->base_io, &pic_base_ioport_ops, s, "pic", 2);
 memory_region_init_io(&s->elcr_io, &pic_elcr_ioport_ops, s, "elcr", 1);
 
-isa_register_ioport(NULL, &s->base_io, s->iobase);
-if (s->elcr_addr != -1) {
-isa_register_ioport(NULL, &s->elcr_io, s->elcr_addr);
-}
-
-qdev_init_gpio_out(&dev->qdev, s->int_out, ARRAY

[PATCH v4 15/15] kvm: Arm in-kernel irqchip support

2011-12-08 Thread Jan Kiszka
Make the basic in-kernel irqchip support selectable via
-machine ...,kernel_irqchip=on. Leave it off by default until it can
fully replace user space models.

Signed-off-by: Jan Kiszka 
---
 qemu-config.c   |4 
 qemu-options.hx |5 -
 2 files changed, 8 insertions(+), 1 deletions(-)

diff --git a/qemu-config.c b/qemu-config.c
index 597d7e1..a761bea 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -490,6 +490,10 @@ static QemuOptsList qemu_machine_opts = {
 .name = "accel",
 .type = QEMU_OPT_STRING,
 .help = "accelerator list",
+}, {
+.name = "kernel_irqchip",
+.type = QEMU_OPT_BOOL,
+.help = "use KVM in-kernel irqchip",
 },
 { /* End of list */ }
 },
diff --git a/qemu-options.hx b/qemu-options.hx
index 681eaf1..60b7dc0 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -31,7 +31,8 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
 "-machine [type=]name[,prop[=value][,...]]\n"
 "selects emulated machine (-machine ? for list)\n"
 "property accel=accel1[:accel2[:...]] selects 
accelerator\n"
-"supported accelerators are kvm, xen, tcg (default: 
tcg)\n",
+"supported accelerators are kvm, xen, tcg (default: tcg)\n"
+"kernel_irqchip=on|off controls accelerated irqchip 
support\n",
 QEMU_ARCH_ALL)
 STEXI
 @item -machine [type=]@var{name}[,prop=@var{value}[,...]]
@@ -44,6 +45,8 @@ This is used to enable an accelerator. Depending on the 
target architecture,
 kvm, xen, or tcg can be available. By default, tcg is used. If there is more
 than one accelerator specified, the next one is used if the previous one fails
 to initialize.
+@item kernel_irqchip=on|off
+Enables in-kernel irqchip support for the chosen accelerator when available.
 @end table
 ETEXI
 
-- 
1.7.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 04/15] apic: Inject external NMI events via LINT1

2011-12-08 Thread Jan Kiszka
On real hardware, NMI button events are injected via the LINT1 line of
the APICs. E.g. kdump expects this wiring and gets upset if the per-APIC
LINT1 mask is not respected, i.e. if NMIs are injected to VCPUs that
should not receive them. Change the APIC emulation code to reflect this.

Based on qemu-kvm patch by Lai Jiangshan.

CC: Lai Jiangshan 
Reported-by: Kenji Kaneshige 
Signed-off-by: Jan Kiszka 
---
 hw/apic.c |7 +++
 hw/apic.h |1 +
 monitor.c |6 +-
 3 files changed, 13 insertions(+), 1 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 4b97b17..b9d733c 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -205,6 +205,13 @@ void apic_deliver_pic_intr(DeviceState *d, int level)
 }
 }
 
+void apic_deliver_nmi(DeviceState *d)
+{
+APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
+
+apic_local_deliver(s, APIC_LVT_LINT1);
+}
+
 #define foreach_apic(apic, deliver_bitmask, code) \
 {\
 int __i, __j, __mask;\
diff --git a/hw/apic.h b/hw/apic.h
index a5c910f..a62d83b 100644
--- a/hw/apic.h
+++ b/hw/apic.h
@@ -8,6 +8,7 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t 
delivery_mode,
   uint8_t vector_num, uint8_t trigger_mode);
 int apic_accept_pic_intr(DeviceState *s);
 void apic_deliver_pic_intr(DeviceState *s, int level);
+void apic_deliver_nmi(DeviceState *d);
 int apic_get_interrupt(DeviceState *s);
 void apic_reset_irq_delivered(void);
 int apic_get_irq_delivered(void);
diff --git a/monitor.c b/monitor.c
index 1be222e..6bd0fb1 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2354,7 +2354,11 @@ static int do_inject_nmi(Monitor *mon, const QDict 
*qdict, QObject **ret_data)
 CPUState *env;
 
 for (env = first_cpu; env != NULL; env = env->next_cpu) {
-cpu_interrupt(env, CPU_INTERRUPT_NMI);
+if (!env->apic_state) {
+cpu_interrupt(env, CPU_INTERRUPT_NMI);
+} else {
+apic_deliver_nmi(env->apic_state);
+}
 }
 
 return 0;
-- 
1.7.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 12/15] kvm: x86: Add user space part for in-kernel APIC

2011-12-08 Thread Jan Kiszka
This introduces the alternative APIC backend which makes use of KVM's
in-kernel device model. External NMI injection via LINT1 is emulated by
checking the current state of the in-kernel APIC, only injecting an NMI
into the VCPU if LINT1 is unmasked and configured to DM_NMI.

MSI is not yet supported, so we disable this when the in-kernel model is
in use.

CC: Lai Jiangshan 
Signed-off-by: Jan Kiszka 
---
 Makefile.target   |2 +-
 hw/kvm/apic.c |  154 +
 hw/pc.c   |   15 --
 kvm.h |3 +
 target-i386/kvm.c |8 +++
 5 files changed, 176 insertions(+), 6 deletions(-)
 create mode 100644 hw/kvm/apic.c

diff --git a/Makefile.target b/Makefile.target
index b549988..76de485 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -236,7 +236,7 @@ obj-i386-y += vmport.o
 obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
 obj-i386-y += debugcon.o multiboot.o
 obj-i386-y += pc_piix.o
-obj-i386-$(CONFIG_KVM) += kvm/clock.o
+obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o
 obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
 
 # shared objects
diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c
new file mode 100644
index 000..3924f9e
--- /dev/null
+++ b/hw/kvm/apic.c
@@ -0,0 +1,154 @@
+/*
+ * KVM in-kernel APIC support
+ *
+ * Copyright (c) 2011 Siemens AG
+ *
+ * Authors:
+ *  Jan Kiszka  
+ *
+ * This work is licensed under the terms of the GNU GPL version 2.
+ * See the COPYING file in the top-level directory.
+ */
+#include "hw/apic_internal.h"
+#include "kvm.h"
+
+static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
+   int reg_id, uint32_t val)
+{
+*((uint32_t *)(kapic->regs + (reg_id << 4))) = val;
+}
+
+static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic,
+   int reg_id)
+{
+return *((uint32_t *)(kapic->regs + (reg_id << 4)));
+}
+
+int kvm_put_apic(CPUState *env)
+{
+APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);
+struct kvm_lapic_state kapic;
+int i;
+
+if (s && kvm_enabled() && kvm_irqchip_in_kernel()) {
+memset(&kapic, 0, sizeof(kapic));
+kvm_apic_set_reg(&kapic, 0x2, s->id << 24);
+kvm_apic_set_reg(&kapic, 0x8, s->tpr);
+kvm_apic_set_reg(&kapic, 0xd, s->log_dest << 24);
+kvm_apic_set_reg(&kapic, 0xe, s->dest_mode << 28 | 0x0fff);
+kvm_apic_set_reg(&kapic, 0xf, s->spurious_vec);
+for (i = 0; i < 8; i++) {
+kvm_apic_set_reg(&kapic, 0x10 + i, s->isr[i]);
+kvm_apic_set_reg(&kapic, 0x18 + i, s->tmr[i]);
+kvm_apic_set_reg(&kapic, 0x20 + i, s->irr[i]);
+}
+kvm_apic_set_reg(&kapic, 0x28, s->esr);
+kvm_apic_set_reg(&kapic, 0x30, s->icr[0]);
+kvm_apic_set_reg(&kapic, 0x31, s->icr[1]);
+for (i = 0; i < APIC_LVT_NB; i++) {
+kvm_apic_set_reg(&kapic, 0x32 + i, s->lvt[i]);
+}
+kvm_apic_set_reg(&kapic, 0x38, s->initial_count);
+kvm_apic_set_reg(&kapic, 0x3e, s->divide_conf);
+
+return kvm_vcpu_ioctl(env, KVM_SET_LAPIC, &kapic);
+}
+
+return 0;
+}
+
+int kvm_get_apic(CPUState *env)
+{
+APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);
+struct kvm_lapic_state kapic;
+int ret, i, v;
+
+if (s && kvm_enabled() && kvm_irqchip_in_kernel()) {
+ret = kvm_vcpu_ioctl(env, KVM_GET_LAPIC, &kapic);
+if (ret < 0) {
+return ret;
+}
+
+s->id = kvm_apic_get_reg(&kapic, 0x2) >> 24;
+s->tpr = kvm_apic_get_reg(&kapic, 0x8);
+s->arb_id = kvm_apic_get_reg(&kapic, 0x9);
+s->log_dest = kvm_apic_get_reg(&kapic, 0xd) >> 24;
+s->dest_mode = kvm_apic_get_reg(&kapic, 0xe) >> 28;
+s->spurious_vec = kvm_apic_get_reg(&kapic, 0xf);
+for (i = 0; i < 8; i++) {
+s->isr[i] = kvm_apic_get_reg(&kapic, 0x10 + i);
+s->tmr[i] = kvm_apic_get_reg(&kapic, 0x18 + i);
+s->irr[i] = kvm_apic_get_reg(&kapic, 0x20 + i);
+}
+s->esr = kvm_apic_get_reg(&kapic, 0x28);
+s->icr[0] = kvm_apic_get_reg(&kapic, 0x30);
+s->icr[1] = kvm_apic_get_reg(&kapic, 0x31);
+for (i = 0; i < APIC_LVT_NB; i++) {
+s->lvt[i] = kvm_apic_get_reg(&kapic, 0x32 + i);
+}
+s->initial_count = kvm_apic_get_reg(&kapic, 0x38);
+s->divide_conf = kvm_apic_get_reg(&kapic, 0x3e);
+
+v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4);
+s->count_shift = (v + 1) & 7;
+
+s->initial_count_load_time = qemu_get_clock_ns(vm_clock);
+apic_next_timer(s, s->initial_count_load_time);
+}
+return 0;
+}
+
+static void kvm_apic_set_base(APICState *s, uint64_t val)
+{
+s->apicbase = val;
+}
+
+static void kvm_apic_set_tpr(APICState *s, uint8_t val)
+{
+s->tpr = (val & 0x0f) << 4;
+}
+
+static void do_inject_e

[PATCH v4 08/15] ioapic: Introduce backend/frontend infrastructure for KVM reuse

2011-12-08 Thread Jan Kiszka
Split up the IOAPIC analogously to APIC and i8259. KVM will share the
device description, reset logic and certain init parts with the user
space model.

Signed-off-by: Jan Kiszka 
---
 Makefile.target  |2 +-
 hw/ioapic.c  |  130 ---
 hw/ioapic_common.c   |  137 ++
 hw/ioapic_internal.h |  105 ++
 hw/pc_piix.c |1 +
 5 files changed, 254 insertions(+), 121 deletions(-)
 create mode 100644 hw/ioapic_common.c
 create mode 100644 hw/ioapic_internal.h

diff --git a/Makefile.target b/Makefile.target
index c46f062..b549988 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -231,7 +231,7 @@ obj-$(CONFIG_IVSHMEM) += ivshmem.o
 # Hardware support
 obj-i386-y += vga.o
 obj-i386-y += mc146818rtc.o pc.o
-obj-i386-y += cirrus_vga.o sga.o apic_common.o apic.o ioapic.o piix_pci.o
+obj-i386-y += cirrus_vga.o sga.o apic_common.o apic.o ioapic_common.o ioapic.o 
piix_pci.o
 obj-i386-y += vmport.o
 obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
 obj-i386-y += debugcon.o multiboot.o
diff --git a/hw/ioapic.c b/hw/ioapic.c
index 27b07c6..2db72e0 100644
--- a/hw/ioapic.c
+++ b/hw/ioapic.c
@@ -24,9 +24,7 @@
 #include "pc.h"
 #include "apic.h"
 #include "ioapic.h"
-#include "qemu-timer.h"
-#include "host-utils.h"
-#include "sysbus.h"
+#include "ioapic_internal.h"
 
 //#define DEBUG_IOAPIC
 
@@ -37,62 +35,6 @@
 #define DPRINTF(fmt, ...)
 #endif
 
-#define MAX_IOAPICS 1
-
-#define IOAPIC_VERSION  0x11
-
-#define IOAPIC_LVT_DEST_SHIFT   56
-#define IOAPIC_LVT_MASKED_SHIFT 16
-#define IOAPIC_LVT_TRIGGER_MODE_SHIFT   15
-#define IOAPIC_LVT_REMOTE_IRR_SHIFT 14
-#define IOAPIC_LVT_POLARITY_SHIFT   13
-#define IOAPIC_LVT_DELIV_STATUS_SHIFT   12
-#define IOAPIC_LVT_DEST_MODE_SHIFT  11
-#define IOAPIC_LVT_DELIV_MODE_SHIFT 8
-
-#define IOAPIC_LVT_MASKED   (1 << IOAPIC_LVT_MASKED_SHIFT)
-#define IOAPIC_LVT_REMOTE_IRR   (1 << IOAPIC_LVT_REMOTE_IRR_SHIFT)
-
-#define IOAPIC_TRIGGER_EDGE 0
-#define IOAPIC_TRIGGER_LEVEL1
-
-/*io{apic,sapic} delivery mode*/
-#define IOAPIC_DM_FIXED 0x0
-#define IOAPIC_DM_LOWEST_PRIORITY   0x1
-#define IOAPIC_DM_PMI   0x2
-#define IOAPIC_DM_NMI   0x4
-#define IOAPIC_DM_INIT  0x5
-#define IOAPIC_DM_SIPI  0x6
-#define IOAPIC_DM_EXTINT0x7
-#define IOAPIC_DM_MASK  0x7
-
-#define IOAPIC_VECTOR_MASK  0xff
-
-#define IOAPIC_IOREGSEL 0x00
-#define IOAPIC_IOWIN0x10
-
-#define IOAPIC_REG_ID   0x00
-#define IOAPIC_REG_VER  0x01
-#define IOAPIC_REG_ARB  0x02
-#define IOAPIC_REG_REDTBL_BASE  0x10
-#define IOAPIC_ID   0x00
-
-#define IOAPIC_ID_SHIFT 24
-#define IOAPIC_ID_MASK  0xf
-
-#define IOAPIC_VER_ENTRIES_SHIFT16
-
-typedef struct IOAPICState IOAPICState;
-
-struct IOAPICState {
-SysBusDevice busdev;
-MemoryRegion io_memory;
-uint8_t id;
-uint8_t ioregsel;
-uint32_t irr;
-uint64_t ioredtbl[IOAPIC_NUM_PINS];
-};
-
 static IOAPICState *ioapics[MAX_IOAPICS];
 
 static void ioapic_service(IOAPICState *s)
@@ -278,83 +220,31 @@ ioapic_mem_write(void *opaque, target_phys_addr_t addr, 
uint64_t val,
 }
 }
 
-static int ioapic_post_load(void *opaque, int version_id)
-{
-IOAPICState *s = opaque;
-
-if (version_id == 1) {
-/* set sane value */
-s->irr = 0;
-}
-return 0;
-}
-
-static const VMStateDescription vmstate_ioapic = {
-.name = "ioapic",
-.version_id = 3,
-.post_load = ioapic_post_load,
-.minimum_version_id = 1,
-.minimum_version_id_old = 1,
-.fields = (VMStateField[]) {
-VMSTATE_UINT8(id, IOAPICState),
-VMSTATE_UINT8(ioregsel, IOAPICState),
-VMSTATE_UNUSED_V(2, 8), /* to account for qemu-kvm's v2 format */
-VMSTATE_UINT32_V(irr, IOAPICState, 2),
-VMSTATE_UINT64_ARRAY(ioredtbl, IOAPICState, IOAPIC_NUM_PINS),
-VMSTATE_END_OF_LIST()
-}
-};
-
-static void ioapic_reset(DeviceState *d)
-{
-IOAPICState *s = DO_UPCAST(IOAPICState, busdev.qdev, d);
-int i;
-
-s->id = 0;
-s->ioregsel = 0;
-s->irr = 0;
-for (i = 0; i < IOAPIC_NUM_PINS; i++) {
-s->ioredtbl[i] = 1 << IOAPIC_LVT_MASKED_SHIFT;
-}
-}
-
 static const MemoryRegionOps ioapic_io_ops = {
 .read = ioapic_mem_read,
 .write = ioapic_mem_write,
 .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static int ioapic_init1(SysBusDevice *dev)
+static void ioapic_backend_init(IOAPICState *s, int index)
 {
-IOAPICState *s = FROM_SYSBUS(IOAPICState, dev);
-static int ioapic_no;
-
-if (ioapic_no >= MAX_IOAPICS) {
-return -1;
-}
-
 memory

[PATCH v4 10/15] kvm: Introduce core services for in-kernel irqchip support

2011-12-08 Thread Jan Kiszka
Add the basic infrastructure to activate in-kernel irqchip support, inject
interrupts into these models, and maintain IRQ routes.

Routing is optional and depends on the host arch supporting
KVM_CAP_IRQ_ROUTING. When it's not available on x86, we lose the HPET as
we can't route GSI0 to IOAPIC pin 2.

In-kernel irqchip support will eventually be controlled by the machine
property 'kernel_irqchip', but this is not yet wired up.

Signed-off-by: Jan Kiszka 
---
 kvm-all.c |  149 +
 kvm.h |8 +++
 target-i386/kvm.c |   11 
 3 files changed, 168 insertions(+), 0 deletions(-)

diff --git a/kvm-all.c b/kvm-all.c
index 4c466d6..8958abd 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -77,6 +77,13 @@ struct KVMState
 int pit_in_kernel;
 int xsave, xcrs;
 int many_ioeventfds;
+int irqchip_inject_ioctl;
+#ifdef KVM_CAP_IRQ_ROUTING
+struct kvm_irq_routing *irq_routes;
+int nr_allocated_irq_routes;
+uint32_t *used_gsi_bitmap;
+unsigned int max_gsi;
+#endif
 };
 
 KVMState *kvm_state;
@@ -693,6 +700,138 @@ static void kvm_handle_interrupt(CPUState *env, int mask)
 }
 }
 
+int kvm_irqchip_set_irq(KVMState *s, int irq, int level)
+{
+struct kvm_irq_level event;
+int ret;
+
+assert(s->irqchip_in_kernel);
+
+event.level = level;
+event.irq = irq;
+ret = kvm_vm_ioctl(s, s->irqchip_inject_ioctl, &event);
+if (ret < 0) {
+perror("kvm_set_irqchip_line");
+abort();
+}
+
+return (s->irqchip_inject_ioctl == KVM_IRQ_LINE) ? 1 : event.status;
+}
+
+#ifdef KVM_CAP_IRQ_ROUTING
+static void set_gsi(KVMState *s, unsigned int gsi)
+{
+assert(gsi < s->max_gsi);
+
+s->used_gsi_bitmap[gsi / 32] |= 1U << (gsi % 32);
+}
+
+/*
+ * Set up the GSI routing state in @s: allocate the bitmap of used GSIs,
+ * sized from the count returned for KVM_CAP_IRQ_ROUTING, allocate an empty
+ * routing table and then call kvm_arch_init_irq_routing().
+ */
+static void kvm_init_irq_routing(KVMState *s)
+{
+    int gsi_count;
+
+    gsi_count = kvm_check_extension(s, KVM_CAP_IRQ_ROUTING);
+    if (gsi_count > 0) {
+        unsigned int gsi_bits, i;
+
+        /*
+         * Round the number of bits up to a multiple of 32 so the bitmap
+         * consists of whole uint32_t words that can be searched using ffs.
+         * gsi_bits must stay a bit count: it sizes the allocation (/ 8),
+         * bounds set_gsi() via max_gsi and limits the loop below.
+         */
+        gsi_bits = (gsi_count + 31) / 32 * 32;
+        s->used_gsi_bitmap = g_malloc0(gsi_bits / 8);
+        s->max_gsi = gsi_bits;
+
+        /* Mark any over-allocated bits as already in use */
+        for (i = gsi_count; i < gsi_bits; i++) {
+            set_gsi(s, i);
+        }
+    }
+
+    s->irq_routes = g_malloc0(sizeof(*s->irq_routes));
+    s->nr_allocated_irq_routes = 0;
+
+    kvm_arch_init_irq_routing(s);
+}
+
+static void kvm_add_routing_entry(KVMState *s,
+  struct kvm_irq_routing_entry *entry)
+{
+struct kvm_irq_routing_entry *new;
+int n, size;
+
+if (s->irq_routes->nr == s->nr_allocated_irq_routes) {
+n = s->nr_allocated_irq_routes * 2;
+if (n < 64) {
+n = 64;
+}
+size = sizeof(struct kvm_irq_routing);
+size += n * sizeof(*new);
+s->irq_routes = g_realloc(s->irq_routes, size);
+s->nr_allocated_irq_routes = n;
+}
+n = s->irq_routes->nr++;
+new = &s->irq_routes->entries[n];
+memset(new, 0, sizeof(*new));
+new->gsi = entry->gsi;
+new->type = entry->type;
+new->flags = entry->flags;
+new->u = entry->u;
+
+set_gsi(s, entry->gsi);
+}
+
+/*
+ * Append a routing entry of type KVM_IRQ_ROUTING_IRQCHIP that maps GSI @irq
+ * to input @pin of in-kernel chip @irqchip. The entry is only added to the
+ * table kept in @s by kvm_add_routing_entry().
+ */
+void kvm_irqchip_add_route(KVMState *s, int irq, int irqchip, int pin)
+{
+    /*
+     * Zero-initialize the entry: kvm_add_routing_entry() copies the whole
+     * 'u' union, so bytes not written below must not be indeterminate.
+     */
+    struct kvm_irq_routing_entry e = { 0 };
+
+    e.gsi = irq;
+    e.type = KVM_IRQ_ROUTING_IRQCHIP;
+    e.flags = 0;
+    e.u.irqchip.irqchip = irqchip;
+    e.u.irqchip.pin = pin;
+    kvm_add_routing_entry(s, &e);
+}
+
+int kvm_irqchip_commit_routes(KVMState *s)
+{
+s->irq_routes->flags = 0;
+return kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes);
+}
+
+#else /* !KVM_CAP_IRQ_ROUTING */
+
+static void kvm_init_irq_routing(KVMState *s)
+{
+}
+#endif /* !KVM_CAP_IRQ_ROUTING */
+
+static int kvm_irqchip_create(KVMState *s)
+{
+QemuOptsList *list = qemu_find_opts("machine");
+int ret;
+
+if (QTAILQ_EMPTY(&list->head) ||
+!qemu_opt_get_bool(QTAILQ_FIRST(&list->head),
+   "kernel_irqchip", false) ||
+!kvm_check_extension(s, KVM_CAP_IRQCHIP)) {
+return 0;
+}
+
+ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
+if (ret < 0) {
+fprintf(stderr, "Create kernel irqchip failed\n");
+return ret;
+}
+
+s->irqchip_inject_ioctl = KVM_IRQ_LINE;
+if (kvm_check_extension(s, KVM_CAP_IRQ_INJECT_STATUS)) {
+s->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
+}
+s->irqchip_in_kernel = 1;
+
+kvm_init_irq_routing(s);
+
+return 0;
+}
+
 int kvm_init(void)
 {
 static const char upgrade_note[] =
@@ -788,6 +927,11 @@ int kvm_init(void)
 goto err;
 }
 
+ret = kvm_irqchip_create(s);
+if (ret < 0) {
+goto err;
+}
+
 kvm_state = s;
 cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);
 
@@ -1122,6 +1266,11 @@ int kvm_has_many_ioeventfds(void)
 return kvm_state->many_ioeventfds;
 }
 
+int kvm_has_gsi_routing(void)
+

[PATCH v4 14/15] kvm: x86: Add user space part for in-kernel IOAPIC

2011-12-08 Thread Jan Kiszka
This introduces the KVM-accelerated IOAPIC backend and extends the IRQ
routing setup by the 0->2 redirection when needed.

The IOAPIC gains a KVM-specific property that allows to define the GSI
base for injecting interrupts into the kernel model. This will allow to
disentangle PIC and IOAPIC pins for chipsets that support more
sophisticated IRQ routes than the PIIX3. So far the base is kept at 0,
i.e. PIC and IOAPIC share pins 0..15.

Signed-off-by: Jan Kiszka 
---
 Makefile.target  |2 +-
 hw/ioapic_common.c   |1 +
 hw/ioapic_internal.h |1 +
 hw/kvm/ioapic.c  |  101 ++
 hw/pc_piix.c |   15 +++-
 5 files changed, 118 insertions(+), 2 deletions(-)
 create mode 100644 hw/kvm/ioapic.c

diff --git a/Makefile.target b/Makefile.target
index fb10143..b48bb57 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -236,7 +236,7 @@ obj-i386-y += vmport.o
 obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
 obj-i386-y += debugcon.o multiboot.o
 obj-i386-y += pc_piix.o
-obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o kvm/i8259.o
+obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o kvm/i8259.o kvm/ioapic.o
 obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
 
 # shared objects
diff --git a/hw/ioapic_common.c b/hw/ioapic_common.c
index 094551c..efc1d44 100644
--- a/hw/ioapic_common.c
+++ b/hw/ioapic_common.c
@@ -122,6 +122,7 @@ static SysBusDeviceInfo ioapic_info = {
 .qdev.no_user = 1,
 .qdev.props = (Property[]) {
 DEFINE_PROP_STRING("backend", IOAPICState, backend_name),
+DEFINE_PROP_UINT32("kvm_gsi_base", IOAPICState, kvm_gsi_base, 0),
 DEFINE_PROP_END_OF_LIST(),
 },
 };
diff --git a/hw/ioapic_internal.h b/hw/ioapic_internal.h
index c5fab8b..bf63115 100644
--- a/hw/ioapic_internal.h
+++ b/hw/ioapic_internal.h
@@ -95,6 +95,7 @@ struct IOAPICState {
 
 char *backend_name;
 IOAPICBackend *backend;
+uint32_t kvm_gsi_base;
 };
 
 void ioapic_register_device(void);
diff --git a/hw/kvm/ioapic.c b/hw/kvm/ioapic.c
new file mode 100644
index 000..0e66240
--- /dev/null
+++ b/hw/kvm/ioapic.c
@@ -0,0 +1,101 @@
+/*
+ * KVM in-kernel IOAPIC support
+ *
+ * Copyright (c) 2011 Siemens AG
+ *
+ * Authors:
+ *  Jan Kiszka  
+ *
+ * This work is licensed under the terms of the GNU GPL version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "hw/pc.h"
+#include "hw/ioapic_internal.h"
+#include "hw/apic_internal.h"
+#include "kvm.h"
+
+/*
+ * Read the IOAPIC state from the kernel with KVM_GET_IRQCHIP and copy id,
+ * ioregsel, irr and the redirection table into @s. Aborts if the ioctl
+ * fails.
+ */
+static void kvm_ioapic_get(IOAPICState *s)
+{
+    struct kvm_irqchip chip;
+    struct kvm_ioapic_state *kioapic;
+    int ret, i;
+
+    chip.chip_id = KVM_IRQCHIP_IOAPIC;
+    ret = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, &chip);
+    if (ret < 0) {
+        /* ret is negative here; strerror() needs the positive error number */
+        fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(-ret));
+        abort();
+    }
+
+    kioapic = &chip.chip.ioapic;
+
+    s->id = kioapic->id;
+    s->ioregsel = kioapic->ioregsel;
+    s->irr = kioapic->irr;
+    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+        s->ioredtbl[i] = kioapic->redirtbl[i].bits;
+    }
+}
+
+/*
+ * Write the IOAPIC state held in @s (id, ioregsel, MMIO base address, irr
+ * and the redirection table) to the kernel with KVM_SET_IRQCHIP. Aborts if
+ * the ioctl fails.
+ */
+static void kvm_ioapic_put(IOAPICState *s)
+{
+    struct kvm_irqchip chip;
+    struct kvm_ioapic_state *kioapic;
+    int ret, i;
+
+    chip.chip_id = KVM_IRQCHIP_IOAPIC;
+    kioapic = &chip.chip.ioapic;
+
+    kioapic->id = s->id;
+    kioapic->ioregsel = s->ioregsel;
+    kioapic->base_address = s->busdev.mmio[0].addr;
+    kioapic->irr = s->irr;
+    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+        kioapic->redirtbl[i].bits = s->ioredtbl[i];
+    }
+
+    ret = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, &chip);
+    if (ret < 0) {
+        /*
+         * Name the ioctl that actually failed, and negate ret: it is
+         * negative here while strerror() needs the positive error number.
+         */
+        fprintf(stderr, "KVM_SET_IRQCHIP failed: %s\n", strerror(-ret));
+        abort();
+    }
+}
+
+static void kvm_ioapic_reset(IOAPICState *s)
+{
+ioapic_reset_internal(s);
+
+kvm_ioapic_put(s);
+}
+
+static void kvm_ioapic_set_irq(void *opaque, int irq, int level)
+{
+IOAPICState *s = opaque;
+int delivered;
+
+delivered = kvm_irqchip_set_irq(kvm_state, s->kvm_gsi_base + irq, level);
+apic_set_irq_delivered(delivered);
+}
+
+static void kvm_ioapic_backend_init(IOAPICState *s, int index)
+{
+memory_region_init_reservation(&s->io_memory, "kvm-ioapic", 0x1000);
+
+qdev_init_gpio_in(&s->busdev.qdev, kvm_ioapic_set_irq, IOAPIC_NUM_PINS);
+}
+
+static IOAPICBackend kvm_ioapic_backend = {
+.name = "KVM",
+.init = kvm_ioapic_backend_init,
+.reset = kvm_ioapic_reset,
+.pre_save = kvm_ioapic_get,
+.post_load = kvm_ioapic_put,
+};
+
+static void kvm_ioapic_register(void)
+{
+ioapic_register_backend(&kvm_ioapic_backend);
+}
+
+device_init(kvm_ioapic_register)
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 8650319..93d0eba 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -68,6 +68,15 @@ static void kvm_piix3_setup_irq_routing(bool pci_enabled)
 for (i = 8; i < 16; ++i) {
 kvm_irqchip_add_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8);
 }
+

[PATCH v4 13/15] kvm: x86: Add user space part for in-kernel i8259

2011-12-08 Thread Jan Kiszka
Introduce the alternative i8259 backend that exploits KVM in-kernel
acceleration.

The PIIX3 initialization code is furthermore extended by KVM specific
IRQ route setup. GSI injection differs in KVM mode from the user space
model. As we can dispatch ISA-range IRQs to both IOAPIC and PIC inside
the kernel, we do not need to inject them separately. This is reflected
by a KVM-specific GSI handler.

Signed-off-by: Jan Kiszka 
---
 Makefile.target |2 +-
 hw/kvm/i8259.c  |  126 +++
 hw/pc.h |1 +
 hw/pc_piix.c|   50 --
 4 files changed, 174 insertions(+), 5 deletions(-)
 create mode 100644 hw/kvm/i8259.c

diff --git a/Makefile.target b/Makefile.target
index 76de485..fb10143 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -236,7 +236,7 @@ obj-i386-y += vmport.o
 obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
 obj-i386-y += debugcon.o multiboot.o
 obj-i386-y += pc_piix.o
-obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o
+obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o kvm/i8259.o
 obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
 
 # shared objects
diff --git a/hw/kvm/i8259.c b/hw/kvm/i8259.c
new file mode 100644
index 000..98d7141
--- /dev/null
+++ b/hw/kvm/i8259.c
@@ -0,0 +1,126 @@
+/*
+ * KVM in-kernel PIC (i8259) support
+ *
+ * Copyright (c) 2011 Siemens AG
+ *
+ * Authors:
+ *  Jan Kiszka  
+ *
+ * This work is licensed under the terms of the GNU GPL version 2.
+ * See the COPYING file in the top-level directory.
+ */
+#include "hw/i8259_internal.h"
+#include "hw/apic_internal.h"
+#include "kvm.h"
+
+/*
+ * Read the state of one in-kernel PIC with KVM_GET_IRQCHIP and copy it into
+ * @s. s->master selects between the master and the slave chip. Aborts if
+ * the ioctl fails.
+ */
+static void kvm_pic_get(PicState *s)
+{
+    struct kvm_irqchip chip;
+    struct kvm_pic_state *kpic;
+    int ret;
+
+    chip.chip_id = s->master ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE;
+    ret = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, &chip);
+    if (ret < 0) {
+        /* ret is negative here; strerror() needs the positive error number */
+        fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(-ret));
+        abort();
+    }
+
+    kpic = &chip.chip.pic;
+
+    s->last_irr = kpic->last_irr;
+    s->irr = kpic->irr;
+    s->imr = kpic->imr;
+    s->isr = kpic->isr;
+    s->priority_add = kpic->priority_add;
+    s->irq_base = kpic->irq_base;
+    s->read_reg_select = kpic->read_reg_select;
+    s->poll = kpic->poll;
+    s->special_mask = kpic->special_mask;
+    s->init_state = kpic->init_state;
+    s->auto_eoi = kpic->auto_eoi;
+    s->rotate_on_auto_eoi = kpic->rotate_on_auto_eoi;
+    s->special_fully_nested_mode = kpic->special_fully_nested_mode;
+    s->init4 = kpic->init4;
+    s->elcr = kpic->elcr;
+    s->elcr_mask = kpic->elcr_mask;
+}
+
+/*
+ * Write the PIC state held in @s to the kernel with KVM_SET_IRQCHIP.
+ * s->master selects between the master and the slave chip. Aborts if the
+ * ioctl fails.
+ */
+static void kvm_pic_put(PicState *s)
+{
+    struct kvm_irqchip chip;
+    struct kvm_pic_state *kpic;
+    int ret;
+
+    chip.chip_id = s->master ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE;
+
+    kpic = &chip.chip.pic;
+
+    kpic->last_irr = s->last_irr;
+    kpic->irr = s->irr;
+    kpic->imr = s->imr;
+    kpic->isr = s->isr;
+    kpic->priority_add = s->priority_add;
+    kpic->irq_base = s->irq_base;
+    kpic->read_reg_select = s->read_reg_select;
+    kpic->poll = s->poll;
+    kpic->special_mask = s->special_mask;
+    kpic->init_state = s->init_state;
+    kpic->auto_eoi = s->auto_eoi;
+    kpic->rotate_on_auto_eoi = s->rotate_on_auto_eoi;
+    kpic->special_fully_nested_mode = s->special_fully_nested_mode;
+    kpic->init4 = s->init4;
+    kpic->elcr = s->elcr;
+    kpic->elcr_mask = s->elcr_mask;
+
+    ret = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, &chip);
+    if (ret < 0) {
+        /*
+         * Name the ioctl that actually failed, and negate ret: it is
+         * negative here while strerror() needs the positive error number.
+         */
+        fprintf(stderr, "KVM_SET_IRQCHIP failed: %s\n", strerror(-ret));
+        abort();
+    }
+}
+
+static void kvm_pic_reset(PicState *s)
+{
+pic_reset_internal(s);
+s->elcr = 0;
+
+kvm_pic_put(s);
+}
+
+static void kvm_pic_set_irq(void *opaque, int irq, int level)
+{
+int delivered;
+
+delivered = kvm_irqchip_set_irq(kvm_state, irq, level);
+apic_set_irq_delivered(delivered);
+}
+
+static void kvm_pic_backend_init(PicState *s)
+{
+memory_region_init_reservation(&s->base_io, "kvm-pic", 2);
+memory_region_init_reservation(&s->elcr_io, "kvm-elcr", 1);
+}
+
+qemu_irq *kvm_i8259_init(void)
+{
+i8259_init_chip(true, "KVM");
+i8259_init_chip(false, "KVM");
+
+return qemu_allocate_irqs(kvm_pic_set_irq, NULL, ISA_NUM_IRQS);
+}
+
+static PICBackend kvm_pic_backend = {
+.name = "KVM",
+.init = kvm_pic_backend_init,
+.reset = kvm_pic_reset,
+.pre_save = kvm_pic_get,
+.post_load = kvm_pic_put,
+};
+
+static void kvm_pic_register(void)
+{
+pic_register_backend(&kvm_pic_backend);
+}
+
+device_init(kvm_pic_register)
diff --git a/hw/pc.h b/hw/pc.h
index b7b7e40..fc6f446 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -64,6 +64,7 @@ bool parallel_mm_init(MemoryRegion *address_space,
 typedef struct PicState PicState;
 extern PicState *isa_pic;
 qemu_irq *i8259_init(qemu_irq parent_irq);
+qemu_irq *kvm_i8259_init(void);
 int pic_read_irq(PicSta

[PATCH v4 00/15] uq/master: Introduce basic irqchip support

2011-12-08 Thread Jan Kiszka
Changes in v4:
- rebased on top of current uq/master
- fixed stupid bugs that broke bisectability and user space irqchip mode
- integrated NMI-over-LINT1 injection logic

CC: Lai Jiangshan 

Jan Kiszka (15):
  msi: Generalize msix_supported to msi_supported
  kvm: Move kvmclock into hw/kvm folder
  apic: Stop timer on reset
  apic: Inject external NMI events via LINT1
  apic: Introduce backend/frontend infrastructure for KVM reuse
  apic: Open-code timer save/restore
  i8259: Introduce backend/frontend infrastructure for KVM reuse
  ioapic: Introduce backend/frontend infrastructure for KVM reuse
  memory: Introduce memory_region_init_reservation
  kvm: Introduce core services for in-kernel irqchip support
  kvm: x86: Establish IRQ0 override control
  kvm: x86: Add user space part for in-kernel APIC
  kvm: x86: Add user space part for in-kernel i8259
  kvm: x86: Add user space part for in-kernel IOAPIC
  kvm: Arm in-kernel irqchip support

 Makefile.objs  |2 +-
 Makefile.target|6 +-
 configure  |1 +
 hw/apic.c  |  309 ---
 hw/apic.h  |1 +
 hw/apic_common.c   |  312 
 hw/apic_internal.h |  122 
 hw/i8259.c |  127 ++--
 hw/i8259_common.c  |  173 ++
 hw/i8259_internal.h|   82 +++
 hw/ioapic.c|  130 ++---
 hw/ioapic_common.c |  138 ++
 hw/ioapic_internal.h   |  106 ++
 hw/kvm/apic.c  |  154 
 hw/{kvmclock.c => kvm/clock.c} |4 +-
 hw/{kvmclock.h => kvm/clock.h} |0
 hw/kvm/i8259.c |  126 
 hw/kvm/ioapic.c|  101 +
 hw/msi.c   |8 +
 hw/msi.h   |2 +
 hw/msix.c  |9 +-
 hw/msix.h  |2 -
 hw/pc.c|   19 ++-
 hw/pc.h|1 +
 hw/pc_piix.c   |   66 -
 kvm-all.c  |  154 
 kvm-stub.c |5 +
 kvm.h  |   13 ++
 memory.c   |   36 +
 memory.h   |   16 ++
 monitor.c  |6 +-
 qemu-config.c  |4 +
 qemu-options.hx|5 +-
 sysemu.h   |1 -
 target-i386/kvm.c  |   19 +++
 trace-events   |2 +-
 vl.c   |1 -
 37 files changed, 1724 insertions(+), 539 deletions(-)
 create mode 100644 hw/apic_common.c
 create mode 100644 hw/apic_internal.h
 create mode 100644 hw/i8259_common.c
 create mode 100644 hw/i8259_internal.h
 create mode 100644 hw/ioapic_common.c
 create mode 100644 hw/ioapic_internal.h
 create mode 100644 hw/kvm/apic.c
 rename hw/{kvmclock.c => kvm/clock.c} (98%)
 rename hw/{kvmclock.h => kvm/clock.h} (100%)
 create mode 100644 hw/kvm/i8259.c
 create mode 100644 hw/kvm/ioapic.c

-- 
1.7.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 05/15] apic: Introduce backend/frontend infrastructure for KVM reuse

2011-12-08 Thread Jan Kiszka
The KVM in-kernel APIC model will reuse parts of the user space model
while providing the same frontend view to guest and most management
interfaces. Introduce an APIC backend concept to encapsulate those
parts that will tell user space and KVM model apart. The backend offers
callback hooks for init, base/tpr setting, and the external NMI delivery
that will be implemented accordingly.

Signed-off-by: Jan Kiszka 
---
 Makefile.target|2 +-
 hw/apic.c  |  282 +++-
 hw/apic_common.c   |  265 
 hw/apic_internal.h |  119 ++
 hw/pc.c|1 +
 trace-events   |2 +-
 6 files changed, 403 insertions(+), 268 deletions(-)
 create mode 100644 hw/apic_common.c
 create mode 100644 hw/apic_internal.h

diff --git a/Makefile.target b/Makefile.target
index 1d24a30..c46f062 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -231,7 +231,7 @@ obj-$(CONFIG_IVSHMEM) += ivshmem.o
 # Hardware support
 obj-i386-y += vga.o
 obj-i386-y += mc146818rtc.o pc.o
-obj-i386-y += cirrus_vga.o sga.o apic.o ioapic.o piix_pci.o
+obj-i386-y += cirrus_vga.o sga.o apic_common.o apic.o ioapic.o piix_pci.o
 obj-i386-y += vmport.o
 obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
 obj-i386-y += debugcon.o multiboot.o
diff --git a/hw/apic.c b/hw/apic.c
index b9d733c..f25be80 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -16,53 +16,13 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, see 
  */
-#include "hw.h"
+#include "apic_internal.h"
 #include "apic.h"
 #include "ioapic.h"
-#include "qemu-timer.h"
 #include "host-utils.h"
-#include "sysbus.h"
 #include "trace.h"
 #include "pc.h"
 
-/* APIC Local Vector Table */
-#define APIC_LVT_TIMER   0
-#define APIC_LVT_THERMAL 1
-#define APIC_LVT_PERFORM 2
-#define APIC_LVT_LINT0   3
-#define APIC_LVT_LINT1   4
-#define APIC_LVT_ERROR   5
-#define APIC_LVT_NB  6
-
-/* APIC delivery modes */
-#define APIC_DM_FIXED  0
-#define APIC_DM_LOWPRI 1
-#define APIC_DM_SMI2
-#define APIC_DM_NMI4
-#define APIC_DM_INIT   5
-#define APIC_DM_SIPI   6
-#define APIC_DM_EXTINT 7
-
-/* APIC destination mode */
-#define APIC_DESTMODE_FLAT 0xf
-#define APIC_DESTMODE_CLUSTER  1
-
-#define APIC_TRIGGER_EDGE  0
-#define APIC_TRIGGER_LEVEL 1
-
-#defineAPIC_LVT_TIMER_PERIODIC (1<<17)
-#defineAPIC_LVT_MASKED (1<<16)
-#defineAPIC_LVT_LEVEL_TRIGGER  (1<<15)
-#defineAPIC_LVT_REMOTE_IRR (1<<14)
-#defineAPIC_INPUT_POLARITY (1<<13)
-#defineAPIC_SEND_PENDING   (1<<12)
-
-#define ESR_ILLEGAL_ADDRESS (1 << 7)
-
-#define APIC_SV_DIRECTED_IO (1<<12)
-#define APIC_SV_ENABLE  (1<<8)
-
-#define MAX_APICS 255
 #define MAX_APIC_WORDS 8
 
 /* Intel APIC constants: from include/asm/msidef.h */
@@ -75,40 +35,7 @@
 #define MSI_ADDR_DEST_ID_SHIFT 12
 #defineMSI_ADDR_DEST_ID_MASK   0x000
 
-#define MSI_ADDR_SIZE   0x10
-
-typedef struct APICState APICState;
-
-struct APICState {
-SysBusDevice busdev;
-MemoryRegion io_memory;
-void *cpu_env;
-uint32_t apicbase;
-uint8_t id;
-uint8_t arb_id;
-uint8_t tpr;
-uint32_t spurious_vec;
-uint8_t log_dest;
-uint8_t dest_mode;
-uint32_t isr[8];  /* in service register */
-uint32_t tmr[8];  /* trigger mode register */
-uint32_t irr[8]; /* interrupt request register */
-uint32_t lvt[APIC_LVT_NB];
-uint32_t esr; /* error register */
-uint32_t icr[2];
-
-uint32_t divide_conf;
-int count_shift;
-uint32_t initial_count;
-int64_t initial_count_load_time, next_time;
-uint32_t idx;
-QEMUTimer *timer;
-int sipi_vector;
-int wait_for_sipi;
-};
-
 static APICState *local_apics[MAX_APICS + 1];
-static int apic_irq_delivered;
 
 static void apic_set_irq(APICState *s, int vector_num, int trigger_mode);
 static void apic_update_irq(APICState *s);
@@ -205,10 +132,8 @@ void apic_deliver_pic_intr(DeviceState *d, int level)
 }
 }
 
-void apic_deliver_nmi(DeviceState *d)
+static void apic_external_nmi(APICState *s)
 {
-APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
-
 apic_local_deliver(s, APIC_LVT_LINT1);
 }
 
@@ -300,14 +225,8 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, 
uint8_t delivery_mode,
 apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
 }
 
-void cpu_set_apic_base(DeviceState *d, uint64_t val)
+static void apic_set_base(APICState *s, uint64_t val)
 {
-APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
-
-trace_cpu_set_apic_base(val);
-
-if (!s)
-return;
 s->apicbase = (val & 0xf000) |
 (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
 /* if disabled, cann

Re: [patch 01/12] [PATCH] kvm-s390: ioctl to switch to user controlled virtual machines

2011-12-08 Thread Alexander Graf

On 12/08/2011 12:48 PM, Carsten Otte wrote:

On 08.12.2011 11:18, Alexander Graf wrote:

If you really have to do this, please

  1) make it s390 only. I don't even want to have to see this 
ugliness in other archs

It pretty much is. The only interference is a) checking the machine
type in arch_init_vm now that I've introduced that parameter to
CREATE_VM on Avi's request and b) a dummy arch_vcpu_fault function that
I've introduced on Avi's request. Both changes make sense for future
enhancements independent of ucontrol.


Ah, so you're only introducing a generic framework for mmapping pages 
between user space and kernel space. That's arch-independent of course 
and a good idea :)



Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] qemu-kvm: Move make-release to scripts/qemu-kvm

2011-12-08 Thread Jan Kiszka
On 2011-12-07 11:49, Avi Kivity wrote:
> On 12/06/2011 04:34 PM, Jan Kiszka wrote:
>> Save the qemu-kvm release helper before deleting the kvm directory.
> 
> Thanks, applied.
> 
>> Anything else useful remaining in /kvm? Otherwise I would release my
>> dinosaur extinction patch.
> 
> Are we clear on vgabios?
> 

At least qemu-kvm does not seem to contain different vgabios* binaries
anymore. Whatever is in kvm/vgabios, it should be unused by now.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/5 V5] Avoid soft lockup message when KVM is stopped by host

2011-12-08 Thread Eric B Munson
On Wed, 07 Dec 2011, Avi Kivity wrote:

> On 12/05/2011 10:18 PM, Eric B Munson wrote:
> > Changes from V4:
> > Rename KVM_GUEST_PAUSED to KVMCLOCK_GUEST_PAUSED
> > Add description of KVMCLOCK_GUEST_PAUSED ioctl to api.txt
> >
> > Changes from V3:
> > Include CC's on patch 3
> > Drop clear flag ioctl and have the watchdog clear the flag when it is reset
> >
> > Changes from V2:
> > A new kvm functions defined in kvm_para.h, the only change to pvclock is the
> > initial flag definition
> >
> > Changes from V1:
> > (Thanks Marcelo)
> > Host code has all been moved to arch/x86/kvm/x86.c
> > KVM_PAUSE_GUEST was renamed to KVM_GUEST_PAUSED
> >
> > When a guest kernel is stopped by the host hypervisor it can look like a 
> > soft
> > lockup to the guest kernel.  This false warning can mask later soft lockup
> > warnings which may be real.  This patch series adds a method for a host
> > hypervisor to communicate to a guest kernel that it is being stopped.  The
> > final patch in the series has the watchdog check this flag when it goes to
> > issue a soft lockup warning and skip the warning if the guest knows it was
> > stopped.
> >
> > It was attempted to solve this in Qemu, but the side effects of saving and
> > restoring the clock and tsc for each vcpu put the wall clock of the guest 
> > behind
> > by the amount of time of the pause.  This forces a guest to have ntp running
> > in order to keep the wall clock accurate.
> 
> Having this controlled from userspace means it doesn't work for SIGSTOP
> or for long scheduling delays.  What about doing this automatically
> based on preempt notifiers?
> 
> 
> -- 
> error compiling committee.c: too many arguments to function
> 
My concern for preempt notifiers is masking real soft lockup warnings.  If the
flag is set every time the vm is preempted, it becomes more likely that we will
mask real warnings.  The ioctl was chosen because it sets the flag only when
the guest is being paused deliberately.

AFAIK, SIGSTOP is not a supported way to stop a qemu vm so a soft lockup
warning would be working as designed there.  If that isn't the case, or if it
ever changes, we could always install a signal handler for SIGCONT that set the
flag before resuming the vm.

Scheduling delays are also beyond the scope of this problem and I see the soft
lockup warning as appropriate in that case.

Eric


signature.asc
Description: Digital signature


Re: [PATCH 02/10] nEPT: MMU context for nested EPT

2011-12-08 Thread Nadav Har'El
On Mon, Nov 14, 2011, Avi Kivity wrote about "Re: [PATCH 02/10] nEPT: MMU 
context for nested EPT":
> > >> +#if PTTYPE == EPT 
> > >> +real_gfn = mmu->translate_gpa(vcpu, 
> > >> gfn_to_gpa(table_gfn),
> > >> +  EPT_WRITABLE_MASK);
> > >> +#else
> > >>  real_gfn = mmu->translate_gpa(vcpu, 
> > >> gfn_to_gpa(table_gfn),
> > >>
> > >> PFERR_USER_MASK|PFERR_WRITE_MASK);
> > >> +#endif
> > >> +
> > > 
> > > Unneeded, I think.
> >
> > Is it because translate_nested_gpa always set USER_MASK ? 
> 
> Yes... maybe that function needs to do something like
> 
>access |= mmu->default_access;

Unless I'm misunderstanding something, translate_nested_gpa, and
gva_to_gpa, take as their "access" parameter a bitmask of PFERR_*,
so it's fine for PFERR_USER_MASK to be enabled in translate_nested_gpa;
It just shouldn't cause PT_USER_MASK to be used. The only additional
problem I can find is in walk_addr_generic which

does

if (!check_write_user_access(vcpu, write_fault, user_fault,
  pte))
eperm = true;

and that checks pte & PT_USER_MASK, which it shouldn't if
PTTYPE==PTTYPE_EPT.

It's really confusing that we now have in mmu.c no less than 4 (!)
access bit schemes, similar in many ways but different in many others:

1. page fault error codes (PFERR_*_MASK)
2. x86 page table access bits (PT_*_MASK)
3. KVM private access bits (ACC_*_MASK)
4. EPT access bits (VMX_EPT_*_MASK).

I just have to try hard not to confuse them.

-- 
Nadav Har'El|   Thursday, Dec 8 2011, 
n...@math.technion.ac.il |-
Phone +972-523-790466, ICQ 13349191 |Sorry, but my karma just ran over your
http://nadav.harel.org.il   |dogma.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/5 V5] Add functions to check if the host has stopped the vm

2011-12-08 Thread Eric B Munson
On Wed, 07 Dec 2011, Avi Kivity wrote:

> On 12/05/2011 10:19 PM, Eric B Munson wrote:
> > When a host stops or suspends a VM it will set a flag to show this.  The
> > watchdog will use these functions to determine if a softlockup is real, or 
> > the
> > result of a suspended VM.
> >  
> > +bool kvm_check_and_clear_guest_paused(int cpu)
> > +{
> > +   bool ret = false;
> > +   struct pvclock_vcpu_time_info *src;
> > +
> > +   /*
> > +* per_cpu() is safe here because this function is only called from
> > +* timer functions where preemption is already disabled.
> > +*/
> > +   WARN_ON(!in_atomic());
> > +   src = &per_cpu(hv_clock, cpu);
> 
> __get_cpu_var(); drop the cpu argument
> 

Will change for V6.

> > +   if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
> > +   src->flags = src->flags & (~PVCLOCK_GUEST_STOPPED);
> 
> Isn't this racy?  Between reading and writing src->flags, we can exit to
> the hypervisor and add/remove new flags.  The write then overrides those
> new flags.
> 

If I understand (please correct me if this is wrong) because this is only
called from the watchdog, which disables preemption, we should be protected
from something else writing to these flags.

> > +   ret = true;
> > +   }
> > +
> > +   return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_check_and_clear_guest_paused);
> > +
> >  static struct clocksource kvm_clock = {
> > .name = "kvm-clock",
> > .read = kvm_clock_get_cycles,
> 
> 
> -- 
> error compiling committee.c: too many arguments to function
> 


signature.asc
Description: Digital signature


Re: [PATCH 3/5 V5] Add ioctl for KVMCLOCK_GUEST_STOPPED

2011-12-08 Thread Eric B Munson
On Wed, 07 Dec 2011, Avi Kivity wrote:

> On 12/05/2011 10:19 PM, Eric B Munson wrote:
> > Now that we have a flag that will tell the guest it was suspended, create an
> > interface for that communication using a KVM ioctl.
> >
> > @@ -3295,6 +3295,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
> >  
> > goto out;
> > }
> > +   case KVMCLOCK_GUEST_PAUSED: {
> > +   r = kvm_set_guest_paused(vcpu);
> > +   break;
> > +   }
> > default:
> > r = -EINVAL;
> > }
> >
> 
> You could also do this purely from userspace by reading the kvmclock msr
> and updating it.  However, it's better to do this in the kernel to avoid
> distributing responsibility for kvmclock across too many cooks.
> 
> -- 
> error compiling committee.c: too many arguments to function
> 

As stated in the cover letter, per Marcelo's suggestion I already tried this
method and it resulted in the guest wall clock skewing by the amount of time
the guest was suspended.  This places a new burden on the guest to be running
ntpd or equivalent to keep the wall clock correct.

Eric


signature.asc
Description: Digital signature


Re: [PATCH 4/5 V5] Add generic stubs for kvm stop check functions

2011-12-08 Thread Eric B Munson
On Wed, 07 Dec 2011, Avi Kivity wrote:

> On 12/05/2011 10:19 PM, Eric B Munson wrote:
> >
> > diff --git a/include/asm-generic/kvm_para.h b/include/asm-generic/kvm_para.h
> > new file mode 100644
> > index 000..177e1eb
> > --- /dev/null
> > +++ b/include/asm-generic/kvm_para.h
> > @@ -0,0 +1,14 @@
> > +#ifndef _ASM_GENERIC_KVM_PARA_H
> > +#define _ASM_GENERIC_KVM_PARA_H
> > +
> > +
> > +/*
> > + * This function is used by architectures that support kvm to avoid issuing
> > + * false soft lockup messages.
> > + */
> > +static inline bool kvm_check_and_clear_guest_paused(int cpu)
> > +{
> > +   return false;
> > +}
> > +
> > +#endif
> 
> Please fold into patch 2.

Will do for V6.

> 
> What about arch/{ia64,s390,powerpc}/include/asm/kvm_para.h?
> 

I will add the above to those as well.

> -- 
> error compiling committee.c: too many arguments to function
> 


signature.asc
Description: Digital signature


Re: [PATCH] kvm tools: Allow the user to pass a FD to use as a TAP device

2011-12-08 Thread Osier Yang

On 2011年12月08日 01:24, Pekka Enberg wrote:

On Wed, 7 Dec 2011, Daniel P. Berrange wrote:


On Wed, Dec 07, 2011 at 06:28:12PM +0200, Pekka Enberg wrote:

On Wed, Dec 7, 2011 at 11:37 AM, Sasha Levin
 wrote:

This allows users to pass a pre-configured fd to use for the network
interface.

For example:
   kvm run -n mode=tap,fd=3 3<>/dev/net/tap3

Cc: Daniel P. Berrange 
Cc: Osier Yang 
Signed-off-by: Sasha Levin 


Daniel, Osier, I assume this is useful for libvirt?


Yes, this works.


Applied, thanks!


/me late to see the news. :)

I'm going to update the codes to support that.

Regards,
Osier
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/11] RFC: PCI using capabilitities

2011-12-08 Thread Sasha Levin
On Thu, 2011-12-08 at 20:52 +1030, Rusty Russell wrote:
> Here's the patch series I ended up with.  I haven't coded up the QEMU
> side yet, so no idea if the new driver works.
> 
> Questions:
> (1) Do we win from separating ISR, NOTIFY and COMMON?
> (2) I used a "u8 bar"; should I use a bir and pack it instead?  BIR
> seems a little obscure (noone else in the kernel source seems to
> refer to it).

I started implementing it for KVM tools, when I noticed a strange thing:
my vq creating was failing because the driver was reading a value other
than 0 from the address field of a new vq, and failing.

I've added simple prints in the usermode code, and saw the following
ordering:

1. queue select vq 0
2. queue read address (returns 0 - new vq)
3. queue write address (good address of vq)
4. queue read address (returns !=0, fails)
5. queue select vq 1

From that I understood that the ordering is wrong, the driver was trying
to read address before selecting the correct vq.

At that point, I've added simple prints to the driver. Initially it
looked as follows:

iowrite16(index, &vp_dev->common->queue_select);

switch (ioread64(&vp_dev->common->queue_address)) {
[...]
};

So I added prints before the iowrite16() and after the ioread64(), and
saw that while the driver prints were ordered, the device ones weren't:

[1.264052] before iowrite index=1
kvmtool: net returning pfn (vq=0): 310706176
kvmtool: queue selected: 1
[1.264890] after ioread index=1

Suspecting that something was wrong with ordering, I've added a print
between the iowrite and the ioread, and it finally started working well.

Which leads me to the question: Are MMIO vs MMIO reads/writes not
ordered?

-- 

Sasha.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH][kvm-autotest] Adds cpuflags test and subtest interface.

2011-12-08 Thread Jiří Župka
This patch adds tests for testing cpu flags in qemu:
 a) interface cpu flags tests
  1) qemu -cpu ?model
  2)dump
  3)cpuid
 b) guest run cpu flags tests
  1) Test boot cpu model.
  2) Test boot cpu model and additional/nonstandard model flags.
  3) Test boot fail with host unsupported flags.
  4) Test boot guest and try flags under load.
  5) Test online offline guest CPUs under load.

 There is a new C program, cpuflags-test, which is currently able to test
 the main Intel cpu flags. An extension for testing AMD cpu flags etc. will
 follow in the next version. This program makes sure that the special
 instructions provided by cpu flags, such as rdrand, are tested.

RFC:

In addition, there is a subtest interface. It consists of a class and some
decorators that provide an easy way to start a function as a subtest.
Subtest results are collected and can be reviewed at the end of the test.
The Subtest class and decorators should be placed in autotest_lib.client.utils.

It is possible to change the results format.

Example:
@staticmethod
def result_to_string(result):
"""
@param result: Result of test.
"""
print result
return ("[%(result)]%(name): %(output)") % (result)

  1)
Subtest.result_to_string = result_to_string
Subtest.get_text_result()

  2)
Subtest.get_text_result(result_to_string)

Pull-request: https://github.com/autotest/autotest/pull/109

Signed-off-by: Jiří Župka 
---
 .../scripts/cpuflags-test/cpuflags-test.tar.bz2|  Bin 0 -> 3303 bytes
 client/tests/kvm/subtests.cfg.sample   |9 +
 client/tests/kvm/tests/cpuflags.py |  774 
 client/virt/kvm_vm.py  |   13 +
 4 files changed, 796 insertions(+), 0 deletions(-)
 create mode 100644 client/tests/kvm/scripts/cpuflags-test/cpuflags-test.tar.bz2
 create mode 100644 client/tests/kvm/tests/cpuflags.py

diff --git a/client/tests/kvm/scripts/cpuflags-test/cpuflags-test.tar.bz2 
b/client/tests/kvm/scripts/cpuflags-test/cpuflags-test.tar.bz2
new file mode 100644
index 
..e8c7afdfd17829b582eb818facae8a4740cc3588
GIT binary patch
literal 3303
zcmVfo5T4*^jL0KkKS<}m4^Z*O>f9%{a@c@7S|Np=5|9=1f|NkIBKmZT`00ICA
zU=ID>!M1%_SH538-#hP+Z(HucQkMGe;y#qn4?qA4eL4i;q6pGyp^9cfG-7BP82}9c
z001Su(#WDzPhWSP?;Mir46J#CXAU44K#X<00*cV2GSa64FpjWR1GF-V4Ff38UQpJ
z00Te(00R&JB2gMlfCLi)WXMLEiTa*OdU~3Ar>UdV^*v1hJwc%LJx@^eJx^1`@<=&P
znEDfmhm|3Lin|gCOCaPzK*N6^-#%B~!lD_2vkJ=OWqFz@E6^~F0M~Uv0CJR!^kgO(
zfhAT@s5Asi${_$S5mUG!5=KD*i9enor>Awt;p&}W9s!3?4!=k}1jyJj3S{G;DlZse;0cDtT$0CF
zJHRG`E2T7$$e4jn^Cbdev`QmbemjsXpjx(wCT1-mEQHL3Zq)C%q*EA`RgK-UFu&`O
zpB^%0-lCI6-4b&U&S6rKe0r@^xm1`EpTED#*UN!dUPbG!PC4%zzI`rzlrU}M7r76V
zL(Yb4XGRIbJsr?0D4mu=qR9-nD6O{cuG*^8Rdt2n%*HZ6e8Yl~7qLhPcO9D$>Hj{D
zMgM*sYncpaV8JGy>Qo2hlwrq+^&TIY7@dtOKqzD1j5Zr`{Ur$eNZz
zUqK6S;=ze^BA4jU)ZT0#bPLzCdHojauwffQ{*NC&q{hYpJ)N8@(;RKOA9u(*9jd$l
z-QgAEpqsSl%>1b&sSAy+z%Ik&;
zv^at+*0rr`Q&m+}Q&m+}P^zlHBX@8C93(<%1~Cl7saln;igGMn8{shdoFFs0S>wS&
zrngndX`!jBk1nE6XmY5yLw5U<
zGeO^_G
zOfUoFMlFe&NQ&LLw4cPyGoL=4U>_XJB1_1C>bQ~=p;#+Sn8{EnH8uNlXs@1k-Ij|?
zK7JF7I#}Obg!1fR6UmzQ{1Bj?w3R>
z$b?D~7sMkFno#c$_o3@s7H*$vADh1ous9v_Fl*L^EkT25fk`5S8HAimnNsc?HSXhS
zZ;`01YbNv{YvpTi$x?zD%?=*^Xt`8J_+4}P8MRM&k2Pn4e!NDkXyjcV3m
z3d1N;=|X8Wl9hna;pZ#^b;kI+ld84V4|l(xHoU(NlP*BkO!|bzf^$$Vrw$&ytRPtv
z0Y5e&h#4Di*@gUc==*(x`bYr?28IlE7vs)&Iv^aECZ`h@8krj*pAK3nN>emKH$Wsq
zMeqr73`ueYtFXmHtN_p`P{g`eGFk~0XdWmPLn28qO2n>fwAPrUKYn3$J1Z)sw;|ZS
zo+o6UNJy-B1crcG+Sx}H0R>#mhFt4Vk|vV6@&tPVgND}v1xOldK$xKdQ?2Jdx4l$$
zYl09tVYz-dg-0z{{*F%{U_Z>Up6yi*c!ud(aW0_RSfz%f90LxudDTgaACpx?uYjXQ
zmavUAm4^_M!xaO9u)zrF;2REaN$Zmu%RNB~go>OC04)*P3@1KHGP6ly-C`e}9|zm|
z|AUt)g5U@37%yyJvH?hX;5nC!q&U?BH4KinZ~v3n7RNV3z1~UtG5JjexZYR?Bqn<
zAes;`uvk-ZUR#RW#^y_HNE=qi*oPBuN7aa$e4Jigh8URKUvjBwGCHD>kO)zDXiPUY@i!bBV1D2w+B)clWGDupw@H?JkkW?XOJ0rQI}XOC
zpwrNWqMgUh<16FJ1@Svv6*~gMY$t>ck|e|PbiJr&aUBRze4=aZnL>eL$Xr~EjRYyX
zvImTyLY4i~4seOjK`bTnH8@$qQ3@#w3TZ#Us0JzWAX@~FC_YX1H~TlxFq$v>V^QJ<
zValg_2P@o-G|TmTY#1B6*6uS(7+vf4?dUT->h*|ybQlEmAL&$Rjtygsq9e)(*lC1!_
zi&@SmhAHBns5NXB(cjCfI=CoxngqqBVA8HZU=-4~Lh>h~;0>41kiv+ZN`{0h
zMBjW2)s792=*mSdZx5(|Sb#DX0w5jM(Y}->Y96SEX8A#fS&S+KIIjhG=??4AVH2-@
zpz-O@m5UIGkU*O`kjS;azPt*Oa3@47>h#_ifOw&qL%n>+axVTy=|jBe-$ZyY4l{CY
zIG-s%E1RO+hi+>(`>0lYaL$K{YD2NmhjOj_i$(PYocThL6QMFe;tqNqDnw($Mm2AR
z@uCqZhgCF$4r7E2B!Kvtkm+6AmcW~K(7R~(erx4AyMijzyR<$V!
zaRO9Bq|*6myL|~9hi_iNX2_xi1au*t$yThjt_MFp>m|kH2fLgzp{aHAC*pn-pO52H
zjQ!n6Z~@MSyJMuA#Wtw#&%
zBg}&ndL=_)U7yhVx+0LufZ=M}D_1`~Io6sf+J?!Iyb?XhB*ga=f~HGQ46l=5QPd9V
lALIuA*I>~m^8ly>2bl>9fwPy*#8v-`xgwk>NNMG;dH^1# 0:
+return True
+else:
+return False
+
+
+@classmethod
+def get_result(cls):
+"""
+@return: Result of subtests.
+   Format:
+ tuple(pass/fail,function_name,call_arguments)
+"""
+return cls.result
+
+
+@staticmethod
+def result_to_string_debug(result):
+"""
+@param result: Resul

Re: [PATCH 0/2] qemu-io tests: More fine grained control of qemu paths

2011-12-08 Thread Christoph Hellwig
Thanks a lot Lucas,

I've applied the patches.  And sorry for the delay, I'm pretty busy at the
moment.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 12/15] kvm: x86: Add user space part for in-kernel APIC

2011-12-08 Thread Blue Swirl
On Thu, Dec 8, 2011 at 11:52, Jan Kiszka  wrote:
> This introduces the alternative APIC backend which makes use of KVM's
> in-kernel device model. External NMI injection via LINT1 is emulated by
> checking the current state of the in-kernel APIC, only injecting a NMI
> into the VCPU if LINT1 is unmasked and configured to DM_NMI.
>
> MSI is not yet supported, so we disable this when the in-kernel model is
> in use.
>
> CC: Lai Jiangshan 
> Signed-off-by: Jan Kiszka 
> ---
>  Makefile.target   |    2 +-
>  hw/kvm/apic.c     |  154 
> +
>  hw/pc.c           |   15 --
>  kvm.h             |    3 +
>  target-i386/kvm.c |    8 +++
>  5 files changed, 176 insertions(+), 6 deletions(-)
>  create mode 100644 hw/kvm/apic.c
>
> diff --git a/Makefile.target b/Makefile.target
> index b549988..76de485 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -236,7 +236,7 @@ obj-i386-y += vmport.o
>  obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
>  obj-i386-y += debugcon.o multiboot.o
>  obj-i386-y += pc_piix.o
> -obj-i386-$(CONFIG_KVM) += kvm/clock.o
> +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o
>  obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
>
>  # shared objects
> diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c
> new file mode 100644
> index 000..3924f9e
> --- /dev/null
> +++ b/hw/kvm/apic.c
> @@ -0,0 +1,154 @@
> +/*
> + * KVM in-kernel APIC support
> + *
> + * Copyright (c) 2011 Siemens AG
> + *
> + * Authors:
> + *  Jan Kiszka          
> + *
> + * This work is licensed under the terms of the GNU GPL version 2.
> + * See the COPYING file in the top-level directory.
> + */
> +#include "hw/apic_internal.h"
> +#include "kvm.h"
> +
> +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
> +                                   int reg_id, uint32_t val)
> +{
> +    *((uint32_t *)(kapic->regs + (reg_id << 4))) = val;
> +}
> +
> +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic,
> +                                       int reg_id)
> +{
> +    return *((uint32_t *)(kapic->regs + (reg_id << 4)));
> +}
> +
> +int kvm_put_apic(CPUState *env)
> +{
> +    APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);

Please pass APICState instead of CPUState.

> +    struct kvm_lapic_state kapic;
> +    int i;
> +
> +    if (s && kvm_enabled() && kvm_irqchip_in_kernel()) {
> +        memset(&kapic, 0, sizeof(kapic));
> +        kvm_apic_set_reg(&kapic, 0x2, s->id << 24);
> +        kvm_apic_set_reg(&kapic, 0x8, s->tpr);
> +        kvm_apic_set_reg(&kapic, 0xd, s->log_dest << 24);
> +        kvm_apic_set_reg(&kapic, 0xe, s->dest_mode << 28 | 0x0fff);
> +        kvm_apic_set_reg(&kapic, 0xf, s->spurious_vec);
> +        for (i = 0; i < 8; i++) {
> +            kvm_apic_set_reg(&kapic, 0x10 + i, s->isr[i]);
> +            kvm_apic_set_reg(&kapic, 0x18 + i, s->tmr[i]);
> +            kvm_apic_set_reg(&kapic, 0x20 + i, s->irr[i]);
> +        }
> +        kvm_apic_set_reg(&kapic, 0x28, s->esr);
> +        kvm_apic_set_reg(&kapic, 0x30, s->icr[0]);
> +        kvm_apic_set_reg(&kapic, 0x31, s->icr[1]);
> +        for (i = 0; i < APIC_LVT_NB; i++) {
> +            kvm_apic_set_reg(&kapic, 0x32 + i, s->lvt[i]);
> +        }
> +        kvm_apic_set_reg(&kapic, 0x38, s->initial_count);
> +        kvm_apic_set_reg(&kapic, 0x3e, s->divide_conf);
> +
> +        return kvm_vcpu_ioctl(env, KVM_SET_LAPIC, &kapic);
> +    }
> +
> +    return 0;
> +}
> +
> +int kvm_get_apic(CPUState *env)

Same here.

> +{
> +    APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);
> +    struct kvm_lapic_state kapic;
> +    int ret, i, v;
> +
> +    if (s && kvm_enabled() && kvm_irqchip_in_kernel()) {
> +        ret = kvm_vcpu_ioctl(env, KVM_GET_LAPIC, &kapic);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        s->id = kvm_apic_get_reg(&kapic, 0x2) >> 24;
> +        s->tpr = kvm_apic_get_reg(&kapic, 0x8);
> +        s->arb_id = kvm_apic_get_reg(&kapic, 0x9);
> +        s->log_dest = kvm_apic_get_reg(&kapic, 0xd) >> 24;
> +        s->dest_mode = kvm_apic_get_reg(&kapic, 0xe) >> 28;
> +        s->spurious_vec = kvm_apic_get_reg(&kapic, 0xf);
> +        for (i = 0; i < 8; i++) {
> +            s->isr[i] = kvm_apic_get_reg(&kapic, 0x10 + i);
> +            s->tmr[i] = kvm_apic_get_reg(&kapic, 0x18 + i);
> +            s->irr[i] = kvm_apic_get_reg(&kapic, 0x20 + i);
> +        }
> +        s->esr = kvm_apic_get_reg(&kapic, 0x28);
> +        s->icr[0] = kvm_apic_get_reg(&kapic, 0x30);
> +        s->icr[1] = kvm_apic_get_reg(&kapic, 0x31);
> +        for (i = 0; i < APIC_LVT_NB; i++) {
> +            s->lvt[i] = kvm_apic_get_reg(&kapic, 0x32 + i);
> +        }
> +        s->initial_count = kvm_apic_get_reg(&kapic, 0x38);
> +        s->divide_conf = kvm_apic_get_reg(&kapic, 0x3e);
> +
> +        v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4);
> +        s->count_shift = (v + 1) & 7;

[PATCH 1/4 V6] Add flag to indicate that a vm was stopped by the host

2011-12-08 Thread Eric B Munson
This flag will be used to check if the vm was stopped by the host when a soft
lockup was detected.  The host will set the flag when it stops the guest.  On
resume, the guest will check this flag if a soft lockup is detected and skip
issuing the warning.

Signed-off-by: Eric B Munson 
Cc: mi...@redhat.com
Cc: h...@zytor.com
Cc: a...@arndb.de
Cc: ry...@linux.vnet.ibm.com
Cc: aligu...@us.ibm.com
Cc: mtosa...@redhat.com
Cc: jeremy.fitzhardi...@citrix.com
Cc: kvm@vger.kernel.org
Cc: linux-a...@vger.kernel.org
Cc: x...@kernel.org
Cc: linux-ker...@vger.kernel.org
---
 arch/x86/include/asm/pvclock-abi.h |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/pvclock-abi.h 
b/arch/x86/include/asm/pvclock-abi.h
index 35f2d19..6167fd7 100644
--- a/arch/x86/include/asm/pvclock-abi.h
+++ b/arch/x86/include/asm/pvclock-abi.h
@@ -40,5 +40,6 @@ struct pvclock_wall_clock {
 } __attribute__((__packed__));
 
 #define PVCLOCK_TSC_STABLE_BIT (1 << 0)
+#define PVCLOCK_GUEST_STOPPED  (1 << 1)
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_PVCLOCK_ABI_H */
-- 
1.7.5.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4] Add check for suspended vm in softlockup detector

2011-12-08 Thread Eric B Munson
A suspended VM can cause spurious soft lockup warnings.  To avoid these, the
watchdog now checks if the kernel knows it was stopped by the host and skips
the warning if so.  When the watchdog is reset successfully, clear the guest
paused flag.

Signed-off-by: Eric B Munson 
Cc: mi...@redhat.com
Cc: h...@zytor.com
Cc: a...@arndb.de
Cc: ry...@linux.vnet.ibm.com
Cc: aligu...@us.ibm.com
Cc: mtosa...@redhat.com
Cc: jeremy.fitzhardi...@citrix.com
Cc: kvm@vger.kernel.org
Cc: linux-a...@vger.kernel.org
Cc: x...@kernel.org
Cc: linux-ker...@vger.kernel.org
---
Changes from V3:
 Clear the PAUSED flag when the watchdog is reset

 kernel/watchdog.c |   12 
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 1d7bca7..91485e5 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -25,6 +25,7 @@
 #include 
 
 #include 
+#include 
 #include 
 
 int watchdog_enabled = 1;
@@ -280,6 +281,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct 
hrtimer *hrtimer)
__this_cpu_write(softlockup_touch_sync, false);
sched_clock_tick();
}
+
+   /* Clear the guest paused flag on watchdog reset */
+   kvm_check_and_clear_guest_paused();
__touch_watchdog();
return HRTIMER_RESTART;
}
@@ -292,6 +296,14 @@ static enum hrtimer_restart watchdog_timer_fn(struct 
hrtimer *hrtimer)
 */
duration = is_softlockup(touch_ts);
if (unlikely(duration)) {
+   /*
+* If a virtual machine is stopped by the host it can look to
+* the watchdog like a soft lockup, check to see if the host
+* stopped the vm before we issue the warning
+*/
+   if (kvm_check_and_clear_guest_paused())
+   return HRTIMER_RESTART;
+
/* only warn once */
if (__this_cpu_read(soft_watchdog_warn) == true)
return HRTIMER_RESTART;
-- 
1.7.5.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4] Add ioctl for KVMCLOCK_GUEST_STOPPED

2011-12-08 Thread Eric B Munson
Now that we have a flag that will tell the guest it was suspended, create an
interface for that communication using a KVM ioctl.

Signed-off-by: Eric B Munson 

Cc: mi...@redhat.com
Cc: h...@zytor.com
Cc: a...@arndb.de
Cc: ry...@linux.vnet.ibm.com
Cc: aligu...@us.ibm.com
Cc: mtosa...@redhat.com
Cc: jeremy.fitzhardi...@citrix.com
Cc: kvm@vger.kernel.org
Cc: linux-a...@vger.kernel.org
Cc: x...@kernel.org
Cc: linux-ker...@vger.kernel.org
---
Changes from V4:
 Rename KVM_GUEST_PAUSED to KVMCLOCK_GUEST_PAUSED
 Add new ioctl description to api.txt

 Documentation/virtual/kvm/api.txt |   12 
 arch/x86/include/asm/kvm_host.h   |2 ++
 arch/x86/kvm/x86.c|   20 
 include/linux/kvm.h   |2 ++
 4 files changed, 36 insertions(+), 0 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 7945b0b..0f7dd99 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1450,6 +1450,18 @@ is supported; 2 if the processor requires all virtual 
machines to have
 an RMA, or 1 if the processor can use an RMA but doesn't require it,
 because it supports the Virtual RMA (VRMA) facility.
 
+4.64 KVMCLOCK_GUEST_PAUSED
+
+Capability: basic
+Architechtures: Any that implement pvclocks (currently x86 only)
+Type: vcpu ioctl
+Parameters: None
+Returns: 0 on success, -1 on error
+
+This signals to the host kernel that the specified guest is being paused by
+userspace.  The host will set a flag in the pvclock structure that is checked
+from the soft lockup watchdog.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b4973f4..beb94c6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -672,6 +672,8 @@ int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long 
bytes,
  gpa_t addr, unsigned long *ret);
 u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 
+int kvm_set_guest_paused(struct kvm_vcpu *vcpu);
+
 extern bool tdp_enabled;
 
 u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c38efd7..1dab5fd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3295,6 +3295,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 
goto out;
}
+   case KVMCLOCK_GUEST_PAUSED: {
+   r = kvm_set_guest_paused(vcpu);
+   break;
+   }
default:
r = -EINVAL;
}
@@ -6117,6 +6121,22 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 
tss_selector, int reason,
 }
 EXPORT_SYMBOL_GPL(kvm_task_switch);
 
+/*
+ * kvm_set_guest_paused() indicates to the guest kernel that it has been
+ * stopped by the hypervisor.  This function will be called from the host only.
+ * EINVAL is returned when the host attempts to set the flag for a guest that
+ * does not support pv clocks.
+ */
+int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
+{
+   struct pvclock_vcpu_time_info *src = &vcpu->arch.hv_clock;
+   if (!vcpu->arch.time_page)
+   return -EINVAL;
+   src->flags |= PVCLOCK_GUEST_STOPPED;
+   return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_set_guest_paused);
+
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
  struct kvm_sregs *sregs)
 {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index c3892fc..1d1ddef 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -762,6 +762,8 @@ struct kvm_clock_data {
 #define KVM_CREATE_SPAPR_TCE _IOW(KVMIO,  0xa8, struct 
kvm_create_spapr_tce)
 /* Available with KVM_CAP_RMA */
 #define KVM_ALLOCATE_RMA _IOR(KVMIO,  0xa9, struct kvm_allocate_rma)
+/* VM is being stopped by host */
+#define KVMCLOCK_GUEST_PAUSED_IO(KVMIO,   0xaa)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU(1 << 0)
 
-- 
1.7.5.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4] Add functions to check if the host has stopped the vm

2011-12-08 Thread Eric B Munson
When a host stops or suspends a VM it will set a flag to show this.  The
watchdog will use these functions to determine if a softlockup is real, or the
result of a suspended VM.

Signed-off-by: Eric B Munson 
Cc: mi...@redhat.com
Cc: h...@zytor.com
Cc: a...@arndb.de
Cc: ry...@linux.vnet.ibm.com
Cc: aligu...@us.ibm.com
Cc: mtosa...@redhat.com
Cc: jeremy.fitzhardi...@citrix.com
Cc: kvm@vger.kernel.org
Cc: linux-a...@vger.kernel.org
Cc: x...@kernel.org
Cc: linux-ker...@vger.kernel.org
---
Changes from V5:
 Collapse generic stubs into this patch
 check_and_clear_guest_stopped() takes no args and uses __get_cpu_var()
 Include individual definitions in ia64, s390, and powerpc

 arch/ia64/include/asm/kvm_para.h|5 +
 arch/powerpc/include/asm/kvm_para.h |5 +
 arch/s390/include/asm/kvm_para.h|5 +
 arch/x86/include/asm/kvm_para.h |8 
 arch/x86/kernel/kvmclock.c  |   21 +
 include/asm-generic/kvm_para.h  |   14 ++
 6 files changed, 58 insertions(+), 0 deletions(-)
 create mode 100644 include/asm-generic/kvm_para.h

diff --git a/arch/ia64/include/asm/kvm_para.h b/arch/ia64/include/asm/kvm_para.h
index 1588aee..2019cb9 100644
--- a/arch/ia64/include/asm/kvm_para.h
+++ b/arch/ia64/include/asm/kvm_para.h
@@ -26,6 +26,11 @@ static inline unsigned int kvm_arch_para_features(void)
return 0;
 }
 
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+   return false;
+}
+
 #endif
 
 #endif
diff --git a/arch/powerpc/include/asm/kvm_para.h 
b/arch/powerpc/include/asm/kvm_para.h
index 50533f9..1f80293 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -169,6 +169,11 @@ static inline unsigned int kvm_arch_para_features(void)
return r;
 }
 
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+   return false;
+}
+
 #endif /* __KERNEL__ */
 
 #endif /* __POWERPC_KVM_PARA_H__ */
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
index 6964db2..a988329 100644
--- a/arch/s390/include/asm/kvm_para.h
+++ b/arch/s390/include/asm/kvm_para.h
@@ -149,6 +149,11 @@ static inline unsigned int kvm_arch_para_features(void)
return 0;
 }
 
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+   return false;
+}
+
 #endif
 
 #endif /* __S390_KVM_PARA_H */
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 734c376..99c4bbe 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -95,6 +95,14 @@ struct kvm_vcpu_pv_apf_data {
 extern void kvmclock_init(void);
 extern int kvm_register_clock(char *txt);
 
+#ifdef CONFIG_KVM_CLOCK
+bool kvm_check_and_clear_guest_paused(void);
+#else
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+   return false;
+}
+#endif /* CONFIG_KVMCLOCK */
 
 /* This instruction is vmcall.  On non-VT architectures, it will generate a
  * trap that we will then rewrite to the appropriate instruction.
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 44842d7..33d2e22 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -114,6 +115,26 @@ static void kvm_get_preset_lpj(void)
preset_lpj = lpj;
 }
 
+bool kvm_check_and_clear_guest_paused(void)
+{
+   bool ret = false;
+   struct pvclock_vcpu_time_info *src;
+
+   /*
+* per_cpu() is safe here because this function is only called from
+* timer functions where preemption is already disabled.
+*/
+   WARN_ON(!in_atomic());
+   src = &__get_cpu_var(hv_clock);
+   if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
+   src->flags = src->flags & (~PVCLOCK_GUEST_STOPPED);
+   ret = true;
+   }
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(kvm_check_and_clear_guest_paused);
+
 static struct clocksource kvm_clock = {
.name = "kvm-clock",
.read = kvm_clock_get_cycles,
diff --git a/include/asm-generic/kvm_para.h b/include/asm-generic/kvm_para.h
new file mode 100644
index 000..05ef7e7
--- /dev/null
+++ b/include/asm-generic/kvm_para.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_GENERIC_KVM_PARA_H
+#define _ASM_GENERIC_KVM_PARA_H
+
+
+/*
+ * This function is used by architectures that support kvm to avoid issuing
+ * false soft lockup messages.
+ */
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+   return false;
+}
+
+#endif
-- 
1.7.5.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/4 V6] Avoid soft lockup message when KVM is stopped by host

2011-12-08 Thread Eric B Munson
Changes from V5:
Collapse generic check_and_clear_guest_stopped into patch 2
Include check_and_clear_guest_stopped definition to ia64, s390, and powerpc
Change check_and_clear_guest_stopped to use __get_cpu_var instead of taking the
 cpuid arg.
Protect check_and_clear_guest_stopped declaration with CONFIG_KVM_CLOCK check

Changes from V4:
Rename KVM_GUEST_PAUSED to KVMCLOCK_GUEST_PAUSED
Add description of KVMCLOCK_GUEST_PAUSED ioctl to api.txt

Changes from V3:
Include CC's on patch 3
Drop clear flag ioctl and have the watchdog clear the flag when it is reset

Changes from V2:
New kvm functions are defined in kvm_para.h; the only change to pvclock is the
initial flag definition

Changes from V1:
(Thanks Marcelo)
Host code has all been moved to arch/x86/kvm/x86.c
KVM_PAUSE_GUEST was renamed to KVM_GUEST_PAUSED

When a guest kernel is stopped by the host hypervisor it can look like a soft
lockup to the guest kernel.  This false warning can mask later soft lockup
warnings which may be real.  This patch series adds a method for a host
hypervisor to communicate to a guest kernel that it is being stopped.  The
final patch in the series has the watchdog check this flag when it goes to
issue a soft lockup warning and skip the warning if the guest knows it was
stopped.

It was attempted to solve this in Qemu, but the side effects of saving and
restoring the clock and tsc for each vcpu put the wall clock of the guest behind
by the amount of time of the pause.  This forces a guest to have ntp running
in order to keep the wall clock accurate.

Cc: mi...@redhat.com
Cc: h...@zytor.com
Cc: a...@arndb.de
Cc: ry...@linux.vnet.ibm.com
Cc: aligu...@us.ibm.com
Cc: mtosa...@redhat.com
Cc: jeremy.fitzhardi...@citrix.com
Cc: levinsasha...@gmail.com
Cc: Jan Kiszka 
Cc: kvm@vger.kernel.org
Cc: linux-a...@vger.kernel.org
Cc: x...@kernel.org
Cc: linux-ker...@vger.kernel.org

Eric B Munson (4):
  Add flag to indicate that a vm was stopped by the host
  Add functions to check if the host has stopped the vm
  Add ioctl for KVMCLOCK_GUEST_STOPPED
  Add check for suspended vm in softlockup detector

 Documentation/virtual/kvm/api.txt   |   12 
 arch/ia64/include/asm/kvm_para.h|5 +
 arch/powerpc/include/asm/kvm_para.h |5 +
 arch/s390/include/asm/kvm_para.h|5 +
 arch/x86/include/asm/kvm_host.h |2 ++
 arch/x86/include/asm/kvm_para.h |8 
 arch/x86/include/asm/pvclock-abi.h  |1 +
 arch/x86/kernel/kvmclock.c  |   21 +
 arch/x86/kvm/x86.c  |   20 
 include/asm-generic/kvm_para.h  |   14 ++
 include/linux/kvm.h |2 ++
 kernel/watchdog.c   |   12 
 12 files changed, 107 insertions(+), 0 deletions(-)
 create mode 100644 include/asm-generic/kvm_para.h

-- 
1.7.5.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 00/15] uq/master: Introduce basic irqchip support

2011-12-08 Thread Blue Swirl
On Thu, Dec 8, 2011 at 11:52, Jan Kiszka  wrote:
> Changes in v4:
> - rebased of current uq/master
> - fixed stupid bugs that broke bisectability and user space irqchip mode
> - integrated NMI-over-LINT1 injection logic

I had comments to one patch, others look fine.

Overall, string based subtype selection does not somehow seem to be a
hot idea, but this could be used as a starting point which should be
cleaned up later when we have proper device composition. APIC and x86
interrupt handling need more cleanup anyway.

> CC: Lai Jiangshan 
>
> Jan Kiszka (15):
>  msi: Generalize msix_supported to msi_supported
>  kvm: Move kvmclock into hw/kvm folder
>  apic: Stop timer on reset
>  apic: Inject external NMI events via LINT1
>  apic: Introduce backend/frontend infrastructure for KVM reuse
>  apic: Open-code timer save/restore
>  i8259: Introduce backend/frontend infrastructure for KVM reuse
>  ioapic: Introduce backend/frontend infrastructure for KVM reuse
>  memory: Introduce memory_region_init_reservation
>  kvm: Introduce core services for in-kernel irqchip support
>  kvm: x86: Establish IRQ0 override control
>  kvm: x86: Add user space part for in-kernel APIC
>  kvm: x86: Add user space part for in-kernel i8259
>  kvm: x86: Add user space part for in-kernel IOAPIC
>  kvm: Arm in-kernel irqchip support
>
>  Makefile.objs                  |    2 +-
>  Makefile.target                |    6 +-
>  configure                      |    1 +
>  hw/apic.c                      |  309 ---
>  hw/apic.h                      |    1 +
>  hw/apic_common.c               |  312 
> 
>  hw/apic_internal.h             |  122 
>  hw/i8259.c                     |  127 ++--
>  hw/i8259_common.c              |  173 ++
>  hw/i8259_internal.h            |   82 +++
>  hw/ioapic.c                    |  130 ++---
>  hw/ioapic_common.c             |  138 ++
>  hw/ioapic_internal.h           |  106 ++
>  hw/kvm/apic.c                  |  154 
>  hw/{kvmclock.c => kvm/clock.c} |    4 +-
>  hw/{kvmclock.h => kvm/clock.h} |    0
>  hw/kvm/i8259.c                 |  126 
>  hw/kvm/ioapic.c                |  101 +
>  hw/msi.c                       |    8 +
>  hw/msi.h                       |    2 +
>  hw/msix.c                      |    9 +-
>  hw/msix.h                      |    2 -
>  hw/pc.c                        |   19 ++-
>  hw/pc.h                        |    1 +
>  hw/pc_piix.c                   |   66 -
>  kvm-all.c                      |  154 
>  kvm-stub.c                     |    5 +
>  kvm.h                          |   13 ++
>  memory.c                       |   36 +
>  memory.h                       |   16 ++
>  monitor.c                      |    6 +-
>  qemu-config.c                  |    4 +
>  qemu-options.hx                |    5 +-
>  sysemu.h                       |    1 -
>  target-i386/kvm.c              |   19 +++
>  trace-events                   |    2 +-
>  vl.c                           |    1 -
>  37 files changed, 1724 insertions(+), 539 deletions(-)
>  create mode 100644 hw/apic_common.c
>  create mode 100644 hw/apic_internal.h
>  create mode 100644 hw/i8259_common.c
>  create mode 100644 hw/i8259_internal.h
>  create mode 100644 hw/ioapic_common.c
>  create mode 100644 hw/ioapic_internal.h
>  create mode 100644 hw/kvm/apic.c
>  rename hw/{kvmclock.c => kvm/clock.c} (98%)
>  rename hw/{kvmclock.h => kvm/clock.h} (100%)
>  create mode 100644 hw/kvm/i8259.c
>  create mode 100644 hw/kvm/ioapic.c
>
> --
> 1.7.3.4
>


Re: [net-next RFC PATCH 5/5] virtio-net: flow director support

2011-12-08 Thread Sridhar Samudrala

On 12/7/2011 3:02 AM, Jason Wang wrote:

On 12/06/2011 11:42 PM, Sridhar Samudrala wrote:

On 12/6/2011 5:15 AM, Stefan Hajnoczi wrote:
On Tue, Dec 6, 2011 at 10:21 AM, Jason Wang  
wrote:

On 12/06/2011 05:18 PM, Stefan Hajnoczi wrote:
On Tue, Dec 6, 2011 at 6:33 AM, Jason Wang
wrote:

On 12/05/2011 06:55 PM, Stefan Hajnoczi wrote:

On Mon, Dec 5, 2011 at 8:59 AM, Jason Wang
  wrote:

The vcpus are just threads and may not be bound to physical CPUs, so
what is the big picture here?  Is the guest even in the position to
set the best queue mappings today?


Not sure it could publish the best mapping but the idea is to make 
sure the
packets of a flow were handled by the same guest vcpu and may be 
the same

vhost thread in order to eliminate the packet reordering and lock
contention. But this assumption does not take the bouncing of vhost 
or vcpu

threads which would also affect the result.

Okay, this is why I'd like to know what the big picture here is.  What
solution are you proposing?  How are we going to have everything from
guest application, guest kernel, host threads, and host NIC driver
play along so we get the right steering up the entire stack.  I think
there needs to be an answer to that before changing virtio-net to add
any steering mechanism.


Yes. Also the current model of  a vhost thread per VM's interface 
doesn't help with packet steering

all the way from the guest to the host physical NIC.

I think we need to have vhost thread(s) per-CPU that can handle 
packets to/from physical NIC's
TX/RX queues. Currently we have a single vhost thread for a VM's i/f 
that handles all the packets from

various flows coming from a multi-queue physical NIC.


Even if we have per-cpu workthread, only one socket is used to queue 
the packet then, so a multiple queue(sockets) tap/macvtap is still 
needed.
I think so.  We need per-cpu tap/macvtap sockets along with per-cpu 
vhost threads.

This will parallelize all the way from physical NIC to vhost.

Thanks
Sridhar

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] virt-test: Add a class NumaNode v2

2011-12-08 Thread Lucas Meneghel Rodrigues
From: Amos Kong 

Dynamically checking hardware, use a dict to record the pin status,
pin process to single cpu by 'taskset' command.

Guest memory pinning is already implemented in the framework.
Process pinning needs to be done in the testcases.

Example:
|  numa_node = -1 # last node
|  p = virt_utils.NumaNode(numa_node)
|  vhost_threads = commands.getoutput("ps aux |grep '\[vhost-.*\]'
|  |grep -v grep|awk '{print $2}'")
|  for i in vhost_threads.split():
|  logging.debug("pin vhost_net thread(%s) to host cpu node" % i)
|  p.pin_cpu(i)
|  o = vm.monitor.info("cpus")
|  for i in re.findall("thread_id=(\d+)", o):
|  logging.debug("pin vcpu thread(%s) to host cpu node" % i)
|  p.pin_cpu(i)
|  p.show()

Changes from v1:
 * Removed usage of commands API, using utils.run() instead
 * Added unittests for NumaNode
 * Improved docstrings

Signed-off-by: Amos Kong 
Signed-off-by: Lucas Meneghel Rodrigues 
---
 client/tests/kvm/base.cfg.sample   |6 ++-
 client/virt/kvm_vm.py  |8 +++
 client/virt/virt_utils.py  |   80 +
 client/virt/virt_utils_unittest.py |  112 +++-
 4 files changed, 204 insertions(+), 2 deletions(-)

diff --git a/client/tests/kvm/base.cfg.sample b/client/tests/kvm/base.cfg.sample
index 21fa513..411decf 100644
--- a/client/tests/kvm/base.cfg.sample
+++ b/client/tests/kvm/base.cfg.sample
@@ -147,6 +147,11 @@ shell_port = 22
 used_cpus = 1
 used_mem = 512
 
+# Numa pinning params
+# pin guest memory to 1st numa node
+# pin processes to host cpu of 1st node
+# numa_node = 1
+
 # Port redirections
 redirs = remote_shell
 guest_port_remote_shell = 22
@@ -159,4 +164,3 @@ login_timeout = 360
 
 # NFS directory of guest images
 images_good = fileserver.foo.com:/autotest/images_good
-
diff --git a/client/virt/kvm_vm.py b/client/virt/kvm_vm.py
index 6747c2b..fa258c3 100644
--- a/client/virt/kvm_vm.py
+++ b/client/virt/kvm_vm.py
@@ -473,6 +473,14 @@ class VM(virt_vm.BaseVM):
 qemu_cmd += "LD_LIBRARY_PATH=%s " % library_path
 if params.get("qemu_audio_drv"):
 qemu_cmd += "QEMU_AUDIO_DRV=%s " % params.get("qemu_audio_drv")
+# Add numa memory cmd to pin guest memory to numa node
+if params.get("numa_node"):
+numa_node = int(params.get("numa_node"))
+if numa_node < 0:
+p = virt_utils.NumaNode(numa_node)
+qemu_cmd += "numactl -m %s " % (int(p.get_node_num()) + 
numa_node)
+else:
+qemu_cmd += "numactl -m %s " % (numa_node - 1)
 # Add the qemu binary
 qemu_cmd += qemu_binary
 # Add the VM's name
diff --git a/client/virt/virt_utils.py b/client/virt/virt_utils.py
index 95b2883..b2694ec 100644
--- a/client/virt/virt_utils.py
+++ b/client/virt/virt_utils.py
@@ -3452,3 +3452,83 @@ def virt_test_assistant(test_name, test_dir, base_dir, 
default_userspace_paths,
 logging.info("Autotest prints the results dir, so you can look at DEBUG "
  "logs if something went wrong")
 logging.info("You can also edit the test config files")
+
+
+class NumaNode(object):
+"""
+Numa node to control processes and shared memory.
+"""
+def __init__(self, i=-1):
+self.num = self.get_node_num()
+if i < 0:
+self.cpus = self.get_node_cpus(int(self.num) + i).split()
+else:
+self.cpus = self.get_node_cpus(i - 1).split()
+self.dict = {}
+for i in self.cpus:
+self.dict[i] = "free"
+
+
+def get_node_num(self):
+"""
+Get the number of nodes of current host.
+"""
+cmd = utils.run("numactl --hardware")
+return re.findall("available: (\d+) nodes", cmd.stdout)[0]
+
+
+def get_node_cpus(self, i):
+"""
+Get cpus of a specific node
+
+@param i: Index of the node.
+"""
+cmd = utils.run("numactl --hardware")
+return re.findall("node %s cpus: (.*)" % i, cmd.stdout)[0]
+
+
+def free_cpu(self, i):
+"""
+Release pin of one CPU.
+
+@param i: Index of the CPU.
+"""
+self.dict[i] = "free"
+
+
+def _flush_pin(self):
+"""
+Flush pin dict, remove the record of exited process.
+"""
+cmd = utils.run("ps -eLf | awk '{print $4}'")
+all_pids = cmd.stdout
+for i in self.cpus:
+if self.dict[i] != "free" and self.dict[i] not in all_pids:
+self.free_cpu(i)
+
+
+@error.context_aware
+def pin_cpu(self, process):
+"""
+Pin one process to a single cpu.
+
+@param process: Process ID.
+"""
+self._flush_pin()
+error.context("Pinning process %s to the CPU" % process)
+for i in self.cpus:
+if self.dict[i] == "free":
+self.dict[i] = str(process)
+cmd = "taskset -p %s %s" % (he

[PATCH V2] kvm: make vcpu life cycle separated from kvm instance

2011-12-08 Thread Liu Ping Fan
From: Liu Ping Fan 

Currently, a vcpu can be destroyed only when the kvm instance is destroyed.
Change this so that a vcpu is destroyed when its refcnt drops to zero;
a vcpu then MUST and CAN be destroyed before the kvm instance is destroyed.

Signed-off-by: Liu Ping Fan 
---
 arch/x86/kvm/i8254.c |   10 --
 arch/x86/kvm/i8259.c |   12 --
 arch/x86/kvm/mmu.c   |7 ++--
 arch/x86/kvm/x86.c   |   54 --
 include/linux/kvm_host.h |   77 +++---
 virt/kvm/irq_comm.c  |7 +++-
 virt/kvm/kvm_main.c  |   82 --
 7 files changed, 196 insertions(+), 53 deletions(-)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 76e3f1c..ac79598 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -289,7 +289,7 @@ static void pit_do_work(struct work_struct *work)
struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
struct kvm *kvm = pit->kvm;
struct kvm_vcpu *vcpu;
-   int i;
+   struct kvm_iter it;
struct kvm_kpit_state *ps = &pit->pit_state;
int inject = 0;
 
@@ -315,9 +315,13 @@ static void pit_do_work(struct work_struct *work)
 * LVT0 to NMI delivery. Other PIC interrupts are just sent to
 * VCPU0, and only if its LVT0 is in EXTINT mode.
 */
-   if (kvm->arch.vapics_in_nmi_mode > 0)
-   kvm_for_each_vcpu(i, vcpu, kvm)
+   if (kvm->arch.vapics_in_nmi_mode > 0) {
+   rcu_read_lock();
+   kvm_for_each_vcpu(it, vcpu, kvm) {
kvm_apic_nmi_wd_deliver(vcpu);
+   }
+   rcu_read_unlock();
+   }
}
 }
 
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index cac4746..2186b30 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -50,25 +50,29 @@ static void pic_unlock(struct kvm_pic *s)
 {
bool wakeup = s->wakeup_needed;
struct kvm_vcpu *vcpu, *found = NULL;
-   int i;
+   struct kvm *kvm = s->kvm;
+   struct kvm_iter it;
 
s->wakeup_needed = false;
 
spin_unlock(&s->lock);
 
if (wakeup) {
-   kvm_for_each_vcpu(i, vcpu, s->kvm) {
+   rcu_read_lock();
+   kvm_for_each_vcpu(it, vcpu, kvm)
if (kvm_apic_accept_pic_intr(vcpu)) {
found = vcpu;
break;
}
-   }
 
-   if (!found)
+   if (!found) {
+   rcu_read_unlock();
return;
+   }
 
kvm_make_request(KVM_REQ_EVENT, found);
kvm_vcpu_kick(found);
+   rcu_read_unlock();
}
 }
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f1b36cf..c16887e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1833,11 +1833,12 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, 
u64 *parent_pte)
 
 static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm)
 {
-   int i;
+   struct kvm_iter it;
struct kvm_vcpu *vcpu;
-
-   kvm_for_each_vcpu(i, vcpu, kvm)
+   rcu_read_lock();
+   kvm_for_each_vcpu(it, vcpu, kvm)
vcpu->arch.last_pte_updated = NULL;
+   rcu_read_unlock();
 }
 
 static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c38efd7..a302470 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1831,10 +1831,15 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 
msr, u64 *pdata)
switch (msr) {
case HV_X64_MSR_VP_INDEX: {
int r;
+   struct kvm_iter it;
struct kvm_vcpu *v;
-   kvm_for_each_vcpu(r, v, vcpu->kvm)
+   struct kvm *kvm =  vcpu->kvm;
+   rcu_read_lock();
+   kvm_for_each_vcpu(it, v, kvm) {
if (v == vcpu)
data = r;
+   }
+   rcu_read_unlock();
break;
}
case HV_X64_MSR_EOI:
@@ -4966,7 +4971,8 @@ static int kvmclock_cpufreq_notifier(struct 
notifier_block *nb, unsigned long va
struct cpufreq_freqs *freq = data;
struct kvm *kvm;
struct kvm_vcpu *vcpu;
-   int i, send_ipi = 0;
+   int send_ipi = 0;
+   struct kvm_iter it;
 
/*
 * We allow guests to temporarily run on slowing clocks,
@@ -5016,13 +5022,16 @@ static int kvmclock_cpufreq_notifier(struct 
notifier_block *nb, unsigned long va
 
raw_spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
-   kvm_for_each_vcpu(i, vcpu, kvm) {
+
+   rcu_read_lock();
+   kvm_for_each_vcpu(it, vcpu, kvm) {

[PATCH V2 00/23] kvm tools: Prepare kvmtool for another architecture

2011-12-08 Thread Matt Evans
Hi,


This patch series rearranges and tidies various parts of kvmtool to pave the way
for the addition of support for another architecture -- SPAPR PPC64.  A second
patch series will follow to present the PPC64 support.

Building on the separation of x86-specific code into tools/kvm/x86, this series
changes some of the mechanics of kvm tool, for example modifying the loop around
ioctl(KVM_RUN) so that whilst it stays generic, it calls into arch-specific code
to handle specific exit reasons, MMIO etc.  The builtin-run initialisation path
is rationalised so that PCI & IRQs are initialised before devices, and all of
this happens before arch-specific code is given the chance to initialise any
firmware and generate any device trees.

Most of this series is fairly trivial, in moving code, making definitions
arch-local or available via a header, endian sanitisation.  The PCI code changes
are probably most 'interesting', in that I have made the config space accesses
available to those not using the PC ioport access method, plus wrapped
initialisations of config space with cpu_to_leXX accesses.

Changes since V1:
- Merged up, V1 patches now upstream are obv. not included here
- No longer uses PRIx64 etc., defining __SANE_USERSPACE_TYPES__ for LL64 on PPC
- Returns kvm__recommended_cpus (and max cpus) to generic code, but fixes them
  so that they cope gracefully with KVM supporting neither CAP.  (A separate
  patch submitted for PPC KVM returns a useful number of CPUs.)
- Made hugetlbfs guest RAM mapping code generic, allow x86 to use it
- Drop unnecessary ifdef around symbol__init()
- Un-broke ioport PCI config access :o)
- Rolled in other review comments (e.g. removal of "& 0xff" in term.c, fixing
  pci_config_address union, create both kvm_cpu__emulate_mmio and _io)

Applies on top of Pekka's linux-kvm.git as of today.


Cheers,


Matt


Matt Evans (23):
  kvm tools: Only build/init i8042 on x86
  kvm tools: Add Makefile parameter for kernel include path
  kvm tools: Re-arrange Makefile to heed CFLAGS before checking for
optional libs
  kvm tools: Get correct 64-bit types on PPC64 and link appropriately
  kvm tools: Add arch-specific KVM_RUN exit handling via
kvm_cpu__handle_exit()
  kvm tools: Don't die if KVM_CAP_NR_VCPUS isn't available
  kvm tools: Fix KVM_RUN exit code check
  kvm tools: Add kvm__arch_periodic_poll()
  kvm tools: Move arch-specific cmdline init into
kvm__arch_set_cmdline()
  kvm tools: Add CONSOLE_HV term type and allow it to be selected
  kvm tools: Fix term_getc(), term_getc_iov() endian bugs
  kvm tools: Allow initrd_check() to match a cpio
  kvm tools: Allow load_flat_binary() to load an initrd alongside
  kvm tools: Initialise PCI before devices start getting registered
with PCI
  kvm tools: Perform CPU and firmware setup after devices are added
  kvm tools: Init IRQs after determining nrcpus
  kvm tools: Add ability to map guest RAM from hugetlbfs
  kvm tools: Move PCI_MAX_DEVICES to pci.h
  kvm tools: Endian-sanitise pci.h and PCI device setup
  kvm tools: Correctly set virtio-pci bar_size and remove hardwired
address
  kvm tools: Add pci__config_{rd,wr}(), pci__find_dev()
  kvm tools: Arch-specific define for PCI MMIO allocation area
  kvm tools: Create arch-specific kvm_cpu__emulate_{mm}io()

 tools/kvm/Makefile   |   99 --
 tools/kvm/builtin-run.c  |   61 +++
 tools/kvm/hw/pci-shmem.c |   23 ---
 tools/kvm/hw/vesa.c  |   15 +++--
 tools/kvm/include/kvm/ioport.h   |   11 ++--
 tools/kvm/include/kvm/kvm-cpu.h  |2 +
 tools/kvm/include/kvm/kvm.h  |8 ++-
 tools/kvm/include/kvm/pci.h  |   46 ++
 tools/kvm/include/kvm/term.h |1 +
 tools/kvm/include/kvm/util.h |4 +
 tools/kvm/include/linux/types.h  |1 +
 tools/kvm/kvm-cpu.c  |   46 --
 tools/kvm/kvm.c  |   28 ++---
 tools/kvm/pci.c  |   76 ++-
 tools/kvm/term.c |7 +--
 tools/kvm/util.c |   45 ++
 tools/kvm/virtio/pci.c   |   48 --
 tools/kvm/x86/include/kvm/kvm-arch.h |5 ++
 tools/kvm/x86/include/kvm/kvm-cpu-arch.h |   17 +-
 tools/kvm/x86/kvm-cpu.c  |5 ++
 tools/kvm/x86/kvm.c  |   52 ++--
 21 files changed, 396 insertions(+), 204 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 01/23] kvm tools: Only build/init i8042 on x86

2011-12-08 Thread Matt Evans
Not every architecture has an i8042 kbd controller, so only use this when
building for x86.

Signed-off-by: Matt Evans 
---
 tools/kvm/Makefile  |2 +-
 tools/kvm/builtin-run.c |2 ++
 2 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 243886e..f58a1d8 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -77,7 +77,6 @@ OBJS  += util/strbuf.o
 OBJS   += virtio/9p.o
 OBJS   += virtio/9p-pdu.o
 OBJS   += hw/vesa.o
-OBJS   += hw/i8042.o
 OBJS   += hw/pci-shmem.o
 OBJS   += kvm-ipc.o
 
@@ -153,6 +152,7 @@ ifeq ($(ARCH),x86)
OBJS+= x86/kvm.o
OBJS+= x86/kvm-cpu.o
OBJS+= x86/mptable.o
+   OBJS+= hw/i8042.o
 # Exclude BIOS object files from header dependencies.
OTHEROBJS   += x86/bios.o
OTHEROBJS   += x86/bios/bios-rom.o
diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
index d5abeb4..e273398 100644
--- a/tools/kvm/builtin-run.c
+++ b/tools/kvm/builtin-run.c
@@ -943,7 +943,9 @@ int kvm_cmd_run(int argc, const char **argv, const char 
*prefix)
 
kvm__init_ram(kvm);
 
+#ifdef CONFIG_X86
kbd__init(kvm);
+#endif
 
pci_shmem__init(kvm);
 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 03/23] kvm tools: Re-arrange Makefile to heed CFLAGS before checking for optional libs

2011-12-08 Thread Matt Evans
The checks for optional libraries build code to perform the tests, so should
respect certain CFLAGS -- in particular, -m64 so we check for 64bit libraries if
they're required.

Signed-off-by: Matt Evans 
---
 tools/kvm/Makefile |   86 ++-
 1 files changed, 44 insertions(+), 42 deletions(-)

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index f85a154..009a6ba 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -85,48 +85,6 @@ OBJS += hw/vesa.o
 OBJS   += hw/pci-shmem.o
 OBJS   += kvm-ipc.o
 
-FLAGS_BFD := $(CFLAGS) -lbfd
-has_bfd := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD))
-ifeq ($(has_bfd),y)
-   CFLAGS  += -DCONFIG_HAS_BFD
-   OBJS+= symbol.o
-   LIBS+= -lbfd
-endif
-
-FLAGS_VNCSERVER := $(CFLAGS) -lvncserver
-has_vncserver := $(call try-cc,$(SOURCE_VNCSERVER),$(FLAGS_VNCSERVER))
-ifeq ($(has_vncserver),y)
-   OBJS+= ui/vnc.o
-   CFLAGS  += -DCONFIG_HAS_VNCSERVER
-   LIBS+= -lvncserver
-endif
-
-FLAGS_SDL := $(CFLAGS) -lSDL
-has_SDL := $(call try-cc,$(SOURCE_SDL),$(FLAGS_SDL))
-ifeq ($(has_SDL),y)
-   OBJS+= ui/sdl.o
-   CFLAGS  += -DCONFIG_HAS_SDL
-   LIBS+= -lSDL
-endif
-
-FLAGS_ZLIB := $(CFLAGS) -lz
-has_ZLIB := $(call try-cc,$(SOURCE_ZLIB),$(FLAGS_ZLIB))
-ifeq ($(has_ZLIB),y)
-   CFLAGS  += -DCONFIG_HAS_ZLIB
-   LIBS+= -lz
-endif
-
-FLAGS_AIO := $(CFLAGS) -laio
-has_AIO := $(call try-cc,$(SOURCE_AIO),$(FLAGS_AIO))
-ifeq ($(has_AIO),y)
-   CFLAGS  += -DCONFIG_HAS_AIO
-   LIBS+= -laio
-endif
-
-LIBS   += -lrt
-LIBS   += -lpthread
-LIBS   += -lutil
-
 # Additional ARCH settings for x86
 ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
   -e s/arm.*/arm/ -e s/sa110/arm/ \
@@ -172,6 +130,50 @@ else
UNSUPP_ERR =
 endif
 
+
+FLAGS_BFD := $(CFLAGS) -lbfd
+has_bfd := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD))
+ifeq ($(has_bfd),y)
+   CFLAGS  += -DCONFIG_HAS_BFD
+   OBJS+= symbol.o
+   LIBS+= -lbfd
+endif
+
+FLAGS_VNCSERVER := $(CFLAGS) -lvncserver
+has_vncserver := $(call try-cc,$(SOURCE_VNCSERVER),$(FLAGS_VNCSERVER))
+ifeq ($(has_vncserver),y)
+   OBJS+= ui/vnc.o
+   CFLAGS  += -DCONFIG_HAS_VNCSERVER
+   LIBS+= -lvncserver
+endif
+
+FLAGS_SDL := $(CFLAGS) -lSDL
+has_SDL := $(call try-cc,$(SOURCE_SDL),$(FLAGS_SDL))
+ifeq ($(has_SDL),y)
+   OBJS+= ui/sdl.o
+   CFLAGS  += -DCONFIG_HAS_SDL
+   LIBS+= -lSDL
+endif
+
+FLAGS_ZLIB := $(CFLAGS) -lz
+has_ZLIB := $(call try-cc,$(SOURCE_ZLIB),$(FLAGS_ZLIB))
+ifeq ($(has_ZLIB),y)
+   CFLAGS  += -DCONFIG_HAS_ZLIB
+   LIBS+= -lz
+endif
+
+FLAGS_AIO := $(CFLAGS) -laio
+has_AIO := $(call try-cc,$(SOURCE_AIO),$(FLAGS_AIO))
+ifeq ($(has_AIO),y)
+   CFLAGS  += -DCONFIG_HAS_AIO
+   LIBS+= -laio
+endif
+
+LIBS   += -lrt
+LIBS   += -lpthread
+LIBS   += -lutil
+
+
 DEPS   := $(patsubst %.o,%.d,$(OBJS))
 OBJS   += $(OTHEROBJS)
 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 02/23] kvm tools: Add Makefile parameter for kernel include path

2011-12-08 Thread Matt Evans
This patch adds an 'I' parameter to override the default kernel include path of
'../../include'.

Signed-off-by: Matt Evans 
---
 tools/kvm/Makefile |9 +++--
 1 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index f58a1d8..f85a154 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -9,7 +9,12 @@ else
E = @\#
Q =
 endif
-export E Q
+ifneq ($(I), )
+   KINCL_PATH=$(I)
+else
+   KINCL_PATH=../..
+endif
+export E Q KINCL_PATH
 
 include config/utilities.mak
 include config/feature-tests.mak
@@ -176,7 +181,7 @@ DEFINES += -DKVMTOOLS_VERSION='"$(KVMTOOLS_VERSION)"'
 DEFINES+= -DBUILD_ARCH='"$(ARCH)"'
 
 KVM_INCLUDE := include
-CFLAGS += $(CPPFLAGS) $(DEFINES) -I$(KVM_INCLUDE) -I$(ARCH_INCLUDE) 
-I../../include -I../../arch/$(ARCH)/include/ -Os -g
+CFLAGS += $(CPPFLAGS) $(DEFINES) -I$(KVM_INCLUDE) -I$(ARCH_INCLUDE) 
-I$(KINCL_PATH)/include -I$(KINCL_PATH)/arch/$(ARCH)/include/ -Os -g
 
 ifneq ($(WERROR),0)
WARNINGS += -Werror
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 04/23] kvm tools: Get correct 64-bit types on PPC64 and link appropriately

2011-12-08 Thread Matt Evans
kvmtool's types.h includes , which by default on PPC64 brings in
int-l64.h; define __SANE_USERSPACE_TYPES__ to get LL64 types.

This patch also adds CFLAGS to the final link, so that any -m64 is obeyed
when linking, too.

Signed-off-by: Matt Evans 
---
 tools/kvm/Makefile  |2 +-
 tools/kvm/include/linux/types.h |1 +
 2 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 009a6ba..57dc521 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -218,7 +218,7 @@ KVMTOOLS-VERSION-FILE:
 
 $(PROGRAM): $(DEPS) $(OBJS)
$(E) "  LINK" $@
-   $(Q) $(CC) $(OBJS) $(LIBS) -o $@
+   $(Q) $(CC) $(CFLAGS) $(OBJS) $(LIBS) -o $@
 
 $(GUEST_INIT): guest/init.c
$(E) "  LINK" $@
diff --git a/tools/kvm/include/linux/types.h b/tools/kvm/include/linux/types.h
index 357799c..5e20f10 100644
--- a/tools/kvm/include/linux/types.h
+++ b/tools/kvm/include/linux/types.h
@@ -2,6 +2,7 @@
 #define LINUX_TYPES_H
 
 #include 
+#define __SANE_USERSPACE_TYPES__   /* For PPC64, to get LL64 types */
 #include 
 
 typedef __u64 u64;
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 05/23] kvm tools: Add arch-specific KVM_RUN exit handling via kvm_cpu__handle_exit()

2011-12-08 Thread Matt Evans
This patch creates a new function in x86/kvm-cpu.c, kvm_cpu__handle_exit(), in
which arch-specific exit reasons can be handled outside of the common runloop.

Signed-off-by: Matt Evans 
---
 tools/kvm/include/kvm/kvm-cpu.h |2 ++
 tools/kvm/kvm-cpu.c |   10 --
 tools/kvm/x86/kvm-cpu.c |5 +
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/tools/kvm/include/kvm/kvm-cpu.h b/tools/kvm/include/kvm/kvm-cpu.h
index 719e286..15618f1 100644
--- a/tools/kvm/include/kvm/kvm-cpu.h
+++ b/tools/kvm/include/kvm/kvm-cpu.h
@@ -2,6 +2,7 @@
 #define KVM__KVM_CPU_H
 
 #include "kvm/kvm-cpu-arch.h"
+#include 
 
 struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id);
 void kvm_cpu__delete(struct kvm_cpu *vcpu);
@@ -11,6 +12,7 @@ void kvm_cpu__enable_singlestep(struct kvm_cpu *vcpu);
 void kvm_cpu__run(struct kvm_cpu *vcpu);
 void kvm_cpu__reboot(void);
 int kvm_cpu__start(struct kvm_cpu *cpu);
+bool kvm_cpu__handle_exit(struct kvm_cpu *vcpu);
 
 int kvm_cpu__get_debug_fd(void);
 void kvm_cpu__set_debug_fd(int fd);
diff --git a/tools/kvm/kvm-cpu.c b/tools/kvm/kvm-cpu.c
index 5aba3bb..9bc0796 100644
--- a/tools/kvm/kvm-cpu.c
+++ b/tools/kvm/kvm-cpu.c
@@ -137,8 +137,14 @@ int kvm_cpu__start(struct kvm_cpu *cpu)
goto exit_kvm;
case KVM_EXIT_SHUTDOWN:
goto exit_kvm;
-   default:
-   goto panic_kvm;
+   default: {
+   bool ret;
+
+   ret = kvm_cpu__handle_exit(cpu);
+   if (!ret)
+   goto panic_kvm;
+   break;
+   }
}
kvm_cpu__handle_coalesced_mmio(cpu);
}
diff --git a/tools/kvm/x86/kvm-cpu.c b/tools/kvm/x86/kvm-cpu.c
index b26b208..a0d10cc 100644
--- a/tools/kvm/x86/kvm-cpu.c
+++ b/tools/kvm/x86/kvm-cpu.c
@@ -212,6 +212,11 @@ void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
kvm_cpu__setup_msrs(vcpu);
 }
 
+bool kvm_cpu__handle_exit(struct kvm_cpu *vcpu)
+{
+   return false;
+}
+
 static void print_dtable(const char *name, struct kvm_dtable *dtable)
 {
dprintf(debug_fd, " %s %016llx  %08hx\n",
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 06/23] kvm tools: Don't die if KVM_CAP_NR_VCPUS isn't available

2011-12-08 Thread Matt Evans
We die() if we can't read KVM_CAP_NR_VCPUS, but the API docs suggest to assume
the value 4 in this case.  This is pertinent to PPC KVM, which currently
does not support this CAP.

Signed-off-by: Matt Evans 
---
 tools/kvm/kvm.c |6 +-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c
index 0356d74..0bbe9ba 100644
--- a/tools/kvm/kvm.c
+++ b/tools/kvm/kvm.c
@@ -266,7 +266,11 @@ int kvm__recommended_cpus(struct kvm *kvm)
 
ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS);
if (ret <= 0)
-   die_perror("KVM_CAP_NR_VCPUS");
+   /*
+* api.txt states that if KVM_CAP_NR_VCPUS does not exist,
+* assume 4.
+*/
+   return 4;
 
return ret;
 }
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 07/23] kvm tools: Fix KVM_RUN exit code check

2011-12-08 Thread Matt Evans
kvm_cpu__run() currently die()s if KVM_RUN returns non-zero.  Some architectures
may return positive values in non-error cases, whereas real errors are always
negative return values.  Check for those instead.

Signed-off-by: Matt Evans 
---
 tools/kvm/kvm-cpu.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/tools/kvm/kvm-cpu.c b/tools/kvm/kvm-cpu.c
index 9bc0796..884a89f 100644
--- a/tools/kvm/kvm-cpu.c
+++ b/tools/kvm/kvm-cpu.c
@@ -30,7 +30,7 @@ void kvm_cpu__run(struct kvm_cpu *vcpu)
int err;
 
err = ioctl(vcpu->vcpu_fd, KVM_RUN, 0);
-   if (err && (errno != EINTR && errno != EAGAIN))
+   if (err < 0 && (errno != EINTR && errno != EAGAIN))
die_perror("KVM_RUN failed");
 }
 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 08/23] kvm tools: Add kvm__arch_periodic_poll()

2011-12-08 Thread Matt Evans
Currently, the SIGALRM handler calls device poll functions (for serial, virtio
console) directly.  Which devices are present and which require polling is a
system-specific decision, so create a new function called from common code &
move the x86-specific poll calls into it.

Signed-off-by: Matt Evans 
---
 tools/kvm/builtin-run.c |3 +--
 tools/kvm/include/kvm/kvm.h |1 +
 tools/kvm/x86/kvm.c |8 
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
index e273398..47a2f09 100644
--- a/tools/kvm/builtin-run.c
+++ b/tools/kvm/builtin-run.c
@@ -522,8 +522,7 @@ static void handle_debug(int fd, u32 type, u32 len, u8 *msg)
 
 static void handle_sigalrm(int sig)
 {
-   serial8250__inject_interrupt(kvm);
-   virtio_console__inject_interrupt(kvm);
+   kvm__arch_periodic_poll(kvm);
 }
 
 static void handle_stop(int fd, u32 type, u32 len, u8 *msg)
diff --git a/tools/kvm/include/kvm/kvm.h b/tools/kvm/include/kvm/kvm.h
index ca1acc0..60842d5 100644
--- a/tools/kvm/include/kvm/kvm.h
+++ b/tools/kvm/include/kvm/kvm.h
@@ -56,6 +56,7 @@ void kvm__remove_socket(const char *name);
 void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const 
char *name);
 void kvm__arch_setup_firmware(struct kvm *kvm);
 bool kvm__arch_cpu_supports_vm(void);
+void kvm__arch_periodic_poll(struct kvm *kvm);
 
 int load_flat_binary(struct kvm *kvm, int fd);
 bool load_bzimage(struct kvm *kvm, int fd_kernel, int fd_initrd, const char 
*kernel_cmdline, u16 vidmode);
diff --git a/tools/kvm/x86/kvm.c b/tools/kvm/x86/kvm.c
index ac6c91e..70df27e 100644
--- a/tools/kvm/x86/kvm.c
+++ b/tools/kvm/x86/kvm.c
@@ -4,6 +4,8 @@
 #include "kvm/interrupt.h"
 #include "kvm/mptable.h"
 #include "kvm/util.h"
+#include "kvm/8250-serial.h"
+#include "kvm/virtio-console.h"
 
 #include 
 #include 
@@ -328,3 +330,9 @@ void kvm__arch_setup_firmware(struct kvm *kvm)
/* MP table */
mptable_setup(kvm, kvm->nrcpus);
 }
+
+void kvm__arch_periodic_poll(struct kvm *kvm)
+{
+   serial8250__inject_interrupt(kvm);
+   virtio_console__inject_interrupt(kvm);
+}
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 09/23] kvm tools: Move arch-specific cmdline init into kvm__arch_set_cmdline()

2011-12-08 Thread Matt Evans
Different systems will want different base kernel commandlines, e.g. non-x86
systems probably don't need noapic, i8042.* etc., so set the commandline up in
arch-specific code.  Then, if the resulting commandline is empty, don't strcat a
separating space in front of the user-supplied commandline.

Signed-off-by: Matt Evans 
---
 tools/kvm/builtin-run.c |   12 +---
 tools/kvm/include/kvm/kvm.h |1 +
 tools/kvm/x86/kvm.c |   11 +++
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
index 47a2f09..68a3246 100644
--- a/tools/kvm/builtin-run.c
+++ b/tools/kvm/builtin-run.c
@@ -835,13 +835,11 @@ int kvm_cmd_run(int argc, const char **argv, const char 
*prefix)
vidmode = 0;
 
memset(real_cmdline, 0, sizeof(real_cmdline));
-   strcpy(real_cmdline, "noapic noacpi pci=conf1 reboot=k panic=1 
i8042.direct=1 "
-   "i8042.dumbkbd=1 i8042.nopnp=1");
-   if (vnc || sdl) {
-   strcat(real_cmdline, " video=vesafb console=tty0");
-   } else
-   strcat(real_cmdline, " console=ttyS0 earlyprintk=serial 
i8042.noaux=1");
-   strcat(real_cmdline, " ");
+   kvm__arch_set_cmdline(real_cmdline, vnc || sdl);
+
+   if (strlen(real_cmdline) > 0)
+   strcat(real_cmdline, " ");
+
if (kernel_cmdline)
strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline));
 
diff --git a/tools/kvm/include/kvm/kvm.h b/tools/kvm/include/kvm/kvm.h
index 60842d5..fae2ba9 100644
--- a/tools/kvm/include/kvm/kvm.h
+++ b/tools/kvm/include/kvm/kvm.h
@@ -53,6 +53,7 @@ int kvm__get_sock_by_instance(const char *name);
 int kvm__enumerate_instances(int (*callback)(const char *name, int pid));
 void kvm__remove_socket(const char *name);
 
+void kvm__arch_set_cmdline(char *cmdline, bool video);
 void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const 
char *name);
 void kvm__arch_setup_firmware(struct kvm *kvm);
 bool kvm__arch_cpu_supports_vm(void);
diff --git a/tools/kvm/x86/kvm.c b/tools/kvm/x86/kvm.c
index 70df27e..b49b372 100644
--- a/tools/kvm/x86/kvm.c
+++ b/tools/kvm/x86/kvm.c
@@ -119,6 +119,17 @@ void kvm__init_ram(struct kvm *kvm)
}
 }
 
+/* Arch-specific commandline setup */
+void kvm__arch_set_cmdline(char *cmdline, bool video)
+{
+   strcpy(cmdline, "noapic noacpi pci=conf1 reboot=k panic=1 
i8042.direct=1 "
+   "i8042.dumbkbd=1 i8042.nopnp=1");
+   if (video) {
+   strcat(cmdline, " video=vesafb console=tty0");
+   } else
+   strcat(cmdline, " console=ttyS0 earlyprintk=serial 
i8042.noaux=1");
+}
+
 /* Architecture-specific KVM init */
 void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const 
char *name)
 {
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 10/23] kvm tools: Add CONSOLE_HV term type and allow it to be selected

2011-12-08 Thread Matt Evans
This patch paves the way for adding a hypervisor console, useful on systems that
support one out of the box yet don't have either serial port or virtio console
support (e.g. kernels expecting POWER SPAPR).

Signed-off-by: Matt Evans 
---
 tools/kvm/builtin-run.c  |8 ++--
 tools/kvm/include/kvm/term.h |1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
index 68a3246..c067685 100644
--- a/tools/kvm/builtin-run.c
+++ b/tools/kvm/builtin-run.c
@@ -416,7 +416,7 @@ static const struct option options[] = {
OPT_BOOLEAN('\0', "rng", &virtio_rng, "Enable virtio Random Number 
Generator"),
OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name",
 "Enable virtio 9p to share files between host and guest", 
virtio_9p_rootdir_parser),
-   OPT_STRING('\0', "console", &console, "serial or virtio",
+   OPT_STRING('\0', "console", &console, "serial, virtio or hv",
"Console to use"),
OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"),
OPT_CALLBACK('\0', "tty", NULL, "tty id",
@@ -776,8 +776,12 @@ int kvm_cmd_run(int argc, const char **argv, const char 
*prefix)
 
if (!strncmp(console, "virtio", 6))
active_console  = CONSOLE_VIRTIO;
-   else
+   else if (!strncmp(console, "serial", 6))
active_console  = CONSOLE_8250;
+   else if (!strncmp(console, "hv", 2))
+   active_console = CONSOLE_HV;
+   else
+   pr_warning("No console!");
 
if (!host_ip)
host_ip = DEFAULT_HOST_ADDR;
diff --git a/tools/kvm/include/kvm/term.h b/tools/kvm/include/kvm/term.h
index 938c26f..a6a9822 100644
--- a/tools/kvm/include/kvm/term.h
+++ b/tools/kvm/include/kvm/term.h
@@ -6,6 +6,7 @@
 
 #define CONSOLE_8250   1
 #define CONSOLE_VIRTIO 2
+#define CONSOLE_HV 3
 
 int term_putc_iov(int who, struct iovec *iov, int iovcnt, int term);
 int term_getc_iov(int who, struct iovec *iov, int iovcnt, int term);
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 11/23] kvm tools: Fix term_getc(), term_getc_iov() endian bugs

2011-12-08 Thread Matt Evans
term_getc()'s int c has one byte written into it (at its lowest address) by
read_in_full().  This is expected to be the least significant byte, but that
isn't the case on BE!  Use correct type, unsigned char.  A similar issue exists
in term_getc_iov(), which needs to write a char to the iov rather than an int.

Signed-off-by: Matt Evans 
---
 tools/kvm/term.c |7 ++-
 1 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/tools/kvm/term.c b/tools/kvm/term.c
index fb5d71c..b7d8934 100644
--- a/tools/kvm/term.c
+++ b/tools/kvm/term.c
@@ -30,16 +30,13 @@ int term_fds[4][2];
 
 int term_getc(int who, int term)
 {
-   int c;
+   unsigned char c;
 
if (who != active_console)
return -1;
-
if (read_in_full(term_fds[term][TERM_FD_IN], &c, 1) < 0)
return -1;
 
-   c &= 0xff;
-
if (term_got_escape) {
term_got_escape = false;
if (c == 'x')
@@ -84,7 +81,7 @@ int term_getc_iov(int who, struct iovec *iov, int iovcnt, int 
term)
if (c < 0)
return 0;
 
-   *((int *)iov[TERM_FD_IN].iov_base)  = c;
+   *((char *)iov[TERM_FD_IN].iov_base) = (char)c;
 
return sizeof(char);
 }
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 12/23] kvm tools: Allow initrd_check() to match a cpio

2011-12-08 Thread Matt Evans
cpios are valid as initrds too, so allow them through the check.

Signed-off-by: Matt Evans 
---
 tools/kvm/kvm.c |8 +---
 1 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c
index 0bbe9ba..d26e3d7 100644
--- a/tools/kvm/kvm.c
+++ b/tools/kvm/kvm.c
@@ -351,10 +351,11 @@ struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, 
const char *name)
 /* RFC 1952 */
 #define GZIP_ID1   0x1f
 #define GZIP_ID2   0x8b
-
+#define CPIO_MAGIC "0707"
+/* initrd may be gzipped, or a plain cpio */
 static bool initrd_check(int fd)
 {
-   unsigned char id[2];
+   unsigned char id[4];
 
if (read_in_full(fd, id, ARRAY_SIZE(id)) < 0)
return false;
@@ -362,7 +363,8 @@ static bool initrd_check(int fd)
if (lseek(fd, 0, SEEK_SET) < 0)
die_perror("lseek");
 
-   return id[0] == GZIP_ID1 && id[1] == GZIP_ID2;
+   return (id[0] == GZIP_ID1 && id[1] == GZIP_ID2) ||
+   !memcmp(id, CPIO_MAGIC, 4);
 }
 
 bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename,
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 13/23] kvm tools: Allow load_flat_binary() to load an initrd alongside

2011-12-08 Thread Matt Evans
This patch passes the initrd fd and commandline to load_flat_binary(), which may
be used to load both the kernel & an initrd (stashing or inserting the
commandline as appropriate) in the same way that load_bzimage() does.  This is
especially useful when load_bzimage() is unused for a particular
architecture. :-)

Signed-off-by: Matt Evans 
---
 tools/kvm/include/kvm/kvm.h |2 +-
 tools/kvm/kvm.c |   10 ++
 tools/kvm/x86/kvm.c |   13 ++---
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/tools/kvm/include/kvm/kvm.h b/tools/kvm/include/kvm/kvm.h
index fae2ba9..5fe6e75 100644
--- a/tools/kvm/include/kvm/kvm.h
+++ b/tools/kvm/include/kvm/kvm.h
@@ -59,7 +59,7 @@ void kvm__arch_setup_firmware(struct kvm *kvm);
 bool kvm__arch_cpu_supports_vm(void);
 void kvm__arch_periodic_poll(struct kvm *kvm);
 
-int load_flat_binary(struct kvm *kvm, int fd);
+int load_flat_binary(struct kvm *kvm, int fd_kernel, int fd_initrd, const char 
*kernel_cmdline);
 bool load_bzimage(struct kvm *kvm, int fd_kernel, int fd_initrd, const char 
*kernel_cmdline, u16 vidmode);
 
 /*
diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c
index d26e3d7..c54f886 100644
--- a/tools/kvm/kvm.c
+++ b/tools/kvm/kvm.c
@@ -388,23 +388,25 @@ bool kvm__load_kernel(struct kvm *kvm, const char 
*kernel_filename,
 
ret = load_bzimage(kvm, fd_kernel, fd_initrd, kernel_cmdline, vidmode);
 
-   if (initrd_filename)
-   close(fd_initrd);
-
if (ret)
goto found_kernel;
 
pr_warning("%s is not a bzImage. Trying to load it as a flat 
binary...", kernel_filename);
 
-   ret = load_flat_binary(kvm, fd_kernel);
+   ret = load_flat_binary(kvm, fd_kernel, fd_initrd, kernel_cmdline);
+
if (ret)
goto found_kernel;
 
+   if (initrd_filename)
+   close(fd_initrd);
close(fd_kernel);
 
die("%s is not a valid bzImage or flat binary", kernel_filename);
 
 found_kernel:
+   if (initrd_filename)
+   close(fd_initrd);
close(fd_kernel);
 
return ret;
diff --git a/tools/kvm/x86/kvm.c b/tools/kvm/x86/kvm.c
index b49b372..a116f4b 100644
--- a/tools/kvm/x86/kvm.c
+++ b/tools/kvm/x86/kvm.c
@@ -197,17 +197,24 @@ void kvm__irq_trigger(struct kvm *kvm, int irq)
 #define BOOT_PROTOCOL_REQUIRED 0x206
 #define LOAD_HIGH  0x01
 
-int load_flat_binary(struct kvm *kvm, int fd)
+int load_flat_binary(struct kvm *kvm, int fd_kernel, int fd_initrd, const char 
*kernel_cmdline)
 {
void *p;
int nr;
 
-   if (lseek(fd, 0, SEEK_SET) < 0)
+   /*
+* Some architectures may support loading an initrd alongside the flat 
kernel,
+* but we do not.
+*/
+   if (fd_initrd != -1)
+   pr_warning("Loading initrd with flat binary not supported.");
+
+   if (lseek(fd_kernel, 0, SEEK_SET) < 0)
die_perror("lseek");
 
p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
 
-   while ((nr = read(fd, p, 65536)) > 0)
+   while ((nr = read(fd_kernel, p, 65536)) > 0)
p += nr;
 
kvm->boot_selector  = BOOT_LOADER_SELECTOR;
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 14/23] kvm tools: Initialise PCI before devices start getting registered with PCI

2011-12-08 Thread Matt Evans
Re-arrange pci__init() in builtin-run such that it comes before devices are
initialised.

Signed-off-by: Matt Evans 
---
 tools/kvm/builtin-run.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
index c067685..99db522 100644
--- a/tools/kvm/builtin-run.c
+++ b/tools/kvm/builtin-run.c
@@ -828,6 +828,8 @@ int kvm_cmd_run(int argc, const char **argv, const char 
*prefix)
 
kvm->nrcpus = nrcpus;
 
+   pci__init();
+
/*
 * vidmode should be either specified
 * either set by default
@@ -895,8 +897,6 @@ int kvm_cmd_run(int argc, const char **argv, const char 
*prefix)
 
serial8250__init(kvm);
 
-   pci__init();
-
if (active_console == CONSOLE_VIRTIO)
virtio_console__init(kvm);
 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 15/23] kvm tools: Perform CPU and firmware setup after devices are added

2011-12-08 Thread Matt Evans
Currently some devices (in this case kbd, fb, vesa) are initialised after
CPU/firmware setup.  On some platforms (e.g. PPC) kvm__arch_setup_firmware() may
be making a device tree.  Any devices added after this point will be missed!

Tiny refactor of builtin-run.c, moving timer start, firmware setup, cpu init
to occur last.

Signed-off-by: Matt Evans 
---
 tools/kvm/builtin-run.c |   24 ++--
 1 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
index 99db522..7491428 100644
--- a/tools/kvm/builtin-run.c
+++ b/tools/kvm/builtin-run.c
@@ -932,16 +932,6 @@ int kvm_cmd_run(int argc, const char **argv, const char 
*prefix)
virtio_net__init(&net_params);
}
 
-   kvm__start_timer(kvm);
-
-   kvm__arch_setup_firmware(kvm);
-
-   for (i = 0; i < nrcpus; i++) {
-   kvm_cpus[i] = kvm_cpu__init(kvm, i);
-   if (!kvm_cpus[i])
-   die("unable to initialize KVM VCPU");
-   }
-
kvm__init_ram(kvm);
 
 #ifdef CONFIG_X86
@@ -965,6 +955,20 @@ int kvm_cmd_run(int argc, const char **argv, const char 
*prefix)
 
fb__start();
 
+   /* Device init all done; firmware init must
+* come after this (it may set up device trees etc.)
+*/
+
+   kvm__start_timer(kvm);
+
+   kvm__arch_setup_firmware(kvm);
+
+   for (i = 0; i < nrcpus; i++) {
+   kvm_cpus[i] = kvm_cpu__init(kvm, i);
+   if (!kvm_cpus[i])
+   die("unable to initialize KVM VCPU");
+   }
+
thread_pool__init(nr_online_cpus);
ioeventfd__start();
 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 16/23] kvm tools: Init IRQs after determining nrcpus

2011-12-08 Thread Matt Evans
IRQ init may involve per-CPU setup/allocation of resources, so make sure
kvm->nrcpus is initialised before calling irq__init().

Signed-off-by: Matt Evans 
---
 tools/kvm/builtin-run.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
index 7491428..7969901 100644
--- a/tools/kvm/builtin-run.c
+++ b/tools/kvm/builtin-run.c
@@ -809,8 +809,6 @@ int kvm_cmd_run(int argc, const char **argv, const char 
*prefix)
 
kvm = kvm__init(dev, ram_size, guest_name);
 
-   irq__init(kvm);
-
kvm->single_step = single_step;
 
ioeventfd__init();
@@ -828,6 +826,8 @@ int kvm_cmd_run(int argc, const char **argv, const char 
*prefix)
 
kvm->nrcpus = nrcpus;
 
+   irq__init(kvm);
+
pci__init();
 
/*
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 17/23] kvm tools: Add ability to map guest RAM from hugetlbfs

2011-12-08 Thread Matt Evans
Add a --hugetlbfs commandline option to give a path to hugetlbfs-map guest
memory (down in kvm__arch_init()).  For x86, guest memory is a normal
ANON mmap() if this option is not provided, otherwise a hugetlbfs mmap.

Signed-off-by: Matt Evans 
---
 tools/kvm/builtin-run.c  |4 ++-
 tools/kvm/include/kvm/kvm.h  |4 +-
 tools/kvm/include/kvm/util.h |4 +++
 tools/kvm/kvm.c  |4 +-
 tools/kvm/util.c |   45 ++
 tools/kvm/x86/kvm.c  |   20 +++--
 6 files changed, 73 insertions(+), 8 deletions(-)

diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
index 7969901..0acfe81 100644
--- a/tools/kvm/builtin-run.c
+++ b/tools/kvm/builtin-run.c
@@ -82,6 +82,7 @@ static const char *guest_mac;
 static const char *host_mac;
 static const char *script;
 static const char *guest_name;
+static const char *hugetlbfs_path;
 static struct virtio_net_params *net_params;
 static bool single_step;
 static bool readonly_image[MAX_DISK_IMAGES];
@@ -422,6 +423,7 @@ static const struct option options[] = {
OPT_CALLBACK('\0', "tty", NULL, "tty id",
 "Remap guest TTY into a pty on the host",
 tty_parser),
+   OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs 
path"),
 
OPT_GROUP("Kernel options:"),
OPT_STRING('k', "kernel", &kernel_filename, "kernel",
@@ -807,7 +809,7 @@ int kvm_cmd_run(int argc, const char **argv, const char 
*prefix)
guest_name = default_name;
}
 
-   kvm = kvm__init(dev, ram_size, guest_name);
+   kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name);
 
kvm->single_step = single_step;
 
diff --git a/tools/kvm/include/kvm/kvm.h b/tools/kvm/include/kvm/kvm.h
index 5fe6e75..7159952 100644
--- a/tools/kvm/include/kvm/kvm.h
+++ b/tools/kvm/include/kvm/kvm.h
@@ -30,7 +30,7 @@ struct kvm_ext {
 void kvm__set_dir(const char *fmt, ...);
 const char *kvm__get_dir(void);
 
-struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name);
+struct kvm *kvm__init(const char *kvm_dev, const char *hugetlbfs_path, u64 
ram_size, const char *name);
 int kvm__recommended_cpus(struct kvm *kvm);
 int kvm__max_cpus(struct kvm *kvm);
 void kvm__init_ram(struct kvm *kvm);
@@ -54,7 +54,7 @@ int kvm__enumerate_instances(int (*callback)(const char 
*name, int pid));
 void kvm__remove_socket(const char *name);
 
 void kvm__arch_set_cmdline(char *cmdline, bool video);
-void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const 
char *name);
+void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, const char 
*hugetlbfs_path, u64 ram_size, const char *name);
 void kvm__arch_setup_firmware(struct kvm *kvm);
 bool kvm__arch_cpu_supports_vm(void);
 void kvm__arch_periodic_poll(struct kvm *kvm);
diff --git a/tools/kvm/include/kvm/util.h b/tools/kvm/include/kvm/util.h
index dc2e0b9..1f6fbbd 100644
--- a/tools/kvm/include/kvm/util.h
+++ b/tools/kvm/include/kvm/util.h
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef __GNUC__
 #define NORETURN __attribute__((__noreturn__))
@@ -75,4 +76,7 @@ static inline void msleep(unsigned int msecs)
 {
usleep(MSECS_TO_USECS(msecs));
 }
+
+void *mmap_hugetlbfs(const char *htlbfs_path, u64 size);
+
 #endif /* KVM__UTIL_H */
diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c
index c54f886..35ca2c5 100644
--- a/tools/kvm/kvm.c
+++ b/tools/kvm/kvm.c
@@ -306,7 +306,7 @@ int kvm__max_cpus(struct kvm *kvm)
return ret;
 }
 
-struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name)
+struct kvm *kvm__init(const char *kvm_dev, const char *hugetlbfs_path, u64 
ram_size, const char *name)
 {
struct kvm *kvm;
int ret;
@@ -339,7 +339,7 @@ struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, 
const char *name)
if (kvm__check_extensions(kvm))
die("A required KVM extention is not supported by OS");
 
-   kvm__arch_init(kvm, kvm_dev, ram_size, name);
+   kvm__arch_init(kvm, kvm_dev, hugetlbfs_path, ram_size, name);
 
kvm->name = name;
 
diff --git a/tools/kvm/util.c b/tools/kvm/util.c
index 4efbce9..90b6a3b 100644
--- a/tools/kvm/util.c
+++ b/tools/kvm/util.c
@@ -4,6 +4,11 @@
 
 #include "kvm/util.h"
 
+#include/* For HUGETLBFS_MAGIC */
+#include 
+#include 
+#include 
+
 static void report(const char *prefix, const char *err, va_list params)
 {
char msg[1024];
@@ -99,3 +104,43 @@ size_t strlcat(char *dest, const char *src, size_t count)
 
return res;
 }
+
+void *mmap_hugetlbfs(const char *htlbfs_path, u64 size)
+{
+   char mpath[PATH_MAX];
+   int fd;
+   int r;
+   struct statfs sfs;
+   void *addr;
+
+   do {
+   /*
+* QEMU seems to work around this returning EINTR...  Let's do
+* that too.
+*/
+   r = statfs(htlbfs_path, &sfs);
+   } 

[PATCH V2 18/23] kvm tools: Move PCI_MAX_DEVICES to pci.h

2011-12-08 Thread Matt Evans
Other pieces of kvmtool may be interested in PCI_MAX_DEVICES.

Signed-off-by: Matt Evans 
---
 tools/kvm/include/kvm/pci.h |1 +
 tools/kvm/pci.c |1 -
 2 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/tools/kvm/include/kvm/pci.h b/tools/kvm/include/kvm/pci.h
index f71af0b..b578ad7 100644
--- a/tools/kvm/include/kvm/pci.h
+++ b/tools/kvm/include/kvm/pci.h
@@ -6,6 +6,7 @@
 #include 
 #include 
 
+#define PCI_MAX_DEVICES256
 /*
  * PCI Configuration Mechanism #1 I/O ports. See Section 3.7.4.1.
  * ("Configuration Mechanism #1") of the PCI Local Bus Specification 2.1 for
diff --git a/tools/kvm/pci.c b/tools/kvm/pci.c
index d1afc05..920e13e 100644
--- a/tools/kvm/pci.c
+++ b/tools/kvm/pci.c
@@ -5,7 +5,6 @@
 
 #include 
 
-#define PCI_MAX_DEVICES256
 #define PCI_BAR_OFFSET(b)  (offsetof(struct pci_device_header, 
bar[b]))
 
 static struct pci_device_header*pci_devices[PCI_MAX_DEVICES];
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 19/23] kvm tools: Endian-sanitise pci.h and PCI device setup

2011-12-08 Thread Matt Evans
vesa, pci-shmem and virtio-pci devices need to set up config space with
little-endian conversions (as config space is LE).  The pci_config_address
bitfield also needs to be reversed when building on BE systems.

Signed-off-by: Matt Evans 
---
 tools/kvm/hw/pci-shmem.c   |   23 +++--
 tools/kvm/hw/vesa.c|   15 +++--
 tools/kvm/include/kvm/ioport.h |   11 +
 tools/kvm/include/kvm/pci.h|   40 ++
 tools/kvm/pci.c|4 +-
 tools/kvm/virtio/pci.c |   41 +--
 6 files changed, 77 insertions(+), 57 deletions(-)

diff --git a/tools/kvm/hw/pci-shmem.c b/tools/kvm/hw/pci-shmem.c
index 780a377..fd954c5 100644
--- a/tools/kvm/hw/pci-shmem.c
+++ b/tools/kvm/hw/pci-shmem.c
@@ -8,21 +8,22 @@
 #include "kvm/ioeventfd.h"
 
 #include 
+#include 
 #include 
 #include 
 #include 
 
 static struct pci_device_header pci_shmem_pci_device = {
-   .vendor_id  = PCI_VENDOR_ID_REDHAT_QUMRANET,
-   .device_id  = 0x1110,
+   .vendor_id  = cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
+   .device_id  = cpu_to_le16(0x1110),
.header_type= PCI_HEADER_TYPE_NORMAL,
-   .class  = 0xFF, /* misc pci device */
-   .status = PCI_STATUS_CAP_LIST,
+   .class[2]   = 0xFF, /* misc pci device */
+   .status = cpu_to_le16(PCI_STATUS_CAP_LIST),
.capabilities   = (void *)&pci_shmem_pci_device.msix - (void 
*)&pci_shmem_pci_device,
.msix.cap   = PCI_CAP_ID_MSIX,
-   .msix.ctrl  = 1,
-   .msix.table_offset = 1, /* Use BAR 1 */
-   .msix.pba_offset = 0x1001,  /* Use BAR 1 */
+   .msix.ctrl  = cpu_to_le16(1),
+   .msix.table_offset = cpu_to_le32(1),/* Use BAR 1 */
+   .msix.pba_offset = cpu_to_le32(0x1001), /* Use BAR 1 */
 };
 
 /* registers for the Inter-VM shared memory device */
@@ -123,7 +124,7 @@ int pci_shmem__get_local_irqfd(struct kvm *kvm)
if (fd < 0)
return fd;
 
-   if (pci_shmem_pci_device.msix.ctrl & PCI_MSIX_FLAGS_ENABLE) {
+   if (pci_shmem_pci_device.msix.ctrl & 
cpu_to_le16(PCI_MSIX_FLAGS_ENABLE)) {
gsi = irq__add_msix_route(kvm, &msix_table[0].msg);
} else {
gsi = pci_shmem_pci_device.irq_line;
@@ -241,11 +242,11 @@ int pci_shmem__init(struct kvm *kvm)
 * 1 - MSI-X MMIO space
 * 2 - Shared memory block
 */
-   pci_shmem_pci_device.bar[0] = ivshmem_registers | 
PCI_BASE_ADDRESS_SPACE_IO;
+   pci_shmem_pci_device.bar[0] = cpu_to_le32(ivshmem_registers | 
PCI_BASE_ADDRESS_SPACE_IO);
pci_shmem_pci_device.bar_size[0] = shmem_region->size;
-   pci_shmem_pci_device.bar[1] = msix_block | 
PCI_BASE_ADDRESS_SPACE_MEMORY;
+   pci_shmem_pci_device.bar[1] = cpu_to_le32(msix_block | 
PCI_BASE_ADDRESS_SPACE_MEMORY);
pci_shmem_pci_device.bar_size[1] = 0x1010;
-   pci_shmem_pci_device.bar[2] = shmem_region->phys_addr | 
PCI_BASE_ADDRESS_SPACE_MEMORY;
+   pci_shmem_pci_device.bar[2] = cpu_to_le32(shmem_region->phys_addr | 
PCI_BASE_ADDRESS_SPACE_MEMORY);
pci_shmem_pci_device.bar_size[2] = shmem_region->size;
 
pci__register(&pci_shmem_pci_device, dev);
diff --git a/tools/kvm/hw/vesa.c b/tools/kvm/hw/vesa.c
index 22b1652..63f1082 100644
--- a/tools/kvm/hw/vesa.c
+++ b/tools/kvm/hw/vesa.c
@@ -8,6 +8,7 @@
 #include "kvm/irq.h"
 #include "kvm/kvm.h"
 #include "kvm/pci.h"
+#include 
 #include 
 
 #include 
@@ -31,14 +32,14 @@ static struct ioport_operations vesa_io_ops = {
 };
 
 static struct pci_device_header vesa_pci_device = {
-   .vendor_id  = PCI_VENDOR_ID_REDHAT_QUMRANET,
-   .device_id  = PCI_DEVICE_ID_VESA,
+   .vendor_id  = cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
+   .device_id  = cpu_to_le16(PCI_DEVICE_ID_VESA),
.header_type= PCI_HEADER_TYPE_NORMAL,
.revision_id= 0,
-   .class  = 0x03,
-   .subsys_vendor_id   = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
-   .subsys_id  = PCI_SUBSYSTEM_ID_VESA,
-   .bar[1] = VESA_MEM_ADDR | PCI_BASE_ADDRESS_SPACE_MEMORY,
+   .class[2]   = 0x03,
+   .subsys_vendor_id   = 
cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET),
+   .subsys_id  = cpu_to_le16(PCI_SUBSYSTEM_ID_VESA),
+   .bar[1] = cpu_to_le32(VESA_MEM_ADDR | 
PCI_BASE_ADDRESS_SPACE_MEMORY),
.bar_size[1]= VESA_MEM_SIZE,
 };
 
@@ -56,7 +57,7 @@ struct framebuffer *vesa__init(struct kvm *kvm)
vesa_pci_device.irq_pin = pin;
vesa_pci_device.irq_line= line;
vesa_base_addr  = ioport__register(IOPORT_EMPTY, 
&vesa_io_ops, IOPORT_SIZE, NULL

[PATCH V2 20/23] kvm tools: Correctly set virtio-pci bar_size and remove hardwired address

2011-12-08 Thread Matt Evans
The BAR addresses are set up correctly, but the bar_size[] array was missed; it
is now updated to match.

Use PCI_IO_SIZE instead of '0x100'.

Signed-off-by: Matt Evans 
---
 tools/kvm/virtio/pci.c |7 +--
 1 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/kvm/virtio/pci.c b/tools/kvm/virtio/pci.c
index acb7d96..0b44a19 100644
--- a/tools/kvm/virtio/pci.c
+++ b/tools/kvm/virtio/pci.c
@@ -293,8 +293,8 @@ int virtio_pci__init(struct kvm *kvm, struct virtio_trans 
*vtrans, void *dev,
vpci->msix_pba_block = pci_get_io_space_block(PCI_IO_SIZE);
 
vpci->base_addr = ioport__register(IOPORT_EMPTY, &virtio_pci__io_ops, 
IOPORT_SIZE, vtrans);
-   kvm__register_mmio(kvm, vpci->msix_io_block, 0x100, 
callback_mmio_table, vpci);
-   kvm__register_mmio(kvm, vpci->msix_pba_block, 0x100, callback_mmio_pba, 
vpci);
+   kvm__register_mmio(kvm, vpci->msix_io_block, PCI_IO_SIZE, 
callback_mmio_table, vpci);
+   kvm__register_mmio(kvm, vpci->msix_pba_block, PCI_IO_SIZE, 
callback_mmio_pba, vpci);
 
vpci->pci_hdr = (struct pci_device_header) {
.vendor_id  = 
cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
@@ -313,6 +313,9 @@ int virtio_pci__init(struct kvm *kvm, struct virtio_trans 
*vtrans, void *dev,
  | 
PCI_BASE_ADDRESS_MEM_TYPE_64),
.status = cpu_to_le16(PCI_STATUS_CAP_LIST),
.capabilities   = (void *)&vpci->pci_hdr.msix - (void 
*)&vpci->pci_hdr,
+   .bar_size[0]= IOPORT_SIZE,
+   .bar_size[1]= PCI_IO_SIZE,
+   .bar_size[3]= PCI_IO_SIZE,
};
 
vpci->pci_hdr.msix.cap = PCI_CAP_ID_MSIX;
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 21/23] kvm tools: Add pci__config_{rd,wr}(), pci__find_dev()

2011-12-08 Thread Matt Evans
This allows config space access in a more natural manner than clunky x86 IO
ports, and is useful for other architectures.  Internally, the x86 IO port
access uses these new config space interfaces.

Signed-off-by: Matt Evans 
---
 tools/kvm/include/kvm/pci.h |9 +-
 tools/kvm/pci.c |   63 ++-
 2 files changed, 45 insertions(+), 27 deletions(-)

diff --git a/tools/kvm/include/kvm/pci.h b/tools/kvm/include/kvm/pci.h
index 21f93d0..07b5403 100644
--- a/tools/kvm/include/kvm/pci.h
+++ b/tools/kvm/include/kvm/pci.h
@@ -7,6 +7,8 @@
 #include 
 #include 
 
+#include "kvm/kvm.h"
+
 #define PCI_MAX_DEVICES256
 /*
  * PCI Configuration Mechanism #1 I/O ports. See Section 3.7.4.1.
@@ -21,7 +23,7 @@
 union pci_config_address {
struct {
 #if __BYTE_ORDER == __LITTLE_ENDIAN
-   unsignedzeros   : 2;/* 1  .. 0  */
+   unsignedreg_offset  : 2;/* 1  .. 0  */
unsignedregister_number : 6;/* 7  .. 2  */
unsignedfunction_number : 3;/* 10 .. 8  */
unsigneddevice_number   : 5;/* 15 .. 11 */
@@ -35,7 +37,7 @@ union pci_config_address {
unsigneddevice_number   : 5;/* 15 .. 11 */
unsignedfunction_number : 3;/* 10 .. 8  */
unsignedregister_number : 6;/* 7  .. 2  */
-   unsignedzeros   : 2;/* 1  .. 0  */
+   unsignedreg_offset  : 2;/* 1  .. 0  */
 #endif
};
u32 w;
@@ -84,6 +86,9 @@ struct pci_device_header {
 
 void pci__init(void);
 void pci__register(struct pci_device_header *dev, u8 dev_num);
+struct pci_device_header *pci__find_dev(u8 dev_num);
 u32 pci_get_io_space_block(u32 size);
+void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void 
*data, int size);
+void pci__config_rd(struct kvm *kvm, union pci_config_address addr, void 
*data, int size);
 
 #endif /* KVM__PCI_H */
diff --git a/tools/kvm/pci.c b/tools/kvm/pci.c
index 5bbcbc7..95df169 100644
--- a/tools/kvm/pci.c
+++ b/tools/kvm/pci.c
@@ -76,21 +76,45 @@ static bool pci_device_exists(u8 bus_number, u8 
device_number, u8 function_numbe
 
 static bool pci_config_data_out(struct ioport *ioport, struct kvm *kvm, u16 
port, void *data, int size)
 {
-   unsigned long start;
-   u8 dev_num;
+   /*
+* If someone accesses PCI configuration space offsets that are not
+* aligned to 4 bytes, it uses ioports to signify that.
+*/
+   pci_config_address.reg_offset = port - PCI_CONFIG_DATA;
+
+   pci__config_wr(kvm, pci_config_address, data, size);
 
+   return true;
+}
+
+static bool pci_config_data_in(struct ioport *ioport, struct kvm *kvm, u16 
port, void *data, int size)
+{
/*
 * If someone accesses PCI configuration space offsets that are not
 * aligned to 4 bytes, it uses ioports to signify that.
 */
-   start = port - PCI_CONFIG_DATA;
+   pci_config_address.reg_offset = port - PCI_CONFIG_DATA;
 
-   dev_num = pci_config_address.device_number;
+   pci__config_rd(kvm, pci_config_address, data, size);
+
+   return true;
+}
+
+static struct ioport_operations pci_config_data_ops = {
+   .io_in  = pci_config_data_in,
+   .io_out = pci_config_data_out,
+};
+
+void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void 
*data, int size)
+{
+   u8 dev_num;
+
+   dev_num = addr.device_number;
 
if (pci_device_exists(0, dev_num, 0)) {
unsigned long offset;
 
-   offset = start + (pci_config_address.register_number << 2);
+   offset = addr.w & 0xff;
if (offset < sizeof(struct pci_device_header)) {
void *p = pci_devices[dev_num];
u8 bar = (offset - PCI_BAR_OFFSET(0)) / (sizeof(u32));
@@ -116,27 +140,18 @@ static bool pci_config_data_out(struct ioport *ioport, 
struct kvm *kvm, u16 port
}
}
}
-
-   return true;
 }
 
-static bool pci_config_data_in(struct ioport *ioport, struct kvm *kvm, u16 
port, void *data, int size)
+void pci__config_rd(struct kvm *kvm, union pci_config_address addr, void 
*data, int size)
 {
-   unsigned long start;
u8 dev_num;
 
-   /*
-* If someone accesses PCI configuration space offsets that are not
-* aligned to 4 bytes, it uses ioports to signify that.
-*/
-   start = port - PCI_CONFIG_DATA;
-
-   dev_num = pci_config_address.device_number;
+   dev_num = addr.device_number;
 
if (pci_device_exists(0, dev_num, 0)) {
unsigned long offset;
 
-   offset = start + (pci

[PATCH V2 22/23] kvm tools: Arch-specific define for PCI MMIO allocation area

2011-12-08 Thread Matt Evans
pci_get_io_space_block() used to grab addresses from
KVM_32BIT_GAP_START + 0x100, which is x86-specific.  Create a new define,
KVM_PCI_MMIO_AREA, to specify a bus address these allocations can come from.

Signed-off-by: Matt Evans 
---
 tools/kvm/pci.c  |8 ++--
 tools/kvm/x86/include/kvm/kvm-arch.h |5 +
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/tools/kvm/pci.c b/tools/kvm/pci.c
index 95df169..59b2618 100644
--- a/tools/kvm/pci.c
+++ b/tools/kvm/pci.c
@@ -11,8 +11,12 @@ static struct pci_device_header  
*pci_devices[PCI_MAX_DEVICES];
 
 static union pci_config_addresspci_config_address;
 
-/* This is within our PCI gap - in an unused area */
-static u32 io_space_blocks = KVM_32BIT_GAP_START + 0x100;
+/* This is within our PCI gap - in an unused area.
+ * Note this is a PCI *bus address*, is used to assign BARs etc.!
+ * (That's why it can still 32bit even with 64bit guests-- 64bit
+ * PCI isn't currently supported.)
+ */
+static u32 io_space_blocks = KVM_PCI_MMIO_AREA;
 
 u32 pci_get_io_space_block(u32 size)
 {
diff --git a/tools/kvm/x86/include/kvm/kvm-arch.h 
b/tools/kvm/x86/include/kvm/kvm-arch.h
index 02aa8b9..686b1b8 100644
--- a/tools/kvm/x86/include/kvm/kvm-arch.h
+++ b/tools/kvm/x86/include/kvm/kvm-arch.h
@@ -18,6 +18,11 @@
 
 #define KVM_MMIO_START KVM_32BIT_GAP_START
 
+/* This is the address that pci_get_io_space_block() starts allocating
+ * from.  Note that this is a PCI bus address (though same on x86).
+ */
+#define KVM_PCI_MMIO_AREA  (KVM_MMIO_START + 0x100)
+
 struct kvm {
int sys_fd; /* For system ioctls(), i.e. 
/dev/kvm */
int vm_fd;  /* For VM ioctls() */
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 23/23] kvm tools: Create arch-specific kvm_cpu__emulate_{mm}io()

2011-12-08 Thread Matt Evans
Different architectures will deal with MMIO exits differently.  For example,
KVM_EXIT_IO is x86-specific, and I/O cycles are often synthesised by steering
into windows in PCI bridges on other architectures.

This patch calls arch-specific kvm_cpu__emulate_io() and kvm_cpu__emulate_mmio()
from the main runloop's IO and MMIO exit handlers.  For x86, these directly
call kvm__emulate_io() and kvm__emulate_mmio() but other architectures will
perform some address munging before passing on the call.

Signed-off-by: Matt Evans 
---
 tools/kvm/kvm-cpu.c  |   34 +++---
 tools/kvm/x86/include/kvm/kvm-cpu-arch.h |   17 ++-
 2 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/tools/kvm/kvm-cpu.c b/tools/kvm/kvm-cpu.c
index 884a89f..4df9ead 100644
--- a/tools/kvm/kvm-cpu.c
+++ b/tools/kvm/kvm-cpu.c
@@ -52,11 +52,11 @@ static void kvm_cpu__handle_coalesced_mmio(struct kvm_cpu 
*cpu)
while (cpu->ring->first != cpu->ring->last) {
struct kvm_coalesced_mmio *m;
m = &cpu->ring->coalesced_mmio[cpu->ring->first];
-   kvm__emulate_mmio(cpu->kvm,
-   m->phys_addr,
-   m->data,
-   m->len,
-   1);
+   kvm_cpu__emulate_mmio(cpu->kvm,
+ m->phys_addr,
+ m->data,
+ m->len,
+ 1);
cpu->ring->first = (cpu->ring->first + 1) % 
KVM_COALESCED_MMIO_MAX;
}
}
@@ -106,13 +106,13 @@ int kvm_cpu__start(struct kvm_cpu *cpu)
case KVM_EXIT_IO: {
bool ret;
 
-   ret = kvm__emulate_io(cpu->kvm,
-   cpu->kvm_run->io.port,
-   (u8 *)cpu->kvm_run +
-   cpu->kvm_run->io.data_offset,
-   cpu->kvm_run->io.direction,
-   cpu->kvm_run->io.size,
-   cpu->kvm_run->io.count);
+   ret = kvm_cpu__emulate_io(cpu->kvm,
+ cpu->kvm_run->io.port,
+ (u8 *)cpu->kvm_run +
+ cpu->kvm_run->io.data_offset,
+ cpu->kvm_run->io.direction,
+ cpu->kvm_run->io.size,
+ cpu->kvm_run->io.count);
 
if (!ret)
goto panic_kvm;
@@ -121,11 +121,11 @@ int kvm_cpu__start(struct kvm_cpu *cpu)
case KVM_EXIT_MMIO: {
bool ret;
 
-   ret = kvm__emulate_mmio(cpu->kvm,
-   cpu->kvm_run->mmio.phys_addr,
-   cpu->kvm_run->mmio.data,
-   cpu->kvm_run->mmio.len,
-   cpu->kvm_run->mmio.is_write);
+   ret = kvm_cpu__emulate_mmio(cpu->kvm,
+   
cpu->kvm_run->mmio.phys_addr,
+   cpu->kvm_run->mmio.data,
+   cpu->kvm_run->mmio.len,
+   
cpu->kvm_run->mmio.is_write);
 
if (!ret)
goto panic_kvm;
diff --git a/tools/kvm/x86/include/kvm/kvm-cpu-arch.h 
b/tools/kvm/x86/include/kvm/kvm-cpu-arch.h
index ed1c727..f138a92 100644
--- a/tools/kvm/x86/include/kvm/kvm-cpu-arch.h
+++ b/tools/kvm/x86/include/kvm/kvm-cpu-arch.h
@@ -4,7 +4,8 @@
 /* Architecture-specific kvm_cpu definitions. */
 
 #include  /* for struct kvm_regs */
-
+#include "kvm/kvm.h"   /* for kvm__emulate_{mm}io() */
+#include 
 #include 
 
 struct kvm;
@@ -30,4 +31,18 @@ struct kvm_cpu {
struct kvm_coalesced_mmio_ring  *ring;
 };
 
+/*
+ * As these are such simple wrappers, let's have them in the header so they'll
+ * be cheaper to call:
+ */
+static inline bool kvm_cpu__emulate_io(struct kvm *kvm, u16 port, void *data, 
int direction, int size, u32 count)
+{
+   return kvm__emulate_io(kvm, port, data, direction, size, count);
+}
+
+static inline bool kvm_cpu__emulate_mmio(struct kvm *kvm, u64 phys_addr, u8 
*data, u32 len, u8 is_write)
+{
+   return kvm_cpu__emulate_mmio(kvm, phys_addr, data, len, is_write);
+}
+
 #endif /* KVM__KVM_CPU_ARCH_H */
--
To unsubscribe from this list: send the line "unsubscribe kvm" in

Re: [libvirt] [PATCH] kvm tools: Introduce an ENV variable for the state dir

2011-12-08 Thread Osier Yang

On 2011年12月06日 22:39, Daniel P. Berrange wrote:

On Fri, Nov 11, 2011 at 07:57:00PM +0800, Osier Yang wrote:

The variable is named "KVMTOOL_STATE_DIR", so that the user can
configure the path of the state directory as desired.
---
  tools/kvm/main.c |7 ++-
  1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/tools/kvm/main.c b/tools/kvm/main.c
index 05bc82c..37b2b1d 100644
--- a/tools/kvm/main.c
+++ b/tools/kvm/main.c
@@ -13,7 +13,12 @@ static int handle_kvm_command(int argc, char **argv)

  int main(int argc, char *argv[])
  {
-   kvm__set_dir("%s/%s", HOME_DIR, KVM_PID_FILE_PATH);
+   char *state_dir = getenv("KVMTOOL_STATE_DIR");
+
+   if (state_dir)
+   kvm__set_dir("%s", state_dir);
+   else
+   kvm__set_dir("%s/%s", HOME_DIR, KVM_PID_FILE_PATH);

return handle_kvm_command(argc - 1,&argv[1]);
  }


As per my comments in the first patch, I don't think this is critical
for libvirt's needs. We should just honour the default location that
the KVM tool uses, rather than forcing a libvirt specific location.



Thanks to whoever pushed the patch; it's already in kvmtool's
source. It will be useful when we run the kvmtool process as a
non-privileged user and group.


Regards,
Osier
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [libvirt] [PATCH 5/7] kvmtool: Add new domain type

2011-12-08 Thread Osier Yang

On 2011年12月06日 22:46, Daniel P. Berrange wrote:

On Fri, Nov 11, 2011 at 07:57:04PM +0800, Osier Yang wrote:

It's named as "kvmtool".
---
  src/conf/domain_conf.c |4 +++-
  src/conf/domain_conf.h |1 +
  2 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 58f4d0f..55121d8 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -91,7 +91,8 @@ VIR_ENUM_IMPL(virDomainVirt, VIR_DOMAIN_VIRT_LAST,
"hyperv",
"vbox",
"one",
-  "phyp")
+  "phyp",
+  "kvmtool")

  VIR_ENUM_IMPL(virDomainBoot, VIR_DOMAIN_BOOT_LAST,
"fd",
@@ -4018,6 +4019,7 @@ virDomainChrDefParseXML(virCapsPtr caps,
  if (type == NULL) {
  def->source.type = VIR_DOMAIN_CHR_TYPE_PTY;
  } else if ((def->source.type = virDomainChrTypeFromString(type))<  0) {
+VIR_WARN("type = %s", type);
  virDomainReportError(VIR_ERR_XML_ERROR,
   _("unknown type presented to host for character 
device: %s"),
   type);
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index a3cb834..001bc46 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -59,6 +59,7 @@ enum virDomainVirtType {
  VIR_DOMAIN_VIRT_VBOX,
  VIR_DOMAIN_VIRT_ONE,
  VIR_DOMAIN_VIRT_PHYP,
+VIR_DOMAIN_VIRT_KVMTOOL,

  VIR_DOMAIN_VIRT_LAST,
  };


IMHO this patch is not required. The domain type refers to the
hypervisor used for the domain, which is still 'kvm'. What is different
here is just the userspace device model.  If you look at the 3 different
Xen user spaces we support, all of them still use <domain type='xen'>.
So just use <domain type='kvm'> here for kvmtool.



Makes sense, agreed.

Osier
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [libvirt] [PATCH 7/7] kvmtool: Implementation for kvm tool driver

2011-12-08 Thread Osier Yang

On 2011年12月06日 22:55, Daniel P. Berrange wrote:

On Fri, Nov 11, 2011 at 07:57:06PM +0800, Osier Yang wrote:

Basically, the driver is currently implemented by using the kvm tool
binary (see ./kvm help for more info).

The current implementation supports define/undefine, start/destroy,
suspend/resume, connecting to the guest console via "virsh console",
and ballooning memory with "virsh setmem" (using the ./kvm balloon
command). Also, as it supports the cgroup controllers "cpuacct" and
"memory", some other commands like "schedinfo" and "memtune" can
also work. Some other commands such as "domid", "domname", "dumpxml",
"autostart", etc. are supported; as the driver is designed
as a "stateful" driver, those APIs simply need to talk with
libvirtd.

As the Native Linux KVM Tool is designed for both non-root and root users,
the driver is designed just like the QEMU driver and supports two modes of
connection:

 kvmtool:///system
 kvmtool+unix:///system

 kvmtool:///session
 kvmtool+unix:///session

An example of the domain XML (all the XMLs supported currently are
listed):

% virsh -c kvm:///system dumpxml kvm_test

   kvm_test
   88bf38f1-b6ab-cfa6-ab53-4b4c0993d894
   524288
   524288
   1
   
 hvm
 /boot/bzImage
 
   
   
   destroy
   restart
   restart
   
 /usr/bin/kvmtool
 
   
   
 
 
   
   
 
 
   
 
 
   

---
  cfg.mk   |1 +
  daemon/Makefile.am   |4 +
  daemon/libvirtd.c|7 +
  po/POTFILES.in   |2 +
  src/Makefile.am  |   36 +-
  src/kvmtool/kvmtool_conf.c   |  130 ++
  src/kvmtool/kvmtool_conf.h   |   66 +
  src/kvmtool/kvmtool_driver.c | 3079 ++
  src/kvmtool/kvmtool_driver.h |   29 +


My main suggestion here would be to split up the kvmtool_driver.c
file into 3 parts as we did with the QEMU driver.

   kvmtool_driver.c   ->  Basic libvirt API glue
   kvmtool_command.c  ->  ARGV generation
   kvmtool_process.c  ->  KVMtool process start/stop/autostart/autodestroy



Agreed. As an early thought: kvmtool might have APIs exposed in the future;
how should we plan for that? A new driver, like libxl for Xen? Or a new
backend driver, like what we do with xm and xend for the Xen driver? Should
we consider extensibility now if we tend to use backend
drivers?

Regards,
Osier

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V2 17/23] kvm tools: Add ability to map guest RAM from hugetlbfs

2011-12-08 Thread Sasha Levin
On Fri, 2011-12-09 at 17:55 +1100, Matt Evans wrote:
> Add a --hugetlbfs commandline option to give a path to hugetlbfs-map guest
> memory (down in kvm__arch_init()).  For x86, guest memory is a normal
> ANON mmap() if this option is not provided, otherwise a hugetlbfs mmap.
> 
> Signed-off-by: Matt Evans 
> ---
>  tools/kvm/builtin-run.c  |4 ++-
>  tools/kvm/include/kvm/kvm.h  |4 +-
>  tools/kvm/include/kvm/util.h |4 +++
>  tools/kvm/kvm.c  |4 +-
>  tools/kvm/util.c |   45 
> ++
>  tools/kvm/x86/kvm.c  |   20 +++--
>  6 files changed, 73 insertions(+), 8 deletions(-)
> 
> diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
> index 7969901..0acfe81 100644
> --- a/tools/kvm/builtin-run.c
> +++ b/tools/kvm/builtin-run.c
> @@ -82,6 +82,7 @@ static const char *guest_mac;
>  static const char *host_mac;
>  static const char *script;
>  static const char *guest_name;
> +static const char *hugetlbfs_path;
>  static struct virtio_net_params *net_params;
>  static bool single_step;
>  static bool readonly_image[MAX_DISK_IMAGES];
> @@ -422,6 +423,7 @@ static const struct option options[] = {
>   OPT_CALLBACK('\0', "tty", NULL, "tty id",
>"Remap guest TTY into a pty on the host",
>tty_parser),
> + OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs 
> path"),
>  
>   OPT_GROUP("Kernel options:"),
>   OPT_STRING('k', "kernel", &kernel_filename, "kernel",
> @@ -807,7 +809,7 @@ int kvm_cmd_run(int argc, const char **argv, const char 
> *prefix)
>   guest_name = default_name;
>   }
>  
> - kvm = kvm__init(dev, ram_size, guest_name);
> + kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name);
>  
>   kvm->single_step = single_step;
>  
> diff --git a/tools/kvm/include/kvm/kvm.h b/tools/kvm/include/kvm/kvm.h
> index 5fe6e75..7159952 100644
> --- a/tools/kvm/include/kvm/kvm.h
> +++ b/tools/kvm/include/kvm/kvm.h
> @@ -30,7 +30,7 @@ struct kvm_ext {
>  void kvm__set_dir(const char *fmt, ...);
>  const char *kvm__get_dir(void);
>  
> -struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name);
> +struct kvm *kvm__init(const char *kvm_dev, const char *hugetlbfs_path, u64 
> ram_size, const char *name);
>  int kvm__recommended_cpus(struct kvm *kvm);
>  int kvm__max_cpus(struct kvm *kvm);
>  void kvm__init_ram(struct kvm *kvm);
> @@ -54,7 +54,7 @@ int kvm__enumerate_instances(int (*callback)(const char 
> *name, int pid));
>  void kvm__remove_socket(const char *name);
>  
>  void kvm__arch_set_cmdline(char *cmdline, bool video);
> -void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, 
> const char *name);
> +void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, const char 
> *hugetlbfs_path, u64 ram_size, const char *name);
>  void kvm__arch_setup_firmware(struct kvm *kvm);
>  bool kvm__arch_cpu_supports_vm(void);
>  void kvm__arch_periodic_poll(struct kvm *kvm);
> diff --git a/tools/kvm/include/kvm/util.h b/tools/kvm/include/kvm/util.h
> index dc2e0b9..1f6fbbd 100644
> --- a/tools/kvm/include/kvm/util.h
> +++ b/tools/kvm/include/kvm/util.h
> @@ -20,6 +20,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #ifdef __GNUC__
>  #define NORETURN __attribute__((__noreturn__))
> @@ -75,4 +76,7 @@ static inline void msleep(unsigned int msecs)
>  {
>   usleep(MSECS_TO_USECS(msecs));
>  }
> +
> +void *mmap_hugetlbfs(const char *htlbfs_path, u64 size);
> +
>  #endif /* KVM__UTIL_H */
> diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c
> index c54f886..35ca2c5 100644
> --- a/tools/kvm/kvm.c
> +++ b/tools/kvm/kvm.c
> @@ -306,7 +306,7 @@ int kvm__max_cpus(struct kvm *kvm)
>   return ret;
>  }
>  
> -struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name)
> +struct kvm *kvm__init(const char *kvm_dev, const char *hugetlbfs_path, u64 
> ram_size, const char *name)
>  {
>   struct kvm *kvm;
>   int ret;
> @@ -339,7 +339,7 @@ struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, 
> const char *name)
>   if (kvm__check_extensions(kvm))
>   die("A required KVM extention is not supported by OS");
>  
> - kvm__arch_init(kvm, kvm_dev, ram_size, name);
> + kvm__arch_init(kvm, kvm_dev, hugetlbfs_path, ram_size, name);
>  
>   kvm->name = name;
>  
> diff --git a/tools/kvm/util.c b/tools/kvm/util.c
> index 4efbce9..90b6a3b 100644
> --- a/tools/kvm/util.c
> +++ b/tools/kvm/util.c
> @@ -4,6 +4,11 @@
>  
>  #include "kvm/util.h"
>  
> +#include  /* For HUGETLBFS_MAGIC */
> +#include 
> +#include 
> +#include 
> +
>  static void report(const char *prefix, const char *err, va_list params)
>  {
>   char msg[1024];
> @@ -99,3 +104,43 @@ size_t strlcat(char *dest, const char *src, size_t count)
>  
>   return res;
>  }
> +
> +void *mmap_hugetlbfs(const char *htlbfs_path, u64 size)
> +{
> + char mpath[PATH_MAX];
> + int

Re: [PATCH v4 00/15] uq/master: Introduce basic irqchip support

2011-12-08 Thread Jan Kiszka
On 2011-12-08 22:25, Blue Swirl wrote:
> On Thu, Dec 8, 2011 at 11:52, Jan Kiszka  wrote:
>> Changes in v4:
>> - rebased of current uq/master
>> - fixed stupid bugs that broke bisectability and user space irqchip mode
>> - integrated NMI-over-LINT1 injection logic
> 
> I had comments to one patch, others look fine.
> 
> Overall, string based subtype selection does not somehow seem to be a
> hot idea, but this could be used as a starting point which should be
> cleaned up later when we have proper device composition. APIC and x86
> interrupt handling need more cleanup anyway.

Yes, more than likely. However, I think the string-based selection is
the best that can be done right now. It gives us, e.g., pretty-printing
of the backend property so that you can easily check what mode is in use.

The next area of ugliness will be the PIT and its relation to the HPET.
I started looking into this, found some bugs, but do not really have a
nice model for all that yet.

Looking into the remaining remarks now.

Thanks,
Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 12/15] kvm: x86: Add user space part for in-kernel APIC

2011-12-08 Thread Jan Kiszka
On 2011-12-08 22:16, Blue Swirl wrote:
> On Thu, Dec 8, 2011 at 11:52, Jan Kiszka  wrote:
>> This introduces the alternative APIC backend which makes use of KVM's
>> in-kernel device model. External NMI injection via LINT1 is emulated by
>> checking the current state of the in-kernel APIC, only injecting a NMI
>> into the VCPU if LINT1 is unmasked and configured to DM_NMI.
>>
>> MSI is not yet supported, so we disable this when the in-kernel model is
>> in use.
>>
>> CC: Lai Jiangshan 
>> Signed-off-by: Jan Kiszka 
>> ---
>>  Makefile.target   |2 +-
>>  hw/kvm/apic.c |  154 
>> +
>>  hw/pc.c   |   15 --
>>  kvm.h |3 +
>>  target-i386/kvm.c |8 +++
>>  5 files changed, 176 insertions(+), 6 deletions(-)
>>  create mode 100644 hw/kvm/apic.c
>>
>> diff --git a/Makefile.target b/Makefile.target
>> index b549988..76de485 100644
>> --- a/Makefile.target
>> +++ b/Makefile.target
>> @@ -236,7 +236,7 @@ obj-i386-y += vmport.o
>>  obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
>>  obj-i386-y += debugcon.o multiboot.o
>>  obj-i386-y += pc_piix.o
>> -obj-i386-$(CONFIG_KVM) += kvm/clock.o
>> +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o
>>  obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
>>
>>  # shared objects
>> diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c
>> new file mode 100644
>> index 000..3924f9e
>> --- /dev/null
>> +++ b/hw/kvm/apic.c
>> @@ -0,0 +1,154 @@
>> +/*
>> + * KVM in-kernel APIC support
>> + *
>> + * Copyright (c) 2011 Siemens AG
>> + *
>> + * Authors:
>> + *  Jan Kiszka  
>> + *
>> + * This work is licensed under the terms of the GNU GPL version 2.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +#include "hw/apic_internal.h"
>> +#include "kvm.h"
>> +
>> +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
>> +   int reg_id, uint32_t val)
>> +{
>> +*((uint32_t *)(kapic->regs + (reg_id << 4))) = val;
>> +}
>> +
>> +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic,
>> +   int reg_id)
>> +{
>> +return *((uint32_t *)(kapic->regs + (reg_id << 4)));
>> +}
>> +
>> +int kvm_put_apic(CPUState *env)
>> +{
>> +APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);
> 
> Please pass APICState instead of CPUState.

DeviceState, I suppose. Yes, makes more sense, update will follow.

> 
>> +struct kvm_lapic_state kapic;
>> +int i;
>> +
>> +if (s && kvm_enabled() && kvm_irqchip_in_kernel()) {
>> +memset(&kapic, 0, sizeof(kapic));
>> +kvm_apic_set_reg(&kapic, 0x2, s->id << 24);
>> +kvm_apic_set_reg(&kapic, 0x8, s->tpr);
>> +kvm_apic_set_reg(&kapic, 0xd, s->log_dest << 24);
>> +kvm_apic_set_reg(&kapic, 0xe, s->dest_mode << 28 | 0x0fff);
>> +kvm_apic_set_reg(&kapic, 0xf, s->spurious_vec);
>> +for (i = 0; i < 8; i++) {
>> +kvm_apic_set_reg(&kapic, 0x10 + i, s->isr[i]);
>> +kvm_apic_set_reg(&kapic, 0x18 + i, s->tmr[i]);
>> +kvm_apic_set_reg(&kapic, 0x20 + i, s->irr[i]);
>> +}
>> +kvm_apic_set_reg(&kapic, 0x28, s->esr);
>> +kvm_apic_set_reg(&kapic, 0x30, s->icr[0]);
>> +kvm_apic_set_reg(&kapic, 0x31, s->icr[1]);
>> +for (i = 0; i < APIC_LVT_NB; i++) {
>> +kvm_apic_set_reg(&kapic, 0x32 + i, s->lvt[i]);
>> +}
>> +kvm_apic_set_reg(&kapic, 0x38, s->initial_count);
>> +kvm_apic_set_reg(&kapic, 0x3e, s->divide_conf);
>> +
>> +return kvm_vcpu_ioctl(env, KVM_SET_LAPIC, &kapic);
>> +}
>> +
>> +return 0;
>> +}
>> +
>> +int kvm_get_apic(CPUState *env)
> 
> Same here.
> 
>> +{
>> +APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);
>> +struct kvm_lapic_state kapic;
>> +int ret, i, v;
>> +
>> +if (s && kvm_enabled() && kvm_irqchip_in_kernel()) {
>> +ret = kvm_vcpu_ioctl(env, KVM_GET_LAPIC, &kapic);
>> +if (ret < 0) {
>> +return ret;
>> +}
>> +
>> +s->id = kvm_apic_get_reg(&kapic, 0x2) >> 24;
>> +s->tpr = kvm_apic_get_reg(&kapic, 0x8);
>> +s->arb_id = kvm_apic_get_reg(&kapic, 0x9);
>> +s->log_dest = kvm_apic_get_reg(&kapic, 0xd) >> 24;
>> +s->dest_mode = kvm_apic_get_reg(&kapic, 0xe) >> 28;
>> +s->spurious_vec = kvm_apic_get_reg(&kapic, 0xf);
>> +for (i = 0; i < 8; i++) {
>> +s->isr[i] = kvm_apic_get_reg(&kapic, 0x10 + i);
>> +s->tmr[i] = kvm_apic_get_reg(&kapic, 0x18 + i);
>> +s->irr[i] = kvm_apic_get_reg(&kapic, 0x20 + i);
>> +}
>> +s->esr = kvm_apic_get_reg(&kapic, 0x28);
>> +s->icr[0] = kvm_apic_get_reg(&kapic, 0x30);
>> +s->icr[1] = kvm_apic_get_reg(&kapic, 0x31);
>> +for (i = 0; i < APIC_LVT_NB; i++) {
>> +s->lvt[i] = kvm_apic_get_reg(&kapic, 0x32 + i)

  1   2   >