RE: [PATCH 1/6 v2] KVM: PPC: Book3E: Use common defines for SPE/FP/AltiVec int numbers

2014-07-24 Thread mihai.cara...@freescale.com
> -Original Message-
> From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc-
> ow...@vger.kernel.org] On Behalf Of mihai.cara...@freescale.com
> Sent: Monday, July 21, 2014 4:23 PM
> To: Alexander Graf; Wood Scott-B07421
> Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; linuxppc-
> d...@lists.ozlabs.org
> Subject: RE: [PATCH 1/6 v2] KVM: PPC: Book3E: Use common defines for
> SPE/FP/AltiVec int numbers
> 
> > -Original Message-
> > From: Alexander Graf [mailto:ag...@suse.de]
> > Sent: Thursday, July 03, 2014 3:21 PM
> > To: Caraman Mihai Claudiu-B02008; kvm-...@vger.kernel.org
> > Cc: kvm@vger.kernel.org; linuxppc-...@lists.ozlabs.org
> > Subject: Re: [PATCH 1/6 v2] KVM: PPC: Book3E: Use common defines for
> > SPE/FP/AltiVec int numbers
> >
> >
> > On 30.06.14 17:34, Mihai Caraman wrote:
> > > Use common BOOKE_IRQPRIO and BOOKE_INTERRUPT defines for
> SPE/FP/AltiVec
> > > which share the same interrupt numbers.
> > >
> > > Signed-off-by: Mihai Caraman 
> > > ---
> > > v2:
> > >   - remove outdated definitions
> > >
> > >   arch/powerpc/include/asm/kvm_asm.h|  8 
> > >   arch/powerpc/kvm/booke.c  | 17 +
> > >   arch/powerpc/kvm/booke.h  |  4 ++--
> > >   arch/powerpc/kvm/booke_interrupts.S   |  9 +
> > >   arch/powerpc/kvm/bookehv_interrupts.S |  4 ++--
> > >   arch/powerpc/kvm/e500.c   | 10 ++
> > >   arch/powerpc/kvm/e500_emulate.c   | 10 ++
> > >   7 files changed, 30 insertions(+), 32 deletions(-)
> > >
> > > diff --git a/arch/powerpc/include/asm/kvm_asm.h
> > b/arch/powerpc/include/asm/kvm_asm.h
> > > index 9601741..c94fd33 100644
> > > --- a/arch/powerpc/include/asm/kvm_asm.h
> > > +++ b/arch/powerpc/include/asm/kvm_asm.h
> > > @@ -56,14 +56,6 @@
> > >   /* E500 */
> > >   #define BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL 32
> > >   #define BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST 33
> > > -/*
> > > - * TODO: Unify 32-bit and 64-bit kernel exception handlers to use
> same
> > defines
> > > - */
> > > -#define BOOKE_INTERRUPT_SPE_UNAVAIL
> > BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL
> > > -#define BOOKE_INTERRUPT_SPE_FP_DATA
> > BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST
> > > -#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL
> > BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL
> > > -#define BOOKE_INTERRUPT_ALTIVEC_ASSIST \
> > > - BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST
> >
> > I think I'd prefer to keep them separate.
> >
> > >   #define BOOKE_INTERRUPT_SPE_FP_ROUND 34
> > >   #define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35
> > >   #define BOOKE_INTERRUPT_DOORBELL 36
> > > diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> > > index ab62109..3c86d9b 100644
> > > --- a/arch/powerpc/kvm/booke.c
> > > +++ b/arch/powerpc/kvm/booke.c
> > > @@ -388,8 +388,8 @@ static int kvmppc_booke_irqprio_deliver(struct
> > kvm_vcpu *vcpu,
> > >   case BOOKE_IRQPRIO_ITLB_MISS:
> > >   case BOOKE_IRQPRIO_SYSCALL:
> > >   case BOOKE_IRQPRIO_FP_UNAVAIL:
> > > - case BOOKE_IRQPRIO_SPE_UNAVAIL:
> > > - case BOOKE_IRQPRIO_SPE_FP_DATA:
> > > + case BOOKE_IRQPRIO_SPE_ALTIVEC_UNAVAIL:
> > > + case BOOKE_IRQPRIO_SPE_FP_DATA_ALTIVEC_ASSIST:
> >
> > #ifdef CONFIG_KVM_E500V2
> >case ...SPE:
> > #else
> >case ..ALTIVEC:
> > #endif
> >
> > >   case BOOKE_IRQPRIO_SPE_FP_ROUND:
> > >   case BOOKE_IRQPRIO_AP_UNAVAIL:
> > >   allowed = 1;
> > > @@ -977,18 +977,19 @@ int kvmppc_handle_exit(struct kvm_run *run,
> > struct kvm_vcpu *vcpu,
> > >   break;
> > >
> > >   #ifdef CONFIG_SPE
> > > - case BOOKE_INTERRUPT_SPE_UNAVAIL: {
> > > + case BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL: {
> > >   if (vcpu->arch.shared->msr & MSR_SPE)
> > >   kvmppc_vcpu_enable_spe(vcpu);
> > >   else
> > >   kvmppc_booke_queue_irqprio(vcpu,
> > > -BOOKE_IRQPRIO_SPE_UNAVAIL);
> > > + BOOKE_IRQPRIO_SPE_ALTIVEC_UNAVAIL);
> > >   r = RESUME_GUEST;
> > >   break;
> > >   }
> > >
> > > - case BOOKE_INTERRUPT_SPE_FP_DATA:
> > > - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA);
> > > + case BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST:
> > > + kvmppc_booke_queue_irqprio(vcpu,
> > > + BOOKE_IRQPRIO_SPE_FP_DATA_ALTIVEC_ASSIST);
> > >   r = RESUME_GUEST;
> > >   break;
> > >
> > > @@ -997,7 +998,7 @@ int kvmppc_handle_exit(struct kvm_run *run,
> struct
> > kvm_vcpu *vcpu,
> > >   r = RESUME_GUEST;
> > >   break;
> > >   #else
> > > - case BOOKE_INTERRUPT_SPE_UNAVAIL:
> > > + case BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL:
> > >   /*
> > >* Guest wants SPE, but host kernel doesn't support it.
> Send
> > >* an "unimplemented operation" program che

[Bug 81011] New: crashed on launching KVM with Juniper Simulator

2014-07-24 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=81011

Bug ID: 81011
   Summary: crashed on launching KVM with Juniper Simulator
   Product: Virtualization
   Version: unspecified
Kernel Version: 3.14.4
  Hardware: i386
OS: Linux
  Tree: Mainline
Status: NEW
  Severity: normal
  Priority: P1
 Component: kvm
  Assignee: virtualization_...@kernel-bugs.osdl.org
  Reporter: 52expl...@163.com
Regression: No

Created attachment 144091
  --> https://bugzilla.kernel.org/attachment.cgi?id=144091&action=edit
the crashed screenshot

platform: DELL Vostro 2421
#uname -a
Linux x-linux 3.14-kali1-686-pae #1 SMP Debian 3.14.4-1kali1 (2014-05-14) i686
GNU/Linux

#kvm --version
QEMU emulator version 1.1.2 (qemu-kvm-1.1.2+dfsg-6+deb7u3, Debian), Copyright
(c) 2003-2008 Fabrice Bellard

#qemu --version
QEMU emulator version 1.1.2 (Debian 1.1.2+dfsg-6a+deb7u3), Copyright (c)
2003-2008 Fabrice Bellard

# cat /etc/issue
Kali GNU/Linux 1.0.7 \n \l

# cat /proc/cpuinfo 
processor: 0
vendor_id: GenuineIntel
cpu family: 6
model: 58
model name: Intel(R) Core(TM) i3-3227U CPU @ 1.90GHz
stepping: 9
microcode: 0x19
cpu MHz: 790.875
cache size: 3072 KB
physical id: 0
siblings: 4
core id: 0
cpu cores: 2
apicid: 0
initial apicid: 0
fdiv_bug: no
f00f_bug: no
coma_bug: no
fpu: yes
fpu_exception: yes
cpuid level: 13
wp: yes
flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov
pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx rdtscp lm
constant_tsc arch_perfmon pebs bts xtopology nonstop_tsc aperfmperf eagerfpu
pni pclmulqdq dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr pdcm pcid
sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer xsave avx f16c lahf_lm ida arat
epb xsaveopt pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase smep
erms
bogomips: 3791.39
clflush size: 64
cache_alignment: 64
address sizes: 36 bits physical, 48 bits virtual
power management:

processor: 1
vendor_id: GenuineIntel
cpu family: 6
model: 58
model name: Intel(R) Core(TM) i3-3227U CPU @ 1.90GHz
stepping: 9
microcode: 0x19
cpu MHz: 790.875
cache size: 3072 KB
physical id: 0
siblings: 4
core id: 1
cpu cores: 2
apicid: 2
initial apicid: 2
fdiv_bug: no
f00f_bug: no
coma_bug: no
fpu: yes
fpu_exception: yes
cpuid level: 13
wp: yes
flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov
pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx rdtscp lm
constant_tsc arch_perfmon pebs bts xtopology nonstop_tsc aperfmperf eagerfpu
pni pclmulqdq dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr pdcm pcid
sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer xsave avx f16c lahf_lm ida arat
epb xsaveopt pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase smep
erms
bogomips: 3791.39
clflush size: 64
cache_alignment: 64
address sizes: 36 bits physical, 48 bits virtual
power management:

processor: 2
vendor_id: GenuineIntel
cpu family: 6
model: 58
model name: Intel(R) Core(TM) i3-3227U CPU @ 1.90GHz
stepping: 9
microcode: 0x19
cpu MHz: 790.875
cache size: 3072 KB
physical id: 0
siblings: 4
core id: 0
cpu cores: 2
apicid: 1
initial apicid: 1
fdiv_bug: no
f00f_bug: no
coma_bug: no
fpu: yes
fpu_exception: yes
cpuid level: 13
wp: yes
flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov
pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx rdtscp lm
constant_tsc arch_perfmon pebs bts xtopology nonstop_tsc aperfmperf eagerfpu
pni pclmulqdq dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr pdcm pcid
sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer xsave avx f16c lahf_lm ida arat
epb xsaveopt pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase smep
erms
bogomips: 3791.39
clflush size: 64
cache_alignment: 64
address sizes: 36 bits physical, 48 bits virtual
power management:

processor: 3
vendor_id: GenuineIntel
cpu family: 6
model: 58
model name: Intel(R) Core(TM) i3-3227U CPU @ 1.90GHz
stepping: 9
microcode: 0x19
cpu MHz: 790.875
cache size: 3072 KB
physical id: 0
siblings: 4
core id: 1
cpu cores: 2
apicid: 3
initial apicid: 3
fdiv_bug: no
f00f_bug: no
coma_bug: no
fpu: yes
fpu_exception: yes
cpuid level: 13
wp: yes
flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov
pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx rdtscp lm
constant_tsc arch_perfmon pebs bts xtopology nonstop_tsc aperfmperf eagerfpu
pni pclmulqdq dtes64 monitor ds

[PATCH 2/3] watchdog: control hard lockup detection default

2014-07-24 Thread Andrew Jones
From: Ulrich Obergfell 

In some cases we don't want hard lockup detection enabled by default.
An example is when running as a guest. Introduce

  watchdog_enable_hardlockup_detector(bool)

allowing those cases to disable hard lockup detection. This must be
executed early by the boot processor from e.g. smp_prepare_boot_cpu,
in order to allow kernel command line arguments to override it, as
well as to avoid hard lockup detection being enabled before we've
had a chance to indicate that it's unwanted. In summary,

  initial boot: default=enabled
  smp_prepare_boot_cpu
watchdog_enable_hardlockup_detector(false): default=disabled
  cmdline has 'nmi_watchdog=1': default=enabled

The running kernel still has the ability to enable/disable at any
time with /proc/sys/kernel/nmi_watchdog as usual. However even
when the default has been overridden /proc/sys/kernel/nmi_watchdog
will initially show '1'. To truly turn it on one must disable/enable
it, i.e.
  echo 0 > /proc/sys/kernel/nmi_watchdog
  echo 1 > /proc/sys/kernel/nmi_watchdog

This patch will be immediately useful for KVM with the next patch
of this series. Other hypervisor guest types may find it useful as
well.

Signed-off-by: Ulrich Obergfell 
Signed-off-by: Andrew Jones 
---
 include/linux/nmi.h |  9 +
 kernel/watchdog.c   | 45 +++--
 2 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 447775ee2c4b0..72aacf4e3d539 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -17,11 +17,20 @@
 #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
 #include 
 extern void touch_nmi_watchdog(void);
+extern void watchdog_enable_hardlockup_detector(bool val);
+extern bool watchdog_hardlockup_detector_is_enabled(void);
 #else
 static inline void touch_nmi_watchdog(void)
 {
touch_softlockup_watchdog();
 }
+static inline void watchdog_enable_hardlockup_detector(bool)
+{
+}
+static inline bool watchdog_hardlockup_detector_is_enabled(void)
+{
+   return true;
+}
 #endif
 
 /*
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index c985a21926545..34eca29e28a4c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -63,6 +63,25 @@ static unsigned long soft_lockup_nmi_warn;
 static int hardlockup_panic =
CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
 
+static bool hardlockup_detector_enabled = true;
+/*
+ * We may not want to enable hard lockup detection by default in all cases,
+ * for example when running the kernel as a guest on a hypervisor. In these
+ * cases this function can be called to disable hard lockup detection. This
+ * function should only be executed once by the boot processor before the
+ * kernel command line parameters are parsed, because otherwise it is not
+ * possible to override this in hardlockup_panic_setup().
+ */
+void watchdog_enable_hardlockup_detector(bool val)
+{
+   hardlockup_detector_enabled = val;
+}
+
+bool watchdog_hardlockup_detector_is_enabled(void)
+{
+   return hardlockup_detector_enabled;
+}
+
 static int __init hardlockup_panic_setup(char *str)
 {
if (!strncmp(str, "panic", 5))
@@ -71,6 +90,14 @@ static int __init hardlockup_panic_setup(char *str)
hardlockup_panic = 0;
else if (!strncmp(str, "0", 1))
watchdog_user_enabled = 0;
+   else if (!strncmp(str, "1", 1) || !strncmp(str, "2", 1)) {
+   /*
+* Setting 'nmi_watchdog=1' or 'nmi_watchdog=2' (legacy option)
+* has the same effect.
+*/
+   watchdog_user_enabled = 1;
+   watchdog_enable_hardlockup_detector(true);
+   }
return 1;
 }
 __setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -451,6 +478,15 @@ static int watchdog_nmi_enable(unsigned int cpu)
struct perf_event_attr *wd_attr;
struct perf_event *event = per_cpu(watchdog_ev, cpu);
 
+   /*
+* Some kernels need to default hard lockup detection to
+* 'disabled', for example a guest on a hypervisor.
+*/
+   if (!watchdog_hardlockup_detector_is_enabled()) {
+   event = ERR_PTR(-ENOENT);
+   goto handle_err;
+   }
+
/* is it already setup and enabled? */
if (event && event->state > PERF_EVENT_STATE_OFF)
goto out;
@@ -465,6 +501,7 @@ static int watchdog_nmi_enable(unsigned int cpu)
/* Try to register using hardware perf events */
event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, 
watchdog_overflow_callback, NULL);
 
+handle_err:
/* save cpu0 error for future comparision */
if (cpu == 0 && IS_ERR(event))
cpu0_err = PTR_ERR(event);
@@ -610,11 +647,13 @@ int proc_dowatchdog(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
 {
int err, old_

[PATCH 0/3] watchdog: kvm: disable hard lockup detection by default

2014-07-24 Thread Andrew Jones
It's not recommended for KVM guests to enable hard lockup detection, as
false positives may be easily triggered by, for example, vcpu overcommit.
However any kernel compiled with HARDLOCKUP_DETECTOR that detects a PMU
on boot will by default enable hard lockup detection. This series gives
a kernel a mechanism to opt out of this default. Users can still force
hard lockup detection on using the kernel command line option
'nmi_watchdog=1'.

The first patch is a watchdog fix, and can be taken separately. The next
patch provides the default opt out mechanism, and the final patch applies
it to kvm guests.

Thanks in advance for reviews,
drew


Ulrich Obergfell (3):
  watchdog: fix print-once on enable
  watchdog: control hard lockup detection default
  kvm: ensure hard lockup detection is disabled by default

 arch/x86/kernel/kvm.c |  8 
 include/linux/nmi.h   |  9 +
 kernel/watchdog.c | 48 ++--
 3 files changed, 63 insertions(+), 2 deletions(-)

-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] watchdog: fix print-once on enable

2014-07-24 Thread Andrew Jones
From: Ulrich Obergfell 

This patch avoids printing the message 'enabled on all CPUs, ...'
multiple times. For example, the issue can occur in the following
scenario:

1) watchdog_nmi_enable() fails to enable PMU counters and sets
   cpu0_err.

2) 'echo [0|1] > /proc/sys/kernel/nmi_watchdog' is executed to
   disable and re-enable the watchdog mechanism 'on the fly'.

3) If watchdog_nmi_enable() succeeds to enable PMU counters, each
   CPU will print the message because step1 left behind a non-zero
   cpu0_err.

   if (!IS_ERR(event)) {
   if (cpu == 0 || cpu0_err)
   pr_info("enabled on all CPUs, ...")

The patch avoids this by clearing cpu0_err in watchdog_nmi_disable().

Signed-off-by: Ulrich Obergfell 
Signed-off-by: Andrew Jones 
---
 kernel/watchdog.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index c3319bd1b0408..c985a21926545 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -511,6 +511,9 @@ static void watchdog_nmi_disable(unsigned int cpu)
/* should be in cleanup, but blocks oprofile */
perf_event_release_kernel(event);
}
+   if (cpu == 0)
+   /* watchdog_nmi_enable() expects this to be zero initially. */
+   cpu0_err = 0;
return;
 }
 #else
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] kvm: ensure hard lockup detection is disabled by default

2014-07-24 Thread Andrew Jones
From: Ulrich Obergfell 

Use watchdog_enable_hardlockup_detector() to set hard lockup detection's
default value to false. It's risky to run this detection in a guest, as
false positives are easy to trigger, especially if the host is
overcommitted.

Signed-off-by: Ulrich Obergfell 
Signed-off-by: Andrew Jones 
---
 arch/x86/kernel/kvm.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 3dd8e2c4d74a9..95c3cb16af3e5 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -499,6 +500,13 @@ void __init kvm_guest_init(void)
 #else
kvm_guest_cpu_init();
 #endif
+
+   /*
+* Hard lockup detection is enabled by default. Disable it, as guests
+* can get false positives too easily, for example if the host is
+* overcommitted.
+*/
+   watchdog_enable_hardlockup_detector(false);
 }
 
 static noinline uint32_t __kvm_cpuid_base(void)
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3.11 002/128] MIPS: KVM: Remove redundant NULL checks before kfree()

2014-07-24 Thread Luis Henriques
3.11.10.14 -stable review patch.  If anyone has any objections, please let me 
know.

--

From: James Hogan 

commit c6c0a6637f9da54f9472144d44f71cf847f92e20 upstream.

The kfree() function already NULL checks the parameter so remove the
redundant NULL checks before kfree() calls in arch/mips/kvm/.

Signed-off-by: James Hogan 
Cc: Paolo Bonzini 
Cc: Gleb Natapov 
Cc: kvm@vger.kernel.org
Cc: Ralf Baechle 
Cc: linux-m...@linux-mips.org
Cc: Sanjay Lal 
Signed-off-by: Paolo Bonzini 
Signed-off-by: Luis Henriques 
---
 arch/mips/kvm/kvm_mips.c | 12 +++-
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c
index 426345ac6f6e..7e78af0e57de 100644
--- a/arch/mips/kvm/kvm_mips.c
+++ b/arch/mips/kvm/kvm_mips.c
@@ -149,9 +149,7 @@ void kvm_mips_free_vcpus(struct kvm *kvm)
if (kvm->arch.guest_pmap[i] != KVM_INVALID_PAGE)
kvm_mips_release_pfn_clean(kvm->arch.guest_pmap[i]);
}
-
-   if (kvm->arch.guest_pmap)
-   kfree(kvm->arch.guest_pmap);
+   kfree(kvm->arch.guest_pmap);
 
kvm_for_each_vcpu(i, vcpu, kvm) {
kvm_arch_vcpu_free(vcpu);
@@ -384,12 +382,8 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 
kvm_mips_dump_stats(vcpu);
 
-   if (vcpu->arch.guest_ebase)
-   kfree(vcpu->arch.guest_ebase);
-
-   if (vcpu->arch.kseg0_commpage)
-   kfree(vcpu->arch.kseg0_commpage);
-
+   kfree(vcpu->arch.guest_ebase);
+   kfree(vcpu->arch.kseg0_commpage);
 }
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] watchdog: control hard lockup detection default

2014-07-24 Thread Paolo Bonzini
Il 24/07/2014 12:13, Andrew Jones ha scritto:
> 
> The running kernel still has the ability to enable/disable at any
> time with /proc/sys/kernel/nmi_watchdog us usual. However even
> when the default has been overridden /proc/sys/kernel/nmi_watchdog
> will initially show '1'. To truly turn it on one must disable/enable
> it, i.e.
>   echo 0 > /proc/sys/kernel/nmi_watchdog
>   echo 1 > /proc/sys/kernel/nmi_watchdog

Why is it hard to make this show the right value? :)

Paolo

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/4] KVM: use pci device flag operation helper functions

2014-07-24 Thread Paolo Bonzini
Il 22/07/2014 18:19, Ethan Zhao ha scritto:
> Use helper function instead of direct operation to pci device
> flag when set device to assigned or deassigned.
> 
> Signed-off-by: Ethan Zhao 
> ---
>  virt/kvm/assigned-dev.c |2 +-
>  virt/kvm/iommu.c|4 ++--
>  2 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
> index bf06577..d122bda 100644
> --- a/virt/kvm/assigned-dev.c
> +++ b/virt/kvm/assigned-dev.c
> @@ -302,7 +302,7 @@ static void kvm_free_assigned_device(struct kvm *kvm,
>   else
>   pci_restore_state(assigned_dev->dev);
>  
> - assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
> + pci_set_dev_deassigned(assigned_dev->dev);
>  
>   pci_release_regions(assigned_dev->dev);
>   pci_disable_device(assigned_dev->dev);
> diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
> index 0df7d4b..8cfe021 100644
> --- a/virt/kvm/iommu.c
> +++ b/virt/kvm/iommu.c
> @@ -194,7 +194,7 @@ int kvm_assign_device(struct kvm *kvm,
>   goto out_unmap;
>   }
>  
> - pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
> + pci_set_dev_assigned(pdev);
>  
>   dev_info(&pdev->dev, "kvm assign device\n");
>  
> @@ -220,7 +220,7 @@ int kvm_deassign_device(struct kvm *kvm,
>  
>   iommu_detach_device(domain, &pdev->dev);
>  
> - pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
> + pci_set_dev_deassigned(pdev);
>  
>   dev_info(&pdev->dev, "kvm deassign device\n");
>  
> 

Acked-by: Paolo Bonzini 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] watchdog: control hard lockup detection default

2014-07-24 Thread Ulrich Obergfell
> - Original Message -
> From: "Paolo Bonzini" 
> To: "Andrew Jones" , linux-ker...@vger.kernel.org, 
> kvm@vger.kernel.org
> Cc: uober...@redhat.com, dzic...@redhat.com, a...@linux-foundation.org, 
> mi...@redhat.com
> Sent: Thursday, July 24, 2014 12:46:11 PM
> Subject: Re: [PATCH 2/3] watchdog: control hard lockup detection default
>
>Il 24/07/2014 12:13, Andrew Jones ha scritto:
>> 
>> The running kernel still has the ability to enable/disable at any
>> time with /proc/sys/kernel/nmi_watchdog us usual. However even
>> when the default has been overridden /proc/sys/kernel/nmi_watchdog
>> will initially show '1'. To truly turn it on one must disable/enable
>> it, i.e.
>>   echo 0 > /proc/sys/kernel/nmi_watchdog
>>   echo 1 > /proc/sys/kernel/nmi_watchdog
>
> Why is it hard to make this show the right value? :)
>
> Paolo

'echo 1 > /proc/sys/kernel/nmi_watchdog' enables both - hard lockup and
soft lockup detection. watchdog_enable_all_cpus() starts a 'watchdog/N'
thread for each CPU. If the kernel runs on a bare metal system where the
processor does not have a PMU, or when perf_event_create_kernel_counter()
returns failure to watchdog_nmi_enable(), or when the kernel runs as a
guest on a hypervisor that does not emulate a PMU, then the 'watchdog/N'
threads are still active for soft lockup detection. Patch 2/3 essentially
makes watchdog_nmi_enable() behave in the same way as if -ENOENT would
have been returned by perf_event_create_kernel_counter(). This is then
reported via a console message.

  NMI watchdog: disabled (cpu0): hardware events not enabled

It's hard to say what _is_ 'the right value' (because lockup detection is
then enabled 'partially'), regardless of whether patch 2/3 is applied
or not.

Regards,

Uli
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] watchdog: control hard lockup detection default

2014-07-24 Thread Paolo Bonzini
Il 24/07/2014 13:18, Ulrich Obergfell ha scritto:
>>> >> The running kernel still has the ability to enable/disable at any
>>> >> time with /proc/sys/kernel/nmi_watchdog us usual. However even
>>> >> when the default has been overridden /proc/sys/kernel/nmi_watchdog
>>> >> will initially show '1'. To truly turn it on one must disable/enable
>>> >> it, i.e.
>>> >>   echo 0 > /proc/sys/kernel/nmi_watchdog
>>> >>   echo 1 > /proc/sys/kernel/nmi_watchdog
>> >
>> > Why is it hard to make this show the right value? :)
>> >
>> > Paolo
> 'echo 1 > /proc/sys/kernel/nmi_watchdog' enables both - hard lockup and
> soft lockup detection. watchdog_enable_all_cpus() starts a 'watchdog/N'
> thread for each CPU. If the kernel runs on a bare metal system where the
> processor does not have a PMU, or when perf_event_create_kernel_counter()
> returns failure to watchdog_nmi_enable(), or when the kernel runs as a
> guest on a hypervisor that does not emulate a PMU, then the 'watchdog/N'
> threads are still active for soft lockup detection. Patch 2/3 essentially
> makes watchdog_nmi_enable() behave in the same way as if -ENOENT would
> have been returned by perf_event_create_kernel_counter(). This is then
> reported via a console message.
> 
>   NMI watchdog: disabled (cpu0): hardware events not enabled
> 
> It's hard say what _is_ 'the right value' (because lockup detection is
> then enabled 'partially'), regardless of whether patch 2/3 is applied
> or not.

But this means that it is not possible to re-enable softlockup detection
only.  I think that should be the effect of echo 0 + echo 1, if
hardlockup detection was disabled by either the command line or patch 3.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] watchdog: control hard lockup detection default

2014-07-24 Thread Ulrich Obergfell
>- Original Message -
>From: "Paolo Bonzini" 
>To: "Ulrich Obergfell" 
>Cc: "Andrew Jones" , linux-ker...@vger.kernel.org, 
>kvm@vger.kernel.org, dzic...@redhat.com, a...@linux-foundation.org, 
>>mi...@redhat.com
>Sent: Thursday, July 24, 2014 1:26:40 PM
>Subject: Re: [PATCH 2/3] watchdog: control hard lockup detection default
>
>Il 24/07/2014 13:18, Ulrich Obergfell ha scritto:
 >> The running kernel still has the ability to enable/disable at any
 >> time with /proc/sys/kernel/nmi_watchdog us usual. However even
 >> when the default has been overridden /proc/sys/kernel/nmi_watchdog
 >> will initially show '1'. To truly turn it on one must disable/enable
 >> it, i.e.
 >>   echo 0 > /proc/sys/kernel/nmi_watchdog
 >>   echo 1 > /proc/sys/kernel/nmi_watchdog
>>> >
>>> > Why is it hard to make this show the right value? :)
>>> >
>>> > Paolo
>> 'echo 1 > /proc/sys/kernel/nmi_watchdog' enables both - hard lockup and
>> soft lockup detection. watchdog_enable_all_cpus() starts a 'watchdog/N'
>> thread for each CPU. If the kernel runs on a bare metal system where the
>> processor does not have a PMU, or when perf_event_create_kernel_counter()
>> returns failure to watchdog_nmi_enable(), or when the kernel runs as a
>> guest on a hypervisor that does not emulate a PMU, then the 'watchdog/N'
>> threads are still active for soft lockup detection. Patch 2/3 essentially
>> makes watchdog_nmi_enable() behave in the same way as if -ENOENT would
>> have been returned by perf_event_create_kernel_counter(). This is then
>> reported via a console message.
>> 
>>   NMI watchdog: disabled (cpu0): hardware events not enabled
>> 
>> It's hard say what _is_ 'the right value' (because lockup detection is
>> then enabled 'partially'), regardless of whether patch 2/3 is applied
>> or not.
>
> But this means that it is not possible to re-enable softlockup detection
> only.  I think that should be the effect of echo 0 + echo 1, if
> hardlockup detection was disabled by either the command line or patch 3.
>
> Paolo

The idea was to give the user two options to override the effect of patch 3/3.
Either via the kernel command line ('nmi_watchdog=') at boot time or via /proc
('echo 0' + 'echo 1') when the system is up and running.

Regards,

Uli
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] watchdog: control hard lockup detection default

2014-07-24 Thread Paolo Bonzini
Il 24/07/2014 13:44, Ulrich Obergfell ha scritto:
> > But this means that it is not possible to re-enable softlockup detection
> > only.  I think that should be the effect of echo 0 + echo 1, if
> > hardlockup detection was disabled by either the command line or patch 3.
>
> The idea was to give the user two options to override the effect of patch 3/3.
> Either via the kernel command line ('nmi_watchdog=') at boot time or via /proc
> ('echo 0' + 'echo 1') when the system is up and running.

I think the kernel command line is enough; another alternative is to
split the nmi_watchdog /proc entry in two.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] watchdog: control hard lockup detection default

2014-07-24 Thread Ulrich Obergfell
>- Original Message -
>From: "Paolo Bonzini" 
>To: "Ulrich Obergfell" 
>Cc: "Andrew Jones" , linux-ker...@vger.kernel.org, 
>kvm@vger.kernel.org, dzic...@redhat.com, a...@linux-foundation.org, 
>>mi...@redhat.com
>Sent: Thursday, July 24, 2014 1:45:47 PM
>Subject: Re: [PATCH 2/3] watchdog: control hard lockup detection default
>
>Il 24/07/2014 13:44, Ulrich Obergfell ha scritto:
>> > But this means that it is not possible to re-enable softlockup detection
>> > only.  I think that should be the effect of echo 0 + echo 1, if
>> > hardlockup detection was disabled by either the command line or patch 3.
>>
>> The idea was to give the user two options to override the effect of patch 
>> 3/3.
>> Either via the kernel command line ('nmi_watchdog=') at boot time or via 
>> /proc
>> ('echo 0' + 'echo 1') when the system is up and running.
>
> I think the kernel command line is enough; another alternative is to
> split the nmi_watchdog /proc entry in two.
>
> Paolo

The current behaviour (without the patch) already allows a user to disable
NMI watchdog at boot time ('nmi_watchdog=0') and enable it explicitly when
the system is up and running ('echo 0' + 'echo 1'). I think it would be
more consistent with this behaviour and more intuitive if we would give
the user the option to override the effect of patch 3/3 via /proc. By
'intuitive' I mean that the user says: 'I _want_ this to be enabled'.

Regards,

Uli

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] KVM: MMU: allow pinning spte translations (TDP-only)

2014-07-24 Thread Nadav Amit

On Jul 18, 2014, at 12:38 AM, Marcelo Tosatti  wrote:

> On Thu, Jul 17, 2014 at 08:18:03PM +0300, Nadav Amit wrote:
>> Small question if I may regarding kvm_mmu_pin_pages:
>> 
...
>> I understand that the current use-case is for pinning only few
>> pages. Yet, wouldn't it be better (for performance) to check whether
>> the gfn uses a large page and if so to skip forward, increasing
>> gfn_offset to point to the next large page?
> 
> Sure, that can be a lazy optimization and performed when necessary?
> 
> (feel free to do it in advance if you're interested in doing it 
> now).
I would do it once your patch is applied (I don’t see it in the queue).

Thanks,
Nadav


signature.asc
Description: Message signed with OpenPGP using GPGMail


Re: [PATCH v6 0/5] Read guest last instruction from kvmppc_get_last_inst()

2014-07-24 Thread Alexander Graf


On 23.07.14 18:06, Mihai Caraman wrote:

Read guest last instruction from kvmppc_get_last_inst() allowing the function
to fail in order to emulate again. On bookehv architecture search for
the physical address and kmap it, instead of using Load External PID (lwepx)
instruction. This fixes an infinite loop caused by lwepx's data TLB miss
exception handled in the host and the TODO for execute-but-not-read entries
and TLB eviction.

Mihai Caraman (5):
   KVM: PPC: e500mc: Revert "add load inst fixup"
   KVM: PPC: Book3e: Add TLBSEL/TSIZE defines for MAS0/1
   KVM: PPC: Book3s: Remove kvmppc_read_inst() function
   KVM: PPC: Alow kvmppc_get_last_inst() to fail
   KVM: PPC: Bookehv: Get vcpu's last instruction for emulation


Thanks, applied to kvm-ppc-queue.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: PPC: fix incorrect way saving SPRN_MMCR2

2014-07-24 Thread Alexander Graf


On 23.07.14 13:52, Xiao Guangrong wrote:

SPRN_SIER and SPRN_MMCR2 are doubly saved; in particular,
SPRN_MMCR2 is oversaved with an incorrect value which comes
from SPRN_PMC5

Signed-off-by: Xiao Guangrong 


This patch is already in upstream:

  f73128f4f680e8be68cda831f2710214559583cb


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] kvm: ppc: bookehv: Save restore SPRN_SPRG9 on guest entry exit

2014-07-24 Thread Alexander Graf


On 21.07.14 07:53, Bharat Bhushan wrote:

SPRN_SPRG9 is used by the debug interrupt handler, so this is required for
debug support.

Signed-off-by: Bharat Bhushan 
---
v1->v2
  - sprng9 is 64bit, not 32bit


Looks very reasonable, but is missing a ONE_REG interface to make the 
register available to user space.



Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/6 v2] kvm: ppc: bookehv: Added wrapper macros for shadow registers

2014-07-24 Thread Alexander Graf


On 18.07.14 02:55, Scott Wood wrote:

On Thu, 2014-07-17 at 17:01 +0530, Bharat Bhushan wrote:

There are shadow registers like GSPRG[0-3], GSRR0, GSRR1 etc. on
BOOKE-HV, and these shadow registers are guest accessible.
So these shadow registers need to be updated on BOOKE-HV.
This patch adds new macros for get/set helpers of shadow registers.

Signed-off-by: Bharat Bhushan 
---
v1->v2
  - Fix compilation for book3s (separate macro etc)

  arch/powerpc/include/asm/kvm_ppc.h | 44 +++---
  1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index f3f7611..7646994 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -475,8 +475,20 @@ static inline bool kvmppc_shared_big_endian(struct 
kvm_vcpu *vcpu)
  #endif
  }
  
+#define SPRNG_WRAPPER_GET(reg, e500hv_spr)\

+static inline ulong kvmppc_get_##reg(struct kvm_vcpu *vcpu)\
+{  \
+   return mfspr(e500hv_spr);   \
+}  \
+
+#define SPRNG_WRAPPER_SET(reg, e500hv_spr) \
+static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, ulong val)  \
+{  \
+   mtspr(e500hv_spr, val); \
+}  \

Why "e500hv" rather than "bookehv"?


No good reason. Bharat, could you please send a quick patch to rename them?


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/6] IRQFD without IRQ routing, enabled for XICS

2014-07-24 Thread Eric Auger
Hi Paul,

I also confirm your patch works fine on my ARM test environment. I was
able to run
- with irqchip without regression
- without irqchip (ie removing routing totally), just implementing
identity kvm_irq_map_gsi and kvm_irq_map_chip_pin and proper
kvm_set_irq. The overall integration becomes much simpler :-)

Many thanks

Best Regards

Eric

 On 07/14/2014 04:18 PM, Cornelia Huck wrote:
> On Mon, 30 Jun 2014 20:51:08 +1000
> Paul Mackerras  wrote:
> 
>> I would like to see this go into 3.17.
> 
> FWIW: I've given this a whirl on s390 (with a dataplane disk), and
> everything seems to work as before.
> 
> The only thing which is I think worth mentioning is that embedding the
> routing entry into the irqfd struct will grow it a bit, which might be
> noticable on large installations with hundreds of devices. OTOH, the
> routing entry isn't too large, so I don't think it will become a
> problem.
> 
>>
>>  arch/ia64/kvm/Kconfig|   1 +
>>  arch/powerpc/kvm/Kconfig |   3 +
>>  arch/powerpc/kvm/book3s_hv_rm_xics.c |   5 ++
>>  arch/powerpc/kvm/book3s_xics.c   |  55 +++---
>>  arch/powerpc/kvm/book3s_xics.h   |   2 +
>>  arch/powerpc/kvm/mpic.c  |   4 +-
>>  arch/s390/kvm/Kconfig|   1 +
>>  arch/s390/kvm/interrupt.c|   3 +-
>>  arch/x86/kvm/Kconfig |   1 +
>>  include/linux/kvm_host.h |  43 ---
>>  virt/kvm/Kconfig |   3 +
>>  virt/kvm/eventfd.c   | 134 
>> ++-
>>  virt/kvm/irq_comm.c  |  24 +++
>>  virt/kvm/irqchip.c   |  98 ++---
>>  virt/kvm/kvm_main.c  |   2 +-
>>  15 files changed, 227 insertions(+), 152 deletions(-)
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: PPC: fix incorrect way saving SPRN_MMCR2

2014-07-24 Thread Xiao Guangrong

On Jul 24, 2014, at 9:09 PM, Alexander Graf  wrote:

> 
> On 23.07.14 13:52, Xiao Guangrong wrote:
>> SPRN_SIER and SPRN_MMCR2 are doubly saved; in particular,
>> SPRN_MMCR2 is over-saved with an incorrect value which comes
>> from SPRN_PMC5
>> 
>> Signed-off-by: Xiao Guangrong 
> 
> This patch is already in upstream:
> 
> f73128f4f680e8be68cda831f2710214559583cb

I was using the kvm tree and did not find it on the ‘queue’ branch, maybe i should
use your tree instead. Sorry for the noise and thanks for your check. :)--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM Forum 2014 Call for Participation (reminder)

2014-07-24 Thread Paolo Bonzini
The deadline is coming in three days!

Paolo

Il 16/06/2014 18:08, Paolo Bonzini ha scritto:
> =
> KVM Forum 2014: Call For Participation
> October 14-16, 2014 - Congress Centre Düsseldorf - Düsseldorf, Germany
> 
> (All submissions must be received before midnight July 27, 2014)
> =
> 
> KVM is an industry leading open source hypervisor that provides an ideal
> platform for datacenter virtualization, virtual desktop infrastructure,
> and cloud computing.  Once again, it's time to bring together the
> community of developers and users that define the KVM ecosystem for
> our annual technical conference.  We will discuss the current state of
> affairs and plan for the future of KVM, its surrounding infrastructure,
> and management tools.  Mark your calendar and join us in advancing KVM.
> http://events.linuxfoundation.org/events/kvm-forum/
> 
> Once again we are colocated with the Linux Foundation's LinuxCon Europe,
> CloudOpen Europe, Embedded Linux Conference (ELC) Europe, and this year,
> the
> Linux Plumbers Conference. KVM Forum attendees will be able to attend
> LinuxCon + CloudOpen + ELC for a discounted rate.
> http://events.linuxfoundation.org/events/kvm-forum/attend/register
> 
> We invite you to lead part of the discussion by submitting a speaking
> proposal for KVM Forum 2014.
> http://events.linuxfoundation.org/cfp
> 
> Suggested topics:
> 
>  KVM/Kernel
>  - Scaling and optimizations
>  - Nested virtualization
>  - Linux kernel performance improvements
>  - Resource management (CPU, I/O, memory)
>  - Hardening and security
>  - VFIO: SR-IOV, GPU, platform device assignment
>  - Architecture ports
> 
>  QEMU
>  - Management interfaces: QOM and QMP
>  - New devices, new boards, new architectures
>  - Scaling and optimizations
>  - Desktop virtualization and SPICE
>  - Virtual GPU
>  - virtio and vhost, including non-Linux or non-virtualized uses
>  - Hardening and security
>  - New storage features
>  - Live migration and fault tolerance
>  - High availability and continuous backup
>  - Real-time guest support
>  - Emulation and TCG
>  - Firmware: ACPI, UEFI, coreboot, u-Boot, etc.
>  - Testing
> 
>  Management and infrastructure
>  - Managing KVM: Libvirt, OpenStack, oVirt, etc.
>  - Storage: glusterfs, Ceph, etc.
>  - Software defined networking: Open vSwitch, OpenDaylight, etc.
>  - Network Function Virtualization
>  - Security
>  - Provisioning
>  - Performance tuning
> 
> 
> ===
> SUBMITTING YOUR PROPOSAL
> ===
> Abstracts due: July 27, 2014
> 
> Please submit a short abstract (~150 words) describing your presentation
> proposal. Slots vary in length up to 45 minutes.  Also include in your
> proposal
> the proposal type -- one of:
> - technical talk
> - end-user talk
> 
> Submit your proposal here:
> http://events.linuxfoundation.org/cfp
> Please only use the categories "presentation" and "panel discussion"
> 
> You will receive a notification whether or not your presentation proposal
> was accepted by Aug 20th.
> 
> Speakers will receive a complimentary pass for the event. In the instance
> that your submission has multiple presenters, only the primary speaker
> for a
> proposal will receive a complementary event pass. For panel discussions,
> all
> panelists will receive a complimentary event pass.
> 
> TECHNICAL TALKS
> 
> A good technical talk should not just report on what has happened over
> the last year; it should present a concrete problem and how it impacts
> the user and/or developer community. Whenever applicable, it should
> focus on the work that needs to be done or the difficulties that haven't
> yet
> been solved.  Summarizing recent developments is okay but it should
> not be more than a small portion of the overall talk.
> 
> END-USER TALKS
> 
> One of the big challenges as developers is to know what, where and how
> people actually use our software.  We will reserve a few slots for end
> users talking about their deployment challenges and achievements.
> 
> If you are using KVM in production you are encouraged submit a speaking
> proposal.  Simply mark it as an end-user talk.  As an end user, this is a
> unique opportunity to get your input to developers.
> 
> HANDS-ON / BOF SESSIONS
> 
> We will reserve some time for people to get together and discuss
> strategic decisions as well as other topics that are best solved within
> smaller groups. This time can also be used for hands-on hacking
> sessions if you have concrete code problems to solve.
> 
> These sessions will be announced during the event. If you are interested
> in organizing such a session, please add it to the list at
> 
>   http://www.linux-kvm.org/page/KVM_Forum_2014_BOF
> 
> Let people you think might be interested know about it, and encourage
> them to add their names to the wiki page as well. Please try to
> add your ideas to the list before KVM Forum star

Email Account Warning !!!

2014-07-24 Thread account
This mail is from Administrator; we wish to bring to your notice the Condition 
of your email account.
We have just noticed that you have exceeded your email Database limit of 500 MB 
quota and your email IP is causing conflict because it is been accessed in 
different server location. You need to Upgrade and expand your email quota 
limit before you can continue to use your email. Provide details or click the 
link below :


Update your email quota limit to 2.6 GB, use the below web link:
  
https://www.formlogix.com/Manager/UserConditionalSurvey248463.aspx?Param=VXNlcklkPTI0ODQ2My5Gb3JtSWQ9MQ%3d%3d

Failure to do this will result to email deactivation within 24hours
Thank you for your understanding.

Copyright 2014 Help Desk
Email Upgrade 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform

2014-07-24 Thread Will Deacon
If the physical address of GICV isn't page-aligned, then we end up
creating a stage-2 mapping of the page containing it, which causes us to
map neighbouring memory locations directly into the guest.

As an example, consider a platform with GICV at physical 0x2c02f000
running a 64k-page host kernel. If qemu maps this into the guest at
0x8001, then guest physical addresses 0x8001 - 0x8001efff will
map host physical region 0x2c02 - 0x2c02efff. Accesses to these
physical regions may cause UNPREDICTABLE behaviour, for example, on the
Juno platform this will cause an SError exception to EL3, which brings
down the entire physical CPU resulting in RCU stalls / HYP panics / host
crashing / wasted weeks of debugging.

SBSA recommends that systems alias the 4k GICV across the bounding 64k
region, in which case GICV physical could be described as 0x2c02 in
the above scenario.

This patch fixes the problem by failing the vgic probe if the physical
address of GICV isn't page-aligned. Note that this generated a warning
in dmesg about freeing enabled IRQs, so I had to move the IRQ enabling
later in the probe.

Cc: Christoffer Dall 
Cc: Marc Zyngier 
Cc: Gleb Natapov 
Cc: Paolo Bonzini 
Cc: Joel Schopp 
Cc: Don Dutile 
Cc: Peter Maydell 
Cc: 
Signed-off-by: Will Deacon 
---

Paolo, Gleb,

This fixes a *really* nasty bug with 64k-page hosts and KVM. I believe
Marc and Christoffer are both on holiday at the moment (not together),
so could you please take this as an urgent fix? Without it, I can trivially
bring down machines using kvm. I've checked that it applies cleanly against
-next, so you shouldn't see any conflicts during the merge window.

Thanks,

Will

 virt/kvm/arm/vgic.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 56ff9bebb577..fa9a95b3ed19 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1526,17 +1526,25 @@ int kvm_vgic_hyp_init(void)
goto out_unmap;
}
 
-   kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
-vctrl_res.start, vgic_maint_irq);
-   on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
-
if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
kvm_err("Cannot obtain VCPU resource\n");
ret = -ENXIO;
goto out_unmap;
}
+
+   if (!PAGE_ALIGNED(vcpu_res.start)) {
+   kvm_err("GICV physical address 0x%llx not page aligned\n",
+   (unsigned long long)vcpu_res.start);
+   ret = -ENXIO;
+   goto out_unmap;
+   }
+
vgic_vcpu_base = vcpu_res.start;
 
+   kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
+vctrl_res.start, vgic_maint_irq);
+   on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
+
goto out;
 
 out_unmap:
-- 
2.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform

2014-07-24 Thread Peter Maydell
On 24 July 2014 20:27, Will Deacon  wrote:
> If the physical address of GICV isn't page-aligned, then we end up
> creating a stage-2 mapping of the page containing it, which causes us to
> map neighbouring memory locations directly into the guest.
>
> As an example, consider a platform with GICV at physical 0x2c02f000
> running a 64k-page host kernel. If qemu maps this into the guest at
> 0x8001, then guest physical addresses 0x8001 - 0x8001efff will
> map host physical region 0x2c02 - 0x2c02efff. Accesses to these
> physical regions may cause UNPREDICTABLE behaviour, for example, on the
> Juno platform this will cause an SError exception to EL3, which brings
> down the entire physical CPU resulting in RCU stalls / HYP panics / host
> crashing / wasted weeks of debugging.

This seems to me like a specific problem with Juno rather than an
issue with having the GICV at a non-page-aligned start. The
requirement to be able to expose host GICV as the guest GICC
in a 64K pages system is just "nothing else in that 64K page
(or pages, if the GICV runs across two pages) is allowed to be
unsafe for the guest to touch", which remains true whether the
GICV starts at 0K in the 64K page or 60K.

> SBSA recommends that systems alias the 4k GICV across the bounding 64k
> region, in which case GICV physical could be described as 0x2c02 in
> the above scenario.

The SBSA "make every 4K region in the 64K page be the same thing"
recommendation is one way of satisfying the requirement that the
whole 64K page is safe for the guest to touch. (Making the rest of
the page RAZ/WI would be another option I guess.) If your system
actually implements the SBSA recommendation then in fact
describing the GICV-phys-base as the 64K-aligned address is wrong,
because then the register at GICV-base + 4K would not be
the first register in the 2nd page of the GICV, it would be another
copy of the 1st page. This happens to work on Linux guests
currently because they don't touch anything in the 2nd page,
but for cases like device passthrough IIRC we might well like
the guest to use some of the 2nd page registers. So the only
correct choice on those systems is to specify the +60K address
as the GICV physaddr in the device tree, and use Marc's patchset
to allow QEMU/kvmtool to determine the page offset within the 64K
page so it can reflect that in the guest's device tree.

I can't think of any way of determining whether a particular
system gets this right or wrong automatically, which suggests
perhaps we need to allow the device tree to specify that the
GICV is 64k-page-safe...

thanks
-- PMM
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform

2014-07-24 Thread Will Deacon
On Thu, Jul 24, 2014 at 08:47:23PM +0100, Peter Maydell wrote:
> On 24 July 2014 20:27, Will Deacon  wrote:
> > If the physical address of GICV isn't page-aligned, then we end up
> > creating a stage-2 mapping of the page containing it, which causes us to
> > map neighbouring memory locations directly into the guest.
> >
> > As an example, consider a platform with GICV at physical 0x2c02f000
> > running a 64k-page host kernel. If qemu maps this into the guest at
> > 0x8001, then guest physical addresses 0x8001 - 0x8001efff will
> > map host physical region 0x2c02 - 0x2c02efff. Accesses to these
> > physical regions may cause UNPREDICTABLE behaviour, for example, on the
> > Juno platform this will cause an SError exception to EL3, which brings
> > down the entire physical CPU resulting in RCU stalls / HYP panics / host
> > crashing / wasted weeks of debugging.
> 
> This seems to me like a specific problem with Juno rather than an
> issue with having the GICV at a non-page-aligned start. The
> requirement to be able to expose host GICV as the guest GICC
> in a 64K pages system is just "nothing else in that 64K page
> (or pages, if the GICV runs across two pages) is allowed to be
> unsafe for the guest to touch", which remains true whether the
> GICV starts at 0K in the 64K page or 60K.

I agree, and for that we would need a new ioctl so we can query the
page-offset of the GICV on systems where it is safe. Given that such an
ioctl doesn't exist today, I would like to plug the hole in mainline kernels
with this patch, we can relax in the future if systems appear where it would
be safe to map the entire 64k region.

> > SBSA recommends that systems alias the 4k GICV across the bounding 64k
> > region, in which case GICV physical could be described as 0x2c02 in
> > the above scenario.
> 
> The SBSA "make every 4K region in the 64K page be the same thing"
> recommendation is one way of satisfying the requirement that the
> whole 64K page is safe for the guest to touch. (Making the rest of
> the page RAZ/WI would be another option I guess.) If your system
> actually implements the SBSA recommendation then in fact
> describing the GICV-phys-base as the 64K-aligned address is wrong,
> because then the register at GICV-base + 4K would not be
> the first register in the 2nd page of the GICV, it would be another
> copy of the 1st page. This happens to work on Linux guests
> currently because they don't touch anything in the 2nd page,
> but for cases like device passthrough IIRC we might well like
> the guest to use some of the 2nd page registers. So the only
> correct choice on those systems is to specify the +60K address
> as the GICV physaddr in the device tree, and use Marc's patchset
> to allow QEMU/kvmtool to determine the page offset within the 64K
> page so it can reflect that in the guest's device tree.

Again, that can be solved by introducing Marc's attr for determining the
GICV offset within the 64k page. I don't think that's -stable material.

> I can't think of any way of determining whether a particular
> system gets this right or wrong automatically, which suggests
> perhaps we need to allow the device tree to specify that the
> GICV is 64k-page-safe...

When we support such systems, I also think we'll need a device-tree change.
My main concern right now is stopping the ability to hose the entire machine
by trying to instantiate a virtual GIC.

Will
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform

2014-07-24 Thread Joel Schopp

On 07/24/2014 02:47 PM, Peter Maydell wrote:
> On 24 July 2014 20:27, Will Deacon  wrote:
>> If the physical address of GICV isn't page-aligned, then we end up
>> creating a stage-2 mapping of the page containing it, which causes us to
>> map neighbouring memory locations directly into the guest.
>>
>> As an example, consider a platform with GICV at physical 0x2c02f000
>> running a 64k-page host kernel. If qemu maps this into the guest at
>> 0x8001, then guest physical addresses 0x8001 - 0x8001efff will
>> map host physical region 0x2c02 - 0x2c02efff. Accesses to these
>> physical regions may cause UNPREDICTABLE behaviour, for example, on the
>> Juno platform this will cause an SError exception to EL3, which brings
>> down the entire physical CPU resulting in RCU stalls / HYP panics / host
>> crashing / wasted weeks of debugging.
> This seems to me like a specific problem with Juno rather than an
> issue with having the GICV at a non-page-aligned start. The
> requirement to be able to expose host GICV as the guest GICC
> in a 64K pages system is just "nothing else in that 64K page
> (or pages, if the GICV runs across two pages) is allowed to be
> unsafe for the guest to touch", which remains true whether the
> GICV starts at 0K in the 64K page or 60K.
>
>> SBSA recommends that systems alias the 4k GICV across the bounding 64k
>> region, in which case GICV physical could be described as 0x2c02 in
>> the above scenario.
> The SBSA "make every 4K region in the 64K page be the same thing"
> recommendation is one way of satisfying the requirement that the
> whole 64K page is safe for the guest to touch. (Making the rest of
> the page RAZ/WI would be another option I guess.) If your system
> actually implements the SBSA recommendation then in fact
> describing the GICV-phys-base as the 64K-aligned address is wrong,
> because then the register at GICV-base + 4K would not be
> the first register in the 2nd page of the GICV, it would be another
> copy of the 1st page. This happens to work on Linux guests
> currently because they don't touch anything in the 2nd page,
> but for cases like device passthrough IIRC we might well like
> the guest to use some of the 2nd page registers. So the only
> correct choice on those systems is to specify the +60K address
> as the GICV physaddr in the device tree, and use Marc's patchset
> to allow QEMU/kvmtool to determine the page offset within the 64K
> page so it can reflect that in the guest's device tree.
I have one of those systems specifying +60K address as the GICV physaddr
and it works well for me with 64K pages and kvm with both QEMU and kvmtool.

>
> I can't think of any way of determining whether a particular
> system gets this right or wrong automatically, which suggests
> perhaps we need to allow the device tree to specify that the
> GICV is 64k-page-safe...
I don't have a better solution, despite my lack of enthusiasm for yet
another device tree property.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform

2014-07-24 Thread Joel Schopp

On 07/24/2014 02:55 PM, Will Deacon wrote:
> On Thu, Jul 24, 2014 at 08:47:23PM +0100, Peter Maydell wrote:
>> On 24 July 2014 20:27, Will Deacon  wrote:
>>> If the physical address of GICV isn't page-aligned, then we end up
>>> creating a stage-2 mapping of the page containing it, which causes us to
>>> map neighbouring memory locations directly into the guest.
>>>
>>> As an example, consider a platform with GICV at physical 0x2c02f000
>>> running a 64k-page host kernel. If qemu maps this into the guest at
>>> 0x8001, then guest physical addresses 0x8001 - 0x8001efff will
>>> map host physical region 0x2c02 - 0x2c02efff. Accesses to these
>>> physical regions may cause UNPREDICTABLE behaviour, for example, on the
>>> Juno platform this will cause an SError exception to EL3, which brings
>>> down the entire physical CPU resulting in RCU stalls / HYP panics / host
>>> crashing / wasted weeks of debugging.
>> This seems to me like a specific problem with Juno rather than an
>> issue with having the GICV at a non-page-aligned start. The
>> requirement to be able to expose host GICV as the guest GICC
>> in a 64K pages system is just "nothing else in that 64K page
>> (or pages, if the GICV runs across two pages) is allowed to be
>> unsafe for the guest to touch", which remains true whether the
>> GICV starts at 0K in the 64K page or 60K.
> I agree, and for that we would need a new ioctl so we can query the
> page-offset of the GICV on systems where it is safe. Given that such an
> ioctl doesn't exist today, I would like to plug the hole in mainline kernels
> with this patch, we can relax in the future if systems appear where it would
> be safe to map the entire 64k region.
I have such a system. 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform

2014-07-24 Thread Peter Maydell
On 24 July 2014 20:55, Will Deacon  wrote:
> Again, that can be solved by introduced Marc's attr for determining the
> GICV offset within the 64k page. I don't think that's -stable material.

Agreed that we don't want to put Marc's patchset in -stable
(and that without it systems with GICV in their host devicetree
at pagebase+60K are unusable, so we're not actually regressing
anything if we put this into stable). But...

>> I can't think of any way of determining whether a particular
>> system gets this right or wrong automatically, which suggests
>> perhaps we need to allow the device tree to specify that the
>> GICV is 64k-page-safe...
>
> When we support such systems, I also think we'll need a device-tree change.
> My main concern right now is stopping the ability to hose the entire machine
> by trying to instantiate a virtual GIC.

...I don't see how your patch prevents instantiating a VGIC
and hosing the machine on a system where the 64K
with the GICV registers in it goes
 [GICV registers] [machine blows up if you read this]
 0K  8K 64K

Whether the 64K page contains Bad Stuff is completely
orthogonal to whether the device tree offset the host has
for the GICV is 0K, 60K or anything in between. What you
should be checking for is "is this system design broken?",
which is probably a device tree attribute.

thanks
-- PMM
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v9 1/4] arm: add ARMv7 HYP API to flush VM TLBs, change generic TLB flush to support arch flush

2014-07-24 Thread Mario Smarduch
Patch adds HYP interface for global VM TLB invalidation without address
parameter. Generic VM TLB flush calls ARMv7 arch defined TLB flush function.

Signed-off-by: Mario Smarduch 
---
 arch/arm/include/asm/kvm_asm.h  |1 +
 arch/arm/include/asm/kvm_host.h |1 +
 arch/arm/kvm/Kconfig|1 +
 arch/arm/kvm/interrupts.S   |   12 
 arch/arm/kvm/mmu.c  |   17 +
 virt/kvm/Kconfig|3 +++
 virt/kvm/kvm_main.c |4 
 7 files changed, 39 insertions(+)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 53b3c4a..21bc519 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -78,6 +78,7 @@ extern char __kvm_hyp_code_end[];
 
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 #endif
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 193ceaf..042206f 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -230,5 +230,6 @@ int kvm_perf_teardown(void);
 
 u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
 int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
+void kvm_arch_flush_remote_tlbs(struct kvm *);
 
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 466bd29..44d3b6f 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -22,6 +22,7 @@ config KVM
select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
select KVM_MMIO
+   select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_ARM_HOST
depends on ARM_VIRT_EXT && ARM_LPAE
---help---
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 0d68d40..1258d46 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -66,6 +66,18 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
bx  lr
 ENDPROC(__kvm_tlb_flush_vmid_ipa)
 
+/**
+ * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs
+ *
+ * Reuses __kvm_tlb_flush_vmid_ipa() for ARMv7, without passing address
+ * parameter
+ */
+
+ENTRY(__kvm_tlb_flush_vmid)
+   b   __kvm_tlb_flush_vmid_ipa
+ENDPROC(__kvm_tlb_flush_vmid)
+
+
 /
  * Flush TLBs and instruction caches of all CPUs inside the inner-shareable
  * domain, for all VMIDs
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 2ac9588..35254c6 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -56,6 +56,23 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, 
phys_addr_t ipa)
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
 }
 
+#ifdef CONFIG_ARM
+/**
+ * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries
+ * @kvm:   pointer to kvm structure.
+ *
+ * Interface to HYP function to flush all VM TLB entries without address
+ * parameter. In HYP mode reuses __kvm_tlb_flush_vmid_ipa() function used by
+ * kvm_tlb_flush_vmid_ipa().
+ */
+void kvm_arch_flush_remote_tlbs(struct kvm *kvm)
+{
+   if (kvm)
+   kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
+}
+
+#endif
+
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
  int min, int max)
 {
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 13f2d19..f1efaa5 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -34,3 +34,6 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT
 
 config KVM_VFIO
bool
+
+config HAVE_KVM_ARCH_TLB_FLUSH_ALL
+   bool
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fa70c6e..258f3d9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -186,12 +186,16 @@ static bool make_all_cpus_request(struct kvm *kvm, 
unsigned int req)
 
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
+#ifdef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
+   kvm_arch_flush_remote_tlbs(kvm);
+#else
long dirty_count = kvm->tlbs_dirty;
 
smp_mb();
if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
++kvm->stat.remote_tlb_flush;
cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
+#endif
 }
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v9 0/4] arm: dirty page logging support for ARMv7

2014-07-24 Thread Mario Smarduch
This patch adds support for dirty page logging so far tested only on ARMv7 HW,
and verified to compile on armv8, ia64, mips, ppc, s390 and compile and run on
x86_64. 

Change from previous version:
- kvm_flush_remote_tlbs() has generic and architecture specific variants.
  armv7 (later armv8) uses arch variant; all other architectures use generic 
  version. Reason being arm uses HW broadcast for TLB invalidation.
- kvm_vm_ioctl_get_dirty_log() - is generic between armv7, x86 (later ARMv8),
  other architectures use arch variant

The approach is documented 

https://lists.cs.columbia.edu/pipermail/kvmarm/2014-July/010329.html
https://lists.cs.columbia.edu/pipermail/kvmarm/2014-July/010338.html

Compile targets
- x86_64 - defconfig also did validation, simple migration on same host.
- ia64 - ia64-linux-gcc4.6.3 - defconfig; ia64 Kconfig defines BROKEN, worked 
  around that to make sure new changes don't break the build. Eventually the 
  build breaks when compiling ioapic.c, unrelated to this patch.
- mips - mips64-linux-gcc4.6.3 - malta_kvm_defconfig
- ppc - powerpc64-linux-gcc4.6.3 - pseries_defconfig
- s390 - s390x-linux-gcc4.6.3 - defconfig

Dirty page logging support -
- initially write protects VM RAM memory regions - 2nd stage page tables
- add support to read dirty page log and again write protect the dirty pages 
  - second stage page table for next pass.
- second stage huge pages are dissolved into page tables to keep track of
  dirty pages at page granularity. Tracking at huge page granularity limits
  migration to an almost idle system.
- In the event migration is canceled, normal behavior is resumed; huge pages
  are rebuilt over time.
- At this time reverse mappings are not used for write protecting of 2nd
  stage tables.

- Future work
  - Enable dirty memory logging to work on ARMv8 FastModels/Foundations Model

Test Environment:
---
NOTE: RUNNING on FAST Models will hardly ever fail and mask bugs, initially 
  light loads were succeeding without dirty page logging support.
---
- Will put all components on github, including test setup on github
- In short summary
  o Two ARM Exynos 5440 development platforms - 4-way 1.7 GHz, with 8GB, 256GB
storage, 1GBs Ethernet, with swap enabled
  o NFS Server running Ubuntu 13.04
- both ARM boards mount shared file system
- Shared file system includes - QEMU, Guest Kernel, DTB, multiple Ext3 root
  file systems.
  o Component versions: qemu-1.7.5, vexpress-a15, host/guest kernel 3.15-rc1,
  o Use QEMU Ctr+A+C and migrate -d tcp:IP:port command
- Destination command syntax: can change smp to 4, machine model outdated,
  but has been tested on virt by others (need to upgrade)

/mnt/migration/qemu-system-arm -enable-kvm -smp 2 -kernel \
/mnt/migration/zImage -dtb /mnt/migration/guest-a15.dtb -m 1792 \
-M vexpress-a15 -cpu cortex-a15 -nographic \
-append "root=/dev/vda rw console=ttyAMA0 rootwait" \
-drive if=none,file=/mnt/migration/guest1.root,id=vm1 \
-device virtio-blk-device,drive=vm1 \
-netdev type=tap,id=net0,ifname=tap0 \
-device virtio-net-device,netdev=net0,mac="52:54:00:12:34:58" \
-incoming tcp:0:4321

- Source command syntax same except '-incoming'

  o Test migration of multiple VMs use tap0, tap1, ..., and guest0.root, .
has been tested as well.
  o On source run multiple copies of 'dirtyram.arm' - simple program to dirty
pages periodically.
./dirtyram.arm   
Example:
./dirtyram.arm 102580 812 30
- dirty 102580 pages
- 812 pages every 30ms with an incrementing counter
- run anywhere from one to as many copies as VM resources can support. If
  the dirty rate is too high migration will run indefinitely
- run date output loop, check date is picked up smoothly
- place guest/host into page reclaim/swap mode - by whatever means in this
  case run multiple copies of 'dirtyram.arm' on host
- issue migrate command(s) on source
- Top result is 409600, 8192, 5
  o QEMU is instrumented to save RAM memory regions on source and destination
after memory is migrated, but before guest started. Later files are
checksummed on both ends for correctness, given VMs are small this works.
  o Guest kernel is instrumented to capture current cycle counter - last cycle
and compare to qemu down time to test arch timer accuracy.
  o Network failover is at L3 due to interface limitations, ping continues
working transparently
  o Also tested 'migrate_cancel' to test reassemble of huge pages (inserted low
level instrumentation code).
- Basic Network Test - Assuming one ethernet interface available

Source host IP 192.168.10.101/24, VM tap0 192.168.2.1/24 and
VM eth0 192.168.2.100/24 with default route 192.168.2.1

Destination host IP 192.168.10.100/24, VM same sett

[PATCH v9 3/4] arm: dirty log write protect mgmt. Moved x86, armv7 to generic, set armv8 ia64 mips powerpc s390 arch specific

2014-07-24 Thread Mario Smarduch
This patch adds support for keeping track of VM dirty pages. As dirty page log
is retrieved, the pages that have been written are write protected again for
next write and log read.

The dirty log read function is generic for armv7 and x86, and arch specific
for arm64, ia64, mips, powerpc, s390.

Signed-off-by: Mario Smarduch 
---
 arch/arm/kvm/arm.c  |8 +++-
 arch/arm/kvm/mmu.c  |   22 +
 arch/arm64/include/asm/kvm_host.h   |2 +
 arch/arm64/kvm/Kconfig  |1 +
 arch/ia64/include/asm/kvm_host.h|1 +
 arch/ia64/kvm/Kconfig   |1 +
 arch/ia64/kvm/kvm-ia64.c|2 +-
 arch/mips/include/asm/kvm_host.h|2 +-
 arch/mips/kvm/Kconfig   |1 +
 arch/mips/kvm/kvm_mips.c|2 +-
 arch/powerpc/include/asm/kvm_host.h |2 +
 arch/powerpc/kvm/Kconfig|1 +
 arch/powerpc/kvm/book3s.c   |2 +-
 arch/powerpc/kvm/booke.c|2 +-
 arch/s390/include/asm/kvm_host.h|2 +
 arch/s390/kvm/Kconfig   |1 +
 arch/s390/kvm/kvm-s390.c|2 +-
 arch/x86/kvm/x86.c  |   86 -
 include/linux/kvm_host.h|3 ++
 virt/kvm/Kconfig|3 ++
 virt/kvm/kvm_main.c |   90 +++
 21 files changed, 143 insertions(+), 93 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index e11c2dd..f7739a0 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -783,10 +783,16 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
 }
 
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+#ifdef CONFIG_ARM64
+/*
+ * For now features not supported on ARM64, the #ifdef is added to make that
+ * clear but not needed since ARM64 Kconfig selects function in generic code.
+ */
+int kvm_arch_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 {
return -EINVAL;
 }
+#endif
 
 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
struct kvm_arm_device_addr *dev_addr)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 7bfc792..ca84331 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -889,6 +889,28 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
kvm_flush_remote_tlbs(kvm);
spin_unlock(&kvm->mmu_lock);
 }
+
+/**
+ * kvm_mmu_write_protect_pt_masked() - write protect dirty pages set in mask
+ * @kvm:   The KVM pointer
+ * @slot:  The memory slot associated with mask
+ * @gfn_offset:The gfn offset in memory slot
+ * @mask:  The mask of dirty pages at offset 'gfn_offset' in this memory
+ * slot to be write protected
+ *
+ * Walks bits set in mask write protects the associated pte's. Caller must
+ * acquire kvm_mmu_lock.
+ */
+void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+   struct kvm_memory_slot *slot,
+   gfn_t gfn_offset, unsigned long mask)
+{
+   phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
+   phys_addr_t start = (base_gfn +  __ffs(mask)) << PAGE_SHIFT;
+   phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
+
+   stage2_wp_range(kvm, start, end);
+}
 #endif
 
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 92242ce..b4a280b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -200,4 +200,6 @@ static inline void __cpu_init_hyp_mode(phys_addr_t 
boot_pgd_ptr,
 hyp_stack_ptr, vector_ptr);
 }
 
+int kvm_arch_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log 
*log);
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 8ba85e9..9e21a8a 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -22,6 +22,7 @@ config KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
+   select HAVE_KVM_ARCH_DIRTY_LOG
select KVM_MMIO
select KVM_ARM_HOST
select KVM_ARM_VGIC
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index db95f57..d79f520 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -594,6 +594,7 @@ void kvm_sal_emul(struct kvm_vcpu *vcpu);
 #define __KVM_HAVE_ARCH_VM_ALLOC 1
 struct kvm *kvm_arch_alloc_vm(void);
 void kvm_arch_free_vm(struct kvm *kvm);
+int kvm_arch_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log 
*log);
 
 #endif /* __ASSEMBLY__*/
 
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 990b864..32dd6c8 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -24,6 +24,7 @@ config KVM
depends on BROKEN
select PREEMPT_NOTIFIERS
select ANON_INODES
+

[PATCH v9 2/4] arm: ARMv7 dirty page logging inital mem region write protect (w/no huge PUD support)

2014-07-24 Thread Mario Smarduch
Patch adds support for initial write protection of VM memslots. This patch series
assumes that huge PUDs will not be used in 2nd stage tables.

Signed-off-by: Mario Smarduch 
---
 arch/arm/include/asm/kvm_host.h   |1 +
 arch/arm/include/asm/kvm_mmu.h|   20 ++
 arch/arm/include/asm/pgtable-3level.h |1 +
 arch/arm/kvm/arm.c|9 +++
 arch/arm/kvm/mmu.c|  128 +
 5 files changed, 159 insertions(+)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 042206f..6521a2d 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -231,5 +231,6 @@ int kvm_perf_teardown(void);
 u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
 int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
 void kvm_arch_flush_remote_tlbs(struct kvm *);
+void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
 
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 5cc0b0f..08ab5e8 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -114,6 +114,26 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
pmd_val(*pmd) |= L_PMD_S2_RDWR;
 }
 
+static inline void kvm_set_s2pte_readonly(pte_t *pte)
+{
+   pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY;
+}
+
+static inline bool kvm_s2pte_readonly(pte_t *pte)
+{
+   return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY;
+}
+
+static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
+{
+   pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY;
+}
+
+static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
+{
+   return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY;
+}
+
 /* Open coded p*d_addr_end that can deal with 64bit addresses */
 #define kvm_pgd_addr_end(addr, end)\
 ({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;\
diff --git a/arch/arm/include/asm/pgtable-3level.h 
b/arch/arm/include/asm/pgtable-3level.h
index 85c60ad..d8bb40b 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -129,6 +129,7 @@
 #define L_PTE_S2_RDONLY(_AT(pteval_t, 1) << 6)   /* 
HAP[1]   */
 #define L_PTE_S2_RDWR  (_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
 
+#define L_PMD_S2_RDONLY(_AT(pteval_t, 1) << 6)   /* 
HAP[1]   */
 #define L_PMD_S2_RDWR  (_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
 
 /*
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 3c82b37..e11c2dd 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -242,6 +242,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
   const struct kvm_memory_slot *old,
   enum kvm_mr_change change)
 {
+#ifdef CONFIG_ARM
+   /*
+* At this point memslot has been committed and there is an
+* allocated dirty_bitmap[], dirty pages will be tracked while the
+* memory slot is write protected.
+*/
+   if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
+   kvm_mmu_wp_memory_region(kvm, mem->slot);
+#endif
 }
 
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 35254c6..7bfc792 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -763,6 +763,134 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, 
phys_addr_t *ipap)
return false;
 }
 
+#ifdef CONFIG_ARM
+/**
+ * stage2_wp_pte_range - write protect PTE range
+ * @pmd:   pointer to pmd entry
+ * @addr:  range start address
+ * @end:   range end address
+ */
+static void stage2_wp_pte_range(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
+{
+   pte_t *pte;
+
+   pte = pte_offset_kernel(pmd, addr);
+   do {
+   if (!pte_none(*pte)) {
+   if (!kvm_s2pte_readonly(pte))
+   kvm_set_s2pte_readonly(pte);
+   }
+   } while (pte++, addr += PAGE_SIZE, addr != end);
+}
+
+/**
+ * stage2_wp_pmd_range - write protect PMD range
+ * @pud:   pointer to pud entry
+ * @addr:  range start address
+ * @end:   range end address
+ */
+static void stage2_wp_pmd_range(pud_t *pud, phys_addr_t addr, phys_addr_t end)
+{
+   pmd_t *pmd;
+   phys_addr_t next;
+
+   pmd = pmd_offset(pud, addr);
+
+   do {
+   next = kvm_pmd_addr_end(addr, end);
+   if (!pmd_none(*pmd)) {
+   if (kvm_pmd_huge(*pmd)) {
+   if (!kvm_s2pmd_readonly(pmd))
+   kvm_set_s2pmd_readonly(pmd);
+   } else
+   stage2_wp_pte_range(pmd, addr, next);
+
+   }
+   } while (pmd++, ad

[PATCH v9 4/4] arm: ARMv7 dirty page logging 2nd stage page fault handling support

2014-07-24 Thread Mario Smarduch
This patch adds support for handling 2nd stage page faults during migration,
it disables faulting in huge pages, and dissolves huge pages to page tables.
In case migration is canceled huge pages will be used again.

Signed-off-by: Mario Smarduch 
---
 arch/arm/kvm/mmu.c |   31 +--
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index ca84331..a17812a 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -642,7 +642,8 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct 
kvm_mmu_memory_cache
 }
 
 static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
- phys_addr_t addr, const pte_t *new_pte, bool iomap)
+ phys_addr_t addr, const pte_t *new_pte, bool iomap,
+ bool logging_active)
 {
pmd_t *pmd;
pte_t *pte, old_pte;
@@ -657,6 +658,15 @@ static int stage2_set_pte(struct kvm *kvm, struct 
kvm_mmu_memory_cache *cache,
return 0;
}
 
+   /*
+* While dirty memory logging, clear PMD entry for huge page and split
+* into smaller pages, to track dirty memory at page granularity.
+*/
+   if (logging_active && kvm_pmd_huge(*pmd)) {
+   phys_addr_t ipa = pmd_pfn(*pmd) << PAGE_SHIFT;
+   clear_pmd_entry(kvm, pmd, ipa);
+   }
+
/* Create stage-2 page mappings - Level 2 */
if (pmd_none(*pmd)) {
if (!cache)
@@ -709,7 +719,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t 
guest_ipa,
if (ret)
goto out;
spin_lock(&kvm->mmu_lock);
-   ret = stage2_set_pte(kvm, &cache, addr, &pte, true);
+   ret = stage2_set_pte(kvm, &cache, addr, &pte, true, false);
spin_unlock(&kvm->mmu_lock);
if (ret)
goto out;
@@ -926,6 +936,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, 
phys_addr_t fault_ipa,
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
struct vm_area_struct *vma;
pfn_t pfn;
+   /* Get logging status, if dirty_bitmap is not NULL then logging is on */
+   #ifdef CONFIG_ARM
+   bool logging_active = !!memslot->dirty_bitmap;
+   #else
+   bool logging_active = false;
+   #endif
 
write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
if (fault_status == FSC_PERM && !write_fault) {
@@ -936,7 +952,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, 
phys_addr_t fault_ipa,
/* Let's check if we will get back a huge page backed by hugetlbfs */
down_read(¤t->mm->mmap_sem);
vma = find_vma_intersection(current->mm, hva, hva + 1);
-   if (is_vm_hugetlb_page(vma)) {
+   if (is_vm_hugetlb_page(vma) && !logging_active) {
hugetlb = true;
gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
} else {
@@ -979,7 +995,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, 
phys_addr_t fault_ipa,
spin_lock(&kvm->mmu_lock);
if (mmu_notifier_retry(kvm, mmu_seq))
goto out_unlock;
-   if (!hugetlb && !force_pte)
+   if (!hugetlb && !force_pte && !logging_active)
hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 
if (hugetlb) {
@@ -998,9 +1014,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, 
phys_addr_t fault_ipa,
kvm_set_pfn_dirty(pfn);
}
coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
-   ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
+   ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false,
+   logging_active);
}
 
+   if (write_fault)
+   mark_page_dirty(kvm, gfn);
 
 out_unlock:
spin_unlock(&kvm->mmu_lock);
@@ -1151,7 +1170,7 @@ static void kvm_set_spte_handler(struct kvm *kvm, gpa_t 
gpa, void *data)
 {
pte_t *pte = (pte_t *)data;
 
-   stage2_set_pte(kvm, NULL, gpa, pte, false);
+   stage2_set_pte(kvm, NULL, gpa, pte, false, false);
 }
 
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] KVM: PPC: Booke-hv: Add one reg interface for SPRG9

2014-07-24 Thread Bharat Bhushan
We now support SPRG9 for the guest, so also add a one-reg interface for the same.
Note: Changes are in bookehv code only as we do not have SPRG9 on booke-pr.

Signed-off-by: Bharat Bhushan 
---
 arch/powerpc/include/uapi/asm/kvm.h |  1 +
 arch/powerpc/kvm/e500mc.c   | 22 --
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index 2bc4a94..0e56d9e 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -555,6 +555,7 @@ struct kvm_get_htab_header {
 
 #define KVM_REG_PPC_DABRX  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8)
 #define KVM_REG_PPC_WORT   (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9)
+#define KVM_REG_PPC_SPRG9  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index bd0a2bd..000cf82 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -267,14 +267,32 @@ static int kvmppc_core_set_sregs_e500mc(struct kvm_vcpu 
*vcpu,
 static int kvmppc_get_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
  union kvmppc_one_reg *val)
 {
-   int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+   int r = 0;
+
+   switch (id) {
+   case KVM_REG_PPC_SPRG9:
+   *val = get_reg_val(id, vcpu->arch.sprg9);
+   break;
+   default:
+   r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+   }
+
return r;
 }
 
 static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
  union kvmppc_one_reg *val)
 {
-   int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
+   int r = 0;
+
+   switch (id) {
+   case KVM_REG_PPC_SPRG9:
+   vcpu->arch.sprg9 = set_reg_val(id, *val);
+   break;
+   default:
+   r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
+   }
+
return r;
 }
 
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: PPC: Booke-hv: Add one reg interface for SPRG9

2014-07-24 Thread Alexander Graf


On 25.07.14 07:51, Bharat Bhushan wrote:

We now support SPRG9 for guest, so also add a one reg interface for same
Note: Changes are in bookehv code only as we do not have SPRG9 on booke-pr.

Signed-off-by: Bharat Bhushan 


Thanks, applied along with the SPRG9 implementation patch to kvm-ppc-queue.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] KVM: PPC: Remove comment saying SPRG1 is used for vcpu pointer

2014-07-24 Thread Bharat Bhushan
Scott Wood pointed out that we are no longer using SPRG1 for the vcpu pointer,
but using SPRN_SPRG_THREAD <=> SPRG3 (thread->vcpu). So this comment
is not valid now.

Note: SPRN_SPRG3R is not supported (do not see any need as of now),
and if we want to support this in future then we have to shift to using
SPRG1 for VCPU pointer.

Signed-off-by: Bharat Bhushan 
---
 arch/powerpc/include/asm/reg.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 1f34ef7..d46d92b 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -945,9 +945,6 @@
  *  readable variant for reads, which can avoid a fault
  *  with KVM type virtualization.
  *
- *  (*) Under KVM, the host SPRG1 is used to point to
- *  the current VCPU data structure
- *
  * 32-bit 8xx:
  * - SPRG0 scratch for exception vectors
  * - SPRG1 scratch for exception vectors
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: PPC: Remove comment saying SPRG1 is used for vcpu pointer

2014-07-24 Thread Alexander Graf


On 25.07.14 08:02, Bharat Bhushan wrote:

Scott Wood pointed out that We are no longer using SPRG1 for vcpu pointer,
but using SPRN_SPRG_THREAD <=> SPRG3 (thread->vcpu). So this comment
is not valid now.

Note: SPRN_SPRG3R is not supported (do not see any need as of now),
and if we want to support this in future then we have to shift to using
SPRG1 for VCPU pointer.

Signed-off-by: Bharat Bhushan 


Thanks, applied to kvm-ppc-queue.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v9 1/4] arm: add ARMv7 HYP API to flush VM TLBs, change generic TLB flush to support arch flush

2014-07-24 Thread Alexander Graf


On 25.07.14 02:56, Mario Smarduch wrote:

Patch adds HYP interface for global VM TLB invalidation without address
parameter. Generic VM TLB flush calls ARMv7 arch defined TLB flush function.

Signed-off-by: Mario Smarduch 
---
  arch/arm/include/asm/kvm_asm.h  |1 +
  arch/arm/include/asm/kvm_host.h |1 +
  arch/arm/kvm/Kconfig|1 +
  arch/arm/kvm/interrupts.S   |   12 
  arch/arm/kvm/mmu.c  |   17 +
  virt/kvm/Kconfig|3 +++
  virt/kvm/kvm_main.c |4 
  7 files changed, 39 insertions(+)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 53b3c4a..21bc519 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -78,6 +78,7 @@ extern char __kvm_hyp_code_end[];
  
  extern void __kvm_flush_vm_context(void);

  extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
  
  extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);

  #endif
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 193ceaf..042206f 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -230,5 +230,6 @@ int kvm_perf_teardown(void);
  
  u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);

  int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
+void kvm_arch_flush_remote_tlbs(struct kvm *);
  
  #endif /* __ARM_KVM_HOST_H__ */

diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 466bd29..44d3b6f 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -22,6 +22,7 @@ config KVM
select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
select KVM_MMIO
+   select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_ARM_HOST
depends on ARM_VIRT_EXT && ARM_LPAE
---help---
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 0d68d40..1258d46 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -66,6 +66,18 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
bx  lr
  ENDPROC(__kvm_tlb_flush_vmid_ipa)
  
+/**

+ * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs
+ *
+ * Reuses __kvm_tlb_flush_vmid_ipa() for ARMv7, without passing address
+ * parameter
+ */
+
+ENTRY(__kvm_tlb_flush_vmid)
+   b   __kvm_tlb_flush_vmid_ipa
+ENDPROC(__kvm_tlb_flush_vmid)
+
+
  /
   * Flush TLBs and instruction caches of all CPUs inside the inner-shareable
   * domain, for all VMIDs
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 2ac9588..35254c6 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -56,6 +56,23 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, 
phys_addr_t ipa)
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
  }
  
+#ifdef CONFIG_ARM


Why the ifdef? We're in ARM code here, no?


+/**
+ * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries
+ * @kvm:   pointer to kvm structure.
+ *
+ * Interface to HYP function to flush all VM TLB entries without address
+ * parameter. In HYP mode reuses __kvm_tlb_flush_vmid_ipa() function used by
+ * kvm_tlb_flush_vmid_ipa().
+ */
+void kvm_arch_flush_remote_tlbs(struct kvm *kvm)
+{
+   if (kvm)
+   kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);


I don't see why we should ever call this function with kvm==NULL.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v9 2/4] arm: ARMv7 dirty page logging inital mem region write protect (w/no huge PUD support)

2014-07-24 Thread Alexander Graf


On 25.07.14 02:56, Mario Smarduch wrote:

Patch adds support for initial write protection of VM memslots. This patch series
assumes that huge PUDs will not be used in 2nd stage tables.


Is this a valid assumption?



Signed-off-by: Mario Smarduch 
---
  arch/arm/include/asm/kvm_host.h   |1 +
  arch/arm/include/asm/kvm_mmu.h|   20 ++
  arch/arm/include/asm/pgtable-3level.h |1 +
  arch/arm/kvm/arm.c|9 +++
  arch/arm/kvm/mmu.c|  128 +
  5 files changed, 159 insertions(+)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 042206f..6521a2d 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -231,5 +231,6 @@ int kvm_perf_teardown(void);
  u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
  int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
  void kvm_arch_flush_remote_tlbs(struct kvm *);
+void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
  
  #endif /* __ARM_KVM_HOST_H__ */

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 5cc0b0f..08ab5e8 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -114,6 +114,26 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
pmd_val(*pmd) |= L_PMD_S2_RDWR;
  }
  
+static inline void kvm_set_s2pte_readonly(pte_t *pte)

+{
+   pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY;
+}
+
+static inline bool kvm_s2pte_readonly(pte_t *pte)
+{
+   return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY;
+}
+
+static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
+{
+   pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY;
+}
+
+static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
+{
+   return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY;
+}
+
  /* Open coded p*d_addr_end that can deal with 64bit addresses */
  #define kvm_pgd_addr_end(addr, end)   \
  ({u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;\
diff --git a/arch/arm/include/asm/pgtable-3level.h 
b/arch/arm/include/asm/pgtable-3level.h
index 85c60ad..d8bb40b 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -129,6 +129,7 @@
  #define L_PTE_S2_RDONLY   (_AT(pteval_t, 1) << 6)   /* 
HAP[1]   */
  #define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
  
+#define L_PMD_S2_RDONLY			(_AT(pteval_t, 1) << 6)   /* HAP[1]   */

  #define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
  
  /*

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 3c82b37..e11c2dd 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -242,6 +242,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
   const struct kvm_memory_slot *old,
   enum kvm_mr_change change)
  {
+#ifdef CONFIG_ARM


Same question on CONFIG_ARM here. Is this the define used to distinguish 
between 32bit and 64bit?



Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html