Re: [PATCH v4] kvm: better MWAIT emulation for guests

2017-03-16 Thread kbuild test robot
Hi Michael,

[auto build test ERROR on kvm/linux-next]
[also build test ERROR on v4.11-rc2 next-20170310]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Michael-S-Tsirkin/kvm-better-MWAIT-emulation-for-guests/20170316-143518
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
config: x86_64-randconfig-x010-201711 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

All error/warnings (new ones prefixed by >>):

   In file included from arch/x86/kvm/x86.c:28:0:
   arch/x86/kvm/x86.h: In function 'kvm_mwait_in_guest':
>> arch/x86/kvm/x86.h:231:34: error: 'CPUID_MWAIT_LEAF' undeclared (first use 
>> in this function)
 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
 ^~~~
   arch/x86/kvm/x86.h:231:34: note: each undeclared identifier is reported only 
once for each function it appears in
>> arch/x86/kvm/x86.h:234:45: error: 'mwait_substates' undeclared (first use in 
>> this function)
 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
^~~
>> arch/x86/kvm/x86.h:236:14: error: 'CPUID5_ECX_INTERRUPT_BREAK' undeclared 
>> (first use in this function)
 if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK))
 ^~
>> arch/x86/kvm/x86.h:238:1: warning: control reaches end of non-void function 
>> [-Wreturn-type]
}
^
--
   In file included from arch/x86/kvm/mmu.c:23:0:
   arch/x86/kvm/x86.h: In function 'kvm_mwait_in_guest':
>> arch/x86/kvm/x86.h:231:34: error: 'CPUID_MWAIT_LEAF' undeclared (first use 
>> in this function)
 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
 ^~~~
   arch/x86/kvm/x86.h:231:34: note: each undeclared identifier is reported only 
once for each function it appears in
>> arch/x86/kvm/x86.h:234:45: error: 'mwait_substates' undeclared (first use in 
>> this function)
 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
^~~
>> arch/x86/kvm/x86.h:236:14: error: 'CPUID5_ECX_INTERRUPT_BREAK' undeclared 
>> (first use in this function)
 if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK))
 ^~
   At top level:
   arch/x86/kvm/x86.h:216:13: warning: 'kvm_mwait_in_guest' defined but not 
used [-Wunused-function]
static bool kvm_mwait_in_guest(void)
^~

vim +/CPUID_MWAIT_LEAF +231 arch/x86/kvm/x86.h

   225  
   226  /*
   227   * Intel CPUs without CPUID5_ECX_INTERRUPT_BREAK are 
problematic as
   228   * they would allow guest to stop the CPU completely by 
disabling
   229   * interrupts then invoking MWAIT.
   230   */
 > 231  if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
   232  return -ENODEV;
   233  
 > 234  cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
   235  
 > 236  if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK))
   237  return -ENODEV;
 > 238  }
   239  
   240  #endif

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH v4] kvm: better MWAIT emulation for guests

2017-03-16 Thread kbuild test robot
Hi Michael,

[auto build test WARNING on kvm/linux-next]
[also build test WARNING on v4.11-rc2 next-20170310]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Michael-S-Tsirkin/kvm-better-MWAIT-emulation-for-guests/20170316-143518
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
config: i386-randconfig-x018-201711 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All warnings (new ones prefixed by >>):

   In file included from include/uapi/linux/stddef.h:1:0,
from include/linux/stddef.h:4,
from include/uapi/linux/posix_types.h:4,
from include/uapi/linux/types.h:13,
from include/linux/types.h:5,
from include/linux/kvm_host.h:9,
from arch/x86/kvm/x86.c:22:
   arch/x86/kvm/x86.h: In function 'kvm_mwait_in_guest':
   arch/x86/kvm/x86.h:231:34: error: 'CPUID_MWAIT_LEAF' undeclared (first use 
in this function)
 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
 ^
   include/linux/compiler.h:160:30: note: in definition of macro '__trace_if'
 if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
 ^~~~
>> arch/x86/kvm/x86.h:231:2: note: in expansion of macro 'if'
 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
 ^~
   arch/x86/kvm/x86.h:231:34: note: each undeclared identifier is reported only 
once for each function it appears in
 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
 ^
   include/linux/compiler.h:160:30: note: in definition of macro '__trace_if'
 if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
 ^~~~
>> arch/x86/kvm/x86.h:231:2: note: in expansion of macro 'if'
 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
 ^~
   In file included from arch/x86/kvm/x86.c:28:0:
   arch/x86/kvm/x86.h:234:45: error: 'mwait_substates' undeclared (first use in 
this function)
 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
^~~
   In file included from include/uapi/linux/stddef.h:1:0,
from include/linux/stddef.h:4,
from include/uapi/linux/posix_types.h:4,
from include/uapi/linux/types.h:13,
from include/linux/types.h:5,
from include/linux/kvm_host.h:9,
from arch/x86/kvm/x86.c:22:
   arch/x86/kvm/x86.h:236:14: error: 'CPUID5_ECX_INTERRUPT_BREAK' undeclared 
(first use in this function)
 if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK))
 ^
   include/linux/compiler.h:160:30: note: in definition of macro '__trace_if'
 if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
 ^~~~
   arch/x86/kvm/x86.h:236:2: note: in expansion of macro 'if'
 if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK))
 ^~
   In file included from arch/x86/kvm/x86.c:28:0:
   arch/x86/kvm/x86.h:238:1: warning: control reaches end of non-void function 
[-Wreturn-type]
}
^
--
   In file included from include/uapi/linux/stddef.h:1:0,
from include/linux/stddef.h:4,
from include/uapi/linux/posix_types.h:4,
from include/uapi/linux/types.h:13,
from include/linux/types.h:5,
from include/linux/mm_types_task.h:10,
from include/linux/mm_types.h:4,
from arch/x86/kvm/irq.h:25,
from arch/x86/kvm/mmu.c:21:
   arch/x86/kvm/x86.h: In function 'kvm_mwait_in_guest':
   arch/x86/kvm/x86.h:231:34: error: 'CPUID_MWAIT_LEAF' undeclared (first use 
in this function)
 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
 ^
   include/linux/compiler.h:160:30: note: in definition of macro '__trace_if'
 if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
 ^~~~
>> arch/x86/kvm/x86.h:231:2: note: in expansion of macro 'if'
 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
 ^~
   arch/x86/kvm/x86.h:231:34: note: each undeclared identifier is reported only 
once for each function it appears in
 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
 ^
   include/linux/compiler.h:160:30: note: in definition of macro '__trace_if'
 if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
 ^~~~
>> 

Re: [PATCH v7 1/3] x86/mm: Adapt MODULES_END based on Fixmap section size

2017-03-16 Thread Ingo Molnar

* Thomas Garnier  wrote:

> This patch aligns MODULES_END to the beginning of the Fixmap section.
> It optimizes the space available for both sections. The address is
> pre-computed based on the number of pages required by the Fixmap
> section.
> 
> It will allow GDT remapping in the Fixmap section. The current
> MODULES_END static address does not provide enough space for the kernel
> to support a large number of processors.
> 
> Signed-off-by: Thomas Garnier 
> ---
> Based on next-20170308
> ---
>  Documentation/x86/x86_64/mm.txt | 5 -
>  arch/x86/include/asm/pgtable_64_types.h | 3 ++-
>  arch/x86/kernel/module.c| 1 +
>  arch/x86/mm/dump_pagetables.c   | 1 +
>  arch/x86/mm/kasan_init_64.c | 1 +
>  mm/vmalloc.c| 1 +

> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -35,6 +35,7 @@
>  #include 
>  #include 
>  #include 
> +#include <asm/fixmap.h>
>  
>  #include "internal.h"

Note that asm/fixmap.h is an x86-ism that isn't present in many other 
architectures, so this hunk will break the build.

To make progress with these patches I've fixed it up with an ugly #ifdef 
CONFIG_X86, but it needs a real solution instead before this can be pushed 
upstream.

Thanks,

Ingo

=>
 mm/vmalloc.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index dabea6a29fad..b7d2a23349f4 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -35,7 +35,10 @@
 #include 
 #include 
 #include 
-#include <asm/fixmap.h>
+
+#ifdef CONFIG_X86
+# include <asm/fixmap.h>
+#endif
 
 #include "internal.h"
 
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH linux v4 1/2] Documentation: dt-bindings: Document bindings for ASPEED AST2400/AST2500 PWM and Fan tach controller device driver

2017-03-16 Thread ELSOFT AG
Hello

Jaghathiswari Rankappagounder Natarajan wrote:

> +- compatible : should be "aspeed,aspeed2400-pwm-tacho" for AST2400
> or + "aspeed,aspeed2500-pwm-tacho" for AST2500.

Shouldn't this be "aspeed,ast2400-pwm-tacho" and
"aspeed,ast2500-pwm-tacho", as this is the naming scheme used for all
the other AST2x00 devices currently present in "aspeed-g[45].dtsi"?

Dave

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] lib/Kconfig.debug: correct documentation paths

2017-03-16 Thread Hans Holmberg
On Fri, Feb 24, 2017 at 10:21:35AM -0700, Jonathan Corbet wrote:
> On Fri, 24 Feb 2017 10:36:20 +0200
> Krzysztof Kozlowski  wrote:
> 
> > What happened with this patch? I am asking because on top of
> > next-20170224 I found some more sysrq.txt obsolete paths... including
> > the ones fixed here. I am not sure whether I am fixing something
> > already fixed?
> 
> What happened, most likely, is that I saw /lib/Kconfig.debug and assumed
> it was one of the many patches sent my way that aren't really up to me to
> handle.  But nobody else really owns /lib either, I guess.  I've dug the
> patch out of my archive and will get it in.
> 

Great! I just noticed that the patch does not apply, so I'll rebase it and
send out a V2.

Thanks,
Hans
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] lib/Kconfig.debug: correct documentation paths

2017-03-16 Thread Hans Holmberg
A bunch of documentation files have moved, correct the paths.

Signed-off-by: Hans Holmberg 
---

Changes in v2:
   - Rebased on top of 4.11-rc2
 (one of the paths was fixed by another patch)

 lib/Kconfig.debug | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 97d62c2..22c75bc 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -130,7 +130,8 @@ config DYNAMIC_DEBUG
nullarbor:~ # echo -n 'func svc_process -p' >
/dynamic_debug/control
 
- See Documentation/dynamic-debug-howto.txt for additional information.
+ See Documentation/admin-guide/dynamic-debug-howto.rst for additional
+ information.
 
 endmenu # "printk and dmesg options"
 
@@ -404,8 +405,8 @@ config MAGIC_SYSRQ
  by pressing various keys while holding SysRq (Alt+PrintScreen). It
  also works on a serial console (on PC hardware at least), if you
  send a BREAK and then within 5 seconds a command keypress. The
- keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
- unless you really know what this hack does.
+ keys are documented in <file:Documentation/admin-guide/sysrq.rst>.
+ Don't say Y unless you really know what this hack does.
 
 config MAGIC_SYSRQ_DEFAULT_ENABLE
hex "Enable magic SysRq key functions by default"
@@ -414,7 +415,7 @@ config MAGIC_SYSRQ_DEFAULT_ENABLE
help
  Specifies which SysRq key functions are enabled by default.
  This may be set to 1 or 0 to enable or disable them all, or
- to a bitmask as described in Documentation/sysrq.txt.
+ to a bitmask as described in Documentation/admin-guide/sysrq.rst.
 
 config MAGIC_SYSRQ_SERIAL
bool "Enable magic SysRq key over serial"
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4] kvm: better MWAIT emulation for guests

2017-03-16 Thread Wanpeng Li
2017-03-16 4:13 GMT+08:00 Radim Krčmář :
> 2017-03-15 21:28+0200, Michael S. Tsirkin:
>> Guests running Mac OS 5, 6, and 7 (Leopard through Lion) have a problem:
>> unless explicitly provided with kernel command line argument
>> "idlehalt=0" they'd implicitly assume MONITOR and MWAIT availability,
>> without checking CPUID.
>>
>> We currently emulate that as a NOP but on VMX we can do better: let
>> guest stop the CPU until timer, IPI or memory change.  CPU will be busy
>> but that isn't any worse than a NOP emulation.
>>
>> Note that mwait within guests is not the same as on real hardware
>> because halt causes an exit while mwait doesn't.  For this reason it
>> might not be a good idea to use the regular MWAIT flag in CPUID to
>> signal this capability.  Add a flag in the hypervisor leaf instead.
>>
>> Additionally, we add a capability for QEMU - e.g. if it knows there's an
>> isolated CPU dedicated for the VCPU it can set the standard MWAIT flag
>> to improve guest behaviour.
>>
>> Reported-by: "Gabriel L. Somlo" 
>> Signed-off-by: Michael S. Tsirkin 
>> ---
>>
>> Note: SVM bits are untested at this point. Seems pretty
>> obvious though.
>>
>> changes from v3:
>> - don't enable capability if cli+mwait blocks interrupts
>> - doc typo fixes (drop ppc doc)
>>
>> changes from v2:
>> - add a capability to allow host userspace to detect new kernels
>> - more documentation to clarify the semantics of the feature flag
>>   and why it's useful
>> - svm support as suggested by Radim
>>
>> changes from v1:
>> - typo fix resulting in rest of leaf flags being overwritten
>>   Reported by: Wanpeng Li 
>> - updated commit log with data about guests helped by this feature
>> - better document differences between mwait and halt for guests
>>
>> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
>> @@ -212,4 +213,28 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, 
>> u64 nsec)
>>   __rem;  \
>>})
>>
>> +static bool kvm_mwait_in_guest(void)
>> +{
>> + unsigned int eax, ebx, ecx;
>> +
>> + if (!cpu_has(&boot_cpu_data, X86_FEATURE_MWAIT))
>> + return -ENODEV;
>> +
>> + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
>> + return -ENODEV;
>> +
>> + /*
>> +  * Intel CPUs without CPUID5_ECX_INTERRUPT_BREAK are problematic as
>> +  * they would allow guest to stop the CPU completely by disabling
>> +  * interrupts then invoking MWAIT.
>> +  */
>> + if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
>> + return -ENODEV;
>> +
>> + cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
>> +
>> + if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK))
>> + return -ENODEV;
>
> The guest is still able to set ecx=0 with MWAIT, which should be the

How can guest rewrite this?

Regards,
Wanpeng Li

> same as not having the CPUID flag, so I'm wondering how this check
> prevents anything harmful ... is it really a cpu "feature"?
>
> If we somehow report ecx bit 1 in CPUID[5], then the guest might try to
> set ecx bit 0 for MWAIT, which will cause #GP(0) and could explain the
> hang that Gabriel is hitting.
>
> Gabriel,
>
>  - do you see VM exits on the "hung" VCPU?
>  - what is your CPU model?
>  - what do you get after running this C program on host and guest?
>
>#include <stdint.h>
>#include <stdio.h>
>
>int main(void) {
> uint32_t eax = 5, ebx, ecx = 0, edx;
> asm ("cpuid" : "+a"(eax), "=b"(ebx), "+c"(ecx), "=d"(edx));
>
> printf("eax=%#08x ebx=%#08x ecx=%#08x edx=%#08x\n", eax, ebx, ecx, 
> edx);
>
> return 0;
>}
>
> Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Gabriel L. Somlo
On Thu, Mar 16, 2017 at 01:41:28AM +0200, Michael S. Tsirkin wrote:
> On Wed, Mar 15, 2017 at 07:35:34PM -0400, Gabriel L. Somlo wrote:
> > On Wed, Mar 15, 2017 at 11:22:18PM +0200, Michael S. Tsirkin wrote:
> > > Guests running Mac OS 5, 6, and 7 (Leopard through Lion) have a problem:
> > > unless explicitly provided with kernel command line argument
> > > "idlehalt=0" they'd implicitly assume MONITOR and MWAIT availability,
> > > without checking CPUID.
> > > 
> > > We currently emulate that as a NOP but on VMX we can do better: let
> > > guest stop the CPU until timer, IPI or memory change.  CPU will be busy
> > > but that isn't any worse than a NOP emulation.
> > > 
> > > Note that mwait within guests is not the same as on real hardware
> > > because halt causes an exit while mwait doesn't.  For this reason it
> > > might not be a good idea to use the regular MWAIT flag in CPUID to
> > > signal this capability.  Add a flag in the hypervisor leaf instead.
> > > 
> > > Additionally, we add a capability for QEMU - e.g. if it knows there's an
> > > isolated CPU dedicated for the VCPU it can set the standard MWAIT flag
> > > to improve guest behaviour.
> > 
> > Same behavior (on the mac pro 1,1 running F22 with custom-compiled
> > kernel from kvm git master, plus this patch on top).
> > 
> > The OS X 10.7 kernel hangs (or at least progresses extremely slowly)
> > on boot, does not bring up guest graphical interface within the first
> > 10 minutes that I waited for it. That, in contrast with the default
> > nop-based emulation where the guest comes up within 30 seconds.
> 
> 
> Thanks a lot, meanwhile I'll try to write a unit-test and experiment
> with various behaviours.
> 
> > I will run another round of tests on a newer Mac (4-year-old macbook
> > air) and report back tomorrow.
> > 
> > Going off on a tangent, why would encouraging otherwise well-behaved
> > guests (like linux ones, for example) to use MWAIT be desirable to
> > begin with ? Is it a matter of minimizing the overhead associated with
> > exiting and re-entering L1 ? Because if so, AFAIR staying inside L1 and
> > running guest-mode MWAIT in a tight loop will actually waste the host
> > CPU without the opportunity to yield to some other L0 thread. Sorry if
> > I fell into the middle of an ongoing conversation on this and missed
> > most of the relevant context, in which case please feel free to ignore
> > me... :)
> > 
> > Thanks,
> > --G
> 
> It's just some experiments I'm running, I'm not ready to describe it
> yet. I thought this part might be useful to at least some guests, so
> trying to upstream it right now.

OK, so on a macbook air running F25 and the latest kvm git master plus
your v5 patch (4.11.0-rc2+), things appear to work.

host-side cpuid output:
eax=0x40 ebx=0x40 ecx=0x03 edx=0x021120

guest-side cpuid output:
eax= ebx= ecx=0x03 edx=

processor   : 3
vendor_id   : GenuineIntel
cpu family  : 6
model   : 42
model name  : Intel(R) Core(TM) i7-2677M CPU @ 1.80GHz
stepping: 7
microcode   : 0x29
cpu MHz : 1157.849
cache size  : 4096 KB
physical id : 0
siblings: 4
core id : 1
cpu cores   : 2
apicid  : 3
initial apicid  : 3
fpu : yes
fpu_exception   : yes
cpuid level : 13
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov 
pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp lm 
constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid 
aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr 
pdcm pcid sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx lahf_lm 
tpr_shadow vnmi flexpriority ept vpid xsaveopt dtherm ida arat pln pts
bugs:
bogomips: 3604.68
clflush size: 64
cache_alignment : 64
address sizes   : 36 bits physical, 48 bits virtual
power management:

After studying your patch a bit more carefully (sorry, it's crazy
around here right now :) ) I realized you're simply trying to
(selectively) decide when to exit L1 and emulate as NOP vs. when to
just allow L1 to execute MONITOR & MWAIT natively.

Is that right ? Because if so, the issues I saw on my MacPro1,1 are
weird and inexplicable, given that allowing L>=1 to run MONITOR/MWAIT
natively was one of the options Alex Graf and Rene Rebe used back in
the very early days of OS X on QEMU, at the time I got involved with
that project. Here's part of an out of tree patch against 3.4 which did
just that, and worked as far as I remember on *any* MWAIT capable
intel chip I had access to back in 2010:

##
# 99-mwait.patch.kvm-kmod (Rene Rebe ) 2010-04-27
##
diff -pNarU5 linux-3.4/arch/x86/kvm/cpuid.c linux-3.4-mac/arch/x86/kvm/cpuid.c
--- linux-3.4/arch/x86

Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Michael S. Tsirkin
On Thu, Mar 16, 2017 at 09:24:27AM -0400, Gabriel L. Somlo wrote:
> After studying your patch a bit more carefully (sorry, it's crazy
> around here right now :) ) I realized you're simply trying to
> (selectively) decide when to exit L1 and emulate as NOP vs. when to
> just allow L1 to execute MONITOR & MWAIT natively.
> 
> Is that right ? Because if so, the issues I saw on my MacPro1,1 are
> weird and inexplicable, given that allowing L>=1 to run MONITOR/MWAIT
> natively was one of the options Alex Graf and Rene Rebe used back in
> the very early days of OS X on QEMU, at the time I got involved with
> that project. Here's part of an out of tree patch against 3.4 which did
> just that, and worked as far as I remember on *any* MWAIT capable
> intel chip I had access to back in 2010:
> 
> ##
> # 99-mwait.patch.kvm-kmod (Rene Rebe ) 2010-04-27
> ##
> diff -pNarU5 linux-3.4/arch/x86/kvm/cpuid.c linux-3.4-mac/arch/x86/kvm/cpuid.c
> --- linux-3.4/arch/x86/kvm/cpuid.c2012-05-20 18:29:13.0 -0400
> +++ linux-3.4-mac/arch/x86/kvm/cpuid.c2012-10-09 11:42:59.921215750 
> -0400
> @@ -222,11 +222,11 @@ static int do_cpuid_ent(struct kvm_cpuid
>   f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
>   F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
>   0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
>   /* cpuid 1.ecx */
>   const u32 kvm_supported_word4_x86_features =
> - F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
> + F(XMM3) | F(PCLMULQDQ) | F(MWAIT) /* DTES64, MONITOR */ |
>   0 /* DS-CPL, VMX, SMX, EST */ |
>   0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
>   F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
>   0 /* Reserved, DCA */ | F(XMM4_1) |
>   F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> diff -pNarU5 linux-3.4/arch/x86/kvm/svm.c linux-3.4-mac/arch/x86/kvm/svm.c
> --- linux-3.4/arch/x86/kvm/svm.c  2012-05-20 18:29:13.0 -0400
> +++ linux-3.4-mac/arch/x86/kvm/svm.c  2012-10-09 11:44:41.598997481 -0400
> @@ -1102,12 +1102,10 @@ static void init_vmcb(struct vcpu_svm *s
>   set_intercept(svm, INTERCEPT_VMSAVE);
>   set_intercept(svm, INTERCEPT_STGI);
>   set_intercept(svm, INTERCEPT_CLGI);
>   set_intercept(svm, INTERCEPT_SKINIT);
>   set_intercept(svm, INTERCEPT_WBINVD);
> - set_intercept(svm, INTERCEPT_MONITOR);
> - set_intercept(svm, INTERCEPT_MWAIT);
>   set_intercept(svm, INTERCEPT_XSETBV);
>  
>   control->iopm_base_pa = iopm_base;
>   control->msrpm_base_pa = __pa(svm->msrpm);
>   control->int_ctl = V_INTR_MASKING_MASK;
> diff -pNarU5 linux-3.4/arch/x86/kvm/vmx.c linux-3.4-mac/arch/x86/kvm/vmx.c
> --- linux-3.4/arch/x86/kvm/vmx.c  2012-05-20 18:29:13.0 -0400
> +++ linux-3.4-mac/arch/x86/kvm/vmx.c  2012-10-09 11:42:59.925215977 -0400
> @@ -1938,11 +1938,11 @@ static __init void nested_vmx_setup_ctls
>   nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
>   nested_vmx_procbased_ctls_low = 0;
>   nested_vmx_procbased_ctls_high &=
>   CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_USE_TSC_OFFSETING |
>   CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
> - CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
> + CPU_BASED_CR3_LOAD_EXITING |
>   CPU_BASED_CR3_STORE_EXITING |
>  #ifdef CONFIG_X86_64
>   CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
>  #endif
>   CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
> @@ -2404,12 +2404,10 @@ static __init int setup_vmcs_config(stru
> CPU_BASED_CR3_LOAD_EXITING |
> CPU_BASED_CR3_STORE_EXITING |
> CPU_BASED_USE_IO_BITMAPS |
> CPU_BASED_MOV_DR_EXITING |
> CPU_BASED_USE_TSC_OFFSETING |
> -   CPU_BASED_MWAIT_EXITING |
> -   CPU_BASED_MONITOR_EXITING |
> CPU_BASED_INVLPG_EXITING |
> CPU_BASED_RDPMC_EXITING;
>  
>   opt = CPU_BASED_TPR_SHADOW |
> CPU_BASED_USE_MSR_BITMAPS |
> 
> If all you're trying to do is (selectively) revert to this behavior,
> that "shouldn't" mess it up for the MacPro either, so I'm thoroughly
> confused at this point :)

Yes.  Me too. Want to try that other patch and see what happens?

> Back in 2010, running MWAIT in L>=1  behaved 100% exactly like a NOP,
> didn't power down the physical CPU, just immediately moved on to the
> next instruction. As such, there was no power saving and no
> opportunity to yield to another L0 thread either, unlike with NOP
> emulation at L0.
> 
> Did that change on newer Intel chips (i.e., is guest-mode MWAIT now
> doing something smarter than just acting as a guest-mode NOP) ?
> 
> Thanks

Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Radim Krčmář
2017-03-16 09:24-0400, Gabriel L. Somlo:
> On Thu, Mar 16, 2017 at 01:41:28AM +0200, Michael S. Tsirkin wrote:
> > On Wed, Mar 15, 2017 at 07:35:34PM -0400, Gabriel L. Somlo wrote:
> > > On Wed, Mar 15, 2017 at 11:22:18PM +0200, Michael S. Tsirkin wrote:
> > > > Guests running Mac OS 5, 6, and 7 (Leopard through Lion) have a problem:
> > > > unless explicitly provided with kernel command line argument
> > > > "idlehalt=0" they'd implicitly assume MONITOR and MWAIT availability,
> > > > without checking CPUID.
> > > > 
> > > > We currently emulate that as a NOP but on VMX we can do better: let
> > > > guest stop the CPU until timer, IPI or memory change.  CPU will be busy
> > > > but that isn't any worse than a NOP emulation.
> > > > 
> > > > Note that mwait within guests is not the same as on real hardware
> > > > because halt causes an exit while mwait doesn't.  For this reason it
> > > > might not be a good idea to use the regular MWAIT flag in CPUID to
> > > > signal this capability.  Add a flag in the hypervisor leaf instead.
> > > > 
> > > > Additionally, we add a capability for QEMU - e.g. if it knows there's an
> > > > isolated CPU dedicated for the VCPU it can set the standard MWAIT flag
> > > > to improve guest behaviour.
> > > 
> > > Same behavior (on the mac pro 1,1 running F22 with custom-compiled
> > > kernel from kvm git master, plus this patch on top).
> > > 
> > > The OS X 10.7 kernel hangs (or at least progresses extremely slowly)
> > > on boot, does not bring up guest graphical interface within the first
> > > 10 minutes that I waited for it. That, in contrast with the default
> > > nop-based emulation where the guest comes up within 30 seconds.
> > 
> > 
> > Thanks a lot, meanwhile I'll try to write a unit-test and experiment
> > with various behaviours.
> > 
> > > I will run another round of tests on a newer Mac (4-year-old macbook
> > > air) and report back tomorrow.
> > > 
> > > Going off on a tangent, why would encouraging otherwise well-behaved
> > > guests (like linux ones, for example) to use MWAIT be desirable to
> > > begin with ? Is it a matter of minimizing the overhead associated with
> > > exiting and re-entering L1 ? Because if so, AFAIR staying inside L1 and
> > > running guest-mode MWAIT in a tight loop will actually waste the host
> > > CPU without the opportunity to yield to some other L0 thread. Sorry if
> > > I fell into the middle of an ongoing conversation on this and missed
> > > most of the relevant context, in which case please feel free to ignore
> > > me... :)
> > > 
> > > Thanks,
> > > --G
> > 
> > It's just some experiments I'm running, I'm not ready to describe it
> > yet. I thought this part might be useful to at least some guests, so
> > trying to upstream it right now.
> 
> OK, so on a macbook air running F25 and the latest kvm git master plus
> your v5 patch (4.11.0-rc2+), things appear to work.
> 
> host-side cpuid output:
> eax=0x40 ebx=0x40 ecx=0x03 edx=0x021120
> 
> guest-side cpuid output:
> eax= ebx= ecx=0x03 edx=
> 
> processor : 3
> vendor_id : GenuineIntel
> cpu family: 6
> model : 42
> model name: Intel(R) Core(TM) i7-2677M CPU @ 1.80GHz
> stepping  : 7
> microcode : 0x29
> cpu MHz   : 1157.849
> cache size: 4096 KB
> physical id   : 0
> siblings  : 4
> core id   : 1
> cpu cores : 2
> apicid: 3
> initial apicid: 3
> fpu   : yes
> fpu_exception : yes
> cpuid level   : 13
> wp: yes
> flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov 
> pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp 
> lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc 
> cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 
> cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave 
> avx lahf_lm tpr_shadow vnmi flexpriority ept vpid xsaveopt dtherm ida arat 
> pln pts
> bugs  :
> bogomips  : 3604.68
> clflush size  : 64
> cache_alignment   : 64
> address sizes : 36 bits physical, 48 bits virtual
> power management:
> 
> After studying your patch a bit more carefully (sorry, it's crazy
> around here right now :) ) I realized you're simply trying to
> (selectively) decide when to exit L1 and emulate as NOP vs. when to
> just allow L1 to execute MONITOR & MWAIT natively.
> 
> Is that right ? Because if so, the issues I saw on my MacPro1,1 are
> weird and inexplicable, given that allowing L>=1 to run MONITOR/MWAIT
> natively was one of the options Alex Graf and Rene Rebe used back in
> the very early days of OS X on QEMU, at the time I got involved with
> that project. Here's part of an out of tree patch against 3.4 which did
> just that, and worked as far as I remember on *any* MWAIT capable
> intel chip I had access to back in 2010:
> 
> ###

Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Gabriel L. Somlo
On Thu, Mar 16, 2017 at 04:04:12PM +0200, Michael S. Tsirkin wrote:
> On Thu, Mar 16, 2017 at 09:24:27AM -0400, Gabriel L. Somlo wrote:
> > After studying your patch a bit more carefully (sorry, it's crazy
> > around here right now :) ) I realized you're simply trying to
> > (selectively) decide when to exit L1 and emulate as NOP vs. when to
> > just allow L1 to execute MONITOR & MWAIT natively.
> > 
> > Is that right ? Because if so, the issues I saw on my MacPro1,1 are
> > weird and inexplicable, given that allowing L>=1 to run MONITOR/MWAIT
> > natively was one of the options Alex Graf and Rene Rebe used back in
> > the very early days of OS X on QEMU, at the time I got involved with
> > that project. Here's part of an out of tree patch against 3.4 which did
> > just that, and worked as far as I remember on *any* MWAIT capable
> > intel chip I had access to back in 2010:
> > 
> > ##
> > # 99-mwait.patch.kvm-kmod (Rene Rebe ) 2010-04-27
> > ##
> > diff -pNarU5 linux-3.4/arch/x86/kvm/cpuid.c 
> > linux-3.4-mac/arch/x86/kvm/cpuid.c
> > --- linux-3.4/arch/x86/kvm/cpuid.c  2012-05-20 18:29:13.0 -0400
> > +++ linux-3.4-mac/arch/x86/kvm/cpuid.c  2012-10-09 11:42:59.921215750 
> > -0400
> > @@ -222,11 +222,11 @@ static int do_cpuid_ent(struct kvm_cpuid
> > f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
> > F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
> > 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
> > /* cpuid 1.ecx */
> > const u32 kvm_supported_word4_x86_features =
> > -   F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
> > +   F(XMM3) | F(PCLMULQDQ) | F(MWAIT) /* DTES64, MONITOR */ |
> > 0 /* DS-CPL, VMX, SMX, EST */ |
> > 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
> > F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> > 0 /* Reserved, DCA */ | F(XMM4_1) |
> > F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> > diff -pNarU5 linux-3.4/arch/x86/kvm/svm.c linux-3.4-mac/arch/x86/kvm/svm.c
> > --- linux-3.4/arch/x86/kvm/svm.c2012-05-20 18:29:13.0 -0400
> > +++ linux-3.4-mac/arch/x86/kvm/svm.c2012-10-09 11:44:41.598997481 
> > -0400
> > @@ -1102,12 +1102,10 @@ static void init_vmcb(struct vcpu_svm *s
> > set_intercept(svm, INTERCEPT_VMSAVE);
> > set_intercept(svm, INTERCEPT_STGI);
> > set_intercept(svm, INTERCEPT_CLGI);
> > set_intercept(svm, INTERCEPT_SKINIT);
> > set_intercept(svm, INTERCEPT_WBINVD);
> > -   set_intercept(svm, INTERCEPT_MONITOR);
> > -   set_intercept(svm, INTERCEPT_MWAIT);
> > set_intercept(svm, INTERCEPT_XSETBV);
> >  
> > control->iopm_base_pa = iopm_base;
> > control->msrpm_base_pa = __pa(svm->msrpm);
> > control->int_ctl = V_INTR_MASKING_MASK;
> > diff -pNarU5 linux-3.4/arch/x86/kvm/vmx.c linux-3.4-mac/arch/x86/kvm/vmx.c
> > --- linux-3.4/arch/x86/kvm/vmx.c2012-05-20 18:29:13.0 -0400
> > +++ linux-3.4-mac/arch/x86/kvm/vmx.c2012-10-09 11:42:59.925215977 
> > -0400
> > @@ -1938,11 +1938,11 @@ static __init void nested_vmx_setup_ctls
> > nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
> > nested_vmx_procbased_ctls_low = 0;
> > nested_vmx_procbased_ctls_high &=
> > CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_USE_TSC_OFFSETING |
> > CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
> > -   CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
> > +   CPU_BASED_CR3_LOAD_EXITING |
> > CPU_BASED_CR3_STORE_EXITING |
> >  #ifdef CONFIG_X86_64
> > CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
> >  #endif
> > CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
> > @@ -2404,12 +2404,10 @@ static __init int setup_vmcs_config(stru
> >   CPU_BASED_CR3_LOAD_EXITING |
> >   CPU_BASED_CR3_STORE_EXITING |
> >   CPU_BASED_USE_IO_BITMAPS |
> >   CPU_BASED_MOV_DR_EXITING |
> >   CPU_BASED_USE_TSC_OFFSETING |
> > - CPU_BASED_MWAIT_EXITING |
> > - CPU_BASED_MONITOR_EXITING |
> >   CPU_BASED_INVLPG_EXITING |
> >   CPU_BASED_RDPMC_EXITING;
> >  
> > opt = CPU_BASED_TPR_SHADOW |
> >   CPU_BASED_USE_MSR_BITMAPS |
> > 
> > If all you're trying to do is (selectively) revert to this behavior,
> > that "shouldn't" mess it up for the MacPro either, so I'm thoroughly
> > confused at this point :)
> 
> Yes.  Me too. Want to try that other patch and see what happens?

You mean the old 3.4 patch against current KVM ? I'll try to do that,
might take me a while :)

> > Back in 2010, running MWAIT in L>=1  behaved 100% exactly like a NOP,
> > didn't power down the physical CPU, just immediately moved on to the
> > next instruction. As such,

Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Michael S. Tsirkin
On Thu, Mar 16, 2017 at 10:58:20AM -0400, Gabriel L. Somlo wrote:
> On Thu, Mar 16, 2017 at 04:04:12PM +0200, Michael S. Tsirkin wrote:
> > On Thu, Mar 16, 2017 at 09:24:27AM -0400, Gabriel L. Somlo wrote:
> > > After studying your patch a bit more carefully (sorry, it's crazy
> > > around here right now :) ) I realized you're simply trying to
> > > (selectively) decide when to exit L1 and emulate as NOP vs. when to
> > > just allow L1 to execute MONITOR & MWAIT natively.
> > > 
> > > Is that right ? Because if so, the issues I saw on my MacPro1,1 are
> > > weird and inexplicable, given that allowing L>=1 to run MONITOR/MWAIT
> > > natively was one of the options Alex Graf and Rene Rebe used back in
> > > the very early days of OS X on QEMU, at the time I got involved with
> > > that project. Here's part of an out of tree patch against 3.4 which did
> > > just that, and worked as far as I remember on *any* MWAIT capable
> > > intel chip I had access to back in 2010:
> > > 
> > > ##
> > > # 99-mwait.patch.kvm-kmod (Rene Rebe ) 2010-04-27
> > > ##
> > > diff -pNarU5 linux-3.4/arch/x86/kvm/cpuid.c 
> > > linux-3.4-mac/arch/x86/kvm/cpuid.c
> > > --- linux-3.4/arch/x86/kvm/cpuid.c2012-05-20 18:29:13.0 
> > > -0400
> > > +++ linux-3.4-mac/arch/x86/kvm/cpuid.c2012-10-09 11:42:59.921215750 
> > > -0400
> > > @@ -222,11 +222,11 @@ static int do_cpuid_ent(struct kvm_cpuid
> > >   f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
> > >   F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
> > >   0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
> > >   /* cpuid 1.ecx */
> > >   const u32 kvm_supported_word4_x86_features =
> > > - F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
> > > + F(XMM3) | F(PCLMULQDQ) | F(MWAIT) /* DTES64, MONITOR */ |
> > >   0 /* DS-CPL, VMX, SMX, EST */ |
> > >   0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
> > >   F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> > >   0 /* Reserved, DCA */ | F(XMM4_1) |
> > >   F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> > > diff -pNarU5 linux-3.4/arch/x86/kvm/svm.c linux-3.4-mac/arch/x86/kvm/svm.c
> > > --- linux-3.4/arch/x86/kvm/svm.c  2012-05-20 18:29:13.0 -0400
> > > +++ linux-3.4-mac/arch/x86/kvm/svm.c  2012-10-09 11:44:41.598997481 
> > > -0400
> > > @@ -1102,12 +1102,10 @@ static void init_vmcb(struct vcpu_svm *s
> > >   set_intercept(svm, INTERCEPT_VMSAVE);
> > >   set_intercept(svm, INTERCEPT_STGI);
> > >   set_intercept(svm, INTERCEPT_CLGI);
> > >   set_intercept(svm, INTERCEPT_SKINIT);
> > >   set_intercept(svm, INTERCEPT_WBINVD);
> > > - set_intercept(svm, INTERCEPT_MONITOR);
> > > - set_intercept(svm, INTERCEPT_MWAIT);
> > >   set_intercept(svm, INTERCEPT_XSETBV);
> > >  
> > >   control->iopm_base_pa = iopm_base;
> > >   control->msrpm_base_pa = __pa(svm->msrpm);
> > >   control->int_ctl = V_INTR_MASKING_MASK;
> > > diff -pNarU5 linux-3.4/arch/x86/kvm/vmx.c linux-3.4-mac/arch/x86/kvm/vmx.c
> > > --- linux-3.4/arch/x86/kvm/vmx.c  2012-05-20 18:29:13.0 -0400
> > > +++ linux-3.4-mac/arch/x86/kvm/vmx.c  2012-10-09 11:42:59.925215977 
> > > -0400
> > > @@ -1938,11 +1938,11 @@ static __init void nested_vmx_setup_ctls
> > >   nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
> > >   nested_vmx_procbased_ctls_low = 0;
> > >   nested_vmx_procbased_ctls_high &=
> > >   CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_USE_TSC_OFFSETING |
> > >   CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
> > > - CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
> > > + CPU_BASED_CR3_LOAD_EXITING |
> > >   CPU_BASED_CR3_STORE_EXITING |
> > >  #ifdef CONFIG_X86_64
> > >   CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
> > >  #endif
> > >   CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
> > > @@ -2404,12 +2404,10 @@ static __init int setup_vmcs_config(stru
> > > CPU_BASED_CR3_LOAD_EXITING |
> > > CPU_BASED_CR3_STORE_EXITING |
> > > CPU_BASED_USE_IO_BITMAPS |
> > > CPU_BASED_MOV_DR_EXITING |
> > > CPU_BASED_USE_TSC_OFFSETING |
> > > -   CPU_BASED_MWAIT_EXITING |
> > > -   CPU_BASED_MONITOR_EXITING |
> > > CPU_BASED_INVLPG_EXITING |
> > > CPU_BASED_RDPMC_EXITING;
> > >  
> > >   opt = CPU_BASED_TPR_SHADOW |
> > > CPU_BASED_USE_MSR_BITMAPS |
> > > 
> > > If all you're trying to do is (selectively) revert to this behavior,
> > > that "shouldn't" mess it up for the MacPro either, so I'm thoroughly
> > > confused at this point :)
> > 
> > Yes.  Me too. Want to try that other patch and see what happens?
> 
> You mean the old 3.4 patch against current KVM ? I'll try to do that,
> might take me a while :)

I can rebase

Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Radim Krčmář
2017-03-16 10:58-0400, Gabriel L. Somlo:
> On Thu, Mar 16, 2017 at 04:04:12PM +0200, Michael S. Tsirkin wrote:
> > On Thu, Mar 16, 2017 at 09:24:27AM -0400, Gabriel L. Somlo wrote:
> > > After studying your patch a bit more carefully (sorry, it's crazy
> > > around here right now :) ) I realized you're simply trying to
> > > (selectively) decide when to exit L1 and emulate as NOP vs. when to
> > > just allow L1 to execute MONITOR & MWAIT natively.
> > > 
> > > Is that right ? Because if so, the issues I saw on my MacPro1,1 are
> > > weird and inexplicable, given that allowing L>=1 to run MONITOR/MWAIT
> > > natively was one of the options Alex Graf and Rene Rebe used back in
> > > the very early days of OS X on QEMU, at the time I got involved with
> > > that project. Here's part of an out of tree patch against 3.4 which did
> > > just that, and worked as far as I remember on *any* MWAIT capable
> > > intel chip I had access to back in 2010:
> > > 
> > > ##
> > > # 99-mwait.patch.kvm-kmod (Rene Rebe ) 2010-04-27
> > > ##
> > > diff -pNarU5 linux-3.4/arch/x86/kvm/cpuid.c 
> > > linux-3.4-mac/arch/x86/kvm/cpuid.c
> > > --- linux-3.4/arch/x86/kvm/cpuid.c2012-05-20 18:29:13.0 
> > > -0400
> > > +++ linux-3.4-mac/arch/x86/kvm/cpuid.c2012-10-09 11:42:59.921215750 
> > > -0400
> > > @@ -222,11 +222,11 @@ static int do_cpuid_ent(struct kvm_cpuid
> > >   f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
> > >   F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
> > >   0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
> > >   /* cpuid 1.ecx */
> > >   const u32 kvm_supported_word4_x86_features =
> > > - F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
> > > + F(XMM3) | F(PCLMULQDQ) | F(MWAIT) /* DTES64, MONITOR */ |
> > >   0 /* DS-CPL, VMX, SMX, EST */ |
> > >   0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
> > >   F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> > >   0 /* Reserved, DCA */ | F(XMM4_1) |
> > >   F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> > > diff -pNarU5 linux-3.4/arch/x86/kvm/svm.c linux-3.4-mac/arch/x86/kvm/svm.c
> > > --- linux-3.4/arch/x86/kvm/svm.c  2012-05-20 18:29:13.0 -0400
> > > +++ linux-3.4-mac/arch/x86/kvm/svm.c  2012-10-09 11:44:41.598997481 
> > > -0400
> > > @@ -1102,12 +1102,10 @@ static void init_vmcb(struct vcpu_svm *s
> > >   set_intercept(svm, INTERCEPT_VMSAVE);
> > >   set_intercept(svm, INTERCEPT_STGI);
> > >   set_intercept(svm, INTERCEPT_CLGI);
> > >   set_intercept(svm, INTERCEPT_SKINIT);
> > >   set_intercept(svm, INTERCEPT_WBINVD);
> > > - set_intercept(svm, INTERCEPT_MONITOR);
> > > - set_intercept(svm, INTERCEPT_MWAIT);
> > >   set_intercept(svm, INTERCEPT_XSETBV);
> > >  
> > >   control->iopm_base_pa = iopm_base;
> > >   control->msrpm_base_pa = __pa(svm->msrpm);
> > >   control->int_ctl = V_INTR_MASKING_MASK;
> > > diff -pNarU5 linux-3.4/arch/x86/kvm/vmx.c linux-3.4-mac/arch/x86/kvm/vmx.c
> > > --- linux-3.4/arch/x86/kvm/vmx.c  2012-05-20 18:29:13.0 -0400
> > > +++ linux-3.4-mac/arch/x86/kvm/vmx.c  2012-10-09 11:42:59.925215977 
> > > -0400
> > > @@ -1938,11 +1938,11 @@ static __init void nested_vmx_setup_ctls
> > >   nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
> > >   nested_vmx_procbased_ctls_low = 0;
> > >   nested_vmx_procbased_ctls_high &=
> > >   CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_USE_TSC_OFFSETING |
> > >   CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
> > > - CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
> > > + CPU_BASED_CR3_LOAD_EXITING |
> > >   CPU_BASED_CR3_STORE_EXITING |
> > >  #ifdef CONFIG_X86_64
> > >   CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
> > >  #endif
> > >   CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
> > > @@ -2404,12 +2404,10 @@ static __init int setup_vmcs_config(stru
> > > CPU_BASED_CR3_LOAD_EXITING |
> > > CPU_BASED_CR3_STORE_EXITING |
> > > CPU_BASED_USE_IO_BITMAPS |
> > > CPU_BASED_MOV_DR_EXITING |
> > > CPU_BASED_USE_TSC_OFFSETING |
> > > -   CPU_BASED_MWAIT_EXITING |
> > > -   CPU_BASED_MONITOR_EXITING |
> > > CPU_BASED_INVLPG_EXITING |
> > > CPU_BASED_RDPMC_EXITING;
> > >  
> > >   opt = CPU_BASED_TPR_SHADOW |
> > > CPU_BASED_USE_MSR_BITMAPS |
> > > 
> > > If all you're trying to do is (selectively) revert to this behavior,
> > > that "shouldn't" mess it up for the MacPro either, so I'm thoroughly
> > > confused at this point :)
> > 
> > Yes.  Me too. Want to try that other patch and see what happens?
> 
> You mean the old 3.4 patch against current KVM ? I'll try to do that,
> might take me a while :)

Michael's patch already did most of t

[PATCH v2] Documentation: sync_file.txt: Fix typos

2017-03-16 Thread Tamara Diaconita
Fix spelling mistakes in sync_file.txt to make documentation clear. 

Signed-off-by: Tamara Diaconita 
---
Changes since v1:
*Add the file name to the subject of the patch.
*Change the description of the patch.

 Documentation/sync_file.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/sync_file.txt b/Documentation/sync_file.txt
index 269681a..c3d033a 100644
--- a/Documentation/sync_file.txt
+++ b/Documentation/sync_file.txt
@@ -37,7 +37,7 @@ dma_fence_signal(), when it has finished using (or 
processing) that buffer.
 Out-fences are fences that the driver creates.
 
 On the other hand if the driver receives fence(s) through a sync_file from
-userspace we call these fence(s) 'in-fences'. Receiveing in-fences means that
+userspace we call these fence(s) 'in-fences'. Receiving in-fences means that
 we need to wait for the fence(s) to signal before using any buffer related to
 the in-fences.
 
-- 
2.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] Documentation: vfio-mediated-device.txt: Fix typos

2017-03-16 Thread Tamara Diaconita
Correct spelling mistakes in vfio-mediated-device.txt to make
documentation clear.

Signed-off-by: Tamara Diaconita 
---
Changes since v1:
*Add file name to the subject of the patch.
*Changed the description of the patch.
*Add one more change to the file: from 'using a minicom' to 'using minicom'.
 Documentation/vfio-mediated-device.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/vfio-mediated-device.txt 
b/Documentation/vfio-mediated-device.txt
index d226c7a..6f994ab 100644
--- a/Documentation/vfio-mediated-device.txt
+++ b/Documentation/vfio-mediated-device.txt
@@ -380,7 +380,7 @@ card.
/dev/ttyS1, UART: 16550A, Port: 0xc150, IRQ: 10
/dev/ttyS2, UART: 16550A, Port: 0xc158, IRQ: 10
 
-6. Using a minicom or any terminal enulation program, open port /dev/ttyS1 or
+6. Using minicom or any terminal emulation program, open port /dev/ttyS1 or
/dev/ttyS2 with hardware flow control disabled.
 
 7. Type data on the minicom terminal or send data to the terminal emulation
-- 
2.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Radim Krčmář
2017-03-16 11:44-0400, Gabriel L. Somlo:
> On Thu, Mar 16, 2017 at 03:08:07PM +0100, Radim Krčmář wrote:
>> 2017-03-16 09:24-0400, Gabriel L. Somlo:
>> > On Thu, Mar 16, 2017 at 01:41:28AM +0200, Michael S. Tsirkin wrote:
>> > > On Wed, Mar 15, 2017 at 07:35:34PM -0400, Gabriel L. Somlo wrote:
>> > > > On Wed, Mar 15, 2017 at 11:22:18PM +0200, Michael S. Tsirkin wrote:
>> > > > > Guests running Mac OS 5, 6, and 7 (Leopard through Lion) have a 
>> > > > > problem:
>> > > > > unless explicitly provided with kernel command line argument
>> > > > > "idlehalt=0" they'd implicitly assume MONITOR and MWAIT availability,
>> > > > > without checking CPUID.
>> > > > > 
>> > > > > We currently emulate that as a NOP but on VMX we can do better: let
>> > > > > guest stop the CPU until timer, IPI or memory change.  CPU will be 
>> > > > > busy
>> > > > > but that isn't any worse than a NOP emulation.
>> > > > > 
>> > > > > Note that mwait within guests is not the same as on real hardware
>> > > > > because halt causes an exit while mwait doesn't.  For this reason it
>> > > > > might not be a good idea to use the regular MWAIT flag in CPUID to
>> > > > > signal this capability.  Add a flag in the hypervisor leaf instead.
>> > > > > 
>> > > > > Additionally, we add a capability for QEMU - e.g. if it knows 
>> > > > > there's an
>> > > > > isolated CPU dedicated for the VCPU it can set the standard MWAIT 
>> > > > > flag
>> > > > > to improve guest behaviour.
>> > > > 
>> > > > Same behavior (on the mac pro 1,1 running F22 with custom-compiled
>> > > > kernel from kvm git master, plus this patch on top).
>> > > > 
>> > > > The OS X 10.7 kernel hangs (or at least progresses extremely slowly)
>> > > > on boot, does not bring up guest graphical interface within the first
>> > > > 10 minutes that I waited for it. That, in contrast with the default
>> > > > nop-based emulation where the guest comes up within 30 seconds.
>> > > 
>> > > 
>> > > Thanks a lot, meanwhile I'll try to write a unit-test and experiment
>> > > with various behaviours.
>> > > 
>> > > > I will run another round of tests on a newer Mac (4-year-old macbook
>> > > > air) and report back tomorrow.
>> > > > 
>> > > > Going off on a tangent, why would encouraging otherwise well-behaved
>> > > > guests (like linux ones, for example) to use MWAIT be desirable to
>> > > > begin with ? Is it a matter of minimizing the overhead associated with
>> > > > exiting and re-entering L1 ? Because if so, AFAIR staying inside L1 and
>> > > > running guest-mode MWAIT in a tight loop will actually waste the host
>> > > > CPU without the opportunity to yield to some other L0 thread. Sorry if
>> > > > I fell into the middle of an ongoing conversation on this and missed
>> > > > most of the relevant context, in which case please feel free to ignore
>> > > > me... :)
>> > > > 
>> > > > Thanks,
>> > > > --G
>> > > 
>> > > It's just some experiments I'm running, I'm not ready to describe it
>> > > yet. I thought this part might be useful to at least some guests, so
>> > > trying to upstream it right now.
>> > 
>> > OK, so on a macbook air running F25 and the latest kvm git master plus
>> > your v5 patch (4.11.0-rc2+), things appear to work.
>> > 
>> > host-side cpuid output:
>> > eax=0x40 ebx=0x40 ecx=0x03 edx=0x021120
>> > 
>> > guest-side cpuid output:
>> > eax= ebx= ecx=0x03 edx=
>> > 
>> > processor  : 3
>> > vendor_id  : GenuineIntel
>> > cpu family : 6
>> > model  : 42
>> > model name : Intel(R) Core(TM) i7-2677M CPU @ 1.80GHz
>> > stepping   : 7
>> > microcode  : 0x29
>> > cpu MHz: 1157.849
>> > cache size : 4096 KB
>> > physical id: 0
>> > siblings   : 4
>> > core id: 1
>> > cpu cores  : 2
>> > apicid : 3
>> > initial apicid : 3
>> > fpu: yes
>> > fpu_exception  : yes
>> > cpuid level: 13
>> > wp : yes
>> > flags  : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge 
>> > mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall 
>> > nx rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology 
>> > nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx 
>> > est tm2 ssse3 cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic popcnt 
>> > tsc_deadline_timer aes xsave avx lahf_lm tpr_shadow vnmi flexpriority ept 
>> > vpid xsaveopt dtherm ida arat pln pts
>> > bugs   :
>> > bogomips   : 3604.68
>> > clflush size   : 64
>> > cache_alignment: 64
>> > address sizes  : 36 bits physical, 48 bits virtual
>> > power management:
>> > 
>> > After studying your patch a bit more carefully (sorry, it's crazy
>> > around here right now :) ) I realized you're simply trying to
>> > (selectively) decide when to exit L1 and emulate as NOP vs. when to
>> > just allow L1 to execute MONITOR & MWAIT natively.
>> > 
>> > Is that right ? Because if so, the issues I saw on my MacPro1,1 are
>> > we

Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Gabriel L. Somlo
On Thu, Mar 16, 2017 at 03:08:07PM +0100, Radim Krčmář wrote:
> 2017-03-16 09:24-0400, Gabriel L. Somlo:
> > On Thu, Mar 16, 2017 at 01:41:28AM +0200, Michael S. Tsirkin wrote:
> > > On Wed, Mar 15, 2017 at 07:35:34PM -0400, Gabriel L. Somlo wrote:
> > > > On Wed, Mar 15, 2017 at 11:22:18PM +0200, Michael S. Tsirkin wrote:
> > > > > Guests running Mac OS 5, 6, and 7 (Leopard through Lion) have a 
> > > > > problem:
> > > > > unless explicitly provided with kernel command line argument
> > > > > "idlehalt=0" they'd implicitly assume MONITOR and MWAIT availability,
> > > > > without checking CPUID.
> > > > > 
> > > > > We currently emulate that as a NOP but on VMX we can do better: let
> > > > > guest stop the CPU until timer, IPI or memory change.  CPU will be 
> > > > > busy
> > > > > but that isn't any worse than a NOP emulation.
> > > > > 
> > > > > Note that mwait within guests is not the same as on real hardware
> > > > > because halt causes an exit while mwait doesn't.  For this reason it
> > > > > might not be a good idea to use the regular MWAIT flag in CPUID to
> > > > > signal this capability.  Add a flag in the hypervisor leaf instead.
> > > > > 
> > > > > Additionally, we add a capability for QEMU - e.g. if it knows there's 
> > > > > an
> > > > > isolated CPU dedicated for the VCPU it can set the standard MWAIT flag
> > > > > to improve guest behaviour.
> > > > 
> > > > Same behavior (on the mac pro 1,1 running F22 with custom-compiled
> > > > kernel from kvm git master, plus this patch on top).
> > > > 
> > > > The OS X 10.7 kernel hangs (or at least progresses extremely slowly)
> > > > on boot, does not bring up guest graphical interface within the first
> > > > 10 minutes that I waited for it. That, in contrast with the default
> > > > nop-based emulation where the guest comes up within 30 seconds.
> > > 
> > > 
> > > Thanks a lot, meanwhile I'll try to write a unit-test and experiment
> > > with various behaviours.
> > > 
> > > > I will run another round of tests on a newer Mac (4-year-old macbook
> > > > air) and report back tomorrow.
> > > > 
> > > > Going off on a tangent, why would encouraging otherwise well-behaved
> > > > guests (like linux ones, for example) to use MWAIT be desirable to
> > > > begin with ? Is it a matter of minimizing the overhead associated with
> > > > exiting and re-entering L1 ? Because if so, AFAIR staying inside L1 and
> > > > running guest-mode MWAIT in a tight loop will actually waste the host
> > > > CPU without the opportunity to yield to some other L0 thread. Sorry if
> > > > I fell into the middle of an ongoing conversation on this and missed
> > > > most of the relevant context, in which case please feel free to ignore
> > > > me... :)
> > > > 
> > > > Thanks,
> > > > --G
> > > 
> > > It's just some experiments I'm running, I'm not ready to describe it
> > > yet. I thought this part might be useful to at least some guests, so
> > > trying to upstream it right now.
> > 
> > OK, so on a macbook air running F25 and the latest kvm git master plus
> > your v5 patch (4.11.0-rc2+), things appear to work.
> > 
> > host-side cpuid output:
> > eax=0x40 ebx=0x40 ecx=0x03 edx=0x021120
> > 
> > guest-side cpuid output:
> > eax= ebx= ecx=0x03 edx=
> > 
> > processor   : 3
> > vendor_id   : GenuineIntel
> > cpu family  : 6
> > model   : 42
> > model name  : Intel(R) Core(TM) i7-2677M CPU @ 1.80GHz
> > stepping: 7
> > microcode   : 0x29
> > cpu MHz : 1157.849
> > cache size  : 4096 KB
> > physical id : 0
> > siblings: 4
> > core id : 1
> > cpu cores   : 2
> > apicid  : 3
> > initial apicid  : 3
> > fpu : yes
> > fpu_exception   : yes
> > cpuid level : 13
> > wp  : yes
> > flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge 
> > mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall 
> > nx rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology 
> > nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx 
> > est tm2 ssse3 cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic popcnt 
> > tsc_deadline_timer aes xsave avx lahf_lm tpr_shadow vnmi flexpriority ept 
> > vpid xsaveopt dtherm ida arat pln pts
> > bugs:
> > bogomips: 3604.68
> > clflush size: 64
> > cache_alignment : 64
> > address sizes   : 36 bits physical, 48 bits virtual
> > power management:
> > 
> > After studying your patch a bit more carefully (sorry, it's crazy
> > around here right now :) ) I realized you're simply trying to
> > (selectively) decide when to exit L1 and emulate as NOP vs. when to
> > just allow L1 to execute MONITOR & MWAIT natively.
> > 
> > Is that right ? Because if so, the issues I saw on my MacPro1,1 are
> > weird and inexplicable, given that allowing L>=1 to run MONITOR/MWAIT
> > natively was one of the options Alex Graf and Rene Rebe used back in
> > the very early 

Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Radim Krčmář
2017-03-16 16:35+0100, Radim Krčmář:
> 2017-03-16 10:58-0400, Gabriel L. Somlo:
>> The intel manual said the same thing back in 2010 as well. However,
>> regardless of how any flags were set, interrupt-window exiting or not,
>> "normal" L1 MWAIT behavior was that it woke up immediately regardless.
>> Remember, never going to sleep is still correct ("normal" ?) behavior
>> per the ISA definition of MWAIT :)
> 
> I'll write a simple kvm-unit-test to better understand why it is broken
> for you ...

Please get git://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git

and try this, thanks!

---8<---
x86/mwait: crappy test

`./configure && make` to build it, then follow the comment in code to
try a few cases.

---
 x86/Makefile.common |  1 +
 x86/mwait.c | 41 +
 2 files changed, 42 insertions(+)
 create mode 100644 x86/mwait.c

diff --git a/x86/Makefile.common b/x86/Makefile.common
index 1dad18ba26e1..1e708a6acd39 100644
--- a/x86/Makefile.common
+++ b/x86/Makefile.common
@@ -46,6 +46,7 @@ tests-common = $(TEST_DIR)/vmexit.flat $(TEST_DIR)/tsc.flat \
$(TEST_DIR)/tsc_adjust.flat $(TEST_DIR)/asyncpf.flat \
$(TEST_DIR)/init.flat $(TEST_DIR)/smap.flat \
$(TEST_DIR)/hyperv_synic.flat $(TEST_DIR)/hyperv_stimer.flat \
+   $(TEST_DIR)/mwait.flat \
 
 ifdef API
 tests-common += api/api-sample
diff --git a/x86/mwait.c b/x86/mwait.c
new file mode 100644
index 000000000000..c21dab5cc97d
--- /dev/null
+++ b/x86/mwait.c
@@ -0,0 +1,41 @@
+#include "vm.h"
+
+#define TARGET_RESUMES 10000
+volatile unsigned page[4096 / 4];
+
+/*
+ * Execute
+ *   time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 1'
+ * (first two arguments are eax and ecx for MWAIT, the third is FLAGS.IF bit)
+ * I assume you have 1000 Hz scheduler, so the test should take about 10
+ * seconds to run if mwait works (host timer interrupts will kick mwait).
+ *
+ * If you get far less, then mwait is just nop, as in the case of
+ *
+ *   time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 0'
+ *
+ * All other combinations of arguments should take 10 seconds.
+ * Getting killed by the TIMEOUT most likely means that you have different HZ,
+ * but could also be a bug ...
+ */
+int main(int argc, char **argv)
+{
+   uint32_t eax = atol(argv[1]);
+   uint32_t ecx = atol(argv[2]);
+   bool sti = atol(argv[3]);
+   unsigned resumes = 0;
+
+   if (sti)
+   asm volatile ("sti");
+   else
+   asm volatile ("cli");
+
+   while (resumes < TARGET_RESUMES) {
+   asm volatile("monitor" :: "a" (page), "c" (0), "d" (0));
+   asm volatile("mwait" :: "a" (eax), "c" (ecx));
+   resumes++;
+   }
+
+   report("resumed from mwait %u times", resumes == TARGET_RESUMES, resumes);
+   return report_summary();
+}
-- 
2.11.0

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Gabriel L. Somlo
On Thu, Mar 16, 2017 at 04:35:18PM +0100, Radim Krčmář wrote:
> 2017-03-16 10:58-0400, Gabriel L. Somlo:
> > On Thu, Mar 16, 2017 at 04:04:12PM +0200, Michael S. Tsirkin wrote:
> > > On Thu, Mar 16, 2017 at 09:24:27AM -0400, Gabriel L. Somlo wrote:
> > > > After studying your patch a bit more carefully (sorry, it's crazy
> > > > around here right now :) ) I realized you're simply trying to
> > > > (selectively) decide when to exit L1 and emulate as NOP vs. when to
> > > > just allow L1 to execute MONITOR & MWAIT natively.
> > > > 
> > > > Is that right ? Because if so, the issues I saw on my MacPro1,1 are
> > > > weird and inexplicable, given that allowing L>=1 to run MONITOR/MWAIT
> > > > natively was one of the options Alex Graf and Rene Rebe used back in
> > > > the very early days of OS X on QEMU, at the time I got involved with
> > > > that project. Here's part of an out of tree patch against 3.4 which did
> > > > just that, and worked as far as I remember on *any* MWAIT capable
> > > > intel chip I had access to back in 2010:
> > > > 
> > > > ##
> > > > # 99-mwait.patch.kvm-kmod (Rene Rebe ) 2010-04-27
> > > > ##
> > > > diff -pNarU5 linux-3.4/arch/x86/kvm/cpuid.c 
> > > > linux-3.4-mac/arch/x86/kvm/cpuid.c
> > > > --- linux-3.4/arch/x86/kvm/cpuid.c  2012-05-20 18:29:13.0 
> > > > -0400
> > > > +++ linux-3.4-mac/arch/x86/kvm/cpuid.c  2012-10-09 11:42:59.921215750 
> > > > -0400
> > > > @@ -222,11 +222,11 @@ static int do_cpuid_ent(struct kvm_cpuid
> > > > f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
> > > > F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
> > > > 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
> > > > /* cpuid 1.ecx */
> > > > const u32 kvm_supported_word4_x86_features =
> > > > -   F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
> > > > +   F(XMM3) | F(PCLMULQDQ) | F(MWAIT) /* DTES64, MONITOR */ 
> > > > |
> > > > 0 /* DS-CPL, VMX, SMX, EST */ |
> > > > 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* 
> > > > Reserved */ |
> > > > F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> > > > 0 /* Reserved, DCA */ | F(XMM4_1) |
> > > > F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> > > > diff -pNarU5 linux-3.4/arch/x86/kvm/svm.c 
> > > > linux-3.4-mac/arch/x86/kvm/svm.c
> > > > --- linux-3.4/arch/x86/kvm/svm.c2012-05-20 18:29:13.0 
> > > > -0400
> > > > +++ linux-3.4-mac/arch/x86/kvm/svm.c2012-10-09 11:44:41.598997481 
> > > > -0400
> > > > @@ -1102,12 +1102,10 @@ static void init_vmcb(struct vcpu_svm *s
> > > > set_intercept(svm, INTERCEPT_VMSAVE);
> > > > set_intercept(svm, INTERCEPT_STGI);
> > > > set_intercept(svm, INTERCEPT_CLGI);
> > > > set_intercept(svm, INTERCEPT_SKINIT);
> > > > set_intercept(svm, INTERCEPT_WBINVD);
> > > > -   set_intercept(svm, INTERCEPT_MONITOR);
> > > > -   set_intercept(svm, INTERCEPT_MWAIT);
> > > > set_intercept(svm, INTERCEPT_XSETBV);
> > > >  
> > > > control->iopm_base_pa = iopm_base;
> > > > control->msrpm_base_pa = __pa(svm->msrpm);
> > > > control->int_ctl = V_INTR_MASKING_MASK;
> > > > diff -pNarU5 linux-3.4/arch/x86/kvm/vmx.c 
> > > > linux-3.4-mac/arch/x86/kvm/vmx.c
> > > > --- linux-3.4/arch/x86/kvm/vmx.c2012-05-20 18:29:13.0 
> > > > -0400
> > > > +++ linux-3.4-mac/arch/x86/kvm/vmx.c2012-10-09 11:42:59.925215977 
> > > > -0400
> > > > @@ -1938,11 +1938,11 @@ static __init void nested_vmx_setup_ctls
> > > > nested_vmx_procbased_ctls_low, 
> > > > nested_vmx_procbased_ctls_high);
> > > > nested_vmx_procbased_ctls_low = 0;
> > > > nested_vmx_procbased_ctls_high &=
> > > > CPU_BASED_VIRTUAL_INTR_PENDING | 
> > > > CPU_BASED_USE_TSC_OFFSETING |
> > > > CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
> > > > -   CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
> > > > +   CPU_BASED_CR3_LOAD_EXITING |
> > > > CPU_BASED_CR3_STORE_EXITING |
> > > >  #ifdef CONFIG_X86_64
> > > > CPU_BASED_CR8_LOAD_EXITING | 
> > > > CPU_BASED_CR8_STORE_EXITING |
> > > >  #endif
> > > > CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
> > > > @@ -2404,12 +2404,10 @@ static __init int setup_vmcs_config(stru
> > > >   CPU_BASED_CR3_LOAD_EXITING |
> > > >   CPU_BASED_CR3_STORE_EXITING |
> > > >   CPU_BASED_USE_IO_BITMAPS |
> > > >   CPU_BASED_MOV_DR_EXITING |
> > > >   CPU_BASED_USE_TSC_OFFSETING |
> > > > - CPU_BASED_MWAIT_EXITING |
> > > > - CPU_BASED_MONITOR_EXITING |
> > > >   C

Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Gabriel L. Somlo
On Thu, Mar 16, 2017 at 04:54:06PM +0100, Radim Krčmář wrote:
> 2017-03-16 11:44-0400, Gabriel L. Somlo:
> > On Thu, Mar 16, 2017 at 03:08:07PM +0100, Radim Krčmář wrote:
> >> 2017-03-16 09:24-0400, Gabriel L. Somlo:
> >> > On Thu, Mar 16, 2017 at 01:41:28AM +0200, Michael S. Tsirkin wrote:
> >> > > On Wed, Mar 15, 2017 at 07:35:34PM -0400, Gabriel L. Somlo wrote:
> >> > > > On Wed, Mar 15, 2017 at 11:22:18PM +0200, Michael S. Tsirkin wrote:
> >> > > > > Guests running Mac OS 5, 6, and 7 (Leopard through Lion) have a 
> >> > > > > problem:
> >> > > > > unless explicitly provided with kernel command line argument
> >> > > > > "idlehalt=0" they'd implicitly assume MONITOR and MWAIT 
> >> > > > > availability,
> >> > > > > without checking CPUID.
> >> > > > > 
> >> > > > > We currently emulate that as a NOP but on VMX we can do better: let
> >> > > > > guest stop the CPU until timer, IPI or memory change.  CPU will be 
> >> > > > > busy
> >> > > > > but that isn't any worse than a NOP emulation.
> >> > > > > 
> >> > > > > Note that mwait within guests is not the same as on real hardware
> >> > > > > because halt causes an exit while mwait doesn't.  For this reason 
> >> > > > > it
> >> > > > > might not be a good idea to use the regular MWAIT flag in CPUID to
> >> > > > > signal this capability.  Add a flag in the hypervisor leaf instead.
> >> > > > > 
> >> > > > > Additionally, we add a capability for QEMU - e.g. if it knows 
> >> > > > > there's an
> >> > > > > isolated CPU dedicated for the VCPU it can set the standard MWAIT 
> >> > > > > flag
> >> > > > > to improve guest behaviour.
> >> > > > 
> >> > > > Same behavior (on the mac pro 1,1 running F22 with custom-compiled
> >> > > > kernel from kvm git master, plus this patch on top).
> >> > > > 
> >> > > > The OS X 10.7 kernel hangs (or at least progresses extremely slowly)
> >> > > > on boot, does not bring up guest graphical interface within the first
> >> > > > 10 minutes that I waited for it. That, in contrast with the default
> >> > > > nop-based emulation where the guest comes up within 30 seconds.
> >> > > 
> >> > > 
> >> > > Thanks a lot, meanwhile I'll try to write a unit-test and experiment
> >> > > with various behaviours.
> >> > > 
> >> > > > I will run another round of tests on a newer Mac (4-year-old macbook
> >> > > > air) and report back tomorrow.
> >> > > > 
> >> > > > Going off on a tangent, why would encouraging otherwise well-behaved
> >> > > > guests (like linux ones, for example) to use MWAIT be desirable to
> >> > > > begin with ? Is it a matter of minimizing the overhead associated 
> >> > > > with
> >> > > > exiting and re-entering L1 ? Because if so, AFAIR staying inside L1 
> >> > > > and
> >> > > > running guest-mode MWAIT in a tight loop will actually waste the host
> >> > > > CPU without the opportunity to yield to some other L0 thread. Sorry 
> >> > > > if
> >> > > > I fell into the middle of an ongoing conversation on this and missed
> >> > > > most of the relevant context, in which case please feel free to 
> >> > > > ignore
> >> > > > me... :)
> >> > > > 
> >> > > > Thanks,
> >> > > > --G
> >> > > 
> >> > > It's just some experiments I'm running, I'm not ready to describe it
> >> > > yet. I thought this part might be useful to at least some guests, so
> >> > > trying to upstream it right now.
> >> > 
> >> > OK, so on a macbook air running F25 and the latest kvm git master plus
> >> > your v5 patch (4.11.0-rc2+), things appear to work.
> >> > 
> >> > host-side cpuid output:
> >> > eax=0x40 ebx=0x40 ecx=0x03 edx=0x021120
> >> > 
> >> > guest-side cpuid output:
> >> > eax= ebx= ecx=0x03 edx=
> >> > 
> >> > processor: 3
> >> > vendor_id: GenuineIntel
> >> > cpu family   : 6
> >> > model: 42
> >> > model name   : Intel(R) Core(TM) i7-2677M CPU @ 1.80GHz
> >> > stepping : 7
> >> > microcode: 0x29
> >> > cpu MHz  : 1157.849
> >> > cache size   : 4096 KB
> >> > physical id  : 0
> >> > siblings : 4
> >> > core id  : 1
> >> > cpu cores: 2
> >> > apicid   : 3
> >> > initial apicid   : 3
> >> > fpu  : yes
> >> > fpu_exception: yes
> >> > cpuid level  : 13
> >> > wp   : yes
> >> > flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge 
> >> > mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe 
> >> > syscall nx rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl 
> >> > xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor 
> >> > ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic 
> >> > popcnt tsc_deadline_timer aes xsave avx lahf_lm tpr_shadow vnmi 
> >> > flexpriority ept vpid xsaveopt dtherm ida arat pln pts
> >> > bugs :
> >> > bogomips : 3604.68
> >> > clflush size : 64
> >> > cache_alignment  : 64
> >> > address sizes: 36 bits physical, 48 bits virtual
> >> > power management:
> >> 

Re: [PATCH v3 5/6] drm: bridge: dw-hdmi: Add Documentation on supported input formats

2017-03-16 Thread Archit Taneja



On 3/7/2017 10:12 PM, Neil Armstrong wrote:

This patch adds a new DRM documentation entry and links to the input
format table added in the dw_hdmi header.

Signed-off-by: Neil Armstrong 
---
 Documentation/gpu/dw-hdmi.rst | 15 +++
 Documentation/gpu/index.rst   |  1 +


Maybe we create a sub-directory for bridges here? Maybe
a hierarchy similar to tinydrm's?

Looks good otherwise.

Archit


 2 files changed, 16 insertions(+)
 create mode 100644 Documentation/gpu/dw-hdmi.rst

diff --git a/Documentation/gpu/dw-hdmi.rst b/Documentation/gpu/dw-hdmi.rst
new file mode 100644
index 000..486faad
--- /dev/null
+++ b/Documentation/gpu/dw-hdmi.rst
@@ -0,0 +1,15 @@
+===
+ drm/bridge/dw-hdmi Synopsys DesignWare HDMI Controller
+===
+
+Synopsys DesignWare HDMI Controller
+===
+
+This section covers everything related to the Synopsys DesignWare HDMI
+Controller implemented as a DRM bridge.
+
+Supported Input Formats and Encodings
+-
+
+.. kernel-doc:: include/drm/bridge/dw_hdmi.h
+   :doc: Supported input formats and encodings
diff --git a/Documentation/gpu/index.rst b/Documentation/gpu/index.rst
index e998ee0..0725449 100644
--- a/Documentation/gpu/index.rst
+++ b/Documentation/gpu/index.rst
@@ -10,6 +10,7 @@ Linux GPU Driver Developer's Guide
drm-kms
drm-kms-helpers
drm-uapi
+   dw-hdmi
i915
tinydrm
vc4



--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Michael S. Tsirkin
On Thu, Mar 16, 2017 at 12:16:13PM -0400, Gabriel L. Somlo wrote:
> On Thu, Mar 16, 2017 at 04:35:18PM +0100, Radim Krčmář wrote:
> > 2017-03-16 10:58-0400, Gabriel L. Somlo:
> > > On Thu, Mar 16, 2017 at 04:04:12PM +0200, Michael S. Tsirkin wrote:
> > > > On Thu, Mar 16, 2017 at 09:24:27AM -0400, Gabriel L. Somlo wrote:
> > > > > After studying your patch a bit more carefully (sorry, it's crazy
> > > > > around here right now :) ) I realized you're simply trying to
> > > > > (selectively) decide when to exit L1 and emulate as NOP vs. when to
> > > > > just allow L1 to execute MONITOR & MWAIT natively.
> > > > > 
> > > > > Is that right ? Because if so, the issues I saw on my MacPro1,1 are
> > > > > weird and inexplicable, given that allowing L>=1 to run MONITOR/MWAIT
> > > > > natively was one of the options Alex Graf and Rene Rebe used back in
> > > > > the very early days of OS X on QEMU, at the time I got involved with
> > > > > that project. Here's part of an out of tree patch against 3.4 which 
> > > > > did
> > > > > just that, and worked as far as I remember on *any* MWAIT capable
> > > > > intel chip I had access to back in 2010:
> > > > > 
> > > > > ##
> > > > > # 99-mwait.patch.kvm-kmod (Rene Rebe ) 2010-04-27
> > > > > ##
> > > > > diff -pNarU5 linux-3.4/arch/x86/kvm/cpuid.c 
> > > > > linux-3.4-mac/arch/x86/kvm/cpuid.c
> > > > > --- linux-3.4/arch/x86/kvm/cpuid.c2012-05-20 18:29:13.0 
> > > > > -0400
> > > > > +++ linux-3.4-mac/arch/x86/kvm/cpuid.c2012-10-09 
> > > > > 11:42:59.921215750 -0400
> > > > > @@ -222,11 +222,11 @@ static int do_cpuid_ent(struct kvm_cpuid
> > > > >   f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
> > > > >   F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
> > > > >   0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
> > > > >   /* cpuid 1.ecx */
> > > > >   const u32 kvm_supported_word4_x86_features =
> > > > > - F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
> > > > > + F(XMM3) | F(PCLMULQDQ) | F(MWAIT) /* DTES64, MONITOR */ 
> > > > > |
> > > > >   0 /* DS-CPL, VMX, SMX, EST */ |
> > > > >   0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* 
> > > > > Reserved */ |
> > > > >   F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> > > > >   0 /* Reserved, DCA */ | F(XMM4_1) |
> > > > >   F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> > > > > diff -pNarU5 linux-3.4/arch/x86/kvm/svm.c 
> > > > > linux-3.4-mac/arch/x86/kvm/svm.c
> > > > > --- linux-3.4/arch/x86/kvm/svm.c  2012-05-20 18:29:13.0 
> > > > > -0400
> > > > > +++ linux-3.4-mac/arch/x86/kvm/svm.c  2012-10-09 11:44:41.598997481 
> > > > > -0400
> > > > > @@ -1102,12 +1102,10 @@ static void init_vmcb(struct vcpu_svm *s
> > > > >   set_intercept(svm, INTERCEPT_VMSAVE);
> > > > >   set_intercept(svm, INTERCEPT_STGI);
> > > > >   set_intercept(svm, INTERCEPT_CLGI);
> > > > >   set_intercept(svm, INTERCEPT_SKINIT);
> > > > >   set_intercept(svm, INTERCEPT_WBINVD);
> > > > > - set_intercept(svm, INTERCEPT_MONITOR);
> > > > > - set_intercept(svm, INTERCEPT_MWAIT);
> > > > >   set_intercept(svm, INTERCEPT_XSETBV);
> > > > >  
> > > > >   control->iopm_base_pa = iopm_base;
> > > > >   control->msrpm_base_pa = __pa(svm->msrpm);
> > > > >   control->int_ctl = V_INTR_MASKING_MASK;
> > > > > diff -pNarU5 linux-3.4/arch/x86/kvm/vmx.c 
> > > > > linux-3.4-mac/arch/x86/kvm/vmx.c
> > > > > --- linux-3.4/arch/x86/kvm/vmx.c  2012-05-20 18:29:13.0 
> > > > > -0400
> > > > > +++ linux-3.4-mac/arch/x86/kvm/vmx.c  2012-10-09 11:42:59.925215977 
> > > > > -0400
> > > > > @@ -1938,11 +1938,11 @@ static __init void nested_vmx_setup_ctls
> > > > >   nested_vmx_procbased_ctls_low, 
> > > > > nested_vmx_procbased_ctls_high);
> > > > >   nested_vmx_procbased_ctls_low = 0;
> > > > >   nested_vmx_procbased_ctls_high &=
> > > > >   CPU_BASED_VIRTUAL_INTR_PENDING | 
> > > > > CPU_BASED_USE_TSC_OFFSETING |
> > > > >   CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
> > > > > - CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
> > > > > + CPU_BASED_CR3_LOAD_EXITING |
> > > > >   CPU_BASED_CR3_STORE_EXITING |
> > > > >  #ifdef CONFIG_X86_64
> > > > >   CPU_BASED_CR8_LOAD_EXITING | 
> > > > > CPU_BASED_CR8_STORE_EXITING |
> > > > >  #endif
> > > > >   CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
> > > > > @@ -2404,12 +2404,10 @@ static __init int setup_vmcs_config(stru
> > > > > CPU_BASED_CR3_LOAD_EXITING |
> > > > > CPU_BASED_CR3_STORE_EXITING |
> > > > > CPU_BASED_USE_IO_BITMAPS |
> > > > > CPU_BASED_MOV_DR_EXITING |

Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Gabriel L. Somlo
On Thu, Mar 16, 2017 at 05:01:58PM +0100, Radim Krčmář wrote:
> 2017-03-16 16:35+0100, Radim Krčmář:
> > 2017-03-16 10:58-0400, Gabriel L. Somlo:
> >> The intel manual said the same thing back in 2010 as well. However,
> >> regardless of how any flags were set, interrupt-window exiting or not,
> >> "normal" L1 MWAIT behavior was that it woke up immediately regardless.
> >> Remember, never going to sleep is still correct ("normal" ?) behavior
> >> per the ISA definition of MWAIT :)
> > 
> > I'll write a simple kvm-unit-test to better understand why it is broken
> > for you ...
> 
> Please get git://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git
> 
> and try this, thanks!
> 
> ---8<---
> x86/mwait: crappy test
> 
> `./configure && make` to build it, then follow the comment in code to
> try few cases.

kvm-unit-tests]$ time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 1'
timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device 
pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial 
stdio -device pci-testdev -kernel x86/mwait.flat -append 0 1 1
enabling apic
PASS: resumed from mwait 10000 times
SUMMARY: 1 tests

real0m10.564s
user0m10.339s
sys 0m0.225s


and

kvm-unit-tests]$ time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 0'
timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device 
pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial 
stdio -device pci-testdev -kernel x86/mwait.flat -append 0 1 0
enabling apic
PASS: resumed from mwait 10000 times
SUMMARY: 1 tests

real0m0.746s
user0m0.555s
sys 0m0.200s

Both of these with Michael's v5 patch applied, on the MacPro1,1.

Similar behavior (0 1 1 takes 10 seconds, 0 1 0 returns immediately)
on the macbook air.

If I revert to the original (nop-emulated MWAIT) kvm source, I get
both versions to return immediately.

HTH,
--Gabriel



> 
> ---
>  x86/Makefile.common |  1 +
>  x86/mwait.c | 41 +
>  2 files changed, 42 insertions(+)
>  create mode 100644 x86/mwait.c
> 
> diff --git a/x86/Makefile.common b/x86/Makefile.common
> index 1dad18ba26e1..1e708a6acd39 100644
> --- a/x86/Makefile.common
> +++ b/x86/Makefile.common
> @@ -46,6 +46,7 @@ tests-common = $(TEST_DIR)/vmexit.flat $(TEST_DIR)/tsc.flat \
> $(TEST_DIR)/tsc_adjust.flat $(TEST_DIR)/asyncpf.flat \
> $(TEST_DIR)/init.flat $(TEST_DIR)/smap.flat \
> $(TEST_DIR)/hyperv_synic.flat $(TEST_DIR)/hyperv_stimer.flat \
> +   $(TEST_DIR)/mwait.flat \
>  
>  ifdef API
>  tests-common += api/api-sample
> diff --git a/x86/mwait.c b/x86/mwait.c
> new file mode 100644
> index 000000000000..c21dab5cc97d
> --- /dev/null
> +++ b/x86/mwait.c
> @@ -0,0 +1,41 @@
> +#include "vm.h"
> +
> +#define TARGET_RESUMES 10000
> +volatile unsigned page[4096 / 4];
> +
> +/*
> + * Execute
> + *   time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 1'
> + * (first two arguments are eax and ecx for MWAIT, the third is FLAGS.IF bit)
> + * I assume you have 1000 Hz scheduler, so the test should take about 10
> + * seconds to run if mwait works (host timer interrupts will kick mwait).
> + *
> + * If you get far less, then mwait is just nop, as in the case of
> + *
> + *   time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 0'
> + *
> + * All other combinations of arguments should take 10 seconds.
> + * Getting killed by the TIMEOUT most likely means that you have different HZ,
> + * but could also be a bug ...
> + */
> +int main(int argc, char **argv)
> +{
> + uint32_t eax = atol(argv[1]);
> + uint32_t ecx = atol(argv[2]);
> + bool sti = atol(argv[3]);
> + unsigned resumes = 0;
> +
> + if (sti)
> + asm volatile ("sti");
> + else
> + asm volatile ("cli");
> +
> + while (resumes < TARGET_RESUMES) {
> + asm volatile("monitor" :: "a" (page), "c" (0), "d" (0));
> + asm volatile("mwait" :: "a" (eax), "c" (ecx));
> + resumes++;
> + }
> +
> + report("resumed from mwait %u times", resumes == TARGET_RESUMES, resumes);
> + return report_summary();
> +}
> -- 
> 2.11.0
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Gabriel L. Somlo
On Thu, Mar 16, 2017 at 06:45:02PM +0200, Michael S. Tsirkin wrote:
> On Thu, Mar 16, 2017 at 12:16:13PM -0400, Gabriel L. Somlo wrote:
> > On Thu, Mar 16, 2017 at 04:35:18PM +0100, Radim Krčmář wrote:
> > > 2017-03-16 10:58-0400, Gabriel L. Somlo:
> > > > On Thu, Mar 16, 2017 at 04:04:12PM +0200, Michael S. Tsirkin wrote:
> > > > > On Thu, Mar 16, 2017 at 09:24:27AM -0400, Gabriel L. Somlo wrote:
> > > > > > After studying your patch a bit more carefully (sorry, it's crazy
> > > > > > around here right now :) ) I realized you're simply trying to
> > > > > > (selectively) decide when to exit L1 and emulate as NOP vs. when to
> > > > > > just allow L1 to execute MONITOR & MWAIT natively.
> > > > > > 
> > > > > > Is that right ? Because if so, the issues I saw on my MacPro1,1 are
> > > > > > weird and inexplicable, given that allowing L>=1 to run 
> > > > > > MONITOR/MWAIT
> > > > > > natively was one of the options Alex Graf and Rene Rebe used back in
> > > > > > the very early days of OS X on QEMU, at the time I got involved with
> > > > > > that project. Here's part of an out of tree patch against 3.4 which 
> > > > > > did
> > > > > > just that, and worked as far as I remember on *any* MWAIT capable
> > > > > > intel chip I had access to back in 2010:
> > > > > > 
> > > > > > ##
> > > > > > # 99-mwait.patch.kvm-kmod (Rene Rebe ) 2010-04-27
> > > > > > ##
> > > > > > diff -pNarU5 linux-3.4/arch/x86/kvm/cpuid.c 
> > > > > > linux-3.4-mac/arch/x86/kvm/cpuid.c
> > > > > > --- linux-3.4/arch/x86/kvm/cpuid.c  2012-05-20 18:29:13.0 
> > > > > > -0400
> > > > > > +++ linux-3.4-mac/arch/x86/kvm/cpuid.c  2012-10-09 
> > > > > > 11:42:59.921215750 -0400
> > > > > > @@ -222,11 +222,11 @@ static int do_cpuid_ent(struct kvm_cpuid
> > > > > > f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
> > > > > > F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
> > > > > > 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
> > > > > > /* cpuid 1.ecx */
> > > > > > const u32 kvm_supported_word4_x86_features =
> > > > > > -   F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
> > > > > > +   F(XMM3) | F(PCLMULQDQ) | F(MWAIT) /* DTES64, MONITOR */ 
> > > > > > |
> > > > > > 0 /* DS-CPL, VMX, SMX, EST */ |
> > > > > > 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* 
> > > > > > Reserved */ |
> > > > > > F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> > > > > > 0 /* Reserved, DCA */ | F(XMM4_1) |
> > > > > > F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> > > > > > diff -pNarU5 linux-3.4/arch/x86/kvm/svm.c 
> > > > > > linux-3.4-mac/arch/x86/kvm/svm.c
> > > > > > --- linux-3.4/arch/x86/kvm/svm.c2012-05-20 18:29:13.0 
> > > > > > -0400
> > > > > > +++ linux-3.4-mac/arch/x86/kvm/svm.c2012-10-09 
> > > > > > 11:44:41.598997481 -0400
> > > > > > @@ -1102,12 +1102,10 @@ static void init_vmcb(struct vcpu_svm *s
> > > > > > set_intercept(svm, INTERCEPT_VMSAVE);
> > > > > > set_intercept(svm, INTERCEPT_STGI);
> > > > > > set_intercept(svm, INTERCEPT_CLGI);
> > > > > > set_intercept(svm, INTERCEPT_SKINIT);
> > > > > > set_intercept(svm, INTERCEPT_WBINVD);
> > > > > > -   set_intercept(svm, INTERCEPT_MONITOR);
> > > > > > -   set_intercept(svm, INTERCEPT_MWAIT);
> > > > > > set_intercept(svm, INTERCEPT_XSETBV);
> > > > > >  
> > > > > > control->iopm_base_pa = iopm_base;
> > > > > > control->msrpm_base_pa = __pa(svm->msrpm);
> > > > > > control->int_ctl = V_INTR_MASKING_MASK;
> > > > > > diff -pNarU5 linux-3.4/arch/x86/kvm/vmx.c 
> > > > > > linux-3.4-mac/arch/x86/kvm/vmx.c
> > > > > > --- linux-3.4/arch/x86/kvm/vmx.c2012-05-20 18:29:13.0 
> > > > > > -0400
> > > > > > +++ linux-3.4-mac/arch/x86/kvm/vmx.c2012-10-09 
> > > > > > 11:42:59.925215977 -0400
> > > > > > @@ -1938,11 +1938,11 @@ static __init void nested_vmx_setup_ctls
> > > > > > nested_vmx_procbased_ctls_low, 
> > > > > > nested_vmx_procbased_ctls_high);
> > > > > > nested_vmx_procbased_ctls_low = 0;
> > > > > > nested_vmx_procbased_ctls_high &=
> > > > > > CPU_BASED_VIRTUAL_INTR_PENDING | 
> > > > > > CPU_BASED_USE_TSC_OFFSETING |
> > > > > > CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
> > > > > > -   CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
> > > > > > +   CPU_BASED_CR3_LOAD_EXITING |
> > > > > > CPU_BASED_CR3_STORE_EXITING |
> > > > > >  #ifdef CONFIG_X86_64
> > > > > > CPU_BASED_CR8_LOAD_EXITING | 
> > > > > > CPU_BASED_CR8_STORE_EXITING |
> > > > > >  #endif
> > > > > > CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
> > > > > > @@ -2404,12 +2404,10 @@ static __init int setup_vmcs_config(stru
> > > > > > 

Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Gabriel L. Somlo
On Thu, Mar 16, 2017 at 12:52:32PM -0400, Gabriel L. Somlo wrote:
> On Thu, Mar 16, 2017 at 06:45:02PM +0200, Michael S. Tsirkin wrote:
> > On Thu, Mar 16, 2017 at 12:16:13PM -0400, Gabriel L. Somlo wrote:
> > > On Thu, Mar 16, 2017 at 04:35:18PM +0100, Radim Krčmář wrote:
> > > > 2017-03-16 10:58-0400, Gabriel L. Somlo:
> > > > > On Thu, Mar 16, 2017 at 04:04:12PM +0200, Michael S. Tsirkin wrote:
> > > > > > On Thu, Mar 16, 2017 at 09:24:27AM -0400, Gabriel L. Somlo wrote:
> > > > > > > After studying your patch a bit more carefully (sorry, it's crazy
> > > > > > > around here right now :) ) I realized you're simply trying to
> > > > > > > (selectively) decide when to exit L1 and emulate as NOP vs. when 
> > > > > > > to
> > > > > > > just allow L1 to execute MONITOR & MWAIT natively.
> > > > > > > 
> > > > > > > Is that right ? Because if so, the issues I saw on my MacPro1,1 
> > > > > > > are
> > > > > > > weird and inexplicable, given that allowing L>=1 to run 
> > > > > > > MONITOR/MWAIT
> > > > > > > natively was one of the options Alex Graf and Rene Rebe used back 
> > > > > > > in
> > > > > > > the very early days of OS X on QEMU, at the time I got involved 
> > > > > > > with
> > > > > > > that project. Here's part of an out of tree patch against 3.4 
> > > > > > > which did
> > > > > > > just that, and worked as far as I remember on *any* MWAIT capable
> > > > > > > intel chip I had access to back in 2010:
> > > > > > > 
> > > > > > > ##
> > > > > > > # 99-mwait.patch.kvm-kmod (Rene Rebe ) 
> > > > > > > 2010-04-27
> > > > > > > ##
> > > > > > > diff -pNarU5 linux-3.4/arch/x86/kvm/cpuid.c 
> > > > > > > linux-3.4-mac/arch/x86/kvm/cpuid.c
> > > > > > > --- linux-3.4/arch/x86/kvm/cpuid.c2012-05-20 
> > > > > > > 18:29:13.0 -0400
> > > > > > > +++ linux-3.4-mac/arch/x86/kvm/cpuid.c2012-10-09 
> > > > > > > 11:42:59.921215750 -0400
> > > > > > > @@ -222,11 +222,11 @@ static int do_cpuid_ent(struct kvm_cpuid
> > > > > > >   f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
> > > > > > >   F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
> > > > > > >   0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
> > > > > > >   /* cpuid 1.ecx */
> > > > > > >   const u32 kvm_supported_word4_x86_features =
> > > > > > > - F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
> > > > > > > + F(XMM3) | F(PCLMULQDQ) | F(MWAIT) /* DTES64, MONITOR */ 
> > > > > > > |
> > > > > > >   0 /* DS-CPL, VMX, SMX, EST */ |
> > > > > > >   0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* 
> > > > > > > Reserved */ |
> > > > > > >   F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> > > > > > >   0 /* Reserved, DCA */ | F(XMM4_1) |
> > > > > > >   F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> > > > > > > diff -pNarU5 linux-3.4/arch/x86/kvm/svm.c 
> > > > > > > linux-3.4-mac/arch/x86/kvm/svm.c
> > > > > > > --- linux-3.4/arch/x86/kvm/svm.c  2012-05-20 18:29:13.0 
> > > > > > > -0400
> > > > > > > +++ linux-3.4-mac/arch/x86/kvm/svm.c  2012-10-09 
> > > > > > > 11:44:41.598997481 -0400
> > > > > > > @@ -1102,12 +1102,10 @@ static void init_vmcb(struct vcpu_svm *s
> > > > > > >   set_intercept(svm, INTERCEPT_VMSAVE);
> > > > > > >   set_intercept(svm, INTERCEPT_STGI);
> > > > > > >   set_intercept(svm, INTERCEPT_CLGI);
> > > > > > >   set_intercept(svm, INTERCEPT_SKINIT);
> > > > > > >   set_intercept(svm, INTERCEPT_WBINVD);
> > > > > > > - set_intercept(svm, INTERCEPT_MONITOR);
> > > > > > > - set_intercept(svm, INTERCEPT_MWAIT);
> > > > > > >   set_intercept(svm, INTERCEPT_XSETBV);
> > > > > > >  
> > > > > > >   control->iopm_base_pa = iopm_base;
> > > > > > >   control->msrpm_base_pa = __pa(svm->msrpm);
> > > > > > >   control->int_ctl = V_INTR_MASKING_MASK;
> > > > > > > diff -pNarU5 linux-3.4/arch/x86/kvm/vmx.c 
> > > > > > > linux-3.4-mac/arch/x86/kvm/vmx.c
> > > > > > > --- linux-3.4/arch/x86/kvm/vmx.c  2012-05-20 18:29:13.0 
> > > > > > > -0400
> > > > > > > +++ linux-3.4-mac/arch/x86/kvm/vmx.c  2012-10-09 
> > > > > > > 11:42:59.925215977 -0400
> > > > > > > @@ -1938,11 +1938,11 @@ static __init void nested_vmx_setup_ctls
> > > > > > >   nested_vmx_procbased_ctls_low, 
> > > > > > > nested_vmx_procbased_ctls_high);
> > > > > > >   nested_vmx_procbased_ctls_low = 0;
> > > > > > >   nested_vmx_procbased_ctls_high &=
> > > > > > >   CPU_BASED_VIRTUAL_INTR_PENDING | 
> > > > > > > CPU_BASED_USE_TSC_OFFSETING |
> > > > > > >   CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
> > > > > > > - CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
> > > > > > > + CPU_BASED_CR3_LOAD_EXITING |
> > > > > > >   CPU_BASED_CR3_STORE_EXITING |
> > > > > > >  #ifdef CONFIG_X86_64
> > > > > > >   CPU_BASED_CR8_LOAD_EXIT

Re: [PATCH v3 3/6] documentation: media: Add documentation for new RGB and YUV bus formats

2017-03-16 Thread Archit Taneja



On 3/7/2017 10:12 PM, Neil Armstrong wrote:

Add documentation for added Bus Formats to describe RGB and YUS formats used


s/YUS/YUV


as input to the Synopsys DesignWare HDMI TX Controller.

Signed-off-by: Neil Armstrong 
---
 Documentation/media/uapi/v4l/subdev-formats.rst | 4992 ++-
 1 file changed, 3963 insertions(+), 1029 deletions(-)


Do we know if there is a better way to add more columns without
adding so many lines?

If not, one option could be to create a separate tables for
48 bit RGB formats, 48 bit YUV formats etc.



Thanks,
Archit

--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Michael S. Tsirkin
On Thu, Mar 16, 2017 at 12:54:50PM -0400, Gabriel L. Somlo wrote:
> On Thu, Mar 16, 2017 at 12:52:32PM -0400, Gabriel L. Somlo wrote:
> > On Thu, Mar 16, 2017 at 06:45:02PM +0200, Michael S. Tsirkin wrote:
> > > On Thu, Mar 16, 2017 at 12:16:13PM -0400, Gabriel L. Somlo wrote:
> > > > On Thu, Mar 16, 2017 at 04:35:18PM +0100, Radim Krčmář wrote:
> > > > > 2017-03-16 10:58-0400, Gabriel L. Somlo:
> > > > > > On Thu, Mar 16, 2017 at 04:04:12PM +0200, Michael S. Tsirkin wrote:
> > > > > > > On Thu, Mar 16, 2017 at 09:24:27AM -0400, Gabriel L. Somlo wrote:
> > > > > > > > After studying your patch a bit more carefully (sorry, it's 
> > > > > > > > crazy
> > > > > > > > around here right now :) ) I realized you're simply trying to
> > > > > > > > (selectively) decide when to exit L1 and emulate as NOP vs. 
> > > > > > > > when to
> > > > > > > > just allow L1 to execute MONITOR & MWAIT natively.
> > > > > > > > 
> > > > > > > > Is that right ? Because if so, the issues I saw on my MacPro1,1 
> > > > > > > > are
> > > > > > > > weird and inexplicable, given that allowing L>=1 to run 
> > > > > > > > MONITOR/MWAIT
> > > > > > > > natively was one of the options Alex Graf and Rene Rebe used 
> > > > > > > > back in
> > > > > > > > the very early days of OS X on QEMU, at the time I got involved 
> > > > > > > > with
> > > > > > > > that project. Here's part of an out of tree patch against 3.4 
> > > > > > > > which did
> > > > > > > > just that, and worked as far as I remember on *any* MWAIT 
> > > > > > > > capable
> > > > > > > > intel chip I had access to back in 2010:
> > > > > > > > 
> > > > > > > > ##
> > > > > > > > # 99-mwait.patch.kvm-kmod (Rene Rebe ) 
> > > > > > > > 2010-04-27
> > > > > > > > ##
> > > > > > > > diff -pNarU5 linux-3.4/arch/x86/kvm/cpuid.c 
> > > > > > > > linux-3.4-mac/arch/x86/kvm/cpuid.c
> > > > > > > > --- linux-3.4/arch/x86/kvm/cpuid.c  2012-05-20 
> > > > > > > > 18:29:13.0 -0400
> > > > > > > > +++ linux-3.4-mac/arch/x86/kvm/cpuid.c  2012-10-09 
> > > > > > > > 11:42:59.921215750 -0400
> > > > > > > > @@ -222,11 +222,11 @@ static int do_cpuid_ent(struct kvm_cpuid
> > > > > > > > f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
> > > > > > > > F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
> > > > > > > > 0 /* Reserved */ | f_lm | F(3DNOWEXT) | 
> > > > > > > > F(3DNOW);
> > > > > > > > /* cpuid 1.ecx */
> > > > > > > > const u32 kvm_supported_word4_x86_features =
> > > > > > > > -   F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR 
> > > > > > > > */ |
> > > > > > > > +   F(XMM3) | F(PCLMULQDQ) | F(MWAIT) /* DTES64, 
> > > > > > > > MONITOR */ |
> > > > > > > > 0 /* DS-CPL, VMX, SMX, EST */ |
> > > > > > > > 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* 
> > > > > > > > Reserved */ |
> > > > > > > > F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> > > > > > > > 0 /* Reserved, DCA */ | F(XMM4_1) |
> > > > > > > > F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> > > > > > > > diff -pNarU5 linux-3.4/arch/x86/kvm/svm.c 
> > > > > > > > linux-3.4-mac/arch/x86/kvm/svm.c
> > > > > > > > --- linux-3.4/arch/x86/kvm/svm.c2012-05-20 
> > > > > > > > 18:29:13.0 -0400
> > > > > > > > +++ linux-3.4-mac/arch/x86/kvm/svm.c2012-10-09 
> > > > > > > > 11:44:41.598997481 -0400
> > > > > > > > @@ -1102,12 +1102,10 @@ static void init_vmcb(struct vcpu_svm *s
> > > > > > > > set_intercept(svm, INTERCEPT_VMSAVE);
> > > > > > > > set_intercept(svm, INTERCEPT_STGI);
> > > > > > > > set_intercept(svm, INTERCEPT_CLGI);
> > > > > > > > set_intercept(svm, INTERCEPT_SKINIT);
> > > > > > > > set_intercept(svm, INTERCEPT_WBINVD);
> > > > > > > > -   set_intercept(svm, INTERCEPT_MONITOR);
> > > > > > > > -   set_intercept(svm, INTERCEPT_MWAIT);
> > > > > > > > set_intercept(svm, INTERCEPT_XSETBV);
> > > > > > > >  
> > > > > > > > control->iopm_base_pa = iopm_base;
> > > > > > > > control->msrpm_base_pa = __pa(svm->msrpm);
> > > > > > > > control->int_ctl = V_INTR_MASKING_MASK;
> > > > > > > > diff -pNarU5 linux-3.4/arch/x86/kvm/vmx.c 
> > > > > > > > linux-3.4-mac/arch/x86/kvm/vmx.c
> > > > > > > > --- linux-3.4/arch/x86/kvm/vmx.c2012-05-20 
> > > > > > > > 18:29:13.0 -0400
> > > > > > > > +++ linux-3.4-mac/arch/x86/kvm/vmx.c2012-10-09 
> > > > > > > > 11:42:59.925215977 -0400
> > > > > > > > @@ -1938,11 +1938,11 @@ static __init void nested_vmx_setup_ctls
> > > > > > > > nested_vmx_procbased_ctls_low, 
> > > > > > > > nested_vmx_procbased_ctls_high);
> > > > > > > > nested_vmx_procbased_ctls_low = 0;
> > > > > > > > nested_vmx_procbased_ctl

Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Radim Krčmář
2017-03-16 12:47-0400, Gabriel L. Somlo:
> On Thu, Mar 16, 2017 at 05:01:58PM +0100, Radim Krčmář wrote:
> > 2017-03-16 16:35+0100, Radim Krčmář:
> > > 2017-03-16 10:58-0400, Gabriel L. Somlo:
> > >> The intel manual said the same thing back in 2010 as well. However,
> > >> regardless of how any flags were set, interrupt-window exiting or not,
> > >> "normal" L1 MWAIT behavior was that it woke up immediately regardless.
> > >> Remember, never going to sleep is still correct ("normal" ?) behavior
> > >> per the ISA definition of MWAIT :)
> > > 
> > > I'll write a simple kvm-unit-test to better understand why it is broken
> > > for you ...
> > 
> > Please get git://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git
> > 
> > and try this, thanks!
> > 
> > ---8<---
> > x86/mwait: crappy test
> > 
> > `./configure && make` to build it, then follow the comment in code to
> > try few cases.
> 
> kvm-unit-tests]$ time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 1'
> timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device 
> pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial 
> stdio -device pci-testdev -kernel x86/mwait.flat -append 0 1 1
> enabling apic
> PASS: resumed from mwait 1 times
> SUMMARY: 1 tests
> 
> real0m10.564s
> user0m10.339s
> sys 0m0.225s
> 
> 
> and
> 
> kvm-unit-tests]$ time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 0'
> timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device 
> pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial 
> stdio -device pci-testdev -kernel x86/mwait.flat -append 0 1 0
> enabling apic
> PASS: resumed from mwait 1 times
> SUMMARY: 1 tests
> 
> real0m0.746s
> user0m0.555s
> sys 0m0.200s
> 
> Both of these with Michael's v5 patch applied, on the MacPro1,1.
> 
> Similar behavior (0 1 1 takes 10 seconds, 0 1 0 returns immediately)
> on the macbook air.
> 
> If I revert to the original (nop-emulated MWAIT) kvm source, I get
> both versions to return immediately.

Those look normal ... maybe MWAIT just ignores writes to the monitored
area?

Please apply the patch below and following and try:

  time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 1' -smp 2
  time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 0 1' -smp 2
  time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 0 0' -smp 2

All of them should take roughly the same time as the NOP one,

  time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 0' -smp 2

Thanks.

---8<---
diff --git a/x86/mwait.c b/x86/mwait.c
index c21dab5cc97d..ca38e7223596 100644
--- a/x86/mwait.c
+++ b/x86/mwait.c
@@ -1,7 +1,9 @@
 #include "vm.h"
+#include "smp.h"
 
 #define TARGET_RESUMES 1
 volatile unsigned page[4096 / 4];
+volatile unsigned resumes;
 
 /*
  * Execute
@@ -18,19 +20,39 @@ volatile unsigned page[4096 / 4];
  * Getting killed by the TIMEOUT most likely means that you have different HZ,
  * but could also be a bug ...
  */
+void writer(void *null)
+{
+   int i;
+   unsigned old_resumes = 0, new_resumes;
+
+   for (i = 0; i < TARGET_RESUMES; i++) {
+   (*page)++;
+
+   while (old_resumes == (new_resumes = resumes))
+   pause();
+   old_resumes = new_resumes;
+   }
+}
+
 int main(int argc, char **argv)
 {
uint32_t eax = atol(argv[1]);
uint32_t ecx = atol(argv[2]);
bool sti = atol(argv[3]);
-   unsigned resumes = 0;
+   bool smp;
+
+   smp_init();
+   smp = cpu_count() > 1;
+
+   if (smp)
+   on_cpu_async(1, writer, NULL);
 
if (sti)
asm volatile ("sti");
else
asm volatile ("cli");
 
-   while (resumes < TARGET_RESUMES) {
+   while ((smp ? *page : resumes) < TARGET_RESUMES) {
asm volatile("monitor" :: "a" (page), "c" (0), "d" (0));
asm volatile("mwait" :: "a" (eax), "c" (ecx));
resumes++;
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Michael S. Tsirkin
On Thu, Mar 16, 2017 at 12:47:50PM -0400, Gabriel L. Somlo wrote:
> On Thu, Mar 16, 2017 at 05:01:58PM +0100, Radim Krčmář wrote:
> > 2017-03-16 16:35+0100, Radim Krčmář:
> > > 2017-03-16 10:58-0400, Gabriel L. Somlo:
> > >> The intel manual said the same thing back in 2010 as well. However,
> > >> regardless of how any flags were set, interrupt-window exiting or not,
> > >> "normal" L1 MWAIT behavior was that it woke up immediately regardless.
> > >> Remember, never going to sleep is still correct ("normal" ?) behavior
> > >> per the ISA definition of MWAIT :)
> > > 
> > > I'll write a simple kvm-unit-test to better understand why it is broken
> > > for you ...
> > 
> > Please get git://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git
> > 
> > and try this, thanks!
> > 
> > ---8<---
> > x86/mwait: crappy test
> > 
> > `./configure && make` to build it, then follow the comment in code to
> > try few cases.
> 
> kvm-unit-tests]$ time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 1'
> timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device 
> pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial 
> stdio -device pci-testdev -kernel x86/mwait.flat -append 0 1 1
> enabling apic
> PASS: resumed from mwait 1 times
> SUMMARY: 1 tests
> 
> real0m10.564s
> user0m10.339s
> sys 0m0.225s
> 
> 
> and
> 
> kvm-unit-tests]$ time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 0'
> timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device 
> pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial 
> stdio -device pci-testdev -kernel x86/mwait.flat -append 0 1 0
> enabling apic
> PASS: resumed from mwait 1 times
> SUMMARY: 1 tests
> 
> real0m0.746s
> user0m0.555s
> sys 0m0.200s
> 
> Both of these with Michael's v5 patch applied, on the MacPro1,1.

Would it make sense to try to set ECX to 0? 0 0 1 and 0 0 0.


> Similar behavior (0 1 1 takes 10 seconds, 0 1 0 returns immediately)
> on the macbook air.
> 
> If I revert to the original (nop-emulated MWAIT) kvm source, I get
> both versions to return immediately.
> 
> HTH,
> --Gabriel
> 
> 
> 
> > 
> > ---
> >  x86/Makefile.common |  1 +
> >  x86/mwait.c | 41 +
> >  2 files changed, 42 insertions(+)
> >  create mode 100644 x86/mwait.c
> > 
> > diff --git a/x86/Makefile.common b/x86/Makefile.common
> > index 1dad18ba26e1..1e708a6acd39 100644
> > --- a/x86/Makefile.common
> > +++ b/x86/Makefile.common
> > @@ -46,6 +46,7 @@ tests-common = $(TEST_DIR)/vmexit.flat 
> > $(TEST_DIR)/tsc.flat \
> > $(TEST_DIR)/tsc_adjust.flat $(TEST_DIR)/asyncpf.flat \
> > $(TEST_DIR)/init.flat $(TEST_DIR)/smap.flat \
> > $(TEST_DIR)/hyperv_synic.flat 
> > $(TEST_DIR)/hyperv_stimer.flat \
> > +   $(TEST_DIR)/mwait.flat \
> >  
> >  ifdef API
> >  tests-common += api/api-sample
> > diff --git a/x86/mwait.c b/x86/mwait.c
> > new file mode 100644
> > index ..c21dab5cc97d
> > --- /dev/null
> > +++ b/x86/mwait.c
> > @@ -0,0 +1,41 @@
> > +#include "vm.h"
> > +
> > +#define TARGET_RESUMES 1
> > +volatile unsigned page[4096 / 4];
> > +
> > +/*
> > + * Execute
> > + *   time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 1'
> > + * (first two arguments are eax and ecx for MWAIT, the third is FLAGS.IF 
> > bit)
> > + * I assume you have 1000 Hz scheduler, so the test should take about 10
> > + * seconds to run if mwait works (host timer interrupts will kick mwait).
> > + *
> > + * If you get far less, then mwait is just nop, as in the case of
> > + *
> > + *   time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 0'
> > + *
> > + * All other combinations of arguments should take 10 seconds.
> > + * Getting killed by the TIMEOUT most likely means that you have different 
> > HZ,
> > + * but could also be a bug ...
> > + */
> > +int main(int argc, char **argv)
> > +{
> > +   uint32_t eax = atol(argv[1]);
> > +   uint32_t ecx = atol(argv[2]);
> > +   bool sti = atol(argv[3]);
> > +   unsigned resumes = 0;
> > +
> > +   if (sti)
> > +   asm volatile ("sti");
> > +   else
> > +   asm volatile ("cli");
> > +
> > +   while (resumes < TARGET_RESUMES) {
> > +   asm volatile("monitor" :: "a" (page), "c" (0), "d" (0));
> > +   asm volatile("mwait" :: "a" (eax), "c" (ecx));
> > +   resumes++;
> > +   }
> > +
> > +   report("resumed from mwait %u times", resumes == TARGET_RESUMES, 
> > resumes);
> > +   return report_summary();
> > +}
> > -- 
> > 2.11.0
> > 
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Radim Krčmář
2017-03-16 19:14+0200, Michael S. Tsirkin:
> On Thu, Mar 16, 2017 at 12:54:50PM -0400, Gabriel L. Somlo wrote:
> > On Thu, Mar 16, 2017 at 12:52:32PM -0400, Gabriel L. Somlo wrote:
> > > On Thu, Mar 16, 2017 at 06:45:02PM +0200, Michael S. Tsirkin wrote:
> > > > On Thu, Mar 16, 2017 at 12:16:13PM -0400, Gabriel L. Somlo wrote:
> > > > > On Thu, Mar 16, 2017 at 04:35:18PM +0100, Radim Krčmář wrote:
> > > > > > 2017-03-16 10:58-0400, Gabriel L. Somlo:
> > > > > > > On Thu, Mar 16, 2017 at 04:04:12PM +0200, Michael S. Tsirkin 
> > > > > > > wrote:
> > > > > > > > On Thu, Mar 16, 2017 at 09:24:27AM -0400, Gabriel L. Somlo 
> > > > > > > > wrote:
> > > > > > > > > After studying your patch a bit more carefully (sorry, it's 
> > > > > > > > > crazy
> > > > > > > > > around here right now :) ) I realized you're simply trying to
> > > > > > > > > (selectively) decide when to exit L1 and emulate as NOP vs. 
> > > > > > > > > when to
> > > > > > > > > just allow L1 to execute MONITOR & MWAIT natively.
> > > > > > > > > 
> > > > > > > > > Is that right ? Because if so, the issues I saw on my 
> > > > > > > > > MacPro1,1 are
> > > > > > > > > weird and inexplicable, given that allowing L>=1 to run 
> > > > > > > > > MONITOR/MWAIT
> > > > > > > > > natively was one of the options Alex Graf and Rene Rebe used 
> > > > > > > > > back in
> > > > > > > > > the very early days of OS X on QEMU, at the time I got 
> > > > > > > > > involved with
> > > > > > > > > that project. Here's part of an out of tree patch against 3.4 
> > > > > > > > > which did
> > > > > > > > > just that, and worked as far as I remember on *any* MWAIT 
> > > > > > > > > capable
> > > > > > > > > intel chip I had access to back in 2010:
> > > > > > > > > 
> > > > > > > > > ##
> > > > > > > > > # 99-mwait.patch.kvm-kmod (Rene Rebe ) 
> > > > > > > > > 2010-04-27
> > > > > > > > > ##
> > > > > > > > > diff -pNarU5 linux-3.4/arch/x86/kvm/cpuid.c 
> > > > > > > > > linux-3.4-mac/arch/x86/kvm/cpuid.c
> > > > > > > > > --- linux-3.4/arch/x86/kvm/cpuid.c2012-05-20 
> > > > > > > > > 18:29:13.0 -0400
> > > > > > > > > +++ linux-3.4-mac/arch/x86/kvm/cpuid.c2012-10-09 
> > > > > > > > > 11:42:59.921215750 -0400
> > > > > > > > > @@ -222,11 +222,11 @@ static int do_cpuid_ent(struct kvm_cpuid
> > > > > > > > >   f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
> > > > > > > > >   F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
> > > > > > > > >   0 /* Reserved */ | f_lm | F(3DNOWEXT) | 
> > > > > > > > > F(3DNOW);
> > > > > > > > >   /* cpuid 1.ecx */
> > > > > > > > >   const u32 kvm_supported_word4_x86_features =
> > > > > > > > > - F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR 
> > > > > > > > > */ |
> > > > > > > > > + F(XMM3) | F(PCLMULQDQ) | F(MWAIT) /* DTES64, 
> > > > > > > > > MONITOR */ |
> > > > > > > > >   0 /* DS-CPL, VMX, SMX, EST */ |
> > > > > > > > >   0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* 
> > > > > > > > > Reserved */ |
> > > > > > > > >   F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> > > > > > > > >   0 /* Reserved, DCA */ | F(XMM4_1) |
> > > > > > > > >   F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> > > > > > > > > diff -pNarU5 linux-3.4/arch/x86/kvm/svm.c 
> > > > > > > > > linux-3.4-mac/arch/x86/kvm/svm.c
> > > > > > > > > --- linux-3.4/arch/x86/kvm/svm.c  2012-05-20 
> > > > > > > > > 18:29:13.0 -0400
> > > > > > > > > +++ linux-3.4-mac/arch/x86/kvm/svm.c  2012-10-09 
> > > > > > > > > 11:44:41.598997481 -0400
> > > > > > > > > @@ -1102,12 +1102,10 @@ static void init_vmcb(struct vcpu_svm 
> > > > > > > > > *s
> > > > > > > > >   set_intercept(svm, INTERCEPT_VMSAVE);
> > > > > > > > >   set_intercept(svm, INTERCEPT_STGI);
> > > > > > > > >   set_intercept(svm, INTERCEPT_CLGI);
> > > > > > > > >   set_intercept(svm, INTERCEPT_SKINIT);
> > > > > > > > >   set_intercept(svm, INTERCEPT_WBINVD);
> > > > > > > > > - set_intercept(svm, INTERCEPT_MONITOR);
> > > > > > > > > - set_intercept(svm, INTERCEPT_MWAIT);
> > > > > > > > >   set_intercept(svm, INTERCEPT_XSETBV);
> > > > > > > > >  
> > > > > > > > >   control->iopm_base_pa = iopm_base;
> > > > > > > > >   control->msrpm_base_pa = __pa(svm->msrpm);
> > > > > > > > >   control->int_ctl = V_INTR_MASKING_MASK;
> > > > > > > > > diff -pNarU5 linux-3.4/arch/x86/kvm/vmx.c 
> > > > > > > > > linux-3.4-mac/arch/x86/kvm/vmx.c
> > > > > > > > > --- linux-3.4/arch/x86/kvm/vmx.c  2012-05-20 
> > > > > > > > > 18:29:13.0 -0400
> > > > > > > > > +++ linux-3.4-mac/arch/x86/kvm/vmx.c  2012-10-09 
> > > > > > > > > 11:42:59.925215977 -0400
> > > > > > > > > @@ -1938,11 +1938,11 @@ static __init void 
> > > > > > > > > nested_vmx_setu

Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Gabriel L. Somlo
On Thu, Mar 16, 2017 at 06:22:44PM +0100, Radim Krčmář wrote:
> 2017-03-16 12:47-0400, Gabriel L. Somlo:
> > On Thu, Mar 16, 2017 at 05:01:58PM +0100, Radim Krčmář wrote:
> > > 2017-03-16 16:35+0100, Radim Krčmář:
> > > > 2017-03-16 10:58-0400, Gabriel L. Somlo:
> > > >> The intel manual said the same thing back in 2010 as well. However,
> > > >> regardless of how any flags were set, interrupt-window exiting or not,
> > > >> "normal" L1 MWAIT behavior was that it woke up immediately regardless.
> > > >> Remember, never going to sleep is still correct ("normal" ?) behavior
> > > >> per the ISA definition of MWAIT :)
> > > > 
> > > > I'll write a simple kvm-unit-test to better understand why it is broken
> > > > for you ...
> > > 
> > > Please get git://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git
> > > 
> > > and try this, thanks!
> > > 
> > > ---8<---
> > > x86/mwait: crappy test
> > > 
> > > `./configure && make` to build it, then follow the comment in code to
> > > try few cases.
> > 
> > kvm-unit-tests]$ time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 1'
> > timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device 
> > pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial 
> > stdio -device pci-testdev -kernel x86/mwait.flat -append 0 1 1
> > enabling apic
> > PASS: resumed from mwait 1 times
> > SUMMARY: 1 tests
> > 
> > real0m10.564s
> > user0m10.339s
> > sys 0m0.225s
> > 
> > 
> > and
> > 
> > kvm-unit-tests]$ time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 0'
> > timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device 
> > pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial 
> > stdio -device pci-testdev -kernel x86/mwait.flat -append 0 1 0
> > enabling apic
> > PASS: resumed from mwait 1 times
> > SUMMARY: 1 tests
> > 
> > real0m0.746s
> > user0m0.555s
> > sys 0m0.200s
> > 
> > Both of these with Michael's v5 patch applied, on the MacPro1,1.
> > 
> > Similar behavior (0 1 1 takes 10 seconds, 0 1 0 returns immediately)
> > on the macbook air.
> > 
> > If I revert to the original (nop-emulated MWAIT) kvm source, I get
> > both versions to return immediately.
> 
> Those look normal ... maybe MWAIT just ignores writes to the monitored
> area?
> 
> Please apply the patch below and following and try:
> 
>   time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 1' -smp 2

timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device 
pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial 
stdio -device pci-testdev -kernel x86/mwait.flat -append 0 1 1 -smp 2
enabling apic
enabling apic
PASS: resumed from mwait 1 times
SUMMARY: 1 tests

real    0m0.758s
user    0m0.557s
sys     0m0.220s

>   time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 0 1' -smp 2

timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device 
pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial 
stdio -device pci-testdev -kernel x86/mwait.flat -append 0 0 1 -smp 2
enabling apic
enabling apic
PASS: resumed from mwait 1 times
SUMMARY: 1 tests

real    0m0.748s
user    0m0.550s
sys     0m0.210s

>   time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 0 0' -smp 2

timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device 
pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial 
stdio -device pci-testdev -kernel x86/mwait.flat -append 0 0 0 -smp 2
enabling apic
enabling apic
PASS: resumed from mwait 1 times
SUMMARY: 1 tests

real    0m0.745s
user    0m0.558s
sys     0m0.203s

> 
> All of them should take rougly the same time as the NOP one,
> 
>   time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 0' -smp 2

They all *did* return fast, as you expected.

> ---8<---
> diff --git a/x86/mwait.c b/x86/mwait.c
> index c21dab5cc97d..ca38e7223596 100644
> --- a/x86/mwait.c
> +++ b/x86/mwait.c
> @@ -1,7 +1,9 @@
>  #include "vm.h"
> +#include "smp.h"
>  
>  #define TARGET_RESUMES 1
>  volatile unsigned page[4096 / 4];
> +volatile unsigned resumes;
>  
>  /*
>   * Execute
> @@ -18,19 +20,39 @@ volatile unsigned page[4096 / 4];
>   * Getting killed by the TIMEOUT most likely means that you have different 
> HZ,
>   * but could also be a bug ...
>   */
> +void writer(void *null)
> +{
> + int i;
> + unsigned old_resumes = 0, new_resumes;
> +
> + for (i = 0; i < TARGET_RESUMES; i++) {
> + (*page)++;
> +
> + while (old_resumes == (new_resumes = resumes))
> + pause();
> + old_resumes = new_resumes;
> + }
> +}
> +
>  int main(int argc, char **argv)
>  {
>   uint32_t eax = atol(argv[1]);
>   uint32_t ecx = atol(argv[2]);
>   bool sti = atol(argv[3]);
> - unsigned resumes = 0;
> + bool smp;
> +
> + smp_init();
> + smp = cpu_count() > 1;
> +
> + if (smp)
> + on_cpu_async(1, writer, NULL);
>  
>   if (sti)
>

Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Gabriel L. Somlo
On Thu, Mar 16, 2017 at 07:27:34PM +0200, Michael S. Tsirkin wrote:
> On Thu, Mar 16, 2017 at 12:47:50PM -0400, Gabriel L. Somlo wrote:
> > On Thu, Mar 16, 2017 at 05:01:58PM +0100, Radim Krčmář wrote:
> > > 2017-03-16 16:35+0100, Radim Krčmář:
> > > > 2017-03-16 10:58-0400, Gabriel L. Somlo:
> > > >> The intel manual said the same thing back in 2010 as well. However,
> > > >> regardless of how any flags were set, interrupt-window exiting or not,
> > > >> "normal" L1 MWAIT behavior was that it woke up immediately regardless.
> > > >> Remember, never going to sleep is still correct ("normal" ?) behavior
> > > >> per the ISA definition of MWAIT :)
> > > > 
> > > > I'll write a simple kvm-unit-test to better understand why it is broken
> > > > for you ...
> > > 
> > > Please get git://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git
> > > 
> > > and try this, thanks!
> > > 
> > > ---8<---
> > > x86/mwait: crappy test
> > > 
> > > `./configure && make` to build it, then follow the comment in code to
> > > try few cases.
> > 
> > kvm-unit-tests]$ time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 1'
> > timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device 
> > pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial 
> > stdio -device pci-testdev -kernel x86/mwait.flat -append 0 1 1
> > enabling apic
> > PASS: resumed from mwait 1 times
> > SUMMARY: 1 tests
> > 
> > real0m10.564s
> > user0m10.339s
> > sys 0m0.225s
> > 
> > 
> > and
> > 
> > kvm-unit-tests]$ time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 0'
> > timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device 
> > pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial 
> > stdio -device pci-testdev -kernel x86/mwait.flat -append 0 1 0
> > enabling apic
> > PASS: resumed from mwait 1 times
> > SUMMARY: 1 tests
> > 
> > real0m0.746s
> > user0m0.555s
> > sys 0m0.200s
> > 
> > Both of these with Michael's v5 patch applied, on the MacPro1,1.
> 
> Would it make sense to try to set ECX to 0? 0 0 1 and 0 0 0.

$ time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 0 1'
timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device 
pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial 
stdio -device pci-testdev -kernel x86/mwait.flat -append 0 0 1
enabling apic
PASS: resumed from mwait 1 times
SUMMARY: 1 tests

real    0m10.567s
user    0m10.367s
sys     0m0.210s


$ time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 0 0'
timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device 
pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial 
stdio -device pci-testdev -kernel x86/mwait.flat -append 0 0 0
enabling apic
PASS: resumed from mwait 1 times
SUMMARY: 1 tests

real    0m10.549s
user    0m10.352s
sys     0m0.206s

Both took 10 seconds.
 
> 
> > Similar behavior (0 1 1 takes 10 seconds, 0 1 0 returns immediately)
> > on the macbook air.
> > 
> > If I revert to the original (nop-emulated MWAIT) kvm source, I get
> > both versions to return immediately.
> > 
> > HTH,
> > --Gabriel
> > 
> > 
> > 
> > > 
> > > ---
> > >  x86/Makefile.common |  1 +
> > >  x86/mwait.c | 41 +
> > >  2 files changed, 42 insertions(+)
> > >  create mode 100644 x86/mwait.c
> > > 
> > > diff --git a/x86/Makefile.common b/x86/Makefile.common
> > > index 1dad18ba26e1..1e708a6acd39 100644
> > > --- a/x86/Makefile.common
> > > +++ b/x86/Makefile.common
> > > @@ -46,6 +46,7 @@ tests-common = $(TEST_DIR)/vmexit.flat 
> > > $(TEST_DIR)/tsc.flat \
> > > $(TEST_DIR)/tsc_adjust.flat $(TEST_DIR)/asyncpf.flat \
> > > $(TEST_DIR)/init.flat $(TEST_DIR)/smap.flat \
> > > $(TEST_DIR)/hyperv_synic.flat 
> > > $(TEST_DIR)/hyperv_stimer.flat \
> > > +   $(TEST_DIR)/mwait.flat \
> > >  
> > >  ifdef API
> > >  tests-common += api/api-sample
> > > diff --git a/x86/mwait.c b/x86/mwait.c
> > > new file mode 100644
> > > index ..c21dab5cc97d
> > > --- /dev/null
> > > +++ b/x86/mwait.c
> > > @@ -0,0 +1,41 @@
> > > +#include "vm.h"
> > > +
> > > +#define TARGET_RESUMES 1
> > > +volatile unsigned page[4096 / 4];
> > > +
> > > +/*
> > > + * Execute
> > > + *   time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 1'
> > > + * (first two arguments are eax and ecx for MWAIT, the third is FLAGS.IF 
> > > bit)
> > > + * I assume you have 1000 Hz scheduler, so the test should take about 10
> > > + * seconds to run if mwait works (host timer interrupts will kick mwait).
> > > + *
> > > + * If you get far less, then mwait is just nop, as in the case of
> > > + *
> > > + *   time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 0'
> > > + *
> > > + * All other combinations of arguments should take 10 seconds.
> > > + * Getting killed by the TIMEOUT most likely means that you have 
> > > different HZ,
> > > + * but could also

Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Michael S. Tsirkin
Let's take a step back and try to figure out how
mwait is called. How about dumping the code of the VCPUs
around mwait?  The gdb disassemble command will do this.

-- 
MST
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Gabriel L. Somlo
On Thu, Mar 16, 2017 at 08:29:32PM +0200, Michael S. Tsirkin wrote:
> Let's take a step back and try to figure out how is
> mwait called. How about dumping code of VCPUs
> around mwait?  gdb disa command will do this.

Started guest with '-s', tried to attach from gdb with
"target remote localhost:1234", got
"remote 'g' packet reply is too long: "

Tried typing 'cont' in the qemu monitor, got os x to crash:

panic (cpu 1 caller 0xff7f813ff488): pmLock: waited too long, held
by 0xff7f813eff65

Hmm, maybe that's where it keeps its monitor/mwait idle loop.
Restarted the guest, tried this from monitor:

dump-guest-memory foobar 0xff7f813e 0x2

Got "'dump-guest-memory' has failed: integer is for 32-bit values"

Hmmm... I have no idea what I'm doing anymore at this point... :)

--G
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Michael S. Tsirkin
On Thu, Mar 16, 2017 at 03:24:41PM -0400, Gabriel L. Somlo wrote:
> On Thu, Mar 16, 2017 at 08:29:32PM +0200, Michael S. Tsirkin wrote:
> > Let's take a step back and try to figure out how is
> > mwait called. How about dumping code of VCPUs
> > around mwait?  gdb disa command will do this.
> 
> Started guest with '-s', tried to attach from gdb with
> "target remote localhost:1234", got
> "remote 'g' packet reply is too long: "

Try

set arch x86-64:x86-64


> Tried typing 'cont' in the qemu monitor, got os x to crash:
> 
> panic (cpu 1 caller 0xff7f813ff488): pmLock: waited too long, held
> by 0xff7f813eff65
> 
> Hmm, maybe that's where it keeps its monitor/mwait idle loop.
> Restarted the guest, tried this from monitor:
> 
>   dump-guest-memory foobar 0xff7f813e 0x2
> 
> Got "'dump-guest-memory' has failed: integer is for 32-bit values"
> 
> Hmmm... I have no idea what I'm doing anymore at this point... :)
> 
> --G

I think 0xff7f813ff488 is a PC.

-- 
MST
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Gabriel L. Somlo
On Thu, Mar 16, 2017 at 09:27:56PM +0200, Michael S. Tsirkin wrote:
> On Thu, Mar 16, 2017 at 03:24:41PM -0400, Gabriel L. Somlo wrote:
> > On Thu, Mar 16, 2017 at 08:29:32PM +0200, Michael S. Tsirkin wrote:
> > > Let's take a step back and try to figure out how is
> > > mwait called. How about dumping code of VCPUs
> > > around mwait?  gdb disa command will do this.
> > 
> > Started guest with '-s', tried to attach from gdb with
> > "target remote localhost:1234", got
> > "remote 'g' packet reply is too long: "
> 
> Try
> 
> set arch x86-64:x86-64

'set architecture i386:x86-64:intel' is what worked for me;

Been rooting around for a while, can't find mwait or monitor :(

Guess I'll have to recompile KVM to actually issue an invalid opcode,
so OS X will print a panic message with the exact address :)

Stay tuned...
 
> 
> > Tried typing 'cont' in the qemu monitor, got os x to crash:
> > 
> > panic (cpu 1 caller 0xff7f813ff488): pmLock: waited too long, held
> > by 0xff7f813eff65
> > 
> > Hmm, maybe that's where it keeps its monitor/mwait idle loop.
> > Restarted the guest, tried this from monitor:
> > 
> > dump-guest-memory foobar 0xff7f813e 0x2
> > 
> > Got "'dump-guest-memory' has failed: integer is for 32-bit values"
> > 
> > Hmmm... I have no idea what I'm doing anymore at this point... :)
> > 
> > --G
> 
> I think 0xff7f813ff488 is a PC.
> 
> -- 
> MST
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Gabriel L. Somlo
On Thu, Mar 16, 2017 at 04:17:11PM -0400, Gabriel L. Somlo wrote:
> On Thu, Mar 16, 2017 at 09:27:56PM +0200, Michael S. Tsirkin wrote:
> > On Thu, Mar 16, 2017 at 03:24:41PM -0400, Gabriel L. Somlo wrote:
> > > On Thu, Mar 16, 2017 at 08:29:32PM +0200, Michael S. Tsirkin wrote:
> > > > Let's take a step back and try to figure out how is
> > > > mwait called. How about dumping code of VCPUs
> > > > around mwait?  gdb disa command will do this.
> > > 
> > > Started guest with '-s', tried to attach from gdb with
> > > "target remote localhost:1234", got
> > > "remote 'g' packet reply is too long: "
> > 
> > Try
> > 
> > set arch x86-64:x86-64
> 
> 'set architecture i386:x86-64:intel' is what worked for me;
> 
> Been rooting around for a while, can't find mwait or monitor :(
> 
> Guess I'll have to recompile KVM to actually issue an invalid opcode,
> so OS X will print a panic message with the exact address :)
> 
> Stay tuned...

OK, so I found a few instances. The one closest to where a random
interrupt from gdb landed was this one:

...
   0xff7f813ff379:  mov    0x90(%r15),%rax
   0xff7f813ff380:  mov    0x18(%rax),%rsi
   0xff7f813ff384:  xor    %ecx,%ecx
   0xff7f813ff386:  mov    %rsi,%rax
   0xff7f813ff389:  xor    %edx,%edx
   0xff7f813ff38b:  monitor %rax,%rcx,%rdx
   0xff7f813ff38e:  test   %r14,%r14
   0xff7f813ff391:  je     0xff7f813ff3ad
   0xff7f813ff393:  movq   $0x0,0x8(%r14)
   0xff7f813ff39b:  movl   $0x0,(%r14)
   0xff7f813ff3a2:  test   %ebx,%ebx
   0xff7f813ff3a4:  je     0xff7f813ff3b2
   0xff7f813ff3a6:  mfence
   0xff7f813ff3a9:  wbinvd
   0xff7f813ff3ab:  jmp    0xff7f813ff3b2
   0xff7f813ff3ad:  cmpl   $0x0,(%rsi)
   0xff7f813ff3b0:  jne    0xff7f813ff3d6
   0xff7f813ff3b2:  mov    %r12d,%eax
   0xff7f813ff3b5:  imul   $0x148,%rax,%rax
   0xff7f813ff3bc:  lea    0x153bd(%rip),%rcx        # 0xff7f81414780
   0xff7f813ff3c3:  mov    (%rcx),%rcx
   0xff7f813ff3c6:  mov    0x20(%rcx),%rcx
   0xff7f813ff3ca:  mov    0xc(%rcx,%rax,1),%eax
   0xff7f813ff3ce:  mov    $0x1,%ecx
   0xff7f813ff3d3:  mwait  %rax,%rcx
=> 0xff7f813ff3d6:  lfence
   0xff7f813ff3d9:  rdtsc
   0xff7f813ff3db:  lfence
   0xff7f813ff3de:  mov    %rax,%rbx
   0xff7f813ff3e1:  mov    %rdx,%r15
...

Also, there were a few more within the range occupied by
AppleIntelCPUPowerManagement.kext (which provides the "smart"
idle loop used by OS X):


...
   0xff7f813f799a:  mov    0x90(%r15),%rax
   0xff7f813f79a1:  mov    0x18(%rax),%r15
   0xff7f813f79a5:  xor    %ecx,%ecx
   0xff7f813f79a7:  mov    %r15,%rax
   0xff7f813f79aa:  xor    %edx,%edx
   0xff7f813f79ac:  monitor %rax,%rcx,%rdx
   0xff7f813f79af:  mov    %r12d,%r12d
   0xff7f813f79b2:  imul   $0x148,%r12,%r13
   0xff7f813f79b9:  lea    0x1cdc0(%rip),%rax        # 0xff7f81414780
   0xff7f813f79c0:  mov    (%rax),%rax
   0xff7f813f79c3:  mov    0x20(%rax),%rcx
   0xff7f813f79c7:  testb  $0x10,0x2(%rcx,%r13,1)
   0xff7f813f79cd:  je     0xff7f813f79d5
   0xff7f813f79cf:  callq  *0x80(%rax)
   0xff7f813f79d5:  test   %r14,%r14
   0xff7f813f79d8:  je     0xff7f813f79f4
   0xff7f813f79da:  movq   $0x0,0x8(%r14)
   0xff7f813f79e2:  movl   $0x0,(%r14)
   0xff7f813f79e9:  test   %ebx,%ebx
   0xff7f813f79eb:  je     0xff7f813f79fa
   0xff7f813f79ed:  mfence
   0xff7f813f79f0:  wbinvd
   0xff7f813f79f2:  jmp    0xff7f813f79fa
   0xff7f813f79f4:  cmpl   $0x0,(%r15)
   0xff7f813f79f8:  jne    0xff7f813f7a15
   0xff7f813f79fa:  lea    0x1cd7f(%rip),%rax        # 0xff7f81414780
   0xff7f813f7a01:  mov    (%rax),%rax
   0xff7f813f7a04:  mov    0x20(%rax),%rax
   0xff7f813f7a08:  mov    0xc(%rax,%r13,1),%eax
   0xff7f813f7a0d:  mov    $0x1,%ecx
   0xff7f813f7a12:  mwait  %rax,%rcx
   0xff7f813f7a15:  lfence
   0xff7f813f7a18:  rdtsc
   0xff7f813f7a1a:  lfence
   0xff7f813f7a1d:  mov    %rax,%rbx
   0xff7f813f7a20:  mov    %rdx,%r15
...

...
   0xff7f813f89c9:  xor%ecx,%ecx
   0xff7f813f89cb:  mov%r13,%rax
   0xff7f813f89ce:  xor%edx,%edx
   0xff7f813f89d0:  monitor %rax,%rcx,%rdx
   0xff7f813f89d3:  mov%r12d,%r15d
   0xff7f813f89d6:  imul   $0x148,%r15,%r12
   0xff7f813f89dd:  lea0x1bd9c(%rip),%rax# 0xff7f81414780
   0xff7f813f89e4:  mov(%rax),%rax
   0xff7f813f89e7:  mov0x20(%rax),%rcx
   0xff7f813f89eb:  testb  $0x10,0x2(%rcx,%r12,1)
   0xff7f813f89f1:  je 0xff7f813f89f9
   0xff7f813f89f3:  callq  *0x80(%rax)
   0xff7f813f89f9:  test   %r14,%r14
   0xff7f813f89fc:  je 0xff7f813f8a18
   0xff7f813f89fe:  movq   $0x0,0x8(%r14)
   0xff7f813f8a06:  movl   $0x0,(%r14)
   0xff7f813f8a0d:  test   %ebx,%ebx
   0xff7f813f8a0f:  je 0xff7

Re: [PATCH v5 untested] kvm: better MWAIT emulation for guests

2017-03-16 Thread Michael S. Tsirkin
On Thu, Mar 16, 2017 at 05:14:15PM -0400, Gabriel L. Somlo wrote:
> On Thu, Mar 16, 2017 at 04:17:11PM -0400, Gabriel L. Somlo wrote:
> > On Thu, Mar 16, 2017 at 09:27:56PM +0200, Michael S. Tsirkin wrote:
> > > On Thu, Mar 16, 2017 at 03:24:41PM -0400, Gabriel L. Somlo wrote:
> > > > On Thu, Mar 16, 2017 at 08:29:32PM +0200, Michael S. Tsirkin wrote:
> > > > > Let's take a step back and try to figure out how is
> > > > > mwait called. How about dumping code of VCPUs
> > > > > around mwait?  gdb disa command will do this.
> > > > 
> > > > Started guest with '-s', tried to attach from gdb with
> > > > "target remote localhost:1234", got
> > > > "remote 'g' packet reply is too long: "
> > > 
> > > Try
> > > 
> > > set arch x86-64:x86-64
> > 
> > 'set architecture i386:x86-64:intel' is what worked for me;
> > 
> > Been rooting around for a while, can't find mwait or monitor :(
> > 
> > Guess I'll have to recompile KVM to actually issue an invalid opcode,
> > so OS X will print a panic message with the exact address :)
> > 
> > Stay tuned...
> 
> OK, so I found a few instances. The one closest to where a random
> interrupt from gdb landed, was this one:
> 
> ...
>0xff7f813ff379:  mov0x90(%r15),%rax
>0xff7f813ff380:  mov0x18(%rax),%rsi
>0xff7f813ff384:  xor%ecx,%ecx
>0xff7f813ff386:  mov%rsi,%rax
>0xff7f813ff389:  xor%edx,%edx
>0xff7f813ff38b:  monitor %rax,%rcx,%rdx
>0xff7f813ff38e:  test   %r14,%r14
>0xff7f813ff391:  je 0xff7f813ff3ad
>0xff7f813ff393:  movq   $0x0,0x8(%r14)
>0xff7f813ff39b:  movl   $0x0,(%r14)
>0xff7f813ff3a2:  test   %ebx,%ebx
>0xff7f813ff3a4:  je 0xff7f813ff3b2
>0xff7f813ff3a6:  mfence 
>0xff7f813ff3a9:  wbinvd
>0xff7f813ff3ab:  jmp0xff7f813ff3b2
>0xff7f813ff3ad:  cmpl   $0x0,(%rsi)

Seems to do cmpl - could indicate it uses different bytes
for signalling? Radim's test monitors and
modifies the same byte...

>0xff7f813ff3b0:  jne0xff7f813ff3d6
>0xff7f813ff3b2:  mov%r12d,%eax
>0xff7f813ff3b5:  imul   $0x148,%rax,%rax
>0xff7f813ff3bc:  lea0x153bd(%rip),%rcx# 0xff7f81414780
>0xff7f813ff3c3:  mov(%rcx),%rcx 
>0xff7f813ff3c6:  mov0x20(%rcx),%rcx
>0xff7f813ff3ca:  mov0xc(%rcx,%rax,1),%eax
>0xff7f813ff3ce:  mov$0x1,%ecx
>0xff7f813ff3d3:  mwait  %rax,%rcx
> => 0xff7f813ff3d6:  lfence
>0xff7f813ff3d9:  rdtsc  
>0xff7f813ff3db:  lfence 
>0xff7f813ff3de:  mov%rax,%rbx
>0xff7f813ff3e1:  mov%rdx,%r15
> ...

OK nice, so it's actually using 1 for ECX. Now what's rax?
Can you check that with gdb pls, then try that value with
Radim's test?

-- 
MST
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html