On 01/06/16 14:28, Jan Beulich wrote:
>>>> On 01.06.16 at 15:03, <[email protected]> wrote:
>> On 01/06/16 13:01, Jan Beulich wrote:
>>>>>> I want to adjust the representation of cpuid information in struct
>>>>>> domain. The current loop in domain_cpuid() causes an O(N) overhead for
>>>>>> every query, which is very poor for actions which really should be a
>>>>>> single bit test at a fixed offset.
>>>>>>
>>>>>> This needs to be combined with properly splitting the per-domain and
>>>>>> per-vcpu information, which requires knowing the expected vcpu topology
>>>>>> during domain creation.
>>>>>>
>>>>>> On top of that, there needs to be verification logic to check the
>>>>>> correctness of information passed from the toolstack.
>>>>>>
>>>>>> All of these areas are covered in the "known issues" section of the
>>>>>> feature doc, and I do plan to fix them all. However, it isn't a couple
>>>>>> of hours worth of work.
>>>>> All understood, yet not to the point: The original remark was that
>>>>> the very XSTATE handling could be done better with far not as much
>>>>> of a change, at least afaict without having tried.
>>>> In which case I don't know what you were suggesting.
>>> Make {hvm,pv}_cpuid() invoke themselves recursively to
>>> determine what bits to mask off from CPUID[0xd].EAX.
>> So that would work. However, to do this, you need to query leaves 1,
>> 0x80000001 and 7, all of which will hit the O(N) loop in domain_cpuid()
>>
>> Luckily, none of those specific paths further recurse into {hvm,pv}_cpuid().
>>
>> I am unsure which to go with. My gut feel is that this would be quite a
>> performance hit, but I have no evidence either way. OTOH, it will give
>> the correct answer, rather than an approximation.
> Not only since I believe performance is very close to irrelevant for
> CPUID leaf 0xD invocations, I think I'd prefer correctness over
> performance (as would be basically always the case). How about
> you?
Right - this is the alternative, doing the calculation in
{hvm,pv}_cpuid(), based on top of your cleanup from yesterday.
There is a bugfix in the PV side (pv_featureset[FEATURESET_1c] should be
taken into account even for control/hardware domain accesses), and a
preemptive fix on the HVM side to avoid advertising any XSS states, as
we don't support any yet.
Thoughts?
~Andrew
From be056a4b78929e428d50ca386ec56b42c9cde9ac Mon Sep 17 00:00:00 2001
From: Andrew Cooper <[email protected]>
Date: Thu, 2 Jun 2016 12:08:42 +0100
Subject: [PATCH] xen/x86: Clip guests view of xfeature_mask
---
xen/arch/x86/hvm/hvm.c | 46 +++++++++++++++++++++++++++++--
xen/arch/x86/traps.c | 65 +++++++++++++++++++++++++++++++++++++-------
xen/include/asm-x86/xstate.h | 31 ++++++++++++++-------
3 files changed, 120 insertions(+), 22 deletions(-)
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index bb98051..53f2bdd 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3362,7 +3362,7 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
switch ( input )
{
- unsigned int _ecx, _edx;
+ unsigned int _ebx, _ecx, _edx;
case 0x1:
/* Fix up VLAPIC details. */
@@ -3443,6 +3443,44 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
switch ( count )
{
case 0:
+ {
+ uint64_t xfeature_mask = XSTATE_SSE | XSTATE_FP;
+ uint32_t xstate_size = XSTATE_AREA_MIN_SIZE;
+
+ if ( _ecx & cpufeat_mask(X86_FEATURE_AVX) )
+ {
+ xfeature_mask |= XSTATE_YMM;
+ xstate_size += xstate_sizes[_XSTATE_YMM];
+ }
+
+ _ecx = 0;
+ hvm_cpuid(7, NULL, &_ebx, &_ecx, NULL);
+
+ if ( _ebx & cpufeat_mask(X86_FEATURE_MPX) )
+ {
+ xfeature_mask |= XSTATE_BNDREGS | XSTATE_BNDCSR;
+ xstate_size += xstate_sizes[_XSTATE_BNDREGS] +
+ xstate_sizes[_XSTATE_BNDCSR];
+ }
+
+ if ( _ebx & cpufeat_mask(X86_FEATURE_PKU) )
+ {
+ xfeature_mask |= XSTATE_PKRU;
+ xstate_size += xstate_sizes[_XSTATE_PKRU];
+ }
+
+ hvm_cpuid(0x80000001, NULL, NULL, &_ecx, NULL);
+
+ if ( _ecx & cpufeat_mask(X86_FEATURE_LWP) )
+ {
+ xfeature_mask |= XSTATE_LWP;
+ xstate_size += xstate_sizes[_XSTATE_LWP];
+ }
+
+ *eax = (uint32_t)xfeature_mask;
+ *edx = (uint32_t)(xfeature_mask >> 32);
+ *ecx = xstate_size;
+
/*
* Always read CPUID[0xD,0].EBX from hardware, rather than domain
* policy. It varies with enabled xstate, and the correct xcr0 is
@@ -3450,6 +3488,8 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
*/
cpuid_count(input, count, &dummy, ebx, &dummy, &dummy);
break;
+ }
+
case 1:
*eax &= hvm_featureset[FEATURESET_Da1];
@@ -3463,7 +3503,9 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
cpuid_count(input, count, &dummy, ebx, &dummy, &dummy);
}
else
- *ebx = *ecx = *edx = 0;
+ *ebx = 0;
+
+ *ecx = *edx = 0;
break;
}
break;
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 5d7232d..4ba90a4 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -928,7 +928,7 @@ void pv_cpuid(struct cpu_user_regs *regs)
     switch ( leaf )
     {
-        uint32_t tmp;
+        uint32_t tmp, _ebx, _ecx;
     case 0x00000001:
         c &= pv_featureset[FEATURESET_1c];
@@ -1087,19 +1087,63 @@ void pv_cpuid(struct cpu_user_regs *regs)
         break;
     case XSTATE_CPUID:
+
+        if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
+            domain_cpuid(currd, 1, 0, &tmp, &tmp, &_ecx, &tmp);
+        else
+            _ecx = cpuid_ecx(1);
+        _ecx &= pv_featureset[FEATURESET_1c];
+
+        if ( !(_ecx & cpufeat_mask(X86_FEATURE_XSAVE)) || subleaf >= 63 )
            goto unsupported;
        switch ( subleaf )
        {
        case 0:
+        {
+            uint64_t xfeature_mask = XSTATE_SSE | XSTATE_FP;
+            uint32_t xstate_size = XSTATE_AREA_MIN_SIZE;
+
+            if ( _ecx & cpufeat_mask(X86_FEATURE_AVX) )
+            {
+                xfeature_mask |= XSTATE_YMM;
+                xstate_size += xstate_sizes[_XSTATE_YMM];
+            }
+
+            if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
+                domain_cpuid(currd, 7, 0, &tmp, &_ebx, &tmp, &tmp);
+            else
+                cpuid_count(7, 0, &tmp, &_ebx, &tmp, &tmp);
+            _ebx &= pv_featureset[FEATURESET_7b0];
+
+            if ( _ebx & cpufeat_mask(X86_FEATURE_MPX) )
+            {
+                xfeature_mask |= XSTATE_BNDREGS | XSTATE_BNDCSR;
+                xstate_size += xstate_sizes[_XSTATE_BNDREGS] +
+                               xstate_sizes[_XSTATE_BNDCSR];
+            }
+
+            if ( _ebx & cpufeat_mask(X86_FEATURE_PKU) )
+            {
+                xfeature_mask |= XSTATE_PKRU;
+                xstate_size += xstate_sizes[_XSTATE_PKRU];
+            }
+
+            /* LWP is advertised in CPUID.0x80000001:ECX, not EDX. */
+            if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
+                domain_cpuid(currd, 0x80000001, 0, &tmp, &tmp, &_ecx, &tmp);
+            else
+                _ecx = cpuid_ecx(0x80000001);
+            _ecx &= pv_featureset[FEATURESET_e1c];
+
+            if ( _ecx & cpufeat_mask(X86_FEATURE_LWP) )
+            {
+                xfeature_mask |= XSTATE_LWP;
+                xstate_size += xstate_sizes[_XSTATE_LWP];
+            }
+
+            a = (uint32_t)xfeature_mask;
+            d = (uint32_t)(xfeature_mask >> 32);
+            c = xstate_size;
+
            /*
             * Always read CPUID.0xD[ECX=0].EBX from hardware, rather than
             * domain policy. It varies with enabled xstate, and the correct
@@ -1108,6 +1152,7 @@ void pv_cpuid(struct cpu_user_regs *regs)
            if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
                cpuid_count(leaf, subleaf, &tmp, &b, &tmp, &tmp);
            break;
+        }
        case 1:
            a &= pv_featureset[FEATURESET_Da1];
diff --git a/xen/include/asm-x86/xstate.h b/xen/include/asm-x86/xstate.h
index 4535354..4b0c33e 100644
--- a/xen/include/asm-x86/xstate.h
+++ b/xen/include/asm-x86/xstate.h
@@ -26,16 +26,27 @@
#define XSAVE_HDR_OFFSET FXSAVE_SIZE
#define XSTATE_AREA_MIN_SIZE (FXSAVE_SIZE + XSAVE_HDR_SIZE)
-#define XSTATE_FP (1ULL << 0)
-#define XSTATE_SSE (1ULL << 1)
-#define XSTATE_YMM (1ULL << 2)
-#define XSTATE_BNDREGS (1ULL << 3)
-#define XSTATE_BNDCSR (1ULL << 4)
-#define XSTATE_OPMASK (1ULL << 5)
-#define XSTATE_ZMM (1ULL << 6)
-#define XSTATE_HI_ZMM (1ULL << 7)
-#define XSTATE_PKRU (1ULL << 9)
-#define XSTATE_LWP (1ULL << 62) /* AMD lightweight profiling */
+#define _XSTATE_FP 0
+#define XSTATE_FP (1ULL << _XSTATE_FP)
+#define _XSTATE_SSE 1
+#define XSTATE_SSE (1ULL << _XSTATE_SSE)
+#define _XSTATE_YMM 2
+#define XSTATE_YMM (1ULL << _XSTATE_YMM)
+#define _XSTATE_BNDREGS 3
+#define XSTATE_BNDREGS (1ULL << _XSTATE_BNDREGS)
+#define _XSTATE_BNDCSR 4
+#define XSTATE_BNDCSR (1ULL << _XSTATE_BNDCSR)
+#define _XSTATE_OPMASK 5
+#define XSTATE_OPMASK (1ULL << _XSTATE_OPMASK)
+#define _XSTATE_ZMM 6
+#define XSTATE_ZMM (1ULL << _XSTATE_ZMM)
+#define _XSTATE_HI_ZMM 7
+#define XSTATE_HI_ZMM (1ULL << _XSTATE_HI_ZMM)
+#define _XSTATE_PKRU 9
+#define XSTATE_PKRU (1ULL << _XSTATE_PKRU)
+#define _XSTATE_LWP 62
+#define XSTATE_LWP (1ULL << _XSTATE_LWP)
+
#define XSTATE_FP_SSE (XSTATE_FP | XSTATE_SSE)
#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | \
XSTATE_ZMM | XSTATE_HI_ZMM | XSTATE_NONLAZY)
--
2.1.4
_______________________________________________
Xen-devel mailing list
[email protected]
http://lists.xen.org/xen-devel