>>> On 21.01.16 at 16:14, <dario.faggi...@citrix.com> wrote:
> On Wed, 2016-01-20 at 03:06 -0700, Jan Beulich wrote:
>> > > > On 19.01.16 at 20:55, <write.harmand...@gmail.com> wrote:
>> > 
>> > $ 'addr2line -e xen-syms ffff82d0801c1cce' returns
>> > 'xen/xen/arch/x86/xstate.c:387' which again points to
>> > xsave. Also, adding 'xsave=0' makes it boot just fine.
>> 
>> Ah, I think I see the issue: We're zeroing the entire save area in
>> the fixup code, which will make XRSTORS fault unconditionally.
>> Shuai, would you please look into possible ways of fixing this?
>> Just setting the compressed flag may not be enough, and falling
>> back to plain XRSTOR doesn't seem to be an option either - I'm in
>> particular worried that the current model of zeroing everything
>> is bogus with the growing number of different components which
>> get loaded here.
>> 
>> But of course another question then is why the XRSTORS faults
>> in the first place. I guess we'll need a debugging patch to dump
>> the full state to understand that.
>> 
> If someone can produce and send such patch, I'm sure Harmandeep will be
> happy to give it a try on her hardware.

So here you go. Instead of a debugging one, I hope I have at
once fixed the issue in a suitable way. Whether we'd like to keep
the debugging output we can decide later on.

Both patches need to be applied; while the order shouldn't matter,
the alignment one is a prereq to the actual change.

Jan

x86/xstate: fix fault behavior on XRSTORS

XRSTORS unconditionally faults when xcomp_bv has bit 63 clear. Instead
of just fixing this issue, overhaul the fault recovery code, which -
one of the many mistakes made when xstate support got introduced - was
blindly mirroring that accompanying FXRSTOR, neglecting the fact that
XRSTOR{,S} aren't all-or-nothing instructions. The new code, first of
all, does all the recovery actions in C, simplifying the inline
assembly used. And it does its work in a multi-stage fashion: Upon
first seeing a fault, state fixups get applied strictly based on what
architecturally may cause #GP. When seeing another fault despite the
fixups done, state gets fully reset. A third fault would then lead to
crashing the domain (instead of hanging the hypervisor in an infinite
loop of recurring faults).

Reported-by: Harmandeep Kaur <write.harmand...@gmail.com>
Signed-off-by: Jan Beulich <jbeul...@suse.com>

--- unstable.orig/xen/arch/x86/xstate.c 2016-01-25 09:35:12.000000000 +0100
+++ unstable/xen/arch/x86/xstate.c      2016-01-25 12:34:48.000000000 +0100
@@ -29,6 +29,8 @@ unsigned int *__read_mostly xstate_sizes
 static unsigned int __read_mostly xstate_features;
 static unsigned int __read_mostly xstate_comp_offsets[sizeof(xfeature_mask)*8];
 
+static uint32_t __read_mostly mxcsr_mask = MXCSR_DEFAULT;
+
 /* Cached xcr0 for fast read */
 static DEFINE_PER_CPU(uint64_t, xcr0);
 
@@ -342,6 +344,7 @@ void xrstor(struct vcpu *v, uint64_t mas
     uint32_t hmask = mask >> 32;
     uint32_t lmask = mask;
     struct xsave_struct *ptr = v->arch.xsave_area;
+    unsigned int faults, prev_faults;
 
     /*
      * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
@@ -361,35 +364,79 @@ void xrstor(struct vcpu *v, uint64_t mas
     /*
      * XRSTOR can fault if passed a corrupted data block. We handle this
      * possibility, which may occur if the block was passed to us by control
-     * tools or through VCPUOP_initialise, by silently clearing the block.
+     * tools or through VCPUOP_initialise, by silently adjusting state.
      */
-    switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
+    for ( prev_faults = faults = 0; ; prev_faults = faults )
     {
+        switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
+        {
 #define XRSTOR(pfx) \
         alternative_io("1: .byte " pfx "0x0f,0xae,0x2f\n" \
+                       "3:\n" \
                        "   .section .fixup,\"ax\"\n" \
-                       "2: mov %[size],%%ecx\n" \
-                       "   xor %[lmask_out],%[lmask_out]\n" \
-                       "   rep stosb\n" \
-                       "   lea %[mem],%[ptr]\n" \
-                       "   mov %[lmask_in],%[lmask_out]\n" \
-                       "   jmp 1b\n" \
+                       "2: inc%z[faults] %[faults]\n" \
+                       "   jmp 3b\n" \
                        "   .previous\n" \
                        _ASM_EXTABLE(1b, 2b), \
                        ".byte " pfx "0x0f,0xc7,0x1f\n", \
                        X86_FEATURE_XSAVES, \
-                       ASM_OUTPUT2([ptr] "+&D" (ptr), [lmask_out] "+&a" 
(lmask)), \
-                       [mem] "m" (*ptr), [lmask_in] "g" (lmask), \
-                       [hmask] "d" (hmask), [size] "m" (xsave_cntxt_size) \
-                       : "ecx")
-
-    default:
-        XRSTOR("0x48,");
-        break;
-    case 4: case 2:
-        XRSTOR("");
-        break;
+                       ASM_OUTPUT2([mem] "+m" (*ptr), [faults] "+g" (faults)), 
\
+                       [lmask] "a" (lmask), [hmask] "d" (hmask), \
+                       [ptr] "D" (ptr))
+
+        default:
+            XRSTOR("0x48,");
+            break;
+        case 4: case 2:
+            XRSTOR("");
+            break;
 #undef XRSTOR
+        }
+        if ( likely(faults == prev_faults) )
+            break;
+#ifndef NDEBUG
+        gprintk(XENLOG_WARNING, "fault#%u: mxcsr=%08x\n",
+                faults, ptr->fpu_sse.mxcsr);
+        gprintk(XENLOG_WARNING, "xs=%016lx xc=%016lx\n",
+                ptr->xsave_hdr.xstate_bv, ptr->xsave_hdr.xcomp_bv);
+        gprintk(XENLOG_WARNING, "r0=%016lx r1=%016lx\n",
+                ptr->xsave_hdr.reserved[0], ptr->xsave_hdr.reserved[1]);
+        gprintk(XENLOG_WARNING, "r2=%016lx r3=%016lx\n",
+                ptr->xsave_hdr.reserved[2], ptr->xsave_hdr.reserved[3]);
+        gprintk(XENLOG_WARNING, "r4=%016lx r5=%016lx\n",
+                ptr->xsave_hdr.reserved[4], ptr->xsave_hdr.reserved[5]);
+#endif
+        switch ( faults )
+        {
+        case 1:
+            if ( (ptr->fpu_sse.mxcsr & ~mxcsr_mask) &&
+                 ((mask & XSTATE_SSE) ||
+                  ((mask & XSTATE_YMM) &&
+                   !(ptr->xsave_hdr.xcomp_bv & XSTATE_COMPACTION_ENABLED))) )
+                ptr->fpu_sse.mxcsr &= mxcsr_mask;
+            ptr->xsave_hdr.xstate_bv &= ~mask;
+            if ( cpu_has_xsaves || cpu_has_xsavec )
+            {
+                ptr->xsave_hdr.xcomp_bv &= this_cpu(xcr0) | this_cpu(xss);
+                ptr->xsave_hdr.xstate_bv &= ptr->xsave_hdr.xcomp_bv;
+            }
+            else
+            {
+                ptr->xsave_hdr.xstate_bv &= this_cpu(xcr0);
+                ptr->xsave_hdr.xcomp_bv = 0;
+            }
+            memset(ptr->xsave_hdr.reserved, 0, 
sizeof(ptr->xsave_hdr.reserved));
+            continue;
+        case 2:
+            ptr->fpu_sse.mxcsr = MXCSR_DEFAULT;
+            ptr->xsave_hdr.xstate_bv = 0;
+            ptr->xsave_hdr.xcomp_bv = cpu_has_xsaves
+                                      ? XSTATE_COMPACTION_ENABLED : 0;
+            continue;
+        default:
+            domain_crash(current->domain);
+            break;
+        }
     }
 }
 
@@ -414,7 +461,7 @@ int xstate_alloc_save_area(struct vcpu *
     BUG_ON(xsave_cntxt_size < XSTATE_AREA_MIN_SIZE);
 
     /* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. */
-    BUILD_BUG_ON(__alignof(*save_area) < 64);
+    BUILD_BUG_ON(__alignof(*v->arch.xsave_area) < 64);
     save_area = _xzalloc(xsave_cntxt_size, __alignof(*save_area));
     if ( save_area == NULL )
         return -ENOMEM;
@@ -496,6 +543,8 @@ void xstate_init(struct cpuinfo_x86 *c)
 
     if ( bsp )
     {
+        static typeof(current->arch.xsave_area->fpu_sse) __initdata ctxt;
+
         xfeature_mask = feature_mask;
         /*
          * xsave_cntxt_size is the max size required by enabled features.
@@ -504,6 +553,10 @@ void xstate_init(struct cpuinfo_x86 *c)
         xsave_cntxt_size = _xstate_ctxt_size(feature_mask);
         printk("%s: using cntxt_size: %#x and states: %#"PRIx64"\n",
             __func__, xsave_cntxt_size, xfeature_mask);
+
+        asm ( "fxsave %0" : "=m" (ctxt) );
+        if ( ctxt.mxcsr_mask )
+            mxcsr_mask = ctxt.mxcsr_mask;
     }
     else
     {
x86: adjust xsave structure attributes

The packed attribute was pointlessly used here - there are no
misaligned fields, and hence even if the attribute took effect, it
would at best lead to the compiler generating worse code.

At the same time specify the required alignment of the fpu_sse sub-
structure, such that the various typeof() uses on that field obtain
pointers to properly aligned memory (knowledge which a compiler may
want to make use of).

Also add suitable build-time checks.

Signed-off-by: Jan Beulich <jbeul...@suse.com>

--- unstable.orig/xen/arch/x86/i387.c   2016-01-25 11:30:11.000000000 +0100
+++ unstable/xen/arch/x86/i387.c        2016-01-25 09:35:36.000000000 +0100
@@ -277,7 +277,9 @@ int vcpu_init_fpu(struct vcpu *v)
     }
     else
     {
-        v->arch.fpu_ctxt = _xzalloc(sizeof(v->arch.xsave_area->fpu_sse), 16);
+        BUILD_BUG_ON(__alignof(v->arch.xsave_area->fpu_sse) < 16);
+        v->arch.fpu_ctxt = _xzalloc(sizeof(v->arch.xsave_area->fpu_sse),
+                                    __alignof(v->arch.xsave_area->fpu_sse));
         if ( v->arch.fpu_ctxt )
         {
             typeof(v->arch.xsave_area->fpu_sse) *fpu_sse = v->arch.fpu_ctxt;
--- unstable.orig/xen/arch/x86/xstate.c 2016-01-25 11:30:11.000000000 +0100
+++ unstable/xen/arch/x86/xstate.c      2016-01-25 09:35:12.000000000 +0100
@@ -414,7 +414,8 @@ int xstate_alloc_save_area(struct vcpu *
     BUG_ON(xsave_cntxt_size < XSTATE_AREA_MIN_SIZE);
 
     /* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. */
-    save_area = _xzalloc(xsave_cntxt_size, 64);
+    BUILD_BUG_ON(__alignof(*save_area) < 64);
+    save_area = _xzalloc(xsave_cntxt_size, __alignof(*save_area));
     if ( save_area == NULL )
         return -ENOMEM;
 
--- unstable.orig/xen/include/asm-x86/xstate.h  2016-01-25 11:30:11.000000000 
+0100
+++ unstable/xen/include/asm-x86/xstate.h       2016-01-25 11:33:20.000000000 
+0100
@@ -48,9 +48,9 @@ extern u64 xfeature_mask;
 extern unsigned int *xstate_sizes;
 
 /* extended state save area */
-struct __packed __attribute__((aligned (64))) xsave_struct
+struct __attribute__((aligned (64))) xsave_struct
 {
-    union {                                  /* FPU/MMX, SSE */
+    union __attribute__((aligned(16))) {     /* FPU/MMX, SSE */
         char x[512];
         struct {
             uint16_t fcw;
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Reply via email to