>>> On 27.01.16 at 14:28, <write.harmand...@gmail.com> wrote:
> I tried to apply your patches but it seems
> to have some merge conflicts with latest
> staging branch.
> 
> ~/xen$ git apply ~/Downloads/x86-xsaves-init.patch
> error: patch failed: xen/arch/x86/hvm/hvm.c:2094
> error: xen/arch/x86/hvm/hvm.c: patch does not apply

Oh, indeed. Here's a better set.

Jan

x86/xstate: fix fault behavior on XRSTORS

XRSTORS unconditionally faults when xcomp_bv has bit 63 clear. Instead
of just fixing this issue, overhaul the fault recovery code, which -
one of the many mistakes made when xstate support got introduced - was
blindly mirroring that accompanying FXRSTOR, neglecting the fact that
XRSTOR{,S} aren't all-or-nothing instructions. The new code, first of
all, does all the recovery actions in C, simplifying the inline
assembly used. And it does its work in a multi-stage fashion: Upon
first seeing a fault, state fixups get applied strictly based on what
architecturally may cause #GP. When seeing another fault despite the
fixups done, state gets fully reset. A third fault would then lead to
crashing the domain (instead of hanging the hypervisor in an infinite
loop of recurring faults).

Reported-by: Harmandeep Kaur <write.harmand...@gmail.com>
Signed-off-by: Jan Beulich <jbeul...@suse.com>

--- unstable.orig/xen/arch/x86/xstate.c 2016-01-25 09:35:12.000000000 +0100
+++ unstable/xen/arch/x86/xstate.c      2016-01-27 10:23:06.000000000 +0100
@@ -29,6 +29,8 @@ unsigned int *__read_mostly xstate_sizes
 static unsigned int __read_mostly xstate_features;
 static unsigned int __read_mostly xstate_comp_offsets[sizeof(xfeature_mask)*8];
 
+static uint32_t __read_mostly mxcsr_mask = MXCSR_DEFAULT;
+
 /* Cached xcr0 for fast read */
 static DEFINE_PER_CPU(uint64_t, xcr0);
 
@@ -342,6 +344,7 @@ void xrstor(struct vcpu *v, uint64_t mas
     uint32_t hmask = mask >> 32;
     uint32_t lmask = mask;
     struct xsave_struct *ptr = v->arch.xsave_area;
+    unsigned int faults, prev_faults;
 
     /*
      * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
@@ -361,35 +364,85 @@ void xrstor(struct vcpu *v, uint64_t mas
     /*
      * XRSTOR can fault if passed a corrupted data block. We handle this
      * possibility, which may occur if the block was passed to us by control
-     * tools or through VCPUOP_initialise, by silently clearing the block.
+     * tools or through VCPUOP_initialise, by silently adjusting state.
      */
-    switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
+    for ( prev_faults = faults = 0; ; prev_faults = faults )
     {
+        switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
+        {
 #define XRSTOR(pfx) \
         alternative_io("1: .byte " pfx "0x0f,0xae,0x2f\n" \
+                       "3:\n" \
                        "   .section .fixup,\"ax\"\n" \
-                       "2: mov %[size],%%ecx\n" \
-                       "   xor %[lmask_out],%[lmask_out]\n" \
-                       "   rep stosb\n" \
-                       "   lea %[mem],%[ptr]\n" \
-                       "   mov %[lmask_in],%[lmask_out]\n" \
-                       "   jmp 1b\n" \
+                       "2: inc%z[faults] %[faults]\n" \
+                       "   jmp 3b\n" \
                        "   .previous\n" \
                        _ASM_EXTABLE(1b, 2b), \
                        ".byte " pfx "0x0f,0xc7,0x1f\n", \
                        X86_FEATURE_XSAVES, \
-                       ASM_OUTPUT2([ptr] "+&D" (ptr), [lmask_out] "+&a" 
(lmask)), \
-                       [mem] "m" (*ptr), [lmask_in] "g" (lmask), \
-                       [hmask] "d" (hmask), [size] "m" (xsave_cntxt_size) \
-                       : "ecx")
-
-    default:
-        XRSTOR("0x48,");
-        break;
-    case 4: case 2:
-        XRSTOR("");
-        break;
+                       ASM_OUTPUT2([mem] "+m" (*ptr), [faults] "+g" (faults)), 
\
+                       [lmask] "a" (lmask), [hmask] "d" (hmask), \
+                       [ptr] "D" (ptr))
+
+        default:
+            XRSTOR("0x48,");
+            break;
+        case 4: case 2:
+            XRSTOR("");
+            break;
 #undef XRSTOR
+        }
+        if ( likely(faults == prev_faults) )
+            break;
+#ifndef NDEBUG
+        gprintk(XENLOG_WARNING, "fault#%u: mxcsr=%08x\n",
+                faults, ptr->fpu_sse.mxcsr);
+        gprintk(XENLOG_WARNING, "xs=%016lx xc=%016lx\n",
+                ptr->xsave_hdr.xstate_bv, ptr->xsave_hdr.xcomp_bv);
+        gprintk(XENLOG_WARNING, "r0=%016lx r1=%016lx\n",
+                ptr->xsave_hdr.reserved[0], ptr->xsave_hdr.reserved[1]);
+        gprintk(XENLOG_WARNING, "r2=%016lx r3=%016lx\n",
+                ptr->xsave_hdr.reserved[2], ptr->xsave_hdr.reserved[3]);
+        gprintk(XENLOG_WARNING, "r4=%016lx r5=%016lx\n",
+                ptr->xsave_hdr.reserved[4], ptr->xsave_hdr.reserved[5]);
+#endif
+        switch ( faults )
+        {
+        case 1:
+            /* Stage 1: Reset state to be loaded. */
+            ptr->xsave_hdr.xstate_bv &= ~mask;
+            /*
+             * Also try to eliminate fault reasons, even if this shouldn't be
+             * needed here (other code should ensure the sanity of the data).
+             */
+            if ( ((mask & XSTATE_SSE) ||
+                  ((mask & XSTATE_YMM) &&
+                   !(ptr->xsave_hdr.xcomp_bv & XSTATE_COMPACTION_ENABLED))) )
+                ptr->fpu_sse.mxcsr &= mxcsr_mask;
+            if ( cpu_has_xsaves || cpu_has_xsavec )
+            {
+                ptr->xsave_hdr.xcomp_bv &= this_cpu(xcr0) | this_cpu(xss);
+                ptr->xsave_hdr.xstate_bv &= ptr->xsave_hdr.xcomp_bv;
+                ptr->xsave_hdr.xcomp_bv |= XSTATE_COMPACTION_ENABLED;
+            }
+            else
+            {
+                ptr->xsave_hdr.xstate_bv &= this_cpu(xcr0);
+                ptr->xsave_hdr.xcomp_bv = 0;
+            }
+            memset(ptr->xsave_hdr.reserved, 0, 
sizeof(ptr->xsave_hdr.reserved));
+            continue;
+        case 2:
+            /* Stage 2: Reset all state. */
+            ptr->fpu_sse.mxcsr = MXCSR_DEFAULT;
+            ptr->xsave_hdr.xstate_bv = 0;
+            ptr->xsave_hdr.xcomp_bv = cpu_has_xsaves
+                                      ? XSTATE_COMPACTION_ENABLED : 0;
+            continue;
+        default:
+            domain_crash(current->domain);
+            break;
+        }
     }
 }
 
@@ -496,6 +549,8 @@ void xstate_init(struct cpuinfo_x86 *c)
 
     if ( bsp )
     {
+        static typeof(current->arch.xsave_area->fpu_sse) __initdata ctxt;
+
         xfeature_mask = feature_mask;
         /*
          * xsave_cntxt_size is the max size required by enabled features.
@@ -504,6 +559,10 @@ void xstate_init(struct cpuinfo_x86 *c)
         xsave_cntxt_size = _xstate_ctxt_size(feature_mask);
         printk("%s: using cntxt_size: %#x and states: %#"PRIx64"\n",
             __func__, xsave_cntxt_size, xfeature_mask);
+
+        asm ( "fxsave %0" : "=m" (ctxt) );
+        if ( ctxt.mxcsr_mask )
+            mxcsr_mask = ctxt.mxcsr_mask;
     }
     else
     {
x86/xstate: fix xcomp_bv initialization

We must not clear the compaction bit when using XSAVES/XRSTORS. And
we need to guarantee that xcomp_bv never has any bits clear which
are set in xstate_bv (which requires partly undoing commit 83ae0bb226
["x86/xsave: simplify xcomp_bv initialization"]). Split initialization
of xcomp_bv from the other FPU/SSE/AVX related state setup in
arch_set_info_guest() and hvm_load_cpu_ctxt().

Reported-by: Harmandeep Kaur <write.harmand...@gmail.com>
Signed-off-by: Jan Beulich <jbeul...@suse.com>

--- unstable.orig/xen/arch/x86/domain.c 2015-12-18 12:21:58.000000000 +0100
+++ unstable/xen/arch/x86/domain.c      2016-01-27 09:52:37.000000000 +0100
@@ -922,15 +922,10 @@ int arch_set_info_guest(
     {
         memcpy(v->arch.fpu_ctxt, &c.nat->fpu_ctxt, sizeof(c.nat->fpu_ctxt));
         if ( v->arch.xsave_area )
-        {
             v->arch.xsave_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE;
-            v->arch.xsave_area->xsave_hdr.xcomp_bv =
-                cpu_has_xsaves ? XSTATE_COMPACTION_ENABLED : 0;
-        }
     }
     else if ( v->arch.xsave_area )
-        memset(&v->arch.xsave_area->xsave_hdr, 0,
-               sizeof(v->arch.xsave_area->xsave_hdr));
+        v->arch.xsave_area->xsave_hdr.xstate_bv = 0;
     else
     {
         typeof(v->arch.xsave_area->fpu_sse) *fpu_sse = v->arch.fpu_ctxt;
@@ -939,6 +934,14 @@ int arch_set_info_guest(
         fpu_sse->fcw = FCW_DEFAULT;
         fpu_sse->mxcsr = MXCSR_DEFAULT;
     }
+    if ( cpu_has_xsaves )
+    {
+        ASSERT(v->arch.xsave_area);
+        v->arch.xsave_area->xsave_hdr.xcomp_bv = XSTATE_COMPACTION_ENABLED |
+            v->arch.xsave_area->xsave_hdr.xstate_bv;
+    }
+    else if ( v->arch.xsave_area )
+        v->arch.xsave_area->xsave_hdr.xcomp_bv = 0;
 
     if ( !compat )
     {
--- unstable.orig/xen/arch/x86/hvm/hvm.c        2015-12-18 12:22:20.000000000 
+0100
+++ unstable/xen/arch/x86/hvm/hvm.c     2016-01-27 15:25:13.000000000 +0100
@@ -1974,6 +1974,7 @@ static int hvm_load_cpu_ctxt(struct doma
     struct hvm_hw_cpu ctxt;
     struct segment_register seg;
     const char *errstr;
+    struct xsave_struct *xsave_area;
 
     /* Which vcpu is this? */
     vcpuid = hvm_load_instance(h);
@@ -2100,20 +2101,24 @@ static int hvm_load_cpu_ctxt(struct doma
     seg.attr.bytes = ctxt.ldtr_arbytes;
     hvm_set_segment_register(v, x86_seg_ldtr, &seg);
 
+    /* Cover xsave-absent save file restoration on xsave-capable host. */
+    xsave_area = xsave_enabled(v) ? NULL : v->arch.xsave_area;
+
     v->fpu_initialised = !!(ctxt.flags & XEN_X86_FPU_INITIALISED);
     if ( v->fpu_initialised )
     {
         memcpy(v->arch.fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
-        /* In case xsave-absent save file is restored on a xsave-capable host 
*/
-        if ( cpu_has_xsave && !xsave_enabled(v) )
-        {
-            struct xsave_struct *xsave_area = v->arch.xsave_area;
-
+        if ( xsave_area )
             xsave_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE;
-            xsave_area->xsave_hdr.xcomp_bv =
-                cpu_has_xsaves ? XSTATE_COMPACTION_ENABLED : 0;
-        }
     }
+    else if ( xsave_area )
+    {
+        xsave_area->xsave_hdr.xstate_bv = 0;
+        xsave_area->fpu_sse.mxcsr = MXCSR_DEFAULT;
+    }
+    if ( cpu_has_xsaves && xsave_area )
+        xsave_area->xsave_hdr.xcomp_bv = XSTATE_COMPACTION_ENABLED |
+            xsave_area->xsave_hdr.xstate_bv;
 
     v->arch.user_regs.eax = ctxt.rax;
     v->arch.user_regs.ebx = ctxt.rbx;
@@ -5502,8 +5507,8 @@ void hvm_vcpu_reset_state(struct vcpu *v
     if ( v->arch.xsave_area )
     {
         v->arch.xsave_area->xsave_hdr.xstate_bv = XSTATE_FP;
-        v->arch.xsave_area->xsave_hdr.xcomp_bv =
-            cpu_has_xsaves ? XSTATE_COMPACTION_ENABLED : 0;
+        v->arch.xsave_area->xsave_hdr.xcomp_bv = cpu_has_xsaves
+            ? XSTATE_COMPACTION_ENABLED | XSTATE_FP : 0;
     }
 
     v->arch.vgc_flags = VGCF_online;
x86: adjust xsave structure attributes

The packed attribute was pointlessly used here - there are no
misaligned fields, and hence even if the attribute took effect, it
would at best lead to the compiler generating worse code.

At the same time specify the required alignment of the fpu_sse sub-
structure, such that the various typeof() uses on that field obtain
pointers to properly aligned memory (knowledge which a compiler may
want to make use of).

Also add suitable build-time checks.

Signed-off-by: Jan Beulich <jbeul...@suse.com>

--- unstable.orig/xen/arch/x86/i387.c   2016-01-25 11:30:11.000000000 +0100
+++ unstable/xen/arch/x86/i387.c        2016-01-25 09:35:36.000000000 +0100
@@ -277,7 +277,9 @@ int vcpu_init_fpu(struct vcpu *v)
     }
     else
     {
-        v->arch.fpu_ctxt = _xzalloc(sizeof(v->arch.xsave_area->fpu_sse), 16);
+        BUILD_BUG_ON(__alignof(v->arch.xsave_area->fpu_sse) < 16);
+        v->arch.fpu_ctxt = _xzalloc(sizeof(v->arch.xsave_area->fpu_sse),
+                                    __alignof(v->arch.xsave_area->fpu_sse));
         if ( v->arch.fpu_ctxt )
         {
             typeof(v->arch.xsave_area->fpu_sse) *fpu_sse = v->arch.fpu_ctxt;
--- unstable.orig/xen/arch/x86/xstate.c 2016-01-25 11:30:11.000000000 +0100
+++ unstable/xen/arch/x86/xstate.c      2016-01-25 09:35:12.000000000 +0100
@@ -414,7 +414,8 @@ int xstate_alloc_save_area(struct vcpu *
     BUG_ON(xsave_cntxt_size < XSTATE_AREA_MIN_SIZE);
 
     /* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. */
-    save_area = _xzalloc(xsave_cntxt_size, 64);
+    BUILD_BUG_ON(__alignof(*save_area) < 64);
+    save_area = _xzalloc(xsave_cntxt_size, __alignof(*save_area));
     if ( save_area == NULL )
         return -ENOMEM;
 
--- unstable.orig/xen/include/asm-x86/xstate.h  2016-01-25 11:30:11.000000000 
+0100
+++ unstable/xen/include/asm-x86/xstate.h       2016-01-25 11:33:20.000000000 
+0100
@@ -48,9 +48,9 @@ extern u64 xfeature_mask;
 extern unsigned int *xstate_sizes;
 
 /* extended state save area */
-struct __packed __attribute__((aligned (64))) xsave_struct
+struct __attribute__((aligned (64))) xsave_struct
 {
-    union {                                  /* FPU/MMX, SSE */
+    union __attribute__((aligned(16))) {     /* FPU/MMX, SSE */
         char x[512];
         struct {
             uint16_t fcw;
x86/xstate: extend validation to cover full header

Since we never hand out compacted state, at least for now we're also
not going to accept such.

Reported-by: Harmandeep Kaur <write.harmand...@gmail.com>
Signed-off-by: Jan Beulich <jbeul...@suse.com>

--- unstable.orig/xen/arch/x86/domctl.c 2016-01-27 14:56:10.000000000 +0100
+++ unstable/xen/arch/x86/domctl.c      2016-01-27 10:44:52.000000000 +0100
@@ -958,7 +958,7 @@ long arch_do_domctl(
             {
                 if ( evc->size >= 2 * sizeof(uint64_t) + XSTATE_AREA_MIN_SIZE )
                     ret = validate_xstate(_xcr0, _xcr0_accum,
-                                          _xsave_area->xsave_hdr.xstate_bv);
+                                          &_xsave_area->xsave_hdr);
             }
             else if ( !_xcr0 )
                 ret = 0;
--- unstable.orig/xen/arch/x86/hvm/hvm.c        2016-01-27 15:25:13.000000000 
+0100
+++ unstable/xen/arch/x86/hvm/hvm.c     2016-01-27 15:26:15.000000000 +0100
@@ -2191,6 +2191,19 @@ static int hvm_save_cpu_xsave_states(str
     return 0;
 }
 
+/*
+ * Structure layout conformity checks, documenting correctness of the cast in
+ * the invocation of validate_xstate() below.
+ * Leverage CONFIG_COMPAT machinery to perform this.
+ */
+#define xen_xsave_hdr xsave_hdr
+#define compat_xsave_hdr hvm_hw_cpu_xsave_hdr
+CHECK_FIELD_(struct, xsave_hdr, xstate_bv);
+CHECK_FIELD_(struct, xsave_hdr, xcomp_bv);
+CHECK_FIELD_(struct, xsave_hdr, reserved);
+#undef compat_xsave_hdr
+#undef xen_xsave_hdr
+
 static int hvm_load_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h)
 {
     unsigned int vcpuid, size;
@@ -2246,7 +2259,7 @@ static int hvm_load_cpu_xsave_states(str
     h->cur += desc->length;
 
     err = validate_xstate(ctxt->xcr0, ctxt->xcr0_accum,
-                          ctxt->save_area.xsave_hdr.xstate_bv);
+                          (const void *)&ctxt->save_area.xsave_hdr);
     if ( err )
     {
         printk(XENLOG_G_WARNING
--- unstable.orig/xen/arch/x86/xstate.c 2016-01-27 10:23:06.000000000 +0100
+++ unstable/xen/arch/x86/xstate.c      2016-01-27 10:48:22.000000000 +0100
@@ -614,17 +614,24 @@ static bool_t valid_xcr0(u64 xcr0)
     return !(xcr0 & XSTATE_BNDREGS) == !(xcr0 & XSTATE_BNDCSR);
 }
 
-int validate_xstate(u64 xcr0, u64 xcr0_accum, u64 xstate_bv)
+int validate_xstate(u64 xcr0, u64 xcr0_accum, const struct xsave_hdr *hdr)
 {
-    if ( (xstate_bv & ~xcr0_accum) ||
+    unsigned int i;
+
+    if ( (hdr->xstate_bv & ~xcr0_accum) ||
          (xcr0 & ~xcr0_accum) ||
          !valid_xcr0(xcr0) ||
          !valid_xcr0(xcr0_accum) )
         return -EINVAL;
 
-    if ( xcr0_accum & ~xfeature_mask )
+    if ( (xcr0_accum & ~xfeature_mask) ||
+         hdr->xcomp_bv )
         return -EOPNOTSUPP;
 
+    for ( i = 0; i < ARRAY_SIZE(hdr->reserved); ++i )
+        if ( hdr->reserved[i] )
+            return -EIO;
+
     return 0;
 }
 
--- unstable.orig/xen/include/asm-x86/xstate.h  2016-01-25 11:33:20.000000000 
+0100
+++ unstable/xen/include/asm-x86/xstate.h       2016-01-27 10:57:54.000000000 
+0100
@@ -72,14 +72,13 @@ struct __attribute__((aligned (64))) xsa
         };
     } fpu_sse;
 
-    struct {
+    struct xsave_hdr {
         u64 xstate_bv;
         u64 xcomp_bv;
         u64 reserved[6];
     } xsave_hdr;                             /* The 64-byte header */
 
-    struct { char x[XSTATE_YMM_SIZE]; } ymm; /* YMM */
-    char   data[];                           /* Future new states */
+    char data[];                             /* Variable layout states */
 };
 
 /* extended state operations */
@@ -90,7 +89,8 @@ uint64_t get_msr_xss(void);
 void xsave(struct vcpu *v, uint64_t mask);
 void xrstor(struct vcpu *v, uint64_t mask);
 bool_t xsave_enabled(const struct vcpu *v);
-int __must_check validate_xstate(u64 xcr0, u64 xcr0_accum, u64 xstate_bv);
+int __must_check validate_xstate(u64 xcr0, u64 xcr0_accum,
+                                 const struct xsave_hdr *);
 int __must_check handle_xsetbv(u32 index, u64 new_bv);
 void expand_xsave_states(struct vcpu *v, void *dest, unsigned int size);
 void compress_xsave_states(struct vcpu *v, const void *src, unsigned int size);
--- unstable.orig/xen/include/public/arch-x86/hvm/save.h        2016-01-27 
14:58:51.000000000 +0100
+++ unstable/xen/include/public/arch-x86/hvm/save.h     2016-01-27 
15:26:25.000000000 +0100
@@ -564,12 +564,11 @@ struct hvm_hw_cpu_xsave {
     struct {
         struct { char x[512]; } fpu_sse;
 
-        struct {
+        struct hvm_hw_cpu_xsave_hdr {
             uint64_t xstate_bv;         /* Updated by XRSTOR */
-            uint64_t reserved[7];
+            uint64_t xcomp_bv;          /* Updated by XRSTOR{C,S} */
+            uint64_t reserved[6];
         } xsave_hdr;                    /* The 64-byte header */
-
-        struct { char x[0]; } ymm;    /* YMM */
     } save_area;
 };
 
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Reply via email to