When calling FXSAVE, XSAVE, FXRSTOR, ... for 64-bit programs on amd64
use the 64-suffixed variant in order to include the complete FIP/FDP
registers in the x87 area.

The difference between the two variants is that the FXSAVE64 (new)
variant represents FIP/FDP as 64-bit fields (union fp_addr.fa_64),
while the legacy FXSAVE variant uses split fields: 32-bit offset,
16-bit segment and 16-bit reserved field (union fp_addr.fa_32).
The latter implies that the actual addresses are truncated to 32 bits
which is insufficient in modern programs.

The change is applied only to 64-bit programs on amd64.  Plain i386
and compat32 continue using plain FXSAVE.  Similarly, NVMM is not
changed as I am not familiar with that code.

This is a potentially breaking change.  However, I don't think it is likely
to actually break anything, since the data provided by the old variant
were not meaningful anyway (the pointers were truncated to 32 bits).
---
 sys/arch/x86/include/cpufunc.h         | 76 ++++++++++++++++++++++++++
 sys/arch/x86/include/fpu.h             |  4 +-
 sys/arch/x86/x86/fpu.c                 | 30 ++++++----
 sys/dev/nvmm/x86/nvmm_x86_svm.c        |  6 +-
 sys/dev/nvmm/x86/nvmm_x86_vmx.c        |  6 +-
 tests/lib/libc/sys/t_ptrace_x86_wait.h |  2 -
 6 files changed, 105 insertions(+), 19 deletions(-)

diff --git a/sys/arch/x86/include/cpufunc.h b/sys/arch/x86/include/cpufunc.h
index 38534c305544..dd8b0c7dc375 100644
--- a/sys/arch/x86/include/cpufunc.h
+++ b/sys/arch/x86/include/cpufunc.h
@@ -485,6 +485,82 @@ xrstor(const void *addr, uint64_t mask)
        );
 }
 
+#ifdef __x86_64__
+static inline void
+fxsave64(void *addr)
+{
+       uint8_t *area = addr;
+
+       __asm volatile (
+               "fxsave64       %[area]"
+               : [area] "=m" (*area)
+               :
+               : "memory"
+       );
+}
+
+static inline void
+fxrstor64(const void *addr)
+{
+       const uint8_t *area = addr;
+
+       __asm volatile (
+               "fxrstor64 %[area]"
+               :
+               : [area] "m" (*area)
+               : "memory"
+       );
+}
+
+static inline void
+xsave64(void *addr, uint64_t mask)
+{
+       uint8_t *area = addr;
+       uint32_t low, high;
+
+       low = mask;
+       high = mask >> 32;
+       __asm volatile (
+               "xsave64        %[area]"
+               : [area] "=m" (*area)
+               : "a" (low), "d" (high)
+               : "memory"
+       );
+}
+
+static inline void
+xsaveopt64(void *addr, uint64_t mask)
+{
+       uint8_t *area = addr;
+       uint32_t low, high;
+
+       low = mask;
+       high = mask >> 32;
+       __asm volatile (
+               "xsaveopt64 %[area]"
+               : [area] "=m" (*area)
+               : "a" (low), "d" (high)
+               : "memory"
+       );
+}
+
+static inline void
+xrstor64(const void *addr, uint64_t mask)
+{
+       const uint8_t *area = addr;
+       uint32_t low, high;
+
+       low = mask;
+       high = mask >> 32;
+       __asm volatile (
+               "xrstor64 %[area]"
+               :
+               : [area] "m" (*area), "a" (low), "d" (high)
+               : "memory"
+       );
+}
+#endif
+
 /* -------------------------------------------------------------------------- */
 
 #ifdef XENPV
diff --git a/sys/arch/x86/include/fpu.h b/sys/arch/x86/include/fpu.h
index 3255a8ca39e0..bdd86abfdd39 100644
--- a/sys/arch/x86/include/fpu.h
+++ b/sys/arch/x86/include/fpu.h
@@ -14,8 +14,8 @@ struct trapframe;
 void fpuinit(struct cpu_info *);
 void fpuinit_mxcsr_mask(void);
 
-void fpu_area_save(void *, uint64_t);
-void fpu_area_restore(const void *, uint64_t);
+void fpu_area_save(void *, uint64_t, bool);
+void fpu_area_restore(const void *, uint64_t, bool);
 
 void fpu_save(void);
 
diff --git a/sys/arch/x86/x86/fpu.c b/sys/arch/x86/x86/fpu.c
index baff8d008299..e16a64f9ff8a 100644
--- a/sys/arch/x86/x86/fpu.c
+++ b/sys/arch/x86/x86/fpu.c
@@ -156,7 +156,7 @@ fpu_save_lwp(struct lwp *l)
        s = splvm();
        if (l->l_md.md_flags & MDL_FPU_IN_CPU) {
                KASSERT((l->l_flag & LW_SYSTEM) == 0);
-               fpu_area_save(area, x86_xsave_features);
+               fpu_area_save(area, x86_xsave_features, !(l->l_proc->p_flag & PK_32));
                l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
        }
        splx(s);
@@ -246,21 +246,27 @@ fpu_errata_amd(void)
        fldummy();
 }
 
+#ifdef __x86_64__
+#define XS64(x) (is_64bit ? x##64 : x)
+#else
+#define XS64(x) x
+#endif
+
 void
-fpu_area_save(void *area, uint64_t xsave_features)
+fpu_area_save(void *area, uint64_t xsave_features, bool is_64bit)
 {
        switch (x86_fpu_save) {
        case FPU_SAVE_FSAVE:
                fnsave(area);
                break;
        case FPU_SAVE_FXSAVE:
-               fxsave(area);
+               XS64(fxsave)(area);
                break;
        case FPU_SAVE_XSAVE:
-               xsave(area, xsave_features);
+               XS64(xsave)(area, xsave_features);
                break;
        case FPU_SAVE_XSAVEOPT:
-               xsaveopt(area, xsave_features);
+               XS64(xsaveopt)(area, xsave_features);
                break;
        }
 
@@ -268,7 +274,7 @@ fpu_area_save(void *area, uint64_t xsave_features)
 }
 
 void
-fpu_area_restore(const void *area, uint64_t xsave_features)
+fpu_area_restore(const void *area, uint64_t xsave_features, bool is_64bit)
 {
        clts();
 
@@ -279,13 +285,13 @@ fpu_area_restore(const void *area, uint64_t xsave_features)
        case FPU_SAVE_FXSAVE:
                if (cpu_vendor == CPUVENDOR_AMD)
                        fpu_errata_amd();
-               fxrstor(area);
+               XS64(fxrstor)(area);
                break;
        case FPU_SAVE_XSAVE:
        case FPU_SAVE_XSAVEOPT:
                if (cpu_vendor == CPUVENDOR_AMD)
                        fpu_errata_amd();
-               xrstor(area, xsave_features);
+               XS64(xrstor)(area, xsave_features);
                break;
        }
 }
@@ -294,7 +300,8 @@ void
 fpu_handle_deferred(void)
 {
        struct pcb *pcb = lwp_getpcb(curlwp);
-       fpu_area_restore(&pcb->pcb_savefpu, x86_xsave_features);
+       fpu_area_restore(&pcb->pcb_savefpu, x86_xsave_features,
+           !(curlwp->l_proc->p_flag & PK_32));
 }
 
 void
@@ -309,7 +316,8 @@ fpu_switch(struct lwp *oldlwp, struct lwp *newlwp)
        if (oldlwp->l_md.md_flags & MDL_FPU_IN_CPU) {
                KASSERT(!(oldlwp->l_flag & LW_SYSTEM));
                pcb = lwp_getpcb(oldlwp);
-               fpu_area_save(&pcb->pcb_savefpu, x86_xsave_features);
+               fpu_area_save(&pcb->pcb_savefpu, x86_xsave_features,
+                   !(oldlwp->l_proc->p_flag & PK_32));
                oldlwp->l_md.md_flags &= ~MDL_FPU_IN_CPU;
        }
        KASSERT(!(newlwp->l_md.md_flags & MDL_FPU_IN_CPU));
@@ -413,7 +421,7 @@ fpu_kern_leave(void)
         * through Spectre-class attacks to userland, even if there are
         * no bugs in fpu state management.
         */
-       fpu_area_restore(&zero_fpu, x86_xsave_features);
+       fpu_area_restore(&zero_fpu, x86_xsave_features, false);
 
        /*
         * Set CR0_TS again so that the kernel can't accidentally use
diff --git a/sys/dev/nvmm/x86/nvmm_x86_svm.c b/sys/dev/nvmm/x86/nvmm_x86_svm.c
index 1dab0b27c7b3..18c82520adfa 100644
--- a/sys/dev/nvmm/x86/nvmm_x86_svm.c
+++ b/sys/dev/nvmm/x86/nvmm_x86_svm.c
@@ -1351,7 +1351,8 @@ svm_vcpu_guest_fpu_enter(struct nvmm_cpu *vcpu)
        struct svm_cpudata *cpudata = vcpu->cpudata;
 
        fpu_kern_enter();
-       fpu_area_restore(&cpudata->gfpu, svm_xcr0_mask);
+       /* TODO: should we use *XSAVE64 here? */
+       fpu_area_restore(&cpudata->gfpu, svm_xcr0_mask, false);
 
        if (svm_xcr0_mask != 0) {
                cpudata->hxcr0 = rdxcr(0);
@@ -1369,7 +1370,8 @@ svm_vcpu_guest_fpu_leave(struct nvmm_cpu *vcpu)
                wrxcr(0, cpudata->hxcr0);
        }
 
-       fpu_area_save(&cpudata->gfpu, svm_xcr0_mask);
+       /* TODO: should we use *XSAVE64 here? */
+       fpu_area_save(&cpudata->gfpu, svm_xcr0_mask, false);
        fpu_kern_leave();
 }
 
diff --git a/sys/dev/nvmm/x86/nvmm_x86_vmx.c b/sys/dev/nvmm/x86/nvmm_x86_vmx.c
index 852aadfd7626..471e56df0507 100644
--- a/sys/dev/nvmm/x86/nvmm_x86_vmx.c
+++ b/sys/dev/nvmm/x86/nvmm_x86_vmx.c
@@ -2014,7 +2014,8 @@ vmx_vcpu_guest_fpu_enter(struct nvmm_cpu *vcpu)
        struct vmx_cpudata *cpudata = vcpu->cpudata;
 
        fpu_kern_enter();
-       fpu_area_restore(&cpudata->gfpu, vmx_xcr0_mask);
+       /* TODO: should we use *XSAVE64 here? */
+       fpu_area_restore(&cpudata->gfpu, vmx_xcr0_mask, false);
 
        if (vmx_xcr0_mask != 0) {
                cpudata->hxcr0 = rdxcr(0);
@@ -2032,7 +2033,8 @@ vmx_vcpu_guest_fpu_leave(struct nvmm_cpu *vcpu)
                wrxcr(0, cpudata->hxcr0);
        }
 
-       fpu_area_save(&cpudata->gfpu, vmx_xcr0_mask);
+       /* TODO: should we use *XSAVE64 here? */
+       fpu_area_save(&cpudata->gfpu, vmx_xcr0_mask, false);
        fpu_kern_leave();
 }
 
diff --git a/tests/lib/libc/sys/t_ptrace_x86_wait.h b/tests/lib/libc/sys/t_ptrace_x86_wait.h
index 7410a0fc5500..8d3d0000b9ae 100644
--- a/tests/lib/libc/sys/t_ptrace_x86_wait.h
+++ b/tests/lib/libc/sys/t_ptrace_x86_wait.h
@@ -3387,12 +3387,10 @@ x86_register_test(enum x86_test_regset regset, enum x86_test_registers regs,
                                ATF_CHECK_EQ(fxs->fx_opcode,
                                    expected_fpu.opcode);
 #if defined(__x86_64__)
-#if 0 /* TODO: kernel needs patching to call *XSAVE64 */
                                ATF_CHECK_EQ(fxs->fx_ip.fa_64,
                                    ((uint64_t)gpr.regs[_REG_RIP]) - 3);
                                ATF_CHECK_EQ(fxs->fx_dp.fa_64,
                                    (uint64_t)&x86_test_zero);
-#endif
 #else
                                ATF_CHECK_EQ(fxs->fx_ip.fa_32.fa_off,
                                    (uint32_t)gpr.r_eip - 3);
-- 
2.28.0

Reply via email to