> From: Zhong, Yang <yang.zh...@intel.com> > Sent: Friday, January 7, 2022 5:32 PM > > From: Jing Liu <jing2....@intel.com> > > Extended feature has large state while current > kvm_xsave only allows 4KB. Use new XSAVE ioctls > if the xstate size is larger than kvm_xsave.
shouldn't we always use the new xsave ioctls as long as CAP_XSAVE2 is available? > > Signed-off-by: Jing Liu <jing2....@intel.com> > Signed-off-by: Zeng Guang <guang.z...@intel.com> > Signed-off-by: Wei Wang <wei.w.w...@intel.com> > Signed-off-by: Yang Zhong <yang.zh...@intel.com> > --- > linux-headers/asm-x86/kvm.h | 14 ++++++++++++++ > linux-headers/linux/kvm.h | 2 ++ > target/i386/cpu.h | 5 +++++ > target/i386/kvm/kvm.c | 16 ++++++++++++++-- > target/i386/xsave_helper.c | 35 +++++++++++++++++++++++++++++++++++ > 5 files changed, 70 insertions(+), 2 deletions(-) > > diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h > index 5a776a08f7..32f2a921e8 100644 > --- a/linux-headers/asm-x86/kvm.h > +++ b/linux-headers/asm-x86/kvm.h > @@ -376,6 +376,20 @@ struct kvm_debugregs { > /* for KVM_CAP_XSAVE */ > struct kvm_xsave { > __u32 region[1024]; > + /* > + * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many > bytes > + * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) > + * respectively, when invoked on the vm file descriptor. > + * > + * The size value returned by > KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) > + * will always be at least 4096. Currently, it is only greater > + * than 4096 if a dynamic feature has been enabled with > + * ``arch_prctl()``, but this may change in the future. > + * > + * The offsets of the state save areas in struct kvm_xsave follow > + * the contents of CPUID leaf 0xD on the host. 
> + */ > + __u32 extra[0]; > }; > > #define KVM_MAX_XCRS 16 > diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h > index 02c5e7b7bb..97d5b6d81d 100644 > --- a/linux-headers/linux/kvm.h > +++ b/linux-headers/linux/kvm.h > @@ -1130,6 +1130,7 @@ struct kvm_ppc_resize_hpt { > #define KVM_CAP_BINARY_STATS_FD 203 > #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 > #define KVM_CAP_ARM_MTE 205 > +#define KVM_CAP_XSAVE2 207 > > #ifdef KVM_CAP_IRQ_ROUTING > > @@ -1550,6 +1551,7 @@ struct kvm_s390_ucas_mapping { > /* Available with KVM_CAP_XSAVE */ > #define KVM_GET_XSAVE _IOR(KVMIO, 0xa4, struct > kvm_xsave) > #define KVM_SET_XSAVE _IOW(KVMIO, 0xa5, struct > kvm_xsave) > +#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct > kvm_xsave) > /* Available with KVM_CAP_XCRS */ > #define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs) > #define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs) > diff --git a/target/i386/cpu.h b/target/i386/cpu.h > index 245e8b5a1a..6153c4ab1a 100644 > --- a/target/i386/cpu.h > +++ b/target/i386/cpu.h > @@ -1519,6 +1519,11 @@ typedef struct CPUX86State { > YMMReg zmmh_regs[CPU_NB_REGS]; > ZMMReg hi16_zmm_regs[CPU_NB_REGS]; > > +#ifdef TARGET_X86_64 > + uint8_t xtilecfg[64]; > + uint8_t xtiledata[8192]; > +#endif > + > /* sysenter registers */ > uint32_t sysenter_cs; > target_ulong sysenter_esp; > diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c > index 3fb3ddbe2b..97520e9dff 100644 > --- a/target/i386/kvm/kvm.c > +++ b/target/i386/kvm/kvm.c > @@ -1983,7 +1983,12 @@ int kvm_arch_init_vcpu(CPUState *cs) > } > > if (has_xsave) { > - env->xsave_buf_len = sizeof(struct kvm_xsave); > + uint32_t size = kvm_vm_check_extension(cs->kvm_state, > KVM_CAP_XSAVE2); > + if (!size) { > + size = sizeof(struct kvm_xsave); > + } > + > + env->xsave_buf_len = QEMU_ALIGN_UP(size, 4096); > env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len); > memset(env->xsave_buf, 0, env->xsave_buf_len); > > @@ -2580,6 +2585,7 @@ static int kvm_put_xsave(X86CPU 
*cpu) > if (!has_xsave) { > return kvm_put_fpu(cpu); > } > + > x86_cpu_xsave_all_areas(cpu, xsave, env->xsave_buf_len); > > return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave); > @@ -3247,10 +3253,16 @@ static int kvm_get_xsave(X86CPU *cpu) > return kvm_get_fpu(cpu); > } > > - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave); > + if (env->xsave_buf_len <= sizeof(struct kvm_xsave)) { > + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave); > + } else { > + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE2, xsave); > + } > + > if (ret < 0) { > return ret; > } > + > x86_cpu_xrstor_all_areas(cpu, xsave, env->xsave_buf_len); > > return 0; > diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c > index ac61a96344..090424e820 100644 > --- a/target/i386/xsave_helper.c > +++ b/target/i386/xsave_helper.c > @@ -5,6 +5,7 @@ > #include "qemu/osdep.h" > > #include "cpu.h" > +#include <asm/kvm.h> > > void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen) > { > @@ -126,6 +127,23 @@ void x86_cpu_xsave_all_areas(X86CPU *cpu, void > *buf, uint32_t buflen) > > memcpy(pkru, &env->pkru, sizeof(env->pkru)); > } > + > + e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT]; > + if (e->size && e->offset) { > + XSaveXTILE_CFG *tilecfg = buf + e->offset; > + > + memcpy(tilecfg, &env->xtilecfg, sizeof(env->xtilecfg)); > + } > + > + if (buflen > sizeof(struct kvm_xsave)) { > + e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT]; > + > + if (e->size && e->offset) { > + XSaveXTILE_DATA *tiledata = buf + e->offset; > + > + memcpy(tiledata, &env->xtiledata, sizeof(env->xtiledata)); > + } > + } > #endif > } > > @@ -247,5 +265,22 @@ void x86_cpu_xrstor_all_areas(X86CPU *cpu, const > void *buf, uint32_t buflen) > pkru = buf + e->offset; > memcpy(&env->pkru, pkru, sizeof(env->pkru)); > } > + > + e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT]; > + if (e->size && e->offset) { > + const XSaveXTILE_CFG *tilecfg = buf + e->offset; > + > + memcpy(&env->xtilecfg, tilecfg, 
sizeof(env->xtilecfg)); > + } > + > + if (buflen > sizeof(struct kvm_xsave)) { > + e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT]; > + > + if (e->size && e->offset) { > + const XSaveXTILE_DATA *tiledata = buf + e->offset; > + > + memcpy(&env->xtiledata, tiledata, sizeof(env->xtiledata)); > + } > + } > #endif > }