On 27/09/2018 23:13, Richard Henderson wrote: > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > --- > target/arm/cpu.h | 17 +++++++++++++++- > target/arm/translate-a64.h | 1 + > target/arm/translate.h | 1 + > linux-user/elfload.c | 6 +----- > target/arm/cpu64.c | 13 ++++++------- > target/arm/helper.c | 2 +- > target/arm/translate-a64.c | 40 +++++++++++++++++++------------------- > target/arm/translate.c | 6 +++--- > 8 files changed, 49 insertions(+), 37 deletions(-) > > diff --git a/target/arm/cpu.h b/target/arm/cpu.h > index 152a558a94..bca4ee4281 100644 > --- a/target/arm/cpu.h > +++ b/target/arm/cpu.h > @@ -1566,7 +1566,6 @@ enum arm_features { > ARM_FEATURE_PMU, /* has PMU support */ > ARM_FEATURE_VBAR, /* has cp15 VBAR */ > ARM_FEATURE_M_SECURITY, /* M profile Security Extension */ > - ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */ > ARM_FEATURE_M_MAIN, /* M profile Main Extension */ > }; > > @@ -3176,6 +3175,16 @@ static inline bool aa32_feature_dp(ARMCPU *cpu) > return FIELD_EX32(cpu->id_isar6, ID_ISAR6, DP) != 0; > } > > +static inline bool aa32_feature_fp16_arith(ARMCPU *cpu) > +{ > + /* > + * This is a placeholder for use by VCMA until the rest of > + * the ARMv8.2-FP16 extension is implemented for aa32 mode. > + * At which point we can properly set and check MVFR1.FPHP. > + */ > + return FIELD_EX64(cpu->id_aa64pfr0, ID_AA64PFR0, FP) == 1; > +} > + > /* > * 64-bit feature tests via id registers. > */ > @@ -3244,6 +3253,12 @@ static inline bool aa64_feature_fcma(ARMCPU *cpu) > return FIELD_EX64(cpu->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0; > } > > +static inline bool aa64_feature_fp16(ARMCPU *cpu) > +{ > + /* We always set the AdvSIMD and FP fields identically wrt FP16. 
*/ > + return FIELD_EX64(cpu->id_aa64pfr0, ID_AA64PFR0, FP) == 1; > +} > + > static inline bool aa64_feature_sve(ARMCPU *cpu) > { > return FIELD_EX64(cpu->id_aa64pfr0, ID_AA64PFR0, SVE) != 0; > diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h > index 636f3fded3..e122cef242 100644 > --- a/target/arm/translate-a64.h > +++ b/target/arm/translate-a64.h > @@ -140,6 +140,7 @@ FORWARD_FEATURE(sm3) > FORWARD_FEATURE(sm4) > FORWARD_FEATURE(dp) > FORWARD_FEATURE(fcma) > +FORWARD_FEATURE(fp16) > FORWARD_FEATURE(sve) > > #undef FORWARD_FEATURE > diff --git a/target/arm/translate.h b/target/arm/translate.h > index d8eafbe88d..e022d6d4e6 100644 > --- a/target/arm/translate.h > +++ b/target/arm/translate.h > @@ -205,6 +205,7 @@ FORWARD_FEATURE(crc32) > FORWARD_FEATURE(rdm) > FORWARD_FEATURE(vcma) > FORWARD_FEATURE(dp) > +FORWARD_FEATURE(fp16_arith) > > #undef FORWARD_FEATURE > > diff --git a/linux-user/elfload.c b/linux-user/elfload.c > index c4969f163e..bcb2c9928c 100644 > --- a/linux-user/elfload.c > +++ b/linux-user/elfload.c > @@ -573,8 +573,6 @@ static uint32_t get_elf_hwcap(void) > hwcaps |= ARM_HWCAP_A64_ASIMD; > > /* probe for the extra features */ > -#define GET_FEATURE(feat, hwcap) \ > - do { if (arm_feature(&cpu->env, feat)) { hwcaps |= hwcap; } } while (0) > #define GET_FEATURE_ID(feat, hwcap) \ > do { if (aa64_feature_##feat(cpu)) { hwcaps |= hwcap; } } while (0) > > @@ -587,15 +585,13 @@ static uint32_t get_elf_hwcap(void) > GET_FEATURE_ID(sha3, ARM_HWCAP_A64_SHA3); > GET_FEATURE_ID(sm3, ARM_HWCAP_A64_SM3); > GET_FEATURE_ID(sm4, ARM_HWCAP_A64_SM4); > - GET_FEATURE(ARM_FEATURE_V8_FP16, > - ARM_HWCAP_A64_FPHP | ARM_HWCAP_A64_ASIMDHP); > + GET_FEATURE_ID(fp16, ARM_HWCAP_A64_FPHP | ARM_HWCAP_A64_ASIMDHP); > GET_FEATURE_ID(atomics, ARM_HWCAP_A64_ATOMICS); > GET_FEATURE_ID(rdm, ARM_HWCAP_A64_ASIMDRDM); > GET_FEATURE_ID(dp, ARM_HWCAP_A64_ASIMDDP); > GET_FEATURE_ID(fcma, ARM_HWCAP_A64_FCMA); > GET_FEATURE_ID(sve, ARM_HWCAP_A64_SVE); > > -#undef 
GET_FEATURE > #undef GET_FEATURE_ID > > return hwcaps; > diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c > index 8f95de677a..c734e2b2cd 100644 > --- a/target/arm/cpu64.c > +++ b/target/arm/cpu64.c > @@ -259,6 +259,8 @@ static void aarch64_max_initfn(Object *obj) > FIELD_DP64(cpu->id_aa64isar1, ID_AA64ISAR1, FCMA, 1); > > FIELD_DP64(cpu->id_aa64pfr0, ID_AA64PFR0, SVE, 1); > + FIELD_DP64(cpu->id_aa64pfr0, ID_AA64PFR0, FP, 1); > + FIELD_DP64(cpu->id_aa64pfr0, ID_AA64PFR0, ADVSIMD, 1); > > /* Replicate the same data to the 32-bit id registers. */ > FIELD_DP32(cpu->id_isar5, ID_ISAR5, AES, 2); /* AES + PMULL */ > @@ -268,15 +270,12 @@ static void aarch64_max_initfn(Object *obj) > FIELD_DP32(cpu->id_isar5, ID_ISAR5, RDM, 1); > FIELD_DP32(cpu->id_isar5, ID_ISAR5, VCMA, 1); > FIELD_DP32(cpu->id_isar6, ID_ISAR6, DP, 1); > + /* > + * FIXME: ARMv8.2-FP16 is not implemented for aa32, > + * so do not set MVFR1.FPHP and MVFR1.SIMDHP. > + */
Shouldn't we use this then? FIELD_DP64(cpu->id_aa64pfr0, ID_AA64PFR0, FP, 0b1111); Rest of the patch is OK: Reviewed-by: Philippe Mathieu-Daudé <phi...@redhat.com> > > #ifdef CONFIG_USER_ONLY > - /* We don't set these in system emulation mode for the moment, > - * since we don't correctly set the ID registers to advertise them, > - * and in some cases they're only available in AArch64 and not > AArch32, > - * whereas the architecture requires them to be present in both if > - * present in either. > - */ > - set_feature(&cpu->env, ARM_FEATURE_V8_FP16); > /* For usermode -cpu max we can use a larger and more efficient DCZ > * blocksize since we don't have to follow what the hardware does. > */ > diff --git a/target/arm/helper.c b/target/arm/helper.c > index dd3a2c0b8b..996af1b0b3 100644 > --- a/target/arm/helper.c > +++ b/target/arm/helper.c > @@ -11528,7 +11528,7 @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t > val) > uint32_t changed; > > /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. 
*/ > - if (!arm_feature(env, ARM_FEATURE_V8_FP16)) { > + if (!aa64_feature_fp16(arm_env_get_cpu(env))) { > val &= ~FPCR_FZ16; > } > > diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c > index 5527ffb203..095d204992 100644 > --- a/target/arm/translate-a64.c > +++ b/target/arm/translate-a64.c > @@ -4801,7 +4801,7 @@ static void disas_fp_compare(DisasContext *s, uint32_t > insn) > break; > case 3: > size = MO_16; > - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + if (aa64_dc_feature_fp16(s)) { > break; > } > /* fallthru */ > @@ -4852,7 +4852,7 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t > insn) > break; > case 3: > size = MO_16; > - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + if (aa64_dc_feature_fp16(s)) { > break; > } > /* fallthru */ > @@ -4918,7 +4918,7 @@ static void disas_fp_csel(DisasContext *s, uint32_t > insn) > break; > case 3: > sz = MO_16; > - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + if (aa64_dc_feature_fp16(s)) { > break; > } > /* fallthru */ > @@ -5251,7 +5251,7 @@ static void disas_fp_1src(DisasContext *s, uint32_t > insn) > handle_fp_1src_double(s, opcode, rd, rn); > break; > case 3: > - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + if (!aa64_dc_feature_fp16(s)) { > unallocated_encoding(s); > return; > } > @@ -5466,7 +5466,7 @@ static void disas_fp_2src(DisasContext *s, uint32_t > insn) > handle_fp_2src_double(s, opcode, rd, rn, rm); > break; > case 3: > - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + if (!aa64_dc_feature_fp16(s)) { > unallocated_encoding(s); > return; > } > @@ -5624,7 +5624,7 @@ static void disas_fp_3src(DisasContext *s, uint32_t > insn) > handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra); > break; > case 3: > - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + if (!aa64_dc_feature_fp16(s)) { > unallocated_encoding(s); > return; > } > @@ -5694,7 +5694,7 @@ static void disas_fp_imm(DisasContext *s, uint32_t insn) > break; > case 3: > sz = MO_16; > - if (arm_dc_feature(s, 
ARM_FEATURE_V8_FP16)) { > + if (aa64_dc_feature_fp16(s)) { > break; > } > /* fallthru */ > @@ -5919,7 +5919,7 @@ static void disas_fp_fixed_conv(DisasContext *s, > uint32_t insn) > case 1: /* float64 */ > break; > case 3: /* float16 */ > - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + if (aa64_dc_feature_fp16(s)) { > break; > } > /* fallthru */ > @@ -6049,7 +6049,7 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t > insn) > break; > case 0x6: /* 16-bit float, 32-bit int */ > case 0xe: /* 16-bit float, 64-bit int */ > - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + if (aa64_dc_feature_fp16(s)) { > break; > } > /* fallthru */ > @@ -6076,7 +6076,7 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t > insn) > case 1: /* float64 */ > break; > case 3: /* float16 */ > - if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + if (aa64_dc_feature_fp16(s)) { > break; > } > /* fallthru */ > @@ -6513,7 +6513,7 @@ static void disas_simd_across_lanes(DisasContext *s, > uint32_t insn) > */ > is_min = extract32(size, 1, 1); > is_fp = true; > - if (!is_u && arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + if (!is_u && aa64_dc_feature_fp16(s)) { > size = 1; > } else if (!is_u || !is_q || extract32(size, 0, 1)) { > unallocated_encoding(s); > @@ -6909,7 +6909,7 @@ static void disas_simd_mod_imm(DisasContext *s, > uint32_t insn) > > if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) { > /* Check for FMOV (vector, immediate) - half-precision */ > - if (!(arm_dc_feature(s, ARM_FEATURE_V8_FP16) && o2 && cmode == 0xf)) > { > + if (!(aa64_dc_feature_fp16(s) && o2 && cmode == 0xf)) { > unallocated_encoding(s); > return; > } > @@ -7076,7 +7076,7 @@ static void disas_simd_scalar_pairwise(DisasContext *s, > uint32_t insn) > case 0x2f: /* FMINP */ > /* FP op, size[0] is 32 or 64 bit*/ > if (!u) { > - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + if (!aa64_dc_feature_fp16(s)) { > unallocated_encoding(s); > return; > } else { > @@ -7721,7 +7721,7 @@ static void 
handle_simd_shift_intfp_conv(DisasContext > *s, bool is_scalar, > size = MO_32; > } else if (immh & 2) { > size = MO_16; > - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + if (!aa64_dc_feature_fp16(s)) { > unallocated_encoding(s); > return; > } > @@ -7766,7 +7766,7 @@ static void handle_simd_shift_fpint_conv(DisasContext > *s, bool is_scalar, > size = MO_32; > } else if (immh & 0x2) { > size = MO_16; > - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + if (!aa64_dc_feature_fp16(s)) { > unallocated_encoding(s); > return; > } > @@ -8530,7 +8530,7 @@ static void > disas_simd_scalar_three_reg_same_fp16(DisasContext *s, > return; > } > > - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + if (!aa64_dc_feature_fp16(s)) { > unallocated_encoding(s); > } > > @@ -11211,7 +11211,7 @@ static void > disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) > TCGv_ptr fpst; > bool pairwise = false; > > - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + if (!aa64_dc_feature_fp16(s)) { > unallocated_encoding(s); > return; > } > @@ -11426,7 +11426,7 @@ static void > disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) > case 0x1c: /* FCADD, #90 */ > case 0x1e: /* FCADD, #270 */ > if (size == 0 > - || (size == 1 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16)) > + || (size == 1 && !aa64_dc_feature_fp16(s)) > || (size == 3 && !is_q)) { > unallocated_encoding(s); > return; > @@ -12306,7 +12306,7 @@ static void disas_simd_two_reg_misc_fp16(DisasContext > *s, uint32_t insn) > bool need_fpst = true; > int rmode; > > - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + if (!aa64_dc_feature_fp16(s)) { > unallocated_encoding(s); > return; > } > @@ -12723,7 +12723,7 @@ static void disas_simd_indexed(DisasContext *s, > uint32_t insn) > } > break; > } > - if (is_fp16 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + if (is_fp16 && !aa64_dc_feature_fp16(s)) { > unallocated_encoding(s); > return; > } > diff --git a/target/arm/translate.c b/target/arm/translate.c > index 
4036be6828..3dac413c82 100644 > --- a/target/arm/translate.c > +++ b/target/arm/translate.c > @@ -7783,7 +7783,7 @@ static int disas_neon_insn_3same_ext(DisasContext *s, > uint32_t insn) > int size = extract32(insn, 20, 1); > data = extract32(insn, 23, 2); /* rot */ > if (!aa32_dc_feature_vcma(s) > - || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) { > + || (!size && !aa32_dc_feature_fp16_arith(s))) { > return 1; > } > fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah; > @@ -7792,7 +7792,7 @@ static int disas_neon_insn_3same_ext(DisasContext *s, > uint32_t insn) > int size = extract32(insn, 20, 1); > data = extract32(insn, 24, 1); /* rot */ > if (!aa32_dc_feature_vcma(s) > - || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) { > + || (!size && !aa32_dc_feature_fp16_arith(s))) { > return 1; > } > fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh; > @@ -7865,7 +7865,7 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext > *s, uint32_t insn) > return 1; > } > if (size == 0) { > - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + if (!aa32_dc_feature_fp16_arith(s)) { > return 1; > } > /* For fp16, rm is just Vm, and index is M. */ >