Signed-off-by: Richard Henderson <richard.hender...@linaro.org> --- target/arm/tcg/sve_helper.c | 69 +++++++++++++++++++++------------- target/arm/tcg/translate-sve.c | 2 +- 2 files changed, 43 insertions(+), 28 deletions(-)
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c index c12b2600bd..c206ca65ce 100644 --- a/target/arm/tcg/sve_helper.c +++ b/target/arm/tcg/sve_helper.c @@ -5347,13 +5347,18 @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); - unsigned rot = simd_data(desc); - bool flip = rot & 1; - float16 neg_imag, neg_real; + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); + uint32_t negf_real = flip ^ negf_imag; + float16 negx_imag, negx_real; uint64_t *g = vg; - neg_imag = float16_set_sign(0, (rot & 2) != 0); - neg_real = float16_set_sign(0, rot == 1 || rot == 2); + /* With AH=0, use negx; with AH=1 use negf. */ + negx_real = (negf_real & ~fpcr_ah) << 15; + negx_imag = (negf_imag & ~fpcr_ah) << 15; + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); do { uint64_t pg = g[(i - 1) >> 6]; @@ -5370,18 +5375,18 @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, mi = *(float16 *)(vm + H1_2(j)); e2 = (flip ? ni : nr); - e1 = (flip ? mi : mr) ^ neg_real; + e1 = (flip ? mi : mr) ^ negx_real; e4 = e2; - e3 = (flip ? mr : mi) ^ neg_imag; + e3 = (flip ? mr : mi) ^ negx_imag; if (likely((pg >> (i & 63)) & 1)) { d = *(float16 *)(va + H1_2(i)); - d = float16_muladd(e2, e1, d, 0, status); + d = float16_muladd(e2, e1, d, negf_real, status); *(float16 *)(vd + H1_2(i)) = d; } if (likely((pg >> (j & 63)) & 1)) { d = *(float16 *)(va + H1_2(j)); - d = float16_muladd(e4, e3, d, 0, status); + d = float16_muladd(e4, e3, d, negf_imag, status); *(float16 *)(vd + H1_2(j)) = d; } } while (i & 63); @@ -5392,13 +5397,18 @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); - unsigned rot = simd_data(desc); - bool flip = rot & 1; - float32 neg_imag, neg_real; + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); + uint32_t negf_real = flip ^ negf_imag; + float32 negx_imag, negx_real; uint64_t *g = vg; - neg_imag = float32_set_sign(0, (rot & 2) != 0); - neg_real = float32_set_sign(0, rot == 1 || rot == 2); + /* With AH=0, use negx; with AH=1 use negf. */ + negx_real = (negf_real & ~fpcr_ah) << 31; + negx_imag = (negf_imag & ~fpcr_ah) << 31; + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); do { uint64_t pg = g[(i - 1) >> 6]; @@ -5415,18 +5425,18 @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, mi = *(float32 *)(vm + H1_2(j)); e2 = (flip ? ni : nr); - e1 = (flip ? mi : mr) ^ neg_real; + e1 = (flip ? mi : mr) ^ negx_real; e4 = e2; - e3 = (flip ? mr : mi) ^ neg_imag; + e3 = (flip ? mr : mi) ^ negx_imag; if (likely((pg >> (i & 63)) & 1)) { d = *(float32 *)(va + H1_2(i)); - d = float32_muladd(e2, e1, d, 0, status); + d = float32_muladd(e2, e1, d, negf_real, status); *(float32 *)(vd + H1_2(i)) = d; } if (likely((pg >> (j & 63)) & 1)) { d = *(float32 *)(va + H1_2(j)); - d = float32_muladd(e4, e3, d, 0, status); + d = float32_muladd(e4, e3, d, negf_imag, status); *(float32 *)(vd + H1_2(j)) = d; } } while (i & 63); @@ -5437,13 +5447,18 @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); - unsigned rot = simd_data(desc); - bool flip = rot & 1; - float64 neg_imag, neg_real; + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); + uint32_t negf_real = flip ^ negf_imag; + float64 negx_imag, negx_real; uint64_t *g = vg; - neg_imag = float64_set_sign(0, (rot & 2) != 0); - neg_real = float64_set_sign(0, rot == 1 || rot == 2); + /* With AH=0, use negx; with AH=1 use negf. */ + negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63; + negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63; + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); do { uint64_t pg = g[(i - 1) >> 6]; @@ -5460,18 +5475,18 @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, mi = *(float64 *)(vm + H1_2(j)); e2 = (flip ? ni : nr); - e1 = (flip ? mi : mr) ^ neg_real; + e1 = (flip ? mi : mr) ^ negx_real; e4 = e2; - e3 = (flip ? mr : mi) ^ neg_imag; + e3 = (flip ? mr : mi) ^ negx_imag; if (likely((pg >> (i & 63)) & 1)) { d = *(float64 *)(va + H1_2(i)); - d = float64_muladd(e2, e1, d, 0, status); + d = float64_muladd(e2, e1, d, negf_real, status); *(float64 *)(vd + H1_2(i)) = d; } if (likely((pg >> (j & 63)) & 1)) { d = *(float64 *)(va + H1_2(j)); - d = float64_muladd(e4, e3, d, 0, status); + d = float64_muladd(e4, e3, d, negf_imag, status); *(float64 *)(vd + H1_2(j)) = d; } } while (i & 63); diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index cb6bb27622..6e758fd1ca 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -3964,7 +3964,7 @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, }; TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], - a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, + a->rd, a->rn, a->rm, a->ra, a->pg, a->rot | (s->fpcr_ah << 2), a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { -- 2.43.0