Signed-off-by: Richard Henderson <richard.hender...@linaro.org> --- target/arm/helper.h | 10 ++++++++ target/arm/translate-a64.c | 33 +++++++++++++++++-------- target/arm/vec_helper.c | 49 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 10 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h index ce6ff95672..e1cac31e95 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -807,6 +807,16 @@ DEF_HELPER_FLAGS_5(gvec_mls_idx_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(gvec_mls_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqdmulh_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqdmulh_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqrdmulh_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(sve2_sqdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_sqdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_sqdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 5ef6ecfbf1..7c98938077 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -605,6 +605,20 @@ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, tcg_temp_free_ptr(fpst); } +/* Expand a 3-operand + qc + operation using an out-of-line helper. */ +static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn, + int rm, gen_helper_gvec_3_ptr *fn) +{ + TCGv_ptr qc_ptr = tcg_temp_new_ptr(); + + tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc)); + tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), qc_ptr, + is_q ? 16 : 8, vec_full_reg_size(s), 0, fn); + tcg_temp_free_ptr(qc_ptr); +} + /* Expand a 4-operand operation using an out-of-line helper. */ static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn, int rm, int ra, int data, gen_helper_gvec_4 *fn) @@ -11270,6 +11284,15 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size); } return; + case 0x16: /* SQDMULH, SQRDMULH */ + { + static gen_helper_gvec_3_ptr * const fns[2][2] = { + { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h }, + { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s }, + }; + gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]); + } + return; case 0x11: if (!u) { /* CMTST */ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size); @@ -11381,16 +11404,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) genenvfn = fns[size][u]; break; } - case 0x16: /* SQDMULH, SQRDMULH */ - { - static NeonGenTwoOpEnvFn * const fns[2][2] = { - { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, - { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, - }; - assert(size == 1 || size == 2); - genenvfn = fns[size - 1][u]; - break; - } default: g_assert_not_reached(); } diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 766555a5d6..d73c1afe30 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -194,6 +194,30 @@ void HELPER(sve2_sqrdmlsh_h)(void *vd, void *vn, void *vm, } } +void HELPER(neon_sqdmulh_h)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int16_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 2; ++i) { + d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, false, vq); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqrdmulh_h)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int16_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 2; ++i) { + d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, true, vq); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + void HELPER(sve2_sqdmulh_h)(void *vd, void *vn, void *vm, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc); @@ -291,6 +315,7 @@ void HELPER(sve2_sqrdmlah_s)(void *vd, void *vn, void *vm, } } + void HELPER(sve2_sqrdmlsh_s)(void *vd, void *vn, void *vm, void *va, uint32_t desc) { @@ -303,6 +328,30 @@ void HELPER(sve2_sqrdmlsh_s)(void *vd, void *vn, void *vm, } } +void HELPER(neon_sqdmulh_s)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int32_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 4; ++i) { + d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, false, vq); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqrdmulh_s)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int32_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 4; ++i) { + d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, true, vq); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + void HELPER(sve2_sqdmulh_s)(void *vd, void *vn, void *vm, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc); -- 2.25.1