This includes: - EXT.W.{B/H} - CL{O/Z}.{W/D}, CT{O/Z}.{W/D} - BYTEPICK.{W/D} - REVB.{2H/4H/2W/D} - REVH.{2W/D} - BITREV.{4B/8B}, BITREV.{W/D} - BSTRINS.{W/D}, BSTRPICK.{W/D} - MASKEQZ, MASKNEZ
Signed-off-by: Song Gao <gaos...@loongson.cn> Signed-off-by: Xiaojuan Yang <yangxiaoj...@loongson.cn> Reviewed-by: Richard Henderson <richard.hender...@linaro.org> --- target/loongarch/helper.h | 4 + target/loongarch/insn_trans/trans_bit.c.inc | 252 ++++++++++++++++++++++++++++ target/loongarch/insns.decode | 40 +++++ target/loongarch/op_helper.c | 22 +++ target/loongarch/translate.c | 1 + 5 files changed, 319 insertions(+) create mode 100644 target/loongarch/insn_trans/trans_bit.c.inc diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h index eb771c0..04e0245 100644 --- a/target/loongarch/helper.h +++ b/target/loongarch/helper.h @@ -4,3 +4,7 @@ */ DEF_HELPER_2(raise_exception, noreturn, env, i32) + +DEF_HELPER_FLAGS_1(bitrev_w, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_1(bitrev_d, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_1(bitswap, TCG_CALL_NO_RWG_SE, tl, tl) diff --git a/target/loongarch/insn_trans/trans_bit.c.inc b/target/loongarch/insn_trans/trans_bit.c.inc new file mode 100644 index 0000000..5063773 --- /dev/null +++ b/target/loongarch/insn_trans/trans_bit.c.inc @@ -0,0 +1,252 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +static bool gen_rr(DisasContext *ctx, arg_rr *a, + DisasExtend src_ext, DisasExtend dst_ext, + void (*func)(TCGv, TCGv)) +{ + TCGv dest = gpr_dst(ctx, a->rd, dst_ext); + TCGv src1 = gpr_src(ctx, a->rj, src_ext); + + func(dest, src1); + + if (dst_ext) { + gen_set_gpr(a->rd, dest, dst_ext); + } + return true; +} + +static bool trans_bytepick_w(DisasContext *ctx, arg_bytepick_w *a) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); + + tcg_gen_concat_tl_i64(dest, src1, src2); + tcg_gen_sextract_i64(dest, dest, (32 - (a->sa) * 8), 32); + + return true; +} + +static bool trans_bytepick_d(DisasContext *ctx, arg_bytepick_d *a) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); + + tcg_gen_extract2_i64(dest, src1, src2, (64 - (a->sa) * 8)); + return true; +} + +static bool trans_bstrins_w(DisasContext *ctx, arg_bstrins_w *a) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + + if (a->lsbw > a->msbw) { + return false; + } + + tcg_gen_deposit_tl(dest, dest, src1, a->lsbw, a->msbw - a->lsbw + 1); + tcg_gen_ext32s_tl(dest, dest); + + return true; +} + +static bool trans_bstrins_d(DisasContext *ctx, arg_bstrins_d *a) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + + if (a->lsbd > a->msbd) { + return false; + } + + tcg_gen_deposit_tl(dest, dest, src1, a->lsbd, a->msbd - a->lsbd + 1); + return true; +} + +static bool trans_bstrpick_w(DisasContext *ctx, arg_bstrpick_w *a) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + + if (a->lsbw > a->msbw) { + return false; + } + + tcg_gen_extract_tl(dest, src1, a->lsbw, a->msbw - a->lsbw + 1); + tcg_gen_ext32s_tl(dest, dest); + return true; +} + +static bool trans_bstrpick_d(DisasContext *ctx, arg_bstrpick_d *a) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + + if (a->lsbd > a->msbd) { + return false; + } + + tcg_gen_extract_tl(dest, src1, a->lsbd, a->msbd - a->lsbd + 1); + return true; +} + +static void gen_clz_w(TCGv dest, TCGv src1) +{ + tcg_gen_clzi_tl(dest, src1, TARGET_LONG_BITS); + tcg_gen_subi_tl(dest, dest, TARGET_LONG_BITS - 32); +} + +static void gen_clo_w(TCGv dest, TCGv src1) +{ + tcg_gen_not_tl(dest, src1); + tcg_gen_ext32u_tl(dest, dest); + gen_clz_w(dest, dest); +} + +static void gen_ctz_w(TCGv dest, TCGv src1) +{ + tcg_gen_ori_tl(dest, src1, (target_ulong)MAKE_64BIT_MASK(32, 32)); + tcg_gen_ctzi_tl(dest, dest, 32); +} + +static void gen_cto_w(TCGv dest, TCGv src1) +{ + tcg_gen_not_tl(dest, src1); + tcg_gen_ext32u_tl(dest, dest); + gen_ctz_w(dest, dest); +} + +static void gen_clz_d(TCGv dest, TCGv src1) +{ + tcg_gen_clzi_i64(dest, src1, TARGET_LONG_BITS); +} + +static void gen_clo_d(TCGv dest, TCGv src1) +{ + tcg_gen_not_tl(dest, src1); + gen_clz_d(dest, dest); +} + +static void gen_ctz_d(TCGv dest, TCGv src1) +{ + tcg_gen_ctzi_tl(dest, src1, TARGET_LONG_BITS); +} + +static void gen_cto_d(TCGv dest, TCGv src1) +{ + tcg_gen_not_tl(dest, src1); + gen_ctz_d(dest, dest); +} + +static void gen_revb_2w(TCGv dest, TCGv src1) +{ + tcg_gen_bswap64_i64(dest, src1); + tcg_gen_rotri_i64(dest, dest, 32); +} + +static void gen_revb_2h(TCGv dest, TCGv src1) +{ + TCGv mask = tcg_constant_tl(0x00FF00FF); + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + + tcg_gen_shri_tl(t0, src1, 8); + tcg_gen_and_tl(t0, t0, mask); + tcg_gen_and_tl(t1, src1, mask); + tcg_gen_shli_tl(t1, t1, 8); + tcg_gen_or_tl(dest, t0, t1); + + tcg_temp_free(t0); + tcg_temp_free(t1); +} + +static void gen_revb_4h(TCGv dest, TCGv src1) +{ + TCGv mask = tcg_constant_tl(0x00FF00FF00FF00FFULL); + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + + tcg_gen_shri_tl(t0, src1, 8); + tcg_gen_and_tl(t0, t0, mask); + tcg_gen_and_tl(t1, src1, mask); + tcg_gen_shli_tl(t1, t1, 8); + tcg_gen_or_tl(dest, t0, t1); + + tcg_temp_free(t0); + tcg_temp_free(t1); +} + +static void gen_revh_2w(TCGv dest, TCGv src1) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 mask = tcg_constant_i64(0x0000ffff0000ffffull); + + tcg_gen_shri_i64(t0, src1, 16); + tcg_gen_and_i64(t1, src1, mask); + tcg_gen_and_i64(t0, t0, mask); + tcg_gen_shli_i64(t1, t1, 16); + tcg_gen_or_i64(dest, t1, t0); + + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); +} + +static void gen_revh_d(TCGv dest, TCGv src1) +{ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv mask = tcg_constant_tl(0x0000FFFF0000FFFFULL); + + tcg_gen_shri_tl(t1, src1, 16); + tcg_gen_and_tl(t1, t1, mask); + tcg_gen_and_tl(t0, src1, mask); + tcg_gen_shli_tl(t0, t0, 16); + tcg_gen_or_tl(t0, t0, t1); + tcg_gen_rotri_tl(dest, t0, 32); + + tcg_temp_free(t0); + tcg_temp_free(t1); +} + +static void gen_maskeqz(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv zero = tcg_constant_tl(0); + + tcg_gen_movcond_tl(TCG_COND_EQ, dest, src2, zero, zero, src1); +} + +static void gen_masknez(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv zero = tcg_constant_tl(0); + + tcg_gen_movcond_tl(TCG_COND_NE, dest, src2, zero, zero, src1); +} + +TRANS(ext_w_h, gen_rr, EXT_NONE, EXT_NONE, tcg_gen_ext16s_tl) +TRANS(ext_w_b, gen_rr, EXT_NONE, EXT_NONE, tcg_gen_ext8s_tl) +TRANS(clo_w, gen_rr, EXT_NONE, EXT_NONE, gen_clo_w) +TRANS(clz_w, gen_rr, EXT_ZERO, EXT_NONE, gen_clz_w) +TRANS(cto_w, gen_rr, EXT_NONE, EXT_NONE, gen_cto_w) +TRANS(ctz_w, gen_rr, EXT_NONE, EXT_NONE, gen_ctz_w) +TRANS(clo_d, gen_rr, EXT_NONE, EXT_NONE, gen_clo_d) +TRANS(clz_d, gen_rr, EXT_NONE, EXT_NONE, gen_clz_d) +TRANS(cto_d, gen_rr, EXT_NONE, EXT_NONE, gen_cto_d) +TRANS(ctz_d, gen_rr, EXT_NONE, EXT_NONE, gen_ctz_d) +TRANS(revb_2h, gen_rr, EXT_NONE, EXT_SIGN, gen_revb_2h) +TRANS(revb_4h, gen_rr, EXT_NONE, EXT_NONE, gen_revb_4h) +TRANS(revb_2w, gen_rr, EXT_NONE, EXT_NONE, gen_revb_2w) +TRANS(revb_d, gen_rr, EXT_NONE, EXT_NONE, tcg_gen_bswap64_i64) +TRANS(revh_2w, gen_rr, EXT_NONE, EXT_NONE, gen_revh_2w) +TRANS(revh_d, gen_rr, EXT_NONE, EXT_NONE, gen_revh_d) +TRANS(bitrev_4b, gen_rr, EXT_ZERO, EXT_SIGN, gen_helper_bitswap) +TRANS(bitrev_8b, gen_rr, EXT_NONE, EXT_NONE, gen_helper_bitswap) +TRANS(bitrev_w, gen_rr, EXT_NONE, EXT_SIGN, gen_helper_bitrev_w) +TRANS(bitrev_d, gen_rr, EXT_NONE, EXT_NONE, gen_helper_bitrev_d) +TRANS(maskeqz, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_maskeqz) +TRANS(masknez, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_masknez) diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode index 673aee4..4533706 100644 --- a/target/loongarch/insns.decode +++ b/target/loongarch/insns.decode @@ -14,13 +14,17 @@ # Argument sets # &r_i rd imm +&rr rd rj &rrr rd rj rk &rr_i rd rj imm &rrr_sa rd rj rk sa +&rr_2bw rd rj msbw lsbw +&rr_2bd rd rj msbd lsbd # # Formats # +@rr .... ........ ..... ..... rj:5 rd:5 &rr @rrr .... ........ ..... rk:5 rj:5 rd:5 &rrr @r_i20 .... ... imm:s20 rd:5 &r_i @rr_ui5 .... ........ ..... imm:5 rj:5 rd:5 &rr_i @@ -29,6 +33,10 @@ @rr_ui12 .... ...... imm:12 rj:5 rd:5 &rr_i @rr_i16 .... .. imm:s16 rj:5 rd:5 &rr_i @rrr_sa2p1 .... ........ ... .. rk:5 rj:5 rd:5 &rrr_sa sa=%sa2p1 +@rrr_sa2 .... ........ ... sa:2 rk:5 rj:5 rd:5 &rrr_sa +@rrr_sa3 .... ........ .. sa:3 rk:5 rj:5 rd:5 &rrr_sa +@rr_2bw .... ....... msbw:5 . lsbw:5 rj:5 rd:5 &rr_2bw +@rr_2bd .... ...... msbd:6 lsbd:6 rj:5 rd:5 &rr_2bd # # Fixed point arithmetic operation instruction @@ -99,3 +107,35 @@ srai_w 0000 00000100 10001 ..... ..... ..... @rr_ui5 srai_d 0000 00000100 1001 ...... ..... ..... @rr_ui6 rotri_w 0000 00000100 11001 ..... ..... ..... @rr_ui5 rotri_d 0000 00000100 1101 ...... ..... ..... @rr_ui6 + +# +# Fixed point bit operation instruction +# +ext_w_h 0000 00000000 00000 10110 ..... ..... @rr +ext_w_b 0000 00000000 00000 10111 ..... ..... @rr +clo_w 0000 00000000 00000 00100 ..... ..... @rr +clz_w 0000 00000000 00000 00101 ..... ..... @rr +cto_w 0000 00000000 00000 00110 ..... ..... @rr +ctz_w 0000 00000000 00000 00111 ..... ..... @rr +clo_d 0000 00000000 00000 01000 ..... ..... @rr +clz_d 0000 00000000 00000 01001 ..... ..... @rr +cto_d 0000 00000000 00000 01010 ..... ..... @rr +ctz_d 0000 00000000 00000 01011 ..... ..... @rr +revb_2h 0000 00000000 00000 01100 ..... ..... @rr +revb_4h 0000 00000000 00000 01101 ..... ..... @rr +revb_2w 0000 00000000 00000 01110 ..... ..... @rr +revb_d 0000 00000000 00000 01111 ..... ..... @rr +revh_2w 0000 00000000 00000 10000 ..... ..... @rr +revh_d 0000 00000000 00000 10001 ..... ..... @rr +bitrev_4b 0000 00000000 00000 10010 ..... ..... @rr +bitrev_8b 0000 00000000 00000 10011 ..... ..... @rr +bitrev_w 0000 00000000 00000 10100 ..... ..... @rr +bitrev_d 0000 00000000 00000 10101 ..... ..... @rr +bytepick_w 0000 00000000 100 .. ..... ..... ..... @rrr_sa2 +bytepick_d 0000 00000000 11 ... ..... ..... ..... @rrr_sa3 +maskeqz 0000 00000001 00110 ..... ..... ..... @rrr +masknez 0000 00000001 00111 ..... ..... ..... @rrr +bstrins_w 0000 0000011 ..... 0 ..... ..... ..... @rr_2bw +bstrpick_w 0000 0000011 ..... 1 ..... ..... ..... @rr_2bw +bstrins_d 0000 000010 ...... ...... ..... ..... @rr_2bd +bstrpick_d 0000 000011 ...... ...... ..... ..... @rr_2bd diff --git a/target/loongarch/op_helper.c b/target/loongarch/op_helper.c index 9038109..1fc9d81 100644 --- a/target/loongarch/op_helper.c +++ b/target/loongarch/op_helper.c @@ -19,3 +19,25 @@ void helper_raise_exception(CPULoongArchState *env, uint32_t exception) { do_raise_exception(env, exception, GETPC()); } + + +target_ulong helper_bitrev_w(target_ulong rj) +{ + return (int32_t)revbit32(rj); +} + +target_ulong helper_bitrev_d(target_ulong rj) +{ + return revbit64(rj); +} + +target_ulong helper_bitswap(target_ulong v) +{ + v = ((v >> 1) & (target_ulong)0x5555555555555555ULL) | + ((v & (target_ulong)0x5555555555555555ULL) << 1); + v = ((v >> 2) & (target_ulong)0x3333333333333333ULL) | + ((v & (target_ulong)0x3333333333333333ULL) << 2); + v = ((v >> 4) & (target_ulong)0x0F0F0F0F0F0F0F0FULL) | + ((v & (target_ulong)0x0F0F0F0F0F0F0F0FULL) << 4); + return v; +} diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c index f90b63a..c0875db 100644 --- a/target/loongarch/translate.c +++ b/target/loongarch/translate.c @@ -147,6 +147,7 @@ static void gen_set_gpr(int reg_num, TCGv t, DisasExtend dst_ext) #include "decode-insns.c.inc" #include "insn_trans/trans_arith.c.inc" #include "insn_trans/trans_shift.c.inc" +#include "insn_trans/trans_bit.c.inc" static void loongarch_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs) { -- 1.8.3.1