On 7/20/21 11:53 PM, Song Gao wrote:
This patch implements fixed-point bit instruction translation.
This includes:
- EXT.W.{B/H}
- CL{O/Z}.{W/D}, CT{O/Z}.{W/D}
- BYTEPICK.{W/D}
- REVB.{2H/4H/2W/D}
- REVH.{2W/D}
- BITREV.{4B/8B}, BITREV.{W/D}
- BSTRINS.{W/D}, BSTRPICK.{W/D}
- MASKEQZ, MASKNEZ
Signed-off-by: Song Gao <gaos...@loongson.cn>
---
target/loongarch/helper.h | 10 +
target/loongarch/insns.decode | 45 +++
target/loongarch/op_helper.c | 119 ++++++++
target/loongarch/trans.inc.c | 665 ++++++++++++++++++++++++++++++++++++++++++
4 files changed, 839 insertions(+)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 6c7e19b..bbbcc26 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -8,3 +8,13 @@
DEF_HELPER_3(raise_exception_err, noreturn, env, i32, int)
DEF_HELPER_2(raise_exception, noreturn, env, i32)
+
+DEF_HELPER_2(cto_w, tl, env, tl)
+DEF_HELPER_2(ctz_w, tl, env, tl)
+DEF_HELPER_2(cto_d, tl, env, tl)
+DEF_HELPER_2(ctz_d, tl, env, tl)
The count leading and trailing zero operations are built into tcg. Counting
leading and trailing ones simply needs a NOT operation to convert it into
counting zeros.
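For example, cto.d becomes (a sketch; the last operand of ctzi is the
result returned for a zero input, which covers an all-ones source):

tcg_gen_not_tl(Rd, Rj);
tcg_gen_ctzi_tl(Rd, Rd, 64);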
+DEF_HELPER_2(bitrev_w, tl, env, tl)
+DEF_HELPER_2(bitrev_d, tl, env, tl)
These should use TCG_CALL_NO_RWG_SE.
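Since the helpers don't touch env at all, they can also drop that
argument, e.g.:

DEF_HELPER_FLAGS_1(bitrev_w, TCG_CALL_NO_RWG_SE, tl, tl)
DEF_HELPER_FLAGS_1(bitrev_d, TCG_CALL_NO_RWG_SE, tl, tl)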
+target_ulong helper_bitrev_w(CPULoongArchState *env, target_ulong rj)
+{
+ int32_t v = (int32_t)rj;
+ const int SIZE = 32;
+ uint8_t bytes[SIZE];
+
+ int i;
+ for (i = 0; i < SIZE; i++) {
+ bytes[i] = v & 0x1;
+ v = v >> 1;
+ }
+ /* v == 0 */
+ for (i = 0; i < SIZE; i++) {
+ v = v | ((uint32_t)bytes[i] << (SIZE - 1 - i));
+ }
+
+ return (target_ulong)(int32_t)v;
+}
This is just revbit32() from qemu/host-utils.h. Note also that v is a
signed int32_t, so for a negative input the arithmetic shift leaves
v == -1 rather than 0 after the first loop, and the second loop then
produces all-ones:

return (int32_t)revbit32(rj);
+target_ulong helper_bitrev_d(CPULoongArchState *env, target_ulong rj)
+{
+ uint64_t v = rj;
+ const int SIZE = 64;
+ uint8_t bytes[SIZE];
+
+ int i;
+ for (i = 0; i < SIZE; i++) {
+ bytes[i] = v & 0x1;
+ v = v >> 1;
+ }
+ /* v == 0 */
+ for (i = 0; i < SIZE; i++) {
+ v = v | ((uint64_t)bytes[i] << (SIZE - 1 - i));
+ }
+
+ return (target_ulong)v;
+}
Likewise, revbit64() here:

return revbit64(rj);
+static inline target_ulong bitswap(target_ulong v)
+{
+ v = ((v >> 1) & (target_ulong)0x5555555555555555ULL) |
+ ((v & (target_ulong)0x5555555555555555ULL) << 1);
+ v = ((v >> 2) & (target_ulong)0x3333333333333333ULL) |
+ ((v & (target_ulong)0x3333333333333333ULL) << 2);
+ v = ((v >> 4) & (target_ulong)0x0F0F0F0F0F0F0F0FULL) |
+ ((v & (target_ulong)0x0F0F0F0F0F0F0F0FULL) << 4);
+ return v;
+}
+
+target_ulong helper_loongarch_dbitswap(target_ulong rj)
+{
+ return bitswap(rj);
+}
+
+target_ulong helper_loongarch_bitswap(target_ulong rt)
+{
+ return (int32_t)bitswap(rt);
+}
I assume these are for the bitrev.4b and bitrev.8b insns?
It would be better to name them correctly.
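E.g. (names chosen to match the insn mnemonics):

DEF_HELPER_FLAGS_1(bitrev_4b, TCG_CALL_NO_RWG_SE, tl, tl)
DEF_HELPER_FLAGS_1(bitrev_8b, TCG_CALL_NO_RWG_SE, tl, tl)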
+/* Fixed point bit operation instruction translation */
+static bool trans_ext_w_h(DisasContext *ctx, arg_ext_w_h *a)
+{
+ TCGv t0;
+ TCGv Rd = cpu_gpr[a->rd];
+
+ if (a->rd == 0) {
+ /* Nop */
+ return true;
+ }
+
+ t0 = get_gpr(a->rj);
+
+ tcg_gen_ext16s_tl(Rd, t0);
Again, you should have a common routine for handling these unary operations.
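Something like (untested sketch):

static bool gen_unary(DisasContext *ctx, int rd, int rj,
                      void (*func)(TCGv, TCGv))
{
    if (rd == 0) {
        /* Nop */
        return true;
    }
    func(cpu_gpr[rd], get_gpr(rj));
    return true;
}

so that this function becomes

    return gen_unary(ctx, a->rd, a->rj, tcg_gen_ext16s_tl);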
+static bool trans_clo_w(DisasContext *ctx, arg_clo_w *a)
+{
+ TCGv Rd = cpu_gpr[a->rd];
+
+ if (a->rd == 0) {
+ /* Nop */
+ return true;
+ }
+
+ gen_load_gpr(Rd, a->rj);
+
+ tcg_gen_not_tl(Rd, Rd);
+ tcg_gen_ext32u_tl(Rd, Rd);
+ tcg_gen_clzi_tl(Rd, Rd, TARGET_LONG_BITS);
+ tcg_gen_subi_tl(Rd, Rd, TARGET_LONG_BITS - 32);
So, you're actually using the tcg builtins here, and the helper you created
isn't used.
+static bool trans_cto_w(DisasContext *ctx, arg_cto_w *a)
+{
+ TCGv t0;
+ TCGv Rd = cpu_gpr[a->rd];
+
+ if (a->rd == 0) {
+ /* Nop */
+ return true;
+ }
+
+ t0 = tcg_temp_new();
+ gen_load_gpr(t0, a->rj);
+
+ gen_helper_cto_w(Rd, cpu_env, t0);
Here you should have used the tcg builtin.
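E.g.:

tcg_gen_not_tl(Rd, t0);
tcg_gen_ext32u_tl(Rd, Rd);
tcg_gen_ctzi_tl(Rd, Rd, 32);

The last operand of ctzi, the result for a zero input, handles the
all-ones case.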
+static bool trans_ctz_w(DisasContext *ctx, arg_ctz_w *a)
+{
+ TCGv t0;
+ TCGv Rd = cpu_gpr[a->rd];
+
+ if (a->rd == 0) {
+ /* Nop */
+ return true;
+ }
+
+ t0 = tcg_temp_new();
+ gen_load_gpr(t0, a->rj);
+
+ gen_helper_ctz_w(Rd, cpu_env, t0);
Likewise.
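I.e.:

tcg_gen_ext32u_tl(Rd, t0);
tcg_gen_ctzi_tl(Rd, Rd, 32);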
+static bool trans_revb_2w(DisasContext *ctx, arg_revb_2w *a)
+{
+ TCGv_i64 t0, t1, t2;
+ TCGv Rd = cpu_gpr[a->rd];
+
+ if (a->rd == 0) {
+ /* Nop */
+ return true;
+ }
+
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ t2 = get_gpr(a->rj);
+
+ gen_load_gpr(t0, a->rd);
+
+ tcg_gen_ext32u_i64(t1, t2);
+ tcg_gen_bswap32_i64(t0, t1);
+ tcg_gen_shri_i64(t1, t2, 32);
+ tcg_gen_bswap32_i64(t1, t1);
+ tcg_gen_concat32_i64(Rd, t0, t1);
This whole sequence is just:

tcg_gen_bswap64_i64(Rd, Rj);
tcg_gen_rotri_i64(Rd, Rd, 32);
+static bool trans_bytepick_d(DisasContext *ctx, arg_bytepick_d *a)
+{
+ TCGv t0;
+ TCGv Rd = cpu_gpr[a->rd];
+
+ if (a->rd == 0) {
+ /* Nop */
+ return true;
+ }
+
+ t0 = tcg_temp_new();
+
+ check_loongarch_64(ctx);
+ if (a->sa3 == 0 || ((a->sa3) * 8) == 64) {
+ if (a->sa3 == 0) {
+ gen_load_gpr(t0, a->rk);
+ } else {
+ gen_load_gpr(t0, a->rj);
+ }
+ tcg_gen_mov_tl(Rd, t0);
+ } else {
+ TCGv t1 = tcg_temp_new();
+
+ gen_load_gpr(t0, a->rk);
+ gen_load_gpr(t1, a->rj);
+
+ tcg_gen_shli_tl(t0, t0, ((a->sa3) * 8));
+ tcg_gen_shri_tl(t1, t1, 64 - ((a->sa3) * 8));
+ tcg_gen_or_tl(Rd, t1, t0);
+
+ tcg_temp_free(t1);
+ }
All of this reduces to a single extract2; the expander accepts an
offset of 64, which also covers the sa3 == 0 case:

tcg_gen_extract2_i64(Rd, Rj, Rk, 64 - a->sa3 * 8);
r~