On 7/20/21 11:53 PM, Song Gao wrote:
This patch implements fixed-point bit instruction translation.
This includes:
- EXT.W.{B/H}
- CL{O/Z}.{W/D}, CT{O/Z}.{W/D}
- BYTEPICK.{W/D}
- REVB.{2H/4H/2W/D}
- REVH.{2W/D}
- BITREV.{4B/8B}, BITREV.{W/D}
- BSTRINS.{W/D}, BSTRPICK.{W/D}
- MASKEQZ, MASKNEZ
Signed-off-by: Song Gao <gaos...@loongson.cn>
---
target/loongarch/helper.h | 10 +
target/loongarch/insns.decode | 45 +++
target/loongarch/op_helper.c | 119 ++++++++
target/loongarch/trans.inc.c | 665 ++++++++++++++++++++++++++++++++++++++++++
4 files changed, 839 insertions(+)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 6c7e19b..bbbcc26 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -8,3 +8,13 @@
DEF_HELPER_3(raise_exception_err, noreturn, env, i32, int)
DEF_HELPER_2(raise_exception, noreturn, env, i32)
+
+DEF_HELPER_2(cto_w, tl, env, tl)
+DEF_HELPER_2(ctz_w, tl, env, tl)
+DEF_HELPER_2(cto_d, tl, env, tl)
+DEF_HELPER_2(ctz_d, tl, env, tl)
The count leading and trailing zero operations are built into tcg. Counting
leading and trailing ones simply needs a NOT operation to convert it into
counting zeros.
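For example, cto.d becomes (a sketch; the last operand of ctzi is the
result returned for a zero input, which covers an all-ones source):

tcg_gen_not_tl(Rd, Rj);
tcg_gen_ctzi_tl(Rd, Rd, 64);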
+DEF_HELPER_2(bitrev_w, tl, env, tl)
+DEF_HELPER_2(bitrev_d, tl, env, tl)
These should use TCG_CALL_NO_RWG_SE.
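Since the helpers don't touch env at all, they can also drop that
argument, e.g.:

DEF_HELPER_FLAGS_1(bitrev_w, TCG_CALL_NO_RWG_SE, tl, tl)
DEF_HELPER_FLAGS_1(bitrev_d, TCG_CALL_NO_RWG_SE, tl, tl)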
+target_ulong helper_bitrev_w(CPULoongArchState *env, target_ulong rj)
+{
+ int32_t v = (int32_t)rj;
+ const int SIZE = 32;
+ uint8_t bytes[SIZE];
+
+ int i;
+ for (i = 0; i < SIZE; i++) {
+ bytes[i] = v & 0x1;
+ v = v >> 1;
+ }
+ /* v == 0 */
+ for (i = 0; i < SIZE; i++) {
+ v = v | ((uint32_t)bytes[i] << (SIZE - 1 - i));
+ }
+
+ return (target_ulong)(int32_t)v;
+}
This is just revbit32() from qemu/host-utils.h. Note also that v is a
signed int32_t, so for a negative input the arithmetic shift leaves
v == -1 rather than 0 after the first loop, and the second loop then
produces all-ones:

return (int32_t)revbit32(rj);
+target_ulong helper_bitrev_d(CPULoongArchState *env, target_ulong rj)
+{
+ uint64_t v = rj;
+ const int SIZE = 64;
+ uint8_t bytes[SIZE];
+
+ int i;
+ for (i = 0; i < SIZE; i++) {
+ bytes[i] = v & 0x1;
+ v = v >> 1;
+ }
+ /* v == 0 */
+ for (i = 0; i < SIZE; i++) {
+ v = v | ((uint64_t)bytes[i] << (SIZE - 1 - i));
+ }
+
+ return (target_ulong)v;
+}
Likewise, revbit64() here:

return revbit64(rj);
+static inline target_ulong bitswap(target_ulong v)
+{
+ v = ((v >> 1) & (target_ulong)0x5555555555555555ULL) |
+ ((v & (target_ulong)0x5555555555555555ULL) << 1);
+ v = ((v >> 2) & (target_ulong)0x3333333333333333ULL) |
+ ((v & (target_ulong)0x3333333333333333ULL) << 2);
+ v = ((v >> 4) & (target_ulong)0x0F0F0F0F0F0F0F0FULL) |
+ ((v & (target_ulong)0x0F0F0F0F0F0F0F0FULL) << 4);
+ return v;
+}
+
+target_ulong helper_loongarch_dbitswap(target_ulong rj)
+{
+ return bitswap(rj);
+}
+
+target_ulong helper_loongarch_bitswap(target_ulong rt)
+{
+ return (int32_t)bitswap(rt);
+}
I assume these are for the bitrev.4b and bitrev.8b insns?
It would be better to name them correctly.
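E.g. (names chosen to match the insn mnemonics):

DEF_HELPER_FLAGS_1(bitrev_4b, TCG_CALL_NO_RWG_SE, tl, tl)
DEF_HELPER_FLAGS_1(bitrev_8b, TCG_CALL_NO_RWG_SE, tl, tl)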
+/* Fixed point bit operation instruction translation */
+static bool trans_ext_w_h(DisasContext *ctx, arg_ext_w_h *a)
+{
+ TCGv t0;
+ TCGv Rd = cpu_gpr[a->rd];
+
+ if (a->rd == 0) {
+ /* Nop */
+ return true;
+ }
+
+ t0 = get_gpr(a->rj);
+
+ tcg_gen_ext16s_tl(Rd, t0);
Again, you should have a common routine for handling these unary operations.
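Something like (untested sketch):

static bool gen_unary(DisasContext *ctx, int rd, int rj,
                      void (*func)(TCGv, TCGv))
{
    if (rd == 0) {
        /* Nop */
        return true;
    }
    func(cpu_gpr[rd], get_gpr(rj));
    return true;
}

so that this function becomes

    return gen_unary(ctx, a->rd, a->rj, tcg_gen_ext16s_tl);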
+static bool trans_clo_w(DisasContext *ctx, arg_clo_w *a)
+{
+ TCGv Rd = cpu_gpr[a->rd];
+
+ if (a->rd == 0) {
+ /* Nop */
+ return true;
+ }
+
+ gen_load_gpr(Rd, a->rj);
+
+ tcg_gen_not_tl(Rd, Rd);
+ tcg_gen_ext32u_tl(Rd, Rd);
+ tcg_gen_clzi_tl(Rd, Rd, TARGET_LONG_BITS);
+ tcg_gen_subi_tl(Rd, Rd, TARGET_LONG_BITS - 32);
So, you're actually using the tcg builtins here, and the helper you created
isn't used.
+static bool trans_cto_w(DisasContext *ctx, arg_cto_w *a)
+{
+ TCGv t0;
+ TCGv Rd = cpu_gpr[a->rd];
+
+ if (a->rd == 0) {
+ /* Nop */
+ return true;
+ }
+
+ t0 = tcg_temp_new();
+ gen_load_gpr(t0, a->rj);
+
+ gen_helper_cto_w(Rd, cpu_env, t0);
Here you should have used the tcg builtin.
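E.g.:

tcg_gen_not_tl(Rd, t0);
tcg_gen_ext32u_tl(Rd, Rd);
tcg_gen_ctzi_tl(Rd, Rd, 32);

The last operand of ctzi, the result for a zero input, handles the
all-ones case.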
+static bool trans_ctz_w(DisasContext *ctx, arg_ctz_w *a)
+{
+ TCGv t0;
+ TCGv Rd = cpu_gpr[a->rd];
+
+ if (a->rd == 0) {
+ /* Nop */
+ return true;
+ }
+
+ t0 = tcg_temp_new();
+ gen_load_gpr(t0, a->rj);
+
+ gen_helper_ctz_w(Rd, cpu_env, t0);
Likewise.
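I.e.:

tcg_gen_ext32u_tl(Rd, t0);
tcg_gen_ctzi_tl(Rd, Rd, 32);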
+static bool trans_revb_2w(DisasContext *ctx, arg_revb_2w *a)
+{
+ TCGv_i64 t0, t1, t2;
+ TCGv Rd = cpu_gpr[a->rd];
+
+ if (a->rd == 0) {
+ /* Nop */
+ return true;
+ }
+
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ t2 = get_gpr(a->rj);
+
+ gen_load_gpr(t0, a->rd);
+
+ tcg_gen_ext32u_i64(t1, t2);
+ tcg_gen_bswap32_i64(t0, t1);
+ tcg_gen_shri_i64(t1, t2, 32);
+ tcg_gen_bswap32_i64(t1, t1);
+ tcg_gen_concat32_i64(Rd, t0, t1);
This whole sequence is just:

tcg_gen_bswap64_i64(Rd, Rj);
tcg_gen_rotri_i64(Rd, Rd, 32);
+static bool trans_bytepick_d(DisasContext *ctx, arg_bytepick_d *a)
+{
+ TCGv t0;
+ TCGv Rd = cpu_gpr[a->rd];
+
+ if (a->rd == 0) {
+ /* Nop */
+ return true;
+ }
+
+ t0 = tcg_temp_new();
+
+ check_loongarch_64(ctx);
+ if (a->sa3 == 0 || ((a->sa3) * 8) == 64) {
+ if (a->sa3 == 0) {
+ gen_load_gpr(t0, a->rk);
+ } else {
+ gen_load_gpr(t0, a->rj);
+ }
+ tcg_gen_mov_tl(Rd, t0);
+ } else {
+ TCGv t1 = tcg_temp_new();
+
+ gen_load_gpr(t0, a->rk);
+ gen_load_gpr(t1, a->rj);
+
+ tcg_gen_shli_tl(t0, t0, ((a->sa3) * 8));
+ tcg_gen_shri_tl(t1, t1, 64 - ((a->sa3) * 8));
+ tcg_gen_or_tl(Rd, t1, t0);
+
+ tcg_temp_free(t1);
+ }
All of this reduces to a single extract2; the expander accepts an
offset of 64, which also covers the sa3 == 0 case:

tcg_gen_extract2_i64(Rd, Rj, Rk, 64 - a->sa3 * 8);
r~