On 17/2/25 00:07, Richard Henderson wrote:
Use the fully general extract opcodes instead.

Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
---
  include/tcg/tcg-opc.h            |  10 -
  tcg/aarch64/tcg-target-has.h     |  10 -
  tcg/arm/tcg-target-has.h         |   4 -
  tcg/i386/tcg-target-has.h        |  10 -
  tcg/loongarch64/tcg-target-has.h |  10 -
  tcg/mips/tcg-target-has.h        |  13 -
  tcg/ppc/tcg-target-has.h         |  12 -
  tcg/riscv/tcg-target-has.h       |  10 -
  tcg/s390x/tcg-target-has.h       |  10 -
  tcg/sparc64/tcg-target-has.h     |  10 -
  tcg/tcg-has.h                    |   6 -
  tcg/tci/tcg-target-has.h         |  10 -
  tcg/optimize.c                   |  61 +----
  tcg/tcg-op.c                     | 430 ++++++++-----------------------
  tcg/tcg.c                        |  46 ----
  tcg/tci.c                        |  36 ---
  docs/devel/tcg-ops.rst           |  14 -
  tcg/aarch64/tcg-target.c.inc     |  22 +-
  tcg/arm/tcg-target.c.inc         |   7 -
  tcg/i386/tcg-target.c.inc        |  24 +-
  tcg/loongarch64/tcg-target.c.inc |  22 +-
  tcg/mips/tcg-target.c.inc        |  20 +-
  tcg/ppc/tcg-target.c.inc         |  17 +-
  tcg/riscv/tcg-target.c.inc       |  22 +-
  tcg/s390x/tcg-target.c.inc       |  22 +-
  tcg/sparc64/tcg-target.c.inc     |  14 +-
  tcg/tci/tcg-target.c.inc         | 102 +++-----
  27 files changed, 147 insertions(+), 827 deletions(-)


@@ -1794,23 +1715,19 @@ void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
      case -1:
          tcg_gen_mov_i64(ret, arg1);
          return;
-    case 0xff:
-        /* Don't recurse with tcg_gen_ext8u_i64.  */
-        if (TCG_TARGET_HAS_ext8u_i64) {
-            tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1);
-            return;
-        }
-        break;
-    case 0xffff:
-        if (TCG_TARGET_HAS_ext16u_i64) {
-            tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1);
-            return;
-        }
-        break;
-    case 0xffffffffu:
-        if (TCG_TARGET_HAS_ext32u_i64) {
-            tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1);
-            return;
+    default:
+        /*
+         * Canonicalize on extract, if valid.  This aids x86 with its
+         * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode
+         * which does not require matching operands.  Other backends can
+         * trivially expand the extract to AND during code generation.
+         */

This could also use 'ofs' in place of the literal 0 (s/0/ofs/), as is done for the 32-bit variant.

+        if (!(arg2 & (arg2 + 1))) {
+            unsigned len = ctz64(~arg2);
+            if (TCG_TARGET_extract_valid(TCG_TYPE_I64, 0, len)) {
+                tcg_gen_extract_i64(ret, arg1, 0, len);
+                return;
+            }
          }
          break;
      }


@@ -2720,54 +2592,20 @@ void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,

I note deposit_z_i32/i64 are not documented.


@@ -2787,10 +2625,6 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
          tcg_gen_shri_i64(ret, arg, 64 - len);
          return;
      }
-    if (ofs == 0) {
-        tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
-        return;
-    }
      if (TCG_TARGET_REG_BITS == 32) {
          /* Look for a 32-bit extract within one of the two words.  */
@@ -2804,39 +2638,34 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
              tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
              return;
          }
-        /* The field is split across two words.  One double-word
-           shift is better than two double-word shifts.  */
-        goto do_shift_and;
+
+        /* The field is split across two words. */
+        tcg_gen_extract2_i32(TCGV_LOW(ret), TCGV_LOW(arg),
+                             TCGV_HIGH(arg), ofs);
+        if (len <= 32) {
+            tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_LOW(ret), 0, len);
+            tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+        } else {
+            tcg_gen_extract_i32(TCGV_HIGH(ret), TCGV_HIGH(arg),
+                                ofs, len - 32);
+        }
+        return;

This change would look better in a preliminary patch.

Otherwise, chapeau! 🎩

Reviewed-by: Philippe Mathieu-Daudé <phi...@linaro.org>

      }
      if (TCG_TARGET_extract_valid(TCG_TYPE_I64, ofs, len)) {
          tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len);
          return;
      }
+    if (ofs == 0) {
+        tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
+        return;
+    }
      /* Assume that zero-extension, if available, is cheaper than a shift. */
-    switch (ofs + len) {
-    case 32:
-        if (TCG_TARGET_HAS_ext32u_i64) {
-            tcg_gen_ext32u_i64(ret, arg);
-            tcg_gen_shri_i64(ret, ret, ofs);
-            return;
-        }
-        break;
-    case 16:
-        if (TCG_TARGET_HAS_ext16u_i64) {
-            tcg_gen_ext16u_i64(ret, arg);
-            tcg_gen_shri_i64(ret, ret, ofs);
-            return;
-        }
-        break;
-    case 8:
-        if (TCG_TARGET_HAS_ext8u_i64) {
-            tcg_gen_ext8u_i64(ret, arg);
-            tcg_gen_shri_i64(ret, ret, ofs);
-            return;
-        }
-        break;
+    if (TCG_TARGET_extract_valid(TCG_TYPE_I64, 0, ofs + len)) {
+        tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, 0, ofs + len);
+        tcg_gen_shri_i64(ret, ret, ofs);
+        return;
      }
      /* ??? Ideally we'd know what values are available for immediate AND.
@@ -2844,7 +2673,6 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
         so that we get ext8u, ext16u, and ext32u.  */
      switch (len) {
      case 1 ... 8: case 16: case 32:
-    do_shift_and:
          tcg_gen_shri_i64(ret, arg, ofs);
          tcg_gen_andi_i64(ret, ret, (1ull << len) - 1);
          break;


Reply via email to