Signed-off-by: Richard Henderson <r...@twiddle.net> --- tcg/s390/tcg-target.c | 119 +++++++++++++++++++++++++++++++++++++++++++++--- 1 files changed, 111 insertions(+), 8 deletions(-)
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index 53a92c5..a17ef91 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -33,10 +33,11 @@ do { } while (0) #endif -#define TCG_CT_CONST_32 0x100 -#define TCG_CT_CONST_NEG 0x200 -#define TCG_CT_CONST_ADDI 0x400 -#define TCG_CT_CONST_ANDI 0x800 +#define TCG_CT_CONST_32 0x0100 +#define TCG_CT_CONST_NEG 0x0200 +#define TCG_CT_CONST_ADDI 0x0400 +#define TCG_CT_CONST_ANDI 0x1000 +#define TCG_CT_CONST_ORI 0x2000 /* Several places within the instruction set 0 means "no register" rather than TCG_REG_R0. */ @@ -64,6 +65,8 @@ typedef enum S390Opcode { RIL_LLILF = 0xc00f, RIL_NIHF = 0xc00a, RIL_NILF = 0xc00b, + RIL_OIHF = 0xc00c, + RIL_OILF = 0xc00d, RI_AGHI = 0xa70b, RI_AHI = 0xa70a, @@ -81,6 +84,10 @@ typedef enum S390Opcode { RI_NIHL = 0xa505, RI_NILH = 0xa506, RI_NILL = 0xa507, + RI_OIHH = 0xa508, + RI_OIHL = 0xa509, + RI_OILH = 0xa50a, + RI_OILL = 0xa50b, RRE_AGR = 0xb908, RRE_CGR = 0xb920, @@ -330,6 +337,10 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) ct->ct &= ~TCG_CT_REG; ct->ct |= TCG_CT_CONST_ANDI; break; + case 'O': + ct->ct &= ~TCG_CT_REG; + ct->ct |= TCG_CT_CONST_ORI; + break; default: break; } @@ -396,6 +407,36 @@ static int tcg_match_andi(int ct, tcg_target_ulong val) return 1; } +/* Immediates to be used with logical OR. This is an optimization only, + since a full 64-bit immediate OR can always be performed with 4 sequential + OI[LH][LH] instructions. What we're looking for is immediates that we + can load efficiently, and the immediate load plus the reg-reg OR is + smaller than the sequential OI's. */ + +static int tcg_match_ori(int ct, tcg_target_long val) +{ + if (facilities & FACILITY_EXT_IMM) { + if (ct & TCG_CT_CONST_32) { + /* All 32-bit ORs can be performed with 1 48-bit insn. */ + return 1; + } + } + + /* Look for negative values. These are best to load with LGHI. 
*/ + if (val < 0) { + if (val == (int16_t)val) { + return 0; + } + if (facilities & FACILITY_EXT_IMM) { + if (val == (int32_t)val) { + return 0; + } + } + } + + return 1; +} + /* Test if a constant matches the constraint. */ static int tcg_target_const_match(tcg_target_long val, const TCGArgConstraint *arg_ct) @@ -427,6 +468,8 @@ static int tcg_target_const_match(tcg_target_long val, } } else if (ct & TCG_CT_CONST_ANDI) { return tcg_match_andi(ct, val); + } else if (ct & TCG_CT_CONST_ORI) { + return tcg_match_ori(ct, val); } return 0; @@ -841,6 +884,58 @@ static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val) } } +static void tgen64_ori(TCGContext *s, TCGReg dest, tcg_target_ulong val) +{ + static const S390Opcode oi_insns[4] = { + RI_OILL, RI_OILH, RI_OIHL, RI_OIHH + }; + static const S390Opcode oif_insns[2] = { + RIL_OILF, RIL_OIHF + }; + + int i; + + /* Look for no-op. */ + if (val == 0) { + return; + } + + if (facilities & FACILITY_EXT_IMM) { + /* Try all 32-bit insns that can perform it in one go. */ + for (i = 0; i < 4; i++) { + tcg_target_ulong mask = (0xffffull << i*16); + if ((val & mask) != 0 && (val & ~mask) == 0) { + tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16); + return; + } + } + + /* Try all 48-bit insns that can perform it in one go. */ + for (i = 0; i < 2; i++) { + tcg_target_ulong mask = (0xffffffffull << i*32); + if ((val & mask) != 0 && (val & ~mask) == 0) { + tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i*32); + return; + } + } + + /* Perform the OR via sequential modifications to the high and + low parts. Do this via recursion to handle 16-bit vs 32-bit + masks in each half. */ + tgen64_ori(s, dest, val & 0x00000000ffffffffull); + tgen64_ori(s, dest, val & 0xffffffff00000000ull); + } else { + /* With no extended-immediate facility, we don't need to be so + clever. Just iterate over the insns and OR in the constant.
*/ + for (i = 0; i < 4; i++) { + tcg_target_ulong mask = (0xffffull << i*16); + if ((val & mask) != 0) { + tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16); + } + } + } +} + static void tgen32_cmp(TCGContext *s, TCGCond c, TCGReg r1, TCGReg r2) { if (c > TCG_COND_GT) { @@ -1328,7 +1423,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; case INDEX_op_or_i32: - tcg_out_insn(s, RR, OR, args[0], args[2]); + if (const_args[2]) { + tgen64_ori(s, args[0], args[2] & 0xffffffff); + } else { + tcg_out_insn(s, RR, OR, args[0], args[2]); + } break; case INDEX_op_xor_i32: tcg_out_insn(s, RR, XR, args[0], args[2]); @@ -1342,7 +1441,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; case INDEX_op_or_i64: - tcg_out_insn(s, RRE, OGR, args[0], args[2]); + if (const_args[2]) { + tgen64_ori(s, args[0], args[2]); + } else { + tcg_out_insn(s, RRE, OGR, args[0], args[2]); + } break; case INDEX_op_xor_i64: tcg_out_insn(s, RRE, XGR, args[0], args[2]); @@ -1606,7 +1709,7 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } }, { INDEX_op_and_i32, { "r", "0", "rWA" } }, - { INDEX_op_or_i32, { "r", "0", "r" } }, + { INDEX_op_or_i32, { "r", "0", "rWO" } }, { INDEX_op_xor_i32, { "r", "0", "r" } }, { INDEX_op_neg_i32, { "r", "r" } }, @@ -1668,7 +1771,7 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } }, { INDEX_op_and_i64, { "r", "0", "rA" } }, - { INDEX_op_or_i64, { "r", "0", "r" } }, + { INDEX_op_or_i64, { "r", "0", "rO" } }, { INDEX_op_xor_i64, { "r", "0", "r" } }, { INDEX_op_neg_i64, { "r", "r" } }, -- 1.7.0.1