Basically this adds the ability to generate two independent constants during synthesis, then bring them together with a pack instruction. Thus we never need to go out to the constant pool when zbkb is enabled. The worst sequence we ever generate is

lui+addi+lui+addi+pack

Obviously if either half can be synthesized with just a lui or just an addi, then we'll DTRT automagically. So for example:

unsigned long foo_0xf857f2def857f2de(void) {
    return 0x1425000028000000;
}

The high and low halves are just a lui.  So the final synthesis is:


        li      a5,671088640            # 15    [c=4 l=4]  *movdi_64bit/1
        li      a0,337969152            # 16    [c=4 l=4]  *movdi_64bit/1
        pack    a0,a5,a0        # 17    [c=12 l=4]  riscv_xpack_di_si_2

On the implementation side, I think the bits I've put in here likely can be used to handle the repeating constant case for !zbkb. I think it likely could be used to help capture cases where the upper half can be derived from the lower half (say by turning a bit on or off, shifting or something similar). The key in both of these cases is we need a temporary register holding an intermediate value.

Ventana's internal tester enables zbkb, but I don't think any of the other testers currently exercise zbkb. We'll probably want to change that at some point, but I don't think it's super-critical yet.

While I can envision a few more cases where we could improve constant synthesis, No immediate plans to work in this space, but if someone is interested, some thoughts are recorded here:


https://wiki.riseproject.dev/display/HOME/CT_00_031+--+Additional+Constant+Synthesis+Improvements



Jeff
gcc/
        * config/riscv/riscv.cc (riscv_integer_op): Add new field.
        (riscv_build_integer_1): Initialize the new field.
        (riscv_built_integer): Recognize more cases where Zbkb's
        pack instruction is profitable.
        (riscv_move_integer): Loop over all the codes.  If requested,
        save the current constant into a temporary.  Generate pack
        for more cases using the saved constant.
        

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 91fefacee80..10af38a5a81 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -250,6 +250,7 @@ struct riscv_arg_info {
    and each VALUE[i] is a constant integer.  CODE[0] is undefined.  */
 struct riscv_integer_op {
   bool use_uw;
+  bool save_temporary;
   enum rtx_code code;
   unsigned HOST_WIDE_INT value;
 };
@@ -759,6 +760,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
       codes[0].code = UNKNOWN;
       codes[0].value = value;
       codes[0].use_uw = false;
+      codes[0].save_temporary = false;
       return 1;
     }
   if (TARGET_ZBS && SINGLE_BIT_MASK_OPERAND (value))
@@ -767,6 +769,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
       codes[0].code = UNKNOWN;
       codes[0].value = value;
       codes[0].use_uw = false;
+      codes[0].save_temporary = false;
 
       /* RISC-V sign-extends all 32bit values that live in a 32bit
         register.  To avoid paradoxes, we thus need to use the
@@ -796,6 +799,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
          alt_codes[alt_cost-1].code = PLUS;
          alt_codes[alt_cost-1].value = low_part;
          alt_codes[alt_cost-1].use_uw = false;
+         alt_codes[alt_cost-1].save_temporary = false;
          memcpy (codes, alt_codes, sizeof (alt_codes));
          cost = alt_cost;
        }
@@ -810,6 +814,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
          alt_codes[alt_cost-1].code = XOR;
          alt_codes[alt_cost-1].value = low_part;
          alt_codes[alt_cost-1].use_uw = false;
+         alt_codes[alt_cost-1].save_temporary = false;
          memcpy (codes, alt_codes, sizeof (alt_codes));
          cost = alt_cost;
        }
@@ -852,6 +857,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
          alt_codes[alt_cost-1].code = ASHIFT;
          alt_codes[alt_cost-1].value = shift;
          alt_codes[alt_cost-1].use_uw = use_uw;
+         alt_codes[alt_cost-1].save_temporary = false;
          memcpy (codes, alt_codes, sizeof (alt_codes));
          cost = alt_cost;
        }
@@ -873,9 +879,11 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
          codes[0].value = (((unsigned HOST_WIDE_INT) value >> trailing_ones)
                            | (value << (64 - trailing_ones)));
          codes[0].use_uw = false;
+         codes[0].save_temporary = false;
          codes[1].code = ROTATERT;
          codes[1].value = 64 - trailing_ones;
          codes[1].use_uw = false;
+         codes[1].save_temporary = false;
          cost = 2;
        }
       /* Handle the case where the 11 bit range of zero bits wraps around.  */
@@ -888,9 +896,11 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
                            | ((unsigned HOST_WIDE_INT) value
                               >> (32 + upper_trailing_ones)));
          codes[0].use_uw = false;
+         codes[0].save_temporary = false;
          codes[1].code = ROTATERT;
          codes[1].value = 32 - upper_trailing_ones;
          codes[1].use_uw = false;
+         codes[1].save_temporary = false;
          cost = 2;
        }
 
@@ -917,6 +927,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
              alt_codes[alt_cost].code = AND;
              alt_codes[alt_cost].value = ~(1UL << bit);
              alt_codes[alt_cost].use_uw = false;
+             alt_codes[alt_cost].save_temporary = false;
              alt_cost++;
              nval &= ~(1UL << bit);
            }
@@ -938,6 +949,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
           alt_codes[alt_cost - 1].code = FMA;
           alt_codes[alt_cost - 1].value = 9;
           alt_codes[alt_cost - 1].use_uw = false;
+          alt_codes[alt_cost - 1].save_temporary = false;
           memcpy (codes, alt_codes, sizeof (alt_codes));
           cost = alt_cost;
        }
@@ -948,6 +960,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
           alt_codes[alt_cost - 1].code = FMA;
           alt_codes[alt_cost - 1].value = 5;
           alt_codes[alt_cost - 1].use_uw = false;
+          alt_codes[alt_cost - 1].save_temporary = false;
           memcpy (codes, alt_codes, sizeof (alt_codes));
           cost = alt_cost;
        }
@@ -958,6 +971,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
           alt_codes[alt_cost - 1].code = FMA;
           alt_codes[alt_cost - 1].value = 3;
           alt_codes[alt_cost - 1].use_uw = false;
+          alt_codes[alt_cost - 1].save_temporary = false;
           memcpy (codes, alt_codes, sizeof (alt_codes));
           cost = alt_cost;
        }
@@ -978,6 +992,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
          alt_codes[alt_cost - 1].code = PLUS;
          alt_codes[alt_cost - 1].value = adjustment;
          alt_codes[alt_cost - 1].use_uw = false;
+         alt_codes[alt_cost - 1].save_temporary = false;
          memcpy (codes, alt_codes, sizeof (alt_codes));
          cost = alt_cost;
        }
@@ -995,6 +1010,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
          alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
          alt_codes[i].value = value & 0x7ffff000;
          alt_codes[i].use_uw = false;
+         alt_codes[i].save_temporary = false;
          value &= ~0x7ffff000;
           i++;
        }
@@ -1005,6 +1021,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
          alt_codes[i].code = (i == 0 ? UNKNOWN : PLUS);
          alt_codes[i].value = value & 0x7ff;
          alt_codes[i].use_uw = false;
+         alt_codes[i].save_temporary = false;
          value &= ~0x7ff;
          i++;
        }
@@ -1016,6 +1033,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
          alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
          alt_codes[i].value = 1UL << bit;
          alt_codes[i].use_uw = false;
+         alt_codes[i].save_temporary = false;
          value &= ~(1ULL << bit);
          i++;
        }
@@ -1057,6 +1075,7 @@ riscv_build_integer (struct riscv_integer_op *codes, 
HOST_WIDE_INT value,
          alt_codes[alt_cost-1].code = LSHIFTRT;
          alt_codes[alt_cost-1].value = shift;
          alt_codes[alt_cost-1].use_uw = false;
+         alt_codes[alt_cost-1].save_temporary = false;
          memcpy (codes, alt_codes, sizeof (alt_codes));
          cost = alt_cost;
        }
@@ -1069,6 +1088,7 @@ riscv_build_integer (struct riscv_integer_op *codes, 
HOST_WIDE_INT value,
          alt_codes[alt_cost-1].code = LSHIFTRT;
          alt_codes[alt_cost-1].value = shift;
          alt_codes[alt_cost-1].use_uw = false;
+         alt_codes[alt_cost-1].save_temporary = false;
          memcpy (codes, alt_codes, sizeof (alt_codes));
          cost = alt_cost;
        }
@@ -1093,6 +1113,7 @@ riscv_build_integer (struct riscv_integer_op *codes, 
HOST_WIDE_INT value,
          alt_codes[alt_cost - 1].code = XOR;
          alt_codes[alt_cost - 1].value = -1;
          alt_codes[alt_cost - 1].use_uw = false;
+         alt_codes[alt_cost - 1].save_temporary = false;
          memcpy (codes, alt_codes, sizeof (alt_codes));
          cost = alt_cost;
        }
@@ -1128,13 +1149,55 @@ riscv_build_integer (struct riscv_integer_op *codes, 
HOST_WIDE_INT value,
   if (cost > 3 && TARGET_64BIT && TARGET_ZBKB)
     {
       unsigned HOST_WIDE_INT loval = value & 0xffffffff;
-      unsigned HOST_WIDE_INT hival = value & ~loval;
-      if (hival >> 32 == loval)
+      unsigned HOST_WIDE_INT hival = (value & ~loval) >> 32;
+      if (hival == loval)
        {
          cost = 1 + riscv_build_integer_1 (codes, sext_hwi (loval, 32), mode);
          codes[cost - 1].code = CONCAT;
          codes[cost - 1].value = 0;
          codes[cost - 1].use_uw = false;
+         codes[cost - 1].save_temporary = false;
+       }
+
+      /* An arbitrary 64 bit constant can be synthesized in 5 instructions
+        using zbkb.  We may do better than that if the upper or lower half
+        can be synthsized with a single LUI, ADDI or BSET.  Regardless the
+        basic steps are the same.  */
+      if (cost > 3 && can_create_pseudo_p ())
+       {
+         struct riscv_integer_op hi_codes[RISCV_MAX_INTEGER_OPS];
+         struct riscv_integer_op lo_codes[RISCV_MAX_INTEGER_OPS];
+         int hi_cost, lo_cost;
+
+         /* Synthesize and get cost for each half.  */
+         lo_cost
+           = riscv_build_integer_1 (lo_codes, sext_hwi (loval, 32), mode);
+         hi_cost
+           = riscv_build_integer_1 (hi_codes, sext_hwi (hival, 32), mode);
+
+         /* If profitable, finish synthesis using zbkb.  */
+         if (cost > hi_cost + lo_cost + 1)
+           {
+             /* We need the low half independent of the high half.  So
+                mark it has creating a temporary we'll use later.  */
+             memcpy (codes, lo_codes,
+                     lo_cost * sizeof (struct riscv_integer_op));
+             codes[lo_cost - 1].save_temporary = true;
+
+             /* Now the high half synthesis.  */
+             memcpy (codes + lo_cost, hi_codes,
+                     hi_cost * sizeof (struct riscv_integer_op));
+
+             /* Adjust the cost.  */
+             cost = hi_cost + lo_cost + 1;
+
+             /* And finally (ab)use VEC_MERGE to indicate we want to
+                put merge the two parts together.  */
+             codes[cost - 1].code = VEC_MERGE;
+             codes[cost - 1].value = 0;
+             codes[cost - 1].use_uw = false;
+             codes[cost - 1].save_temporary = false;
+           }
        }
 
     }
@@ -2656,23 +2719,25 @@ riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT 
value,
     x = riscv_split_integer (value, mode);
   else
     {
-      codes[0].value = trunc_int_for_mode (codes[0].value, mode);
-      /* Apply each binary operation to X. */
-      x = GEN_INT (codes[0].value);
-
-      for (i = 1; i < num_ops; i++)
+      rtx old_value = NULL_RTX;
+      for (i = 0; i < num_ops; i++)
        {
-         if (!can_create_pseudo_p ())
+         if (i != 0 && !can_create_pseudo_p ())
            x = riscv_emit_set (temp, x);
-         else
+         else if (i != 0)
            x = force_reg (mode, x);
          codes[i].value = trunc_int_for_mode (codes[i].value, mode);
-         /* If the sequence requires using a "uw" form of an insn, we're
-            going to have to construct the RTL ourselves and put it in
-            a register to avoid force_reg/force_operand from mucking things
-            up.  */
-         if (codes[i].use_uw)
+         if (codes[i].code == UNKNOWN)
            {
+             /* UNKNOWN means load the constant value into X.  */
+             x = GEN_INT (codes[i].value);
+           }
+         else if (codes[i].use_uw)
+           {
+             /* If the sequence requires using a "uw" form of an insn, we're
+                going to have to construct the RTL ourselves and put it in
+                a register to avoid force_reg/force_operand from mucking
+                things up.  */
              gcc_assert (TARGET_64BIT || TARGET_ZBA);
              rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
 
@@ -2695,16 +2760,27 @@ riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT 
value,
              rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
              x = riscv_emit_set (t, x);
            }
-         else if (codes[i].code == CONCAT)
+         else if (codes[i].code == CONCAT || codes[i].code == VEC_MERGE)
            {
              rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
-             rtx t2 = gen_lowpart (SImode, x);
+             rtx t2 = codes[i].code == VEC_MERGE ? old_value : x;
+             gcc_assert (t2);
+             t2 = gen_lowpart (SImode, t2);
              emit_insn (gen_riscv_xpack_di_si_2 (t, x, GEN_INT (32), t2));
              x = t;
            }
          else
            x = gen_rtx_fmt_ee (codes[i].code, mode,
                                x, GEN_INT (codes[i].value));
+
+         /* If this entry in the code table indicates we should save away
+            the temporary holding the current value of X, then do so.  */
+         if (codes[i].save_temporary)
+           {
+             gcc_assert (old_value == NULL_RTX);
+             x = force_reg (mode, x);
+             old_value = x;
+           }
        }
     }
 

Reply via email to