https://gcc.gnu.org/g:d55d40afd42a280c80729b538e3cce994f20961d

commit r15-6493-gd55d40afd42a280c80729b538e3cce994f20961d
Author: Guo Jie <guo...@loongson.cn>
Date:   Mon Dec 30 10:39:13 2024 +0800

    LoongArch: Optimize for conditional move operations
    
    The optimization example is as follows.
    
    From:
      if (condition)
        dest += 1 << 16;
    To:
      dest += (condition ? 1 : 0) << 16;
    
    The transformed form does not need maskeqz and masknez, thus reducing
    the number of instructions.
    
    gcc/ChangeLog:
    
            * config/loongarch/loongarch.cc
            (loongarch_expand_conditional_move): Add some optimization
            implementations based on noce_try_cmove_arith.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/loongarch/conditional-move-opt-1.c: New test.
            * gcc.target/loongarch/conditional-move-opt-2.c: New test.

Diff:
---
 gcc/config/loongarch/loongarch.cc                  | 103 ++++++++++++++++++++-
 .../gcc.target/loongarch/conditional-move-opt-1.c  |  58 ++++++++++++
 .../gcc.target/loongarch/conditional-move-opt-2.c  |  42 +++++++++
 3 files changed, 202 insertions(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 029616c1db2a..23cbd2eead97 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -5294,6 +5294,81 @@ loongarch_expand_conditional_move (rtx *operands)
     loongarch_emit_float_compare (&code, &op0, &op1);
   else
     {
+      /* Optimize to reduce the number of instructions for ternary operations.
+        Mainly implemented based on noce_try_cmove_arith.
+        For dest = (condition) ? value_if_true : value_if_false;
+        the optimization requires:
+         a. value_if_false = var;
+         b. value_if_true = var OP C (a positive integer power of 2).
+
+        Situations similar to the following:
+           if (condition)
+             dest += 1 << imm;
+        to:
+           dest += (condition ? 1 : 0) << imm;  */
+
+      rtx_insn *insn;
+      HOST_WIDE_INT val = 0; /* The value of rtx C.  */
+      /* INSN with operands[2] as the output.  */
+      rtx_insn *value_if_true_insn = NULL;
+      /* INSN with operands[3] as the output.  */
+      rtx_insn *value_if_false_insn = NULL;
+      rtx value_if_true_insn_src = NULL_RTX;
+      /* Common operand var in value_if_true and value_if_false.  */
+      rtx comm_var = NULL_RTX;
+      bool can_be_optimized = false;
+
+      /* Search value_if_true_insn and value_if_false_insn.  */
+      struct sequence_stack *seq = get_current_sequence ()->next;
+      for (insn = seq->last; insn; insn = PREV_INSN (insn))
+       {
+         if (single_set (insn))
+           {
+             rtx set_dest = SET_DEST (single_set (insn));
+             if (rtx_equal_p (set_dest, operands[2]))
+               value_if_true_insn = insn;
+             else if (rtx_equal_p (set_dest, operands[3]))
+               value_if_false_insn = insn;
+             if (value_if_true_insn && value_if_false_insn)
+               break;
+           }
+       }
+
+      /* Check if the optimization conditions are met.  */
+      if (value_if_true_insn
+         && value_if_false_insn
+         /* Make sure that value_if_true is a binary operation.  */
+         && BINARY_P (value_if_true_insn_src
+                      = SET_SRC (single_set (value_if_true_insn)))
+         /* Make sure that the first operand of value_if_true is the
+            same var that value_if_false is set from.  */
+         && rtx_equal_p (XEXP (value_if_true_insn_src, 0),
+                         SET_SRC (single_set (value_if_false_insn))))
+       {
+         comm_var = SET_SRC (single_set (value_if_false_insn));
+         rtx src = XEXP (value_if_true_insn_src, 1);
+         rtx imm = NULL_RTX;
+         if (CONST_INT_P (src))
+           imm = src;
+         else
+           for (insn = seq->last; insn; insn = PREV_INSN (insn))
+             {
+               rtx set = single_set (insn);
+               if (set && rtx_equal_p (SET_DEST (set), src))
+                 {
+                   imm = SET_SRC (set);
+                   break;
+                 }
+             }
+         if (imm && CONST_INT_P (imm))
+           {
+             val = INTVAL (imm);
+             /* Make sure that imm is a positive integer power of 2.  */
+             if (val > 0 && !(val & (val - 1)))
+               can_be_optimized = true;
+           }
+       }
+
       if (GET_MODE_SIZE (GET_MODE (op0)) < UNITS_PER_WORD)
        {
          promote_op[0] = (REG_P (op0) && REG_P (operands[2]) &&
@@ -5314,22 +5389,48 @@ loongarch_expand_conditional_move (rtx *operands)
       op0_extend = op0;
       op1_extend = force_reg (word_mode, op1);
 
+      rtx target = gen_reg_rtx (GET_MODE (op0));
+
       if (code == EQ || code == NE)
        {
          op0 = loongarch_zero_if_equal (op0, op1);
          op1 = const0_rtx;
+         /* For EQ, set target to 1 if op0 and op1 are the same,
+            otherwise set to 0.
+            For NE, set target to 0 if op0 and op1 are the same,
+            otherwise set to 1.  */
+         if (can_be_optimized)
+           loongarch_emit_binary (code, target, op0, const0_rtx);
        }
       else
        {
          /* The comparison needs a separate scc instruction.  Store the
             result of the scc in *OP0 and compare it against zero.  */
          bool invert = false;
-         rtx target = gen_reg_rtx (GET_MODE (op0));
          loongarch_emit_int_order_test (code, &invert, target, op0, op1);
+         if (can_be_optimized && invert)
+           loongarch_emit_binary (EQ, target, target, const0_rtx);
          code = invert ? EQ : NE;
          op0 = target;
          op1 = const0_rtx;
        }
+
+      if (can_be_optimized)
+       {
+         /* Perform (condition ? 1 : 0) << log2 (C).  */
+         loongarch_emit_binary (ASHIFT, target, target,
+                                GEN_INT (exact_log2 (val)));
+         /* Shift-related insn patterns only support SImode operands[2].  */
+         enum rtx_code opcode = GET_CODE (value_if_true_insn_src);
+         if (opcode == ASHIFT || opcode == ASHIFTRT || opcode == LSHIFTRT
+             || opcode == ROTATE || opcode == ROTATERT)
+           target = gen_lowpart (SImode, target);
+         /* Perform target = target OP ((condition ? 1 : 0) << log2 (C)).  */
+         loongarch_emit_binary (opcode, operands[0],
+                                force_reg (GET_MODE (operands[3]), comm_var),
+                                target);
+         return;
+       }
     }
 
   rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
diff --git a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c
new file mode 100644
index 000000000000..ed13471aa90a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c
@@ -0,0 +1,58 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "maskeqz" } } */
+/* { dg-final { scan-assembler-not "masknez" } } */
+
+extern long lm, ln, lr;
+
+void
+test_ne ()
+{
+  if (lm != ln)
+    lr += (1 << 16);
+  lr += lm;
+}
+
+void
+test_eq ()
+{
+  if (lm == ln)
+    lr = lm + (1 << 16);
+  else
+    lr = lm;
+  lr += lm;
+}
+
+void
+test_lt ()
+{
+  if (lm < ln)
+    lr *= (1 << 16);
+  lr += lm;
+}
+
+void
+test_le ()
+{
+  if (lm <= ln)
+    lr = lm * ((long)1 << 32);
+  else
+    lr = lm;
+  lr += lm;
+}
+
+void
+test_nez ()
+{
+  if (lm != 0)
+    lr <<= (1 << 4);
+  lr += lm;
+}
+
+void
+test_eqz ()
+{
+  if (lm == 0)
+    lr >>= (1 << 2);
+  lr += lm;
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c
new file mode 100644
index 000000000000..ac72d4d933ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 --param max-rtl-if-conversion-insns=1" } */
+/* { dg-final { scan-assembler-not "maskeqz" } } */
+/* { dg-final { scan-assembler-not "masknez" } } */
+
+/* The relevant optimization is currently only based on noce_try_cmove_arith,
+   so this test passes --param max-rtl-if-conversion-insns=1 to bypass
+   noce_convert_multiple_sets and execute noce_try_cmove_arith instead.  */
+
+extern long lm, ln, lr;
+
+void
+test_ge ()
+{
+  if (lm >= ln)
+    lr += ((long)1 << 32);
+  lr += lm;
+}
+
+void
+test_ltz ()
+{
+  if (lm < 0)
+    lr |= (1 << 16);
+  lr += lm;
+}
+
+void
+test_lez ()
+{
+  if (lm <= 0)
+    lr &= (1 << 16);
+  lr += lm;
+}
+
+void
+test_gez ()
+{
+  if (lm >= 0)
+    lr ^= (1 << 16);
+  lr += lm;
+}

Reply via email to