While investigating Zicond extension code generation on RISC-V, I identified
several cases where GCC (trunk) generates suboptimal code due to premature 
if-conversion.

Consider the following test case:
CFLAGS: -march=rv64gc_zicond -mabi=lp64d -O2

int test_IOR_ceqz_x (int x, int z, int c)
{
  if (c)
    x = x | z;
  return x;
}

Before the patch:
  or      a1,a0,a1
  czero.eqz a1,a1,a2
  czero.nez a0,a0,a2
  add     a0,a0,a1
  ret

The issue occurs when ifcvt encounters the following RTL pattern:
  (set reg1 (ior:DI (reg2:DI) (reg3:DI)))
  (set reg4 (sign_extend:DI (subreg:SI (reg1:DI))))

When reg1 is not used after the second insn, combine could simplify this
pair of sets. However, the first ifcvt pass runs before combine, and
noce_convert_multiple_sets converts the block there, preventing combine from
ever optimizing the pattern.

This patch adds a check to bb_ok_for_noce_convert_multiple_sets that detects
such sign/zero extension patterns (a block of exactly two sets in which the
second extends the result of the first and the intermediate register dies)
and rejects noce_convert_multiple_sets when combine has not yet run. This
allows combine to simplify the expressions, resulting in better code
generation during the second ifcvt pass.

To minimize false positives, the additional checks only apply before the
combine pass.

Generated code for test_IOR_ceqz_x after the patch:
  czero.eqz a2,a1,a2
  or       a0,a0,a2
  ret

The patch has been bootstrapped and tested on riscv64-unknown-linux-gnu.

gcc/
        * ifcvt.cc (noce_extended_and_dead_set_p): New function.
        (bb_ok_for_noce_convert_multiple_sets): Use
        noce_extended_and_dead_set_p.

gcc/testsuite/
        * gcc.target/riscv/zicond_ifcvt_opt_int.c: New test.
---
 gcc/ifcvt.cc                                  |  59 +++
 .../gcc.target/riscv/zicond_ifcvt_opt_int.c   | 486 ++++++++++++++++++
 2 files changed, 545 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt_int.c

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index 3dcb1be4869..dd03ebea5fb 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -4143,6 +4143,49 @@ init_noce_multiple_sets_info (basic_block bb,
     }
 }
 
+/* Helper function for bb_ok_for_noce_convert_multiple_sets.  FIRST_INSN and
+   SECOND_INSN must both be single sets.  Return TRUE iff the source of
+   SECOND_INSN's set is a sign or zero extension of a SUBREG of the register
+   set by FIRST_INSN, and that register has a REG_DEAD note on SECOND_INSN:
+     FIRST_INSN:  (set reg1 ...)
+     SECOND_INSN: (set reg2 (sign/zero_extend (subreg reg1 ...))).
+
+   Return FALSE if the source of FIRST_INSN's set is a plain register or a
+   sign or zero extension, since that may be part of a swap idiom.  */
+
+static bool
+noce_extended_and_dead_set_p (const rtx_insn *first_insn,
+            const rtx_insn *second_insn)
+{
+  rtx first_set = single_set (first_insn);
+  rtx second_set = single_set (second_insn);
+  gcc_assert (first_set);
+  gcc_assert (second_set);
+
+  rtx first_dest = SET_DEST (first_set);
+  rtx first_src = SET_SRC (first_set);
+  rtx second_src = SET_SRC (second_set);
+
+  /* Avoid possible swap idiom patterns.  */
+  if (REG_P (first_src)
+      || GET_CODE (first_src) == SIGN_EXTEND
+      || GET_CODE (first_src) == ZERO_EXTEND)
+    return false;
+
+  if ((GET_CODE (second_src) != SIGN_EXTEND
+      && GET_CODE (second_src) != ZERO_EXTEND)
+      || !SUBREG_P (XEXP (second_src, 0)))
+    return false;
+
+  rtx subreg = SUBREG_REG (XEXP (second_src, 0));
+  if (REG_P (first_dest) && REG_P (subreg)
+      && REGNO (first_dest) == REGNO (subreg)
+      && find_regno_note (second_insn, REG_DEAD, REGNO (first_dest)))
+    return true;
+
+  return false;
+}
+
 /* Return true iff basic block TEST_BB is suitable for conversion to a
    series of conditional moves.  Also check that we have more than one
    set (other routines can handle a single set better than we would),
@@ -4153,6 +4196,7 @@ static bool
 bb_ok_for_noce_convert_multiple_sets (basic_block test_bb, unsigned *cost)
 {
   rtx_insn *insn;
+  rtx_insn *first_insn = NULL, *second_insn = NULL;
   unsigned count = 0;
   unsigned param = param_max_rtl_if_conversion_insns;
   bool speed_p = optimize_bb_for_speed_p (test_bb);
@@ -4187,6 +4231,12 @@ bb_ok_for_noce_convert_multiple_sets (basic_block test_bb, unsigned *cost)
       if (!can_conditionally_move_p (GET_MODE (dest)))
        return false;
 
+      /* Record the first two INSN insns for special case handling.  */
+      if (first_insn == NULL)
+  first_insn = insn;
+      else if (second_insn == NULL)
+  second_insn = insn;
+
       potential_cost += insn_cost (insn, speed_p);
 
       count++;
@@ -4194,6 +4244,15 @@ bb_ok_for_noce_convert_multiple_sets (basic_block test_bb, unsigned *cost)
 
   *cost += potential_cost;
 
+  /* Reject conversion before combine if we have exactly two SET insns
+     where the source of the second is a sign or zero extension of the
+     destination of the first, and that destination register dies in the
+     second insn.  Combine will most likely simplify these insns later.  */
+  if (!ifcvt_after_combine
+      && count == 2
+      && noce_extended_and_dead_set_p (first_insn, second_insn))
+    return false;
+
   /* If we would only put out one conditional move, the other strategies
      this pass tries are better optimized and will be more appropriate.
      Some targets want to strictly limit the number of conditional moves
diff --git a/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt_int.c b/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt_int.c
new file mode 100644
index 00000000000..7f33e345ec2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt_int.c
@@ -0,0 +1,486 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zicond -mabi=lp64d -O2 " } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz" "-flto"} } */
+
+int
+test_IOR_ceqz (int x, int y, int z, int c)
+{
+  if (c)
+    x = y | z;
+  else
+    x = y;
+  return x;
+}
+
+int
+test_IOR_ceqz_x (int x, int z, int c)
+{
+  if (c)
+    x = x | z;
+
+  return x;
+}
+
+int
+test_IOR_nez (int x, int y, int z, int c)
+{
+  if (c)
+    x = y;
+  else
+    x = y | z;
+  return x;
+}
+
+int
+test_IOR_nez_x (int x, int z, int c)
+{
+  if (c)
+    {
+    }
+  else
+    x = x | z;
+  return x;
+}
+
+int
+test_IOR_nez_2 (int x, int y, int z, int c)
+{
+  if (!c)
+    x = y | z;
+  else
+    x = y;
+  return x;
+}
+
+int
+test_IOR_nez_x_2 (int x, int z, int c)
+{
+  if (!c)
+    x = x | z;
+
+  return x;
+}
+
+int
+test_IOR_eqz_2 (int x, int y, int z, int c)
+{
+  if (!c)
+    x = y;
+  else
+    x = y | z;
+  return x;
+}
+
+int
+test_IOR_eqz_x_2 (int x, int z, int c)
+{
+  if (!c)
+    {
+    }
+  else
+    x = x | z;
+  return x;
+}
+
+int
+test_XOR_ceqz (int x, int y, int z, int c)
+{
+  if (c)
+    x = y ^ z;
+  else
+    x = y;
+  return x;
+}
+
+int
+test_XOR_ceqz_x (int x, int z, int c)
+{
+  if (c)
+    x = x ^ z;
+
+  return x;
+}
+
+int
+test_XOR_nez (int x, int y, int z, int c)
+{
+  if (c)
+    x = y;
+  else
+    x = y ^ z;
+  return x;
+}
+
+int
+test_XOR_nez_x (int x, int z, int c)
+{
+  if (c)
+    {
+    }
+  else
+    x = x ^ z;
+  return x;
+}
+
+int
+test_XOR_nez_2 (int x, int y, int z, int c)
+{
+  if (!c)
+    x = y ^ z;
+  else
+    x = y;
+  return x;
+}
+
+int
+test_XOR_nez_x_2 (int x, int z, int c)
+{
+  if (!c)
+    x = x ^ z;
+
+  return x;
+}
+
+int
+test_XOR_eqz_2 (int x, int y, int z, int c)
+{
+  if (!c)
+    x = y;
+  else
+    x = y ^ z;
+  return x;
+}
+
+int
+test_XOR_eqz_x_2 (int x, int z, int c)
+{
+  if (!c)
+    {
+    }
+  else
+    x = x ^ z;
+  return x;
+}
+
+int
+test_IOR_ceqz_reverse_bin_oprands (int x, int y, int z, int c)
+{
+  if (c)
+    x = z | y;
+  else
+    x = y;
+  return x;
+}
+
+int
+test_IOR_ceqz_x_reverse_bin_oprands (int x, int z, int c)
+{
+  if (c)
+    x = z | x;
+
+  return x;
+}
+
+int
+test_IOR_nez_reverse_bin_oprands (int x, int y, int z, int c)
+{
+  if (c)
+    x = y;
+  else
+    x = z | y;
+  return x;
+}
+
+int
+test_IOR_nez_x_reverse_bin_oprands (int x, int z, int c)
+{
+  if (c)
+    {
+    }
+  else
+    x = z | x;
+  return x;
+}
+
+int
+test_IOR_nez_2_reverse_bin_oprands (int x, int y, int z, int c)
+{
+  if (!c)
+    x = z | y;
+  else
+    x = y;
+  return x;
+}
+
+int
+test_IOR_nez_x_2_reverse_bin_oprands (int x, int z, int c)
+{
+  if (!c)
+    x = z | x;
+
+  return x;
+}
+
+int
+test_IOR_eqz_2_reverse_bin_oprands (int x, int y, int z, int c)
+{
+  if (!c)
+    x = y;
+  else
+    x = z | y;
+  return x;
+}
+
+int
+test_IOR_eqz_x_2_reverse_bin_oprands (int x, int z, int c)
+{
+  if (!c)
+    {
+    }
+  else
+    x = z | x;
+  return x;
+}
+
+int
+test_XOR_ceqz_reverse_bin_oprands (int x, int y, int z, int c)
+{
+  if (c)
+    x = z ^ y;
+  else
+    x = y;
+  return x;
+}
+
+int
+test_XOR_ceqz_x_reverse_bin_oprands (int x, int z, int c)
+{
+  if (c)
+    x = z ^ x;
+
+  return x;
+}
+
+int
+test_XOR_nez_reverse_bin_oprands (int x, int y, int z, int c)
+{
+  if (c)
+    x = y;
+  else
+    x = z ^ y;
+  return x;
+}
+
+int
+test_XOR_nez_x_reverse_bin_oprands (int x, int z, int c)
+{
+  if (c)
+    {
+    }
+  else
+    x = z ^ x;
+  return x;
+}
+
+int
+test_XOR_nez_2_reverse_bin_oprands (int x, int y, int z, int c)
+{
+  if (!c)
+    x = z ^ y;
+  else
+    x = y;
+  return x;
+}
+
+int
+test_XOR_nez_x_2_reverse_bin_oprands (int x, int z, int c)
+{
+  if (!c)
+    x = z ^ x;
+
+  return x;
+}
+
+int
+test_XOR_eqz_2_reverse_bin_oprands (int x, int y, int z, int c)
+{
+  if (!c)
+    x = y;
+  else
+    x = z ^ y;
+  return x;
+}
+
+int
+test_XOR_eqz_x_2_reverse_bin_oprands (int x, int z, int c)
+{
+  if (!c)
+    {
+    }
+  else
+    x = z ^ x;
+  return x;
+}
+
+int
+test_AND_ceqz (int x, int y, int z, int c)
+{
+  if (c)
+    x = y & z;
+  else
+    x = y;
+  return x;
+}
+
+int
+test_AND_ceqz_x (int x, int z, int c)
+{
+  if (c)
+    x = x & z;
+
+  return x;
+}
+
+int
+test_AND_nez (int x, int y, int z, int c)
+{
+  if (c)
+    x = y;
+  else
+    x = y & z;
+  return x;
+}
+
+int
+test_AND_nez_x (int x, int z, int c)
+{
+  if (c)
+    {
+    }
+  else
+    x = x & z;
+  return x;
+}
+
+int
+test_AND_nez_2 (int x, int y, int z, int c)
+{
+  if (!c)
+    x = y & z;
+  else
+    x = y;
+  return x;
+}
+
+int
+test_AND_nez_x_2 (int x, int z, int c)
+{
+  if (!c)
+    x = x & z;
+
+  return x;
+}
+
+int
+test_AND_eqz_2 (int x, int y, int z, int c)
+{
+  if (!c)
+    x = y;
+  else
+    x = y & z;
+  return x;
+}
+
+int
+test_AND_eqz_x_2 (int x, int z, int c)
+{
+  if (!c)
+    {
+    }
+  else
+    x = x & z;
+  return x;
+}
+
+int
+test_AND_ceqz_reverse_bin_oprands (int x, int y, int z, int c)
+{
+  if (c)
+    x = z & y;
+  else
+    x = y;
+  return x;
+}
+
+int
+test_AND_ceqz_x_reverse_bin_oprands (int x, int z, int c)
+{
+  if (c)
+    x = z & x;
+
+  return x;
+}
+
+int
+test_AND_nez_reverse_bin_oprands (int x, int y, int z, int c)
+{
+  if (c)
+    x = y;
+  else
+    x = z & y;
+  return x;
+}
+
+int
+test_AND_nez_x_reverse_bin_oprands (int x, int z, int c)
+{
+  if (c)
+    {
+    }
+  else
+    x = z & x;
+  return x;
+}
+
+int
+test_AND_nez_2_reverse_bin_oprands (int x, int y, int z, int c)
+{
+  if (!c)
+    x = z & y;
+  else
+    x = y;
+  return x;
+}
+
+int
+test_AND_nez_x_2_reverse_bin_oprands (int x, int z, int c)
+{
+  if (!c)
+    x = z & x;
+
+  return x;
+}
+
+int
+test_AND_eqz_2_reverse_bin_oprands (int x, int y, int z, int c)
+{
+  if (!c)
+    x = y;
+  else
+    x = z & y;
+  return x;
+}
+
+int
+test_AND_eqz_x_2_reverse_bin_oprands (int x, int z, int c)
+{
+  if (!c)
+    {
+    }
+  else
+    x = z & x;
+  return x;
+}
+
+/* { dg-final { scan-assembler-times {czero\.eqz} 24 } } */
+/* { dg-final { scan-assembler-times {czero\.nez} 24 } } */
-- 
2.43.0

Reply via email to