This patch to the middle-end's RTL expansion reorders the code in
emit_store_flag_1 so that the backend has more control over how best
to expand/split double word equality/inequality comparisons against
zero or minus one.  With the current implementation, the middle-end
always decides to lower this idiom during RTL expansion using SUBREGs
and word mode instructions, without ever consulting the backend's
machine description.  Hence on x86_64, a TImode comparison against zero
is always expanded as:

(parallel [
  (set (reg:DI 91)
       (ior:DI (subreg:DI (reg:TI 88) 0)
               (subreg:DI (reg:TI 88) 8)))
  (clobber (reg:CC 17 flags))])

(set (reg:CCZ 17 flags)
     (compare:CCZ (reg:DI 91)
                  (const_int 0 [0])))

This patch, which makes no changes to the code itself, simply reorders
the clauses in emit_store_flag_1 so that the middle-end first attempts
expansion using the target's doubleword mode cstore optab/expander,
and only if this fails, falls back to lowering to word mode operations.
On x86_64, this allows the expander to produce:

(set (reg:CCZ 17 flags)
     (compare:CCZ (reg:TI 88)
                  (const_int 0 [0])))

which is a candidate for scalar-to-vector transformations (and
combine simplifications etc.).  On targets that don't define a cstore
pattern for doubleword integer modes, there should be no change in
behaviour.  For those that do, the current behaviour can be restored
(if desired) by restricting the expander/insn to not apply when the
comparison is EQ or NE, and operand[2] is either const0_rtx or
constm1_rtx.

This change just keeps RTL expansion more consistent (in philosophy).
For other doubleword comparisons, such as with operators LT and GT,
or with constants other than zero or -1, the wishes of the backend
are respected, and only if the optab expansion fails are the default
fall-back implementations using narrower integer mode operations
(and conditional jumps) used.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures. I'm happy to help tweak any backends that notice
a change in their generated code.  Ok for mainline?

2022-08-03  Roger Sayle  <ro...@nextmovesoftware.com>

gcc/ChangeLog
        * expmed.cc (emit_store_flag_1): Move code to expand double word
        equality and inequality against zero or -1, using word operations,
        to after trying to use the backend's cstore<mode>4 optab/expander.


Thanks in advance,
Roger
--

diff --git a/gcc/expmed.cc b/gcc/expmed.cc
index 9b01b5a..8d7418b 100644
--- a/gcc/expmed.cc
+++ b/gcc/expmed.cc
@@ -5662,63 +5662,9 @@ emit_store_flag_1 (rtx target, enum rtx_code code, rtx 
op0, rtx op1,
       break;
     }
 
-  /* If we are comparing a double-word integer with zero or -1, we can
-     convert the comparison into one involving a single word.  */
-  scalar_int_mode int_mode;
-  if (is_int_mode (mode, &int_mode)
-      && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
-      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
-    {
-      rtx tem;
-      if ((code == EQ || code == NE)
-         && (op1 == const0_rtx || op1 == constm1_rtx))
-       {
-         rtx op00, op01;
-
-         /* Do a logical OR or AND of the two words and compare the
-            result.  */
-         op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
-         op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
-         tem = expand_binop (word_mode,
-                             op1 == const0_rtx ? ior_optab : and_optab,
-                             op00, op01, NULL_RTX, unsignedp,
-                             OPTAB_DIRECT);
-
-         if (tem != 0)
-           tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
-                                  unsignedp, normalizep);
-       }
-      else if ((code == LT || code == GE) && op1 == const0_rtx)
-       {
-         rtx op0h;
-
-         /* If testing the sign bit, can just test on high word.  */
-         op0h = simplify_gen_subreg (word_mode, op0, int_mode,
-                                     subreg_highpart_offset (word_mode,
-                                                             int_mode));
-         tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
-                                unsignedp, normalizep);
-       }
-      else
-       tem = NULL_RTX;
-
-      if (tem)
-       {
-         if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
-           return tem;
-         if (!target)
-           target = gen_reg_rtx (target_mode);
-
-         convert_move (target, tem,
-                       !val_signbit_known_set_p (word_mode,
-                                                 (normalizep ? normalizep
-                                                  : STORE_FLAG_VALUE)));
-         return target;
-       }
-    }
-
   /* If this is A < 0 or A >= 0, we can do this by taking the ones
      complement of A (for GE) and shifting the sign bit to the low bit.  */
+  scalar_int_mode int_mode;
   if (op1 == const0_rtx && (code == LT || code == GE)
       && is_int_mode (mode, &int_mode)
       && (normalizep || STORE_FLAG_VALUE == 1
@@ -5764,6 +5710,7 @@ emit_store_flag_1 (rtx target, enum rtx_code code, rtx 
op0, rtx op1,
       return op0;
     }
 
+  /* Next try expanding this via the backend's cstore<mode>4.  */
   mclass = GET_MODE_CLASS (mode);
   FOR_EACH_MODE_FROM (compare_mode, mode)
     {
@@ -5788,6 +5735,60 @@ emit_store_flag_1 (rtx target, enum rtx_code code, rtx 
op0, rtx op1,
        }
     }
 
+  /* If we are comparing a double-word integer with zero or -1, we can
+     convert the comparison into one involving a single word.  */
+  if (is_int_mode (mode, &int_mode)
+      && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
+      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
+    {
+      rtx tem;
+      if ((code == EQ || code == NE)
+         && (op1 == const0_rtx || op1 == constm1_rtx))
+       {
+         rtx op00, op01;
+
+         /* Do a logical OR or AND of the two words and compare the
+            result.  */
+         op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
+         op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
+         tem = expand_binop (word_mode,
+                             op1 == const0_rtx ? ior_optab : and_optab,
+                             op00, op01, NULL_RTX, unsignedp,
+                             OPTAB_DIRECT);
+
+         if (tem != 0)
+           tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
+                                  unsignedp, normalizep);
+       }
+      else if ((code == LT || code == GE) && op1 == const0_rtx)
+       {
+         rtx op0h;
+
+         /* If testing the sign bit, can just test on high word.  */
+         op0h = simplify_gen_subreg (word_mode, op0, int_mode,
+                                     subreg_highpart_offset (word_mode,
+                                                             int_mode));
+         tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
+                                unsignedp, normalizep);
+       }
+      else
+       tem = NULL_RTX;
+
+      if (tem)
+       {
+         if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
+           return tem;
+         if (!target)
+           target = gen_reg_rtx (target_mode);
+
+         convert_move (target, tem,
+                       !val_signbit_known_set_p (word_mode,
+                                                 (normalizep ? normalizep
+                                                  : STORE_FLAG_VALUE)));
+         return target;
+       }
+    }
+
   return 0;
 }
 

Reply via email to