Comparisons between memory and constants can sometimes be done in a
smaller mode.  This results in smaller constants, which may then end up
as immediates instead of in the literal pool.

For example, on s390x a non-symmetric comparison like
  x <= 0x3fffffffffffffff
results in the constant being spilled to the literal pool and an 8-byte
memory comparison being emitted.  Ideally, the equivalent comparison
  x0 <= 0x3f
is emitted instead, where x0 is the most significant byte of x; its
constant is smaller and more likely to materialize as an immediate.

Similarly, comparisons of the form
  x >= 0x4000000000000000
can be shortened into x0 >= 0x40.

Bootstrapped and regtested on s390x, x86_64, aarch64, and powerpc64le.
Note that the new tests show that on the little-endian targets mentioned
above the optimization does not materialize, since either the costs of
the new instructions are higher or they do not match.  Still ok for
mainline?

gcc/ChangeLog:

        * combine.cc (simplify_compare_const): Narrow comparison of
        memory and constant.
        (try_combine): Adapt to new function signature.
        (simplify_comparison): Adapt to new function signature.

gcc/testsuite/ChangeLog:

        * gcc.dg/cmp-mem-const-1.c: New test.
        * gcc.dg/cmp-mem-const-2.c: New test.
        * gcc.dg/cmp-mem-const-3.c: New test.
        * gcc.dg/cmp-mem-const-4.c: New test.
        * gcc.dg/cmp-mem-const-5.c: New test.
        * gcc.dg/cmp-mem-const-6.c: New test.
        * gcc.target/s390/cmp-mem-const-1.c: New test.
---
 gcc/combine.cc                                | 79 +++++++++++++++++--
 gcc/testsuite/gcc.dg/cmp-mem-const-1.c        | 17 ++++
 gcc/testsuite/gcc.dg/cmp-mem-const-2.c        | 17 ++++
 gcc/testsuite/gcc.dg/cmp-mem-const-3.c        | 17 ++++
 gcc/testsuite/gcc.dg/cmp-mem-const-4.c        | 17 ++++
 gcc/testsuite/gcc.dg/cmp-mem-const-5.c        | 17 ++++
 gcc/testsuite/gcc.dg/cmp-mem-const-6.c        | 17 ++++
 .../gcc.target/s390/cmp-mem-const-1.c         | 24 ++++++
 8 files changed, 200 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-1.c
 create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-2.c
 create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-3.c
 create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-4.c
 create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-5.c
 create mode 100644 gcc/testsuite/gcc.dg/cmp-mem-const-6.c
 create mode 100644 gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 5aa0ec5c45a..56e15a93409 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -460,7 +460,7 @@ static rtx simplify_shift_const (rtx, enum rtx_code, machine_mode, rtx,
 static int recog_for_combine (rtx *, rtx_insn *, rtx *);
 static rtx gen_lowpart_for_combine (machine_mode, rtx);
 static enum rtx_code simplify_compare_const (enum rtx_code, machine_mode,
-                                            rtx, rtx *);
+                                            rtx *, rtx *);
 static enum rtx_code simplify_comparison (enum rtx_code, rtx *, rtx *);
 static void update_table_tick (rtx);
 static void record_value_for_reg (rtx, rtx_insn *, rtx);
@@ -3185,7 +3185,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
          compare_code = orig_compare_code = GET_CODE (*cc_use_loc);
          if (is_a <scalar_int_mode> (GET_MODE (i2dest), &mode))
            compare_code = simplify_compare_const (compare_code, mode,
-                                                  op0, &op1);
+                                                  &op0, &op1);
          target_canonicalize_comparison (&compare_code, &op0, &op1, 1);
        }
 
@@ -11796,13 +11796,14 @@ gen_lowpart_for_combine (machine_mode omode, rtx x)
    (CODE OP0 const0_rtx) form.
 
    The result is a possibly different comparison code to use.
-   *POP1 may be updated.  */
+   *POP0 and *POP1 may be updated.  */
 
 static enum rtx_code
 simplify_compare_const (enum rtx_code code, machine_mode mode,
-                       rtx op0, rtx *pop1)
+                       rtx *pop0, rtx *pop1)
 {
   scalar_int_mode int_mode;
+  rtx op0 = *pop0;
   HOST_WIDE_INT const_op = INTVAL (*pop1);
 
   /* Get the constant we are comparing against and turn off all bits
@@ -11987,6 +11988,74 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
       break;
     }
 
+  /* Narrow non-symmetric comparison of memory and constant as e.g.
+     x0...x7 <= 0x3fffffffffffffff into x0 <= 0x3f where x0 is the most
+     significant byte.  Likewise, transform x0...x7 >= 0x4000000000000000 into
+     x0 >= 0x40.  */
+  if ((code == LEU || code == LTU || code == GEU || code == GTU)
+      && is_a <scalar_int_mode> (GET_MODE (op0), &int_mode)
+      && MEM_P (op0)
+      && !MEM_VOLATILE_P (op0)
+      /* The optimization makes only sense for constants which are big enough
+        so that we have a chance to chop off something at all.  */
+      && (unsigned HOST_WIDE_INT) const_op > 0xff
+      /* Ensure that we do not overflow during normalization.  */
+      && (code != GTU || (unsigned HOST_WIDE_INT) const_op < HOST_WIDE_INT_M1U))
+    {
+      unsigned HOST_WIDE_INT n = (unsigned HOST_WIDE_INT) const_op;
+      enum rtx_code adjusted_code;
+
+      /* Normalize code to either LEU or GEU.  */
+      if (code == LTU)
+       {
+         --n;
+         adjusted_code = LEU;
+       }
+      else if (code == GTU)
+       {
+         ++n;
+         adjusted_code = GEU;
+       }
+      else
+       adjusted_code = code;
+
+      scalar_int_mode narrow_mode_iter;
+      FOR_EACH_MODE_UNTIL (narrow_mode_iter, int_mode)
+       {
+         unsigned nbits = GET_MODE_PRECISION (int_mode)
+                          - GET_MODE_PRECISION (narrow_mode_iter);
+         unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << nbits) - 1;
+         unsigned HOST_WIDE_INT lower_bits = n & mask;
+         if ((adjusted_code == LEU && lower_bits == mask)
+             || (adjusted_code == GEU && lower_bits == 0))
+           {
+             n >>= nbits;
+             break;
+           }
+       }
+
+      if (narrow_mode_iter < int_mode)
+       {
+         if (dump_file && (dump_flags & TDF_DETAILS))
+           {
+             fprintf (
+               dump_file, "narrow comparison from mode %s to %s: (MEM %s "
+               HOST_WIDE_INT_PRINT_HEX ") to (MEM %s "
+               HOST_WIDE_INT_PRINT_HEX ").\n", GET_MODE_NAME (int_mode),
+               GET_MODE_NAME (narrow_mode_iter), GET_RTX_NAME (code),
+               (unsigned HOST_WIDE_INT)const_op, GET_RTX_NAME (adjusted_code),
+               n);
+           }
+         poly_int64 offset = (BYTES_BIG_ENDIAN
+                              ? 0
+                              : (GET_MODE_SIZE (int_mode)
+                                 - GET_MODE_SIZE (narrow_mode_iter)));
+         *pop0 = adjust_address_nv (op0, narrow_mode_iter, offset);
+         *pop1 = GEN_INT (n);
+         return adjusted_code;
+       }
+    }
+
   *pop1 = GEN_INT (const_op);
   return code;
 }
@@ -12179,7 +12248,7 @@ simplify_comparison (enum rtx_code code, rtx *pop0, rtx *pop1)
 
       /* Try to simplify the compare to constant, possibly changing the
         comparison op, and/or changing op1 to zero.  */
-      code = simplify_compare_const (code, raw_mode, op0, &op1);
+      code = simplify_compare_const (code, raw_mode, &op0, &op1);
       const_op = INTVAL (op1);
 
       /* Compute some predicates to simplify code below.  */
diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-1.c b/gcc/testsuite/gcc.dg/cmp-mem-const-1.c
new file mode 100644
index 00000000000..263ad98af79
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cmp-mem-const-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -fdump-rtl-combine-details" } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to QI" "combine" } } */
+
+typedef __UINT64_TYPE__ uint64_t;
+
+int
+le_1byte_a (uint64_t *x)
+{
+  return *x <= 0x3fffffffffffffff;
+}
+
+int
+le_1byte_b (uint64_t *x)
+{
+  return *x < 0x4000000000000000;
+}
diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-2.c b/gcc/testsuite/gcc.dg/cmp-mem-const-2.c
new file mode 100644
index 00000000000..a7cc5348295
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cmp-mem-const-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -fdump-rtl-combine-details" } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to QI" "combine" } } */
+
+typedef __UINT64_TYPE__ uint64_t;
+
+int
+ge_1byte_a (uint64_t *x)
+{
+  return *x > 0x3fffffffffffffff;
+}
+
+int
+ge_1byte_b (uint64_t *x)
+{
+  return *x >= 0x4000000000000000;
+}
diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-3.c b/gcc/testsuite/gcc.dg/cmp-mem-const-3.c
new file mode 100644
index 00000000000..06f80bf72d8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cmp-mem-const-3.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -fdump-rtl-combine-details" } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to HI" "combine" } } */
+
+typedef __UINT64_TYPE__ uint64_t;
+
+int
+le_2bytes_a (uint64_t *x)
+{
+  return *x <= 0x3ffdffffffffffff;
+}
+
+int
+le_2bytes_b (uint64_t *x)
+{
+  return *x < 0x3ffe000000000000;
+}
diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-4.c b/gcc/testsuite/gcc.dg/cmp-mem-const-4.c
new file mode 100644
index 00000000000..407999abf7e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cmp-mem-const-4.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -fdump-rtl-combine-details" } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to HI" "combine" } } */
+
+typedef __UINT64_TYPE__ uint64_t;
+
+int
+ge_2bytes_a (uint64_t *x)
+{
+  return *x > 0x400cffffffffffff;
+}
+
+int
+ge_2bytes_b (uint64_t *x)
+{
+  return *x >= 0x400d000000000000;
+}
diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-5.c b/gcc/testsuite/gcc.dg/cmp-mem-const-5.c
new file mode 100644
index 00000000000..e16773f5bcf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cmp-mem-const-5.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -fdump-rtl-combine-details" } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to SI" "combine" } } */
+
+typedef __UINT64_TYPE__ uint64_t;
+
+int
+le_4bytes_a (uint64_t *x)
+{
+  return *x <= 0x3ffffdffffffffff;
+}
+
+int
+le_4bytes_b (uint64_t *x)
+{
+  return *x < 0x3ffffe0000000000;
+}
diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-6.c b/gcc/testsuite/gcc.dg/cmp-mem-const-6.c
new file mode 100644
index 00000000000..8f53b5678bd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cmp-mem-const-6.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -fdump-rtl-combine-details" } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to SI" "combine" } } */
+
+typedef __UINT64_TYPE__ uint64_t;
+
+int
+ge_4bytes_a (uint64_t *x)
+{
+  return *x > 0x4000cfffffffffff;
+}
+
+int
+ge_4bytes_b (uint64_t *x)
+{
+  return *x >= 0x4000d00000000000;
+}
diff --git a/gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c b/gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c
new file mode 100644
index 00000000000..309aafbec01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -march=z13 -mzarch -fdump-rtl-combine-details" } */
+/* { dg-final { scan-assembler-not {\tclc\t} } } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to QI" "combine" } } */
+
+struct s
+{
+  long a;
+  unsigned b : 1;
+  unsigned c : 1;
+};
+
+int foo (struct s *x)
+{
+  /* Expression
+       x->b || x->c
+     is transformed into
+       _1 = BIT_FIELD_REF <*x_4(D), 64, 64>;
+       _2 = _1 > 0x3FFFFFFFFFFFFFFF;
+     where the constant may materialize in the literal pool and an 8 byte CLC
+     may be emitted.  Ensure this is not the case.
+  */
+  return x->b || x->c;
+}
-- 
2.39.2

Reply via email to