[COMMITTED/13] Fix PR 110386: backprop vs ABSU_EXPR

2023-10-01 Thread Andrew Pinski
From: Andrew Pinski 

The issue here is that when backprop tries to go
and strip sign ops, it skips over ABSU_EXPR but
ABSU_EXPR not only does an ABS, it also changes the
type to unsigned.
Since strip_sign_op_1 is only supposed to strip off
sign changing operands and not ones that change types,
removing ABSU_EXPR here is correct. We don't handle
nop conversions so this does not cause any missed optimizations either.

Committed to the GCC 13 branch after bootstrapped and
tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/110386

gcc/ChangeLog:

* gimple-ssa-backprop.cc (strip_sign_op_1): Remove ABSU_EXPR.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/pr110386-1.c: New test.
* gcc.c-torture/compile/pr110386-2.c: New test.

(cherry picked from commit 2bbac12ea7bd8a3eef5382e1b13f6019df4ec03f)
---
 gcc/gimple-ssa-backprop.cc   |  1 -
 gcc/testsuite/gcc.c-torture/compile/pr110386-1.c |  9 +
 gcc/testsuite/gcc.c-torture/compile/pr110386-2.c | 11 +++
 3 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr110386-1.c
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr110386-2.c

diff --git a/gcc/gimple-ssa-backprop.cc b/gcc/gimple-ssa-backprop.cc
index 65a65590017..dcb15ed4f61 100644
--- a/gcc/gimple-ssa-backprop.cc
+++ b/gcc/gimple-ssa-backprop.cc
@@ -694,7 +694,6 @@ strip_sign_op_1 (tree rhs)
 switch (gimple_assign_rhs_code (assign))
   {
   case ABS_EXPR:
-  case ABSU_EXPR:
   case NEGATE_EXPR:
return gimple_assign_rhs1 (assign);
 
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr110386-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr110386-1.c
new file mode 100644
index 000..4fcc977ad16
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr110386-1.c
@@ -0,0 +1,9 @@
+
+int f(int a)
+{
+int c = c < 0 ? c : -c;
+c = -c;
+unsigned b =  c;
+unsigned t = b*a;
+return t*t;
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr110386-2.c 
b/gcc/testsuite/gcc.c-torture/compile/pr110386-2.c
new file mode 100644
index 000..c60e1b6994b
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr110386-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-mavx" } */
+
+#include 
+
+__m128i do_stuff(__m128i XMM0) {
+   __m128i ABS0 = _mm_abs_epi32(XMM0);
+   __m128i MUL0 = _mm_mullo_epi32(ABS0, XMM0);
+   __m128i MUL1 = _mm_mullo_epi32(MUL0, MUL0);
+   return MUL1;
+}
-- 
2.39.3



[COMMITTED/13] Fix PR 111331: wrong code for `a > 28 ? MIN : 29`

2023-10-01 Thread Andrew Pinski
From: Andrew Pinski 

The problem here is after r6-7425-ga9fee7cdc3c62d0e51730,
the comparison to see if the transformation could be done was using the
wrong value. Instead of seeing if the inner was LE (for MIN and GE for MAX)
the outer value, it was comparing the inner to the value used in the comparison
which was wrong.

Committed to GCC 13 branch after bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

PR tree-optimization/111331
* tree-ssa-phiopt.cc (minmax_replacement):
Fix the LE/GE comparison for the
`(a CMP CST1) ? max : a` optimization.

gcc/testsuite/ChangeLog:

PR tree-optimization/111331
* gcc.c-torture/execute/pr111331-1.c: New test.
* gcc.c-torture/execute/pr111331-2.c: New test.
* gcc.c-torture/execute/pr111331-3.c: New test.

(cherry picked from commit 30e6ee074588bacefd2dfe745b188bb20c81fe5e)
---
 .../gcc.c-torture/execute/pr111331-1.c| 17 +
 .../gcc.c-torture/execute/pr111331-2.c| 19 +++
 .../gcc.c-torture/execute/pr111331-3.c| 15 +++
 gcc/tree-ssa-phiopt.cc|  8 
 4 files changed, 55 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr111331-1.c
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr111331-2.c
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr111331-3.c

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c
new file mode 100644
index 000..4c7f4fdbaa9
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c
@@ -0,0 +1,17 @@
+int a;
+int b;
+int c(int d, int e, int f) {
+  if (d < e)
+return e;
+  if (d > f)
+return f;
+  return d;
+}
+int main() {
+  int g = -1;
+  a = c(b + 30, 29, g + 29);
+  volatile t = a;
+  if (t != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c
new file mode 100644
index 000..5c677f2caa9
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c
@@ -0,0 +1,19 @@
+
+int a;
+int b;
+
+int main() {
+  int d = b+30;
+  {
+int t;
+if (d < 29)
+  t =  29;
+else
+  t = (d > 28) ? 28 : d;
+a = t;
+  }
+  volatile int t = a;
+  if (a != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c
new file mode 100644
index 000..213d9bdd539
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c
@@ -0,0 +1,15 @@
+int a;
+int b;
+
+int main() {
+  int d = b+30;
+  {
+int t;
+t = d < 29 ? 29 : ((d > 28) ? 28 : d);
+a = t;
+  }
+  volatile int t = a;
+  if (a != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index a7ab6ce4ad9..c3d78d1400b 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -2270,7 +2270,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb, basic_block alt_
 
  /* We need BOUND <= LARGER.  */
  if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
- bound, larger)))
+ bound, arg_false)))
return false;
}
  else if (operand_equal_for_phi_arg_p (arg_false, smaller)
@@ -2301,7 +2301,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb, basic_block alt_
 
  /* We need BOUND >= SMALLER.  */
  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
- bound, smaller)))
+ bound, arg_false)))
return false;
}
  else
@@ -2341,7 +2341,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb, basic_block alt_
 
  /* We need BOUND >= LARGER.  */
  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
- bound, larger)))
+ bound, arg_true)))
return false;
}
  else if (operand_equal_for_phi_arg_p (arg_true, smaller)
@@ -2368,7 +2368,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb, basic_block alt_
 
  /* We need BOUND <= SMALLER.  */
  if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
- bound, smaller)))
+ bound, arg_true)))
return false;
}
  else
-- 
2.39.3



Re: [PATCH] ipa: Self-DCE of uses of removed call LHSs (PR 108007)

2023-10-04 Thread Andrew Pinski
On Wed, Oct 4, 2023 at 5:08 PM Maciej W. Rozycki  wrote:
>
> On Tue, 3 Oct 2023, Martin Jambor wrote:
>
> > > SSA graph may be deep so this may cause stack overflow, so I think we
> > > should use worklist here (it is also easy to do).
> > >
> > > OK with that change.
> > > Honza
> >
> > I have just committed the following after a bootstrap and testing on
> > x86_64-linux.
>
>  This has regressed the native `powerpc64le-linux-gnu' configuration,
> which doesn't bootstrap here anymore:
>
> Comparing stages 2 and 3
> Bootstrap comparison failure!
> powerpc64le-linux-gnu/libstdc++-v3/src/compatibility-ldbl.o differs
> powerpc64le-linux-gnu/libstdc++-v3/src/.libs/compatibility-ldbl.o differs
>
> I have double-checked this is indeed the offending commit, the compiler
> bootstraps just fine as at commit 7eb5ce7f58ed ("Remove pass counting in
> VRP.").
>
>  Shall I file a PR, or can you handle it regardless?  Let me know if you
> need anything from me.

It is already filed as https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111688 .

Thanks,
Andrew

>
>   Maciej


Re: [PATCH]AArch64 Handle copysign (x, -1) expansion efficiently

2023-10-05 Thread Andrew Pinski
On Thu, Oct 5, 2023 at 11:22 AM Tamar Christina  wrote:
>
> Hi All,
>
> copysign (x, -1) is effectively fneg (abs (x)) which on AArch64 can be
> most efficiently done by doing an OR of the signbit.
>
> The middle-end will optimize fneg (abs (x)) now to copysign as the
> canonical form and so this optimizes the expansion.
>
> If the target has an inclusive-OR that takes an immediate, then the 
> transformed
> instruction is both shorter and faster.  For those that don't, the immediate
> has to be separately constructed, but this still ends up being faster as the
> immediate construction is not on the critical path.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Note that this is part of another patch series, the additional testcases
> are mutually dependent on the match.pd patch.  As such the tests are added
> there instead of here.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> PR tree-optimization/109154
> * config/aarch64/aarch64.md (copysign3): Handle
> copysign (x, -1).
> * config/aarch64/aarch64-simd.md (copysign3): Likewise.
> * config/aarch64/aarch64-sve.md (copysign3): Likewise.
>
> --- inline copy of patch --
> diff --git a/gcc/config/aarch64/aarch64-simd.md 
> b/gcc/config/aarch64/aarch64-simd.md
> index 
> 25a1e4e8ecf767636c0ff3cdab6cad6e1482f73e..a78e77dcc3473445108b06c50f9c28a8369f3e3f
>  100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -754,15 +754,33 @@ (define_insn 
> "aarch64_dot_lane<
>  (define_expand "copysign3"
>[(match_operand:VHSDF 0 "register_operand")
> (match_operand:VHSDF 1 "register_operand")
> -   (match_operand:VHSDF 2 "register_operand")]
> +   (match_operand:VHSDF 2 "nonmemory_operand")]
>"TARGET_SIMD"
>  {
> -  rtx v_bitmask = gen_reg_rtx (mode);
> +  machine_mode int_mode = mode;
> +  rtx v_bitmask = gen_reg_rtx (int_mode);
>int bits = GET_MODE_UNIT_BITSIZE (mode) - 1;
>
>emit_move_insn (v_bitmask,
>   aarch64_simd_gen_const_vector_dup (mode,
>  HOST_WIDE_INT_M1U << 
> bits));
> +
> +  /* copysign (x, -1) should instead be expanded as orr with the sign
> + bit.  */
> +  if (!REG_P (operands[2]))
> +{
> +  auto r0
> +   = CONST_DOUBLE_REAL_VALUE (unwrap_const_vec_duplicate (operands[2]));
> +  if (-1 == real_to_integer (r0))

I think this should be: REAL_VALUE_NEGATIVE (r0) instead. Just copying
the sign here is needed, right?
Also seems like double check that this is a vec_duplicate of a const
and that the constant is a CONST_DOUBLE?


> +   {
> + emit_insn (gen_ior3 (
> +   lowpart_subreg (int_mode, operands[0], mode),
> +   lowpart_subreg (int_mode, operands[1], mode), v_bitmask));
> + DONE;
> +   }
> +}
> +
> +  operands[2] = force_reg (mode, operands[2]);
>emit_insn (gen_aarch64_simd_bsl (operands[0], v_bitmask,
>  operands[2], operands[1]));
>DONE;
> diff --git a/gcc/config/aarch64/aarch64-sve.md 
> b/gcc/config/aarch64/aarch64-sve.md
> index 
> 5a652d8536a0ef9461f40da7b22834e683e73ceb..071400c820a5b106ddf9dc9faebb117975d74ea0
>  100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -6387,7 +6387,7 @@ (define_insn "*3"
>  (define_expand "copysign3"
>[(match_operand:SVE_FULL_F 0 "register_operand")
> (match_operand:SVE_FULL_F 1 "register_operand")
> -   (match_operand:SVE_FULL_F 2 "register_operand")]
> +   (match_operand:SVE_FULL_F 2 "nonmemory_operand")]
>"TARGET_SVE"
>{
>  rtx sign = gen_reg_rtx (mode);
> @@ -6398,11 +6398,26 @@ (define_expand "copysign3"
>  rtx arg1 = lowpart_subreg (mode, operands[1], mode);
>  rtx arg2 = lowpart_subreg (mode, operands[2], mode);
>
> -emit_insn (gen_and3
> -  (sign, arg2,
> -   aarch64_simd_gen_const_vector_dup (mode,
> -  HOST_WIDE_INT_M1U
> -  << bits)));
> +rtx v_sign_bitmask
> +  = aarch64_simd_gen_const_vector_dup (mode,
> +  HOST_WIDE_INT_M1U << bits);
> +
> +/* copysign (x, -1) should instead be expanded as orr with the sign
> +   bit.  */
> +if (!REG_P (operands[2]))
> +  {
> +   auto r0
> + = CONST_DOUBLE_REAL_VALUE (unwrap_const_vec_duplicate 
> (operands[2]));
> +   if (-1 == real_to_integer (r0))

Likewise.

> + {
> +   emit_insn (gen_ior3 (int_res, arg1, v_sign_bitmask));
> +   emit_move_insn (operands[0], gen_lowpart (mode, int_res));
> +   DONE;
> + }
> +  }
> +
> +operands[2] = force_reg (mode, operands[2]);
> +emit_insn (gen_and3 (sign, arg2, v_sign_bitmask));
>  emit_insn (gen_and3
>(mant, arg1,
> aarch64_simd_gen_const_vector_dup (mode

Re: [PATCH]AArch64 Add SVE implementation for cond_copysign.

2023-10-05 Thread Andrew Pinski
On Thu, Oct 5, 2023 at 12:48 PM Tamar Christina  wrote:
>
> > -Original Message-
> > From: Richard Sandiford 
> > Sent: Thursday, October 5, 2023 8:29 PM
> > To: Tamar Christina 
> > Cc: gcc-patches@gcc.gnu.org; nd ; Richard Earnshaw
> > ; Marcus Shawcroft
> > ; Kyrylo Tkachov 
> > Subject: Re: [PATCH]AArch64 Add SVE implementation for cond_copysign.
> >
> > Tamar Christina  writes:
> > > Hi All,
> > >
> > > This adds an implementation for masked copysign along with an
> > > optimized pattern for masked copysign (x, -1).
> >
> > It feels like we're ending up with a lot of AArch64-specific code that just 
> > hard-
> > codes the observation that changing the sign is equivalent to changing the 
> > top
> > bit.  We then need to make sure that we choose the best way of changing the
> > top bit for any given situation.
> >
> > Hard-coding the -1/negative case is one instance of that.  But it looks 
> > like we
> > also fail to use the best sequence for SVE2.  E.g.
> > [https://godbolt.org/z/ajh3MM5jv]:
> >
> > #include 
> >
> > void f(double *restrict a, double *restrict b) {
> > for (int i = 0; i < 100; ++i)
> > a[i] = __builtin_copysign(a[i], b[i]); }
> >
> > void g(uint64_t *restrict a, uint64_t *restrict b, uint64_t c) {
> > for (int i = 0; i < 100; ++i)
> > a[i] = (a[i] & ~c) | (b[i] & c); }
> >
> > gives:
> >
> > f:
> > mov x2, 0
> > mov w3, 100
> > whilelo p7.d, wzr, w3
> > .L2:
> > ld1dz30.d, p7/z, [x0, x2, lsl 3]
> > ld1dz31.d, p7/z, [x1, x2, lsl 3]
> > and z30.d, z30.d, #0x7fff
> > and z31.d, z31.d, #0x8000
> > orr z31.d, z31.d, z30.d
> > st1dz31.d, p7, [x0, x2, lsl 3]
> > incdx2
> > whilelo p7.d, w2, w3
> > b.any   .L2
> > ret
> > g:
> > mov x3, 0
> > mov w4, 100
> > mov z29.d, x2
> > whilelo p7.d, wzr, w4
> > .L6:
> > ld1dz30.d, p7/z, [x0, x3, lsl 3]
> > ld1dz31.d, p7/z, [x1, x3, lsl 3]
> > bsl z31.d, z31.d, z30.d, z29.d
> > st1dz31.d, p7, [x0, x3, lsl 3]
> > incdx3
> > whilelo p7.d, w3, w4
> > b.any   .L6
> > ret
> >
> > I saw that you originally tried to do this in match.pd and that the 
> > decision was
> > to fold to copysign instead.  But perhaps there's a compromise where isel 
> > does
> > something with the (new) copysign canonical form?
> > I.e. could we go with your new version of the match.pd patch, and add some
> > isel stuff as a follow-on?
> >
>
> Sure if that's what's desired But..
>
> The example you posted above is for instance worse for x86 
> https://godbolt.org/z/x9ccqxW6T
> where the first operation has a dependency chain of 2 and the latter of 3.  
> It's likely any
> open coding of this operation is going to hurt a target.

But that is because it is not using andn when it should be.
That would be https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94790
(scalar fix but not vector) and
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90323 IIRC.
AARCH64 already has a pattern to match the above which is why it works
there but not x86_64.

Thanks,
Andrew

>
> So I'm unsure what isel transform this into...
>
> Tamar
>
> > Not saying no to this patch, just thought that the above was worth
> > considering.
> >
> > [I agree with Andrew's comments FWIW.]
> >
> > Thanks,
> > Richard
> >
> > >
> > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> > >
> > > Ok for master?
> > >
> > > Thanks,
> > > Tamar
> > >
> > > gcc/ChangeLog:
> > >
> > > PR tree-optimization/109154
> > > * config/aarch64/aarch64-sve.md (cond_copysign): New.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > PR tree-optimization/109154
> > > * gcc.target/aarch64/sve/fneg-abs_5.c: New test.
> > >
> > > --- inline copy of patch --
> > > diff --git a/gcc/config/aarch64/aarch64-sve.md
> > > b/gcc/config/aarch64/aarch64-sve.md
> > > index
> > >
> > 071400c820a5b106ddf9dc9faebb117975d74ea0..00ca30c24624dc661254
> > 568f45b6
> > > 1a14aa11c305 100644
> > > --- a/gcc/config/aarch64/aarch64-sve.md
> > > +++ b/gcc/config/aarch64/aarch64-sve.md
> > > @@ -6429,6 +6429,57 @@ (define_expand "copysign3"
> > >}
> > >  )
> > >
> > > +(define_expand "cond_copysign"
> > > +  [(match_operand:SVE_FULL_F 0 "register_operand")
> > > +   (match_operand: 1 "register_operand")
> > > +   (match_operand:SVE_FULL_F 2 "register_operand")
> > > +   (match_operand:SVE_FULL_F 3 "nonmemory_operand")
> > > +   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
> > > +  "TARGET_SVE"
> > > +  {
> > > +rtx sign = gen_reg_rtx (mode);
> > > +rtx mant = gen_reg_rtx (mode);
> > > +rtx int_res = gen_reg_rtx (mode);
> > > +int bits = GET_MODE_UNIT_BITSIZE (mode) - 1;
> > > +
> > > +rtx arg2 = lowpart_subreg (mode, operands[2],
> > mode);
> > > +rtx arg3 = lowpart_subreg (mode, operand

[PATCH] MATCH: Fix infinite loop between `vec_cond(vec_cond(a, b, 0), c, d)` and `a & b`

2023-10-05 Thread Andrew Pinski
Match has a pattern which converts `vec_cond(vec_cond(a,b,0), c, d)`
into `vec_cond(a & b, c, d)` but since in this case a is a comparison
fold will change `a & b` back into `vec_cond(a,b,0)` which causes an
infinite loop.
The best way to fix this is to enable the patterns for vec_cond(*,vec_cond,*)
only for GIMPLE so we don't get an infinite loop for fold any more.

Note this is a latent bug since these patterns were added in 
r11-2577-g229752afe3156a
and was exposed by r14-3350-g47b833a9abe1 where we are now able to remove a 
VIEW_CONVERT_EXPR.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR middle-end/111699

gcc/ChangeLog:

* match.pd ((c ? a : b) op d, (c ? a : b) op (c ? d : e),
(v ? w : 0) ? a : b, c1 ? c2 ? a : b : b): Enable only for GIMPLE.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/pr111699-1.c: New test.
---
 gcc/match.pd | 5 +
 gcc/testsuite/gcc.c-torture/compile/pr111699-1.c | 7 +++
 2 files changed, 12 insertions(+)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr111699-1.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 4bdd83e6e06..31bfd8b6b68 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5045,6 +5045,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* (v ? w : 0) ? a : b is just (v & w) ? a : b
Currently disabled after pass lvec because ARM understands
VEC_COND_EXPR but not a plain v==w fed to BIT_IOR_EXPR.  */
+#if GIMPLE
+/* These can only be done in gimple as fold likes to convert:
+   (CMP) & N into (CMP) ? N : 0
+   and we try to match the same pattern again and again. */
 (simplify
  (vec_cond (vec_cond:s @0 @3 integer_zerop) @1 @2)
  (if (optimize_vectors_before_lowering_p () && types_match (@0, @3))
@@ -5079,6 +5083,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (vec_cond @0 @3 (vec_cond:s @1 @2 @3))
  (if (optimize_vectors_before_lowering_p () && types_match (@0, @1))
   (vec_cond (bit_and (bit_not @0) @1) @2 @3)))
+#endif
 
 /* Canonicalize mask ? { 0, ... } : { -1, ...} to ~mask if the mask
types are compatible.  */
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr111699-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr111699-1.c
new file mode 100644
index 000..87b127ed199
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr111699-1.c
@@ -0,0 +1,7 @@
+typedef unsigned char __attribute__((__vector_size__ (8))) V;
+
+void
+foo (V *v)
+{
+  *v =  (V) 0x107B9A7FF >= (*v <= 0);
+}
-- 
2.39.3



Re: [PATCH]middle-end match.pd: optimize fneg (fabs (x)) to x | (1 << signbit(x)) [PR109154]

2023-10-09 Thread Andrew Pinski
On Mon, Oct 9, 2023 at 12:20 AM Richard Biener  wrote:
>
> On Sat, 7 Oct 2023, Richard Sandiford wrote:
>
> > Richard Biener  writes:
> > >> Am 07.10.2023 um 11:23 schrieb Richard Sandiford 
> > >> >> Richard Biener  writes:
> > >>> On Thu, 5 Oct 2023, Tamar Christina wrote:
> > >>>
> > > I suppose the idea is that -abs(x) might be easier to optimize with 
> > > other
> > > patterns (consider a - copysign(x,...), optimizing to a + abs(x)).
> > >
> > > For abs vs copysign it's a canonicalization, but (negate (abs @0)) is 
> > > less
> > > canonical than copysign.
> > >
> > >> Should I try removing this?
> > >
> > > I'd say yes (and put the reverse canonicalization next to this 
> > > pattern).
> > >
> > 
> >  This patch transforms fneg (fabs (x)) into copysign (x, -1) which is 
> >  more
> >  canonical and allows a target to expand this sequence efficiently.  
> >  Such
> >  sequences are common in scientific code working with gradients.
> > 
> >  various optimizations in match.pd only happened on COPYSIGN but not 
> >  COPYSIGN_ALL
> >  which means they exclude IFN_COPYSIGN.  COPYSIGN however is restricted 
> >  to only
> > >>>
> > >>> That's not true:
> > >>>
> > >>> (define_operator_list COPYSIGN
> > >>>BUILT_IN_COPYSIGNF
> > >>>BUILT_IN_COPYSIGN
> > >>>BUILT_IN_COPYSIGNL
> > >>>IFN_COPYSIGN)
> > >>>
> > >>> but they miss the extended float builtin variants like
> > >>> __builtin_copysignf16.  Also see below
> > >>>
> >  the C99 builtins and so doesn't work for vectors.
> > 
> >  The patch expands these optimizations to work on COPYSIGN_ALL.
> > 
> >  There is an existing canonicalization of copysign (x, -1) to fneg 
> >  (fabs (x))
> >  which I remove since this is a less efficient form.  The testsuite is 
> >  also
> >  updated in light of this.
> > 
> >  Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> > 
> >  Ok for master?
> > 
> >  Thanks,
> >  Tamar
> > 
> >  gcc/ChangeLog:
> > 
> > PR tree-optimization/109154
> > * match.pd: Add new neg+abs rule, remove inverse copysign rule and
> > expand existing copysign optimizations.
> > 
> >  gcc/testsuite/ChangeLog:
> > 
> > PR tree-optimization/109154
> > * gcc.dg/fold-copysign-1.c: Updated.
> > * gcc.dg/pr55152-2.c: Updated.
> > * gcc.dg/tree-ssa/abs-4.c: Updated.
> > * gcc.dg/tree-ssa/backprop-6.c: Updated.
> > * gcc.dg/tree-ssa/copy-sign-2.c: Updated.
> > * gcc.dg/tree-ssa/mult-abs-2.c: Updated.
> > * gcc.target/aarch64/fneg-abs_1.c: New test.
> > * gcc.target/aarch64/fneg-abs_2.c: New test.
> > * gcc.target/aarch64/fneg-abs_3.c: New test.
> > * gcc.target/aarch64/fneg-abs_4.c: New test.
> > * gcc.target/aarch64/sve/fneg-abs_1.c: New test.
> > * gcc.target/aarch64/sve/fneg-abs_2.c: New test.
> > * gcc.target/aarch64/sve/fneg-abs_3.c: New test.
> > * gcc.target/aarch64/sve/fneg-abs_4.c: New test.
> > 
> >  --- inline copy of patch ---
> > 
> >  diff --git a/gcc/match.pd b/gcc/match.pd
> >  index 
> >  4bdd83e6e061b16dbdb2845b9398fcfb8a6c9739..bd6599d36021e119f51a4928354f580ffe82c6e2
> >   100644
> >  --- a/gcc/match.pd
> >  +++ b/gcc/match.pd
> >  @@ -1074,45 +1074,43 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > 
> >  /* cos(copysign(x, y)) -> cos(x).  Similarly for cosh.  */
> >  (for coss (COS COSH)
> >  - copysigns (COPYSIGN)
> >  - (simplify
> >  -  (coss (copysigns @0 @1))
> >  -   (coss @0)))
> >  + (for copysigns (COPYSIGN_ALL)
> > >>>
> > >>> So this ends up generating for example the match
> > >>> (cosf (copysignl ...)) which doesn't make much sense.
> > >>>
> > >>> The lock-step iteration did
> > >>> (cosf (copysignf ..)) ... (ifn_cos (ifn_copysign ...))
> > >>> which is leaner but misses the case of
> > >>> (cosf (ifn_copysign ..)) - that's probably what you are
> > >>> after with this change.
> > >>>
> > >>> That said, there isn't a nice solution (without altering the match.pd
> > >>> IL).  There's the explicit solution, spelling out all combinations.
> > >>>
> > >>> So if we want to go with yout pragmatic solution changing this
> > >>> to use COPYSIGN_ALL isn't necessary, only changing the lock-step
> > >>> for iteration to a cross product for iteration is.
> > >>>
> > >>> Changing just this pattern to
> > >>>
> > >>> (for coss (COS COSH)
> > >>> (for copysigns (COPYSIGN)
> > >>>  (simplify
> > >>>   (coss (copysigns @0 @1))
> > >>>   (coss @0
> > >>>
> > >>> increases the total number of gimple-match-x.cc lines from
> > >>> 234988 to 235324.
> > >>
> > >> I guess the difference between this and the later suggestions is that
> > >> this one allows builtin copysign to be paired with ifn cos, which would
> > >

[PATCH] MATCH: [PR111679] Add alternative simplification of `a | ((~a) ^ b)`

2023-10-09 Thread Andrew Pinski
So currently we have a simplification for `a | ~(a ^ b)` but
that does not match the case where we had originally `(~a) | (a ^ b)`
so we need to add a new pattern that matches that and uses 
bitwise_inverted_equal_p
that also catches comparisons too.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/111679

gcc/ChangeLog:

* match.pd (`a | ((~a) ^ b)`): New pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/bitops-5.c: New test.
---
 gcc/match.pd |  8 +++
 gcc/testsuite/gcc.dg/tree-ssa/bitops-5.c | 27 
 2 files changed, 35 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitops-5.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 31bfd8b6b68..49740d189a7 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1350,6 +1350,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   && TYPE_PRECISION (TREE_TYPE (@0)) == 1)
   (bit_ior @0 (bit_xor @1 { build_one_cst (type); }
 
+/* a | ((~a) ^ b)  -->  a | (~b) (alt version of the above 2) */
+(simplify
+ (bit_ior:c @0 (bit_xor:cs @1 @2))
+ (with { bool wascmp; }
+ (if (bitwise_inverted_equal_p (@0, @1, wascmp)
+  && (!wascmp || element_precision (type) == 1))
+  (bit_ior @0 (bit_not @2)
+
 /* (a | b) | (a &^ b)  -->  a | b  */
 (for op (bit_and bit_xor)
  (simplify
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-5.c 
b/gcc/testsuite/gcc.dg/tree-ssa/bitops-5.c
new file mode 100644
index 000..990610e3002
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-5.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+/* PR tree-optimization/111679 */
+
+int f1(int a, int b)
+{
+return (~a) | (a ^ b); // ~(a & b) or (~a) | (~b)
+}
+
+_Bool fb(_Bool c, _Bool d)
+{
+return (!c) | (c ^ d); // ~(c & d) or (~c) | (~d)
+}
+
+_Bool fb1(int x, int y)
+{
+_Bool a = x == 10,  b = y > 100;
+return (!a) | (a ^ b); // ~(a & b) or (~a) | (~b)
+// or (x != 10) | (y <= 100)
+}
+
+/* { dg-final { scan-tree-dump-not   "bit_xor_expr, "   "optimized" } } */
+/* { dg-final { scan-tree-dump-times "bit_not_expr, " 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "bit_and_expr, " 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "bit_ior_expr, " 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ne_expr, _\[0-9\]+, x_\[0-9\]+"  1 
"optimized" } } */
+/* { dg-final { scan-tree-dump-times "le_expr, _\[0-9\]+, y_\[0-9\]+"  1 
"optimized" } } */
-- 
2.39.3



Re: [PATCH] use get_range_query to replace get_global_range_query

2023-10-10 Thread Andrew Pinski
On Tue, Oct 10, 2023 at 12:02 AM Richard Biener  wrote:
>
> On Tue, 10 Oct 2023, Jiufu Guo wrote:
>
> > Hi,
> >
> > For "get_global_range_query" SSA_NAME_RANGE_INFO can be queried.
> > For "get_range_query", it could get more context-aware range info.
> > And look at the implementation of "get_range_query",  it returns
> > global range if no local fun info.
> >
> > So, if not quering for SSA_NAME, it would be ok to use get_range_query
> > to replace get_global_range_query.
> >
> > Patch https://gcc.gnu.org/pipermail/gcc-patches/2023-September/630389.html,
> > Uses get_range_query could handle more cases.
> >
> > This patch replaces get_global_range_query by get_range_query for
> > most possible code pieces (but deoes not draft new test cases).
> >
> > Pass bootstrap & regtest on ppc64{,le} and x86_64.
> > Is this ok for trunk.
>
> See below
>
> >
> > BR,
> > Jeff (Jiufu Guo)
> >
> > gcc/ChangeLog:
> >
> >   * builtins.cc (expand_builtin_strnlen): Replace get_global_range_query
> >   by get_range_query.
> >   * fold-const.cc (expr_not_equal_to): Likewise.
> >   * gimple-fold.cc (size_must_be_zero_p): Likewise.
> >   * gimple-range-fold.cc (fur_source::fur_source): Likewise.
> >   * gimple-ssa-warn-access.cc (check_nul_terminated_array): Likewise.
> >   * tree-dfa.cc (get_ref_base_and_extent): Likewise.
> >   * tree-ssa-loop-split.cc (split_at_bb_p): Likewise.
> >   * tree-ssa-loop-unswitch.cc 
> > (evaluate_control_stmt_using_entry_checks):
> >   Likewise.
> >
> > ---
> >  gcc/builtins.cc   | 2 +-
> >  gcc/fold-const.cc | 6 +-
> >  gcc/gimple-fold.cc| 6 ++
> >  gcc/gimple-range-fold.cc  | 4 +---
> >  gcc/gimple-ssa-warn-access.cc | 2 +-
> >  gcc/tree-dfa.cc   | 5 +
> >  gcc/tree-ssa-loop-split.cc| 2 +-
> >  gcc/tree-ssa-loop-unswitch.cc | 2 +-
> >  8 files changed, 9 insertions(+), 20 deletions(-)
> >
> > diff --git a/gcc/builtins.cc b/gcc/builtins.cc
> > index cb90bd03b3e..4e0a77ff8e0 100644
> > --- a/gcc/builtins.cc
> > +++ b/gcc/builtins.cc
> > @@ -3477,7 +3477,7 @@ expand_builtin_strnlen (tree exp, rtx target, 
> > machine_mode target_mode)
> >
> >wide_int min, max;
> >value_range r;
> > -  get_global_range_query ()->range_of_expr (r, bound);
> > +  get_range_query (cfun)->range_of_expr (r, bound);
>
> expand doesn't have a ranger instance so this is a no-op.  I'm unsure
> if it would be safe given we're half GIMPLE, half RTL.  Please leave it
> out.

It definitely does not work and can't as I tried to enable a ranger
instance and it didn't work. I wrote up my experience here:
https://gcc.gnu.org/pipermail/gcc/2023-September/242407.html

Thanks,
Andrew Pinski

>
> >if (r.varying_p () || r.undefined_p ())
> >  return NULL_RTX;
> >min = r.lower_bound ();
> > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> > index 4f8561509ff..15134b21b9f 100644
> > --- a/gcc/fold-const.cc
> > +++ b/gcc/fold-const.cc
> > @@ -11056,11 +11056,7 @@ expr_not_equal_to (tree t, const wide_int &w)
> >if (!INTEGRAL_TYPE_P (TREE_TYPE (t)))
> >   return false;
> >
> > -  if (cfun)
> > - get_range_query (cfun)->range_of_expr (vr, t);
> > -  else
> > - get_global_range_query ()->range_of_expr (vr, t);
> > -
> > +  get_range_query (cfun)->range_of_expr (vr, t);
>
> These kind of changes look obvious.
>
> >if (!vr.undefined_p () && !vr.contains_p (w))
> >   return true;
> >/* If T has some known zero bits and W has any of those bits set,
> > diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
> > index dc89975270c..853edd9e5d4 100644
> > --- a/gcc/gimple-fold.cc
> > +++ b/gcc/gimple-fold.cc
> > @@ -876,10 +876,8 @@ size_must_be_zero_p (tree size)
> >wide_int zero = wi::zero (TYPE_PRECISION (type));
> >value_range valid_range (type, zero, ssize_max);
> >value_range vr;
> > -  if (cfun)
> > -get_range_query (cfun)->range_of_expr (vr, size);
> > -  else
> > -get_global_range_query ()->range_of_expr (vr, size);
> > +  get_range_query (cfun)->range_of_expr (vr, size);
> > +
> >if (vr.undefined_p ())
> >  vr.set_varying (TREE_TYPE (size));
> >vr.intersect (valid_range);
> > diff --git a/gcc/gimple-range-fold.cc b/gcc/gimple-ran

[PATCH] MATCH: [PR111282] Simplify `a & (b ^ ~a)` to `a & b`

2023-10-10 Thread Andrew Pinski
While `a & (b ^ ~a)` is optimized to `a & b` on the rtl level,
it is always good to optimize this at the gimple level and allows
us to match a few extra things including where a is a comparison.

Note I had to update/change the testcase and-1.c to avoid matching
this case as we can match -2 and 1 as bitwise inversions.

PR tree-optimization/111282

gcc/ChangeLog:

* match.pd (`a & ~(a ^ b)`, `a & (a == b)`,
`a & ((~a) ^ b)`): New patterns.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/and-1.c: Update testcase to avoid
matching `~1 & (a ^ 1)` simplification.
* gcc.dg/tree-ssa/bitops-6.c: New test.
---
 gcc/match.pd | 20 ++
 gcc/testsuite/gcc.dg/tree-ssa/and-1.c|  6 ++---
 gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c | 33 
 3 files changed, 56 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 49740d189a7..26b05c157c1 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1358,6 +1358,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   && (!wascmp || element_precision (type) == 1))
   (bit_ior @0 (bit_not @2)
 
+/* a & ~(a ^ b)  -->  a & b  */
+(simplify
+ (bit_and:c @0 (bit_not (bit_xor:c @0 @1)))
+ (bit_and @0 @1))
+
+/* a & (a == b)  -->  a & b (boolean version of the above). */
+(simplify
+ (bit_and:c @0 (nop_convert? (eq:c @0 @1)))
+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+  && TYPE_PRECISION (TREE_TYPE (@0)) == 1)
+  (bit_and @0 @1)))
+
+/* a & ((~a) ^ b)  -->  a & b (alt version of the above 2) */
+(simplify
+ (bit_and:c @0 (bit_xor:c @1 @2))
+ (with { bool wascmp; }
+ (if (bitwise_inverted_equal_p (@0, @1, wascmp)
+  && (!wascmp || element_precision (type) == 1))
+  (bit_and @0 @2
+
 /* (a | b) | (a &^ b)  -->  a | b  */
 (for op (bit_and bit_xor)
  (simplify
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/and-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/and-1.c
index 276c2b9bd8a..27d38907eea 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/and-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/and-1.c
@@ -2,10 +2,10 @@
 /* { dg-options "-O -fdump-tree-optimized-raw" } */
 
 int f(int in) {
-  in = in | 3;
-  in = in ^ 1;
+  in = in | 7;
+  in = in ^ 3;
   in = (in & ~(unsigned long)1);
   return in;
 }
 
-/* { dg-final { scan-tree-dump-not "bit_and_expr" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "bit_and_expr, "  "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c 
b/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c
new file mode 100644
index 000..e6ab2fd6c71
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+/* PR tree-optimization/111282 */
+
+
+int f(int a, int b)
+{
+  return a & (b ^ ~a); // a & b
+}
+
+_Bool fb(_Bool x, _Bool y)
+{
+  return x & (y ^ !x); // x & y
+}
+
+int fa(int w, int z)
+{
+  return (~w) & (w ^ z); // ~w & z
+}
+
+int fcmp(int x, int y)
+{
+  _Bool a = x == 2;
+  _Bool b = y == 1;
+  return a & (b ^ !a); // (x == 2) & (y == 1)
+}
+
+/* { dg-final { scan-tree-dump-not   "bit_xor_expr, "   "optimized" } } */
+/* { dg-final { scan-tree-dump-times "bit_and_expr, " 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "bit_not_expr, " 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-not   "ne_expr, ""optimized" } } */
+/* { dg-final { scan-tree-dump-times "eq_expr, "  2 "optimized" } } */
+
-- 
2.39.3



Re: RISC-V: Support CORE-V XCVMAC and XCVALU extensions

2023-10-11 Thread Andrew Pinski
On Wed, Oct 11, 2023 at 6:01 PM juzhe.zh...@rivai.ai
 wrote:
>
> ../../../../gcc/gcc/doc/extend.texi:21708: warning: node next `RISC-V Vector 
> Intrinsics' in menu `CORE-V Built-in Functions' and in sectioning `RX 
> Built-in Functions' differ
> ../../../../gcc/gcc/doc/extend.texi:21716: warning: node `RX Built-in 
> Functions' is next for `CORE-V Built-in Functions' in menu but not in 
> sectioning
> ../../../../gcc/gcc/doc/extend.texi:21716: warning: node `RISC-V Vector 
> Intrinsics' is prev for `CORE-V Built-in Functions' in menu but not in 
> sectioning
> ../../../../gcc/gcc/doc/extend.texi:21716: warning: node up `CORE-V Built-in 
> Functions' in menu `Target Builtins' and in sectioning `RISC-V Vector 
> Intrinsics' differ
> ../../../../gcc/gcc/doc/extend.texi:21708: node `RISC-V Vector Intrinsics' 
> lacks menu item for `CORE-V Built-in Functions' despite being its Up target
> ../../../../gcc/gcc/doc/extend.texi:21889: warning: node prev `RX Built-in 
> Functions' in menu `CORE-V Built-in Functions' and in sectioning `RISC-V 
> Vector Intrinsics' differ
> In file included from ../../../../gcc/gcc/gensupport.cc:26:0:
> ../../../../gcc/gcc/rtl.h:66:26: warning: ‘rtx_def::code’ is too small to 
> hold all values of ‘enum rtx_code’
>  #define RTX_CODE_BITSIZE 8
>   ^
> ../../../../gcc/gcc/rtl.h:318:33: note: in expansion of macro 
> ‘RTX_CODE_BITSIZE’
>ENUM_BITFIELD(rtx_code) code: RTX_CODE_BITSIZE;
>  ^~~~
>
> make[2]: *** [Makefile:3534: doc/gcc.info] Error 1
> make[2]: *** Waiting for unfinished jobs
> rm gfdl.pod gcc.pod gcov-dump.pod gcov-tool.pod fsf-funding.pod gpl.pod 
> cpp.pod gcov.pod lto-dump.pod
> make[2]: Leaving directory 
> '/work/home/jzzhong/work/toolchain/riscv/build/dev-rv64gcv_zfh-lp64d-medany-newlib-spike-debug/build-gcc-newlib-stage1/gcc'
> make[1]: *** [Makefile:4648: all-gcc] Error 2
> make[1]: Leaving directory 
> '/work/home/jzzhong/work/toolchain/riscv/build/dev-rv64gcv_zfh-lp64d-medany-newlib-spike-debug/build-gcc-newlib-stage1'
> make: *** [Makefile:590: stamps/build-gcc-newlib-stage1] Error 2

This is also recorded as
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111777 . It breaks more
than just RISCV; it depends on the version of texinfo that is
installed too.

Thanks,
Andrew

>
> 
> juzhe.zh...@rivai.ai


[PATCH] MATCH: [PR111432] Simplify `a & (x | CST)` to a when we know that (a & ~CST) == 0

2023-10-13 Thread Andrew Pinski
This adds the simplification `a & (x | CST)` to a when we know that
`(a & ~CST) == 0`, in a similar fashion to how `a & CST` is handled.

I looked into handling `a | (x & CST)` too, but I don't see any decent
simplifications happening there.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/111432

gcc/ChangeLog:

* match.pd (`a & (x | CST)`): New pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/bitops-7.c: New test.
---
 gcc/match.pd |  8 
 gcc/testsuite/gcc.dg/tree-ssa/bitops-7.c | 24 
 2 files changed, 32 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitops-7.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 51e5065d086..45624f3dcb4 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1550,6 +1550,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
   && wi::bit_and_not (get_nonzero_bits (@0), wi::to_wide (@1)) == 0)
   @0))
+
+/* `a & (x | CST)` -> a if we know that (a & ~CST) == 0   */
+(simplify
+ (bit_and:c SSA_NAME@0 (bit_ior @1 INTEGER_CST@2))
+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+  && wi::bit_and_not (get_nonzero_bits (@0), wi::to_wide (@2)) == 0)
+  @0))
+
 /* x | C -> C if we know that x & ~C == 0.  */
 (simplify
  (bit_ior SSA_NAME@0 INTEGER_CST@1)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-7.c 
b/gcc/testsuite/gcc.dg/tree-ssa/bitops-7.c
new file mode 100644
index 000..7fb18db3a11
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-7.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized-raw" } */
+/* PR tree-optimization/111432 */
+
+int
+foo3(int c, int bb)
+{
+  if ((bb & ~3)!=0) __builtin_unreachable();
+  return (bb & (c|3));
+}
+
+int
+foo_bool(int c, _Bool bb)
+{
+  return (bb & (c|7));
+}
+
+/* Both of these functions should be able to remove the `IOR` and `AND`
+   as the only bits that are non-zero for bb is set on the other side
+   of the `AND`.
+ */
+
+/* { dg-final { scan-tree-dump-not   "bit_ior_expr, "   "optimized" } } */
+/* { dg-final { scan-tree-dump-not   "bit_and_expr, "   "optimized" } } */
-- 
2.39.3



[PATCH 2/2] [c] Fix PR 101364: ICE after error due to diagnose_arglist_conflict not checking for error

2023-10-14 Thread Andrew Pinski
When checking whether a function declaration has a conflict due to
promotions, there is no test to see if the type was an error mark before
calling c_type_promotes_to. c_type_promotes_to is not ready for error_mark
and causes an ICE.

This adds a check for error before the call of c_type_promotes_to.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR c/101364

gcc/c/ChangeLog:

* c-decl.cc (diagnose_arglist_conflict): Test for
error mark before calling of c_type_promotes_to.

gcc/testsuite/ChangeLog:

* gcc.dg/pr101364-1.c: New test.
---
 gcc/c/c-decl.cc   | 3 ++-
 gcc/testsuite/gcc.dg/pr101364-1.c | 8 
 2 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr101364-1.c

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 5822faf01b4..eb2df08c0a7 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -1899,7 +1899,8 @@ diagnose_arglist_conflict (tree newdecl, tree olddecl,
  break;
}
 
-  if (c_type_promotes_to (type) != type)
+  if (!error_operand_p (type)
+ && c_type_promotes_to (type) != type)
{
  inform (input_location, "an argument type that has a default "
  "promotion cannot match an empty parameter name list "
diff --git a/gcc/testsuite/gcc.dg/pr101364-1.c 
b/gcc/testsuite/gcc.dg/pr101364-1.c
new file mode 100644
index 000..e7c94a05553
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr101364-1.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c90 "} */
+
+void fruit(); /* { dg-message "previous declaration" } */
+void fruit( /* { dg-error "conflicting types for" } */
+int b[x], /* { dg-error "undeclared " } */
+short c)
+{} /* { dg-message "an argument type that has a" } */
-- 
2.39.3



[PATCH 1/2] Fix ICE due to c_safe_arg_type_equiv_p not checking for error_mark node

2023-10-14 Thread Andrew Pinski
This is a simple error recovery issue when c_safe_arg_type_equiv_p
was added in r8-5312-gc65e18d3331aa999. The issue is that after
an error, an argument type (of a function type) might turn
into an error mark node and c_safe_arg_type_equiv_p was not ready
for that. So this just adds a check for error operand for its
arguments before getting the main variant.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR c/101285

gcc/c/ChangeLog:

* c-typeck.cc (c_safe_arg_type_equiv_p): Return true for error
operands early.

gcc/testsuite/ChangeLog:

* gcc.dg/pr101285-1.c: New test.
---
 gcc/c/c-typeck.cc |  3 +++
 gcc/testsuite/gcc.dg/pr101285-1.c | 10 ++
 2 files changed, 13 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr101285-1.c

diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index e55e887da14..6e044b4afbc 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -5960,6 +5960,9 @@ handle_warn_cast_qual (location_t loc, tree type, tree 
otype)
 static bool
 c_safe_arg_type_equiv_p (tree t1, tree t2)
 {
+  if (error_operand_p (t1) || error_operand_p (t2))
+return true;
+
   t1 = TYPE_MAIN_VARIANT (t1);
   t2 = TYPE_MAIN_VARIANT (t2);
 
diff --git a/gcc/testsuite/gcc.dg/pr101285-1.c 
b/gcc/testsuite/gcc.dg/pr101285-1.c
new file mode 100644
index 000..831e35f7662
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr101285-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-W -Wall" } */
+const int b;
+typedef void (*ft1)(int[b++]); /* { dg-error "read-only variable" } */
+void bar(int * z);
+void baz()
+{
+(ft1) bar; /* { dg-warning "statement with no effect" } */
+}
+
-- 
2.39.3



[PATCH] MATCH: Improve `A CMP 0 ? A : -A` set of patterns to use bitwise_equal_p.

2023-10-15 Thread Andrew Pinski
This improves the `A CMP 0 ? A : -A` set of match patterns to use
bitwise_equal_p which allows an nop cast between signed and unsigned.
This allows catching a few extra cases which were not being caught before.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

PR tree-optimization/101541
* match.pd (A CMP 0 ? A : -A): Improve
using bitwise_equal_p.

gcc/testsuite/ChangeLog:

PR tree-optimization/101541
* gcc.dg/tree-ssa/phi-opt-36.c: New test.
* gcc.dg/tree-ssa/phi-opt-37.c: New test.
---
 gcc/match.pd   | 49 -
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-36.c | 51 ++
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-37.c | 24 ++
 3 files changed, 104 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-36.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-37.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 45624f3dcb4..142e2dfbeb1 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5668,42 +5668,51 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  /* A == 0 ? A : -Asame as -A */
  (for cmp (eq uneq)
   (simplify
-   (cnd (cmp @0 zerop) @0 (negate@1 @0))
-(if (!HONOR_SIGNED_ZEROS (type))
+   (cnd (cmp @0 zerop) @2 (negate@1 @2))
+(if (!HONOR_SIGNED_ZEROS (type)
+&& bitwise_equal_p (@0, @2))
  @1))
   (simplify
-   (cnd (cmp @0 zerop) zerop (negate@1 @0))
-(if (!HONOR_SIGNED_ZEROS (type))
+   (cnd (cmp @0 zerop) zerop (negate@1 @2))
+(if (!HONOR_SIGNED_ZEROS (type)
+&& bitwise_equal_p (@0, @2))
  @1))
  )
  /* A != 0 ? A : -Asame as A */
  (for cmp (ne ltgt)
   (simplify
-   (cnd (cmp @0 zerop) @0 (negate @0))
-(if (!HONOR_SIGNED_ZEROS (type))
- @0))
+   (cnd (cmp @0 zerop) @1 (negate @1))
+(if (!HONOR_SIGNED_ZEROS (type)
+&& bitwise_equal_p (@0, @1))
+ @1))
   (simplify
-   (cnd (cmp @0 zerop) @0 integer_zerop)
-(if (!HONOR_SIGNED_ZEROS (type))
- @0))
+   (cnd (cmp @0 zerop) @1 integer_zerop)
+(if (!HONOR_SIGNED_ZEROS (type)
+&& bitwise_equal_p (@0, @1))
+ @1))
  )
  /* A >=/> 0 ? A : -Asame as abs (A) */
  (for cmp (ge gt)
   (simplify
-   (cnd (cmp @0 zerop) @0 (negate @0))
-(if (!HONOR_SIGNED_ZEROS (type)
-&& !TYPE_UNSIGNED (type))
- (abs @0
+   (cnd (cmp @0 zerop) @1 (negate @1))
+(if (!HONOR_SIGNED_ZEROS (TREE_TYPE(@0))
+&& !TYPE_UNSIGNED (TREE_TYPE(@0))
+&& bitwise_equal_p (@0, @1))
+ (if (TYPE_UNSIGNED (type))
+  (absu:type @0)
+  (abs @0)
  /* A <=/< 0 ? A : -Asame as -abs (A) */
  (for cmp (le lt)
   (simplify
-   (cnd (cmp @0 zerop) @0 (negate @0))
-(if (!HONOR_SIGNED_ZEROS (type)
-&& !TYPE_UNSIGNED (type))
- (if (ANY_INTEGRAL_TYPE_P (type)
- && !TYPE_OVERFLOW_WRAPS (type))
+   (cnd (cmp @0 zerop) @1 (negate @1))
+(if (!HONOR_SIGNED_ZEROS (TREE_TYPE(@0))
+&& !TYPE_UNSIGNED (TREE_TYPE(@0))
+&& bitwise_equal_p (@0, @1))
+ (if ((ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+  && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
+ || TYPE_UNSIGNED (type))
   (with {
-   tree utype = unsigned_type_for (type);
+   tree utype = unsigned_type_for (TREE_TYPE(@0));
}
(convert (negate (absu:utype @0
(negate (abs @0)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-36.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-36.c
new file mode 100644
index 000..4baf9f82a22
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-36.c
@@ -0,0 +1,51 @@
+/* { dg-options "-O2 -fdump-tree-phiopt" } */
+
+unsigned f0(int A)
+{
+  unsigned t = A;
+// A == 0? A : -Asame as -A
+  if (A == 0)  return t;
+  return -t;
+}
+
+unsigned f1(int A)
+{
+  unsigned t = A;
+// A != 0? A : -Asame as A
+  if (A != 0)  return t;
+  return -t;
+}
+unsigned f2(int A)
+{
+  unsigned t = A;
+// A >= 0? A : -Asame as abs (A)
+  if (A >= 0)  return t;
+  return -t;
+}
+unsigned f3(int A)
+{
+  unsigned t = A;
+// A > 0?  A : -Asame as abs (A)
+  if (A > 0)  return t;
+  return -t;
+}
+unsigned f4(int A)
+{
+  unsigned t = A;
+// A <= 0? A : -Asame as -abs (A)
+  if (A <= 0)  return t;
+  return -t;
+}
+unsigned f5(int A)
+{
+  unsigned t = A;
+// A < 0?  A : -Asame as -abs (A)
+  if (A < 0)  return t;
+  return -t;
+}
+
+/* f4 and f5 are not allowed to be optimized in early phi-opt. */
+/* { dg-final { scan-tree-dump-times "if " 2 "phiopt1" } } */
+/* { dg-final { scan-tree-dump-not "if " "phiopt2" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-37.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-37.c
new file mode 100644
index 000..f1ff472aaff
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-37.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-phiopt1" } */
+
+unsigned abs_with_convert0 (int x)
+{
+unsigned int y = x

[PATCH] Improve factor_out_conditional_operation for conversions and constants

2023-10-15 Thread Andrew Pinski
In the case of a NOP conversion (precisions of the 2 types are equal),
factoring out the conversion can be done even if int_fits_type_p returns
false and even when the conversion is defined by a statement inside the
conditional. Since it is a NOP conversion there is no zero/sign extending
happening which is why it is ok to be done here.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

PR tree-optimization/104376
PR tree-optimization/101541
* tree-ssa-phiopt.cc (factor_out_conditional_operation):
Allow nop conversions even if it is defined by a statement
inside the conditional.

gcc/testsuite/ChangeLog:

PR tree-optimization/101541
* gcc.dg/tree-ssa/phi-opt-38.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-38.c | 44 ++
 gcc/tree-ssa-phiopt.cc |  8 +++-
 2 files changed, 50 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-38.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-38.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-38.c
new file mode 100644
index 000..ca04d1619e6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-38.c
@@ -0,0 +1,44 @@
+/* { dg-options "-O2 -fdump-tree-phiopt" } */
+
+unsigned f0(int A)
+{
+// A == 0? A : -Asame as -A
+  if (A == 0)  return A;
+  return -A;
+}
+
+unsigned f1(int A)
+{
+// A != 0? A : -Asame as A
+  if (A != 0)  return A;
+  return -A;
+}
+unsigned f2(int A)
+{
+// A >= 0? A : -Asame as abs (A)
+  if (A >= 0)  return A;
+  return -A;
+}
+unsigned f3(int A)
+{
+// A > 0?  A : -Asame as abs (A)
+  if (A > 0)  return A;
+  return -A;
+}
+unsigned f4(int A)
+{
+// A <= 0? A : -Asame as -abs (A)
+  if (A <= 0)  return A;
+  return -A;
+}
+unsigned f5(int A)
+{
+// A < 0?  A : -Asame as -abs (A)
+  if (A < 0)  return A;
+  return -A;
+}
+
+/* f4 and f5 are not allowed to be optimized in early phi-opt. */
+/* { dg-final { scan-tree-dump-times "if" 2 "phiopt1" } } */
+/* { dg-final { scan-tree-dump-not "if" "phiopt2" } } */
+
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index 312a6f9082b..0ab8fad5898 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -310,7 +310,9 @@ factor_out_conditional_operation (edge e0, edge e1, gphi 
*phi,
return NULL;
   /* If arg1 is an INTEGER_CST, fold it to new type.  */
   if (INTEGRAL_TYPE_P (TREE_TYPE (new_arg0))
- && int_fits_type_p (arg1, TREE_TYPE (new_arg0)))
+ && (int_fits_type_p (arg1, TREE_TYPE (new_arg0))
+ || TYPE_PRECISION (TREE_TYPE (new_arg0))
+ == TYPE_PRECISION (TREE_TYPE (arg1
{
  if (gimple_assign_cast_p (arg0_def_stmt))
{
@@ -323,7 +325,9 @@ factor_out_conditional_operation (edge e0, edge e1, gphi 
*phi,
 its basic block, because then it is possible this
 could enable further optimizations (minmax replacement
 etc.).  See PR71016.  */
- if (new_arg0 != gimple_cond_lhs (cond_stmt)
+ if (TYPE_PRECISION (TREE_TYPE (new_arg0))
+   != TYPE_PRECISION (TREE_TYPE (arg1))
+ && new_arg0 != gimple_cond_lhs (cond_stmt)
  && new_arg0 != gimple_cond_rhs (cond_stmt)
  && gimple_bb (arg0_def_stmt) == e0->src)
{
-- 
2.34.1



[PATCH] [PR31531] MATCH: Improve ~a < ~b and ~a < CST, allow a nop cast inbetween ~ and a/b

2023-10-15 Thread Andrew Pinski
Currently we are able to simplify `~a CMP ~b` to `b CMP a`, but we should
allow a nop conversion in between the `~` and the `a`, which can show up.
A similar thing should be done for `~a CMP CST`.

I had originally submitted the `~a CMP CST` case as
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/585088.html;
I noticed we should do the same thing for the `~a CMP ~b` case and combined
it with that one here.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/31531

gcc/ChangeLog:

* match.pd (~X op ~Y): Allow for an optional nop convert.
(~X op C): Likewise.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr31531-1.c: New test.
* gcc.dg/tree-ssa/pr31531-2.c: New test.
---
 gcc/match.pd  | 10 ---
 gcc/testsuite/gcc.dg/tree-ssa/pr31531-1.c | 19 +
 gcc/testsuite/gcc.dg/tree-ssa/pr31531-2.c | 34 +++
 3 files changed, 59 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr31531-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr31531-2.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 51e5065d086..e76ec1ec034 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5944,18 +5944,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* Fold ~X op ~Y as Y op X.  */
 (for cmp (simple_comparison)
  (simplify
-  (cmp (bit_not@2 @0) (bit_not@3 @1))
+  (cmp (nop_convert1?@4 (bit_not@2 @0)) (nop_convert2? (bit_not@3 @1)))
   (if (single_use (@2) && single_use (@3))
-   (cmp @1 @0
+   (with { tree otype = TREE_TYPE (@4); }
+(cmp (convert:otype @1) (convert:otype @0))
 
 /* Fold ~X op C as X op' ~C, where op' is the swapped comparison.  */
 (for cmp (simple_comparison)
  scmp (swapped_simple_comparison)
  (simplify
-  (cmp (bit_not@2 @0) CONSTANT_CLASS_P@1)
+  (cmp (nop_convert? (bit_not@2 @0)) CONSTANT_CLASS_P@1)
   (if (single_use (@2)
&& (TREE_CODE (@1) == INTEGER_CST || TREE_CODE (@1) == VECTOR_CST))
-   (scmp @0 (bit_not @1)
+   (with { tree otype = TREE_TYPE (@1); }
+(scmp (convert:otype @0) (bit_not @1))
 
 (for cmp (simple_comparison)
  (simplify
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr31531-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr31531-1.c
new file mode 100644
index 000..c27299151eb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr31531-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* PR tree-optimization/31531 */
+
+int f(int a)
+{
+  int b = ~a;
+  return b<0;
+}
+
+
+int f1(unsigned a)
+{
+  int b = ~a;
+  return b<0;
+}
+/* We should convert the above two functions from b <0 to ((int)a) >= 0. */
+/* { dg-final { scan-tree-dump-times ">= 0" 2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "~" 0 "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr31531-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr31531-2.c
new file mode 100644
index 000..865ea292215
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr31531-2.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* PR tree-optimization/31531 */
+
+int f0(unsigned x, unsigned t)
+{
+x = ~x;
+t = ~t;
+int xx = x;
+int tt = t;
+return tt < xx;
+}
+
+int f1(unsigned x, int t)
+{
+x = ~x;
+t = ~t;
+int xx = x;
+int tt = t;
+return tt < xx;
+}
+
+int f2(int x, unsigned t)
+{
+x = ~x;
+t = ~t;
+int xx = x;
+int tt = t;
+return tt < xx;
+}
+
+
+/* We should be able to remove all ~ from the above functions. */
+/* { dg-final { scan-tree-dump-times "~" 0 "optimized"} } */
-- 
2.39.3



Re: [PATCH] Add files to discourage submissions of PRs to the GitHub mirror.

2023-10-16 Thread Andrew Pinski
On Mon, Oct 16, 2023, 16:39 Eric Gallager  wrote:

> Currently there is an unofficial mirror of GCC on GitHub that people
> sometimes submit pull requests to:
> https://github.com/gcc-mirror/gcc
> However, this is not the proper way to contribute to GCC, so that means
> that someone (usually Jonathan Wakely) has to go through the PRs and
> manually tell people that they're sending their PRs to the wrong place.
> One thing that would help mitigate this problem would be files in a
> special .github directory that GitHub would automatically open when
> contributors attempt to open a PR, that would then tell them the proper
> way to contribute instead. This patch attempts to add two such files.
> They are written in Markdown, which I'm realizing might require some
> special handling in this repository, since the ".md" extension is also
> used for GCC's "Machine Description" files here, but I'm not quite sure
> how to go about handling that. Also note that I adapted these files from
> equivalent files in the git repository for Git itself:
> https://github.com/git/git/blob/master/.github/CONTRIBUTING.md
> https://github.com/git/git/blob/master/.github/PULL_REQUEST_TEMPLATE.md
> What do people think?
>


I think this is a great idea. Is there a similar one for opening issues too?

Thanks,
Andrew


ChangeLog:
>
> * .github/CONTRIBUTING.md: New file.
> * .github/PULL_REQUEST_TEMPLATE.md: New file.
> ---
>  .github/CONTRIBUTING.md  | 18 ++
>  .github/PULL_REQUEST_TEMPLATE.md |  5 +
>  2 files changed, 23 insertions(+)
>  create mode 100644 .github/CONTRIBUTING.md
>  create mode 100644 .github/PULL_REQUEST_TEMPLATE.md
>
> diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
> new file mode 100644
> index ..4f7b3abca5f4
> --- /dev/null
> +++ b/.github/CONTRIBUTING.md
> @@ -0,0 +1,18 @@
> +## Contributing to GCC
> +
> +Thanks for taking the time to contribute to GCC! Please be advised that
> if you are
> +viewing this on `github.com`, that the mirror there is unofficial and
> unmonitored.
> +The GCC community does not use `github.com` for their contributions.
> Instead, we use
> +a mailing list (`gcc-patches@gcc.gnu.org`) for code submissions, code
> +reviews, and bug reports.
> +
> +Perhaps one day it will be possible to use [GitGitGadget](
> https://gitgitgadget.github.io/) to
> +conveniently send Pull Requests commits to GCC's mailing list, the way
> that the Git project currently allows it to be used to send PRs to their
> mailing list, but until that day arrives, please send your patches to the
> mailing list manually.
> +
> +Please read ["Contributing to GCC"](https://gcc.gnu.org/contribute.html)
> on the main GCC website
> +to learn how the GCC project is managed, and how you can work with it.
> +In addition, we highly recommend you to read [our guidelines for
> read-write Git access](https://gcc.gnu.org/gitwrite.html).
> +
> +Or, you can follow the ["Contributing to GCC in 10 easy steps"](
> https://gcc.gnu.org/wiki/GettingStarted#Basics:_Contributing_to_GCC_in_10_easy_steps)
> section of the ["Getting Started" page](
> https://gcc.gnu.org/wiki/GettingStarted) on [the wiki](
> https://gcc.gnu.org/wiki) for another example of the contribution process.
> +
> +Your friendly GCC community!
> diff --git a/.github/PULL_REQUEST_TEMPLATE.md
> b/.github/PULL_REQUEST_TEMPLATE.md
> new file mode 100644
> index ..6417392c8cf3
> --- /dev/null
> +++ b/.github/PULL_REQUEST_TEMPLATE.md
> @@ -0,0 +1,5 @@
> +Thanks for taking the time to contribute to GCC! Please be advised that
> if you are
> +viewing this on `github.com`, that the mirror there is unofficial and
> unmonitored.
> +The GCC community does not use `github.com` for their contributions.
> Instead, we use
> +a mailing list (`gcc-patches@gcc.gnu.org`) for code submissions, code
> reviews, and
> +bug reports. Please send patches there instead.
>


Re: [PATCH 11/11] aarch64: Add new load/store pair fusion pass.

2023-10-17 Thread Andrew Pinski
On Tue, Oct 17, 2023 at 1:52 PM Alex Coplan  wrote:
>
> This adds a new aarch64-specific RTL-SSA pass dedicated to forming load
> and store pairs (LDPs and STPs).
>
> As a motivating example for the kind of thing this improves, take the
> following testcase:
>
> extern double c[20];
>
> double f(double x)
> {
>   double y = x*x;
>   y += c[16];
>   y += c[17];
>   y += c[18];
>   y += c[19];
>   return y;
> }
>
> for which we currently generate (at -O2):
>
> f:
> adrpx0, c
> add x0, x0, :lo12:c
> ldp d31, d29, [x0, 128]
> ldr d30, [x0, 144]
> fmadd   d0, d0, d0, d31
> ldr d31, [x0, 152]
> faddd0, d0, d29
> faddd0, d0, d30
> faddd0, d0, d31
> ret
>
> but with the pass, we generate:
>
> f:
> .LFB0:
> adrpx0, c
> add x0, x0, :lo12:c
> ldp d31, d29, [x0, 128]
> fmadd   d0, d0, d0, d31
> ldp d30, d31, [x0, 144]
> faddd0, d0, d29
> faddd0, d0, d30
> faddd0, d0, d31
> ret
>
> The pass is local (only considers a BB at a time).  In theory, it should
> be possible to extend it to run over EBBs, at least in the case of pure
> (MEM_READONLY_P) loads, but this is left for future work.
>
> The pass works by identifying two kinds of bases: tree decls obtained
> via MEM_EXPR, and RTL register bases in the form of RTL-SSA def_infos.
> If a candidate memory access has a MEM_EXPR base, then we track it via
> this base, and otherwise if it is of a simple reg +  form, we track
> it via the RTL-SSA def_info for the register.
>
> For each BB, for a given kind of base, we build up a hash table mapping
> the base to an access_group.  The access_group data structure holds a
> list of accesses at each offset relative to the same base.  It uses a
> splay tree to support efficient insertion (while walking the bb), and
> the nodes are chained using a linked list to support efficient
> iteration (while doing the transformation).
>
> For each base, we then iterate over the access_group to identify
> adjacent accesses, and try to form load/store pairs for those insns that
> access adjacent memory.
>
> The pass is currently run twice, both before and after register
> allocation.  The first copy of the pass is run late in the pre-RA RTL
> pipeline, immediately after sched1, since it was found that sched1 was
> increasing register pressure when the pass was run before.  The second
> copy of the pass runs immediately before peephole2, so as to get any
> opportunities that the existing ldp/stp peepholes can handle.
>
> There are some cases that we punt on before RA, e.g.
> accesses relative to eliminable regs (such as the soft frame pointer).
> We do this since we can't know the elimination offset before RA, and we
> want to avoid the RA reloading the offset (due to being out of ldp/stp
> immediate range) as this can generate worse code.
>
> The post-RA copy of the pass is there to pick up the crumbs that were
> left behind / things we punted on in the pre-RA pass.  Among other
> things, it's needed to handle accesses relative to the stack pointer
> (see the previous patch in the series for an example).  It can also
> handle code that didn't exist at the time the pre-RA pass was run (spill
> code, prologue/epilogue code).
>
> The following table shows the effect of the passes on code size in
> SPEC CPU 2017 with -Os -flto=auto -mcpu=neoverse-v1:
>
> +-+-+--+-+
> |Benchmark| Pre-RA pass | Post-RA pass | Overall |
> +-+-+--+-+
> | 541.leela_r | 0.04%   | -0.03%   | 0.01%   |
> | 502.gcc_r   | -0.07%  | -0.02%   | -0.09%  |
> | 510.parest_r| -0.06%  | -0.04%   | -0.10%  |
> | 505.mcf_r   | -0.12%  | 0.00%| -0.12%  |
> | 500.perlbench_r | -0.12%  | -0.02%   | -0.15%  |
> | 520.omnetpp_r   | -0.13%  | -0.03%   | -0.16%  |
> | 538.imagick_r   | -0.17%  | -0.02%   | -0.19%  |
> | 525.x264_r  | -0.17%  | -0.02%   | -0.19%  |
> | 544.nab_r   | -0.22%  | -0.01%   | -0.23%  |
> | 557.xz_r| -0.27%  | -0.01%   | -0.28%  |
> | 507.cactuBSSN_r | -0.26%  | -0.03%   | -0.29%  |
> | 526.blender_r   | -0.37%  | -0.02%   | -0.38%  |
> | 523.xalancbmk_r | -0.41%  | -0.01%   | -0.42%  |
> | 531.deepsjeng_r | -0.41%  | -0.05%   | -0.46%  |
> | 511.povray_r| -0.60%  | -0.05%   | -0.65%  |
> | 548.exchange2_r | -0.55%  | -0.32%   | -0.86%  |
> | 527.cam4_r  | -0.82%  | -0.16%   | -0.98%  |
> | 503.bwaves_r| -0.63%  | -0.41%   | -1.04%  |
> | 521.wrf_r   | -1.04%  | -0.06%   | -1.10%  |
> | 549.fotonik3d_r | -0.91%  | -0.35%   | -1.26%  |
> | 554.roms_r  | -1.20%  | -0.20%   | -1.40%  |
> | 519.lbm_r   | -1.91%  | 0.00%| -1

[COMMITTED] Fix expansion of `(a & 2) != 1`

2023-10-18 Thread Andrew Pinski
I had a thinko in r14-1600-ge60593f3881c72a96a3fa4844d73e8a2cd14f670
where we would remove the `& CST` part if we ended up not calling
expand_single_bit_test.
This fixes the problem by introducing a new variable that will be used
for calling expand_single_bit_test.
As far as I know this can only show up when disabling optimization
passes, as the above form would have been optimized away.

Committed as obvious after a bootstrap/test on x86_64-linux-gnu.

PR middle-end/111863

gcc/ChangeLog:

* expr.cc (do_store_flag): Don't overwrite arg0
when stripping off `& POW2`.

gcc/testsuite/ChangeLog:

* gcc.c-torture/execute/pr111863-1.c: New test.
---
 gcc/expr.cc  |  9 +
 gcc/testsuite/gcc.c-torture/execute/pr111863-1.c | 16 
 2 files changed, 21 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr111863-1.c

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 8aed3fc6cbe..763bd82c59f 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -13206,14 +13206,15 @@ do_store_flag (sepops ops, rtx target, machine_mode 
mode)
  || integer_pow2p (arg1))
   && (TYPE_PRECISION (ops->type) != 1 || TYPE_UNSIGNED (ops->type)))
 {
-  wide_int nz = tree_nonzero_bits (arg0);
-  gimple *srcstmt = get_def_for_expr (arg0, BIT_AND_EXPR);
+  tree narg0 = arg0;
+  wide_int nz = tree_nonzero_bits (narg0);
+  gimple *srcstmt = get_def_for_expr (narg0, BIT_AND_EXPR);
   /* If the defining statement was (x & POW2), then use that instead of
 the non-zero bits.  */
   if (srcstmt && integer_pow2p (gimple_assign_rhs2 (srcstmt)))
{
  nz = wi::to_wide (gimple_assign_rhs2 (srcstmt));
- arg0 = gimple_assign_rhs1 (srcstmt);
+ narg0 = gimple_assign_rhs1 (srcstmt);
}
 
   if (wi::popcount (nz) == 1
@@ -13227,7 +13228,7 @@ do_store_flag (sepops ops, rtx target, machine_mode 
mode)
 
  type = lang_hooks.types.type_for_mode (mode, unsignedp);
  return expand_single_bit_test (loc, tcode,
-arg0,
+narg0,
 bitnum, type, target, mode);
}
 }
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111863-1.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111863-1.c
new file mode 100644
index 000..4e27fe631b2
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111863-1.c
@@ -0,0 +1,16 @@
+/* { dg-options " -fno-tree-ccp -fno-tree-dominator-opts -fno-tree-vrp" } */
+
+__attribute__((noipa))
+int f(int a)
+{
+a &= 2;
+return a != 1;
+}
+int main(void)
+{
+int t = f(1);
+if (!t)
+__builtin_abort();
+__builtin_printf("%d\n",t);
+return 0;
+}
-- 
2.39.3



[PATCH] aarch64: [PR110986] Emit csinv again for `a ? ~b : b`

2023-10-18 Thread Andrew Pinski
After r14-3110-g7fb65f10285, the canonical form for
`a ? ~b : b` changed to be `-(a) ^ b`, which means
for aarch64 we need to add a few new insn patterns
to be able to catch this and change it to be
what is the canonical form for the aarch64 backend.
A secondary pattern was needed to support a zero_extended
form too; this adds a testcase for all 3 cases.

Bootstrapped and tested on aarch64-linux-gnu with no regressions.

PR target/110986

gcc/ChangeLog:

* config/aarch64/aarch64.md (*cmov_insn_insv): New pattern.
(*cmov_uxtw_insn_insv): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/cond_op-1.c: New test.
---
 gcc/config/aarch64/aarch64.md| 46 
 gcc/testsuite/gcc.target/aarch64/cond_op-1.c | 20 +
 2 files changed, 66 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/cond_op-1.c

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 32c7adc8928..59cd0415937 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4413,6 +4413,52 @@ (define_insn "*csinv3_uxtw_insn3"
   [(set_attr "type" "csel")]
 )
 
+;; There are two canonical forms for `cmp ? ~a : a`.
+;; This is the second form and is here to help combine.
+;; Support `-(cmp) ^ a` into `cmp ? ~a : a`
+;; The second pattern is to support the zero extend'ed version.
+
+(define_insn_and_split "*cmov_insn_insv"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+(xor:GPI
+(neg:GPI
+ (match_operator:GPI 1 "aarch64_comparison_operator"
+  [(match_operand 2 "cc_register" "") (const_int 0)]))
+(match_operand:GPI 3 "general_operand" "r")))]
+  "can_create_pseudo_p ()"
+  "#"
+  "&& true"
+  [(set (match_dup 0)
+   (if_then_else:GPI (match_dup 1)
+ (not:GPI (match_dup 3))
+ (match_dup 3)))]
+  {
+operands[3] = force_reg (mode, operands[3]);
+  }
+  [(set_attr "type" "csel")]
+)
+
+(define_insn_and_split "*cmov_uxtw_insn_insv"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+(zero_extend:DI
+(xor:SI
+ (neg:SI
+  (match_operator:SI 1 "aarch64_comparison_operator"
+   [(match_operand 2 "cc_register" "") (const_int 0)]))
+ (match_operand:SI 3 "general_operand" "r"]
+  "can_create_pseudo_p ()"
+  "#"
+  "&& true"
+  [(set (match_dup 0)
+   (if_then_else:DI (match_dup 1)
+ (zero_extend:DI (not:SI (match_dup 3)))
+ (zero_extend:DI (match_dup 3]
+  {
+operands[3] = force_reg (SImode, operands[3]);
+  }
+  [(set_attr "type" "csel")]
+)
+
 ;; If X can be loaded by a single CNT[BHWD] instruction,
 ;;
 ;;A = UMAX (B, X)
diff --git a/gcc/testsuite/gcc.target/aarch64/cond_op-1.c 
b/gcc/testsuite/gcc.target/aarch64/cond_op-1.c
new file mode 100644
index 000..e6c7821127e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/cond_op-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* PR target/110986 */
+
+
+long long full(unsigned a, unsigned b)
+{
+  return a ? ~b : b;
+}
+unsigned fuu(unsigned a, unsigned b)
+{
+  return a ? ~b : b;
+}
+long long f(unsigned long long a, unsigned long long b)
+{
+  return a ? ~b : b;
+}
+
+/* { dg-final { scan-assembler-times "csinv\tw\[0-9\]*" 2 } } */
+/* { dg-final { scan-assembler-times "csinv\tx\[0-9\]*" 1 } } */
-- 
2.39.3



[PATCH] c: [PR104822] Don't warn about converting NULL to different sso endian

2023-10-19 Thread Andrew Pinski
In a similar way that we don't warn about NULL pointer constant conversion
to a different named address space, we should not warn about conversion to
a different sso endianness either.
This adds the simple check.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR c/104822

gcc/c/ChangeLog:

* c-typeck.cc (convert_for_assignment): Check for null pointer
before warning about an incompatible scalar storage order.

gcc/testsuite/ChangeLog:

* gcc.dg/sso-18.c: New test.
---
 gcc/c/c-typeck.cc |  1 +
 gcc/testsuite/gcc.dg/sso-18.c | 16 
 2 files changed, 17 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/sso-18.c

diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index 6e044b4afbc..f39dc71d593 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -7449,6 +7449,7 @@ convert_for_assignment (location_t location, location_t 
expr_loc, tree type,
 
   /* See if the pointers point to incompatible scalar storage orders.  */
   if (warn_scalar_storage_order
+ && !null_pointer_constant_p (rhs)
  && (AGGREGATE_TYPE_P (ttl) && TYPE_REVERSE_STORAGE_ORDER (ttl))
 != (AGGREGATE_TYPE_P (ttr) && TYPE_REVERSE_STORAGE_ORDER (ttr)))
{
diff --git a/gcc/testsuite/gcc.dg/sso-18.c b/gcc/testsuite/gcc.dg/sso-18.c
new file mode 100644
index 000..799a0c858f2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/sso-18.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* PR c/104822 */
+
+#include 
+
+struct Sb {
+  int i;
+} __attribute__((scalar_storage_order("big-endian")));
+struct Sl {
+  int i;
+} __attribute__((scalar_storage_order("little-endian")));
+
+/* Neither of these should warn about incompatible scalar storage order
+   as NULL pointers are compatible with both endians. */
+struct Sb *pb = NULL; /* { dg-bogus "" } */
+struct Sl *pl = NULL; /* { dg-bogus "" } */
-- 
2.39.3



[PATCH] c: [PR100532] Fix ICE when an argument was an error mark

2023-10-19 Thread Andrew Pinski
In the case of convert_argument, we would return the same expression
back rather than error_mark_node after the error message about
trying to convert to an incomplete type. This causes issues in
the gimplifier trying to see if another conversion is needed.

The code here dates back to before the revision history too, so
it may be that it was never noticed that we should return error_mark_node.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR c/100532

gcc/c/ChangeLog:

* c-typeck.cc (convert_argument): After erroring out
about an incomplete type return error_mark_node.

gcc/testsuite/ChangeLog:

* gcc.dg/pr100532-1.c: New test.
---
 gcc/c/c-typeck.cc | 2 +-
 gcc/testsuite/gcc.dg/pr100532-1.c | 7 +++
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr100532-1.c

diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index 6e044b4afbc..8f8562936dc 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -3367,7 +3367,7 @@ convert_argument (location_t ploc, tree function, tree 
fundecl,
 {
   error_at (ploc, "type of formal parameter %d is incomplete",
parmnum + 1);
-  return val;
+  return error_mark_node;
 }
 
   /* Optionally warn about conversions that differ from the default
diff --git a/gcc/testsuite/gcc.dg/pr100532-1.c 
b/gcc/testsuite/gcc.dg/pr100532-1.c
new file mode 100644
index 000..81e37c60415
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr100532-1.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* PR c/100532 */
+
+typedef __SIZE_TYPE__ size_t;
+void *memcpy(void[], const void *, size_t); /* { dg-error "declaration of type 
name" } */
+void c(void) { memcpy(c, "a", 2); } /* { dg-error "type of formal parameter" } 
*/
+
-- 
2.34.1



Re: [PATCH, GCC, AARCH64] Add support for +profile extension

2018-07-09 Thread Andrew Pinski
On Mon, Jul 9, 2018 at 6:21 AM Andre Vieira (lists)
 wrote:
>
> Hi,
>
> This patch adds support for the Statistical Profiling Extension (SPE) on
> AArch64. Even though the compiler will not generate code any differently
> given this extension, it will need to pass it on to the assembler in
> order to let it correctly assemble inline asm containing accesses to the
> extension's system registers.  The same applies when using the
> preprocessor on an assembly file as this first must pass through cc1.
>
> I left the hwcaps string for SPE empty as the kernel does not define a
> feature string for this extension.  The current effect of this is that
> driver will disable profile feature bit in GCC.  This is OK though
> because we don't, nor do we ever, enable this feature bit, as codegen is
> not affect by the SPE support and more importantly the driver will still
> pass the extension down to the assembler regardless.
>
> Boostrapped aarch64-none-linux-gnu and ran regression tests.
>
> Is it OK for trunk?

I use a similar patch for the last year and half.

Thanks,
Andrew

>
> gcc/ChangeLog:
> 2018-07-09  Andre Vieira  
>
> * config/aarch64/aarch64-option-extensions.def: New entry for profile
> extension.
> * config/aarch64/aarch64.h (AARCH64_FL_PROFILE): New.
> * doc/invoke.texi (aarch64-feature-modifiers): New entry for profile
> extension.
>
> gcc/testsuite/ChangeLog:
> 2018-07-09 Andre Vieira 
>
> * gcc.target/aarch64/profile.c: New test.


Re: [RFC] Fix recent popcount change is breaking

2018-07-10 Thread Andrew Pinski
On Tue, Jul 10, 2018 at 6:14 PM Kugan Vivekanandarajah
 wrote:
>
> On 10 July 2018 at 23:17, Richard Biener  wrote:
> > On Tue, Jul 10, 2018 at 3:06 PM Kugan Vivekanandarajah
> >  wrote:
> >>
> >> Hi,
> >>
> >> Jeff told me that the recent popcount built-in detection is causing
> >> kernel build issues as
> >> ERROR: "__popcountsi2"
> >> [drivers/net/wireless/broadcom/brcm80211/brcmfmac/brcmfmac.ko] undefined!
> >>
> >> I could also reproduce this. AFIK, we should check if the libfunc is
> >> defined while checking popcount?
> >>
> >> I am testing the attached RFC patch. Is this reasonable?
> >
> > It doesn't work that way, all targets have this libfunc in libgcc.  This 
> > means
> > the kernel has to provide it.  The only thing you could do is restrict
> > replacement of CALL_EXPRs (in SCEV cprop) to those the target
> > natively supports.
>
> How about restricting it in expression_expensive_p ? Is that what you
> wanted. Attached patch does this.
> Bootstrap and regression testing progressing.

Seems like that should go into is_inexpensive_builtin  instead which
is just tested right below.

Thanks,
Andrew

>
> Thanks,
> Kugan
>
> >
> > Richard.
> >
> >> Thanks,
> >> Kugan
> >>
> >> gcc/ChangeLog:
> >>
> >> 2018-07-10  Kugan Vivekanandarajah  
> >>
> >> * tree-ssa-loop-niter.c (number_of_iterations_popcount): Check
> >> if libfunc for popcount is available.


Re: [RFC] Fix recent popcount change is breaking

2018-07-10 Thread Andrew Pinski
On Tue, Jul 10, 2018 at 6:35 PM Kugan Vivekanandarajah
 wrote:
>
> Hi Andrew,
>
> On 11 July 2018 at 11:19, Andrew Pinski  wrote:
> > On Tue, Jul 10, 2018 at 6:14 PM Kugan Vivekanandarajah
> >  wrote:
> >>
> >> On 10 July 2018 at 23:17, Richard Biener  
> >> wrote:
> >> > On Tue, Jul 10, 2018 at 3:06 PM Kugan Vivekanandarajah
> >> >  wrote:
> >> >>
> >> >> Hi,
> >> >>
> >> >> Jeff told me that the recent popcount built-in detection is causing
> >> >> kernel build issues as
> >> >> ERROR: "__popcountsi2"
> >> >> [drivers/net/wireless/broadcom/brcm80211/brcmfmac/brcmfmac.ko] 
> >> >> undefined!
> >> >>
> >> >> I could also reproduce this. AFIK, we should check if the libfunc is
> >> >> defined while checking popcount?
> >> >>
> >> >> I am testing the attached RFC patch. Is this reasonable?
> >> >
> >> > It doesn't work that way, all targets have this libfunc in libgcc.  This 
> >> > means
> >> > the kernel has to provide it.  The only thing you could do is restrict
> >> > replacement of CALL_EXPRs (in SCEV cprop) to those the target
> >> > natively supports.
> >>
> >> How about restricting it in expression_expensive_p ? Is that what you
> >> wanted. Attached patch does this.
> >> Bootstrap and regression testing progressing.
> >
> > Seems like that should go into is_inexpensive_builtin  instead which
> > is just tested right below.
>
> I hought about that. is_inexpensive_builtin is used in various other
> places including some inlining decision so wasn't sure if it is the
> right thing. Happy to change it if that is the right thing to do.

I audited all of the users (and their users if it is used in a
wrapper) and found that is_inexpensive_builtin should return false for
this builtin if it is a function call in the end; there are other
builtins which should be checked in a similar way, but we are not
going to force you to do the same thing for those builtins.

Thanks,
Andrew

>
> Thanks,
> Kugan
> >
> > Thanks,
> > Andrew
> >
> >>
> >> Thanks,
> >> Kugan
> >>
> >> >
> >> > Richard.
> >> >
> >> >> Thanks,
> >> >> Kugan
> >> >>
> >> >> gcc/ChangeLog:
> >> >>
> >> >> 2018-07-10  Kugan Vivekanandarajah  
> >> >>
> >> >> * tree-ssa-loop-niter.c (number_of_iterations_popcount): Check
> >> >> if libfunc for popcount is available.


Re: [PATCH][AARCH64] PR target/84521 Fix frame pointer corruption with -fomit-frame-pointer with __builtin_setjmp

2018-07-31 Thread Andrew Pinski
On Tue, Jul 31, 2018 at 2:43 PM James Greenhalgh
 wrote:
>
> On Thu, Jul 12, 2018 at 12:01:09PM -0500, Sudakshina Das wrote:
> > Hi Eric
> >
> > On 27/06/18 12:22, Wilco Dijkstra wrote:
> > > Eric Botcazou wrote:
> > >
> > >>> This test can easily be changed not to use optimize since it doesn't 
> > >>> look
> > >>> like it needs it. We really need to tests these builtins properly,
> > >>> otherwise they will continue to fail on most targets.
> > >>
> > >> As far as I can see PR target/84521 has been reported only for Aarch64 
> > >> so I'd
> > >> just leave the other targets alone (and avoid propagating FUD if 
> > >> possible).
> > >
> > > It's quite obvious from PR84521 that this is an issue affecting all 
> > > targets.
> > > Adding better generic tests for __builtin_setjmp can only be a good thing.
> > >
> > > Wilco
> > >
> >
> > This conversation seems to have died down and I would like to
> > start it again. I would agree with Wilco's suggestion about
> > keeping the test in the generic folder. I have removed the
> > optimize attribute and the effect is still the same. It passes
> > on AArch64 with this patch and it currently fails on x86
> > trunk (gcc version 9.0.0 20180712 (experimental) (GCC))
> > on -O1 and above.
>
>
> I don't see where the FUD comes in here; either this builtin has a defined
> semantics across targets and they are adhered to, or the builtin doesn't have
> well defined semantics, or the targets fail to implement those semantics.

The problem comes from the fact the builtins are not documented at all.
See PR59039 for the issue on them not being documented.

Thanks,
Andrew


>
> I think this should go in as is. If other targets are unhappy with the
> failing test they should fix their target or skip the test if it is not
> appropriate.
>
> You may want to CC some of the maintainers of platforms you know to fail as
> a courtesy on the PR (add your testcase, and add failing targets and their
> maintainers to that PR) before committing so it doesn't come as a complete
> surprise.
>
> This is OK with some attempt to get target maintainers involved in the
> conversation before commit.
>
> Thanks,
> James
>
> > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> > index f284e74..9792d28 100644
> > --- a/gcc/config/aarch64/aarch64.h
> > +++ b/gcc/config/aarch64/aarch64.h
> > @@ -473,7 +473,9 @@ extern unsigned aarch64_architecture_version;
> >  #define EH_RETURN_STACKADJ_RTX   gen_rtx_REG (Pmode, R4_REGNUM)
> >  #define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
> >
> > -/* Don't use __builtin_setjmp until we've defined it.  */
> > +/* Don't use __builtin_setjmp until we've defined it.
> > +   CAUTION: This macro is only used during exception unwinding.
> > +   Don't fall for its name.  */
> >  #undef DONT_USE_BUILTIN_SETJMP
> >  #define DONT_USE_BUILTIN_SETJMP 1
> >
> > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> > index 01f35f8..4266a3d 100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -3998,7 +3998,7 @@ static bool
> >  aarch64_needs_frame_chain (void)
> >  {
> >/* Force a frame chain for EH returns so the return address is at FP+8.  
> > */
> > -  if (frame_pointer_needed || crtl->calls_eh_return)
> > +  if (frame_pointer_needed || crtl->calls_eh_return || 
> > cfun->has_nonlocal_label)
> >  return true;
> >
> >/* A leaf function cannot have calls or write LR.  */
> > @@ -12218,6 +12218,13 @@ aarch64_expand_builtin_va_start (tree valist, rtx 
> > nextarg ATTRIBUTE_UNUSED)
> >expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
> >  }
> >
> > +/* Implement TARGET_BUILTIN_SETJMP_FRAME_VALUE.  */
> > +static rtx
> > +aarch64_builtin_setjmp_frame_value (void)
> > +{
> > +  return hard_frame_pointer_rtx;
> > +}
> > +
> >  /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
> >
> >  static tree
> > @@ -17744,6 +17751,9 @@ aarch64_run_selftests (void)
> >  #undef TARGET_FOLD_BUILTIN
> >  #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
> >
> > +#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
> > +#define TARGET_BUILTIN_SETJMP_FRAME_VALUE 
> > aarch64_builtin_setjmp_frame_value
> > +
> >  #undef TARGET_FUNCTION_ARG
> >  #define TARGET_FUNCTION_ARG aarch64_function_arg
> >
> > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> > index a014a01..d5f33d8 100644
> > --- a/gcc/config/aarch64/aarch64.md
> > +++ b/gcc/config/aarch64/aarch64.md
> > @@ -6087,6 +6087,30 @@
> >DONE;
> >  })
> >
> > +;; This is broadly similar to the builtins.c except that it uses
> > +;; temporaries to load the incoming SP and FP.
> > +(define_expand "nonlocal_goto"
> > +  [(use (match_operand 0 "general_operand"))
> > +   (use (match_operand 1 "general_operand"))
> > +   (use (match_operand 2 "general_operand"))
> > +   (use (match_operand 3 "general_operand"))]
> > +  ""
> > +{
> > +rtx label_in = copy_to_reg (operands[1]);
> > +rtx fp_in = copy_to_reg (operands[3

Re: [PATCH] move the (a-b) CMP 0 ? (a-b) : (b-a) optimization from fold_cond_expr_with_comparison to match

2023-10-19 Thread Andrew Pinski
On Mon, Jul 12, 2021 at 4:47 AM Richard Biener via Gcc-patches
 wrote:
>
> On Sun, Jul 11, 2021 at 4:12 AM apinski--- via Gcc-patches
>  wrote:
> >
> > From: Andrew Pinski 
> >
> > This patch moves the (a-b) CMP 0 ? (a-b) : (b-a) optimization
> > from fold_cond_expr_with_comparison to match.
>
> So I searched and I guess these transforms are produced from
>
>   /* If we have A op 0 ? A : -A, consider applying the following
>  transformations:
>
>  A == 0? A : -Asame as -A
>  A != 0? A : -Asame as A
>  A >= 0? A : -Asame as abs (A)
>  A > 0?  A : -Asame as abs (A)
>  A <= 0? A : -Asame as -abs (A)
>  A < 0?  A : -Asame as -abs (A)
>
>  None of these transformations work for modes with signed
>  zeros.  If A is +/-0, the first two transformations will
>  change the sign of the result (from +0 to -0, or vice
>  versa).  The last four will fix the sign of the result,
>  even though the original expressions could be positive or
>  negative, depending on the sign of A.
>
>  Note that all these transformations are correct if A is
>  NaN, since the two alternatives (A and -A) are also NaNs.  */
>   if (!HONOR_SIGNED_ZEROS (type)
>   && (FLOAT_TYPE_P (TREE_TYPE (arg01))
>   ? real_zerop (arg01)
>   : integer_zerop (arg01))
>   && ((TREE_CODE (arg2) == NEGATE_EXPR
>&& operand_equal_p (TREE_OPERAND (arg2, 0), arg1, 0))
>  /* In the case that A is of the form X-Y, '-A' (arg2) may
> have already been folded to Y-X, check for that. */
>   || (TREE_CODE (arg1) == MINUS_EXPR
>   && TREE_CODE (arg2) == MINUS_EXPR
>   && operand_equal_p (TREE_OPERAND (arg1, 0),
>   TREE_OPERAND (arg2, 1), 0)
>   && operand_equal_p (TREE_OPERAND (arg1, 1),
>   TREE_OPERAND (arg2, 0), 0
> ...
>
> I wonder at which point we can remove the code from fold-const.c?

I have to double check if after an updated patch, if that code does
anything that match does not do.
I will do that before I submit an updated patch.

>
> Some comments inline below.
>
> > OK? Bootstrapped and tested on x86_64-linux-gnu.
> >
> > gcc/ChangeLog:
> >
> > * match.pd ((A-B) CMP 0 ? (A-B) : (B - A)):
> > New patterns.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.dg/tree-ssa/phi-opt-25.c: New test.
> > ---
> >  gcc/match.pd   | 48 --
> >  gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c | 45 
> >  2 files changed, 90 insertions(+), 3 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 30680d488ab..aa88381fdcb 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -4040,9 +4040,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >(cnd (logical_inverted_value truth_valued_p@0) @1 @2)
> >(cnd @0 @2 @1)))
> >
> > -/* abs/negative simplifications moved from fold_cond_expr_with_comparison,
> > -   Need to handle (A - B) case as fold_cond_expr_with_comparison does.
> > -   Need to handle UN* comparisons.
> > +/* abs/negative simplifications moved from fold_cond_expr_with_comparison.
> >
> > None of these transformations work for modes with signed
> > zeros.  If A is +/-0, the first two transformations will
> > @@ -4098,6 +4096,50 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > (convert (negate (absu:utype @0
> > (negate (abs @0)
> >   )
> > +
> > + /* (A - B) == 0 ? (A - B) : (B - A)same as (B - A) */
> > + (for cmp (eq uneq)
> > +  (simplify
> > +   (cnd (cmp (minus@0 @1 @2) zerop) @0 (minus@3 @2 @1))
> > +(if (!HONOR_SIGNED_ZEROS (type))
> > + @3))
> > +  (simplify
> > +   (cnd (cmp (minus@0 @1 @2) zerop) integer_zerop (minus@3 @2 @1))
>
> So that makes me think why integer_zerop?  'type' should then be
> integer and thus never HONOR_SIGNED_ZEROS.

yes that should be done.

>
> Don't we also need the inverted condition case for completeness?

Yes we should. Though for phiopt we don't.


>
> > +(if (!HONOR_SIGNED_ZEROS (type))
> > + @3))
> > +  (simplify
> > +   (cnd (cmp @1 @2) integer_zerop (minus@3 @2 @1))
>
> I think this needs to be (cmp:c @1 @2)

This is now actually handled already by r14-3606-g3d86e7f4a8ae so I removed 

Re: [PATCH 2/2] c++: remove NON_DEPENDENT_EXPR, part 2

2023-10-20 Thread Andrew Pinski
On Mon, Sep 25, 2023 at 1:43 PM Patrick Palka  wrote:
>
> This much more mechanical patch removes build_non_dependent_expr
> (and make_args_non_dependent) and adjusts callers accordingly,
> no functional change.


This broke the Rust front-end, which decided to copy/reuse the C++ code
for constexpr without modifying it to exclude trees it does not use.
In this case NON_DEPENDENT_EXPR was removed and now the Rust front-end
is broken.

Thanks,
Andrew

>
> gcc/cp/ChangeLog:
>
> * call.cc (build_new_method_call): Remove calls to
> build_non_dependent_expr and/or make_args_non_dependent.
> * coroutines.cc (finish_co_return_stmt): Likewise.
> * cp-tree.h (build_non_dependent_expr): Remove.
> (make_args_non_dependent): Remove.
> * decl2.cc (grok_array_decl): Remove calls to
> build_non_dependent_expr and/or make_args_non_dependent.
> (build_offset_ref_call_from_tree): Likewise.
> * init.cc (build_new): Likewise.
> * pt.cc (make_args_non_dependent): Remove.
> (test_build_non_dependent_expr): Remove.
> (cp_pt_cc_tests): Adjust.
> * semantics.cc (finish_expr_stmt): Remove calls to
> build_non_dependent_expr and/or make_args_non_dependent.
> (finish_for_expr): Likewise.
> (finish_call_expr): Likewise.
> (finish_omp_atomic): Likewise.
> * typeck.cc (finish_class_member_access_expr): Likewise.
> (build_x_indirect_ref): Likewise.
> (build_x_binary_op): Likewise.
> (build_x_array_ref): Likewise.
> (build_x_vec_perm_expr): Likewise.
> (build_x_shufflevector): Likewise.
> (build_x_unary_op): Likewise.
> (cp_build_addressof): Likewise.
> (build_x_conditional_expr):
> (build_x_compound_expr): Likewise.
> (build_static_cast): Likewise.
> (build_x_modify_expr): Likewise.
> (check_return_expr): Likewise.
> * typeck2.cc (build_x_arrow): Likewise.
> ---
>  gcc/cp/call.cc   |  7 +--
>  gcc/cp/coroutines.cc |  3 ---
>  gcc/cp/cp-tree.h |  2 --
>  gcc/cp/decl2.cc  | 17 +++-
>  gcc/cp/init.cc   |  5 -
>  gcc/cp/pt.cc | 46 
>  gcc/cp/semantics.cc  | 25 ++--
>  gcc/cp/typeck.cc | 31 -
>  gcc/cp/typeck2.cc|  1 -
>  9 files changed, 6 insertions(+), 131 deletions(-)
>
> diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
> index e8dafbd8ba6..15079ddf6dc 100644
> --- a/gcc/cp/call.cc
> +++ b/gcc/cp/call.cc
> @@ -11430,12 +11430,7 @@ build_new_method_call (tree instance, tree fns, 
> vec **args,
>  }
>
>if (processing_template_decl)
> -{
> -  orig_args = args == NULL ? NULL : make_tree_vector_copy (*args);
> -  instance = build_non_dependent_expr (instance);
> -  if (args != NULL)
> -   make_args_non_dependent (*args);
> -}
> +orig_args = args == NULL ? NULL : make_tree_vector_copy (*args);
>
>/* Process the argument list.  */
>if (args != NULL && *args != NULL)
> diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
> index df3cc820797..a5464becf7f 100644
> --- a/gcc/cp/coroutines.cc
> +++ b/gcc/cp/coroutines.cc
> @@ -1351,9 +1351,6 @@ finish_co_return_stmt (location_t kw, tree expr)
>  to undo it so we can try to treat it as an rvalue below.  */
>expr = maybe_undo_parenthesized_ref (expr);
>
> -  if (processing_template_decl)
> -   expr = build_non_dependent_expr (expr);
> -
>if (error_operand_p (expr))
> return error_mark_node;
>  }
> diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
> index 66b9a9c4b9a..8b9a7d58462 100644
> --- a/gcc/cp/cp-tree.h
> +++ b/gcc/cp/cp-tree.h
> @@ -7488,8 +7488,6 @@ extern bool any_value_dependent_elements_p  
> (const_tree);
>  extern bool dependent_omp_for_p(tree, tree, tree, 
> tree);
>  extern tree resolve_typename_type  (tree, bool);
>  extern tree template_for_substitution  (tree);
> -inline tree build_non_dependent_expr   (tree t) { return t; } // XXX 
> remove
> -extern void make_args_non_dependent(vec *);
>  extern bool reregister_specialization  (tree, tree, tree);
>  extern tree instantiate_non_dependent_expr (tree, tsubst_flags_t = 
> tf_error);
>  extern tree instantiate_non_dependent_expr_internal (tree, tsubst_flags_t);
> diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc
> index 344e19ec98b..0aa1e355972 100644
> --- a/gcc/cp/decl2.cc
> +++ b/gcc/cp/decl2.cc
> @@ -427,14 +427,8 @@ grok_array_decl (location_t loc, tree array_expr, tree 
> index_exp,
>   return build_min_nt_loc (loc, ARRAY_REF, array_expr, index_exp,
>NULL_TREE, NULL_TREE);
> }
> -  array_expr = build_non_dependent_expr (array_expr);
> -  if (index_exp)
> -   index_exp = build_non_dependent_expr (index_exp);
> -  else
> 

[PATCH] convert_to_complex vs invalid_conversion [PR111903]

2023-10-21 Thread Andrew Pinski
convert_to_complex, when creating a COMPLEX_EXPR, does
not currently check whether either the real or imag part
is error_mark_node. This later on confuses the gimplifier
when there is a SAVE_EXPR wrapped around that COMPLEX_EXPR.
The simple fix is, after calling convert inside convert_to_complex_1,
to check whether either result is an error_operand and return
error_mark_node in that case.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR c/111903

gcc/ChangeLog:

* convert.cc (convert_to_complex_1): Return
error_mark_node if either convert was an error
when converting from a scalar.

gcc/testsuite/ChangeLog:

* gcc.target/i386/float16-8.c: New test.
---
 gcc/convert.cc|  9 +++--
 gcc/testsuite/gcc.target/i386/float16-8.c | 12 
 2 files changed, 19 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/float16-8.c

diff --git a/gcc/convert.cc b/gcc/convert.cc
index 80d86fe3708..ac6af7026a7 100644
--- a/gcc/convert.cc
+++ b/gcc/convert.cc
@@ -1006,8 +1006,13 @@ convert_to_complex_1 (tree type, tree expr, bool fold_p)
 case ENUMERAL_TYPE:
 case BOOLEAN_TYPE:
 case BITINT_TYPE:
-  return build2 (COMPLEX_EXPR, type, convert (subtype, expr),
-convert (subtype, integer_zero_node));
+  {
+   tree real = convert (subtype, expr);
+   tree imag = convert (subtype, integer_zero_node);
+   if (error_operand_p (real) || error_operand_p (imag))
+ return error_mark_node;
+   return build2 (COMPLEX_EXPR, type, real, imag);
+  }
 
 case COMPLEX_TYPE:
   {
diff --git a/gcc/testsuite/gcc.target/i386/float16-8.c 
b/gcc/testsuite/gcc.target/i386/float16-8.c
new file mode 100644
index 000..003f82e7146
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/float16-8.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mno-sse" } */
+/* PR c/111903 */
+
+int i;
+_Float16 f;
+int bar(...);
+void
+foo (void)
+{
+  i /= bar ((_Complex _Float16) f); /* { dg-error "" } */
+}
-- 
2.39.3



Re: [PATCH] move the (a-b) CMP 0 ? (a-b) : (b-a) optimization from fold_cond_expr_with_comparison to match

2023-10-21 Thread Andrew Pinski
On Thu, Oct 19, 2023 at 10:13 PM Andrew Pinski  wrote:
>
> On Mon, Jul 12, 2021 at 4:47 AM Richard Biener via Gcc-patches
>  wrote:
> >
> > On Sun, Jul 11, 2021 at 4:12 AM apinski--- via Gcc-patches
> >  wrote:
> > >
> > > From: Andrew Pinski 
> > >
> > > This patch moves the (a-b) CMP 0 ? (a-b) : (b-a) optimization
> > > from fold_cond_expr_with_comparison to match.
> >
> > So I searched and I guess these transforms are produced from
> >
> >   /* If we have A op 0 ? A : -A, consider applying the following
> >  transformations:
> >
> >  A == 0? A : -Asame as -A
> >  A != 0? A : -Asame as A
> >  A >= 0? A : -Asame as abs (A)
> >  A > 0?  A : -Asame as abs (A)
> >  A <= 0? A : -Asame as -abs (A)
> >  A < 0?  A : -Asame as -abs (A)
> >
> >  None of these transformations work for modes with signed
> >  zeros.  If A is +/-0, the first two transformations will
> >  change the sign of the result (from +0 to -0, or vice
> >  versa).  The last four will fix the sign of the result,
> >  even though the original expressions could be positive or
> >  negative, depending on the sign of A.
> >
> >  Note that all these transformations are correct if A is
> >  NaN, since the two alternatives (A and -A) are also NaNs.  */
> >   if (!HONOR_SIGNED_ZEROS (type)
> >   && (FLOAT_TYPE_P (TREE_TYPE (arg01))
> >   ? real_zerop (arg01)
> >   : integer_zerop (arg01))
> >   && ((TREE_CODE (arg2) == NEGATE_EXPR
> >&& operand_equal_p (TREE_OPERAND (arg2, 0), arg1, 0))
> >  /* In the case that A is of the form X-Y, '-A' (arg2) may
> > have already been folded to Y-X, check for that. */
> >   || (TREE_CODE (arg1) == MINUS_EXPR
> >   && TREE_CODE (arg2) == MINUS_EXPR
> >   && operand_equal_p (TREE_OPERAND (arg1, 0),
> >   TREE_OPERAND (arg2, 1), 0)
> >   && operand_equal_p (TREE_OPERAND (arg1, 1),
> >   TREE_OPERAND (arg2, 0), 0
> > ...
> >
> > I wonder at which point we can remove the code from fold-const.c?
>
> I have to double check if after an updated patch, if that code does
> anything that match does not do.
> I will do that before I submit an updated patch.

I looked and the main thing left is solving the stripping of sign nops
that happens at the beginning of fold_cond_expr_with_comparison.
I did solve part of that with the recent
r14-4662-gc7609acb8a8210188d21b2cd7 but not with these new patterns; I
will solve that in a separate patch.

Thanks,
Andrew Pinski

>
> >
> > Some comments inline below.
> >
> > > OK? Bootstrapped and tested on x86_64-linux-gnu.
> > >
> > > gcc/ChangeLog:
> > >
> > > * match.pd ((A-B) CMP 0 ? (A-B) : (B - A)):
> > > New patterns.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * gcc.dg/tree-ssa/phi-opt-25.c: New test.
> > > ---
> > >  gcc/match.pd   | 48 --
> > >  gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c | 45 
> > >  2 files changed, 90 insertions(+), 3 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c
> > >
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index 30680d488ab..aa88381fdcb 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -4040,9 +4040,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >(cnd (logical_inverted_value truth_valued_p@0) @1 @2)
> > >(cnd @0 @2 @1)))
> > >
> > > -/* abs/negative simplifications moved from 
> > > fold_cond_expr_with_comparison,
> > > -   Need to handle (A - B) case as fold_cond_expr_with_comparison does.
> > > -   Need to handle UN* comparisons.
> > > +/* abs/negative simplifications moved from 
> > > fold_cond_expr_with_comparison.
> > >
> > > None of these transformations work for modes with signed
> > > zeros.  If A is +/-0, the first two transformations will
> > > @@ -4098,6 +4096,50 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > > (convert (negate (absu:utype @0
> > > (negate (abs @0)
> > >   )
> > > +
> > > + /* (A - B) == 0 ? (A - B) : (B - A)same as (B - A) */
> > >

[PATCHv2] move the (a-b) CMP 0 ? (a-b) : (b-a) optimization from fold_cond_expr_with_comparison to match

2023-10-21 Thread Andrew Pinski
From: Andrew Pinski 

This patch moves the `(a-b) CMP 0 ? (a-b) : (b-a)` optimization
from fold_cond_expr_with_comparison to match.

Bootstrapped and tested on x86_64-linux-gnu.

Changes in:
v2: Removes `(a == b) ? 0 : (b - a)` handling since it was handled
via r14-3606-g3d86e7f4a8ae
Change zerop to integer_zerop for `(a - b) == 0 ? 0 : (b - a)`,
Add `(a - b) != 0 ? (a - b) : 0` handling.

gcc/ChangeLog:

* match.pd (`(A - B) CMP 0 ? (A - B) : (B - A)`):
New patterns.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-38.c: New test.
---
 gcc/match.pd   | 46 --
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-38.c | 45 +
 2 files changed, 88 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-38.c

diff --git a/gcc/match.pd b/gcc/match.pd
index a56838fb388..ce8d159d260 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5650,9 +5650,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (cnd (logical_inverted_value truth_valued_p@0) @1 @2)
   (cnd @0 @2 @1)))
 
-/* abs/negative simplifications moved from fold_cond_expr_with_comparison,
-   Need to handle (A - B) case as fold_cond_expr_with_comparison does.
-   Need to handle UN* comparisons.
+/* abs/negative simplifications moved from fold_cond_expr_with_comparison.
 
None of these transformations work for modes with signed
zeros.  If A is +/-0, the first two transformations will
@@ -5717,6 +5715,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(convert (negate (absu:utype @0
(negate (abs @0)
  )
+
 + /* (A - B) == 0 ? (A - B) : (B - A)  same as (B - A) */
+ (for cmp (eq uneq)
+  (simplify
+   (cnd (cmp (minus@0 @1 @2) zerop) @0 (minus@3 @2 @1))
+   (if (!HONOR_SIGNED_ZEROS (type))
+@3))
+  (simplify
+   (cnd (cmp (minus@0 @1 @2) integer_zerop) integer_zerop (minus@3 @2 @1))
+   @3)
+ )
 + /* (A - B) != 0 ? (A - B) : (B - A)  same as (A - B) */
+ (for cmp (ne ltgt)
+  (simplify
+   (cnd (cmp (minus@0 @1 @2) zerop) @0 (minus @2 @1))
+   (if (!HONOR_SIGNED_ZEROS (type))
+@0))
+  (simplify
+   (cnd (cmp (minus@0 @1 @2) integer_zerop) @0 integer_zerop)
+   @0)
+ )
 + /* (A - B) >=/> 0 ? (A - B) : (B - A)  same as abs (A - B) */
+ (for cmp (ge gt)
+  (simplify
+   (cnd (cmp (minus@0 @1 @2) zerop) @0 (minus @2 @1))
+   (if (!HONOR_SIGNED_ZEROS (type)
+   && !TYPE_UNSIGNED (type))
+(abs @0
 + /* (A - B) <=/< 0 ? (A - B) : (B - A)  same as -abs (A - B) */
+ (for cmp (le lt)
+  (simplify
+   (cnd (cmp (minus@0 @1 @2) zerop) @0 (minus @2 @1))
+   (if (!HONOR_SIGNED_ZEROS (type)
+   && !TYPE_UNSIGNED (type))
+(if (ANY_INTEGRAL_TYPE_P (type)
+&& !TYPE_OVERFLOW_WRAPS (type))
+ (with {
+tree utype = unsigned_type_for (type);
+  }
+  (convert (negate (absu:utype @0
+  (negate (abs @0)
+ )
 )
 
 /* -(type)!A -> (type)A - 1.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-38.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-38.c
new file mode 100644
index 000..0f0e3170f8d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-38.c
@@ -0,0 +1,45 @@
+/* { dg-options "-O2 -fno-signed-zeros -fdump-tree-phiopt" } */
+int minus1(int a, int b)
+{
+  int c = a - b;
+  if (c == 0) c = b - a;
+  return c;
+}
+int minus2(int a, int b)
+{
+  int c = a - b;
+  if (c != 0) c = b - a;
+  return c;
+}
+int minus3(int a, int b)
+{
+  int c = a - b;
+  if (c == 0) c = 0;
+  else c = b - a;
+  return c;
+}
+int minus4(int a, int b)
+{
+  int c;
+  if (a == b) c = 0;
+  else
+c = b - a;
+  return c;
+}
+int abs0(int a, int b)
+{
+  int c = a - b;
+  if (c <= 0) c = b - a;
+  return c;
+}
+int negabs(int a, int b)
+{
+  int c = a - b;
+  if (c >= 0) c = b - a;
+  return c;
+}
+
+/* The above should be optimized at phiopt1 except for negabs which has to wait
+  until phiopt2 as -abs is not acceptable in early phiopt.  */
+/* { dg-final { scan-tree-dump-times "if" 1  "phiopt1"  } } */
+/* { dg-final { scan-tree-dump-not "if" "phiopt2" } } */
-- 
2.39.3



Re: [PATCH] gcc.c-torture/execute/builtins/fputs.c: Define _GNU_SOURCE

2023-10-22 Thread Andrew Pinski
On Sun, Oct 22, 2023 at 12:47 AM Florian Weimer  wrote:
>
> Current glibc headers only declare fputs_unlocked for _GNU_SOURCE.
> Defining the macro avoids an implicit function declaration.

This does not help targets that don't use glibc though.
Note for builtins testsuite there is a lib-fputs.c file which will
define a fputs_unlock which is how it will link even if the libc does
not define a fputs_unlock.

Thanks,
Andrew Pinski

>
> gcc/testsuite/
>
> * gcc.c-torture/execute/builtins/fputs.c (_GNU_SOURCE):
> Define.
>
> ---
>  gcc/testsuite/gcc.c-torture/execute/builtins/fputs.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/gcc/testsuite/gcc.c-torture/execute/builtins/fputs.c 
> b/gcc/testsuite/gcc.c-torture/execute/builtins/fputs.c
> index 93fa9736449..13e30724355 100644
> --- a/gcc/testsuite/gcc.c-torture/execute/builtins/fputs.c
> +++ b/gcc/testsuite/gcc.c-torture/execute/builtins/fputs.c
> @@ -5,6 +5,7 @@
>
> Written by Kaveh R. Ghazi, 10/30/2000.  */
>
> +#define _GNU_SOURCE /* For fputs_unlocked.  */
>  #include 
>  extern void abort(void);
>
>


[PATCH] Use error_mark_node after error in convert

2023-10-22 Thread Andrew Pinski
While working on PR c/111903, I noticed that
convert will convert integer_zero_node to that
type after an error instead of returning error_mark_node.
From what I can tell this was the old way of not having
error recovery since other places in this file do return
error_mark_node and the places I am replacing date from
when the file was imported into the repo (either via a gcc2 merge
or earlier).

I also had to update the objc front-end to allow for the error_mark_node
change, I suspect you could hit the ICE without this change though.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* convert.cc (convert_to_pointer_1): Return error_mark_node
after an error.
(convert_to_real_1): Likewise.
(convert_to_integer_1): Likewise.
(convert_to_complex_1): Likewise.

gcc/objc/ChangeLog:

* objc-gnu-runtime-abi-01.cc (build_objc_method_call): Allow
for error_operand after call to build_c_cast.
* objc-next-runtime-abi-01.cc (build_objc_method_call): Likewise.
* objc-next-runtime-abi-02.cc (build_v2_build_objc_method_call): 
Likewise.
---
 gcc/convert.cc   | 12 ++--
 gcc/objc/objc-gnu-runtime-abi-01.cc  |  3 +++
 gcc/objc/objc-next-runtime-abi-01.cc |  3 +++
 gcc/objc/objc-next-runtime-abi-02.cc |  3 +++
 4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/gcc/convert.cc b/gcc/convert.cc
index 5357609d8f0..ac6af7026a7 100644
--- a/gcc/convert.cc
+++ b/gcc/convert.cc
@@ -96,7 +96,7 @@ convert_to_pointer_1 (tree type, tree expr, bool fold_p)
 
 default:
   error ("cannot convert to a pointer type");
-  return convert_to_pointer_1 (type, integer_zero_node, fold_p);
+  return error_mark_node;
 }
 }
 
@@ -332,11 +332,11 @@ convert_to_real_1 (tree type, tree expr, bool fold_p)
 case POINTER_TYPE:
 case REFERENCE_TYPE:
   error ("pointer value used where a floating-point was expected");
-  return convert_to_real_1 (type, integer_zero_node, fold_p);
+  return error_mark_node;
 
 default:
   error ("aggregate value used where a floating-point was expected");
-  return convert_to_real_1 (type, integer_zero_node, fold_p);
+  return error_mark_node;
 }
 }
 
@@ -959,7 +959,7 @@ convert_to_integer_1 (tree type, tree expr, bool dofold)
 
 default:
   error ("aggregate value used where an integer was expected");
-  return convert (type, integer_zero_node);
+  return error_mark_node;
 }
 }
 
@@ -1053,11 +1053,11 @@ convert_to_complex_1 (tree type, tree expr, bool fold_p)
 case POINTER_TYPE:
 case REFERENCE_TYPE:
   error ("pointer value used where a complex was expected");
-  return convert_to_complex_1 (type, integer_zero_node, fold_p);
+  return error_mark_node;
 
 default:
   error ("aggregate value used where a complex was expected");
-  return convert_to_complex_1 (type, integer_zero_node, fold_p);
+  return error_mark_node;
 }
 }
 
diff --git a/gcc/objc/objc-gnu-runtime-abi-01.cc 
b/gcc/objc/objc-gnu-runtime-abi-01.cc
index fbf8307297a..6f45283b307 100644
--- a/gcc/objc/objc-gnu-runtime-abi-01.cc
+++ b/gcc/objc/objc-gnu-runtime-abi-01.cc
@@ -700,6 +700,9 @@ build_objc_method_call (location_t loc, int super_flag, 
tree method_prototype,
 
   lookup_object = build_c_cast (loc, rcv_p, lookup_object);
 
+  if (error_operand_p (lookup_object))
+return error_mark_node;
+
   /* Use SAVE_EXPR to avoid evaluating the receiver twice.  */
   lookup_object = save_expr (lookup_object);
 
diff --git a/gcc/objc/objc-next-runtime-abi-01.cc 
b/gcc/objc/objc-next-runtime-abi-01.cc
index 70ab5262e17..9e28976043e 100644
--- a/gcc/objc/objc-next-runtime-abi-01.cc
+++ b/gcc/objc/objc-next-runtime-abi-01.cc
@@ -846,6 +846,9 @@ build_objc_method_call (location_t loc, int super_flag, 
tree method_prototype,
 
   lookup_object = build_c_cast (loc, rcv_p, lookup_object);
 
+  if (error_operand_p (lookup_object))
+return error_mark_node;
+
   /* Use SAVE_EXPR to avoid evaluating the receiver twice.  */
   lookup_object = save_expr (lookup_object);
 
diff --git a/gcc/objc/objc-next-runtime-abi-02.cc 
b/gcc/objc/objc-next-runtime-abi-02.cc
index 6548c0078e0..723b47c9cf6 100644
--- a/gcc/objc/objc-next-runtime-abi-02.cc
+++ b/gcc/objc/objc-next-runtime-abi-02.cc
@@ -1729,6 +1729,9 @@ build_v2_build_objc_method_call (int super, tree 
method_prototype,
 
   lookup_object = build_c_cast (loc, rcv_p, lookup_object);
 
+  if (error_operand_p (lookup_object))
+return error_mark_node;
+
   /* Use SAVE_EXPR to avoid evaluating the receiver twice.  */
   lookup_object = save_expr (lookup_object);
 
-- 
2.39.3



[Committedv2] aarch64: [PR110986] Emit csinv again for `a ? ~b : b`

2023-10-22 Thread Andrew Pinski
After r14-3110-g7fb65f10285, the canonical form for
`a ? ~b : b` changed to be `-(a) ^ b` that means
for aarch64 we need to add a few new insn patterns
to be able to catch this and change it to be
what is the canonical form for the aarch64 backend.
A secondary pattern was needed to support a zero_extended
form too; this adds a testcase for all 3 cases.

Bootstrapped and tested on aarch64-linux-gnu with no regressions.

Committed as approved.

PR target/110986

gcc/ChangeLog:

* config/aarch64/aarch64.md (*cmov_insn_insv): New pattern.
(*cmov_uxtw_insn_insv): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/cond_op-1.c: New test.
---
 gcc/config/aarch64/aarch64.md| 47 
 gcc/testsuite/gcc.target/aarch64/cond_op-1.c | 20 +
 2 files changed, 67 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/cond_op-1.c

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index e6af09c2e8b..5bb8c772be8 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4413,6 +4413,53 @@ (define_insn "*csinv3_uxtw_insn3"
   [(set_attr "type" "csel")]
 )
 
+;; There are two canonical forms for `cmp ? ~a : a`.
+;; This is the second form and is here to help combine.
+;; Support `-(cmp) ^ a` into `cmp ? ~a : a`
+;; The second pattern is to support the zero extend'ed version.
+
+(define_insn_and_split "*cmov_insn_insv"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+(xor:GPI
+(neg:GPI
+ (match_operator:GPI 1 "aarch64_comparison_operator"
+  [(match_operand 2 "cc_register" "") (const_int 0)]))
+(match_operand:GPI 3 "general_operand" "r")))]
+  ""
+  "#"
+  "&& true"
+  [(set (match_dup 0)
+   (if_then_else:GPI (match_dup 1)
+ (not:GPI (match_dup 3))
+ (match_dup 3)))]
+  {
+/* After reload this will be a nop due to the constraint.  */
+operands[3] = force_reg (mode, operands[3]);
+  }
+  [(set_attr "type" "csel")]
+)
+
+(define_insn_and_split "*cmov_uxtw_insn_insv"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+(zero_extend:DI
+(xor:SI
+ (neg:SI
+  (match_operator:SI 1 "aarch64_comparison_operator"
+   [(match_operand 2 "cc_register" "") (const_int 0)]))
+ (match_operand:SI 3 "general_operand" "r"]
+  "can_create_pseudo_p ()"
+  "#"
+  "&& true"
+  [(set (match_dup 0)
+   (if_then_else:DI (match_dup 1)
+ (zero_extend:DI (not:SI (match_dup 3)))
+ (zero_extend:DI (match_dup 3]
+  {
+operands[3] = force_reg (SImode, operands[3]);
+  }
+  [(set_attr "type" "csel")]
+)
+
 ;; If X can be loaded by a single CNT[BHWD] instruction,
 ;;
 ;;A = UMAX (B, X)
diff --git a/gcc/testsuite/gcc.target/aarch64/cond_op-1.c 
b/gcc/testsuite/gcc.target/aarch64/cond_op-1.c
new file mode 100644
index 000..e6c7821127e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/cond_op-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* PR target/110986 */
+
+
+long long full(unsigned a, unsigned b)
+{
+  return a ? ~b : b;
+}
+unsigned fuu(unsigned a, unsigned b)
+{
+  return a ? ~b : b;
+}
+long long f(unsigned long long a, unsigned long long b)
+{
+  return a ? ~b : b;
+}
+
+/* { dg-final { scan-assembler-times "csinv\tw\[0-9\]*" 2 } } */
+/* { dg-final { scan-assembler-times "csinv\tx\[0-9\]*" 1 } } */
-- 
2.39.3



[PATCH] match: Fix the `popcnt(a&b) + popcnt(a|b)` patthern for types [PR111913]

2023-10-23 Thread Andrew Pinski
So this pattern needs a little help on the gimple side of things to know what
the type popcount should be. For most builtins, the type is the same as the 
input
but popcount and others are not. And when using it with another outer 
expression,
genmatch needs some slight help to know that the return type was type rather 
than
the argument type.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/111913

gcc/ChangeLog:

* match.pd (`popcount(X&Y) + popcount(X|Y)`): Add the resulting
type for popcount.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/fold-popcount-1.c: New test.
* gcc.dg/fold-popcount-8a.c: New test.
---
 gcc/match.pd  |  2 +-
 .../gcc.c-torture/compile/fold-popcount-1.c   | 13 
 gcc/testsuite/gcc.dg/fold-popcount-8a.c   | 33 +++
 3 files changed, 47 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/fold-popcount-1.c
 create mode 100644 gcc/testsuite/gcc.dg/fold-popcount-8a.c

diff --git a/gcc/match.pd b/gcc/match.pd
index ce8d159d260..f725a685863 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8600,7 +8600,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* popcount(X&Y) + popcount(X|Y) is popcount(x) + popcount(Y).  */
 (simplify
   (plus:c (POPCOUNT:s (bit_and:s @0 @1)) (POPCOUNT:s (bit_ior:cs @0 @1)))
-  (plus (POPCOUNT @0) (POPCOUNT @1)))
+  (plus (POPCOUNT:type @0) (POPCOUNT:type @1)))
 
 /* popcount(X) + popcount(Y) - popcount(X&Y) is popcount(X|Y).  */
 /* popcount(X) + popcount(Y) - popcount(X|Y) is popcount(X&Y).  */
diff --git a/gcc/testsuite/gcc.c-torture/compile/fold-popcount-1.c 
b/gcc/testsuite/gcc.c-torture/compile/fold-popcount-1.c
new file mode 100644
index 000..d3d3a2976e0
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/fold-popcount-1.c
@@ -0,0 +1,13 @@
+/* PR tree-optimization/111913 */
+
+int f(unsigned int x, unsigned int y)
+{
+  return __builtin_popcount (x&y) + __builtin_popcount (y|x--);
+}
+
+int f2(unsigned int x, unsigned int y)
+{
+  int t = __builtin_popcount (x&y);
+  int t1 = __builtin_popcount (x|y);
+  return t + t1;
+}
diff --git a/gcc/testsuite/gcc.dg/fold-popcount-8a.c 
b/gcc/testsuite/gcc.dg/fold-popcount-8a.c
new file mode 100644
index 000..3001522f259
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-popcount-8a.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo1(unsigned int x, unsigned int y)
+{
+  int t = __builtin_popcount (x&y);
+  int t1 = __builtin_popcount (x|y);
+  return t + t1;
+}
+
+int foo2(unsigned int x, unsigned int y)
+{
+  int t1 = __builtin_popcount (x|y);
+  int t = __builtin_popcount (x&y);
+  return t + t1;
+}
+
+int foo3(unsigned int y, unsigned int x)
+{
+  int t = __builtin_popcount (x&y);
+  int t1 = __builtin_popcount (x|y);
+  return t + t1;
+}
+
+int foo4(unsigned int y, unsigned int x)
+{
+  int t1 = __builtin_popcount (x|y);
+  int t = __builtin_popcount (x&y);
+  return t + t1;
+}
+
+/* { dg-final { scan-tree-dump-not " & " "optimized" } } */
+/* { dg-final { scan-tree-dump-not " \\| " "optimized" } } */
-- 
2.39.3



Re: Inquiry about ARM gcc5 CVE-2023-4039 Patch

2023-10-23 Thread Andrew Pinski
On Mon, Oct 23, 2023 at 7:54 PM 老小孩老小孩  wrote:
>
> Dear arms,
>
> I hope this message finds you well.
>
> I am writing to inquire about the issue of ARM gcc5 CVE-2023-4039. According 
> to the advisory on GitHub 
> (https://github.com/metaredteam/external-disclosures/security/advisories/GHSA-x7ch-h5rf-w2mf),
>  this bug affects versions from 5.4.0 to the trunk as of May 15, 2023.
>
> However, I noticed that currently, patches are only provided for gcc7 and 
> above, as per the information available on the ARM Security Center 
> (https://developer.arm.com/Arm%20Security%20Center/GCC%20Stack%20Protector%20Vulnerability%20AArch64).
>
> Given the potential impact of this vulnerability, I am particularly 
> interested in a patch for gcc5. Could you please provide information on 
> whether a patch for gcc5 is available or planned? If not, could you suggest 
> any possible workarounds or mitigation strategies for systems that are 
> currently using gcc5?
>
> I appreciate your attention to this matter and look forward to your response.

THIS should NEVER have been a security CVE in the first place.
This is not a security issue with any correct code that GCC will process.
GCC does not consider this a security issue according to its security policy.
See the "Security features implemented in GCC" section of
https://gcc.gnu.org/git/?p=gcc.git;a=blob_plain;f=SECURITY.txt;hb=HEAD
for more information on that policy.

Thanks,
Andrew Pinski

>
> Best regards,


[PATCHv2] Improve factor_out_conditional_operation for conversions and constants

2023-10-23 Thread Andrew Pinski
In the case of a NOP conversion (precisions of the 2 types are equal),
factoring out the conversion can be done even if int_fits_type_p returns
false and even when the conversion is defined by a statement inside the
conditional. Since it is a NOP conversion there is no zero/sign extending
happening which is why it is ok to be done here; we were trying to prevent
an extra sign/zero extend from being moved away from definition which no-op
conversions are not.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

PR tree-optimization/104376
PR tree-optimization/101541
* tree-ssa-phiopt.cc (factor_out_conditional_operation):
Allow nop conversions even if it is defined by a statement
inside the conditional.

gcc/testsuite/ChangeLog:

PR tree-optimization/101541
* gcc.dg/tree-ssa/phi-opt-39.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-39.c | 43 ++
 gcc/tree-ssa-phiopt.cc | 16 ++--
 2 files changed, 56 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-39.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-39.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-39.c
new file mode 100644
index 000..6b6006a96db
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-39.c
@@ -0,0 +1,43 @@
+/* { dg-options "-O2 -fdump-tree-phiopt" } */
+
+unsigned f0(int A)
+{
+// A == 0? A : -A  same as -A
+  if (A == 0)  return A;
+  return -A;
+}
+
+unsigned f1(int A)
+{
+// A != 0? A : -A  same as A
+  if (A != 0)  return A;
+  return -A;
+}
+unsigned f2(int A)
+{
+// A >= 0? A : -A  same as abs (A)
+  if (A >= 0)  return A;
+  return -A;
+}
+unsigned f3(int A)
+{
+// A > 0?  A : -A  same as abs (A)
+  if (A > 0)  return A;
+  return -A;
+}
+unsigned f4(int A)
+{
+// A <= 0? A : -A  same as -abs (A)
+  if (A <= 0)  return A;
+  return -A;
+}
+unsigned f5(int A)
+{
+// A < 0?  A : -A  same as -abs (A)
+  if (A < 0)  return A;
+  return -A;
+}
+
+/* f4 and f5 are not allowed to be optimized in early phi-opt. */
+/* { dg-final { scan-tree-dump-times "if" 2 "phiopt1" } } */
+/* { dg-final { scan-tree-dump-not "if" "phiopt2" } } */
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index 312a6f9082b..bb55a4fba33 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -310,7 +310,9 @@ factor_out_conditional_operation (edge e0, edge e1, gphi 
*phi,
return NULL;
   /* If arg1 is an INTEGER_CST, fold it to new type.  */
   if (INTEGRAL_TYPE_P (TREE_TYPE (new_arg0))
- && int_fits_type_p (arg1, TREE_TYPE (new_arg0)))
+ && (int_fits_type_p (arg1, TREE_TYPE (new_arg0))
+ || (TYPE_PRECISION (TREE_TYPE (new_arg0))
+  == TYPE_PRECISION (TREE_TYPE (arg1)
{
  if (gimple_assign_cast_p (arg0_def_stmt))
{
@@ -322,8 +324,12 @@ factor_out_conditional_operation (edge e0, edge e1, gphi 
*phi,
 if arg0_def_stmt is the only non-debug stmt in
 its basic block, because then it is possible this
 could enable further optimizations (minmax replacement
-etc.).  See PR71016.  */
- if (new_arg0 != gimple_cond_lhs (cond_stmt)
+etc.).  See PR71016.
+Note no-op conversions don't have this issue as
+it will not generate any zero/sign extend in that case.  */
+ if ((TYPE_PRECISION (TREE_TYPE (new_arg0))
+   != TYPE_PRECISION (TREE_TYPE (arg1)))
+ && new_arg0 != gimple_cond_lhs (cond_stmt)
  && new_arg0 != gimple_cond_rhs (cond_stmt)
  && gimple_bb (arg0_def_stmt) == e0->src)
{
@@ -354,6 +360,10 @@ factor_out_conditional_operation (edge e0, edge e1, gphi 
*phi,
return NULL;
}
  new_arg1 = fold_convert (TREE_TYPE (new_arg0), arg1);
+
+ /* Drop the overflow that fold_convert might add. */
+ if (TREE_OVERFLOW (new_arg1))
+   new_arg1 = drop_tree_overflow (new_arg1);
}
  else
return NULL;
-- 
2.34.1



[PATCH] match: Simplify `a != C1 ? abs(a) : C2` when C2 == abs(C1) [PR111957]

2023-10-24 Thread Andrew Pinski
This adds a match pattern for `a != C1 ? abs(a) : C2` which gets simplified
to `abs(a)`. if C1 was originally *_MIN then change it over to use absu instead
of abs.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/111957

gcc/ChangeLog:

* match.pd (`a != C1 ? abs(a) : C2`): New pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-40.c: New test.
---
 gcc/match.pd   | 10 +
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c | 25 ++
 2 files changed, 35 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 5df04ebba77..370ee35de52 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5622,6 +5622,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (wi::eq_p (wi::bit_not (wi::to_wide (@1)), wi::to_wide (@2)))
   @3))
 
+/* X != C1 ? abs(X) : C2 simplifies to abs(x) when abs(C1) == C2. */
+(for op (abs absu)
+ (simplify
+  (cond (ne @0 INTEGER_CST@1) (op@3 @0) INTEGER_CST@2)
+  (if (wi::abs (wi::to_wide (@1)) == wi::to_wide (@2))
+   (if (op != ABSU_EXPR && wi::only_sign_bit_p (wi::to_wide (@1)))
+(with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
+ (convert (absu:utype @0)))
+@3
+
 /* (X + 1) > Y ? -X : 1 simplifies to X >= Y ? -X : 1 when
X is unsigned, as when X + 1 overflows, X is -1, so -X == 1.  */
 (simplify
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c
new file mode 100644
index 000..a9011ce97fb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-phiopt" } */
+/* PR tree-optimization/111957 */
+
+int f(int a)
+{
+  if (a)
+return a > 0 ? a : -a;
+  return 0;
+}
+
+int f1(int x)
+{
+  int intmin = (-1u >> 1);
+  intmin = -intmin - 1;
+  if (x != intmin)
+return x > 0 ? x : -x;
+  return intmin;
+}
+
+/* { dg-final { scan-tree-dump-times "if " 1 "phiopt1" } } */
+/* { dg-final { scan-tree-dump-not "if " "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 2 "phiopt1" } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 1 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times "ABSU_EXPR <" 1 "phiopt2" } } */
-- 
2.34.1



[PATCH] Improve tree_expr_nonnegative_p by using the ranger [PR111959]

2023-10-24 Thread Andrew Pinski
I noticed we were missing optimizing `a / (1 << b)` when
we know that a is nonnegative but only due to ranger information.
This adds the use of the global ranger to tree_single_nonnegative_warnv_p
for SSA_NAME.
I didn't extend tree_single_nonnegative_warnv_p to use the ranger for floating
point nor to use the local ranger since I am not 100% sure it is safe where
all of the uses tree_expr_nonnegative_p would be safe.

Note pr80776-1.c testcase fails again due to vrp's bad handling of setting
global ranges from __builtin_unreachable. It just happened to be optimized
before due to global ranges not being used as much.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/111959

gcc/ChangeLog:

* fold-const.cc (tree_single_nonnegative_warnv_p): Use
the global range to see if the SSA_NAME was nonnegative.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/forwprop-42.c: New test.
* gcc.dg/pr80776-1.c: xfail and update comment.
---
 gcc/fold-const.cc   | 36 +++--
 gcc/testsuite/gcc.dg/pr80776-1.c|  8 ++---
 gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c | 15 +
 3 files changed, 46 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 40767736389..2a2a90230f5 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -15047,15 +15047,33 @@ tree_single_nonnegative_warnv_p (tree t, bool 
*strict_overflow_p, int depth)
   return RECURSE (TREE_OPERAND (t, 1)) && RECURSE (TREE_OPERAND (t, 2));
 
 case SSA_NAME:
-  /* Limit the depth of recursion to avoid quadratic behavior.
-This is expected to catch almost all occurrences in practice.
-If this code misses important cases that unbounded recursion
-would not, passes that need this information could be revised
-to provide it through dataflow propagation.  */
-  return (!name_registered_for_update_p (t)
- && depth < param_max_ssa_name_query_depth
- && gimple_stmt_nonnegative_warnv_p (SSA_NAME_DEF_STMT (t),
- strict_overflow_p, depth));
+  {
+   /* For integral types, query the global range if possible. */
+   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
+ {
+   value_range vr;
+   if (get_global_range_query ()->range_of_expr (vr, t)
+   && !vr.varying_p () && !vr.undefined_p ())
+ {
+   /* If the range is nonnegative, return true. */
+   if (vr.nonnegative_p ())
+ return true;
+
+   /* If the range is non-positive, then return false. */
+   if (vr.nonpositive_p ())
+ return false;
+ }
+ }
+   /* Limit the depth of recursion to avoid quadratic behavior.
+  This is expected to catch almost all occurrences in practice.
+  If this code misses important cases that unbounded recursion
+  would not, passes that need this information could be revised
+  to provide it through dataflow propagation.  */
+   return (!name_registered_for_update_p (t)
+   && depth < param_max_ssa_name_query_depth
+   && gimple_stmt_nonnegative_warnv_p (SSA_NAME_DEF_STMT (t),
+   strict_overflow_p, depth));
+  }
 
 default:
   return tree_simple_nonnegative_warnv_p (TREE_CODE (t), TREE_TYPE (t));
diff --git a/gcc/testsuite/gcc.dg/pr80776-1.c b/gcc/testsuite/gcc.dg/pr80776-1.c
index b9bce62d982..f3d47aeda36 100644
--- a/gcc/testsuite/gcc.dg/pr80776-1.c
+++ b/gcc/testsuite/gcc.dg/pr80776-1.c
@@ -18,14 +18,14 @@ Foo (void)
   if (! (0 <= i && i <= 99))
 __builtin_unreachable ();
 
-  /* Legacy evrp sets the range of i to [0, MAX] *before* the first 
conditional,
+  /* vrp1 sets the range of i to [0, MAX] *before* the first conditional,
  and to [0,99] *before* the second conditional.  This is because both
- evrp and VRP use trickery to set global ranges when this particular use of
+ vrp use trickery to set global ranges when this particular use of
  a __builtin_unreachable is in play (see uses of
  assert_unreachable_fallthru_edge_p).
 
- Setting these ranges at the definition site, causes VRP to remove the
+ Setting these ranges at the definition site, causes other passes to 
remove the
  unreachable code altogether, leaving the following sprintf unguarded.  
This
  causes the bogus warning below.  */
-  sprintf (number, "%d", i); /* { dg-bogus "writing" "" } */
+  sprintf (number, "%d", i); /* { dg-bogus "writing" "" { xfail *-*-* } } */
 }
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c 
b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c
new file mode 100644
index 000..4e5421ed4d4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/for

Re: [PATCH] match: Simplify `a != C1 ? abs(a) : C2` when C2 == abs(C1) [PR111957]

2023-10-26 Thread Andrew Pinski
On Thu, Oct 26, 2023 at 2:24 AM Richard Biener
 wrote:
>
> On Wed, Oct 25, 2023 at 5:37 AM Andrew Pinski  wrote:
> >
> > This adds a match pattern for `a != C1 ? abs(a) : C2` which gets simplified
> > to `abs(a)`. if C1 was originally *_MIN then change it over to use absu 
> > instead
> > of abs.
> >
> > Bootstrapped and tested on x86_64-linux-gnu with no regressions.
> >
> > PR tree-optimization/111957
> >
> > gcc/ChangeLog:
> >
> > * match.pd (`a != C1 ? abs(a) : C2`): New pattern.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.dg/tree-ssa/phi-opt-40.c: New test.
> > ---
> >  gcc/match.pd   | 10 +
> >  gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c | 25 ++
> >  2 files changed, 35 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 5df04ebba77..370ee35de52 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -5622,6 +5622,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >   (if (wi::eq_p (wi::bit_not (wi::to_wide (@1)), wi::to_wide (@2)))
> >@3))
> >
> > +/* X != C1 ? abs(X) : C2 simplifies to abs(x) when abs(C1) == C2. */
> > +(for op (abs absu)
> > + (simplify
> > +  (cond (ne @0 INTEGER_CST@1) (op@3 @0) INTEGER_CST@2)
> > +  (if (wi::abs (wi::to_wide (@1)) == wi::to_wide (@2))
>
> Why not use
>
>   (cond (ne @0 INTEGER_CST@1) (op@3 @0) @1)

Because that does not work for:
`a != -1 ? abs(a) : -1`
We want to match -1 and 1 here (in that order).
It just happens 0 and INT_MIN have the same value as their abs and
matches but we could expand it to match all others too.

Thanks,
Andrew

>
> ?  OK with that change.
>
> Richard.
>
> > +   (if (op != ABSU_EXPR && wi::only_sign_bit_p (wi::to_wide (@1)))
> > +(with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
> > + (convert (absu:utype @0)))
> > +@3
> > +
> >  /* (X + 1) > Y ? -X : 1 simplifies to X >= Y ? -X : 1 when
> > X is unsigned, as when X + 1 overflows, X is -1, so -X == 1.  */
> >  (simplify
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c 
> > b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c
> > new file mode 100644
> > index 000..a9011ce97fb
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-40.c
> > @@ -0,0 +1,25 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O1 -fdump-tree-phiopt" } */
> > +/* PR tree-optimization/111957 */
> > +
> > +int f(int a)
> > +{
> > +  if (a)
> > +return a > 0 ? a : -a;
> > +  return 0;
> > +}
> > +
> > +int f1(int x)
> > +{
> > +  int intmin = (-1u >> 1);
> > +  intmin = -intmin - 1;
> > +  if (x != intmin)
> > +return x > 0 ? x : -x;
> > +  return intmin;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "if " 1 "phiopt1" } } */
> > +/* { dg-final { scan-tree-dump-not "if " "phiopt2" } } */
> > +/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 2 "phiopt1" } } */
> > +/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 1 "phiopt2" } } */
> > +/* { dg-final { scan-tree-dump-times "ABSU_EXPR <" 1 "phiopt2" } } */
> > --
> > 2.34.1
> >


Re: [PATCH] Improve tree_expr_nonnegative_p by using the ranger [PR111959]

2023-10-26 Thread Andrew Pinski
On Thu, Oct 26, 2023 at 2:29 AM Richard Biener
 wrote:
>
> On Wed, Oct 25, 2023 at 5:51 AM Andrew Pinski  wrote:
> >
> > I noticed we were missing optimizing `a / (1 << b)` when
> > we know that a is nonnegative but only due to ranger information.
> > This adds the use of the global ranger to tree_single_nonnegative_warnv_p
> > for SSA_NAME.
> > I didn't extend tree_single_nonnegative_warnv_p to use the ranger for 
> > floating
> > point nor to use the local ranger since I am not 100% sure it is safe where
> > all of the uses tree_expr_nonnegative_p would be safe.
> >
> > Note pr80776-1.c testcase fails again due to vrp's bad handling of setting
> > global ranges from __builtin_unreachable. It just happened to be optimized
> > before due to global ranges not being used as much.
> >
> > Bootstrapped and tested on x86_64-linux-gnu with no regressions.
> >
> > PR tree-optimization/111959
> >
> > gcc/ChangeLog:
> >
> > * fold-const.cc (tree_single_nonnegative_warnv_p): Use
> > the global range to see if the SSA_NAME was nonnegative.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.dg/tree-ssa/forwprop-42.c: New test.
> > * gcc.dg/pr80776-1.c: xfail and update comment.
> > ---
> >  gcc/fold-const.cc   | 36 +++--
> >  gcc/testsuite/gcc.dg/pr80776-1.c|  8 ++---
> >  gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c | 15 +
> >  3 files changed, 46 insertions(+), 13 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c
> >
> > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> > index 40767736389..2a2a90230f5 100644
> > --- a/gcc/fold-const.cc
> > +++ b/gcc/fold-const.cc
> > @@ -15047,15 +15047,33 @@ tree_single_nonnegative_warnv_p (tree t, bool 
> > *strict_overflow_p, int depth)
> >return RECURSE (TREE_OPERAND (t, 1)) && RECURSE (TREE_OPERAND (t, 
> > 2));
> >
> >  case SSA_NAME:
> > -  /* Limit the depth of recursion to avoid quadratic behavior.
> > -This is expected to catch almost all occurrences in practice.
> > -If this code misses important cases that unbounded recursion
> > -would not, passes that need this information could be revised
> > -to provide it through dataflow propagation.  */
> > -  return (!name_registered_for_update_p (t)
> > - && depth < param_max_ssa_name_query_depth
> > - && gimple_stmt_nonnegative_warnv_p (SSA_NAME_DEF_STMT (t),
> > - strict_overflow_p, 
> > depth));
> > +  {
> > +   /* For integral types, querry the global range if possible. */
>
> query
>
> > +   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
> > + {
> > +   value_range vr;
> > +   if (get_global_range_query ()->range_of_expr (vr, t)
> > +   && !vr.varying_p () && !vr.undefined_p ())
> > + {
> > +   /* If the range is nonnegative, return true. */
> > +   if (vr.nonnegative_p ())
> > + return true;
> > +
> > +   /* If the range is non-positive, then return false. */
> > +   if (vr.nonpositive_p ())
> > + return false;
>
> That's testing for <= 0, nonnegative for >= 0.  This means when
> vr.nonpositive_p () the value could still be zero (and nonnegative),
> possibly be figured out by the recursion below.
>
> Since we don't have negative_p () do we want to test
> nonpositive_p () && nonzero_p () instead?

I was thinking about that when I was writing the patch.
If the ranger figured out the value was zero, nonnegative_p would have
returned true.
So while yes nonpositive_p() would return true but we already checked
nonnegative_p beforehand and the nonzero_p would not matter.
Now the question is if after nonnegative_p we check if the range could
contain 0 still is that worth the recursion. The whole idea of
returning false was to remove the need for recursion as much as possible.

Thanks,
Andrew


>
> OK with that change.
>
> Richard.
>
> > + }
> > + }
> > +   /* Limit the depth of recursion to avoid quadratic behavior.
> > +  This is expected to catch almost all occurrences in practice.
> > +  If this code misses important cases that unbounded recursion
> > +  would not, passes that need this information could be revised
> > +  to

Re: Ping: [PATCH v2 0/2] Replace intl/ with out-of-tree GNU gettext

2023-10-26 Thread Andrew Pinski
On Sun, Oct 15, 2023 at 12:40 PM Arsen Arsenović  wrote:
>
> Evening,
>
> Arsen Arsenović  writes:
>
> > Afternoon,
> >
> > This patch is a rebase and rewording of
> > https://inbox.sourceware.org/20230925150921.894157-1-ar...@aarsen.me/
> >
> > Changes since v1:
> > - Implement Brunos suggested changes to install.texi.
> > - Elaborate commit message in p2 (as requested by the Binutils
> >   maintainers).
> >
> > Arsen Arsenović (2):
> >   intl: remove, in favor of out-of-tree gettext
> >   *: add modern gettext
> >
>
> Ping on this patch series.

One comment from me. It would be nice to update install.texi in
gcc/doc/ to make a mention of this requirement for non-glibc hosts.

Thanks,
Andrew Pinski

>
> TIA, have a lovely night :-)
> --
> Arsen Arsenović


Re: [PATCH] testsuite, aarch64: Normalise options to aarch64.exp.

2023-10-26 Thread Andrew Pinski
On Thu, Oct 26, 2023 at 11:58 AM Iain Sandoe  wrote:
>
> tested on cfarm185 (aarch64-linux-gnu, xgene1) and with the aarch64
> Darwin prototype.  It is possible that some initial fallout could occur
> on some test setups (where the default has been catered for in some
> way) - but that should stabilize.  OK for trunk?

This fixes https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93619 I think.

Thanks,
Andrew

> thanks
> Iain
>
> --- 8< ---
>
> When the compiler is configured --with-cpu= and that is different from
> the baselines assumed, we see excess test fails (primarly in body code
> scans which are necessarily sensitive to costs).  To stabilize the
> testsuite against such changes, use aarch64-with-arch-dg-options ()
> to provide suitable consistent defaults.
>
> e.g. for --with-cpu=xgene1 we see over 100 excess fails which are
> removed by this change.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/aarch64.exp: Use aarch64-with-arch-dg-options
> to normaize the options to the tests in aarch64.exp.
>
> Signed-off-by: Iain Sandoe 
> ---
>  gcc/testsuite/gcc.target/aarch64/aarch64.exp | 9 +
>  1 file changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/aarch64.exp 
> b/gcc/testsuite/gcc.target/aarch64/aarch64.exp
> index bd306e3b288..7612ea704e5 100644
> --- a/gcc/testsuite/gcc.target/aarch64/aarch64.exp
> +++ b/gcc/testsuite/gcc.target/aarch64/aarch64.exp
> @@ -37,9 +37,10 @@ if ![info exists DEFAULT_CFLAGS] then {
>  # Initialize `dg'.
>  dg-init
>
> -# Main loop.
> -dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
> -   "" $DEFAULT_CFLAGS
> -
> +aarch64-with-arch-dg-options "" {
> +# Main loop.
> +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
> +   "" $DEFAULT_CFLAGS
> +}
>  # All done.
>  dg-finish
> --
> 2.39.2 (Apple Git-143)
>


Re: [PATCH htdocs v2] bugs: Mention -D_GLIBCXX_ASSERTIONS and -D_GLIBCXX_DEBUG

2023-10-26 Thread Andrew Pinski
On Thu, Oct 26, 2023 at 12:18 PM Sam James  wrote:
>
> These options both enabled more checking within the C++ standard library
> and can expose errors in submitted code.
>
> -D_GLIBCXX_DEBUG is mentioned separately because while we want people to try 
> it,
> it's not always feasible because it requires the whole program and any used
> libraries to also be built with it (as it breaks ABI).

One suggestion to this is also link to the libstdc++ manual on debug mode:
https://gcc.gnu.org/onlinedocs/libstdc++/manual/debug_mode.html

Thanks,
Andrew

>
> Signed-off-by: Sam James 
> ---
> v2: Improve phrasing for the types of checks and be less scornful about ABI 
> changes.
>
>  htdocs/bugs/index.html | 7 +++
>  1 file changed, 7 insertions(+)
>
> diff --git a/htdocs/bugs/index.html b/htdocs/bugs/index.html
> index da3d4c0d..162d846a 100644
> --- a/htdocs/bugs/index.html
> +++ b/htdocs/bugs/index.html
> @@ -56,6 +56,13 @@ makes a difference, or if compiling with 
> -fsanitize=undefined
>  produces any run-time errors, then your code is probably not correct.
>  
>
> +We also ask that for C++ code, users test their programs with
> +-D_GLIBCXX_ASSERTIONS. If you're able to rebuild the entire
> +program (including any libraries it uses, because it changes ABI), please do 
> try
> +-D_GLIBCXX_DEBUG which enables more thorough checking in parts 
> of
> +the C++ standard library. If either of these fail, this is a strong indicator
> +of an error in your code.
> +
>  Summarized bug reporting instructions
>
>  After this summary, you'll find detailed instructions that explain
> --
> 2.42.0
>


[PATCH] MATCH: Simplify `(X &| B) CMP X` if possible [PR 101590]

2023-10-26 Thread Andrew Pinski
From: Andrew Pinski 

I noticed we were missing these simplifications so let's add them.

This adds the following simplifications:
U & N <= U  -> true
U & N >  U  -> false
When U is known to be non-negative.

When N is also known to be non-negative, this is also true:
U | N <  U  -> false
U | N >= U  -> true

When N is a negative integer, the result flips and we get:
U | N <  U  -> true
U | N >= U  -> false

We could extend this later on to be the case where we know N
is nonconstant but is known to be negative.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/101590
PR tree-optimization/94884

gcc/ChangeLog:

* match.pd (`(X BIT_OP Y) CMP X`): New pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/bitcmp-1.c: New test.
* gcc.dg/tree-ssa/bitcmp-2.c: New test.
* gcc.dg/tree-ssa/bitcmp-3.c: New test.
* gcc.dg/tree-ssa/bitcmp-4.c: New test.
* gcc.dg/tree-ssa/bitcmp-5.c: New test.
* gcc.dg/tree-ssa/bitcmp-6.c: New test.
---
 gcc/match.pd | 24 +
 gcc/testsuite/gcc.dg/tree-ssa/bitcmp-1.c | 20 +++
 gcc/testsuite/gcc.dg/tree-ssa/bitcmp-2.c | 20 +++
 gcc/testsuite/gcc.dg/tree-ssa/bitcmp-3.c | 21 
 gcc/testsuite/gcc.dg/tree-ssa/bitcmp-4.c | 36 
 gcc/testsuite/gcc.dg/tree-ssa/bitcmp-5.c | 43 
 gcc/testsuite/gcc.dg/tree-ssa/bitcmp-6.c | 41 ++
 7 files changed, 205 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitcmp-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitcmp-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitcmp-3.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitcmp-4.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitcmp-5.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitcmp-6.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 5f6aeb07ac0..7d651a6582d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2707,6 +2707,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (if (TREE_INT_CST_LOW (@1) & 1)
{ constant_boolean_node (cmp == NE_EXPR, type); })))
 
+/*
+   U & N <= U  -> true
+   U & N >  U  -> false
+   U needs to be non-negative.
+
+   U | N <  U  -> false
+   U | N >= U  -> true
+   U and N need to be non-negative
+
+   U | N <  U  -> true
+   U | N >= U  -> false
+   U needs to be non-negative and N needs to be a negative constant.
+   */
+(for cmp   (lt  ge  le  gt )
+ bitop (bit_ior bit_ior bit_and bit_and)
+ (simplify
+  (cmp:c (bitop:c tree_expr_nonnegative_p@0 @1) @0)
+  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)))
+   (if (bitop == BIT_AND_EXPR || tree_expr_nonnegative_p (@1))
+{ constant_boolean_node (cmp == GE_EXPR || cmp == LE_EXPR, type); }
+/* The sign is opposite now so the comparison is swapped around. */
+(if (TREE_CODE (@1) == INTEGER_CST && wi::neg_p (wi::to_wide (@1)))
+ { constant_boolean_node (cmp == LT_EXPR, type); })
+
 /* Arguments on which one can call get_nonzero_bits to get the bits
possibly set.  */
 (match with_possible_nonzero_bits
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitcmp-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/bitcmp-1.c
new file mode 100644
index 000..f3d515bb2d6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitcmp-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* PR tree-optimization/101590 */
+
+int f_and_le(unsigned len) {
+  const unsigned N = 4;
+  unsigned newlen = len & -N;
+  return newlen <= len; // return 1
+}
+int f_or_ge(unsigned len) {
+  const unsigned N = 4;
+  unsigned newlen = len | -N;
+  return newlen >= len; // return 1
+}
+
+/* { dg-final { scan-tree-dump-not " <= " "optimized" } } */
+/* { dg-final { scan-tree-dump-not " >= " "optimized" } } */
+/* { dg-final { scan-tree-dump-not " & "  "optimized" } } */
+/* { dg-final { scan-tree-dump-not " \\\| " "optimized" } } */
+/* { dg-final { scan-tree-dump-times "return 1;" 2 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitcmp-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/bitcmp-2.c
new file mode 100644
index 000..d0031d9ecb8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitcmp-2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* PR tree-optimization/101590 */
+
+int f_and_gt(unsigned len) {
+  const unsigned N = 4;
+  unsigned newlen = len & -N;
+  return newlen > len; // return 0
+}
+int f_or_lt(unsigned len) {
+  const unsigned N = 4;
+  unsigned newlen = len | -N;
+  return newlen < len; // return 0
+}
+
+/* { dg-final { scan-tree-dump-not " > " "optimized" } } */
+/* { d

Re: [PATCH] MATCH: Simplify `(X &| B) CMP X` if possible [PR 101590]

2023-10-27 Thread Andrew Pinski
On Thu, Oct 26, 2023 at 11:56 PM Richard Biener
 wrote:
>
>
>
> > Am 26.10.2023 um 23:10 schrieb Andrew Pinski :
> >
> > From: Andrew Pinski 
> >
> > I noticed we were missing these simplifications so let's add them.
> >
> > This adds the following simplifications:
> > U & N <= U  -> true
> > U & N >  U  -> false
> > When U is known to be as non-negative.
> >
> > When N is also known to be non-negative, this is also true:
> > U | N <  U  -> false
> > U | N >= U  -> true
> >
> > When N is a negative integer, the result flips and we get:
> > U | N <  U  -> true
> > U | N >= U  -> false
>
> I think bit-CCP should get this, does ranger also figure this out (iirc it 
> tracks nonzero bits?)
>
> Your testcases suggest this doesn’t happen, can you figure out why CCP 
> doesn’t optimize this and maybe file a bug?

CCP and ranger is able to figure when N is a negative constant.
Otherwise no. I only added this to the testcase/match because I
originally messed up that case while working on the patch and noticed
different answers.

Thanks,
Andrew

>
> > We could extend this later on to be the case where we know N
> > is nonconstant but is known to be negative.
> >
> > Bootstrapped and tested on x86_64-linux-gnu with no regressions.
>
> Ok
>
> Richard
>
> >PR tree-optimization/101590
> >PR tree-optimization/94884
> >
> > gcc/ChangeLog:
> >
> >* match.pd (`(X BIT_OP Y) CMP X`): New pattern.
> >
> > gcc/testsuite/ChangeLog:
> >
> >* gcc.dg/tree-ssa/bitcmp-1.c: New test.
> >* gcc.dg/tree-ssa/bitcmp-2.c: New test.
> >* gcc.dg/tree-ssa/bitcmp-3.c: New test.
> >* gcc.dg/tree-ssa/bitcmp-4.c: New test.
> >* gcc.dg/tree-ssa/bitcmp-5.c: New test.
> >* gcc.dg/tree-ssa/bitcmp-6.c: New test.
> > ---
> > gcc/match.pd | 24 +
> > gcc/testsuite/gcc.dg/tree-ssa/bitcmp-1.c | 20 +++
> > gcc/testsuite/gcc.dg/tree-ssa/bitcmp-2.c | 20 +++
> > gcc/testsuite/gcc.dg/tree-ssa/bitcmp-3.c | 21 
> > gcc/testsuite/gcc.dg/tree-ssa/bitcmp-4.c | 36 
> > gcc/testsuite/gcc.dg/tree-ssa/bitcmp-5.c | 43 
> > gcc/testsuite/gcc.dg/tree-ssa/bitcmp-6.c | 41 ++
> > 7 files changed, 205 insertions(+)
> > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitcmp-1.c
> > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitcmp-2.c
> > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitcmp-3.c
> > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitcmp-4.c
> > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitcmp-5.c
> > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitcmp-6.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 5f6aeb07ac0..7d651a6582d 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -2707,6 +2707,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >   (if (TREE_INT_CST_LOW (@1) & 1)
> >{ constant_boolean_node (cmp == NE_EXPR, type); })))
> >
> > +/*
> > +   U & N <= U  -> true
> > +   U & N >  U  -> false
> > +   U needs to be non-negative.
> > +
> > +   U | N <  U  -> false
> > +   U | N >= U  -> true
> > +   U and N needs to be non-negative
> > +
> > +   U | N <  U  -> true
> > +   U | N >= U  -> false
> > +   U needs to be non-negative and N needs to be a negative constant.
> > +   */
> > +(for cmp   (lt  ge  le  gt )
> > + bitop (bit_ior bit_ior bit_and bit_and)
> > + (simplify
> > +  (cmp:c (bitop:c tree_expr_nonnegative_p@0 @1) @0)
> > +  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)))
> > +   (if (bitop == BIT_AND_EXPR || tree_expr_nonnegative_p (@1))
> > +{ constant_boolean_node (cmp == GE_EXPR || cmp == LE_EXPR, type); }
> > +/* The sign is opposite now so the comparison is swapped around. */
> > +(if (TREE_CODE (@1) == INTEGER_CST && wi::neg_p (wi::to_wide (@1)))
> > + { constant_boolean_node (cmp == LT_EXPR, type); })
> > +
> > /* Arguments on which one can call get_nonzero_bits to get the bits
> >possibly set.  */
> > (match with_possible_nonzero_bits
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitcmp-1.c 
> > b/gcc/testsuite/gcc.dg/tree-ssa/bitcmp-1.c
> > new file mode 100644
> > index 000..f3d515bb2d6
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/bitcmp-1.c
> > @@ -0,0 +1,20 @@
> &

Re: [PATCH] testsuite, Darwin: Add support for Mach-O function body scans.

2023-10-27 Thread Andrew Pinski
On Fri, Oct 27, 2023 at 4:00 AM Iain Sandoe  wrote:
>
> Hi Richard,
>
> > On 26 Oct 2023, at 21:00, Iain Sandoe  wrote:
>
> >> On 26 Oct 2023, at 20:49, Richard Sandiford  
> >> wrote:
> >>
> >> Iain Sandoe  writes:
> >>> This was written before Thomas' modification to the ELF-handling to allow
> >>> a config-based change for target details.  I did consider updating this
> >>> to try and use that scheme, but I think that it would sit a little
> >>> awkwardly, since there are some differences in the start-up scanning for
> >>> Mach-O.  I would say that in all probability we could improve things but
> >>> I'd like to put this forward as a well-tested initial implementation.
> >>
> >> Sorry, I would prefer to extend the existing function instead.
> >> E.g. there's already some divergence between the Mach-O version
> >> and the default version, in that the Mach-O version doesn't print
> >> verbose messages.  I also don't think that the current default code
> >> is so watertight that it'll never need to be updated in future.
> >
> > Fair enough, will explore what can be done (as I recall last I looked the
> > primary difference was in the initial start-up scan).
>
> I’ve done this as attached.
>
> For the record, when doing it, it gave rise to the same misgivings that led
> to the separate implementation before.
>
>  * as we add formats and uncover asm oddities, they all need to be handled
>in one set of code, IMO it could be come quite convoluted.
>
>  * now making a change to the MACH-O code, means I have to check I did not
>inadvertently break ELF (and likewise, in theory, an ELF change should 
> check
>MACH-O, but many folks do/can not do that).
>
> Maybe there’s some half-way-house where code can usefully be shared without
> those down-sides.

There is already gcc.test-framework which seems like a good place to
put a test for both formats so when someone changes the function, they
could run that testsuite to make sure it is still working for the
other format.
(Note I am not saying you should add it as part of this patch but it
seems like that would be the perfect place for it.)

Thanks,
Andrew

>
> Anyway, to make progress, is the revised version OK for trunk? (tested on
> aarch64-linux and aarch64-darwin).
> thanks
> Iain
>
>
>


[PATCH 0/3] start of moving value replacement from phiopt to match

2023-10-29 Thread Andrew Pinski
This set of 3 patches copies what is being done in value replacement and
puts it into match-and-simplify form. I will be rewriting value_replacement
in phiopt to use match and simplify directly in the next few months but
I thought getting these into match form earlier on can help improve code
generation independently of that move.

Note this does not add the absorbing_element_p optimizations yet; I filed PR 
112271
to record that move.

Andrew Pinski (3):
  MATCH: first of the value replacement moving from phiopt
  MATCH: Move jump_function_from_stmt support to match.pd
  MATCH: Add some more value_replacement simplifications to match

 gcc/match.pd  | 53 +++
 .../analyzer/inlining-3-multiline.c   |  5 +-
 .../c-c++-common/analyzer/inlining-3.c|  3 ++
 gcc/testsuite/gcc.dg/tree-ssa/cond-1.c| 17 ++
 .../gcc.dg/tree-ssa/phi-opt-value-1.c | 17 ++
 .../gcc.dg/tree-ssa/phi-opt-value-1a.c| 19 +++
 .../gcc.dg/tree-ssa/phi-opt-value-2.c | 19 +++
 .../gcc.dg/tree-ssa/phi-opt-value-3.c | 22 
 .../gcc.dg/tree-ssa/phi-opt-value-4.c | 36 +
 gcc/testsuite/gcc.dg/tree-ssa/vrp03.c |  2 +-
 10 files changed, 191 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cond-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1a.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-3.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-4.c

-- 
2.39.3



[PATCH 2/3] MATCH: Move jump_function_from_stmt support to match.pd

2023-10-29 Thread Andrew Pinski
This moves the value_replacement support for jump_function_from_stmt
to a match pattern.
This allows us to optimize things earlier in phiopt1 rather than waiting
to phiopt2. Which means phiopt1 needs to be disable for vrp03.c testcase.

Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

* match.pd (PTR == 0 ? 0 : &PTR->field): New pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/vrp03.c: Disable phiopt1.
* c-c++-common/analyzer/inlining-3-multiline.c: Likewise.
* c-c++-common/analyzer/inlining-3.c: Likewise.
* gcc.dg/tree-ssa/phi-opt-value-3.c: New testcase.
---
 gcc/match.pd  | 21 ++
 .../analyzer/inlining-3-multiline.c   |  5 -
 .../c-c++-common/analyzer/inlining-3.c|  3 +++
 .../gcc.dg/tree-ssa/phi-opt-value-3.c | 22 +++
 gcc/testsuite/gcc.dg/tree-ssa/vrp03.c |  2 +-
 5 files changed, 51 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-3.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 22899c51a2f..9bc945ccada 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4159,6 +4159,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (cond (eq @0 integer_zerop) @1 (op@2 @1 @0))
@2))
 
+/* PTR == 0 ? 0 : &PTR->field -> PTR if field offset was 0. */
+(simplify
+ (cond (eq @0 integer_zerop) integer_zerop ADDR_EXPR@1)
+ (with {
+   poly_int64 offset;
+   tree res = NULL_TREE;
+   tree tem = @1;
+   if (TREE_CODE (tem) == SSA_NAME)
+ if (gassign *def = dyn_cast  (SSA_NAME_DEF_STMT (tem)))
+   if (gimple_assign_rhs_code (def) == ADDR_EXPR)
+ tem = gimple_assign_rhs1 (def);
+
+   if (TREE_CODE (tem) == ADDR_EXPR)
+ res = get_addr_base_and_unit_offset (TREE_OPERAND (tem, 0), &offset);
+  }
+  (if (res
+   && TREE_CODE (res) == MEM_REF
+   && known_eq (mem_ref_offset (res) + offset, 0)
+   && operand_equal_p (TREE_OPERAND (res, 0), @0))
+   (convert @0
+
 /* Simplifications of shift and rotates.  */
 
 (for rotate (lrotate rrotate)
diff --git a/gcc/testsuite/c-c++-common/analyzer/inlining-3-multiline.c 
b/gcc/testsuite/c-c++-common/analyzer/inlining-3-multiline.c
index fbd20e949b6..9741b91abee 100644
--- a/gcc/testsuite/c-c++-common/analyzer/inlining-3-multiline.c
+++ b/gcc/testsuite/c-c++-common/analyzer/inlining-3-multiline.c
@@ -3,6 +3,9 @@
 
 /* { dg-additional-options "-O2 -fdiagnostics-show-path-depths" } */
 /* { dg-additional-options "-fdiagnostics-path-format=inline-events 
-fdiagnostics-show-caret" } */
+/* Disable phi-opt1 because get_input_file_name gets optimized to just
+   `return inpf;`. */
+/* { dg-additional-options "-fdisable-tree-phiopt1" } */
 
 #include "../../gcc.dg/analyzer/analyzer-decls.h"
 typedef __SIZE_TYPE__ size_t;
@@ -96,4 +99,4 @@ test (const input_file *inpf)
 |   (4) ...to here
 |   (5) argument 1 ('') NULL where 
non-null expected
 |
-   { dg-end-multiline-output "" { target c++ } } */
\ No newline at end of file
+   { dg-end-multiline-output "" { target c++ } } */
diff --git a/gcc/testsuite/c-c++-common/analyzer/inlining-3.c 
b/gcc/testsuite/c-c++-common/analyzer/inlining-3.c
index 0345585bed2..2b2b4858d45 100644
--- a/gcc/testsuite/c-c++-common/analyzer/inlining-3.c
+++ b/gcc/testsuite/c-c++-common/analyzer/inlining-3.c
@@ -2,6 +2,9 @@
after early inlining.  */
 
 /* { dg-additional-options "-O2 -fdiagnostics-show-path-depths" } */
+/* Disable phi-opt1 because get_input_file_name gets optimized to just
+   `return inpf;`. */
+/* { dg-additional-options "-fdisable-tree-phiopt1" } */
 
 #include "../../gcc.dg/analyzer/analyzer-decls.h"
 typedef __SIZE_TYPE__ size_t;
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-3.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-3.c
new file mode 100644
index 000..ad55bd288b9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-3.c
@@ -0,0 +1,22 @@
+/* { dg-do compile }  */
+/* { dg-options "-O1 -fdump-tree-optimized" }  */
+struct a
+{
+int b[1];
+};
+
+int call1(int *a);
+
+int f(struct a *b)
+{
+  int *c = b->b;
+  int t = call1(c);
+  int *d;
+  if (b) d = b->b; else d = 0;
+  int t1 = call1(d);
+  return t+t1;
+}
+
+/* There should be no if statement and 2 calls to call1. */
+/* { dg-final { scan-tree-dump-not "if " "optimized" } } */
+/* { dg-final { scan-tree-dump-times "call1 " 2 "optimized"  } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp03.c 
b/gcc/testsuite/gcc.dg/tree-ssa/vrp03.c
index 4cbaca41332..1adbf33cad3 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/vrp03.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp03.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-vrp1 -fno-thread-jumps" } 
*/
+/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-vrp1 -fno-thread-jumps 
-fdisable-tree-phiopt1" } */
 
 struct A
 {
-- 
2.39.3



[PATCH 3/3] MATCH: Add some more value_replacement simplifications to match

2023-10-29 Thread Andrew Pinski
This moves a few more value_replacements simplifications to match.
/* a == 1 ? b : a * b -> a * b */
/* a == 1 ? b : b / a  -> b / a */
/* a == -1 ? b : a & b -> a & b */

Also adds a testcase to show we can catch these where value_replacement would
not
(but other passes would).

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* match.pd (`a == 1 ? b : a OP b`): New pattern.
(`a == -1 ? b : a & b`): New pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-value-4.c: New test.
---
 gcc/match.pd  | 18 ++
 .../gcc.dg/tree-ssa/phi-opt-value-4.c | 36 +++
 2 files changed, 54 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-4.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 9bc945ccada..6efa97cc6ae 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4159,6 +4159,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (cond (eq @0 integer_zerop) @1 (op@2 @1 @0))
@2))
 
+/* a == 1 ? b : b / a  -> b / a */
+(for op (trunc_div ceil_div floor_div round_div exact_div)
+ (simplify
+  (cond (eq @0 integer_onep) @1 (op@2 @1 @0))
+   @2))
+
+/* a == 1 ? b : a * b -> a * b */
+(for op (mult)
+ (simplify
+  (cond (eq @0 integer_onep) @1 (op:c@2 @1 @0))
+   @2))
+
+/* a == -1 ? b : a & b -> a & b */
+(for op (bit_and)
+ (simplify
+  (cond (eq @0 integer_all_onesp) @1 (op:c@2 @1 @0))
+   @2))
+
 /* PTR == 0 ? 0 : &PTR->field -> PTR if field offset was 0. */
 (simplify
  (cond (eq @0 integer_zerop) integer_zerop ADDR_EXPR@1)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-4.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-4.c
new file mode 100644
index 000..380082cb463
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-4.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-fre3 -fdump-tree-phiopt1 
-fdump-tree-optimized" } */
+
+[[gnu::const]]
+int constcall(int);
+
+int fdiv(int a, int b)
+{
+  int c = b/a;
+  int t = constcall(c);
+  int d;
+  if (a == 1) d = b; else d = c;
+  return constcall(d) + t;
+}
+int fmult(int a, int b)
+{
+  int c = b*a;
+  int t = constcall(c);
+  int d;
+  if (a == 1) d = b; else d = c;
+  return constcall(d) + t;
+}
+int fand(int a, int b)
+{
+  int c = b&a;
+  int t = constcall(c);
+  int d;
+  if (a == -1) d = b; else d = c;
+  return constcall(d) + t;
+}
+
+/* Should be able to optimize away the if statements in phiopt1. */
+/* { dg-final { scan-tree-dump-not "if " "phiopt1" } } */
+/* fre3 should be optimize each function to just `return constcall(a OP b) * 
2;`. */
+/* { dg-final { scan-tree-dump-times "constcall " 3 "fre3" } } */
+/* { dg-final { scan-tree-dump-times "constcall " 3 "optimized" } } */
-- 
2.39.3



[PATCH 1/3] MATCH: first of the value replacement moving from phiopt

2023-10-29 Thread Andrew Pinski
This moves a few simple patterns that are done in value replacement
in phiopt over to match.pd. Just the simple ones which might show up
in other code.

This allows some optimizations to happen even without depending
on sinking from happening and in some cases where phiopt is not
invoked (cond-1.c is an example there).

Changes since v1:
* v2: Add an extra testcase to showcase improvements at -O1.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* match.pd: (`a == 0 ? b : b + a`,
`a == 0 ? b : b - a`): New patterns.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/cond-1.c: New test.
* gcc.dg/tree-ssa/phi-opt-value-1.c: New test.
* gcc.dg/tree-ssa/phi-opt-value-1a.c: New test.
* gcc.dg/tree-ssa/phi-opt-value-2.c: New test.
---
 gcc/match.pd  | 14 ++
 gcc/testsuite/gcc.dg/tree-ssa/cond-1.c| 17 +
 .../gcc.dg/tree-ssa/phi-opt-value-1.c | 17 +
 .../gcc.dg/tree-ssa/phi-opt-value-1a.c| 19 +++
 .../gcc.dg/tree-ssa/phi-opt-value-2.c | 19 +++
 5 files changed, 86 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cond-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1a.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-2.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 7d651a6582d..22899c51a2f 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4145,6 +4145,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& (INTEGRAL_TYPE_P (TREE_TYPE (@0
(op (mult (convert:type @0) @2) @1
 
+/* ?: Value replacement. */
+/* a == 0 ? b : b + a  -> b + a */
+(for op (plus bit_ior bit_xor)
+ (simplify
+  (cond (eq @0 integer_zerop) @1 (op:c@2 @1 @0))
+   @2))
+/* a == 0 ? b : b - a  -> b - a */
+/* a == 0 ? b : b ptr+ a  -> b ptr+ a */
+/* a == 0 ? b : b shift/rotate a -> b shift/rotate a */
+(for op (lrotate rrotate lshift rshift minus pointer_plus)
+ (simplify
+  (cond (eq @0 integer_zerop) @1 (op@2 @1 @0))
+   @2))
+
 /* Simplifications of shift and rotates.  */
 
 (for rotate (lrotate rrotate)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cond-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cond-1.c
new file mode 100644
index 000..478a818b206
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cond-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized-raw" } */
+
+int sub(int a, int b, int c, int d) {
+  int e = (a == 0);
+  int f = !e;
+  c = b;
+  d = b - a ;
+  return ((-e & c) | (-f & d));
+}
+
+/* In the end we end up with `(a == 0) ? (b - a) : b`
+   which then can be optimized to just `(b - a)`. */
+
+/* { dg-final { scan-tree-dump-not "cond_expr," "optimized" } } */
+/* { dg-final { scan-tree-dump-not "eq_expr," "optimized" } } */
+/* { dg-final { scan-tree-dump-times "minus_expr," 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1.c
new file mode 100644
index 000..a90de8926c6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* Phi-OPT should be able to optimize this without sinking being invoked. */
+/* { dg-options "-O -fdump-tree-phiopt2 -fdump-tree-optimized -fno-tree-sink" 
} */
+
+char *f(char *a, __SIZE_TYPE__ b) {
+  char *d = a + b;
+  if (b == 0) return a;
+  return d;
+}
+int sub(int a, int b, int c) {
+  int d = a - b;
+  if (b == 0) return a;
+  return d;
+}
+
+/* { dg-final { scan-tree-dump-not "goto" "phiopt2" } } */
+/* { dg-final { scan-tree-dump-not "goto" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1a.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1a.c
new file mode 100644
index 000..b884f94ddd2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1a.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+[[gnu::const]]
+int constcall(int);
+
+int f(int a, int b)
+{
+  int c = b+a;
+  int t = constcall(c);
+  int d;
+  if (a == 0) d= b; else d= c;
+  return constcall(d) + t;
+}
+
+/* There should be no if statement and 2 calls to call1. */
+/* { dg-final { scan-tree-dump-not "if " "optimized" } } */
+/* { dg-final { scan-tree-dump-times "constcall " 1 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-2.c
new file mode 100644
index 000..809ccfe1479
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-2.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* Phi-OPT should be able to optimize this without sinking being invoked. */
+/* { dg-options "-O -fdump-tree-phiopt2 -fdump-tree-optimized -fno-tree-sink" 
} */
+
+int f(int a, int b, int c) {
+  int d = a + b;
+  if (c > 5) return c;
+  if (a == 0) return 

Re: [PATCH] Testsuite, i386: Fix test by passing -march

2023-10-30 Thread Andrew Pinski
On Mon, Oct 30, 2023 at 5:05 AM Iain Sandoe  wrote:
>
>
>
> > On 30 Oct 2023, at 11:53, FX Coudert  wrote:
>
> > The newly introduced test gcc.target/i386/pr111698.c currently fails on 
> > Darwin, where the default arch is core2.
> > Andrew suggested in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112287 to 
> > pass a recent value to -march, and I can confirm that it fixes the 
> > testsuite failure on x86_64-apple-darwin21.
> >
> > OK to push?
>
> Fine from a Darwin perspective,
> we could also make it ...
> dg-additional-options “ -march=sandybridge” { target *-*-darwin* }
> … if that is deemed less invasive.

Well, it can fail on x86_64-linux-gnu too if GCC was configured with
--with-arch=core2 for an example.
So having it, in this case, not being darwin specific would be
beneficial for all x86_64/i?86 targets.

Thanks,
Andrew

>
> Iain
>
>


Re: [PATCH 2/3] MATCH: Move jump_function_from_stmt support to match.pd

2023-10-30 Thread Andrew Pinski
On Mon, Oct 30, 2023 at 2:29 AM Richard Biener
 wrote:
>
> On Sun, Oct 29, 2023 at 5:41 PM Andrew Pinski  wrote:
> >
> > This moves the value_replacement support for jump_function_from_stmt
> > to match pattern.
> > This allows us to optimize things earlier in phiopt1 rather than waiting
> > to phiopt2. Which means phiopt1 needs to be disable for vrp03.c testcase.
> >
> > Bootstrapped and tested on x86_64-linux-gnu.
>
> Do we need to make sure to only do this after pass_early_object_sizes
> at least?  IIRC early PHI-opt didn't do value-replacement, so maybe
> even after late object-size?  There's PROP_objsz, but no
> function similar to optimize_vectors_before_lowering_p in
> {generic,gimple}-match-head.cc

Let me look into that.
But I suspect any which way we might end up with the same issue as the
problems you found in PR 112266 really.
So I am going to put this patch on the backburner for now (but still
look into this and the fall out from PR 112266 ).

Thanks,
Andrew

>
> Richard.
>
> > gcc/ChangeLog:
> >
> > * match.pd (PTR == 0 ? 0 : &PTR->field): New pattern.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.dg/tree-ssa/vrp03.c: Disable phiopt1.
> > * c-c++-common/analyzer/inlining-3-multiline.c: Likewise.
> > * c-c++-common/analyzer/inlining-3.c: Likewise.
> > * gcc.dg/tree-ssa/phi-opt-value-3.c: New testcase.
> > ---
> >  gcc/match.pd  | 21 ++
> >  .../analyzer/inlining-3-multiline.c   |  5 -
> >  .../c-c++-common/analyzer/inlining-3.c|  3 +++
> >  .../gcc.dg/tree-ssa/phi-opt-value-3.c | 22 +++
> >  gcc/testsuite/gcc.dg/tree-ssa/vrp03.c |  2 +-
> >  5 files changed, 51 insertions(+), 2 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-3.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 22899c51a2f..9bc945ccada 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -4159,6 +4159,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >(cond (eq @0 integer_zerop) @1 (op@2 @1 @0))
> > @2))
> >
> > +/* PTR == 0 ? 0 : &PTR->field -> PTR if field offset was 0. */
> > +(simplify
> > + (cond (eq @0 integer_zerop) integer_zerop ADDR_EXPR@1)
> > + (with {
> > +   poly_int64 offset;
> > +   tree res = NULL_TREE;
> > +   tree tem = @1;
> > +   if (TREE_CODE (tem) == SSA_NAME)
> > + if (gassign *def = dyn_cast  (SSA_NAME_DEF_STMT (tem)))
> > +   if (gimple_assign_rhs_code (def) == ADDR_EXPR)
> > + tem = gimple_assign_rhs1 (def);
> > +
> > +   if (TREE_CODE (tem) == ADDR_EXPR)
> > + res = get_addr_base_and_unit_offset (TREE_OPERAND (tem, 0), &offset);
> > +  }
> > +  (if (res
> > +   && TREE_CODE (res) == MEM_REF
> > +   && known_eq (mem_ref_offset (res) + offset, 0)
> > +   && operand_equal_p (TREE_OPERAND (res, 0), @0))
> > +   (convert @0
> > +
> >  /* Simplifications of shift and rotates.  */
> >
> >  (for rotate (lrotate rrotate)
> > diff --git a/gcc/testsuite/c-c++-common/analyzer/inlining-3-multiline.c 
> > b/gcc/testsuite/c-c++-common/analyzer/inlining-3-multiline.c
> > index fbd20e949b6..9741b91abee 100644
> > --- a/gcc/testsuite/c-c++-common/analyzer/inlining-3-multiline.c
> > +++ b/gcc/testsuite/c-c++-common/analyzer/inlining-3-multiline.c
> > @@ -3,6 +3,9 @@
> >
> >  /* { dg-additional-options "-O2 -fdiagnostics-show-path-depths" } */
> >  /* { dg-additional-options "-fdiagnostics-path-format=inline-events 
> > -fdiagnostics-show-caret" } */
> > +/* Disable phi-opt1 because get_input_file_name gets optimized to just
> > +   `return inpf;`. */
> > +/* { dg-additional-options "-fdisable-tree-phiopt1" } */
> >
> >  #include "../../gcc.dg/analyzer/analyzer-decls.h"
> >  typedef __SIZE_TYPE__ size_t;
> > @@ -96,4 +99,4 @@ test (const input_file *inpf)
> >  |   (4) ...to here
> >  |   (5) argument 1 ('') NULL where 
> > non-null expected
> >  |
> > -   { dg-end-multiline-output "" { target c++ } } */
> > \ No newline at end of file
> > +   { dg-end-multiline-output "" { target c++ } } */
> > diff --git a/gcc/testsuite/c-c++-common/analyzer/inlining-3.c 
> > b/gcc/testsuite/c-c++-common/analyzer/inlining-3.c
> > index 0345585bed2..2b2b4858d45 100644
> > --- a/gcc/

Re: [PATCH 1/2] match.pd: Support combine cond_len_op + vec_cond similar to cond_op

2023-10-30 Thread Andrew Pinski
On Wed, Sep 20, 2023 at 6:10 AM Lehua Ding  wrote:
>
> This patch adds combine cond_len_op and vec_cond to cond_len_op like
> cond_op.
>
> gcc/ChangeLog:
>
> * gimple-match.h (gimple_match_op::gimple_match_op):
> Add interfaces for more arguments.
> (gimple_match_op::set_op): Add interfaces for more arguments.
> * match.pd: Add support of combining cond_len_op + vec_cond
> ---
>  gcc/gimple-match.h | 72 ++
>  gcc/match.pd   | 39 +
>  2 files changed, 111 insertions(+)
>
> diff --git a/gcc/gimple-match.h b/gcc/gimple-match.h
> index bec3ff42e3e..9892c142285 100644
> --- a/gcc/gimple-match.h
> +++ b/gcc/gimple-match.h
> @@ -92,6 +92,10 @@ public:
>code_helper, tree, tree, tree, tree, tree);
>gimple_match_op (const gimple_match_cond &,
>code_helper, tree, tree, tree, tree, tree, tree);
> +  gimple_match_op (const gimple_match_cond &,
> +  code_helper, tree, tree, tree, tree, tree, tree, tree);
> +  gimple_match_op (const gimple_match_cond &,
> +  code_helper, tree, tree, tree, tree, tree, tree, tree, 
> tree);
>
>void set_op (code_helper, tree, unsigned int);
>void set_op (code_helper, tree, tree);
> @@ -100,6 +104,8 @@ public:
>void set_op (code_helper, tree, tree, tree, tree, bool);
>void set_op (code_helper, tree, tree, tree, tree, tree);
>void set_op (code_helper, tree, tree, tree, tree, tree, tree);
> +  void set_op (code_helper, tree, tree, tree, tree, tree, tree, tree);
> +  void set_op (code_helper, tree, tree, tree, tree, tree, tree, tree, tree);
>void set_value (tree);
>
>tree op_or_null (unsigned int) const;
> @@ -212,6 +218,39 @@ gimple_match_op::gimple_match_op (const 
> gimple_match_cond &cond_in,
>ops[4] = op4;
>  }
>
> +inline
> +gimple_match_op::gimple_match_op (const gimple_match_cond &cond_in,
> + code_helper code_in, tree type_in,
> + tree op0, tree op1, tree op2, tree op3,
> + tree op4, tree op5)
> +  : cond (cond_in), code (code_in), type (type_in), reverse (false),
> +num_ops (6)
> +{
> +  ops[0] = op0;
> +  ops[1] = op1;
> +  ops[2] = op2;
> +  ops[3] = op3;
> +  ops[4] = op4;
> +  ops[5] = op5;
> +}

Hmm, does it make sense to start to use variadic templates for these
constructors instead of writing them out?
And we can even add a static_assert to make sure the number of
arguments is <= MAX_NUM_OPS to make sure they are correct. And use
std::is_same to make sure we are only passing tree types.

Thanks,
Andrew

> +
> +inline
> +gimple_match_op::gimple_match_op (const gimple_match_cond &cond_in,
> + code_helper code_in, tree type_in,
> + tree op0, tree op1, tree op2, tree op3,
> + tree op4, tree op5, tree op6)
> +  : cond (cond_in), code (code_in), type (type_in), reverse (false),
> +num_ops (7)
> +{
> +  ops[0] = op0;
> +  ops[1] = op1;
> +  ops[2] = op2;
> +  ops[3] = op3;
> +  ops[4] = op4;
> +  ops[5] = op5;
> +  ops[6] = op6;
> +}
> +
>  /* Change the operation performed to CODE_IN, the type of the result to
> TYPE_IN, and the number of operands to NUM_OPS_IN.  The caller needs
> to set the operands itself.  */
> @@ -299,6 +338,39 @@ gimple_match_op::set_op (code_helper code_in, tree 
> type_in,
>ops[4] = op4;
>  }
>
> +inline void
> +gimple_match_op::set_op (code_helper code_in, tree type_in,
> +tree op0, tree op1, tree op2, tree op3, tree op4,
> +tree op5)
> +{
> +  code = code_in;
> +  type = type_in;
> +  num_ops = 6;
> +  ops[0] = op0;
> +  ops[1] = op1;
> +  ops[2] = op2;
> +  ops[3] = op3;
> +  ops[4] = op4;
> +  ops[5] = op5;
> +}
> +
> +inline void
> +gimple_match_op::set_op (code_helper code_in, tree type_in,
> +tree op0, tree op1, tree op2, tree op3, tree op4,
> +tree op5, tree op6)
> +{
> +  code = code_in;
> +  type = type_in;
> +  num_ops = 7;
> +  ops[0] = op0;
> +  ops[1] = op1;
> +  ops[2] = op2;
> +  ops[3] = op3;
> +  ops[4] = op4;
> +  ops[5] = op5;
> +  ops[6] = op6;
> +}
> +
>  /* Set the "operation" to be the single value VALUE, such as a constant
> or SSA_NAME.  */
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index a37af05f873..75b7e100120 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -103,12 +103,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>IFN_COND_FMIN IFN_COND_FMAX
>IFN_COND_AND IFN_COND_IOR IFN_COND_XOR
>IFN_COND_SHL IFN_COND_SHR)
> +(define_operator_list COND_LEN_BINARY
> +  IFN_COND_LEN_ADD IFN_COND_LEN_SUB
> +  IFN_COND_LEN_MUL IFN_COND_LEN_DIV
> +  IFN_COND_LEN_MOD IFN_COND_LEN_RDIV
> +  IFN_COND_LEN_MIN IFN_COND_LEN_MAX
> +  IFN_COND_LEN_FMIN IFN_COND_LEN_FMAX
> +  IFN_COND_LEN_AND IFN_COND_LEN_IOR IFN_COND_LEN_XO

Re: [PATCH 1/2] match.pd: Support combine cond_len_op + vec_cond similar to cond_op

2023-10-31 Thread Andrew Pinski
On Tue, Oct 31, 2023 at 12:08 AM Lehua Ding  wrote:
>
> Hi Andrew,
>
> On 2023/10/31 14:48, Andrew Pinski wrote:
> >> +inline
> >> +gimple_match_op::gimple_match_op (const gimple_match_cond &cond_in,
> >> + code_helper code_in, tree type_in,
> >> + tree op0, tree op1, tree op2, tree op3,
> >> + tree op4, tree op5)
> >> +  : cond (cond_in), code (code_in), type (type_in), reverse (false),
> >> +num_ops (6)
> >> +{
> >> +  ops[0] = op0;
> >> +  ops[1] = op1;
> >> +  ops[2] = op2;
> >> +  ops[3] = op3;
> >> +  ops[4] = op4;
> >> +  ops[5] = op5;
> >> +}
> > Hmm, does it make sense to start to use variadic templates for these
> > constructors instead of writing them out?
> > And we can even add a static_assert to make sure the number of
> > arguments is <= MAX_NUM_OPS to make sure they are correct. And use
> > std::is_same to make sure we are only passing tree types.
>
> You mean something like this?:
>
> template
> inline
> gimple_match_op::gimple_match_op (const gimple_match_cond &cond_in,
>code_helper code_in, tree type_in,
>   op_types... ops)
>: cond (cond_in), code (code_in), type (type_in), reverse (false),
>  num_ops (sizeof...(ops))
> {
>static_assert (sizeof...(ops) <= MAX_NUM_OPS);
>auto op_list[] = {ops...};
>for (int i = 0; i < sizeof...(ops); i++)
>  this->ops[i] = op_list[i];
> }

Yes and maybe use tree for the type of op_list instead of auto.
I suspect this code was originally written before GCC switched to C++11.
Maybe if this code is being compiled with C++20 we could do something like:
#include 
template< std::same_as... op_types>

To get a decent error message earlier ...

Thanks,
Andrew

>
> --
> Best,
> Lehua (RiVAI)
> lehua.d...@rivai.ai


Re: [PATCH] ifcvt/vect: Emit COND_ADD for conditional scalar reduction.

2023-11-02 Thread Andrew Pinski
On Wed, Sep 20, 2023 at 6:52 AM Robin Dapp  wrote:
>
> Hi,
>
> as described in PR111401 we currently emit a COND and a PLUS expression
> for conditional reductions.  This makes it difficult to combine both
> into a masked reduction statement later.
> This patch improves that by directly emitting a COND_ADD during ifcvt and
> adjusting some vectorizer code to handle it.
>
> It also makes neutral_op_for_reduction return -0 if HONOR_SIGNED_ZEROS
> is true.
>
> Related question/change: We only allow PLUS_EXPR in fold_left_reduction_fn
> but have code to handle MINUS_EXPR in vectorize_fold_left_reduction.  I
> suppose that's intentional but it "just works" on riscv and the testsuite
> doesn't change when allowing MINUS_EXPR so I went ahead and did that.
>
> Bootstrapped and regtested on x86 and aarch64.

This caused gcc.target/i386/avx512f-reduce-op-1.c testcase to start to
fail when testing on a x86_64 that has avx512f (In my case I am using
`Intel(R) Xeon(R) D-2166NT CPU @ 2.00GHz`).  I reverted the commit to
double check it too.

The difference in optimized I see is:
  if (_40 != 3.5e+1) // working
vs
  if (_40 != 6.4e+1) // not working

It is test_epi32_ps which is failing with TEST_PS macro and the plus
operand that uses TESTOP:
TESTOP (add, +, float, ps, 0.0f);       \

I have not reduced the testcase any further though.

Thanks,
Andrew Pinski


>
> Regards
>  Robin
>
> gcc/ChangeLog:
>
> PR middle-end/111401
> * internal-fn.cc (cond_fn_p): New function.
> * internal-fn.h (cond_fn_p): Define.
> * tree-if-conv.cc (convert_scalar_cond_reduction): Emit COND_ADD
> if supported.
> (predicate_scalar_phi): Add whitespace.
> * tree-vect-loop.cc (fold_left_reduction_fn): Add IFN_COND_ADD.
> (neutral_op_for_reduction): Return -0 for PLUS.
> (vect_is_simple_reduction): Don't count else operand in
> COND_ADD.
> (vectorize_fold_left_reduction): Add COND_ADD handling.
> (vectorizable_reduction): Don't count else operand in COND_ADD.
> (vect_transform_reduction): Add COND_ADD handling.
> * tree-vectorizer.h (neutral_op_for_reduction): Add default
> parameter.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/vect/vect-cond-reduc-in-order-2-signed-zero.c: New test.
> * gcc.target/riscv/rvv/autovec/cond/pr111401.c: New test.
> ---
>  gcc/internal-fn.cc|  38 +
>  gcc/internal-fn.h |   1 +
>  .../vect-cond-reduc-in-order-2-signed-zero.c  | 141 ++
>  .../riscv/rvv/autovec/cond/pr111401.c |  61 
>  gcc/tree-if-conv.cc   |  63 ++--
>  gcc/tree-vect-loop.cc | 130 
>  gcc/tree-vectorizer.h |   2 +-
>  7 files changed, 394 insertions(+), 42 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.dg/vect/vect-cond-reduc-in-order-2-signed-zero.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/pr111401.c
>
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 0fd34359247..77939890f5a 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -4241,6 +4241,44 @@ first_commutative_argument (internal_fn fn)
>  }
>  }
>
> +/* Return true if this CODE describes a conditional (masked) internal_fn.  */
> +
> +bool
> +cond_fn_p (code_helper code)
> +{
> +  if (!code.is_fn_code ())
> +return false;
> +
> +  if (!internal_fn_p ((combined_fn) code))
> +return false;
> +
> +  internal_fn fn = as_internal_fn ((combined_fn) code);
> +  switch (fn)
> +{
> +#undef DEF_INTERNAL_COND_FN
> +#define DEF_INTERNAL_COND_FN(NAME, F, O, T)  \
> +case IFN_COND_##NAME:\
> +case IFN_COND_LEN_##NAME:\
> +  return true;
> +#include "internal-fn.def"
> +#undef DEF_INTERNAL_COND_FN
> +
> +#undef DEF_INTERNAL_SIGNED_COND_FN
> +#define DEF_INTERNAL_SIGNED_COND_FN(NAME, F, S, SO, UO, T)   \
> +case IFN_COND_##NAME:\
> +case IFN_COND_LEN_##NAME:\
> +  return true;
> +#include "internal-fn.def"
> +#undef DEF_INTERNAL_SIGNED_COND_FN
> +
> +default:
> +  return false;
> +}
> +
> +  return false;
> +}
> +
> +
>  /* Return true if this CODE describes an internal_fn that returns a vector 
> with
> elements twice as wide as the element size of the input vectors.  */
>

Re: [PATCH] Remove unnecessary "& 1" in year_month_day_last::day()

2023-11-05 Thread Andrew Pinski
On Sun, Nov 5, 2023 at 9:13 AM Cassio Neri  wrote:
>
> I could not find any entry in gcc's bugzilla for that. Perhaps my search 
> wasn't good enough.

I filed https://gcc.gnu.org/PR112395 with a first attempt at the patch
(will double check it soon).

Thanks,
Andrew

>
>
> On Sun, 5 Nov 2023 at 15:58, Marc Glisse  wrote:
>>
>> On Sun, 5 Nov 2023, Cassio Neri wrote:
>>
>> > When year_month_day_last::day() was implemented, Dr. Matthias Kretz 
>> > realised
>> > that the operation "& 1" wasn't necessary but we did not patch it at that
>> > time. This patch removes the unnecessary operation.
>>
>> Is there an entry in gcc's bugzilla about having the optimizer handle this
>> kind of optimization?
>>
>> unsigned f(unsigned x){
>>if(x>=32)__builtin_unreachable();
>>return 30|(x&1); // --> 30|x
>> }
>>
>> (that optimization would come in addition to your patch, doing the
>> optimization by hand is still a good idea)
>>
>> It looks like the criterion would be a|(b&c) when the possible 1 bits of b
>> are included in the certainly 1 bits of a|c.
>>
>> --
>> Marc Glisse


Re: [PATCH][Aarch64] Fix vec_perm cost for thunderx2t99

2019-11-01 Thread Andrew Pinski
On Fri, Nov 1, 2019 at 7:03 AM Anton Youdkevitch
 wrote:
>
> Hello,
>
> Here is the one-liner that fixes the incorrect
> vec_perm cost for thunderx2t99 chip.
> With the patch applied 526.blender of CPU2017
> gets ~5% improvement with no measurable changes
> for other benchmarks.
>
> Bootstrapped OK on aarch64-linux-gnu.
>
> OK for trunk?

Maybe the big question is vec_perm used for both 1 input and 2 input
cases?  If so maybe splitting the two cases would be important too.
Otherwise this is ok from my point of view but I can't approve it.


Thanks,
Andrew Pinski

>
> 2019-11-01 Anton Youdkevitch 
>
> * gcc/config/aarch64/aarch64.c (thunderx2t99_vector_cost):
> change vec_perm field
>
> --
>   Thanks,
>   Anton


Re: [PATCH] combine: Don't generate IF_THEN_ELSE

2019-11-03 Thread Andrew Pinski
On Thu, May 9, 2019 at 5:05 PM Segher Boessenkool
 wrote:
>
> On all targets I managed to test (21) this results in better code.  Only
> alpha ends up with slightly bigger code.
>
> Committing to trunk.

This introduced:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92342

Thanks,
Andrew Pinski

>
>
> Segher
>
>
> 2019-05-10  Segher Boessenkool  
>
> * combine.c (combine_simplify_rtx): Don't make IF_THEN_ELSE RTL.
>
> ---
>  gcc/combine.c | 8 
>  1 file changed, 8 deletions(-)
>
> diff --git a/gcc/combine.c b/gcc/combine.c
> index 7b236225..8c4375f 100644
> --- a/gcc/combine.c
> +++ b/gcc/combine.c
> @@ -5937,14 +5937,6 @@ combine_simplify_rtx (rtx x, machine_mode op0_mode, 
> int in_dest,
>  mode, 
> VOIDmode,
>  cond, cop1),
> mode);
> - else
> -   return gen_rtx_IF_THEN_ELSE (mode,
> -simplify_gen_relational 
> (cond_code,
> - mode,
> - 
> VOIDmode,
> - cond,
> - cop1),
> -true_rtx, false_rtx);
>
>   code = GET_CODE (x);
>   op0_mode = VOIDmode;
> --
> 1.8.3.1
>


Re: [PATCH] Refactor tree-loop-distribution for thread safety

2019-11-12 Thread Andrew Pinski
On Tue, Nov 12, 2019 at 1:16 AM Richard Biener
 wrote:
>
> On Sat, Nov 9, 2019 at 3:26 PM Giuliano Belinassi
>  wrote:
> >
> > Hi all,
> >
> > This patch refactors tree-loop-distribution.c for thread safety without
> > use of C11 __thread feature. All global variables were moved to a struct
> > which is initialized at ::execute time.
>
> Thanks for working on this.  I've been thinking on how to make this
> nicer which naturally leads to the use of C++ classes and member
> functions which get 'this' for free.  This means all functions that
> make use of 'priv' in your patch would need to become member
> functions of the class and pass_loop_distribution::execute would
> wrap it like
>
> unsigned int
> pass_loop_distribution::execute (function *fun)
> {
>   return priv_pass_vars().execute (fun);
> }
>
> please find a better name for 'priv_pass_vars' since you can't
> reuse that name for other passes due to C++ ODR rules.
> I would suggest 'loop_distribution'.

Unless you use an anonymous namespace or your own namespace.
This is what I did when I was developing a pass, I used both and even
had an using class statement in that file to reduce the ammount of
typing in some cases.

Thanks,
Andrew


>
> Can you try if going this route works well?
>
> Thanks,
> Richard.
>
> > I can install this patch myself in trunk if it's OK.
> >
> > gcc/ChangeLog
> > 2019-11-09  Giuliano Belinassi  
> >
> > * cfgloop.c (get_loop_body_in_custom_order): New.
> > * cfgloop.h (get_loop_body_in_custom_order): New prototype.
> > * tree-loop-distribution.c (struct priv_pass_vars): New.
> > (bb_top_order_cmp_r): New.
> > (create_rdg_vertices): Update prototype.
> > (stmts_from_loop): Same as above.
> > (update_for_merge): Same as above.
> > (partition_merge_into): Same as above.
> > (get_data_dependence): Same as above.
> > (data_dep_in_cycle_p): Same as above.
> > (update_type_for_merge): Same as above.
> > (build_rdg_partition_for-vertex): Same as above.
> > (classify_builtin_ldst): Same as above.
> > (classify_partition): Same as above.
> > (share_memory_accesses): Same as above.
> > (rdg_build_partitions): Same as above.
> > (pg_add_dependence_edges): Same as above.
> > (build_partition_graph): Same as above.
> > (merge_dep_scc_partitions): Same as above.
> > (break_alias_scc_partitions): Same as above.
> > (finalize_partitions): Same as above.
> > (distribute_loop): Same as above.
> > (bb_top_order_init): New function.
> > (bb_top_order_destroy): New function.
> > (pass_loop_distribution::execute): Initialize struct priv.
> >
> > Thank you,
> > Giuliano.


Re: [PATCH] Switch gcc ftp URL's to http

2019-11-13 Thread Andrew Pinski
On Wed, Nov 13, 2019 at 12:37 PM Janne Blomqvist
 wrote:
>
> The FTP protocol is getting long in the tooth, and we should emphasize
> HTTP where that is available. This patch changes various gcc.gnu.org
> URL's to instead use HTTP.

May I suggest you use https instead of http here?  Because it will be
redirected anyways to use https.

Thanks,
Andrew

>
> For instance, kernel.org shut down FTP access in 2017, with the
> explanation:
>
> - The protocol is inefficient and requires adding awkward kludges to
>   firewalls and load-balancing daemons
> - FTP servers have no support for caching or accelerators, which has
>   significant performance impacts
> - Most software implementations have stagnated and see infrequent
>   updates
>
> ChangeLog:
>
> 2019-11-13  Janne Blomqvist  
>
> * configure.ac: Use http for gcc.gnu.org.
> * configure: Regenerated.
>
> gcc/ChangeLog:
>
> 2019-11-13  Janne Blomqvist  
>
> * configure.ac: Use http for gcc.gnu.org
> * configure: Regenerated.
> * doc/install.texi: Use http for gcc.gnu.org.
> * doc/sourcebuild.texi: Likewise.
>
> gcc/testsuite/ChangeLog:
>
> 2019-11-13  Janne Blomqvist  
>
> * README: Likewise.
>
> libstdc++-v3/ChangeLog:
>
> 2019-11-13  Janne Blomqvist  
>
> * doc/html/api.html: Likewise.
> * doc/xml/api.xml: Likewise.
>
> maintainer-scripts/ChangeLog:
>
> 2019-11-13  Janne Blomqvist  
>
> * gcc_release: Likewise.
> ---
>  configure.ac   |  2 +-
>  gcc/configure.ac   |  2 +-
>  gcc/doc/install.texi   | 11 +--
>  gcc/doc/sourcebuild.texi   |  4 ++--
>  gcc/testsuite/README   |  2 +-
>  libstdc++-v3/doc/html/api.html |  4 ++--
>  libstdc++-v3/doc/xml/api.xml   |  2 +-
>  maintainer-scripts/gcc_release |  2 +-
>  8 files changed, 14 insertions(+), 15 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index d63a8bae940..774e95a989f 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -1665,7 +1665,7 @@ if test -d ${srcdir}/gcc && test "x$have_gmp" = xno; 
> then
>  Try the --with-gmp, --with-mpfr and/or --with-mpc options to specify
>  their locations.  Source code for these libraries can be found at
>  their respective hosting sites as well as at
> -ftp://gcc.gnu.org/pub/gcc/infrastructure/.  See also
> +http://gcc.gnu.org/pub/gcc/infrastructure/.  See also
>  http://gcc.gnu.org/install/prerequisites.html for additional info.  If
>  you obtained GMP, MPFR and/or MPC from a vendor distribution package,
>  make sure that you have installed both the libraries and the header
> diff --git a/gcc/configure.ac b/gcc/configure.ac
> index b9cc2435cdf..7bb77f4e7a0 100644
> --- a/gcc/configure.ac
> +++ b/gcc/configure.ac
> @@ -4748,7 +4748,7 @@ gd:
>   [ .machine ppc7400])
> if test x$gcc_cv_as_machine_directive != xyes; then
>   echo "*** This target requires an assembler supporting 
> \".machine\"" >&2
> - echo you can get it from: 
> ftp://gcc.gnu.org/pub/gcc/infrastructure/cctools-528.5.dmg >&2
> + echo you can get it from: 
> http://gcc.gnu.org/pub/gcc/infrastructure/cctools-528.5.dmg >&2
>   test x$build = x$target && exit 1
> fi
>  ;;
> diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
> index 215a6fa38ff..c88d73f10cc 100644
> --- a/gcc/doc/install.texi
> +++ b/gcc/doc/install.texi
> @@ -333,7 +333,7 @@ requirements.
>  @itemx bzip2 version 1.0.2 (or later)
>
>  Necessary to uncompress GCC @command{tar} files when source code is
> -obtained via FTP mirror sites.
> +obtained via HTTP mirror sites.
>
>  @item GNU make version 3.80 (or later)
>
> @@ -411,7 +411,7 @@ download_prerequisites installs.
>  @item isl Library version 0.15 or later.
>
>  Necessary to build GCC with the Graphite loop optimizations.
> -It can be downloaded from @uref{ftp://gcc.gnu.org/pub/gcc/infrastructure/}.
> +It can be downloaded from @uref{http://gcc.gnu.org/pub/gcc/infrastructure/}.
>  If an isl source distribution is found
>  in a subdirectory of your GCC sources named @file{isl}, it will be
>  built together with GCC.  Alternatively, the @option{--with-isl} configure
> @@ -513,7 +513,7 @@ files in the directories below @file{jit/docs}.
>  @itemx SSH (any version)
>
>  Necessary to access the SVN repository.  Public releases and weekly
> -snapshots of the development sources are also available via FTP@.
> +snapshots of the development sources are also available via HTTP@.
>
>  @item GNU diffutils version 2.7 (or later)
>
> @@ -547,9 +547,8 @@ own sources.
>  @cindex Downloading GCC
>  @cindex Downloading the Source
>
> -GCC is distributed via @uref{http://gcc.gnu.org/svn.html,,SVN} and FTP
> -tarballs compressed with @command{gzip} or
> -@command{bzip2}.
> +GCC is distributed via @uref{http://gcc.gnu.org/svn.html,,SVN} and via
> +HTTP as tarballs compressed with @command{gzip} or @command{bzip2}.
>
>  Please refer to the @uref{http://gcc.gnu.org/releases.html,,releases web 

Re: [PATCH][AArch64] Fix shrinkwrapping interactions with atomics (PR92692)

2020-01-16 Thread Andrew Pinski
On Thu, Jan 16, 2020 at 5:14 AM Richard Sandiford
 wrote:
>
> Wilco Dijkstra  writes:
> > The separate shrinkwrapping pass may insert stores in the middle
> > of atomics loops which can cause issues on some implementations.
> > Avoid this by delaying splitting of atomic patterns until after
> > prolog/epilog generation.
> >
> > Bootstrap completed, no test regressions on AArch64.
> >
> > Andrew, can you verify this fixes the failure you were getting?
> >
> > ChangeLog:
> > 2020-01-16  Wilco Dijkstra  
> >
> > PR target/92692
> > * config/aarch64/aarch64.c (aarch64_split_compare_and_swap)
> > Add assert to ensure prolog has been emitted.
> > (aarch64_split_atomic_op): Likewise.
> > * config/aarch64/atomics.md (aarch64_compare_and_swap)
> > Use epilogue_completed rather than reload_completed.
> > (aarch64_atomic_exchange): Likewise.
> > (aarch64_atomic_): Likewise.
> > (atomic_nand): Likewise.
> > (aarch64_atomic_fetch_): Likewise.
> > (atomic_fetch_nand): Likewise.
> > (aarch64_atomic__fetch): Likewise.
> > (atomic_nand_fetch): Likewise.
>
> OK if Andrew confirms it works, thanks.

Yes this fixes the issue for me.

Thanks,
Andrew

>
> Richard
>
> > ---
> >
> > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> > index 
> > ac89cc1f9c938455d33d8850d9ebfc0473cb73dc..cd9d813f2ac4990971f6435fdb28b0f94ae10309
> >  100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -18375,6 +18375,9 @@ aarch64_emit_post_barrier (enum memmodel model)
> >  void
> >  aarch64_split_compare_and_swap (rtx operands[])
> >  {
> > +  /* Split after prolog/epilog to avoid interactions with shrinkwrapping.  
> > */
> > +  gcc_assert (epilogue_completed);
> > +
> >rtx rval, mem, oldval, newval, scratch, x, model_rtx;
> >machine_mode mode;
> >bool is_weak;
> > @@ -18469,6 +18472,9 @@ void
> >  aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx 
> > mem,
> >   rtx value, rtx model_rtx, rtx cond)
> >  {
> > +  /* Split after prolog/epilog to avoid interactions with shrinkwrapping.  
> > */
> > +  gcc_assert (epilogue_completed);
> > +
> >machine_mode mode = GET_MODE (mem);
> >machine_mode wmode = (mode == DImode ? DImode : SImode);
> >const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
> > diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
> > index 
> > c2bcabd0c3c2627b7222dcbc1af9c2e6b7ce6a76..996947799b5ef8445e9786b94e1ce62fd16e5b5c
> >  100644
> > --- a/gcc/config/aarch64/atomics.md
> > +++ b/gcc/config/aarch64/atomics.md
> > @@ -56,7 +56,7 @@ (define_insn_and_split "@aarch64_compare_and_swap"
> > (clobber (match_scratch:SI 7 "=&r"))]
> >""
> >"#"
> > -  "&& reload_completed"
> > +  "&& epilogue_completed"
> >[(const_int 0)]
> >{
> >  aarch64_split_compare_and_swap (operands);
> > @@ -80,7 +80,7 @@ (define_insn_and_split "@aarch64_compare_and_swap"
> > (clobber (match_scratch:SI 7 "=&r"))]
> >""
> >"#"
> > -  "&& reload_completed"
> > +  "&& epilogue_completed"
> >[(const_int 0)]
> >{
> >  aarch64_split_compare_and_swap (operands);
> > @@ -104,7 +104,7 @@ (define_insn_and_split "@aarch64_compare_and_swap"
> > (clobber (match_scratch:SI 7 "=&r"))]
> >""
> >"#"
> > -  "&& reload_completed"
> > +  "&& epilogue_completed"
> >[(const_int 0)]
> >{
> >  aarch64_split_compare_and_swap (operands);
> > @@ -223,7 +223,7 @@ (define_insn_and_split "aarch64_atomic_exchange"
> > (clobber (match_scratch:SI 4 "=&r"))]
> >""
> >"#"
> > -  "&& reload_completed"
> > +  "&& epilogue_completed"
> >[(const_int 0)]
> >{
> >  aarch64_split_atomic_op (SET, operands[0], NULL, operands[1],
> > @@ -344,7 +344,7 @@ (define_insn_and_split 
> > "aarch64_atomic_"
> >(clobber (match_scratch:SI 4 "=&r"))]
> >""
> >"#"
> > -  "&& reload_completed"
> > +  "&& epilogue_completed"
> >[(const_int 0)]
> >{
> >  aarch64_split_atomic_op (, NULL, operands[3], operands[0],
> > @@ -400,7 +400,7 @@ (define_insn_and_split "atomic_nand"
> > (clobber (match_scratch:SI 4 "=&r"))]
> >""
> >"#"
> > -  "&& reload_completed"
> > +  "&& epilogue_completed"
> >[(const_int 0)]
> >{
> >   aarch64_split_atomic_op (NOT, NULL, operands[3], operands[0],
> > @@ -504,7 +504,7 @@ (define_insn_and_split 
> > "aarch64_atomic_fetch_"
> > (clobber (match_scratch:SI 5 "=&r"))]
> >""
> >"#"
> > -  "&& reload_completed"
> > +  "&& epilogue_completed"
> >[(const_int 0)]
> >{
> >  aarch64_split_atomic_op (, operands[0], operands[4], operands[1],
> > @@ -551,7 +551,7 @@ (define_insn_and_split "atomic_fetch_nand"
> > (clobber (match_scratch:SI 5 "=&r"))]
> >""
> >"#"
> > -  "&& reload_completed"
> > +  "&& epilogue_completed"
> >[(const_int 0)]
> >{
> >  aarch64_split_atomic_op (NOT, operands[0], operands[4], operands[1],
> > @@ -604,7 +604,7 @@ (define_insn_and_split 
> > "aar

Re: [PATCH][AArch64] Fix shrinkwrapping interactions with atomics (PR92692)

2020-01-16 Thread Andrew Pinski
On Thu, Jan 16, 2020 at 5:51 PM Andrew Pinski  wrote:
>
> On Thu, Jan 16, 2020 at 5:14 AM Richard Sandiford
>  wrote:
> >
> > Wilco Dijkstra  writes:
> > > The separate shrinkwrapping pass may insert stores in the middle
> > > of atomics loops which can cause issues on some implementations.
> > > Avoid this by delaying splitting of atomic patterns until after
> > > prolog/epilog generation.
> > >
> > > Bootstrap completed, no test regressions on AArch64.
> > >
> > > Andrew, can you verify this fixes the failure you were getting?
> > >
> > > ChangeLog:
> > > 2020-01-16  Wilco Dijkstra  
> > >
> > > PR target/92692
> > > * config/aarch64/aarch64.c (aarch64_split_compare_and_swap)
> > > Add assert to ensure prolog has been emitted.
> > > (aarch64_split_atomic_op): Likewise.
> > > * config/aarch64/atomics.md (aarch64_compare_and_swap)
> > > Use epilogue_completed rather than reload_completed.
> > > (aarch64_atomic_exchange): Likewise.
> > > (aarch64_atomic_): Likewise.
> > > (atomic_nand): Likewise.
> > > (aarch64_atomic_fetch_): Likewise.
> > > (atomic_fetch_nand): Likewise.
> > > (aarch64_atomic__fetch): Likewise.
> > > (atomic_nand_fetch): Likewise.
> >
> > OK if Andrew confirms it works, thanks.
>
> Yes this fixes the issue for me.
Here is the new assembly showing it worked:

d390:   f9000bf3str x19, [sp, #16]
d394:   885ffdc8ldaxr   w8, [x14]
d398:   6b01011fcmp w8, w1
d39c:   5461b.ned3a8
  // b.any
d3a0:   88137dc5stxrw19, w5, [x14]

Thanks,
Andrew Pinski


>
> Thanks,
> Andrew
>
> >
> > Richard
> >
> > > ---
> > >
> > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> > > index 
> > > ac89cc1f9c938455d33d8850d9ebfc0473cb73dc..cd9d813f2ac4990971f6435fdb28b0f94ae10309
> > >  100644
> > > --- a/gcc/config/aarch64/aarch64.c
> > > +++ b/gcc/config/aarch64/aarch64.c
> > > @@ -18375,6 +18375,9 @@ aarch64_emit_post_barrier (enum memmodel model)
> > >  void
> > >  aarch64_split_compare_and_swap (rtx operands[])
> > >  {
> > > +  /* Split after prolog/epilog to avoid interactions with 
> > > shrinkwrapping.  */
> > > +  gcc_assert (epilogue_completed);
> > > +
> > >rtx rval, mem, oldval, newval, scratch, x, model_rtx;
> > >machine_mode mode;
> > >bool is_weak;
> > > @@ -18469,6 +18472,9 @@ void
> > >  aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, 
> > > rtx mem,
> > >   rtx value, rtx model_rtx, rtx cond)
> > >  {
> > > +  /* Split after prolog/epilog to avoid interactions with 
> > > shrinkwrapping.  */
> > > +  gcc_assert (epilogue_completed);
> > > +
> > >machine_mode mode = GET_MODE (mem);
> > >machine_mode wmode = (mode == DImode ? DImode : SImode);
> > >const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
> > > diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
> > > index 
> > > c2bcabd0c3c2627b7222dcbc1af9c2e6b7ce6a76..996947799b5ef8445e9786b94e1ce62fd16e5b5c
> > >  100644
> > > --- a/gcc/config/aarch64/atomics.md
> > > +++ b/gcc/config/aarch64/atomics.md
> > > @@ -56,7 +56,7 @@ (define_insn_and_split "@aarch64_compare_and_swap"
> > > (clobber (match_scratch:SI 7 "=&r"))]
> > >""
> > >"#"
> > > -  "&& reload_completed"
> > > +  "&& epilogue_completed"
> > >[(const_int 0)]
> > >{
> > >  aarch64_split_compare_and_swap (operands);
> > > @@ -80,7 +80,7 @@ (define_insn_and_split "@aarch64_compare_and_swap"
> > > (clobber (match_scratch:SI 7 "=&r"))]
> > >""
> > >"#"
> > > -  "&& reload_completed"
> > > +  "&& epilogue_completed"
> > >[(const_int 0)]
> > >{
> > >  aarch64_split_compare_and_swap (operands);
> > > @@ -104,7 +104,7 @@ (define_insn_and_split 
> > > "@aarch64_compare_and_swap"
> > > (clobber (match_scratch:SI 7 "=&r"))]
> > >""
> > >"#"
> > > -  "&& reload_completed"
&g

Re: [PATCH] Fix PR 93242: patchable-function-entry broken on MIPS

2020-01-22 Thread Andrew Pinski
On Wed, Jan 22, 2020 at 12:48 PM Jeff Law  wrote:
>
> On Mon, 2020-01-20 at 09:42 +0100, Richard Biener wrote:
> > On Sat, Jan 18, 2020 at 1:47 AM  wrote:
> > > From: Andrew Pinski 
> > >
> > > On MIPS, .set noreorder/reorder needs to emitted around
> > > the nop.  The template for the nop instruction uses %(/%) to
> > > do that.  But default_print_patchable_function_entry uses
> > > fprintf rather than output_asm_insn to output the instruction.
> > >
> > > This fixes the problem by using output_asm_insn to emit the nop
> > > instruction.
> > >
> > > OK?  Bootstrapped and tested on x86_64-linux-gnu and built a full
> > > mips toolchain also.
> >
> > OK.
> FWIW, I think this may have broke the arc-elf port.  I'm getting
> failures for the patchable function entry tests.  It looks like the
> port wants to peek a the current_output_insn in its handling of an
> output punctuation characters and current_output_insn is NULL.

I suspect arc-elf was failing beforehand; just not crashing the compiler :).
Before this patch we would be printing out "nop%?" for arc-elf.  The
tests are "compile" so they would have "passed" but only because the
tests was not trying to assemble them.  If someone had tried to use
this option of arc-elf, they would have run into a similar problem as
mips, printing out %? (in arc case).

Thanks,
Andrew Pinski


>
> jeff
> >
>


Re: [PATCH] Fix PR 93242: patchable-function-entry broken on MIPS

2020-01-22 Thread Andrew Pinski
On Wed, Jan 22, 2020 at 1:16 PM Andrew Pinski  wrote:
>
> On Wed, Jan 22, 2020 at 12:48 PM Jeff Law  wrote:
> >
> > On Mon, 2020-01-20 at 09:42 +0100, Richard Biener wrote:
> > > On Sat, Jan 18, 2020 at 1:47 AM  wrote:
> > > > From: Andrew Pinski 
> > > >
> > > > On MIPS, .set noreorder/reorder needs to emitted around
> > > > the nop.  The template for the nop instruction uses %(/%) to
> > > > do that.  But default_print_patchable_function_entry uses
> > > > fprintf rather than output_asm_insn to output the instruction.
> > > >
> > > > This fixes the problem by using output_asm_insn to emit the nop
> > > > instruction.
> > > >
> > > > OK?  Bootstrapped and tested on x86_64-linux-gnu and built a full
> > > > mips toolchain also.
> > >
> > > OK.
> > FWIW, I think this may have broke the arc-elf port.  I'm getting
> > failures for the patchable function entry tests.  It looks like the
> > port wants to peek a the current_output_insn in its handling of an
> > output punctuation characters and current_output_insn is NULL.
>
> I suspect arc-elf was failing beforehand; just not crashing the compiler :).
> Before this patch we would be printing out "nop%?" for arc-elf.  The
> tests are "compile" so they would have "passed" but only because the
> tests was not trying to assemble them.  If someone had tried to use
> this option of arc-elf, they would have ran into a similar problem as
> mips, printing out %? (in arc case).

Just a quick survey of the targets which had an issue before this patch:
* arc - uses %?
* bpf - uses %%
* mips - uses %(/%) which emits .set noreorder/reorder
* nios2 - %.
* s390 - %%

nios2 - checks current_output_insn for null-ness when it is seeing if
it needs to print . or not.
arc - does not check current_output_insn for null-ness

Thanks,
Andrew Pinski


>
> Thanks,
> Andrew Pinski
>
>
> >
> > jeff
> > >
> >


Re: [PATCH] Allow tree-ssa.exp to be run by itself

2020-01-22 Thread Andrew Pinski
On Wed, Jan 22, 2020 at 3:39 PM Jakub Jelinek  wrote:
>
> On Tue, Jan 21, 2020 at 05:56:38PM -0800, apin...@marvell.com wrote:
> > From: Andrew Pinski 
> >
> > tree-ssa testcases sometimes check autovect effective target
> > but does not set it up.  On MIPS, those testcases fail with
> > some TCL error messages.  This fixes the issue by calling
> > check_vect_support_and_set_flags inside tree-ssa.exp.
> > There might be other .exp files which need to be done this
> > way too but I have not checked all of them.
> >
> > OK?  Tested on x86_64-linux-gnu and a cross to mips64-octeon-linux-gnu.
> > Both full run of the testsuite and running tree-ssa.exp by itself.
>
> I don't see how this could have been tested on x86_64-linux, it breaks there
> a lot of tests (ditto i686-linux).  My wild guess is that it changes the
> default dg-do action for tests that don't have any, which in tree-ssa.exp
> used to be dg-do compile but now it tries to link them or run them.
> But it seems like it even affects the dg-do default for tests later in other
> *.exp files, e.g. various i386.exp tests, some ubsan.exp etc.
> What tree-ssa tests were the reason for this change?
> I see
> pr88497-1.c:/* { dg-require-effective-target vect_double } */
> pr88497-2.c:/* { dg-require-effective-target vect_float } */
> pr88497-3.c:/* { dg-require-effective-target vect_int } */
> pr88497-4.c:/* { dg-require-effective-target vect_int } */
> pr88497-5.c:/* { dg-require-effective-target vect_int } */
> is that just those?
Yes it is just those.
check_effective_target_vect_double calls into et-is-effective-target
for mips*-*-* target.
et-is-effective-target is only ever used by the MIPS targets.
It was introduced by g:9b7937cf8a06.

So I did originally test it and it was working beforehand, but I just
went to retest it and I got the failures you listed below.
I also notice the setting of dg-do-what-default in
check_vect_support_and_set_flags now too :(.

Thanks,
Andrew


> Or the tests that use vect_cmdline_needed effective target?
>
> +FAIL: gcc.dg/tree-ssa/20030920-1.c (test for excess errors)
> +UNRESOLVED: gcc.dg/tree-ssa/20030920-1.c compilation failed to produce 
> executable
> +FAIL: gcc.dg/tree-ssa/builtin-snprintf-3.c (test for excess errors)
> +UNRESOLVED: gcc.dg/tree-ssa/builtin-snprintf-3.c compilation failed to 
> produce executable
> +FAIL: gcc.dg/tree-ssa/builtin-sprintf-7.c (test for excess errors)
> +UNRESOLVED: gcc.dg/tree-ssa/builtin-sprintf-7.c compilation failed to 
> produce executable
> +FAIL: gcc.dg/tree-ssa/builtin-sprintf-8.c (test for excess errors)
> +UNRESOLVED: gcc.dg/tree-ssa/builtin-sprintf-8.c compilation failed to 
> produce executable
> +UNRESOLVED: gcc.dg/tree-ssa/forwprop-29.c compilation failed to produce 
> executable
> +FAIL: gcc.dg/tree-ssa/ipa-split-3.c (test for excess errors)
> +UNRESOLVED: gcc.dg/tree-ssa/ipa-split-3.c compilation failed to produce 
> executable
> +FAIL: gcc.dg/tree-ssa/ivopt_5.c (test for excess errors)
> +UNRESOLVED: gcc.dg/tree-ssa/ivopt_5.c compilation failed to produce 
> executable
> +FAIL: gcc.dg/tree-ssa/loop-14.c (test for excess errors)
> +UNRESOLVED: gcc.dg/tree-ssa/loop-14.c compilation failed to produce 
> executable
> +FAIL: gcc.dg/tree-ssa/loop-15.c (test for excess errors)
> +UNRESOLVED: gcc.dg/tree-ssa/loop-15.c compilation failed to produce 
> executable
> +FAIL: gcc.dg/tree-ssa/loop-22.c (test for excess errors)
> +UNRESOLVED: gcc.dg/tree-ssa/loop-22.c compilation failed to produce 
> executable
> +FAIL: gcc.dg/tree-ssa/pr20739.c (test for excess errors)
> +UNRESOLVED: gcc.dg/tree-ssa/pr20739.c compilation failed to produce 
> executable
> +FAIL: gcc.dg/tree-ssa/pr25485.c (test for excess errors)
> +UNRESOLVED: gcc.dg/tree-ssa/pr25485.c compilation failed to produce 
> executable
> +FAIL: gcc.dg/tree-ssa/pr25501.c (test for excess errors)
> +UNRESOLVED: gcc.dg/tree-ssa/pr25501.c compilation failed to produce 
> executable
> +FAIL: gcc.dg/tree-ssa/pr26899.c (test for excess errors)
> +UNRESOLVED: gcc.dg/tree-ssa/pr26899.c compilation failed to produce 
> executable
> +FAIL: gcc.dg/tree-ssa/pr77808.c (test for excess errors)
> +UNRESOLVED: gcc.dg/tree-ssa/pr77808.c compilation failed to produce 
> executable
> +FAIL: gcc.dg/tree-ssa/pr84436-2.c (test for excess errors)
> +UNRESOLVED: gcc.dg/tree-ssa/pr84436-2.c compilation failed to produce 
> executable
> +FAIL: gcc.dg/tree-ssa/pr84436-3.c (test for excess errors)
> +UNRESOLVED: gcc.dg/tree-ssa/pr84436-3.c compilation failed to produce 
> executable
> +FAIL: gcc.dg/tree-ssa/pr86061.c (test for excess errors)
> +UNRESOLVED: gcc.dg/tree-ssa/pr86061.c compilation failed to produce 
> executable
> +FAIL: gcc.dg/tree-ssa/pr87205-2.c (test 

Re: [PATCH] Allow tree-ssa.exp to be run by itself

2020-01-22 Thread Andrew Pinski
On Wed, Jan 22, 2020 at 4:34 PM Jakub Jelinek  wrote:
>
> On Wed, Jan 22, 2020 at 04:22:54PM -0800, Andrew Pinski wrote:
> > > I see
> > > pr88497-1.c:/* { dg-require-effective-target vect_double } */
> > > pr88497-2.c:/* { dg-require-effective-target vect_float } */
> > > pr88497-3.c:/* { dg-require-effective-target vect_int } */
> > > pr88497-4.c:/* { dg-require-effective-target vect_int } */
> > > pr88497-5.c:/* { dg-require-effective-target vect_int } */
> > > is that just those?
> > Yes it is just those.
> > check_effective_target_vect_double calls into et-is-effective-target
> > for mips*-*-* target.
> > et-is-effective-target is only ever used by the MIPS targets.
> > It was introduced by g:9b7937cf8a06.
> >
> > So I did originally test it and it was working beforehand but I just
> > to retest it and I got the failures you listed below.
> > I also notice the setting of dg-do-what-default in
> > check_vect_support_and_set_flags now too :(.
>
> My preference would be to revert your patch and move those 5
> tests where they belong (i.e. gcc.dg/vect/) instead.

I agree.  Let me do that.  I looked to see if
"dg-require-effective-target vect_*" is used in any other testcase
outside of gomp/graphite/vect/target testsuite and there is none
other.

Thanks,
Andrew

>
> Jakub
>


Re: [PATCH 2/2] analyzer: avoid use of fold_build2

2020-01-31 Thread Andrew Pinski
On Thu, Jan 30, 2020 at 5:19 PM David Malcolm  wrote:
>
> Various places in the analyzer use fold_build2, test the result, then
> discard it.  It's more efficient to use fold_binary, which avoids
> building and GC-ing a redundant tree for the cases where folding fails.

If these are all true integer constants, then you might want to use
tree_int_cst_compare instead of even using fold_binary/fold_build2.
Also if you are doing equal but always constant (but not always
integer ones), you could use simple_cst_equal instead.

Thanks,
Andrew Pinski

>
> gcc/analyzer/ChangeLog:
> * constraint-manager.cc (range::constrained_to_single_element):
> Replace fold_build2 with fold_binary.  Remove unnecessary newline.
> (constraint_manager::get_or_add_equiv_class): Replace fold_build2
> with fold_binary in two places, and remove out-of-date comment.
> (constraint_manager::eval_condition): Replace fold_build2 with
> fold_binary.
> * region-model.cc (constant_svalue::eval_condition): Likewise.
> (region_model::on_assignment): Likewise.
> ---
>  gcc/analyzer/constraint-manager.cc | 15 ++-
>  gcc/analyzer/region-model.cc   |  6 +++---
>  2 files changed, 9 insertions(+), 12 deletions(-)
>
> diff --git a/gcc/analyzer/constraint-manager.cc 
> b/gcc/analyzer/constraint-manager.cc
> index f3e31ee0830..4d138188856 100644
> --- a/gcc/analyzer/constraint-manager.cc
> +++ b/gcc/analyzer/constraint-manager.cc
> @@ -145,10 +145,9 @@ range::constrained_to_single_element (tree *out)
>m_upper_bound.ensure_closed (true);
>
>// Are they equal?
> -  tree comparison
> -= fold_build2 (EQ_EXPR, boolean_type_node,
> -  m_lower_bound.m_constant,
> -  m_upper_bound.m_constant);
> +  tree comparison = fold_binary (EQ_EXPR, boolean_type_node,
> +m_lower_bound.m_constant,
> +m_upper_bound.m_constant);
>if (comparison == boolean_true_node)
>  {
>*out = m_lower_bound.m_constant;
> @@ -930,7 +929,7 @@ constraint_manager::get_or_add_equiv_class (svalue_id sid)
>FOR_EACH_VEC_ELT (m_equiv_classes, i, ec)
> if (ec->m_constant)
>   {
> -   tree eq = fold_build2 (EQ_EXPR, boolean_type_node,
> +   tree eq = fold_binary (EQ_EXPR, boolean_type_node,
>cst, ec->m_constant);
> if (eq == boolean_true_node)
>   {
> @@ -967,10 +966,8 @@ constraint_manager::get_or_add_equiv_class (svalue_id 
> sid)
>  Determine the direction of the inequality, and record that
>  fact.  */
>   tree lt
> -   = fold_build2 (LT_EXPR, boolean_type_node,
> +   = fold_binary (LT_EXPR, boolean_type_node,
>new_ec->m_constant, other_ec.m_constant);
> - //gcc_assert (lt == boolean_true_node || lt == 
> boolean_false_node);
> - // not true for int vs float comparisons
>   if (lt == boolean_true_node)
> add_constraint_internal (new_id, CONSTRAINT_LT, other_id);
>   else if (lt == boolean_false_node)
> @@ -1016,7 +1013,7 @@ constraint_manager::eval_condition (equiv_class_id 
> lhs_ec,
>if (lhs_const && rhs_const)
>  {
>tree comparison
> -   = fold_build2 (op, boolean_type_node, lhs_const, rhs_const);
> +   = fold_binary (op, boolean_type_node, lhs_const, rhs_const);
>if (comparison == boolean_true_node)
> return tristate (tristate::TS_TRUE);
>if (comparison == boolean_false_node)
> diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
> index b546114bfd5..95d002f9c28 100644
> --- a/gcc/analyzer/region-model.cc
> +++ b/gcc/analyzer/region-model.cc
> @@ -670,7 +670,7 @@ constant_svalue::eval_condition (constant_svalue *lhs,
>if (types_compatible_p (TREE_TYPE (lhs_const), TREE_TYPE (rhs_const)))
>  {
>tree comparison
> -   = fold_build2 (op, boolean_type_node, lhs_const, rhs_const);
> +   = fold_binary (op, boolean_type_node, lhs_const, rhs_const);
>if (comparison == boolean_true_node)
> return tristate (tristate::TS_TRUE);
>if (comparison == boolean_false_node)
> @@ -4070,9 +4070,9 @@ region_model::on_assignment (const gassign *assign, 
> region_model_context *ctxt)
> if (tree rhs1_cst = maybe_get_constant (rhs1_sid))
>   if (tree rhs2_cst = maybe_get_constant (rhs2_sid))
> {
> - tree result = fold_build2 (op, TREE_TYPE (lhs),
> + tree result = fold_binary (op, TREE_TYPE (l

Re: [PATCH][AArch64] Improve popcount expansion

2020-02-03 Thread Andrew Pinski
On Mon, Feb 3, 2020 at 7:02 AM Wilco Dijkstra  wrote:
>
> The popcount expansion uses umov to extend the result and move it back
> to the integer register file.  If we model ADDV as a zero-extending
> operation, fmov can be used to move back to the integer side. This
> results in a ~0.5% speedup on deepsjeng on Cortex-A57.
>
> A typical __builtin_popcount expansion is now:
>
> fmovs0, w0
> cnt v0.8b, v0.8b
> addvb0, v0.8b
> fmovw0, s0
>
> Bootstrap OK, passes regress.

You might want to add a testcase that tests the autovectorizers too.
Something like this:
unsigned f(unsigned char *a)
{
 unsigned char b = 0;
 for(int i = 0; i < 16; i++)
   b+=a[i];
 return b;
}
--- CUT ---

Currently we get also:
ldr q0, [x0]
addvb0, v0.16b
umovw0, v0.b[0]
ret

Otherwise LGTM.

Thanks,
Andrew

>
> ChangeLog
> 2020-02-02  Wilco Dijkstra  
>
> gcc/
> * config/aarch64/aarch64.md (popcount2): Improve expansion.
> * config/aarch64/aarch64-simd.md
> (aarch64_zero_extend_reduc_plus_): New pattern.
> * config/aarch64/iterators.md (VDQV_E): New iterator.
> testsuite/
> * gcc.target/aarch64/popcnt2.c: New test.
>
> --
> diff --git a/gcc/config/aarch64/aarch64-simd.md 
> b/gcc/config/aarch64/aarch64-simd.md
> index 
> 97f46f96968a6bc2f93bbc812931537b819b3b19..34765ff43c1a090a31e2aed64ce95510317ab8c3
>  100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -2460,6 +2460,17 @@ (define_insn "aarch64_reduc_plus_internal"
>[(set_attr "type" "neon_reduc_add")]
>  )
>
> +;; ADDV with result zero-extended to SI/DImode (for popcount).
> +(define_insn "aarch64_zero_extend_reduc_plus_"
> + [(set (match_operand:GPI 0 "register_operand" "=w")
> +   (zero_extend:GPI
> +   (unspec: [(match_operand:VDQV_E 1 "register_operand" "w")]
> +UNSPEC_ADDV)))]
> + "TARGET_SIMD"
> + "add\\t%0, %1."
> +  [(set_attr "type" "neon_reduc_add")]
> +)
> +
>  (define_insn "aarch64_reduc_plus_internalv2si"
>   [(set (match_operand:V2SI 0 "register_operand" "=w")
> (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 
> 86c2cdfc7973f4b964ba233cfbbe369b24e0ac10..5edc76ee14b55b2b4323530e10bd22b3ffca483e
>  100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -4829,7 +4829,6 @@ (define_expand "popcount2"
>  {
>rtx v = gen_reg_rtx (V8QImode);
>rtx v1 = gen_reg_rtx (V8QImode);
> -  rtx r = gen_reg_rtx (QImode);
>rtx in = operands[1];
>rtx out = operands[0];
>if(mode == SImode)
> @@ -4843,8 +4842,7 @@ (define_expand "popcount2"
>  }
>emit_move_insn (v, gen_lowpart (V8QImode, in));
>emit_insn (gen_popcountv8qi2 (v1, v));
> -  emit_insn (gen_reduc_plus_scal_v8qi (r, v1));
> -  emit_insn (gen_zero_extendqi2 (out, r));
> +  emit_insn (gen_aarch64_zero_extend_reduc_plus_v8qi (out, v1));
>DONE;
>  })
>
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 
> fc973086cb91ae0dc54eeeb0b832d522539d7982..926779bf2442fa60d184ef17308f91996d6e8d1b
>  100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -208,6 +208,9 @@ (define_mode_iterator VDQV [V8QI V16QI V4HI V8HI V4SI 
> V2DI])
>  ;; Advanced SIMD modes (except V2DI) for Integer reduction across lanes.
>  (define_mode_iterator VDQV_S [V8QI V16QI V4HI V8HI V4SI])
>
> +;; Advanced SIMD modes for Integer reduction across lanes (zero/sign 
> extended).
> +(define_mode_iterator VDQV_E [V8QI V16QI V4HI V8HI])
> +
>  ;; All double integer narrow-able modes.
>  (define_mode_iterator VDN [V4HI V2SI DI])
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt2.c 
> b/gcc/testsuite/gcc.target/aarch64/popcnt2.c
> new file mode 100644
> index 
> ..e321858afa4d6ecb6fc7348f39f6e5c6c0c46147
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/popcnt2.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +unsigned
> +foo (int x)
> +{
> +  return __builtin_popcount (x);
> +}
> +
> +unsigned long
> +foo1 (int x)
> +{
> +  return __builtin_popcount (x);
> +}
> +
> +/* { dg-final { scan-assembler-not {popcount} } } */
> +/* { dg-final { scan-assembler-times {cnt\t} 2 } } */
> +/* { dg-final { scan-assembler-times {fmov} 4 } } */
> +/* { dg-final { scan-assembler-not {umov} } } */
> +/* { dg-final { scan-assembler-not {uxtw} } } */
> +/* { dg-final { scan-assembler-not {sxtw} } } */
>


Re: libgo patch committed: Update to Go1.14beta1

2020-02-04 Thread Andrew Pinski
Something like attached.
I will clean it up next week and submit it then.
It should also fix some arm64be related issues too.

Thanks,
Andrew Pinski

On Mon, Feb 3, 2020 at 6:17 PM Ian Lance Taylor  wrote:
>
> On Sun, Feb 2, 2020 at 2:27 AM Andreas Schwab  wrote:
> >
> > I'm getting these errors on aarch64 with -mabi=ilp32:
> >
> > ../../../../libgo/go/runtime/mpagealloc.go:226:38: error: shift count 
> > overflow
> >   226 |  chunks [1 << pallocChunksL1Bits]*[1 << 
> > pallocChunksL2Bits]pallocData
> >   |  ^
> > ../../../../libgo/go/runtime/mgcscavenge.go:487:15: error: shift count 
> > overflow
> >   487 |l2 := (*[1 << 
> > pallocChunksL2Bits]pallocData)(atomic.Loadp(unsafe.Pointer(&s.chunks[i.l1()])))
> >   |   ^
> > ../../../../libgo/go/runtime/mpagealloc.go:138:22: error: shift count 
> > overflow
> >   138 |   return uint(i) & (1< >   |  ^
> > ../../../../libgo/go/runtime/mpagealloc.go:129:21: error: integer constant 
> > overflow
> >   129 |   return uint(i) >> pallocChunksL1Shift
> >   | ^
> > ../../../../libgo/go/runtime/mpagealloc_64bit.go:34:2: error: integer 
> > constant overflow
> >34 |  summaryL0Bits,
> >   |  ^
>
> I'm not sure that gccgo ever fully worked with aarch64 -mabi=ilp32.
> In Go I think that will have to be represented with a new GOARCH
> value, arm64p32.
>
> Ian
From 14de07bd862051df38160da375fd286ce956785f Mon Sep 17 00:00:00 2001
From: Andrew Pinski 
Date: Wed, 5 Feb 2020 04:36:13 +
Subject: [PATCH] Add ilp32 ARM64 support to gccgo.

Change-Id: Ide52be45dd9fd5d2a5dfc7d138fc56d963d06632
Signed-off-by: Andrew Pinski 
---
 gcc/testsuite/go.test/go-test.exp |  9 ++-
 libgo/configure   | 27 ++-
 libgo/configure.ac| 20 +-
 libgo/go/cmd/cgo/main.go  |  6 +
 libgo/go/cmd/go/go_test.go|  1 +
 libgo/go/cmd/go/internal/imports/build.go |  2 ++
 libgo/go/cmd/internal/sys/arch.go | 10 +++
 libgo/go/cmd/internal/sys/supported.go| 10 ---
 libgo/go/crypto/aes/aes_gcm.go|  2 +-
 libgo/go/crypto/aes/cipher_asm.go |  2 +-
 libgo/go/crypto/aes/cipher_generic.go |  2 +-
 libgo/go/golang.org/x/sys/cpu/byteorder.go|  2 ++
 ...cpu_linux_arm64.go => cpu_linux_arm64x.go} |  2 ++
 .../golang.org/x/sys/cpu/cpu_linux_other.go   |  2 +-
 .../cpu/{cpu_arm64.go => cpu_arm64x.go}   |  2 ++
 libgo/go/internal/cpu/cpu_no_init.go  |  3 +++
 .../syscall/unix/getrandom_linux_generic.go   |  2 +-
 libgo/go/runtime/cputicks.go  |  3 +++
 libgo/go/runtime/hash32.go|  2 +-
 libgo/go/runtime/lfstack_32bit.go |  2 +-
 libgo/go/runtime/mpagealloc_32bit.go  |  2 +-
 .../{os_linux_arm64.go => os_linux_arm64x.go} |  2 +-
 libgo/go/runtime/os_linux_noauxv.go   |  2 +-
 libgo/go/syscall/endian_big.go|  2 +-
 libgo/go/syscall/endian_little.go |  2 +-
 libgo/goarch.sh   |  7 -
 libgo/match.sh|  4 +--
 libgo/testsuite/gotest|  4 +--
 28 files changed, 103 insertions(+), 33 deletions(-)
 rename libgo/go/golang.org/x/sys/cpu/{cpu_linux_arm64.go => 
cpu_linux_arm64x.go} (97%)
 rename libgo/go/internal/cpu/{cpu_arm64.go => cpu_arm64x.go} (98%)
 rename libgo/go/runtime/{os_linux_arm64.go => os_linux_arm64x.go} (94%)

diff --git a/gcc/testsuite/go.test/go-test.exp 
b/gcc/testsuite/go.test/go-test.exp
index 51f9b381d67..7afcba14b64 100644
--- a/gcc/testsuite/go.test/go-test.exp
+++ b/gcc/testsuite/go.test/go-test.exp
@@ -188,7 +188,14 @@ proc go-set-goarch { } {
 
 switch -glob $target_triplet {
"aarch64*-*-*" {
-   set goarch "arm64"
+   if [check_effective_target_lp64] {
+   set goarch "arm64"
+   } else {
+   set goarch "amd64p32"
+   }
+   if [check_effective_target_aarch64_big_endian] {
+   append goarch "be"
+   }
}
"alpha*-*-*" {
set goarch "alpha"
diff --git a/libgo/configure b/libgo/configure
index 2f787392abd..8eca900889f 100755
--- a/libgo/configure
+++ b/libgo/configure
@@ -14070,7 +14070,7 @@ esac
 #   - libgo/go/syscall/endian_XX.go
 #   - possibly others
 # - possibly update files in libgo/go/internal/syscall/unix
-ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mips 
mipsle mips64 mips64le mips64p32 mips64p32le nios2 ppc ppc64 ppc64le riscv 
riscv64 s39

[FYI] Patches that fix testing santiziers with qemu user mode

2020-02-08 Thread Andrew Pinski
Hi,
  These two patches are what I use to fix testing of the sanitizers
with qemu.  The first one disables coloring always as for some reason
when running with qemu (but not normally), coloring is detected.  I
have not gone and debugged the reason why the sanitizers do not
detect coloring when run under dejagnu even though, it is designed to
be running under a psedu-TTY.

The second patch disables LSAN when clone, with the arguments that are
needed to stop the world, fails.
upstream but I am not going to that right now; plus maybe it should
only disable it when used with asan instead of in general.

With these two patches, I get clean test results on aarch64-linux-gnu.
I am mainly sending them here as I think they are useful for people
who are doing testing; especially cross testing.

Thanks,
Andrew Pinski
From 7666c4ec5db5e99530f8ff9411b782326ce96655 Mon Sep 17 00:00:00 2001
From: Andrew Pinski 
Date: Thu, 6 Feb 2020 02:06:27 +
Subject: [PATCH 1/2] Set default coloring to never.

Auto does not work always.  So just disable coloring.

Change-Id: I68564c6b4c35ed6d7f4e2938d765f428995900e7
Signed-off-by: Andrew Pinski 
---
 libsanitizer/sanitizer_common/sanitizer_flags.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libsanitizer/sanitizer_common/sanitizer_flags.inc 
b/libsanitizer/sanitizer_common/sanitizer_flags.inc
index 065258a5a6e..9caf442b2d1 100644
--- a/libsanitizer/sanitizer_common/sanitizer_flags.inc
+++ b/libsanitizer/sanitizer_common/sanitizer_flags.inc
@@ -108,7 +108,7 @@ COMMON_FLAG(
 uptr, clear_shadow_mmap_threshold, 64 * 1024,
 "Large shadow regions are zero-filled using mmap(NORESERVE) instead of "
 "memset(). This is the threshold size in bytes.")
-COMMON_FLAG(const char *, color, "auto",
+COMMON_FLAG(const char *, color, "never",
 "Colorize reports: (always|never|auto).")
 COMMON_FLAG(
 bool, legacy_pthread_cond, false,
-- 
2.17.1

From 1a931d339e6e89bdc7292f6c52ba5f89278bda6a Mon Sep 17 00:00:00 2001
From: Andrew Pinski 
Date: Thu, 6 Feb 2020 17:18:26 -0800
Subject: [PATCH 2/2] Have the ability to disable lsan if clone fails.

clone fails for some cases for qemu, lsan is not needed
for asan testing.  So we can get lsan disabled when the
clone fails.

Tested on aarch64-linux-gnu with qemu.  All asan.exp tests now pass.

Change-Id: I1e281a3701ef0a1a4325ce2fbf8ca263a930fbe5
Signed-off-by: Andrew Pinski 
---
 libsanitizer/asan/asan_rtl.cpp|  2 +-
 libsanitizer/lsan/lsan.cpp|  5 ++
 libsanitizer/lsan/lsan_common.cpp | 81 +++
 libsanitizer/lsan/lsan_common.h   |  1 +
 4 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/libsanitizer/asan/asan_rtl.cpp b/libsanitizer/asan/asan_rtl.cpp
index 594d7752eea..93ef79e111d 100644
--- a/libsanitizer/asan/asan_rtl.cpp
+++ b/libsanitizer/asan/asan_rtl.cpp
@@ -500,7 +500,7 @@ static void AsanInitInternal() {
 
   if (CAN_SANITIZE_LEAKS) {
 __lsan::InitCommonLsan();
-if (common_flags()->detect_leaks && common_flags()->leak_check_at_exit) {
+if (!__lsan::lsan_cannotwork && common_flags()->detect_leaks && 
common_flags()->leak_check_at_exit) {
   if (flags()->halt_on_error)
 Atexit(__lsan::DoLeakCheck);
   else
diff --git a/libsanitizer/lsan/lsan.cpp b/libsanitizer/lsan/lsan.cpp
index 4ce03046ffb..3488f0436a1 100644
--- a/libsanitizer/lsan/lsan.cpp
+++ b/libsanitizer/lsan/lsan.cpp
@@ -108,6 +108,11 @@ extern "C" void __lsan_init() {
   AvoidCVE_2016_2143();
   InitializeFlags();
   InitCommonLsan();
+  if (lsan_cannotwork) {
+lsan_init_is_running = false;
+lsan_inited = true;
+return;
+  }
   InitializeAllocator();
   ReplaceSystemMalloc();
   InitTlsSize();
diff --git a/libsanitizer/lsan/lsan_common.cpp 
b/libsanitizer/lsan/lsan_common.cpp
index 9ff9f4c5d1c..2023a9685dd 100644
--- a/libsanitizer/lsan/lsan_common.cpp
+++ b/libsanitizer/lsan/lsan_common.cpp
@@ -25,9 +25,17 @@
 #include "sanitizer_common/sanitizer_thread_registry.h"
 #include "sanitizer_common/sanitizer_tls_get_addr.h"
 
+#if SANITIZER_LINUX
+#include  // for CLONE_* definitions
+#include  // for signal-related stuff
+#include 
+#endif
+
 #if CAN_SANITIZE_LEAKS
 namespace __lsan {
 
+bool lsan_cannotwork;
+
 // This mutex is used to prevent races between DoLeakCheck and IgnoreObject, 
and
 // also to protect the global list of root regions.
 BlockingMutex global_mutex(LINKER_INITIALIZED);
@@ -111,7 +119,76 @@ const char *MaybeCallLsanDefaultOptions() {
   return (&__lsan_default_options) ? __lsan_default_options() : "";
 }
 
+#if SANITIZER_LINUX
+namespace {
+
+class ScopedStackSpaceWithGuard {
+ public:
+  explicit ScopedStackSpaceWithGuard(uptr stack_size) {
+stack_size_ = stack_size;
+guard_size_ = GetPageSizeCached();
+// FIXME: Omitting MAP_STA

Re: [PATCH v2] [MIPS] Prevent allocation of a GPR for a floating mode pseudo

2020-02-10 Thread Andrew Pinski
On Mon, Feb 10, 2020 at 5:33 AM Mihailo Stojanovic
 wrote:
>
> Similar to the mirror case of allocating an FPR for an integer mode
> pseudo, prevent GPRs from being allocated for a floating mode pseudo.

Can you expand on why you want to do this?
Provide benchmarking or a testcase which this improves the code generation?
I can see this producing much worse code for soft-float (which I still
care about).

Thanks,
Andrew Pinski

>
> gcc/ChangeLog:
>
> * gcc/config/mips/mips.c (mips_ira_change_pseudo_allocno_class):
> Limit the allocation of floating mode pseudos to FP_REGS.
> ---
>  gcc/config/mips/mips.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
> index 513fc5f..ffcb14d 100644
> --- a/gcc/config/mips/mips.c
> +++ b/gcc/config/mips/mips.c
> @@ -22492,6 +22492,12 @@ mips_ira_change_pseudo_allocno_class (int regno, 
> reg_class_t allocno_class,
>   instructions that say integer mode values must be placed in FPRs.  */
>if (INTEGRAL_MODE_P (PSEUDO_REGNO_MODE (regno)) && allocno_class == 
> ALL_REGS)
>  return GR_REGS;
> +
> +  /* Likewise for the mirror case of floating mode pseudos being allocated in
> + a GPR.  */
> +  if (FLOAT_MODE_P (PSEUDO_REGNO_MODE (regno)) && allocno_class == ALL_REGS)
> +return FP_REGS;
> +
>return allocno_class;
>  }
>
> --
> 2.7.4
>


Re: [PATCH][AArch64] Improve clz patterns

2020-02-12 Thread Andrew Pinski
On Wed, Feb 12, 2020 at 9:56 AM Richard Sandiford
 wrote:
>
> Wilco Dijkstra  writes:
> > Hi Richard,
> >
> > Right, so this is an alternative approach using costs - Combine won't try to
> > duplicate instructions if it increases costs, so increasing the ctz cost to 
> > 2
> > instructions (which is the correct cost for ctz anyway)
>
> ...agreed...

Yes I agree a better cost model for CTZ/CLZ is the right solution but
I disagree with 2 ALU instructions as the cost.  It should either be
the same cost as a multiply or have its own cost entry.
For example, on OcteonTX (and ThunderX1), the cost of CLS/CLZ is 4
cycles, the same cost as a multiply; on OcteonTX2 it is 5
cycles (again the same cost as a multiply).

Thanks,
Andrew Pinski

>
> > ensures we still get efficient code for this example:
> >
> > [AArch64] Set ctz rtx_cost (PR93565)
> >
> > Although GCC should understand the limited range of clz/ctz/cls results,
> > Combine sometimes behaves oddly and duplicates ctz to remove an unnecessary
> > sign extension.  Avoid this by setting the cost for ctz to be higher than
> > that of a simple ALU instruction.  Deepsjeng performance improves by ~0.6%.
> >
> > Bootstrap OK.
> >
> > ChangeLog:
> > 2020-02-12  Wilco Dijkstra  
> >
> > PR rtl-optimization/93565
> > * config/aarch64/aarch64.c (aarch64_rtx_costs): Add CTZ costs.
> >
> > * gcc.target/aarch64/pr93565.c: New test.
>
> OK, thanks.  Could you remove the bit about combine behaving oddly when
> you commit though?  I think this was simply a target bug and combine
> was being given duff information.
>
> Richard
>
> >
> > --
> >
> > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> > index 
> > e40750380cce202473da3cf572ebdbc28a4ecc06..7426629d6c973c06640f75d3de53a2815ff40f1b
> >  100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -11459,6 +11459,13 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int 
> > outer ATTRIBUTE_UNUSED,
> >
> >return false;
> >
> > +case CTZ:
> > +  *cost = COSTS_N_INSNS (2);
> > +
> > +  if (speed)
> > +*cost += extra_cost->alu.clz + extra_cost->alu.rev;
> > +  return false;
> > +
> >  case COMPARE:
> >op0 = XEXP (x, 0);
> >op1 = XEXP (x, 1);
> > diff --git a/gcc/testsuite/gcc.target/aarch64/pr93565.c 
> > b/gcc/testsuite/gcc.target/aarch64/pr93565.c
> > new file mode 100644
> > index 
> > ..7200f80d1bb161f6a058cc6591f61b6b75cf1749
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/pr93565.c
> > @@ -0,0 +1,34 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +static const unsigned long long magic = 0x03f08c5392f756cdULL;
> > +
> > +static const char table[64] = {
> > + 0,  1, 12,  2, 13, 22, 17,  3,
> > +14, 33, 23, 36, 18, 58, 28,  4,
> > +62, 15, 34, 26, 24, 48, 50, 37,
> > +19, 55, 59, 52, 29, 44, 39,  5,
> > +63, 11, 21, 16, 32, 35, 57, 27,
> > +61, 25, 47, 49, 54, 51, 43, 38,
> > +10, 20, 31, 56, 60, 46, 53, 42,
> > + 9, 30, 45, 41,  8, 40,  7,  6,
> > +};
> > +
> > +static inline int ctz1 (unsigned long  b)
> > +{
> > +  unsigned long lsb = b & -b;
> > +  return table[(lsb * magic) >> 58];
> > +}
> > +
> > +void f (unsigned long x, int *p)
> > +{
> > +  if (x != 0)
> > +{
> > +  int a = ctz1 (x);
> > +  *p = a | p[a];
> > +}
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "rbit\t" 1 } } */
> > +/* { dg-final { scan-assembler-times "clz\t" 1 } } */
> > +


Re: [PATCH] aarch64: Allow -mcpu=generic -march=armv8.5-a

2020-02-14 Thread Andrew Pinski
On Fri, Feb 14, 2020 at 2:12 AM Richard Earnshaw (lists)
 wrote:
>
> On 14/02/2020 03:18, apin...@marvell.com wrote:
> > From: Andrew Pinski 
> >
> > Right if someone supplies a -mcpu= option and then overrides
> > that option with -march=*, we get a warning when they conflict.
> > What we need is a generic cpu for each arch level but that is not
> > that useful because the only difference would be the arch level.
> > The best option is to allow -mcpu=generic -march=armv8.5-a not to
> > warn and that is now a generic armv8.5-a arch.
> >
>
> Then they should use -mtune=generic, rather than -mcpu.

Does it make sense to run:
"make check RUNTESTFLAGS="--target_board=unix/{,-mcpu=octeontx2}"
to make sure there are no latent bugs floating around with slightly
different tuning/scheduling?
The majority of the aarch64.exp failures are due to that warning.
If not how would suggest to test a -mcpu= option?

There is another use case:
A specific object file is to be run only on armv8.5-a processors but
someone sets CFLAGS to include -mcpu=octeontx2.
How would you suggest going about handling this case?

These are the two major cases where having a -mcpu=generic which
overrides a previous -mcpu= option and still able to select a higher
-march= option.

Thanks,
Andrew Pinski


>
> R.
> > OK?  Bootstrapped and tested on aarch64-linux-gnu with no regressions.
> >
> > Thanks,
> > Andrew Pinski
> >
> > ChangeLog:
> > * config/aarch64/aarch64.c (aarch64_override_options): Don't
> > warn when the selected cpu was generic.
> > ---
> >   gcc/config/aarch64/aarch64.c | 6 --
> >   1 file changed, 4 insertions(+), 2 deletions(-)
> >
> > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> > index 4a34dce..9173afe 100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -14075,10 +14075,12 @@ aarch64_override_options (void)
> >   explicit_tune_core = selected_tune->ident;
> >   }
> > /* If both -mcpu and -march are specified check that they are 
> > architecturally
> > - compatible, warn if they're not and prefer the -march ISA flags.  */
> > + compatible, warn if they're not and prefer the -march ISA flags.
> > + Only warn if not using the generic cpu.  */
> > else if (selected_arch)
> >   {
> > -  if (selected_arch->arch != selected_cpu->arch)
> > +  if (selected_cpu->ident != generic
> > +   && selected_arch->arch != selected_cpu->arch)
> >   {
> > warning (0, "switch %<-mcpu=%s%> conflicts with %<-march=%s%> 
> > switch",
> >  all_architectures[selected_cpu->arch].name,
> >
>


Re: [PATCH] [AARCH64] Improve vector generation cost model

2019-12-07 Thread Andrew Pinski
On Thu, May 2, 2019 at 9:10 AM Andrew Pinski  wrote:
>
> On Thu, Mar 14, 2019 at 6:19 PM  wrote:
> >
> > From: Andrew Pinski 
> >
> > Hi,
> >   On OcteonTX2, ld1r and ld1 (with a single lane) are split
> > into two different micro-ops unlike most other targets.
> > This adds three extra costs to the cost table:
> > ld1_dup: used for "ld1r {v0.4s}, [x0]"
> > merge_dup: used for "dup v0.4s, v0.4s[0]" and "ins v0.4s[0], v0.4s[0]"
> > ld1_merge: used for "ld1 {v0.4s}[0], [x0]"
> >
> > OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
>
> Ping?  It has been 1.5 months now.

Ping?  I have bootstrapped and tested on aarch64-linux-gnu recently
with the patch.
Or does this has to wait until Stage 1?

Thanks,
Andrew

>
> >
> > Thanks,
> > Andrew Pinski
> >
> > ChangeLog:
> > * config/arm/aarch-common-protos.h (vector_cost_table):
> > Add merge_dup, ld1_merge, and ld1_dup.
> > * config/aarch64/aarch64-cost-tables.h (qdf24xx_extra_costs):
> > Update for the new fields.
> > (thunderx_extra_costs): Likewise.
> > (thunderx2t99_extra_costs): Likewise.
> > (tsv110_extra_costs): Likewise.
> > * config/arm/aarch-cost-tables.h (generic_extra_costs): Likewise.
> > (cortexa53_extra_costs): Likewise.
> > (cortexa57_extra_costs): Likewise.
> > (exynosm1_extra_costs): Likewise.
> > (xgene1_extra_costs): Likewise.
> > * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle vec_dup of a memory.
> > Handle vec_merge of a memory.
> >
> > Signed-off-by: Andrew Pinski 
> > ---
> >  gcc/config/aarch64/aarch64-cost-tables.h | 20 +++
> >  gcc/config/aarch64/aarch64.c | 22 +
> >  gcc/config/arm/aarch-common-protos.h |  3 +++
> >  gcc/config/arm/aarch-cost-tables.h   | 25 +++-
> >  4 files changed, 61 insertions(+), 9 deletions(-)
> >
> > diff --git a/gcc/config/aarch64/aarch64-cost-tables.h 
> > b/gcc/config/aarch64/aarch64-cost-tables.h
> > index 5c9442e1b89..9a7c70ba595 100644
> > --- a/gcc/config/aarch64/aarch64-cost-tables.h
> > +++ b/gcc/config/aarch64/aarch64-cost-tables.h
> > @@ -123,7 +123,10 @@ const struct cpu_cost_table qdf24xx_extra_costs =
> >},
> >/* Vector */
> >{
> > -COSTS_N_INSNS (1)  /* alu.  */
> > +COSTS_N_INSNS (1),  /* Alu.  */
> > +COSTS_N_INSNS (1), /* dup_merge.  */
> > +COSTS_N_INSNS (1), /* ld1_merge.  */
> > +COSTS_N_INSNS (1)  /* ld1_dup.  */
> >}
> >  };
> >
> > @@ -227,7 +230,10 @@ const struct cpu_cost_table thunderx_extra_costs =
> >},
> >/* Vector */
> >{
> > -COSTS_N_INSNS (1)  /* Alu.  */
> > +COSTS_N_INSNS (1), /* Alu.  */
> > +COSTS_N_INSNS (1), /* dup_merge.  */
> > +COSTS_N_INSNS (1), /* ld1_merge.  */
> > +COSTS_N_INSNS (1)  /* ld1_dup.  */
> >}
> >  };
> >
> > @@ -330,7 +336,10 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
> >},
> >/* Vector */
> >{
> > -COSTS_N_INSNS (1)  /* Alu.  */
> > +COSTS_N_INSNS (1), /* Alu.  */
> > +COSTS_N_INSNS (1), /* dup_merge.  */
> > +COSTS_N_INSNS (1), /* ld1_merge.  */
> > +COSTS_N_INSNS (1)  /* ld1_dup.  */
> >}
> >  };
> >
> > @@ -434,7 +443,10 @@ const struct cpu_cost_table tsv110_extra_costs =
> >},
> >/* Vector */
> >{
> > -COSTS_N_INSNS (1)  /* alu.  */
> > +COSTS_N_INSNS (1), /* Alu.  */
> > +COSTS_N_INSNS (1), /* dup_merge.  */
> > +COSTS_N_INSNS (1), /* ld1_merge.  */
> > +COSTS_N_INSNS (1)  /* ld1_dup.  */
> >}
> >  };
> >
> > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> > index b38505b0872..dc4d3d39af8 100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -10568,6 +10568,28 @@ cost_plus:
> >  }
> >break;
> >
> > +case VEC_DUPLICATE:
> > +  if (!speed)
> > +   return false;
> > +
> > +  if (GET_CODE (XEXP (x, 0)) == MEM)
> > +   *cost += extra_cost->vect.ld1_dup;
> > +  else
> > +   *cost += extra_cost->vect.merge_dup;
> > +  return true;
> > +
> > +case VEC_MERGE:
> > +  if (speed && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
> > +   {
> > + if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM)
> > +   *cost += extra_cost-

[committed] Add a bswap testcase

2019-12-08 Thread Andrew Pinski
Hi,
  While testing a GCC patch (which I am not ready to submit yet), I
wrote a testcase which had produced wrong code (with an earlier
version of the patch).  There was no other testcase for it in the
testsuite so I am adding one.
This is a reduced testcase from the uboot PCIe code.


Thanks,
Andrew Pinski

* gcc.c-torture/execute/bswap-3.c: New testcase.
Index: ChangeLog
===
--- ChangeLog   (revision 279099)
+++ ChangeLog   (working copy)
@@ -1,3 +1,7 @@
+2019-12-08  Andrew Pinski  
+
+   * gcc.c-torture/execute/bswap-3.c: New test.
+
 2019-12-08  Sandra Loosemore  
 
Revert:
Index: gcc.c-torture/execute/bswap-3.c
===
--- gcc.c-torture/execute/bswap-3.c (nonexistent)
+++ gcc.c-torture/execute/bswap-3.c (working copy)
@@ -0,0 +1,25 @@
+/* { dg-require-effective-target int32plus } */
+
+int f(unsigned int a) __attribute__((noipa));
+int f(unsigned int a)
+{
+  return ((__builtin_bswap32(a))>>24) & 0x3;
+}
+
+
+int g(unsigned int a) __attribute__((noipa));
+int g(unsigned int a)
+{
+  return a&0x3;
+}
+
+int main(void)
+{
+  for (int b = 0; b <= 0xF; b++)
+{
+  if (f(b) != g(b))
+   __builtin_abort ();
+}
+  return 0;
+}
+


[Committed] Add C testcases for PR 86659

2019-12-13 Thread Andrew Pinski
Hi all,
  Since Ada is not always tested and I had received a testcase from a
customer for the same issue (I had backported the patch which caused
it), I thought I would create a full testcase that fails with C code.

NOTE this does not fail on x86_64 (nor aarch64) before the patch for
PR86659 due to SLOW_BYTE_ACCESS set to 0.  But I changed
SLOW_BYTE_ACCESS to be 1 when I was testing the code.

Committed as obvious after a simple test.

Thanks,
Andrew Pinski

testsuite/ChangeLog:
2019-12-13  Andrew Pinski  

   * gcc.c-torture/execute/pr86659-1.c: New test.
   * gcc.c-torture/execute/pr86659-2.c: New test.
Index: ChangeLog
===
--- ChangeLog   (revision 279337)
+++ ChangeLog   (working copy)
@@ -1,3 +1,8 @@
+2019-12-13  Andrew Pinski  
+
+   * gcc.c-torture/execute/pr86659-1.c: New test.
+   * gcc.c-torture/execute/pr86659-2.c: New test.
+
 2019-12-12  Jakub Jelinek  
 
PR target/92904
Index: gcc.c-torture/execute/pr86659-1.c
===
--- gcc.c-torture/execute/pr86659-1.c   (nonexistent)
+++ gcc.c-torture/execute/pr86659-1.c   (working copy)
@@ -0,0 +1,42 @@
+#define ENDIANBIG __attribute((scalar_storage_order ("big-endian")))
+
+typedef struct ENDIANBIG 
+{
+  unsigned long long  field0:29;
+  unsigned long long  field1:4; 
+  unsigned long long  field2:31;
+}struct1;  
+
+int
+main(void)
+{
+  int value1 = 0;
+  int value2 = 0;
+  int value3 = 0;
+  unsigned int flag;
+  struct1 var1;
+  var1.field0 = 23;
+  
+  flag = var1.field0;
+  value1 = ((var1.field0)?10:20);
+  if(var1.field0)
+{
+  value2 =  10;
+} else
+{
+  value2 = 20;
+}
+
+  value3 = ((flag)?10:20);
+
+  if (value1 != 10)
+__builtin_abort ();
+
+  if (value2 != 10)
+__builtin_abort ();
+
+  if (value3 != 10)
+__builtin_abort ();
+
+  return 0;
+}
Index: gcc.c-torture/execute/pr86659-2.c
===
--- gcc.c-torture/execute/pr86659-2.c   (nonexistent)
+++ gcc.c-torture/execute/pr86659-2.c   (working copy)
@@ -0,0 +1,42 @@
+#define ENDIANBIG __attribute((scalar_storage_order ("little-endian")))
+
+typedef struct ENDIANBIG 
+{
+  unsigned long long  field0:29;
+  unsigned long long  field1:4; 
+  unsigned long long  field2:31;
+}struct1;  
+
+int
+main(void)
+{
+  int value1 = 0;
+  int value2 = 0;
+  int value3 = 0;
+  unsigned int flag;
+  struct1 var1;
+  var1.field0 = 23;
+  
+  flag = var1.field0;
+  value1 = ((var1.field0)?10:20);
+  if(var1.field0)
+{
+  value2 =  10;
+} else
+{
+  value2 = 20;
+}
+
+  value3 = ((flag)?10:20);
+
+  if (value1 != 10)
+__builtin_abort ();
+
+  if (value2 != 10)
+__builtin_abort ();
+
+  if (value3 != 10)
+__builtin_abort ();
+
+  return 0;
+}


Re: [PATCH, c] all plattforms: support using a CC_REG instead cc0_rtx

2019-12-13 Thread Andrew Pinski
On Fri, Dec 13, 2019 at 6:56 PM Stefan Franke  wrote:
>
> Am 2019-12-13 21:59, schrieb Segher Boessenkool:
> > On Fri, Dec 13, 2019 at 08:55:15PM +0100, Stefan Franke wrote:
> >> Am 2019-12-13 18:58, schrieb Segher Boessenkool:
> >> >On Fri, Dec 13, 2019 at 05:25:41PM +0100, Stefan Franke wrote:
> >> >>Why? If you are using a cc register plus your architecture as many
> >> >>instructions which may clobber that cc register, some passes (e.g.
> >> >>gcse)
> >> >>will reorder the insns. This can lead to the situation that an insn is
> >> >>moved between a compare and it' consuming jump insn. Which yields
> >> >>invalid code. (Note that at these stages clobbers are not yet tracked
> >> >>as
> >> >>CLOBBER insns).
> >> >
> >> >Then that port has a bug.  In the m68k port, there are no separate
> >> >compare
> >> >and jump insns ever, but for most ports those won't yet exist during
> >> >gcse.
> >>
> >> it looks like t2o insn for the m68k
> >>
> >> (insn 115 114 116 5 (set (cc0)
> >> (compare (subreg:HI (reg:SI 272) 2)
> >> (reg:HI 273)))
> >> /tmp/compiler-explorer-compiler1191113-13975-1allrsj.w8mc/example.c:216
> >> 17 {*m68k.md:559}
> >>  (nil))
> >> (jump_insn 116 115 117 5 (set (pc)
> >> (if_then_else (ne (cc0)
> >> (const_int 0 [0]))
> >> (label_ref 99)
> >> (pc)))
> >> /tmp/compiler-explorer-compiler1191113-13975-1allrsj.w8mc/example.c:216
> >> 411 {bne}
> >>  (int_list:REG_BR_PROB 4 (nil))
> >>  -> 99)
> >
> > This is an older compiler.  m68k no longer uses cc0 (except it is still
> > mentioned in two comments (well, commented-out code)).
> >
> >> >This is not unique to cc0 conversions: every port has a similar problem
> >> >with all FIXED_REGISTERS.
> >>
> >> It's not related to fixed registers.
> >
> > No, it is exactly the same situation.  You cannot introduce uses of
> > such
> > a register if it might already exist in the insn stream somewhere, not
> > without checking first, and you better have a backup plan too.
> >
> >> It's unique to CC registers since
> >> these are on some plattforms modified by side effects. So after split2
> >> it's modelled using CLOBBERs
> >
> > There are no such implicit side effects if you have gotten rid of cc0.
> > That is the *point* of removing cc0.
> >
> >> >@findex cc0_rtx
> >> >There is only one expression object of code @code{cc0}; it is the
> >> >value of the variable @code{cc0_rtx}.  Any attempt to create an
> >> >expression of code @code{cc0} will return @code{cc0_rtx}.
> >> >
> >> >There is a lot of code that depends on this property, you cannot break
> >> >it without fixing everything.
> >>
> >> There is no need to change the definition or modify any piece
> >> elsewhere.
> >> And the modified comparison will still work for cc0.
> >
> > Then you do not need your patch.  You can compare cc0_rtx by identity.
> >
> >
> > Segher
>
>
> since I still don't get it: i386.md expands cbranch into two insns, e.g.
>
>
> (insn 17 16 18 4 (set (reg:CCNO 17 flags)
>  (compare:CCNO (reg/v:SI 96 [  ])
>      (const_int 0 [0]))) "x.c":2 3 {*cmpsi_ccno_1}
>   (nil))
> (jump_insn 18 17 19 4 (set (pc)
>  (if_then_else (le (reg:CCNO 17 flags)
>  (const_int 0 [0]))
>  (label_ref:DI 28)
>  (pc))) "x.c":2 627 {*jcc_1}
>   (int_list:REG_BR_PROB 1500 (nil))
>
>
> What mechanism guarantees that no other insn is inserted inbetween the
> compare and the jump?

>(Note that at these stages clobbers are not yet tracked as
CLOBBER insns).

All of the instructions that need CLOBBER have it at this point.
So I think your back-end is not describing what it should be describing.
The old saying inside GCC is lie to reload and get wrong code.  That
rings true here too.

Thanks,
Andrew Pinski

>
> Or is i386 also "broken"?
>
> Stefan


Re: [PATCH] Fix PR70985

2019-12-13 Thread Andrew Pinski
On Mon, May 9, 2016 at 2:32 AM Richard Biener  wrote:
>
>
> I am testing the following followup to my BIT_FIELD_REF simplification
> changes which resolve issues when applying to memory BIT_FIELD_REFs.
>
> Bootstrap and regtest running on x86_64-unknown-linux-gnu.

My question is not directly related to this patch but is partly related.
While I was working on lowering bit-field access patch, I ran into a
problem where I am building the lhs, I use fold_build3 to build the
BIT_FIELD_REF and we get a convert expression from it.
Should we be using a fold_build3 for the BIT_FIELD_REF that will be
used on the lhs or should we just disable this optimization for non
GIMPLE?
The testcases where I ran into the issue are the ones which I added
back in October; gcc.c-torture/compile/20191015-1.c and
gcc.c-torture/compile/20191015-2.c.  I added them so when I submit the
patch for lowering for GCC 11, we don't regress (there was no testcase
beforehand).

Thanks,
Andrew Pinski

>
> Richard.
>
> 2016-05-09  Richard Biener  
>
> PR tree-optimization/70985
> * match.pd (BIT_FIELD_REF -> (type)): Disable on GIMPLE when
> op0 isn't a gimple register.
>
> * gcc.dg/torture/pr70985.c: New testcase.
>
> Index: gcc/match.pd
> ===
> *** gcc/match.pd(revision 236021)
> --- gcc/match.pd(working copy)
> *** DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> *** 3244,3249 
> --- 3244,3251 
>(view_convert (imagpart @0)
> (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
>  && INTEGRAL_TYPE_P (type)
> +/* On GIMPLE this should only apply to register arguments.  */
> +&& (! GIMPLE || is_gimple_reg (@0))
>  /* A bit-field-ref that referenced the full argument can be 
> stripped.  */
>  && ((compare_tree_int (@1, TYPE_PRECISION (TREE_TYPE (@0))) == 0
> && integer_zerop (@2))
> Index: gcc/testsuite/gcc.dg/torture/pr70985.c
> ===
> *** gcc/testsuite/gcc.dg/torture/pr70985.c  (revision 0)
> --- gcc/testsuite/gcc.dg/torture/pr70985.c  (working copy)
> ***
> *** 0 
> --- 1,28 
> + /* { dg-do compile } */
> + /* { dg-require-effective-target int32plus } */
> +
> + struct
> + {
> +   int f0:24;
> + } a, c, d;
> +
> + int b;
> +
> + int
> + fn1 ()
> + {
> +   return 0;
> + }
> +
> + void
> + fn2 ()
> + {
> +   int e;
> +   if (b)
> + for (; e;)
> +   {
> +   d = c;
> +   if (fn1 (b))
> + b = a.f0;
> +   }
> + }


[Committed] Add some __int128 bitfield testcases

2019-12-15 Thread Andrew Pinski
Hi,
  While working on the bit-field lowering patch, I noticed there were
no testcases that used int128_t as bit-fields.  So I added some.
Including two that uses scalar_storage_order.  These are just compile
testcases.

Committed as obvious after testing on x86_64 to make sure the testcases pass.

Thanks,
Andrew Pinski

testsuite/ChangeLog:
* gcc.c-torture/compile/bitfield-1.c: New test.
* gcc.c-torture/compile/bitfield-endian-1.c: New test.
* gcc.c-torture/compile/bitfield-endian-2.c: New test.
Index: ChangeLog
===
--- ChangeLog   (revision 279408)
+++ ChangeLog   (working copy)
@@ -1,3 +1,9 @@
+2019-12-15  Andrew Pinski  
+
+   * gcc.c-torture/compile/bitfield-1.c: New test.
+   * gcc.c-torture/compile/bitfield-endian-1.c: New test.
+   * gcc.c-torture/compile/bitfield-endian-2.c: New test.
+
 2019-12-14  Jakub Jelinek  
 
PR tree-optimization/92930
Index: gcc.c-torture/compile/bitfield-1.c
===
--- gcc.c-torture/compile/bitfield-1.c  (nonexistent)
+++ gcc.c-torture/compile/bitfield-1.c  (working copy)
@@ -0,0 +1,13 @@
+/* { dg-require-effective-target int128 } */
+
+struct f
+{
+  __uint128_t t:124;
+  __uint128_t t1:4;
+};
+
+struct f g(void)
+{
+  struct f t = {1, 2};
+  return t;
+}
Index: gcc.c-torture/compile/bitfield-endian-1.c
===
--- gcc.c-torture/compile/bitfield-endian-1.c   (nonexistent)
+++ gcc.c-torture/compile/bitfield-endian-1.c   (working copy)
@@ -0,0 +1,15 @@
+/* { dg-require-effective-target int128 } */
+
+#define ENDIAN __attribute((scalar_storage_order ("big-endian")))
+
+typedef struct ENDIAN
+{
+  __uint128_t t:124;
+  __uint128_t t1:4;
+}f;
+
+f g(void)
+{
+  f t = {1, 2};
+  return t;
+}
Index: gcc.c-torture/compile/bitfield-endian-2.c
===
--- gcc.c-torture/compile/bitfield-endian-2.c   (nonexistent)
+++ gcc.c-torture/compile/bitfield-endian-2.c   (working copy)
@@ -0,0 +1,15 @@
+/* { dg-require-effective-target int128 } */
+
+#define ENDIAN __attribute((scalar_storage_order ("little-endian")))
+
+typedef struct ENDIAN
+{
+  __uint128_t t:124;
+  __uint128_t t1:4;
+}f;
+
+f g(void)
+{
+  f t = {1, 2};
+  return t;
+}


Re: [patch] Guard aarch64/aapcs64 tests using abitest.S by check_weak_available

2019-12-16 Thread Andrew Pinski
On Mon, Dec 16, 2019 at 1:25 AM Olivier Hainque  wrote:
>
> Hello,
>
> Some tests from  gcc/testsuite/gcc.target/aarch64/aapcs64
> resort to the abitest.S source, which defines a few weak symbols:
>
>   ...
>   .weak   testfunc
>   .weak   testfunc_ptr
>   .weak   saved_return_address
>
> The attached patch is a proposal to prevent the execution of
> those tests in configurations where weak symbols are advertised
> as not supported.
>
> This cures a significant number of failures on VxWorks and bootstrap
> + regress tests fine on aarch64-linux.
>
> Is this ok to commit ?

Why does VxWorks not have weak symbol support when it is an elf
target?  I can understand it not having support in a loader but these
symbols should all be resolved at build time.

Thanks,
Andrew Pinski

>
> Thanks in advance!
>
> With Kind Regards,
>
> Olivier
>
> 2019-12-16  Joel Brobecker  
> Olivier Hainque  
>
> testsuite/
>
> * gcc.target/aarch64/aapcs64/aapcs64.exp: Guard tests using
> abitest.S by check_weak_available.
>


Re: [PATCH][RFC] Introduce BIT_FIELD_INSERT

2019-12-16 Thread Andrew Pinski
On Thu, Nov 15, 2018 at 12:31 AM Richard Biener  wrote:
>
> On Thu, 15 Nov 2018, Richard Biener wrote:
>
> > On Wed, 14 Nov 2018, Andrew Pinski wrote:
> >
> > > On Fri, May 13, 2016 at 3:51 AM Richard Biener  wrote:
> > > >
> > > >
> > > > The following patch adds BIT_FIELD_INSERT, an operation to
> > > > facilitate doing bitfield inserts on registers (as opposed
> > > > to currently where we'd have a BIT_FIELD_REF store).
> > > >
> > > > Originally this was developed as part of bitfield lowering
> > > > where bitfield stores were lowered into read-modify-write
> > > > cycles and the modify part, instead of doing shifting and masking,
> > > > be kept in a more high-level form to ease combining them.
> > > >
> > > > A second use case (the above is still valid) is vector element
> > > > inserts which we currently can only do via memory or
> > > > by extracting all components and re-building the vector using
> > > > a CONSTRUCTOR.  For this second use case I added code
> > > > re-writing the BIT_FIELD_REF stores the C family FEs produce
> > > > into BIT_FIELD_INSERT when update-address-taken can otherwise
> > > > re-write a decl into SSA form (the testcase shows we miss
> > > > a similar opportunity with the MEM_REF form of a vector insert,
> > > > I plan to fix that for the final submission).
> > > >
> > > > One speciality of BIT_FIELD_INSERT as opposed to BIT_FIELD_REF
> > > > is that the size of the insertion is given implicitely via the
> > > > type size/precision of the value to insert.  That avoids
> > > > introducing ways to have quaternary ops in folding and GIMPLE stmts.
> > > >
> > > > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> > > >
> > > > Richard.
> > > >
> > > > 2011-06-16  Richard Guenther  
> > > >
> > > > PR tree-optimization/29756
> > > > * tree.def (BIT_FIELD_INSERT): New tcc_expression tree code.
> > > > * expr.c (expand_expr_real_2): Handle BIT_FIELD_INSERT.
> > > > * fold-const.c (operand_equal_p): Likewise.
> > > > (fold_ternary_loc): Add constant folding of BIT_FIELD_INSERT.
> > > > * gimplify.c (gimplify_expr): Handle BIT_FIELD_INSERT.
> > > > * tree-inline.c (estimate_operator_cost): Likewise.
> > > > * tree-pretty-print.c (dump_generic_node): Likewise.
> > > > * tree-ssa-operands.c (get_expr_operands): Likewise.
> > > > * cfgexpand.c (expand_debug_expr): Likewise.
> > > > * gimple-pretty-print.c (dump_ternary_rhs): Likewise.
> > > > * gimple.c (get_gimple_rhs_num_ops): Handle BIT_FIELD_INSERT.
> > > > * tree-cfg.c (verify_gimple_assign_ternary): Verify 
> > > > BIT_FIELD_INSERT.
> > > >
> > > > * tree-ssa.c (non_rewritable_lvalue_p): We can rewrite
> > > > vector inserts using BIT_FIELD_REF on the lhs.
> > > > (execute_update_addresses_taken): Do it.
> > > >
> > > > * gcc.dg/tree-ssa/vector-6.c: New testcase.
> > > >
> > > > Index: trunk/gcc/expr.c
> > > > ===
> > > > *** trunk.orig/gcc/expr.c   2016-05-12 13:40:30.704262951 +0200
> > > > --- trunk/gcc/expr.c2016-05-12 15:40:32.481225744 +0200
> > > > *** expand_expr_real_2 (sepops ops, rtx targ
> > > > *** 9358,9363 
> > > > --- 9358,9380 
> > > > target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, 
> > > > target);
> > > > return target;
> > > >
> > > > + case BIT_FIELD_INSERT:
> > > > +   {
> > > > +   unsigned bitpos = tree_to_uhwi (treeop2);
> > > > +   unsigned bitsize;
> > > > +   if (INTEGRAL_TYPE_P (TREE_TYPE (treeop1)))
> > > > + bitsize = TYPE_PRECISION (TREE_TYPE (treeop1));
> > > > +   else
> > > > + bitsize = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (treeop1)));
> > > > +   rtx op0 = expand_normal (treeop0);
> > > > +   rtx op1 = expand_normal (treeop1);
> > > > +   rtx dst = gen_reg_rtx (mode);
> > > > +   emit_move_insn (dst, op0);
> > > &g

Re: [PATCH][RFC] Introduce BIT_FIELD_INSERT

2019-12-16 Thread Andrew Pinski
On Mon, Dec 16, 2019 at 6:32 PM Andrew Pinski  wrote:
>
> On Thu, Nov 15, 2018 at 12:31 AM Richard Biener  wrote:
> >
> > On Thu, 15 Nov 2018, Richard Biener wrote:
> >
> > > On Wed, 14 Nov 2018, Andrew Pinski wrote:
> > >
> > > > On Fri, May 13, 2016 at 3:51 AM Richard Biener  
> > > > wrote:
> > > > >
> > > > >
> > > > > The following patch adds BIT_FIELD_INSERT, an operation to
> > > > > facilitate doing bitfield inserts on registers (as opposed
> > > > > to currently where we'd have a BIT_FIELD_REF store).
> > > > >
> > > > > Originally this was developed as part of bitfield lowering
> > > > > where bitfield stores were lowered into read-modify-write
> > > > > cycles and the modify part, instead of doing shifting and masking,
> > > > > be kept in a more high-level form to ease combining them.
> > > > >
> > > > > A second use case (the above is still valid) is vector element
> > > > > inserts which we currently can only do via memory or
> > > > > by extracting all components and re-building the vector using
> > > > > a CONSTRUCTOR.  For this second use case I added code
> > > > > re-writing the BIT_FIELD_REF stores the C family FEs produce
> > > > > into BIT_FIELD_INSERT when update-address-taken can otherwise
> > > > > re-write a decl into SSA form (the testcase shows we miss
> > > > > a similar opportunity with the MEM_REF form of a vector insert,
> > > > > I plan to fix that for the final submission).
> > > > >
> > > > > One speciality of BIT_FIELD_INSERT as opposed to BIT_FIELD_REF
> > > > > is that the size of the insertion is given implicitely via the
> > > > > type size/precision of the value to insert.  That avoids
> > > > > introducing ways to have quaternary ops in folding and GIMPLE stmts.
> > > > >
> > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> > > > >
> > > > > Richard.
> > > > >
> > > > > 2011-06-16  Richard Guenther  
> > > > >
> > > > > PR tree-optimization/29756
> > > > > * tree.def (BIT_FIELD_INSERT): New tcc_expression tree code.
> > > > > * expr.c (expand_expr_real_2): Handle BIT_FIELD_INSERT.
> > > > > * fold-const.c (operand_equal_p): Likewise.
> > > > > (fold_ternary_loc): Add constant folding of BIT_FIELD_INSERT.
> > > > > * gimplify.c (gimplify_expr): Handle BIT_FIELD_INSERT.
> > > > > * tree-inline.c (estimate_operator_cost): Likewise.
> > > > > * tree-pretty-print.c (dump_generic_node): Likewise.
> > > > > * tree-ssa-operands.c (get_expr_operands): Likewise.
> > > > > * cfgexpand.c (expand_debug_expr): Likewise.
> > > > > * gimple-pretty-print.c (dump_ternary_rhs): Likewise.
> > > > > * gimple.c (get_gimple_rhs_num_ops): Handle BIT_FIELD_INSERT.
> > > > > * tree-cfg.c (verify_gimple_assign_ternary): Verify 
> > > > > BIT_FIELD_INSERT.
> > > > >
> > > > > * tree-ssa.c (non_rewritable_lvalue_p): We can rewrite
> > > > > vector inserts using BIT_FIELD_REF on the lhs.
> > > > > (execute_update_addresses_taken): Do it.
> > > > >
> > > > > * gcc.dg/tree-ssa/vector-6.c: New testcase.
> > > > >
> > > > > Index: trunk/gcc/expr.c
> > > > > ===
> > > > > *** trunk.orig/gcc/expr.c   2016-05-12 13:40:30.704262951 +0200
> > > > > --- trunk/gcc/expr.c2016-05-12 15:40:32.481225744 +0200
> > > > > *** expand_expr_real_2 (sepops ops, rtx targ
> > > > > *** 9358,9363 
> > > > > --- 9358,9380 
> > > > > target = expand_vec_cond_expr (type, treeop0, treeop1, 
> > > > > treeop2, target);
> > > > > return target;
> > > > >
> > > > > + case BIT_FIELD_INSERT:
> > > > > +   {
> > > > > +   unsigned bitpos = tree_to_uhwi (treeop2);
> > > > > +   unsigned bitsize;
> > > > > +   if (INTEGRAL_TYPE_P (TREE_TYPE (treeop1)))
> > > >

Re: [PATCH] Fix redundant load missed by fre [tree-optimization 92980]

2019-12-17 Thread Andrew Pinski
On Tue, Dec 17, 2019 at 6:33 PM Hongtao Liu  wrote:
>
> Hi:
>   This patch is to simplify A * C + (-D) -> (A - D/C) * C when C is a
> power of 2 and D mod C == 0.
>   bootstrap and make check is ok.

I don't see why D has to be negative here.


>TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
+ && TYPE_UNSIGNED (TREE_TYPE (@0))

This is the wrong check here.
Use INTEGRAL_TYPE_P .

>+ (plus (mult @0 integer_pow2p@1) INTEGER_CST@2)

 You might want a :s here for the mult and/or plus.

unsigned HOST_WIDE_INT d = tree_to_uhwi (@2);
...
Maybe use wide_int math instead of HOST_WIDE_INT here, then you don't
need the tree_fits_uhwi_p check.

Add a testcase should tests the pattern directly rather than indirectly.

Also we are in stage 3 which means bug fixes only so this might/should
wait until stage 1.

Thanks,
Andrew Pinski

>
> changelog
> gcc/
> * gcc/match.pd (A * C + (-D) = (A - D/C) * C. when C is a
> power of 2 and D mod C == 0): Add new simplification.
>
> gcc/testsuite
> * gcc.dg/pr92980.c: New test.
>
> --
> BR,
> Hongtao


Re: [PATCH] Fix redundant load missed by fre [tree-optimization 92980]

2019-12-18 Thread Andrew Pinski
On Wed, Dec 18, 2019 at 1:18 AM Hongtao Liu  wrote:
>
> On Wed, Dec 18, 2019 at 4:26 PM Segher Boessenkool
>  wrote:
> >
> > On Wed, Dec 18, 2019 at 10:37:11AM +0800, Hongtao Liu wrote:
> > > Hi:
> > >   This patch is to simplify A * C + (-D) -> (A - D/C) * C when C is a
> > > power of 2 and D mod C == 0.
> > >   bootstrap and make check is ok.
> >
> > Why would this be a good idea?  It is not reducing the number of
> > operators or similar?
> >
> It helps VN, so that fre will delete redundant load.

It is basically doing a factoring and undoing an optimization that was
done in the front-end (see pointer_int_sum in c-common.c).
But I think the optimization in the front-end should be removed.  It
dates from 1992, a time when GCC did not anything on the tree level
and there was no GCSE (PRE) and the CSE was limited.

Thanks,
Andrew Pinski


> >
> > Segher
>
>
>
> --
> BR,
> Hongtao


[Committed] Fix testsuite/92998: dupq_1.c and simd-abi-8.c fail on big-endian

2019-12-21 Thread Andrew Pinski
Hi,
  These two testcases fail on big-endian aarch64 Linux targets as they
include arm_neon.h or arm_sve.h which in turn includes stdint.h; this
fails as stdint.h includes stubs.h but the stubs for little-endian
do not exist for big-endian.

This fixes the problem by restricting these tests to little-endian only.

Committed as obvious after a test on aarch64_be-linux-gnu.

Thanks,
Andrew Pinski

testsuite/ChangeLog:
PR testsuite/92998
* gcc.target/aarch64/sve/acle/general/dupq_1.c:
Restrict to aarch64_little_endian only.
* gcc.target/aarch64/torture/simd-abi-8.c:
Likewise.
Index: ChangeLog
===
--- ChangeLog   (revision 279708)
+++ ChangeLog   (working copy)
@@ -1,3 +1,11 @@
+2019-12-21  Andrew Pinski  
+
+   PR testsuite/92998
+   * gcc.target/aarch64/sve/acle/general/dupq_1.c:
+   Restrict to aarch64_little_endian only.
+   * gcc.target/aarch64/torture/simd-abi-8.c:
+   Likewise.
+
 2019-12-22  Maciej W. Rozycki  
 
* lib/gcc-defs.exp (gcc-set-multilib-library-path): Use
Index: gcc.target/aarch64/sve/acle/general/dupq_1.c
===
--- gcc.target/aarch64/sve/acle/general/dupq_1.c(revision 279708)
+++ gcc.target/aarch64/sve/acle/general/dupq_1.c(working copy)
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mlittle-endian" } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target aarch64_little_endian } */
 
 #include 
 
Index: gcc.target/aarch64/torture/simd-abi-8.c
===
--- gcc.target/aarch64/torture/simd-abi-8.c (revision 279708)
+++ gcc.target/aarch64/torture/simd-abi-8.c (working copy)
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
-/* { dg-options "-std=gnu99 -mlittle-endian" } */
+/* { dg-options "-std=gnu99" } */
 /* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
+/* { dg-require-effective-target aarch64_little_endian } */
 
 #include 
 


Re: [PATCH] PR tree-optimization/90836 Missing popcount pattern matching

2019-12-29 Thread Andrew Pinski
On Mon, Oct 7, 2019 at 3:05 AM Richard Biener
 wrote:
>
> On Tue, Oct 1, 2019 at 1:48 PM Dmitrij Pochepko
>  wrote:
> >
> > Hi Richard,
> >
> > I updated patch according to all your comments.
> > Also bootstrapped and tested again on x86_64-pc-linux-gnu and 
> > aarch64-linux-gnu, which took some time.
> >
> > attached v3.
>
> OK.

This introduced PR 93098 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93098 ).

Thanks,
Andrew Pinski

>
> Thanks,
> Richard.
>
> > Thanks,
> > Dmitrij
> >
> > On Thu, Sep 26, 2019 at 09:47:04AM +0200, Richard Biener wrote:
> > > On Tue, Sep 24, 2019 at 5:29 PM Dmitrij Pochepko
> > >  wrote:
> > > >
> > > > Hi,
> > > >
> > > > can anybody take a look at v2?
> > >
> > > +(if (tree_to_uhwi (@4) == 1
> > > + && tree_to_uhwi (@10) == 2 && tree_to_uhwi (@5) == 4
> > >
> > > those will still ICE for large __int128_t constants.  Since you do not 
> > > match
> > > any conversions you should probably restrict the precision of 'type' like
> > > with
> > >(if (TYPE_PRECISION (type) <= 64
> > > && tree_to_uhwi (@4) ...
> > >
> > > likewise tree_to_uhwi will fail for negative constants thus if the
> > > pattern assumes
> > > unsigned you should verify that as well with && TYPE_UNSIGNED  (type).
> > >
> > > Your 'argtype' is simply 'type' so you can elide it.
> > >
> > > +   (switch
> > > +   (if (types_match (argtype, long_long_unsigned_type_node))
> > > + (convert (BUILT_IN_POPCOUNTLL:integer_type_node @0)))
> > > +   (if (types_match (argtype, long_unsigned_type_node))
> > > + (convert (BUILT_IN_POPCOUNTL:integer_type_node @0)))
> > > +   (if (types_match (argtype, unsigned_type_node))
> > > + (convert (BUILT_IN_POPCOUNT:integer_type_node @0)))
> > >
> > > Please test small types first so we can avoid popcountll when long == 
> > > long long
> > > or long == int.  I also wonder if we really want to use the builtins and
> > > check optab availability or if we nowadays should use
> > > direct_internal_fn_supported_p (IFN_POPCOUNT, integer_type_node, type,
> > > OPTIMIZE_FOR_BOTH) and
> > >
> > > (convert (IFN_POPCOUNT:type @0))
> > >
> > > without the switch?
> > >
> > > Thanks,
> > > Richard.
> > >
> > > > Thanks,
> > > > Dmitrij
> > > >
> > > > On Mon, Sep 09, 2019 at 10:03:40PM +0300, Dmitrij Pochepko wrote:
> > > > > Hi all.
> > > > >
> > > > > Please take a look at v2 (attached).
> > > > > I changed patch according to review comments. The same testing was 
> > > > > performed again.
> > > > >
> > > > > Thanks,
> > > > > Dmitrij
> > > > >
> > > > > On Thu, Sep 05, 2019 at 06:34:49PM +0300, Dmitrij Pochepko wrote:
> > > > > > This patch adds matching for Hamming weight (popcount) 
> > > > > > implementation. The following sources:
> > > > > >
> > > > > > int
> > > > > > foo64 (unsigned long long a)
> > > > > > {
> > > > > > unsigned long long b = a;
> > > > > > b -= ((b>>1) & 0xULL);
> > > > > > b = ((b>>2) & 0xULL) + (b & 
> > > > > > 0xULL);
> > > > > > b = ((b>>4) + b) & 0x0F0F0F0F0F0F0F0FULL;
> > > > > > b *= 0x0101010101010101ULL;
> > > > > > return (int)(b >> 56);
> > > > > > }
> > > > > >
> > > > > > and
> > > > > >
> > > > > > int
> > > > > > foo32 (unsigned int a)
> > > > > > {
> > > > > > unsigned long b = a;
> > > > > > b -= ((b>>1) & 0xUL);
> > > > > > b = ((b>>2) & 0xUL) + (b & 0xUL);
> > > > > > b = ((b>>4) + b) & 0x0F0F0F0FUL;
> > > > > > b *= 0x01010101UL;
> > > > > > return (int)(b >> 24);
> > > > > > }
> > > > > >

[Committed] Fix libobjc on Windows (PR93099)

2020-01-01 Thread Andrew Pinski
Hi,
  On Windows if DLL_EXPORT was declared objc_EXPORT was defined to an
empty string.  This is wrong as that would produce common symbols in
the headers; except now -fno-common is the default.
So setting the define to extern is the correct fix and removes the
dependency on having common symbols being in use.

Committed after the bug reporter tested the patch for me.

Thanks,
Andrew Pinski

ChangeLog:
* objc/objc-decls.h (objc_EXPORT): Define it to extern for DLL_EXPORT
define case.


Re: [Committed] Fix libobjc on Windows (PR93099)

2020-01-01 Thread Andrew Pinski
On Wed, Jan 1, 2020 at 2:14 PM Andrew Pinski  wrote:
>
> Hi,
>   On Windows if DLLL_EXPORT was declared objc_EXPORT was defined to an
> empty string.  This is wrong as that would produce common symbols in
> the headers; except now -fno-common is the default.
> So setting the define to extern is the correct fix and removes the
> dependency on having common symbols being in use.
>
> Committed after the bug reported tested the patch for me.

This time with the patch attached :).

>
> Thanks,
> Andrew Pinski
>
> ChangeLog:
> * objc/objc-decls.h (objc_EXPORT): Define it to extern for DLL_EXPORT
> define case.
Index: objc/objc-decls.h
===
--- objc/objc-decls.h   (revision 279821)
+++ objc/objc-decls.h   (working copy)
@@ -29,7 +29,7 @@ see the files COPYING3 and COPYING.RUNTI
 #if defined (_WIN32) || defined (__WIN32__) || defined (WIN32)
 
 #  ifdef DLL_EXPORT /* defined by libtool (if required) */
-#define objc_EXPORT 
+#define objc_EXPORT extern
 #define objc_DECLARE
 #  else
 #define objc_EXPORT  extern __declspec(dllimport)


[Committed] Add two testcases for 1bit bit-field

2020-01-02 Thread Andrew Pinski
Hi,
  While working an optimization, the optimization would produce wrong
code but I noticed there was no testcase for that case at all.

Committed as obvious.

Thanks,
Andrew Pinski

ChangeLog:
* gcc.c-torture/execute/bitfld-8.c: New testcase.
* gcc.c-torture/execute/bitfld-9.c: New testcase.
Index: gcc.c-torture/execute/bitfld-8.c
===
--- gcc.c-torture/execute/bitfld-8.c(nonexistent)
+++ gcc.c-torture/execute/bitfld-8.c(working copy)
@@ -0,0 +1,63 @@
+struct mouse_button_str {
+unsigned char left  : 1;
+unsigned char right : 1;
+unsigned char middle: 1;
+} button;
+
+static char fct (struct mouse_button_str newbutton) __attribute__ 
((__noipa__));
+static char
+fct (struct mouse_button_str newbutton)
+{
+  char l = newbutton.left;
+  char r = newbutton.right;
+  char m = newbutton.middle;
+   return l || r || m;
+}
+
+int main(void)
+{
+  struct mouse_button_str newbutton1;
+  newbutton1.left = 1;
+  newbutton1.middle = 1;
+  newbutton1.right = 1;
+  if (!fct (newbutton1))
+__builtin_abort ();
+
+  newbutton1.left = 0;
+  newbutton1.middle = 1;
+  newbutton1.right = 1;
+  if (!fct (newbutton1))
+__builtin_abort ();
+  newbutton1.left = 1;
+  newbutton1.middle = 0;
+  newbutton1.right = 1;
+  if (!fct (newbutton1))
+__builtin_abort ();
+  newbutton1.left = 1;
+  newbutton1.middle = 1;
+  newbutton1.right = 0;
+  if (!fct (newbutton1))
+__builtin_abort ();
+
+
+  newbutton1.left = 1;
+  newbutton1.middle = 0;
+  newbutton1.right = 0;
+  if (!fct (newbutton1))
+__builtin_abort ();
+  newbutton1.left = 0;
+  newbutton1.middle = 1;
+  newbutton1.right = 0;
+  if (!fct (newbutton1))
+__builtin_abort ();
+  newbutton1.left = 0;
+  newbutton1.middle = 0;
+  newbutton1.right = 1;
+  if (!fct (newbutton1))
+__builtin_abort ();
+  newbutton1.left = 0;
+  newbutton1.middle = 0;
+  newbutton1.right = 0;
+  if (fct (newbutton1))
+__builtin_abort ();
+}
\ No newline at end of file
Index: gcc.c-torture/execute/bitfld-9.c
===
--- gcc.c-torture/execute/bitfld-9.c(nonexistent)
+++ gcc.c-torture/execute/bitfld-9.c(working copy)
@@ -0,0 +1,63 @@
+struct mouse_button_str {
+unsigned char left  : 1;
+unsigned char right : 1;
+unsigned char middle: 1;
+} button;
+
+static char fct (struct mouse_button_str newbutton) __attribute__ 
((__noipa__));
+static char
+fct (struct mouse_button_str newbutton)
+{
+  char l = newbutton.left;
+  char r = newbutton.right;
+  char m = newbutton.middle;
+  return l && r && m;
+}
+
+int main(void)
+{
+  struct mouse_button_str newbutton1;
+  newbutton1.left = 1;
+  newbutton1.middle = 1;
+  newbutton1.right = 1;
+  if (!fct (newbutton1))
+__builtin_abort ();
+
+  newbutton1.left = 0;
+  newbutton1.middle = 1;
+  newbutton1.right = 1;
+  if (fct (newbutton1))
+__builtin_abort ();
+  newbutton1.left = 1;
+  newbutton1.middle = 0;
+  newbutton1.right = 1;
+  if (fct (newbutton1))
+__builtin_abort ();
+  newbutton1.left = 1;
+  newbutton1.middle = 1;
+  newbutton1.right = 0;
+  if (fct (newbutton1))
+__builtin_abort ();
+
+
+  newbutton1.left = 1;
+  newbutton1.middle = 0;
+  newbutton1.right = 0;
+  if (fct (newbutton1))
+__builtin_abort ();
+  newbutton1.left = 0;
+  newbutton1.middle = 1;
+  newbutton1.right = 0;
+  if (fct (newbutton1))
+__builtin_abort ();
+  newbutton1.left = 0;
+  newbutton1.middle = 0;
+  newbutton1.right = 1;
+  if (fct (newbutton1))
+__builtin_abort ();
+  newbutton1.left = 0;
+  newbutton1.middle = 0;
+  newbutton1.right = 0;
+  if (fct (newbutton1))
+__builtin_abort ();
+}


[PATCH] Simplify ((A & N) ==/!= CST1) &/| ((A & M) ==/!= CST2)

2020-01-03 Thread Andrew Pinski
Hi,
  This adds the following two simplifcations to match.pd:

((A & N) == CST1) & ((A & M) == CST2)
if (N&M)&CST1 == (N&M)&CST2, then
   (A&(N|M)) == (CST1|CST2)
else
  false
And
((A & N) != CST1) | ((A & M) != CST2)

if (N&M)&CST1 == (N&M)&CST2, then
   (A&(N|M)) != (CST1|CST2)
else
  true

NOTE it adds a check to make sure N&~CST1 and M&~CST2 are zero; that
is, no bits outside the masks are set in CST1/CST2; just to make sure we
don't have an ordering issue when doing the simplification.

I added a testcase for majority of cases I could think of, so there
are a total of 29 testcases include.

NOTE It does not solve the original testcase in the bug report though,
because we need to handle (A&integer_pow2p) !=/== 0 as ((A &
integer_pow2p) ==/!= integer_pow2p) which is not done in this patch.
I will implement that in a follow up patch.  NOTE this is a step
forward to be able to remove the fold_truth_andor_1 and
optimize_bit_field_compare from fold-const.c.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

Thanks,
Andrew Pinski

ChangeLog:
* match.pd (((A & N) ==/!= CST1) &/| ((A & M) ==/!= CST2)): New pattern.

testsuite/ChangeLog:
* gcc.c-torture/execute/cmpandor-1.c: New testcase
* gcc.c-torture/execute/cmpandor-1.h: New file.
* gcc.dg/tree-ssa/eqand-1.c: New testcase.
* gcc.dg/tree-ssa/eqand-2.c: New testcase.
* gcc.dg/tree-ssa/eqand-3.c: New testcase.
* gcc.dg/tree-ssa/eqand-4.c: New testcase.
* gcc.dg/tree-ssa/eqand-5.c: New testcase.
* gcc.dg/tree-ssa/eqand-6.c: New testcase.
* gcc.dg/tree-ssa/eqand-7.c: New testcase.
* gcc.dg/tree-ssa/eqor-1.c: New testcase.
* gcc.dg/tree-ssa/eqor-2.c: New testcase.
* gcc.dg/tree-ssa/eqor-3.c: New testcase.
* gcc.dg/tree-ssa/eqor-4.c: New testcase.
* gcc.dg/tree-ssa/eqor-5.c: New testcase.
* gcc.dg/tree-ssa/eqor-6.c: New testcase.
* gcc.dg/tree-ssa/eqor-7.c: New testcase.
* gcc.dg/tree-ssa/neand-1.c: New testcase.
* gcc.dg/tree-ssa/neand-2.c: New testcase.
* gcc.dg/tree-ssa/neand-3.c: New testcase.
* gcc.dg/tree-ssa/neand-4.c: New testcase.
* gcc.dg/tree-ssa/neand-5.c: New testcase.
* gcc.dg/tree-ssa/neand-6.c: New testcase.
* gcc.dg/tree-ssa/neand-7.c: New testcase.
* gcc.dg/tree-ssa/neor-1.c: New testcase.
* gcc.dg/tree-ssa/neor-2.c: New testcase.
* gcc.dg/tree-ssa/neor-3.c: New testcase.
* gcc.dg/tree-ssa/neor-4.c: New testcase.
* gcc.dg/tree-ssa/neor-5.c: New testcase.
* gcc.dg/tree-ssa/neor-6.c: New testcase.
* gcc.dg/tree-ssa/neor-7.c: New testcase.
Index: match.pd
===
--- match.pd(revision 279865)
+++ match.pd(working copy)
@@ -807,6 +807,35 @@ (define_operator_list COND_TERNARY
&& TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (TREE_TYPE (@1)))
 (cmp (bit_and @0 (convert @1)) @2
 
+/* Transform ((A & N) ==/!= CST1) &/| ((A & M) ==/!= CST2)
+   if (CST1&~N) == 0 && (CST2&~M) == 0 then
+   if  (N&M)&CST1 != (N&M)&CST2 then
+   false/true
+   else
+   (A&(N|M)) ==/!= (CST1|CST2) */
+(for bitop (bit_and bit_ior)
+ cmp   (eq  ne)
+ (simplify
+  (bitop
+   (cmp (bit_and @0 INTEGER_CST@mask1) INTEGER_CST@CST1)
+   (cmp (bit_and @0 INTEGER_CST@mask2) INTEGER_CST@CST2))
+  (with
+   {
+ tree type1 = TREE_TYPE (@0);
+ wide_int mask1 = wi::to_wide (@mask1);
+ wide_int mask2 = wi::to_wide (@mask2);
+ wide_int newmask = mask1 | mask2;
+ wide_int m = mask1 & mask2;
+ wide_int cst1 = wi::to_wide (@CST1);
+ wide_int cst2 = wi::to_wide (@CST2);
+   }
+   (if (wi::bit_and_not (cst1, mask1) == 0
+&& wi::bit_and_not (cst2, mask2) == 0)
+(if (wi::eq_p (m & cst1, m & cst2))
+ (cmp (bit_and @0 { wide_int_to_tree (type1, newmask); } )
+  { wide_int_to_tree (type1, cst1 | cst2); } )
+ { constant_boolean_node (cmp == NE_EXPR, type); })
+
 /* Fold (A & ~B) - (A & B) into (A ^ B) - B.  */
 (simplify
  (minus (bit_and:cs @0 (bit_not @1)) (bit_and:cs @0 @1))
Index: testsuite/gcc.c-torture/execute/cmpandor-1.c
===
--- testsuite/gcc.c-torture/execute/cmpandor-1.c(nonexistent)
+++ testsuite/gcc.c-torture/execute/cmpandor-1.c(working copy)
@@ -0,0 +1,91 @@
+
+#define NOIPA __attribute__((__noipa__))
+
+
+#define functiondefine(name, cmp, bitop, v1, v1cmp, v2, v2cmp) \
+int name(int a) NOIPA; \
+int name(int a)\
+{  \
+  int b = (a & (v1)) cmp (v1cmp);  \
+  int c = (a & (v2)) cmp (v2cmp);  \
+  return b bitop c;\
+}
+
+
+#define func functiondefine
+#include "cmpandor-1.h"
+#undef func
+
+struct inputsuse
+{
+  const char *nam

[Committed] New bit-field testcases Part 3/N

2020-01-05 Thread Andrew Pinski
Hi,
  I found some more testcases that would cause an internal compiler
error while working on my bit-field lowering pass.  Since there was no
testcase already done, I thought it would be best if it was applied to
the trunk.

Thanks,
Andrew Pinski

testsuite/ChangeLog:
* gcc.c-torture/compile/20200105-1.c: New testcase.
* gcc.c-torture/compile/20200105-2.c: New testcase.
* gcc.c-torture/compile/20200105-3.c: New testcase.
Index: gcc.c-torture/compile/20200105-1.c
===
--- gcc.c-torture/compile/20200105-1.c  (nonexistent)
+++ gcc.c-torture/compile/20200105-1.c  (working copy)
@@ -0,0 +1,12 @@
+struct mouse_button_str {
+unsigned char left  : 1;
+unsigned char right : 1;
+unsigned char middle: 1;
+};
+int g(void)
+{
+  signed char a = 0;
+  struct mouse_button_str *newbutton1 = (struct mouse_button_str*)&a;
+  newbutton1->left = 1;
+  return a;
+}
Index: gcc.c-torture/compile/20200105-2.c
===
--- gcc.c-torture/compile/20200105-2.c  (nonexistent)
+++ gcc.c-torture/compile/20200105-2.c  (working copy)
@@ -0,0 +1,12 @@
+struct mouse_button_str {
+signed char left  : 1;
+signed char right : 1;
+signed char middle: 1;
+};
+int g(void)
+{
+  unsigned char a = 0;
+  struct mouse_button_str *newbutton1 = (struct mouse_button_str*)&a;
+  newbutton1->left = 1;
+  return a;
+}
Index: gcc.c-torture/compile/20200105-3.c
===
--- gcc.c-torture/compile/20200105-3.c  (nonexistent)
+++ gcc.c-torture/compile/20200105-3.c  (working copy)
@@ -0,0 +1,12 @@
+struct mouse_button_str {
+unsigned char left  : 1;
+unsigned char right : 1;
+unsigned char middle: 1;
+};
+int g(void)
+{
+  unsigned char a = 0;
+  struct mouse_button_str *newbutton1 = (struct mouse_button_str*)&a;
+  newbutton1->left = 1;
+  return a;
+}


Re: [patch] relax aarch64 stack-clash tests depedence on alloca.h

2020-01-06 Thread Andrew Pinski
On Mon, Jan 6, 2020 at 2:18 PM Olivier Hainque  wrote:
>
> Hello,
>
> The aarch64 testsuite features a few tests for
> the stack-clash-protection facility, all using "alloca"
> after an #include .
>
> The use of alloca.h causes the tests to fail on
> target systems not providing that header, such as some
> variants of VxWorks.
>
> My understanding is that the tests really depend on
> the alloca functionality, not on alloca.h in particular.
>
> This patch is a proposal to modify the tests to
> use __builtin_alloca instead, as done in some other
> places in the testsuite.
>
> This cures the failures on VxWorks and bootstrap+regtest
> fine on aarch64 linux.
>
> Ok to commit ?

Just one small suggestion:
Instead of:
-  char* pStr = alloca(SIZE);
+  char* pStr = __builtin_alloca(SIZE);

Why not just do:
-#include 
+#define alloca __builtin_alloca

Thanks,
Andrew Pinski


>
> Thanks in advance!
>
> Best Regards,
>
> Olivier
>
>
> 2020-01-06  Olivier Hainque  
> Alexandre Oliva  
>
> * gcc.target/aarch64/stack-check-alloca.h: Remove
> #include alloca.h.
> (f_caller): Use __builtin_alloca instead of alloca.
> * gcc.target/aarch64/stack-check-alloca-1.c: Add
> { dg-require-effective-target alloca }.
> * gcc.target/aarch64/stack-check-alloca-2.c: Likewise.
> * gcc.target/aarch64/stack-check-alloca-3.c: Likewise.
> * gcc.target/aarch64/stack-check-alloca-4.c: Likewise.
> * gcc.target/aarch64/stack-check-alloca-5.c: Likewise.
> * gcc.target/aarch64/stack-check-alloca-6.c: Likewise.
> * gcc.target/aarch64/stack-check-alloca-7.c: Likewise.
> * gcc.target/aarch64/stack-check-alloca-8.c: Likewise.
> * gcc.target/aarch64/stack-check-alloca-9.c: Likewise.
> * gcc.target/aarch64/stack-check-alloca-10.c: Likewise.
>


Re: [PATCH][RFC] Introduce BIT_FIELD_INSERT

2020-01-07 Thread Andrew Pinski
On Mon, Jan 6, 2020 at 11:36 PM Richard Biener  wrote:
>
> On Mon, 16 Dec 2019, Andrew Pinski wrote:
>
> > On Thu, Nov 15, 2018 at 12:31 AM Richard Biener  wrote:
> > >
> > > On Thu, 15 Nov 2018, Richard Biener wrote:
> > >
> > > > On Wed, 14 Nov 2018, Andrew Pinski wrote:
> > > >
> > > > > On Fri, May 13, 2016 at 3:51 AM Richard Biener  
> > > > > wrote:
> > > > > >
> > > > > >
> > > > > > The following patch adds BIT_FIELD_INSERT, an operation to
> > > > > > facilitate doing bitfield inserts on registers (as opposed
> > > > > > to currently where we'd have a BIT_FIELD_REF store).
> > > > > >
> > > > > > Originally this was developed as part of bitfield lowering
> > > > > > where bitfield stores were lowered into read-modify-write
> > > > > > cycles and the modify part, instead of doing shifting and masking,
> > > > > > be kept in a more high-level form to ease combining them.
> > > > > >
> > > > > > A second use case (the above is still valid) is vector element
> > > > > > inserts which we currently can only do via memory or
> > > > > > by extracting all components and re-building the vector using
> > > > > > a CONSTRUCTOR.  For this second use case I added code
> > > > > > re-writing the BIT_FIELD_REF stores the C family FEs produce
> > > > > > into BIT_FIELD_INSERT when update-address-taken can otherwise
> > > > > > re-write a decl into SSA form (the testcase shows we miss
> > > > > > a similar opportunity with the MEM_REF form of a vector insert,
> > > > > > I plan to fix that for the final submission).
> > > > > >
> > > > > > One speciality of BIT_FIELD_INSERT as opposed to BIT_FIELD_REF
> > > > > > is that the size of the insertion is given implicitely via the
> > > > > > type size/precision of the value to insert.  That avoids
> > > > > > introducing ways to have quaternary ops in folding and GIMPLE stmts.
> > > > > >
> > > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> > > > > >
> > > > > > Richard.
> > > > > >
> > > > > > 2011-06-16  Richard Guenther  
> > > > > >
> > > > > > PR tree-optimization/29756
> > > > > > * tree.def (BIT_FIELD_INSERT): New tcc_expression tree code.
> > > > > > * expr.c (expand_expr_real_2): Handle BIT_FIELD_INSERT.
> > > > > > * fold-const.c (operand_equal_p): Likewise.
> > > > > > (fold_ternary_loc): Add constant folding of 
> > > > > > BIT_FIELD_INSERT.
> > > > > > * gimplify.c (gimplify_expr): Handle BIT_FIELD_INSERT.
> > > > > > * tree-inline.c (estimate_operator_cost): Likewise.
> > > > > > * tree-pretty-print.c (dump_generic_node): Likewise.
> > > > > > * tree-ssa-operands.c (get_expr_operands): Likewise.
> > > > > > * cfgexpand.c (expand_debug_expr): Likewise.
> > > > > > * gimple-pretty-print.c (dump_ternary_rhs): Likewise.
> > > > > > * gimple.c (get_gimple_rhs_num_ops): Handle 
> > > > > > BIT_FIELD_INSERT.
> > > > > > * tree-cfg.c (verify_gimple_assign_ternary): Verify 
> > > > > > BIT_FIELD_INSERT.
> > > > > >
> > > > > > * tree-ssa.c (non_rewritable_lvalue_p): We can rewrite
> > > > > > vector inserts using BIT_FIELD_REF on the lhs.
> > > > > > (execute_update_addresses_taken): Do it.
> > > > > >
> > > > > > * gcc.dg/tree-ssa/vector-6.c: New testcase.
> > > > > >
> > > > > > Index: trunk/gcc/expr.c
> > > > > > ===
> > > > > > *** trunk.orig/gcc/expr.c   2016-05-12 13:40:30.704262951 +0200
> > > > > > --- trunk/gcc/expr.c2016-05-12 15:40:32.481225744 +0200
> > > > > > *** expand_expr_real_2 (sepops ops, rtx targ
> > > > > > *** 9358,9363 
> > > > > > --- 9358,9380 
> > > > > > target = expand_vec_con

Re: [PATCH] std::experimental::simd

2020-01-07 Thread Andrew Pinski
On Tue, Jan 7, 2020 at 3:01 AM Matthias Kretz  wrote:
>
> Is there any chance left we can get this done for 10.1? If not, can we please
> get it ready for 10.2 ASAP?
>
> Cheers,
>   Matthias
>
> On Montag, 14. Oktober 2019 14:12:12 CET Matthias Kretz wrote:
> > Let me try again to get this patch ready. It will need a few iterations...
> > This patch is without documentation and testsuite. I can add them on request
> > but would prefer a follow-up patch after getting this one right.
> >
> > I recommend to review starting from simd.h + simd_scalar.h, then
> > simd_builtin.h, simd_x86.h, and simd_fixed_size.h. I sure when we get this
> > far we are a few iterations further.
> >
> > Regarding the license. The license header is currently just a copy from my
> > repo, but we can change it to the libstdc++ license. The paperwork with the
> > FSF is done.

Seems like it would be better if we put the x86 and aarch64/arm
specific parts in their own headers.
Also all of the x86 conversion should be removed as
__builtin_convertvector  is supported now.
libstdc++v3 is only ever supported by the version that comes with the compiler.

Thanks,
Andrew

> >
> >
> >   * include/Makefile.am: Add new header.
> >   * include/Makefile.in: Regenerate.
> >   * include/experimental/simd: New header for Parallelism TS 2.
> >   * include/experimental/bits/simd.h: Definition of the public simd
> >   interfaces and general implementation helpers.
> >   * include/experimental/bits/simd_builtin.h: Implementation of the
> >   _VecBuiltin simd_abi.
> >   * include/experimental/bits/simd_combine.h: Preliminary
> >   implementation of the _Combine simd_abi.
> >   * include/experimental/bits/simd_converter.h: Generic simd
> >   conversions.
> >   * include/experimental/bits/simd_detail.h: Internal macros for the
> >   simd implementation.
> >   * include/experimental/bits/simd_fixed_size.h: Simd fixed_size ABI
> >   specific implementations.
> >   * include/experimental/bits/simd_math.h: Math overloads for simd.
> >   * include/experimental/bits/simd_neon.h: Simd NEON specific
> >   implementations.
> >   * include/experimental/bits/simd_scalar.h: Simd scalar ABI
> >   specific implementations.
> >   * include/experimental/bits/simd_x86.h: Simd x86 specific
> >   implementations.
> >   * include/experimental/bits/simd_x86_conversions.h: x86 specific
> >   conversion optimizations.
>
>
> --
> ──
>  Dr. Matthias Kretz   https://mattkretz.github.io
>  GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
>  std::experimental::simd  https://github.com/VcDevel/std-simd
> ──


Re: [committed] d: Fix gdc -O2 -mavx generates misaligned vmovdqa instruction [PR114171]

2024-03-03 Thread Andrew Pinski
On Sat, Mar 2, 2024 at 5:51 PM Iain Buclaw  wrote:
>
> Hi,
>
> This patch fixes a wrong code issue in the D front-end where lowered
> struct comparisons would reinterpret fields with a different (usually
> bigger) alignment than the original.  Use `build_aligned_type' to
> preserve the alignment when casting away for such comparisons.
>
> Bootstrapped and regression tested on x86_64-linux-gnu/-m32, committed
> to mainline, and backported to releases/gcc-13, releases/gcc-12, and
> releases/gcc-11.
>
> Regards,
> Iain.
> ---
> PR d/114171
>
> gcc/d/ChangeLog:
>
> * d-codegen.cc (lower_struct_comparison): Keep alignment of original
> type in reinterpret cast for comparison.
>
> gcc/testsuite/ChangeLog:
>
> * gdc.dg/torture/pr114171.d: New test.
> ---
>  gcc/d/d-codegen.cc  |  1 +
>  gcc/testsuite/gdc.dg/torture/pr114171.d | 29 +
>  2 files changed, 30 insertions(+)
>  create mode 100644 gcc/testsuite/gdc.dg/torture/pr114171.d
>
> diff --git a/gcc/d/d-codegen.cc b/gcc/d/d-codegen.cc
> index 5bc233928aa..43d7739f8fc 100644
> --- a/gcc/d/d-codegen.cc
> +++ b/gcc/d/d-codegen.cc
> @@ -1006,6 +1006,7 @@ lower_struct_comparison (tree_code code, 
> StructDeclaration *sd,
>   if (tmode == NULL_TREE)
> tmode = make_unsigned_type (GET_MODE_BITSIZE (mode.require 
> ()));
>
> + tmode = build_aligned_type (tmode, TYPE_ALIGN (stype));

You might also need to build a may_alias variant, or make sure the
access uses the correct aliasing set.
I have not checked if the D front-end does anything special for
aliasing sets so I am not sure if that is needed or not but I suspect
it is.

Thanks,
Andrew Pinski


>   t1ref = build_vconvert (tmode, t1ref);
>   t2ref = build_vconvert (tmode, t2ref);
>
> diff --git a/gcc/testsuite/gdc.dg/torture/pr114171.d 
> b/gcc/testsuite/gdc.dg/torture/pr114171.d
> new file mode 100644
> index 000..0f9ffcab916
> --- /dev/null
> +++ b/gcc/testsuite/gdc.dg/torture/pr114171.d
> @@ -0,0 +1,29 @@
> +// { dg-do run }
> +// { dg-additional-options "-mavx" { target avx_runtime } }
> +// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
> +import gcc.builtins;
> +
> +struct S1
> +{
> +string label;
> +}
> +
> +struct S2
> +{
> +ulong pad;
> +S1 label;
> +}
> +
> +pragma(inline, false)
> +auto newitem()
> +{
> +void *p = __builtin_malloc(S2.sizeof);
> +__builtin_memset(p, 0, S2.sizeof);
> +return cast(S2*) p;
> +}
> +
> +int main()
> +{
> +auto bn = newitem();
> +return bn.label is S1.init ? 0 : 1;
> +}
> --
> 2.40.1
>


Re: [PATCH] testsuite: xfail test for arm

2024-03-09 Thread Andrew Pinski
On Sat, Mar 9, 2024 at 1:07 AM Torbjörn SVENSSON
 wrote:
>
> I don't know if this affects other targets than arm-none-eabi, so I
> used arm-*-*. If you think it should be *-*-* or some other target
> selector, please let me know what to use instead.
>
> Ok for releases/gcc-13?

Most likely should be short_enums instead of arm*-*-* (I think the old
arm non-eabi didn't use short enums) due to the fix
r14-6517-gb7e4a4c626e applies when -fshort-enums is used.
Also if you are adding a dg-bogus to the branch, it might make sense
to do the same on the trunk (obviously without the xfail part).
Also makes sense to add a reference to r14-6517-gb7e4a4c626e to the
dg-bogus in the source too.

Thanks,
Andrew Pinski

>
> --
>
> On arm-none-eabi, the test case fails with
> .../null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c:63:65: warning: 
> converting a packed 'enum obj_type' pointer (alignment 1) to a 'struct 
> connection' pointer (alignment 4) may result in an unaligned pointer value 
> [-Waddress-of-packed-member]
>
> The error was fixed in basepoints/gcc-14-6517-gb7e4a4c626e, but it
> was considered to be a too big change to be backported and thus, the
> failing test is marked xfail in GCC13.
>
> gcc/testsuite/ChangeLog:
> * gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c:
> Added dg-bogus with xfail on offending line for arm-*-*.
>
> Signed-off-by: Torbjörn SVENSSON 
> ---
>  .../null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git 
> a/gcc/testsuite/gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c
>  
> b/gcc/testsuite/gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c
> index 2a9c715c32c..461d5f1199c 100644
> --- 
> a/gcc/testsuite/gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c
> +++ 
> b/gcc/testsuite/gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c
> @@ -60,7 +60,7 @@ static inline enum obj_type obj_type(const enum obj_type *t)
>  }
>  static inline struct connection *__objt_conn(enum obj_type *t)
>  {
> - return ((struct connection *)(((void *)(t)) - ((long)&((struct connection 
> *)0)->obj_type)));
> + return ((struct connection *)(((void *)(t)) - ((long)&((struct connection 
> *)0)->obj_type))); /* { dg-bogus "may result in an unaligned pointer value" 
> "" { xfail arm-*-* } } */
>  }
>  static inline struct connection *objt_conn(enum obj_type *t)
>  {
> --
> 2.25.1
>


Re: [RFC] [PR tree-optimization/92539] Optimize away tests against invalid pointers

2024-03-10 Thread Andrew Pinski
On Sun, Mar 10, 2024 at 2:04 PM Jeff Law  wrote:
>
> Here's a potential approach to fixing PR92539, a P2 -Warray-bounds false
> positive triggered by loop unrolling.
>
> As I speculated a couple years ago, we could eliminate the comparisons
> against bogus pointers.  Consider:
>
> >[local count: 30530247]:
> >   if (last_12 != &MEM  [(void *)"aa" + 3B])
> > goto ; [54.59%]
> >   else
> > goto ; [45.41%]
>
>
> That's a valid comparison as ISO allows us to generate, but not
> dereference, a pointer one element past the end of the object.
>
> But +4B is a bogus pointer.  So given an EQ comparison against that
> pointer we could always return false and for NE always return true.
>
> VRP and DOM seem to be the most natural choices for this kind of
> optimization on the surface.  However DOM is actually not viable because
> the out-of-bounds pointer warning pass is run at the end of VRP.  So
> we've got to take care of this prior to the end of VRP.
>
>
>
> I haven't done a bootstrap or regression test with this.  But if it
> looks reasonable I can certainly push on it further. I have confirmed it
> does eliminate the tests and shuts up the bogus warning.
>
> The downside is this would also shut up valid warnings if user code did
> this kind of test.
>
> Comments/Suggestions?

ENOPATCH

>
> Jeff


[COMMITTED] Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

2024-03-10 Thread Andrew Pinski
The problem here is that merge_truthop_with_opposite_arm would
use the type of the result of the comparison rather than the operands
of the comparison to figure out if we are honoring NaNs.
This fixes that oversight and now we get the correct results in this
case.

Committed as obvious after a bootstrap/test on x86_64-linux-gnu.

PR middle-end/95351

gcc/ChangeLog:

* fold-const.cc (merge_truthop_with_opposite_arm): Use
the type of the operands of the comparison and not the type
of the comparison.

gcc/testsuite/ChangeLog:

* gcc.dg/float_opposite_arm-1.c: New test.

Signed-off-by: Andrew Pinski 
---
 gcc/fold-const.cc   |  3 ++-
 gcc/testsuite/gcc.dg/float_opposite_arm-1.c | 17 +
 2 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/float_opposite_arm-1.c

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 43105d20be3..299c22bf391 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -6420,7 +6420,6 @@ static tree
 merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
 bool rhs_only)
 {
-  tree type = TREE_TYPE (cmpop);
   enum tree_code code = TREE_CODE (cmpop);
   enum tree_code truthop_code = TREE_CODE (op);
   tree lhs = TREE_OPERAND (op, 0);
@@ -6436,6 +6435,8 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, 
tree cmpop,
   if (TREE_CODE_CLASS (code) != tcc_comparison)
 return NULL_TREE;
 
+  tree type = TREE_TYPE (TREE_OPERAND (cmpop, 0));
+
   if (rhs_code == truthop_code)
 {
   tree newrhs = merge_truthop_with_opposite_arm (loc, rhs, cmpop, 
rhs_only);
diff --git a/gcc/testsuite/gcc.dg/float_opposite_arm-1.c 
b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
new file mode 100644
index 000..d2dbff35066
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-original -fdump-tree-optimized" } */
+/* { dg-add-options ieee } */
+/* PR middle-end/95351 */
+
+int Foo(double possiblyNAN, double b, double c)
+{
+return (possiblyNAN <= 2.0) || ((possiblyNAN  > 2.0) && (b > c));
+}
+
+/* Make sure we don't remove either >/<=  */
+
+/* { dg-final { scan-tree-dump "possiblyNAN > 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. > 2.0e.0" "optimized" 
} } */
+
+/* { dg-final { scan-tree-dump "possiblyNAN <= 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. <= 2.0e.0" "optimized" 
} } */
-- 
2.43.0



  1   2   3   4   5   6   7   8   9   10   >