Re: [PATCH] Loop unswitching: support gswitch statements.

2021-11-24 Thread Richard Biener via Gcc-patches
On Tue, Nov 23, 2021 at 5:36 PM Martin Liška  wrote:
>
> On 11/23/21 16:20, Martin Liška wrote:
> > Sure, so for e.g. case 1 ... 5 we would need to create a new 
> > unswitch_predicate
> > with 1 <= index && index <= 5 tree predicate (and the corresponding irange 
> > range).
> > Later once we unswitch on it, we should use a special unreachable_flag that 
> > will
> > be used for marking of dead edges (similarly how we fold gconds to 
> > boolean_{false/true}_node.
> > Does it make sense?
>
> I have thought about it more and it's not enough. What we really want is 
> having an irange
> for *each edge* (2 for gconds and multiple for gswitchs). Once we select an 
> unswitch_predicate,
> then we need to fold_range in true/false loop all these iranges. Doing that 
> we can handle situations like:
>
> if (index < 1)
> do_something1
>
> if (index > 2)
> do_something2
>
> switch (index)
> case 1 ... 2:
>   do_something;
> ...
>
> as seen, once we unswitch on 'index < 1' and 'index > 2', then the first 
> case will be taken in the false_edge
> of 'index > 2' loop unswitching.

Hmm.  I'm not sure it needs to be this complicated.  We're basically
evaluating ranges/predicates based
on a fixed set of versioning predicates.  Your implementation created
"predicates" for the to be simplified
conditions but in the end we like to evaluate the actual stmt to
figure the taken/not taken edges.  IIRC
elsewhere Andrew showed a snippet on how to evaluate a stmt with a
given range - not sure if that
was useful enough.  So what I think would be nice is if we could somehow
use ranger's path query
without an actual CFG.  So we virtually have

  if (versioning-predicate1)
if (versioning-predicate2)
   ;
   else
  for (;;) // our current loop
{
  ...
  if (condition)
;
 ...
  switch (var)
 {
...
  }
}

and versioning-predicate1 and versioning-predicate2 are not in the IL.
What we'd like
to do is seed the path query with a "virtual" path through the two
predicates to the
entry of the loop and compute_ranges based on those.  Then we like to
use range_of_stmt on 'if (condition)' and 'switch (var)' to determine
not taken edges.
Looking somewhat at the sources it seems like we "simply" need to do what
compute_outgoing_relations does - unfortunately the code lacks comments
so I have no idea what jt_fur_source src (...).register_outgoing_edges does ...

Anyway, for now manually simplifying things is fine but I probably would still
stick to a basic interface that marks not taken outgoing edges of a stmt based
on the set of versioning predicates.

Richard.

>
> Martin


[PATCH] Avoid redundant get_loop_body calls in IVOPTs

2021-11-24 Thread Richard Biener via Gcc-patches
This removes redundant get_loop_body calls in IVOPTs by passing
around the body we're gathering early.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-11-23  Richard Biener  

* tree-ssa-loop-ivopts.c (find_givs): Take loop body as
argument instead of re-computing it.
(find_interesting_uses): Likewise.
(find_induction_variables): Pass through loop body.
(tree_ssa_iv_optimize_loop): Pass down loop body.
---
 gcc/tree-ssa-loop-ivopts.c | 16 ++--
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 5a7fd305d91..4769b65b5d3 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -1462,22 +1462,20 @@ find_givs_in_bb (struct ivopts_data *data, basic_block 
bb)
 /* Finds general ivs.  */
 
 static void
-find_givs (struct ivopts_data *data)
+find_givs (struct ivopts_data *data, basic_block *body)
 {
   class loop *loop = data->current_loop;
-  basic_block *body = get_loop_body_in_dom_order (loop);
   unsigned i;
 
   for (i = 0; i < loop->num_nodes; i++)
 find_givs_in_bb (data, body[i]);
-  free (body);
 }
 
 /* For each ssa name defined in LOOP determines whether it is an induction
variable and if so, its initial value and step.  */
 
 static bool
-find_induction_variables (struct ivopts_data *data)
+find_induction_variables (struct ivopts_data *data, basic_block *body)
 {
   unsigned i;
   bitmap_iterator bi;
@@ -1485,7 +1483,7 @@ find_induction_variables (struct ivopts_data *data)
   if (!find_bivs (data))
 return false;
 
-  find_givs (data);
+  find_givs (data, body);
   mark_bivs (data);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
@@ -2736,11 +2734,10 @@ split_address_groups (struct ivopts_data *data)
 /* Finds uses of the induction variables that are interesting.  */
 
 static void
-find_interesting_uses (struct ivopts_data *data)
+find_interesting_uses (struct ivopts_data *data, basic_block *body)
 {
   basic_block bb;
   gimple_stmt_iterator bsi;
-  basic_block *body = get_loop_body (data->current_loop);
   unsigned i;
   edge e;
 
@@ -2760,7 +2757,6 @@ find_interesting_uses (struct ivopts_data *data)
if (!is_gimple_debug (gsi_stmt (bsi)))
  find_interesting_uses_stmt (data, gsi_stmt (bsi));
 }
-  free (body);
 
   split_address_groups (data);
 
@@ -8077,11 +8073,11 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, 
class loop *loop,
 
   /* For each ssa name determines whether it behaves as an induction variable
  in some loop.  */
-  if (!find_induction_variables (data))
+  if (!find_induction_variables (data, body))
 goto finish;
 
   /* Finds interesting uses (item 1).  */
-  find_interesting_uses (data);
+  find_interesting_uses (data, body);
   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
 goto finish;
 
-- 
2.31.1


Update GMP/MPFR/MPC/ISL version in contrib/download_prerequisites (and gcc/infrastructure)

2021-11-24 Thread Tobias Burnus

Note: This change has no effect on 
https://gcc.gnu.org/install/prerequisites.html,
i.e. the minimal versions remain: GMP 4.3.2, MPFR 3.1.0, MPC 1.0.1, ISL 0.15. 
However,
not all features might work with the minimal version and in the future, the 
minimal
version might change. (There is a Graphite feature in the pipeline for GCC 13 
which
will require a newer ISL, whether by bumping the minimal ISL version or using a 
configure
to disable if older is unclear; cf. 
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/584715.html)



This patch consists of two parts:

* Placing a new version under https://gcc.gnu.org/pub/gcc/infrastructure/
* The attached patch to the download script


Upstream links to the new versions (the first idea was to use 'major-release - 
1'
for greater stability but at the end, only GMP is not the newest):

* https://ftp.gnu.org/gnu/gmp/gmp-6.1.2.tar.bz2
  (stable branch; current would be 6.2.1 – before: 6.1.0)

* https://ftp.gnu.org/gnu/mpfr/mpfr-4.1.0.tar.bz2
  (latest version [as 4.0.2 is too old for MPC 1.1.x] - before: 3.1.6)

* https://ftp.gnu.org/gnu/mpc/mpc-1.2.1.tar.gz
  (latest version - before: 1.0.3)

* https://libisl.sourceforge.io/isl-0.24.tar.bz2
  (latest version - before 0.18)

I did note that the script uses http and not https. I left it that way:
some older systems may not have an up-to-date CA trust list or struggle with
now disabled older crypto algorithms. Given that the hash is checked, I think
that's acceptable and (still) makes sense.

Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
Update GMP/MPFR/MPC/ISL version in contrib/download_prerequisites

contrib/
	* download_prerequisites: Update to gmp-6.1.2, mpfr-4.1.0,
	mpc-1.2.1 and isl-0.24.
	* contrib/prerequisites.md5: Update hash.
	* contrib/prerequisites.sha512: Likewise.

diff --git a/contrib/download_prerequisites b/contrib/download_prerequisites
index 11c283ecb1a..689e238dab1 100755
--- a/contrib/download_prerequisites
+++ b/contrib/download_prerequisites
@@ -5,7 +5,7 @@
 # Run this from the top level of the GCC source tree and the GCC build will do
 # the right thing.  Run it with the `--help` option for more information.
 #
-# (C) 2010-2016 Free Software Foundation
+# (C) 2010-2021 Free Software Foundation
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -27,10 +27,10 @@ version='(unversioned)'
 # remember to also update the files `contrib/prerequisites.sha512` and
 # `contrib/prerequisites.md5` with the new checksums.
 
-gmp='gmp-6.1.0.tar.bz2'
-mpfr='mpfr-3.1.6.tar.bz2'
-mpc='mpc-1.0.3.tar.gz'
-isl='isl-0.18.tar.bz2'
+gmp='gmp-6.1.2.tar.bz2'
+mpfr='mpfr-4.1.0.tar.bz2'
+mpc='mpc-1.2.1.tar.gz'
+isl='isl-0.24.tar.bz2'
 
 base_url='http://gcc.gnu.org/pub/gcc/infrastructure/'
 
diff --git a/contrib/prerequisites.md5 b/contrib/prerequisites.md5
index 272f7a429b6..f1703493c76 100644
--- a/contrib/prerequisites.md5
+++ b/contrib/prerequisites.md5
@@ -1,4 +1,4 @@
-86ee6e54ebfc4a90b643a65e402c4048  gmp-6.1.0.tar.bz2
-320c28198def956aeacdb240b46b8969  mpfr-3.1.6.tar.bz2
-d6a1d5f8ddea3abd2cc3e98f58352d26  mpc-1.0.3.tar.gz
-11436d6b205e516635b666090b94ab32  isl-0.18.tar.bz2
+8ddbb26dc3bd4e2302984debba1406a5  gmp-6.1.2.tar.bz2
+44b892bc5a45bafb4294d134e13aad1d  mpfr-4.1.0.tar.bz2
+9f16c976c25bb0f76b50be749cd7a3a8  mpc-1.2.1.tar.gz
+dd2f7b78e118c25bd96134a52aae7f4d  isl-0.24.tar.bz2
diff --git a/contrib/prerequisites.sha512 b/contrib/prerequisites.sha512
index 71601457cfc..5edde13e2f8 100644
--- a/contrib/prerequisites.sha512
+++ b/contrib/prerequisites.sha512
@@ -1,4 +1,4 @@
-3c82aeab9c1596d4da8afac2eec38e429e84f3211e1a572cf8fd2b546493c44c039b922a1133eaaa48bd7f3e11dbe795a384e21ed95cbe3ecc58d7ac02246117  gmp-6.1.0.tar.bz2
-0c310dd7956be527884f8059c195a5aca1042b089d0927ac6341e6310b1250a7059bc61b4dfb76c6ab8b67e440878ca203f72674529bbcb46770ed9b6885  mpfr-3.1.6.tar.bz2
-0028b76df130720c1fad7de937a0d041224806ce5ef76589f19c7b49d956071a683e2f20d154c192a231e69756b19e48208f2889b0c13950ceb7b3cfaf059a43  mpc-1.0.3.tar.gz
-85d0b40f4dbf14cb99d17aa07048cdcab2dc3eb527d2fbb1e84c41b2de5f351025370e57448b63b2b8a8cf8a0843a089c3263f9baee1542d5c2e1cb37ed39d94  isl-0.18.tar.bz2
+268db88447174617f5746d9a6ba2b105940cc1a5e73155eb23b6eedf55f8e7724eda05d161b2de19aca9e794956d226ba9ed6f23124c7c82f7e1872e32b003cf  gmp-6.1.2.tar.bz2
+410208ee0d48474c1c10d3d4a59decd2dfa187064183b09358ec4c4666e34d74383128436b404123b831e585d81a9176b24c7ced9d913967c5fce35d4040a0b4  mpfr-4.1.0.tar.bz2
+3279f813ab37f47fdcc800e4ac5f306417d07f539593ca715876e43e04896e1d5bceccfb288ef2908a3f24b760747d0dbd0392a24b9b341bc3e12082e5c836ee  mpc-1.2.1.tar.gz
+aab3bddbda96b801d0f56d2869f943157aad52a6f6e6a61745edd740234c635c38231af20bc3f1a08d416a5e9

RE: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]

2021-11-24 Thread Kong, Lingling via Gcc-patches
Hi  Uros,

> BTW: When playing with my patch, I introduced (define_insn "*vec_set_0" 
> ...) to optimize scalar load to a vector. Does ix86_expand_vector_set work OK 
> without this pattern?

Yes, ix86_expand_vector_set could work ok with (define_insn 
"_pinsr"), this insn can optimize scalar load to a 
vector.

Thanks,
Lingling

-Original Message-
From: Uros Bizjak  
Sent: Wednesday, November 24, 2021 3:57 PM
To: Kong, Lingling 
Cc: Liu, Hongtao ; gcc-patches@gcc.gnu.org
Subject: Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert 
_Float16 to SFmode with -mf16c [PR 102811]

On Wed, Nov 24, 2021 at 7:25 AM Kong, Lingling via Gcc-patches 
 wrote:
>
> Hi,
>
> vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with 
> -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c.
> And cleared before conversion, updated  movhi_internal and 
> ix86_can_change_mode_class.

Please fix the above commit message.

>
> OK for master?
>
> gcc/ChangeLog:
>
> PR target/102811
> * config/i386/i386.c (ix86_can_change_mode_class): SSE2 can load 
> 16bit data
> to sse register via pinsrw.

Allow 16bit data in XMM register for SSE2 targets.

> * config/i386/i386.md (extendhfsf2): Add extendhfsf2 for f16c.

... for TARGET_F16C.

> (extendhfdf2): Split extendhf2 into separate extendhfsf2, 
> extendhfdf2.
> extendhfdf only for target_avx512fp16.

Restrict extendhfdf for TARGET_AVX512FP16 only.

> (*extendhf2):rename extendhf2.

Rename from extendhf2.

> (truncsfhf2): Likewise.
> (truncdfhf2): Likewise.
> (*trunc2): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> PR target/102811
> * gcc.target/i386/pr90773-21.c: Optimized movhi_internal,
> optimize vmovd + movw to vpextrw.

Also allow pextrw.

> * gcc.target/i386/pr90773-23.c: Ditto.
> * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.

Otherwise LGTM.

BTW: When playing with my patch, I introduced (define_insn "*vec_set_0" 
...) to optimize scalar load to a vector. Does ix86_expand_vector_set work OK 
without this pattern?

Thanks,
Uros.

> ---
>  gcc/config/i386/i386.c|  5 +-
>  gcc/config/i386/i386.md   | 74 +--
>  .../i386/avx512vl-vcvtps2ph-pr102811.c| 11 +++
>  gcc/testsuite/gcc.target/i386/pr90773-21.c|  2 +-
>  gcc/testsuite/gcc.target/i386/pr90773-23.c|  2 +-
>  5 files changed, 83 insertions(+), 11 deletions(-)  create mode 
> 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 
> e94efdf39fb..4b813533961 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, 
> machine_mode to,
>  disallow a change to these modes, reload will assume it's ok to
>  drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
>  the vec_dupv4hi pattern.
> -NB: AVX512FP16 supports vmovw which can load 16bit data to sse
> -register.  */
> -  int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 
> 4;
> +NB: SSE2 can load 16bit data to sse register via pinsrw.  */
> +  int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 :
> +4;
>if (GET_MODE_SIZE (from) < mov_size)
> return false;
>  }
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 
> 6eb9de81921..6ee264f1151 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2525,6 +2525,16 @@
>  case TYPE_SSEMOV:
>return ix86_output_ssemov (insn, operands);
>
> +case TYPE_SSELOG:
> +  if (SSE_REG_P (operands[0]))
> +   return MEM_P (operands[1])
> + ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
> + : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> +  else
> +   return MEM_P (operands[1])
> + ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> + : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> +
>  case TYPE_MSKLOG:
>if (operands[1] == const0_rtx)
> return "kxorw\t%0, %0, %0";
> @@ -2540,13 +2550,17 @@
>  }
>  }
>[(set (attr "isa")
> -   (cond [(eq_attr "alternative" "9,10,11,12,13")
> - (const_string "avx512fp16")
> +   (cond [(eq_attr "alternative" "9,10,11,12")
> + (const_string "sse2")
> +  (eq_attr "alternative" "13")
> + (const_string "sse4")
>]
>(const_string "*")))
> (set (attr "type")
>   (cond [(eq_attr "alternative" "9,10,11,12,13")
> - (const_string "ssemov")
> + (if_then_else (match_test "TARGET_AVX512FP16")
> +   (const_string "ssemov")
> +   (const_string "sselog"))
> (eq_attr "alternative" "4,5,6,7")
>   (const_string "mskmov")
> (e

[PATCH] c++: Return early in apply_late_template_attributes if there are no late attribs [PR101180]

2021-11-24 Thread Jakub Jelinek via Gcc-patches
On Fri, Nov 19, 2021 at 10:40:50AM -0500, Jason Merrill wrote:
> > Shall we also change the function so that it doesn't call
> > cplus_decl_attributes if late_attrs is NULL [...]?
> 
> Please.

Here it is.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-11-24  Jakub Jelinek  

PR c++/101180
* pt.c (apply_late_template_attributes): Return early if there are no
dependent attributes.

--- gcc/cp/pt.c.jj  2021-11-22 10:07:01.360225139 +0100
+++ gcc/cp/pt.c 2021-11-23 11:23:16.808321905 +0100
@@ -11712,6 +11712,9 @@ apply_late_template_attributes (tree *de
   /* Apply any non-dependent attributes.  */
   *p = nondep;
 
+  if (nondep == attributes)
+return true;
+
   /* And then any dependent ones.  */
   tree late_attrs = NULL_TREE;
   tree *q = &late_attrs;


Jakub



[PATCH] bswap: Fix up symbolic merging for xor and plus [PR103376]

2021-11-24 Thread Jakub Jelinek via Gcc-patches
On Mon, Nov 22, 2021 at 08:39:42AM -, Roger Sayle wrote:
> This patch implements PR tree-optimization/103345 to merge adjacent
> loads when combined with addition or bitwise xor.  The current code
> in gimple-ssa-store-merging.c's find_bswap_or_nop already handles ior,
> so that all that's required is to treat PLUS_EXPR and BIT_XOR_EXPR in
> the same way as BIT_IOR_EXPR.

Unfortunately they aren't exactly the same.  They work the same if always
at least one operand (or corresponding byte in it) is known to be 0,
0 | 0 = 0 ^ 0 = 0 + 0 = 0.  But for | also x | x = x for any other x,
so perform_symbolic_merge has been accepting either that at least one
of the bytes is 0 or that both are the same, but that is wrong for ^
and +.

The following patch fixes that by passing through the code of binary
operation and allowing non-zero masked1 == masked2 through only
for BIT_IOR_EXPR.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Thinking more about it, perhaps we could do more for BIT_XOR_EXPR.
We could allow masked1 == masked2 case for it, but would need to
do something different than the
  n->n = n1->n | n2->n;
we do on all the bytes together.
In particular, for masked1 == masked2 if masked1 != 0 (well, for 0
both variants are the same) and masked1 != 0xff we would need to
clear corresponding n->n byte instead of setting it to the input
as x ^ x = 0 (but if we don't know what x and y are, the result is
also unknown).  Now, for plus it is much harder, because not only
for non-zero operands we don't know what the result is, but it can
modify upper bytes as well.  So perhaps only if current's byte
masked1 && masked2 set the resulting byte to 0xff (unknown) iff
the byte above it is 0 and 0, and set that resulting byte to 0xff too.
Also, even for | we could instead of return NULL just set the resulting
byte to 0xff if it is different, perhaps it will be masked off later on.
Ok to handle that incrementally?

2021-11-24  Jakub Jelinek  

PR tree-optimization/103376
* gimple-ssa-store-merging.c (perform_symbolic_merge): Add CODE
argument.  If CODE is not BIT_IOR_EXPR, ensure that one of masked1
or masked2 is 0.
(find_bswap_or_nop_1, find_bswap_or_nop,
imm_store_chain_info::try_coalesce_bswap): Adjust
perform_symbolic_merge callers.

* gcc.c-torture/execute/pr103376.c: New test.

--- gcc/gimple-ssa-store-merging.c.jj   2021-11-23 10:26:30.0 +0100
+++ gcc/gimple-ssa-store-merging.c  2021-11-23 11:49:33.806168782 +0100
@@ -434,14 +434,14 @@ find_bswap_or_nop_load (gimple *stmt, tr
   return true;
 }
 
-/* Compute the symbolic number N representing the result of a bitwise OR on 2
-   symbolic number N1 and N2 whose source statements are respectively
-   SOURCE_STMT1 and SOURCE_STMT2.  */
+/* Compute the symbolic number N representing the result of a bitwise OR,
+   bitwise XOR or plus on 2 symbolic number N1 and N2 whose source statements
+   are respectively SOURCE_STMT1 and SOURCE_STMT2.  CODE is the operation.  */
 
 gimple *
 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
gimple *source_stmt2, struct symbolic_number *n2,
-   struct symbolic_number *n)
+   struct symbolic_number *n, enum tree_code code)
 {
   int i, size;
   uint64_t mask;
@@ -563,7 +563,9 @@ perform_symbolic_merge (gimple *source_s
 
   masked1 = n1->n & mask;
   masked2 = n2->n & mask;
-  if (masked1 && masked2 && masked1 != masked2)
+  /* For BIT_XOR_EXPR or PLUS_EXPR, at least one of masked1 and masked2
+has to be 0, for BIT_IOR_EXPR x | x is still x.  */
+  if (masked1 && masked2 && (code != BIT_IOR_EXPR || masked1 != masked2))
return NULL;
 }
   n->n = n1->n | n2->n;
@@ -769,7 +771,8 @@ find_bswap_or_nop_1 (gimple *stmt, struc
return NULL;
 
  source_stmt
-   = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
+   = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n,
+ code);
 
  if (!source_stmt)
return NULL;
@@ -943,7 +946,8 @@ find_bswap_or_nop (gimple *stmt, struct
  else if (!do_shift_rotate (LSHIFT_EXPR, &n0, eltsz))
return NULL;
  ins_stmt
-   = perform_symbolic_merge (ins_stmt, &n0, source_stmt, &n1, n);
+   = perform_symbolic_merge (ins_stmt, &n0, source_stmt, &n1, n,
+ BIT_IOR_EXPR);
 
  if (!ins_stmt)
return NULL;
@@ -2881,7 +2885,7 @@ imm_store_chain_info::try_coalesce_bswap
  end = MAX (end, info->bitpos + info->bitsize);
 
  ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
-&this_n, &n);
+&this_n, &n, BIT_IOR_EXPR);
  if (ins_stmt

Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]

2021-11-24 Thread Uros Bizjak via Gcc-patches
On Wed, Nov 24, 2021 at 9:06 AM Kong, Lingling  wrote:
>
> Hi  Uros,
>
> > BTW: When playing with my patch, I introduced (define_insn 
> > "*vec_set_0" ...) to optimize scalar load to a vector. Does 
> > ix86_expand_vector_set work OK without this pattern?
>
> Yes, ix86_expand_vector_set could work ok with (define_insn 
> "_pinsr"), this insn can optimize scalar load to a 
> vector.

Ah, now I remember - this pattern can be used to optimize HI/HF mode
scalar loads in the same way as other "vec_set_0" patterns are
used. It is similar to e.g. VI4F_128 mode vec_set_0 pattern. I
was not able to test it properly without AVX512FP16, but the pattern
is otherwise independent of the proposed patch.

Uros.


Re: Update GMP/MPFR/MPC/ISL version in contrib/download_prerequisites (and gcc/infrastructure)

2021-11-24 Thread Richard Biener via Gcc-patches
On Wed, 24 Nov 2021, Tobias Burnus wrote:

> Note: This change has no effect on
> https://gcc.gnu.org/install/prerequisites.html,
> i.e. the minimal versions remain: GMP 4.3.2, MPFR 3.1.0, MPC 1.0.1, ISL 0.15.
> However,
> not all features might work with the minimal version and in the future, the
> minimal
> version might change. (There is a Graphite feature in the pipeline for GCC 13
> which
> will require a newer ISL, whether by bumping the minimal ISL version or using
> a configure
> to disable if older is unclear; cf.
> https://gcc.gnu.org/pipermail/gcc-patches/2021-November/584715.html)
> 
> 
> 
> This patch consists of two parts:
> 
> * Placing a new version under https://gcc.gnu.org/pub/gcc/infrastructure/
> * The attached patch to the download script
> 
> 
> Upstream links to the new versions (the first idea was to use 'major-release -
> 1'
> for greater stability but at the end, only GMP is not the newest):
> 
> * https://ftp.gnu.org/gnu/gmp/gmp-6.1.2.tar.bz2
>   (stable branch; current would be 6.2.1 – before: 6.1.0)

I've put 6.2.1 to infrastructure, it isn't a .0 and thus should be OK.

> * https://ftp.gnu.org/gnu/mpfr/mpfr-4.1.0.tar.bz2
>   (latest version [as 4.0.2 is too old for MPC 1.1.x] - before: 3.1.6)
> 
> * https://ftp.gnu.org/gnu/mpc/mpc-1.2.1.tar.gz
>   (latest version - before: 1.0.3)
> 
> * https://libisl.sourceforge.io/isl-0.24.tar.bz2
>   (latest version - before 0.18)

I've put the above to infrastructure.

Can you please update the patch and verify the combination builds
fine in-tree again?

Thanks,
Richard.

> I did note that the script uses http and not https. I left it that way:
> some older systems may not have an up-to-date CA trust list or struggle with
> now disabled older crypto algorithms. Given that the hash is checked, I think
> that's acceptable and (still) makes sense.
> 
> Tobias
> -
> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634
> München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas
> Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht
> München, HRB 106955
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Ivo Totev; HRB 36809 (AG Nuernberg)


[PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]

2021-11-24 Thread Kong, Lingling via Gcc-patches
Hi,

vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with 
-mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c.
Cleared before conversion, updated  movhi_internal and 
ix86_can_change_mode_class. And fixed some commit message.

OK for master?

gcc/ChangeLog:

PR target/102811
* config/i386/i386.c (ix86_can_change_mode_class): Allow 16 bit data in 
XMM register
for TARGET_SSE2.
* config/i386/i386.md (extendhfsf2): Add extendhfsf2 for TARGET_F16C.
(extendhfdf2): Restrict extendhfdf for TARGET_AVX512FP16 only.
(*extendhf2): Rename from extendhf2.
(truncsfhf2): Likewise.
(truncdfhf2): Likewise.
(*trunc2): Likewise.

gcc/testsuite/ChangeLog:

PR target/102811
* gcc.target/i386/pr90773-21.c: Optimize movhi_internal,
also allow pextrw to replace vmovd + movw.
* gcc.target/i386/pr90773-23.c: Ditto.
* gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.
---
 gcc/config/i386/i386.c|  5 +-
 gcc/config/i386/i386.md   | 74 +--
 .../i386/avx512vl-vcvtps2ph-pr102811.c| 11 +++
 gcc/testsuite/gcc.target/i386/pr90773-21.c|  2 +-
 gcc/testsuite/gcc.target/i386/pr90773-23.c|  2 +-
 5 files changed, 83 insertions(+), 11 deletions(-)  create mode 100644 
gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 
e94efdf39fb..4b813533961 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, 
machine_mode to,
 disallow a change to these modes, reload will assume it's ok to
 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
 the vec_dupv4hi pattern.
-NB: AVX512FP16 supports vmovw which can load 16bit data to sse
-register.  */
-  int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4;
+NB: SSE2 can load 16bit data to sse register via pinsrw.  */
+  int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 
+4;
   if (GET_MODE_SIZE (from) < mov_size)
return false;
 }
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 
6eb9de81921..6ee264f1151 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2525,6 +2525,16 @@
 case TYPE_SSEMOV:
   return ix86_output_ssemov (insn, operands);
 
+case TYPE_SSELOG:
+  if (SSE_REG_P (operands[0]))
+   return MEM_P (operands[1])
+ ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
+ : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
+  else
+   return MEM_P (operands[1])
+ ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
+ : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
+
 case TYPE_MSKLOG:
   if (operands[1] == const0_rtx)
return "kxorw\t%0, %0, %0";
@@ -2540,13 +2550,17 @@
 }
 }
   [(set (attr "isa")
-   (cond [(eq_attr "alternative" "9,10,11,12,13")
- (const_string "avx512fp16")
+   (cond [(eq_attr "alternative" "9,10,11,12")
+ (const_string "sse2")
+  (eq_attr "alternative" "13")
+ (const_string "sse4")
   ]
   (const_string "*")))
(set (attr "type")
  (cond [(eq_attr "alternative" "9,10,11,12,13")
- (const_string "ssemov")
+ (if_then_else (match_test "TARGET_AVX512FP16")
+   (const_string "ssemov")
+   (const_string "sselog"))
(eq_attr "alternative" "4,5,6,7")
  (const_string "mskmov")
(eq_attr "alternative" "8")
@@ -4574,8 +4588,32 @@
   emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
 })
 
-(define_insn "extendhf2"
-  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
+(define_expand "extendhfsf2"
+  [(set (match_operand:SF 0 "register_operand")
+   (float_extend:SF
+ (match_operand:HF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
+{
+  if (!TARGET_AVX512FP16)
+{
+  rtx res = gen_reg_rtx (V4SFmode);
+  rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
+
+  ix86_expand_vector_set (false, tmp, operands[1], 0);
+  emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
+  emit_move_insn (operands[0], gen_lowpart (SFmode, res));
+  DONE;
+}
+})
+
+(define_expand "extendhfdf2"
+  [(set (match_operand:DF 0 "register_operand")
+   (float_extend:DF
+ (match_operand:HF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16")
+
+(define_insn "*extendhf2"
+  [(set (match_operand:MODEF 0 "register_operand" "=v")
 (float_extend:MODEF
  (match_operand:HF 1 "nonimmediate_operand" "vm")))]
   "TARGET_AVX512FP16"
@@ -4766,7 +4804,31 @@
 
 ;; Conversion from {SF,DF}mode to HFmode.
 
-(define_insn "trunchf2"
+(define_expand "truncsfhf2"
+  [(set (match_o

Re: [PATCH] bswap: Fix up symbolic merging for xor and plus [PR103376]

2021-11-24 Thread Richard Biener via Gcc-patches
On Wed, 24 Nov 2021, Jakub Jelinek wrote:

> On Mon, Nov 22, 2021 at 08:39:42AM -, Roger Sayle wrote:
> > This patch implements PR tree-optimization/103345 to merge adjacent
> > loads when combined with addition or bitwise xor.  The current code
> > in gimple-ssa-store-merging.c's find_bswap_or_nop already handles ior,
> > so that all that's required is to treat PLUS_EXPR and BIT_XOR_EXPR in
> > the same way as BIT_IOR_EXPR.
> 
> Unfortunately they aren't exactly the same.  They work the same if always
> at least one operand (or corresponding byte in it) is known to be 0,
> 0 | 0 = 0 ^ 0 = 0 + 0 = 0.  But for | also x | x = x for any other x,
> so perform_symbolic_merge has been accepting either that at least one
> of the bytes is 0 or that both are the same, but that is wrong for ^
> and +.
> 
> The following patch fixes that by passing through the code of binary
> operation and allowing non-zero masked1 == masked2 through only
> for BIT_IOR_EXPR.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

> Thinking more about it, perhaps we could do more for BIT_XOR_EXPR.
> We could allow masked1 == masked2 case for it, but would need to
> do something different than the
>   n->n = n1->n | n2->n;
> we do on all the bytes together.
> In particular, for masked1 == masked2 if masked1 != 0 (well, for 0
> both variants are the same) and masked1 != 0xff we would need to
> clear corresponding n->n byte instead of setting it to the input
> as x ^ x = 0 (but if we don't know what x and y are, the result is
> also unknown).  Now, for plus it is much harder, because not only
> for non-zero operands we don't know what the result is, but it can
> modify upper bytes as well.  So perhaps only if current's byte
> masked1 && masked2 set the resulting byte to 0xff (unknown) iff
> the byte above it is 0 and 0, and set that resulting byte to 0xff too.
> Also, even for | we could instead of return NULL just set the resulting
> byte to 0xff if it is different, perhaps it will be masked off later on.
> Ok to handle that incrementally?

Not sure if it is worth the trouble - the XOR handling sounds
straightforward at least.  But sure, the merging routine could
simply be conservatively correct here.

Thanks,
Richard.

> 2021-11-24  Jakub Jelinek  
> 
>   PR tree-optimization/103376
>   * gimple-ssa-store-merging.c (perform_symbolic_merge): Add CODE
>   argument.  If CODE is not BIT_IOR_EXPR, ensure that one of masked1
>   or masked2 is 0.
>   (find_bswap_or_nop_1, find_bswap_or_nop,
>   imm_store_chain_info::try_coalesce_bswap): Adjust
>   perform_symbolic_merge callers.
> 
>   * gcc.c-torture/execute/pr103376.c: New test.
> 
> --- gcc/gimple-ssa-store-merging.c.jj 2021-11-23 10:26:30.0 +0100
> +++ gcc/gimple-ssa-store-merging.c2021-11-23 11:49:33.806168782 +0100
> @@ -434,14 +434,14 @@ find_bswap_or_nop_load (gimple *stmt, tr
>return true;
>  }
>  
> -/* Compute the symbolic number N representing the result of a bitwise OR on 2
> -   symbolic number N1 and N2 whose source statements are respectively
> -   SOURCE_STMT1 and SOURCE_STMT2.  */
> +/* Compute the symbolic number N representing the result of a bitwise OR,
> +   bitwise XOR or plus on 2 symbolic number N1 and N2 whose source statements
> +   are respectively SOURCE_STMT1 and SOURCE_STMT2.  CODE is the operation.  
> */
>  
>  gimple *
>  perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
>   gimple *source_stmt2, struct symbolic_number *n2,
> - struct symbolic_number *n)
> + struct symbolic_number *n, enum tree_code code)
>  {
>int i, size;
>uint64_t mask;
> @@ -563,7 +563,9 @@ perform_symbolic_merge (gimple *source_s
>  
>masked1 = n1->n & mask;
>masked2 = n2->n & mask;
> -  if (masked1 && masked2 && masked1 != masked2)
> +  /* For BIT_XOR_EXPR or PLUS_EXPR, at least one of masked1 and masked2
> +  has to be 0, for BIT_IOR_EXPR x | x is still x.  */
> +  if (masked1 && masked2 && (code != BIT_IOR_EXPR || masked1 != masked2))
>   return NULL;
>  }
>n->n = n1->n | n2->n;
> @@ -769,7 +771,8 @@ find_bswap_or_nop_1 (gimple *stmt, struc
>   return NULL;
>  
> source_stmt
> - = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
> + = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n,
> +   code);
>  
> if (!source_stmt)
>   return NULL;
> @@ -943,7 +946,8 @@ find_bswap_or_nop (gimple *stmt, struct
> else if (!do_shift_rotate (LSHIFT_EXPR, &n0, eltsz))
>   return NULL;
> ins_stmt
> - = perform_symbolic_merge (ins_stmt, &n0, source_stmt, &n1, n);
> + = perform_symbolic_merge (ins_stmt, &n0, source_stmt, &n1, n,
> +   BIT_IOR_EXPR);
>  
> if (!ins_stmt)
>

[PATCH] attribs: Fix ICEs on attributes starting with _ [PR103365]

2021-11-24 Thread Jakub Jelinek via Gcc-patches
Hi!

As the patch shows, we have quite a few asserts that we don't call
lookup_attribute etc. with attr_name that starts with an underscore,
to make sure nobody is trying to call it with non-canonicalized
attribute name like "__cold__" instead of "cold".
We canonicalize only attributes that start with 2 underscores and end
with 2 underscores though.
Before Marek's patch, that wasn't an issue, we had no attributes like
"_foo" or "__bar_" etc., so lookup_scoped_attribute_spec would
always return NULL for those and we wouldn't try to register them,
look them up etc., just with -Wattributes would warn about them.
But now, as the new testcase shows, users can actually request such
attributes to be ignored, and we ICE for those during
register_scoped_attribute and when that is fixed, ICE later on when
somebody uses those attributes because they will be looked up
to find out that they should be ignored.

So, the following patch instead of or in addition to, depending on
how performance sensitive a particular spot is, checking that
attribute doesn't start with underscore allows attribute
names that start with underscore as long as it doesn't canonicalize
(i.e. doesn't start and end with 2 underscores).
In addition to that, I've noticed lookup_attribute_by_prefix
was calling get_attribute_name twice unnecessarily, and 2 tests
were running in c++98 mode with -std=c++98 -std=c++11 which IMHO
isn't useful because -std=c++11 testing is done too when testing
all language versions.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-11-24  Jakub Jelinek  

PR middle-end/103365
* attribs.h (lookup_attribute): Allow attr_name to start with
underscore, as long as canonicalize_attr_name returns false.
(lookup_attribute_by_prefix): Don't call get_attribute_name twice.
* attribs.c (extract_attribute_substring): Reimplement using
canonicalize_attr_name.
(register_scoped_attribute): Change gcc_assert into
gcc_checking_assert, verify !canonicalize_attr_name rather than
that str.str doesn't start with '_'.

* c-c++-common/Wno-attributes-1.c: Require effective target
c || c++11 and drop dg-additional-options.
* c-c++-common/Wno-attributes-2.c: Likewise.
* c-c++-common/Wno-attributes-4.c: New test.

--- gcc/attribs.h.jj2021-11-22 10:06:42.173498383 +0100
+++ gcc/attribs.h   2021-11-23 23:35:13.757972934 +0100
@@ -188,7 +188,11 @@ is_attribute_p (const char *attr_name, c
 static inline tree
 lookup_attribute (const char *attr_name, tree list)
 {
-  gcc_checking_assert (attr_name[0] != '_');
+  if (CHECKING_P && attr_name[0] != '_')
+{
+  size_t attr_len = strlen (attr_name);
+  gcc_checking_assert (!canonicalize_attr_name (attr_name, attr_len));
+}
   /* In most cases, list is NULL_TREE.  */
   if (list == NULL_TREE)
 return NULL_TREE;
@@ -219,7 +223,8 @@ lookup_attribute_by_prefix (const char *
   size_t attr_len = strlen (attr_name);
   while (list)
{
- size_t ident_len = IDENTIFIER_LENGTH (get_attribute_name (list));
+ tree name = get_attribute_name (list);
+ size_t ident_len = IDENTIFIER_LENGTH (name);
 
  if (attr_len > ident_len)
{
@@ -227,7 +232,7 @@ lookup_attribute_by_prefix (const char *
  continue;
}
 
- const char *p = IDENTIFIER_POINTER (get_attribute_name (list));
+ const char *p = IDENTIFIER_POINTER (name);
  gcc_checking_assert (attr_len == 0 || p[0] != '_');
 
  if (strncmp (attr_name, p, attr_len) == 0)
--- gcc/attribs.c.jj2021-11-22 10:06:42.172498397 +0100
+++ gcc/attribs.c   2021-11-23 14:58:25.076233815 +0100
@@ -115,12 +115,7 @@ static const struct attribute_spec empty
 static void
 extract_attribute_substring (struct substring *str)
 {
-  if (str->length > 4 && str->str[0] == '_' && str->str[1] == '_'
-  && str->str[str->length - 1] == '_' && str->str[str->length - 2] == '_')
-{
-  str->length -= 4;
-  str->str += 2;
-}
+  canonicalize_attr_name (str->str, str->length);
 }
 
 /* Insert an array of attributes ATTRIBUTES into a namespace.  This
@@ -387,7 +382,7 @@ register_scoped_attribute (const struct
 
   /* Attribute names in the table must be in the form 'text' and not
  in the form '__text__'.  */
-  gcc_assert (str.length > 0 && str.str[0] != '_');
+  gcc_checking_assert (!canonicalize_attr_name (str.str, str.length));
 
   slot = name_space->attribute_hash
 ->find_slot_with_hash (&str, substring_hash (str.str, str.length),
--- gcc/testsuite/c-c++-common/Wno-attributes-1.c.jj2021-11-11 
14:35:37.637348034 +0100
+++ gcc/testsuite/c-c++-common/Wno-attributes-1.c   2021-11-23 
15:03:05.426198652 +0100
@@ -1,6 +1,5 @@
 /* PR c++/101940 */
-/* { dg-do compile } */
-/* { dg-additional-options "-std=c++11" { target c++ } } */
+/* { dg-do compile { target { c || c++11 } } } */
 /* { dg-additional-op

Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]

2021-11-24 Thread Uros Bizjak via Gcc-patches
On Wed, Nov 24, 2021 at 9:44 AM Kong, Lingling  wrote:
>
> Hi,
>
> vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with 
> -mf16c, so I added define_insn extendhfsf2 and truncsfhf2 for TARGET_F16C.
> The register is cleared before conversion; I also updated movhi_internal and 
> ix86_can_change_mode_class, and fixed the commit message.
>
> OK for master?

OK, with a small adjustment to ChangeLog.

Thanks,
Uros.

> gcc/ChangeLog:
>
> PR target/102811
> * config/i386/i386.c (ix86_can_change_mode_class): Allow 16 bit data 
> in XMM register
> for TARGET_SSE2.
> * config/i386/i386.md (extendhfsf2): Add extendhfsf2 for TARGET_F16C.
> (extendhfdf2): Restrict extendhfdf for TARGET_AVX512FP16 only.
> (*extendhf2): Rename from extendhf2.
> (truncsfhf2): Likewise.
> (truncdfhf2): Likewise.
> (*trunc2): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> PR target/102811
> * gcc.target/i386/pr90773-21.c: Optimize movhi_internal,
> also allow pextrw replace vmovd + movw.

Just write:

* gcc.target/i386/pr90773-21.c: Allow pextrw instead of movw.

> * gcc.target/i386/pr90773-23.c: Ditto.
> * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.
> ---
>  gcc/config/i386/i386.c|  5 +-
>  gcc/config/i386/i386.md   | 74 +--
>  .../i386/avx512vl-vcvtps2ph-pr102811.c| 11 +++
>  gcc/testsuite/gcc.target/i386/pr90773-21.c|  2 +-
>  gcc/testsuite/gcc.target/i386/pr90773-23.c|  2 +-
>  5 files changed, 83 insertions(+), 11 deletions(-)  create mode 100644 
> gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 
> e94efdf39fb..4b813533961 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, 
> machine_mode to,
>  disallow a change to these modes, reload will assume it's ok to
>  drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
>  the vec_dupv4hi pattern.
> -NB: AVX512FP16 supports vmovw which can load 16bit data to sse
> -register.  */
> -  int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 
> 4;
> +NB: SSE2 can load 16bit data to sse register via pinsrw.  */
> +  int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 :
> +4;
>if (GET_MODE_SIZE (from) < mov_size)
> return false;
>  }
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 
> 6eb9de81921..6ee264f1151 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2525,6 +2525,16 @@
>  case TYPE_SSEMOV:
>return ix86_output_ssemov (insn, operands);
>
> +case TYPE_SSELOG:
> +  if (SSE_REG_P (operands[0]))
> +   return MEM_P (operands[1])
> + ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
> + : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> +  else
> +   return MEM_P (operands[1])
> + ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> + : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> +
>  case TYPE_MSKLOG:
>if (operands[1] == const0_rtx)
> return "kxorw\t%0, %0, %0";
> @@ -2540,13 +2550,17 @@
>  }
>  }
>[(set (attr "isa")
> -   (cond [(eq_attr "alternative" "9,10,11,12,13")
> - (const_string "avx512fp16")
> +   (cond [(eq_attr "alternative" "9,10,11,12")
> + (const_string "sse2")
> +  (eq_attr "alternative" "13")
> + (const_string "sse4")
>]
>(const_string "*")))
> (set (attr "type")
>   (cond [(eq_attr "alternative" "9,10,11,12,13")
> - (const_string "ssemov")
> + (if_then_else (match_test "TARGET_AVX512FP16")
> +   (const_string "ssemov")
> +   (const_string "sselog"))
> (eq_attr "alternative" "4,5,6,7")
>   (const_string "mskmov")
> (eq_attr "alternative" "8")
> @@ -4574,8 +4588,32 @@
>emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
>  })
>
> -(define_insn "extendhf2"
> -  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
> +(define_expand "extendhfsf2"
> +  [(set (match_operand:SF 0 "register_operand")
> +   (float_extend:SF
> + (match_operand:HF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +{
> +  if (!TARGET_AVX512FP16)
> +{
> +  rtx res = gen_reg_rtx (V4SFmode);
> +  rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
> +
> +  ix86_expand_vector_set (false, tmp, operands[1], 0);
> +  emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
> +  emit_move_insn (operands[0], gen_lowpart (SFmode, res));
> +  DONE;
> +}
> +})
> +
> +(define_expand "extendhfdf2"
> +  [(set (match_operand:DF 0 "register_operand")
> +   (float_

Re: [PATCH] attribs: Fix ICEs on attributes starting with _ [PR103365]

2021-11-24 Thread Andrew Pinski via Gcc-patches
On Wed, Nov 24, 2021 at 12:48 AM Jakub Jelinek via Gcc-patches
 wrote:
>
> Hi!
>
> As the patch shows, we have quite a few asserts that we don't call
> lookup_attribute etc. with attr_name that starts with an underscore,
> to make sure nobody is trying to call it with non-canonicalized
> attribute name like "__cold__" instead of "cold".
> We canonicalize only attributes that start with 2 underscores and end
> with 2 underscores though.
> Before Marek's patch, that wasn't an issue, we had no attributes like
> "_foo" or "__bar_" etc., so lookup_scoped_attribute_spec would
> always return NULL for those and we wouldn't try to register them,
> look them up etc., just with -Wattributes would warn about them.
> But now, as the new testcase shows, users can actually request such
> attributes to be ignored, and we ICE for those during
> register_scoped_attribute and when that is fixed, ICE later on when
> somebody uses those attributes because they will be looked up
> to find out that they should be ignored.
>
> So, the following patch instead of or in addition to, depending on
> how performance sensitive a particular spot is, checking that
> attribute doesn't start with underscore allows attribute
> names that start with underscore as long as it doesn't canonicalize
> (i.e. doesn't start and end with 2 underscores).
> In addition to that, I've noticed lookup_attribute_by_prefix
> was calling get_attribute_name twice unnecessarily, and 2 tests
> were running in c++98 mode with -std=c++98 -std=c++11 which IMHO
> isn't useful because -std=c++11 testing is done too when testing
> all language versions.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2021-11-24  Jakub Jelinek  
>
> PR middle-end/103365
> * attribs.h (lookup_attribute): Allow attr_name to start with
> underscore, as long as canonicalize_attr_name returns false.
> (lookup_attribute_by_prefix): Don't call get_attribute_name twice.
> * attribs.c (extract_attribute_substring): Reimplement using
> canonicalize_attr_name.
> (register_scoped_attribute): Change gcc_assert into
> gcc_checking_assert, verify !canonicalize_attr_name rather than
> that str.str doesn't start with '_'.
>
> * c-c++-common/Wno-attributes-1.c: Require effective target
> c || c++11 and drop dg-additional-options.
> * c-c++-common/Wno-attributes-2.c: Likewise.
> * c-c++-common/Wno-attributes-4.c: New test.

Only one comment on the new testcases, you might want to add a
testcase for the option on the command line too.

Thanks,
Andrew Pinski

>
> --- gcc/attribs.h.jj2021-11-22 10:06:42.173498383 +0100
> +++ gcc/attribs.h   2021-11-23 23:35:13.757972934 +0100
> @@ -188,7 +188,11 @@ is_attribute_p (const char *attr_name, c
>  static inline tree
>  lookup_attribute (const char *attr_name, tree list)
>  {
> -  gcc_checking_assert (attr_name[0] != '_');
> +  if (CHECKING_P && attr_name[0] != '_')
> +{
> +  size_t attr_len = strlen (attr_name);
> +  gcc_checking_assert (!canonicalize_attr_name (attr_name, attr_len));
> +}
>/* In most cases, list is NULL_TREE.  */
>if (list == NULL_TREE)
>  return NULL_TREE;
> @@ -219,7 +223,8 @@ lookup_attribute_by_prefix (const char *
>size_t attr_len = strlen (attr_name);
>while (list)
> {
> - size_t ident_len = IDENTIFIER_LENGTH (get_attribute_name (list));
> + tree name = get_attribute_name (list);
> + size_t ident_len = IDENTIFIER_LENGTH (name);
>
>   if (attr_len > ident_len)
> {
> @@ -227,7 +232,7 @@ lookup_attribute_by_prefix (const char *
>   continue;
> }
>
> - const char *p = IDENTIFIER_POINTER (get_attribute_name (list));
> + const char *p = IDENTIFIER_POINTER (name);
>   gcc_checking_assert (attr_len == 0 || p[0] != '_');
>
>   if (strncmp (attr_name, p, attr_len) == 0)
> --- gcc/attribs.c.jj2021-11-22 10:06:42.172498397 +0100
> +++ gcc/attribs.c   2021-11-23 14:58:25.076233815 +0100
> @@ -115,12 +115,7 @@ static const struct attribute_spec empty
>  static void
>  extract_attribute_substring (struct substring *str)
>  {
> -  if (str->length > 4 && str->str[0] == '_' && str->str[1] == '_'
> -  && str->str[str->length - 1] == '_' && str->str[str->length - 2] == 
> '_')
> -{
> -  str->length -= 4;
> -  str->str += 2;
> -}
> +  canonicalize_attr_name (str->str, str->length);
>  }
>
>  /* Insert an array of attributes ATTRIBUTES into a namespace.  This
> @@ -387,7 +382,7 @@ register_scoped_attribute (const struct
>
>/* Attribute names in the table must be in the form 'text' and not
>   in the form '__text__'.  */
> -  gcc_assert (str.length > 0 && str.str[0] != '_');
> +  gcc_checking_assert (!canonicalize_attr_name (str.str, str.length));
>
>slot = name_space->attribute_hash
>  ->find_slot_with_hash (&str, substring_ha

Re: [PATCH] middle-end/103193 - avoid canonicalizing <= and >= to == for floats

2021-11-24 Thread Richard Biener via Gcc-patches
On Mon, Nov 15, 2021 at 12:16 PM Richard Biener via Gcc-patches
 wrote:
>
> This avoids doing aforementioned canonicalization when -ftrapping-math
> is in effect and we honor NaNs.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> OK?

Ping.

>
> Thanks,
> Richard.
>
> 2021-11-15  Richard Biener  
>
> PR middle-end/103193
> * match.pd: Avoid canonicalizing (le/ge @0 @0) to (eq @0 @0)
> with NaNs and -ftrapping-math.
> ---
>  gcc/match.pd | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index a319aefa808..a7f1e56fe2f 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -4629,7 +4629,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>(if (! FLOAT_TYPE_P (TREE_TYPE (@0))
> || ! HONOR_NANS (@0))
> { constant_boolean_node (true, type); }
> -   (if (cmp != EQ_EXPR)
> +   (if (cmp != EQ_EXPR
> +   /* With -ftrapping-math conversion to EQ loses an exception.  */
> +   && (! FLOAT_TYPE_P (TREE_TYPE (@0))
> +   || ! flag_trapping_math))
>  (eq @0 @0)
>  (for cmp (ne gt lt)
>   (simplify
> --
> 2.31.1


Re: [PATCH] attribs: Fix ICEs on attributes starting with _ [PR103365]

2021-11-24 Thread Richard Biener via Gcc-patches
On Wed, 24 Nov 2021, Jakub Jelinek wrote:

> Hi!
> 
> As the patch shows, we have quite a few asserts that we don't call
> lookup_attribute etc. with attr_name that starts with an underscore,
> to make sure nobody is trying to call it with non-canonicalized
> attribute name like "__cold__" instead of "cold".
> We canonicalize only attributes that start with 2 underscores and end
> with 2 underscores though.
> Before Marek's patch, that wasn't an issue, we had no attributes like
> "_foo" or "__bar_" etc., so lookup_scoped_attribute_spec would
> always return NULL for those and we wouldn't try to register them,
> look them up etc., just with -Wattributes would warn about them.
> But now, as the new testcase shows, users can actually request such
> attributes to be ignored, and we ICE for those during
> register_scoped_attribute and when that is fixed, ICE later on when
> somebody uses those attributes because they will be looked up
> to find out that they should be ignored.
> 
> So, the following patch instead of or in addition to, depending on
> how performance sensitive a particular spot is, checking that
> attribute doesn't start with underscore allows attribute
> names that start with underscore as long as it doesn't canonicalize
> (i.e. doesn't start and end with 2 underscores).
> In addition to that, I've noticed lookup_attribute_by_prefix
> was calling get_attribute_name twice unnecessarily, and 2 tests
> were running in c++98 mode with -std=c++98 -std=c++11 which IMHO
> isn't useful because -std=c++11 testing is done too when testing
> all language versions.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

> 2021-11-24  Jakub Jelinek  
> 
>   PR middle-end/103365
>   * attribs.h (lookup_attribute): Allow attr_name to start with
>   underscore, as long as canonicalize_attr_name returns false.
>   (lookup_attribute_by_prefix): Don't call get_attribute_name twice.
>   * attribs.c (extract_attribute_substring): Reimplement using
>   canonicalize_attr_name.
>   (register_scoped_attribute): Change gcc_assert into
>   gcc_checking_assert, verify !canonicalize_attr_name rather than
>   that str.str doesn't start with '_'.
> 
>   * c-c++-common/Wno-attributes-1.c: Require effective target
>   c || c++11 and drop dg-additional-options.
>   * c-c++-common/Wno-attributes-2.c: Likewise.
>   * c-c++-common/Wno-attributes-4.c: New test.
> 
> --- gcc/attribs.h.jj  2021-11-22 10:06:42.173498383 +0100
> +++ gcc/attribs.h 2021-11-23 23:35:13.757972934 +0100
> @@ -188,7 +188,11 @@ is_attribute_p (const char *attr_name, c
>  static inline tree
>  lookup_attribute (const char *attr_name, tree list)
>  {
> -  gcc_checking_assert (attr_name[0] != '_');
> +  if (CHECKING_P && attr_name[0] != '_')
> +{
> +  size_t attr_len = strlen (attr_name);
> +  gcc_checking_assert (!canonicalize_attr_name (attr_name, attr_len));
> +}
>/* In most cases, list is NULL_TREE.  */
>if (list == NULL_TREE)
>  return NULL_TREE;
> @@ -219,7 +223,8 @@ lookup_attribute_by_prefix (const char *
>size_t attr_len = strlen (attr_name);
>while (list)
>   {
> -   size_t ident_len = IDENTIFIER_LENGTH (get_attribute_name (list));
> +   tree name = get_attribute_name (list);
> +   size_t ident_len = IDENTIFIER_LENGTH (name);
>  
> if (attr_len > ident_len)
>   {
> @@ -227,7 +232,7 @@ lookup_attribute_by_prefix (const char *
> continue;
>   }
>  
> -   const char *p = IDENTIFIER_POINTER (get_attribute_name (list));
> +   const char *p = IDENTIFIER_POINTER (name);
> gcc_checking_assert (attr_len == 0 || p[0] != '_');
>  
> if (strncmp (attr_name, p, attr_len) == 0)
> --- gcc/attribs.c.jj  2021-11-22 10:06:42.172498397 +0100
> +++ gcc/attribs.c 2021-11-23 14:58:25.076233815 +0100
> @@ -115,12 +115,7 @@ static const struct attribute_spec empty
>  static void
>  extract_attribute_substring (struct substring *str)
>  {
> -  if (str->length > 4 && str->str[0] == '_' && str->str[1] == '_'
> -  && str->str[str->length - 1] == '_' && str->str[str->length - 2] == 
> '_')
> -{
> -  str->length -= 4;
> -  str->str += 2;
> -}
> +  canonicalize_attr_name (str->str, str->length);
>  }
>  
>  /* Insert an array of attributes ATTRIBUTES into a namespace.  This
> @@ -387,7 +382,7 @@ register_scoped_attribute (const struct
>  
>/* Attribute names in the table must be in the form 'text' and not
>   in the form '__text__'.  */
> -  gcc_assert (str.length > 0 && str.str[0] != '_');
> +  gcc_checking_assert (!canonicalize_attr_name (str.str, str.length));
>  
>slot = name_space->attribute_hash
>->find_slot_with_hash (&str, substring_hash (str.str, str.length),
> --- gcc/testsuite/c-c++-common/Wno-attributes-1.c.jj  2021-11-11 
> 14:35:37.637348034 +0100
> +++ gcc/testsuite/c-c++-common/Wno-attributes-1.c 2021-11-23 
> 15:0

Re: [PATCH] middle-end/103193 - avoid canonicalizing <= and >= to == for floats

2021-11-24 Thread Jakub Jelinek via Gcc-patches
On Wed, Nov 24, 2021 at 10:00:56AM +0100, Richard Biener via Gcc-patches wrote:
> On Mon, Nov 15, 2021 at 12:16 PM Richard Biener via Gcc-patches
>  wrote:
> >
> > This avoids doing aforementioned canonicalization when -ftrapping-math
> > is in effect and we honor NaNs.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> >
> > OK?
> 
> Ping.
> 
> >
> > Thanks,
> > Richard.
> >
> > 2021-11-15  Richard Biener  
> >
> > PR middle-end/103193
> > * match.pd: Avoid canonicalizing (le/ge @0 @0) to (eq @0 @0)
> > with NaNs and -ftrapping-math.

Ok, thanks.

> >  gcc/match.pd | 5 -
> >  1 file changed, 4 insertions(+), 1 deletion(-)
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index a319aefa808..a7f1e56fe2f 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -4629,7 +4629,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >(if (! FLOAT_TYPE_P (TREE_TYPE (@0))
> > || ! HONOR_NANS (@0))
> > { constant_boolean_node (true, type); }
> > -   (if (cmp != EQ_EXPR)
> > +   (if (cmp != EQ_EXPR
> > +   /* With -ftrapping-math conversion to EQ loses an exception.  */
> > +   && (! FLOAT_TYPE_P (TREE_TYPE (@0))
> > +   || ! flag_trapping_math))
> >  (eq @0 @0)
> >  (for cmp (ne gt lt)
> >   (simplify
> > --
> > 2.31.1

Jakub



RE: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]

2021-11-24 Thread Kong, Lingling via Gcc-patches
OK, this is the patch I am preparing to check in.

-Original Message-
From: Uros Bizjak  
Sent: Wednesday, November 24, 2021 4:49 PM
To: Kong, Lingling 
Cc: Liu, Hongtao ; gcc-patches@gcc.gnu.org
Subject: Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert 
_Float16 to SFmode with -mf16c [PR 102811]

On Wed, Nov 24, 2021 at 9:44 AM Kong, Lingling  wrote:
>
> Hi,
>
> vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with 
> -mf16c, so I added define_insn extendhfsf2 and truncsfhf2 for TARGET_F16C.
> The register is cleared before conversion; I also updated movhi_internal and 
> ix86_can_change_mode_class, and fixed the commit message.
>
> OK for master?

OK, with a small adjustment to ChangeLog.

Thanks,
Uros.

> gcc/ChangeLog:
>
> PR target/102811
> * config/i386/i386.c (ix86_can_change_mode_class): Allow 16 bit data 
> in XMM register
> for TARGET_SSE2.
> * config/i386/i386.md (extendhfsf2): Add extendhfsf2 for TARGET_F16C.
> (extendhfdf2): Restrict extendhfdf for TARGET_AVX512FP16 only.
> (*extendhf2): Rename from extendhf2.
> (truncsfhf2): Likewise.
> (truncdfhf2): Likewise.
> (*trunc2): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> PR target/102811
> * gcc.target/i386/pr90773-21.c: Optimize movhi_internal,
> also allow pextrw replace vmovd + movw.

Just write:

* gcc.target/i386/pr90773-21.c: Allow pextrw instead of movw.

> * gcc.target/i386/pr90773-23.c: Ditto.
> * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.
> ---
>  gcc/config/i386/i386.c|  5 +-
>  gcc/config/i386/i386.md   | 74 +--
>  .../i386/avx512vl-vcvtps2ph-pr102811.c| 11 +++
>  gcc/testsuite/gcc.target/i386/pr90773-21.c|  2 +-
>  gcc/testsuite/gcc.target/i386/pr90773-23.c|  2 +-
>  5 files changed, 83 insertions(+), 11 deletions(-)  create mode 
> 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 
> e94efdf39fb..4b813533961 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, 
> machine_mode to,
>  disallow a change to these modes, reload will assume it's ok to
>  drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
>  the vec_dupv4hi pattern.
> -NB: AVX512FP16 supports vmovw which can load 16bit data to sse
> -register.  */
> -  int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 
> 4;
> +NB: SSE2 can load 16bit data to sse register via pinsrw.  */
> +  int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 :
> +4;
>if (GET_MODE_SIZE (from) < mov_size)
> return false;
>  }
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 
> 6eb9de81921..6ee264f1151 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2525,6 +2525,16 @@
>  case TYPE_SSEMOV:
>return ix86_output_ssemov (insn, operands);
>
> +case TYPE_SSELOG:
> +  if (SSE_REG_P (operands[0]))
> +   return MEM_P (operands[1])
> + ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
> + : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> +  else
> +   return MEM_P (operands[1])
> + ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> + : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> +
>  case TYPE_MSKLOG:
>if (operands[1] == const0_rtx)
> return "kxorw\t%0, %0, %0";
> @@ -2540,13 +2550,17 @@
>  }
>  }
>[(set (attr "isa")
> -   (cond [(eq_attr "alternative" "9,10,11,12,13")
> - (const_string "avx512fp16")
> +   (cond [(eq_attr "alternative" "9,10,11,12")
> + (const_string "sse2")
> +  (eq_attr "alternative" "13")
> + (const_string "sse4")
>]
>(const_string "*")))
> (set (attr "type")
>   (cond [(eq_attr "alternative" "9,10,11,12,13")
> - (const_string "ssemov")
> + (if_then_else (match_test "TARGET_AVX512FP16")
> +   (const_string "ssemov")
> +   (const_string "sselog"))
> (eq_attr "alternative" "4,5,6,7")
>   (const_string "mskmov")
> (eq_attr "alternative" "8") @@ -4574,8 +4588,32 @@
>emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
>  })
>
> -(define_insn "extendhf2"
> -  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
> +(define_expand "extendhfsf2"
> +  [(set (match_operand:SF 0 "register_operand")
> +   (float_extend:SF
> + (match_operand:HF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +{
> +  if (!TARGET_AVX512FP16)
> +{
> +  rtx res = gen_reg_rtx (V4SFmode);
> +  rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
> +
> +  ix86_

Re: [PATCH] attribs: Fix ICEs on attributes starting with _ [PR103365]

2021-11-24 Thread Jakub Jelinek via Gcc-patches
On Wed, Nov 24, 2021 at 12:53:02AM -0800, Andrew Pinski wrote:
> Only one comment on the new testcases, you might want to add a
> testcase for the option on the command line too.

You're right, I've committed the patch with the following
incremental diff in it:

--- gcc/testsuite/c-c++-common/Wno-attributes-4.c   2021-11-23 
15:02:23.348804286 +0100
+++ gcc/testsuite/c-c++-common/Wno-attributes-4.c   2021-11-24 
10:05:20.769192421 +0100
@@ -1,8 +1,7 @@
 /* PR middle-end/103365 */
 /* { dg-do compile { target { c || c++11 } } } */
-
-#pragma GCC diagnostic ignored_attributes "foo::_bar"
-#pragma GCC diagnostic ignored_attributes "_foo::bar"
+/* { dg-additional-options "-Wno-attributes=foo::_bar" } */
+/* { dg-additional-options "-Wno-attributes=_foo::bar" } */
 
 [[foo::_bar]] void foo (void);
 [[_foo::bar]] void bar (void);
--- gcc/testsuite/c-c++-common/Wno-attributes-5.c.jj2021-11-24 
10:04:37.228813482 +0100
+++ gcc/testsuite/c-c++-common/Wno-attributes-5.c   2021-11-24 
10:04:20.254055617 +0100
@@ -0,0 +1,8 @@
+/* PR middle-end/103365 */
+/* { dg-do compile { target { c || c++11 } } } */
+
+#pragma GCC diagnostic ignored_attributes "foo::_bar"
+#pragma GCC diagnostic ignored_attributes "_foo::bar"
+
+[[foo::_bar]] void foo (void);
+[[_foo::bar]] void bar (void);

after testing those tests again with vanilla and patched
compiler.

Jakub



Re: [PATCH 1/2] Split return functionality of get_non_stale_global_range.

2021-11-24 Thread Richard Biener via Gcc-patches
On Tue, Nov 23, 2021 at 6:03 PM Andrew MacLeod via Gcc-patches
 wrote:
>
> This is the first of 2 patches which will reduce the depth of the call
> chain in ranger.
>
> This patch simply splits the functionality of the routine
> get_non_stale_global_range() from a single boolean return to a boolean
> return and a bool reference.
>
> This routine queries the global cache for a value.  If  there is no
> value, it queries the legacy global range and sets it to that value.  If
> there was a value, it checks the temporal cache to see if it's current,
> and if it is, returns TRUE plus the range.
>
> If the value is not current, or it was set to the legacy global value,
> then the timestamp is marked as "always current" as it indicates a
> calculation is ongoing, and we don't want to trigger any additional
> temporal faults until the calculation is done. And finally FALSE is
> returned for all these cases.
>
> The second patch in the series wants to disambiguate at the call site
> whether this was a failure due to not being in the global cache, or
> whether it was due to the timestamp being out of date and take different
> actions for each case.   Details in the following note.
>
> This has been Bootstrapped on x86_64-pc-linux-gnu with no regressions.  OK?

OK.

Richard.

> Andrew
>


Re: [PATCH 2/2] PR tree-optimization/103231 - Directly resolve range_of_stmt dependencies.

2021-11-24 Thread Richard Biener via Gcc-patches
On Tue, Nov 23, 2021 at 6:04 PM Andrew MacLeod via Gcc-patches
 wrote:
>
> This is the second patch in the series.
>
> Ranger uses its own API to recursively satisfy dependencies. When
> range_of_stmt is called on _1482 = _1154 + _1177;  it picks up the
> ranges of _1154 and _1177 from its cache. If those statements have not
> been seen yet, it recursively calls range_of_stmt on each one to resolve
> the answer.  Each main API call can trigger up to 5 other calls to get
> to the next API point:
>
> gimple_ranger::fold_range_internal (...)
> gimple_ranger::range_of_stmt (_1154,...)
> gimple_ranger::range_of_expr (_1154,)
> fold_using_range::range_of_range_op (..)
> fold_using_range::fold_stmt (...)
> gimple_ranger::fold_range_internal (...)
> gimple_ranger::range_of_stmt (_1482,...)
>
> For a normal forward walk, values tend to already be in the cache, but
> when we try to answer a range_on_edge question on a back edge, it can
> trigger a very long series of queries.  I spent some time analyzing
> these patterns, and found that regardless of which API entry point was
> used, ultimately range_of_stmt is invoked in a predictable order to
> initiate the cache values.
>
> This patch implements a dependency resolver which range_of_stmt
> uses when it is called on something which does not have a cache entry
> yet (thus the disambiguation of the temporal failure vs lack of cache
> entry in the previous patch).
>
> This looks at each operand, and if that operand does not have a cache
> entry, pushes it on a stack.   Names are popped from the stack and
> fold_using_range() is invoked once all the operands have been
> resolved.   When we do get to call fold_using_range::fold_stmt(), we are
> sure the operands are cached and the value will simply be calculated.
> This is ultimately the exact series of events that would have happened
> had the main API been used... except we don't involve the call stack
> anymore for each one.
>
> Well, mostly :-).  For this fix, we only do this with operands of stmts
> which have a range-ops handler.. meaning we do not use the API for
> anything range-ops understands.  We will still use the main API for
> resolving PHIS and other statements as they are encountered.  We could
> do this for PHIS as well, but for the most part it was the chains of
> stmts within a block that were causing the vast majority of the issue.
> If we later discover large chains of PHIs are causing issues as well,
> then I can easily add them to this as well.  I avoided them this time
> because there is extra overhead involved in traversing all the PHI
> arguments extra times.  Sticking with range-ops limits us to 2 operands
> to check, and the overhead is very minimal.
>
> I have tested this with PHIs as well and we could just include them
> upfront. The overhead is more than doubled, but the increased compile
> time of a VRP pass is still under 1%.
>
> Bootstrapped on x86_64-pc-linux-gnu with no regressions.  OK?

OK.

Richard.

> Andrew
>
>
>


[PATCH][pushed] jit: Initialize function::m_blocks in ctor

2021-11-24 Thread Martin Liška

This resolves the problem reported here:
https://mail.gnu.org/archive/html/bug-gnu-emacs/2021-11/msg00606.html
https://bugzilla.opensuse.org/show_bug.cgi?id=1192951

I'm going to push it as obvious.

Martin

gcc/jit/ChangeLog:

* jit-playback.c (function): Initialize m_blocks vector.
---
 gcc/jit/jit-playback.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/jit/jit-playback.c b/gcc/jit/jit-playback.c
index 59399dee251..b412eae6aa8 100644
--- a/gcc/jit/jit-playback.c
+++ b/gcc/jit/jit-playback.c
@@ -1484,7 +1484,8 @@ function (context *ctxt,
 : m_ctxt(ctxt),
   m_inner_fndecl (fndecl),
   m_inner_bind_expr (NULL),
-  m_kind (kind)
+  m_kind (kind),
+  m_blocks ()
 {
   if (m_kind != GCC_JIT_FUNCTION_IMPORTED)
 {
--
2.33.1



[committed] openmp: Fix up handling of kind(host) and kind(nohost) in ACCEL_COMPILERs [PR103384]

2021-11-24 Thread Jakub Jelinek via Gcc-patches
Hi!

As the testcase shows, we weren't handling kind(host) and kind(nohost) properly
in the ACCEL_COMPILERs, the code written in there is valid for the host
compiler only, where if we are maybe offloaded, we defer resolution after IPA,
otherwise return 0 for kind(nohost) and accept it for kind(host).  Note,
omp_maybe_offloaded is false after IPA.  If ACCEL_COMPILER is defined, it is
the other way around, but also we know we are after IPA.

Bootstrapped/regtested on x86_64-linux and i686-linux and tested with
offloading to nvptx-none where the new testcase fails without the patch,
committed to trunk.

2021-11-24  Jakub Jelinek  

PR middle-end/103384
gcc/
* omp-general.c (omp_context_selector_matches): For ACCEL_COMPILER,
return 0 for kind(host) and continue for kind(nohost).
libgomp/
* testsuite/libgomp.c/declare-variant-2.c: New test.

--- gcc/omp-general.c.jj2021-10-15 11:59:16.135683948 +0200
+++ gcc/omp-general.c   2021-11-23 15:45:32.761468592 +0100
@@ -1487,16 +1487,22 @@ omp_context_selector_matches (tree ctx)
  continue;
if (!strcmp (prop, "host"))
  {
+#ifdef ACCEL_COMPILER
+   return 0;
+#else
if (omp_maybe_offloaded ())
  ret = -1;
continue;
+#endif
  }
if (!strcmp (prop, "nohost"))
  {
+#ifndef ACCEL_COMPILER
if (omp_maybe_offloaded ())
  ret = -1;
else
  return 0;
+#endif
continue;
  }
int r = 0;
--- libgomp/testsuite/libgomp.c/declare-variant-2.c.jj  2021-11-24 
10:14:11.689619756 +0100
+++ libgomp/testsuite/libgomp.c/declare-variant-2.c 2021-11-24 
10:20:37.956110726 +0100
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+
+#include 
+#include 
+
+void
+foo_host (void)
+{
+  if (!omp_is_initial_device ())
+abort ();
+}
+
+#pragma omp declare variant (foo_host) match (device={kind(host)})
+void
+foo (void)
+{
+  if (omp_is_initial_device ())
+abort ();
+}
+
+void
+bar_nohost (void)
+{
+  if (omp_is_initial_device ())
+abort ();
+}
+
+#pragma omp declare variant (bar_nohost) match (device={kind(nohost)})
+void
+bar (void)
+{
+  if (!omp_is_initial_device ())
+abort ();
+}
+
+int
+main ()
+{
+  #pragma omp target
+  {
+foo ();
+bar ();
+  }
+  return 0;
+}

Jakub



Re: [PATCH 1v2/3][vect] Add main vectorized loop unrolling

2021-11-24 Thread Andre Vieira (lists) via Gcc-patches



On 22/11/2021 12:39, Richard Biener wrote:

+  if (first_loop_vinfo->suggested_unroll_factor > 1)
+{
+  if (LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (first_loop_vinfo))
+   {
+ if (dump_enabled_p ())
+   dump_printf_loc (MSG_NOTE, vect_location,
+"* Re-trying analysis with first vector
mode"
+" %s for epilogue with partial vectors of"
+" unrolled first loop.\n",
+GET_MODE_NAME (vector_modes[0]));
+ mode_i = 0;

and the later done check for bigger VF than main loop - why would
we re-start at 0 rather than at the old mode?  Maybe we want to
remember the iterator value we started at when arriving at the
main loop mode?  So if we analyzed successfully with mode_i == 2,
then successfully at mode_i == 4 which suggested an unroll of 2,
re-start at the mode_i we continued after the mode_i == 2
successful analysis?  To just consider the "simple" case of
AVX vs SSE it IMHO doesn't make much sense to succeed with
AVX V4DF, succeed with SSE V2DF and figure it's better than V4DF AVX
but get a suggestion of 2 times unroll and then re-try AVX V4DF
just to re-compute that yes, it's worse than SSE V2DF?  You
are probably thinking of SVE vs ADVSIMD here but do we need to
start at 0?  Adding a comment to the code would be nice.

Thanks,


I was indeed thinking SVE vs Advanced SIMD where we end up having to 
compare different vectorization strategies, which will have different 
costs depending. The hypothetical case, as in I don't think I've come 
across one, is where if we decide to vectorize the main loop for V8QI 
and unroll 2x, yielding a VF of 16, we may then want to use a 
predicated VNx16QI epilogue. Though the question here is whether it is 
possible for an Advanced SIMD V8QI vectorization to beat V16QI but a SVE 
predicated VNx16QI to beat a VNx8QI for the same loop.  Might be good to 
get Sandiford's opinion on this.


I do think that initially I was more concerned with skipping a VNx8QI 
after selecting a V8QI but I just checked and Advanced SIMD modes are 
listed before SVE for (among others) this reason.


Regards,
Andre



Re: [PATCH v2 0/2] RISC-V: add gcc support for Scalar Cryptography v1.0.0-rc6

2021-11-24 Thread Kito Cheng via Gcc-patches
I would prefer to accept this patchset even with no builtin function
or intrinsic function yet;
this not only adds support for the -march option, but also introduces
predefined macros like __riscv_zk*,
which can be used in *.S files to check whether those instructions are
available or not.


On Wed, Nov 24, 2021 at 11:23 AM Palmer Dabbelt  wrote:
>
> [Changing to Jim's new address]
>
> On Mon, 22 Nov 2021 00:19:08 PST (-0800), s...@isrc.iscas.ac.cn wrote:
> > From: SiYu Wu 
> >
> > This patch adds gcc backend support for RISC-V Scalar Cryptography
> > Extension (k-ext), including machine description, builtins defines and
> > testcases for each k-ext's subset.
> >
> > A note about Zbkx: Zbkx should be implemented in bitmanip's Zbp, but
> > since Zbp is not included in the bitmanip spec v1.0, and crypto's v1.0
> > release will be earlier than bitmanip's next release, for now we are
> > implementing it here.
> >
> > Version logs:
> >
> > v2: As Kito mentions, now this patch only includes the arch string related
> > stuff, the builtins and md changes is not included, waiting for the builtin
> > and intrinsic added to the spec. Also removed the unnecessary patches and 
> > add
> > Changelogs.
>
> I don't think there's anything wrong with what's here, but IMO we should
> hold off on merging until GCC does something with these extensions.
>
> IIUC all this enables is passing "-march=*Zk*" instead of
> "-Wa,-march=*Zk*", and while that is useful I'm worried it'll just make
> more of a headache for users who lose a simple way to detect the
> intrinsics.  IMO forcing users to pass -Wa properly encodes the "GCC
> doesn't support these, but binutils does" scenario pretty sanely, and
> users doing things at this level of complexity should be used to that
> already because it happens somewhat frequently.
>
> I'm not sure if I'm missing some use case for this, though.
>
> > SiYu Wu (2):
> >   RISC-V: Add option defines for Scalar Cryptography
> >   RISC-V: Add implied defines of Zk, Zkn and Zks
> >
> >  gcc/common/config/riscv/riscv-common.c | 38 +-
> >  gcc/config/riscv/arch-canonicalize | 16 ++-
> >  gcc/config/riscv/riscv-opts.h  | 22 +++
> >  gcc/config/riscv/riscv.opt |  3 ++
> >  4 files changed, 77 insertions(+), 2 deletions(-)


Re: [PATCH][pushed] jit: Initialize function::m_blocks in ctor

2021-11-24 Thread Andrea Corallo via Gcc-patches
Martin Liška  writes:

> This resolves the problem reported here:
> https://mail.gnu.org/archive/html/bug-gnu-emacs/2021-11/msg00606.html
> https://bugzilla.opensuse.org/show_bug.cgi?id=1192951
>
> I'm going to push it as obvious.
>
> Martin

Hi Martin,

thanks for the fix!

Question: that piece of code is there since 2014, should we back-port
the fix as well?

Best Regards

  Andrea


Re: [PATCH] [1/2] arm: Implement cortex-M return signing address codegen

2021-11-24 Thread Andrea Corallo via Gcc-patches
Andrea Corallo via Gcc-patches  writes:

> Hi all,
>
> this patch enables address return signature and verification based on
> Armv8.1-M Pointer Authentication [1].
>
> To sign the return address, we use the PAC R12, LR, SP instruction
> upon function entry.  This is signing LR using SP and storing the
> result in R12.  R12 will be pushed into the stack.
>
> During function epilogue R12 will be popped and AUT R12, LR, SP will
> be used to verify that the content of LR is still valid before return.
>
> Here an example of PAC instrumented function prologue and epilogue:
>
> pac r12, lr, sp
> push{r3, r7, lr}
> push{r12}
> sub sp, sp, #4
> [...] function body
> add sp, sp, #4
> pop {r12}
> pop {r3, r7, lr}
> aut r12, lr, sp
> bx  lr
>
> The patch also takes care of generating a PACBTI instruction in place
> of the sequence BTI+PAC when Branch Target Identification is enabled
> contextually.
>
> These two patches apply on top of Tejas series posted here [2].
>
> Regression tested, and arm-linux-gnu and aarch64-linux-gnu bootstrapped.
>
> Best Regards
>
>   Andrea
>
> [1] 
> 
> [2] 

Ping

Best Regards

  Andrea


Re: [PATCH] implement -Winfinite-recursion [PR88232]

2021-11-24 Thread Thomas Schwinge
Hi!

On 2021-11-09T21:28:43-0700, Martin Sebor via Gcc-patches 
 wrote:
> The attached patch adds support to the middle end for detecting
> infinitely recursive calls.  The warning is controlled by the new
> -Winfinite-recursion option.  The option name is the same as
> Clang's.

Thanks!

> The warning detects a superset of problems detected by Clang
> (based on its tests).  It detects the problem in PR88232
> (the feature request) as well as the one in PR 87742,
> an unrelated problem report that was root-caused to a bug due
> to infinite recursion.

... and I'm thus confirming that it also would have caught
PR101204 "infinite recursion in gtype-desc.c since
r12-1801-g7036e9ef462fde8181bece4ac4e03f3aa27204dc":

[...]
gtype-desc.c: In function ‘void gt_pch_nx(int_hash*, gt_pointer_operator, void*)’:
gtype-desc.c:11311:1: error: infinite recursion detected 
[-Werror=infinite-recursion]
11311 | gt_pch_nx (int_hash* x ATTRIBUTE_UNUSED,
  | ^
gtype-desc.c:11315:15: note: recursive call
11315 | gt_pch_nx (&((*x)), op, cookie);
  | ~~^
cc1plus: all warnings being treated as errors
make[3]: *** [gtype-desc.o] Error 1
make[3]: Leaving directory `[...]/build-gcc/gcc'
make[2]: *** [all-stage2-gcc] Error 2
make[2]: Leaving directory `[...]/build-gcc'
make[1]: *** [stage2-bubble] Error 2
make[1]: Leaving directory `[...]/build-gcc'
make: *** [all] Error 2

That's with recent fix-up commit c71cb26a9e841888f52e4bfcaad94c8f8ecb4fdb
"Get rid of infinite recursion for 'typedef' used with GTY-marked
'gcc/diagnostic-spec.h:nowarn_map' [PR101204, PR103157]" temporarily
reverted (and commit f861ed8b29a5eb6164d1ddbcfbb6232dddae713f
"Use 'location_hash' for 'gcc/diagnostic-spec.h:nowarn_map'" as a
prerequisite, too).


Grüße
 Thomas
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


'gengtype' (was: Get rid of infinite recursion for 'typedef' used with GTY-marked 'gcc/diagnostic-spec.h:nowarn_map' [PR101204])

2021-11-24 Thread Thomas Schwinge
Hi!

On 2021-11-09T21:52:50-0700, Martin Sebor  wrote:
> Amazing how riddled with bugs this
> gengtype stuff is.

Relevant Mike Stump quote from long ago, that I ran into while
researching 'gengtype' vs. 'typedef',
:

| Think of it this way, imagine you had a really good C parser, but that
| you couldn't reuse any of it to write complex transformational style
| code, but rather, had to re-implement a new C parser from scratch and
| you did it, uhm, what's the right word, expediently.

;-\


Alternative approaches to 'gengtype' have been discussed more than once,
but...


Grüße
 Thomas
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


Re: [PATCH][pushed] jit: Initialize function::m_blocks in ctor

2021-11-24 Thread Martin Liška

On 11/24/21 11:06, Andrea Corallo wrote:

Martin Liška  writes:


This resolves the problem reported here:
https://mail.gnu.org/archive/html/bug-gnu-emacs/2021-11/msg00606.html
https://bugzilla.opensuse.org/show_bug.cgi?id=1192951

I'm going to push it as obvious.

Martin


Hi Martin,

thanks for the fix!


You're welcome.



Question: that piece of code is there since 2014, should we back-port
the fix as well?


I've just pushed it to all active code streams (master, releases/gcc-{9,10,11}).

Cheers,
Martin



Best Regards

   Andrea





Re: [PATCH][pushed] jit: Initialize function::m_blocks in ctor

2021-11-24 Thread Andrea Corallo via Gcc-patches
Martin Liška  writes:

[...]

>> Question: that piece of code is there since 2014, should we
>> back-port
>> the fix as well?
>
> I've just pushed it to all active code streams (master, 
> releases/gcc-{9,10,11}).

Wonderful, thanks again.

  Andrea


Re: [PATCH] Loop unswitching: support gswitch statements.

2021-11-24 Thread Martin Liška

On 11/24/21 09:00, Richard Biener wrote:

On Tue, Nov 23, 2021 at 5:36 PM Martin Liška  wrote:


On 11/23/21 16:20, Martin Liška wrote:

Sure, so for e.g. case 1 ... 5 we would need to create a new unswitch_predicate
with 1 <= index && index <= 5 tree predicate (and the corresponding irange 
range).
Later once we unswitch on it, we should use a special unreachable_flag that will
be used for marking of dead edges (similarly how we fold gconds to 
boolean_{false/true}_node.
Does it make sense?


I have thought about it more and it's not enough. What we really want is having 
a irange
for *each edge* (2 for gconds and multiple for gswitchs). Once we select a 
unswitch_predicate,
then we need to fold_range in true/false loop all these iranges. Doing that we 
can handle situations like:

if (index < 1)
 do_something1

if (index > 2)
 do_something2

switch (index)
 case 1 ... 2:
   do_something;
...

As seen, once we unswitch on 'index < 1' and 'index > 2', then the first 
case will be taken in the false_edge
of 'index > 2' loop unswitching.


Hmm.  I'm not sure it needs to be this complicated.  We're basically
evaluating ranges/predicates based
on a fixed set of versioning predicates.  Your implementation created
"predicates" for the to be simplified
conditions but in the end we like to evaluate the actual stmt to
figure the taken/not taken edges.


Yes.


 IIRC
elsewhere Andrew showed a snippet on how to evaluate a stmt with a
given range - not sure if that


I'm using that. First I isolate a irange from a versioning-predicate with
ranger->range_on_edge and I later combine it with:
fold_range (r, stmt, parent_range).



was useful enough.  So what I think would be nice is if we could somehow
use ranger's path query
without an actual CFG.  So we virtually have

   if (versioning-predicate1)
 if (versioning-predicate2)
;
else
   for (;;) // out current loop
 {
   ...
   if (condition)
 ;
  ...
   switch (var)
  {
...
   }
 }

and versioning-predicate1 and versioning-predicate2 are not in the IL.
What we'd like
to do is seed the path query with a "virtual" path through the two
predicates to the
entry of the loop and compute_ranges based on those.


I can do that by building a vector of tuples
that would be passed to recursive calls of tree_unswitch_single_loop.
That basically describes which true/false edges are taken for the so far created
versioning-predicates. Right? That should be usable.


Then we like to
use range_of_stmt on 'if (condition)' and 'switch (var)' to determine
not taken edges.


Works for me and we would mark unreachable case BBs with a unreachable_flag
(we can't fold it away as shown in the original patch attempt).


Looking somewhat at the sources it seems like we "simply" need to do what
compute_outgoing_relations does - unfortunately the code lacks comments
so I have no idea what jt_fur_source src (...).register_outgoing_edges does ...

Anyway, for now manually simplifying things is fine but I probably would still
stick to a basic interface that marks not taken outgoing edges of a stmt based
on the set of versioning predicates.


Lemme try working on another version of the patch.

Martin



Richard.



Martin




Re: [PATCH 1v2/3][vect] Add main vectorized loop unrolling

2021-11-24 Thread Richard Biener via Gcc-patches
On Wed, 24 Nov 2021, Andre Vieira (lists) wrote:

> 
> On 22/11/2021 12:39, Richard Biener wrote:
> > +  if (first_loop_vinfo->suggested_unroll_factor > 1)
> > +{
> > +  if (LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (first_loop_vinfo))
> > +   {
> > + if (dump_enabled_p ())
> > +   dump_printf_loc (MSG_NOTE, vect_location,
> > +"* Re-trying analysis with first vector
> > mode"
> > +" %s for epilogue with partial vectors of"
> > +" unrolled first loop.\n",
> > +GET_MODE_NAME (vector_modes[0]));
> > + mode_i = 0;
> >
> > and the later done check for bigger VF than main loop - why would
> > we re-start at 0 rather than at the old mode?  Maybe we want to
> > remember the iterator value we started at when arriving at the
> > main loop mode?  So if we analyzed successfully with mode_i == 2,
> > then successfully at mode_i == 4 which suggested an unroll of 2,
> > re-start at the mode_i we continued after the mode_i == 2
> > successful analysis?  To just consider the "simple" case of
> > AVX vs SSE it IMHO doesn't make much sense to succeed with
> > AVX V4DF, succeed with SSE V2DF and figure it's better than V4DF AVX
> > but get a suggestion of 2 times unroll and then re-try AVX V4DF
> > just to re-compute that yes, it's worse than SSE V2DF?  You
> > are probably thinking of SVE vs ADVSIMD here but do we need to
> > start at 0?  Adding a comment to the code would be nice.
> >
> > Thanks,
> 
> I was indeed thinking SVE vs Advanced SIMD where we end up having to compare
> different vectorization strategies, which will have different costs depending.
> The hypothetical case, as in I don't think I've come across one, is where if
> we decide to vectorize the main loop for V8QI and unroll 2x, yielding a VF of
> 16, we may then want to use a predicated VNx16QI epilogue.

But this isn't the epilogue handling ...

> Though the
> question here is whether it is possible for an Advanced SIMD V8QI
> vectorization to beat V16QI but a SVE predicated VNx16QI to beat a VNx8QI for
> the same loop.  Might be good to get Sandiford's opinion on this.
> 
> I do think that initially I was more concerned with skipping a VNx8QI after
> selecting a V8QI but I just checked and Advanced SIMD modes are listed before
> SVE for (among others) this reason.
> 
> Regards,
> Andre
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Ivo Totev; HRB 36809 (AG Nuernberg)


Re: [PATCH, v2, OpenMP 5.0] Implement relaxation of implicit map vs. existing device mappings (for mainline trunk)

2021-11-24 Thread Thomas Schwinge
Hi!

On 2021-11-06T00:51:59+0800, Chung-Lin Tang  wrote:
> On 2021/6/24 11:55 PM, Jakub Jelinek wrote:
>> On Fri, May 14, 2021 at 09:20:25PM +0800, Chung-Lin Tang wrote:
>>> diff --git a/gcc/gimplify.c b/gcc/gimplify.c
>>> index e790f08b23f..69c4a8e0a0a 100644
>>> --- a/gcc/gimplify.c
>>> +++ b/gcc/gimplify.c
>>> @@ -10374,6 +10374,7 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n, 
>>> void *data)
>>>   gcc_unreachable ();
>>> }
>>> OMP_CLAUSE_SET_MAP_KIND (clause, kind);
>>> +  OMP_CLAUSE_MAP_IMPLICIT_P (clause) = 1;
>>> if (DECL_SIZE (decl)
>>>   && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
>>> {

>> Also as Thomas mentioned, it should be restricted to non-OpenACC,

> Agreed, I've adjusted the patch to only do this implicit setting for OpenMP.
> This reduces a lot of the originally needed scan test adjustment for existing 
> OpenACC testcases.

..., but not all, because this piece is still effective:

>>> @@ -10971,9 +10972,15 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, 
>>> gimple_seq body, tree *list_p,
>>> list_p = &OMP_CLAUSE_CHAIN (c);
>>>   }
>>>
>>> -  /* Add in any implicit data sharing.  */
>>> +  /* Add in any implicit data sharing. Implicit clauses are added at the 
>>> start
>>> + of the clause list, but after any non-map clauses.  */
>>> struct gimplify_adjust_omp_clauses_data data;
>>> -  data.list_p = list_p;
>>> +  tree *implicit_add_list_p = orig_list_p;
>>> +  while (*implicit_add_list_p
>>> +&& OMP_CLAUSE_CODE (*implicit_add_list_p) != OMP_CLAUSE_MAP)
>>> +implicit_add_list_p = &OMP_CLAUSE_CHAIN (*implicit_add_list_p);

..., which effects changes such as:

> --- a/gcc/testsuite/c-c++-common/goacc/combined-reduction.c
> +++ b/gcc/testsuite/c-c++-common/goacc/combined-reduction.c

> -/* { dg-final { scan-tree-dump-times "omp target oacc_parallel 
> reduction.+:v1. map.tofrom:v1" 1 "gimple" } } */
> +/* { dg-final { scan-tree-dump-times "omp target oacc_parallel 
> reduction.+:v1. firstprivate.n. map.tofrom:v1" 1 "gimple" } } */

> --- a/gcc/testsuite/c-c++-common/goacc/mdc-1.c
> +++ b/gcc/testsuite/c-c++-common/goacc/mdc-1.c

> -/* { dg-final { scan-tree-dump-times "pragma omp target oacc_parallel 
> map.attach:s.e .bias: 0.. map.tofrom:s .len: 32" 1 "omplower" } } */
> +/* { dg-final { scan-tree-dump-times "pragma omp target oacc_parallel 
> map.tofrom:s .len: 32.. map.attach:s.e .bias: 0.." 1 "omplower" } } */

> --- a/gcc/testsuite/g++.dg/goacc/firstprivate-mappings-1.C
> +++ b/gcc/testsuite/g++.dg/goacc/firstprivate-mappings-1.C

> - { dg-final { scan-tree-dump {(?n)#pragma omp target oacc_parallel 
> map\(from:array_so \[len: 4\]\) firstprivate\(} omplower } }
> + { dg-final { scan-tree-dump {(?n)#pragma omp target oacc_parallel 
> firstprivate\([^)]+\) map\(from:array_so \[len: 4\]\)} omplower } }

..., and you've changed:

> --- a/gcc/testsuite/c-c++-common/goacc/firstprivate-mappings-1.c
> +++ b/gcc/testsuite/c-c++-common/goacc/firstprivate-mappings-1.c
> @@ -419,12 +419,7 @@ vla (int array_li)
>copyout (array_so)
>/* The gimplifier has created an implicit 'firstprivate' clause for the 
> array
>   length.
> - { dg-final { scan-tree-dump {(?n)#pragma omp target oacc_parallel 
> map\(from:array_so \[len: 4\]\) firstprivate\(array_li.[0-9]+\)} omplower { 
> target { ! c++ } } } }
> - { dg-final { scan-tree-dump {(?n)#pragma omp target oacc_parallel 
> map\(from:array_so \[len: 4\]\) firstprivate\(} omplower { target { c++ } } } 
> }
> - (C++ computes an intermediate value, so can't scan for 
> 'firstprivate(array_li)'.)  */
> -  /* For C, non-LP64, the gimplifier has also created a mapping for the array
> - itself; PR90859.
> - { dg-final { scan-tree-dump {(?n)#pragma omp target oacc_parallel 
> map\(from:array_so \[len: 4\]\) firstprivate\(array_li.[0-9]+\) 
> map\(tofrom:\(\*array.[0-9]+\) \[len: D\.[0-9]+\]\) map\(firstprivate:array 
> \[pointer assign, bias: 0\]\) \[} omplower { target { c && { ! lp64 } } } } } 
> */
> + { dg-final { scan-tree-dump {(?n)#pragma omp target oacc_parallel 
> firstprivate\(array_li.[0-9]+\) map\(from:array_so \[len: 4\]\) \[} omplower 
> } } */
>{
>  array_so = sizeof array;
>}

..., however the clauses reordering alone isn't going to fix PR90859
"[OMP] Mappings for VLA different depending on 'target { c && { !  lp64 } }'",
so it's not correct to just remove that testing/documentation here -- this
change gave rise to PR103244
"c-c++-common/goacc/firstprivate-mappings-1.c fails in certain
configurations since g:b7e20480630e3eeb9eed8b3941da3b3f0c22c969".  To
resolve that, and until we properly and deliberately look into also for
OpenACC enabling your "Implement relaxation of implicit map vs. existing
device mappings" (we certainly should!), I've now pushed to master branch
commit fdd34569e7a9fc2b6c638a7ef62b965ed7e832ce "Restore previous OpenACC
implicit data clauses ordering [PR103244]", see attached.


Grüße
 Thomas


-

[PATCH] tree-optimization/103168 - Improve VN of pure function calls

2021-11-24 Thread Richard Biener via Gcc-patches
This improves value-numbering of calls that read memory, calls
to const functions with aggregate arguments and calls to
pure functions where the latter include const functions we
demoted to pure for the fear of interposing with a less
optimized version.  Note that for pure functions we do not
handle functions that access global memory.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

Richard.

2021-11-24  Richard Biener  
Jan Hubicka  

PR tree-optimization/103168
* ipa-modref.h (struct modref_summary): Add load_accesses.
* ipa-modref.c (modref_summary::finalize): Initialize load_accesses.
* tree-ssa-sccvn.c (visit_reference_op_call): Use modref
info to walk the virtual use->def chain to CSE const/pure
function calls possibly reading from memory.

* g++.dg/tree-ssa/pr103168.C: New testcase.
---
 gcc/ipa-modref.c |  17 +++
 gcc/ipa-modref.h |   2 +
 gcc/testsuite/g++.dg/tree-ssa/pr103168.C |  24 +
 gcc/tree-ssa-sccvn.c | 126 +++
 4 files changed, 169 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr103168.C

diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
index 79d7d774715..923ae6c1dd3 100644
--- a/gcc/ipa-modref.c
+++ b/gcc/ipa-modref.c
@@ -721,6 +721,23 @@ modref_summary::finalize (tree fun)
break;
}
 }
+  if (loads->every_base)
+load_accesses = 1;
+  else
+{
+  load_accesses = 0;
+  for (auto base_node : loads->bases)
+   {
+ if (base_node->every_ref)
+   load_accesses++;
+ else
+   for (auto ref_node : base_node->refs)
+ if (ref_node->every_access)
+   load_accesses++;
+ else
+   load_accesses += ref_node->accesses->length ();
+   }
+}
 }
 
 /* Get function summary for FUNC if it exists, return NULL otherwise.  */
diff --git a/gcc/ipa-modref.h b/gcc/ipa-modref.h
index f868eb6de07..a0247f5449f 100644
--- a/gcc/ipa-modref.h
+++ b/gcc/ipa-modref.h
@@ -53,6 +53,8 @@ struct GTY(()) modref_summary
 
   /* Flags coputed by finalize method.  */
 
+  /* Total number of accesses in loads tree.  */
+  unsigned int load_accesses;
   /* global_memory_read is not set for functions calling functions
  with !binds_to_current_def which, after interposition, may read global
  memory but do nothing useful with it (except for crashing if some
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr103168.C 
b/gcc/testsuite/g++.dg/tree-ssa/pr103168.C
new file mode 100644
index 000..82924a3e3ce
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr103168.C
@@ -0,0 +1,24 @@
+// { dg-do compile }
+// { dg-options "-O2 -fdump-tree-fre1-details" }
+
+struct a
+{
+  int a;
+  static __attribute__ ((noinline))
+  int ret (int v) {return v;}
+
+  __attribute__ ((noinline))
+  int inca () {return a++;}
+};
+
+int
+test()
+{
+  struct a av;
+  av.a=1;
+  int val = av.ret (0) + av.inca();
+  av.a=2;
+  return val + av.ret(0) + av.inca();
+}
+
+/* { dg-final { scan-tree-dump-times "Replaced a::ret" 1 "fre1" } } */
diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
index 149674e6a16..d31bf329d2e 100644
--- a/gcc/tree-ssa-sccvn.c
+++ b/gcc/tree-ssa-sccvn.c
@@ -71,6 +71,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-loop-niter.h"
 #include "builtins.h"
 #include "fold-const-call.h"
+#include "ipa-modref-tree.h"
+#include "ipa-modref.h"
 #include "tree-ssa-sccvn.h"
 
 /* This algorithm is based on the SCC algorithm presented by Keith
@@ -5084,12 +5086,136 @@ visit_reference_op_call (tree lhs, gcall *stmt)
   struct vn_reference_s vr1;
   vn_reference_t vnresult = NULL;
   tree vdef = gimple_vdef (stmt);
+  modref_summary *summary;
 
   /* Non-ssa lhs is handled in copy_reference_ops_from_call.  */
   if (lhs && TREE_CODE (lhs) != SSA_NAME)
 lhs = NULL_TREE;
 
   vn_reference_lookup_call (stmt, &vnresult, &vr1);
+
+  /* If the lookup did not succeed for pure functions try to use
+ modref info to find a candidate to CSE to.  */
+  const unsigned accesses_limit = 8;
+  if (!vnresult
+  && !vdef
+  && lhs
+  && gimple_vuse (stmt)
+  && (((summary = get_modref_function_summary (stmt, NULL))
+  && !summary->global_memory_read
+  && summary->load_accesses < accesses_limit)
+ || gimple_call_flags (stmt) & ECF_CONST))
+{
+  /* First search if we can do someting useful and build a
+vector of all loads we have to check.  */
+  bool unknown_memory_access = false;
+  auto_vec accesses;
+  unsigned load_accesses = summary ? summary->load_accesses : 0;
+  if (!unknown_memory_access)
+   /* Add loads done as part of setting up the call arguments.
+  That's also necessary for CONST functions which will
+  not have a modref summary.  */
+   for (unsigned i = 0; i < gimple_call_num_args (stmt); ++

Re: [PATCH] tree-optimization/103168 - Improve VN of pure function calls

2021-11-24 Thread Jan Hubicka via Gcc-patches
> This improves value-numbering of calls that read memory, calls
> to const functions with aggregate arguments and calls to
> pure functions where the latter include const functions we
> demoted to pure for the fear of interposing with a less
> optimized version.  Note that for pure functions we do not
> handle functions that access global memory.

Thank you! I am happy we finally undid some of the pessimization caused
by the interposition panic.  I was wondering if, next stage1, I should try
to start tracking eliminated reads in functions, but that is tricky to do
since things like if (*global_var == *global_var) are folded already in
the frontend.

I was thinking a bit about what to do about global accesses and I think we
still can do something (also next stage1).

Currently we disambiguate using
  if (stmt_may_clobber_ref_p_1 (def, &ref, true))
where REF is an ao_ref we built from the summary of STMT and
DEF is another statement.  This is fine but we ignore info we have from
PTA on STMT (the statement we try to optimize).

We could look at DEF and try to disambiguate all memory it
writes against STMT using the PTA oracle, and that would let us handle
global memory well (we don't need REF for that) because we will work out
that some accesses are not escaping to STMT because they are not in
CALLUSED. Somewhat annoying is that we don't have a predicate in
tree-ssa-alias for that (stmt_clobber_stmt_p :)

Honza


Re: [PATCH 1/2][GCC] arm: Move arm_simd_info array declaration into header

2021-11-24 Thread Murray Steele via Gcc-patches
On 18/11/2021 15:40, Richard Earnshaw wrote:
> 
> 
> On 16/11/2021 10:14, Murray Steele via Gcc-patches wrote:
>> Hi all,
>>
>> This patch moves the arm_simd_type and arm_type_qualifiers enums, and
>> arm_simd_info struct from arm-builtins.c into arm-builtins.h header.
>>
>> This is a first step towards internalising the type definitions for MVE
>> predicate, vector, and tuple types.  By moving arm_simd_types into a
>> header, we allow future patches to use these type trees externally to
>> arm-builtins.c, which is a crucial step towards developing an MVE
>> intrinsics framework similar to the current SVE implementation.
>>
>> Thanks,
>> Murray
>>
>> gcc/ChangeLog:
>>
>> * config/arm/arm-builtins.c (enum arm_type_qualifiers): Move to
> >> arm-builtins.h
>> (enum arm_simd_type): Move to arm-builtins.h
>> (struct arm_simd_type_info): Move to arm-builtins.h
>> * config/arm/arm-builtins.h (enum arm_simd_type): Move from
>> arm-builtins.c
>> (enum arm_type_qualifiers): Move from arm-builtins.c
>> (struct arm_simd_type_info): Move from arm-builtins.c
>>
>>
>>
> 
> OK.
> 
> R.

Hi Richard,

I don't currently have write access, so I will need this patch committed on my 
behalf.

Thanks again,
Murray


Re: [PATCH 1/2][GCC] arm: Move arm_simd_info array declaration into header

2021-11-24 Thread Richard Earnshaw via Gcc-patches




On 24/11/2021 12:15, Murray Steele wrote:

On 18/11/2021 15:40, Richard Earnshaw wrote:



On 16/11/2021 10:14, Murray Steele via Gcc-patches wrote:

Hi all,

This patch moves the arm_simd_type and arm_type_qualifiers enums, and
arm_simd_info struct from arm-builtins.c into arm-builtins.h header.

This is a first step towards internalising the type definitions for MVE
predicate, vector, and tuple types.  By moving arm_simd_types into a
header, we allow future patches to use these type trees externally to
arm-builtins.c, which is a crucial step towards developing an MVE
intrinsics framework similar to the current SVE implementation.

Thanks,
Murray

gcc/ChangeLog:

 * config/arm/arm-builtins.c (enum arm_type_qualifiers): Move to
 arm-builtins.h
 (enum arm_simd_type): Move to arm-builtins.h
 (struct arm_simd_type_info): Move to arm-builtins.h
 * config/arm/arm-builtins.h (enum arm_simd_type): Move from
 arm-builtins.c
 (enum arm_type_qualifiers): Move from arm-builtins.c
 (struct arm_simd_type_info): Move from arm-builtins.c





OK.

R.


Hi Richard,

I don't currently have write access, so I will need this patch committed on my 
behalf.

Thanks again,
Murray



That can be done when 2/2 patch has been resolved.  They need to go in 
together.


R.


Re: [PATCH] tree-optimization/103168 - Improve VN of pure function calls

2021-11-24 Thread Richard Biener via Gcc-patches
On Wed, 24 Nov 2021, Jan Hubicka wrote:

> > This improves value-numbering of calls that read memory, calls
> > to const functions with aggregate arguments and calls to
> > pure functions where the latter include const functions we
> > demoted to pure for the fear of interposing with a less
> > optimized version.  Note that for pure functions we do not
> > handle functions that access global memory.
> 
> Thank you! I am happy we finally undid some of the pessimization caused
> by the interposition panic.  I was wondering if, next stage1, I should try
> to start tracking eliminated reads in functions, but that is tricky to do
> since things like if (*global_var == *global_var) are folded already in
> the frontend.
> 
> I was thinking a bit about what to do about global accesses and I think we
> still can do something (also next stage1).
> 
> Currently we disambiguate using
>   if (stmt_may_clobber_ref_p_1 (def, &ref, true))
> where REF is an ao_ref we built from the summary of STMT and
> DEF is another statement.  This is fine but we ignore info we have from
> PTA on STMT (the statement we try to optimize).
> 
> We could look at DEF and try to disambiguate all memory it
> writes against STMT using the PTA oracle, and that would let us handle
> global memory well (we don't need REF for that) because we will work out
> that some accesses are not escaping to STMT because they are not in
> CALLUSED. Somewhat annoying is that we don't have a predicate in
> tree-ssa-alias for that (stmt_clobber_stmt_p :)

Yes, note that we don't have callused unless IPA PTA is enabled,
but it might be salvageable from IPA reference info?  What we're
missing is a stmt_clobbers_pt_solution_p, or rather a reasonably
cheap way to construct an ao_ref covering all of a points-to
solution.  The not-so-cheap way to do that is

  tree tem = make_ssa_name (ptr_type_node);
  ptr_info_def *pi = get_ptr_info (p);
  pt->pt = *gimple_call_use_set (call_stmt);
  tree ref = build2 (MEM_REF, void_type_node /* ?? */, tem, build_zero_cst 
(ptr_type_node /* that effectively is ref-all */));
  ao_ref_init (&r, ref);
  r->base = ref;
  r->ref = NULL_TREE;
  r->offset = 0;
  r->alias_set = 0;
  r->base_alias_set = 0;

and if we come from IPA reference we first have to build a
points-to bitmap as well.

What would be a bit more convenient is probably adding
a pt_solution * member to ao_ref.  Maybe also avoiding
the MEM_REF build we already do in some cases and overload
the base field using a union and a designator ...

But yes, sth for next stage1.

Richard.


Re: [PATCH] Loop unswitching: support gswitch statements.

2021-11-24 Thread Richard Biener via Gcc-patches
On Wed, Nov 24, 2021 at 11:48 AM Martin Liška  wrote:
>
> On 11/24/21 09:00, Richard Biener wrote:
> > On Tue, Nov 23, 2021 at 5:36 PM Martin Liška  wrote:
> >>
> >> On 11/23/21 16:20, Martin Liška wrote:
> >>> Sure, so for e.g. case 1 ... 5 we would need to create a new 
> >>> unswitch_predicate
> >>> with 1 <= index && index <= 5 tree predicate (and the corresponding 
> >>> irange range).
> >>> Later once we unswitch on it, we should use a special unreachable_flag 
> >>> that will
> >>> be used for marking of dead edges (similarly how we fold gconds to 
> >>> boolean_{false/true}_node.
> >>> Does it make sense?
> >>
> >> I have thought about it more and it's not enough. What we really want is 
> >> having a irange
> >> for *each edge* (2 for gconds and multiple for gswitchs). Once we select a 
> >> unswitch_predicate,
> >> then we need to fold_range in true/false loop all these iranges. Doing 
> >> that we can handle situations like:
> >>
> >> if (index < 1)
> >>  do_something1
> >>
> >> if (index > 2)
> >>  do_something2
> >>
> >> switch (index)
> >>  case 1 ... 2:
> >>do_something;
> >> ...
> >>
> >> as seen the once we unswitch on 'index < 1' and 'index > 2', then the 
> >> first case will be taken in the false_edge
> >> of 'index > 2' loop unswitching.
> >
> > Hmm.  I'm not sure it needs to be this complicated.  We're basically
> > evaluating ranges/predicates based
> > on a fixed set of versioning predicates.  Your implementation created
> > "predicates" for the to be simplified
> > conditions but in the end we like to evaluate the actual stmt to
> > figure the taken/not taken edges.
>
> Yes.
>
> >  IIRC
> > elsewhere Andrew showed a snippet on how to evaluate a stmt with a
> > given range - not sure if that
>
> I'm using that. First I isolate a irange from a versioning-predicate with
> ranger->range_on_edge and I later combine it with:
> fold_range (r, stmt, parent_range).
>
>
> > was useful enough.  So what I think would be nice is if we could somehow
> > use ranger's path query
> > without an actual CFG.  So we virtually have
> >
> >if (versioning-predicate1)
> >  if (versioning-predicate2)
> > ;
> > else
> >for (;;) // out current loop
> >  {
> >...
> >if (condition)
> >  ;
> >   ...
> >switch (var)
> >   {
> > ...
> >}
> >  }
> >
> > and versioning-predicate1 and versioning-predicate2 are not in the IL.
> > What we'd like
> > to do is seed the path query with a "virtual" path through the two
> > predicates to the
> > entry of the loop and compute_ranges based on those.
>
> I can do that by building a vector of tuples
> that would be passed to recursive calls of tree_unswitch_single_loop.
> That basically describes which true/false edges are taken for the so far 
> created
> versioning-predicates. Right? That should be usable.

Yeah.  Not sure how much incremental re-use we can have here.  I'd keep
things simple at this point and shoot for something that works on a single
recursion level only.

> > Then we like to
> > use range_of_stmt on 'if (condition)' and 'switch (var)' to determine
> > not taken edges.
>
> Works for me and we would mark unreachable case BBs with a unreachable_flag
> (we can't fold it away as shown in the original patch attempt).

As said we probably want to mark edges, unreachable edges from
upthread recursions
should get their flags copied even.

> > Looking somewhat at the sources it seems like we "simply" need to do what
> > compute_outgoing_relations does - unfortunately the code lacks comments
> > so I have no idea what jt_fur_source src (...).register_outgoing_edges does 
> > ...
> >
> > Anyway, for now manually simplifying things is fine but I probably would 
> > still
> > stick to a basic interface that marks not taken outgoing edges of a stmt 
> > based
> > on the set of versioning predicates.
>
> Lemme try working on another version of the patch.

Yup.  You did have a branch, right?  Maybe I'll poke at it a bit as well.

Richard.

> Martin
>
> >
> > Richard.
> >
> >>
> >> Martin
>


[PATCH] ipa: Teach IPA-CP transformation about IPA-SRA modifications (PR 103227)

2021-11-24 Thread Martin Jambor
Hi,

PR 103227 exposed an issue with ordering of transformations of IPA
passes.  IPA-CP can create clones for constants passed by reference
and at the same time IPA-SRA can also decide that the parameter does
not need to be a pointer (or an aggregate) and plan to convert it
into (a) simple scalar(s).  Because no intermediate clone is created
just for the purpose of ordering the transformations and because
IPA-SRA transformation is implemented as part of clone
materialization, the IPA-CP transformation happens only afterwards,
reversing the order of the transformations compared to the ordering of
analyses.

IPA-CP transformation looks at planned substitutions for values passed
by reference or in aggregates but finds that all the relevant
parameters no longer exist.  Currently it subsequently simply gives
up, leading to clones created for no good purpose (and a huge regression
of 548.exchange2_r).  This patch teaches it to recognize the situation,
look up the new scalarized parameter and perform value substitution on
it.  On my desktop this has recovered the lost exchange2 run-time (and
some more).

I have disabled IPA-SRA in a Fortran testcase so that the dumping from
the transformation phase can still be matched in order to verify that
IPA-CP understands the IL after verifying that it does the right thing
also with IPA-SRA.

Bootstrapped, LTO-bootstrapped and tested on x86_64.
Profiled-LTO-bootstrap on the same platform and normal bootstrap and
testing is underway on i686-linux and aarch64-linux.  OK if all of that
passes as well?

Thanks,

Martin


gcc/ChangeLog:

2021-11-23  Martin Jambor  

PR ipa/103227
* ipa-prop.h (ipa_get_param): New overload.  Move bits of the existing
one to the new one.
* ipa-param-manipulation.h (ipa_param_adjustments): New member
function get_updated_index_or_split.
* ipa-param-manipulation.c
(ipa_param_adjustments::get_updated_index_or_split): New function.
* ipa-prop.c (adjust_agg_replacement_values): Reimplement, add
capability to identify scalarized parameters and perform substitution
on them.
(ipcp_transform_function): Create descriptors earlier, handle new
return values of adjust_agg_replacement_values.

gcc/testsuite/ChangeLog:

2021-11-23  Martin Jambor  

PR ipa/103227
* gcc.dg/ipa/pr103227-1.c: New test.
* gcc.dg/ipa/pr103227-3.c: Likewise.
* gcc.dg/ipa/pr103227-2.c: Likewise.
* gfortran.dg/pr53787.f90: Disable IPA-SRA.
---
 gcc/ipa-param-manipulation.c  | 33 
 gcc/ipa-param-manipulation.h  |  7 +++
 gcc/ipa-prop.c| 73 +++
 gcc/ipa-prop.h| 15 --
 gcc/testsuite/gcc.dg/ipa/pr103227-1.c | 29 +++
 gcc/testsuite/gcc.dg/ipa/pr103227-2.c | 29 +++
 gcc/testsuite/gcc.dg/ipa/pr103227-3.c | 52 +++
 gcc/testsuite/gfortran.dg/pr53787.f90 |  2 +-
 8 files changed, 216 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/ipa/pr103227-1.c
 create mode 100644 gcc/testsuite/gcc.dg/ipa/pr103227-2.c
 create mode 100644 gcc/testsuite/gcc.dg/ipa/pr103227-3.c

diff --git a/gcc/ipa-param-manipulation.c b/gcc/ipa-param-manipulation.c
index cec1dba701f..479c20b3871 100644
--- a/gcc/ipa-param-manipulation.c
+++ b/gcc/ipa-param-manipulation.c
@@ -449,6 +449,39 @@ ipa_param_adjustments::get_updated_indices (vec 
*new_indices)
 }
 }
 
+/* If a parameter with original INDEX has survived intact, return its new
+   index.  Otherwise return -1.  In that case, if it has been split and there
+   is a new parameter representing a portion at unit OFFSET for which a value
+   of a TYPE can be substituted, store its new index into SPLIT_INDEX,
+   otherwise store -1 there.  */
+int
+ipa_param_adjustments::get_updated_index_or_split (int index,
+  unsigned unit_offset,
+  tree type, int *split_index)
+{
+  unsigned adj_len = vec_safe_length (m_adj_params);
+  for (unsigned i = 0; i < adj_len ; i++)
+{
+  ipa_adjusted_param *apm = &(*m_adj_params)[i];
+  if (apm->base_index != index)
+   continue;
+  if (apm->op == IPA_PARAM_OP_COPY)
+   return i;
+  if (apm->op == IPA_PARAM_OP_SPLIT
+ && apm->unit_offset == unit_offset)
+   {
+ if (useless_type_conversion_p (apm->type, type))
+   *split_index = i;
+ else
+   *split_index = -1;
+ return -1;
+   }
+}
+
+  *split_index = -1;
+  return -1;
+}
+
 /* Return the original index for the given new parameter index.  Return a
negative number if not available.  */
 
diff --git a/gcc/ipa-param-manipulation.h b/gcc/ipa-param-manipulation.h
index 5adf8a22356..d1dad9fac73 100644
--- a/gcc/ipa-param-manipulation.h
+++ b/gcc/ipa-param-manipulation.h
@@ -236,6 +236,13 @@ public:
   void get_surviving_params (vec *

Re: [PATCH] PR middle-end/103059: reload: Also accept ASHIFT with indexed addressing

2021-11-24 Thread Maciej W. Rozycki
On Tue, 23 Nov 2021, Jeff Law wrote:

> >   Let me know if it clears your concerns and whether there's anything else
> > you want me to retrieve from that GDB session.
> Thanks for the clarifications.  I never would have guessed that we could get
> into that code in the way you've described, but being reload nothing should be
> terribly surprising.
> 
> All my concerns have been addressed.  This is fine for the trunk. Thanks for
> your patience & explanations.

 Thank you for your review, I have committed this change to trunk now.

 Richard, OK for GCC 11 (correcting a VAX target regression caused by
commit 204213fdf23d)?

  Maciej


[committed] libstdc++: Replace hyphens in effective target keywords

2021-11-24 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux, pushed to trunk.


An effective target like foo-bar-baz will match a target selector of
*-*-* and cause problems in the testsuite. Several libstdc++ et keywords
are of the form foo-bar, which could still be a problem for *-*
selectors.

Replace hyphens with underscores in the et keywords "debug-mode",
"cxx11-abi", etc.

libstdc++-v3/ChangeLog:

* testsuite/lib/libstdc++.exp: Rename effective target keywords
to avoid dashes in the name.
* testsuite/*: Update effective target keywords.
---
 .../pstl/uninitialized_construct.cc   |  2 +-
 .../pstl/uninitialized_copy_move.cc   |  2 +-
 .../pstl/uninitialized_fill_destroy.cc|  2 +-
 libstdc++-v3/testsuite/20_util/to_chars/4.cc  |  2 +-
 .../testsuite/20_util/to_chars/double.cc  |  2 +-
 .../testsuite/20_util/to_chars/float.cc   |  2 +-
 .../testsuite/20_util/to_chars/long_double.cc |  2 +-
 .../basic_string/allocator/char/copy.cc   |  2 +-
 .../allocator/char/copy_assign.cc |  2 +-
 .../basic_string/allocator/char/lwg2788.cc|  2 +-
 .../basic_string/allocator/char/move.cc   |  2 +-
 .../allocator/char/move_assign.cc |  2 +-
 .../basic_string/allocator/char/noexcept.cc   |  2 +-
 .../allocator/char/operator_plus.cc   |  2 +-
 .../basic_string/allocator/char/swap.cc   |  2 +-
 .../basic_string/allocator/wchar_t/copy.cc|  2 +-
 .../allocator/wchar_t/copy_assign.cc  |  2 +-
 .../basic_string/allocator/wchar_t/lwg2788.cc |  2 +-
 .../basic_string/allocator/wchar_t/move.cc|  2 +-
 .../allocator/wchar_t/move_assign.cc  |  2 +-
 .../allocator/wchar_t/noexcept.cc |  2 +-
 .../allocator/wchar_t/operator_plus.cc|  2 +-
 .../basic_string/allocator/wchar_t/swap.cc|  2 +-
 .../capacity/char/resize_and_overwrite.cc |  2 +-
 .../basic_string/cons/char/constexpr.cc   |  2 +-
 .../basic_string/cons/wchar_t/constexpr.cc|  2 +-
 .../basic_string/literals/constexpr.cc|  2 +-
 .../basic_string/modifiers/constexpr.cc   |  2 +-
 .../modifiers/insert/char/83328.cc|  2 +-
 .../modifiers/insert/wchar_t/83328.cc |  2 +-
 .../modifiers/swap/char/constexpr.cc  |  2 +-
 .../modifiers/swap/wchar_t/constexpr.cc   |  2 +-
 .../basic_string/types/pmr_typedefs.cc|  2 +-
 .../testsuite/23_containers/list/61347.cc |  2 +-
 .../vector/bool/capacity/constexpr.cc |  2 +-
 .../23_containers/vector/bool/cmp_c++20.cc|  2 +-
 .../vector/bool/cons/constexpr.cc |  2 +-
 .../vector/bool/element_access/constexpr.cc   |  4 ++--
 .../vector/bool/modifiers/assign/constexpr.cc |  2 +-
 .../vector/bool/modifiers/constexpr.cc|  2 +-
 .../vector/bool/modifiers/swap/constexpr.cc   |  2 +-
 .../vector/capacity/constexpr.cc  |  2 +-
 .../23_containers/vector/cmp_c++20.cc |  2 +-
 .../23_containers/vector/cons/constexpr.cc|  2 +-
 .../vector/data_access/constexpr.cc   |  2 +-
 .../vector/element_access/constexpr.cc|  2 +-
 .../vector/modifiers/assign/constexpr.cc  |  2 +-
 .../vector/modifiers/constexpr.cc |  2 +-
 .../vector/modifiers/swap/constexpr.cc|  2 +-
 .../deque_iterators/1.cc  |  2 +-
 .../pstl/alg_merge/inplace_merge.cc   |  2 +-
 .../25_algorithms/pstl/alg_merge/merge.cc |  2 +-
 .../pstl/alg_modifying_operations/copy_if.cc  |  2 +-
 .../alg_modifying_operations/copy_move.cc |  2 +-
 .../pstl/alg_modifying_operations/fill.cc |  2 +-
 .../pstl/alg_modifying_operations/generate.cc |  2 +-
 .../is_partitioned.cc |  2 +-
 .../alg_modifying_operations/partition.cc |  2 +-
 .../partition_copy.cc |  2 +-
 .../pstl/alg_modifying_operations/remove.cc   |  2 +-
 .../alg_modifying_operations/remove_copy.cc   |  2 +-
 .../pstl/alg_modifying_operations/replace.cc  |  2 +-
 .../alg_modifying_operations/replace_copy.cc  |  2 +-
 .../pstl/alg_modifying_operations/rotate.cc   |  2 +-
 .../alg_modifying_operations/rotate_copy.cc   |  2 +-
 .../alg_modifying_operations/swap_ranges.cc   |  2 +-
 .../transform_binary.cc   |  2 +-
 .../transform_unary.cc|  2 +-
 .../pstl/alg_modifying_operations/unique.cc   |  2 +-
 .../unique_copy_equal.cc  |  2 +-
 .../pstl/alg_nonmodifying/adjacent_find.cc|  2 +-
 .../pstl/alg_nonmodifying/all_of.cc   |  2 +-
 .../pstl/alg_nonmodifying/any_of.cc   |  2 +-
 .../pstl/alg_nonmodifying/count.cc|  2 +-
 .../pstl/alg_nonmodifying/equal.cc|  2 +-
 .../pstl/alg_nonmodifying/find.cc |  2 +-
 .../pstl/alg_nonmodifying/find_end.cc |  2 +-
 .../pstl/alg_nonmodifying/find_first_of.cc|  2 +-
 .../pstl/alg_nonmodifying/find_if.cc  |  2 +-
 .../pstl/alg_nonmodifying/for_each.cc |  2 +-
 .../pstl/alg_nonmodifying/mismatch.cc |  2 +-

[committed] libstdc++: Add xfail to some printer tests for debug mode

2021-11-24 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux, pushed to trunk.


The type printers are not substituting std::string for
std::basic_string in debug mode, mark some tests as xfail.

libstdc++-v3/ChangeLog:

* testsuite/libstdc++-prettyprinters/80276.cc: Add xfail for
debug mode.
* testsuite/libstdc++-prettyprinters/libfundts.cc: Likewise.
---
 libstdc++-v3/testsuite/libstdc++-prettyprinters/80276.cc | 2 +-
 libstdc++-v3/testsuite/libstdc++-prettyprinters/libfundts.cc | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/testsuite/libstdc++-prettyprinters/80276.cc 
b/libstdc++-v3/testsuite/libstdc++-prettyprinters/80276.cc
index 00255c9ebca..49420a3f811 100644
--- a/libstdc++-v3/testsuite/libstdc++-prettyprinters/80276.cc
+++ b/libstdc++-v3/testsuite/libstdc++-prettyprinters/80276.cc
@@ -46,7 +46,7 @@ main()
   // { dg-final { whatis-regexp-test p1 
"std::unique_ptr\\*>>>"
 } }
   // { dg-final { whatis-regexp-test p2 
"std::unique_ptr\\*>>\\\[\\\]>"
 } }
   // { dg-final { whatis-regexp-test p3 
"std::unique_ptr\\*>>\\\[10\\\]>"
 } }
-  // { dg-final { whatis-regexp-test p4 
"std::unique_ptr\\\[\\\]>>\\\[99\\\]>"
 { xfail c++20 } } }
+  // { dg-final { whatis-regexp-test p4 
"std::unique_ptr\\\[\\\]>>\\\[99\\\]>"
 { xfail { c++20 || debug_mode } } } }
 
   placeholder(&p1);// Mark SPOT
   placeholder(&p2);
diff --git a/libstdc++-v3/testsuite/libstdc++-prettyprinters/libfundts.cc 
b/libstdc++-v3/testsuite/libstdc++-prettyprinters/libfundts.cc
index af948e02365..85005c0617f 100644
--- a/libstdc++-v3/testsuite/libstdc++-prettyprinters/libfundts.cc
+++ b/libstdc++-v3/testsuite/libstdc++-prettyprinters/libfundts.cc
@@ -50,7 +50,7 @@ main()
   om = std::map{ {1, 2.}, {3, 4.}, {5, 6.} };
 // { dg-final { regexp-test om 
{std::experimental::optional> containing 
std::(__debug::)?map with 3 elements = {\[1\] = 2, \[3\] = 4, \[5\] = 6}} } }
   optional os{ "stringy" };
-// { dg-final { note-test os {std::experimental::optional = 
{[contained value] = "stringy"}} { xfail c++20 } } }
+// { dg-final { note-test os {std::experimental::optional = 
{[contained value] = "stringy"}} { xfail { c++20 || debug_mode } } } }
 
   any a;
 // { dg-final { note-test a {std::experimental::any [no contained value]} } }
@@ -61,7 +61,7 @@ main()
   any ap = (void*)nullptr;
 // { dg-final { note-test ap {std::experimental::any containing void * = 
{[contained value] = 0x0}} } }
   any as = *os;
-// { dg-final { note-test as {std::experimental::any containing std::string = 
{[contained value] = "stringy"}} { xfail c++20 } } }
+// { dg-final { note-test as {std::experimental::any containing std::string = 
{[contained value] = "stringy"}} { xfail { c++20 || debug_mode } } } }
   any as2("stringiest");
 // { dg-final { regexp-test as2 {std::experimental::any containing const char 
\* = {\[contained value\] = 0x[[:xdigit:]]+ "stringiest"}} } }
   any am = *om;
-- 
2.31.1



[PATCH] libstdc++: Remove broken std::allocator base classes [PR103340]

2021-11-24 Thread Jonathan Wakely via Gcc-patches
I plan to commit this Real Soon. Please yell if you need these
alternative std::allocator back-ends to stay (and explain how you're
using them when they've been broken for years, and start sending test
results to the gcc-testresults mailing list, and ideally offer to
maintain them).

Tested powerpc64le-linux.

=== 8< === 8< === 8< ===

The bitmap_allocator, __mt_alloc and __pool_alloc extensions are no
longer suitable for use as the base class of std::allocator, because
they have not been updated to meet the C++20 requirements.  There is a
patch attached to PR 103340 which addresses that, but more work would be
needed to solve the linking errors that occur when the library is
configured to use them.

Using --enable-libstdcxx-allocator=bitmap wouldn't even bootstrap for
the past few years, and I can't find any gcc-testresults reports using
any of these allocators. This patch removes the configure option to use
these as the std::allocator base class. The allocators are still in the
tree and can be used directly, you just can't configure the library to
use one of them as the base class of std::allocator.

libstdc++-v3/ChangeLog:

PR libstdc++/103340
PR libstdc++/103400
PR libstdc++/103381
* acinclude.m4 (GLIBCXX_ENABLE_ALLOCATOR): Remove mt, bitmap
and pool options.
* configure: Regenerate.
* doc/xml/manual/allocator.xml: Update.
* doc/xml/manual/configure.xml: Update.
* doc/xml/manual/evolution.xml: Document removal.
* doc/xml/manual/mt_allocator.xml: Editorial tweaks.
* doc/html/manual/*: Regenerate.
---
 libstdc++-v3/acinclude.m4 | 14 +---
 libstdc++-v3/configure| 14 +---
 libstdc++-v3/doc/html/manual/api.html |  3 +
 libstdc++-v3/doc/html/manual/configure.html   | 10 ++-
 libstdc++-v3/doc/html/manual/memory.html  | 57 +---
 .../doc/html/manual/mt_allocator.html |  8 +--
 libstdc++-v3/doc/xml/manual/allocator.xml | 67 ++-
 libstdc++-v3/doc/xml/manual/configure.xml | 10 ++-
 libstdc++-v3/doc/xml/manual/evolution.xml |  5 ++
 libstdc++-v3/doc/xml/manual/mt_allocator.xml  |  8 +--
 10 files changed, 64 insertions(+), 132 deletions(-)

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 71321055de7..6d9a8875e31 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -2599,7 +2599,7 @@ AC_DEFUN([GLIBCXX_ENABLE_ALLOCATOR], [
   AC_MSG_CHECKING([for std::allocator base class])
   GLIBCXX_ENABLE(libstdcxx-allocator,auto,[[[=KIND]]],
 [use KIND for target std::allocator base],
-[permit new|malloc|mt|bitmap|pool|yes|no|auto])
+[permit new|malloc|yes|no|auto])
 
   # If they didn't use this option switch, or if they specified --enable
   # with no specific model, we'll have to look for one.  If they
@@ -2631,26 +2631,14 @@ AC_DEFUN([GLIBCXX_ENABLE_ALLOCATOR], [
 
   # Set configure bits for specified locale package
   case ${enable_libstdcxx_allocator_flag} in
-bitmap)
-  ALLOCATOR_H=config/allocator/bitmap_allocator_base.h
-  ALLOCATOR_NAME=__gnu_cxx::bitmap_allocator
-  ;;
 malloc)
   ALLOCATOR_H=config/allocator/malloc_allocator_base.h
   ALLOCATOR_NAME=__gnu_cxx::malloc_allocator
   ;;
-mt)
-  ALLOCATOR_H=config/allocator/mt_allocator_base.h
-  ALLOCATOR_NAME=__gnu_cxx::__mt_alloc
-  ;;
 new)
   ALLOCATOR_H=config/allocator/new_allocator_base.h
   ALLOCATOR_NAME=__gnu_cxx::new_allocator
   ;;
-pool)
-  ALLOCATOR_H=config/allocator/pool_allocator_base.h
-  ALLOCATOR_NAME=__gnu_cxx::__pool_alloc
-  ;;
   esac
 
   GLIBCXX_CONDITIONAL(ENABLE_ALLOCATOR_NEW,
diff --git a/libstdc++-v3/doc/xml/manual/allocator.xml 
b/libstdc++-v3/doc/xml/manual/allocator.xml
index 1f429410eb0..aaab4e29aa7 100644
--- a/libstdc++-v3/doc/xml/manual/allocator.xml
+++ b/libstdc++-v3/doc/xml/manual/allocator.xml
@@ -154,8 +154,9 @@

 

- The base class that allocator is derived from
- may not be user-configurable.
+ The choice of base class that allocator
+ is derived from is fixed at the time when GCC is built,
+ and the different choices are not ABI compatible.
 
 
   
@@ -314,6 +315,13 @@
new_allocator.

 
+   
+ Since C++11 the minimal interface require for an allocator is
+ much smaller, as std::allocator_traits
+ can provide default for much of the interface.
+   
+
+
 
 
 Extension 
Allocators
@@ -359,9 +367,10 @@
debug_allocator


-A wrapper around an arbitrary allocator A.  It passes on
-slightly increased size requests to A, and uses the extra
-memory to store size information.  When a pointer is passed
+A wrapper around an arbitrary allocator A.
+It passes on slightly increased size requests to 
A,
+and uses the extra memory to store size information.
+When a pointer is passed
 t

Re: [PATCH] PR middle-end/103059: reload: Also accept ASHIFT with indexed addressing

2021-11-24 Thread Richard Biener via Gcc-patches
On Wed, 24 Nov 2021, Maciej W. Rozycki wrote:

> On Tue, 23 Nov 2021, Jeff Law wrote:
> 
> > >   Let me know if it clears your concerns and whether there's anything else
> > > you want me to retrieve from that GDB session.
> > Thanks for the clarifications.  I never would have guessed that we could get
> > into that code in the way you've described, but being reload nothing should 
> > be
> > terribly surprising.
> > 
> > All my concerns have been addressed.  This is fine for the trunk. Thanks for
> > your patience & explanations.
> 
>  Thank you for your review, I have committed this change to trunk now.
> 
>  Richard, OK for GCC 11 (correcting a VAX target regression caused by
> commit 204213fdf23d)?

OK.

Richard.


[PATCH] c++, v2: Implement C++23 P2128R6 - Multidimensional subscript operator [PR102611]

2021-11-24 Thread Jakub Jelinek via Gcc-patches
On Tue, Nov 23, 2021 at 10:28:48PM -0500, Jason Merrill wrote:

Thanks.

> > + while (true)
> > +   {
> > + cp_expr expr (NULL_TREE);
> > + /* Parse the next assignment-expression.  */
> > + if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
> > +   {
> > + /* A braced-init-list.  */
> > + bool expr_nonconst_p;
> > + cp_lexer_set_source_position (parser->lexer);
> > + expr = cp_parser_braced_list (parser, &expr_nonconst_p);
> > +   }
> > + else
> > +   expr = cp_parser_assignment_expression (parser);
> > +
> > + /* If we have an ellipsis, then this is an expression
> > +expansion.  */
> > + if (cp_lexer_next_token_is (parser->lexer, CPP_ELLIPSIS))
> > +   {
> > + /* Consume the `...'.  */
> > + cp_lexer_consume_token (parser->lexer);
> > + /* Build the argument pack.  */
> > + expr = make_pack_expansion (expr);
> > +   }
> > +
> > + if (expr == error_mark_node)
> > +   index = error_mark_node;
> > + else if (expression_list.get () == NULL
> > +  && !PACK_EXPANSION_P (expr.get_value ()))
> > +   index = expr.get_value ();
> > + else
> > +   vec_safe_push (expression_list, expr.get_value ());
> > +
> > + /* If the next token isn't a `,', then we are done.  */
> > + if (cp_lexer_next_token_is_not (parser->lexer, CPP_COMMA))
> > +   break;
> > +
> > + if (expression_list.get () == NULL && index != error_mark_node)
> > +   {
> > + *&expression_list = make_tree_vector_single (index);
> > + index = NULL_TREE;
> > +   }
> > +
> > + /* Otherwise, consume the `,' and keep going.  */
> > + cp_lexer_consume_token (parser->lexer);
> > +   }
> 
> Let's share this loop with cp_parser_parenthesized_expression_list.

I'd prefer not to share the loop as whole because what exactly is done with
the parsed expressions differs a lot, for the array refs I'd prefer not to
push anything into a vector for the most common case with a single element.
I've outlined into a function what I think I can easily share
(see cp_parser_parenthesized_expression_list_elt in the updated patch).

> > + if (expression_list.get () && index == error_mark_node)
> > +   {
> > + release_tree_vector (*&expression_list);
> > + *&expression_list = NULL;
> 
> This should probably become a release() method in releasing_vec.

Done.

> > + FOR_EACH_VEC_SAFE_ELT (*index_exp_list, i, e)
> 
> This is build_x_compound_expr_from_vec.

Done 2x.

> > +/* Wrapper for above.  */
> 
> I just applied my auto_cond_timevar patch, so you can use that instead of
> the wrapper.

Done.

> > + for (i = 0; i < nargs; ++i)
> > +   {
> > + tree arg = CALL_EXPR_ARG (c, i);
> > +
> > + if (!PACK_EXPANSION_P (arg))
> > +   vec_safe_push (index_exp_list, RECUR (arg));
> > + else
> > +   {
> > + /* Expand the pack expansion and push each entry onto
> > +INDEX_EXP_LIST.  */
> > + arg = tsubst_pack_expansion (arg, args, complain, in_decl);
> > + if (TREE_CODE (arg) == TREE_VEC)
> > +   {
> > + unsigned int len, j;
> > +
> > + len = TREE_VEC_LENGTH (arg);
> > + for (j = 0; j < len; ++j)
> > +   {
> > + tree value = TREE_VEC_ELT (arg, j);
> > + if (value != NULL_TREE)
> > +   value = convert_from_reference (value);
> > + vec_safe_push (index_exp_list, value);
> > +   }
> > +   }
> > + else
> > +   {
> > + /* A partial substitution.  Add one entry.  */
> > + vec_safe_push (index_exp_list, arg);
> > +   }
> > +   }
> > +   }
> 
> Let's share this code with CALL_EXPR instead of duplicating it.

Done as tsubst_copy_and_build_call_args.

Tested on the new testcases so far, ok for trunk if it passes full
bootstrap/regtest?

2021-11-24  Jakub Jelinek  

PR c++/102611
gcc/
* doc/invoke.texi (-Wcomma-subscript): Document that for
-std=c++20 the option isn't enabled by default with -Wno-deprecated
but for -std=c++23 it is.
gcc/c-family/
* c-opts.c (c_common_post_options): Enable -Wcomma-subscript by
default for C++23 regardless of warn_deprecated.
* c-cppbuiltin.c (c_cpp_builtins): Predefine
__cpp_multidimensional_subscript=202110L for C++23.
gcc/cp/
* cp-tree.h (build_op_subscript): Implement P2128R6
- Multidimensional subscript operator.  Declare.
(class releasing_vec): Add release method.
(grok_array_decl): Remove bool argument, add vec **
and 

Re: [PATCH] PR middle-end/103059: reload: Also accept ASHIFT with indexed addressing

2021-11-24 Thread Maciej W. Rozycki
On Wed, 24 Nov 2021, Richard Biener wrote:

> > > All my concerns have been addressed.  This is fine for the trunk. Thanks 
> > > for
> > > your patience & explanations.
> > 
> >  Thank you for your review, I have committed this change to trunk now.
> > 
> >  Richard, OK for GCC 11 (correcting a VAX target regression caused by
> > commit 204213fdf23d)?
> 
> OK.

 Backported now, thank you!  Closing issue.

  Maciej


[COMMITTED] Range-on-edge trace tweak.

2021-11-24 Thread Andrew MacLeod via Gcc-patches
When working with the trace output, I noticed the columns were getting 
out of sync.


When range_on_edge is called with a constant value, it used to not output
a trace for that. When I added the unexecutable edge code, it became possible
to request a constant on an unexecutable edge, which now returns
UNDEFINED.  The trace output prints the header, but returns for a
constant before outputting the trailer and realigning the columns.


Bootstrapped on x86_64-pc-linux-gnu with no regressions.  Pushed as obvious.

Andrew
From a031bb7a585f789df2aed856a57646b8c45d0878 Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Fri, 19 Nov 2021 13:27:18 -0500
Subject: [PATCH 1/4] Range-on-edge trace tweak.

Trace formatting gets out of sync when range on edge is called with a constant.

	* gimple-range.cc (gimple_ranger::range_on_edge): Call trailer when
	a constant is encountered to terminate the trace.
---
 gcc/gimple-range.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/gimple-range.cc b/gcc/gimple-range.cc
index a2b68b2bc80..9ca568ce55d 100644
--- a/gcc/gimple-range.cc
+++ b/gcc/gimple-range.cc
@@ -216,7 +216,7 @@ gimple_ranger::range_on_edge (irange &r, edge e, tree name)
 
   bool res = true;
   if (!gimple_range_ssa_p (name))
-return get_tree_range (r, name, NULL);
+res = get_tree_range (r, name, NULL);
   else
 {
   range_on_exit (r, e->src, name);
@@ -230,7 +230,7 @@ gimple_ranger::range_on_edge (irange &r, edge e, tree name)
 
   if (idx)
 tracer.trailer (idx, "range_on_edge", res, name, r);
-  return true;
+  return res;
 }
 
 // fold_range wrapper for range_of_stmt to use as an internal client.
-- 
2.17.2



Re: [PATCH 2/2] PR tree-optimization/103231 - Directly resolve range_of_stmt dependencies.

2021-11-24 Thread Andrew MacLeod via Gcc-patches

On 11/24/21 04:17, Richard Biener wrote:

On Tue, Nov 23, 2021 at 6:04 PM Andrew MacLeod via Gcc-patches
 wrote:

This is the second patch in the series.

Ranger uses its own API to recursively satisfy dependencies. When
range_of_stmt is called on _1482 = _1154 + _1177;  it picks up the
ranges of _1154 and _1177 from its cache. If those statements have not
been seen yet, it recursively calls range_of_stmt on each one to resolve
the answer.  Each main API call can trigger up to 5 other calls to get
to the next API point:

 gimple_ranger::fold_range_internal (...)
 gimple_ranger::range_of_stmt (_1154,...)
 gimple_ranger::range_of_expr (_1154,)
 fold_using_range::range_of_range_op (..)
 fold_using_range::fold_stmt (...)
 gimple_ranger::fold_range_internal (...)
 gimple_ranger::range_of_stmt (_1482,...)

For a normal forward walk, values tend to already be in the cache, but
when we try to answer a range_on_edge question on a back edge, it can
trigger a very long series of queries.  I spent some time analyzing
these patterns, and found that regardless of which API entry point was
used, ultimately range_of_stmt is invoked in a predictable order to
initiate the cache values.

This patch implements a dependency resolver which range_of_stmt
uses when it is called on something which does not have a cache entry
yet (thus the disambiguation of the temporal failure vs lack of cache
entry in the previous patch).

This looks at each operand, and if that operand does not have a cache
entry, pushes it on a stack.   Names are popped from the stack and
fold_using_range() is invoked once all the operands have been
resolved.   When we do get to call fold_using_range::fold_stmt(), we are
sure the operands are cached and the value will simply be calculated.
This is ultimately the exact series of events that would have happened
had the main API been used... except we don't involve the call stack
anymore for each one.

Well, mostly :-).  For this fix, we only do this with operands of stmts
which have a range-ops handler.. meaning we do not use the API for
anything range-ops understands.  We will still use the main API for
resolving PHIS and other statements as they are encountered.  We could
do this for PHIS as well, but for the most part it was the chains of
stmts within a block that were causing the vast majority of the issue.
If we later discover large chains of PHIs are causing issues as well,
then I can easily add them to this as well.  I avoided them this time
because there is extra overhead involved in traversing all the PHI
arguments extra times.  Sticking with range-ops limits us to 2 operands
to check, and the overhead is very minimal.

I have tested this with PHIs as well and we could just include them
upfront. The overhead is more than doubled, but the increased compile
time of a VRP pass is still under 1%.

Bootstrapped on x86_64-pc-linux-gnu with no regressions.  OK?

OK.

Richard.


Andrew




committed.



Re: [PATCH 1/2] Split return functionality of get_non_stale_global_range.

2021-11-24 Thread Andrew MacLeod via Gcc-patches

On 11/24/21 04:16, Richard Biener wrote:

On Tue, Nov 23, 2021 at 6:03 PM Andrew MacLeod via Gcc-patches
 wrote:

This is the first of 2 patches which will reduce the depth of the call
chain in ranger.

This patch simply splits the functionality of the routine
get_non_stale_global_range() from a single boolean return to a boolean
return and a bool reference.

This routine queries the global cache for a value.  If there is no
value, it queries the legacy global range and sets it to that value.  If
there was a value, it checks the temporal cache to see if it's current,
and if it is, returns TRUE plus the range.

If the value is not current, or it was set to the legacy global value,
then the timestamp is marked as "always current" as it indicates a
calculation is ongoing, and we don't want to trigger any additional
temporal faults until the calculation is done. And finally FALSE is
returned for all these cases.

The second patch in the series wants to disambiguate at the call site
whether this was a failure due to not being in the global cache, or
whether it was due to the timestamp being out of date and take different
actions for each case.   Details in the following note.

This has been Bootstrapped on x86_64-pc-linux-gnu with no regressions.  OK?

OK.

Richard.


committed.




Re: [PATCH] Loop unswitching: support gswitch statements.

2021-11-24 Thread Martin Liška

On 11/24/21 13:48, Richard Biener wrote:

Yup.  You did have a branch, right?  Maybe I'll poke at it a bit as well.


Well, I rebase quite a bit as it's under heavy development now. Do you want me
to create a devel/* branch?

Anyway, I've got a proof-of-concept patch that does:

- unswitch_predicates are first discovered before top-level 
tree_unswitch_single_loop
  is called and they live in a shared cache based on gimple::uid.
- finding candidates in a loop is easy -> uses unswitch_predicates from the 
previous step
- note I allow multiple unswitch_predicates for a BB, because a gswitch
  can emit more than 2 predicates for a switch
- evaluate_loop_insns_for_predicate does not fold any statements
- folding happens right before a recursive tree_unswitch_single_loop happens
- costing is not resolved yet, but should be easy
- combine_range can intersect all iranges for a given index variable 
(gimple_cond_lhs for now).

It likely miscompiles gcc.dg/loop-unswitch-5.c, working on that..

Thoughts?
Cheers,
Martin
diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def
index f8a15f3d1d1..278fb1112b3 100644
--- a/gcc/dbgcnt.def
+++ b/gcc/dbgcnt.def
@@ -187,6 +187,7 @@ DEBUG_COUNTER (ira_move)
 DEBUG_COUNTER (ivopts_loop)
 DEBUG_COUNTER (lim)
 DEBUG_COUNTER (local_alloc_for_sched)
+DEBUG_COUNTER (loop_unswitch)
 DEBUG_COUNTER (match)
 DEBUG_COUNTER (merged_ipa_icf)
 DEBUG_COUNTER (phiopt_edge_range)
diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-8.c b/gcc/testsuite/gcc.dg/loop-unswitch-8.c
new file mode 100644
index 000..ae5f8f300e9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-unswitch-8.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-details" } */
+
+int
+foo(double *a, double *b, double *c, double *d, double *r, int size, int order)
+{
+  for (int i = 0; i < size; i++)
+  {
+double tmp;
+
+if (order < 3)
+  tmp = -8 * a[i];
+else
+  tmp = -4 * b[i];
+
+double x = 3 * tmp + d[i] + tmp;
+
+if (5 > order)
+  x += 2;
+
+if (order == 12345)
+  x *= 5;
+
+double y = 3.4f * tmp + d[i];
+r[i] = x + y;
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times ";; Unswitching loop on condition: order" 3 "unswitch" } } */
diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-9.c b/gcc/testsuite/gcc.dg/loop-unswitch-9.c
new file mode 100644
index 000..9dd6023d49d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-unswitch-9.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-details" } */
+
+int
+foo(double *a, double *b, double *c, double *d, double *r, int size, int order)
+{
+  for (int i = 0; i < size; i++)
+  {
+double tmp;
+
+if (order == 1)
+  tmp = -8 * a[i];
+else
+  {
+	if (order == 2)
+	  tmp = -4 * b[i];
+	else
+	  tmp = a[i];
+  }
+
+r[i] = 3.4f * tmp + d[i];
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times ";; Unswitching loop on condition: order" 2 "unswitch" } } */
diff --git a/gcc/tree-ssa-loop-unswitch.c b/gcc/tree-ssa-loop-unswitch.c
index fe4dacc0833..9db758b5199 100644
--- a/gcc/tree-ssa-loop-unswitch.c
+++ b/gcc/tree-ssa-loop-unswitch.c
@@ -37,6 +37,11 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-iterator.h"
 #include "cfghooks.h"
 #include "tree-ssa-loop-manip.h"
+#include "tree-pretty-print.h"
+#include "gimple-range.h"
+#include "dbgcnt.h"
+
+#include 
 
 /* This file implements the loop unswitching, i.e. transformation of loops like
 
@@ -74,9 +79,34 @@ along with GCC; see the file COPYING3.  If not see
tree-ssa-loop-im.c ensures that all the suitable conditions are in this
shape.  */
 
+/* A tuple that holds GIMPLE condition and value range for an unswitching
+   predicate.  */
+
+struct unswitch_predicate
+{
+  /* Default constructor.  */
+  unswitch_predicate (tree cond, tree lhs_)
+  : condition (cond), lhs (lhs_), true_range (), false_range ()
+  {}
+
+  tree condition;
+  tree lhs;
+  int_range_max true_range;
+  int_range_max false_range;
+};
+
+static vec> *bb_predicates = NULL;
+
+static gimple_ranger *ranger = NULL;
+
+typedef auto_vec> predicate_vector;
+
 static class loop *tree_unswitch_loop (class loop *, basic_block, tree);
-static bool tree_unswitch_single_loop (class loop *, int);
-static tree tree_may_unswitch_on (basic_block, class loop *);
+static bool tree_unswitch_single_loop (class loop *, int,
+   predicate_vector &predicate_path);
+static void
+find_unswitching_predicates_for_bb (basic_block bb, class loop *loop,
+vec &candidates);
 static bool tree_unswitch_outer_loop (class loop *);
 static edge find_loop_guard (class loop *);
 static bool empty_bb_without_guard_p (class loop *, basic_block);
@@ -85,6 +115,55 @@ static void hoist_guard (class loop *, edge);
 static bool check_exit_phi (class loop *);
 static tree get_vop_from_header (class loop *);
 
+static vec &
+get_predicates_for_bb (basic_block bb)
+{
+  gimple *last = last_stmt (b

Reduce scope of a few 'class loop *loop' variables (was: [PATCH v4] Use range-based for loops for traversing loops)

2021-11-24 Thread Thomas Schwinge
Hi!

On 2021-07-30T15:58:36+0800, "Kewen.Lin"  wrote:
> on 2021/7/30 下午3:18, Thomas Schwinge wrote:
>> Curious why in some instances we're not removing the 'class loop *loop'
>> declaration, I had a look, and this may suggest some further clean-up?
>
> [...] I like your nice proposed further clean-up,
> thanks for doing that!

Ping for my patch to "Reduce scope of a few 'class loop *loop' variables",
see attached.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 6051ff3a4ba0b8f44ecb262e4553f8a471c66237 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Fri, 30 Jul 2021 09:23:20 +0200
Subject: [PATCH] Reduce scope of a few 'class loop *loop' variables

Further clean-up after commit e41ba804ba5f5ca433e09238d561b1b4c8b10985
"Use range-based for loops for traversing loops".  No functional change.

	gcc/
	* cfgloop.c (verify_loop_structure): Reduce scope of
	'class loop *loop' variable.
	* ipa-fnsummary.c (analyze_function_body): Likewise.
	* loop-init.c (fix_loop_structure): Likewise.
	* loop-invariant.c (calculate_loop_reg_pressure): Likewise.
	* predict.c (predict_loops): Likewise.
	* tree-loop-distribution.c (loop_distribution::execute): Likewise.
	* tree-vectorizer.c (pass_vectorize::execute): Likewise.
---
 gcc/cfgloop.c| 3 +--
 gcc/ipa-fnsummary.c  | 3 +--
 gcc/loop-init.c  | 2 +-
 gcc/loop-invariant.c | 4 ++--
 gcc/predict.c| 3 +--
 gcc/tree-loop-distribution.c | 2 +-
 gcc/tree-vectorizer.c| 5 ++---
 7 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c
index 20c24c13c36..3190d12b2ce 100644
--- a/gcc/cfgloop.c
+++ b/gcc/cfgloop.c
@@ -1398,7 +1398,6 @@ verify_loop_structure (void)
 {
   unsigned *sizes, i, j;
   basic_block bb, *bbs;
-  class loop *loop;
   int err = 0;
   edge e;
   unsigned num = number_of_loops (cfun);
@@ -1689,7 +1688,7 @@ verify_loop_structure (void)
 	  for (; exit; exit = exit->next_e)
 		eloops++;
 
-	  for (loop = bb->loop_father;
+	  for (class loop *loop = bb->loop_father;
 		   loop != e->dest->loop_father
 		   /* When a loop exit is also an entry edge which
 		  can happen when avoiding CFG manipulations
diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
index 7e9201a554a..cb13d2e4b3c 100644
--- a/gcc/ipa-fnsummary.c
+++ b/gcc/ipa-fnsummary.c
@@ -2934,7 +2934,6 @@ analyze_function_body (struct cgraph_node *node, bool early)
   if (nonconstant_names.exists () && !early)
 {
   ipa_fn_summary *s = ipa_fn_summaries->get (node);
-  class loop *loop;
   unsigned max_loop_predicates = opt_for_fn (node->decl,
 		 param_ipa_max_loop_predicates);
 
@@ -2978,7 +2977,7 @@ analyze_function_body (struct cgraph_node *node, bool early)
   /* To avoid quadratic behavior we analyze stride predicates only
  with respect to the containing loop.  Thus we simply iterate
 	 over all defs in the outermost loop body.  */
-  for (loop = loops_for_fn (cfun)->tree_root->inner;
+  for (class loop *loop = loops_for_fn (cfun)->tree_root->inner;
 	   loop != NULL; loop = loop->next)
 	{
 	  ipa_predicate loop_stride = true;
diff --git a/gcc/loop-init.c b/gcc/loop-init.c
index 04054ef6222..f0931a99661 100644
--- a/gcc/loop-init.c
+++ b/gcc/loop-init.c
@@ -201,7 +201,6 @@ fix_loop_structure (bitmap changed_bbs)
 {
   basic_block bb;
   int record_exits = 0;
-  class loop *loop;
   unsigned old_nloops, i;
 
   timevar_push (TV_LOOP_INIT);
@@ -279,6 +278,7 @@ fix_loop_structure (bitmap changed_bbs)
 
   /* Finally free deleted loops.  */
   bool any_deleted = false;
+  class loop *loop;
   FOR_EACH_VEC_ELT (*get_loops (cfun), i, loop)
 if (loop && loop->header == NULL)
   {
diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c
index fca0c2b24be..5eee2e5c9f8 100644
--- a/gcc/loop-invariant.c
+++ b/gcc/loop-invariant.c
@@ -2134,7 +2134,7 @@ calculate_loop_reg_pressure (void)
   basic_block bb;
   rtx_insn *insn;
   rtx link;
-  class loop *loop, *parent;
+  class loop *parent;
 
   for (auto loop : loops_list (cfun, 0))
 if (loop->aux == NULL)
@@ -2151,7 +2151,7 @@ calculate_loop_reg_pressure (void)
   if (curr_loop == current_loops->tree_root)
 	continue;
 
-  for (loop = curr_loop;
+  for (class loop *loop = curr_loop;
 	   loop != current_loops->tree_root;
 	   loop = loop_outer (loop))
 	bitmap_ior_into (&LOOP_DATA (loop)->regs_live, DF_LR_IN (bb));
diff --git a/gcc/predict.c b/gcc/predict.c
index 68b11135680..3cb4e3c0eb5 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -1927,7 +1927,6 @@ predict_extra_loop_exits (edge exit_edge)
 static void
 predict_loops (void)
 {
-  class loop *loop;
   basic_block bb;
   hash_set  with_recursion(10);
 
@@ -1941,7 +1940,7 @@ predict_loops (v

Re: [PATCH] Loop unswitching: support gswitch statements.

2021-11-24 Thread Martin Liška

On 11/24/21 15:14, Martin Liška wrote:

It likely miscompiles gcc.dg/loop-unswitch-5.c, working on that..


Fixed that in the updated version.

Martin
diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def
index f8a15f3d1d1..278fb1112b3 100644
--- a/gcc/dbgcnt.def
+++ b/gcc/dbgcnt.def
@@ -187,6 +187,7 @@ DEBUG_COUNTER (ira_move)
 DEBUG_COUNTER (ivopts_loop)
 DEBUG_COUNTER (lim)
 DEBUG_COUNTER (local_alloc_for_sched)
+DEBUG_COUNTER (loop_unswitch)
 DEBUG_COUNTER (match)
 DEBUG_COUNTER (merged_ipa_icf)
 DEBUG_COUNTER (phiopt_edge_range)
diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-8.c b/gcc/testsuite/gcc.dg/loop-unswitch-8.c
new file mode 100644
index 000..ae5f8f300e9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-unswitch-8.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-details" } */
+
+int
+foo(double *a, double *b, double *c, double *d, double *r, int size, int order)
+{
+  for (int i = 0; i < size; i++)
+  {
+double tmp;
+
+if (order < 3)
+  tmp = -8 * a[i];
+else
+  tmp = -4 * b[i];
+
+double x = 3 * tmp + d[i] + tmp;
+
+if (5 > order)
+  x += 2;
+
+if (order == 12345)
+  x *= 5;
+
+double y = 3.4f * tmp + d[i];
+r[i] = x + y;
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times ";; Unswitching loop on condition: order" 3 "unswitch" } } */
diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-9.c b/gcc/testsuite/gcc.dg/loop-unswitch-9.c
new file mode 100644
index 000..9dd6023d49d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-unswitch-9.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-details" } */
+
+int
+foo(double *a, double *b, double *c, double *d, double *r, int size, int order)
+{
+  for (int i = 0; i < size; i++)
+  {
+double tmp;
+
+if (order == 1)
+  tmp = -8 * a[i];
+else
+  {
+	if (order == 2)
+	  tmp = -4 * b[i];
+	else
+	  tmp = a[i];
+  }
+
+r[i] = 3.4f * tmp + d[i];
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times ";; Unswitching loop on condition: order" 2 "unswitch" } } */
diff --git a/gcc/tree-ssa-loop-unswitch.c b/gcc/tree-ssa-loop-unswitch.c
index fe4dacc0833..1d5bcfc3237 100644
--- a/gcc/tree-ssa-loop-unswitch.c
+++ b/gcc/tree-ssa-loop-unswitch.c
@@ -37,6 +37,11 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-iterator.h"
 #include "cfghooks.h"
 #include "tree-ssa-loop-manip.h"
+#include "tree-pretty-print.h"
+#include "gimple-range.h"
+#include "dbgcnt.h"
+
+#include 
 
 /* This file implements the loop unswitching, i.e. transformation of loops like
 
@@ -74,9 +79,34 @@ along with GCC; see the file COPYING3.  If not see
tree-ssa-loop-im.c ensures that all the suitable conditions are in this
shape.  */
 
+/* A tuple that holds GIMPLE condition and value range for an unswitching
+   predicate.  */
+
+struct unswitch_predicate
+{
+  /* Default constructor.  */
+  unswitch_predicate (tree cond, tree lhs_)
+  : condition (cond), lhs (lhs_), true_range (), false_range ()
+  {}
+
+  tree condition;
+  tree lhs;
+  int_range_max true_range;
+  int_range_max false_range;
+};
+
+static vec> *bb_predicates = NULL;
+
+static gimple_ranger *ranger = NULL;
+
+typedef auto_vec> predicate_vector;
+
 static class loop *tree_unswitch_loop (class loop *, basic_block, tree);
-static bool tree_unswitch_single_loop (class loop *, int);
-static tree tree_may_unswitch_on (basic_block, class loop *);
+static bool tree_unswitch_single_loop (class loop *, int,
+   predicate_vector &predicate_path);
+static void
+find_unswitching_predicates_for_bb (basic_block bb, class loop *loop,
+vec &candidates);
 static bool tree_unswitch_outer_loop (class loop *);
 static edge find_loop_guard (class loop *);
 static bool empty_bb_without_guard_p (class loop *, basic_block);
@@ -85,6 +115,55 @@ static void hoist_guard (class loop *, edge);
 static bool check_exit_phi (class loop *);
 static tree get_vop_from_header (class loop *);
 
+static vec &
+get_predicates_for_bb (basic_block bb)
+{
+  gimple *last = last_stmt (bb);
+  return (*bb_predicates)[last == NULL ? 0 : gimple_uid (last)];
+}
+
+static void
+set_predicates_for_bb (basic_block bb, vec predicates)
+{
+  gimple_set_uid (last_stmt (bb), bb_predicates->length ());
+  bb_predicates->safe_push (predicates);
+}
+
+static void
+init_loop_unswitch_info (class loop *loop)
+{
+  /* Calculate instruction count.  */
+  basic_block *bbs = get_loop_body (loop);
+  for (unsigned i = 0; i < loop->num_nodes; i++)
+{
+  unsigned insns = 0;
+  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
+	   gsi_next (&gsi))
+	insns += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights);
+
+  bbs[i]->aux = (void *)(size_t)insns;
+}
+
+  /* Find all unswitching candidates.  */
+  for (unsigned i = 0; i != loop->num_nodes; i++)
+{
+  /* Find a bb to unswitch on.  */
+  vec cand

Re: [PATCH] tree-optimization/103168 - Improve VN of pure function calls

2021-11-24 Thread Jan Hubicka via Gcc-patches
> 
> Yes, note that we don't have callused unless IPA PTA is enabled,
> but it might be salvageable from IPA reference info?  What we're
> missing is a stmt_clobbers_pt_solution_p, or rather a reasonably
> cheap way to construct an ao_ref covering all of a points-to
> solution.  The not-so-cheap way to do that is
> 
>   tree tem = make_ssa_name (ptr_type_node);
>   ptr_info_def *pi = get_ptr_info (p);
>   pt->pt = *gimple_call_use_set (call_stmt);
>   tree ref = build2 (MEM_REF, void_type_node /* ?? */, tem, build_zero_cst 
> (ptr_type_node /* that effectively is ref-all */));
>   ao_ref_init (&r, ref);
>   r->base = ref;
>   r->ref = NULL_TREE;
>   r->offset = 0;
>   r->alias_set = 0;
>   r->base_alias_set = 0;
> 
> and if we come from IPA reference we first have to build a
> points-to bitmap as well.
> 
> What would be a bit more convenient is probably adding
> a pt_solution * member to ao_ref.  Maybe also avoiding

Having a way to add additional pt_solution to the ref looks like a good
idea, since then it could go through the common path in
tree-ssa-alias.c.  I will look into that next year :)
Honza
> the MEM_REF build we already do in some cases and overload
> the base field using a union and a designator ...
> 
> But yes, sth for next stage1.
> 
> Richard.


[PATCH][RFC] middle-end/46476 - resurrect -Wunreachable-code

2021-11-24 Thread Richard Biener via Gcc-patches
This resurrects -Wunreachable-code and implements a warning for
trivially unreachable code as of CFG construction.  Most problematic
with this is the C/C++ frontend added 'return 0;' stmt in main
which the patch handles for C++ like the C frontend already does
by using BUILTINS_LOCATION.

Another problem for future enhancement is that after CFG construction
we no longer can point to the stmt making a stmt unreachable, so
this implementation tries to warn on the first unreachable
statement of a region.  It might be possible to retain a pointer
to the stmt that triggered creation of a basic-block but I'm not
sure how reliable that would be.

So this is really a simple attempt for now, triggered by myself
running into such a coding error.  As always, the perfect is the
enemy of the good.

It does not pass bootstrap (which enables -Wextra), because of the
situation in g++.dg/Wunreachable-code-5.C where the C++ frontend
prematurely elides conditions like if (! GATHER_STATISTICS) that
evaluate to true - oddly enough it does _not_ do this for
conditions evaluating to false ... (one of the
c-c++-common/Wunreachable-code-2.c cases).

Richard.

2021-11-24  Richard Biener  

PR middle-end/46476
* common.opt (Wunreachable-code): No longer ignored,
add warn_unreachable_code variable, enable with -Wextra.
* doc/invoke.texi (Wunreachable-code): Document.
(Wextra): Amend.
* tree-cfg.c (build_gimple_cfg): Move case label grouping...
(execute_build_cfg): ... here after new -Wunreachable-code
warnings.
(warn_unreachable_code_post_cfg_build): New function.
(mark_forward_reachable_blocks): Likewise.
(reverse_guess_deadend): Likewise.

gcc/cp/
* decl.c (finish_function): Set input_location to
BUILTINS_LOCATION around the code building the return 0
for main().

libgomp/
* oacc-plugin.c (GOMP_PLUGIN_acc_default_dim): Remove spurious
return.

gcc/testsuite/
* c-c++-common/Wunreachable-code-1.c: New testcase.
* c-c++-common/Wunreachable-code-2.c: Likewise.
* c-c++-common/Wunreachable-code-3.c: Likewise.
* gcc.dg/Wunreachable-code-4.c: Likewise.
* g++.dg/Wunreachable-code-5.C: Likewise.
---
 gcc/common.opt|   4 +-
 gcc/cp/decl.c |   9 +-
 gcc/doc/invoke.texi   |   9 +-
 .../c-c++-common/Wunreachable-code-1.c|   8 ++
 .../c-c++-common/Wunreachable-code-2.c|   8 ++
 .../c-c++-common/Wunreachable-code-3.c|  35 ++
 gcc/testsuite/g++.dg/Wunreachable-code-5.C|  11 ++
 gcc/testsuite/gcc.dg/Wunreachable-code-4.c|  10 ++
 gcc/tree-cfg.c| 101 +-
 libgomp/oacc-plugin.c |   1 -
 10 files changed, 186 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/Wunreachable-code-1.c
 create mode 100644 gcc/testsuite/c-c++-common/Wunreachable-code-2.c
 create mode 100644 gcc/testsuite/c-c++-common/Wunreachable-code-3.c
 create mode 100644 gcc/testsuite/g++.dg/Wunreachable-code-5.C
 create mode 100644 gcc/testsuite/gcc.dg/Wunreachable-code-4.c

diff --git a/gcc/common.opt b/gcc/common.opt
index 755e1a233b7..0a58cb8a668 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -806,8 +806,8 @@ Common Var(warn_maybe_uninitialized) Warning 
EnabledBy(Wuninitialized)
 Warn about maybe uninitialized automatic variables.
 
 Wunreachable-code
-Common Ignore Warning
-Does nothing. Preserved for backward compatibility.
+Common Var(warn_unreachable_code) Warning EnabledBy(Wextra)
+Warn about trivially unreachable code.
 
 Wunused
 Common Var(warn_unused) Init(0) Warning
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 588094b1db6..26325e41efa 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -17571,7 +17571,14 @@ finish_function (bool inline_p)
 {
   /* Make it so that `main' always returns 0 by default.  */
   if (DECL_MAIN_P (current_function_decl))
-   finish_return_stmt (integer_zero_node);
+   {
+ /* Hack.  We don't want the middle-end to warn that this return
+is unreachable, so we mark its location as special.  */
+ auto saved_il = input_location;
+ input_location = BUILTINS_LOCATION;
+ finish_return_stmt (integer_zero_node);
+ input_location = saved_il;
+   }
 
   if (use_eh_spec_block (current_function_decl))
finish_eh_spec_block (TYPE_RAISES_EXCEPTIONS
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 36fe96b441b..62643e51915 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -267,7 +267,7 @@ in the following sections.
 -Woverloaded-virtual  -Wno-pmf-conversions -Wsign-promo @gol
 -Wsized-deallocation  -Wsuggest-final-methods @gol
 -Wsuggest-final-types  -Wsuggest-override  @gol
--Wno-terminate  -Wuseless-cast  -Wno-vexing-parse  @gol
+-Wno-terminate  -Wunreachable-code  

Re: [PATCH][RFC] middle-end/46476 - resurrect -Wunreachable-code

2021-11-24 Thread Michael Matz via Gcc-patches
Hello,

> +/* Unreachable code in if (0) block.  */
> +void baz(int *p)
> +{
> +   if (0)
> + {
> +return;  /* { dg-bogus "not reachable" } */

Hmm?  Why are you explicitly saying that warning here would be bogus?  It 
quite clearly _is_ unreachable, so warning there makes sense.  Maybe you 
want an XFAILed dg-warning if your current implementation fails to warn, 
and a further XFAILed dg-bogus on the next line?

(Or at the very least a comment in the test case that this is actually not 
what we really want, but rather what current GCCs produce)


Ciao,
Michael.


Re: [PATCH][RFC] middle-end/46476 - resurrect -Wunreachable-code

2021-11-24 Thread Richard Biener via Gcc-patches
On November 24, 2021 4:43:45 PM GMT+01:00, Michael Matz  wrote:
>Hello,
>
>> +/* Unreachable code in if (0) block.  */
>> +void baz(int *p)
>> +{
>> +   if (0)
>> + {
>> +return;  /* { dg-bogus "not reachable" } */
>
>Hmm?  Why are you explicitly saying that warning here would be bogus? 

Because I don't think we want to warn here. Such code is common from template 
instantiation or macro expansion. 

Richard. 

 It 
>quite clearly _is_ unreachable, so warning there makes sense.  Maybe you 
>want an XFAILed dg-warning if your current implementation fails to warn, 
>and a further XFAILed dg-bogus on the next line?
>
>(Or at the very least a comment in the test case that this is actually not 
>what we really want, but rather what current GCCs produce)
>
>
>Ciao,
>Michael.



Re: [PATCH, v2] c++: Diagnose taking address of an immediate member function [PR102753]

2021-11-24 Thread Jakub Jelinek via Gcc-patches
On Tue, Nov 23, 2021 at 03:45:20PM -0500, Jason Merrill wrote:
> > I've played a little bit with this (tried to do it at cp_fold time), but
> > there are problems with that.
> > cp_fold of course isn't a good spot for this because it can be called from
> > fold_for_warn and at that point we don't know if we are inside of immediate
> > invocation's argument or not, or it can be called even inside of consteval
> > fn bodies etc.
> 
> How about checking in cp_fold_r instead of cp_fold?

That seems to work.

> > So, let's suppose we do a separate cp_walk_tree just for
> > this if cxx_dialect >= cxx20 e.g. from cp_fold_function and
> > cp_fully_fold_init or some other useful spot, like in the patch below
> > we avoid walking into THEN_CLAUSE of IF_STMT_CONSTEVAL_P IF_STMTs.
> > And if this would be done before cp_fold_function's cp_fold_r walk,
> > we'd also need calls to source_location_current_p as an exception.
> > The major problem is the location used for the error_at,
> > e.g. the ADDR_EXPRs pretty much never EXPR_HAS_LOCATION and PTRMEM_CST
> > doesn't even have location, so while we would report diagnostics, it would
> > be always
> > cc1plus: error: taking address of an immediate function ‘consteval int 
> > S::foo() const’
> > etc.
> 
> I've checked in a patch to give PTRMEM_CST a location wrapper; perhaps that
> will be helpful.

Unfortunately, the location wrappers are optimized away before we get a
chance to use them in cp_fold_r.
So, on the following patch, we get the location right on PTRMEM_CSTs not
used inside of initializers, but for PTRMEM_CSTs in initializers we report
them at UNKNOWN_LOCATION.
As PTRMEM_CST is a C++ FE tree, I wonder if we couldn't instead of your
patch or in addition to it do:
 struct GTY(()) ptrmem_cst {
   struct tree_common common;
   tree member;
+  location_t locus;
 };
#define PTRMEM_CST_LOCATION(NODE) \
  (((ptrmem_cst_t)PTRMEM_CST_CHECK (NODE))->locus)
and in make_ptrmem_cst set PTRMEM_CST_LOCATION to input_location.

> > I guess one option is to report it even later, during gimplification where
> > gimplify_expr etc. track input_location, but what to do with static
> > initializers?
> > Another option would be to have a walk_tree_1 variant that would be updating
> > input_location similarly to how gimplify_expr does that, i.e.
> >saved_location = input_location;
> >if (save_expr != error_mark_node
> >&& EXPR_HAS_LOCATION (*expr_p))
> >  input_location = EXPR_LOCATION (*expr_p);
> > ...
> >input_location = saved_location;
> > but probably using RAII because walk_tree_1 has a lot of returns in it.
> 
> iloc_sentinel seems relevant.
> 
> > And turn walk_tree_1 into a template instantiated twice, once as walk_tree_1
> > without the input_location handling in it and once with it under some
> > different name?
> 
> Maybe just add the handling to walk_tree_1?

The vast majority of walk_tree_1 users don't want it to change
input_location as it goes.  But if we have PTRMEM_CST_LOCATION, I'd think we
don't really need it.

--- gcc/cp/typeck.c.jj  2021-11-24 09:54:11.521738651 +0100
+++ gcc/cp/typeck.c 2021-11-24 16:28:52.281468302 +0100
@@ -6780,19 +6780,15 @@ cp_build_addr_expr_1 (tree arg, bool str
return error_mark_node;
  }
 
-   if (TREE_CODE (t) == FUNCTION_DECL
-   && DECL_IMMEDIATE_FUNCTION_P (t)
-   && !in_immediate_context ())
- {
-   if (complain & tf_error)
- error_at (loc, "taking address of an immediate function %qD",
-   t);
-   return error_mark_node;
- }
-
type = build_ptrmem_type (context_for_name_lookup (t),
  TREE_TYPE (t));
t = make_ptrmem_cst (type, t);
+
+   /* For addresses of immediate member functions ensure we have
+  EXPR_LOCATION set for possible later diagnostics.  */
+   if (TREE_CODE (TREE_OPERAND (arg, 1)) == FUNCTION_DECL
+   && DECL_IMMEDIATE_FUNCTION_P (TREE_OPERAND (arg, 1)))
+ t = maybe_wrap_with_location (t, input_location);
return t;
   }
 
@@ -6816,15 +6812,6 @@ cp_build_addr_expr_1 (tree arg, bool str
 {
   tree stripped_arg = tree_strip_any_location_wrapper (arg);
   if (TREE_CODE (stripped_arg) == FUNCTION_DECL
- && DECL_IMMEDIATE_FUNCTION_P (stripped_arg)
- && !in_immediate_context ())
-   {
- if (complain & tf_error)
-   error_at (loc, "taking address of an immediate function %qD",
- stripped_arg);
- return error_mark_node;
-   }
-  if (TREE_CODE (stripped_arg) == FUNCTION_DECL
  && !mark_used (stripped_arg, complain) && !(complain & tf_error))
return error_mark_node;
   val = build_address (arg);
@@ -6865,6 +6852,13 @@ cp_build_addr_expr_1 (tree arg, bool str
  complain);
 }
 
+  /* For addresses of immediate functions ensure we have EXPR_LOCATION
+ set for possible late

Re: [PATCH][RFC] middle-end/46476 - resurrect -Wunreachable-code

2021-11-24 Thread Marek Polacek via Gcc-patches
On Wed, Nov 24, 2021 at 04:21:31PM +0100, Richard Biener via Gcc-patches wrote:
> This resurrects -Wunreachable-code and implements a warning for
> trivially unreachable code as of CFG construction.  Most problematic
> with this is the C/C++ frontend added 'return 0;' stmt in main
> which the patch handles for C++ like the C frontend already does
> by using BUILTINS_LOCATION.
> 
> Another problem for future enhancement is that after CFG construction
> we no longer can point to the stmt making a stmt unreachable, so
> this implementation tries to warn on the first unreachable
> statement of a region.  It might be possible to retain a pointer
> to the stmt that triggered creation of a basic-block but I'm not
> sure how reliable that would be.
> 
> So this is really a simple attempt for now, triggered by myself
> running into such a coding error.  As always, the perfect is the
> enemy of the good.
> 
> It does not pass bootstrap (which enables -Wextra), because of the
> situation in g++.dg/Wunreachable-code-5.C where the C++ frontend
> prematurely elides conditions like if (! GATHER_STATISTICS) that
> evaluate to true - oddly enough it does _not_ do this for
> conditions evaluating to false ... (one of the
> c-c++-common/Wunreachable-code-2.c cases).

I've taken a look into the C++ thing.  This is genericize_if_stmt:
if we have

  if (0)
return;

then cond is integer_zerop, then_ is a return_expr, but since it has
TREE_SIDE_EFFECTS, we create a COND_EXPR.  For

  if (!0)
 return;

we do
 170   else if (integer_nonzerop (cond) && !TREE_SIDE_EFFECTS (else_))
 171 stmt = then_;
which elides the if completely.

So it seems it would help if we avoided eliding the if stmt if
-Wunreachable-code is in effect.  I'd be happy to make that change,
if it sounds sane.

Marek



Re: [PATCH] implement -Winfinite-recursion [PR88232]

2021-11-24 Thread Martin Sebor via Gcc-patches

On 11/24/21 3:16 AM, Thomas Schwinge wrote:

Hi!

On 2021-11-09T21:28:43-0700, Martin Sebor via Gcc-patches 
 wrote:

The attached patch adds support to the middle end for detecting
infinitely recursive calls.  The warning is controlled by the new
-Winfinite-recursion option.  The option name is the same as
Clang's.


Thanks!


The warning detects a superset of problems detected by Clang
(based on its tests).  It detects the problem in PR88232
(the feature request) as well as the one in PR 87742,
an unrelated problem report that was root-caused to a bug due
to infinite recursion.


... and I'm thus confirming that it also would have caught
PR101204 "infinite recursion in gtype-desc.c since
r12-1801-g7036e9ef462fde8181bece4ac4e03f3aa27204dc":


I'd hoped it would pick it up but didn't take the time to verify
it did.  Thanks for double-checking that!

Martin



 [...]
 gtype-desc.c: In function ‘void gt_pch_nx(int_hash*, gt_pointer_operator, void*)’:
 gtype-desc.c:11311:1: error: infinite recursion detected 
[-Werror=infinite-recursion]
 11311 | gt_pch_nx (int_hash* x ATTRIBUTE_UNUSED,
   | ^
 gtype-desc.c:11315:15: note: recursive call
 11315 | gt_pch_nx (&((*x)), op, cookie);
   | ~~^
 cc1plus: all warnings being treated as errors
 make[3]: *** [gtype-desc.o] Error 1
 make[3]: Leaving directory `[...]/build-gcc/gcc'
 make[2]: *** [all-stage2-gcc] Error 2
 make[2]: Leaving directory `[...]/build-gcc'
 make[1]: *** [stage2-bubble] Error 2
 make[1]: Leaving directory `[...]/build-gcc'
 make: *** [all] Error 2

That's with recent fix-up commit c71cb26a9e841888f52e4bfcaad94c8f8ecb4fdb
"Get rid of infinite recursion for 'typedef' used with GTY-marked
'gcc/diagnostic-spec.h:nowarn_map' [PR101204, PR103157]" temporarily
reverted (and commit f861ed8b29a5eb6164d1ddbcfbb6232dddae713f
"Use 'location_hash' for 'gcc/diagnostic-spec.h:nowarn_map'" as a
prerequisite, too).


Grüße
  Thomas
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955





Re: [PATCH v2 0/2] RISC-V: add gcc support for Scalar Cryptography v1.0.0-rc6

2021-11-24 Thread Palmer Dabbelt

On Wed, 24 Nov 2021 02:00:33 PST (-0800), Kito Cheng wrote:

I would prefer to accept this patchset even with no builtin functions
or intrinsic functions yet.
It not only adds support for the -march option, but also introduces
predefined macros like __riscv_zk*,
which can be used in *.S files to check whether those instructions are
available.


That makes sense, I guess I hadn't thought of that use case.


On Wed, Nov 24, 2021 at 11:23 AM Palmer Dabbelt  wrote:


[Changing to Jim's new address]

On Mon, 22 Nov 2021 00:19:08 PST (-0800), s...@isrc.iscas.ac.cn wrote:
> From: SiYu Wu 
>
> This patch adds gcc backend support for RISC-V Scalar Cryptography
> Extension (k-ext), including machine description, builtins defines and
> testcases for each k-ext's subset.
>
> A note about Zbkx: Zbkx should be implemented in bitmanip's Zbp, but
> since Zbp is not included in the bitmanip spec v1.0, and crypto's v1.0
> release will be earlier than bitmanip's next release, for now we are
> implementing it here.
>
> Version logs:
>
> v2: As Kito mentions, this patch now only includes the arch-string-related
> stuff; the builtins and md changes are not included, pending the builtins
> and intrinsics being added to the spec. Also removed the unnecessary patches
> and added ChangeLogs.

I don't think there's anything wrong with what's here, but IMO we should
hold off on merging until GCC does something with these extensions.

IIUC all this enables is passing "-march=*Zk*" instead of
"-Wa,-march=*Zk*", and while that is useful I'm worried it'll just make
more of a headache for users who lose a simple way to detect the
intrinsics.  IMO forcing users to pass -Wa properly encodes the "GCC
doesn't support these, but binutils does" scenario pretty sanely, and
users doing things at this level of complexity should be used to that
already because it happens somewhat frequently.

I'm not sure if I'm missing some use case for this, though.

> SiYu Wu (2):
>   RISC-V: Add option defines for Scalar Cryptography
>   RISC-V: Add implied defines of Zk, Zkn and Zks
>
>  gcc/common/config/riscv/riscv-common.c | 38 +-
>  gcc/config/riscv/arch-canonicalize | 16 ++-
>  gcc/config/riscv/riscv-opts.h  | 22 +++
>  gcc/config/riscv/riscv.opt |  3 ++
>  4 files changed, 77 insertions(+), 2 deletions(-)


Re: [PATCH] AArch64: Improve address rematerialization costs

2021-11-24 Thread Wilco Dijkstra via Gcc-patches
Hi Richard,

> Can you fold in the rtx costs part of the original GOT relaxation patch?

Sure, see below for the updated version.

> I don't think there's enough information here for me to be able to review
> the patch though.  I'll need to find testcases, look in detail at what
> the rtl passes are doing, and try to work out whether (and why) this is
> a good way of fixing things.

Well today GCC does everything with costs rather than backend callbacks.
I'd be interested in hearing about alternatives that have the same effect 
without a callback that allows a backend to decide between spilling and
rematerialization.

Cheers,
Wilco


v2: fold in GOT remat cost

Improve rematerialization costs of addresses.  The current costs are set too 
high
which results in extra register pressure and spilling.  Using lower costs means
addresses will be rematerialized more often rather than being spilled or causing
spills.  This results in significant codesize reductions and performance gains.
SPECINT2017 improves by 0.27% with LTO and 0.16% without LTO.  Codesize is 0.12%
smaller.

Passes bootstrap and regress. OK for commit?

ChangeLog:
2021-06-01  Wilco Dijkstra  

* config/aarch64/aarch64.c (aarch64_rtx_costs): Use better 
rematerialization
costs for HIGH, LO_SUM and SYMREF.
---

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
39de231d8ac6d10362cdd2b48eb9bd9de60c6703..a7f99ece55383168fb0f77e5c11c501d0bb2f013
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -13610,45 +13610,28 @@ cost_plus:
  return false;  /* All arguments need to be in registers.  */
}
 
+/* The following costs are used for rematerialization of addresses.
+   Set a low cost for all global accesses - this ensures they are
+   preferred for rematerialization, blocks them from being spilled
+   and reduces register pressure.  The result is significant codesize
+   reductions and performance gains. */
+
 case SYMBOL_REF:
 
-  if (aarch64_cmodel == AARCH64_CMODEL_LARGE
- || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
-   {
- /* LDR.  */
- if (speed)
-   *cost += extra_cost->ldst.load;
-   }
-  else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
-  || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
-   {
- /* ADRP, followed by ADD.  */
- *cost += COSTS_N_INSNS (1);
- if (speed)
-   *cost += 2 * extra_cost->alu.arith;
-   }
-  else if (aarch64_cmodel == AARCH64_CMODEL_TINY
-  || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
-   {
- /* ADR.  */
- if (speed)
-   *cost += extra_cost->alu.arith;
-   }
+  /* Use a separate remateralization cost for GOT accesses.  */
+  if (aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC
+ && aarch64_classify_symbol (x, 0) == SYMBOL_SMALL_GOT_4G)
+   *cost = COSTS_N_INSNS (1) / 2;
 
-  if (flag_pic)
-   {
- /* One extra load instruction, after accessing the GOT.  */
- *cost += COSTS_N_INSNS (1);
- if (speed)
-   *cost += extra_cost->ldst.load;
-   }
+  *cost = 0;
   return true;
 
 case HIGH:
+  *cost = 0;
+  return true;
+
 case LO_SUM:
-  /* ADRP/ADD (immediate).  */
-  if (speed)
-   *cost += extra_cost->alu.arith;
+  *cost = COSTS_N_INSNS (3) / 4;
   return true;
 
 case ZERO_EXTRACT:


Re: Reduce scope of a few 'class loop *loop' variables (was: [PATCH v4] Use range-based for loops for traversing loops)

2021-11-24 Thread Martin Jambor
Hi,

On Wed, Nov 24 2021, Thomas Schwinge wrote:
> Hi!
>
> On 2021-07-30T15:58:36+0800, "Kewen.Lin"  wrote:
>> on 2021/7/30 下午3:18, Thomas Schwinge wrote:
>>> Curious why in some instances we're not removing the 'class loop *loop'
>>> declaration, I had a look, and this may suggest some further clean-up?
>>
>> [...] I like your nice proposed further clean-up,
>> thanks for doing that!
>
> Ping for my patch to "Reduce scope of a few 'class loop *loop' variables",
> see attached.
>

[...]
>

> Further clean-up after commit e41ba804ba5f5ca433e09238d561b1b4c8b10985
> "Use range-based for loops for traversing loops".  No functional change.
>
>   gcc/
>   * cfgloop.c (verify_loop_structure): Reduce scope of
>   'class loop *loop' variable.
>   * ipa-fnsummary.c (analyze_function_body): Likewise.

FWIW, the ipa-fnsummary.c hunk is OK (and better-than-expected clean-up
too, because it avoids the loop variable being hidden by another with
the same name in an earlier loop).

Thanks,

Martin


>   * loop-init.c (fix_loop_structure): Likewise.
>   * loop-invariant.c (calculate_loop_reg_pressure): Likewise.
>   * predict.c (predict_loops): Likewise.
>   * tree-loop-distribution.c (loop_distribution::execute): Likewise.
>   * tree-vectorizer.c (pass_vectorize::execute): Likewise.

[...]

> diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
> index 7e9201a554a..cb13d2e4b3c 100644
> --- a/gcc/ipa-fnsummary.c
> +++ b/gcc/ipa-fnsummary.c
> @@ -2934,7 +2934,6 @@ analyze_function_body (struct cgraph_node *node, bool 
> early)
>if (nonconstant_names.exists () && !early)
>  {
>ipa_fn_summary *s = ipa_fn_summaries->get (node);
> -  class loop *loop;
>unsigned max_loop_predicates = opt_for_fn (node->decl,
>param_ipa_max_loop_predicates);
>  
> @@ -2978,7 +2977,7 @@ analyze_function_body (struct cgraph_node *node, bool 
> early)
>/* To avoid quadratic behavior we analyze stride predicates only
>   with respect to the containing loop.  Thus we simply iterate
>over all defs in the outermost loop body.  */
> -  for (loop = loops_for_fn (cfun)->tree_root->inner;
> +  for (class loop *loop = loops_for_fn (cfun)->tree_root->inner;
>  loop != NULL; loop = loop->next)
>   {
> ipa_predicate loop_stride = true;


Re: [PATCH] C, C++, Fortran, OpenMP: Add 'has_device_addr' clause to 'target' construct

2021-11-24 Thread Marcel Vollweiler

Hi Jakub,

this is again a new version of the 'has_device_addr' patch. It includes
further minor changes in the C/C++ part and in addition the Fortran
implementation.

Tested on x86_64-linux with nvptx offloading with no regressions.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
C, C++, Fortran, OpenMP: Add 'has_device_addr' clause to 'target' construct.

This patch adds the 'has_device_addr' clause to the OpenMP 'target' construct
which was introduced in OpenMP 5.1 (OpenMP API 5.1 specification pp. 197ff):

has_device_addr(list)

"The has_device_addr clause indicates that its list items already have device
addresses and therefore they may be directly accessed from a target device.
If the device address of a list item is not for the device on which the target
region executes, accessing the list item inside the region results in
unspecified behavior. The list items may include array sections." (p. 200)

"A list item may not be specified in both an is_device_ptr clause and a
has_device_addr clause on the directive." (p. 202)

"A list item that appears in an is_device_ptr or a has_device_addr clause must
not be specified in any data-sharing attribute clause on the same target
construct." (p. 203)

gcc/c-family/ChangeLog:

* c-omp.c (c_omp_split_clauses): Add OMP_CLAUSE_HAS_DEVICE_ADDR case.
* c-pragma.h (enum pragma_kind): Add 5.1 in comment.
(enum pragma_omp_clause): Add PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR.

gcc/c/ChangeLog:

* c-parser.c (c_parser_omp_clause_name): Parse 'has_device_addr' clause.
(c_parser_omp_variable_list): Handle array sections.
(c_parser_omp_clause_has_device_addr): Added.
(c_parser_omp_all_clauses): Add PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR case.
(c_parser_omp_target_exit_data): Add HAS_DEVICE_ADDR to OMP_CLAUSE_MASK.
* c-typeck.c (handle_omp_array_sections): Handle clause restrictions.
(c_finish_omp_clauses): Handle array sections.

gcc/cp/ChangeLog:

* parser.c (cp_parser_omp_clause_name): Parse 'has_device_addr' clause.
(cp_parser_omp_var_list_no_open): Handle array sections.
(cp_parser_omp_all_clauses): Add PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR case.
(cp_parser_omp_target_update): Add HAS_DEVICE_ADDR to OMP_CLAUSE_MASK.
* pt.c (tsubst_omp_clauses): Add cases for OMP_CLAUSE_HAS_DEVICE_ADDR.
* semantics.c (handle_omp_array_sections): Handle clause restrictions.
(finish_omp_clauses): Handle array sections.

gcc/fortran/ChangeLog:

* dump-parse-tree.c (show_omp_clauses): Added OMP_LIST_HAS_DEVICE_ADDR
case.
* gfortran.h: Added OMP_LIST_HAS_DEVICE_ADDR.
* openmp.c (enum omp_mask1): Added OMP_CLAUSE_HAS_DEVICE_ADDR.
(gfc_match_omp_clauses): Parse HAS_DEVICE_ADDR clause.
(resolve_omp_clauses): Same.
* trans-openmp.c (gfc_trans_omp_variable_list): Added 
OMP_LIST_HAS_DEVICE_ADDR case.
(gfc_trans_omp_clauses): Firstprivatize of array descriptors.

gcc/ChangeLog:

* gimplify.c (gimplify_scan_omp_clauses): Add OMP_CLAUSE_HAS_DEVICE_ADDR
cases
and handle array sections.
(gimplify_adjust_omp_clauses): Add OMP_CLAUSE_HAS_DEVICE_ADDR case.
* omp-low.c (scan_sharing_clauses): Handle OMP_CLAUSE_HAS_DEVICE_ADDR.
(lower_omp_target): Same.
* tree-core.h (enum omp_clause_code): Same.
* tree-nested.c (convert_nonlocal_omp_clauses): Same.
(convert_local_omp_clauses): Same.
* tree-pretty-print.c (dump_omp_clause): Same.
* tree.c: Same.

libgomp/ChangeLog:

* libgomp.texi: Updated entry for HAS_DEVICE_ADDR.
* target.c (copy_firstprivate_data): Copy only if host address is not
NULL.
* testsuite/libgomp.c++/target-has-device-addr-2.C: New test.
* testsuite/libgomp.c++/target-has-device-addr-4.C: New test.
* testsuite/libgomp.c-c++-common/target-has-device-addr-1.c: New test.
* testsuite/libgomp.c/target-has-device-addr-3.c: New test.
* testsuite/libgomp.fortran/target-has-device-addr-1.f90: New test.
* testsuite/libgomp.fortran/target-has-device-addr-2.f90: New test.
* testsuite/libgomp.fortran/target-has-device-addr-3.f90: New test.
* testsuite/libgomp.fortran/target-has-device-addr-4.f90: New test.

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/clauses-1.c: Added has_device_addr to test cases.
* g++.dg/gomp/attrs-1.C: Added has_device_addr to test cases.
* g++.dg/gomp/attrs-2.C: Added has_device_addr to test cases.
* c-c++-common/gomp/target-has-device-addr-1.c: New test.
* c-c++-common/gomp/target-has-device-addr-2.c: New test.
* c-c++-common/gomp/target-is-devi

Re: [PATCH][RFC] middle-end/46476 - resurrect -Wunreachable-code

2021-11-24 Thread Michael Matz via Gcc-patches
Hello,

On Wed, 24 Nov 2021, Richard Biener wrote:

> >> +/* Unreachable code in if (0) block.  */
> >> +void baz(int *p)
> >> +{
> >> +   if (0)
> >> + {
> >> +return;  /* { dg-bogus "not reachable" } */
> >
> >Hmm?  Why are you explicitly saying that warning here would be bogus? 
> 
> Because I don't think we want to warn here. Such code is common from 
> template instantiation or macro expansion.

Like all code with a (const-propagated) explicit 'if (0)', which is of 
course the reason why -Wunreachable-code is a challenge.  IOW: I could 
accept your argument but then wonder why you want to warn about the second 
statement of the guarded block.  The situation was:

  if (0) {
return;  // (1) don't warn here?
whatever++;  // (2) but warn here?
  }

That seems even more confusing.  So you don't want to warn about 
unreachable code (the 'return') but you do want to warn about unreachable 
code within unreachable code (point (2) is unreachable because of the 
if(0) and because of the return).  If your worry is macro/template 
expansion resulting in if(0)'s then I don't see why you would only disable 
warnings for some of the statements therein.

It seems we are actually interested in code unreachable via fallthrough or 
labels, not in all unreachable code, so maybe the warning is mis-named.

Btw. what does the code now do about this situation:

  if (0) {
something++;  // 1
return;   // 2
somethingelse++;  // 3
  }

does it warn at (1) or not?  (I assume it unconditionally warns at (3))


Ciao,
Michael.


[PATCH] c++: Fix up diagnostics about taking address of an immediate member function [PR102753]

2021-11-24 Thread Jakub Jelinek via Gcc-patches
On Wed, Nov 24, 2021 at 05:02:03PM +0100, Jakub Jelinek via Gcc-patches wrote:
> Unfortunately, the location wrappers are optimized away before we get a
> chance to use them in cp_fold_r.
> So, on the following patch, we get the location right on PTRMEM_CSTs not
> used inside of initializers, but for PTRMEM_CSTs in initializers we report
> them at UNKNOWN_LOCATION.
> As PTRMEM_CST is a C++ FE tree, I wonder if we couldn't instead of your
> patch or in addition to it do:
>  struct GTY(()) ptrmem_cst {
>struct tree_common common;
>tree member;
> +  location_t locus;
>  };
> #define PTRMEM_CST_LOCATION(NODE) \
>   (((ptrmem_cst_t)PTRMEM_CST_CHECK (NODE))->locus)
> and in make_ptrmem_cst set PTRMEM_CST_LOCATION to input_location.

Here is a full so far lightly tested patch that does this,
GXX_TESTSUITE_STDS=98,11,14,17,20,2b make check-g++ \
RUNTESTFLAGS="dg.exp='consteval* srcloc* is-constant-eval* spaceship-synth* 
constexpr-99287* feat-cxx* lambda-specifiers1*'"
which covers all tests that use consteval keyword in gcc/testsuite/.

Ok for trunk if it passes full bootstrap/regtest?

2021-11-24  Jakub Jelinek  

PR c++/102753
* cp-tree.h (struct ptrmem_cst): Add locus member.
(PTRMEM_CST_LOCATION): Define.
* tree.c (make_ptrmem_cst): Set PTRMEM_CST_LOCATION to input_location.
* typeck.c (cp_build_addr_expr_1): Don't diagnose taking address of
immediate functions here.  Instead when taking their address make
sure the returned ADDR_EXPR has EXPR_LOCATION set.
* pt.c (tsubst_copy): Ensure ADDR_EXPR of immediate function has
EXPR_LOCATION set.
* cp-gimplify.c (cp_fold_r): Diagnose taking address of immediate
functions here.  For consteval if don't walk THEN_CLAUSE.
(cp_genericize_r): Don't assert calls to immediate functions must
be source_location_current_p, instead only constant evaluate
calls to source_location_current_p.

* g++.dg/cpp2a/consteval20.C: Add some extra tests.
* g++.dg/cpp2a/consteval23.C: Likewise.
* g++.dg/cpp2a/consteval25.C: New test.
* g++.dg/cpp2a/srcloc20.C: New test.

--- gcc/cp/cp-tree.h.jj 2021-11-24 15:05:23.291928876 +0100
+++ gcc/cp/cp-tree.h2021-11-24 17:08:19.507806769 +0100
@@ -703,6 +703,7 @@ struct GTY(()) template_parm_index {
 struct GTY(()) ptrmem_cst {
   struct tree_common common;
   tree member;
+  location_t locus;
 };
 typedef struct ptrmem_cst * ptrmem_cst_t;
 
@@ -4724,6 +4725,11 @@ more_aggr_init_expr_args_p (const aggr_i
 #define PTRMEM_CST_MEMBER(NODE) \
   (((ptrmem_cst_t)PTRMEM_CST_CHECK (NODE))->member)
 
+/* For a pointer-to-member constant `X::Y' this is a location where
+   the address of the member has been taken.  */
+#define PTRMEM_CST_LOCATION(NODE) \
+  (((ptrmem_cst_t)PTRMEM_CST_CHECK (NODE))->locus)
+
 /* The expression in question for a TYPEOF_TYPE.  */
 #define TYPEOF_TYPE_EXPR(NODE) (TYPE_VALUES_RAW (TYPEOF_TYPE_CHECK (NODE)))
 
--- gcc/cp/tree.c.jj2021-11-24 15:05:23.371927735 +0100
+++ gcc/cp/tree.c   2021-11-24 17:09:05.348164621 +0100
@@ -5167,6 +5167,7 @@ make_ptrmem_cst (tree type, tree member)
   tree ptrmem_cst = make_node (PTRMEM_CST);
   TREE_TYPE (ptrmem_cst) = type;
   PTRMEM_CST_MEMBER (ptrmem_cst) = member;
+  PTRMEM_CST_LOCATION (ptrmem_cst) = input_location;
   return ptrmem_cst;
 }
 
--- gcc/cp/typeck.c.jj  2021-11-24 09:54:11.521738651 +0100
+++ gcc/cp/typeck.c 2021-11-24 17:09:43.620628485 +0100
@@ -6780,16 +6780,6 @@ cp_build_addr_expr_1 (tree arg, bool str
return error_mark_node;
  }
 
-   if (TREE_CODE (t) == FUNCTION_DECL
-   && DECL_IMMEDIATE_FUNCTION_P (t)
-   && !in_immediate_context ())
- {
-   if (complain & tf_error)
- error_at (loc, "taking address of an immediate function %qD",
-   t);
-   return error_mark_node;
- }
-
type = build_ptrmem_type (context_for_name_lookup (t),
  TREE_TYPE (t));
t = make_ptrmem_cst (type, t);
@@ -6816,15 +6806,6 @@ cp_build_addr_expr_1 (tree arg, bool str
 {
   tree stripped_arg = tree_strip_any_location_wrapper (arg);
   if (TREE_CODE (stripped_arg) == FUNCTION_DECL
- && DECL_IMMEDIATE_FUNCTION_P (stripped_arg)
- && !in_immediate_context ())
-   {
- if (complain & tf_error)
-   error_at (loc, "taking address of an immediate function %qD",
- stripped_arg);
- return error_mark_node;
-   }
-  if (TREE_CODE (stripped_arg) == FUNCTION_DECL
  && !mark_used (stripped_arg, complain) && !(complain & tf_error))
return error_mark_node;
   val = build_address (arg);
@@ -6865,6 +6846,13 @@ cp_build_addr_expr_1 (tree arg, bool str
  complain);
 }
 
+  /* For addresses of immediate functions ensure we have EXPR_LOCATION
+ set for possible later diagnostic

Re: [PATCH][RFC] middle-end/46476 - resurrect -Wunreachable-code

2021-11-24 Thread Martin Sebor via Gcc-patches

On 11/24/21 8:21 AM, Richard Biener via Gcc-patches wrote:

This resurrects -Wunreachable-code and implements a warning for
trivially unreachable code as of CFG construction.  Most problematic
with this is the C/C++ frontend added 'return 0;' stmt in main
which the patch handles for C++ like the C frontend already does
by using BUILTINS_LOCATION.

Another problem for future enhancement is that after CFG construction
we no longer can point to the stmt making a stmt unreachable, so
this implementation tries to warn on the first unreachable
statement of a region.  It might be possible to retain a pointer
to the stmt that triggered creation of a basic-block but I'm not
sure how reliable that would be.

So this is really a simple attempt for now, triggered by myself
running into such a coding error.  As always, the perfect is the
enemy of the good.

It does not pass bootstrap (which enables -Wextra), because of the
situation in g++.dg/Wunreachable-code-5.C where the C++ frontend
prematurely elides conditions like if (! GATHER_STATISTICS) that
evaluate to true - oddly enough it does _not_ do this for
conditions evaluating to false ... (one of the
c-c++-common/Wunreachable-code-2.c cases).


I'm very much in favor of reviving the warning, even in its
current simplistic form.  I especially welcome the suggestion
to enhance it in the future, including adjusting its schedule
among other passes (or adding other, later invocations).  It
would be overly constraining to consider this placement ideal
or set in stone.

Among possible enhancements worth considering is handling
constant conditionals like:

int f (void)
{
  if (1)
return 0;
  else
return 1;   <<< warn
}

int g (void)
{
  if (1)
return 0;
  return 1; <<< warn also in C (not just in C++)
}

By the way, a related feature that would be useful and that's
been requested in the past is warning for stores with no effect,
as in:

  int i;
  i = 1;
  i = 2;   <<< warn here

The detection of the simple cases like the one above can also
be almost trivially implemented.

Martin



Richard.

2021-11-24  Richard Biener  

PR middle-end/46476
* common.opt (Wunreachable-code): No longer ignored,
add warn_unreachable_code variable, enable with -Wextra.
* doc/invoke.texi (Wunreachable-code): Document.
(Wextra): Amend.
* tree-cfg.c (build_gimple_cfg): Move case label grouping...
(execute_build_cfg): ... here after new -Wunreachable-code
warnings.
(warn_unreachable_code_post_cfg_build): New function.
(mark_forward_reachable_blocks): Likewise.
(reverse_guess_deadend): Likewise.

gcc/cp/
* decl.c (finish_function): Set input_location to
BUILTINS_LOCATION around the code building the return 0
for main().

libgomp/
* oacc-plugin.c (GOMP_PLUGIN_acc_default_dim): Remove spurious
return.

gcc/testsuite/
* c-c++-common/Wunreachable-code-1.c: New testcase.
* c-c++-common/Wunreachable-code-2.c: Likewise.
* c-c++-common/Wunreachable-code-3.c: Likewise.
* gcc.dg/Wunreachable-code-4.c: Likewise.
* g++.dg/Wunreachable-code-5.C: Likewise.
---
  gcc/common.opt|   4 +-
  gcc/cp/decl.c |   9 +-
  gcc/doc/invoke.texi   |   9 +-
  .../c-c++-common/Wunreachable-code-1.c|   8 ++
  .../c-c++-common/Wunreachable-code-2.c|   8 ++
  .../c-c++-common/Wunreachable-code-3.c|  35 ++
  gcc/testsuite/g++.dg/Wunreachable-code-5.C|  11 ++
  gcc/testsuite/gcc.dg/Wunreachable-code-4.c|  10 ++
  gcc/tree-cfg.c| 101 +-
  libgomp/oacc-plugin.c |   1 -
  10 files changed, 186 insertions(+), 10 deletions(-)
  create mode 100644 gcc/testsuite/c-c++-common/Wunreachable-code-1.c
  create mode 100644 gcc/testsuite/c-c++-common/Wunreachable-code-2.c
  create mode 100644 gcc/testsuite/c-c++-common/Wunreachable-code-3.c
  create mode 100644 gcc/testsuite/g++.dg/Wunreachable-code-5.C
  create mode 100644 gcc/testsuite/gcc.dg/Wunreachable-code-4.c

diff --git a/gcc/common.opt b/gcc/common.opt
index 755e1a233b7..0a58cb8a668 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -806,8 +806,8 @@ Common Var(warn_maybe_uninitialized) Warning 
EnabledBy(Wuninitialized)
  Warn about maybe uninitialized automatic variables.
  
  Wunreachable-code

-Common Ignore Warning
-Does nothing. Preserved for backward compatibility.
+Common Var(warn_unreachable_code) Warning EnabledBy(Wextra)
+Warn about trivially unreachable code.
  
  Wunused

  Common Var(warn_unused) Init(0) Warning
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 588094b1db6..26325e41efa 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -17571,7 +17571,14 @@ finish_function (bool inline_p)
  {
/* Make it so that `main' always returns 0 by default.  */
if (DECL_MAIN_P (current_functio

Re: [PATCH][RFC] middle-end/46476 - resurrect -Wunreachable-code

2021-11-24 Thread Eric Gallager via Gcc-patches
On Wed, Nov 24, 2021 at 10:22 AM Richard Biener via Gcc-patches
 wrote:
>
> This resurrects -Wunreachable-code and implements a warning for
> trivially unreachable code as of CFG construction.  Most problematic
> with this is the C/C++ frontend added 'return 0;' stmt in main
> which the patch handles for C++ like the C frontend already does
> by using BUILTINS_LOCATION.
>
> Another problem for future enhancement is that after CFG construction
> we no longer can point to the stmt making a stmt unreachable, so
> this implementation tries to warn on the first unreachable
> statement of a region.  It might be possible to retain a pointer
> to the stmt that triggered creation of a basic-block but I'm not
> sure how reliable that would be.
>
> So this is really a simple attempt for now, triggered by myself
> running into such a coding error.  As always, the perfect is the
> enemy of the good.
>
> It does not pass bootstrap (which enables -Wextra), because of the
> situation in g++.dg/Wunreachable-code-5.C where the C++ frontend
> prematurely elides conditions like if (! GATHER_STATISTICS) that
> evaluate to true - oddly enough it does _not_ do this for
> conditions evaluating to false ... (one of the
> c-c++-common/Wunreachable-code-2.c cases).
>
> Richard.

There are several bugs about reviving -Wunreachable-code open, all for
different aspects of it. Do we want to consider making it an umbrella
flag that's split into multiple sub-options?
Bug 46476, which you mentioned, was suggested to be
-Wunreachable-code-return specifically:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46476
Meanwhile, there's also bug 92479, for a warning to be named
-Wunreachable-code-break:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92479
Then there's also bug 82100, which doesn't have a name suggested for
it yet: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82100
I think having separate flags for the 3 of these that are all enabled
by -Wunreachable-code as an umbrella would be good.

Eric

>
> 2021-11-24  Richard Biener  
>
> PR middle-end/46476
> * common.opt (Wunreachable-code): No longer ignored,
> add warn_unreachable_code variable, enable with -Wextra.
> * doc/invoke.texi (Wunreachable-code): Document.
> (Wextra): Amend.
> * tree-cfg.c (build_gimple_cfg): Move case label grouping...
> (execute_build_cfg): ... here after new -Wunreachable-code
> warnings.
> (warn_unreachable_code_post_cfg_build): New function.
> (mark_forward_reachable_blocks): Likewise.
> (reverse_guess_deadend): Likewise.
>
> gcc/cp/
> * decl.c (finish_function): Set input_location to
> BUILTINS_LOCATION around the code building the return 0
> for main().
>
> libgomp/
> * oacc-plugin.c (GOMP_PLUGIN_acc_default_dim): Remove spurious
> return.
>
> gcc/testsuite/
> * c-c++-common/Wunreachable-code-1.c: New testcase.
> * c-c++-common/Wunreachable-code-2.c: Likewise.
> * c-c++-common/Wunreachable-code-3.c: Likewise.
> * gcc.dg/Wunreachable-code-4.c: Likewise.
> * g++.dg/Wunreachable-code-5.C: Likewise.
> ---
>  gcc/common.opt|   4 +-
>  gcc/cp/decl.c |   9 +-
>  gcc/doc/invoke.texi   |   9 +-
>  .../c-c++-common/Wunreachable-code-1.c|   8 ++
>  .../c-c++-common/Wunreachable-code-2.c|   8 ++
>  .../c-c++-common/Wunreachable-code-3.c|  35 ++
>  gcc/testsuite/g++.dg/Wunreachable-code-5.C|  11 ++
>  gcc/testsuite/gcc.dg/Wunreachable-code-4.c|  10 ++
>  gcc/tree-cfg.c| 101 +-
>  libgomp/oacc-plugin.c |   1 -
>  10 files changed, 186 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/c-c++-common/Wunreachable-code-1.c
>  create mode 100644 gcc/testsuite/c-c++-common/Wunreachable-code-2.c
>  create mode 100644 gcc/testsuite/c-c++-common/Wunreachable-code-3.c
>  create mode 100644 gcc/testsuite/g++.dg/Wunreachable-code-5.C
>  create mode 100644 gcc/testsuite/gcc.dg/Wunreachable-code-4.c
>
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 755e1a233b7..0a58cb8a668 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -806,8 +806,8 @@ Common Var(warn_maybe_uninitialized) Warning 
> EnabledBy(Wuninitialized)
>  Warn about maybe uninitialized automatic variables.
>
>  Wunreachable-code
> -Common Ignore Warning
> -Does nothing. Preserved for backward compatibility.
> +Common Var(warn_unreachable_code) Warning EnabledBy(Wextra)
> +Warn about trivially unreachable code.
>
>  Wunused
>  Common Var(warn_unused) Init(0) Warning
> diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
> index 588094b1db6..26325e41efa 100644
> --- a/gcc/cp/decl.c
> +++ b/gcc/cp/decl.c
> @@ -17571,7 +17571,14 @@ finish_function (bool inline_p)
>  {
>/* Make it so that `main' always returns 0 by default.  */
>if (DE

Re: Reduce scope of a few 'class loop *loop' variables (was: [PATCH v4] Use range-based for loops for traversing loops)

2021-11-24 Thread Jeff Law via Gcc-patches




On 11/24/2021 7:24 AM, Thomas Schwinge wrote:

Hi!

On 2021-07-30T15:58:36+0800, "Kewen.Lin"  wrote:

on 2021/7/30 下午3:18, Thomas Schwinge wrote:

Curious why in some instances we're not removing the 'class loop *loop'
declaration, I had a look, and this may suggest some further clean-up?

[...] I like your nice proposed further clean-up,
thanks for doing that!

Ping for my patch to "Reduce scope of a few 'class loop *loop' variables",
see attached.

OK for the trunk.  Sorry about the wait.

jeff



[PATCH] PR tree-optimization/103359 - Check for equivalences between PHI argument and def.

2021-11-24 Thread Andrew MacLeod via Gcc-patches
PHI nodes frequently feed each other, and this is particularly true of 
the one/two incoming edge PHIs inserted by some of the loop analysis 
code which is introduced at the start of the VRP passes.


Ranger has a hybrid of optimistic vs pessimistic evaluation, and when it 
switches to pessimistic, it has to assume VARYING for a range.  PHIs are 
calculated as the union of all incoming edges, so once we throw a 
VARYING into the mix, there's not much chance going back.  (mostly 
true... we can sometimes update the range when inputs change, but we 
prefer to avoid iterating when possible)


We already have code to recognize that if an argument to a PHI is the 
same as the def, it cannot provide any additional information and is 
skipped.  ie,


  # h_10 = PHI <4(2), h_10(3), h_10(4), 1(7)>

We can skip the h_10 arguments, and produce [1,1][4,4] as the range without 
any additional information/processing.


This patch extends that slightly to recognize that if the argument is a 
known equivalence of the def, it also does not provide any additional 
information.  This allows us to "ignore" some of the pessimistic VARYING 
values that come in on back edges when the relation oracle indicates 
that there is a known equivalence.


Take for instance the sequence from the PR testcase:

   :
  # h_7 = PHI <4(2), 1(4)>

   :
  # h_18 = PHI 

   :
  # h_22 = PHI 

   :
  # h_20 = PHI 

 We only fully calculate one range at a time, so when calculating h_18, 
we need to first resolve the range h_22 on the back edge 3->6. That 
feeds back to h_18, which isn't fully calculated yet and is 
pessimistically assumed to be VARYING until we do get a value. With h_22 
being varying when resolving h_18 now, we end up making h_18 varying, and 
lose the info from h_7.


This patch extends the equivalence observation slightly to recognize 
that if the argument is a known equivalence of the def in predecessor 
block, it also does not provide any additional information.  This allows 
us to ignore some of the pessimistic VARYING values that are set when 
the relation oracle indicates that there is a known equivalence.


In the above case, h_22 is known to be equivalent to h_18 in BB3, and so 
we can ignore the range h_22 provides on any edge coming from bb3. There 
is a caveat that if *all* the arguments to a PHI are in the equivalence 
set, then you have to use the range of the equivalence; otherwise you 
get UNDEFINED.


This will help us to see through some of the artifacts of cycling PHIs 
in these simple cases, and in the above case, we end up with h_7, h_18, 
h_22 and h_20 all in the equivalence set with a range of [1, 1][4, 4], 
and we can remove the code we need to like we did in GCC11.


This won't help with more complex PHI cycles, but that seems like 
something we could be looking at elsewhere, phi-opt maybe, utilizing 
ranger to set the global range when it's complex.


Bootstrapped on x86_64-pc-linux-gnu with no regressions.  OK?

Andrew




From 9cb5bd6c436165a37717d58388950c5c5ecaf35e Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Tue, 23 Nov 2021 14:12:29 -0500
Subject: [PATCH] Check for equivalences between PHI argument and def.

If a PHI argument on an edge is equivalent with the DEF, then it doesn't
provide any new information, defer processing it unless they are all
equivalences.

	PR tree-optimization/103359
	gcc/
	* gimple-range-fold.cc (fold_using_range::range_of_phi): If arg is
	equivalent to def, don't initially include its range.

	gcc/testsuite/
	* gcc.dg/pr103359.c: New.
---
 gcc/gimple-range-fold.cc| 16 
 gcc/testsuite/gcc.dg/pr103359.c | 21 +
 2 files changed, 37 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr103359.c

diff --git a/gcc/gimple-range-fold.cc b/gcc/gimple-range-fold.cc
index ec9690b05e4..d66ada5bb7c 100644
--- a/gcc/gimple-range-fold.cc
+++ b/gcc/gimple-range-fold.cc
@@ -771,6 +771,7 @@ fold_using_range::range_of_phi (irange &r, gphi *phi, fur_source &src)
   tree phi_def = gimple_phi_result (phi);
   tree type = gimple_range_type (phi);
   int_range_max arg_range;
+  int_range_max equiv_range;
   unsigned x;
 
   if (!type)
@@ -794,6 +795,16 @@ fold_using_range::range_of_phi (irange &r, gphi *phi, fur_source &src)
   // Get the range of the argument on its edge.
   src.get_phi_operand (arg_range, arg, e);
 
+  // Likewise, if the incoming PHI argument is equivalent to this
+  // PHI definition, it provides no new info.  Accumulate these ranges
+  // in case all arguments are equivalences.
+  if (src.query ()->query_relation (e, arg, phi_def, false) == EQ_EXPR)
+	{
+	  single_arg = arg;
+	  equiv_range.union_(arg_range);
+	  continue;
+	}
+
   if (!arg_range.undefined_p ())
 	{
 	  // Register potential dependencies for stale value tracking.
@@ -816,6 +827,11 @@ fold_using_range::range_of_phi (irange &r, gphi *phi, fur_source &src)
 	break;
 }
 
+// If all arguments were equivalences, use the equivalen

Re: [PATCH v6] rtl: builtins: (not just) rs6000: Add builtins for fegetround, feclearexcept and feraiseexcept [PR94193]

2021-11-24 Thread Raoni Fassina Firmino via Gcc-patches
Hi Joseph,

Thanks for the detailed review and explanations.

On Mon, Oct 18, 2021 at 03:54:53PM +, Joseph Myers wrote:
> However, it's better to get things right automatically without needing any 
> macros or other header additions at all.  That is, define feclearexcept as 
> a built-in function, *without* the extra arguments, and with the back end 
> knowing about the FE_* values for the target libc.  Then you can simply 
> avoid expanding the function inline when the back end doesn't know both 
> the FE_* values and how to use them.

I took this part to heart. I agree that an approach in the mold of the v5
sounds more like it: something that will "just work" or fall back
gracefully. And who knew, with your insight I think I found just
the thing. I am finishing a v7 and I hope it addresses the previous
concerns and sidesteps all the complications of the v6 approach.


o/
Raoni


Fix handling of static chain in modref

2021-11-24 Thread Jan Hubicka via Gcc-patches
Hi,
this patch fixes a wrong-code issue where modref did not propagate flags
for the static chain in ipa_merge_modref_summary_after_inlining.  It is a
place I missed updating in the original patch extending return slot
tracking to the static chain.  Unlike the return slot, we need to propagate
flags here (the return slot is a write-only parameter and does not need it).

Bootstrapped/regtested x86_64-linux, committed.
gcc/ChangeLog:

2021-11-24  Jan Hubicka  

* ipa-modref.c (implicit_eaf_flags_for_edge_and_arg): Break out from...
(modref_merge_call_site_flags): ... here.
(ipa_merge_modref_summary_after_inlining): Use it.

gcc/testsuite/ChangeLog:

2021-11-24  Jan Hubicka  

* gcc.c-torture/execute/pr103405.c: New test.

diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
index 923ae6c1dd3..c2edc0d28a6 100644
--- a/gcc/ipa-modref.c
+++ b/gcc/ipa-modref.c
@@ -4827,6 +4827,30 @@ modref_propagate_dump_scc (cgraph_node *component_node)
   }
 }
 
+/* Determine EAF flags know for call E with CALLEE_ECF_FLAGS and ARG.  */
+
+int
+implicit_eaf_flags_for_edge_and_arg (cgraph_edge *e, int callee_ecf_flags,
+bool ignore_stores, int arg)
+{
+  /* Returning the value is already accounted to at local propagation.  */
+  int implicit_flags = EAF_NOT_RETURNED_DIRECTLY
+  | EAF_NOT_RETURNED_INDIRECTLY;
+  if (ignore_stores)
+ implicit_flags |= ignore_stores_eaf_flags;
+  if (callee_ecf_flags & ECF_PURE)
+implicit_flags |= implicit_pure_eaf_flags;
+  if (callee_ecf_flags & (ECF_CONST | ECF_NOVOPS))
+implicit_flags |= implicit_const_eaf_flags;
+  class fnspec_summary *fnspec_sum = fnspec_summaries->get (e);
+  if (fnspec_sum)
+{
+  attr_fnspec fnspec (fnspec_sum->fnspec);
+  implicit_flags |= fnspec.arg_eaf_flags (arg);
+}
+  return implicit_flags;
+}
+
 /* Process escapes in SUM and merge SUMMARY to CUR_SUMMARY
and SUMMARY_LTO to CUR_SUMMARY_LTO.
Return true if something changed.  */
@@ -4857,9 +4881,8 @@ modref_merge_call_site_flags (escape_summary *sum,
 {
   int flags = 0;
   int flags_lto = 0;
-  /* Returning the value is already accounted to at local propagation.  */
-  int implicit_flags = EAF_NOT_RETURNED_DIRECTLY
-  | EAF_NOT_RETURNED_INDIRECTLY;
+  int implicit_flags = implicit_eaf_flags_for_edge_and_arg
+   (e, callee_ecf_flags, ignore_stores, ee->arg);
 
   if (summary && ee->arg < summary->arg_flags.length ())
flags = summary->arg_flags[ee->arg];
@@ -4995,6 +5018,7 @@ modref_propagate_flags_in_scc (cgraph_node 
*component_node)
  if (ecf_flags & (ECF_CONST | ECF_NOVOPS)
  || !callee_edge->inline_failed)
continue;
+
  /* Get the callee and its summary.  */
  enum availability avail;
  callee = callee_edge->callee->function_or_virtual_thunk_symbol
@@ -5081,6 +5105,9 @@ ipa_merge_modref_summary_after_inlining (cgraph_edge 
*edge)
   class modref_summary_lto *callee_info_lto
 = summaries_lto ? summaries_lto->get (edge->callee) : NULL;
   int flags = flags_from_decl_or_type (edge->callee->decl);
+  /* Combine in outer flags.  */
+  for (cgraph_node *n = edge->caller; n->inlined_to; n = n->callers->caller)
+flags |= flags_from_decl_or_type (edge->callee->decl);
   bool ignore_stores = ignore_stores_p (edge->caller->decl, flags);
 
   if (!callee_info && to_info)
@@ -5148,10 +5175,11 @@ ipa_merge_modref_summary_after_inlining (cgraph_edge 
*edge)
 FOR_EACH_VEC_ELT (sum->esc, i, ee)
   {
bool needed = false;
-   /* TODO: We do not have jump functions for return slots, so we
-  never propagate them to outer function.  */
-   if (ee->parm_index < 0)
- continue;
+   int implicit_flags = implicit_eaf_flags_for_edge_and_arg
+   (edge, flags, ignore_stores,
+ee->arg);
+   if (!ee->direct)
+ implicit_flags = deref_flags (implicit_flags, ignore_stores);
if (to_info && (int)to_info->arg_flags.length () > ee->parm_index)
  {
int flags = callee_info
@@ -5159,11 +5187,14 @@ ipa_merge_modref_summary_after_inlining (cgraph_edge 
*edge)
? callee_info->arg_flags[ee->arg] : 0;
if (!ee->direct)
  flags = deref_flags (flags, ignore_stores);
-   else if (ignore_stores)
- flags |= ignore_stores_eaf_flags;
-   flags |= ee->min_flags;
-   to_info->arg_flags[ee->parm_index] &= flags;
-   if (to_info->arg_flags[ee->parm_index])
+   flags |= ee->min_flags | implicit_flags;
+   eaf_flags_t &f = ee->parm_index == MODREF_RETSLOT_PARM
+? to_info->retslot_flags
+: ee->parm_index == MODREF_STATIC_CHAIN_PARM
+? to_info->static_chain_flags
+ 

[PATCH] PR fortran/103411 - ICE in gfc_conv_array_initializer, at fortran/trans-array.c:6377

2021-11-24 Thread Harald Anlauf via Gcc-patches
Dear all,

when checking the SOURCE and SHAPE arguments to the RESHAPE
intrinsic, for absent PAD argument we failed to handle the case
when SHAPE was a parameter.

Fortunately, the proper check was already there, and the code
just needs some tweaking, as well as one of the testcases.

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

Thanks,
Harald

From d6af2a33bad852bcea39b8c5b2e7c27976bde2a1 Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Wed, 24 Nov 2021 22:22:24 +0100
Subject: [PATCH] Fortran: improve check of arguments to the RESHAPE intrinsic

gcc/fortran/ChangeLog:

	PR fortran/103411
	* check.c (gfc_check_reshape): Improve check of size of source
	array for the RESHAPE intrinsic against the given shape when pad
	is not given, and shape is a parameter.

gcc/testsuite/ChangeLog:

	PR fortran/103411
	* gfortran.dg/reshape_7.f90: Adjust test to improved check.
	* gfortran.dg/reshape_9.f90: New test.
---
 gcc/fortran/check.c | 17 +
 gcc/testsuite/gfortran.dg/reshape_7.f90 |  2 +-
 gcc/testsuite/gfortran.dg/reshape_9.f90 | 14 ++
 3 files changed, 28 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/reshape_9.f90

diff --git a/gcc/fortran/check.c b/gcc/fortran/check.c
index 5a5aca10ebe..837eb0912c0 100644
--- a/gcc/fortran/check.c
+++ b/gcc/fortran/check.c
@@ -4699,6 +4699,7 @@ gfc_check_reshape (gfc_expr *source, gfc_expr *shape,
   mpz_t size;
   mpz_t nelems;
   int shape_size;
+  bool shape_is_const = false;

   if (!array_check (source, 0))
 return false;
@@ -4736,6 +4737,7 @@ gfc_check_reshape (gfc_expr *source, gfc_expr *shape,
 {
   gfc_expr *e;
   int i, extent;
+  shape_is_const = true;
   for (i = 0; i < shape_size; ++i)
 	{
 	  e = gfc_constructor_lookup_expr (shape->value.constructor, i);
@@ -4748,7 +4750,7 @@ gfc_check_reshape (gfc_expr *source, gfc_expr *shape,
 	  gfc_error ("%qs argument of %qs intrinsic at %L has "
 			 "negative element (%d)",
 			 gfc_current_intrinsic_arg[1]->name,
-			 gfc_current_intrinsic, &e->where, extent);
+			 gfc_current_intrinsic, &shape->where, extent);
 	  return false;
 	}
 	}
@@ -4766,6 +4768,7 @@ gfc_check_reshape (gfc_expr *source, gfc_expr *shape,
   int i, extent;
   gfc_expr *e, *v;

+  shape_is_const = true;
   v = shape->symtree->n.sym->value;

   for (i = 0; i < shape_size; i++)
@@ -4856,8 +4859,7 @@ gfc_check_reshape (gfc_expr *source, gfc_expr *shape,
 	}
 }

-  if (pad == NULL && shape->expr_type == EXPR_ARRAY
-  && gfc_is_constant_expr (shape)
+  if (pad == NULL && shape_is_const
   && !(source->expr_type == EXPR_VARIABLE && source->symtree->n.sym->as
 	   && source->symtree->n.sym->as->type == AS_ASSUMED_SIZE))
 {
@@ -4866,10 +4868,17 @@ gfc_check_reshape (gfc_expr *source, gfc_expr *shape,
 	{
 	  gfc_constructor *c;
 	  bool test;
+	  gfc_constructor_base b;

+	  if (shape->expr_type == EXPR_ARRAY)
+	b = shape->value.constructor;
+	  else if (shape->expr_type == EXPR_VARIABLE)
+	b = shape->symtree->n.sym->value->value.constructor;
+	  else
+	gcc_unreachable ();

 	  mpz_init_set_ui (size, 1);
-	  for (c = gfc_constructor_first (shape->value.constructor);
+	  for (c = gfc_constructor_first (b);
 	   c; c = gfc_constructor_next (c))
 	mpz_mul (size, size, c->expr->value.integer);

diff --git a/gcc/testsuite/gfortran.dg/reshape_7.f90 b/gcc/testsuite/gfortran.dg/reshape_7.f90
index d752650aa4e..4216cb60cbb 100644
--- a/gcc/testsuite/gfortran.dg/reshape_7.f90
+++ b/gcc/testsuite/gfortran.dg/reshape_7.f90
@@ -4,7 +4,7 @@
 subroutine p0
integer, parameter :: sh(2) = [2, 3]
integer, parameter :: &
-   & a(2,2) = reshape([1, 2, 3, 4], sh)   ! { dg-error "Different shape" }
+   & a(2,2) = reshape([1, 2, 3, 4], sh)   ! { dg-error "not enough elements" }
if (a(1,1) /= 0) STOP 1
 end subroutine p0

diff --git a/gcc/testsuite/gfortran.dg/reshape_9.f90 b/gcc/testsuite/gfortran.dg/reshape_9.f90
new file mode 100644
index 000..c46e211b47e
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/reshape_9.f90
@@ -0,0 +1,14 @@
+! { dg-do compile }
+! PR fortran/103411 - ICE in gfc_conv_array_initializer
+
+program p
+  integer, parameter :: a(2) = [2,2]
+  integer, parameter :: d(2,2) = reshape([1,2,3,4,5], a)
+  integer, parameter :: c(2,2) = reshape([1,2,3,4], a)
+  integer, parameter :: b(2,2) = &
+   reshape([1,2,3], a) ! { dg-error "not enough elements" }
+  print *, reshape([1,2,3], a) ! { dg-error "not enough elements" }
+  print *, reshape([1,2,3,4], a)
+  print *, reshape([1,2,3,4,5], a)
+  print *, b, c, d
+end
--
2.26.2



Re: [PATCH v6] rtl: builtins: (not just) rs6000: Add builtins for fegetround, feclearexcept and feraiseexcept [PR94193]

2021-11-24 Thread Segher Boessenkool
On Wed, Nov 24, 2021 at 05:22:57PM -0300, Raoni Fassina Firmino wrote:
> Hi Joseph,
> 
> Thanks for the detailed review and explanations.

From me as well :-)

> On Mon, Oct 18, 2021 at 03:54:53PM +, Joseph Myers wrote:
> > However, it's better to get things right automatically without needing any 
> > macros or other header additions at all.  That is, define feclearexcept as 
> > a built-in function, *without* the extra arguments, and with the back end 
> > knowing about the FE_* values for the target libc.  Then you can simply 
> > avoid expanding the function inline when the back end doesn't know both 
> > the FE_* values and how to use them.
> 
> I took this part to heart, I agree that an approach in molds of the v5
> sounds more like it, something that will "just works" or fallback
> gracefully. And who knew, with your insight I think I find just
> the thing, I am finishing a v7 and I hope it address the previews
> consernings and sidestep all this complications with the v6 aproach.

What you will lose this way is that it will not work on any C library
that doesn't have explicit support.  Which is a shame, but it seems we
cannot avoid this.  Especially the "fesetround should be a function, not
a macro" argument is a showstopper :-/

Thanks,


Segher


Re: [PATCH] c++: Fix up diagnostics about taking address of an immediate member function [PR102753]

2021-11-24 Thread Jason Merrill via Gcc-patches

On 11/24/21 13:02, Jakub Jelinek wrote:

On Wed, Nov 24, 2021 at 05:02:03PM +0100, Jakub Jelinek via Gcc-patches wrote:

Unfortunately, the location wrappers are optimized away before we get a
chance to use them in cp_fold_r.
So, on the following patch, we get the location right on PTRMEM_CSTs not
used inside of initializers, but for PTRMEM_CSTs in initializers we report
them at UNKNOWN_LOCATION.
As PTRMEM_CST is a C++ FE tree, I wonder if we couldn't instead of your
patch or in addition to it do:
  struct GTY(()) ptrmem_cst {
struct tree_common common;
tree member;
+  location_t locus;
  };
#define PTRMEM_CST_LOCATION(NODE) \
   (((ptrmem_cst_t)PTRMEM_CST_CHECK (NODE))->locus)
and in make_ptrmem_cst set PTRMEM_CST_LOCATION to input_location.


Here is a full so far lightly tested patch that does this,
GXX_TESTSUITE_STDS=98,11,14,17,20,2b make check-g++ \
RUNTESTFLAGS="dg.exp='consteval* srcloc* is-constant-eval* spaceship-synth* 
constexpr-99287* feat-cxx* lambda-specifiers1*'"
which covers all tests that use consteval keyword in gcc/testsuite/.

Ok for trunk if it passes full bootstrap/regtest?

2021-11-24  Jakub Jelinek  

PR c++/102753
* cp-tree.h (struct ptrmem_cst): Add locus member.
(PTRMEM_CST_LOCATION): Define.
* tree.c (make_ptrmem_cst): Set PTRMEM_CST_LOCATION to input_location.
* typeck.c (cp_build_addr_expr_1): Don't diagnose taking address of
immediate functions here.  Instead when taking their address make
sure the returned ADDR_EXPR has EXPR_LOCATION set.
* pt.c (tsubst_copy): Ensure ADDR_EXPR of immediate function has
EXPR_LOCATION set.
* cp-gimplify.c (cp_fold_r): Diagnose taking address of immediate
functions here.  For consteval if don't walk THEN_CLAUSE.
(cp_genericize_r): Don't assert calls to immediate functions must
be source_location_current_p, instead only constant evaluate
calls to source_location_current_p.

* g++.dg/cpp2a/consteval20.C: Add some extra tests.
* g++.dg/cpp2a/consteval23.C: Likewise.
* g++.dg/cpp2a/consteval25.C: New test.
* g++.dg/cpp2a/srcloc20.C: New test.

--- gcc/cp/cp-tree.h.jj 2021-11-24 15:05:23.291928876 +0100
+++ gcc/cp/cp-tree.h2021-11-24 17:08:19.507806769 +0100
@@ -703,6 +703,7 @@ struct GTY(()) template_parm_index {
  struct GTY(()) ptrmem_cst {
struct tree_common common;
tree member;
+  location_t locus;
  };
  typedef struct ptrmem_cst * ptrmem_cst_t;
  
@@ -4724,6 +4725,11 @@ more_aggr_init_expr_args_p (const aggr_i

  #define PTRMEM_CST_MEMBER(NODE) \
(((ptrmem_cst_t)PTRMEM_CST_CHECK (NODE))->member)
  
+/* For a pointer-to-member constant `X::Y' this is a location where

+   the address of the member has been taken.  */
+#define PTRMEM_CST_LOCATION(NODE) \
+  (((ptrmem_cst_t)PTRMEM_CST_CHECK (NODE))->locus)
+
  /* The expression in question for a TYPEOF_TYPE.  */
  #define TYPEOF_TYPE_EXPR(NODE) (TYPE_VALUES_RAW (TYPEOF_TYPE_CHECK (NODE)))
  
--- gcc/cp/tree.c.jj	2021-11-24 15:05:23.371927735 +0100

+++ gcc/cp/tree.c   2021-11-24 17:09:05.348164621 +0100
@@ -5167,6 +5167,7 @@ make_ptrmem_cst (tree type, tree member)
tree ptrmem_cst = make_node (PTRMEM_CST);
TREE_TYPE (ptrmem_cst) = type;
PTRMEM_CST_MEMBER (ptrmem_cst) = member;
+  PTRMEM_CST_LOCATION (ptrmem_cst) = input_location;
return ptrmem_cst;
  }
  
--- gcc/cp/typeck.c.jj	2021-11-24 09:54:11.521738651 +0100

+++ gcc/cp/typeck.c 2021-11-24 17:09:43.620628485 +0100
@@ -6780,16 +6780,6 @@ cp_build_addr_expr_1 (tree arg, bool str
return error_mark_node;
  }
  
-	if (TREE_CODE (t) == FUNCTION_DECL

-   && DECL_IMMEDIATE_FUNCTION_P (t)
-   && !in_immediate_context ())
- {
-   if (complain & tf_error)
- error_at (loc, "taking address of an immediate function %qD",
-   t);
-   return error_mark_node;
- }
-
type = build_ptrmem_type (context_for_name_lookup (t),
  TREE_TYPE (t));
t = make_ptrmem_cst (type, t);
@@ -6816,15 +6806,6 @@ cp_build_addr_expr_1 (tree arg, bool str
  {
tree stripped_arg = tree_strip_any_location_wrapper (arg);
if (TREE_CODE (stripped_arg) == FUNCTION_DECL
- && DECL_IMMEDIATE_FUNCTION_P (stripped_arg)
- && !in_immediate_context ())
-   {
- if (complain & tf_error)
-   error_at (loc, "taking address of an immediate function %qD",
- stripped_arg);
- return error_mark_node;
-   }
-  if (TREE_CODE (stripped_arg) == FUNCTION_DECL
  && !mark_used (stripped_arg, complain) && !(complain & tf_error))
return error_mark_node;
val = build_address (arg);
@@ -6865,6 +6846,13 @@ cp_build_addr_expr_1 (tree arg, bool str
  complain);
  }
  
+  /* For addresses of immediate functions ensure we have EXPR_LOCATION

[PATCH] c++, v2: Fix up diagnostics about taking address of an immediate member function [PR102753]

2021-11-24 Thread Jakub Jelinek via Gcc-patches
On Wed, Nov 24, 2021 at 05:15:51PM -0500, Jason Merrill wrote:
> > +case CALL_EXPR:
> > +  if (tree fndecl = cp_get_callee_fndecl_nofold (stmt))
> > +   if (DECL_IMMEDIATE_FUNCTION_P (fndecl)
> > +   && source_location_current_p (fndecl))
> > + {
> > +   tree fn = cp_get_callee (stmt);
> > +   STRIP_NOPS (fn);
> > +   if (TREE_CODE (fn) == ADDR_EXPR)
> > + ((hash_set *) data)->add (fn);
> > + }
> > +  break;
> 
> I'm surprised the source_location::current handling would be needed; why do
> calls to that function live long enough for us to walk into the ADDR_EXPR
> here?  Maybe we should fold it in cp_fold instead of cp_genericize_r.

From quick testing, moving it to cp_fold works fine too.
I have bootstraps of the previous version of this patch and the
multidimensional subscript pending now, will bootstrap/regtest this
when the current ones finish.

2021-11-24  Jakub Jelinek  

PR c++/102753
* cp-tree.h (struct ptrmem_cst): Add locus member.
(PTRMEM_CST_LOCATION): Define.
* tree.c (make_ptrmem_cst): Set PTRMEM_CST_LOCATION to input_location.
* typeck.c (cp_build_addr_expr_1): Don't diagnose taking address of
immediate functions here.  Instead when taking their address make
sure the returned ADDR_EXPR has EXPR_LOCATION set.
* pt.c (tsubst_copy): Ensure ADDR_EXPR of immediate function has
EXPR_LOCATION set.
* cp-gimplify.c (cp_fold_r): Diagnose taking address of immediate
functions here.  For consteval if don't walk THEN_CLAUSE.
(cp_genericize_r): Move evaluation of calls to
std::source_location::current from here to...
(cp_fold): ... here.  Don't assert calls to immediate functions must
be source_location_current_p, instead only constant evaluate
calls to source_location_current_p.

* g++.dg/cpp2a/consteval20.C: Add some extra tests.
* g++.dg/cpp2a/consteval23.C: Likewise.
* g++.dg/cpp2a/consteval25.C: New test.
* g++.dg/cpp2a/srcloc20.C: New test.

--- gcc/cp/cp-tree.h.jj 2021-11-24 15:05:23.291928876 +0100
+++ gcc/cp/cp-tree.h2021-11-24 17:08:19.507806769 +0100
@@ -703,6 +703,7 @@ struct GTY(()) template_parm_index {
 struct GTY(()) ptrmem_cst {
   struct tree_common common;
   tree member;
+  location_t locus;
 };
 typedef struct ptrmem_cst * ptrmem_cst_t;
 
@@ -4724,6 +4725,11 @@ more_aggr_init_expr_args_p (const aggr_i
 #define PTRMEM_CST_MEMBER(NODE) \
   (((ptrmem_cst_t)PTRMEM_CST_CHECK (NODE))->member)
 
+/* For a pointer-to-member constant `X::Y' this is a location where
+   the address of the member has been taken.  */
+#define PTRMEM_CST_LOCATION(NODE) \
+  (((ptrmem_cst_t)PTRMEM_CST_CHECK (NODE))->locus)
+
 /* The expression in question for a TYPEOF_TYPE.  */
 #define TYPEOF_TYPE_EXPR(NODE) (TYPE_VALUES_RAW (TYPEOF_TYPE_CHECK (NODE)))
 
--- gcc/cp/tree.c.jj2021-11-24 15:05:23.371927735 +0100
+++ gcc/cp/tree.c   2021-11-24 17:09:05.348164621 +0100
@@ -5167,6 +5167,7 @@ make_ptrmem_cst (tree type, tree member)
   tree ptrmem_cst = make_node (PTRMEM_CST);
   TREE_TYPE (ptrmem_cst) = type;
   PTRMEM_CST_MEMBER (ptrmem_cst) = member;
+  PTRMEM_CST_LOCATION (ptrmem_cst) = input_location;
   return ptrmem_cst;
 }
 
--- gcc/cp/typeck.c.jj  2021-11-24 09:54:11.521738651 +0100
+++ gcc/cp/typeck.c 2021-11-24 17:09:43.620628485 +0100
@@ -6780,16 +6780,6 @@ cp_build_addr_expr_1 (tree arg, bool str
return error_mark_node;
  }
 
-   if (TREE_CODE (t) == FUNCTION_DECL
-   && DECL_IMMEDIATE_FUNCTION_P (t)
-   && !in_immediate_context ())
- {
-   if (complain & tf_error)
- error_at (loc, "taking address of an immediate function %qD",
-   t);
-   return error_mark_node;
- }
-
type = build_ptrmem_type (context_for_name_lookup (t),
  TREE_TYPE (t));
t = make_ptrmem_cst (type, t);
@@ -6816,15 +6806,6 @@ cp_build_addr_expr_1 (tree arg, bool str
 {
   tree stripped_arg = tree_strip_any_location_wrapper (arg);
   if (TREE_CODE (stripped_arg) == FUNCTION_DECL
- && DECL_IMMEDIATE_FUNCTION_P (stripped_arg)
- && !in_immediate_context ())
-   {
- if (complain & tf_error)
-   error_at (loc, "taking address of an immediate function %qD",
- stripped_arg);
- return error_mark_node;
-   }
-  if (TREE_CODE (stripped_arg) == FUNCTION_DECL
  && !mark_used (stripped_arg, complain) && !(complain & tf_error))
return error_mark_node;
   val = build_address (arg);
@@ -6865,6 +6846,13 @@ cp_build_addr_expr_1 (tree arg, bool str
  complain);
 }
 
+  /* For addresses of immediate functions ensure we have EXPR_LOCATION
+ set for possible later diagnostics.  */
+  if (TREE_CODE (val) == ADDR_EXPR
+  && TREE_CODE (TREE_OPERAND (val, 0))

Re: aix: adjust installation directories for GCC64

2021-11-24 Thread David Edelsohn via Gcc-patches
On Wed, Sep 15, 2021 at 4:12 AM CHIGOT, CLEMENT  wrote:
>
> As gcc on 64bit for AIX is built with "MULTILIB_MATCHES= .=maix32",
> "-print-multi-directory" and similar flags aren't returning the
> correct directory when used with -maix32: "." is returned instead
> of "ppc32".
> Libgcc installation script needs to be adjusted to bypass this
> problem and correctly install 32bit files in a ppc32 subdirectory.
>
> libgcc/ChangeLog:
> 2021-09-03  Clément Chigot  
>
> * config/rs6000/t-slibgcc-aix (SHLIB_INSTALL): Replace
> "$(slibdir)@shlib_slibdir_qual@" by $(inst_libdir).

Hi, Clement

Sorry for the delay.

I believe that this is a change in behavior.  Maybe you don't see it
because you use --enable-version-specific-runtime-libs?  If one uses
that configure option, your patch produces the same results -- it's
explicitly using that version-specific directory.  If one does not use
that configure option, your patch enforces that behavior.  The patch
should not change that behavior.

Based on your question in another email message, I infer that
MULTIOSSUBDIR and shlib_slibdir_qual are defined incorrectly.

Thanks, David


[PATCH v7] rtl: builtins: (not just) rs6000: Add builtins for fegetround, feclearexcept and feraiseexcept [PR94193]

2021-11-24 Thread Raoni Fassina Firmino via Gcc-patches
Changes since v6[6] and v5[5]:
  - Based this version on the v5 one.
  - Reworked all builtins back to the way they are in v5 and added the
following changes:
+ Added a test to target libc, only expanding with glibc as the
  target libc.
+ Updated all three expanders header comment to reflect the added
  behavior (fegetround got a full header as it had none).
+ Added extra documentation for the builtins on doc/extend.texi,
  similar to v6 version, but only the introductory paragraph,
  without a dedicated entry for each, since now their behavior and
  signature match the C99 ones.
  - Changed the description for the return operand in the RTL template
of the fegetround expander.  Using "(set )", the same way as
rs6000_mffsl expander (this change was taken from v6).
  - Updated the commit message mentioning the target libc restriction
and updated changelog.

Tested on top of master (9bf69a8558638ce0cdd69e83a68776deb9b8e053)
on the following platforms with no regression:
  - powerpc64le-linux-gnu (Power 9)
  - powerpc64le-linux-gnu (Power 8)
  - powerpc64-linux-gnu (Power 9, with 32 and 64 bits tests)

Also made a visual test comparing the generated assembly of a test
program built against glibc and musl (with -mmusl and with musl-gcc).

Documentation changes tested on x86_64-redhat-linux.

Well, turns out v6 was kind of a misstep[7].  But turns out the
solution was in my face the whole time and Joseph was kind enough to
spell it out to me.  I should have known, one can check for the target
libc at runtime. It is a really simple addition to each expander, only
expanding for the libcs for which the expander knows the FE_* values and
can handle them.  As Joseph mentioned in his review, with that the expander
doesn't have to always expand and everything is fine.

As I mentioned[8], musl and uclibc both use the same values as glibc,
so I could enable the expanders for them as well, but I'm not sure about it.

I don't know if I should add something to the documentation, more
precisely on section "6.59 Other Built-in Functions Provided by GCC"
in doc/extend.texi.  As I mentioned in v6, I don't know if I'm
doing it right, especially changing such front-facing documentation,
but here it is.

I'm repeating the "changelog" from past versions here for convenience:

Changes since v5[5]:
  - Reworked all builtins to accept the FE_* macros as parameters and
so be agnostic to libc implementations.  Largely based on
fpclassify.  To that end, there are some new files changed:
+ Change the argument list for the builtins declarations in
  builtins.def
+ Added new types in builtin-types.def to use in the builtins
  declarations.
+ Added extra documentation for the builtins on doc/extend.texi,
  similar to fpclassify.
  - Updated doc/md.texi documentation with the new optab behaviors.
  - Updated comments to the expanders and expand handlers to try to
explain what is going on.
  - Changed the description for the return operand in the RTL template
of the fegetround expander.  Using "(set )", the same way as
rs6000_mffsl expander.
  - Updated testcases with helper macros with the new argument list.

Changes since v4[4]:
  - Fixed more spelling and code style.
  - Add more clarification on  comments for feraiseexcept and
feclearexcept expands;

Changes since v3[3]:
  - Fixed fegetround bug on powerpc64 (big endian) that Segher
spotted;

Changes since v2[2]:
  - Added documentation for the new optabs;
  - Remove use of non portable __builtin_clz;
  - Changed feclearexcept and feraiseexcept to accept all 4 valid
flags at the same time and added more test for that case;
  - Extended feclearexcept and feraiseexcept testcases to match
accepting multiple flags;
  - Fixed builtin-feclearexcept-feraiseexcept-2.c testcase comparison
after feclearexcept tests;
  - Updated commit message to reflect change in feclearexcept and
feraiseexcept from the glibc counterpart;
  - Fixed English spelling and typos;
  - Fixed code-style;
  - Changed subject line tag to make clear it is not just rs6000 code.

Changes since v1[1]:
  - Fixed English spelling;
  - Fixed code-style;
  - Changed match operand predicate in feclearexcept and feraiseexcept;
  - Changed testcase options;
  - Minor changes in test code to be C90 compatible;
  - Other minor changes suggested by Segher;
  - Changed subject line tag (not sure if I tagged correctly or should
include optabs: also)

[1] https://gcc.gnu.org/pipermail/gcc-patches/2020-August/552024.html
[2] https://gcc.gnu.org/pipermail/gcc-patches/2020-September/553297.html
[3] https://gcc.gnu.org/pipermail/gcc-patches/2020-October/557109.html
[4] https://gcc.gnu.org/pipermail/gcc-patches/2020-October/557349.html
[5] https://gcc.gnu.org/pipermail/gcc-patches/2020-November/557984.html
[6] https://gcc.gnu.org/pipermail/gcc-patches/2021-October/581837.html
[7] https://gcc.gnu.org/pipermail/gcc-patches/2021-October/581929.html
[8] ht

Re: [PATCH] c++, v2: Fix up diagnostics about taking address of an immediate member function [PR102753]

2021-11-24 Thread Jason Merrill via Gcc-patches

On 11/24/21 17:42, Jakub Jelinek wrote:

On Wed, Nov 24, 2021 at 05:15:51PM -0500, Jason Merrill wrote:

+case CALL_EXPR:
+  if (tree fndecl = cp_get_callee_fndecl_nofold (stmt))
+   if (DECL_IMMEDIATE_FUNCTION_P (fndecl)
+   && source_location_current_p (fndecl))
+ {
+   tree fn = cp_get_callee (stmt);
+   STRIP_NOPS (fn);
+   if (TREE_CODE (fn) == ADDR_EXPR)
+ ((hash_set *) data)->add (fn);
+ }
+  break;


I'm surprised the source_location::current handling would be needed; why do
calls to that function live long enough for us to walk into the ADDR_EXPR
here?  Maybe we should fold it in cp_fold instead of cp_genericize_r.


 From quick testing, moving it to cp_fold works fine too.
I have bootstraps of the previous version of this patch and the
multidimensional subscript pending now, will bootstrap/regtest this
when the current ones finish.

2021-11-24  Jakub Jelinek  

PR c++/102753
* cp-tree.h (struct ptrmem_cst): Add locus member.
(PTRMEM_CST_LOCATION): Define.
* tree.c (make_ptrmem_cst): Set PTRMEM_CST_LOCATION to input_location.
* typeck.c (cp_build_addr_expr_1): Don't diagnose taking address of
immediate functions here.  Instead when taking their address make
sure the returned ADDR_EXPR has EXPR_LOCATION set.
* pt.c (tsubst_copy): Ensure ADDR_EXPR of immediate function has
EXPR_LOCATION set.
* cp-gimplify.c (cp_fold_r): Diagnose taking address of immediate
functions here.  For consteval if don't walk THEN_CLAUSE.
(cp_genericize_r): Move evaluation of calls to
std::source_location::current from here to...
(cp_fold): ... here.  Don't assert calls to immediate functions must
be source_location_current_p, instead only constant evaluate
calls to source_location_current_p.

* g++.dg/cpp2a/consteval20.C: Add some extra tests.
* g++.dg/cpp2a/consteval23.C: Likewise.
* g++.dg/cpp2a/consteval25.C: New test.
* g++.dg/cpp2a/srcloc20.C: New test.

--- gcc/cp/cp-tree.h.jj 2021-11-24 15:05:23.291928876 +0100
+++ gcc/cp/cp-tree.h2021-11-24 17:08:19.507806769 +0100
@@ -703,6 +703,7 @@ struct GTY(()) template_parm_index {
  struct GTY(()) ptrmem_cst {
struct tree_common common;
tree member;
+  location_t locus;
  };
  typedef struct ptrmem_cst * ptrmem_cst_t;
  
@@ -4724,6 +4725,11 @@ more_aggr_init_expr_args_p (const aggr_i

  #define PTRMEM_CST_MEMBER(NODE) \
(((ptrmem_cst_t)PTRMEM_CST_CHECK (NODE))->member)
  
+/* For a pointer-to-member constant `X::Y' this is a location where

+   the address of the member has been taken.  */
+#define PTRMEM_CST_LOCATION(NODE) \
+  (((ptrmem_cst_t)PTRMEM_CST_CHECK (NODE))->locus)
+
  /* The expression in question for a TYPEOF_TYPE.  */
  #define TYPEOF_TYPE_EXPR(NODE) (TYPE_VALUES_RAW (TYPEOF_TYPE_CHECK (NODE)))
  
--- gcc/cp/tree.c.jj	2021-11-24 15:05:23.371927735 +0100

+++ gcc/cp/tree.c   2021-11-24 17:09:05.348164621 +0100
@@ -5167,6 +5167,7 @@ make_ptrmem_cst (tree type, tree member)
tree ptrmem_cst = make_node (PTRMEM_CST);
TREE_TYPE (ptrmem_cst) = type;
PTRMEM_CST_MEMBER (ptrmem_cst) = member;
+  PTRMEM_CST_LOCATION (ptrmem_cst) = input_location;
return ptrmem_cst;
  }


Please also change build_x_unary_op to improve PTRMEM_CST_LOCATION 
instead of adding a wrapper, and teach cp_expr_location about 
PTRMEM_CST_LOCATION.



--- gcc/cp/typeck.c.jj  2021-11-24 09:54:11.521738651 +0100
+++ gcc/cp/typeck.c 2021-11-24 17:09:43.620628485 +0100
@@ -6780,16 +6780,6 @@ cp_build_addr_expr_1 (tree arg, bool str
return error_mark_node;
  }
  
-	if (TREE_CODE (t) == FUNCTION_DECL

-   && DECL_IMMEDIATE_FUNCTION_P (t)
-   && !in_immediate_context ())
- {
-   if (complain & tf_error)
- error_at (loc, "taking address of an immediate function %qD",
-   t);
-   return error_mark_node;
- }
-
type = build_ptrmem_type (context_for_name_lookup (t),
  TREE_TYPE (t));
t = make_ptrmem_cst (type, t);
@@ -6816,15 +6806,6 @@ cp_build_addr_expr_1 (tree arg, bool str
  {
tree stripped_arg = tree_strip_any_location_wrapper (arg);
if (TREE_CODE (stripped_arg) == FUNCTION_DECL
- && DECL_IMMEDIATE_FUNCTION_P (stripped_arg)
- && !in_immediate_context ())
-   {
- if (complain & tf_error)
-   error_at (loc, "taking address of an immediate function %qD",
- stripped_arg);
- return error_mark_node;
-   }
-  if (TREE_CODE (stripped_arg) == FUNCTION_DECL
  && !mark_used (stripped_arg, complain) && !(complain & tf_error))
return error_mark_node;
val = build_address (arg);
@@ -6865,6 +6846,13 @@ cp_build_addr_expr_1 (tree arg, bool str
  complain);
  }
 

[PATCH] Avoid expecting nonzero size for access none void* arguments [PR101751]

2021-11-24 Thread Martin Sebor via Gcc-patches

When the optional size-index argument to attribute access is
omitted for a pointer, GCC expects the actual pointer argument
to point to an object at least as big as its size implies, or
at least one byte for void*.  This is done to make it possible
to detect past-the-end accesses in calls to functions that
only take a pointer (and not a size).

This logic has proved to be overly restrictive for the "none"
access mode applied to void* pointer arguments as a signal
that a function doesn't access the object.  The use case that
brought this to light is a function that only stores its pointer
argument somewhere for later use, without ever dereferencing it,
like pthread_setspecific() does.  pthread_setspecific() needs to
use attribute access because it takes a const void* argument,
and GCC assumes that functions with const-qualified pointer
arguments read from the memory they point to (as most do) and
issues -Wuninitialized when it detects the object such a pointer
points to is not initialized.

The attached tweak adjusts the logic to exempt void* arguments
with access none from the usual bounds checking by setting
the expected object size to zero.  This lets Glibc continue
to annotate pthread_setspecific() with attribute access none
in its headers to avoid the -Wuninitialized in user code.

Tested on x86_64-linux.

Martin
Avoid expecting nonzero size for access none void* arguments [PR101751].

Resolves:
PR middle-end/101751 - attribute access none with void pointer expects nonzero size

gcc/ChangeLog:

	PR middle-end/101751
	* doc/extend.texi (attribute access): Adjust.
	* gimple-ssa-warn-access.cc (pass_waccess::maybe_check_access_sizes):
	Treat access mode none on a void* argument as expecting as few as
	zero bytes.

gcc/testsuite/ChangeLog:

	PR middle-end/101751
	* gcc.dg/Wstringop-overflow-86.c: New test.

diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc
index 9a9f48685b9..11c99e5dfab 100644
--- a/gcc/gimple-ssa-warn-access.cc
+++ b/gcc/gimple-ssa-warn-access.cc
@@ -2999,6 +2999,10 @@ pass_waccess::maybe_check_access_sizes (rdwr_map *rwm, tree fndecl, tree fntype,
 	  if (access.second.minsize
 	  && access.second.minsize != HOST_WIDE_INT_M1U)
 	access_nelts = build_int_cstu (sizetype, access.second.minsize);
+	  else if (VOID_TYPE_P (argtype) && access.second.mode == access_none)
+	/* Treat access mode none on a void* argument as expecting
+	   as little as zero bytes.  */
+	access_nelts = size_zero_node;
 	  else
 	access_nelts = size_one_node;
 	}
diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-86.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-86.c
new file mode 100644
index 000..345abe4a274
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-86.c
@@ -0,0 +1,63 @@
+/* PR middle-end/101751 - attribute access none with void pointer expects
+   nonzero size
+   { dg-do compile }
+   { dg-options "-Wall" } */
+
+__attribute__ ((access (none, 1))) void
+fvp_m1 (const void*);
+
+void nowarn_m1 (void)
+{
+  /* Verify these don't trigger a warning for calls to a function
+ declared with attribute access none.  */
+  fvp_m1 ((void*)-1); // { dg-bogus "-Wstringop-" }
+  fvp_m1 ((void*)1);  // { dg-bogus "-Wstringop-" }
+}
+
+
+__attribute__ ((access (none, 1))) void
+fvp_none (void*);
+
+void nowarn_c_cp1 (void)
+{
+  char c;
+  fvp_none (&c);
+  fvp_none (&c + 1);  // { dg-bogus "-Wstringop-" }
+}
+
+void nowarn_f_fp1 (void)
+{
+  fvp_none ((char*)&nowarn_f_fp1);
+  fvp_none ((char*)&nowarn_f_fp1 + 1);
+}
+
+void nowarn_sp1_sp_4 (void)
+{
+  fvp_none ("" + 1);  // { dg-bogus "-Wstringop-" }
+  fvp_none ("123" + 4);   // { dg-bogus "-Wstringop-" }
+}
+
+
+__attribute__ ((access (none, 1))) void
+wfvp_none (void*);// { dg-message "in a call to function 'wfvp_none' declared with attribute 'access \\\(none, 1\\\)'" }
+
+void warn_cm1_p1 (void)
+{
+  char c;
+  /* With optimization both of the following are diagnosed by -Warray-bounds.
+ The second also without optimization by -Wstringop-overread.  They
+ should both be diagnosed by the same warning even without optimization. */
+  wfvp_none (&c - 1); // { dg-warning "" "pr??" { xfail *-*-* } }
+  wfvp_none (&c + 2); // { dg-warning "" }
+}
+
+void warn_fp2 (void)
+{
+  void *p = (char*)&warn_fp2 + sizeof warn_fp2;
+  fvp_none (p);   // { dg-warning "" "pr??" { xfail *-*-* } }
+}
+
+void warn_sp2 (void)
+{
+  wfvp_none ("" + 2); // { dg-warning "" }
+}
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index ef654d7b878..266ef76e5c3 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -2524,7 +2524,6 @@ The following attributes are supported on most targets.
 @table @code
 @c Keep this table alphabetized by attribute name.  Treat _ as space.
 
-@item access
 @itemx access (@var{access-mode}, @var{ref-index})
 @itemx access (@var{access-mode}, @var{ref-index}, @var{size-index})
 
@@ -2598,7 +2597,9 @@ __attribute__ ((access (write_only, 1, 2), access (read_writ

Re: [PATCH][RFC] middle-end/46476 - resurrect -Wunreachable-code

2021-11-24 Thread Jason Merrill via Gcc-patches

On 11/24/21 11:15, Marek Polacek wrote:

On Wed, Nov 24, 2021 at 04:21:31PM +0100, Richard Biener via Gcc-patches wrote:

This resurrects -Wunreachable-code and implements a warning for
trivially unreachable code as of CFG construction.  Most problematic
with this is the C/C++ frontend added 'return 0;' stmt in main
which the patch handles for C++ like the C frontend already does
by using BUILTINS_LOCATION.

Another problem for future enhancement is that after CFG construction
we no longer can point to the stmt making a stmt unreachable, so
this implementation tries to warn on the first unreachable
statement of a region.  It might be possible to retain a pointer
to the stmt that triggered creation of a basic-block but I'm not
sure how reliable that would be.

So this is really a simple attempt for now, triggered by myself
running into such a coding error.  As always, the perfect is the
enemy of the good.

It does not pass bootstrap (which enables -Wextra), because of the
situation in g++.dg/Wunreachable-code-5.C where the C++ frontend
prematurely elides conditions like if (! GATHER_STATISTICS) that
evaluate to true - oddly enough it does _not_ do this for
conditions evaluating to false ... (one of the
c-c++-common/Wunreachable-code-2.c cases).


I've taken a look into the C++ thing.  This is genericize_if_stmt:
if we have

   if (0)
 return;

then cond is integer_zerop, then_ is a return_expr, but since it has
TREE_SIDE_EFFECTS, we create a COND_EXPR.  For

   if (!0)
  return;

we do
  170   else if (integer_nonzerop (cond) && !TREE_SIDE_EFFECTS (else_))
  171 stmt = then_;
which elides the if completely.

So it seems it would help if we avoided eliding the if stmt if
-Wunreachable-code is in effect.  I'd be happy to make that change,
if it sounds sane.


Sure.

Currently the front end does various constant folding as part of 
genericization, as I recall because there were missed optimizations 
without it.  Is this particular one undesirable because it's at the 
statement level rather than within an expression?


Jason



Re: [PATCH] c++, v2: Implement C++23 P2128R6 - Multidimensional subscript operator [PR102611]

2021-11-24 Thread Jason Merrill via Gcc-patches

On 11/24/21 08:37, Jakub Jelinek wrote:

On Tue, Nov 23, 2021 at 10:28:48PM -0500, Jason Merrill wrote:

Thanks.


+ while (true)
+   {
+ cp_expr expr (NULL_TREE);
+ /* Parse the next assignment-expression.  */
+ if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
+   {
+ /* A braced-init-list.  */
+ bool expr_nonconst_p;
+ cp_lexer_set_source_position (parser->lexer);
+ expr = cp_parser_braced_list (parser, &expr_nonconst_p);
+   }
+ else
+   expr = cp_parser_assignment_expression (parser);
+
+ /* If we have an ellipsis, then this is an expression
+expansion.  */
+ if (cp_lexer_next_token_is (parser->lexer, CPP_ELLIPSIS))
+   {
+ /* Consume the `...'.  */
+ cp_lexer_consume_token (parser->lexer);
+ /* Build the argument pack.  */
+ expr = make_pack_expansion (expr);
+   }
+
+ if (expr == error_mark_node)
+   index = error_mark_node;
+ else if (expression_list.get () == NULL
+  && !PACK_EXPANSION_P (expr.get_value ()))
+   index = expr.get_value ();
+ else
+   vec_safe_push (expression_list, expr.get_value ());
+
+ /* If the next token isn't a `,', then we are done.  */
+ if (cp_lexer_next_token_is_not (parser->lexer, CPP_COMMA))
+   break;
+
+ if (expression_list.get () == NULL && index != error_mark_node)
+   {
+ *&expression_list = make_tree_vector_single (index);
+ index = NULL_TREE;
+   }
+
+ /* Otherwise, consume the `,' and keep going.  */
+ cp_lexer_consume_token (parser->lexer);
+   }


Let's share this loop with cp_parser_parenthesized_expression_list.


I'd prefer not to share the loop as a whole because what exactly is done with
the parsed expressions differs a lot, for the array refs I'd prefer not to
push anything into a vector for the most common case with a single element.
I've outlined into a function what I think I can easily share
(see cp_parser_parenthesized_expression_list_elt in the updated patch).


+ if (expression_list.get () && index == error_mark_node)
+   {
+ release_tree_vector (*&expression_list);
+ *&expression_list = NULL;


This should probably become a release() method in releasing_vec.


Done.


+ FOR_EACH_VEC_SAFE_ELT (*index_exp_list, i, e)


This is build_x_compound_expr_from_vec.


Done 2x.


+/* Wrapper for above.  */


I just applied my auto_cond_timevar patch, so you can use that instead of
the wrapper.


Done.


+ for (i = 0; i < nargs; ++i)
+   {
+ tree arg = CALL_EXPR_ARG (c, i);
+
+ if (!PACK_EXPANSION_P (arg))
+   vec_safe_push (index_exp_list, RECUR (arg));
+ else
+   {
+ /* Expand the pack expansion and push each entry onto
+INDEX_EXP_LIST.  */
+ arg = tsubst_pack_expansion (arg, args, complain, in_decl);
+ if (TREE_CODE (arg) == TREE_VEC)
+   {
+ unsigned int len, j;
+
+ len = TREE_VEC_LENGTH (arg);
+ for (j = 0; j < len; ++j)
+   {
+ tree value = TREE_VEC_ELT (arg, j);
+ if (value != NULL_TREE)
+   value = convert_from_reference (value);
+ vec_safe_push (index_exp_list, value);
+   }
+   }
+ else
+   {
+ /* A partial substitution.  Add one entry.  */
+ vec_safe_push (index_exp_list, arg);
+   }
+   }
+   }


Let's share this code with CALL_EXPR instead of duplicating it.


Done as tsubst_copy_and_build_call_args.

Tested on the new testcases so far, ok for trunk if it passes full
bootstrap/regtest?


OK.


2021-11-24  Jakub Jelinek  

PR c++/102611
gcc/
* doc/invoke.texi (-Wcomma-subscript): Document that for
-std=c++20 the option isn't enabled by default with -Wno-deprecated
but for -std=c++23 it is.
gcc/c-family/
* c-opts.c (c_common_post_options): Enable -Wcomma-subscript by
default for C++23 regardless of warn_deprecated.
* c-cppbuiltin.c (c_cpp_builtins): Predefine
__cpp_multidimensional_subscript=202110L for C++23.
gcc/cp/
* cp-tree.h (build_op_subscript): Implement P2128R6
- Multidimensional subscript operator.  Declare.
(class releasing_vec): Add release method.
(grok_array_decl): Remove bool argument, add vec **
  

Re: [PATCH] c++: Return early in apply_late_template_attributes if there are no late attribs [PR101180]

2021-11-24 Thread Jason Merrill via Gcc-patches

On 11/24/21 03:16, Jakub Jelinek wrote:

On Fri, Nov 19, 2021 at 10:40:50AM -0500, Jason Merrill wrote:

Shall we also change the function so that it doesn't call
cplus_decl_attributes if late_attrs is NULL [...]?


Please.


Here it is.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?


OK.


2021-11-24  Jakub Jelinek  

PR c++/101180
* pt.c (apply_late_template_attributes): Return early if there are no
dependent attributes.

--- gcc/cp/pt.c.jj  2021-11-22 10:07:01.360225139 +0100
+++ gcc/cp/pt.c 2021-11-23 11:23:16.808321905 +0100
@@ -11712,6 +11712,9 @@ apply_late_template_attributes (tree *de
/* Apply any non-dependent attributes.  */
*p = nondep;
  
+  if (nondep == attributes)

+return true;
+
/* And then any dependent ones.  */
tree late_attrs = NULL_TREE;
tree *q = &late_attrs;


Jakub





[PATCH] rs6000/test: Add emulated gather test case

2021-11-24 Thread Kewen.Lin via Gcc-patches
Hi,

This patch is to add a test case similar to the one in i386
to add testing coverage for 510.parest_r hotspots.

As evaluated, the emulated gather capability of vectorizer
(r12-2733) can help to speed up SPEC2017 510.parest_r on
Power8/9/10 by 5% to 9% with option sets Ofast unroll and
Ofast lto.  But since rs6000 lacked unpacking support for
unsigned int before, it could not vectorize the hotspots
until r12-3134.

By checking why r12-2733 doesn't immediately show its impact
for SPEC2017 510.parest_r while the associated test case
already can get vectorized on rs6000 at that time, I realized
the associated test case uses int as INDEXTYPE while the
hotspots actually use unsigned int.  So different from the one
in i386, this patch uses unsigned int as INDEXTYPE since the
unpack support for unsigned int (r12-3134) also matters for
the hotspots vectorization.  Not sure if it's worth updating
the one in i386 as well?

Tested on powerpc64le-linux-gnu P9 and powerpc64-linux-gnu P8.

Is it ok for trunk?

BR,
Kewen
-
gcc/testsuite/ChangeLog:

* gcc.target/powerpc/vect-gather-1.c: New test.

diff --git a/gcc/testsuite/gcc.target/powerpc/vect-gather-1.c 
b/gcc/testsuite/gcc.target/powerpc/vect-gather-1.c
new file mode 100644
index 000..bf98045ab03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vect-gather-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* Profitable from Power8 since it supports efficient unaligned load.  */
+/* { dg-options "-Ofast -mdejagnu-cpu=power8 -fdump-tree-vect-details 
-fdump-tree-forwprop4" } */
+
+#ifndef INDEXTYPE
+#define INDEXTYPE unsigned int
+#endif
+double vmul(INDEXTYPE *rowstart, INDEXTYPE *rowend,
+   double *luval, double *dst)
+{
+  double res = 0;
+  for (const INDEXTYPE * col = rowstart; col != rowend; ++col, ++luval)
+res += *luval * dst[*col];
+  return res;
+}
+
+/* With gather emulation this should be profitable to vectorize from Power8.  
*/
+/* { dg-final { scan-tree-dump "loop vectorized" "vect" } } */
+/* The index vector loads and promotions should be scalar after forwprop.  */
+/* { dg-final { scan-tree-dump-not "vec_unpack" "forwprop4" } } */
--
2.25.1



[PATCH] pr103194-5.c: Replace long with int64_t

2021-11-24 Thread H.J. Lu via Gcc-patches
Replace long with int64_t to work with -mx32.

* gcc.target/i386/pr103194-5.c: Replace long with int64_t.
---
 gcc/testsuite/gcc.target/i386/pr103194-5.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr103194-5.c 
b/gcc/testsuite/gcc.target/i386/pr103194-5.c
index dfaddf0aa6e..2e335285c2f 100644
--- a/gcc/testsuite/gcc.target/i386/pr103194-5.c
+++ b/gcc/testsuite/gcc.target/i386/pr103194-5.c
@@ -2,6 +2,7 @@
 /* { dg-options "-O2" } */
 #include 
 #include 
+#include 
 
 #define FOO(RTYPE,TYPE)
\
   __attribute__((noinline,noclone)) RTYPE  \
@@ -53,7 +54,7 @@
 return __sync_fetch_and_and (a, ~mask) & mask; \
   }\
 
-FOO(int, long);
+FOO(int, int64_t);
 
 /* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 2 } } */
 /* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 4 } } */
-- 
2.33.1



Re: [PATCH] rs6000/test: Add emulated gather test case

2021-11-24 Thread Hongtao Liu via Gcc-patches
On Thu, Nov 25, 2021 at 11:21 AM Kewen.Lin via Gcc-patches
 wrote:
>
> Hi,
>
> This patch is to add a test case similar to the one in i386
> to add testing coverage for 510.parest_r hotspots.
>
> As evaluated, the emulated gather capability of vectorizer
> (r12-2733) can help to speed up SPEC2017 510.parest_r on
> Power8/9/10 by 5% to 9% with option sets Ofast unroll and
> Ofast lto.  But since rs6000 missed unpacking support for
> unsigned int before, it can only vectorize the hotspots
> until r12-3134.
>
> By checking why r12-2733 doesn't immediately show its impact
> for SPEC2017 510.parest_r while the associated test case
> already can get vectorized on rs6000 at that time, I realized
> the associated test case use int as INDEXTYPE while the
> hotspots actually use unsigned int.  So different from the one
> in i386, this patch uses unsigned int as INDEXTYPE since the
> unpack support for unsigned int (r12-3134) also matters for
> the hotspots vectorization.  Not sure if it's worth to updating
> the one in i386 as well?
It looks like the same testcase added in
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88531
>
> Tested on powerpc64le-linux-gnu P9 and powerpc64-linux-gnu P8.
>
> Is it ok for trunk?
>
> BR,
> Kewen
> -
> gcc/testsuite/ChangeLog:
>
> * gcc.target/powerpc/vect-gather-1.c: New test.
>
> diff --git a/gcc/testsuite/gcc.target/powerpc/vect-gather-1.c 
> b/gcc/testsuite/gcc.target/powerpc/vect-gather-1.c
> new file mode 100644
> index 000..bf98045ab03
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/vect-gather-1.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* Profitable from Power8 since it supports efficient unaligned load.  */
> +/* { dg-options "-Ofast -mdejagnu-cpu=power8 -fdump-tree-vect-details 
> -fdump-tree-forwprop4" } */
> +
> +#ifndef INDEXTYPE
> +#define INDEXTYPE unsigned int
> +#endif
> +double vmul(INDEXTYPE *rowstart, INDEXTYPE *rowend,
> +   double *luval, double *dst)
> +{
> +  double res = 0;
> +  for (const INDEXTYPE * col = rowstart; col != rowend; ++col, ++luval)
> +res += *luval * dst[*col];
> +  return res;
> +}
> +
> +/* With gather emulation this should be profitable to vectorize from Power8. 
>  */
> +/* { dg-final { scan-tree-dump "loop vectorized" "vect" } } */
> +/* The index vector loads and promotions should be scalar after forwprop.  */
> +/* { dg-final { scan-tree-dump-not "vec_unpack" "forwprop4" } } */
> --
> 2.25.1
>


-- 
BR,
Hongtao


Re: [PATCH] rs6000/test: Add emulated gather test case

2021-11-24 Thread Kewen.Lin via Gcc-patches
on 2021/11/25 下午1:17, Hongtao Liu wrote:
> On Thu, Nov 25, 2021 at 11:21 AM Kewen.Lin via Gcc-patches
>  wrote:
>>
>> Hi,
>>
>> This patch is to add a test case similar to the one in i386
>> to add testing coverage for 510.parest_r hotspots.
>>
>> As evaluated, the emulated gather capability of vectorizer
>> (r12-2733) can help to speed up SPEC2017 510.parest_r on
>> Power8/9/10 by 5% to 9% with option sets Ofast unroll and
>> Ofast lto.  But since rs6000 missed unpacking support for
>> unsigned int before, it can only vectorize the hotspots
>> until r12-3134.
>>
>> By checking why r12-2733 doesn't immediately show its impact
>> for SPEC2017 510.parest_r while the associated test case
>> already can get vectorized on rs6000 at that time, I realized
>> the associated test case use int as INDEXTYPE while the
>> hotspots actually use unsigned int.  So different from the one
>> in i386, this patch uses unsigned int as INDEXTYPE since the
>> unpack support for unsigned int (r12-3134) also matters for
>> the hotspots vectorization.  Not sure if it's worth to updating
>> the one in i386 as well?
> It looks like the same testcase added in
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88531

Thanks for the information!  Good to know that there are already
some cases to cover.  :)

BR,
Kewen

>>
>> Tested on powerpc64le-linux-gnu P9 and powerpc64-linux-gnu P8.
>>
>> Is it ok for trunk?
>>
>> BR,
>> Kewen
>> -
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.target/powerpc/vect-gather-1.c: New test.
>>
>> diff --git a/gcc/testsuite/gcc.target/powerpc/vect-gather-1.c 
>> b/gcc/testsuite/gcc.target/powerpc/vect-gather-1.c
>> new file mode 100644
>> index 000..bf98045ab03
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/vect-gather-1.c
>> @@ -0,0 +1,20 @@
>> +/* { dg-do compile } */
>> +/* Profitable from Power8 since it supports efficient unaligned load.  */
>> +/* { dg-options "-Ofast -mdejagnu-cpu=power8 -fdump-tree-vect-details 
>> -fdump-tree-forwprop4" } */
>> +
>> +#ifndef INDEXTYPE
>> +#define INDEXTYPE unsigned int
>> +#endif
>> +double vmul(INDEXTYPE *rowstart, INDEXTYPE *rowend,
>> +   double *luval, double *dst)
>> +{
>> +  double res = 0;
>> +  for (const INDEXTYPE * col = rowstart; col != rowend; ++col, ++luval)
>> +res += *luval * dst[*col];
>> +  return res;
>> +}
>> +
>> +/* With gather emulation this should be profitable to vectorize from 
>> Power8.  */
>> +/* { dg-final { scan-tree-dump "loop vectorized" "vect" } } */
>> +/* The index vector loads and promotions should be scalar after forwprop.  
>> */
>> +/* { dg-final { scan-tree-dump-not "vec_unpack" "forwprop4" } } */
>> --
>> 2.25.1
>>
> 
> 




Re: [PATCH] pr103194-5.c: Replace long with int64_t

2021-11-24 Thread Hongtao Liu via Gcc-patches
On Thu, Nov 25, 2021 at 12:18 PM H.J. Lu via Gcc-patches
 wrote:
>
> Replace long with int64_t to work with -mx32.
Thanks.
>
> * gcc.target/i386/pr103194-5.c: Replace long with int64_t.
> ---
>  gcc/testsuite/gcc.target/i386/pr103194-5.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr103194-5.c 
> b/gcc/testsuite/gcc.target/i386/pr103194-5.c
> index dfaddf0aa6e..2e335285c2f 100644
> --- a/gcc/testsuite/gcc.target/i386/pr103194-5.c
> +++ b/gcc/testsuite/gcc.target/i386/pr103194-5.c
> @@ -2,6 +2,7 @@
>  /* { dg-options "-O2" } */
>  #include 
>  #include 
> +#include 
>
>  #define FOO(RTYPE,TYPE)  
>   \
>__attribute__((noinline,noclone)) RTYPE  \
> @@ -53,7 +54,7 @@
>  return __sync_fetch_and_and (a, ~mask) & mask; \
>}\
>
> -FOO(int, long);
> +FOO(int, int64_t);
>
>  /* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 2 } } */
>  /* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 4 } } */
> --
> 2.33.1
>


-- 
BR,
Hongtao


Re: [PATCH][RFC] middle-end/46476 - resurrect -Wunreachable-code

2021-11-24 Thread Richard Biener via Gcc-patches
On Wed, 24 Nov 2021, Jason Merrill wrote:

> On 11/24/21 11:15, Marek Polacek wrote:
> > On Wed, Nov 24, 2021 at 04:21:31PM +0100, Richard Biener via Gcc-patches
> > wrote:
> >> This resurrects -Wunreachable-code and implements a warning for
> >> trivially unreachable code as of CFG construction.  Most problematic
> >> with this is the C/C++ frontend added 'return 0;' stmt in main
> >> which the patch handles for C++ like the C frontend already does
> >> by using BUILTINS_LOCATION.
> >>
> >> Another problem for future enhancement is that after CFG construction
> >> we no longer can point to the stmt making a stmt unreachable, so
> >> this implementation tries to warn on the first unreachable
> >> statement of a region.  It might be possible to retain a pointer
> >> to the stmt that triggered creation of a basic-block but I'm not
> >> sure how reliable that would be.
> >>
> >> So this is really a simple attempt for now, triggered by myself
> >> running into such a coding error.  As always, the perfect is the
> >> enemy of the good.
> >>
> >> It does not pass bootstrap (which enables -Wextra), because of the
> >> situation in g++.dg/Wunreachable-code-5.C where the C++ frontend
> >> prematurely elides conditions like if (! GATHER_STATISTICS) that
> >> evaluate to true - oddly enough it does _not_ do this for
> >> conditions evaluating to false ... (one of the
> >> c-c++-common/Wunreachable-code-2.c cases).
> > 
> > I've taken a look into the C++ thing.  This is genericize_if_stmt:
> > if we have
> > 
> >if (0)
> >  return;
> > 
> > then cond is integer_zerop, then_ is a return_expr, but since it has
> > TREE_SIDE_EFFECTS, we create a COND_EXPR.  For
> > 
> >if (!0)
> >   return;
> > 
> > we do
> >   170   else if (integer_nonzerop (cond) && !TREE_SIDE_EFFECTS (else_))
> >   171 stmt = then_;
> > which elides the if completely.
> > 
> > So it seems it would help if we avoided eliding the if stmt if
> > -Wunreachable-code is in effect.  I'd be happy to make that change,
> > if it sounds sane.

Yes, that seems to work.

> Sure.
> 
> Currently the front end does various constant folding as part of
> genericization, as I recall because there were missed optimizations without
> it.  Is this particular one undesirable because it's at the statement level
> rather than within an expression?

It's undesirable because it short-circuits control flow and thus

  if (0)
return;
  foo ();

becomes

  return;
  foo ();

which looks exactly like a case we want to diagnose (very likely a 
programming error).

So yes, it applies to the statement level and there only to control
statements.

Richard.


[PATCH] bswap: Improve perform_symbolic_merge [PR103376]

2021-11-24 Thread Jakub Jelinek via Gcc-patches
On Wed, Nov 24, 2021 at 09:45:16AM +0100, Richard Biener wrote:
> > Thinking more about it, perhaps we could do more for BIT_XOR_EXPR.
> > We could allow masked1 == masked2 case for it, but would need to
> > do something different than the
> >   n->n = n1->n | n2->n;
> > we do on all the bytes together.
> > In particular, for masked1 == masked2 if masked1 != 0 (well, for 0
> > both variants are the same) and masked1 != 0xff we would need to
> > clear corresponding n->n byte instead of setting it to the input
> > as x ^ x = 0 (but if we don't know what x and y are, the result is
> > also don't know).  Now, for plus it is much harder, because not only
> > for non-zero operands we don't know what the result is, but it can
> > modify upper bytes as well.  So perhaps only if current's byte
> > masked1 && masked2 set the resulting byte to 0xff (unknown) iff
> > the byte above it is 0 and 0, and set that resulting byte to 0xff too.
> > Also, even for | we could instead of return NULL just set the resulting
> > byte to 0xff if it is different, perhaps it will be masked off later on.
> > Ok to handle that incrementally?
> 
> Not sure if it is worth the trouble - the XOR handling sounds
> straight forward at least.  But sure, the merging routine could
> simply be conservatively correct here.

This patch implements that (except that for + it just punts whenever
both operand bytes aren't 0 like before).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-11-25  Jakub Jelinek  

PR tree-optimization/103376
* gimple-ssa-store-merging.c (perform_symbolic_merge): For
BIT_IOR_EXPR, if masked1 && masked2 && masked1 != masked2, don't
punt, but set the corresponding result byte to MARKER_BYTE_UNKNOWN.
For BIT_XOR_EXPR similarly and if masked1 == masked2 and the
byte isn't MARKER_BYTE_UNKNOWN, set the corresponding result byte to
0.

--- gcc/gimple-ssa-store-merging.c.jj   2021-11-24 09:54:37.684365460 +0100
+++ gcc/gimple-ssa-store-merging.c  2021-11-24 11:18:54.46266 +0100
@@ -556,6 +556,7 @@ perform_symbolic_merge (gimple *source_s
   n->bytepos = n_start->bytepos;
   n->type = n_start->type;
   size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+  uint64_t res_n = n1->n | n2->n;
 
   for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
 {
@@ -563,12 +564,33 @@ perform_symbolic_merge (gimple *source_s
 
   masked1 = n1->n & mask;
   masked2 = n2->n & mask;
-  /* For BIT_XOR_EXPR or PLUS_EXPR, at least one of masked1 and masked2
-has to be 0, for BIT_IOR_EXPR x | x is still x.  */
-  if (masked1 && masked2 && (code != BIT_IOR_EXPR || masked1 != masked2))
-   return NULL;
+  /* If at least one byte is 0, all of 0 | x == 0 ^ x == 0 + x == x.  */
+  if (masked1 && masked2)
+   {
+ /* + can carry into upper bits, just punt.  */
+ if (code == PLUS_EXPR)
+   return NULL;
+ /* x | x is still x.  */
+ if (code == BIT_IOR_EXPR && masked1 == masked2)
+   continue;
+ if (code == BIT_XOR_EXPR)
+   {
+ /* x ^ x is 0, but MARKER_BYTE_UNKNOWN stands for
+unknown values and unknown ^ unknown is unknown.  */
+ if (masked1 == masked2
+ && masked1 != ((uint64_t) MARKER_BYTE_UNKNOWN
+<< i * BITS_PER_MARKER))
+   {
+ res_n &= ~mask;
+ continue;
+   }
+   }
+ /* Otherwise set the byte to unknown, it might still be
+later masked off.  */
+ res_n |= mask;
+   }
 }
-  n->n = n1->n | n2->n;
+  n->n = res_n;
   n->n_ops = n1->n_ops + n2->n_ops;
 
   return source_stmt;


Jakub



Re: [PATCH][RFC] middle-end/46476 - resurrect -Wunreachable-code

2021-11-24 Thread Richard Biener via Gcc-patches
On Wed, 24 Nov 2021, Martin Sebor wrote:

> On 11/24/21 8:21 AM, Richard Biener via Gcc-patches wrote:
> > This resurrects -Wunreachable-code and implements a warning for
> > trivially unreachable code as of CFG construction.  Most problematic
> > with this is the C/C++ frontend added 'return 0;' stmt in main
> > which the patch handles for C++ like the C frontend already does
> > by using BUILTINS_LOCATION.
> > 
> > Another problem for future enhancement is that after CFG construction
> > we no longer can point to the stmt making a stmt unreachable, so
> > this implementation tries to warn on the first unreachable
> > statement of a region.  It might be possible to retain a pointer
> > to the stmt that triggered creation of a basic-block but I'm not
> > sure how reliable that would be.
> > 
> > So this is really a simple attempt for now, triggered by myself
> > running into such a coding error.  As always, the perfect is the
> > enemy of the good.
> > 
> > It does not pass bootstrap (which enables -Wextra), because of the
> > situation in g++.dg/Wunreachable-code-5.C where the C++ frontend
> > prematurely elides conditions like if (! GATHER_STATISTICS) that
> > evaluate to true - oddly enough it does _not_ do this for
> > conditions evaluating to false ... (one of the
> > c-c++-common/Wunreachable-code-2.c cases).
> 
> I'm very much in favor of reviving the warning, even in its
> current simplistic form.  I especially welcome the suggestion
> to enhance it in the future, including adjusting its schedule
> among other passes (or adding other, later invocations).  It
> would be overly constraining to consider this placement ideal
> or set in stone.
> 
> Among possible enhancements worth considering is handling
> constant conditionals like:
> 
> int f (void)
> {
>   if (1)
> return 0;
>   else
> return 1;   <<< warn
> }
> 
> int g (void)
> {
>   if (1)
> return 0;
>   return 1; <<< warn also in C (not just in C++)
> }

I think both cases are undesirable to warn on, but both would be
possible to implement during parsing and only there they would
possibly make sense (you have to consider the if (1) resulting
from macro expansion or template instantiation, which are the
cases where warning is undesirable - not to mention disabled code
that wants to remain syntactically correct, something that cannot
be achieved with #if 0).

> By the way, a related feature that would be useful and that's
> been requested in the past is warning for stores with no effect,
> as in:
> 
>   int i;
>   i = 1;
>   i = 2;   <<< warn here
> 
> The detection of the simple cases like the one above can also
> be almost trivially implemented.

Likewise the above can be done during parsing where it's more
apparent whether the stmts are the same.

Richard.

> Martin
> 
> > 
> > Richard.
> > 
> > 2021-11-24  Richard Biener  
> > 
> >  PR middle-end/46476
> >  * common.opt (Wunreachable-code): No longer ignored,
> >  add warn_unreachable_code variable, enable with -Wextra.
> >  * doc/invoke.texi (Wunreachable-code): Document.
> >  (Wextra): Amend.
> >  * tree-cfg.c (build_gimple_cfg): Move case label grouping...
> >  (execute_build_cfg): ... here after new -Wunreachable-code
> >  warnings.
> >  (warn_unreachable_code_post_cfg_build): New function.
> >  (mark_forward_reachable_blocks): Likewise.
> >  (reverse_guess_deadend): Likewise.
> > 
> > gcc/cp/
> >  * decl.c (finish_function): Set input_location to
> >  BUILTINS_LOCATION around the code building the return 0
> >  for main().
> > 
> > libgomp/
> >  * oacc-plugin.c (GOMP_PLUGIN_acc_default_dim): Remove spurious
> >  return.
> > 
> > gcc/testsuite/
> >  * c-c++-common/Wunreachable-code-1.c: New testcase.
> >  * c-c++-common/Wunreachable-code-2.c: Likewise.
> >  * c-c++-common/Wunreachable-code-3.c: Likewise.
> >  * gcc.dg/Wunreachable-code-4.c: Likewise.
> >  * g++.dg/Wunreachable-code-5.C: Likewise.
> > ---
> >   gcc/common.opt|   4 +-
> >   gcc/cp/decl.c |   9 +-
> >   gcc/doc/invoke.texi   |   9 +-
> >   .../c-c++-common/Wunreachable-code-1.c|   8 ++
> >   .../c-c++-common/Wunreachable-code-2.c|   8 ++
> >   .../c-c++-common/Wunreachable-code-3.c|  35 ++
> >   gcc/testsuite/g++.dg/Wunreachable-code-5.C|  11 ++
> >   gcc/testsuite/gcc.dg/Wunreachable-code-4.c|  10 ++
> >   gcc/tree-cfg.c| 101 +-
> >   libgomp/oacc-plugin.c |   1 -
> >   10 files changed, 186 insertions(+), 10 deletions(-)
> >   create mode 100644 gcc/testsuite/c-c++-common/Wunreachable-code-1.c
> >   create mode 100644 gcc/testsuite/c-c++-common/Wunreachable-code-2.c
> >   create mode 100644 gcc/testsuite/c-c++-common/Wunreachable-code-3.c
> >   create mode 100644 gcc/testsuite/g++.dg/Wunreachable-code-5.C
> >   create mode 100644 gcc/testsuite/gcc.dg/Wunreachable-code-4.c
> > 
> > diff --git a/gcc/common.opt b/gcc/com

[PATCH] Fix typo in r12-5486.

2021-11-24 Thread liuhongt via Gcc-patches
TYPE_PRECISION (type) <  TYPE_PRECISION (TREE_TYPE (@2)) is supposed to check
an integer type, not a pointer type, so use the second parameter instead.

i.e. first parameter is VPTR, second parameter is I4.

582DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_OR_4,
583  "__atomic_fetch_or_4",
584  BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROWCALL_LEAF_LIST)


Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
Failed testcases in PR are verified.
Ok for trunk?

gcc/ChangeLog:

PR middle-end/103419
* match.pd: Fix typo, use the type of second parameter, not
first one.
---
 gcc/match.pd | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 5adcd6bd02c..09c7ce749dc 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4053,7 +4053,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
}
   (if (ibit == ibit2
   && ibit >= 0
-  && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))
+  && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@0))
 
 (match (nop_atomic_bit_test_and_p @0 @1 @3)
  (bit_and (convert?@3 (SYNC_FETCH_OR_XOR_N @2 INTEGER_CST@0))
@@ -4064,21 +4064,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
}
   (if (ibit == ibit2
   && ibit >= 0
-  && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))
+  && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@0))
 
 (match (nop_atomic_bit_test_and_p @0 @0 @4)
  (bit_and:c
   (convert1?@4
(ATOMIC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@5 @6)) @3))
   (convert2? @0))
- (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)
+ (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@0)
 
 (match (nop_atomic_bit_test_and_p @0 @0 @4)
  (bit_and:c
   (convert1?@4
(SYNC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@3 @5
   (convert2? @0))
- (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)
+ (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@0)
 
 (match (nop_atomic_bit_test_and_p @0 @1 @3)
  (bit_and@4 (convert?@3 (ATOMIC_FETCH_AND_N @2 INTEGER_CST@0 @5))
@@ -4090,7 +4090,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
}
   (if (ibit == ibit2
   && ibit >= 0
-  && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))
+  && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@0))
 
 (match (nop_atomic_bit_test_and_p @0 @1 @3)
  (bit_and@4
@@ -4103,21 +4103,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
}
   (if (ibit == ibit2
   && ibit >= 0
-  && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))
+  && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@0))
 
 (match (nop_atomic_bit_test_and_p @4 @0 @3)
  (bit_and:c
   (convert1?@3
(ATOMIC_FETCH_AND_N @2 (nop_convert?@4 (bit_not (lshift@0 integer_onep@6 
@7))) @5))
   (convert2? @0))
- (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)
+ (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@4)
 
 (match (nop_atomic_bit_test_and_p @4 @0 @3)
  (bit_and:c
   (convert1?@3
(SYNC_FETCH_AND_AND_N @2 (nop_convert?@4 (bit_not (lshift@0 integer_onep@6 
@7)
   (convert2? @0))
-  (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)
+  (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@4)
 
 #endif
 
-- 
2.18.1