Re: [PING^1][PATCH] postscan_insn hook not called after input_asm

2022-04-07 Thread Paul Iannetta via Gcc-patches
Hi,

I would like to draw your attention on this patch, it's a pretty tiny patch 
which does not affect many targets.

- Original Message -
From: "gcc-patches" 
To: "gcc-patches" 
Sent: Tuesday, March 29, 2022 4:10:16 PM
Subject: [PATCH] postscan_insn hook not called after input_asm

Hi,

While working on the Kalray port of gcc, I noticed that the hook 
TARGET_ASM_FINAL_POSTSCAN_INSN is not called after emitting an instruction 
coming from a basic asm block.  Here is a patch which fixes this behavior.

The following check:
```
$ find gcc/config/ -type f -exec grep "#define TARGET_ASM_FINAL" "{}" +
gcc/config/m68k/m68k.cc:#define TARGET_ASM_FINAL_POSTSCAN_INSN 
m68k_asm_final_postscan_insn
gcc/config/avr/avr.cc:#define TARGET_ASM_FINAL_POSTSCAN_INSN 
avr_asm_final_postscan_insn
gcc/config/mips/mips.cc:#define TARGET_ASM_FINAL_POSTSCAN_INSN 
mips_final_postscan_insn
```
reveals that m68k, avr and mips are the only affected targets upstream.

Paul Iannetta 
Kalray






[PATCH] tree-optimization/105185 - simplify modref query in SCCVN

2022-04-07 Thread Richard Biener via Gcc-patches
This simplifies the modref query for calls in SCCVN again after
r12-8019-g4be08315124281, avoiding an ICE when the modref
analyzed access lacks an actual argument on the caller side.
It effectively reverts r12-7531-gdc46350d44c294.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2022-04-07  Richard Biener  

PR tree-optimization/105185
* tree-ssa-sccvn.cc (visit_reference_op_call): Simplify
modref query again.

* gcc.dg/torture/pr105185.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr105185.c | 13 +
 gcc/tree-ssa-sccvn.cc   |  5 ++---
 2 files changed, 15 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr105185.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr105185.c 
b/gcc/testsuite/gcc.dg/torture/pr105185.c
new file mode 100644
index 000..6ab32360de1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr105185.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+
+int foo (fmt)
+char* fmt;
+{
+  return (__builtin_strchr (fmt, '*') != 0
+  || __builtin_strchr (fmt, 'n') != 0);
+}
+void bar ()
+{
+  if (foo ())
+__builtin_abort ();
+}
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index 66b4fc21f5b..d4d0aba880c 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -5140,12 +5140,11 @@ visit_reference_op_call (tree lhs, gcall *stmt)
{
  accesses.quick_grow (accesses.length () + 1);
  ao_ref *r = &accesses.last ();
- tree arg = access_node.get_call_arg (stmt);
- if (!POINTER_TYPE_P (TREE_TYPE (arg))
- || !access_node.get_ao_ref (stmt, r))
+ if (!access_node.get_ao_ref (stmt, r))
{
  /* Initialize a ref based on the argument and
 unknown offset if possible.  */
+ tree arg = access_node.get_call_arg (stmt);
  if (arg && TREE_CODE (arg) == SSA_NAME)
arg = SSA_VAL (arg);
  if (arg
-- 
2.34.1


Re: [RFC/gcov 06/12] gcov-tool: Support file input from stdin

2022-04-07 Thread Martin Liška

On 3/31/22 13:35, Sebastian Huber wrote:

gcc/

* gcov-io.cc (GCOV_MODE_STDIN): Define.
(gcov_position): For gcov-tool, return calculated position if file is
stdin.
(gcov_open):  For gcov-tool, use stdin if filename is NULL.
(gcov_close): For gcov-tool, do not close stdin.
(gcov_read_bytes): For gcov-tool, update position if file is stdin.
(gcov_sync): For gcov-tool, discard input if file is stdin.
---
  gcc/gcov-io.cc | 38 ++
  1 file changed, 38 insertions(+)

diff --git a/gcc/gcov-io.cc b/gcc/gcov-io.cc
index fee3130f94a..177f81166a6 100644
--- a/gcc/gcov-io.cc
+++ b/gcc/gcov-io.cc
@@ -35,8 +35,13 @@ struct gcov_var
int error;  /* < 0 overflow, > 0 disk error.  */
int mode;   /* < 0 writing, > 0 reading.  */
int endian; /* Swap endianness.  */
+#ifdef IN_GCOV_TOOL
+  gcov_position_t pos; /* File position for stdin support. */


One more space after dot.


+#endif
  } gcov_var;
  
+#define GCOV_MODE_STDIN 2

+
  /* Save the current position in the gcov file.  */
  /* We need to expose this function when compiling for gcov-tool.  */
  #ifndef IN_GCOV_TOOL
@@ -45,6 +50,10 @@ static inline
  gcov_position_t
  gcov_position (void)
  {
+#ifdef IN_GCOV_TOOL
+  if (gcov_var.mode == GCOV_MODE_STDIN)
+return gcov_var.pos;
+#endif
return ftell (gcov_var.file);
  }
  
@@ -108,6 +117,16 @@ gcov_open (const char *name, int mode)

  #if !IN_LIBGCOV || defined (IN_GCOV_TOOL)
gcov_var.endian = 0;
  #endif
+#ifdef IN_GCOV_TOOL
+  gcov_var.pos = 0;
+  if (!name)
+{
+  gcov_nonruntime_assert (gcov_var.mode > 0);
+  gcov_var.file = stdin;
+  gcov_var.mode = GCOV_MODE_STDIN;
+  return 1;
+}
+#endif
  #if GCOV_LOCKED
if (mode > 0)
  {
@@ -190,6 +209,11 @@ gcov_open (const char *name, int mode)
  GCOV_LINKAGE int
  gcov_close (void)
  {
+#ifdef IN_GCOV_TOOL
+  if (gcov_var.file == stdin)
+gcov_var.file = 0;
+  else
+#endif
if (gcov_var.file)
  {
if (fclose (gcov_var.file))
@@ -363,6 +387,9 @@ gcov_read_bytes (void *buffer, unsigned count)
if (read != 1)
  return NULL;
  
+#ifdef IN_GCOV_TOOL

+  gcov_var.pos += count;
+#endif
return buffer;
  }
  
@@ -499,6 +526,17 @@ gcov_sync (gcov_position_t base, gcov_unsigned_t length)

  {
gcov_nonruntime_assert (gcov_var.mode > 0);
base += length;
+#ifdef IN_GCOV_TOOL
+  if (gcov_var.mode == GCOV_MODE_STDIN)
+{
+  while (gcov_var.pos < base)
+   {
+ ++gcov_var.pos;
+ (void)fgetc(gcov_var.file);


A space after fgetc, please.

Martin


+   }
+  return;
+}
+#endif
fseek (gcov_var.file, base, SEEK_SET);
  }
  #endif




Re: [RFC/gcov 08/12] gcov: Move prepend to list to read_gcda_file()

2022-04-07 Thread Martin Liška

On 3/31/22 13:35, Sebastian Huber wrote:

|+ /* Prepend to gcov info list */|


Please add dot and 2 spaces.

Martin


Re: [RFC/gcov 11/12] gcov: Record EOF error during read

2022-04-07 Thread Martin Liška

On 3/31/22 13:35, Sebastian Huber wrote:

|+enum gcov_file_error {|


Please put the brace to the next line and comment the enum type.

Martin


Re: [RFC/gcov 12/12] gcov-tool: Add merge-stream subcommand

2022-04-07 Thread Martin Liška

On 3/31/22 13:35, Sebastian Huber wrote:

gcc/

* gcov-tool.cc (gcov_profile_merge_stream): Declare.
(print_merge_stream_usage_message): New.
(merge_stream_usage): Likewise.
(do_merge_stream): Likewise.
(print_usage): Call print_merge_stream_usage_message().
(main): Call do_merge_stream() to execute merge-stream subcommand.

libgcc/

* libgcov-util.c (consume_stream): New.
(get_target_profiles_for_merge): Likewise.
(gcov_profile_merge_stream): Likewise.
---
  gcc/gcov-tool.cc  | 76 
  libgcc/libgcov-util.c | 80 +++
  2 files changed, 156 insertions(+)

diff --git a/gcc/gcov-tool.cc b/gcc/gcov-tool.cc
index d712715cf7e..d8572b184e9 100644
--- a/gcc/gcov-tool.cc
+++ b/gcc/gcov-tool.cc
@@ -42,6 +42,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
  
  extern struct gcov_info *gcov_profile_merge (struct gcov_info*,

 struct gcov_info*, int, int);
+extern struct gcov_info *gcov_profile_merge_stream (const char *, int, int);
  extern int gcov_profile_overlap (struct gcov_info*, struct gcov_info*);
  extern int gcov_profile_normalize (struct gcov_info*, gcov_type);
  extern int gcov_profile_scale (struct gcov_info*, float, int, int);
@@ -229,6 +230,78 @@ do_merge (int argc, char **argv)
return profile_merge (argv[optind], argv[optind+1], output_dir, w1, w2);
  }
  
+/* Usage message for profile merge-stream.  */

+
+static void
+print_merge_stream_usage_message (int error_p)
+{
+  FILE *file = error_p ? stderr : stdout;
+
+  fnotice (file, "  merge-stream [options] [stream-file]  Merge coverage stream 
file (or stdin)\n"
+"and coverage file 
contents\n");
+  fnotice (file, "-v, --verbose   Verbose mode\n");
+  fnotice (file, "-w, --weight Set weights (float point 
values)\n");
+}
+
+static const struct option merge_stream_options[] =
+{
+  { "verbose",no_argument,   NULL, 'v' },
+  { "weight", required_argument, NULL, 'w' },
+  { 0, 0, 0, 0 }
+};
+
+/* Print merge-stream usage and exit.  */
+
+static void ATTRIBUTE_NORETURN
+merge_stream_usage (void)
+{
+  fnotice (stderr, "Merge-stream subcomand usage:");
+  print_merge_stream_usage_message (true);
+  exit (FATAL_EXIT_CODE);
+}
+
+/* Driver for profile merge-stream sub-command.  */
+
+static int
+do_merge_stream (int argc, char **argv)
+{
+  int opt;
+  int w1 = 1, w2 = 1;
+  struct gcov_info *merged_profile;
+
+  optind = 0;
+  while ((opt = getopt_long (argc, argv, "vw:",
+merge_stream_options, NULL)) != -1)
+{
+  switch (opt)
+   {
+   case 'v':
+ verbose = true;
+ gcov_set_verbose ();
+ break;
+   case 'w':
+ sscanf (optarg, "%d,%d", &w1, &w2);
+ if (w1 < 0 || w2 < 0)
+   fatal_error (input_location, "weights need to be non-negative");
+ break;
+   default:
+ merge_stream_usage ();
+   }
+}
+
+  if (argc - optind > 1)
+merge_stream_usage ();
+
+  merged_profile = gcov_profile_merge_stream (argv[optind], w1, w2);
+
+  if (merged_profile)
+gcov_do_dump (merged_profile, 0, -1);
+  else if (verbose)
+fnotice (stdout, "no profile files were merged\n");
+
+  return 0;
+}
+
  /* If N_VAL is no-zero, normalize the profile by setting the largest counter
 counter value to N_VAL and scale others counters proportionally.
 Otherwise, multiply the all counters by SCALE.  */
@@ -505,6 +578,7 @@ print_usage (int error_p)
fnotice (file, "  -h, --helpPrint this help, then 
exit\n");
fnotice (file, "  -v, --version Print version number, 
then exit\n");
print_merge_usage_message (error_p);
+  print_merge_stream_usage_message (error_p);
print_rewrite_usage_message (error_p);
print_overlap_usage_message (error_p);
fnotice (file, "\nFor bug reporting instructions, please see:\n%s.\n",
@@ -594,6 +668,8 @@ main (int argc, char **argv)
  
if (!strcmp (sub_command, "merge"))

  return do_merge (argc - optind, argv + optind);
+  else if (!strcmp (sub_command, "merge-stream"))
+return do_merge_stream (argc - optind, argv + optind);
else if (!strcmp (sub_command, "rewrite"))
  return do_rewrite (argc - optind, argv + optind);
else if (!strcmp (sub_command, "overlap"))
diff --git a/libgcc/libgcov-util.c b/libgcc/libgcov-util.c
index 622d5a9dc71..0fe60528b48 100644
--- a/libgcc/libgcov-util.c
+++ b/libgcc/libgcov-util.c
@@ -735,6 +735,86 @@ gcov_profile_merge (struct gcov_info *tgt_profile, struct 
gcov_info *src_profile
return tgt_profile;
  }


Please document the new added functions below.

  
+struct gcov_info *

+consume_stream (const char *filename)
+{
+  read_profile_dir_init ()

Re: [RFC/gcov 00/12] Add merge-stream subcommand to gcov-tool

2022-04-07 Thread Martin Liška

On 3/31/22 13:35, Sebastian Huber wrote:

This patch set is a proof of concept.

The aim is to better support gcov in free-standing environments. For example,
you can run a test executable which dumps all gcov info objects in a serial
data stream using __gcov_info_to_gcda() and the new __gcov_filename_to_gcfn().
It could be encoded as base64. It could be also compressed. On the host you
unpack the encoded data stream and feed it into gcov-tool using the new
"merge-stream" subcommand:


Hello.

The patch set is approved with the nits provided. Please install the patches
once stage1 opens.



gcov-tool --help
Usage: gcov-tool [OPTION]... SUB_COMMAND [OPTION]...

Offline tool to handle gcda counts

   -h, --helpPrint this help, then exit
   -v, --version Print version number, then exit
   merge-stream [options] [stream-file]  Merge coverage stream file (or stdin)
 and coverage file contents
 -v, --verbose   Verbose mode
 -w, --weight Set weights (float point values)

Example:

base64 -d log.txt | gcov-tool merge-stream

The patch set does not change the format of gcda files.

TODO:

* Documentation


I would install the patches with documentation change bits.


* Tests


Tests can be added after that.

Thank you for the functionality.
Cheers,
Martin



Sebastian Huber (12):
   gcov-tool: Allow merging of empty profile lists
   gcov: Add mode to all gcov_open()
   gcov: Add open mode parameter to gcov_do_dump()
   gcov: Make gcov_seek() static
   gcov: Add __gcov_filename_to_gcfn()
   gcov-tool: Support file input from stdin
   gcov: Use xstrdup()
   gcov: Move prepend to list to read_gcda_file()
   gcov: Move gcov_open() to caller of read_gcda_file()
   gcov: Fix integer types in ftw_read_file()
   gcov: Record EOF error during read
   gcov-tool: Add merge-stream subcommand

  gcc/gcov-io.cc |  76 ++-
  gcc/gcov-io.h  |  35 +
  gcc/gcov-tool.cc   | 107 +-
  libgcc/gcov.h  |  17 -
  libgcc/libgcov-driver-system.c |   7 +-
  libgcc/libgcov-driver.c|  42 --
  libgcc/libgcov-util.c  | 135 +
  libgcc/libgcov.h   |   3 -
  8 files changed, 326 insertions(+), 96 deletions(-)





GCC 11.2.1 Status Report (2022-04-07)

2022-04-07 Thread Richard Biener via Gcc-patches


Status
==

The gcc-11 branch is in regression and documentation fixing mode.
It's time to prepare for the release of GCC 11.3 as we have accumulated
quite a number of fixes since GCC 11.2 was released.  The plan is
to create a release candidate on Wednesday, April 13th, which is
in about one week and then follow with the release roughly a week
after that.

There are currently three P1 bugs which technically block the
release, we will not hold off releasing for PR102146 which is
extra testsuite failures on powerpc64le after backporting a change
from trunk.  The other two bugs (PR102300, PR103871) are assigned,
so please try to fix them until next week or provide reasoning
why the regression from GCC 11.2 can be ignored (both are rejects-valid).

Quality Data


Priority  #   Change from last report
---   ---
P1  3 +   3
P2  448   + 186
P3  63-  41
P4  223   +  17
P5  25+   1
---   ---
Total P1-P3 514   + 148
Total   762   + 166


Previous Report
===

https://gcc.gnu.org/pipermail/gcc/2021-July/236903.html


Re: [RFC/gcov 00/12] Add merge-stream subcommand to gcov-tool

2022-04-07 Thread Sebastian Huber

Hello Martin,

thanks for the review. I am not really used to write func ().

On 07/04/2022 10:38, Martin Liška wrote:


TODO:

* Documentation


I would install the patches with documentation change bits.


Ok, I will add some documentation changes and send them with a v2 of the 
patch set after stage 0 opens.


I would like to add a chapter about the use of gcov in systems without a 
file system.





* Tests


Tests can be added after that.


Ok, good.

--
embedded brains GmbH
Herr Sebastian HUBER
Dornierstr. 4
82178 Puchheim
Germany
email: sebastian.hu...@embedded-brains.de
phone: +49-89-18 94 741 - 16
fax:   +49-89-18 94 741 - 08

Registergericht: Amtsgericht München
Registernummer: HRB 157899
Vertretungsberechtigte Geschäftsführer: Peter Rasmussen, Thomas Dörfler
Unsere Datenschutzerklärung finden Sie hier:
https://embedded-brains.de/datenschutzerklaerung/


[PATCH v2] rs6000: Adjust mov optabs for opaque modes [PR103353]

2022-04-07 Thread Kewen.Lin via Gcc-patches
Hi,

As PR103353 shows, we may want to continue to expand a MMA built-in
function like a normal function, even if we have already emitted
error messages about some missing required conditions.  As shown in
that PR, without one explicit mov optab on OOmode provided, it would
call emit_move_insn recursively.

So this patch is to allow the mov pattern to be generated when we are
expanding to RTL and have seen errors even without MMA supported, it's
expected that the generated pattern would not cause further ICEs as the
compilation would stop soon after expanding.

Bootstrapped and regtested on powerpc64-linux-gnu P8 and
powerpc64le-linux-gnu P9 and P10.

v1: https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591150.html

v2: Polish some comments and add one test case as Will and Peter suggested.

Is it ok for trunk or upcoming stage1?

BR,
Kewen
--

PR target/103353

gcc/ChangeLog:

* config/rs6000/mma.md (define_expand movoo): Move TARGET_MMA condition
check to preparation statements and add handlings for !TARGET_MMA.
(define_expand movxo): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pr103353.c: New test.
---
 gcc/config/rs6000/mma.md| 42 ++---
 gcc/testsuite/gcc.target/powerpc/pr103353.c | 22 +++
 2 files changed, 58 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr103353.c

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 907c9d6d516..746a77a0957 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -268,10 +268,25 @@ (define_int_attr avvi4i4i4
[(UNSPEC_MMA_PMXVI8GER4PP   "pmxvi8ger4pp")
 (define_expand "movoo"
   [(set (match_operand:OO 0 "nonimmediate_operand")
(match_operand:OO 1 "input_operand"))]
-  "TARGET_MMA"
+  ""
 {
-  rs6000_emit_move (operands[0], operands[1], OOmode);
-  DONE;
+  if (TARGET_MMA) {
+rs6000_emit_move (operands[0], operands[1], OOmode);
+DONE;
+  }
+  /* Opaque modes are only expected to be available when MMA is supported,
+ but PR103353 shows we may want to continue to expand a MMA built-in
+ function, even if we have already emitted error messages about some
+ missing required conditions.  As shown in that PR, without one
+ explicit mov optab on OOmode provided, it would call emit_move_insn
+ recursively.  So we allow this pattern to be generated when we are
+ expanding to RTL and have seen errors, even though there is no MMA
+ support.  It would not cause further ICEs as the compilation would
+ stop soon after expanding.  */
+  else if (currently_expanding_to_rtl && seen_error ())
+;
+  else
+gcc_unreachable ();
 })

 (define_insn_and_split "*movoo"
@@ -300,10 +315,25 @@ (define_insn_and_split "*movoo"
 (define_expand "movxo"
   [(set (match_operand:XO 0 "nonimmediate_operand")
(match_operand:XO 1 "input_operand"))]
-  "TARGET_MMA"
+  ""
 {
-  rs6000_emit_move (operands[0], operands[1], XOmode);
-  DONE;
+  if (TARGET_MMA) {
+rs6000_emit_move (operands[0], operands[1], XOmode);
+DONE;
+  }
+  /* Opaque modes are only expected to be available when MMA is supported,
+ but PR103353 shows we may want to continue to expand a MMA built-in
+ function, even if we have already emitted error messages about some
+ missing required conditions.  As shown in that PR, without one
+ explicit mov optab on XOmode provided, it would call emit_move_insn
+ recursively.  So we allow this pattern to be generated when we are
+ expanding to RTL and have seen errors, even though there is no MMA
+ support.  It would not cause further ICEs as the compilation would
+ stop soon after expanding.  */
+  else if (currently_expanding_to_rtl && seen_error ())
+;
+  else
+gcc_unreachable ();
 })

 (define_insn_and_split "*movxo"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103353.c 
b/gcc/testsuite/gcc.target/powerpc/pr103353.c
new file mode 100644
index 000..6b0bedbb958
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103353.c
@@ -0,0 +1,22 @@
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* If the default cpu type is power10 or later, MMA is enabled by default.
+   To keep the test point available all the time, this case specifies
+   -mdejagnu-cpu=power6 to make it be tested without MMA.  */
+/* { dg-options "-maltivec -mdejagnu-cpu=power6" } */
+
+/* Verify there is no ICE and don't check the error messages on MMA
+   requirement since they could be fragile and are not test points
+   of this case.  */
+
+void
+foo (__vector_pair *dst, double *x)
+{
+  dst[0] = __builtin_vsx_lxvp (0, (__vector_pair *)(void *)x);
+  /* { dg-error ".*" "pr103353" { target *-*-* } .-1 } */
+}
+
+void
+bar (__vector_pair *src, double *x)
+{
+  __builtin_vsx_stxvp (src[0], 0, (__vector_pair *)(void *)x);
+}
--
2.27.0


Re: [PATCH] rs6000: Adjust mov optabs for opaque modes [PR103353]

2022-04-07 Thread Kewen.Lin via Gcc-patches
on 2022/4/2 5:52 AM, Peter Bergner wrote:
> On 4/1/22 3:50 PM, will schmidt wrote:
>> Is there a testcase, new or existing, that illustrates this error path?
> 
> Well, the already existsing test case pr101849.c is where the issue was seen,
> but only when compiled by hand outside of the test harness and using only the
> -maltivec option and not the -mcpu=power10 that the test case uses.
> 
> That said, I agree, pr101849.c should probably be copied into another test 
> case
> file (pr103353.c) that uses options that trigger the issue so we can be sure 
> the
> fix won't regress.
> 
> Peter
> 
> 

Thanks for the comments, Will and Peter!

I thought of adding one case before but gave up as the case still emits error 
messages
which can be fragile and are not test points and I didn't find any similar test 
cases
for similar scenarios in powerpc test suite.  Your suggestions make me consider 
it
seriously again.  The patch v2 was just posted with one test case:

  https://gcc.gnu.org/pipermail/gcc-patches/2022-April/592916.html.

Hope it looks better to you, thanks again!

BR,
Kewen


[PATCH][GCC11] tree-optimization/99121 - avoid ICEing for non-constant sizes

2022-04-07 Thread Richard Biener via Gcc-patches
The following is a simple fix to avoid ICEing on non-constant
sizes of ARRAY_REFs instead of backporting too intrusive changes
done on trunk.

Bootstrap / regtest pending on x86_64-unknown-linux-gnu.

2022-04-07  Richard Biener  

PR tree-optimization/99121
* gimple-array-bounds.cc (array_bounds_checker::check_mem_ref):
Bail out for non-constant type size.
---
 gcc/gimple-array-bounds.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/gimple-array-bounds.cc b/gcc/gimple-array-bounds.cc
index 199d9f5d216..b1179518651 100644
--- a/gcc/gimple-array-bounds.cc
+++ b/gcc/gimple-array-bounds.cc
@@ -607,6 +607,8 @@ array_bounds_checker::check_mem_ref (location_t location, 
tree ref,
 
   if (TREE_CODE (reftype) == ARRAY_TYPE)
{
+ if (TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (reftype))) != INTEGER_CST)
+   return false;
  /* Set to the size of the array element (and adjust below).  */
  eltsize = wi::to_offset (TYPE_SIZE_UNIT (TREE_TYPE (reftype)));
  /* Use log2 of size to convert the array byte size in to its
-- 
2.34.1


PING^2 [PATCH] rs6000/test: Adjust p9-vec-length-7 sensitive to unroll [PR103196]

2022-04-07 Thread Kewen.Lin via Gcc-patches
Hi,

Gentle ping: https://gcc.gnu.org/pipermail/gcc-patches/2022-February/590959.html

This patch is to fix one regressed test case, I guess it still can be 
considered for stage 4.

BR,
Kewen

> on 2022/2/28 1:37 PM, Kewen.Lin via Gcc-patches wrote:
>> Hi,
>>
>> As PR103196 shows, p9-vec-length-full-7.c needs to be adjusted as the
>> complete unrolling can happen on some of its loops.  This patch is to
>> use pragma "GCC unroll 0" to disable all possible loop unrollings.
>> Hope it can help the case not that fragile.
>>
>> There are some other p9-vec-length* cases, I noticed that some of them
>> use either bigger or unknown loop iteration counts, and
>> "p9-vec-length-3*" have considered the effects of complete unrolling.
>> So I just leave them alone for now.
>>
>> Tested on powerpc64-linux-gnu P8 and powerpc64le-linux-gnu P9 and P10.
>>
>> Is it ok for trunk?
>>
>> BR,
>> Kewen
>> -
>>  PR testsuite/103196
>>
>> gcc/testsuite/ChangeLog:
>>
>>  * gcc.target/powerpc/p9-vec-length-7.h: Add DO_PRAGMA macro.
>>  * gcc.target/powerpc/p9-vec-length-epil-7.c: Use unroll pragma to
>>  disable any unrollings.
>>  * gcc.target/powerpc/p9-vec-length-full-7.c: Remove useless option.
>>  * gcc.target/powerpc/p9-vec-length.h: Likewise.
> 
> 


Re: [PATCH] Disable float128 tests on VxWorks, PR target/104253.

2022-04-07 Thread Eric Botcazou via Gcc-patches
> I have run the tests on my usual Linux systems (little endian power10,
> little endian power9, big endian power8), but I don't have access to a
> VxWorks system.  Eric does this fix the failure for you?

Yes, if you add '*' at the end of the selector, i.e. [istarget *-*-vxworks*].

> If it does fix the failure, can I apply the patch to the master branch and
> backport it to GCC 11 and GCC 10?  Sorry about the breakage.

That would be perfect, thanks in advance.

-- 
Eric Botcazou




Re: [PATCH] Disable float128 tests on VxWorks, PR target/104253.

2022-04-07 Thread Segher Boessenkool
On Thu, Apr 07, 2022 at 12:29:45AM -0400, Michael Meissner wrote:
> In PR target/104253, it was pointed out the that test case added as part
> of fixing the PR does not work on VxWorks because float128 is not
> supported on that system.  I have modified the three tests for float128 so
> that they are manually excluded on VxWorks systems.  In looking at the
> code, I also added checks in check_effective_target_ppc_ieee128_ok to
> disable the systems that will never support VSX instructions which are
> required for float128 support (eabi, eabispe, darwin).

It's just one extra to the big list here, but, why do we need all these
manual exclusions anyway?  What is broken about the test itself?

It would be so much more useful if the tests would help us, instead of
producing a lot of extra busy-work.


Segher


Re: [PATCH] rs6000: Guard bifs {un, }pack_{longdouble, ibm128} under hard float [PR103623]

2022-04-07 Thread Segher Boessenkool
Hi!

On Thu, Mar 03, 2022 at 10:11:32AM +0800, Kewen.Lin wrote:
> As PR103623 shows, it's a regression failure due to new built-in
> function framework, previously we guard __builtin_{un,}pack_{longdouble,
> ibm128} built-in functions under hard float, so they are unavailable
> with the given configuration.  While with new bif infrastructure, it
> becomes available and gets ICE due to incomplete supports.
> 
> Segher and Peter pointed out that we should make them available with
> soft float, I agree we can extend them to cover both soft and hard
> float.  But considering it's stage 4 now and this regression is
> classified as P1, also the previous behavior requiring hard float
> aligns with what document [1] says, I guess it may be a good idea to
> fix it with the attached small patch to be consistent with the previous
> behavior.  Then we can extend the functionality in upcoming stage 1.

Or you could just not take away the existing functionality.

What makes it ICE on (at least some configurations of) 32-bit now?  Can
you exclude just 32-bit soft float?


Segher


Fix pure/const propagation in modref

2022-04-07 Thread Jan Hubicka via Gcc-patches
Hi,
this patch fixes ipa-modref propagation of pure/const functions.  When we inline
function, the modref summary is updated to represent the function after
inlining and there we need to propagate nondeterministic and side-effects flag.

Bootstrapped/regtested x86_64-linux, will commit it shortly.

Honza

gcc/ChangeLog:

2022-04-07  Jan Hubicka  

PR ipa/105160
* ipa-modref.cc (ipa_merge_modref_summary_after_inlining):

gcc/testsuite/ChangeLog:

2022-04-07  Jan Hubicka  

PR ipa/105160
* gcc.dg/ipa/pr105160.c: New test.

diff --git a/gcc/ipa-modref.cc b/gcc/ipa-modref.cc
index acfd7d80ff8..556816ab429 100644
--- a/gcc/ipa-modref.cc
+++ b/gcc/ipa-modref.cc
@@ -5281,6 +5281,29 @@ ipa_merge_modref_summary_after_inlining (cgraph_edge 
*edge)
   if (!ignore_stores)
to_info_lto->stores->collapse ();
 }
+  /* Merge side effects and non-determinism.
+ PURE/CONST flags makes functions deterministic and if there is
+ no LOOPING_CONST_OR_PURE they also have no side effects.  */
+  if (!(flags & (ECF_CONST | ECF_NOVOPS | ECF_PURE))
+  || (flags & ECF_LOOPING_CONST_OR_PURE))
+{
+  if (to_info)
+   {
+ if (!callee_info || callee_info->side_effects)
+   to_info->side_effects = true;
+ if ((!callee_info || callee_info->nondeterministic)
+ && !ignore_nondeterminism_p (edge->caller->decl, flags))
+   to_info->nondeterministic = true;
+   }
+  if (to_info_lto)
+   {
+ if (!callee_info_lto || callee_info_lto->side_effects)
+   to_info_lto->side_effects = true;
+ if ((!callee_info_lto || callee_info_lto->nondeterministic)
+ && !ignore_nondeterminism_p (edge->caller->decl, flags))
+   to_info_lto->nondeterministic = true;
+   }
+ }
   if (callee_info || callee_info_lto)
 {
   auto_vec  parm_map;
diff --git a/gcc/testsuite/gcc.dg/ipa/pr105160.c 
b/gcc/testsuite/gcc.dg/ipa/pr105160.c
new file mode 100644
index 000..ea80545b102
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/pr105160.c
@@ -0,0 +1,77 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-ipa-modref" } */
+#define sysreg_read(regname)   \
+({ \
+   unsigned long __sr_val; \
+   asm volatile("");   \
+   \
+   __sr_val;   \
+})
+
+#define sysreg_write(regname, __sw_val)\
+do {   \
+   asm volatile("");   \
+} while (0)
+
+#define isb()  \
+do {   \
+   asm volatile(   \
+   "isb"   \
+   :   \
+   :   \
+   : "memory");\
+} while (0)
+
+static unsigned long sctlr_read(void)
+{
+   return sysreg_read(sctlr_el1);
+}
+
+static void sctlr_write(unsigned long val)
+{
+   sysreg_write(sctlr_el1, val);
+}
+
+static void sctlr_rmw(void)
+{
+   unsigned long val;
+
+   val = sctlr_read();
+   val |= 1UL << 7;
+   sctlr_write(val);
+}
+
+void sctlr_read_multiple(void)
+{
+   sctlr_read();
+   sctlr_read();
+   sctlr_read();
+   sctlr_read();
+}
+
+void sctlr_write_multiple(void)
+{
+   sctlr_write(0);
+   sctlr_write(0);
+   sctlr_write(0);
+   sctlr_write(0);
+   sctlr_write(0);
+}
+
+void sctlr_rmw_multiple(void)
+{
+   sctlr_rmw();
+   sctlr_rmw();
+   sctlr_rmw();
+   sctlr_rmw();
+}
+
+void function(void)
+{
+   sctlr_read_multiple();
+   sctlr_write_multiple();
+   sctlr_rmw_multiple();
+
+   isb();
+}
+/* { dg-final { scan-ipa-dump-not "Function found to be const" "modref"  } } */


Re: Fix pure/const propagation in modref

2022-04-07 Thread Richard Biener via Gcc-patches
On Thu, Apr 7, 2022 at 1:20 PM Jan Hubicka via Gcc-patches
 wrote:
>
> Hi,
> this patch fixes ipa-modref propagation of pure/const functions.  When we 
> inline
> function, the modref summary is updated to represent the function after
> inlining and there we need to propagate nondeterministic and side-effects 
> flag.
>
> Bootstrapped/regtested x86_64-linux, will commit it shortly.

I noticed we have ->writes_errno in the summary which we do not handle
in this function either?  That's also missing on the GCC 11 branch
(where I just picked r12-2851-g9851a1631f2915 and
r12-5538-ga70faf6e4df748, both still in testing).

Richard.

> Honza
>
> gcc/ChangeLog:
>
> 2022-04-07  Jan Hubicka  
>
> PR ipa/105160
> * ipa-modref.cc (ipa_merge_modref_summary_after_inlining):
>
> gcc/testsuite/ChangeLog:
>
> 2022-04-07  Jan Hubicka  
>
> PR ipa/105160
> * gcc.dg/ipa/pr105160.c: New test.
>
> diff --git a/gcc/ipa-modref.cc b/gcc/ipa-modref.cc
> index acfd7d80ff8..556816ab429 100644
> --- a/gcc/ipa-modref.cc
> +++ b/gcc/ipa-modref.cc
> @@ -5281,6 +5281,29 @@ ipa_merge_modref_summary_after_inlining (cgraph_edge 
> *edge)
>if (!ignore_stores)
> to_info_lto->stores->collapse ();
>  }
> +  /* Merge side effects and non-determinism.
> + PURE/CONST flags makes functions deterministic and if there is
> + no LOOPING_CONST_OR_PURE they also have no side effects.  */
> +  if (!(flags & (ECF_CONST | ECF_NOVOPS | ECF_PURE))
> +  || (flags & ECF_LOOPING_CONST_OR_PURE))
> +{
> +  if (to_info)
> +   {
> + if (!callee_info || callee_info->side_effects)
> +   to_info->side_effects = true;
> + if ((!callee_info || callee_info->nondeterministic)
> + && !ignore_nondeterminism_p (edge->caller->decl, flags))
> +   to_info->nondeterministic = true;
> +   }
> +  if (to_info_lto)
> +   {
> + if (!callee_info_lto || callee_info_lto->side_effects)
> +   to_info_lto->side_effects = true;
> + if ((!callee_info_lto || callee_info_lto->nondeterministic)
> + && !ignore_nondeterminism_p (edge->caller->decl, flags))
> +   to_info_lto->nondeterministic = true;
> +   }
> + }
>if (callee_info || callee_info_lto)
>  {
>auto_vec  parm_map;
> diff --git a/gcc/testsuite/gcc.dg/ipa/pr105160.c 
> b/gcc/testsuite/gcc.dg/ipa/pr105160.c
> new file mode 100644
> index 000..ea80545b102
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/ipa/pr105160.c
> @@ -0,0 +1,77 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-ipa-modref" } */
> +#define sysreg_read(regname)   \
> +({ \
> +   unsigned long __sr_val; \
> +   asm volatile("");   \
> +   \
> +   __sr_val;   \
> +})
> +
> +#define sysreg_write(regname, __sw_val)\
> +do {   \
> +   asm volatile("");   \
> +} while (0)
> +
> +#define isb()  \
> +do {   \
> +   asm volatile(   \
> +   "isb"   \
> +   :   \
> +   :   \
> +   : "memory");\
> +} while (0)
> +
> +static unsigned long sctlr_read(void)
> +{
> +   return sysreg_read(sctlr_el1);
> +}
> +
> +static void sctlr_write(unsigned long val)
> +{
> +   sysreg_write(sctlr_el1, val);
> +}
> +
> +static void sctlr_rmw(void)
> +{
> +   unsigned long val;
> +
> +   val = sctlr_read();
> +   val |= 1UL << 7;
> +   sctlr_write(val);
> +}
> +
> +void sctlr_read_multiple(void)
> +{
> +   sctlr_read();
> +   sctlr_read();
> +   sctlr_read();
> +   sctlr_read();
> +}
> +
> +void sctlr_write_multiple(void)
> +{
> +   sctlr_write(0);
> +   sctlr_write(0);
> +   sctlr_write(0);
> +   sctlr_write(0);
> +   sctlr_write(0);
> +}
> +
> +void sctlr_rmw_multiple(void)
> +{
> +   sctlr_rmw();
> +   sctlr_rmw();
> +   sctlr_rmw();
> +   sctlr_rmw();
> +}
> +
> +void function(void)
> +{
> +   sctlr_read_multiple();
> +   sctlr_write_multiple();
> +   sctlr_rmw_multiple();
> +
> +   isb();
> +}
> +/* { dg-final { scan-ipa-dump-not "Function found to be const" "modref"  } } 
> */


Fix wrong code in gnatmake

2022-04-07 Thread Jan Hubicka via Gcc-patches
Hi,
this patch fixes miscompilation of gnatmake.  Modref attempts to track memory
accesses relative to the base pointers which are parameters of functions.
If it fails, it still makes difference between unknown memory access and
global memory access.  The second makes it possible to disambiguate with
memory that is not accessible from outside world (i.e. everything that does
not escape from the caller function).  This is useful so we do not punt
when unknown function is called.

Now I added ref_may_access_global_memory_p to tree-ssa-alias whic is using
ptr_deref_may_alias_global_p.  There is however a shift in meaning of this
predicate: the second tests that the dereference may alias with global variable.

In the testcase we are disambiguating heap allocated escaping memory which is
not a global variable but it is still a global memory in the modref's sense.
So we need to test in addition contains_escaped.

The patch simply copies logic from the predicate and adds the check.  
I am not sure if there is better way to handle this?

I apologize for taking so long time to look into the PR (and other my bugs).
Will try to handle them quickly now.

Bootstrapped/regtested x86_64-linux.

Honza

gcc/ChangeLog:

2022-04-07  Jan Hubicka  

PR 104303
* tree-ssa-alias.cc (ref_may_access_global_memory_p): Fix handling of
refs.

diff --git a/gcc/tree-ssa-alias.cc b/gcc/tree-ssa-alias.cc
index 50bd47b31f3..9e34f76c3cb 100644
--- a/gcc/tree-ssa-alias.cc
+++ b/gcc/tree-ssa-alias.cc
@@ -2578,8 +2578,24 @@ ref_may_access_global_memory_p (ao_ref *ref)
   if (TREE_CODE (base) == MEM_REF
   || TREE_CODE (base) == TARGET_MEM_REF)
 {
-  if (ptr_deref_may_alias_global_p (TREE_OPERAND (base, 0)))
+  struct ptr_info_def *pi;
+  tree ptr = TREE_OPERAND (base, 0);
+
+  /* If we end up with a pointer constant here that may point
+to global memory.  */
+  if (TREE_CODE (ptr) != SSA_NAME)
+   return true;
+
+  pi = SSA_NAME_PTR_INFO (ptr);
+
+  /* If we do not have points-to information for this variable,
+we have to punt.  */
+  if (!pi)
return true;
+
+  /* ???  This does not use TBAA to prune globals ptr may not access.  */
+  return pt_solution_includes_global (&pi->pt)
+|| pi->pt.vars_contains_escaped;
 }
   else
 {


Re: Fix pure/const propagation in modref

2022-04-07 Thread Jan Hubicka via Gcc-patches
> On Thu, Apr 7, 2022 at 1:20 PM Jan Hubicka via Gcc-patches
>  wrote:
> >
> > Hi,
> > this patch fixes ipa-modref propagation of pure/const functions.  When we 
> > inline
> > function, the modref summary is updated to represent the function after
> > inlining and there we need to propagate nondeterministic and side-effects 
> > flag.
> >
> > Bootstrapped/regtested x86_64-linux, will commit it shortly.
> 
> I noticed we have ->writes_errno in the summary which we do not handle
> in this function either?  That's also missing on the GCC 11 branch

writes_errno should be safe: it only can be set if function calls
something that has writes_errno in fnspec and that is propagated at IPA
time.  Similarly with calls_interposable.

> (where I just picked r12-2851-g9851a1631f2915 and
> r12-5538-ga70faf6e4df748, both still in testing).

Thanks!  I will look into gcc 11 backports as well after fixing the P1s.
Honza
> 
> Richard.
> 
> > Honza
> >
> > gcc/ChangeLog:
> >
> > 2022-04-07  Jan Hubicka  
> >
> > PR ipa/105160
> > * ipa-modref.cc (ipa_merge_modref_summary_after_inlining):
> >
> > gcc/testsuite/ChangeLog:
> >
> > 2022-04-07  Jan Hubicka  
> >
> > PR ipa/105160
> > * gcc.dg/ipa/pr105160.c: New test.
> >
> > diff --git a/gcc/ipa-modref.cc b/gcc/ipa-modref.cc
> > index acfd7d80ff8..556816ab429 100644
> > --- a/gcc/ipa-modref.cc
> > +++ b/gcc/ipa-modref.cc
> > @@ -5281,6 +5281,29 @@ ipa_merge_modref_summary_after_inlining (cgraph_edge 
> > *edge)
> >if (!ignore_stores)
> > to_info_lto->stores->collapse ();
> >  }
> > +  /* Merge side effects and non-determinism.
> > + PURE/CONST flags makes functions deterministic and if there is
> > + no LOOPING_CONST_OR_PURE they also have no side effects.  */
> > +  if (!(flags & (ECF_CONST | ECF_NOVOPS | ECF_PURE))
> > +  || (flags & ECF_LOOPING_CONST_OR_PURE))
> > +{
> > +  if (to_info)
> > +   {
> > + if (!callee_info || callee_info->side_effects)
> > +   to_info->side_effects = true;
> > + if ((!callee_info || callee_info->nondeterministic)
> > + && !ignore_nondeterminism_p (edge->caller->decl, flags))
> > +   to_info->nondeterministic = true;
> > +   }
> > +  if (to_info_lto)
> > +   {
> > + if (!callee_info_lto || callee_info_lto->side_effects)
> > +   to_info_lto->side_effects = true;
> > + if ((!callee_info_lto || callee_info_lto->nondeterministic)
> > + && !ignore_nondeterminism_p (edge->caller->decl, flags))
> > +   to_info_lto->nondeterministic = true;
> > +   }
> > + }
> >if (callee_info || callee_info_lto)
> >  {
> >auto_vec  parm_map;
> > diff --git a/gcc/testsuite/gcc.dg/ipa/pr105160.c 
> > b/gcc/testsuite/gcc.dg/ipa/pr105160.c
> > new file mode 100644
> > index 000..ea80545b102
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/ipa/pr105160.c
> > @@ -0,0 +1,77 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O -fdump-ipa-modref" } */
> > +#define sysreg_read(regname)   \
> > +({ \
> > +   unsigned long __sr_val; \
> > +   asm volatile("");   \
> > +   \
> > +   __sr_val;   \
> > +})
> > +
> > +#define sysreg_write(regname, __sw_val)\
> > +do {   \
> > +   asm volatile("");   \
> > +} while (0)
> > +
> > +#define isb()  \
> > +do {   \
> > +   asm volatile(   \
> > +   "isb"   \
> > +   :   \
> > +   :   \
> > +   : "memory");\
> > +} while (0)
> > +
> > +static unsigned long sctlr_read(void)
> > +{
> > +   return sysreg_read(sctlr_el1);
> > +}
> > +
> > +static void sctlr_write(unsigned long val)
> > +{
> > +   sysreg_write(sctlr_el1, val);
> > +}
> > +
> > +static void sctlr_rmw(void)
> > +{
> > +   unsigned long val;
> > +
> > +   val = sctlr_read();
> > +   val |= 1UL << 7;
> > +   sctlr_write(val);
> > +}
> > +
> > +void sctlr_read_multiple(void)
> > +{
> > +   sctlr_read();
> > +   sctlr_read();
> > +   sctlr_read();
> > +   sctlr_read();
> > +}
> > +
> > +void sctlr_write_multiple(void)
> > +{
> > +   sctlr_write(0);
> > +   sctlr_write(0);
> > +   sctlr_write(0);
> > +   sctlr_write(0);
> > +   sctlr_write(0);
> > +}
> > +
> > +void sctlr_rmw_multiple(void)
> > +{
> > +   sctlr_rmw();
> > +   sctlr_rmw();
> > +   sctlr_rmw();
> > +   sctlr_rmw();
> > +}
> > +
> > +void function(void)
> > +{
> > +   sctlr_read_multiple();
> > +   sctlr_write_multiple();
> > +   sctlr_rmw_multiple();
> > +
> > +   isb();
> > +}
> > +/* { dg-final { scan-ipa-dump-not "Function found to be const"

Re: [PATCH] Add condition coverage profiling

2022-04-07 Thread Martin Liška

On 3/28/22 16:40, Jørgen Kvalsvik via Gcc-patches wrote:

... And with another tiny change that fixes Martin's while (1); case.


Hello.

Back to this ;) Thank you for the updated version of the patch. I have a couple 
of comments/requests:

1) I noticed the following cannot be linked:

$ cat errors.C
char trim_filename_name;
int r;

void trim_filename() {
  if (trim_filename_name)
r = 123;
  while (trim_filename_name)
;
}

int
main() {}

$ g++ errors.C -fprofile-conditions -O2
mold: error: undefined symbol: /tmp/ccmZANB5.o: __gcov8._Z13trim_filenamev
collect2: error: ld returned 1 exit status

Btw. how much have you tested the patch set so far? Have you tried building 
something bigger
with -fprofile-conditions enabled?

2) As noticed by Sebastian, please support the new tag in gcov-dump:

$ gcov-dump -l a.gcno
...
a.gcno:0145:  28:LINES
a.gcno:  block 7:`a.c':11
a.gcno:0147:   8:UNKNOWN

3) Then I have some top-level formatting comments:

a) please re-run check_GNU_style.py, I still see:
=== ERROR type #1: blocks of 8 spaces should be replaced with tabs (35 
error(s)) ===
...

b) write comments for each newly added function (and separate it by one empty 
line from
the function definition)

c) do not create visual alignment, we don't use it:

+   cond->set ("count",   new json::integer_number (count));
+   cond->set ("covered", new json::integer_number (covered));

and similar examples

d) finish multiple comments after a dot on the same line:

+/* Number of expressions found - this value is the number of entries in the
+   gcov output and the parameter to gcov_counter_alloc ().
+   */

should be ... gcov_counter_alloc ().  */

e) for long lines with a function call like:

+   const int n = find_conditions_masked_by
+   (block, expr, flags + k, path, CONDITIONS_MAX_TERMS);

do rather
const int n
  = find_conditions_masked_by (block, expr,
   next_arg, ...);

f) for function params:

+int
+collect_reachable_conditionals (
+basic_block pre,
+basic_block post,
+basic_block *out,
+int maxsize,
+sbitmap expr)

use rather:

int
collect_reachable_conditionals (basic_block pre,
second_arg,...

In the next round, I'm going to take a look at the CFG algorithm that identifies
and instruments the sub-expressions.

Thanks.

Cheers,
Martin



Re: Fix wrong code in gnatmake

2022-04-07 Thread Richard Biener via Gcc-patches
On Thu, 7 Apr 2022, Jan Hubicka wrote:

> Hi,
> this patch fixes miscompilation of gnatmake.  Modref attempts to track memory
> accesses relative to the base pointers which are parameters of functions.
> If it fails, it still makes difference between unknown memory access and
> global memory access.  The second makes it possible to disambiguate with
> memory that is not accessible from outside world (i.e. everything that does
> not escape from the caller function).  This is useful so we do not punt
> when unknown function is called.
> 
> Now I added ref_may_access_global_memory_p to tree-ssa-alias whic is using
> ptr_deref_may_alias_global_p.  There is however a shift in meaning of this
> predicate: the second tests that the dereference may alias with global 
> variable.
> 
> In the testcase we are disambiguating heap allocated escaping memory which is
> not a global variable but it is still a global memory in the modref's sense.
> So we need to test in addition contains_escaped.
> 
> The patch simply copies logic from the predicate and adds the check.  
> I am not sure if there is better way to handle this?

I'm testing the following variant which exposes this detail
(escaped local memory global or not) in the APIs that say "global"
which allows to remove ref_may_access_global_memory_p.

Richard.

>From 66f986c1bdea2597b1e9406429bf1c144d757a3e Mon Sep 17 00:00:00 2001
From: Richard Biener 
Date: Thu, 7 Apr 2022 14:07:54 +0200
Subject: [PATCH] ipa/104303 - miscompilation of gnatmake
To: gcc-patches@gcc.gnu.org

Modref attempts to track memory accesses relative to the base pointers
which are parameters of functions.
If it fails, it still makes difference between unknown memory access and
global memory access.  The second makes it possible to disambiguate with
memory that is not accessible from outside world (i.e. everything that does
not escape from the caller function).  This is useful so we do not punt
when unknown function is called.

The added ref_may_access_global_memory_p ends up using
ptr_deref_may_alias_global_p which does not consider escaped automatic
variables as global.  For modref those are still global since they
can be accessed from functions called.

The following adds a flag to the *_global_p APIs indicating whether
escaped local memory should be considered as global or not and
removes ref_may_access_global_memory_p in favor of using
ref_may_alias_global_p with the flag set to true.

Richard.

2022-04-07  Richard Biener  
Jan Hubicka  

PR ipa/104303
* tree-ssa-alias.h (ptr_deref_may_alias_global_p,
ref_may_alias_global_p, ref_may_alias_global_p,
stmt_may_clobber_global_p, pt_solution_includes_global): Add
bool parameters indicating whether escaped locals should be
considered global.
* tree-ssa-structalias.cc (pt_solution_includes_global):
When the new escaped_nonlocal_p flag is true also consider
pt->vars_contains_escaped.
* tree-ssa-alias.cc (ptr_deref_may_alias_global_p):
Pass down new escaped_nonlocal_p flag.
(ref_may_alias_global_p): Likewise.
(stmt_may_clobber_global_p): Likewise.
(ref_may_alias_global_p_1): Likewise.  For decls also
query the escaped solution if true.
(ref_may_access_global_memory_p): Remove.
(modref_may_conflict): Use ref_may_alias_global_p with
escaped locals considered global.
(ref_maybe_used_by_stmt_p): Adjust.
* ipa-fnsummary.cc (points_to_local_or_readonly_memory_p):
Likewise.
* tree-ssa-dse.cc (dse_classify_store): Likewise.
* trans-mem.cc (thread_private_new_memory): Likewise, but
consider escaped locals global.
* tree-ssa-dce.cc (mark_stmt_if_obviously_necessary): Likewise.

* gnat.dg/concat5.adb: New.
* gnat.dg/concat5_pkg1.adb: Likewise.
* gnat.dg/concat5_pkg1.ads: Likewise.
* gnat.dg/concat5_pkg2.adb: Likewise.
* gnat.dg/concat5_pkg2.ads: Likewise.
---
 gcc/ipa-fnsummary.cc   |  2 +-
 gcc/testsuite/gnat.dg/concat5.adb  |  9 
 gcc/testsuite/gnat.dg/concat5_pkg1.adb | 18 +++
 gcc/testsuite/gnat.dg/concat5_pkg1.ads |  5 ++
 gcc/testsuite/gnat.dg/concat5_pkg2.adb | 10 
 gcc/testsuite/gnat.dg/concat5_pkg2.ads |  5 ++
 gcc/trans-mem.cc   |  2 +-
 gcc/tree-ssa-alias.cc  | 65 ++
 gcc/tree-ssa-alias.h   | 10 ++--
 gcc/tree-ssa-dce.cc|  2 +-
 gcc/tree-ssa-dse.cc|  4 +-
 gcc/tree-ssa-structalias.cc| 15 --
 12 files changed, 93 insertions(+), 54 deletions(-)
 create mode 100644 gcc/testsuite/gnat.dg/concat5.adb
 create mode 100644 gcc/testsuite/gnat.dg/concat5_pkg1.adb
 create mode 100644 gcc/testsuite/gnat.dg/concat5_pkg1.ads
 create mode 100644 gcc/testsuite/gnat.dg/concat5_pkg2.adb
 create mode 100644 gcc/testsuite/gnat.dg/concat5_pkg2.ads

diff --git a/gc

Re: [PATCH] toplev: use HOST_WIDE_INT for local_tick to prevent overflow

2022-04-07 Thread Jason A. Donenfeld via Gcc-patches
Hi Richard,

The gcc devs have apparently declined to do anything about this bug
(it's marked as "RESOLVED WONTFIX") and don't care much about plugins
I guess: , so I
won't be sending a v2. However,

On Thu, Apr 7, 2022 at 8:30 AM Richard Biener
 wrote:
> For the issue of overloading -1 I'd
> instead suggest to do
>
> >   struct timeval tv;
> >   gettimeofday (&tv, NULL);
> >   local_tick = (unsigned) tv.tv_sec * 1000 + tv.tv_usec / 1000;
>  /* Avoid aliasing with the special-meaning -1.  */
>  if (local_tick == -1)
>local_tick = 1;
>
> because even with uint64_t the result could be -1, no?

>>> (2**64 - 1 - 1000 - 1649334804) / 1000 / (365 * 24 * 60 * 60)
584942417.3027719

If we're still using this code in 584 million years, God help us all...

But, anyway, if the devs in that bug report do want to fix this
ultimately but don't want to change the type, your `if (local_tick ==
-1) local_tick = 1` thing there and in the next stanza seems like it'd
fix the problem, and also might make things easier on plugins
straddling versions.

Jason


[committed] analyzer: fix leak false +ve with symbolic writes [PR102308]

2022-04-07 Thread David Malcolm via Gcc-patches
(I typoed 102308 as 102208 in a few places when I committed; I've fixed
them in this email, but in particular the subject line in what I pushed
to git has the wrong ID; it should be 102308).

PR analyzer/102308 reports false positives from -Wanalyzer-malloc-leak.
The root cause is the analyzer getting confused about symbolic writes
that could alias a pointer referencing a malloced buffer.

struct st
{
  void *ptr;
  int arr[10];
};

struct st test (int idx)
{
  struct st s;
  s.ptr = __builtin_malloc (1024);  /* (1) */
  s.arr[idx] = 42;  /* (2) */
  return s;
}

When removing overlapping bindings at (2),
store::remove_overlapping_bindings was failing to pass on the
uncertainty_t *, and thus when clobbering the binding of s.ptr, the
heap-allocated pointer was not being added to the set of maybe-bound
svalues, and thus being treated as leaking.

This patch fixes this, so that s.ptr from (1) is treated as maybe-bound
after the write at (2), fixing the leak false postive.

Doing so requires the store to be smarter about how clobbering happens
with various combinations of concrete keys and symbolic keys within
concrete clusters and symbolic clusters, so that we don't lose warnings
about definite leaks.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r12-8047-g88b939b19ab454ab2d932ef292bbc557abe4431c.

gcc/analyzer/ChangeLog:
PR analyzer/102308
* store.cc (binding_map::remove_overlapping_bindings): Add
"always_overlap" param, using it to generalize to the case where
we want to remove all bindings.  Update "uncertainty" logic to
only record maybe-bound values for cases where there is a symbolic
write involved.
(binding_cluster::mark_region_as_unknown): Split param "reg" into
"reg_to_bind" and "reg_for_overlap".
(binding_cluster::maybe_get_compound_binding): Pass "false" to
binding_map::remove_overlapping_bindings new "always_overlap" param.
(binding_cluster::remove_overlapping_bindings): Determine
"always_overlap" and pass it to
binding_map::remove_overlapping_bindings.
(store::set_value): Pass uncertainty to remove_overlapping_bindings
call.  Update for new param of
binding_cluster::mark_region_as_unknown, passing both the base
region of the iter_cluster, and the lhs_reg.
(store::mark_region_as_unknown): Update for new param of
binding_cluster::mark_region_as_unknown, passing "reg" for both.
(store::remove_overlapping_bindings): Add param "uncertainty", and
pass it on to call to
binding_cluster::remove_overlapping_bindings.
* store.h (binding_map::remove_overlapping_bindings): Add
"always_overlap" param.
(binding_cluster::mark_region_as_unknown): Split param "reg" into
"reg_to_bind" and "reg_for_overlap".
(store::remove_overlapping_bindings): Add param "uncertainty".

gcc/testsuite/ChangeLog:
PR analyzer/102308
* gcc.dg/analyzer/symbolic-9.c: New test.
* gcc.dg/analyzer/torture/leak-pr102308-1.c: New test.
* gcc.dg/analyzer/torture/leak-pr102308-2.c: New test.

Signed-off-by: David Malcolm 
---
 gcc/analyzer/store.cc | 112 --
 gcc/analyzer/store.h  |  10 +-
 gcc/testsuite/gcc.dg/analyzer/symbolic-9.c| 197 ++
 .../gcc.dg/analyzer/torture/leak-pr102308-1.c |  19 ++
 .../gcc.dg/analyzer/torture/leak-pr102308-2.c |  12 ++
 5 files changed, 326 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/symbolic-9.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/torture/leak-pr102308-1.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/torture/leak-pr102308-2.c

diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc
index 0014633c335..35f66a4b6fc 100644
--- a/gcc/analyzer/store.cc
+++ b/gcc/analyzer/store.cc
@@ -997,27 +997,61 @@ binding_map::get_overlapping_bindings (const binding_key 
*key,
value:  {BITS_WITHIN(bytes 4-7, inner_val: 
INIT_VAL((*INIT_VAL(p_33(D))).arr))}
 
If UNCERTAINTY is non-NULL, use it to record any svalues that
-   were removed, as being maybe-bound.  */
+   were removed, as being maybe-bound.
+
+   If ALWAYS_OVERLAP, then assume that DROP_KEY can overlap anything
+   in the map, due to one or both of the underlying clusters being
+   symbolic (but not the same symbolic region).  Hence even if DROP_KEY is a
+   concrete binding it could actually be referring to the same memory as
+   distinct concrete bindings in the map.  Remove all bindings, but
+   register any svalues with *UNCERTAINTY.  */
 
 void
 binding_map::remove_overlapping_bindings (store_manager *mgr,
  const binding_key *drop_key,
- uncertainty_t *uncertainty)
+ uncertainty_t *uncertainty,
+

Re: Fix wrong code in gnatmake

2022-04-07 Thread Jan Hubicka via Gcc-patches
> On Thu, 7 Apr 2022, Jan Hubicka wrote:
> 
> > Hi,
> > this patch fixes miscompilation of gnatmake.  Modref attempts to track 
> > memory
> > accesses relative to the base pointers which are parameters of functions.
> > If it fails, it still makes difference between unknown memory access and
> > global memory access.  The second makes it possible to disambiguate with
> > memory that is not accessible from outside world (i.e. everything that does
> > not escape from the caller function).  This is useful so we do not punt
> > when unknown function is called.
> > 
> > Now I added ref_may_access_global_memory_p to tree-ssa-alias whic is using
> > ptr_deref_may_alias_global_p.  There is however a shift in meaning of this
> > predicate: the second tests that the dereference may alias with global 
> > variable.
> > 
> > In the testcase we are disambiguating heap allocated escaping memory which 
> > is
> > not a global variable but it is still a global memory in the modref's sense.
> > So we need to test in addition contains_escaped.
> > 
> > The patch simply copies logic from the predicate and adds the check.  
> > I am not sure if there is better way to handle this?
> 
> I'm testing the following variant which exposes this detail
> (escaped local memory global or not) in the APIs that say "global"
> which allows to remove ref_may_access_global_memory_p.

Thank you.  Indeed it is better to have an explicit flag, since the
clash of names is bit sensitive. 

Honza


Re: [PATCH] rs6000: Guard bifs {un, }pack_{longdouble, ibm128} under hard float [PR103623]

2022-04-07 Thread Jakub Jelinek via Gcc-patches
On Thu, Apr 07, 2022 at 06:09:52AM -0500, Segher Boessenkool wrote:
> Hi!
> 
> On Thu, Mar 03, 2022 at 10:11:32AM +0800, Kewen.Lin wrote:
> > As PR103623 shows, it's a regression failure due to new built-in
> > function framework, previously we guard __builtin_{un,}pack_{longdouble,
> > ibm128} built-in functions under hard float, so they are unavailable
> > with the given configuration.  While with new bif infrastructure, it
> > becomes available and gets ICE due to incomplete supports.
> > 
> > Segher and Peter pointed out that we should make them available with
> > soft float, I agree we can extend them to cover both soft and hard
> > float.  But considering it's stage 4 now and this regression is
> > classified as P1, also the previous behavior requiring hard float
> > aligns with what document [1] says, I guess it may be a good idea to
> > fix it with the attached small patch to be consistent with the previous
> > behavior.  Then we can extend the functionality in upcoming stage 1.
> 
> Or you could just not take away the existing functionality.

Have those builtins ever worked with 64-bit soft-float?
When I try e.g. gcc 11 on the #c0 testcase from the PR, I get:
./cc1.r11-1 -quiet -nostdinc pr103623.c -m64 -mlong-double-128 -msoft-float
pr103623.c: In function ‘main’:
pr103623.c:2:16: error: ‘__builtin_unpack_longdouble’ requires the 
‘-mhard-float’ option
2 | #define UNPACK __builtin_unpack_longdouble
  |^
pr103623.c:11:15: note: in expansion of macro ‘UNPACK’
   11 |   double x0 = UNPACK (a, 0);
  |   ^~

>From what I can see, those builtins were using:
/* 128-bit long double floating point builtins.  */
#define BU_LDBL128_2(ENUM, NAME, ATTR, ICODE)   \
  RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM,  /* ENUM */  \
"__builtin_" NAME,  /* NAME */  \
(RS6000_BTM_HARD_FLOAT  /* MASK */  \
 | RS6000_BTM_LDBL128), \
(RS6000_BTC_ ## ATTR/* ATTR */  \
 | RS6000_BTC_BINARY),  \
CODE_FOR_ ## ICODE) /* ICODE */
   
/* 128-bit __ibm128 floating point builtins (use -mfloat128 to indicate that
   __ibm128 is available).  */
#define BU_IBM128_2(ENUM, NAME, ATTR, ICODE)\
  RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM,  /* ENUM */  \
"__builtin_" NAME,  /* NAME */  \
(RS6000_BTM_HARD_FLOAT  /* MASK */  \
 | RS6000_BTM_FLOAT128),\
(RS6000_BTC_ ## ATTR/* ATTR */  \
 | RS6000_BTC_BINARY),  \
CODE_FOR_ ## ICODE) /* ICODE */
macros and rs6000_builtin_is_supported_p was checking whether
all the bits in the mask are set:
  HOST_WIDE_INT fnmask = rs6000_builtin_info[fncode].mask;
  if ((fnmask & rs6000_builtin_mask) != fnmask)
return false;
  else
return true;
(so logical and of all of them).

Jakub



Re: Fix wrong code in gnatmake

2022-04-07 Thread Richard Biener via Gcc-patches
On Thu, 7 Apr 2022, Jan Hubicka wrote:

> > On Thu, 7 Apr 2022, Jan Hubicka wrote:
> > 
> > > Hi,
> > > this patch fixes miscompilation of gnatmake.  Modref attempts to track 
> > > memory
> > > accesses relative to the base pointers which are parameters of functions.
> > > If it fails, it still makes difference between unknown memory access and
> > > global memory access.  The second makes it possible to disambiguate with
> > > memory that is not accessible from outside world (i.e. everything that 
> > > does
> > > not escape from the caller function).  This is useful so we do not punt
> > > when unknown function is called.
> > > 
> > > Now I added ref_may_access_global_memory_p to tree-ssa-alias whic is using
> > > ptr_deref_may_alias_global_p.  There is however a shift in meaning of this
> > > predicate: the second tests that the dereference may alias with global 
> > > variable.
> > > 
> > > In the testcase we are disambiguating heap allocated escaping memory 
> > > which is
> > > not a global variable but it is still a global memory in the modref's 
> > > sense.
> > > So we need to test in addition contains_escaped.
> > > 
> > > The patch simply copies logic from the predicate and adds the check.  
> > > I am not sure if there is better way to handle this?
> > 
> > I'm testing the following variant which exposes this detail
> > (escaped local memory global or not) in the APIs that say "global"
> > which allows to remove ref_may_access_global_memory_p.
> 
> Thank you.  Indeed it is better to have an explicit flag, since the
> clash of names is bit sensitive. 

OK - bootstrapped / tested on x86_64-unknown-linux-gnu including Ada
and now pushed.

Thanks for analyzing this!

Richard.


Re: [PATCH v2] rs6000: Adjust mov optabs for opaque modes [PR103353]

2022-04-07 Thread will schmidt via Gcc-patches
On Thu, 2022-04-07 at 17:29 +0800, Kewen.Lin wrote:
> Hi,
> 
> As PR103353 shows, we may want to continue to expand a MMA built-in
> function like a normal function, even if we have already emitted
> error messages about some missing required conditions.  As shown in
> that PR, without one explicit mov optab on OOmode provided, it would
> call emit_move_insn recursively.
> 
> So this patch is to allow the mov pattern to be generated when we are
> expanding to RTL and have seen errors even without MMA supported, it's
> expected that the generated pattern would not cause further ICEs as the
> compilation would stop soon after expanding.
> 
> Bootstrapped and regtested on powerpc64-linux-gnu P8 and
> powerpc64le-linux-gnu P9 and P10.
> 
> v1: https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591150.html
> 
> v2: Polish some comments and add one test case as Will and Peter suggested.

Thanks.

> 
> Is it ok for trunk or upcoming stage1?
> 
> BR,
> Kewen
> --
> 
>   PR target/103353
> 
> gcc/ChangeLog:
> 
>   * config/rs6000/mma.md (define_expand movoo): Move TARGET_MMA condition
>   check to preparation statements and add handlings for !TARGET_MMA.
>   (define_expand movxo): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/powerpc/pr103353.c: New test.
> ---
>  gcc/config/rs6000/mma.md| 42 ++---
>  gcc/testsuite/gcc.target/powerpc/pr103353.c | 22 +++
>  2 files changed, 58 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr103353.c
> 
> diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
> index 907c9d6d516..746a77a0957 100644
> --- a/gcc/config/rs6000/mma.md
> +++ b/gcc/config/rs6000/mma.md
> @@ -268,10 +268,25 @@ (define_int_attr avvi4i4i4  
> [(UNSPEC_MMA_PMXVI8GER4PP   "pmxvi8ger4pp")
>  (define_expand "movoo"
>[(set (match_operand:OO 0 "nonimmediate_operand")
>   (match_operand:OO 1 "input_operand"))]
> -  "TARGET_MMA"
> +  ""
>  {
> -  rs6000_emit_move (operands[0], operands[1], OOmode);
> -  DONE;
> +  if (TARGET_MMA) {
> +rs6000_emit_move (operands[0], operands[1], OOmode);
> +DONE;
> +  }
> +  /* Opaque modes are only expected to be available when MMA is supported,
> + but PR103353 shows we may want to continue to expand a MMA built-in
> + function, even if we have already emitted error messages about some
> + missing required conditions.  As shown in that PR, without one
> + explicit mov optab on OOmode provided, it would call emit_move_insn
> + recursively.  So we allow this pattern to be generated when we are
> + expanding to RTL and have seen errors, even though there is no MMA
> + support.  It would not cause further ICEs as the compilation would
> + stop soon after expanding.  */
> +  else if (currently_expanding_to_rtl && seen_error ())
> +;
> +  else
> +gcc_unreachable ();
>  })

ok

> 
>  (define_insn_and_split "*movoo"
> @@ -300,10 +315,25 @@ (define_insn_and_split "*movoo"
>  (define_expand "movxo"
>[(set (match_operand:XO 0 "nonimmediate_operand")
>   (match_operand:XO 1 "input_operand"))]
> -  "TARGET_MMA"
> +  ""
>  {
> -  rs6000_emit_move (operands[0], operands[1], XOmode);
> -  DONE;
> +  if (TARGET_MMA) {
> +rs6000_emit_move (operands[0], operands[1], XOmode);
> +DONE;
> +  }
> +  /* Opaque modes are only expected to be available when MMA is supported,
> + but PR103353 shows we may want to continue to expand a MMA built-in
> + function, even if we have already emitted error messages about some
> + missing required conditions.  As shown in that PR, without one
> + explicit mov optab on XOmode provided, it would call emit_move_insn
> + recursively.  So we allow this pattern to be generated when we are
> + expanding to RTL and have seen errors, even though there is no MMA
> + support.  It would not cause further ICEs as the compilation would
> + stop soon after expanding.  */
> +  else if (currently_expanding_to_rtl && seen_error ())
> +;
> +  else
> +gcc_unreachable ();
>  })

ok


> 
>  (define_insn_and_split "*movxo"
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103353.c 
> b/gcc/testsuite/gcc.target/powerpc/pr103353.c
> new file mode 100644
> index 000..6b0bedbb958
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr103353.c
> @@ -0,0 +1,22 @@
> +/* { dg-require-effective-target powerpc_altivec_ok } */
> +/* If the default cpu type is power10 or later, MMA is enabled by default.
> +   To keep the test point available all the time, this case specifies
> +   -mdejagnu-cpu=power6 to make it be tested without MMA.  */
> +/* { dg-options "-maltivec -mdejagnu-cpu=power6" } */
> +
> +/* Verify there is no ICE and don't check the error messages on MMA
> +   requirement since they could be fragile and are not test points
> +   of this case.  */
> +
> +void
> +foo (__vector_pair *dst, double *x)
> +{
> +  dst[0] = __builtin_vsx_

Re: [PATCH] Disable float128 tests on VxWorks, PR target/104253.

2022-04-07 Thread will schmidt via Gcc-patches
On Thu, 2022-04-07 at 06:00 -0500, Segher Boessenkool wrote:
> On Thu, Apr 07, 2022 at 12:29:45AM -0400, Michael Meissner wrote:
> > In PR target/104253, it was pointed out the that test case added as part
> > of fixing the PR does not work on VxWorks because float128 is not
> > supported on that system.  I have modified the three tests for float128 so
> > that they are manually excluded on VxWorks systems.  In looking at the
> > code, I also added checks in check_effective_target_ppc_ieee128_ok to
> > disable the systems that will never support VSX instructions which are
> > required for float128 support (eabi, eabispe, darwin).
> 
> It's just one extra to the big list here, but, why do we need all these
> manual exclusions anyway?  What is broken about the test itself?



>From the PR, it looks like this test noted an error, not actually a
failure.  

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104253#c17

cc1: warning: The '-mfloat128' option may not be fully supported


which comes out of gcc/config/rs6000/rs6000.cc 
rs6000_option_override_internal() via 

  /* IEEE 128-bit floating point requires VSX support.  */
  if (TARGET_FLOAT128_KEYWORD)
{
  if (!TARGET_VSX)
{

}
  else if (!TARGET_FLOAT128_TYPE)
{
  TARGET_FLOAT128_TYPE = 1;
  warning (0, "The %<-mfloat128%> option may not be fully
supported");
}
}


> 
> It would be so much more useful if the tests would help us, instead of
> producing a lot of extra busy-work.





> 
> 
> Segher



Re: [PATCH] Improve profile handling in switch lowering.

2022-04-07 Thread Martin Liška

PING^3

On 3/24/22 11:22, Martin Liška wrote:

PING^2

On 3/4/22 14:47, Martin Liška wrote:

PING^1

On 1/26/22 12:11, Martin Liška wrote:

Hello.

Right now, switch lowering does not update basic_block::count values
so that they are uninitiliazed. Moreover, I've updated probability scaling
when a more complex expansion happens. There are still some situations where
the profile is a bit imprecise, but the patch improves rapidly the current 
situation.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

 PR tree-optimization/101301
 PR tree-optimization/103680

gcc/ChangeLog:

 * tree-switch-conversion.cc (bit_test_cluster::emit):
 Handle correctly remaining probability.
 (switch_decision_tree::try_switch_expansion): Fix BB's count
 where a cluster expansion happens.
 (switch_decision_tree::emit_cmp_and_jump_insns): Fill up also
 BB count.
 (switch_decision_tree::do_jump_if_equal): Likewise.
 (switch_decision_tree::emit_case_nodes): Handle special case
 for BT expansion which can also fallback to a default BB.
 * tree-switch-conversion.h (cluster::cluster): Add
 m_default_prob probability.
---
  gcc/tree-switch-conversion.cc | 51 ---
  gcc/tree-switch-conversion.h  |  8 +-
  2 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc
index 670397c87e4..d6679e8dee3 100644
--- a/gcc/tree-switch-conversion.cc
+++ b/gcc/tree-switch-conversion.cc
@@ -1538,10 +1538,12 @@ bit_test_cluster::emit (tree index_expr, tree 
index_type,
    test[k].target_bb = n->m_case_bb;
    test[k].label = n->m_case_label_expr;
    test[k].bits = 0;
+  test[k].prob = profile_probability::never ();
    count++;
  }

    test[k].bits += n->get_range (n->get_low (), n->get_high ());
+  test[k].prob += n->m_prob;

    lo = tree_to_uhwi (int_const_binop (MINUS_EXPR, n->get_low (), minval));
    if (n->get_high () == NULL_TREE)
@@ -1629,6 +1631,11 @@ bit_test_cluster::emit (tree index_expr, tree index_type,
    /*simple=*/true, NULL_TREE,
    /*before=*/true, GSI_SAME_STMT);

+  profile_probability subtree_prob = m_subtree_prob;
+  profile_probability default_prob = m_default_prob;
+  if (!default_prob.initialized_p ())
+    default_prob = m_subtree_prob.invert ();
+
    if (m_handles_entire_switch && entry_test_needed)
  {
    tree range = int_const_binop (MINUS_EXPR, maxval, minval);
@@ -1639,9 +1646,10 @@ bit_test_cluster::emit (tree index_expr, tree index_type,
  /*simple=*/true, NULL_TREE,
  /*before=*/true, GSI_SAME_STMT);
    tmp = fold_build2 (GT_EXPR, boolean_type_node, idx, range);
+  default_prob = default_prob.apply_scale (1, 2);
    basic_block new_bb
  = hoist_edge_and_branch_if_true (&gsi, tmp, default_bb,
- profile_probability::unlikely ());
+ default_prob);
    gsi = gsi_last_bb (new_bb);
  }

@@ -1662,14 +1670,12 @@ bit_test_cluster::emit (tree index_expr, tree 
index_type,
    else
  csui = tmp;

-  profile_probability prob = profile_probability::always ();
-
    /* for each unique set of cases:
 if (const & csui) goto target  */
    for (k = 0; k < count; k++)
  {
-  prob = profile_probability::always ().apply_scale (test[k].bits,
- bt_range);
+  profile_probability prob = test[k].prob / (subtree_prob + default_prob);
+  subtree_prob -= test[k].prob;
    bt_range -= test[k].bits;
    tmp = wide_int_to_tree (word_type_node, test[k].mask);
    tmp = fold_build2 (BIT_AND_EXPR, word_type_node, csui, tmp);
@@ -1908,9 +1914,13 @@ switch_decision_tree::try_switch_expansion (vec 
&clusters)
    /* Emit cluster-specific switch handling.  */
    for (unsigned i = 0; i < clusters.length (); i++)
  if (clusters[i]->get_type () != SIMPLE_CASE)
-  clusters[i]->emit (index_expr, index_type,
- gimple_switch_default_label (m_switch),
- m_default_bb, gimple_location (m_switch));
+  {
+    edge e = single_pred_edge (clusters[i]->m_case_bb);
+    e->dest->count = e->src->count.apply_probability (e->probability);
+    clusters[i]->emit (index_expr, index_type,
+   gimple_switch_default_label (m_switch),
+   m_default_bb, gimple_location (m_switch));
+  }
  }

    fix_phi_operands_for_edges ();
@@ -2162,6 +2172,7 @@ switch_decision_tree::emit_cmp_and_jump_insns 
(basic_block bb, tree op0,
    edge false_edge = split_block (bb, cond);
    false_edge->flags = EDGE_FALSE_VALUE;
    false_edge->probability = prob.invert ();
+  false_edge->dest->count = bb->count.apply_probability (prob.invert ());

    edge true_edge = make_edge (bb, label_bb, EDGE_TRUE_VALUE);
    true_edge->probabil

Re: [committed][nvptx] Fix ASM_SPEC workaround for sm_30

2022-04-07 Thread Thomas Schwinge
Hi!

On 2022-03-31T09:40:47+0200, Tom de Vries via Gcc-patches 
 wrote:
> Newer versions of CUDA no longer support sm_30, and nvptx-tools as
> currently doesn't handle that gracefully when verifying
> ( https://github.com/MentorEmbedded/nvptx-tools/issues/30 ).

There's now 
'as: Deal with CUDA 11.0, "Support for Kepler 'sm_30' and 'sm_32'
architecture based products is dropped"' available for comment/testing.

> There's a --no-verify work-around in place in ASM_SPEC, but that one doesn't
> work when using -Wa,--verify on the command line.

With that resolved in nvptx-tools, we may then revert these GCC-level
workarounds, GCC commit bf4832d6fa817f66009f100a9cd68953062add7d
"[nvptx] Fix ASM_SPEC workaround for sm_30", and
GCC commit 12fa7641ceed9c9139e2ea7b62c11f3dc5b6f6f4
"[nvptx] Use --no-verify for sm_30".  OK to push, once nvptx-tools ready?

> Use a more robust workaround: verify using sm_35 when misa=sm_30 is specified
> (either implicitly or explicitly).

Thanks for that suggestion!


Grüße
 Thomas
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


Re: [PATCH] rs6000/test: Adjust p9-vec-length-7 sensitive to unroll [PR103196]

2022-04-07 Thread will schmidt via Gcc-patches
On Mon, 2022-02-28 at 13:37 +0800, Kewen.Lin via Gcc-patches wrote:
> Hi,
> 
> As PR103196 shows, p9-vec-length-full-7.c needs to be adjusted as the
> complete unrolling can happen on some of its loops.  This patch is to
> use pragma "GCC unroll 0" to disable all possible loop unrollings.
> Hope it can help the case not that fragile.

ok

Is the lack of effectiveness of "-fno-unroll-loops" otherwise
understood, or is there further issue behind that option? 

I would
expect the effect of the option, versus the pragma, two to roughly
equivalent.   Obviously it is not.  :-)
> 
> There are some other p9-vec-length* cases, I noticed that some of them
> use either bigger or unknown loop iteration counts, and
> "p9-vec-length-3*" have considered the effects of complete unrolling.
> So I just leave them alone for now.
> 
> Tested on powerpc64-linux-gnu P8 and powerpc64le-linux-gnu P9 and P10.
> 
> Is it ok for trunk?
> 
> BR,
> Kewen
> -
>   PR testsuite/103196
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/powerpc/p9-vec-length-7.h: Add DO_PRAGMA macro.
>   * gcc.target/powerpc/p9-vec-length-epil-7.c: Use unroll pragma to
>   disable any unrollings.
>   * gcc.target/powerpc/p9-vec-length-full-7.c: Remove useless option.
>   * gcc.target/powerpc/p9-vec-length.h: Likewise.

I suggest a slight rearrangement and correction.

The -fno-unroll-loops options are removed from *-epil-7.c and *-full-7.c.

p9-vec-length.h  adds the DO_PRAGMA macro.

p9-vec-length-7.h updates (corrects?) whitespace and adds the PRAGMA call for 
"GCC unroll 0" around the test loop. 




> > ---
> >  .../gcc.target/powerpc/p9-vec-length-7.h| 17 +++--
> >  .../gcc.target/powerpc/p9-vec-length-epil-7.c   |  2 +-
> >  .../gcc.target/powerpc/p9-vec-length-full-7.c   |  2 +-
> >  .../gcc.target/powerpc/p9-vec-length.h  |  2 ++
> >  4 files changed, 15 insertions(+), 8 deletions(-)
> > 
> > diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-7.h 
> > b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-7.h
> > index 4ef8f974a04..4f338565619 100644
> > --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-7.h
> > +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-7.h
> > @@ -7,14 +7,19 @@
> >  #define START 1
> >  #define END 59
> >  
> > +/* Note that we use pragma unroll to disable any loop unrollings.  */
> > +
> >  #define test(TYPE) 
> > \
> > -  TYPE x_##TYPE[N] __attribute__((aligned(16)));   
> >  \
> > -  void __attribute__((noinline, noclone)) test_npeel_##TYPE() {
> > \
> > +  TYPE x_##TYPE[N] __attribute__ ((aligned (16))); 
> > \
> > +  void __attribute__ ((noinline, noclone)) test_npeel_##TYPE ()
> > \
> > +  {
> > \
> >  TYPE v = 0;
> > \
> > -for (unsigned int i = START; i < END; i++) {   
> > \
> > -  x_##TYPE[i] = v; 
> > \
> > -  v += 1;  
> > \
> > -}  
> > \
> > +DO_PRAGMA (GCC unroll 0)   
> > \
> > +for (unsigned int i = START; i < END; i++) 
> > \
> > +  {
> > \
> > +   x_##TYPE[i] = v;   \
> > +   v += 1;\
> > +  }
> > \
> >}


Some whitespace fix-ups (ok), and the addition of
the "DO_PRAGMA (GCC unroll 0)".

ok.


> >  
> >  TEST_ALL (test)
> > diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-7.c 
> > b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-7.c
> > index a27ee347ca1..859fedd5679 100644
> > --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-7.c
> > +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-7.c
> > @@ -1,5 +1,5 @@
> >  /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
> > -/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize 
> > -fno-vect-cost-model -fno-unroll-loops -ffast-math" } */
> > +/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize 
> > -fno-vect-cost-model -ffast-math" } */

ok

> >  
> >  /* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
> >  
> > diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-7.c 
> > b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-7.c
> > index 89ff38443e7..5fe542bba20 100644
> > --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-7.

[PATCH]AArch64 fix ls64 intrinsics expansion [PR104409]

2022-04-07 Thread Tamar Christina via Gcc-patches
Hi All,

The LS64 intrinsics used a machinery that's not safe to use unless being
called from a pragma instantiation.

This moves the initialization code to a new pragma for arm_acle.h.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

I didn't add the testcase from the PR as it's 65kb but valgrind shows the memory
error is gone.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

PR target/104409
* config/aarch64/aarch64-builtins.cc (handle_arm_acle_h): New.
(aarch64_general_init_builtins): Move LS64 code.
* config/aarch64/aarch64-c.cc (aarch64_pragma_aarch64): Support
arm_acle.h
* config/aarch64/aarch64-protos.h (handle_arm_acle_h): New.
* config/aarch64/arm_acle.h: Add pragma GCC aarch64 "arm_acle.h".

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 
5217dbdb2ac78bba0a669d22af6d769d1fe91a3d..65d09afc008b891d8b67a443140b4157cfa84c44
 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -1641,6 +1641,14 @@ aarch64_init_ls64_builtins (void)
   = aarch64_general_add_builtin (data[i].name, data[i].type, data[i].code);
 }
 
+/* Implement #pragma GCC aarch64 "arm_acle.h".  */
+void
+handle_arm_acle_h (void)
+{
+  if (TARGET_LS64)
+aarch64_init_ls64_builtins ();
+}
+
 /* Initialize fpsr fpcr getters and setters.  */
 
 static void
@@ -1730,9 +1738,6 @@ aarch64_general_init_builtins (void)
 
   if (TARGET_MEMTAG)
 aarch64_init_memtag_builtins ();
-
-  if (TARGET_LS64)
-aarch64_init_ls64_builtins ();
 }
 
 /* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group.  */
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 
caf8e332ea0da0a34f4e96f12a934a5eaeaa1fb2..767ee0c763c56a022089a647c7425afb00644644
 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -302,6 +302,8 @@ aarch64_pragma_aarch64 (cpp_reader *)
 aarch64_sve::handle_arm_sve_h ();
   else if (strcmp (name, "arm_neon.h") == 0)
 handle_arm_neon_h ();
+  else if (strcmp (name, "arm_acle.h") == 0)
+handle_arm_acle_h ();
   else
 error ("unknown %<#pragma GCC aarch64%> option %qs", name);
 }
diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 
46bade28ed6056bea90067d3af1311f300cea559..c478bb59ae1208329facc74200fe98d00bf93f7c
 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -982,6 +982,7 @@ rtx aarch64_general_expand_builtin (unsigned int, tree, 
rtx, int);
 tree aarch64_general_builtin_decl (unsigned, bool);
 tree aarch64_general_builtin_rsqrt (unsigned int);
 tree aarch64_builtin_vectorized_function (unsigned int, tree, tree);
+void handle_arm_acle_h (void);
 void handle_arm_neon_h (void);
 
 namespace aarch64_sve {
diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
index 
ecd852f8a46d87787912e6573bf363619812e48f..9775a48c65825b424d3eb442384f5ab87b734fd7
 100644
--- a/gcc/config/aarch64/arm_acle.h
+++ b/gcc/config/aarch64/arm_acle.h
@@ -29,6 +29,8 @@
 
 #include 
 
+#pragma GCC aarch64 "arm_acle.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif


-- 
diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 
5217dbdb2ac78bba0a669d22af6d769d1fe91a3d..65d09afc008b891d8b67a443140b4157cfa84c44
 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -1641,6 +1641,14 @@ aarch64_init_ls64_builtins (void)
   = aarch64_general_add_builtin (data[i].name, data[i].type, data[i].code);
 }
 
+/* Implement #pragma GCC aarch64 "arm_acle.h".  */
+void
+handle_arm_acle_h (void)
+{
+  if (TARGET_LS64)
+aarch64_init_ls64_builtins ();
+}
+
 /* Initialize fpsr fpcr getters and setters.  */
 
 static void
@@ -1730,9 +1738,6 @@ aarch64_general_init_builtins (void)
 
   if (TARGET_MEMTAG)
 aarch64_init_memtag_builtins ();
-
-  if (TARGET_LS64)
-aarch64_init_ls64_builtins ();
 }
 
 /* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group.  */
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 
caf8e332ea0da0a34f4e96f12a934a5eaeaa1fb2..767ee0c763c56a022089a647c7425afb00644644
 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -302,6 +302,8 @@ aarch64_pragma_aarch64 (cpp_reader *)
 aarch64_sve::handle_arm_sve_h ();
   else if (strcmp (name, "arm_neon.h") == 0)
 handle_arm_neon_h ();
+  else if (strcmp (name, "arm_acle.h") == 0)
+handle_arm_acle_h ();
   else
 error ("unknown %<#pragma GCC aarch64%> option %qs", name);
 }
diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 
46bade28ed6056bea90067d3af1311f300cea559..c478bb59ae1208329facc74200fe98d00bf93f7c
 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch6

Re: [PATCH] Disable float128 tests on VxWorks, PR target/104253.

2022-04-07 Thread Segher Boessenkool
On Thu, Apr 07, 2022 at 08:50:53AM -0500, will schmidt wrote:
> On Thu, 2022-04-07 at 06:00 -0500, Segher Boessenkool wrote:
> > On Thu, Apr 07, 2022 at 12:29:45AM -0400, Michael Meissner wrote:
> > > In PR target/104253, it was pointed out the that test case added as part
> > > of fixing the PR does not work on VxWorks because float128 is not
> > > supported on that system.  I have modified the three tests for float128 so
> > > that they are manually excluded on VxWorks systems.  In looking at the
> > > code, I also added checks in check_effective_target_ppc_ieee128_ok to
> > > disable the systems that will never support VSX instructions which are
> > > required for float128 support (eabi, eabispe, darwin).
> > 
> > It's just one extra to the big list here, but, why do we need all these
> > manual exclusions anyway?  What is broken about the test itself?
> 
> >From the PR, it looks like this test noted an error, not actually a
> failure.  
> 
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104253#c17
> 
> cc1: warning: The '-mfloat128' option may not be fully supported

Aha, thanks for checking.  If that is what causes the error, the test
in target-supports needs robustifying.

> > It would be so much more useful if the tests would help us, instead of
> > producing a lot of extra busy-work.

(^^^ to help improve this)


Segher


Re: [PATCH]AArch64 fix ls64 intrinsics expansion [PR104409]

2022-04-07 Thread Richard Sandiford via Gcc-patches
Tamar Christina  writes:
> Hi All,
>
> The LS64 intrinsics used a machinery that's not safe to use unless being
> called from a pragma instantiation.
>
> This moves the initialization code to a new pragma for arm_acle.h.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> I didn't add the testcase from the PR as it's 65kb but valgrind shows the 
> memory
> error is gone.
>
> Ok for master?

OK, thanks.

Richard

>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> PR target/104409
> * config/aarch64/aarch64-builtins.cc (handle_arm_acle_h): New.
> (aarch64_general_init_builtins): Move LS64 code.
> * config/aarch64/aarch64-c.cc (aarch64_pragma_aarch64): Support
> arm_acle.h
> * config/aarch64/aarch64-protos.h (handle_arm_acle_h): New.
> * config/aarch64/arm_acle.h: Add pragma GCC aarch64 "arm_acle.h".
>
> --- inline copy of patch --
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
> b/gcc/config/aarch64/aarch64-builtins.cc
> index 
> 5217dbdb2ac78bba0a669d22af6d769d1fe91a3d..65d09afc008b891d8b67a443140b4157cfa84c44
>  100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -1641,6 +1641,14 @@ aarch64_init_ls64_builtins (void)
>= aarch64_general_add_builtin (data[i].name, data[i].type, 
> data[i].code);
>  }
>
> +/* Implement #pragma GCC aarch64 "arm_acle.h".  */
> +void
> +handle_arm_acle_h (void)
> +{
> +  if (TARGET_LS64)
> +aarch64_init_ls64_builtins ();
> +}
> +
>  /* Initialize fpsr fpcr getters and setters.  */
>
>  static void
> @@ -1730,9 +1738,6 @@ aarch64_general_init_builtins (void)
>
>if (TARGET_MEMTAG)
>  aarch64_init_memtag_builtins ();
> -
> -  if (TARGET_LS64)
> -aarch64_init_ls64_builtins ();
>  }
>
>  /* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group.  */
> diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
> index 
> caf8e332ea0da0a34f4e96f12a934a5eaeaa1fb2..767ee0c763c56a022089a647c7425afb00644644
>  100644
> --- a/gcc/config/aarch64/aarch64-c.cc
> +++ b/gcc/config/aarch64/aarch64-c.cc
> @@ -302,6 +302,8 @@ aarch64_pragma_aarch64 (cpp_reader *)
>  aarch64_sve::handle_arm_sve_h ();
>else if (strcmp (name, "arm_neon.h") == 0)
>  handle_arm_neon_h ();
> +  else if (strcmp (name, "arm_acle.h") == 0)
> +handle_arm_acle_h ();
>else
>  error ("unknown %<#pragma GCC aarch64%> option %qs", name);
>  }
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index 
> 46bade28ed6056bea90067d3af1311f300cea559..c478bb59ae1208329facc74200fe98d00bf93f7c
>  100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -982,6 +982,7 @@ rtx aarch64_general_expand_builtin (unsigned int, tree, 
> rtx, int);
>  tree aarch64_general_builtin_decl (unsigned, bool);
>  tree aarch64_general_builtin_rsqrt (unsigned int);
>  tree aarch64_builtin_vectorized_function (unsigned int, tree, tree);
> +void handle_arm_acle_h (void);
>  void handle_arm_neon_h (void);
>
>  namespace aarch64_sve {
> diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
> index 
> ecd852f8a46d87787912e6573bf363619812e48f..9775a48c65825b424d3eb442384f5ab87b734fd7
>  100644
> --- a/gcc/config/aarch64/arm_acle.h
> +++ b/gcc/config/aarch64/arm_acle.h
> @@ -29,6 +29,8 @@
>
>  #include 
>
> +#pragma GCC aarch64 "arm_acle.h"
> +
>  #ifdef __cplusplus
>  extern "C" {
>  #endif
>
>
> --


[PATCH] fold-const: Fix up make_range_step [PR105189]

2022-04-07 Thread Jakub Jelinek via Gcc-patches
Hi!

The following testcase is miscompiled, because fold_truth_andor
incorrectly folds
(unsigned) foo () >= 0U && 1
into
foo () >= 0
For the unsigned comparison (which is useless in this case,
as >= 0U is always true, but hasn't been folded yet), previous
make_range_step derives exp (unsigned) foo () and
+[0U, -]
range for it.  Next we process the NOP_EXPR.  We have special code
for unsigned to signed casts, already earlier punt if low or high
aren't representable in arg0_type or if it is a narrowing conversion.
For the signed to unsigned casts, I think if high is specified we
are still fine, as we punt for non-representable values in arg0_type,
n_high is then still representable and so was smaller or equal to
signed maximum and either low is not present (equivalent to 0U), or
low must be smaller or equal to high and so for unsigned exp
+[low, high] the signed exp +[n_low, n_high] will be correct.
Similarly, if both low and high aren't specified (always true or
always false), it is ok too.
But if we have for unsigned exp +[low, -] or -[low, -], using
+[n_low, -] or -[n_high, -] is incorrect.  Because low is smaller
or equal to signed maximum and high is unspecified (i.e. unsigned
maximum), when signed that range is a union of +[n_low, -] and
+[-, -1] which is equivalent to -[0, n_low-1], unless low
is 0, in that case we can treat it as [-, -].

Bootstrapped/regtested on powerpc64le-linux, ok for trunk?

2022-04-07  Jakub Jelinek  

PR tree-optimization/105189
* fold-const.cc (make_range_step): Fix up handling of
(unsigned) x +[low, -] ranges for signed x if low fits into
typeof (x).

* g++.dg/torture/pr105189.C: New test.

--- gcc/fold-const.cc.jj2022-04-04 12:09:00.317116906 +0200
+++ gcc/fold-const.cc   2022-04-07 11:32:41.185313269 +0200
@@ -5212,7 +5212,7 @@ make_range_step (location_t loc, enum tr
n_high = fold_convert_loc (loc, arg0_type, n_high);
 
   /* If we're converting arg0 from an unsigned type, to exp,
-a signed type,  we will be doing the comparison as unsigned.
+a signed type, we will be doing the comparison as unsigned.
 The tests above have already verified that LOW and HIGH
 are both positive.
 
@@ -5274,6 +5274,32 @@ make_range_step (location_t loc, enum tr
}
}
 
+  /* Otherwise, if we are converting arg0 from signed type, to exp,
+an unsigned type, we will do the comparison as signed.  If
+high is non-NULL, we punt above if it doesn't fit in the signed
+type, so if we get through here, +[-, high] or +[low, high] are
+equivalent to +[-, n_high] or +[n_low, n_high].  Similarly,
++[-, -] or -[-, -] are equivalent too.  But if low is specified and
+high is not, the +[low, -] range is equivalent to union of
++[n_low, -] and +[-, -1] ranges, so +[low, -] is equivalent to
+-[0, n_low-1] and similarly -[low, -] to +[0, n_low-1], except for
+low being 0, which should be treated as [-, -].  */
+  else if (TYPE_UNSIGNED (exp_type)
+  && !TYPE_UNSIGNED (arg0_type)
+  && low
+  && !high)
+   {
+ if (integer_zerop (low))
+   n_low = NULL_TREE;
+ else
+   {
+ n_high = fold_build2_loc (loc, PLUS_EXPR, arg0_type,
+   n_low, build_int_cst (arg0_type, -1));
+ n_low = build_zero_cst (arg0_type);
+ in_p = !in_p;
+   }
+   }
+
   *p_low = n_low;
   *p_high = n_high;
   *p_in_p = in_p;
--- gcc/testsuite/g++.dg/torture/pr105189.C.jj  2022-04-07 11:40:06.195098778 
+0200
+++ gcc/testsuite/g++.dg/torture/pr105189.C 2022-04-07 11:39:50.177322461 
+0200
@@ -0,0 +1,19 @@
+// PR tree-optimization/105189
+// { dg-do run }
+
+int
+foo ()
+{
+  return -1;
+}
+
+int
+main ()
+{
+  int c = foo () >= 0U && 1;
+  if (c != 1)
+__builtin_abort ();
+  int d = foo () >= 3U && 1;
+  if (d != 1)
+__builtin_abort ();
+}

Jakub



[PATCH] c: Error on va_arg with function type [PR105149]

2022-04-07 Thread Jakub Jelinek via Gcc-patches
Hi!

In the PR Joseph said that the C standard for va_arg talks about
pointers to object type and as a function type is not object type,
it is invalid.

The following patch diagnoses it in the FE, instead of ICEing later on
when optimizations are turned on (and with -O0 doing something weird
at runtime).

Bootstrapped/regtested on powerpc64le-linux, ok for trunk?

2022-04-07  Jakub Jelinek  

PR c/105149
* c-typeck.cc (c_build_va_arg): Reject function types.

* gcc.dg/pr105149.c: New test.

--- gcc/c/c-typeck.cc.jj2022-03-23 10:29:20.172967605 +0100
+++ gcc/c/c-typeck.cc   2022-04-07 12:26:17.236469809 +0200
@@ -15896,6 +15896,12 @@ c_build_va_arg (location_t loc1, tree ex
"type %qT", type);
   return error_mark_node;
 }
+  else if (TREE_CODE (type) == FUNCTION_TYPE)
+{
+  error_at (loc2, "second argument to % is a function type %qT",
+   type);
+  return error_mark_node;
+}
   else if (warn_cxx_compat && TREE_CODE (type) == ENUMERAL_TYPE)
 warning_at (loc2, OPT_Wc___compat,
"C++ requires promoted type, not enum type, in %");
--- gcc/testsuite/gcc.dg/pr105149.c.jj  2022-04-07 12:39:08.202711511 +0200
+++ gcc/testsuite/gcc.dg/pr105149.c 2022-04-07 12:38:21.418364583 +0200
@@ -0,0 +1,16 @@
+/* PR c/105149 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include 
+
+void
+foo (int s, ...)
+{
+  int e;
+  va_list ap;
+
+  va_start (ap, s);
+  e = va_arg (ap, int (void)) ();  /* { dg-error "second argument to 
'va_arg' is a function type" } */
+  va_end (ap);
+}

Jakub



Re: [PATCH] c: Error on va_arg with function type [PR105149]

2022-04-07 Thread Marek Polacek via Gcc-patches
On Thu, Apr 07, 2022 at 05:52:53PM +0200, Jakub Jelinek wrote:
> Hi!
> 
> In the PR Joseph said that the C standard for va_arg talks about
> pointers to object type and as a function type is not object type,
> it is invalid.
> 
> The following patch diagnoses it in the FE, instead of ICEing later on
> when optimizations are turned on (and with -O0 doing something weird
> at runtime).
> 
> Bootstrapped/regtested on powerpc64le-linux, ok for trunk?

Ok, thanks.
 
> 2022-04-07  Jakub Jelinek  
> 
>   PR c/105149
>   * c-typeck.cc (c_build_va_arg): Reject function types.
> 
>   * gcc.dg/pr105149.c: New test.
> 
> --- gcc/c/c-typeck.cc.jj  2022-03-23 10:29:20.172967605 +0100
> +++ gcc/c/c-typeck.cc 2022-04-07 12:26:17.236469809 +0200
> @@ -15896,6 +15896,12 @@ c_build_va_arg (location_t loc1, tree ex
>   "type %qT", type);
>return error_mark_node;
>  }
> +  else if (TREE_CODE (type) == FUNCTION_TYPE)
> +{
> +  error_at (loc2, "second argument to % is a function type %qT",
> + type);
> +  return error_mark_node;
> +}
>else if (warn_cxx_compat && TREE_CODE (type) == ENUMERAL_TYPE)
>  warning_at (loc2, OPT_Wc___compat,
>   "C++ requires promoted type, not enum type, in %");
> --- gcc/testsuite/gcc.dg/pr105149.c.jj2022-04-07 12:39:08.202711511 
> +0200
> +++ gcc/testsuite/gcc.dg/pr105149.c   2022-04-07 12:38:21.418364583 +0200
> @@ -0,0 +1,16 @@
> +/* PR c/105149 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include 
> +
> +void
> +foo (int s, ...)
> +{
> +  int e;
> +  va_list ap;
> +
> +  va_start (ap, s);
> +  e = va_arg (ap, int (void)) ();/* { dg-error "second argument to 
> 'va_arg' is a function type" } */
> +  va_end (ap);
> +}

Marek



[PATCH] libgcc: IA64: don't compile glibc-based unwinder without libc headers

2022-04-07 Thread Sergei Trofimovich via Gcc-patches
From: Sergei Trofimovich 

In --without-headers mode gcc fails to bootstrap on libgcc as:

/build/build/./gcc/xgcc -B/build/build/./gcc/ ... -Dinhibit_libc -c 
fde-glibc.c
../../../gcc-12-20220403/libgcc/config/ia64/fde-glibc.c:33:10:
fatal error: stdlib.h: No such file or directory

Most other linux targets are able to build the --without-headers
compiler without additional effort. This change adds IA64 to the fold.

The change drops part of the code that relies on DYNAMIC glibc
section traversal for backtraces.

Tested bootstrap of ia64-unknown-linux-gnu with and without libc
headers present.

libgcc/
* config/ia64/fde-glibc.c: Make a no-op in inhibit_libc mode.
---
 libgcc/config/ia64/fde-glibc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libgcc/config/ia64/fde-glibc.c b/libgcc/config/ia64/fde-glibc.c
index 9caac2bca24..bd74847fa85 100644
--- a/libgcc/config/ia64/fde-glibc.c
+++ b/libgcc/config/ia64/fde-glibc.c
@@ -22,20 +22,21 @@
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
.  */
 
 /* Locate the FDE entry for a given address, using glibc ld.so routines
to avoid register/deregister calls at DSO load/unload.  */
 
 #ifndef _GNU_SOURCE
 #define _GNU_SOURCE 1
 #endif
 #include "config.h"
+#ifndef inhibit_libc
 #include 
 #include 
 #include 
 #include "unwind-ia64.h"
 
 #if __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 2) \
 || (__GLIBC__ == 2 && __GLIBC_MINOR__ == 2 && !defined(DT_CONFIG))
 # error You need GLIBC 2.2.4 or later on IA-64 Linux
 #endif
 
@@ -152,10 +153,11 @@ _Unwind_FindTableEntry (void *pc, unw_word *segment_base, 
unw_word *gp,
   data.pc = (Elf64_Addr) pc;
   data.segment_base = segment_base;
   data.gp = gp;
   data.ret = NULL;
 
   if (dl_iterate_phdr (_Unwind_IteratePhdrCallback, &data) < 0)
 return NULL;
 
   return data.ret;
 }
+#endif
-- 
2.35.1



Re: [PATCH] fold-const: Fix up make_range_step [PR105189]

2022-04-07 Thread Richard Biener via Gcc-patches



> Am 07.04.2022 um 17:47 schrieb Jakub Jelinek via Gcc-patches 
> :
> 
> Hi!
> 
> The following testcase is miscompiled, because fold_truth_andor
> incorrectly folds
> (unsigned) foo () >= 0U && 1
> into
> foo () >= 0
> For the unsigned comparison (which is useless in this case,
> as >= 0U is always true, but hasn't been folded yet), previous
> make_range_step derives exp (unsigned) foo () and
> +[0U, -]
> range for it.  Next we process the NOP_EXPR.  We have special code
> for unsigned to signed casts, already earlier punt if low or high
> aren't representable in arg0_type or if it is a narrowing conversion.
> For the signed to unsigned casts, I think if high is specified we
> are still fine, as we punt for non-representable values in arg0_type,
> n_high is then still representable and so was smaller or equal to
> signed maximum and either low is not present (equivalent to 0U), or
> low must be smaller or equal to high and so for unsigned exp
> +[low, high] the signed exp +[n_low, n_high] will be correct.
> Similarly, if both low and high aren't specified (always true or
> always false), it is ok too.
> But if we have for unsigned exp +[low, -] or -[low, -], using
> +[n_low, -] or -[n_high, -] is incorrect.  Because low is smaller
> or equal to signed maximum and high is unspecified (i.e. unsigned
> maximum), when signed that range is a union of +[n_low, -] and
> +[-, -1] which is equivalent to -[0, n_low-1], unless low
> is 0, in that case we can treat it as [-, -].
> 
> Bootstrapped/regtested on powerpc64le-linux, ok for trunk?

Ok.

Thanks,
Richard 

> 2022-04-07  Jakub Jelinek  
> 
>PR tree-optimization/105189
>* fold-const.cc (make_range_step): Fix up handling of
>(unsigned) x +[low, -] ranges for signed x if low fits into
>typeof (x).
> 
>* g++.dg/torture/pr105189.C: New test.
> 
> --- gcc/fold-const.cc.jj2022-04-04 12:09:00.317116906 +0200
> +++ gcc/fold-const.cc2022-04-07 11:32:41.185313269 +0200
> @@ -5212,7 +5212,7 @@ make_range_step (location_t loc, enum tr
>n_high = fold_convert_loc (loc, arg0_type, n_high);
> 
>   /* If we're converting arg0 from an unsigned type, to exp,
> - a signed type,  we will be doing the comparison as unsigned.
> + a signed type, we will be doing the comparison as unsigned.
> The tests above have already verified that LOW and HIGH
> are both positive.
> 
> @@ -5274,6 +5274,32 @@ make_range_step (location_t loc, enum tr
>}
>}
> 
> +  /* Otherwise, if we are converting arg0 from signed type, to exp,
> + an unsigned type, we will do the comparison as signed.  If
> + high is non-NULL, we punt above if it doesn't fit in the signed
> + type, so if we get through here, +[-, high] or +[low, high] are
> + equivalent to +[-, n_high] or +[n_low, n_high].  Similarly,
> + +[-, -] or -[-, -] are equivalent too.  But if low is specified and
> + high is not, the +[low, -] range is equivalent to union of
> + +[n_low, -] and +[-, -1] ranges, so +[low, -] is equivalent to
> + -[0, n_low-1] and similarly -[low, -] to +[0, n_low-1], except for
> + low being 0, which should be treated as [-, -].  */
> +  else if (TYPE_UNSIGNED (exp_type)
> +   && !TYPE_UNSIGNED (arg0_type)
> +   && low
> +   && !high)
> +{
> +  if (integer_zerop (low))
> +n_low = NULL_TREE;
> +  else
> +{
> +  n_high = fold_build2_loc (loc, PLUS_EXPR, arg0_type,
> +n_low, build_int_cst (arg0_type, -1));
> +  n_low = build_zero_cst (arg0_type);
> +  in_p = !in_p;
> +}
> +}
> +
>   *p_low = n_low;
>   *p_high = n_high;
>   *p_in_p = in_p;
> --- gcc/testsuite/g++.dg/torture/pr105189.C.jj2022-04-07 
> 11:40:06.195098778 +0200
> +++ gcc/testsuite/g++.dg/torture/pr105189.C2022-04-07 11:39:50.177322461 
> +0200
> @@ -0,0 +1,19 @@
> +// PR tree-optimization/105189
> +// { dg-do run }
> +
> +int
> +foo ()
> +{
> +  return -1;
> +}
> +
> +int
> +main ()
> +{
> +  int c = foo () >= 0U && 1;
> +  if (c != 1)
> +__builtin_abort ();
> +  int d = foo () >= 3U && 1;
> +  if (d != 1)
> +__builtin_abort ();
> +}
> 
>Jakub
> 


Re: [PATCH] Add condition coverage profiling

2022-04-07 Thread Sebastian Huber

Hello Jørgen,

there could be an issue with conditions in for loops:

3:  189:  for (int a = 0; a <= 1; ++a) {
branch  0 taken 2
branch  1 taken 1 (fallthrough)
conditions covered 0/2
condition  0 not covered (true)
condition  0 not covered (false)

--
embedded brains GmbH
Herr Sebastian HUBER
Dornierstr. 4
82178 Puchheim
Germany
email: sebastian.hu...@embedded-brains.de
phone: +49-89-18 94 741 - 16
fax:   +49-89-18 94 741 - 08

Registergericht: Amtsgericht München
Registernummer: HRB 157899
Vertretungsberechtigte Geschäftsführer: Peter Rasmussen, Thomas Dörfler
Unsere Datenschutzerklärung finden Sie hier:
https://embedded-brains.de/datenschutzerklaerung/


[AArch64] PR target/105157 Increase number of cores TARGET_CPU_DEFAULT can encode

2022-04-07 Thread Andre Vieira (lists) via Gcc-patches

Hi,

This addresses the compile-time increase seen in the PR target/105157. 
This was being caused by selecting the wrong core tuning, as when we 
added the latest AArch64 the TARGET_CPU_generic tuning was pushed beyond 
the 0x3f mask we used to encode both target cpu and attributes into 
TARGET_CPU_DEFAULT.


This is a quick fix to increase the number of allowed cores for now late 
in stage 4, but for next GCC we plan to fix this in a more suitable and 
maintainable manner.


gcc/ChangeLog:

    PR target/105157
    * config.gcc: Shift ext_mask by TARGET_CPU_NBITS.
    * config/aarch64/aarch64.h (TARGET_CPU_NBITS): New macro.
    (TARGET_CPU_MASK): Likewise.
    (TARGET_CPU_DEFAULT): Use TARGET_CPU_NBITS.
    * config/aarch64/aarch64.cc (aarch64_get_tune_cpu): Use 
TARGET_CPU_MASK.

    (aarch64_get_arch): Likewise.
    (aarch64_override_options): Use TARGET_CPU_NBITS.
    (aarch64_run_selftests): Add test to guarantee TARGET_CPU_NBITS 
is big enough.
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 
2cc5aeec9e46aacc46e4e509d01f57f9f6585169..8946984f5e93cd6fe5364b44e74c64e714ba1d6a
 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4239,7 +4239,7 @@ case "${target}" in
ext_val=`echo $ext_val | sed -e 
's/[a-z0-9]\+//'`
  done
 
- ext_mask="(("$ext_mask") << 6)"
+ ext_mask="(("$ext_mask") << TARGET_CPU_NBITS)"
  if [ x"$base_id" != x ]; then
target_cpu_cname="TARGET_CPU_$base_id | 
$ext_mask"
  fi
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 
efa46ac0b8799b5849b609d591186e26e5cb37ff..97b2d38dac9f13717e90f4f6996ab94814922950
 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -813,10 +813,17 @@ enum target_cpus
   TARGET_CPU_generic
 };
 
+
+/* Define how many bits are used to represent the CPU in TARGET_CPU_DEFAULT.
+   This needs to be big enough to fit the value of TARGET_CPU_generic.
+   All bits after this are used to represent the AARCH64_CPU_DEFAULT_FLAGS.  */
+#define TARGET_CPU_NBITS 8
+#define TARGET_CPU_MASK ((1 << TARGET_CPU_NBITS) - 1)
+
 /* If there is no CPU defined at configure, use generic as default.  */
 #ifndef TARGET_CPU_DEFAULT
 #define TARGET_CPU_DEFAULT \
-  (TARGET_CPU_generic | (AARCH64_CPU_DEFAULT_FLAGS << 6))
+  (TARGET_CPU_generic | (AARCH64_CPU_DEFAULT_FLAGS << TARGET_CPU_NBITS))
 #endif
 
 /* If inserting NOP before a mult-accumulate insn remember to adjust the
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
d66626748e5edc46e46edf10e243114a9f74be97..9d83e007d7a31455c167bf21124cf9fbc0d31788
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -18062,12 +18062,12 @@ aarch64_get_tune_cpu (enum aarch64_processor cpu)
   if (cpu != aarch64_none)
 return &all_cores[cpu];
 
-  /* The & 0x3f is to extract the bottom 6 bits that encode the
- default cpu as selected by the --with-cpu GCC configure option
+  /* The & TARGET_CPU_MASK is to extract the bottom TARGET_CPU_NBITS bits that
+ encode the default cpu as selected by the --with-cpu GCC configure option
  in config.gcc.
  ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
  flags mechanism should be reworked to make it more sane.  */
-  return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
+  return &all_cores[TARGET_CPU_DEFAULT & TARGET_CPU_MASK];
 }
 
 /* Return the architecture corresponding to the enum ARCH.
@@ -18079,7 +18079,8 @@ aarch64_get_arch (enum aarch64_arch arch)
   if (arch != aarch64_no_arch)
 return &all_architectures[arch];
 
-  const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
+  const struct processor *cpu
+= &all_cores[TARGET_CPU_DEFAULT & TARGET_CPU_MASK];
 
   return &all_architectures[cpu->arch];
 }
@@ -18166,7 +18167,7 @@ aarch64_override_options (void)
{
  /* Get default configure-time CPU.  */
  selected_cpu = aarch64_get_tune_cpu (aarch64_none);
- aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
+ aarch64_isa_flags = TARGET_CPU_DEFAULT >> TARGET_CPU_NBITS;
}
 
   if (selected_tune)
@@ -27260,6 +27261,9 @@ aarch64_run_selftests (void)
 {
   aarch64_test_loading_full_dump ();
   aarch64_test_fractional_cost ();
+  /* Check whether the TARGET_CPU_MASK is big enough to fit all of the definded
+ CPUs.  */
+  ASSERT_TRUE (TARGET_CPU_generic < TARGET_CPU_MASK);
 }
 
 } // namespace selftest


Re: [PATCH] tree-optimization/105175 - avoid -Wvector-operation-performance

2022-04-07 Thread Martin Sebor via Gcc-patches

On 4/7/22 00:59, Richard Biener wrote:

On Wed, 6 Apr 2022, Martin Sebor wrote:


On 4/6/22 03:23, Richard Biener wrote:

This avoids -Wvector-operation-performance diagnostics for vectorizer
produced code.  It's unfortunate the warning_at code in
tree-vect-generic.cc needs adjustments but the diagnostic suppression
code doesn't magically suppress those otherwise.


It seems like it should, as long as the statement location hasn't
changed after the suppress_diagnostic call in tree-vect-stmts.cc.


The location doesn't change.



Bootstrap / regtest running on x86_64-unknown-linux-gnu.

Martin/David - did I miss something obvious when doing the
tree-vect-generic.cc adjustment?


The only thing I can think of is that because it's not handled in
diagnostic-spec.cc, -Wvector-operation-performance is lumped in with
all other generic warnings that also aren't handled.  It means that
they are all treated as a group.  Whether or not that's what we want
for this specific warning might be something to consider.


So when I call suppress_warning (gimple *, ..) the suppress_warning_at
call constructs a nowarn_spec_t with NW_OTHER, it queries the
nowarn_map where it doesn't find anything yet, and goes on
with

   nowarn_map->put (loc, optspec);
   return true;

suppress_warning then (since supp is true anyway) goes on with

   set_no_warning_bit (stmt, supp);

which is likely what my changes to tree-vect-generic.cc in the end
key on.  When I simply invoke

warning_at (loc, OPT_Wvector_operation_performance,
"...")

I see nowhere that nowarn_spec_t::nowarn_spec_t is invoked, nor
is warning_suppressed_at.  Maybe I'm missing that being done
but I think that's by design?  It at least was surprising to me.


It's been a while so I'm hazy on the details.  I'd initially hoped
to have warning_at(loc, opt, ...) automatically disable warning opt
at loc.  It turned out that there are calls to warning_at() with
same loc and opt where we want to issue the warning (like for
distinct arguments in the same function call).  But I don't recall
trying to test warning_suppressed_at() first, before issuing
a warning.  That would make sense to me.  The only lightly POC
patch below doesn't seem to cause too much fallout:

diff --git a/gcc/diagnostic.cc b/gcc/diagnostic.cc
index 73324a728fe..857d70e5d2e 100644
--- a/gcc/diagnostic.cc
+++ b/gcc/diagnostic.cc
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "selftest-diagnostic.h"
 #include "opts.h"
 #include "cpplib.h"
+#include "tree.h"

 #ifdef HAVE_TERMIOS_H
 # include 
@@ -1337,6 +1338,10 @@ diagnostic_report_diagnostic (diagnostic_context 
*context,

   if (!diagnostic_enabled (context, diagnostic))
 return false;

+  if (!RESERVED_LOCATION_P (location)
+  && warning_suppressed_at (location, 
(opt_code)diagnostic->option_index))

+return false;
+
   if (!report_warning_p && diagnostic->m_iinfo.m_allsyslocs)
 /* Bail if the warning is not to be reported because all locations
in the inlining stack (if there is one) are in system headers.  */


Of course since we lack a warning_at (gimple *, ..) overload
or alternatively extending rich-location to cover diagnostic
suppression contexts, doing this would only work for stmts with
a location that doesn't fall back to that of the current
declaration (for UNKNOWN_LOCATION loc).


David and I discussed adding warning_at(gimple*, ...) and
warning_at(tree, ...) overloads but decided to go with a narrower
API in the initial patch and to consider extending it  later.  It
still seems like a useful feature.



So my main question was if the diagnostic suppression is supposed
to be transparently handled by warning_at (...) or whether indeed
explicit guards need to be added to each diagnostic emission.

As I'm now doing

if (!warning_suppressed_p (gsi_stmt (*gsi),
  OPT_Wvector_operation_performance))


I get to get_nowarn_spec for the stmt which will return NULL
because the no-warning bit is set (but it's always set in the
warning suppression call when done on a stmt!)

When I'm doing

   if (!warning_suppressed_at (loc,
  OPT_Wvector_operation_performance))

then it also suppresses the diagnostic but I think I'm not supposed
to call that function since it will ICE on UNKNOWN_LOCATION and it
would lack the fallback to the nowarning bit for stmts without
location.

That is - the stmt-based query looks correct to me but it will
always use the non-specific flag, at least when I suppress the
diagnostic based on the stmt?!  I think suppress_warning (gimple *,...)
should not set the no-warning bit when it succeeded in amending
the nowarn_map?

So, again, was the requirement to explicitely guard warning_at ()
calls with warning_suppressed_p () calls by design?  If it wasn't
intentional then I think we need to somehow allow to specify
a gimple * or tree as location argument to warning_at, since
we have rich-loc

[RFC 0/7] RISCV: Implement ISA Manual Table A.6 Mappings

2022-04-07 Thread Patrick O'Neill
This series should not be applied as it causes an ABI break.

This RFC aims to bring the RISCV atomics implementation in line with
the recommended mapping present in table A.6 of the ISA manual.

https://github.com/riscv/riscv-isa-manual/blob/c7cf84547b3aefacab5463add1734c1602b67a49/src/memory.tex#L1083-L1157

This mapping being implemented would result in an ABI break due to
libatomic's LR.aq/SC.rl mapping and the A.6's SEQ_CST store mapping
not enforcing SEQ_CST when placed next to eachother.

This can be seen using the following Herd7 litmus test:

RISCV W-RMW

(*
Seq_cst store along with LR.aq/SC.rl is insufficient for a
seq_cst store, seq_cst RMW mapping.
*)

{
0:x7=A; 0:x8=B;
1:x7=A; 1:x8=B;
}

   P0  | P1  ;
   ori x1,x0,1 | ori x1,x0,1 ;
   fence rw,w  | fence rw,rw ;
   sw x1,0(x8) | sw x1,0(x7) ;
   lr.w.aq x3,0(x7)| fence rw,rw ;
   sc.w.rl x1,x1,0(x7) | lw x2,0(x8) ;

exists (0:x3=0 /\ 1:x2=0)

In GCC for SEQ_CST store, we currently emit fence iorw,ow + amoswap.aq,
which successfully enforces ordering for the given litmus test. This
will only be a problem in GCC if we move the SEQ_CST store to the A.6
mapping.

Note: LLVM implements fence rw,w + sw
https://godbolt.org/z/n68P7ne1W

That means that LLVM isn't compatible with libatomic's LR.aq/SC.rl.

* PR target/89835: The RISC-V target uses amoswap.w for relaxed stores

Patrick O'Neill (7):
  RISCV: Enforce Libatomic LR/SC SEQ_CST
  RISCV: Enforce Atomic Compare Exchange SEQ_CST
  RISCV: Add AMO release bits
  RISCV: Optimize AMO Ops
  RISCV: Optimize LR/SC Pairs
  RISCV: Optimize Atomic Stores
  RISCV: Relax mem_thread_fence

 gcc/config/riscv/riscv-protos.h   |  6 ++
 gcc/config/riscv/riscv.cc | 93 +--
 gcc/config/riscv/sync.md  | 46 ++---
 .../gcc.target/riscv/amo-thread-fence-1.c |  6 ++
 .../gcc.target/riscv/amo-thread-fence-2.c |  6 ++
 .../gcc.target/riscv/amo-thread-fence-3.c |  6 ++
 .../gcc.target/riscv/amo-thread-fence-4.c |  6 ++
 .../gcc.target/riscv/amo-thread-fence-5.c |  6 ++
 .../gcc.target/riscv/inline-atomics-model-1.c | 12 +++
 .../gcc.target/riscv/inline-atomics-model-2.c | 12 +++
 .../gcc.target/riscv/inline-atomics-model-3.c | 12 +++
 .../gcc.target/riscv/inline-atomics-model-4.c | 12 +++
 .../gcc.target/riscv/inline-atomics-model-5.c | 12 +++
 gcc/testsuite/gcc.target/riscv/pr89835.c  |  9 ++
 libgcc/config/riscv/atomic.c  |  4 +-
 15 files changed, 223 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/amo-thread-fence-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/amo-thread-fence-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/amo-thread-fence-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/amo-thread-fence-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/amo-thread-fence-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-model-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-model-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-model-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-model-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-model-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr89835.c

-- 
2.25.1



[RFC 1/7] RISCV: Enforce Libatomic LR/SC SEQ_CST

2022-04-07 Thread Patrick O'Neill
Replace LR.aq/SC.rl pairs with the SEQ_CST LR.aqrl/SC.rl pairs
recommended by table A.6 of the ISA manual.

2022-03-31 Patrick O'Neill 

* atomic.c: Change LR.aq/SC.rl pairs into sequentially
consistent LR.aqrl/SC.rl pair.

Signed-off-by: Patrick O'Neill 
---
 libgcc/config/riscv/atomic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libgcc/config/riscv/atomic.c b/libgcc/config/riscv/atomic.c
index 7007e7a20e4..834d0d4380e 100644
--- a/libgcc/config/riscv/atomic.c
+++ b/libgcc/config/riscv/atomic.c
@@ -39,7 +39,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 unsigned old, tmp1, tmp2;  \
\
 asm volatile ("1:\n\t" \
- "lr.w.aq %[old], %[mem]\n\t"  \
+ "lr.w.aqrl %[old], %[mem]\n\t"\
  #insn " %[tmp1], %[old], %[value]\n\t"\
  invert\
  "and %[tmp1], %[tmp1], %[mask]\n\t"   \
@@ -73,7 +73,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 unsigned old, tmp1;
\
\
 asm volatile ("1:\n\t" \
- "lr.w.aq %[old], %[mem]\n\t"  \
+ "lr.w.aqrl %[old], %[mem]\n\t"\
  "and %[tmp1], %[old], %[mask]\n\t"\
  "bne %[tmp1], %[o], 1f\n\t"   \
  "and %[tmp1], %[old], %[not_mask]\n\t"\
-- 
2.25.1



[RFC 2/7] RISCV: Enforce Atomic Compare Exchange SEQ_CST

2022-04-07 Thread Patrick O'Neill
This patch enforces SEQ_CST for atomic compare_exchange ops.

Replace Fence/LR.aq/SC.aq pairs with strong SEQ_CST LR.aqrl/SC.rl pairs
recommended by table A.6 of the ISA manual.

2022-03-31 Patrick O'Neill 

* sync.md: Change LR.aq/SC.rl pairs into sequentially
consistent LR.aqrl/SC.rl pair.

Signed-off-by: Patrick O'Neill 
---
 gcc/config/riscv/sync.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 86b41e6b00a..cb4242d7b2f 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -115,8 +115,8 @@
 UNSPEC_COMPARE_AND_SWAP))
(clobber (match_scratch:GPR 6 "=&r"))]
   "TARGET_ATOMIC"
-  "%F5 1: lr.%A5 %0,%1; bne %0,%z2,1f; sc.%A4 %6,%z3,%1; bnez %6,1b; 
1:"
-  [(set (attr "length") (const_int 20))])
+  
"1:\;lr..aqrl\t%0,%1\;bne\t%0,%z2,1f\;sc..rl\t%6,%z3,%1\;bnez\t%6,1b\;1:"
+  [(set (attr "length") (const_int 16))])
 
 (define_expand "atomic_compare_and_swap"
   [(match_operand:SI 0 "register_operand" "")   ;; bool output
-- 
2.25.1



[RFC 3/7] RISCV: Add AMO release bits

2022-04-07 Thread Patrick O'Neill
This patch sets the relevant .rl bits on amo operations.

2022-03-31 Patrick O'Neill 

* riscv.cc (riscv_print_operand): change behavior of %A to
include release bits.

Signed-off-by: Patrick O'Neill 
---
 gcc/config/riscv/riscv.cc | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index ee756aab694..813e771bec7 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3652,8 +3652,13 @@ riscv_print_operand (FILE *file, rtx op, int letter)
   break;
 
 case 'A':
-  if (riscv_memmodel_needs_amo_acquire ((enum memmodel) INTVAL (op)))
+  if (riscv_memmodel_needs_amo_acquire ((enum memmodel) INTVAL (op)) &&
+ riscv_memmodel_needs_release_fence ((enum memmodel) INTVAL (op)))
+   fputs (".aqrl", file);
+  else if (riscv_memmodel_needs_amo_acquire ((enum memmodel) INTVAL (op)))
fputs (".aq", file);
+  else if (riscv_memmodel_needs_release_fence ((enum memmodel) INTVAL 
(op)))
+   fputs (".rl", file);
   break;
 
 case 'F':
-- 
2.25.1



[RFC 4/7] RISCV: Optimize AMO Ops

2022-04-07 Thread Patrick O'Neill
Atomic operations with the appropriate bits set already enfore release
semantics. Remove unnecessary release fences from atomic ops.

This change brings amo ops in line with table A.6 of the ISA manual.

2022-03-31 Patrick O'Neill 

* riscv.cc (riscv_memmodel_needs_amo_acquire): Change function
name.
* riscv.cc (riscv_print_operand): Remove unneeded %F case.
* sync.md: Remove unneeded fences.

Signed-off-by: Patrick O'Neill 
---
 gcc/config/riscv/riscv.cc | 16 +---
 gcc/config/riscv/sync.md  | 16 
 2 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 813e771bec7..6da602f1b59 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3589,11 +3589,11 @@ riscv_memmodel_needs_amo_acquire (enum memmodel model)
 }
 }
 
-/* Return true if a FENCE should be emitted to before a memory access to
-   implement the release portion of memory model MODEL.  */
+/* Return true if the .RL suffix should be added to an AMO to implement the
+   release portion of memory model MODEL.  */
 
 static bool
-riscv_memmodel_needs_release_fence (enum memmodel model)
+riscv_memmodel_needs_amo_release (enum memmodel model)
 {
   switch (model)
 {
@@ -3622,7 +3622,6 @@ riscv_memmodel_needs_release_fence (enum memmodel model)
'R' Print the low-part relocation associated with OP.
'C' Print the integer branch condition for comparison OP.
'A' Print the atomic operation suffix for memory model OP.
-   'F' Print a FENCE if the memory model requires a release.
'z' Print x0 if OP is zero, otherwise print OP normally.
'i' Print i if the operand is not a register.
'S' Print shift-index of single-bit mask OP.
@@ -3653,19 +3652,14 @@ riscv_print_operand (FILE *file, rtx op, int letter)
 
 case 'A':
   if (riscv_memmodel_needs_amo_acquire ((enum memmodel) INTVAL (op)) &&
- riscv_memmodel_needs_release_fence ((enum memmodel) INTVAL (op)))
+ riscv_memmodel_needs_amo_release ((enum memmodel) INTVAL (op)))
fputs (".aqrl", file);
   else if (riscv_memmodel_needs_amo_acquire ((enum memmodel) INTVAL (op)))
fputs (".aq", file);
-  else if (riscv_memmodel_needs_release_fence ((enum memmodel) INTVAL 
(op)))
+  else if (riscv_memmodel_needs_amo_release ((enum memmodel) INTVAL (op)))
fputs (".rl", file);
   break;
 
-case 'F':
-  if (riscv_memmodel_needs_release_fence ((enum memmodel) INTVAL (op)))
-   fputs ("fence iorw,ow; ", file);
-  break;
-
 case 'i':
   if (code != REG)
 fputs ("i", file);
diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index cb4242d7b2f..f8a79465ee3 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -65,8 +65,8 @@
(match_operand:SI 2 "const_int_operand")]  ;; model
   UNSPEC_ATOMIC_STORE))]
   "TARGET_ATOMIC"
-  "%F2amoswap.%A2 zero,%z1,%0"
-  [(set (attr "length") (const_int 8))])
+  "amoswap.%A2 zero,%z1,%0"
+  [(set (attr "length") (const_int 4))])
 
 (define_insn "atomic_"
   [(set (match_operand:GPR 0 "memory_operand" "+A")
@@ -76,8 +76,8 @@
   (match_operand:SI 2 "const_int_operand")] ;; model
 UNSPEC_SYNC_OLD_OP))]
   "TARGET_ATOMIC"
-  "%F2amo.%A2 zero,%z1,%0"
-  [(set (attr "length") (const_int 8))])
+  "amo.%A2 zero,%z1,%0"
+  [(set (attr "length") (const_int 4))])
 
 (define_insn "atomic_fetch_"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
@@ -89,8 +89,8 @@
   (match_operand:SI 3 "const_int_operand")] ;; model
 UNSPEC_SYNC_OLD_OP))]
   "TARGET_ATOMIC"
-  "%F3amo.%A3 %0,%z2,%1"
-  [(set (attr "length") (const_int 8))])
+  "amo.%A3 %0,%z2,%1"
+  [(set (attr "length") (const_int 4))])
 
 (define_insn "atomic_exchange"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
@@ -101,8 +101,8 @@
(set (match_dup 1)
(match_operand:GPR 2 "register_operand" "0"))]
   "TARGET_ATOMIC"
-  "%F3amoswap.%A3 %0,%z2,%1"
-  [(set (attr "length") (const_int 8))])
+  "amoswap.%A3 %0,%z2,%1"
+  [(set (attr "length") (const_int 4))])
 
 (define_insn "atomic_cas_value_strong"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
-- 
2.25.1



[RFC 5/7] RISCV: Optimize LR/SC Pairs

2022-04-07 Thread Patrick O'Neill
Introduce the %I and %J flags for setting the .aqrl bits on LR/SC pairs
as needed.

Atomic compare and exchange ops provide 2 types of memory models. C++17
and later places no restrictions on the relative strength of each model,
so ensure we cover both by using a model that enforces the ordering of
both given models.

This change brings LR/SC ops in line with table A.6 of the ISA manual.

2022-03-31 Patrick O'Neill 

* riscv.cc: Add functions to get the parent of two
memmodels in sync.md.
* riscv-protos.h: Likewise.
* sync.md (atomic_cas_value_strong): Remove static
.aqrl bits on SC op/.rl bits on LR op and replace with %I, %J
flags.
* inline-atomics-model-1.c: New test.
* inline-atomics-model-2.c: Likewise.
* inline-atomics-model-3.c: Likewise.
* inline-atomics-model-4.c: Likewise.
* inline-atomics-model-5.c: Likewise.
* inline-atomics-model-6.c: Likewise.

Signed-off-by: Patrick O'Neill 
---
 gcc/config/riscv/riscv-protos.h   |  4 ++
 gcc/config/riscv/riscv.cc | 68 +++
 gcc/config/riscv/sync.md  | 12 +++-
 .../gcc.target/riscv/inline-atomics-model-1.c | 12 
 .../gcc.target/riscv/inline-atomics-model-2.c | 12 
 .../gcc.target/riscv/inline-atomics-model-3.c | 12 
 .../gcc.target/riscv/inline-atomics-model-4.c | 12 
 .../gcc.target/riscv/inline-atomics-model-5.c | 12 
 8 files changed, 143 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-model-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-model-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-model-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-model-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-model-5.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 20c2381c21a..e32ea86a530 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -22,6 +22,8 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef GCC_RISCV_PROTOS_H
 #define GCC_RISCV_PROTOS_H
 
+#include "memmodel.h"
+
 /* Symbol types we understand.  The order of this list must match that of
the unspec enum in riscv.md, subsequent to UNSPEC_ADDRESS_FIRST.  */
 enum riscv_symbol_type {
@@ -74,6 +76,8 @@ extern bool riscv_expand_block_move (rtx, rtx, rtx);
 extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *);
 extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *);
 extern bool riscv_gpr_save_operation_p (rtx);
+extern enum memmodel riscv_parent_memmodel (enum memmodel, enum memmodel);
+extern enum memmodel riscv_simplify_memmodel (enum memmodel);
 
 /* Routines implemented in riscv-c.cc.  */
 void riscv_cpu_cpp_builtins (cpp_reader *);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 6da602f1b59..6fbcd62fe73 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3563,6 +3563,59 @@ riscv_print_operand_reloc (FILE *file, rtx op, bool 
hi_reloc)
   fputc (')', file);
 }
 
+/* Return the memory model that encapuslates both given models. This assumes
+   SYNC models output equivalent code to non-SYNC models.  */
+
+enum memmodel
+riscv_parent_memmodel (enum memmodel model1, enum memmodel model2)
+{
+  model1 = riscv_simplify_memmodel(model1);
+  model2 = riscv_simplify_memmodel(model2);
+
+  enum memmodel weaker = model1 <= model2 ? model1: model2;
+  enum memmodel stronger = model1 > model2 ? model1: model2;
+
+  switch (stronger)
+{
+  case MEMMODEL_SEQ_CST:
+  case MEMMODEL_ACQ_REL:
+   return stronger;
+  case MEMMODEL_RELEASE:
+   if (weaker== MEMMODEL_ACQUIRE || weaker == MEMMODEL_CONSUME)
+ return MEMMODEL_ACQ_REL;
+   else
+ return stronger;
+  case MEMMODEL_ACQUIRE:
+  case MEMMODEL_CONSUME:
+  case MEMMODEL_RELAXED:
+   return stronger;
+  default:
+   gcc_unreachable ();
+}
+}
+
+enum memmodel
+riscv_simplify_memmodel(enum memmodel model) {
+  switch (model)
+{
+  case MEMMODEL_SYNC_SEQ_CST:
+   return MEMMODEL_SEQ_CST;
+  case MEMMODEL_SYNC_ACQUIRE:
+   return MEMMODEL_ACQUIRE;
+  case MEMMODEL_SYNC_RELEASE:
+   return MEMMODEL_RELEASE;
+  case MEMMODEL_SEQ_CST:
+  case MEMMODEL_ACQ_REL:
+  case MEMMODEL_RELEASE:
+  case MEMMODEL_ACQUIRE:
+  case MEMMODEL_CONSUME:
+  case MEMMODEL_RELAXED:
+   return model;
+  default:
+   gcc_unreachable();
+}
+}
+
 /* Return true if the .AQ suffix should be added to an AMO to implement the
acquire portion of memory model MODEL.  */
 
@@ -3622,6 +3675,8 @@ riscv_memmodel_needs_amo_release (enum memmodel model)
'R' Print the low-part relocation associated with OP.
'C' Print the integer branch condition for comparison OP.
'A' Print the atomic operati

[RFC 6/7] RISCV: Optimize Atomic Stores

2022-04-07 Thread Patrick O'Neill
This change brings atomic stores in line with table A.6 of the ISA
manual.

2022-03-31 Patrick O'Neill 

PR target/89835
* riscv.cc (atomic_cas_value_strong): Add %I flag for
atomic store fences.
* sync.md (atomic_store): Use simple store instruction in
combination with a fence.
* pr89835.c: New test.

Signed-off-by: Patrick O'Neill 
---
 gcc/config/riscv/riscv.cc| 6 ++
 gcc/config/riscv/sync.md | 2 +-
 gcc/testsuite/gcc.target/riscv/pr89835.c | 9 +
 3 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr89835.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 6fbcd62fe73..48d18a83f06 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3677,6 +3677,7 @@ riscv_memmodel_needs_amo_release (enum memmodel model)
'A' Print the atomic operation suffix for memory model OP.
'I' Print the LR suffix for memory model OP.
'J' Print the SC suffix for memory model OP.
+   'K'  Print a leading fence for the memory model OP.
'z' Print x0 if OP is zero, otherwise print OP normally.
'i' Print i if the operand is not a register.
'S' Print shift-index of single-bit mask OP.
@@ -3728,6 +3729,11 @@ riscv_print_operand (FILE *file, rtx op, int letter)
fputs (".rl", file);
   break;
 
+case 'K':
+  if (riscv_memmodel_needs_amo_release ((enum memmodel) INTVAL (op)))
+   fputs ("fence\trw,w;", file);
+  break;
+
 case 'i':
   if (code != REG)
 fputs ("i", file);
diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index b54338d8eb2..1cc3731da38 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -65,7 +65,7 @@
(match_operand:SI 2 "const_int_operand")]  ;; model
   UNSPEC_ATOMIC_STORE))]
   "TARGET_ATOMIC"
-  "amoswap.%A2 zero,%z1,%0"
+  "%K2s\t%z1,%0"
   [(set (attr "length") (const_int 4))])
 
 (define_insn "atomic_"
diff --git a/gcc/testsuite/gcc.target/riscv/pr89835.c 
b/gcc/testsuite/gcc.target/riscv/pr89835.c
new file mode 100644
index 000..ab190e11b60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr89835.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* Verify that relaxed atomic stores use simple store instuctions.  */
+/* { dg-final { scan-assembler-not "amoswap" } } */
+
+void
+foo(int bar, int baz)
+{
+  __atomic_store_n(&bar, baz, __ATOMIC_RELAXED);
+}
-- 
2.25.1



[RFC 7/7] RISCV: Relax mem_thread_fence

2022-04-07 Thread Patrick O'Neill
This change brings atomic fences in line with table A.6 of the ISA
manual.

Relax mem_thread_fence according to the memmodel given.

2022-03-31 Patrick O'Neill 

* riscv.cc: Expose helper functions to sync.md.
* riscv-protos.h: Likewise.
* sync.md (mem_thread_fence_1): Change fence depending on
aquire/release requirements.
* amo-thread-fence-1: New test.
* amo-thread-fence-2: Likewise.
* amo-thread-fence-3: Likewise.
* amo-thread-fence-4: Likewise.
* amo-thread-fence-5: Likewise.

Signed-off-by: Patrick O'Neill 
---
 gcc/config/riscv/riscv-protos.h  |  2 ++
 gcc/config/riscv/riscv.cc|  4 ++--
 gcc/config/riscv/sync.md | 16 +---
 .../gcc.target/riscv/amo-thread-fence-1.c|  6 ++
 .../gcc.target/riscv/amo-thread-fence-2.c|  6 ++
 .../gcc.target/riscv/amo-thread-fence-3.c|  6 ++
 .../gcc.target/riscv/amo-thread-fence-4.c|  6 ++
 .../gcc.target/riscv/amo-thread-fence-5.c|  6 ++
 8 files changed, 47 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/amo-thread-fence-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/amo-thread-fence-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/amo-thread-fence-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/amo-thread-fence-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/amo-thread-fence-5.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index e32ea86a530..4390ede84f5 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -76,6 +76,8 @@ extern bool riscv_expand_block_move (rtx, rtx, rtx);
 extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *);
 extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *);
 extern bool riscv_gpr_save_operation_p (rtx);
+extern bool riscv_memmodel_needs_amo_acquire (enum memmodel);
+extern bool riscv_memmodel_needs_amo_release (enum memmodel);
 extern enum memmodel riscv_parent_memmodel (enum memmodel, enum memmodel);
 extern enum memmodel riscv_simplify_memmodel (enum memmodel);
 
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 48d18a83f06..3ee910261a2 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3619,7 +3619,7 @@ riscv_simplify_memmodel(enum memmodel model) {
 /* Return true if the .AQ suffix should be added to an AMO to implement the
acquire portion of memory model MODEL.  */
 
-static bool
+bool
 riscv_memmodel_needs_amo_acquire (enum memmodel model)
 {
   switch (model)
@@ -3645,7 +3645,7 @@ riscv_memmodel_needs_amo_acquire (enum memmodel model)
 /* Return true if the .RL suffix should be added to an AMO to implement the
release portion of memory model MODEL.  */
 
-static bool
+bool
 riscv_memmodel_needs_amo_release (enum memmodel model)
 {
   switch (model)
diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 1cc3731da38..a7a040180f5 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -46,14 +46,24 @@
   DONE;
 })
 
-;; Until the RISC-V memory model (hence its mapping from C++) is finalized,
-;; conservatively emit a full FENCE.
 (define_insn "mem_thread_fence_1"
   [(set (match_operand:BLK 0 "" "")
(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))
(match_operand:SI 1 "const_int_operand" "")] ;; model
   ""
-  "fence\tiorw,iorw")
+  {
+enum memmodel model = (enum memmodel) INTVAL (operands[1]);
+if (model == MEMMODEL_ACQ_REL)
+   return "fence.tso";
+else if (riscv_memmodel_needs_amo_acquire (model) &&
+riscv_memmodel_needs_amo_release (model))
+   return "fence\trw,rw";
+else if (riscv_memmodel_needs_amo_acquire (model))
+   return "fence\tr,rw";
+else if (riscv_memmodel_needs_amo_release (model))
+   return "fence\trw,w";
+  }
+  [(set (attr "length") (const_int 4))])
 
 ;; Atomic memory operations.
 
diff --git a/gcc/testsuite/gcc.target/riscv/amo-thread-fence-1.c 
b/gcc/testsuite/gcc.target/riscv/amo-thread-fence-1.c
new file mode 100644
index 000..833629bf2f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/amo-thread-fence-1.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-final { scan-assembler-not "fence\t" } } */
+
+int main() {
+  __atomic_thread_fence(__ATOMIC_RELAXED);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/amo-thread-fence-2.c 
b/gcc/testsuite/gcc.target/riscv/amo-thread-fence-2.c
new file mode 100644
index 000..3395ee41dbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/amo-thread-fence-2.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-final { scan-assembler "fence\tr,rw" } } */
+
+int main() {
+  __atomic_thread_fence(__ATOMIC_ACQUIRE);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/amo-thread-fence-3.c 
b/gcc/testsuite/gcc.target/riscv/amo-thread-fence-3.c
new file mode 100644
index 000.

[PATCH v1] libstdc++: Default to mutex-based atomics on RISC-V

2022-04-07 Thread Palmer Dabbelt
The RISC-V port requires libatomic to be linked in order to resolve
various atomic functions, which results in builds that have
"--with-libstdcxx-lock-policy=auto" defaulting to mutex-based locks.
Changing this to direct atomics breaks the ABI, this forces the auto
detection mutex-based atomics on RISC-V in order to avoid a silent ABI
break for users.

See Bug 84568 for more discussion.  In the long run there may be a way
to get the higher-performance atomics without an ABI flag day, but
that's going to be a much more complicated operation.  We don't even
have support for the inline atomics yet, but given that some folks have
been discussing hacks to make these libatomic routines appear implicitly
it seems prudent to just turn off the automatic detection for RISC-V.

libstdc++-v3/ChangeLog

* acinclude.md (GLIBCXX_ENABLE_LOCK_POLICY): Force auto to mutex
  for RISC-V.

---

I haven't even built this one, as I'm sure there's a better way to do it
then sticking some more C code in there.
---
 libstdc++-v3/acinclude.m4 | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index f53461c85a5..945c0c66f8d 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -3612,6 +3612,9 @@ AC_DEFUN([GLIBCXX_ENABLE_LOCK_POLICY], [
 dnl Why don't we check 8-byte CAS for sparc64, where _Atomic_word is long?!
 dnl New targets should only check for CAS for the _Atomic_word type.
 AC_TRY_COMPILE([
+#if defined __riscv
+# error "Defaulting to mutex-based locks for ABI compatibility"
+#endif
 #if ! defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
 # error "No 2-byte compare-and-swap"
 #elif ! defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
-- 
2.34.1



[PATCH][committed] testsuite: delete slp scan from loop vect test.

2022-04-07 Thread Tamar Christina via Gcc-patches
Hi All,

I accidentally left in an slp1 check in the vect test which showed up as
UNRESOLVED and had missed it in the sum file.  This deletes that line.

Committed under obvious rule.

Thanks,
Tamar

gcc/testsuite/ChangeLog:

PR testsuite/105196
* gcc.dg/vect/complex/fast-math-complex-add-pattern-float.c: Remove 
slp1 check.

--- inline copy of patch -- 
diff --git 
a/gcc/testsuite/gcc.dg/vect/complex/fast-math-complex-add-pattern-float.c 
b/gcc/testsuite/gcc.dg/vect/complex/fast-math-complex-add-pattern-float.c
index 
64ea2de10c698fdb9577e9f80cca09362815eac3..ca5f5b257d361849995699ab01afd52129cf787a
 100644
--- a/gcc/testsuite/gcc.dg/vect/complex/fast-math-complex-add-pattern-float.c
+++ b/gcc/testsuite/gcc.dg/vect/complex/fast-math-complex-add-pattern-float.c
@@ -9,5 +9,4 @@
 /* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "vect" { 
target { vect_complex_add_float } } } } */
 /* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" { 
target { vect_complex_add_float } } } } */
 /* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT270" "vect" } } */
-/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT90" "slp1" } } */
 /* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT90" "vect" } } */


-- 
diff --git 
a/gcc/testsuite/gcc.dg/vect/complex/fast-math-complex-add-pattern-float.c 
b/gcc/testsuite/gcc.dg/vect/complex/fast-math-complex-add-pattern-float.c
index 
64ea2de10c698fdb9577e9f80cca09362815eac3..ca5f5b257d361849995699ab01afd52129cf787a
 100644
--- a/gcc/testsuite/gcc.dg/vect/complex/fast-math-complex-add-pattern-float.c
+++ b/gcc/testsuite/gcc.dg/vect/complex/fast-math-complex-add-pattern-float.c
@@ -9,5 +9,4 @@
 /* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "vect" { 
target { vect_complex_add_float } } } } */
 /* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" { 
target { vect_complex_add_float } } } } */
 /* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT270" "vect" } } */
-/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT90" "slp1" } } */
 /* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT90" "vect" } } */



Re: [PATCH] Disable float128 tests on VxWorks, PR target/104253.

2022-04-07 Thread Michael Meissner via Gcc-patches
On Thu, Apr 07, 2022 at 06:00:51AM -0500, Segher Boessenkool wrote:
> On Thu, Apr 07, 2022 at 12:29:45AM -0400, Michael Meissner wrote:
> > In PR target/104253, it was pointed out the that test case added as part
> > of fixing the PR does not work on VxWorks because float128 is not
> > supported on that system.  I have modified the three tests for float128 so
> > that they are manually excluded on VxWorks systems.  In looking at the
> > code, I also added checks in check_effective_target_ppc_ieee128_ok to
> > disable the systems that will never support VSX instructions which are
> > required for float128 support (eabi, eabispe, darwin).
> 
> It's just one extra to the big list here, but, why do we need all these
> manual exclusions anyway?  What is broken about the test itself?
> 
> It would be so much more useful if the tests would help us, instead of
> producing a lot of extra busy-work.

Those lines were copied from other lines in the power7 era, and have just been
copied since then.  I agree it is perhaps time to remove these in GCC 13, but I
would be hesitant to remove them now.  I can not put in the eabi, eabispe and
darwin lines in check_effective_target_ppc_ieee128_ok, and just add the
vsxworks lines.

With these changes can I check these in and then do a backport later?

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


Re: [PATCH] Disable float128 tests on VxWorks, PR target/104253.

2022-04-07 Thread Michael Meissner via Gcc-patches
On Thu, Apr 07, 2022 at 12:47:27PM +0200, Eric Botcazou wrote:
> > I have run the tests on my usual Linux systems (little endian power10,
> > little endian power9, big endian power8), but I don't have access to a
> > VxWorks system.  Eric does this fix the failure for you?
> 
> Yes, if you add '*' at the end of the selector, i.e. [istarget *-*-vxworks*].

Ok.

> > If it does fix the failure, can I apply the patch to the master branch and
> > backport it to GCC 11 and GCC 10?  Sorry about the breakage.
> 
> That would be perfect, thanks in advance.
> 
> -- 
> Eric Botcazou
> 
> 

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


Re: [PATCH] Pass PKG_CONFIG_PATH down from top-level Makefile

2022-04-07 Thread Simon Marchi via Gcc-patches
Ping.

On 2022-03-29 16:04, Simon Marchi wrote:
> Ping!
> 
> On 2022-03-15 17:26, Simon Marchi wrote:
>> From: Simon Marchi 
>>
>> [Sending to binutils, gdb-patches and gcc-patches, since it touches the
>> top-level Makefile/configure]
>>
>> I have my debuginfod library installed in a non-standard location
>> (/opt/debuginfod), which requires me to set
>> PKG_CONFIG_PATH=/opt/debuginfod/lib/pkg-config.  If I just set it during
>> configure:
>>
>> $ PKG_CONFIG_PATH=/opt/debuginfod/lib/pkg-config ./configure 
>> --with-debuginfod
>> $ make
>>
>> or
>>
>> $ ./configure --with-debuginfod 
>> PKG_CONFIG_PATH=/opt/debuginfod/lib/pkg-config
>> $ make
>>
>> Then PKG_CONFIG_PATH is only present (and ignored) during the top-level
>> configure.  When running make (which runs gdb's and binutils'
>> configure), PKG_CONFIG_PATH is not set, which results in their configure
>> script not finding the library:
>>
>> checking for libdebuginfod >= 0.179... no
>> configure: error: "--with-debuginfod was given, but libdebuginfod is 
>> missing or unusable."
>>
>> Change the top-level configure/Makefile system to capture the value
>> passed when configuring the top-level and pass it down to
>> subdirectories (similar to CFLAGS, LDFLAGS, etc).
>>
>> I don't know much about the top-level build system, so I really don't
>> know if I did this correctly.  The changes are:
>>
>>  - Use AC_SUBST(PKG_CONFIG_PATH) in configure.ac, so that
>>@PKG_CONFIG_PATH@ gets replaced with the actual PKG_CONFIG_PATH value
>>in config files (i.e. Makefile)
>>  - Add a PKG_CONFIG_PATH Makefile variable in Makefile.tpl, initialized
>>to @PKG_CONFIG_PATH@
>>  - Add PKG_CONFIG_PATH to HOST_EXPORTS in Makefile.tpl, which are the
>>variables set when running the sub-configures
>>
>> I initially added PKG_CONFIG_PATH to flags_to_pass, in Makefile.def, but
>> I don't think it's needed.  AFAIU, this defines the flags to pass down
>> when calling "make" in subdirectories.  We only need PKG_CONFIG_PATH to
>> be passed down during configure.  After that, it's captured in
>> gdb/config.status, so even if a "make" causes a re-configure later
>> (because gdb/configure has changed, for example), the PKG_CONFIG_PATH
>> value will be remembered.
>>
>> ChangeLog:
>>
>>  * configure.ac: Add AC_SUBST(PKG_CONFIG_PATH).
>>  * configure: Re-generate.
>>  * Makefile.tpl (HOST_EXPORTS): Pass PKG_CONFIG_PATH.
>>  (PKG_CONFIG_PATH): New.
>>  * Makefile.in: Re-generate.
>>
>> Change-Id: I91138dfca41c43b05e53e445f62e4b27882536bf
>> ---
>>  Makefile.in  | 3 +++
>>  Makefile.tpl | 3 +++
>>  configure| 2 ++
>>  configure.ac | 1 +
>>  4 files changed, 9 insertions(+)
>>
>> diff --git a/Makefile.in b/Makefile.in
>> index 3aacd2daac9c..cb39e4790d69 100644
>> --- a/Makefile.in
>> +++ b/Makefile.in
>> @@ -218,6 +218,7 @@ HOST_EXPORTS = \
>>  OBJCOPY="$(OBJCOPY)"; export OBJCOPY; \
>>  OBJDUMP="$(OBJDUMP)"; export OBJDUMP; \
>>  OTOOL="$(OTOOL)"; export OTOOL; \
>> +PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)"; export PKG_CONFIG_PATH; \
>>  READELF="$(READELF)"; export READELF; \
>>  AR_FOR_TARGET="$(AR_FOR_TARGET)"; export AR_FOR_TARGET; \
>>  AS_FOR_TARGET="$(AS_FOR_TARGET)"; export AS_FOR_TARGET; \
>> @@ -444,6 +445,8 @@ LIBCXXFLAGS = $(CXXFLAGS) -fno-implicit-templates
>>  GOCFLAGS = $(CFLAGS)
>>  GDCFLAGS = $(CFLAGS)
>>  
>> +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
>> +
>>  # Pass additional PGO and LTO compiler options to the PGO build.
>>  BUILD_CFLAGS = $(PGO_BUILD_CFLAGS) $(PGO_BUILD_LTO_CFLAGS)
>>  override CFLAGS += $(BUILD_CFLAGS)
>> diff --git a/Makefile.tpl b/Makefile.tpl
>> index 9df77788345a..88db8f44d53f 100644
>> --- a/Makefile.tpl
>> +++ b/Makefile.tpl
>> @@ -221,6 +221,7 @@ HOST_EXPORTS = \
>>  OBJCOPY="$(OBJCOPY)"; export OBJCOPY; \
>>  OBJDUMP="$(OBJDUMP)"; export OBJDUMP; \
>>  OTOOL="$(OTOOL)"; export OTOOL; \
>> +PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)"; export PKG_CONFIG_PATH; \
>>  READELF="$(READELF)"; export READELF; \
>>  AR_FOR_TARGET="$(AR_FOR_TARGET)"; export AR_FOR_TARGET; \
>>  AS_FOR_TARGET="$(AS_FOR_TARGET)"; export AS_FOR_TARGET; \
>> @@ -447,6 +448,8 @@ LIBCXXFLAGS = $(CXXFLAGS) -fno-implicit-templates
>>  GOCFLAGS = $(CFLAGS)
>>  GDCFLAGS = $(CFLAGS)
>>  
>> +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
>> +
>>  # Pass additional PGO and LTO compiler options to the PGO build.
>>  BUILD_CFLAGS = $(PGO_BUILD_CFLAGS) $(PGO_BUILD_LTO_CFLAGS)
>>  override CFLAGS += $(BUILD_CFLAGS)
>> diff --git a/configure b/configure
>> index 26935ebda249..1badcb314f8f 100755
>> --- a/configure
>> +++ b/configure
>> @@ -618,6 +618,7 @@ CXX_FOR_TARGET
>>  CC_FOR_TARGET
>>  RANLIB_PLUGIN_OPTION
>>  AR_PLUGIN_OPTION
>> +PKG_CONFIG_PATH
>>  READELF
>>  OBJDUMP
>>  OBJCOPY
>> @@ -10310,6 +10311,7 @@ fi
>>  
>>  
>>  
>> +
>>  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -plugin option" >&5
>>  $as_echo_n "checking for -plugin option... " >&6; }
>>  
>> diff --git a/configur

Re: [PATCH] rs6000/test: Adjust p9-vec-length-7 sensitive to unroll [PR103196]

2022-04-07 Thread Segher Boessenkool
Hi!

On Thu, Apr 07, 2022 at 09:19:51AM -0500, will schmidt wrote:
> On Mon, 2022-02-28 at 13:37 +0800, Kewen.Lin via Gcc-patches wrote:
> > As PR103196 shows, p9-vec-length-full-7.c needs to be adjusted as the
> > complete unrolling can happen on some of its loops.  This patch is to
> > use pragma "GCC unroll 0" to disable all possible loop unrollings.
> > Hope it can help the case not that fragile.
> 
> ok
> 
> Is the lack of effectiveness of "-fno-unroll-loops" otherwise
> understood, or is there further issue behind that option? 

There is -fpeel-loops as well, and cunroll is independent of all of
these as well?

> I would
> expect the effect of the option, versus the pragma, two to roughly
> equivalent.   Obviously it is not.  :-)

Yes, me too.  And I do not see what makes the difference, if it isn't
the peel thing :-(

Ke Wen, can you try with -fno-peel-loops please?


Segher


Re: [PATCH 0/3] Fix PR debug/105089

2022-04-07 Thread Indu Bhagat via Gcc-patches

ping

On 3/30/22 4:31 PM, Indu Bhagat wrote:

Hello,

This patch set fixes PR debug/105089.

[PS: The first patch in the series "ctfc: get rid of the static variable in
ctf_list_add_ctf_vars" is unrelated to the PR and is combined here only for
ease of review.]

As noted in the PR debug/105089, gcc is emitting two CTF variable records
where it sees an extern variable with declaration and definition in the same
compilation unit.

The CTF format format does not distinguish between the non-defining decl vs.
the defining decl, so the correct behaviour wrt the compiler generating the
type for such extern variables is to simply emit the type of the defining
declaration.

Testing Notes:
-- bootstrapped and reg tested on x86_64 and aarch64
-- built binutils package with -gctf (with CTF-capable linker) on x86_64, no
CTF errors reported.

Thanks,

Indu Bhagat (3):
   ctfc: get rid of the static variable in ctf_list_add_ctf_vars ()
   CTF for extern variable fix [PR105089]
   Refactor and update CTF testcases [PR105089]

  gcc/ctfc.cc   | 62 ++-
  gcc/ctfc.h|  8 ++-
  gcc/ctfout.cc | 28 ++---
  gcc/dwarf2ctf.cc  | 18 +-
  gcc/testsuite/gcc.dg/debug/ctf/ctf-array-2.c  | 22 +++
  gcc/testsuite/gcc.dg/debug/ctf/ctf-array-5.c  | 17 +
  .../gcc.dg/debug/ctf/ctf-variables-3.c| 22 +++
  7 files changed, 147 insertions(+), 30 deletions(-)
  create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-array-5.c
  create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-variables-3.c





[AArch64] PR105162: emit barrier for __sync and __atomic builtins on CPUs without LSE

2022-04-07 Thread Pop, Sebastian via Gcc-patches
Hi,


With -moutline-atomics gcc stops generating a barrier for __sync builtins: 
https://gcc.gnu.org/PR105162

This is a problem on CPUs without LSE instructions where the ld/st exclusives 
do not guarantee a full barrier.

The attached patch adds the barrier to the outline-atomics functions on the 
path without LSE instructions.

In consequence, under -moutline-atomics __atomic and __sync builtins now behave 
the same with and without LSE instructions.

To complete the change, the second patch makes gcc emit the barrier for 
__atomic builtins as well, i.e., independently of is_mm_sync().


Sebastian
From b1ffa7d737427dc9414cb0c315f08b7c84ef647b Mon Sep 17 00:00:00 2001
From: Sebastian Pop 
Date: Wed, 6 Apr 2022 21:42:11 +
Subject: [PATCH] [AArch64] add barrier to no LSE path in outline-atomics
 functions

---
 libgcc/config/aarch64/lse.S | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
index c353ec2215b..ac77c68e300 100644
--- a/libgcc/config/aarch64/lse.S
+++ b/libgcc/config/aarch64/lse.S
@@ -229,6 +229,7 @@ STARTFN	NAME(swp)
 0:	LDXR		s(0), [x1]
 	STXR		w(tmp1), s(tmp0), [x1]
 	cbnz		w(tmp1), 0b
+dmb ish
 	ret
 
 ENDFN	NAME(swp)
@@ -273,6 +274,7 @@ STARTFN	NAME(LDNM)
 	OP		s(tmp1), s(0), s(tmp0)
 	STXR		w(tmp2), s(tmp1), [x1]
 	cbnz		w(tmp2), 0b
+dmb ish
 	ret
 
 ENDFN	NAME(LDNM)
-- 
2.25.1

From 68c07f95157057f0167723b182f0ccffdac8a17e Mon Sep 17 00:00:00 2001
From: Sebastian Pop 
Date: Thu, 7 Apr 2022 19:18:57 +
Subject: [PATCH 2/2] [AArch64] emit a barrier for __atomic builtins

---
 gcc/config/aarch64/aarch64.cc | 15 +++
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 18f80499079..be1b8d22c6a 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22931,9 +22931,7 @@ aarch64_split_compare_and_swap (rtx operands[])
   if (strong_zero_p)
 aarch64_gen_compare_reg (NE, rval, const0_rtx);
 
-  /* Emit any final barrier needed for a __sync operation.  */
-  if (is_mm_sync (model))
-aarch64_emit_post_barrier (model);
+  aarch64_emit_post_barrier (model);
 }
 
 /* Split an atomic operation.  */
@@ -22948,7 +22946,6 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
   machine_mode mode = GET_MODE (mem);
   machine_mode wmode = (mode == DImode ? DImode : SImode);
   const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
-  const bool is_sync = is_mm_sync (model);
   rtx_code_label *label;
   rtx x;
 
@@ -22966,11 +22963,7 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
 
   /* The initial load can be relaxed for a __sync operation since a final
  barrier will be emitted to stop code hoisting.  */
- if (is_sync)
-aarch64_emit_load_exclusive (mode, old_out, mem,
- GEN_INT (MEMMODEL_RELAXED));
-  else
-aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
+  aarch64_emit_load_exclusive (mode, old_out, mem, GEN_INT (MEMMODEL_RELAXED));
 
   switch (code)
 {
@@ -23016,9 +23009,7 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
 			gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
   aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
 
-  /* Emit any final barrier needed for a __sync operation.  */
-  if (is_sync)
-aarch64_emit_post_barrier (model);
+  aarch64_emit_post_barrier (model);
 }
 
 static void
-- 
2.25.1



Re: [PATCH] Disable float128 tests on VxWorks, PR target/104253.

2022-04-07 Thread Segher Boessenkool
Hi!

On Thu, Apr 07, 2022 at 02:59:55PM -0400, Michael Meissner wrote:
> On Thu, Apr 07, 2022 at 06:00:51AM -0500, Segher Boessenkool wrote:
> > On Thu, Apr 07, 2022 at 12:29:45AM -0400, Michael Meissner wrote:
> > > In PR target/104253, it was pointed out the that test case added as part
> > > of fixing the PR does not work on VxWorks because float128 is not
> > > supported on that system.  I have modified the three tests for float128 so
> > > that they are manually excluded on VxWorks systems.  In looking at the
> > > code, I also added checks in check_effective_target_ppc_ieee128_ok to
> > > disable the systems that will never support VSX instructions which are
> > > required for float128 support (eabi, eabispe, darwin).
> > 
> > It's just one extra to the big list here, but, why do we need all these
> > manual exclusions anyway?  What is broken about the test itself?
> > 
> > It would be so much more useful if the tests would help us, instead of
> > producing a lot of extra busy-work.
> 
> Those lines were copied from other lines in the power7 era, and have just been
> copied since then.

And never updated or given any (second) thought :-(

> I agree it is perhaps time to remove these in GCC 13, but I
> would be hesitant to remove them now.  I can not put in the eabi, eabispe and
> darwin lines in check_effective_target_ppc_ieee128_ok, and just add the
> vsxworks lines.

What I want to see is the tests (in target-supports) to just work,
without manual intervention for targets that are even mildly
interesting :-(

(Btw, powerpc*-*-eabi* would be simpler, more compact, and more correct
here!)

> With these changes can I check these in and then do a backport later?

Eric already approved it.  It is fine with me of course, but I do want
things to get better eventually!


Segher


Re: [PATCH] c++: tolerate cdtors returning this in constexpr

2022-04-07 Thread Alexandre Oliva via Gcc-patches
Hello, Jason,

On Apr  6, 2022, Jason Merrill  wrote:

> On 4/6/22 15:36, Alexandre Oliva wrote:
> Please adjust your patch subject lines for the new guidelines adopted
> as part of the git transition:

> https://gcc.gnu.org/contribute.html#patches

Oh, wow, I had missed those guidelines entirely!  *blush*

Thanks for the pointer.  I'm taking note of it for future submissions.

>> On targets that return this from cdtors, cxx_eval_call_expression may
>> flag flowing off the end of a dtor.  That's preempted for ctors, and
>> avoided entirely when dtors return void, but when they return this,
>> the return value should be conceptually disregarded, without making
>> room for such internal ABI details to make a program ill-formed, as in
>> g++.dg/cpp2a/constexpr-dtor12.C on arm-eabi.

> Is there a PR for this issue?

I'm afraid not, it's just one of many testsuite fails that I've been
working on cleaning up.

> The patch looks fine, but why doesn't the implicit return 'this' on
> arm-eabi already make the result non-null?

That's a good question that I didn't have an answer for.

It's the explicit 'return' statement in T::~T(), turned into a goto,
that interrupts the iteration over the stmt list containing the return
stmt:

<<< Unknown tree: must_not_throw_expr
  {
<<< Unknown tree: cleanup_stmt
  <<< Unknown tree: cleanup_stmt
;,\
 D.4488 = 0;;, (int *) D.4487;)) >;
// predicted unlikely by goto predictor.;
goto ;
(void) S::~S (&((struct T *) this)->D.4458)
 >>>;
  *(struct T *) this = {CLOBBER}
   >>>;
  } (*)
  :;
  return this;
   >>>

(*) eval'ing this block sets jump_target to , which satisfies
the returns(tree*) predicate, so cxx_eval_statement_list bails.

Now, ISTM that the goto target selected for the return stmt bypasses the
subobject dtor call and the full-object clobber.  That sounds like
another bug, no?

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


libgomp nvptx plugin: Split 'PLUGIN_NVPTX_DYNAMIC' into 'PLUGIN_NVPTX_INCLUDE_SYSTEM_CUDA_H' and 'PLUGIN_NVPTX_LINK_LIBCUDA' (was: [PATCH] Allow building GCC with PTX offloading even without CUDA bein

2022-04-07 Thread Thomas Schwinge
Hi!

On 2017-01-13T19:11:23+0100, Jakub Jelinek  wrote:
> Especially for distributions it is undesirable to need to have proprietary
> CUDA libraries and headers installed when building GCC.

> --- libgomp/plugin/configfrag.ac.jj   2017-01-13 12:07:56.0 +0100
> +++ libgomp/plugin/configfrag.ac  2017-01-13 17:33:26.608240936 +0100

> +   PLUGIN_NVPTX_CPPFLAGS='-I$(srcdir)/plugin/cuda'
> +   PLUGIN_NVPTX_LIBS='-ldl'
> +   PLUGIN_NVPTX_DYNAMIC=1

> +AC_DEFINE_UNQUOTED([PLUGIN_NVPTX_DYNAMIC], [$PLUGIN_NVPTX_DYNAMIC],
> +  [Define to 1 if the NVIDIA plugin should dlopen libcuda.so.1, 0 if it 
> should be linked against it.])

Actually, the conditionals leading to 'PLUGIN_NVPTX_DYNAMIC=1' here do
control two orthogonal aspects; OK to disentangle that with the attached
"libgomp nvptx plugin: Split 'PLUGIN_NVPTX_DYNAMIC' into
'PLUGIN_NVPTX_INCLUDE_SYSTEM_CUDA_H' and 'PLUGIN_NVPTX_LINK_LIBCUDA'"?


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From c455522ac5d8ab41e5d11f8997678e042ff48e87 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Thu, 7 Apr 2022 23:10:16 +0200
Subject: [PATCH] libgomp nvptx plugin: Split 'PLUGIN_NVPTX_DYNAMIC' into
 'PLUGIN_NVPTX_INCLUDE_SYSTEM_CUDA_H' and 'PLUGIN_NVPTX_LINK_LIBCUDA'

Including the GCC-shipped 'include/cuda/cuda.h' vs. system  and
'dlopen'ing the CUDA Driver library vs. linking it are separate concerns.

	libgomp/
	* plugin/Makefrag.am: Handle 'PLUGIN_NVPTX_DYNAMIC'.
	* plugin/configfrag.ac (PLUGIN_NVPTX_DYNAMIC): Change
	'AC_DEFINE_UNQUOTED' into 'AM_CONDITIONAL'.
	* plugin/plugin-nvptx.c: Split 'PLUGIN_NVPTX_DYNAMIC' into
	'PLUGIN_NVPTX_INCLUDE_SYSTEM_CUDA_H' and
	'PLUGIN_NVPTX_LINK_LIBCUDA'.
	* Makefile.in: Regenerate.
	* config.h.in: Likewise.
	* configure: Likewise.
---
 libgomp/Makefile.in   | 26 +++---
 libgomp/config.h.in   |  4 
 libgomp/configure | 21 +++--
 libgomp/plugin/Makefrag.am| 16 +++-
 libgomp/plugin/configfrag.ac  |  3 +--
 libgomp/plugin/plugin-nvptx.c |  4 ++--
 6 files changed, 52 insertions(+), 22 deletions(-)

diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 22cb2136a08..d43c584a32d 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -119,8 +119,16 @@ build_triplet = @build@
 host_triplet = @host@
 target_triplet = @target@
 @PLUGIN_NVPTX_TRUE@am__append_1 = libgomp-plugin-nvptx.la
-@PLUGIN_GCN_TRUE@am__append_2 = libgomp-plugin-gcn.la
-@USE_FORTRAN_TRUE@am__append_3 = openacc.f90
+
+# Including the GCC-shipped 'include/cuda/cuda.h' vs. system .
+@PLUGIN_NVPTX_DYNAMIC_FALSE@@PLUGIN_NVPTX_TRUE@am__append_2 = -DPLUGIN_NVPTX_INCLUDE_SYSTEM_CUDA_H \
+@PLUGIN_NVPTX_DYNAMIC_FALSE@@PLUGIN_NVPTX_TRUE@	-DPLUGIN_NVPTX_LINK_LIBCUDA
+
+# 'dlopen'ing the CUDA Driver library vs. linking it.
+@PLUGIN_NVPTX_DYNAMIC_TRUE@@PLUGIN_NVPTX_TRUE@am__append_3 = $(PLUGIN_NVPTX_LIBS)
+@PLUGIN_NVPTX_DYNAMIC_FALSE@@PLUGIN_NVPTX_TRUE@am__append_4 = $(PLUGIN_NVPTX_LIBS)
+@PLUGIN_GCN_TRUE@am__append_5 = libgomp-plugin-gcn.la
+@USE_FORTRAN_TRUE@am__append_6 = openacc.f90
 subdir = .
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
@@ -197,8 +205,10 @@ libgomp_plugin_gcn_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
 	$(libgomp_plugin_gcn_la_LDFLAGS) $(LDFLAGS) -o $@
 @PLUGIN_GCN_TRUE@am_libgomp_plugin_gcn_la_rpath = -rpath \
 @PLUGIN_GCN_TRUE@	$(toolexeclibdir)
+@PLUGIN_NVPTX_DYNAMIC_TRUE@@PLUGIN_NVPTX_TRUE@am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1)
+@PLUGIN_NVPTX_DYNAMIC_FALSE@@PLUGIN_NVPTX_TRUE@am__DEPENDENCIES_3 = $(am__DEPENDENCIES_1)
 @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_DEPENDENCIES = libgomp.la \
-@PLUGIN_NVPTX_TRUE@	$(am__DEPENDENCIES_1)
+@PLUGIN_NVPTX_TRUE@	$(am__DEPENDENCIES_2) $(am__DEPENDENCIES_3)
 @PLUGIN_NVPTX_TRUE@am_libgomp_plugin_nvptx_la_OBJECTS =  \
 @PLUGIN_NVPTX_TRUE@	libgomp_plugin_nvptx_la-plugin-nvptx.lo
 libgomp_plugin_nvptx_la_OBJECTS =  \
@@ -533,7 +543,7 @@ libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
 AM_CPPFLAGS = $(addprefix -I, $(search_path))
 AM_CFLAGS = $(XCFLAGS)
 AM_LDFLAGS = $(XLDFLAGS) $(SECTION_LDFLAGS) $(OPT_LDFLAGS)
-toolexeclib_LTLIBRARIES = libgomp.la $(am__append_1) $(am__append_2)
+toolexeclib_LTLIBRARIES = libgomp.la $(am__append_1) $(am__append_5)
 nodist_toolexeclib_HEADERS = libgomp.spec
 
 # -Wc is only a libtool option.
@@ -559,16 +569,18 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
 	oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
 	oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
 	affinity-fmt.c teams.c allocator.c oacc-profiling.c \
-	oacc-target.c $(am__append_3)
+	oacc-target.c $(am__append_6)
 
 # Nvidia PTX OpenACC plugin.
 @PLUGIN_NVPT

Committed: [PATCH] Disable float128 tests on VxWorks, PR target/104253.

2022-04-07 Thread Michael Meissner via Gcc-patches
This is the patch that I committed.  I will do the backport in a few days to
GCC 11 and 10.

Disable float128 tests on VxWorks, PR target/104253.

In PR target/104253, it was pointed out the that test case added as part
of fixing the PR does not work on VxWorks because float128 is not
supported on that system.  I have modified the three tests for float128 so
that they are manually excluded on VxWorks systems.  In looking at the
code, I also added checks in check_effective_target_ppc_ieee128_ok to
disable the systems that will never support VSX instructions which are
required for float128 support (eabi, eabispe, darwin).

2022-04-07   Michael Meissner  

gcc/testsuite/
PR target/104253
* lib/target-supports.exp (check_ppc_float128_sw_available): Do
not run float128 tests on VxWorks.
(check_ppc_float128_hw_available): Likewise.
(check_effective_target_ppc_ieee128_ok): Likewise.
---
 gcc/testsuite/lib/target-supports.exp | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index ff8edbd3e17..90b90095682 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2318,8 +2318,9 @@ proc check_ppc_mma_hw_available { } {
 proc check_ppc_float128_sw_available { } {
 return [check_cached_effective_target ppc_float128_sw_available {
# Some simulators are known to not support VSX/power8/power9
-   # instructions. For now, disable on Darwin.
-   if { [istarget powerpc-*-eabi]
+   # instructions. For now, disable on Darwin and VxWorks.
+   if { [istarget *-*-vxworks*]
+|| [istarget powerpc-*-eabi]
 || [istarget powerpc*-*-eabispe]
 || [istarget *-*-darwin*]} {
expr 0
@@ -2345,7 +2346,8 @@ proc check_ppc_float128_hw_available { } {
 return [check_cached_effective_target ppc_float128_hw_available {
# Some simulators are known to not support VSX/power8/power9
# instructions. For now, disable on Darwin.
-   if { [istarget powerpc-*-eabi]
+   if { [istarget *-*-vxworks*]
+|| [istarget powerpc-*-eabi]
 || [istarget powerpc*-*-eabispe]
 || [istarget *-*-darwin*]} {
expr 0
@@ -2370,8 +2372,9 @@ proc check_ppc_float128_hw_available { } {
 # See if the __ieee128 keyword is understood.
 proc check_effective_target_ppc_ieee128_ok { } {
 return [check_cached_effective_target ppc_ieee128_ok {
-   # disable on AIX.
-   if { [istarget *-*-aix*] } {
+   # disable on AIX and VxWorks.
+   if { [istarget *-*-aix*]
+|| [istarget *-*-vxworks*]} {
expr 0
} else {
set options "-mfloat128"
-- 
2.35.1


-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


Re: [PATCH] c++: set loc on call even if result is discarded

2022-04-07 Thread Alexandre Oliva via Gcc-patches
On Apr  6, 2022, Jason Merrill  wrote:

> On 4/6/22 15:37, Alexandre Oliva wrote:
> Need to adjust this subject line, as well.

*nod*, thanks

>> * tree.cc (protected_set_expr_location): Propagate locus to
>> call wrapped in cast-to-void.

> I'm reluctant to put this C++-specific change in a simple function
> shared by all languages;

Perhaps it benefits other languages as well?  The effect is presumably
desirable on other languages too: setting a cast-to-void's location
seems completely ineffective, as it's eventually thrown away, and
perhaps propagating the location to any operand (rather than just calls)
would carry out the intent of [protected_]set_expr_location more
effectively.  It doesn't feel right to require every caller to worry
about that.

> how about handling it in set_cleanup_locs instead?

Like this?  That seems reasonable to me.  I'll give it a spin.

diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index a7f6449dafd2e..43627ed30afcb 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -609,7 +609,17 @@ set_cleanup_locs (tree stmts, location_t loc)
 {
   if (TREE_CODE (stmts) == CLEANUP_STMT)
 {
-  protected_set_expr_location (CLEANUP_EXPR (stmts), loc);
+  tree t = CLEANUP_EXPR (stmts);
+  protected_set_expr_location (t, loc);
+  /* Avoid locus differences for C++ cdtor calls depending on whether
+cdtor_returns_this: a conversion to void is added to discard the return
+value, and this conversion ends up carrying the location, and when it
+gets discarded, the location is lost.  So hold it in the call as
+well.  */
+  if (TREE_CODE (t) == NOP_EXPR
+ && TREE_TYPE (t) == void_type_node
+ && TREE_CODE (TREE_OPERAND (t, 0)) == CALL_EXPR)
+   protected_set_expr_location (TREE_OPERAND (t, 0), loc);
   set_cleanup_locs (CLEANUP_BODY (stmts), loc);
 }
   else if (TREE_CODE (stmts) == STATEMENT_LIST)


-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH, V3] Eliminate power8 fusion options, use power8 tuning, PR target/102059

2022-04-07 Thread Michael Meissner via Gcc-patches
Eliminate power8 fusion options, use power8 tuning, PR target/102059

This is V3 of the patch.  Compared to V2 of the patch, it changed some of
the comments based on the feedback.  Since -mpower8-fusion-sign was an
undocumented option, I removed some of the wording about its removal.
I removed an older comment that was appropriate when they were written for
the power7 days, but it is not longer appropriate now.

The power8 fusion support used to be set automatically when -mcpu=power8 or
-mtune=power8 was used, and it was cleared for other cpu's.  However, if you
used the target attribute or target #pragma to change the default cpu type or
tuning, you would get an error that a target specifiction option mismatch
occurred.

This occurred because the rs6000_can_inline_p function just compares the ISA
bits between the called inline function and the caller.  If the ISA flags of
the called function is not a subset of the ISA flags of the caller, we won't do
the inlinging.  When a power9 or power10 function inlines a function that is
explicitly compiled for power8, the power8 function has the power8 fusion bits
set and the power9 or power10 functions do not have the fusion bits set.

This code removes the -mpower8-fusion option.  It also removes the
undocumented -mpower8-fusion-sign option.  It only enables power8 fusion
if we are tuning for a power8.  Power8 sign fusion is only enabled if we
are tuning for a power8 and we have -O3 optimization or higher.

I left the -mno-power8-fusion in rs6000.opt and made it so it doesn't
issue a warning.  If the user explicitly used -mpower8-fusion, then they
will get a warning that the swtich has been removed.

Similarly, I left in the pragma target and attribute target support for
power8-fusion, but using it doesn't do anything now.  This is because I
believe the customer who encountered this problem now is explicitly
setting the no-power8-fusion option in the pragma or attribute to avoid
the warning.

I have tested this patch on a little endian power10 system.  I have tested
previous versions on little endian power9 and big endian power8 systems.
Can I apply this patch to the master branch?

I will want to backport the patch to GCC 10 and GCC 11 in a few days.
Note this patch can't be used directly for those backports due to the
other changes for PR target/102059 that have gone into the master branch
but were not back ported.

2022-04-07   Michael Meissner  

gcc/
PR target/102059
* config/rs6000/rs6000-cpus.def (OTHER_FUSION_MASKS): Delete.
(ISA_3_0_MASKS_SERVER): Don't clear the fusion masks.
(POWERPC_MASKS): Remove OPTION_MASK_P8_FUSION.
* config/rs6000/rs6000.cc (rs6000_option_override_internal):
Delete code that set the power8 fusion options automatically.
(rs6000_opt_masks): Allow #pragma target and attribute target to set
power8-fusion, but these no longer represent an option that the
user can set.
(rs6000_print_options_internal): Skip printing nop options.
* config/rs6000/rs6000.h (TARGET_P8_FUSION): New macro.
(TARGET_P8_FUSION_SIGN): Likewise.
(MASK_P8_FUSION): Delete.
* config/rs6000/rs6000.opt (-mpower8-fusion): Recognize the option but
ignore the no form and warn that the option was removed for the regular
form.
(-mpower8-fusion-sign): Warn that the option has been removed.
* doc/invoke.texi (RS/6000 and PowerPC Options): Delete
-mpower8-fusion.

gcc/testsuite/
PR target/102059
* gcc.dg/lto/pr102059-1_0.c: Remove -mno-power8-fusion.
* gcc.dg/lto/pr102059-2_0.c: Likewise.
* gcc.target/powerpc/pr102059-3.c: Likewise.
* gcc.target/powerpc/pr102059-4.c: New test.

---
 gcc/config/rs6000/rs6000-cpus.def | 21 +++-
 gcc/config/rs6000/rs6000.cc   | 48 +--
 gcc/config/rs6000/rs6000.h| 14 +-
 gcc/config/rs6000/rs6000.opt  | 17 +--
 gcc/doc/invoke.texi   | 13 +
 gcc/testsuite/gcc.dg/lto/pr102059-1_0.c   |  2 +-
 gcc/testsuite/gcc.dg/lto/pr102059-2_0.c   |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr102059-3.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr102059-4.c | 23 +
 9 files changed, 71 insertions(+), 71 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr102059-4.c

diff --git a/gcc/config/rs6000/rs6000-cpus.def 
b/gcc/config/rs6000/rs6000-cpus.def
index 963947f6939..9db0c62a3ee 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -43,9 +43,6 @@
 | OPTION_MASK_ALTIVEC  \
 | OPTION_MASK_VSX)
 
-/* For now, don't provide an embedded version of ISA 2.07.  Do not set power8
-   fusion here, instead set it in rs6000.cc if we are tuning for a power8
-   system.  */
 #define ISA_2_7_MASKS_SERVER   (ISA_2_6_MASKS_SERVER

Ping^2 [PATCH, rs6000] Correct match pattern in pr56605.c

2022-04-07 Thread HAO CHEN GUI via Gcc-patches
Hi,
  Gentle ping this:
   https://gcc.gnu.org/pipermail/gcc-patches/2022-February/590958.html
Thanks

On 15/3/2022 上午 10:06, HAO CHEN GUI wrote:
> Hi,
>   Gentle ping this:
>   https://gcc.gnu.org/pipermail/gcc-patches/2022-February/590958.html
> Thanks
> 
> On 28/2/2022 上午 11:17, HAO CHEN GUI wrote:
>> Hi,
>>   This patch corrects the match pattern in pr56605.c. The former pattern
>> is wrong and test case fails with GCC11. It should match following insn on
>> each subtarget after mode promotion is disabled. The patch need to be
>> backported to GCC11.
>>
>> //gimple
>> _17 = (unsigned int) _20;
>>  prolog_loop_niters.4_23 = _17 & 3;
>>
>> //rtl
>> (insn 19 18 20 2 (parallel [
>> (set (reg:CC 208)
>> (compare:CC (and:SI (subreg:SI (reg:DI 207) 0)
>> (const_int 3 [0x3]))
>> (const_int 0 [0])))
>> (set (reg:SI 129 [ prolog_loop_niters.5 ])
>> (and:SI (subreg:SI (reg:DI 207) 0)
>> (const_int 3 [0x3])))
>> ]) 197 {*andsi3_imm_mask_dot2}
>>
>>
>>   Bootstrapped and tested on powerpc64-linux BE/LE and AIX with no 
>> regressions.
>> Is this okay for trunk and GCC11? Any recommendations? Thanks a lot.
>>
>> ChangeLog
>> 2022-02-28 Haochen Gui 
>>
>> gcc/testsuite/
>>  PR target/102146
>>  * gcc.target/powerpc/pr56605.c: Correct match pattern in combine pass.
>>
>>
>> patch.diff
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr56605.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr56605.c
>> index fdedbfc573d..231d808aa99 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/pr56605.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr56605.c
>> @@ -11,5 +11,5 @@ void foo (short* __restrict sb, int* __restrict ia)
>>  ia[i] = (int) sb[i];
>>  }
>>
>> -/* { dg-final { scan-rtl-dump-times {\(compare:CC 
>> \((?:and|zero_extend):(?:DI) \((?:sub)?reg:[SD]I} 1 "combine" } } */
>> +/* { dg-final { scan-rtl-dump-times {\(compare:CC \(and:SI \(subreg:SI 
>> \(reg:DI} 1 "combine" } } */
>>


New German PO file for 'gcc' (version 12.1-b20220403)

2022-04-07 Thread Translation Project Robot
Hello, gentle maintainer.

This is a message from the Translation Project robot.

A revised PO file for textual domain 'gcc' has been submitted
by the German team of translators.  The file is available at:

https://translationproject.org/latest/gcc/de.po

(This file, 'gcc-12.1-b20220403.de.po', has just now been sent to you in
a separate email.)

All other PO files for your package are available in:

https://translationproject.org/latest/gcc/

Please consider including all of these in your next release, whether
official or a pretest.

Whenever you have a new distribution with a new version number ready,
containing a newer POT file, please send the URL of that distribution
tarball to the address below.  The tarball may be just a pretest or a
snapshot, it does not even have to compile.  It is just used by the
translators when they need some extra translation context.

The following HTML page has been updated:

https://translationproject.org/domain/gcc.html

If any question arises, please contact the translation coordinator.

Thank you for all your work,

The Translation Project robot, in the
name of your translation coordinator.




Re: [PATCH] rs6000: Guard bifs {un, }pack_{longdouble, ibm128} under hard float [PR103623]

2022-04-07 Thread Kewen.Lin via Gcc-patches
Hi Segher & Jakub,

Thanks for the comments!

on 2022/4/7 9:00 PM, Jakub Jelinek wrote:
> On Thu, Apr 07, 2022 at 06:09:52AM -0500, Segher Boessenkool wrote:
>> Hi!
>>
>> On Thu, Mar 03, 2022 at 10:11:32AM +0800, Kewen.Lin wrote:
>>> As PR103623 shows, it's a regression failure due to new built-in
>>> function framework, previously we guard __builtin_{un,}pack_{longdouble,
>>> ibm128} built-in functions under hard float, so they are unavailable
>>> with the given configuration.  While with new bif infrastructure, it
>>> becomes available and gets ICE due to incomplete supports.
>>>
>>> Segher and Peter pointed out that we should make them available with
>>> soft float, I agree we can extend them to cover both soft and hard
>>> float.  But considering it's stage 4 now and this regression is
>>> classified as P1, also the previous behavior requiring hard float
>>> aligns with what document [1] says, I guess it may be a good idea to
>>> fix it with the attached small patch to be consistent with the previous
>>> behavior.  Then we can extend the functionality in upcoming stage 1.
>>
>> Or you could just not take away the existing functionality.
> 
> Have those builtins ever worked with 64-bit soft-float?
> When I try e.g. gcc 11 on the #c0 testcase from the PR, I get:
> ./cc1.r11-1 -quiet -nostdinc pr103623.c -m64 -mlong-double-128 -msoft-float
> pr103623.c: In function ‘main’:
> pr103623.c:2:16: error: ‘__builtin_unpack_longdouble’ requires the 
> ‘-mhard-float’ option
> 2 | #define UNPACK __builtin_unpack_longdouble
>   |^
> pr103623.c:11:15: note: in expansion of macro ‘UNPACK’
>11 |   double x0 = UNPACK (a, 0);
>   |   ^~
> 
> From what I can see, those builtins were using:
> /* 128-bit long double floating point builtins.  */
> #define BU_LDBL128_2(ENUM, NAME, ATTR, ICODE)   \
>   RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM,  /* ENUM */  \
> "__builtin_" NAME,  /* NAME */  \
> (RS6000_BTM_HARD_FLOAT  /* MASK */  \
>  | RS6000_BTM_LDBL128), \
> (RS6000_BTC_ ## ATTR/* ATTR */  \
>  | RS6000_BTC_BINARY),  \
> CODE_FOR_ ## ICODE) /* ICODE */
>
> /* 128-bit __ibm128 floating point builtins (use -mfloat128 to indicate that
>__ibm128 is available).  */
> #define BU_IBM128_2(ENUM, NAME, ATTR, ICODE)\
>   RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM,  /* ENUM */  \
> "__builtin_" NAME,  /* NAME */  \
> (RS6000_BTM_HARD_FLOAT  /* MASK */  \
>  | RS6000_BTM_FLOAT128),\
> (RS6000_BTC_ ## ATTR/* ATTR */  \
>  | RS6000_BTC_BINARY),  \
> CODE_FOR_ ## ICODE) /* ICODE */
> macros and rs6000_builtin_is_supported_p was checking whether
> all the bits in the mask are set:
>   HOST_WIDE_INT fnmask = rs6000_builtin_info[fncode].mask;
>   if ((fnmask & rs6000_builtin_mask) != fnmask)
> return false;
>   else
> return true;
> (so logical and of all of them).
> 

As Jakub noted here, we don't have the soft-float support for both m32 and m64
before, as the bifs are always guarded under hard-float previously.  I just did
a further check with a powerpc-e300c3-linux-gnu cross-build, the ICE exists for
both m32 and m64.

I guessed the discussions in the PR saying a little more difficulty on m32 than
m64 for extending with soft-float support caused the confusion?

>> What makes it ICE on (at least some configurations of) 32-bit now?  Can
>> you exclude just 32-bit soft float?

As clarified above, both 32-bit and 64-bit has the same root cause for the ICE,
the existing define_insn* supports for these bifs only consider hard-float, such
as for the given test case in the PR, it fails in reload as the recognized
unpacktf_nodm doesn't have any available alternatives at soft-float.  eg: we 
only
have register constraint "d" for
  (match_operand:FMOVE128 1 "register_operand" "d,d") 
but it's only available for hard-float.

BR,
Kewen


Re: [PATCH v2] RISCV: Add support for inlining subword atomics

2022-04-07 Thread Pan RZ

Hi Patrick,

Glad to know that efforts have been made to add inlining subword atomic supports into gcc. 
The patch looks great so far, yet as Andreas Schwab has pointed out (at 
riscv-collab/riscv-gcc#337), looks like it only contains atomic fetch stuff. Just wondering 
do you have further plans to implement support for atomic store / exchange as well? Also, 
as a reminder, note that after adding store / exchange supports, some related macros like 
ATOMIC_BOOL_LOCK_FREE and ATOMIC_CHAR_LOCK_FREE's values (defined in ) may 
need to be set to true. Currently in RISC-V gcc, they are all defined as false. This may 
also need to be done for std::atomic::is_always_lock_free and 
std::atomic_is_lock_free(bool_var).

See:https://en.cppreference.com/w/cpp/atomic/atomic_is_lock_free  and
https://github.com/riscv-collab/riscv-gcc/issues/337#issuecomment-1086664815  


What's your opinion on this?

Best regards, RZ Pan (XieJiSS)



Re: [PATCH] libgcc: IA64: don't compile glibc-based unwinder without libc headers

2022-04-07 Thread Richard Biener via Gcc-patches
On Thu, Apr 7, 2022 at 6:13 PM Sergei Trofimovich via Gcc-patches
 wrote:
>
> From: Sergei Trofimovich 
>
> In --without-headers mode gcc fails to bootstrap on libgcc as:
>
> /build/build/./gcc/xgcc -B/build/build/./gcc/ ... -Dinhibit_libc -c 
> fde-glibc.c
> ../../../gcc-12-20220403/libgcc/config/ia64/fde-glibc.c:33:10:
> fatal error: stdlib.h: No such file or directory
>
> Most other linux targets are able to build the --without-headers
> compiler without additional effort. This change adds IA64 to the fold.
>
> The change drops part of the code that relies on DYNAMIC glibc
> section traversal for backtraces.
>
> Tested bootstrap of ia64-unknown-linux-gnu with and without libc
> headers present.

OK.

Thanks,
Richard.

> libgcc/
> * config/ia64/fde-glibc.c: Make a no-op in inhibit_libc mode.
> ---
>  libgcc/config/ia64/fde-glibc.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/libgcc/config/ia64/fde-glibc.c b/libgcc/config/ia64/fde-glibc.c
> index 9caac2bca24..bd74847fa85 100644
> --- a/libgcc/config/ia64/fde-glibc.c
> +++ b/libgcc/config/ia64/fde-glibc.c
> @@ -22,20 +22,21 @@
> see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> .  */
>
>  /* Locate the FDE entry for a given address, using glibc ld.so routines
> to avoid register/deregister calls at DSO load/unload.  */
>
>  #ifndef _GNU_SOURCE
>  #define _GNU_SOURCE 1
>  #endif
>  #include "config.h"
> +#ifndef inhibit_libc
>  #include 
>  #include 
>  #include 
>  #include "unwind-ia64.h"
>
>  #if __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 2) \
>  || (__GLIBC__ == 2 && __GLIBC_MINOR__ == 2 && !defined(DT_CONFIG))
>  # error You need GLIBC 2.2.4 or later on IA-64 Linux
>  #endif
>
> @@ -152,10 +153,11 @@ _Unwind_FindTableEntry (void *pc, unw_word 
> *segment_base, unw_word *gp,
>data.pc = (Elf64_Addr) pc;
>data.segment_base = segment_base;
>data.gp = gp;
>data.ret = NULL;
>
>if (dl_iterate_phdr (_Unwind_IteratePhdrCallback, &data) < 0)
>  return NULL;
>
>return data.ret;
>  }
> +#endif
> --
> 2.35.1
>


Re: [PATCH] tree-optimization/105175 - avoid -Wvector-operation-performance

2022-04-07 Thread Richard Biener via Gcc-patches
On Thu, 7 Apr 2022, Martin Sebor wrote:

> On 4/7/22 00:59, Richard Biener wrote:
> > On Wed, 6 Apr 2022, Martin Sebor wrote:
> > 
> >> On 4/6/22 03:23, Richard Biener wrote:
> >>> This avoids -Wvector-operation-performance diagnostics for vectorizer
> >>> produced code.  It's unfortunate the warning_at code in
> >>> tree-vect-generic.cc needs adjustments but the diagnostic suppression
> >>> code doesn't magically suppress those otherwise.
> >>
> >> It seems like it should, as long as the statement location hasn't
> >> changed after the suppress_diagnostic call in tree-vect-stmts.cc.
> > 
> > The location doesn't change.
> > 
> >>>
> >>> Bootstrap / regtest running on x86_64-unknown-linux-gnu.
> >>>
> >>> Martin/David - did I miss something obvious when doing the
> >>> tree-vect-generic.cc adjustment?
> >>
> >> The only thing I can think of is that because it's not handled in
> >> diagnostic-spec.cc, -Wvector-operation-performance is lumped in with
> >> all other generic warnings that also aren't handled.  It means that
> >> they are all treated as a group.  Whether or not that's what we want
> >> for this specific warning might be something to consider.
> > 
> > So when I call suppress_warning (gimple *, ..) the suppress_warning_at
> > call constructs a nowarn_spec_t with NW_OTHER, it queries the
> > nowarn_map where it doesn't find anything yet, and goes on
> > with
> > 
> >nowarn_map->put (loc, optspec);
> >return true;
> > 
> > suppress_warning then (since supp is true anyway) goes on with
> > 
> >set_no_warning_bit (stmt, supp);
> > 
> > which is likely what my changes to tree-vect-generic.cc in the end
> > key on.  When I simply invoke
> > 
> > warning_at (loc, OPT_Wvector_operation_performance,
> > "...")
> > 
> > I see nowhere that nowarn_spec_t::nowarn_spec_t is invoked, nor
> > is warning_suppressed_at.  Maybe I'm missing that being done
> > but I think that's by design?  It at least was surprising to me.
> 
> It's been a while so I'm hazy on the details.  I'd initially hoped
> to have warning_at(loc, opt, ...) automatically disable warning opt
> at loc.  It turned out that there are calls to warning_at() with
> same loc and opt where we want to issue the warning (like for
> distinct arguments in the same function call).  But I don't recall
> trying to test warning_suppressed_at() first, before issuing
> a warning.  That would make sense to me.  The only lightly POC
> patch below doesn't seem to cause too much fallout:
> 
> diff --git a/gcc/diagnostic.cc b/gcc/diagnostic.cc
> index 73324a728fe..857d70e5d2e 100644
> --- a/gcc/diagnostic.cc
> +++ b/gcc/diagnostic.cc
> @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "selftest-diagnostic.h"
>  #include "opts.h"
>  #include "cpplib.h"
> +#include "tree.h"
> 
>  #ifdef HAVE_TERMIOS_H
>  # include 
> @@ -1337,6 +1338,10 @@ diagnostic_report_diagnostic (diagnostic_context 
> *context,
>if (!diagnostic_enabled (context, diagnostic))
>  return false;
> 
> +  if (!RESERVED_LOCATION_P (location)
> +  && warning_suppressed_at (location,
> (opt_code)diagnostic->option_index))
> +return false;
> +
>if (!report_warning_p && diagnostic->m_iinfo.m_allsyslocs)
>  /* Bail if the warning is not to be reported because all locations
> in the inlining stack (if there is one) are in system headers.  */

Yeah, we should experiment with something like this for GCC 13.

> > Of course since we lack a warning_at (gimple *, ..) overload
> > or alternatively extending rich-location to cover diagnostic
> > suppression contexts, doing this would only work for stmts with
> > a location that doesn't fall back to that of the current
> > declaration (for UNKNOWN_LOCATION loc).
> 
> David and I discussed adding warning_at(gimple*, ...) and
> warning_at(tree, ...) overloads but decided to go with a narrower
> API in the initial patch and to consider extending it  later.  It
> still seems like a useful feature.

Yes.  An alternative might be to, at least for UNKNOWN_LOCATION,
set some kind of "private UNKNOWN_LOCATION" we can annotate when
we want to disable (specific) diagnostics.  Not sure how that would
work (but then issueing diagnostics at UNKNOWN_LOCATION isn't very
nice anyway).

> > 
> > So my main question was if the diagnostic suppression is supposed
> > to be transparently handled by warning_at (...) or whether indeed
> > explicit guards need to be added to each diagnostic emission.
> > 
> > As I'm now doing
> > 
> > if (!warning_suppressed_p (gsi_stmt (*gsi),
> >   OPT_Wvector_operation_performance))
> > 
> > 
> > I get to get_nowarn_spec for the stmt which will return NULL
> > because the no-warning bit is set (but it's always set in the
> > warning suppression call when done on a stmt!)
> > 
> > When I'm doing
> > 
> >if (!warning_suppressed_at (loc,
> >   OPT_Wvector_operation_performance))
> >

Re: [PATCH 0/3] Fix PR debug/105089

2022-04-07 Thread Richard Biener via Gcc-patches
On Thu, Apr 7, 2022 at 9:42 PM Indu Bhagat via Gcc-patches
 wrote:
>
> ping

The series is OK

> On 3/30/22 4:31 PM, Indu Bhagat wrote:
> > Hello,
> >
> > This patch set fixes PR debug/105089.
> >
> > [PS: The first patch in the series "ctfc: get rid of the static variable in
> > ctf_list_add_ctf_vars" is unrelated to the PR and is combined here only for
> > ease of review.]
> >
> > As noted in the PR debug/105089, gcc is emitting two CTF variable records
> > where it sees an extern variable with declaration and definition in the same
> > compilation unit.
> >
> > The CTF format format does not distinguish between the non-defining decl vs.
> > the defining decl, so the correct behaviour wrt the compiler generating the
> > type for such extern variables is to simply emit the type of the defining
> > declaration.
> >
> > Testing Notes:
> > -- bootstrapped and reg tested on x86_64 and aarch64
> > -- built binutils package with -gctf (with CTF-capable linker) on x86_64, no
> > CTF errors reported.
> >
> > Thanks,
> >
> > Indu Bhagat (3):
> >ctfc: get rid of the static variable in ctf_list_add_ctf_vars ()
> >CTF for extern variable fix [PR105089]
> >Refactor and update CTF testcases [PR105089]
> >
> >   gcc/ctfc.cc   | 62 ++-
> >   gcc/ctfc.h|  8 ++-
> >   gcc/ctfout.cc | 28 ++---
> >   gcc/dwarf2ctf.cc  | 18 +-
> >   gcc/testsuite/gcc.dg/debug/ctf/ctf-array-2.c  | 22 +++
> >   gcc/testsuite/gcc.dg/debug/ctf/ctf-array-5.c  | 17 +
> >   .../gcc.dg/debug/ctf/ctf-variables-3.c| 22 +++
> >   7 files changed, 147 insertions(+), 30 deletions(-)
> >   create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-array-5.c
> >   create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-variables-3.c
> >
>


Re: [PATCH] c++: set loc on call even if result is discarded

2022-04-07 Thread Richard Biener via Gcc-patches
On Fri, Apr 8, 2022 at 12:49 AM Alexandre Oliva via Gcc-patches
 wrote:
>
> On Apr  6, 2022, Jason Merrill  wrote:
>
> > On 4/6/22 15:37, Alexandre Oliva wrote:
> > Need to adjust this subject line, as well.
>
> *nod*, thanks
>
> >> * tree.cc (protected_set_expr_location): Propagate locus to
> >> call wrapped in cast-to-void.
>
> > I'm reluctant to put this C++-specific change in a simple function
> > shared by all languages;
>
> Perhaps it benefits other languages as well?  The effect is presumably
> desirable on other languages too: setting a cast-to-void's location
> seems completely ineffective, as it's eventually thrown away, and
> perhaps propagating the location to any operand (rather than just calls)
> would carry out the intent of [protected_]set_expr_location more
> effectively.  It doesn't feel right to require every caller to worry
> about that.

Hmm, but then maybe it would be better the task of the code
actually throwing away the cast?  I agree the original
proposed location to fix this looks bad.

> > how about handling it in set_cleanup_locs instead?
>
> Like this?  That seems reasonable to me.  I'll give it a spin.
>
> diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
> index a7f6449dafd2e..43627ed30afcb 100644
> --- a/gcc/cp/semantics.cc
> +++ b/gcc/cp/semantics.cc
> @@ -609,7 +609,17 @@ set_cleanup_locs (tree stmts, location_t loc)
>  {
>if (TREE_CODE (stmts) == CLEANUP_STMT)
>  {
> -  protected_set_expr_location (CLEANUP_EXPR (stmts), loc);
> +  tree t = CLEANUP_EXPR (stmts);
> +  protected_set_expr_location (t, loc);
> +  /* Avoid locus differences for C++ cdtor calls depending on whether
> +cdtor_returns_this: a conversion to void is added to discard the 
> return
> +value, and this conversion ends up carrying the location, and when it
> +gets discarded, the location is lost.  So hold it in the call as
> +well.  */
> +  if (TREE_CODE (t) == NOP_EXPR
> + && TREE_TYPE (t) == void_type_node
> + && TREE_CODE (TREE_OPERAND (t, 0)) == CALL_EXPR)
> +   protected_set_expr_location (TREE_OPERAND (t, 0), loc);
>set_cleanup_locs (CLEANUP_BODY (stmts), loc);
>  }
>else if (TREE_CODE (stmts) == STATEMENT_LIST)
>
>
> --
> Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
>Free Software Activist   GNU Toolchain Engineer
> Disinformation flourishes because many people care deeply about injustice
> but very few check the facts.  Ask me about