[PATCH v1] RISC-V: Adjust FP rint round tests for RV32

2023-11-06 Thread pan2 . li
From: Pan Li 

The FP rint test cases for RV32 need some additional adjustments
to their types and data. This patch makes those adjustments, which
were missed by mistake (for RV32 only) in the FP rint support patch.

Please note the math-llrintf-run-0.c will trigger one ICE in the
vsetvl pass in RV32 only.

./riscv32-unknown-elf-gcc -march=rv32gcv -mabi=ilp32d \
  -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math \
  gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-llrintf-run-0.c \
  -o test.elf -lm

An ICE similar to the one below then occurs; a bugzilla report will be filed for it.

config/riscv/riscv-v.cc:4314
   65 | }
  | ^
0x1fa5223 riscv_vector::validate_change_or_fail(rtx_def*, rtx_def**,
rtx_def*, bool)

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-v.cc:4314
0x1fb1aa2 pre_vsetvl::remove_avl_operand()

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3342
0x1fb18c1 pre_vsetvl::cleaup()

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3308
0x1fb216d pass_vsetvl::lazy_vsetvl()

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3480
0x1fb2214 pass_vsetvl::execute(function*)

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3504

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/unop/math-irint-run-0.c: Adjust
test cases.
* gcc.target/riscv/rvv/autovec/unop/math-llrintf-run-0.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/math-lrint-rv32-run-0.c: Ditto.

Signed-off-by: Pan Li 
---
 .../riscv/rvv/autovec/unop/math-irint-run-0.c | 94 +-
 .../rvv/autovec/unop/math-llrintf-run-0.c | 98 ++-
 .../rvv/autovec/unop/math-lrint-rv32-run-0.c  | 88 -
 3 files changed, 141 insertions(+), 139 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-irint-run-0.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-irint-run-0.c
index 43bc0849695..aae1d95c2b6 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-irint-run-0.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-irint-run-0.c
@@ -5,59 +5,59 @@
 
 #define ARRAY_SIZE 128
 
-float in[ARRAY_SIZE];
-long out[ARRAY_SIZE];
-long ref[ARRAY_SIZE];
+double in[ARRAY_SIZE];
+int out[ARRAY_SIZE];
+int ref[ARRAY_SIZE];
 
-TEST_UNARY_CALL_CVT (float, long, __builtin_lrintf)
-TEST_ASSERT (long)
+TEST_UNARY_CALL_CVT (double, int, __builtin_irint)
+TEST_ASSERT (int)
 
-TEST_INIT_CVT (float, 1.2, long, __builtin_lrintf (1.2), 1)
-TEST_INIT_CVT (float, -1.2, long, __builtin_lrintf (-1.2), 2)
-TEST_INIT_CVT (float, 0.5, long, __builtin_lrintf (0.5), 3)
-TEST_INIT_CVT (float, -0.5, long, __builtin_lrintf (-0.5), 4)
-TEST_INIT_CVT (float, 0.1, long, __builtin_lrintf (0.1), 5)
-TEST_INIT_CVT (float, -0.1, long, __builtin_lrintf (-0.1), 6)
-TEST_INIT_CVT (float, 3.0, long, __builtin_lrintf (3.0), 7)
-TEST_INIT_CVT (float, -3.0, long, __builtin_lrintf (-3.0), 8)
-TEST_INIT_CVT (float, 4503599627370495.5, long, __builtin_lrintf 
(4503599627370495.5), 9)
-TEST_INIT_CVT (float, 4503599627370497.0, long, __builtin_lrintf 
(4503599627370497.0), 10)
-TEST_INIT_CVT (float, -4503599627370495.5, long, __builtin_lrintf 
(-4503599627370495.5), 11)
-TEST_INIT_CVT (float, -4503599627370496.0, long, __builtin_lrintf 
(-4503599627370496.0), 12)
-TEST_INIT_CVT (float, 0.0, long, __builtin_lrintf (-0.0), 13)
-TEST_INIT_CVT (float, -0.0, long, __builtin_lrintf (-0.0), 14)
-TEST_INIT_CVT (float, 9223372036854774784.0, long, __builtin_lrintf 
(9223372036854774784.0), 15)
-TEST_INIT_CVT (float, 9223372036854775808.0, long, __builtin_lrintf 
(9223372036854775808.0), 16)
-TEST_INIT_CVT (float, -9223372036854775808.0, long, __builtin_lrintf 
(-9223372036854775808.0), 17)
-TEST_INIT_CVT (float, -9223372036854777856.0, long, __builtin_lrintf 
(-9223372036854777856.0), 18)
-TEST_INIT_CVT (float, __builtin_inf (), long, __builtin_lrintf (__builtin_inf 
()), 19)
-TEST_INIT_CVT (float, -__builtin_inf (), long, __builtin_lrintf 
(-__builtin_inf ()), 20)
-TEST_INIT_CVT (float, __builtin_nan (""), long, 0x7fff, 21)
+TEST_INIT_CVT (double, 1.2, int, __builtin_irint (1.2), 1)
+TEST_INIT_CVT (double, -1.2, int, __builtin_irint (-1.2), 2)
+TEST_INIT_CVT (double, 0.5, int, __builtin_irint (0.5), 3)
+TEST_INIT_CVT (double, -0.5, int, __builtin_irint (-0.5), 4)
+TEST_INIT_CVT (double, 0.1, int, __builtin_irint (0.1), 5)
+TEST_INIT_CVT (double, -0.1, int, __builtin_irint (-0.1), 6)
+TEST_INIT_CVT (double, 3.0, int, __builtin_irint (3.0), 7)
+TEST_INIT_CVT (double, -3.0, int, __builtin_irint (-3.0), 8)
+TEST_INIT_CVT (double, 4503599627370495.5, int, __builtin_irint 
(4503599627370495.5), 9)
+TEST_INIT_CVT (double, 4503599627370497.0, int, __builtin_irint 
(4503599627370497.0), 10)
+TEST_INIT_CVT (double, -4503599

Re: [PATCH v1] RISC-V: Adjust FP rint round tests for RV32

2023-11-06 Thread juzhe.zh...@rivai.ai
LGTM.



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-11-06 16:33
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Adjust FP rint round tests for RV32
From: Pan Li 
 
The FP rint test cases for RV32 need some additional adjustments
to their types and data. This patch makes those adjustments, which
were missed by mistake (for RV32 only) in the FP rint support patch.
 
Please note the math-llrintf-run-0.c will trigger one ICE in the
vsetvl pass in RV32 only.
 
./riscv32-unknown-elf-gcc -march=rv32gcv -mabi=ilp32d \
  -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math \
  gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-llrintf-run-0.c \
  -o test.elf -lm
 
An ICE similar to the one below then occurs; a bugzilla report will be filed for it.
 
config/riscv/riscv-v.cc:4314
   65 | }
  | ^
0x1fa5223 riscv_vector::validate_change_or_fail(rtx_def*, rtx_def**,
rtx_def*, bool)

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-v.cc:4314
0x1fb1aa2 pre_vsetvl::remove_avl_operand()

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3342
0x1fb18c1 pre_vsetvl::cleaup()

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3308
0x1fb216d pass_vsetvl::lazy_vsetvl()

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3480
0x1fb2214 pass_vsetvl::execute(function*)

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3504
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/unop/math-irint-run-0.c: Adjust
test cases.
* gcc.target/riscv/rvv/autovec/unop/math-llrintf-run-0.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/math-lrint-rv32-run-0.c: Ditto.
 
Signed-off-by: Pan Li 
---
.../riscv/rvv/autovec/unop/math-irint-run-0.c | 94 +-
.../rvv/autovec/unop/math-llrintf-run-0.c | 98 ++-
.../rvv/autovec/unop/math-lrint-rv32-run-0.c  | 88 -
3 files changed, 141 insertions(+), 139 deletions(-)
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-irint-run-0.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-irint-run-0.c
index 43bc0849695..aae1d95c2b6 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-irint-run-0.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-irint-run-0.c
@@ -5,59 +5,59 @@
#define ARRAY_SIZE 128
-float in[ARRAY_SIZE];
-long out[ARRAY_SIZE];
-long ref[ARRAY_SIZE];
+double in[ARRAY_SIZE];
+int out[ARRAY_SIZE];
+int ref[ARRAY_SIZE];
-TEST_UNARY_CALL_CVT (float, long, __builtin_lrintf)
-TEST_ASSERT (long)
+TEST_UNARY_CALL_CVT (double, int, __builtin_irint)
+TEST_ASSERT (int)
-TEST_INIT_CVT (float, 1.2, long, __builtin_lrintf (1.2), 1)
-TEST_INIT_CVT (float, -1.2, long, __builtin_lrintf (-1.2), 2)
-TEST_INIT_CVT (float, 0.5, long, __builtin_lrintf (0.5), 3)
-TEST_INIT_CVT (float, -0.5, long, __builtin_lrintf (-0.5), 4)
-TEST_INIT_CVT (float, 0.1, long, __builtin_lrintf (0.1), 5)
-TEST_INIT_CVT (float, -0.1, long, __builtin_lrintf (-0.1), 6)
-TEST_INIT_CVT (float, 3.0, long, __builtin_lrintf (3.0), 7)
-TEST_INIT_CVT (float, -3.0, long, __builtin_lrintf (-3.0), 8)
-TEST_INIT_CVT (float, 4503599627370495.5, long, __builtin_lrintf 
(4503599627370495.5), 9)
-TEST_INIT_CVT (float, 4503599627370497.0, long, __builtin_lrintf 
(4503599627370497.0), 10)
-TEST_INIT_CVT (float, -4503599627370495.5, long, __builtin_lrintf 
(-4503599627370495.5), 11)
-TEST_INIT_CVT (float, -4503599627370496.0, long, __builtin_lrintf 
(-4503599627370496.0), 12)
-TEST_INIT_CVT (float, 0.0, long, __builtin_lrintf (-0.0), 13)
-TEST_INIT_CVT (float, -0.0, long, __builtin_lrintf (-0.0), 14)
-TEST_INIT_CVT (float, 9223372036854774784.0, long, __builtin_lrintf 
(9223372036854774784.0), 15)
-TEST_INIT_CVT (float, 9223372036854775808.0, long, __builtin_lrintf 
(9223372036854775808.0), 16)
-TEST_INIT_CVT (float, -9223372036854775808.0, long, __builtin_lrintf 
(-9223372036854775808.0), 17)
-TEST_INIT_CVT (float, -9223372036854777856.0, long, __builtin_lrintf 
(-9223372036854777856.0), 18)
-TEST_INIT_CVT (float, __builtin_inf (), long, __builtin_lrintf (__builtin_inf 
()), 19)
-TEST_INIT_CVT (float, -__builtin_inf (), long, __builtin_lrintf 
(-__builtin_inf ()), 20)
-TEST_INIT_CVT (float, __builtin_nan (""), long, 0x7fff, 21)
+TEST_INIT_CVT (double, 1.2, int, __builtin_irint (1.2), 1)
+TEST_INIT_CVT (double, -1.2, int, __builtin_irint (-1.2), 2)
+TEST_INIT_CVT (double, 0.5, int, __builtin_irint (0.5), 3)
+TEST_INIT_CVT (double, -0.5, int, __builtin_irint (-0.5), 4)
+TEST_INIT_CVT (double, 0.1, int, __builtin_irint (0.1), 5)
+TEST_INIT_CVT (double, -0.1, int, __builtin_irint (-0.1), 6)
+TEST_INIT_CVT (double, 3.0, int, __builtin_irint (3.0), 7)
+TEST_INIT_CVT (double, -3.0, int, __builtin_irint (-3.0), 8)
+TEST_INIT_CVT (double, 4503599627370495.5, int, _

Re: [PATCH] Fix PR ada/111909 On Darwin, determine filesystem case sensitivity at runtime

2023-11-06 Thread Arnaud Charlet
> > So without changing fundamentally the model, you can't decide dynamically 
> > for the whole
> > system. Making the choice based on the current directory is pretty random, 
> > since the current
> > directory isn't well defined at program's start up and could be pretty much 
> > any filesystem.
> 
> I’d imagine that projects spread over more than one 
> differently-case-sensitive filesystem would
> be rare. As to the current directory at compiler startup, with GPRbuild it’s 
> the object directory, so
> likely to be somewhere near the project’s source tree.

I am not talking about the current directory when the compiler runs, I am 
talking about the
current directory where the target program runs, which can be pretty much 
anywhere.

In other words, you are modifying a runtime file (adaint.c) which is used both 
by the host compiler
and by the target applications. My comment worries about the target 
applications while yours
applies to the host compiler only.

> > Note that the current setting on arm is actually for iOS, which we did 
> > support at AdaCore
> > at some point (and could revive in the future, who knows).
> 
> Wouldn’t it be more natural to go via LLVM? I understand from Iain that iOS 
> isn’t currently
> supported by GCC.

That's another option. We'd like to keep both options on the table, since both 
options have
pros and cons.

> > So it would be fine to refine the test to differentiate between macOS and 
> > embedded iOS and co,
> > that would be a better change here.
> 
> There didn’t seem to be a way to do that.

OK, I thought there would be some defines that we could use for that, too bad 
if there isn't
and indeed we might need to perform another runtime check then as suggested by 
Iain.

Arno


Re: [PATCH] openmp: Add support for the 'indirect' clause in C/C++

2023-11-06 Thread Tobias Burnus

On 03.11.23 20:53, Kwok Cheung Yeung wrote:

On 17/10/2023 2:12 pm, Tobias Burnus wrote:

C++11 (and C23) attributes do not seem to be properly handled:


(Side remark: Since Saturday, the [[omp::]] attributes syntax is now
also supported in C23.)

[Quoted email text by Kwok: Lots of lines removed that describe how
previously found issues were fixed.]


Okay for mainline, pending successful testing (still in progress)?


LGTM - thanks for the patch and the follow-up fixes.

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[PATCH] tree-optimization/112369 - strip_float_extensions and vectors

2023-11-06 Thread Richard Biener
The following fixes an error in strip_float_extensions when facing
vector conversions.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/112369
* tree.cc (strip_float_extensions): Use element_precision.

* gcc.dg/pr112369.c: New testcase.
---
 gcc/testsuite/gcc.dg/pr112369.c | 23 +++
 gcc/tree.cc |  2 +-
 2 files changed, 24 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr112369.c

diff --git a/gcc/testsuite/gcc.dg/pr112369.c b/gcc/testsuite/gcc.dg/pr112369.c
new file mode 100644
index 000..677e3543f54
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr112369.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+
+struct GdkRGBA2 {
+  double a[4];
+};
+struct GdkRGBA3 {
+  float a[4];
+};
+struct GdkRGBA3 f(struct GdkRGBA2 *color) {
+  struct GdkRGBA3 t1;
+  for(int i = 0; i < 4; i++)
+t1.a[i] = color->a[i];
+  struct GdkRGBA3 t2;
+  for(int i = 0; i < 4; i++)
+  {
+float tmp = t1.a[i];
+if (__builtin_isnan(tmp))
+t2.a[i] = tmp;
+  }
+  return t2;
+}
diff --git a/gcc/tree.cc b/gcc/tree.cc
index cfead156ddf..9c9b057cd88 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -12138,7 +12138,7 @@ strip_float_extensions (tree exp)
   if (DECIMAL_FLOAT_TYPE_P (expt) != DECIMAL_FLOAT_TYPE_P (subt))
 return exp;
 
-  if (TYPE_PRECISION (subt) > TYPE_PRECISION (expt))
+  if (element_precision (subt) > element_precision (expt))
 return exp;
 
   return strip_float_extensions (sub);
-- 
2.35.3


Re: [RFC] vect: disable multiple calls of poly simdclones

2023-11-06 Thread Andrew Stubbs




On 06/11/2023 07:52, Richard Biener wrote:

On Fri, 3 Nov 2023, Andre Vieira (lists) wrote:


Hi,

The current codegen code to support VF's that are multiples of a simdclone
simdlen relies on BIT_FIELD_REF to create multiple input vectors.  This does not
work for non-constant simdclones, so we should disable using such clones when
the VF is a multiple of the non-constant simdlen until we change the codegen
to support those.

Enabling SVE simdclone support will cause ICEs if the vectorizer decides to
use a SVE simdclone with a VF that is larger than the simdlen. I'll be away
for the next two weeks, so can't really discuss this further.
I initially tried to solve the problem, but the way
vectorizable_simd_clone_call is structured doesn't make it easy to replace
BIT_FIELD_REF with the poly-suitable solution right now of using
unpack_{hi,lo}.


I think it should be straight-forward to use unpack_{even,odd} (it's
even/odd for VLA, right?  If lo/hi would be possible then doing
BIT_FIELD_REF would be, too?  Also you need to have multiple stages
of unpack/pack when the factor is more than 2).

There's plenty of time even during stage3 to address this.

At least your patch should have come with a testcase (or two).

Is there a bugreport tracking this issue?  It should affect GCN as well
I guess.


What does "non-constant simdclones" mean? I'm not sure if this is a 
thing that can happen on GCN, or not?


Andrew


[PATCH-3v2, rs6000] Enable 16-byte by pieces move [PR111449]

2023-11-06 Thread HAO CHEN GUI
Hi,
  Patch 2 enables 16-byte by pieces move on rs6000. This patch fixes
the regression cases caused by that previous patch. For sra-17/18, the long
array with 4 elements can be loaded by one 16-byte by pieces move on 32-bit
platform. So the array is not constructed in LC0 and the SRA optimization
cannot be performed. The "no-vsx" option is added for 32-bit platform, as
it sets MOVE_MAX_PIECES to 4 bytes on 32-bit platform, so the array
can't be loaded by one by pieces move.

  Another regression is on P8 LE. The 16-byte memory to memory is
implemented by two TImode load/store. The TImode load/store is finally
split to two DImode load/store on P8 LE as it doesn't have unaligned
vector load/store instructions. Actually, 16-byte memory to memory move
can be implemented by two V2DI reversed load/store on P8 LE. The patch
creates an insn_and_split pattern for this optimization.

  Compared to previous version, it fixes the syntax errors in test cases.

  Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no
regressions. Is this OK for trunk?

Thanks
Gui Haochen

ChangeLog
rs6000: Enable 16-byte by pieces move

This patch enables 16-byte by pieces move.  The 16-byte move is generated
with TImode and finally implemented by vector instructions.  There are
several regression cases after the enablement.  16-byte TImode memory to
memory move is originally implemented by two pairs of DImode load/store on
P8 LE as there is no unaligned vsx load/store on it.  The patch fixes
the problem by creating an insn_and_split pattern and converts it to one
pair of reversed load/store.  Two SRA cases lost the SRA optimization as
the array can be loaded by one 16-byte move and so is not initialized in
LC0 on 32-bit platform.  Fix them by adding the no-vsx option.

gcc/
PR target/111449
* config/rs6000/vsx.md (*vsx_le_mem_to_mem_mov_ti): New.

gcc/testsuite/
PR target/111449
* gcc.dg/tree-ssa/sra-17.c: Add no-vsx option for powerpc ilp32.
* gcc.dg/tree-ssa/sra-18.c: Likewise.
* gcc.target/powerpc/pr111449-2.c: New.


patch.diff
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index f3b40229094..9f6bc49998a 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -414,6 +414,27 @@ (define_mode_attr VM3_char [(V2DI "d")

 ;; VSX moves

+;; TImode memory to memory move optimization on LE with p8vector
+(define_insn_and_split "*vsx_le_mem_to_mem_mov_ti"
+  [(set (match_operand:TI 0 "indexed_or_indirect_operand" "=Z")
+   (match_operand:TI 1 "indexed_or_indirect_operand" "Z"))]
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
+   && !MEM_VOLATILE_P (operands[0])
+   && !MEM_VOLATILE_P (operands[1])
+   && !reload_completed"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx tmp = gen_reg_rtx (V2DImode);
+  rtx src =  adjust_address (operands[1], V2DImode, 0);
+  emit_insn (gen_vsx_ld_elemrev_v2di (tmp, src));
+  rtx dest = adjust_address (operands[0], V2DImode, 0);
+  emit_insn (gen_vsx_st_elemrev_v2di (dest, tmp));
+  DONE;
+}
+  [(set_attr "length" "16")])
+
 ;; The patterns for LE permuted loads and stores come before the general
 ;; VSX moves so they match first.
 (define_insn_and_split "*vsx_le_perm_load_"
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-17.c 
b/gcc/testsuite/gcc.dg/tree-ssa/sra-17.c
index 221d96b6cd9..b0d4811e77b 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/sra-17.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-17.c
@@ -1,6 +1,7 @@
 /* { dg-do run { target { aarch64*-*-* alpha*-*-* arm*-*-* hppa*-*-* 
powerpc*-*-* s390*-*-* } } } */
 /* { dg-options "-O2 -fdump-tree-esra --param 
sra-max-scalarization-size-Ospeed=32" } */
 /* { dg-additional-options "-mcpu=ev4" { target alpha*-*-* } } */
+/* { dg-additional-options "-mno-vsx" { target { powerpc*-*-* && ilp32 } } } */

 extern void abort (void);

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-18.c 
b/gcc/testsuite/gcc.dg/tree-ssa/sra-18.c
index f5e6a21c2ae..2cdeae6e9e7 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/sra-18.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-18.c
@@ -1,6 +1,7 @@
 /* { dg-do run { target { aarch64*-*-* alpha*-*-* arm*-*-* hppa*-*-* 
powerpc*-*-* s390*-*-* } } } */
 /* { dg-options "-O2 -fdump-tree-esra --param 
sra-max-scalarization-size-Ospeed=32" } */
 /* { dg-additional-options "-mcpu=ev4" { target alpha*-*-* } } */
+/* { dg-additional-options "-mno-vsx" { target { powerpc*-*-* && ilp32 } } } */

 extern void abort (void);
 struct foo { long x; };
diff --git a/gcc/testsuite/gcc.target/powerpc/pr111449-2.c 
b/gcc/testsuite/gcc.target/powerpc/pr111449-2.c
new file mode 100644
index 000..7003bdc0208
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr111449-2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target { has_arch_pwr8 } } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mvsx -O2" } */
+
+/* Ensure 16-byte by pieces move is enabled.  */
+
+void move1 (void *s1, void *s2)
+{
+  __builtin_memcpy (s1, s2, 16);
+}
+
+

Re: [RFC] vect: disable multiple calls of poly simdclones

2023-11-06 Thread Richard Biener
On Mon, 6 Nov 2023, Andrew Stubbs wrote:

> 
> 
> On 06/11/2023 07:52, Richard Biener wrote:
> > On Fri, 3 Nov 2023, Andre Vieira (lists) wrote:
> > 
> >> Hi,
> >>
> >> The current codegen code to support VF's that are multiples of a simdclone
> >> simdlen rely on BIT_FIELD_REF to create multiple input vectors.  This does
> >> not
> >> work for non-constant simdclones, so we should disable using such clones
> >> when
> >> the VF is a multiple of the non-constant simdlen until we change the
> >> codegen
> >> to support those.
> >>
> >> Enabling SVE simdclone support will cause ICEs if the vectorizer decides to
> >> use a SVE simdclone with a VF that is larger than the simdlen. I'll be away
> >> for the next two weeks, so cant' really discuss this further.
> >> I initially tried to solve the problem, but the way
> >> vectorizable_simd_clone_call is structured doesn't make it easy to replace
> >> BIT_FIELD_REF with the poly-suitable solution right now of using
> >> unpack_{hi,lo}.
> > 
> > I think it should be straight-forward to use unpack_{even,odd} (it's
> > even/odd for VLA, right?  If lo/hi would be possible then doing
> > BIT_FIELD_REF would be, too?  Also you need to have multiple stages
> > of unpack/pack when the factor is more than 2).
> > 
> > There's plenty of time even during stage3 to address this.
> > 
> > At least your patch should have come with a testcase (or two).
> > 
> > Is there a bugreport tracking this issue?  It should affect GCN as well
> > I guess.
> 
> What does "non-constant simdclones" mean? I'm not sure if this is a thing that
> can happen on GCN, or not?

simdclone with a variable (POLY_INT) vector size.

Richard.


Re: [PATCH] libstdc++: Improve static assert messages for monadic operations

2023-11-06 Thread Jonathan Wakely
On Thu, 2 Nov 2023 at 14:55, Jonathan Wakely  wrote:
>
> Any objections or suggestions for better wording?
>
> Tested x86_64-linux.

Pushed to trunk.


>
> -- >8 --
>
> The monadic operations for std::optional and std::expected make use of
> internal helper traits __is_optional and __is_expected, which are not
> very user-friendly when shown in diagnostics. Add messages to the
> assertions explaining the problem more clearly.
>
> libstdc++-v3/ChangeLog:
>
> * include/std/expected (expected::and_then, expected::or_else):
> Add string literals to static assertions.
> * include/std/optional (optional::and_then, optional::or_else):
> Likewise.
> ---
>  libstdc++-v3/include/std/expected | 64 +++
>  libstdc++-v3/include/std/optional | 24 +---
>  2 files changed, 66 insertions(+), 22 deletions(-)
>
> diff --git a/libstdc++-v3/include/std/expected 
> b/libstdc++-v3/include/std/expected
> index a796f0b6f27..a176d4c3a78 100644
> --- a/libstdc++-v3/include/std/expected
> +++ b/libstdc++-v3/include/std/expected
> @@ -843,8 +843,12 @@ namespace __expected
> and_then(_Fn&& __f) &
> {
>   using _Up = __expected::__result<_Fn, _Tp&>;
> - static_assert(__expected::__is_expected<_Up>);
> - static_assert(is_same_v);
> + static_assert(__expected::__is_expected<_Up>,
> +   "the function passed to std::expected::and_then 
> "
> +   "must return a std::expected");
> + static_assert(is_same_v,
> +   "the function passed to std::expected::and_then 
> "
> +   "must return a std::expected with the same 
> error_type");
>
>   if (has_value())
> return std::__invoke(std::forward<_Fn>(__f), _M_val);
> @@ -857,8 +861,12 @@ namespace __expected
> and_then(_Fn&& __f) const &
> {
>   using _Up = __expected::__result<_Fn, const _Tp&>;
> - static_assert(__expected::__is_expected<_Up>);
> - static_assert(is_same_v);
> + static_assert(__expected::__is_expected<_Up>,
> +   "the function passed to std::expected::and_then 
> "
> +   "must return a std::expected");
> + static_assert(is_same_v,
> +   "the function passed to std::expected::and_then 
> "
> +   "must return a std::expected with the same 
> error_type");
>
>   if (has_value())
> return std::__invoke(std::forward<_Fn>(__f), _M_val);
> @@ -871,8 +879,12 @@ namespace __expected
> and_then(_Fn&& __f) &&
> {
>   using _Up = __expected::__result<_Fn, _Tp&&>;
> - static_assert(__expected::__is_expected<_Up>);
> - static_assert(is_same_v);
> + static_assert(__expected::__is_expected<_Up>,
> +   "the function passed to std::expected::and_then 
> "
> +   "must return a std::expected");
> + static_assert(is_same_v,
> +   "the function passed to std::expected::and_then 
> "
> +   "must return a std::expected with the same 
> error_type");
>
>   if (has_value())
> return std::__invoke(std::forward<_Fn>(__f), std::move(_M_val));
> @@ -886,8 +898,12 @@ namespace __expected
> and_then(_Fn&& __f) const &&
> {
>   using _Up = __expected::__result<_Fn, const _Tp&&>;
> - static_assert(__expected::__is_expected<_Up>);
> - static_assert(is_same_v);
> + static_assert(__expected::__is_expected<_Up>,
> +   "the function passed to std::expected::and_then 
> "
> +   "must return a std::expected");
> + static_assert(is_same_v,
> +   "the function passed to std::expected::and_then 
> "
> +   "must return a std::expected with the same 
> error_type");
>
>   if (has_value())
> return std::__invoke(std::forward<_Fn>(__f), std::move(_M_val));
> @@ -900,8 +916,12 @@ namespace __expected
> or_else(_Fn&& __f) &
> {
>   using _Gr = __expected::__result<_Fn, _Er&>;
> - static_assert(__expected::__is_expected<_Gr>);
> - static_assert(is_same_v);
> + static_assert(__expected::__is_expected<_Gr>,
> +   "the function passed to std::expected::or_else "
> +   "must return a std::expected");
> + static_assert(is_same_v,
> +   "the function passed to std::expected::or_else "
> +   "must return a std::expected with the same 
> value_type");
>
>   if (has_value())
> return _Gr(in_place, _M_val);
> @@ -914,8 +934,12 @@ namespace __expected
> or_else(_Fn&& __f) const &
> {
>   using _Gr = __expected::__result<_Fn, const _Er&>;
> - static_assert(__expected::__is_expected<_

[PATCH v3 1/2]middle-end: expand copysign handling from lockstep to nested iters

2023-11-06 Thread Tamar Christina
Hi All,

Various optimizations in match.pd only happened on COPYSIGN in lock step,
which means they exclude IFN_COPYSIGN.  COPYSIGN however is restricted to only
the C99 builtins and so doesn't work for vectors.

The patch expands these optimizations to work as nested iters.

This is needed for the second patch which will add the testcase.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

PR tree-optimization/109154
* match.pd: Expand existing copysign optimizations.

--- inline copy of patch -- 
diff --git a/gcc/match.pd b/gcc/match.pd
index 
7d651a6582d169793cca4f9a70e334dd80014d92..db95931df0672cf4ef08cca36085c3aa6831519e
 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1074,37 +1074,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
 /* cos(copysign(x, y)) -> cos(x).  Similarly for cosh.  */
 (for coss (COS COSH)
- copysigns (COPYSIGN)
- (simplify
-  (coss (copysigns @0 @1))
-   (coss @0)))
+ (for copysigns (COPYSIGN)
+  (simplify
+   (coss (copysigns @0 @1))
+(coss @0
 
 /* pow(copysign(x, y), z) -> pow(x, z) if z is an even integer.  */
 (for pows (POW)
- copysigns (COPYSIGN)
- (simplify
-  (pows (copysigns @0 @2) REAL_CST@1)
-  (with { HOST_WIDE_INT n; }
-   (if (real_isinteger (&TREE_REAL_CST (@1), &n) && (n & 1) == 0)
-(pows @0 @1)
+ (for copysigns (COPYSIGN)
+  (simplify
+   (pows (copysigns @0 @2) REAL_CST@1)
+   (with { HOST_WIDE_INT n; }
+(if (real_isinteger (&TREE_REAL_CST (@1), &n) && (n & 1) == 0)
+ (pows @0 @1))
 /* Likewise for powi.  */
 (for pows (POWI)
- copysigns (COPYSIGN)
- (simplify
-  (pows (copysigns @0 @2) INTEGER_CST@1)
-  (if ((wi::to_wide (@1) & 1) == 0)
-   (pows @0 @1
+ (for copysigns (COPYSIGN)
+  (simplify
+   (pows (copysigns @0 @2) INTEGER_CST@1)
+   (if ((wi::to_wide (@1) & 1) == 0)
+(pows @0 @1)
 
 (for hypots (HYPOT)
- copysigns (COPYSIGN)
- /* hypot(copysign(x, y), z) -> hypot(x, z).  */
- (simplify
-  (hypots (copysigns @0 @1) @2)
-  (hypots @0 @2))
- /* hypot(x, copysign(y, z)) -> hypot(x, y).  */
- (simplify
-  (hypots @0 (copysigns @1 @2))
-  (hypots @0 @1)))
+ (for copysigns (COPYSIGN)
+  /* hypot(copysign(x, y), z) -> hypot(x, z).  */
+  (simplify
+   (hypots (copysigns @0 @1) @2)
+   (hypots @0 @2))
+  /* hypot(x, copysign(y, z)) -> hypot(x, y).  */
+  (simplify
+   (hypots @0 (copysigns @1 @2))
+   (hypots @0 @1
 
 /* copysign(x, CST) -> [-]abs (x).  */
 (for copysigns (COPYSIGN_ALL)




-- 
diff --git a/gcc/match.pd b/gcc/match.pd
index 
7d651a6582d169793cca4f9a70e334dd80014d92..db95931df0672cf4ef08cca36085c3aa6831519e
 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1074,37 +1074,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
 /* cos(copysign(x, y)) -> cos(x).  Similarly for cosh.  */
 (for coss (COS COSH)
- copysigns (COPYSIGN)
- (simplify
-  (coss (copysigns @0 @1))
-   (coss @0)))
+ (for copysigns (COPYSIGN)
+  (simplify
+   (coss (copysigns @0 @1))
+(coss @0
 
 /* pow(copysign(x, y), z) -> pow(x, z) if z is an even integer.  */
 (for pows (POW)
- copysigns (COPYSIGN)
- (simplify
-  (pows (copysigns @0 @2) REAL_CST@1)
-  (with { HOST_WIDE_INT n; }
-   (if (real_isinteger (&TREE_REAL_CST (@1), &n) && (n & 1) == 0)
-(pows @0 @1)
+ (for copysigns (COPYSIGN)
+  (simplify
+   (pows (copysigns @0 @2) REAL_CST@1)
+   (with { HOST_WIDE_INT n; }
+(if (real_isinteger (&TREE_REAL_CST (@1), &n) && (n & 1) == 0)
+ (pows @0 @1))
 /* Likewise for powi.  */
 (for pows (POWI)
- copysigns (COPYSIGN)
- (simplify
-  (pows (copysigns @0 @2) INTEGER_CST@1)
-  (if ((wi::to_wide (@1) & 1) == 0)
-   (pows @0 @1
+ (for copysigns (COPYSIGN)
+  (simplify
+   (pows (copysigns @0 @2) INTEGER_CST@1)
+   (if ((wi::to_wide (@1) & 1) == 0)
+(pows @0 @1)
 
 (for hypots (HYPOT)
- copysigns (COPYSIGN)
- /* hypot(copysign(x, y), z) -> hypot(x, z).  */
- (simplify
-  (hypots (copysigns @0 @1) @2)
-  (hypots @0 @2))
- /* hypot(x, copysign(y, z)) -> hypot(x, y).  */
- (simplify
-  (hypots @0 (copysigns @1 @2))
-  (hypots @0 @1)))
+ (for copysigns (COPYSIGN)
+  /* hypot(copysign(x, y), z) -> hypot(x, z).  */
+  (simplify
+   (hypots (copysigns @0 @1) @2)
+   (hypots @0 @2))
+  /* hypot(x, copysign(y, z)) -> hypot(x, y).  */
+  (simplify
+   (hypots @0 (copysigns @1 @2))
+   (hypots @0 @1
 
 /* copysign(x, CST) -> [-]abs (x).  */
 (for copysigns (COPYSIGN_ALL)





[PATCH v3 2/2]middle-end match.pd: optimize fneg (fabs (x)) to copysign (x, -1) [PR109154]

2023-11-06 Thread Tamar Christina
Hi All,

This patch transforms fneg (fabs (x)) into copysign (x, -1) which is more
canonical and allows a target to expand this sequence efficiently.  Such
sequences are common in scientific code working with gradients.

There is an existing canonicalization of copysign (x, -1) to fneg (fabs (x))
which I remove since this is a less efficient form.  The testsuite is also
updated in light of this.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

PR tree-optimization/109154
* match.pd: Add new neg+abs rule, remove inverse copysign rule.

gcc/testsuite/ChangeLog:

PR tree-optimization/109154
* gcc.dg/fold-copysign-1.c: Updated.
* gcc.dg/pr55152-2.c: Updated.
* gcc.dg/tree-ssa/abs-4.c: Updated.
* gcc.dg/tree-ssa/backprop-6.c: Updated.
* gcc.dg/tree-ssa/copy-sign-2.c: Updated.
* gcc.dg/tree-ssa/mult-abs-2.c: Updated.
* gcc.target/aarch64/fneg-abs_1.c: New test.
* gcc.target/aarch64/fneg-abs_2.c: New test.
* gcc.target/aarch64/fneg-abs_3.c: New test.
* gcc.target/aarch64/fneg-abs_4.c: New test.
* gcc.target/aarch64/sve/fneg-abs_1.c: New test.
* gcc.target/aarch64/sve/fneg-abs_2.c: New test.
* gcc.target/aarch64/sve/fneg-abs_3.c: New test.
* gcc.target/aarch64/sve/fneg-abs_4.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/match.pd b/gcc/match.pd
index 
db95931df0672cf4ef08cca36085c3aa6831519e..7a023d510c283c43a87b1795a74761b8af979b53
 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1106,13 +1106,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(hypots @0 (copysigns @1 @2))
(hypots @0 @1
 
-/* copysign(x, CST) -> [-]abs (x).  */
-(for copysigns (COPYSIGN_ALL)
- (simplify
-  (copysigns @0 REAL_CST@1)
-  (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
-   (negate (abs @0))
-   (abs @0
+/* Transform fneg (fabs (X)) -> copysign (X, -1).  */
+
+(simplify
+ (negate (abs @0))
+ (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))
 
 /* copysign(copysign(x, y), z) -> copysign(x, z).  */
 (for copysigns (COPYSIGN_ALL)
diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c 
b/gcc/testsuite/gcc.dg/fold-copysign-1.c
index 
f17d65c24ee4dca9867827d040fe0a404c515e7b..f9cafd14ab05f5e8ab2f6f68e62801d21c2df6a6
 100644
--- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
+++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
@@ -12,5 +12,5 @@ double bar (double x)
   return __builtin_copysign (x, minuszero);
 }
 
-/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" } } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
index 
54db0f2062da105a829d6690ac8ed9891fe2b588..605f202ed6bc7aa8fe921457b02ff0b88cc63ce6
 100644
--- a/gcc/testsuite/gcc.dg/pr55152-2.c
+++ b/gcc/testsuite/gcc.dg/pr55152-2.c
@@ -10,4 +10,5 @@ int f(int a)
   return (a<-a)?a:-a;
 }
 
-/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c 
b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
index 
6197519faf7b55aed7bc162cd0a14dd2145210ca..e1b825f37f69ac3c4666b3a52d733368805ad31d
 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
@@ -9,5 +9,6 @@ long double abs_ld(long double x) { return __builtin_signbit(x) 
? x : -x; }
 
 /* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP */
 /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= -" 3 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c 
b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
index 
31f05716f1498dc709cac95fa20fb5796642c77e..c3a138642d6ff7be984e91fa1343cb2718db7ae1
 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
@@ -26,5 +26,6 @@ TEST_FUNCTION (float, f)
 TEST_FUNCTION (double, )
 TEST_FUNCTION (long double, l)
 
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" } } */
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 
"backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 
"backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 
"backprop

[PATCH] Avoid generating RTL code when d->testing_p.

2023-11-06 Thread liuhongt
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready push to trunk.

gcc/ChangeLog:

PR target/112393
* config/i386/i386-expand.cc (ix86_expand_vec_perm_vpermt2):
Avoid generating RTL code when d->testing_p.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr112393.c: New test.
---
 gcc/config/i386/i386-expand.cc   |  3 +++
 gcc/testsuite/gcc.target/i386/pr112393.c | 20 
 2 files changed, 23 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr112393.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 6ae5830037d..8fad73c1549 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -5245,6 +5245,9 @@ ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx 
op0, rtx op1,
   if (gen == NULL)
 return false;
 
+  if (d && d->testing_p)
+return true;
+
   /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const
  expander, so args are either in d, or in op0, op1 etc.  */
   if (d)
diff --git a/gcc/testsuite/gcc.target/i386/pr112393.c 
b/gcc/testsuite/gcc.target/i386/pr112393.c
new file mode 100644
index 000..c5c5b95b188
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112393.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -Wuninitialized" } */
+typedef _Float16 __attribute__((__vector_size__ (32))) V;
+
+V v;
+
+void
+foo (void)
+{
+  (void) __builtin_shufflevector (v, __builtin_shufflevector (v, (V){},
+  3, 0, 2, 2,
+ 5, 6, 3, 7, 5,
+ 6, 0, 8, 6, 4,
+ 3, 2, 8, 9, 5,
+ 8, 8, 7, 5, 4,
+ 8, 9, 1, 2, 4,
+ 9, 9, 7),
+  40, 33);
+}
+
-- 
2.31.1



Re: [PATCH] libstdc++/complex: Remove implicit type casts in complex

2023-11-06 Thread Jonathan Wakely
On Fri, 3 Nov 2023 at 17:47, Weslley da Silva Pereira
 wrote:
>
> Hi Jonathan,
>
> I am sorry for the delay. The mailing lists libstd...@gcc.gnu.org and 
> gcc-patches@gcc.gnu.org have just too many emails, so your email got lost. I 
> hope my changes still make sense to be included in GCC. Please, find my 
> comments below.

Hi,

Thanks for the updated patch, test etc. Yes, I think this still makes
sense and I'll take care of committing it.



>
> On Thu, May 11, 2023 at 3:57 PM Jonathan Wakely  wrote:
>>
>>
>>
>> On Mon, 27 Mar 2023 at 22:25, Weslley da Silva Pereira via Libstdc++ 
>>  wrote:
>>>
>>> Dear all,
>>>
>>> Here follows a patch that removes implicit type casts in std::complex.
>>>
>>> *Description:* The current implementation of `complex<_Tp>` assumes that
>>> `int, double, long double` are explicitly convertible to `_Tp`. Moreover,
>>> it also assumes that:
>>>
>>> 1. `int` is implicitly convertible to `_Tp`, e.g., when using
>>> `complex<_Tp>(1)`.
>>> 2. `long double` can be attributed to a `_Tp` variable, e.g., when using
>>> `const _Tp __pi_2 = 1.5707963267948966192313216916397514L`.
>>>
>>> This patch transforms the implicit casts (1) and (2) into explicit type
>>> casts. As a result, `std::complex` is now able to support more types. One
>>> example is the type `Eigen::Half` from
>>> https://eigen.tuxfamily.org/dox-devel/Half_8h_source.html which does not
>>> implement implicit type conversions.
>>>
>>> *ChangeLog:*
>>> libstdc++-v3/ChangeLog:
>>>
>>> * include/std/complex:
>>
>>
>> Thank you for the patch. Now that we're in development stage 1 for GCC 14, 
>> it's time to consider it.
>>
>> You're missing a proper changelog entry, I suggest:
>>
>>* include/std/complex (polar, __complex_sqrt)
>>(__complex_pow_unsigned, pow, __complex_acos): Replace implicit
>>conversions from int and long double to value_type.
>
>
> I agree with your proposal for the changelog.
>
>>
>> You're also missing either a copyright assignment on file with the FSF 
>> (unless you've completed that paperwork?), or a DCO sign-off. Please see 
>> https://gcc.gnu.org/contribute.html#legal and https://gcc.gnu.org/dco.html 
>> for more details.
>
>
> Here is my DCO sign-off:
>
> Copyright:
> Signed-off-by: Weslley da Silva Pereira 
>
>>
>>
>>>
>>>
>>> *Patch:* fix_complex.diff. (Also at
>>> https://github.com/gcc-mirror/gcc/pull/84)
>>>
>>> *OBS:* I didn't find a good reason for adding new tests or test results
>>> here since this is really a small upgrade (in my view) to std::complex.
>>
>>
>> I don't agree. The purpose of this is to support std::complex<Foo> for a 
>> type Foo without implicit conversions (which isn't required by the standard 
>> btw, only the floating-point types are required to work, but we can support 
>> others as an extension). Without tests, we don't know if that goal has been 
>> met, and we don't know if the goal continues to be met in future versions. A 
>> test would ensure that we don't accidentally re-introduce code requiring 
>> implicit conversions.
>>
>> With a suitable test, I think this patch will be OK for GCC 14.
>>
>> Thanks again for contributing.
>>
>>
>
> Tests:
> See the attached file `test_complex_eigenhalf.cpp`
>
> Test results:
> - When using x86-64 GCC (trunk), I obtained compilation errors as shown in 
> the attached text file. Live example at: https://godbolt.org/z/oa9M34h8P.
> - I observed no errors after applying the suggested patch on my machine.
> - I tried it with the flag `-Wall`. No warnings were shown.
> - My machine configuration and my GCC build information are displayed in the 
> file `config.log` generated by the configuration step of GCC.
>
> Let me know if I need to do anything else.
>
> Thanks,
>   Weslley
>
> --
> Weslley S. Pereira



[committed] libgfortran: Fix calloc call by swapping arg order [PR112364]

2023-11-06 Thread Tobias Burnus

See PR for a discussion whether this change is required for alignment (or 
other) reasons
(looks as if not) - or "just" to match the intended order (arg names + 
description) and
to silence a -Walloc-size warning.

Committed as r14-5148-g17df6ddcf11aef

(BTW: I don't think that it is worthwhile to backport it - the -Walloc-size 
warning is new
since r14-5059-gd880e093d92084.)

Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
commit 17df6ddcf11aef6d200305d35641a7deb2f430e1
Author: Tobias Burnus 
Date:   Mon Nov 6 11:34:31 2023 +0100

libgfortran: Fix calloc call by swapping arg order [PR112364]

The prototype of calloc is
  void *calloc(size_t nmemb, size_t size);
denoting "an array of nmemb objects, each of whose size is size." (C23)

In order to follow the meaning of the argument names and to silence
a -Walloc-size warning, this commit swaps the order of the two args
to read now:  calloc (1, sizeof (transfer_queue));

libgfortran/ChangeLog:

PR libfortran/112364
* io/async.c (enqueue_transfer, enqueue_done_id, enqueue_done,
enqueue_close): Swap 1st and 2nd arg in calloc call.

diff --git a/libgfortran/io/async.c b/libgfortran/io/async.c
index 57097438e89..8fa1f0d4ce0 100644
--- a/libgfortran/io/async.c
+++ b/libgfortran/io/async.c
@@ -262,7 +262,7 @@ init_async_unit (gfc_unit *u)
 void
 enqueue_transfer (async_unit *au, transfer_args *arg, enum aio_do type)
 {
-  transfer_queue *tq = calloc (sizeof (transfer_queue), 1);
+  transfer_queue *tq = calloc (1, sizeof (transfer_queue));
   tq->arg = *arg;
   tq->type = type;
   tq->has_id = 0;
@@ -284,7 +284,7 @@ int
 enqueue_done_id (async_unit *au, enum aio_do type)
 {
   int ret;
-  transfer_queue *tq = calloc (sizeof (transfer_queue), 1);
+  transfer_queue *tq = calloc (1, sizeof (transfer_queue));
 
   tq->type = type;
   tq->has_id = 1;
@@ -308,7 +308,7 @@ enqueue_done_id (async_unit *au, enum aio_do type)
 void
 enqueue_done (async_unit *au, enum aio_do type)
 {
-  transfer_queue *tq = calloc (sizeof (transfer_queue), 1);
+  transfer_queue *tq = calloc (1, sizeof (transfer_queue));
   tq->type = type;
   tq->has_id = 0;
   LOCK (&au->lock);
@@ -328,7 +328,7 @@ enqueue_done (async_unit *au, enum aio_do type)
 void
 enqueue_close (async_unit *au)
 {
-  transfer_queue *tq = calloc (sizeof (transfer_queue), 1);
+  transfer_queue *tq = calloc (1, sizeof (transfer_queue));
 
   tq->type = AIO_CLOSE;
   LOCK (&au->lock);


Re: [PATCH] testsuite, Darwin: Add support for Mach-O function body scans.

2023-11-06 Thread Richard Sandiford
Iain Sandoe  writes:
> Hi Richard,
>
>> On 5 Nov 2023, at 12:11, Richard Sandiford  wrote:
>> 
>> Iain Sandoe  writes:
>
 On 26 Oct 2023, at 21:00, Iain Sandoe  wrote:
>>> 
> On 26 Oct 2023, at 20:49, Richard Sandiford 
>>> wrote:
> 
> Iain Sandoe  writes:
>> This was written before Thomas' modification to the ELF-handling to allow
>> a config-based change for target details.  I did consider updating this
>> to try and use that scheme, but I think that it would sit a little
>> awkwardly, since there are some differences in the start-up scanning for
>> Mach-O.  I would say that in all probability we could improve things but
>> I'd like to put this forward as a well-tested initial implementation.
> 
> Sorry, I would prefer to extend the existing function instead.
> E.g. there's already some divergence between the Mach-O version
> and the default version, in that the Mach-O version doesn't print
> verbose messages.  I also don't think that the current default code
> is so watertight that it'll never need to be updated in future.
 
 Fair enough, will explore what can be done (as I recall last I looked the
 primary difference was in the initial start-up scan).
>>> 
>>> I’ve done this as attached.
>>> 
>>> For the record, when doing it, it gave rise to the same misgivings that led
>>> to the separate implementation before.
>>> 
>>> * as we add formats and uncover asm oddities, they all need to be handled
>>>   in one set of code, IMO it could become quite convoluted.
>>> 
>>> * now making a change to the MACH-O code, means I have to check I did not
>>>   inadvertently break ELF (and likewise, in theory, an ELF change should 
>>> check
>>>   MACH-O, but many folks do/can not do that).
>>> 
>>> Maybe there’s some half-way-house where code can usefully be shared without
>>> those down-sides.
>>> 
>>> Anyway, to make progress, is the revised version OK for trunk? (tested on
>>> aarch64-linux and aarch64-darwin).
>> 
>> Sorry for the slow reply.  I was hoping we'd be able to share a bit more
>> code than that, and avoid an isMACHO toggle.  Does something like the
>> attached adaption of your patch work?  Only spot-checked on
>> aarch64-linux-gnu so far.
>> 
>> (The patch tries to avoid capturing the user label prefix, hopefully
>> avoiding the needsULP thing.)
>
> Yes, this works for me too for Arm64 Darwin (and probably is fine for other
> Darwin archs in case we implement body tests there).  If we decide to emit
> some comment-based markers to delineate functions without unwind data,
> we can just amend the start and end.
>
> thanks,
> Iain
> (doing some wider testing, but for now the only mach-o cases are in the
>  arm64 code, so the fact that those passed so far is pretty good indication).

OK, great.  It passed testing for me too, so please go ahead and commit
if it does for you.

> -
>
> As an aside what’s the intention for cases like this?
>
>   .data
> foo:
>   . ….
>   .size foo, .-foo

ATM there's no way for the test to say that specific pseudo-ops are
interesting to it.  Same for labels.  It might be useful to add
support for that though.

Thanks,
Richard

>
>
>
>> 
>> Thanks,
>> Richard
>> 
>> 
>> diff --git a/gcc/testsuite/lib/scanasm.exp b/gcc/testsuite/lib/scanasm.exp
>> index 5df80325dff..2434550f0c3 100644
>> --- a/gcc/testsuite/lib/scanasm.exp
>> +++ b/gcc/testsuite/lib/scanasm.exp
>> @@ -785,23 +785,34 @@ proc configure_check-function-bodies { config } {
>> 
>> # Regexp for the start of a function definition (name in \1).
>> if { [istarget nvptx*-*-*] } {
>> -set up_config(start) {^// BEGIN(?: GLOBAL|) FUNCTION DEF: 
>> ([a-zA-Z_]\S+)$}
>> +set up_config(start) {
>> +{^// BEGIN(?: GLOBAL|) FUNCTION DEF: ([a-zA-Z_]\S+)$}
>> +}
>> +} elseif { [istarget *-*-darwin*] } {
>> +set up_config(start) {
>> +{^_([a-zA-Z_]\S+):$}
>> +{^LFB[0-9]+:}
>> +}
>> } else {
>> -set up_config(start) {^([a-zA-Z_]\S+):$}
>> +set up_config(start) {{^([a-zA-Z_]\S+):$}}
>> }
>> 
>> # Regexp for the end of a function definition.
>> if { [istarget nvptx*-*-*] } {
>>  set up_config(end) {^\}$}
>> +} elseif { [istarget *-*-darwin*] } {
>> +set up_config(end) {^LFE[0-9]+}
>> } else {
>>  set up_config(end) {^\s*\.size}
>> }
>> - 
>> +
>> # Regexp for lines that aren't interesting.
>> if { [istarget nvptx*-*-*] } {
>>  # Skip lines beginning with '//' comments ('-fverbose-asm', for
>>  # example).
>>  set up_config(fluff) {^\s*(?://)}
>> +} elseif { [istarget *-*-darwin*] } {
>> +set up_config(fluff) {^\s*(?:\.|//|@)|^L[0-9ACESV]}
>> } else {
>>  # Skip lines beginning with labels ('.L[...]:') or other directives
>>  # ('.align', '.cfi_startproc', '.quad [...]', '.text', etc.), '//' or
>> @@ -833,9 +844,19 @@ proc parse_function_bodies { config filename result } {
>> set fd [open $filename r

[PATCH] testsuite: skip gcc.target/i386/pr106910-1.c test when using newlib

2023-11-06 Thread Marc Poulhiès
Using newlib produces a different codegen because the support for c99
differs (see libc_has_function hook).

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr106910-1.c: Disable for newlib.
---
Tested on x86_64-linux and x86_64-elf.

OK for master?

 gcc/testsuite/gcc.target/i386/pr106910-1.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.target/i386/pr106910-1.c 
b/gcc/testsuite/gcc.target/i386/pr106910-1.c
index c7685a32183..00c93f444b6 100644
--- a/gcc/testsuite/gcc.target/i386/pr106910-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr106910-1.c
@@ -1,4 +1,6 @@
+
 /* { dg-do compile { target { ! ia32 } } } */
+/* { dg-skip-if "newlib libc math causes different codegen" { newlib } } */
 /* { dg-options "-msse4.1 -O2 -Ofast" } */
 /* { dg-final { scan-assembler-times "roundps" 9 } } */
 /* { dg-final { scan-assembler-times "cvtps2dq" 1 } } */
-- 
2.42.0



[PATCH] testsuite: require avx_runtime for some tests

2023-11-06 Thread Marc Poulhiès
These 3 tests fail parsing the 'vect' dump when not using -mavx. Make
the dependency explicit.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/vect-ifcvt-18.c: Add dep on avx_runtime.
* gcc.dg/vect/vect-simd-clone-16f.c: Likewise.
* gcc.dg/vect/vect-simd-clone-18f.c: Likewise.
---
Tested on x86_64-linux and x86_64-elf.

Ok for master?

 gcc/testsuite/gcc.dg/vect/vect-ifcvt-18.c   | 3 ++-
 gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c | 4 ++--
 gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-ifcvt-18.c 
b/gcc/testsuite/gcc.dg/vect/vect-ifcvt-18.c
index c1d3c27d819..607194496e9 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-ifcvt-18.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-ifcvt-18.c
@@ -1,6 +1,7 @@
 /* { dg-require-effective-target vect_condition } */
 /* { dg-require-effective-target vect_float } */
-/* { dg-additional-options "-Ofast -mavx" { target avx_runtime } } */
+/* { dg-require-effective-target avx_runtime } */
+/* { dg-additional-options "-Ofast -mavx" } */
 
 
 int A0[4] = {36,39,42,45};
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c 
b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c
index 7cd29e894d0..c6615dc626d 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c
@@ -1,6 +1,6 @@
 /* { dg-require-effective-target vect_simd_clones } */
-/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } */
-/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0 
-mavx" } */
+/* { dg-require-effective-target avx_runtime } */
 /* { dg-additional-options "-mno-avx512f" { target { { i?86*-*-* x86_64-*-* } 
&& { ! lp64 } } } } */
 
 #define TYPE __INT64_TYPE__
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c 
b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c
index 4dd51381d73..787b918d0c4 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c
@@ -1,6 +1,6 @@
 /* { dg-require-effective-target vect_simd_clones } */
-/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } */
-/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0 
-mavx" } */
+/* { dg-require-effective-target  avx_runtime } */
 /* { dg-additional-options "-mno-avx512f" { target { { i?86*-*-* x86_64-*-* } 
&& { ! lp64 } } } } */
 
 #define TYPE __INT64_TYPE__
-- 
2.42.0



[PATCH] testsuite: refine gcc.dg/analyzer/fd-4.c test for newlib

2023-11-06 Thread Marc Poulhiès
Contrary to glibc, including stdio.h from newlib defines mode_t which
conflicts with the test's type definition.

.../gcc/testsuite/gcc.dg/analyzer/fd-4.c:19:3: error: redefinition of typedef 
'mode_t' with different type
...
.../include/sys/types.h:189:25: note: previous declaration of 'mode_t' with 
type 'mode_t' {aka 'unsigned int'}

Defining _MODE_T_DECLARED skips the type definition.

gcc/testsuite/ChangeLog:

* gcc.dg/analyzer/fd-4.c: Fix for newlib.
---
Tested on x86_64-linux and x86_64-elf.

Ok for master?

 gcc/testsuite/gcc.dg/analyzer/fd-4.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.dg/analyzer/fd-4.c 
b/gcc/testsuite/gcc.dg/analyzer/fd-4.c
index 994bad84342..e4a834ade30 100644
--- a/gcc/testsuite/gcc.dg/analyzer/fd-4.c
+++ b/gcc/testsuite/gcc.dg/analyzer/fd-4.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-D_MODE_T_DECLARED=1" { target newlib } } */
 #ifdef _AIX
 #define _MODE_T
 #endif
-- 
2.42.0



Re: [PATCH 5/5] x86: yet more PR target/100711-like splitting

2023-11-06 Thread Jan Beulich
On 25.06.2023 08:41, Hongtao Liu wrote:
> On Sun, Jun 25, 2023 at 2:35 PM Hongtao Liu  wrote:
>>
>> On Sun, Jun 25, 2023 at 2:25 PM Jan Beulich  wrote:
>>>
>>> On 25.06.2023 07:12, Hongtao Liu wrote:
 On Wed, Jun 21, 2023 at 2:29 PM Jan Beulich via Gcc-patches
  wrote:
>
> ---
> For the purpose here (and elsewhere) bcst_vector_operand() (really:
> bcst_mem_operand()) isn't permissive enough: We'd want it to allow
> 128-bit and 256-bit types as well irrespective of AVX512VL being
> enabled. This would likely require a new predicate
> (bcst_intvec_operand()?) and a new constraint (BR? Bi?). (Yet for name
> selection it will want considering that this is applicable to certain
> non-calculational FP operations as well.)
 I think so.
>>>
>>> Any preference towards predicate and constraint naming?
>> something like bcst_mem_operand_$suffix, $suffix indicates the
>> pattern may use zmm instruction for 128/256-bit operand.
>> maybe just bcst_mem_operand_zmm?
> For constraint, maybe we can reuse Br, relax Br to match bcst_mem_operand_zmm.
> For those original patterns with bcst_mem_operand, it should be ok
> since it's already guarded by the predicate, the constraint must be
> valid.

Hmm, I wanted to get back to this, but then I started wondering about this
reply of yours vs your request to not go farther with the use of "oversized"
insns (i.e. acting in 512-bit registers in lieu of AVX512VL being enabled,
when no FP exceptions can be raised on the otherwise unused elements). Since
iirc the latter came later, am I right in assuming we then also shouldn't go
the route outlined above?

Jan


Re: [PING][PATCH 2/2] arm: Add support for MVE Tail-Predicated Low Overhead Loops

2023-11-06 Thread Richard Sandiford
Stamatis Markianos-Wright  writes:
>> One of the main reasons for reading the arm bits was to try to answer
>> the question: if we switch to a downcounting loop with a GE condition,
>> how do we make sure that the start value is not a large unsigned
>> number that is interpreted as negative by GE?  E.g. if the loop
>> originally counted up in steps of N and used an LTU condition,
>> it could stop at a value in the range [INT_MAX + 1, UINT_MAX].
>> But the loop might never iterate if we start counting down from
>> most values in that range.
>>
>> Does the patch handle that?
>
> So AFAICT this is actually handled in the generic code in `doloop_valid_p`:
>
> These kinds of loops fail because they are "desc->infinite", so no 
> loop-doloop conversion is attempted at all (even for standard dls/le loops)
>
> Thanks to that check I haven't been able to trigger anything like the 
> behaviour you describe, do you think the doloop_valid_p checks are 
> robust enough?

The loops I was thinking of are provably not infinite though.  E.g.:

  for (unsigned int i = 0; i < UINT_MAX - 100; ++i)
...

is known to terminate.  And doloop conversion is safe with the normal
count-down-by-1 approach, so I don't think current code would need
to reject it.  I.e. a conversion to:

  unsigned int i = UINT_MAX - 101;
  do
...
  while (--i != ~0U);

would be safe, but a conversion to:

  int i = UINT_MAX - 101;
  do
...
  while ((i -= step, i > 0));

wouldn't, because the loop body would only be executed once.

I'm only going off the name "infinite" though :)  It's possible that
it has more connotations than that.

Thanks,
Richard


[PATCH] LoongArch: Remove redundant barrier instructions before LL-SC loops

2023-11-06 Thread Xi Ruoyao
This is isomorphic to the LLVM changes [1-2].

On LoongArch, the LL and SC instructions have memory barrier semantics:

- LL: <memory-barrier> + <load-exclusive>
- SC: <store-conditional> + <memory-barrier>

But the compare and swap operation is allowed to fail, and if it fails
the SC instruction is not executed, thus the guarantee of acquiring
semantics cannot be ensured. Therefore, an acquire barrier needs to be
generated when failure_memorder includes an acquire operation.

On CPUs implementing LoongArch v1.10 or later, "dbar 0b10100" is an
acquire barrier; on CPUs implementing LoongArch v1.00, it is a full
barrier.  So it's always enough for acquire semantics.  OTOH if an
acquire semantic is not needed, we still need the "dbar 0x700" as the
load-load barrier like all LL-SC loops.

[1]:https://github.com/llvm/llvm-project/pull/67391
[2]:https://github.com/llvm/llvm-project/pull/69339

gcc/ChangeLog:

* config/loongarch/loongarch.cc
(loongarch_memmodel_needs_release_fence): Remove.
(loongarch_cas_failure_memorder_needs_acquire): New static
function.
(loongarch_print_operand): Redefine 'G' for the barrier on CAS
failure.
* config/loongarch/sync.md (atomic_cas_value_strong):
Remove the redundant barrier before the LL instruction, and
emit an acquire barrier on failure if needed by
failure_memorder.
(atomic_cas_value_cmp_and_7_): Likewise.
(atomic_cas_value_add_7_): Remove the unnecessary barrier
before the LL instruction.
(atomic_cas_value_sub_7_): Likewise.
(atomic_cas_value_and_7_): Likewise.
(atomic_cas_value_xor_7_): Likewise.
(atomic_cas_value_or_7_): Likewise.
(atomic_cas_value_nand_7_): Likewise.
(atomic_cas_value_exchange_7_): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/cas-acquire.c: New test.
---

Bootstrapped and regtested on loongarch64-linux-gnu.  Ok for trunk
and/or GCC 12/13 (for fixing the acquire semantics in failure_memorder)?

 gcc/config/loongarch/loongarch.cc | 27 +++---
 gcc/config/loongarch/sync.md  | 49 +--
 .../gcc.target/loongarch/cas-acquire.c| 84 +++
 3 files changed, 118 insertions(+), 42 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/cas-acquire.c

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 9b63f0dc322..d9b7a1076a2 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -5833,25 +5833,22 @@ loongarch_memmodel_needs_rel_acq_fence (enum memmodel 
model)
 }
 }
 
-/* Return true if a FENCE should be emitted to before a memory access to
-   implement the release portion of memory model MODEL.  */
+/* Return true if a FENCE should be emitted after a failed CAS to
+   implement the acquire semantic of failure_memorder.  */
 
 static bool
-loongarch_memmodel_needs_release_fence (enum memmodel model)
+loongarch_cas_failure_memorder_needs_acquire (enum memmodel model)
 {
-  switch (model)
+  switch (memmodel_base (model))
 {
+case MEMMODEL_ACQUIRE:
 case MEMMODEL_ACQ_REL:
+case MEMMODEL_CONSUME:
 case MEMMODEL_SEQ_CST:
-case MEMMODEL_SYNC_SEQ_CST:
-case MEMMODEL_RELEASE:
-case MEMMODEL_SYNC_RELEASE:
   return true;
 
-case MEMMODEL_ACQUIRE:
-case MEMMODEL_CONSUME:
-case MEMMODEL_SYNC_ACQUIRE:
 case MEMMODEL_RELAXED:
+case MEMMODEL_RELEASE:
   return false;
 
 default:
@@ -5966,7 +5963,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool 
hi64_part,
'd' Print CONST_INT OP in decimal.
'E' Print CONST_INT OP element 0 of a replicated CONST_VECTOR in decimal.
'F' Print the FPU branch condition for comparison OP.
-   'G' Print a DBAR insn if the memory model requires a release.
+   'G' Print a DBAR insn for CAS failure (with an acquire semantic if
+   needed, otherwise a simple load-load barrier).
'H'  Print address 52-61bit relocation associated with OP.
'h'  Print the high-part relocation associated with OP.
'i' Print i if the operand is not a register.
@@ -6057,8 +6055,11 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
   break;
 
 case 'G':
-  if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op)))
-   fputs ("dbar\t0", file);
+  if (loongarch_cas_failure_memorder_needs_acquire (
+   memmodel_from_int (INTVAL (op
+   fputs ("dbar\t0b10100", file);
+  else
+   fputs ("dbar\t0x700", file);
   break;
 
 case 'h':
diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index 9924d522bcd..db3a21690b8 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -129,19 +129,18 @@ (define_insn "atomic_cas_value_strong"
(clobber (match_scratch:GPR 6 "=&r"))]
   ""
 {
-  return "%G5\\n\\t"
-"1:\\n\\t"
+  return "1:\\n\\t"
 "ll.\\t%0,%1\\n\\t"
 "bne\\t%0,%z2,2f\\n\\t"
 "or%i3\\t

[PATCH 0/2] libgfortran: empty array fixes [PR112371]

2023-11-06 Thread Mikael Morin
Hello,

while preparing a testcase, I encountered a bug which I filed as
PR112371.  Investigating further, I found two different problems which I
propose to fix with the followup patches.

Those have been bootstrapped and regression tested on x86_64-pc-linux-gnu.
OK for master?

Mikael


Mikael Morin (2):
  libgfortran: Remove early return if extent is zero [PR112371]
  libgfortran: Remove empty array descriptor first dimension overwrite
[PR112371]

 gcc/testsuite/gfortran.dg/bound_10.f90 | 207 +
 gcc/testsuite/gfortran.dg/bound_11.f90 | 588 +
 libgfortran/generated/all_l1.c |   6 +-
 libgfortran/generated/all_l16.c|   6 +-
 libgfortran/generated/all_l2.c |   6 +-
 libgfortran/generated/all_l4.c |   6 +-
 libgfortran/generated/all_l8.c |   6 +-
 libgfortran/generated/any_l1.c |   6 +-
 libgfortran/generated/any_l16.c|   6 +-
 libgfortran/generated/any_l2.c |   6 +-
 libgfortran/generated/any_l4.c |   6 +-
 libgfortran/generated/any_l8.c |   6 +-
 libgfortran/generated/count_16_l.c |   6 +-
 libgfortran/generated/count_1_l.c  |   6 +-
 libgfortran/generated/count_2_l.c  |   6 +-
 libgfortran/generated/count_4_l.c  |   6 +-
 libgfortran/generated/count_8_l.c  |   6 +-
 libgfortran/generated/findloc1_c10.c   |  18 +-
 libgfortran/generated/findloc1_c16.c   |  18 +-
 libgfortran/generated/findloc1_c17.c   |  18 +-
 libgfortran/generated/findloc1_c4.c|  18 +-
 libgfortran/generated/findloc1_c8.c|  18 +-
 libgfortran/generated/findloc1_i1.c|  18 +-
 libgfortran/generated/findloc1_i16.c   |  18 +-
 libgfortran/generated/findloc1_i2.c|  18 +-
 libgfortran/generated/findloc1_i4.c|  18 +-
 libgfortran/generated/findloc1_i8.c|  18 +-
 libgfortran/generated/findloc1_r10.c   |  18 +-
 libgfortran/generated/findloc1_r16.c   |  18 +-
 libgfortran/generated/findloc1_r17.c   |  18 +-
 libgfortran/generated/findloc1_r4.c|  18 +-
 libgfortran/generated/findloc1_r8.c|  18 +-
 libgfortran/generated/findloc1_s1.c|  18 +-
 libgfortran/generated/findloc1_s4.c|  18 +-
 libgfortran/generated/iall_i1.c|  21 +-
 libgfortran/generated/iall_i16.c   |  21 +-
 libgfortran/generated/iall_i2.c|  21 +-
 libgfortran/generated/iall_i4.c|  21 +-
 libgfortran/generated/iall_i8.c|  21 +-
 libgfortran/generated/iany_i1.c|  21 +-
 libgfortran/generated/iany_i16.c   |  21 +-
 libgfortran/generated/iany_i2.c|  21 +-
 libgfortran/generated/iany_i4.c|  21 +-
 libgfortran/generated/iany_i8.c|  21 +-
 libgfortran/generated/iparity_i1.c |  21 +-
 libgfortran/generated/iparity_i16.c|  21 +-
 libgfortran/generated/iparity_i2.c |  21 +-
 libgfortran/generated/iparity_i4.c |  21 +-
 libgfortran/generated/iparity_i8.c |  21 +-
 libgfortran/generated/maxloc1_16_i1.c  |  21 +-
 libgfortran/generated/maxloc1_16_i16.c |  21 +-
 libgfortran/generated/maxloc1_16_i2.c  |  21 +-
 libgfortran/generated/maxloc1_16_i4.c  |  21 +-
 libgfortran/generated/maxloc1_16_i8.c  |  21 +-
 libgfortran/generated/maxloc1_16_r10.c |  21 +-
 libgfortran/generated/maxloc1_16_r16.c |  21 +-
 libgfortran/generated/maxloc1_16_r17.c |  21 +-
 libgfortran/generated/maxloc1_16_r4.c  |  21 +-
 libgfortran/generated/maxloc1_16_r8.c  |  21 +-
 libgfortran/generated/maxloc1_16_s1.c  |  21 +-
 libgfortran/generated/maxloc1_16_s4.c  |  21 +-
 libgfortran/generated/maxloc1_4_i1.c   |  21 +-
 libgfortran/generated/maxloc1_4_i16.c  |  21 +-
 libgfortran/generated/maxloc1_4_i2.c   |  21 +-
 libgfortran/generated/maxloc1_4_i4.c   |  21 +-
 libgfortran/generated/maxloc1_4_i8.c   |  21 +-
 libgfortran/generated/maxloc1_4_r10.c  |  21 +-
 libgfortran/generated/maxloc1_4_r16.c  |  21 +-
 libgfortran/generated/maxloc1_4_r17.c  |  21 +-
 libgfortran/generated/maxloc1_4_r4.c   |  21 +-
 libgfortran/generated/maxloc1_4_r8.c   |  21 +-
 libgfortran/generated/maxloc1_4_s1.c   |  21 +-
 libgfortran/generated/maxloc1_4_s4.c   |  21 +-
 libgfortran/generated/maxloc1_8_i1.c   |  21 +-
 libgfortran/generated/maxloc1_8_i16.c  |  21 +-
 libgfortran/generated/maxloc1_8_i2.c   |  21 +-
 libgfortran/generated/maxloc1_8_i4.c   |  21 +-
 libgfortran/generated/maxloc1_8_i8.c   |  21 +-
 libgfortran/generated/maxloc1_8_r10.c  |  21 +-
 libgfortran/generated/maxloc1_8_r16.c  |  21 +-
 libgfortran/generated/maxloc1_8_r17.c  |  21 +-
 libgfortran/generated/maxloc1_8_r4.c   |  21 +-
 libgfortran/generated/maxloc1_8_r8.c   |  21 +-
 libgfortran/generated/maxloc1_8_s1.c   |  21 +-
 libgfortran/generated/maxloc1_8_s4.c   |  21 +-
 libgfortran/generated/maxval1_s1.c |  21 +-
 libgfortran/generated/maxval1_s4.c |  21 +-
 libgfortran/generated/maxval_i1.c  |  21 +-
 libgfortran/generated/maxval_i16.c |  21 +-
 libgfortran/generated/maxval_i2.c  |  21 +-
 libgfortran/generated/maxval_i4.c  |  21 +-
 libgfortran/generated/maxval_i8.c  |  21 +-
 lib

[PATCH 1/2] libgfortran: Remove early return if extent is zero [PR112371]

2023-11-06 Thread Mikael Morin
Remove the early return present in function templates for transformational
functions doing a (masked) reduction of an array along a dimension.
This early return, which triggered if the extent in the reduction dimension
was zero, was wrong because even if the reduction operation degenerates to
a constant value in that case, one has to loop anyway along the other
dimensions to initialize every element of the resulting array with that
constant value.

The offending piece of code was present in several places, and this removes
them all.  Namely, the impacted m4 files are ifunction.m4 for regular
functions and types, ifunction-s.m4 for character minloc and maxloc, and
ifunction-s2.m4 for character minval and maxval.

PR fortran/112371

libgfortran/ChangeLog:

* m4/ifunction.m4 (START_MASKED_ARRAY_FUNCTION): Remove early return if
extent is zero.
* m4/ifunction-s.m4 (START_MASKED_ARRAY_FUNCTION): Ditto.
* m4/ifunction-s2.m4 (START_MASKED_ARRAY_FUNCTION): Ditto.
* generated/iall_i1.c: Regenerate.
* generated/iall_i16.c: Regenerate.
* generated/iall_i2.c: Regenerate.
* generated/iall_i4.c: Regenerate.
* generated/iall_i8.c: Regenerate.
* generated/iany_i1.c: Regenerate.
* generated/iany_i16.c: Regenerate.
* generated/iany_i2.c: Regenerate.
* generated/iany_i4.c: Regenerate.
* generated/iany_i8.c: Regenerate.
* generated/iparity_i1.c: Regenerate.
* generated/iparity_i16.c: Regenerate.
* generated/iparity_i2.c: Regenerate.
* generated/iparity_i4.c: Regenerate.
* generated/iparity_i8.c: Regenerate.
* generated/maxloc1_16_i1.c: Regenerate.
* generated/maxloc1_16_i16.c: Regenerate.
* generated/maxloc1_16_i2.c: Regenerate.
* generated/maxloc1_16_i4.c: Regenerate.
* generated/maxloc1_16_i8.c: Regenerate.
* generated/maxloc1_16_r10.c: Regenerate.
* generated/maxloc1_16_r16.c: Regenerate.
* generated/maxloc1_16_r17.c: Regenerate.
* generated/maxloc1_16_r4.c: Regenerate.
* generated/maxloc1_16_r8.c: Regenerate.
* generated/maxloc1_16_s1.c: Regenerate.
* generated/maxloc1_16_s4.c: Regenerate.
* generated/maxloc1_4_i1.c: Regenerate.
* generated/maxloc1_4_i16.c: Regenerate.
* generated/maxloc1_4_i2.c: Regenerate.
* generated/maxloc1_4_i4.c: Regenerate.
* generated/maxloc1_4_i8.c: Regenerate.
* generated/maxloc1_4_r10.c: Regenerate.
* generated/maxloc1_4_r16.c: Regenerate.
* generated/maxloc1_4_r17.c: Regenerate.
* generated/maxloc1_4_r4.c: Regenerate.
* generated/maxloc1_4_r8.c: Regenerate.
* generated/maxloc1_4_s1.c: Regenerate.
* generated/maxloc1_4_s4.c: Regenerate.
* generated/maxloc1_8_i1.c: Regenerate.
* generated/maxloc1_8_i16.c: Regenerate.
* generated/maxloc1_8_i2.c: Regenerate.
* generated/maxloc1_8_i4.c: Regenerate.
* generated/maxloc1_8_i8.c: Regenerate.
* generated/maxloc1_8_r10.c: Regenerate.
* generated/maxloc1_8_r16.c: Regenerate.
* generated/maxloc1_8_r17.c: Regenerate.
* generated/maxloc1_8_r4.c: Regenerate.
* generated/maxloc1_8_r8.c: Regenerate.
* generated/maxloc1_8_s1.c: Regenerate.
* generated/maxloc1_8_s4.c: Regenerate.
* generated/maxval1_s1.c: Regenerate.
* generated/maxval1_s4.c: Regenerate.
* generated/maxval_i1.c: Regenerate.
* generated/maxval_i16.c: Regenerate.
* generated/maxval_i2.c: Regenerate.
* generated/maxval_i4.c: Regenerate.
* generated/maxval_i8.c: Regenerate.
* generated/maxval_r10.c: Regenerate.
* generated/maxval_r16.c: Regenerate.
* generated/maxval_r17.c: Regenerate.
* generated/maxval_r4.c: Regenerate.
* generated/maxval_r8.c: Regenerate.
* generated/minloc1_16_i1.c: Regenerate.
* generated/minloc1_16_i16.c: Regenerate.
* generated/minloc1_16_i2.c: Regenerate.
* generated/minloc1_16_i4.c: Regenerate.
* generated/minloc1_16_i8.c: Regenerate.
* generated/minloc1_16_r10.c: Regenerate.
* generated/minloc1_16_r16.c: Regenerate.
* generated/minloc1_16_r17.c: Regenerate.
* generated/minloc1_16_r4.c: Regenerate.
* generated/minloc1_16_r8.c: Regenerate.
* generated/minloc1_16_s1.c: Regenerate.
* generated/minloc1_16_s4.c: Regenerate.
* generated/minloc1_4_i1.c: Regenerate.
* generated/minloc1_4_i16.c: Regenerate.
* generated/minloc1_4_i2.c: Regenerate.
* generated/minloc1_4_i4.c: Regenerate.
* generated/minloc1_4_i8.c: Regenerate.
* generated/minloc1_4_r10.c: Regenerate.
* generated/minloc1_4_r16.c: Regenerate.
* generated/minloc1_4_r17.c: Regenerate.
* generated/minloc1_4_r4.c: Regenerate.

[PATCH] libstdc++/112351 - deal with __gthread_once failure during locale init

2023-11-06 Thread Richard Biener
The following makes the C++98 locale init path follow the way the
C++11 path performs initialization.  This way we deal with pthread_once
failing, falling back to non-threadsafe initialization which, given we
initialize from the library, should be serialized by the dynamic
loader already.

Bootstrapped and tested on x86_64-unknown-linux-gnu, OK for trunk?
And GCC 13 branch?

Thanks,
Richard.

PR libstdc++/112351
libstdc++-v3/
* src/c++98/locale.cc (locale::facet::_S_get_c_locale):
Always perform non-threadsafe init when threadsafe init
failed.
---
 libstdc++-v3/src/c++98/locale.cc | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/libstdc++-v3/src/c++98/locale.cc b/libstdc++-v3/src/c++98/locale.cc
index d308140bab7..e9bec1db3b6 100644
--- a/libstdc++-v3/src/c++98/locale.cc
+++ b/libstdc++-v3/src/c++98/locale.cc
@@ -216,12 +216,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #ifdef __GTHREADS
 if (__gthread_active_p())
   __gthread_once(&_S_once, _S_initialize_once);
-else
 #endif
-  {
-   if (!_S_c_locale)
- _S_initialize_once();
-  }
+if (__builtin_expect (!_S_c_locale, 0))
+  _S_initialize_once();
 return _S_c_locale;
   }
 
-- 
2.35.3


Re: [PATCH v2] AArch64: Fix strict-align cpymem/setmem [PR103100]

2023-11-06 Thread Wilco Dijkstra

ping
 
v2: Use UINTVAL, rename max_mops_size.

The cpymemdi/setmemdi implementation doesn't fully support strict alignment.
Block the expansion if the alignment is less than 16 with STRICT_ALIGNMENT.
Clean up the condition when to use MOPS.
    
Passes regress/bootstrap, OK for commit?
    
gcc/ChangeLog/
    PR target/103100
    * config/aarch64/aarch64.md (cpymemdi): Remove pattern condition.
    (setmemdi): Likewise.
    * config/aarch64/aarch64.cc (aarch64_expand_cpymem): Support
    strict-align.  Cleanup condition for using MOPS.
    (aarch64_expand_setmem): Likewise.

---

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
dd6874d13a75f20d10a244578afc355b25c73da2..8a12894d6b80de1031d6e7d02dca680c57bce136
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -25261,27 +25261,23 @@ aarch64_expand_cpymem (rtx *operands)
   int mode_bits;
   rtx dst = operands[0];
   rtx src = operands[1];
+  unsigned align = UINTVAL (operands[3]);
   rtx base;
   machine_mode cur_mode = BLKmode;
+  bool size_p = optimize_function_for_size_p (cfun);
 
-  /* Variable-sized memcpy can go through the MOPS expansion if available.  */
-  if (!CONST_INT_P (operands[2]))
+  /* Variable-sized or strict-align copies may use the MOPS expansion.  */
+  if (!CONST_INT_P (operands[2]) || (STRICT_ALIGNMENT && align < 16))
 return aarch64_expand_cpymem_mops (operands);
 
-  unsigned HOST_WIDE_INT size = INTVAL (operands[2]);
-
-  /* Try to inline up to 256 bytes or use the MOPS threshold if available.  */
-  unsigned HOST_WIDE_INT max_copy_size
-    = TARGET_MOPS ? aarch64_mops_memcpy_size_threshold : 256;
+  unsigned HOST_WIDE_INT size = UINTVAL (operands[2]);
 
-  bool size_p = optimize_function_for_size_p (cfun);
+  /* Try to inline up to 256 bytes.  */
+  unsigned max_copy_size = 256;
+  unsigned mops_threshold = aarch64_mops_memcpy_size_threshold;
 
-  /* Large constant-sized cpymem should go through MOPS when possible.
- It should be a win even for size optimization in the general case.
- For speed optimization the choice between MOPS and the SIMD sequence
- depends on the size of the copy, rather than number of instructions,
- alignment etc.  */
-  if (size > max_copy_size)
+  /* Large copies use MOPS when available or a library call.  */
+  if (size > max_copy_size || (TARGET_MOPS && size > mops_threshold))
 return aarch64_expand_cpymem_mops (operands);
 
   int copy_bits = 256;
@@ -25445,12 +25441,13 @@ aarch64_expand_setmem (rtx *operands)
   unsigned HOST_WIDE_INT len;
   rtx dst = operands[0];
   rtx val = operands[2], src;
+  unsigned align = UINTVAL (operands[3]);
   rtx base;
   machine_mode cur_mode = BLKmode, next_mode;
 
-  /* If we don't have SIMD registers or the size is variable use the MOPS
- inlined sequence if possible.  */
-  if (!CONST_INT_P (operands[1]) || !TARGET_SIMD)
+  /* Variable-sized or strict-align memset may use the MOPS expansion.  */
+  if (!CONST_INT_P (operands[1]) || !TARGET_SIMD
+  || (STRICT_ALIGNMENT && align < 16))
 return aarch64_expand_setmem_mops (operands);
 
   bool size_p = optimize_function_for_size_p (cfun);
@@ -25458,10 +25455,13 @@ aarch64_expand_setmem (rtx *operands)
   /* Default the maximum to 256-bytes when considering only libcall vs
  SIMD broadcast sequence.  */
   unsigned max_set_size = 256;
+  unsigned mops_threshold = aarch64_mops_memset_size_threshold;
 
-  len = INTVAL (operands[1]);
-  if (len > max_set_size && !TARGET_MOPS)
-    return false;
+  len = UINTVAL (operands[1]);
+
+  /* Large memset uses MOPS when available or a library call.  */
+  if (len > max_set_size || (TARGET_MOPS && len > mops_threshold))
+    return aarch64_expand_setmem_mops (operands);
 
   int cst_val = !!(CONST_INT_P (val) && (INTVAL (val) != 0));
   /* The MOPS sequence takes:
@@ -25474,12 +25474,6 @@ aarch64_expand_setmem (rtx *operands)
  the arguments + 1 for the call.  */
   unsigned libcall_cost = 4;
 
-  /* Upper bound check.  For large constant-sized setmem use the MOPS sequence
- when available.  */
-  if (TARGET_MOPS
-  && len >= (unsigned HOST_WIDE_INT) aarch64_mops_memset_size_threshold)
-    return aarch64_expand_setmem_mops (operands);
-
   /* Attempt a sequence with a vector broadcast followed by stores.
  Count the number of operations involved to see if it's worth it
  against the alternatives.  A simple counter simd_ops on the
@@ -25521,10 +25515,8 @@ aarch64_expand_setmem (rtx *operands)
   simd_ops++;
   n -= mode_bits;
 
-  /* Do certain trailing copies as overlapping if it's going to be
-    cheaper.  i.e. less instructions to do so.  For instance doing a 15
-    byte copy it's more efficient to do two overlapping 8 byte copies than
-    8 + 4 + 2 + 1.  Only do this when -mstrict-align is not supplied.  */
+  /* Emit trailing writes using overlapping unaligned accesses
+   (when !STRICT_ALIGNME

Re: [PATCH v2] AArch64: Add inline memmove expansion

2023-11-06 Thread Wilco Dijkstra
ping
 
v2: further cleanups, improved comments

Add support for inline memmove expansions.  The generated code is the same
as for memcpy, except that all loads are emitted before stores rather than
being interleaved.  The maximum size is 256 bytes which requires at most 16
registers.

Passes regress/bootstrap, OK for commit?
    
gcc/ChangeLog/
    * config/aarch64/aarch64.opt (aarch64_mops_memmove_size_threshold):
    Change default.
    * config/aarch64/aarch64.md (cpymemdi): Add a parameter.
    (movmemdi): Call aarch64_expand_cpymem.
    * config/aarch64/aarch64.cc (aarch64_copy_one_block): Rename function,
    simplify, support storing generated loads/stores. 
    (aarch64_expand_cpymem): Support expansion of memmove.
    * config/aarch64/aarch64-protos.h (aarch64_expand_cpymem): Add bool arg.

gcc/testsuite/ChangeLog/
    * gcc.target/aarch64/memmove.c: Add new test.

---

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 
60a55f4bc1956786ea687fc7cad7ec9e4a84e1f0..0d39622bd2826a3fde54d67b5c5da9ee9286cbbd
 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -769,7 +769,7 @@ bool aarch64_emit_approx_sqrt (rtx, rtx, bool);
 tree aarch64_vector_load_decl (tree);
 void aarch64_expand_call (rtx, rtx, rtx, bool);
 bool aarch64_expand_cpymem_mops (rtx *, bool);
-bool aarch64_expand_cpymem (rtx *);
+bool aarch64_expand_cpymem (rtx *, bool);
 bool aarch64_expand_setmem (rtx *);
 bool aarch64_float_const_zero_rtx_p (rtx);
 bool aarch64_float_const_rtx_p (rtx);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
2fa5d09de85d385c1165e399bcc97681ef170916..e19e2d1de2e5b30eca672df05d9dcc1bc106ecc8
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -25238,52 +25238,37 @@ aarch64_progress_pointer (rtx pointer)
   return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer)));
 }
 
-/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
-   MODE bytes.  */
+/* Copy one block of size MODE from SRC to DST at offset OFFSET.  */
 
 static void
-aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
- machine_mode mode)
+aarch64_copy_one_block (rtx *load, rtx *store, rtx src, rtx dst,
+   int offset, machine_mode mode)
 {
-  /* Handle 256-bit memcpy separately.  We do this by making 2 adjacent memory
- address copies using V4SImode so that we can use Q registers.  */
-  if (known_eq (GET_MODE_BITSIZE (mode), 256))
+  /* Emit explict load/store pair instructions for 32-byte copies.  */
+  if (known_eq (GET_MODE_SIZE (mode), 32))
 {
   mode = V4SImode;
+  rtx src1 = adjust_address (src, mode, offset);
+  rtx src2 = adjust_address (src, mode, offset + 16);
+  rtx dst1 = adjust_address (dst, mode, offset);
+  rtx dst2 = adjust_address (dst, mode, offset + 16);
   rtx reg1 = gen_reg_rtx (mode);
   rtx reg2 = gen_reg_rtx (mode);
-  /* "Cast" the pointers to the correct mode.  */
-  *src = adjust_address (*src, mode, 0);
-  *dst = adjust_address (*dst, mode, 0);
-  /* Emit the memcpy.  */
-  emit_insn (aarch64_gen_load_pair (mode, reg1, *src, reg2,
-   aarch64_progress_pointer (*src)));
-  emit_insn (aarch64_gen_store_pair (mode, *dst, reg1,
-    aarch64_progress_pointer (*dst), 
reg2));
-  /* Move the pointers forward.  */
-  *src = aarch64_move_pointer (*src, 32);
-  *dst = aarch64_move_pointer (*dst, 32);
+  *load = aarch64_gen_load_pair (mode, reg1, src1, reg2, src2);
+  *store = aarch64_gen_store_pair (mode, dst1, reg1, dst2, reg2);
   return;
 }
 
   rtx reg = gen_reg_rtx (mode);
-
-  /* "Cast" the pointers to the correct mode.  */
-  *src = adjust_address (*src, mode, 0);
-  *dst = adjust_address (*dst, mode, 0);
-  /* Emit the memcpy.  */
-  emit_move_insn (reg, *src);
-  emit_move_insn (*dst, reg);
-  /* Move the pointers forward.  */
-  *src = aarch64_progress_pointer (*src);
-  *dst = aarch64_progress_pointer (*dst);
+  *load = gen_move_insn (reg, adjust_address (src, mode, offset));
+  *store = gen_move_insn (adjust_address (dst, mode, offset), reg);
 }
 
 /* Expand a cpymem/movmem using the MOPS extension.  OPERANDS are taken
    from the cpymem/movmem pattern.  IS_MEMMOVE is true if this is a memmove
    rather than memcpy.  Return true iff we succeeded.  */
 bool
-aarch64_expand_cpymem_mops (rtx *operands, bool is_memmove = false)
+aarch64_expand_cpymem_mops (rtx *operands, bool is_memmove)
 {
   if (!TARGET_MOPS)
 return false;
@@ -25302,51 +25287,48 @@ aarch64_expand_cpymem_mops (rtx *operands, bool 
is_memmove = false)
   return true;
 }
 
-/* Expand cpymem, as if from a __builtin_memcpy.  Return true if
-   we succeed, otherwise return false, indicating that a libca

Re: [PATCH] AArch64: Cleanup memset expansion

2023-11-06 Thread Wilco Dijkstra
ping
 
Cleanup memset implementation.  Similar to memcpy/memmove, use an offset and
bytes throughout.  Simplify the complex calculations when optimizing for size
by using a fixed limit.

Passes regress/bootstrap, OK for commit?
    
gcc/ChangeLog:
    * config/aarch64/aarch64.cc (aarch64_progress_pointer): Remove function.
    (aarch64_set_one_block_and_progress_pointer): Simplify and clean up.
    (aarch64_expand_setmem): Clean up implementation, use byte offsets,
    simplify size calculation.

---

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
e19e2d1de2e5b30eca672df05d9dcc1bc106ecc8..578a253d6e0e133e19592553fc873b3e73f9f218
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -25229,15 +25229,6 @@ aarch64_move_pointer (rtx pointer, poly_int64 amount)
 next, amount);
 }
 
-/* Return a new RTX holding the result of moving POINTER forward by the
-   size of the mode it points to.  */
-
-static rtx
-aarch64_progress_pointer (rtx pointer)
-{
-  return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer)));
-}
-
 /* Copy one block of size MODE from SRC to DST at offset OFFSET.  */
 
 static void
@@ -25393,46 +25384,22 @@ aarch64_expand_cpymem (rtx *operands, bool is_memmove)
   return true;
 }
 
-/* Like aarch64_copy_one_block_and_progress_pointers, except for memset where
-   SRC is a register we have created with the duplicated value to be set.  */
+/* Set one block of size MODE at DST at offset OFFSET to value in SRC.  */
 static void
-aarch64_set_one_block_and_progress_pointer (rtx src, rtx *dst,
-   machine_mode mode)
-{
-  /* If we are copying 128bits or 256bits, we can do that straight from
- the SIMD register we prepared.  */
-  if (known_eq (GET_MODE_BITSIZE (mode), 256))
-    {
-  mode = GET_MODE (src);
-  /* "Cast" the *dst to the correct mode.  */
-  *dst = adjust_address (*dst, mode, 0);
-  /* Emit the memset.  */
-  emit_insn (aarch64_gen_store_pair (mode, *dst, src,
-    aarch64_progress_pointer (*dst), src));
-
-  /* Move the pointers forward.  */
-  *dst = aarch64_move_pointer (*dst, 32);
-  return;
-    }
-  if (known_eq (GET_MODE_BITSIZE (mode), 128))
+aarch64_set_one_block (rtx src, rtx dst, int offset, machine_mode mode)
+{
+  /* Emit explict store pair instructions for 32-byte writes.  */
+  if (known_eq (GET_MODE_SIZE (mode), 32))
 {
-  /* "Cast" the *dst to the correct mode.  */
-  *dst = adjust_address (*dst, GET_MODE (src), 0);
-  /* Emit the memset.  */
-  emit_move_insn (*dst, src);
-  /* Move the pointers forward.  */
-  *dst = aarch64_move_pointer (*dst, 16);
+  mode = V16QImode;
+  rtx dst1 = adjust_address (dst, mode, offset);
+  rtx dst2 = adjust_address (dst, mode, offset + 16);
+  emit_insn (aarch64_gen_store_pair (mode, dst1, src, dst2, src));
   return;
 }
-  /* For copying less, we have to extract the right amount from src.  */
-  rtx reg = lowpart_subreg (mode, src, GET_MODE (src));
-
-  /* "Cast" the *dst to the correct mode.  */
-  *dst = adjust_address (*dst, mode, 0);
-  /* Emit the memset.  */
-  emit_move_insn (*dst, reg);
-  /* Move the pointer forward.  */
-  *dst = aarch64_progress_pointer (*dst);
+  if (known_lt (GET_MODE_SIZE (mode), 16))
+    src = lowpart_subreg (mode, src, GET_MODE (src));
+  emit_move_insn (adjust_address (dst, mode, offset), src);
 }
 
 /* Expand a setmem using the MOPS instructions.  OPERANDS are the same
@@ -25461,7 +25428,7 @@ aarch64_expand_setmem_mops (rtx *operands)
 bool
 aarch64_expand_setmem (rtx *operands)
 {
-  int n, mode_bits;
+  int mode_bytes;
   unsigned HOST_WIDE_INT len;
   rtx dst = operands[0];
   rtx val = operands[2], src;
@@ -25474,104 +25441,70 @@ aarch64_expand_setmem (rtx *operands)
   || (STRICT_ALIGNMENT && align < 16))
 return aarch64_expand_setmem_mops (operands);
 
-  bool size_p = optimize_function_for_size_p (cfun);
-
   /* Default the maximum to 256-bytes when considering only libcall vs
  SIMD broadcast sequence.  */
   unsigned max_set_size = 256;
   unsigned mops_threshold = aarch64_mops_memset_size_threshold;
 
+  /* Reduce the maximum size with -Os.  */
+  if (optimize_function_for_size_p (cfun))
+    max_set_size = 96;
+
   len = UINTVAL (operands[1]);
 
   /* Large memset uses MOPS when available or a library call.  */
   if (len > max_set_size || (TARGET_MOPS && len > mops_threshold))
 return aarch64_expand_setmem_mops (operands);
 
-  int cst_val = !!(CONST_INT_P (val) && (INTVAL (val) != 0));
-  /* The MOPS sequence takes:
- 3 instructions for the memory storing
- + 1 to move the constant size into a reg
- + 1 if VAL is a non-zero constant to move into a reg
-    (zero constants can use XZR directly).  */
-  unsigned mops_cost = 3 + 1 + cst_val;
-  /* A libcall to memset in the 

Re: [PATCH] AArch64: Fix __sync_val_compare_and_swap [PR111404]

2023-11-06 Thread Wilco Dijkstra

 
ping
 

__sync_val_compare_and_swap may be used on 128-bit types and either calls the
outline atomic code or uses an inline loop.  On AArch64 LDXP is only atomic if
the value is stored successfully using STXP, but the current implementations
do not perform the store if the comparison fails.  In this case the value
returned is not read atomically.

Passes regress/bootstrap, OK for commit?

gcc/ChangeLog/
    PR target/111404
    * config/aarch64/aarch64.cc (aarch64_split_compare_and_swap):
    For 128-bit store the loaded value and loop if needed.

libgcc/ChangeLog/
    PR target/111404
    * config/aarch64/lse.S (__aarch64_cas16_acq_rel): Execute STLXP using
    either new value or loaded value.

---

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
5e8d0a0c91bc7719de2a8c5627b354cf905a4db0..c44c0b979d0cc3755c61dcf566cfddedccebf1ea
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -23413,11 +23413,11 @@ aarch64_split_compare_and_swap (rtx operands[])
   mem = operands[1];
   oldval = operands[2];
   newval = operands[3];
-  is_weak = (operands[4] != const0_rtx);
   model_rtx = operands[5];
   scratch = operands[7];
   mode = GET_MODE (mem);
   model = memmodel_from_int (INTVAL (model_rtx));
+  is_weak = operands[4] != const0_rtx && mode != TImode;
 
   /* When OLDVAL is zero and we want the strong version we can emit a tighter
 loop:
@@ -23478,6 +23478,33 @@ aarch64_split_compare_and_swap (rtx operands[])
   else
 aarch64_gen_compare_reg (NE, scratch, const0_rtx);
 
+  /* 128-bit LDAXP is not atomic unless STLXP succeeds.  So for a mismatch,
+ store the returned value and loop if the STLXP fails.  */
+  if (mode == TImode)
+    {
+  rtx_code_label *label3 = gen_label_rtx ();
+  emit_jump_insn (gen_rtx_SET (pc_rtx, gen_rtx_LABEL_REF (Pmode, label3)));
+  emit_barrier ();
+
+  emit_label (label2);
+  aarch64_emit_store_exclusive (mode, scratch, mem, rval, model_rtx);
+
+  if (aarch64_track_speculation)
+   {
+ /* Emit an explicit compare instruction, so that we can correctly
+    track the condition codes.  */
+ rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
+ x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
+   }
+  else
+   x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+   gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
+  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+
+  label2 = label3;
+    }
+
   emit_label (label2);
 
   /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
index 
dde3a28e07b13669533dfc5e8fac0a9a6ac33dbd..ba05047ff02b6fc5752235bffa924fc4a2f48c04
 100644
--- a/libgcc/config/aarch64/lse.S
+++ b/libgcc/config/aarch64/lse.S
@@ -160,6 +160,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
If not, see
 #define tmp0    16
 #define tmp1    17
 #define tmp2    15
+#define tmp3   14
+#define tmp4   13
 
 #define BTI_C   hint    34
 
@@ -233,10 +235,11 @@ STARTFN   NAME(cas)
 0:  LDXP    x0, x1, [x4]
 cmp x0, x(tmp0)
 ccmp    x1, x(tmp1), #0, eq
-   bne 1f
-   STXP    w(tmp2), x2, x3, [x4]
-   cbnz    w(tmp2), 0b
-1: BARRIER
+   csel    x(tmp2), x2, x0, eq
+   csel    x(tmp3), x3, x1, eq
+   STXP    w(tmp4), x(tmp2), x(tmp3), [x4]
+   cbnz    w(tmp4), 0b
+   BARRIER
 ret
 
 #endif

Re: [PATCH] libatomic: Improve ifunc selection on AArch64

2023-11-06 Thread Wilco Dijkstra
 

ping


From: Wilco Dijkstra
Sent: 04 August 2023 16:05
To: GCC Patches ; Richard Sandiford 

Cc: Kyrylo Tkachov 
Subject: [PATCH] libatomic: Improve ifunc selection on AArch64 
 

Add support for ifunc selection based on CPUID register.  Neoverse N1 supports
atomic 128-bit load/store, so use the FEAT_USCAT ifunc like newer Neoverse
cores.

Passes regress, OK for commit?

libatomic/
    config/linux/aarch64/host-config.h (ifunc1): Use CPUID in ifunc
    selection.

---

diff --git a/libatomic/config/linux/aarch64/host-config.h 
b/libatomic/config/linux/aarch64/host-config.h
index 
851c78c01cd643318aaa52929ce4550266238b79..e5dc33c030a4bab927874fa6c69425db463fdc4b
 100644
--- a/libatomic/config/linux/aarch64/host-config.h
+++ b/libatomic/config/linux/aarch64/host-config.h
@@ -26,7 +26,7 @@
 
 #ifdef HWCAP_USCAT
 # if N == 16
-#  define IFUNC_COND_1 (hwcap & HWCAP_USCAT)
+#  define IFUNC_COND_1 ifunc1 (hwcap)
 # else
 #  define IFUNC_COND_1  (hwcap & HWCAP_ATOMICS)
 # endif
@@ -50,4 +50,28 @@
 #undef MAYBE_HAVE_ATOMIC_EXCHANGE_16
 #define MAYBE_HAVE_ATOMIC_EXCHANGE_16   1
 
+#ifdef HWCAP_USCAT
+
+#define MIDR_IMPLEMENTOR(midr) (((midr) >> 24) & 255)
+#define MIDR_PARTNUM(midr) (((midr) >> 4) & 0xfff)
+
+static inline bool
+ifunc1 (unsigned long hwcap)
+{
+  if (hwcap & HWCAP_USCAT)
+    return true;
+  if (!(hwcap & HWCAP_CPUID))
+    return false;
+
+  unsigned long midr;
+  asm volatile ("mrs %0, midr_el1" : "=r" (midr));
+
+  /* Neoverse N1 supports atomic 128-bit load/store.  */
+  if (MIDR_IMPLEMENTOR (midr) == 'A' && MIDR_PARTNUM(midr) == 0xd0c)
+    return true;
+
+  return false;
+}
+#endif
+
 #include_next 

Re: [PATCH] libatomic: Enable lock-free 128-bit atomics on AArch64 [PR110061]

2023-11-06 Thread Wilco Dijkstra


ping

From: Wilco Dijkstra
Sent: 02 June 2023 18:28
To: GCC Patches 
Cc: Richard Sandiford ; Kyrylo Tkachov 

Subject: [PATCH] libatomic: Enable lock-free 128-bit atomics on AArch64 
[PR110061] 
 

Enable lock-free 128-bit atomics on AArch64.  This is backwards compatible with
existing binaries, gives better performance than locking atomics and is what
most users expect.

Note 128-bit atomic loads use a load/store exclusive loop if LSE2 is not
supported.  This results in an implicit store which is invisible to software
as long as the given address is writeable (which will be true when using
atomics in actual code).

A simple test on an old Cortex-A72 showed 2.7x speedup of 128-bit atomics.

Passes regress, OK for commit?

libatomic/
    PR target/110061
    config/linux/aarch64/atomic_16.S: Implement lock-free ARMv8.0 atomics.
    config/linux/aarch64/host-config.h: Use atomic_16.S for baseline v8.0.
    State we have lock-free atomics.

---

diff --git a/libatomic/config/linux/aarch64/atomic_16.S 
b/libatomic/config/linux/aarch64/atomic_16.S
index 
05439ce394b9653c9bcb582761ff7aaa7c8f9643..0485c284117edf54f41959d2fab9341a9567b1cf
 100644
--- a/libatomic/config/linux/aarch64/atomic_16.S
+++ b/libatomic/config/linux/aarch64/atomic_16.S
@@ -22,6 +22,21 @@
    .  */
 
 
+/* AArch64 128-bit lock-free atomic implementation.
+
+   128-bit atomics are now lock-free for all AArch64 architecture versions.
+   This is backwards compatible with existing binaries and gives better
+   performance than locking atomics.
+
+   128-bit atomic loads use an exclusive loop if LSE2 is not supported.
+   This results in an implicit store which is invisible to software as long
+   as the given address is writeable.  Since all other atomics have explicit
+   writes, this will be true when using atomics in actual code.
+
+   The libat_<op>_16 entry points are ARMv8.0.
+   The libat_<op>_16_i1 entry points are used when LSE2 is available.  */
+
+
 .arch   armv8-a+lse
 
 #define ENTRY(name) \
@@ -37,6 +52,10 @@ name:    \
 .cfi_endproc;   \
 .size name, .-name;
 
+#define ALIAS(alias,name)  \
+   .global alias;  \
+   .set alias, name;
+
 #define res0 x0
 #define res1 x1
 #define in0  x2
@@ -70,6 +89,24 @@ name:    \
 #define SEQ_CST 5
 
 
+ENTRY (libat_load_16)
+   mov x5, x0
+   cbnz    w1, 2f
+
+   /* RELAXED.  */
+1: ldxp    res0, res1, [x5]
+   stxp    w4, res0, res1, [x5]
+   cbnz    w4, 1b
+   ret
+
+   /* ACQUIRE/CONSUME/SEQ_CST.  */
+2: ldaxp   res0, res1, [x5]
+   stxp    w4, res0, res1, [x5]
+   cbnz    w4, 2b
+   ret
+END (libat_load_16)
+
+
 ENTRY (libat_load_16_i1)
 cbnz    w1, 1f
 
@@ -93,6 +130,23 @@ ENTRY (libat_load_16_i1)
 END (libat_load_16_i1)
 
 
+ENTRY (libat_store_16)
+   cbnz    w4, 2f
+
+   /* RELAXED.  */
+1: ldxp    xzr, tmp0, [x0]
+   stxp    w4, in0, in1, [x0]
+   cbnz    w4, 1b
+   ret
+
+   /* RELEASE/SEQ_CST.  */
+2: ldxp    xzr, tmp0, [x0]
+   stlxp   w4, in0, in1, [x0]
+   cbnz    w4, 2b
+   ret
+END (libat_store_16)
+
+
 ENTRY (libat_store_16_i1)
 cbnz    w4, 1f
 
@@ -101,14 +155,14 @@ ENTRY (libat_store_16_i1)
 ret
 
 /* RELEASE/SEQ_CST.  */
-1: ldaxp   xzr, tmp0, [x0]
+1: ldxp    xzr, tmp0, [x0]
 stlxp   w4, in0, in1, [x0]
 cbnz    w4, 1b
 ret
 END (libat_store_16_i1)
 
 
-ENTRY (libat_exchange_16_i1)
+ENTRY (libat_exchange_16)
 mov x5, x0
 cbnz    w4, 2f
 
@@ -126,22 +180,55 @@ ENTRY (libat_exchange_16_i1)
 stxp    w4, in0, in1, [x5]
 cbnz    w4, 3b
 ret
-4:
-   cmp w4, RELEASE
-   b.ne    6f
 
-   /* RELEASE.  */
-5: ldxp    res0, res1, [x5]
+   /* RELEASE/ACQ_REL/SEQ_CST.  */
+4: ldaxp   res0, res1, [x5]
 stlxp   w4, in0, in1, [x5]
-   cbnz    w4, 5b
+   cbnz    w4, 4b
 ret
+END (libat_exchange_16)
 
-   /* ACQ_REL/SEQ_CST.  */
-6: ldaxp   res0, res1, [x5]
-   stlxp   w4, in0, in1, [x5]
-   cbnz    w4, 6b
+
+ENTRY (libat_compare_exchange_16)
+   ldp exp0, exp1, [x1]
+   cbz w4, 3f
+   cmp w4, RELEASE
+   b.hs    4f
+
+   /* ACQUIRE/CONSUME.  */
+1: ldaxp   tmp0, tmp1, [x0]
+   cmp tmp0, exp0
+   ccmp    tmp1, exp1, 0, eq
+   bne 2f
+   stxp    w4, in0, in1, [x0]
+   cbnz    w4, 1b
+   mov x0, 1
 ret
-END (libat_exchange_16_i1)
+
+2: stp tmp0, tmp1, [x1]
+   mov x0, 0
+   ret
+
+   /* RELAXED.  */
+3: ldxp    tmp0, tmp1, [x0]
+   cmp tmp0, exp0
+   ccmp    tmp1, exp1, 0, eq
+   bne 2b
+   stxp    w4, in0, in1, [x0]
+   cbnz    w4, 3b
+   mov x0, 1
+   ret
+
+   /* RELEASE/ACQ_REL/SEQ_CST.  */
+4: ldaxp   tmp0, tmp1, 

Re: [PATCH] libstdc++/112351 - deal with __gthread_once failure during locale init

2023-11-06 Thread Jonathan Wakely
On Mon, 6 Nov 2023 at 11:52, Richard Biener  wrote:
>
> The following makes the C++98 locale init path follow the way the
> C++11 performs initialization.  This way we deal with pthread_once
> failing, falling back to non-threadsafe initialization which, given we
> initialize from the library, should be serialized by the dynamic
> loader already.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, OK for trunk?
> And GCC 13 branch?
>
> Thanks,
> Richard.
>
> PR libstdc++/112351
> libstdc++-v3/
> * src/c++98/locale.cc (locale::facet::_S_get_c_locale):
> Always perform non-threadsafe init when threadsafe init
> failed.
> ---
>  libstdc++-v3/src/c++98/locale.cc | 7 ++-
>  1 file changed, 2 insertions(+), 5 deletions(-)
>
> diff --git a/libstdc++-v3/src/c++98/locale.cc 
> b/libstdc++-v3/src/c++98/locale.cc
> index d308140bab7..e9bec1db3b6 100644
> --- a/libstdc++-v3/src/c++98/locale.cc
> +++ b/libstdc++-v3/src/c++98/locale.cc
> @@ -216,12 +216,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  #ifdef __GTHREADS
>  if (__gthread_active_p())
>__gthread_once(&_S_once, _S_initialize_once);
> -else
>  #endif
> -  {
> -   if (!_S_c_locale)
> - _S_initialize_once();
> -  }
> +if (__builtin_expect (!_S_c_locale, 0))
> +  _S_initialize_once();
>  return _S_c_locale;
>}


I think this has a problem, which is handled correctly in
src/c++11/locale_init.cc by checking _S_classic inside the
_S_initialize_once function.

If the first call to __gthread_once does fail then _S_once will not be
changed. We will fall through to calling _S_initialize_once directly
(which is not thread-safe) and set _S_c_locale.

The next time we call _S_initialize, __gthread_once will try to run
again, and because _S_once was not changed, it might call
_S_initialize_once() again, which writes to _S_c_locale again
(possibly causing a data race).

I don't think the slightly different code in src/c++11/locale_init.cc
is different in order to handle __gthread_once failing, I think it's
different because the effects of locale::facet::_S_initialize_once()
and locale::_S_initialize_once() are different. One is safe to call
more than once, and the other isn't.

I don't think we need to care about __gthread_once failing at all, do
we? There are no error conditions for pthread_once, it always returns
0 (previous POSIX revisions said it could return EINVAL for an
uninitialized pthread_once_t but that can't happen here as it's
correctly initialized in src/c++11/locale.cc). Is the concern that it
can fail for non-posix thread models? (I didn't check if any of them
can actually fail)



Re: [PATCH] libstdc++/112351 - deal with __gthread_once failure during locale init

2023-11-06 Thread Jakub Jelinek
On Mon, Nov 06, 2023 at 11:52:08AM +, Richard Biener wrote:
> The following makes the C++98 locale init path follow the way the
> C++11 performs initialization.  This way we deal with pthread_once
> failing, falling back to non-threadsafe initialization which, given we
> initialize from the library, should be serialized by the dynamic
> loader already.
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu, OK for trunk?
> And GCC 13 branch?
> 
> Thanks,
> Richard.
> 
>   PR libstdc++/112351
> libstdc++-v3/
>   * src/c++98/locale.cc (locale::facet::_S_get_c_locale):
>   Always perform non-threadsafe init when threadsafe init
>   failed.
> ---
>  libstdc++-v3/src/c++98/locale.cc | 7 ++-
>  1 file changed, 2 insertions(+), 5 deletions(-)
> 
> diff --git a/libstdc++-v3/src/c++98/locale.cc 
> b/libstdc++-v3/src/c++98/locale.cc
> index d308140bab7..e9bec1db3b6 100644
> --- a/libstdc++-v3/src/c++98/locale.cc
> +++ b/libstdc++-v3/src/c++98/locale.cc
> @@ -216,12 +216,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  #ifdef __GTHREADS
>  if (__gthread_active_p())
>__gthread_once(&_S_once, _S_initialize_once);
> -else
>  #endif
> -  {
> - if (!_S_c_locale)
> -   _S_initialize_once();
> -  }
> +if (__builtin_expect (!_S_c_locale, 0))
> +  _S_initialize_once();
>  return _S_c_locale;

Wouldn't it be better to just test __gthread_once return value
#ifdef __THREADS
  if ((!__gthread_active_p()
   || __gthread_once(&_S_once, _S_initialize_once)))
#endif
if (!_S_c_locale)
  _S_initialize_once();
?

Jakub



Re: [PATCH] libstdc++/112351 - deal with __gthread_once failure during locale init

2023-11-06 Thread Jonathan Wakely
On Mon, 6 Nov 2023 at 12:16, Jakub Jelinek  wrote:
>
> On Mon, Nov 06, 2023 at 11:52:08AM +, Richard Biener wrote:
> > The following makes the C++98 locale init path follow the way the
> > C++11 performs initialization.  This way we deal with pthread_once
> > failing, falling back to non-threadsafe initialization which, given we
> > initialize from the library, should be serialized by the dynamic
> > loader already.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, OK for trunk?
> > And GCC 13 branch?
> >
> > Thanks,
> > Richard.
> >
> >   PR libstdc++/112351
> > libstdc++-v3/
> >   * src/c++98/locale.cc (locale::facet::_S_get_c_locale):
> >   Always perform non-threadsafe init when threadsafe init
> >   failed.
> > ---
> >  libstdc++-v3/src/c++98/locale.cc | 7 ++-
> >  1 file changed, 2 insertions(+), 5 deletions(-)
> >
> > diff --git a/libstdc++-v3/src/c++98/locale.cc 
> > b/libstdc++-v3/src/c++98/locale.cc
> > index d308140bab7..e9bec1db3b6 100644
> > --- a/libstdc++-v3/src/c++98/locale.cc
> > +++ b/libstdc++-v3/src/c++98/locale.cc
> > @@ -216,12 +216,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >  #ifdef __GTHREADS
> >  if (__gthread_active_p())
> >__gthread_once(&_S_once, _S_initialize_once);
> > -else
> >  #endif
> > -  {
> > - if (!_S_c_locale)
> > -   _S_initialize_once();
> > -  }
> > +if (__builtin_expect (!_S_c_locale, 0))
> > +  _S_initialize_once();
> >  return _S_c_locale;
>
> Wouldn't it be better to just test __gthread_once return value
> #ifdef __THREADS
>   if ((!__gthread_active_p()
>|| __gthread_once(&_S_once, _S_initialize_once)))
> #endif
> if (!_S_c_locale)
>   _S_initialize_once();
> ?


This still has the problem of calling the function twice, once because
__gthread_once fails and once because it succeeds.



[PATCH 1/3] attribs: Cache the gnu namespace

2023-11-06 Thread Richard Sandiford
Later patches add more calls to get_attribute_namespace.
For scoped attributes, this is a simple operation on tree pointers.
But for normal GNU attributes (the vast majority), it involves a
call to get_identifier ("gnu").  This patch caches the identifier
for speed.

Admittedly I'm just going off gut instinct here.  I'm happy to drop
the patch if this doesn't seem worth a new GC root.

Tested on aarch64-linux-gnu & x86_64-linux-gnu.  OK to install?

Richard

gcc/
* Makefile.in (GTFILES): Add attribs.cc.
* attribs.cc (gnu_namespace_cache): New variable.
(get_gnu_namespace): New function.
(lookup_attribute_spec): Use it instead of get_identifier ("gnu").
(get_attribute_namespace, attribs_cc_tests): Likewise.
---
 gcc/Makefile.in |  3 ++-
 gcc/attribs.cc  | 19 +--
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 29cec21c825..a128ff76c07 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2812,7 +2812,8 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h 
$(srcdir)/coretypes.h \
   $(srcdir)/symtab-thunks.h $(srcdir)/symtab-thunks.cc \
   $(srcdir)/symtab-clones.h \
   $(srcdir)/reload.h $(srcdir)/caller-save.cc $(srcdir)/symtab.cc \
-  $(srcdir)/alias.cc $(srcdir)/bitmap.cc $(srcdir)/cselib.cc 
$(srcdir)/cgraph.cc \
+  $(srcdir)/alias.cc $(srcdir)/attribs.cc \
+  $(srcdir)/bitmap.cc $(srcdir)/cselib.cc $(srcdir)/cgraph.cc \
   $(srcdir)/ipa-prop.cc $(srcdir)/ipa-cp.cc $(srcdir)/ipa-utils.h \
   $(srcdir)/ipa-param-manipulation.h $(srcdir)/ipa-sra.cc \
   $(srcdir)/ipa-modref.h $(srcdir)/ipa-modref.cc \
diff --git a/gcc/attribs.cc b/gcc/attribs.cc
index 6725fe78f2c..6c70110e350 100644
--- a/gcc/attribs.cc
+++ b/gcc/attribs.cc
@@ -102,6 +102,19 @@ static const struct attribute_spec 
*lookup_scoped_attribute_spec (const_tree,
 
 static bool attributes_initialized = false;
 
+/* Do not use directly; go through get_gnu_namespace instead.  */
+static GTY(()) tree gnu_namespace_cache;
+
+/* Return the IDENTIFIER_NODE for the gnu namespace.  */
+
+static tree
+get_gnu_namespace ()
+{
+  if (!gnu_namespace_cache)
+gnu_namespace_cache = get_identifier ("gnu");
+  return gnu_namespace_cache;
+}
+
 /* Return base name of the attribute.  Ie '__attr__' is turned into 'attr'.
To avoid need for copying, we simply return length of the string.  */
 
@@ -403,7 +416,7 @@ lookup_attribute_spec (const_tree name)
   name = TREE_VALUE (name);
 }
   else
-ns = get_identifier ("gnu");
+ns = get_gnu_namespace ();
   return lookup_scoped_attribute_spec (ns, name);
 }
 
@@ -420,7 +433,7 @@ get_attribute_namespace (const_tree attr)
 {
   if (cxx11_attribute_p (attr))
 return TREE_PURPOSE (TREE_PURPOSE (attr));
-  return get_identifier ("gnu");
+  return get_gnu_namespace ();
 }
 
 /* Check LAST_DECL and NODE of the same symbol for attributes that are
@@ -2689,3 +2702,5 @@ attribs_cc_tests ()
 } /* namespace selftest */
 
 #endif /* CHECKING_P */
+
+#include "gt-attribs.h"
-- 
2.25.1



[PATCH 2/3] attribs: Consider namespaces when comparing attributes

2023-11-06 Thread Richard Sandiford
decl_attributes and comp_type_attributes both had code that
iterated over one list of attributes and looked for corresponding
attributes in another list.  This patch makes those lookups
namespace-aware.

Tested on aarch64-linux-gnu & x86_64-linux-gnu.  OK to install?

Richard


gcc/
* attribs.cc (find_same_attribute): New function.
(decl_attributes, comp_type_attributes): Use it when looking
up one list's attributes in another list.
---
 gcc/attribs.cc | 29 +++--
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/gcc/attribs.cc b/gcc/attribs.cc
index 6c70110e350..c23ed3bac91 100644
--- a/gcc/attribs.cc
+++ b/gcc/attribs.cc
@@ -583,6 +583,23 @@ attribute_ignored_p (const attribute_spec *const as)
   return as->max_length == -2;
 }
 
+/* See whether LIST contains at least one instance of attribute ATTR
+   (possibly with different arguments).  Return the first such attribute
+   if so, otherwise return null.  */
+
+static tree
+find_same_attribute (const_tree attr, tree list)
+{
+  if (list == NULL_TREE)
+return NULL_TREE;
+  tree ns = get_attribute_namespace (attr);
+  tree name = get_attribute_name (attr);
+  return private_lookup_attribute (ns ? IDENTIFIER_POINTER (ns) : nullptr,
+  IDENTIFIER_POINTER (name),
+  ns ? IDENTIFIER_LENGTH (ns) : 0,
+  IDENTIFIER_LENGTH (name), list);
+}
+
 /* Process the attributes listed in ATTRIBUTES and install them in *NODE,
which is either a DECL (including a TYPE_DECL) or a TYPE.  If a DECL,
it should be modified in place; if a TYPE, a copy should be created
@@ -912,9 +929,9 @@ decl_attributes (tree *node, tree attributes, int flags,
  else
old_attrs = TYPE_ATTRIBUTES (*anode);
 
- for (a = lookup_attribute (spec->name, old_attrs);
+ for (a = find_same_attribute (attr, old_attrs);
   a != NULL_TREE;
-  a = lookup_attribute (spec->name, TREE_CHAIN (a)))
+  a = find_same_attribute (attr, TREE_CHAIN (a)))
{
  if (simple_cst_equal (TREE_VALUE (a), args) == 1)
break;
@@ -945,8 +962,8 @@ decl_attributes (tree *node, tree attributes, int flags,
  if (TYPE_ATTRIBUTES (variant) == old_attrs)
TYPE_ATTRIBUTES (variant)
  = TYPE_ATTRIBUTES (*anode);
- else if (!lookup_attribute
-  (spec->name, TYPE_ATTRIBUTES (variant)))
+ else if (!find_same_attribute
+  (attr, TYPE_ATTRIBUTES (variant)))
TYPE_ATTRIBUTES (variant) = tree_cons
  (name, args, TYPE_ATTRIBUTES (variant));
}
@@ -1459,7 +1476,7 @@ comp_type_attributes (const_tree type1, const_tree type2)
   if (!as || as->affects_type_identity == false)
continue;
 
-  attr = lookup_attribute (as->name, CONST_CAST_TREE (a2));
+  attr = find_same_attribute (a, CONST_CAST_TREE (a2));
   if (!attr || !attribute_value_equal (a, attr))
break;
 }
@@ -1473,7 +1490,7 @@ comp_type_attributes (const_tree type1, const_tree type2)
  if (!as || as->affects_type_identity == false)
continue;
 
- if (!lookup_attribute (as->name, CONST_CAST_TREE (a1)))
+ if (!find_same_attribute (a, CONST_CAST_TREE (a1)))
break;
  /* We don't need to compare trees again, as we did this
 already in first loop.  */
-- 
2.25.1



[PATCH 3/3] attribs: Namespace-aware lookup_attribute_spec

2023-11-06 Thread Richard Sandiford
attribute_ignored_p already used a namespace-aware query
to find the attribute_spec for an existing attribute:

  const attribute_spec *as = lookup_attribute_spec (TREE_PURPOSE (attr));

This patch does the same for other callers in the file.

Tested on aarch64-linux-gnu & x86_64-linux-gnu.  OK to install?

Richard


gcc/
* attribs.cc (comp_type_attributes): Pass the full TREE_PURPOSE
to lookup_attribute_spec, rather than just the name.
(remove_attributes_matching): Likewise.
---
 gcc/attribs.cc | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/gcc/attribs.cc b/gcc/attribs.cc
index c23ed3bac91..a2935b8101f 100644
--- a/gcc/attribs.cc
+++ b/gcc/attribs.cc
@@ -1472,7 +1472,7 @@ comp_type_attributes (const_tree type1, const_tree type2)
   const struct attribute_spec *as;
   const_tree attr;
 
-  as = lookup_attribute_spec (get_attribute_name (a));
+  as = lookup_attribute_spec (TREE_PURPOSE (a));
   if (!as || as->affects_type_identity == false)
continue;
 
@@ -1486,7 +1486,7 @@ comp_type_attributes (const_tree type1, const_tree type2)
{
  const struct attribute_spec *as;
 
- as = lookup_attribute_spec (get_attribute_name (a));
+ as = lookup_attribute_spec (TREE_PURPOSE (a));
  if (!as || as->affects_type_identity == false)
continue;
 
@@ -1528,8 +1528,7 @@ remove_attributes_matching (tree attrs, Predicate 
predicate)
   const_tree start = attrs;
   for (const_tree attr = attrs; attr; attr = TREE_CHAIN (attr))
 {
-  tree name = get_attribute_name (attr);
-  const attribute_spec *as = lookup_attribute_spec (name);
+  const attribute_spec *as = lookup_attribute_spec (TREE_PURPOSE (attr));
   const_tree end;
   if (!predicate (attr, as))
end = attr;
-- 
2.25.1



[PATCH] RISC-V: Early expand DImode vec_duplicate in RV32 system

2023-11-06 Thread Juzhe-Zhong
An ICE was discovered in recent rounding autovec support:

config/riscv/riscv-v.cc:4314
   65 | }
  | ^
0x1fa5223 riscv_vector::validate_change_or_fail(rtx_def*, rtx_def**,
rtx_def*, bool)

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-v.cc:4314
0x1fb1aa2 pre_vsetvl::remove_avl_operand()

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3342
0x1fb18c1 pre_vsetvl::cleaup()

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3308
0x1fb216d pass_vsetvl::lazy_vsetvl()

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3480
0x1fb2214 pass_vsetvl::execute(function*)

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3504

The root cause is that RA reload generates (set (reg) (vec_duplicate:DI)).
However, that is not valid on RV32 systems, since RV32 has no single
instruction that broadcasts a DI scalar.
We should expand it early on RV32 systems.

gcc/ChangeLog:

* config/riscv/predicates.md: Refine predicate.
* config/riscv/riscv-protos.h (can_be_broadcasted_p): New function.
* config/riscv/riscv-v.cc (can_be_broadcasted_p): Ditto.
* config/riscv/vector.md (vec_duplicate): New pattern.
(*vec_duplicate): Adapt pattern.

---
 gcc/config/riscv/predicates.md  |  9 +
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv-v.cc | 20 
 gcc/config/riscv/vector.md  | 20 +++-
 4 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index db18054607f..df1c66f3a76 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -553,14 +553,7 @@
 
 ;; The scalar operand can be directly broadcast by RVV instructions.
 (define_predicate "direct_broadcast_operand"
-  (and (match_test "!(reload_completed && !FLOAT_MODE_P (GET_MODE (op))
-   && (register_operand (op, GET_MODE (op)) || CONST_INT_P (op)
-   || rtx_equal_p (op, CONST0_RTX (GET_MODE (op
-   && maybe_gt (GET_MODE_BITSIZE (GET_MODE (op)), GET_MODE_BITSIZE 
(Pmode)))")
-(ior (match_test "rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))")
- (ior (match_code "const_int,const_poly_int")
-  (ior (match_operand 0 "register_operand")
-   (match_test "satisfies_constraint_Wdm (op)"))
+  (match_test "riscv_vector::can_be_broadcasted_p (op)"))
 
 ;; A CONST_INT operand that has exactly two bits cleared.
 (define_predicate "const_nottwobits_operand"
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 6cbf2130f88..acae00f653f 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -595,6 +595,7 @@ uint8_t get_sew (rtx_insn *);
 enum vlmul_type get_vlmul (rtx_insn *);
 int count_regno_occurrences (rtx_insn *, unsigned int);
 bool imm_avl_p (machine_mode);
+bool can_be_broadcasted_p (rtx);
 }
 
 /* We classify builtin types into two classes:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 80d2bb9e289..a64946213c3 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4417,4 +4417,24 @@ count_regno_occurrences (rtx_insn *rinsn, unsigned int 
regno)
   return count;
 }
 
+/* Return true if the OP can be directly broadcasted.  */
+bool
+can_be_broadcasted_p (rtx op)
+{
+  machine_mode mode = GET_MODE (op);
+  /* We don't allow RA (register allocation) reload to generate
+(vec_duplicate:DI reg) in RV32 system whereas we allow
+(vec_duplicate:DI mem) in RV32 system.  */
+  if (!can_create_pseudo_p () && !FLOAT_MODE_P (mode)
+  && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode))
+  && !satisfies_constraint_Wdm (op))
+return false;
+
+  if (satisfies_constraint_K (op) || register_operand (op, mode)
+  || satisfies_constraint_Wdm (op) || rtx_equal_p (op, CONST0_RTX (mode)))
+return true;
+
+  return can_create_pseudo_p () && nonmemory_operand (op, mode);
+}
+
 } // namespace riscv_vector
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 8509c4fe5f2..e23f64938b7 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1370,11 +1370,29 @@
 ;;  Duplicate Operations
 ;; -
 
+(define_expand "vec_duplicate"
+  [(set (match_operand:V_VLS 0 "register_operand")
+(vec_duplicate:V_VLS
+  (match_operand: 1 "direct_broadcast_operand")))]
+  "TARGET_VECTOR"
+  {
+/* Early expand DImode broadcast in RV32 system to avoid RA reload
+   generate (set (reg) (vec_duplicate:DI)).  */
+if (maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode)))
+  {
+riscv_vector::emit_

Ping: [PATCH] Allow target attributes in non-gnu namespaces

2023-11-06 Thread Richard Sandiford
This is a ping+rebase of the patch below.  I've also optimised the
handling of ignored attributes so that we don't register empty tables.
There was also a typo in the jit changes (which I had tested, but the
typo didn't seem to cause a failure).

Retested on aarch64-linux-gnu & x86_64-linux-gnu.  The original was
also tested on the full target list in config-list.mk.

Iain has already approved the D parts (thanks!).  OK for the rest?

And sorry to be pinging something when I'm behind on reviews myself...

---

Currently there are four static sources of attributes:

- LANG_HOOKS_ATTRIBUTE_TABLE
- LANG_HOOKS_COMMON_ATTRIBUTE_TABLE
- LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE
- TARGET_ATTRIBUTE_TABLE

All of the attributes in these tables go in the "gnu" namespace.
This means that they can use the traditional GNU __attribute__((...))
syntax and the standard [[gnu::...]] syntax.

Standard attributes are registered dynamically with a null namespace.
There are no supported attributes in other namespaces (clang, vendor
namespaces, etc.).

This patch tries to generalise things by making the namespace
part of the attribute specification.

It's usual for multiple attributes to be defined in the same namespace,
so rather than adding the namespace to each individual definition,
it seemed better to group attributes in the same namespace together.
This would also allow us to reuse the same table for clang attributes
that are written with the GNU syntax, or other similar situations
where the attribute can be accessed via multiple "spellings".

The patch therefore adds a scoped_attribute_specs that contains
a namespace and a list of attributes in that namespace.

It's still possible to have multiple scoped_attribute_specs
for the same namespace.  E.g. it makes sense to keep the
C++-specific, C/C++-common, and format-related attributes in
separate tables, even though they're all GNU attributes.

Current lists of attributes are terminated by a null name.
Rather than keep that for the new structure, it seemed neater
to use an array_slice.  This also makes the tables slightly more
compact.

In general, a target might want to support attributes in multiple
namespaces.  Rather than have a separate hook for each possibility
(like the three langhooks above), it seemed better to make
TARGET_ATTRIBUTE_TABLE a table of tables.  Specifically, it's
an array_slice of scoped_attribute_specs.

We can do the same thing for langhooks, which allows the three hooks
above to be merged into a single LANG_HOOKS_ATTRIBUTE_TABLE.
It also allows the standard attributes to be registered statically
and checked by the usual attribs.cc checks.

The patch adds a TARGET_GNU_ATTRIBUTES helper for the common case
in which a target wants a single table of gnu attributes.  It can
only be used if the table is free of preprocessor directives.

There are probably other things we need to do to make vendor namespaces
work smoothly.  E.g. in principle it would be good to make exclusion
sets namespace-aware.  But to some extent we have that with standard
vs. gnu attributes too.  This patch is just supposed to be a first step.

gcc/
* attribs.h (scoped_attribute_specs): New structure.
(register_scoped_attributes): Take a reference to a
scoped_attribute_specs instead of separate namespace and array
parameters.
* plugin.h (register_scoped_attributes): Likewise.
* attribs.cc (register_scoped_attributes): Likewise.
(attribute_tables): Change into an array of scoped_attribute_specs
pointers.  Reduce to 1 element for frontends and 1 element for targets.
(empty_attribute_table): Delete.
(check_attribute_tables): Update for changes to attribute_tables.
Use a hash_set to identify duplicates.
(handle_ignored_attributes_option): Update for above changes.
(init_attributes): Likewise.
(excl_pair): Delete.
(test_attribute_exclusions): Update for above changes.  Don't
enforce symmetry for standard attributes in the top-level namespace.
* langhooks-def.h (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete.
(LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE): Likewise.
(LANG_HOOKS_INITIALIZER): Update accordingly.
(LANG_HOOKS_ATTRIBUTE_TABLE): Define to an empty constructor.
* langhooks.h (lang_hooks::common_attribute_table): Delete.
(lang_hooks::format_attribute_table): Likewise.
(lang_hooks::attribute_table): Redefine to an array of
scoped_attribute_specs pointers.
* target-def.h (TARGET_GNU_ATTRIBUTES): New macro.
* target.def (attribute_spec): Redefine to return an array of
scoped_attribute_specs pointers.
* tree-inline.cc (function_attribute_inlinable_p): Update accordingly.
* doc/tm.texi: Regenerate.
* config/aarch64/aarch64.cc (aarch64_attribute_table): Define using
TARGET_GNU_ATTRIBUTES.
* config/alpha/alpha.cc (vms_attribute_table): Likewise.
* co

Re: [PATCH] attribs: Use existing traits for excl_hash_traits

2023-11-06 Thread Richard Sandiford
Ping.

Richard Sandiford via Gcc-patches  writes:
> excl_hash_traits can be defined more simply by reusing existing traits.
>
> Tested on aarch64-linux-gnu.  OK to install?
>
> Richard
>
>
> gcc/
>   * attribs.cc (excl_hash_traits): Delete.
>   (test_attribute_exclusions): Use pair_hash and nofree_string_hash
>   instead.
> ---
>  gcc/attribs.cc | 45 +++--
>  1 file changed, 3 insertions(+), 42 deletions(-)
>
> diff --git a/gcc/attribs.cc b/gcc/attribs.cc
> index b8cb55b97df..0d4ab23aeb6 100644
> --- a/gcc/attribs.cc
> +++ b/gcc/attribs.cc
> @@ -2640,47 +2640,6 @@ namespace selftest
>  
>  typedef std::pair excl_pair;
>  
> -struct excl_hash_traits: typed_noop_remove
> -{
> -  typedef excl_pair  value_type;
> -  typedef value_type compare_type;
> -
> -  static hashval_t hash (const value_type &x)
> -  {
> -hashval_t h1 = htab_hash_string (x.first);
> -hashval_t h2 = htab_hash_string (x.second);
> -return h1 ^ h2;
> -  }
> -
> -  static bool equal (const value_type &x, const value_type &y)
> -  {
> -return !strcmp (x.first, y.first) && !strcmp (x.second, y.second);
> -  }
> -
> -  static void mark_deleted (value_type &x)
> -  {
> -x = value_type (NULL, NULL);
> -  }
> -
> -  static const bool empty_zero_p = false;
> -
> -  static void mark_empty (value_type &x)
> -  {
> -x = value_type ("", "");
> -  }
> -
> -  static bool is_deleted (const value_type &x)
> -  {
> -return !x.first && !x.second;
> -  }
> -
> -  static bool is_empty (const value_type &x)
> -  {
> -return !*x.first && !*x.second;
> -  }
> -};
> -
> -
>  /* Self-test to verify that each attribute exclusion is symmetric,
> meaning that if attribute A is encoded as incompatible with
> attribute B then the opposite relationship is also encoded.
> @@ -2690,13 +2649,15 @@ struct excl_hash_traits: typed_noop_remove
>  static void
>  test_attribute_exclusions ()
>  {
> +  using excl_hash_traits = pair_hash;
> +
>/* Iterate over the array of attribute tables first (with TI0 as
>   the index) and over the array of attribute_spec in each table
>   (with SI0 as the index).  */
>const size_t ntables = ARRAY_SIZE (attribute_tables);
>  
>/* Set of pairs of mutually exclusive attributes.  */
> -  typedef hash_set exclusion_set;
> +  typedef hash_set exclusion_set;
>exclusion_set excl_set;
>  
>for (size_t ti0 = 0; ti0 != ntables; ++ti0)


Re: [PATCH] RISC-V: Early expand DImode vec_duplicate in RV32 system

2023-11-06 Thread Kito Cheng
Could you add a testcase? other than that LGTM.

On Mon, Nov 6, 2023 at 8:27 PM Juzhe-Zhong  wrote:
>
> An ICE was discovered in recent rounding autovec support:
>
> config/riscv/riscv-v.cc:4314
>65 | }
>   | ^
> 0x1fa5223 riscv_vector::validate_change_or_fail(rtx_def*, rtx_def**,
> rtx_def*, bool)
> 
> /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-v.cc:4314
> 0x1fb1aa2 pre_vsetvl::remove_avl_operand()
> 
> /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3342
> 0x1fb18c1 pre_vsetvl::cleaup()
> 
> /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3308
> 0x1fb216d pass_vsetvl::lazy_vsetvl()
> 
> /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3480
> 0x1fb2214 pass_vsetvl::execute(function*)
> 
> /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3504
>
> The root cause is that the RA reload into (set (reg) vec_duplicate:DI). 
> However, it is not valid in RV32 system
> since we don't have a single broadcast instruction DI scalar in RV32 system.
> We should expand it early for RV32 system.
>
> gcc/ChangeLog:
>
> * config/riscv/predicates.md: Refine predicate.
> * config/riscv/riscv-protos.h (can_be_broadcasted_p): New function.
> * config/riscv/riscv-v.cc (can_be_broadcasted_p): Ditto.
> * config/riscv/vector.md (vec_duplicate): New pattern.
> (*vec_duplicate): Adapt pattern.
>
> ---
>  gcc/config/riscv/predicates.md  |  9 +
>  gcc/config/riscv/riscv-protos.h |  1 +
>  gcc/config/riscv/riscv-v.cc | 20 
>  gcc/config/riscv/vector.md  | 20 +++-
>  4 files changed, 41 insertions(+), 9 deletions(-)
>
> diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
> index db18054607f..df1c66f3a76 100644
> --- a/gcc/config/riscv/predicates.md
> +++ b/gcc/config/riscv/predicates.md
> @@ -553,14 +553,7 @@
>
>  ;; The scalar operand can be directly broadcast by RVV instructions.
>  (define_predicate "direct_broadcast_operand"
> -  (and (match_test "!(reload_completed && !FLOAT_MODE_P (GET_MODE (op))
> -   && (register_operand (op, GET_MODE (op)) || CONST_INT_P (op)
> -   || rtx_equal_p (op, CONST0_RTX (GET_MODE (op
> -   && maybe_gt (GET_MODE_BITSIZE (GET_MODE (op)), 
> GET_MODE_BITSIZE (Pmode)))")
> -(ior (match_test "rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))")
> - (ior (match_code "const_int,const_poly_int")
> -  (ior (match_operand 0 "register_operand")
> -   (match_test "satisfies_constraint_Wdm (op)"))
> +  (match_test "riscv_vector::can_be_broadcasted_p (op)"))
>
>  ;; A CONST_INT operand that has exactly two bits cleared.
>  (define_predicate "const_nottwobits_operand"
> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> index 6cbf2130f88..acae00f653f 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -595,6 +595,7 @@ uint8_t get_sew (rtx_insn *);
>  enum vlmul_type get_vlmul (rtx_insn *);
>  int count_regno_occurrences (rtx_insn *, unsigned int);
>  bool imm_avl_p (machine_mode);
> +bool can_be_broadcasted_p (rtx);
>  }
>
>  /* We classify builtin types into two classes:
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 80d2bb9e289..a64946213c3 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -4417,4 +4417,24 @@ count_regno_occurrences (rtx_insn *rinsn, unsigned int 
> regno)
>return count;
>  }
>
> +/* Return true if the OP can be directly broadcasted.  */
> +bool
> +can_be_broadcasted_p (rtx op)
> +{
> +  machine_mode mode = GET_MODE (op);
> +  /* We don't allow RA (register allocation) reload generate
> +(vec_duplicate:DI reg) in RV32 system wheras we allow
> +(vec_duplicate:DI mem) in RV32 system.  */
> +  if (!can_create_pseudo_p () && !FLOAT_MODE_P (mode)
> +  && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode))
> +  && !satisfies_constraint_Wdm (op))
> +return false;
> +
> +  if (satisfies_constraint_K (op) || register_operand (op, mode)
> +  || satisfies_constraint_Wdm (op) || rtx_equal_p (op, CONST0_RTX 
> (mode)))
> +return true;
> +
> +  return can_create_pseudo_p () && nonmemory_operand (op, mode);
> +}
> +
>  } // namespace riscv_vector
> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index 8509c4fe5f2..e23f64938b7 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -1370,11 +1370,29 @@
>  ;;  Duplicate Operations
>  ;; -
>
> +(define_expand "vec_duplicate"
> +  [(set (match_operand:V_VLS 0 "register_operand")
> +(vec_duplic

Re: Re: [PATCH] RISC-V: Early expand DImode vec_duplicate in RV32 system

2023-11-06 Thread juzhe.zh...@rivai.ai
The testcase already exists on trunk; it was added recently by Li Pan
when supporting rounding mode autovec.

https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635280.html 
math-llrintf-run-0.c passes on RV64 but causes an ICE on RV32.




juzhe.zh...@rivai.ai
 
From: Kito Cheng
Date: 2023-11-06 20:38
To: Juzhe-Zhong
CC: gcc-patches; kito.cheng; jeffreyalaw; rdapp.gcc
Subject: Re: [PATCH] RISC-V: Early expand DImode vec_duplicate in RV32 system
Could you add a testcase? other than that LGTM.
 
On Mon, Nov 6, 2023 at 8:27 PM Juzhe-Zhong  wrote:
>
> An ICE was discovered in recent rounding autovec support:
>
> config/riscv/riscv-v.cc:4314
>65 | }
>   | ^
> 0x1fa5223 riscv_vector::validate_change_or_fail(rtx_def*, rtx_def**,
> rtx_def*, bool)
> 
> /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-v.cc:4314
> 0x1fb1aa2 pre_vsetvl::remove_avl_operand()
> 
> /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3342
> 0x1fb18c1 pre_vsetvl::cleaup()
> 
> /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3308
> 0x1fb216d pass_vsetvl::lazy_vsetvl()
> 
> /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3480
> 0x1fb2214 pass_vsetvl::execute(function*)
> 
> /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3504
>
> The root cause is that the RA reload into (set (reg) vec_duplicate:DI). 
> However, it is not valid in RV32 system
> since we don't have a single broadcast instruction DI scalar in RV32 system.
> We should expand it early for RV32 system.
>
> gcc/ChangeLog:
>
> * config/riscv/predicates.md: Refine predicate.
> * config/riscv/riscv-protos.h (can_be_broadcasted_p): New function.
> * config/riscv/riscv-v.cc (can_be_broadcasted_p): Ditto.
> * config/riscv/vector.md (vec_duplicate): New pattern.
> (*vec_duplicate): Adapt pattern.
>
> ---
>  gcc/config/riscv/predicates.md  |  9 +
>  gcc/config/riscv/riscv-protos.h |  1 +
>  gcc/config/riscv/riscv-v.cc | 20 
>  gcc/config/riscv/vector.md  | 20 +++-
>  4 files changed, 41 insertions(+), 9 deletions(-)
>
> diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
> index db18054607f..df1c66f3a76 100644
> --- a/gcc/config/riscv/predicates.md
> +++ b/gcc/config/riscv/predicates.md
> @@ -553,14 +553,7 @@
>
>  ;; The scalar operand can be directly broadcast by RVV instructions.
>  (define_predicate "direct_broadcast_operand"
> -  (and (match_test "!(reload_completed && !FLOAT_MODE_P (GET_MODE (op))
> -   && (register_operand (op, GET_MODE (op)) || CONST_INT_P (op)
> -   || rtx_equal_p (op, CONST0_RTX (GET_MODE (op
> -   && maybe_gt (GET_MODE_BITSIZE (GET_MODE (op)), 
> GET_MODE_BITSIZE (Pmode)))")
> -(ior (match_test "rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))")
> - (ior (match_code "const_int,const_poly_int")
> -  (ior (match_operand 0 "register_operand")
> -   (match_test "satisfies_constraint_Wdm (op)"))
> +  (match_test "riscv_vector::can_be_broadcasted_p (op)"))
>
>  ;; A CONST_INT operand that has exactly two bits cleared.
>  (define_predicate "const_nottwobits_operand"
> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> index 6cbf2130f88..acae00f653f 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -595,6 +595,7 @@ uint8_t get_sew (rtx_insn *);
>  enum vlmul_type get_vlmul (rtx_insn *);
>  int count_regno_occurrences (rtx_insn *, unsigned int);
>  bool imm_avl_p (machine_mode);
> +bool can_be_broadcasted_p (rtx);
>  }
>
>  /* We classify builtin types into two classes:
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 80d2bb9e289..a64946213c3 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -4417,4 +4417,24 @@ count_regno_occurrences (rtx_insn *rinsn, unsigned int 
> regno)
>return count;
>  }
>
> +/* Return true if the OP can be directly broadcasted.  */
> +bool
> +can_be_broadcasted_p (rtx op)
> +{
> +  machine_mode mode = GET_MODE (op);
> +  /* We don't allow RA (register allocation) reload generate
> +(vec_duplicate:DI reg) in RV32 system wheras we allow
> +(vec_duplicate:DI mem) in RV32 system.  */
> +  if (!can_create_pseudo_p () && !FLOAT_MODE_P (mode)
> +  && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode))
> +  && !satisfies_constraint_Wdm (op))
> +return false;
> +
> +  if (satisfies_constraint_K (op) || register_operand (op, mode)
> +  || satisfies_constraint_Wdm (op) || rtx_equal_p (op, CONST0_RTX 
> (mode)))
> +return true;
> +
> +  return can_create_pseudo_p () && nonmemory_operand (op, mode);
>

Re: Re: [PATCH] RISC-V: Early expand DImode vec_duplicate in RV32 system

2023-11-06 Thread Kito Cheng
I would prefer to add a dedicated test case for that, so that we
still cover it even if multi-lib testing for RV32 is not enabled.
I suppose that should only require a compile test for part of
that test case?

On Mon, Nov 6, 2023 at 8:41 PM juzhe.zh...@rivai.ai
 wrote:
>
> Testcase already existed on the trunk, which is added by Li Pan added 
> recently when supporting rounding mode autovec.
>
> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635280.html
>
> math-llrintf-run-0.c passed on RV64 but cause ICE on RV32.
>
>
>
> 
> juzhe.zh...@rivai.ai
>
>
> From: Kito Cheng
> Date: 2023-11-06 20:38
> To: Juzhe-Zhong
> CC: gcc-patches; kito.cheng; jeffreyalaw; rdapp.gcc
> Subject: Re: [PATCH] RISC-V: Early expand DImode vec_duplicate in RV32 system
> Could you add a testcase? other than that LGTM.
>
> On Mon, Nov 6, 2023 at 8:27 PM Juzhe-Zhong  wrote:
> >
> > An ICE was discovered in recent rounding autovec support:
> >
> > config/riscv/riscv-v.cc:4314
> >65 | }
> >   | ^
> > 0x1fa5223 riscv_vector::validate_change_or_fail(rtx_def*, rtx_def**,
> > rtx_def*, bool)
> > 
> > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-v.cc:4314
> > 0x1fb1aa2 pre_vsetvl::remove_avl_operand()
> > 
> > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3342
> > 0x1fb18c1 pre_vsetvl::cleaup()
> > 
> > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3308
> > 0x1fb216d pass_vsetvl::lazy_vsetvl()
> > 
> > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3480
> > 0x1fb2214 pass_vsetvl::execute(function*)
> > 
> > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3504
> >
> > The root cause is that the RA reload into (set (reg) vec_duplicate:DI). 
> > However, it is not valid in RV32 system
> > since we don't have a single broadcast instruction DI scalar in RV32 system.
> > We should expand it early for RV32 system.
> >
> > gcc/ChangeLog:
> >
> > * config/riscv/predicates.md: Refine predicate.
> > * config/riscv/riscv-protos.h (can_be_broadcasted_p): New function.
> > * config/riscv/riscv-v.cc (can_be_broadcasted_p): Ditto.
> > * config/riscv/vector.md (vec_duplicate): New pattern.
> > (*vec_duplicate): Adapt pattern.
> >
> > ---
> >  gcc/config/riscv/predicates.md  |  9 +
> >  gcc/config/riscv/riscv-protos.h |  1 +
> >  gcc/config/riscv/riscv-v.cc | 20 
> >  gcc/config/riscv/vector.md  | 20 +++-
> >  4 files changed, 41 insertions(+), 9 deletions(-)
> >
> > diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
> > index db18054607f..df1c66f3a76 100644
> > --- a/gcc/config/riscv/predicates.md
> > +++ b/gcc/config/riscv/predicates.md
> > @@ -553,14 +553,7 @@
> >
> >  ;; The scalar operand can be directly broadcast by RVV instructions.
> >  (define_predicate "direct_broadcast_operand"
> > -  (and (match_test "!(reload_completed && !FLOAT_MODE_P (GET_MODE (op))
> > -   && (register_operand (op, GET_MODE (op)) || CONST_INT_P (op)
> > -   || rtx_equal_p (op, CONST0_RTX (GET_MODE (op
> > -   && maybe_gt (GET_MODE_BITSIZE (GET_MODE (op)), 
> > GET_MODE_BITSIZE (Pmode)))")
> > -(ior (match_test "rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))")
> > - (ior (match_code "const_int,const_poly_int")
> > -  (ior (match_operand 0 "register_operand")
> > -   (match_test "satisfies_constraint_Wdm (op)"))
> > +  (match_test "riscv_vector::can_be_broadcasted_p (op)"))
> >
> >  ;; A CONST_INT operand that has exactly two bits cleared.
> >  (define_predicate "const_nottwobits_operand"
> > diff --git a/gcc/config/riscv/riscv-protos.h 
> > b/gcc/config/riscv/riscv-protos.h
> > index 6cbf2130f88..acae00f653f 100644
> > --- a/gcc/config/riscv/riscv-protos.h
> > +++ b/gcc/config/riscv/riscv-protos.h
> > @@ -595,6 +595,7 @@ uint8_t get_sew (rtx_insn *);
> >  enum vlmul_type get_vlmul (rtx_insn *);
> >  int count_regno_occurrences (rtx_insn *, unsigned int);
> >  bool imm_avl_p (machine_mode);
> > +bool can_be_broadcasted_p (rtx);
> >  }
> >
> >  /* We classify builtin types into two classes:
> > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> > index 80d2bb9e289..a64946213c3 100644
> > --- a/gcc/config/riscv/riscv-v.cc
> > +++ b/gcc/config/riscv/riscv-v.cc
> > @@ -4417,4 +4417,24 @@ count_regno_occurrences (rtx_insn *rinsn, unsigned 
> > int regno)
> >return count;
> >  }
> >
> > +/* Return true if the OP can be directly broadcasted.  */
> > +bool
> > +can_be_broadcasted_p (rtx op)
> > +{
> > +  machine_mode mode = GET_MODE (op);
> > +  /* We don't allow RA (register allocation) reload generate
> > +(

Re: [PATCH] RISC-V: Early expand DImode vec_duplicate in RV32 system

2023-11-06 Thread juzhe.zhong
OK, will add it.

---- Replied Message ----
From: Kito Cheng
Date: 11/06/2023 20:46
To: juzhe.zh...@rivai.ai
Cc: kito.cheng, gcc-patches, jeffreyalaw, Robin Dapp
Subject: Re: Re: [PATCH] RISC-V: Early expand DImode vec_duplicate in RV32 system

I would prefer to add a dedicated test case to test that, so that we
could also cover that even if we didn't enable multi-lib testing for
RV32, and I suppose that should only require compile test for part of
that test case ?

On Mon, Nov 6, 2023 at 8:41 PM juzhe.zh...@rivai.ai
 wrote:
>
> Testcase already existed on the trunk, which is added by Li Pan added recently when supporting rounding mode autovec.
>
> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635280.html
>
> math-llrintf-run-0.c passed on RV64 but cause ICE on RV32.
>
>
>
> 
> juzhe.zh...@rivai.ai
>
>
> From: Kito Cheng
> Date: 2023-11-06 20:38
> To: Juzhe-Zhong
> CC: gcc-patches; kito.cheng; jeffreyalaw; rdapp.gcc
> Subject: Re: [PATCH] RISC-V: Early expand DImode vec_duplicate in RV32 system
> Could you add a testcase? other than that LGTM.
>
> On Mon, Nov 6, 2023 at 8:27 PM Juzhe-Zhong  wrote:
> >
> > An ICE was discovered in recent rounding autovec support:
> >
> > config/riscv/riscv-v.cc:4314
> >    65 | }
> >   | ^
> > 0x1fa5223 riscv_vector::validate_change_or_fail(rtx_def*, rtx_def**,
> > rtx_def*, bool)
> > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-v.cc:4314
> > 0x1fb1aa2 pre_vsetvl::remove_avl_operand()
> > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3342
> > 0x1fb18c1 pre_vsetvl::cleaup()
> > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3308
> > 0x1fb216d pass_vsetvl::lazy_vsetvl()
> > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3480
> > 0x1fb2214 pass_vsetvl::execute(function*)
> > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3504
> >
> > The root cause is that the RA reload into (set (reg) vec_duplicate:DI). However, it is not valid in RV32 system
> > since we don't have a single broadcast instruction DI scalar in RV32 system.
> > We should expand it early for RV32 system.
> >
> > gcc/ChangeLog:
> >
> > * config/riscv/predicates.md: Refine predicate.
> > * config/riscv/riscv-protos.h (can_be_broadcasted_p): New function.
> > * config/riscv/riscv-v.cc (can_be_broadcasted_p): Ditto.
> > * config/riscv/vector.md (vec_duplicate): New pattern.
> > (*vec_duplicate): Adapt pattern.
> >
> > ---
> >  gcc/config/riscv/predicates.md  |  9 +
> >  gcc/config/riscv/riscv-protos.h |  1 +
> >  gcc/config/riscv/riscv-v.cc | 20 
> >  gcc/config/riscv/vector.md  | 20 +++-
> >  4 files changed, 41 insertions(+), 9 deletions(-)
> >
> > diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
> > index db18054607f..df1c66f3a76 100644
> > --- a/gcc/config/riscv/predicates.md
> > +++ b/gcc/config/riscv/predicates.md
> > @@ -553,14 +553,7 @@
> >
> >  ;; The scalar operand can be directly broadcast by RVV instructions.
> >  (define_predicate "direct_broadcast_operand"
> > -  (and (match_test "!(reload_completed && !FLOAT_MODE_P (GET_MODE (op))
> > -   && (register_operand (op, GET_MODE (op)) || CONST_INT_P (op)
> > -   || rtx_equal_p (op, CONST0_RTX (GET_MODE (op
> > -   && maybe_gt (GET_MODE_BITSIZE (GET_MODE (op)), GET_MODE_BITSIZE (Pmode)))")
> > -    (ior (match_test "rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))")
> > - (ior (match_code "const_int,const_poly_int")
> > -  (ior (match_operand 0 "register_operand")
> > -   (match_test "satisfies_constraint_Wdm (op)"))
> > +  (match_test "riscv_vector::can_be_broadcasted_p (op)"))
> >
> >  ;; A CONST_INT operand that has exactly two bits cleared.
> >  (define_predicate "const_nottwobits_operand"
> > diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> > index 6cbf2130f88..acae00f653f 100644
> > --- a/gcc/config/riscv/riscv-protos.h
> > +++ b/gcc/config/riscv/riscv-protos.h
> > @@ -595,6 +595,7 @@ uint8_t get_sew (rtx_insn *);
> >  enum vlmul_type get_vlmul (rtx_insn *);
> >  int count_regno_occurrences (rtx_insn *, unsigned int);
> >  bool imm_avl_p (machine_mode);
> > +bool can_be_broadcasted_p (rtx);
> >  }
> >
> >  /* We classify builtin types into two classes:
> > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> > index 80d2bb9e289..a64946213c3 100644
> > --- a/gcc/config/riscv/riscv-v.cc
> > +++ b/gcc/config/riscv/riscv-v.cc
> > @@ -4417,4 +4417,24 @@ count_regno_occurrences (rtx_insn *rinsn, unsigned int regno)
> >    return count;
> >  }
> >
> > +/* Return true if the OP can be directly broadcasted

Re: [PATCH] libstdc++/112351 - deal with __gthread_once failure during locale init

2023-11-06 Thread Richard Biener
On Mon, 6 Nov 2023, Jonathan Wakely wrote:

> On Mon, 6 Nov 2023 at 11:52, Richard Biener  wrote:
> >
> > The following makes the C++98 locale init path follow the way the
> > C++11 performs initialization.  This way we deal with pthread_once
> > failing, falling back to non-threadsafe initialization which, given we
> > initialize from the library, should be serialized by the dynamic
> > loader already.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, OK for trunk?
> > And GCC 13 branch?
> >
> > Thanks,
> > Richard.
> >
> > PR libstdc++/112351
> > libstdc++-v3/
> > * src/c++98/locale.cc (locale::facet::_S_get_c_locale):
> > Always perform non-threadsafe init when threadsafe init
> > failed.
> > ---
> >  libstdc++-v3/src/c++98/locale.cc | 7 ++-
> >  1 file changed, 2 insertions(+), 5 deletions(-)
> >
> > diff --git a/libstdc++-v3/src/c++98/locale.cc 
> > b/libstdc++-v3/src/c++98/locale.cc
> > index d308140bab7..e9bec1db3b6 100644
> > --- a/libstdc++-v3/src/c++98/locale.cc
> > +++ b/libstdc++-v3/src/c++98/locale.cc
> > @@ -216,12 +216,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >  #ifdef __GTHREADS
> >  if (__gthread_active_p())
> >__gthread_once(&_S_once, _S_initialize_once);
> > -else
> >  #endif
> > -  {
> > -   if (!_S_c_locale)
> > - _S_initialize_once();
> > -  }
> > +if (__builtin_expect (!_S_c_locale, 0))
> > +  _S_initialize_once();
> >  return _S_c_locale;
> >}
> 
> 
> I think this has a problem, which is handled correctly in
> src/c++11/locale_init.cc by checking _S_classic inside the
> _S_initialize_once function.

We check _S_c_locale here (it's just a pointer) instead of in
_S_initialize_once (), so I think the code is equivalent, no?

> If the first call to __gthread_once does fail then _S_once will not be
> changed. We will fall through to calling _S_initialize_once directly
> (which is not thread-safe) and set _S_c_locale.
> 
> The next time we call _S_initialize, __gthread_once will try to run
> again, and because _S_once was not changed, it might call
> _S_initialize_once() again, which writes to _S_c_locale again
> (possibly causing a data race).

Ah, yeah, so in the C++11 path the check for !_S_classic in
locale::_S_initialize is redundant.  But good spot.

> I don't think the slightly different code in src/c++11/locale_init.cc
> is different in order to handle __gthread_once failing, I think it's
> different because the effects of locale::facet::_S_initialize_once()
> and locale::_S_initialize_once() are different. One is safe to call
> more than once, and the other isn't.
> 
> I don't think we need to care about __gthread_once failing at all, do
> we? There are no error conditions for pthread_once, it always returns
> 0 (previous POSIX revisions said it could return EINVAL for an
> uninitialized pthread_once_t but that can't happen here as it's
> correctly initialized in src/c++11/locale.cc). Is the concern that it
> can fail for non-posix thread models? (I didn't check if any of them
> can actually fail)

The concern is that there are actual products out that break with the
new I/O initialization in libstdc++ for GCC13+ because they have bugs.
It's easy enough to work around those by the proposed patch (plus
correction for the above issue).  I suppose the comment in
locale::_S_initialize_once holds as well for the C++98 path.

The failure mode of the product is that it overrides pthread_once
but does nothing (not even indicate failure) when its pthread_*
override mechanism isn't initialized yet.  With libstdc++ from GCC13
we now use pthread_once "too early" and fail to initialize the locale
object.

Adjusted patch below.

OK after another round of testing?

Thanks,
Richard.


>From 4e3fa2f4426a5a10d189587b63e4d7298c347b01 Mon Sep 17 00:00:00 2001
From: Richard Biener 
Date: Mon, 6 Nov 2023 11:31:40 +0100
Subject: [PATCH] libstdc++/112351 - deal with __gthread_once failure during
 locale init
To: gcc-patches@gcc.gnu.org

The following makes the C++98 locale init path follow the way the
C++11 performs initialization.  This way we deal with pthread_once
failing, falling back to non-threadsafe initialization which, given we
initialize from the library, should be serialized by the dynamic
loader already.

PR libstdc++/112351
libstdc++-v3/
* src/c++98/locale.cc (locale::facet::_S_initialize_once):
Check whether _S_c_locale is already initialized.
(locale::facet::_S_get_c_locale): Always perform non-threadsafe
init when threadsafe init failed.
---
 libstdc++-v3/src/c++98/locale.cc | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/libstdc++-v3/src/c++98/locale.cc b/libstdc++-v3/src/c++98/locale.cc
index d308140bab7..1ef0c394cd7 100644
--- a/libstdc++-v3/src/c++98/locale.cc
+++ b/libstdc++-v3/src/c++98/locale.cc
@@ -206,6 +206,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   void
   locale::facet::_S_initialize_once()
   {
+// Need

[PATCH] RISC-V: VECT: Remember to assert any_known_not_updated_vssa

2023-11-06 Thread Maxim Blinov
From: Maxim Blinov 

This patch is based on and intended for the 
vendors/riscv/gcc-13-with-riscv-opts branch - please apply if looks OK.

Fixes the following ICEs that I'm seeing:

FAIL: gcc.dg/vect/O3-pr49087.c (internal compiler error: in 
vect_transform_loops, at tree-vectorizer.cc:1032)
FAIL: gcc.dg/vect/no-scevccp-pr86725-1.c (internal compiler error: in 
vect_transform_loops, at tree-vectorizer.cc:1032)
FAIL: gcc.dg/vect/no-scevccp-pr86725-2.c (internal compiler error: in 
vect_transform_loops, at tree-vectorizer.cc:1032)
FAIL: gcc.dg/vect/no-scevccp-pr86725-3.c (internal compiler error: in 
vect_transform_loops, at tree-vectorizer.cc:1032)
FAIL: gcc.dg/vect/no-scevccp-pr86725-4.c (internal compiler error: in 
vect_transform_loops, at tree-vectorizer.cc:1032)
FAIL: gcc.dg/vect/pr94443.c (internal compiler error: in vect_transform_loops, 
at tree-vectorizer.cc:1032)
FAIL: gcc.dg/vect/pr94443.c -flto -ffat-lto-objects (internal compiler error: 
in vect_transform_loops, at tree-vectorizer.cc:1032)
FAIL: gcc.dg/vect/slp-50.c (internal compiler error: in vect_transform_loops, 
at tree-vectorizer.cc:1032)
FAIL: gcc.dg/vect/slp-50.c -flto -ffat-lto-objects (internal compiler error: in 
vect_transform_loops, at tree-vectorizer.cc:1032)
FAIL: gcc.dg/vect/vect-cond-13.c (internal compiler error: in 
vect_transform_loops, at tree-vectorizer.cc:1032)
FAIL: gcc.dg/vect/vect-cond-13.c -flto -ffat-lto-objects (internal compiler 
error: in vect_transform_loops, at tree-vectorizer.cc:1032)
FAIL: gcc.dg/vect/vect-live-6.c (internal compiler error: in 
vect_transform_loops, at tree-vectorizer.cc:1032)
FAIL: gcc.dg/vect/vect-live-6.c -flto -ffat-lto-objects (internal compiler 
error: in vect_transform_loops, at tree-vectorizer.cc:1032)
FAIL: gcc.target/riscv/rvv/autovec/partial/live-1.c (internal compiler error: 
in vect_transform_loops, at tree-vectorizer.cc:1032)
FAIL: gcc.target/riscv/rvv/autovec/partial/live-2.c (internal compiler error: 
in vect_transform_loops, at tree-vectorizer.cc:1032)

-- >8 --

When we create a VEC_EXTRACT gimple stmt:

  /* SCALAR_RES = VEC_EXTRACT .  */
  tree scalar_res
= gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype),
vec_lhs_phi, last_index);

Under the hood we are really just creating a GIMPLE_CALL stmt. Later
on, when we `gsi_insert_seq_before` our stmts:

  if (stmts)
{
  gimple_stmt_iterator exit_gsi = gsi_after_labels (exit_bb);
  gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);

We eventually run into tree-ssa-operands.cc:1147:

  operands_scanner (fn, stmt).build_ssa_operands ();

Since VEC_EXTRACT is *not* marked with ECF_NOVOPS, ECF_CONST, or
ECF_PURE flags in internal-fn.def, when
`operands_scanner::parse_ssa_operands` comes across our
VEC_EXTRACT-type GIMPLE_CALL, it generates a `gimple_vop()` artificial
variable.

`operands_scanner::finalize_ssa_defs` then picks this up, so our final
stmt goes from

_73 = .VEC_EXTRACT (vect_last_9.56_71, _72);

to

# .MEM = VDEF <>
_73 = .VEC_EXTRACT (vect_last_9.56_71, _72);

But more importantly it marks us as `ssa_renaming_needed`, in
tree-ssa-operands.cc:420:

  /* If we have a non-SSA_NAME VDEF, mark it for renaming.  */
  if (gimple_vdef (stmt)
  && TREE_CODE (gimple_vdef (stmt)) != SSA_NAME)
{
  fn->gimple_df->rename_vops = 1;
  fn->gimple_df->ssa_renaming_needed = 1;
}

This then proceeds to crash the compiler when we are about to leave
`vect_transform_loops`:

  if (need_ssa_update_p (cfun))
{
  gcc_assert (loop_vinfo->any_known_not_updated_vssa);
  fun->gimple_df->ssa_renaming_needed = false;
  todo |= TODO_update_ssa_only_virtuals;
}

Since,

- `need_ssa_update_p (cfun)` is true (it was set when we generated a
  memory vdef)
- `loop_vinfo->any_known_not_updated_vssa` is false

As the code currently stands, creating a gimple stmt containing a
VEC_EXTRACT should always generate a memory vdef, therefore we should
remember to mark `loop_vinfo->any_known_not_updated_vssa` afterwards.

gcc/ChangeLog:

* tree-vect-loop.cc (vectorizable_live_operation): Remember to
set loop_vinfo->any_known_not_updated_vssa if we are inserting
a call to VEC_EXTRACT.
---
 gcc/tree-vect-loop.cc | 5 +
 1 file changed, 5 insertions(+)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index c8df2c88575..53c3a31d2a8 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10155,6 +10155,11 @@ vectorizable_live_operation (vec_info *vinfo,
= gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype),
vec_lhs_phi, last_index);
 
+ /* We've expanded SSA at this point, and since VEC_EXTRACT
+will generate vops, make sure to tell GCC that we need to
+update SSA.  */
+ loop_vinfo->any_known_not_updated_vssa = true;
+
  /* Convert the extracted vector element to the scalar type.  */
  new

Re: [PATCH v3 1/2]middle-end: expand copysign handling from lockstep to nested iters

2023-11-06 Thread Richard Biener
On Mon, 6 Nov 2023, Tamar Christina wrote:

> Hi All,
> 
> various optimizations in match.pd only happened on COPYSIGN in lock step
> which means they exclude IFN_COPYSIGN.  COPYSIGN however is restricted to only
> the C99 builtins and so doesn't work for vectors.
> 
> The patch expands these optimizations to work as nested iters.
> 
> This is needed for the second patch which will add the testcase.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?

OK.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   PR tree-optimization/109154
>   * match.pd: expand existing copysign optimizations.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 
> 7d651a6582d169793cca4f9a70e334dd80014d92..db95931df0672cf4ef08cca36085c3aa6831519e
>  100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1074,37 +1074,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  
>  /* cos(copysign(x, y)) -> cos(x).  Similarly for cosh.  */
>  (for coss (COS COSH)
> - copysigns (COPYSIGN)
> - (simplify
> -  (coss (copysigns @0 @1))
> -   (coss @0)))
> + (for copysigns (COPYSIGN)
> +  (simplify
> +   (coss (copysigns @0 @1))
> +(coss @0
>  
>  /* pow(copysign(x, y), z) -> pow(x, z) if z is an even integer.  */
>  (for pows (POW)
> - copysigns (COPYSIGN)
> - (simplify
> -  (pows (copysigns @0 @2) REAL_CST@1)
> -  (with { HOST_WIDE_INT n; }
> -   (if (real_isinteger (&TREE_REAL_CST (@1), &n) && (n & 1) == 0)
> -(pows @0 @1)
> + (for copysigns (COPYSIGN)
> +  (simplify
> +   (pows (copysigns @0 @2) REAL_CST@1)
> +   (with { HOST_WIDE_INT n; }
> +(if (real_isinteger (&TREE_REAL_CST (@1), &n) && (n & 1) == 0)
> + (pows @0 @1))
>  /* Likewise for powi.  */
>  (for pows (POWI)
> - copysigns (COPYSIGN)
> - (simplify
> -  (pows (copysigns @0 @2) INTEGER_CST@1)
> -  (if ((wi::to_wide (@1) & 1) == 0)
> -   (pows @0 @1
> + (for copysigns (COPYSIGN)
> +  (simplify
> +   (pows (copysigns @0 @2) INTEGER_CST@1)
> +   (if ((wi::to_wide (@1) & 1) == 0)
> +(pows @0 @1)
>  
>  (for hypots (HYPOT)
> - copysigns (COPYSIGN)
> - /* hypot(copysign(x, y), z) -> hypot(x, z).  */
> - (simplify
> -  (hypots (copysigns @0 @1) @2)
> -  (hypots @0 @2))
> - /* hypot(x, copysign(y, z)) -> hypot(x, y).  */
> - (simplify
> -  (hypots @0 (copysigns @1 @2))
> -  (hypots @0 @1)))
> + (for copysigns (COPYSIGN)
> +  /* hypot(copysign(x, y), z) -> hypot(x, z).  */
> +  (simplify
> +   (hypots (copysigns @0 @1) @2)
> +   (hypots @0 @2))
> +  /* hypot(x, copysign(y, z)) -> hypot(x, y).  */
> +  (simplify
> +   (hypots @0 (copysigns @1 @2))
> +   (hypots @0 @1
>  
>  /* copysign(x, CST) -> [-]abs (x).  */
>  (for copysigns (COPYSIGN_ALL)
> 
> 
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [PATCH v3 2/2]middle-end match.pd: optimize fneg (fabs (x)) to copysign (x, -1) [PR109154]

2023-11-06 Thread Richard Biener
On Mon, 6 Nov 2023, Tamar Christina wrote:

> Hi All,
> 
> This patch transforms fneg (fabs (x)) into copysign (x, -1) which is more
> canonical and allows a target to expand this sequence efficiently.  Such
> sequences are common in scientific code working with gradients.
> 
> There is an existing canonicalization of copysign (x, -1) to fneg (fabs (x))
> which I remove since this is a less efficient form.  The testsuite is also
> updated in light of this.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   PR tree-optimization/109154
>   * match.pd: Add new neg+abs rule, remove inverse copysign rule.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR tree-optimization/109154
>   * gcc.dg/fold-copysign-1.c: Updated.
>   * gcc.dg/pr55152-2.c: Updated.
>   * gcc.dg/tree-ssa/abs-4.c: Updated.
>   * gcc.dg/tree-ssa/backprop-6.c: Updated.
>   * gcc.dg/tree-ssa/copy-sign-2.c: Updated.
>   * gcc.dg/tree-ssa/mult-abs-2.c: Updated.
>   * gcc.target/aarch64/fneg-abs_1.c: New test.
>   * gcc.target/aarch64/fneg-abs_2.c: New test.
>   * gcc.target/aarch64/fneg-abs_3.c: New test.
>   * gcc.target/aarch64/fneg-abs_4.c: New test.
>   * gcc.target/aarch64/sve/fneg-abs_1.c: New test.
>   * gcc.target/aarch64/sve/fneg-abs_2.c: New test.
>   * gcc.target/aarch64/sve/fneg-abs_3.c: New test.
>   * gcc.target/aarch64/sve/fneg-abs_4.c: New test.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 
> db95931df0672cf4ef08cca36085c3aa6831519e..7a023d510c283c43a87b1795a74761b8af979b53
>  100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1106,13 +1106,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (hypots @0 (copysigns @1 @2))
> (hypots @0 @1
>  
> -/* copysign(x, CST) -> [-]abs (x).  */
> -(for copysigns (COPYSIGN_ALL)
> - (simplify
> -  (copysigns @0 REAL_CST@1)
> -  (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
> -   (negate (abs @0))
> -   (abs @0
> +/* Transform fneg (fabs (X)) -> copysign (X, -1).  */
> +
> +(simplify
> + (negate (abs @0))
> + (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))

You also drop copysign (x, CST) -> abs (x) when CST is not
negative - I think that's still worthwhile as it has one less
argument?

Keeping that might also need less testsuite adjustments?

Richard.

>  /* copysign(copysign(x, y), z) -> copysign(x, z).  */
>  (for copysigns (COPYSIGN_ALL)
> diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c 
> b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> index 
> f17d65c24ee4dca9867827d040fe0a404c515e7b..f9cafd14ab05f5e8ab2f6f68e62801d21c2df6a6
>  100644
> --- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
> +++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> @@ -12,5 +12,5 @@ double bar (double x)
>return __builtin_copysign (x, minuszero);
>  }
>  
> -/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" } } */
> -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" } } */
> +/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
> diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c 
> b/gcc/testsuite/gcc.dg/pr55152-2.c
> index 
> 54db0f2062da105a829d6690ac8ed9891fe2b588..605f202ed6bc7aa8fe921457b02ff0b88cc63ce6
>  100644
> --- a/gcc/testsuite/gcc.dg/pr55152-2.c
> +++ b/gcc/testsuite/gcc.dg/pr55152-2.c
> @@ -10,4 +10,5 @@ int f(int a)
>return (a<-a)?a:-a;
>  }
>  
> -/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> index 
> 6197519faf7b55aed7bc162cd0a14dd2145210ca..e1b825f37f69ac3c4666b3a52d733368805ad31d
>  100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> @@ -9,5 +9,6 @@ long double abs_ld(long double x) { return 
> __builtin_signbit(x) ? x : -x; }
>  
>  /* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP */
>  /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "= -" 3 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> index 
> 31f05716f1498dc709cac95fa20fb5796642c77e..c3a138642d6ff7be984e91fa1343cb2718db7ae1
>  100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> @@ -26,5 +26,6 @@ TEST_FUNCTION (float, f)
>  TEST_FUNCTION (double, )
>

Re: [PATCH] RISC-V: VECT: Remember to assert any_known_not_updated_vssa

2023-11-06 Thread Richard Biener
On Mon, Nov 6, 2023 at 2:02 PM Maxim Blinov  wrote:
>
> From: Maxim Blinov 
>
> This patch is based on and intended for the 
> vendors/riscv/gcc-13-with-riscv-opts branch - please apply if looks OK.
>
> Fixes the following ICEs that I'm seeing:
>
> FAIL: gcc.dg/vect/O3-pr49087.c (internal compiler error: in 
> vect_transform_loops, at tree-vectorizer.cc:1032)
> FAIL: gcc.dg/vect/no-scevccp-pr86725-1.c (internal compiler error: in 
> vect_transform_loops, at tree-vectorizer.cc:1032)
> FAIL: gcc.dg/vect/no-scevccp-pr86725-2.c (internal compiler error: in 
> vect_transform_loops, at tree-vectorizer.cc:1032)
> FAIL: gcc.dg/vect/no-scevccp-pr86725-3.c (internal compiler error: in 
> vect_transform_loops, at tree-vectorizer.cc:1032)
> FAIL: gcc.dg/vect/no-scevccp-pr86725-4.c (internal compiler error: in 
> vect_transform_loops, at tree-vectorizer.cc:1032)
> FAIL: gcc.dg/vect/pr94443.c (internal compiler error: in 
> vect_transform_loops, at tree-vectorizer.cc:1032)
> FAIL: gcc.dg/vect/pr94443.c -flto -ffat-lto-objects (internal compiler error: 
> in vect_transform_loops, at tree-vectorizer.cc:1032)
> FAIL: gcc.dg/vect/slp-50.c (internal compiler error: in vect_transform_loops, 
> at tree-vectorizer.cc:1032)
> FAIL: gcc.dg/vect/slp-50.c -flto -ffat-lto-objects (internal compiler error: 
> in vect_transform_loops, at tree-vectorizer.cc:1032)
> FAIL: gcc.dg/vect/vect-cond-13.c (internal compiler error: in 
> vect_transform_loops, at tree-vectorizer.cc:1032)
> FAIL: gcc.dg/vect/vect-cond-13.c -flto -ffat-lto-objects (internal compiler 
> error: in vect_transform_loops, at tree-vectorizer.cc:1032)
> FAIL: gcc.dg/vect/vect-live-6.c (internal compiler error: in 
> vect_transform_loops, at tree-vectorizer.cc:1032)
> FAIL: gcc.dg/vect/vect-live-6.c -flto -ffat-lto-objects (internal compiler 
> error: in vect_transform_loops, at tree-vectorizer.cc:1032)
> FAIL: gcc.target/riscv/rvv/autovec/partial/live-1.c (internal compiler error: 
> in vect_transform_loops, at tree-vectorizer.cc:1032)
> FAIL: gcc.target/riscv/rvv/autovec/partial/live-2.c (internal compiler error: 
> in vect_transform_loops, at tree-vectorizer.cc:1032)
>
> -- >8 --
>
> When we create a VEC_EXTRACT gimple stmt:
>
>   /* SCALAR_RES = VEC_EXTRACT .  */
>   tree scalar_res
> = gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype),
> vec_lhs_phi, last_index);
>
> Under the hood we are really just creating a GIMPLE_CALL stmt. Later
> on, when we `gsi_insert_seq_before` our stmts:
>
>   if (stmts)
> {
>   gimple_stmt_iterator exit_gsi = gsi_after_labels (exit_bb);
>   gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
>
> We eventually run into tree-ssa-operands.cc:1147:
>
>   operands_scanner (fn, stmt).build_ssa_operands ();
>
> Since VEC_EXTRACT is *not* marked with ECF_NOVOPS, ECF_CONST, or
> ECF_PURE flags in internal-fn.def, when

I see

DEF_INTERNAL_OPTAB_FN (VEC_EXTRACT, ECF_CONST | ECF_NOTHROW,
   vec_extract, vec_extract)

?

> `operand_scanner::parse_ssa_operands` comes across our
> VEC_EXTRACT-type GIMPLE_CALL, it generates a `gimple_vop()` artificial
> variable.
>
> `operand_scanner::finalize_ssa_defs` then picks this up, so our final
> stmt goes from
>
> _73 = .VEC_EXTRACT (vect_last_9.56_71, _72);
>
> to
>
> # .MEM = VDEF <>
> _73 = .VEC_EXTRACT (vect_last_9.56_71, _72);
>
> But more importantly it marks us as `ssa_renaming_needed`, in
> tree-ssa-operands.cc:420:
>
>   /* If we have a non-SSA_NAME VDEF, mark it for renaming.  */
>   if (gimple_vdef (stmt)
>   && TREE_CODE (gimple_vdef (stmt)) != SSA_NAME)
> {
>   fn->gimple_df->rename_vops = 1;
>   fn->gimple_df->ssa_renaming_needed = 1;
> }
>
> This then proceeds to crash the compiler when we are about to leave
> `vect_transform_loops`:
>
>   if (need_ssa_update_p (cfun))
> {
>   gcc_assert (loop_vinfo->any_known_not_updated_vssa);
>   fun->gimple_df->ssa_renaming_needed = false;
>   todo |= TODO_update_ssa_only_virtuals;
> }
>
> Since,
>
> - `need_ssa_update_p (cfun)` is true (it was set when we generated a
>   memory vdef)
> - `loop_vinfo->any_known_not_updated_vssa` is false
>
> As the code currently stands, creating a gimple stmt containing a
> VEC_EXTRACT should always generate a memory vdef, therefore we should
> remember to mark `loop_vinfo->any_known_not_updated_vssa` afterwards.
>
> gcc/ChangeLog:
>
> * tree-vect-loop.cc (vectorizable_live_operation): Remember to
> assert loop_vinfo->any_known_not_updated_vssa if we are inserting
> a call to VEC_EXPAND.
> ---
>  gcc/tree-vect-loop.cc | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index c8df2c88575..53c3a31d2a8 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -10155,6 +10155,11 @@ vectorizable_live_operation (vec_info *vinfo,
> = gimple_build (&stmts, CFN_VEC_EXTRACT,

[PATCH] tree-optimization/112404 - two issues with SLP of .MASK_LOAD

2023-11-06 Thread Richard Biener
The following fixes an oversight in vect_check_scalar_mask when
the mask is external or constant.  When doing BB vectorization
we need to provide a group_size, best via an overload accepting
the SLP node as argument.

When fixed we then run into the issue that we have not analyzed
alignment of the .MASK_LOADs because they were not identified
as loads by vect_gather_slp_loads.  Fixed by reworking the
detection.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

Richard.

PR tree-optimization/112404
* tree-vectorizer.h (get_mask_type_for_scalar_type): Declare
overload with SLP node argument.
* tree-vect-stmts.cc (get_mask_type_for_scalar_type): Implement it.
(vect_check_scalar_mask): Use it.
* tree-vect-slp.cc (vect_gather_slp_loads): Properly identify
loads also for nodes with children, like .MASK_LOAD.
* tree-vect-loop.cc (vect_analyze_loop_2): Look at the
representative for load nodes and check whether it is a grouped
access before looking for load-lanes support.

* gfortran.dg/pr112404.f90: New testcase.
---
 gcc/testsuite/gfortran.dg/pr112404.f90 | 23 +
 gcc/tree-vect-loop.cc  | 47 ++
 gcc/tree-vect-slp.cc   | 23 ++---
 gcc/tree-vect-stmts.cc | 22 +++-
 gcc/tree-vectorizer.h  |  1 +
 5 files changed, 82 insertions(+), 34 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/pr112404.f90

diff --git a/gcc/testsuite/gfortran.dg/pr112404.f90 
b/gcc/testsuite/gfortran.dg/pr112404.f90
new file mode 100644
index 000..573fa28164a
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr112404.f90
@@ -0,0 +1,23 @@
+! { dg-do compile }
+! { dg-options "-Ofast" }
+! { dg-additional-options "-mavx2" { target avx2 } }
+   SUBROUTINE sfddagd( regime, znt, ite, jte )
+   REAL, DIMENSION( ime, IN) :: regime, znt
+   REAL, DIMENSION( ite, jte) :: wndcor_u 
+   LOGICAL wrf_dm_on_monitor
+   IF( int4 == 1 ) THEN
+ DO j=jts,jtf
+ DO i=itsu,itf
+   reg = regime(i-1,  j) 
+   IF( reg > 10.0 ) THEN
+ znt0 = znt(i-1,  j) + znt(i,  j) 
+ IF( znt0 <= 0.2) THEN
+   wndcor_u(i,j) = 0.2
+ ENDIF
+   ENDIF
+ ENDDO
+ ENDDO
+ IF ( wrf_dm_on_monitor()) THEN
+ ENDIF
+   ENDIF
+   END
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 362856a6507..5213aa0169c 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2943,17 +2943,19 @@ start_over:
   != IFN_LAST)
{
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
-   {
- stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
- (SLP_TREE_SCALAR_STMTS (load_node)[0]);
- /* Use SLP for strided accesses (or if we can't
-load-lanes).  */
- if (STMT_VINFO_STRIDED_P (stmt_vinfo)
- || vect_load_lanes_supported
-   (STMT_VINFO_VECTYPE (stmt_vinfo),
-DR_GROUP_SIZE (stmt_vinfo), false) == IFN_LAST)
-   break;
-   }
+   if (STMT_VINFO_GROUPED_ACCESS
+ (SLP_TREE_REPRESENTATIVE (load_node)))
+ {
+   stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
+   (SLP_TREE_REPRESENTATIVE (load_node));
+   /* Use SLP for strided accesses (or if we can't
+  load-lanes).  */
+   if (STMT_VINFO_STRIDED_P (stmt_vinfo)
+   || vect_load_lanes_supported
+(STMT_VINFO_VECTYPE (stmt_vinfo),
+ DR_GROUP_SIZE (stmt_vinfo), false) == IFN_LAST)
+ break;
+ }
 
  can_use_lanes
= can_use_lanes && i == SLP_INSTANCE_LOADS (instance).length ();
@@ -3261,16 +3263,19 @@ again:
   "unsupported grouped store\n");
   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node)
{
- vinfo = SLP_TREE_SCALAR_STMTS (node)[0];
- vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
- bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo);
- size = DR_GROUP_SIZE (vinfo);
- vectype = STMT_VINFO_VECTYPE (vinfo);
- if (vect_load_lanes_supported (vectype, size, false) == IFN_LAST
- && ! vect_grouped_load_supported (vectype, single_element_p,
-   size))
-   return opt_result::failure_at (vinfo->stmt,
-  "unsupported grouped load\n");
+ vinfo = SLP_TREE_REPRESENTATIVE (node);
+ if (STMT_VINFO_GROUPED_ACCESS (vinfo))
+   {
+ vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
+ bool single_element_p = !DR_GROUP_NEXT

Re: [PATCH] RISC-V: VECT: Remember to assert any_known_not_updated_vssa

2023-11-06 Thread Maxim Blinov
On Mon, 6 Nov 2023 at 13:07, Richard Biener  wrote:
> I see
>
> DEF_INTERNAL_OPTAB_FN (VEC_EXTRACT, ECF_CONST | ECF_NOTHROW,
>vec_extract, vec_extract)
>
> ?

Oh, you're right! I should have checked the master branch first... and
I was even wondering why it wasn't marked as such. Should perhaps
cherry pick this for gcc-13-with-riscv-opts?


[PATCH] tree-optimization/111950 - vectorizer loop copying

2023-11-06 Thread Richard Biener
The following simplifies LC-PHI arg population during epilog peeling,
thereby fixing the testcase in this PR.

Bootstrapped and tested on x86_64-unknown-linux-gnu, also built
SPEC CPU 2017 with and without LTO, pushed.

PR tree-optimization/111950
* tree-vect-loop-manip.cc (slpeel_duplicate_current_defs_from_edges):
Remove.
(find_guard_arg): Likewise.
(slpeel_update_phi_nodes_for_guard2): Likewise.
(slpeel_tree_duplicate_loop_to_edge_cfg): Remove calls to
slpeel_duplicate_current_defs_from_edges, do not elide
LC-PHIs for invariant values.
(vect_do_peeling): Materialize PHI arguments for the edge
around the epilog from the PHI defs of the main loop exit.

* gcc.dg/torture/pr111950.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr111950.c |  16 ++
 gcc/tree-vect-loop-manip.cc | 242 +++-
 2 files changed, 41 insertions(+), 217 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr111950.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr111950.c 
b/gcc/testsuite/gcc.dg/torture/pr111950.c
new file mode 100644
index 000..4eeffeb6827
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr111950.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ftree-vectorize -fno-vect-cost-model" } */
+
+int a, b, d;
+int c[4];
+unsigned e;
+void f() {
+  char g;
+  for (; d; d++) {
+g = 1;
+for (; g >= 0; g--) {
+  e = b >= 2 || a >> b ?: a;
+  c[g] = e;
+}
+  }
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 43ca985c53c..b9161274ce4 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -1392,58 +1392,6 @@ vect_set_loop_condition (class loop *loop, edge loop_e, 
loop_vec_info loop_vinfo
 (gimple *) cond_stmt);
 }
 
-/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
-   For all PHI arguments in FROM->dest and TO->dest from those
-   edges ensure that TO->dest PHI arguments have current_def
-   to that in from.  */
-
-static void
-slpeel_duplicate_current_defs_from_edges (edge from, edge to)
-{
-  gimple_stmt_iterator gsi_from, gsi_to;
-
-  for (gsi_from = gsi_start_phis (from->dest),
-   gsi_to = gsi_start_phis (to->dest);
-   !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
-{
-  gimple *from_phi = gsi_stmt (gsi_from);
-  gimple *to_phi = gsi_stmt (gsi_to);
-  tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
-  tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
-  if (virtual_operand_p (from_arg))
-   {
- gsi_next (&gsi_from);
- continue;
-   }
-  if (virtual_operand_p (to_arg))
-   {
- gsi_next (&gsi_to);
- continue;
-   }
-  if (TREE_CODE (from_arg) != SSA_NAME)
-   gcc_assert (operand_equal_p (from_arg, to_arg, 0));
-  else if (TREE_CODE (to_arg) == SSA_NAME
-  && from_arg != to_arg)
-   {
- if (get_current_def (to_arg) == NULL_TREE)
-   {
- gcc_assert (types_compatible_p (TREE_TYPE (to_arg),
- TREE_TYPE (get_current_def
-  (from_arg;
- set_current_def (to_arg, get_current_def (from_arg));
-   }
-   }
-  gsi_next (&gsi_from);
-  gsi_next (&gsi_to);
-}
-
-  gphi *from_phi = get_virtual_phi (from->dest);
-  gphi *to_phi = get_virtual_phi (to->dest);
-  if (from_phi)
-set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
-get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
-}
-
 /* Given LOOP this function generates a new copy of it and puts it
on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
@@ -1577,21 +1525,6 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop 
*loop, edge loop_exit,
  adjust_debug_stmts (orig_def, PHI_RESULT (gsi.phi ()), exit->dest);
   }
 
-  /* This condition happens when the loop has been versioned. e.g. due to ifcvt
- versioning the loop.  */
-  if (scalar_loop != loop)
-{
-  /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
-SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
-but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
-the LOOP SSA_NAMEs (on the exit edge and edge from latch to
-header) to have current_def set, so copy them over.  */
-  slpeel_duplicate_current_defs_from_edges (scalar_exit, exit);
-  slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
-  0),
-   EDGE_SUCC (loop->latch, 0));
-}
-
   auto loop_exits = get_loop_exit_edges (loop);
   auto_vec doms;
 
@@ -1655,18 +1588,6 @@ slpeel_tree_du

Re: [PATCH] RISC-V: VECT: Remember to assert any_known_not_updated_vssa

2023-11-06 Thread Kito Cheng
> Oh, you're right! I should have checked the master branch first... and
> I was even wondering why it wasn't marked as such. Should perhaps
> cherry pick this for gcc-13-with-riscv-opts?

gcc-13-with-riscv-opts is mostly maintained by Ventana folks, so maybe
ask Jeff if you want to cherry-pick into that branch?


Re: [PATCH] RISC-V: VECT: Remember to assert any_known_not_updated_vssa

2023-11-06 Thread juzhe.zhong
Not sure who is maintaining this branch. I always develop on master.  CCing other RISC-V folks.

---- Replied Message ----
From: Maxim Blinov
Date: 11/06/2023 21:13
To: Richard Biener
Cc: gcc-patches@gcc.gnu.org, juzhe.zh...@rivai.ai, maxim.bli...@imgtec.com
Subject: Re: [PATCH] RISC-V: VECT: Remember to assert any_known_not_updated_vssa

On Mon, 6 Nov 2023 at 13:07, Richard Biener  wrote:
> I see
>
> DEF_INTERNAL_OPTAB_FN (VEC_EXTRACT, ECF_CONST | ECF_NOTHROW,
>    vec_extract, vec_extract)
>
> ?

Oh, you're right! I should have checked the master branch first... and
I was even wondering why it wasn't marked as such. Should perhaps
cherry pick this for gcc-13-with-riscv-opts?



Re: [PATCH 5/5] x86: yet more PR target/100711-like splitting

2023-11-06 Thread Hongtao Liu
On Mon, Nov 6, 2023 at 7:10 PM Jan Beulich  wrote:
>
> On 25.06.2023 08:41, Hongtao Liu wrote:
> > On Sun, Jun 25, 2023 at 2:35 PM Hongtao Liu  wrote:
> >>
> >> On Sun, Jun 25, 2023 at 2:25 PM Jan Beulich  wrote:
> >>>
> >>> On 25.06.2023 07:12, Hongtao Liu wrote:
>  On Wed, Jun 21, 2023 at 2:29 PM Jan Beulich via Gcc-patches
>   wrote:
> >
> > ---
> > For the purpose here (and elsewhere) bcst_vector_operand() (really:
> > bcst_mem_operand()) isn't permissive enough: We'd want it to allow
> > 128-bit and 256-bit types as well irrespective of AVX512VL being
> > enabled. This would likely require a new predicate
> > (bcst_intvec_operand()?) and a new constraint (BR? Bi?). (Yet for name
> > selection it will want considering that this is applicable to certain
> > non-calculational FP operations as well.)
>  I think so.
> >>>
> >>> Any preference towards predicate and constraint naming?
> >> something like bcst_mem_operand_$suffix, $suffix indicates the
> >> pattern may use zmm instruction for 128/256-bit operand.
> >> maybe just bcst_mem_operand_zmm?
> > For constraint, maybe we can reuse Br, relax Br to match 
> > bcst_mem_operand_zmm.
> > For those original patterns with bcst_mem_operand, it should be ok
> > since it's already guarded by the predicate, the constraint must be
> > valid.
>
> Hmm, I wanted to get back to this, but then I started wondering about this
> reply of yours vs your request to not go farther with the use of "oversized"
> insns (i.e. acting in 512-bit registers in lieu of AVX512VL being enabled,
> when no FP exceptions can be raised on the otherwise unused elements). Since
> iirc the latter came later, am I right in assuming we then also shouldn't go
> the route outlined above?
No, we shouldn't.
This reply is just an answer on how to do it technically, but we don't
really want to do it (considering that all AVX512 processors after SKX
will all support AVX512VL)
>
> Jan



-- 
BR,
Hongtao


[PATCH v2] c-family: Enable -fpermissive for C and ObjC

2023-11-06 Thread Florian Weimer
Future changes will treat some C front end warnings similar to
-Wnarrowing.

gcc/

* doc/invoke.texi (Warning Options): Mention C diagnostics
for -fpermissive.

gcc/c-family/

* c.opt (fpermissive): Enable for C and ObjC.
* c-opts.cc (set_std_c89): Enable -fpermissive.

---
v2: Rebased after David's m_* member changes.  Still no test suite
regressions.  Actual tests will need some C permerrors, which
we do not yet have.

 gcc/c-family/c-opts.cc | 6 ++
 gcc/c-family/c.opt | 2 +-
 gcc/doc/invoke.texi| 8 ++--
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
index a980912f7e1..dbf92be9698 100644
--- a/gcc/c-family/c-opts.cc
+++ b/gcc/c-family/c-opts.cc
@@ -1711,6 +1711,12 @@ set_std_c89 (int c94, int iso)
   flag_isoc99 = 0;
   flag_isoc11 = 0;
   flag_isoc2x = 0;
+  /* -std=gnu89 etc. should not override -pedantic-errors.  */
+  if (!global_dc->m_pedantic_errors)
+{
+  flag_permissive = 1;
+  global_dc->m_permissive = 1;
+}
   lang_hooks.name = "GNU C89";
 }
 
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 29d3d789a49..cc3a6610148 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -2112,7 +2112,7 @@ C ObjC C++ ObjC++
 Look for and use PCH files even when preprocessing.
 
 fpermissive
-C++ ObjC++ Var(flag_permissive)
+C ObjC C++ ObjC++ Var(flag_permissive)
 Downgrade conformance errors to warnings.
 
 fplan9-extensions
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 6e776a0faa1..dfa01220b93 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -6170,13 +6170,17 @@ errors by @option{-pedantic-errors}.  For instance:
 Downgrade some required diagnostics about nonconformant code from
 errors to warnings.  Thus, using @option{-fpermissive} allows some
 nonconforming code to compile.  Some C++ diagnostics are controlled
-only by this flag, but it also downgrades some diagnostics that have
-their own flag:
+only by this flag, but it also downgrades some C and C++ diagnostics
+that have their own flag:
 
 @gccoptlist{
 -Wnarrowing @r{(C++)}
 }
 
+The @option{-fpermissive} option is the default for historic C language
+modes (@option{-std=c89}, @option{-std=gnu89}, @option{-std=c90},
+@option{-std=gnu90}).
+
 @opindex Wall
 @opindex Wno-all
 @item -Wall

base-commit: 3cc9ad41db87fb85b13a56bff1f930c258542a70



Re: [PATCH v2] c-family: Enable -fpermissive for C and ObjC

2023-11-06 Thread Jakub Jelinek
On Mon, Nov 06, 2023 at 03:06:39PM +0100, Florian Weimer wrote:
> Future changes will treat some C front end warnings similar to
> -Wnarrowing.
> 
> gcc/
> 
>   * doc/invoke.texi (Warning Options): Mention C diagnostics
>   for -fpermissive.
> 
> gcc/c-family/
> 
>   * c.opt (fpermissive): Enable for C and ObjC.
>   * c-opts.cc (set_std_c89): Enable -fpermissive.

Won't this set flag_permissive even for -std=c89 -std=c99 ?
Haven't tried, but if set_std_c* is called multiple times if more than
one -std= option appears, then perhaps this should be done later after
processing all options, not during that processing.

Jakub



[PATCH V2] RISC-V: Early expand DImode vec_duplicate in RV32 system

2023-11-06 Thread Juzhe-Zhong
An ICE was discovered in recent rounding autovec support:

config/riscv/riscv-v.cc:4314
   65 | }
  | ^
0x1fa5223 riscv_vector::validate_change_or_fail(rtx_def*, rtx_def**,
rtx_def*, bool)

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-v.cc:4314
0x1fb1aa2 pre_vsetvl::remove_avl_operand()

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3342
0x1fb18c1 pre_vsetvl::cleaup()

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3308
0x1fb216d pass_vsetvl::lazy_vsetvl()

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3480
0x1fb2214 pass_vsetvl::execute(function*)

/home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3504

The root cause is that RA reload generates (set (reg) (vec_duplicate:DI)).
However, this is not valid on RV32 systems, since RV32 has no single
instruction that broadcasts a DI scalar.
We should expand it early on RV32 systems.

gcc/ChangeLog:

* config/riscv/predicates.md: Adapt predicate.
* config/riscv/riscv-protos.h (can_be_broadcasted_p): New function.
* config/riscv/riscv-v.cc (can_be_broadcasted_p): Ditto.
* config/riscv/vector.md (vec_duplicate): New pattern.
(*vec_duplicate): Adapt vec_duplicate insn pattern.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/unop/sew64-rv32.c: New test.

---
 gcc/config/riscv/predicates.md|  9 +-
 gcc/config/riscv/riscv-protos.h   |  1 +
 gcc/config/riscv/riscv-v.cc   | 20 +
 gcc/config/riscv/vector.md| 20 -
 .../riscv/rvv/autovec/unop/sew64-rv32.c   | 29 +++
 5 files changed, 70 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/sew64-rv32.c

diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index db18054607f..df1c66f3a76 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -553,14 +553,7 @@
 
 ;; The scalar operand can be directly broadcast by RVV instructions.
 (define_predicate "direct_broadcast_operand"
-  (and (match_test "!(reload_completed && !FLOAT_MODE_P (GET_MODE (op))
-   && (register_operand (op, GET_MODE (op)) || CONST_INT_P (op)
-   || rtx_equal_p (op, CONST0_RTX (GET_MODE (op
-   && maybe_gt (GET_MODE_BITSIZE (GET_MODE (op)), GET_MODE_BITSIZE 
(Pmode)))")
-(ior (match_test "rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))")
- (ior (match_code "const_int,const_poly_int")
-  (ior (match_operand 0 "register_operand")
-   (match_test "satisfies_constraint_Wdm (op)"))
+  (match_test "riscv_vector::can_be_broadcasted_p (op)"))
 
 ;; A CONST_INT operand that has exactly two bits cleared.
 (define_predicate "const_nottwobits_operand"
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 9b254a4b278..135df1a1d94 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -598,6 +598,7 @@ uint8_t get_sew (rtx_insn *);
 enum vlmul_type get_vlmul (rtx_insn *);
 int count_regno_occurrences (rtx_insn *, unsigned int);
 bool imm_avl_p (machine_mode);
+bool can_be_broadcasted_p (rtx);
 }
 
 /* We classify builtin types into two classes:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index ff9539fff02..6b8b4da7477 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4418,4 +4418,24 @@ count_regno_occurrences (rtx_insn *rinsn, unsigned int 
regno)
   return count;
 }
 
+/* Return true if the OP can be directly broadcasted.  */
+bool
+can_be_broadcasted_p (rtx op)
+{
+  machine_mode mode = GET_MODE (op);
+  /* We don't allow RA (register allocation) reload generate
+(vec_duplicate:DI reg) in RV32 system whereas we allow
+(vec_duplicate:DI mem) in RV32 system.  */
+  if (!can_create_pseudo_p () && !FLOAT_MODE_P (mode)
+  && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode))
+  && !satisfies_constraint_Wdm (op))
+return false;
+
+  if (satisfies_constraint_K (op) || register_operand (op, mode)
+  || satisfies_constraint_Wdm (op) || rtx_equal_p (op, CONST0_RTX (mode)))
+return true;
+
+  return can_create_pseudo_p () && nonmemory_operand (op, mode);
+}
+
 } // namespace riscv_vector
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 8509c4fe5f2..e23f64938b7 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1370,11 +1370,29 @@
 ;;  Duplicate Operations
 ;; -
 
+(define_expand "vec_duplicate"
+  [(set (match_operand:V_VLS 0 "register_operand")
+(vec_duplicate:V_VLS
+ 

[PATCH v1] RISC-V: Support FP round to i/l/ll diff size autovec

2023-11-06 Thread pan2 . li
From: Pan Li 

This patch adds auto-vectorization support for the FP APIs below,
where the source and destination types differ in size.

+----------+-----------+----------+
| API      | RV64      | RV32     |
+----------+-----------+----------+
| iround   | DF => SI  | DF => SI |
| iroundf  | -         | -        |
| lround   | -         | DF => SI |
| lroundf  | SF => DI  | -        |
| llround  | -         | -        |
| llroundf | SF => DI  | SF => DI |
+----------+-----------+----------+

Given below code:
void
test_lroundf (long *out, float *in, unsigned count)
{
  for (unsigned i = 0; i < count; i++)
out[i] = __builtin_lroundf (in[i]);
}

Before this patch:
.L3:
  flw  fa5,0(a1)
  addi a1,a1,4
  addi a0,a0,8
  fcvt.l.s a5,fa5,rmm
  sd   a5,-8(a0)
  bne  a4,a1,.L3

After this patch:
  fsrmi    4  // RMM rounding mode
  vsetivli zero,16,e32,m4,ta,ma
.L4:
  vle32.v  v4,0(a5)
  addi a5,a5,64
  vfwcvt.x.f.v v8,v4
  vse64.v  v8,0(a4)
  addi a4,a4,128
  bne  a3,a5,.L4
  andi a5,a2,15
  andi a4,a2,-16
  beq  a5,zero,.L16

Unfortunately, HF mode is not included because it requires
additional middle-end support in internal-fn.def.

gcc/ChangeLog:

* config/riscv/autovec.md: Remove the size check of lround.
* config/riscv/riscv-v.cc (expand_vec_lround): Leverage
emit_vec_rounding_to_integer for round.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/unop/math-iround-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-iround-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-llroundf-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-llroundf-run-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-lround-rv32-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-lround-rv32-run-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-lroundf-rv64-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-lroundf-rv64-run-0.c: New test.
* gcc.target/riscv/rvv/autovec/vls/math-iround-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls/math-llroundf-0.c: New test.
* gcc.target/riscv/rvv/autovec/vls/math-lround-rv32-0.c: New test.
* gcc.target/riscv/rvv/autovec/vls/math-lroundf-rv64-0.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/autovec.md   |  6 +-
 gcc/config/riscv/riscv-v.cc   |  8 +-
 .../riscv/rvv/autovec/unop/math-iround-1.c| 18 
 .../rvv/autovec/unop/math-iround-run-1.c  | 83 ++
 .../riscv/rvv/autovec/unop/math-llroundf-0.c  | 19 +
 .../rvv/autovec/unop/math-llroundf-run-0.c| 84 +++
 .../rvv/autovec/unop/math-lround-rv32-0.c | 18 
 .../rvv/autovec/unop/math-lround-rv32-run-0.c | 83 ++
 .../rvv/autovec/unop/math-lroundf-rv64-0.c| 18 
 .../autovec/unop/math-lroundf-rv64-run-0.c| 84 +++
 .../riscv/rvv/autovec/vls/math-iround-1.c | 27 ++
 .../riscv/rvv/autovec/vls/math-llroundf-0.c   | 27 ++
 .../rvv/autovec/vls/math-lround-rv32-0.c  | 27 ++
 .../rvv/autovec/vls/math-lroundf-rv64-0.c | 27 ++
 14 files changed, 520 insertions(+), 9 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-iround-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-iround-run-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-llroundf-0.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-llroundf-run-0.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-lround-rv32-0.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-lround-rv32-run-0.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-lroundf-rv64-0.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-lroundf-rv64-run-0.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-iround-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-llroundf-0.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-lround-rv32-0.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-lroundf-rv64-0.c

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index f1f0523d1de..d1804d82552 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2420,8 +2420,7 @@ (define_expand "lrint2"
 (define_expand "lround2"
   [(match_operand:   0 "register_operand")
(match_operand:V_VLS_F_CONVERT_SI 1 "register_operand")]
-  "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math
-&& known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE 
(mode))"
+  "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
   {
 riscv_vector::expand_vec_lround (operands[0], operands[1], mode, 
mode);
 DONE;
@@ -24

Re: [PATCH v1] RISC-V: Support FP round to i/l/ll diff size autovec

2023-11-06 Thread 钟居哲
LGTM.



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-11-06 22:16
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Support FP round to i/l/ll diff size autovec
From: Pan Li 
 
This patch adds auto-vectorization support for the FP APIs below
when the source and destination types differ in size.
 
+----------+-----------+----------+
| API      | RV64      | RV32     |
+----------+-----------+----------+
| iround   | DF => SI  | DF => SI |
| iroundf  | -         | -        |
| lround   | -         | DF => SI |
| lroundf  | SF => DI  | -        |
| llround  | -         | -        |
| llroundf | SF => DI  | SF => DI |
+----------+-----------+----------+
 
Given below code:
void
test_lroundf (long *out, float *in, unsigned count)
{
  for (unsigned i = 0; i < count; i++)
out[i] = __builtin_lroundf (in[i]);
}
 
Before this patch:
.L3:
  flw  fa5,0(a1)
  addi a1,a1,4
  addi a0,a0,8
  fcvt.l.s a5,fa5,rmm
  sd   a5,-8(a0)
  bne  a4,a1,.L3
 
After this patch:
  fsrmi 4  // RMM rounding mode
  vsetivli zero,16,e32,m4,ta,ma
.L4:
  vle32.v  v4,0(a5)
  addi a5,a5,64
  vfwcvt.x.f.v v8,v4
  vse64.v  v8,0(a4)
  addi a4,a4,128
  bne  a3,a5,.L4
  andi a5,a2,15
  andi a4,a2,-16
  beq  a5,zero,.L16
 
Unfortunately, the HF mode is not included because it requires
additional middle-end support in internal-fn.def.
 
gcc/ChangeLog:
 
* config/riscv/autovec.md: Remove the size check of lround.
* config/riscv/riscv-v.cc (expand_vec_lround): Leverage
emit_vec_rounding_to_integer for round.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/unop/math-iround-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-iround-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-llroundf-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-llroundf-run-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-lround-rv32-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-lround-rv32-run-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-lroundf-rv64-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-lroundf-rv64-run-0.c: New test.
* gcc.target/riscv/rvv/autovec/vls/math-iround-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls/math-llroundf-0.c: New test.
* gcc.target/riscv/rvv/autovec/vls/math-lround-rv32-0.c: New test.
* gcc.target/riscv/rvv/autovec/vls/math-lroundf-rv64-0.c: New test.
 
Signed-off-by: Pan Li 
---
gcc/config/riscv/autovec.md   |  6 +-
gcc/config/riscv/riscv-v.cc   |  8 +-
.../riscv/rvv/autovec/unop/math-iround-1.c| 18 
.../rvv/autovec/unop/math-iround-run-1.c  | 83 ++
.../riscv/rvv/autovec/unop/math-llroundf-0.c  | 19 +
.../rvv/autovec/unop/math-llroundf-run-0.c| 84 +++
.../rvv/autovec/unop/math-lround-rv32-0.c | 18 
.../rvv/autovec/unop/math-lround-rv32-run-0.c | 83 ++
.../rvv/autovec/unop/math-lroundf-rv64-0.c| 18 
.../autovec/unop/math-lroundf-rv64-run-0.c| 84 +++
.../riscv/rvv/autovec/vls/math-iround-1.c | 27 ++
.../riscv/rvv/autovec/vls/math-llroundf-0.c   | 27 ++
.../rvv/autovec/vls/math-lround-rv32-0.c  | 27 ++
.../rvv/autovec/vls/math-lroundf-rv64-0.c | 27 ++
14 files changed, 520 insertions(+), 9 deletions(-)
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-iround-1.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-iround-run-1.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-llroundf-0.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-llroundf-run-0.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-lround-rv32-0.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-lround-rv32-run-0.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-lroundf-rv64-0.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-lroundf-rv64-run-0.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-iround-1.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-llroundf-0.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-lround-rv32-0.c
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-lroundf-rv64-0.c
 
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index f1f0523d1de..d1804d82552 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2420,8 +2420,7 @@ (define_expand "lrint2"
(define_expand "lround2"
   [(match_operand:   0 "register_operand")
(match_operand:V_VLS_F_CONVERT_SI 1 "register_operand")]
-  "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math
-&& known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE 
(mode))"
+  "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
   {
 riscv_vector::ex

Re: [PATCH v2] c-family: Enable -fpermissive for C and ObjC

2023-11-06 Thread Florian Weimer
* Jakub Jelinek:

> On Mon, Nov 06, 2023 at 03:06:39PM +0100, Florian Weimer wrote:
>> Future changes will treat some C front end warnings similar to
>> -Wnarrowing.
>> 
>> gcc/
>> 
>>  * doc/invoke.texi (Warning Options): Mention C diagnostics
>>  for -fpermissive.
>> 
>> gcc/c-family/
>> 
>>  * c.opt (fpermissive): Enable for C and ObjC.
>>  * c-opts.cc (set_std_c89): Enable -fpermissive.
>
> Won't this set flag_permissive even for -std=c89 -std=c99 ?
> Haven't tried, but if set_std_c* is called multiple times if more than
> one -std= option appears, then perhaps this should be done later after
> processing all options, not during that processing.

Ugh, you are right.

What would be the right place to do this kind of final option
processing?  Where those SET_OPTION_IF_UNSET are?

Thanks,
Florian



Re: Re: [PATCH] RISC-V: Early expand DImode vec_duplicate in RV32 system

2023-11-06 Thread 钟居哲
Committed, with the testcase added as you suggested, as V2:
[PATCH V2] RISC-V: Early expand DImode vec_duplicate in RV32 system (gnu.org)



juzhe.zh...@rivai.ai
 
From: Kito Cheng
Date: 2023-11-06 20:46
To: juzhe.zh...@rivai.ai
CC: kito.cheng; gcc-patches; jeffreyalaw; Robin Dapp
Subject: Re: Re: [PATCH] RISC-V: Early expand DImode vec_duplicate in RV32 
system
I would prefer to add a dedicated test case to test that, so that we
could also cover that even if we didn't enable multi-lib testing for
RV32, and I suppose that should only require compile test for part of
that test case ?
 
On Mon, Nov 6, 2023 at 8:41 PM juzhe.zh...@rivai.ai
 wrote:
>
> The testcase already exists on trunk; it was added by Li Pan 
> recently when supporting rounding mode autovec.
>
> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635280.html
>
> math-llrintf-run-0.c passes on RV64 but causes an ICE on RV32.
>
>
>
> 
> juzhe.zh...@rivai.ai
>
>
> From: Kito Cheng
> Date: 2023-11-06 20:38
> To: Juzhe-Zhong
> CC: gcc-patches; kito.cheng; jeffreyalaw; rdapp.gcc
> Subject: Re: [PATCH] RISC-V: Early expand DImode vec_duplicate in RV32 system
> Could you add a testcase? other than that LGTM.
>
> On Mon, Nov 6, 2023 at 8:27 PM Juzhe-Zhong  wrote:
> >
> > An ICE was discovered in recent rounding autovec support:
> >
> > config/riscv/riscv-v.cc:4314
> >65 | }
> >   | ^
> > 0x1fa5223 riscv_vector::validate_change_or_fail(rtx_def*, rtx_def**,
> > rtx_def*, bool)
> > 
> > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-v.cc:4314
> > 0x1fb1aa2 pre_vsetvl::remove_avl_operand()
> > 
> > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3342
> > 0x1fb18c1 pre_vsetvl::cleaup()
> > 
> > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3308
> > 0x1fb216d pass_vsetvl::lazy_vsetvl()
> > 
> > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3480
> > 0x1fb2214 pass_vsetvl::execute(function*)
> > 
> > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3504
> >
> > The root cause is that the RA reloads into (set (reg) vec_duplicate:DI).
> > However, that is not valid on RV32 systems, since there is no single
> > instruction that broadcasts a DI scalar on RV32.
> > We should expand it early on RV32 systems.
> >
> > gcc/ChangeLog:
> >
> > * config/riscv/predicates.md: Refine predicate.
> > * config/riscv/riscv-protos.h (can_be_broadcasted_p): New function.
> > * config/riscv/riscv-v.cc (can_be_broadcasted_p): Ditto.
> > * config/riscv/vector.md (vec_duplicate): New pattern.
> > (*vec_duplicate): Adapt pattern.
> >
> > ---
> >  gcc/config/riscv/predicates.md  |  9 +
> >  gcc/config/riscv/riscv-protos.h |  1 +
> >  gcc/config/riscv/riscv-v.cc | 20 
> >  gcc/config/riscv/vector.md  | 20 +++-
> >  4 files changed, 41 insertions(+), 9 deletions(-)
> >
> > diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
> > index db18054607f..df1c66f3a76 100644
> > --- a/gcc/config/riscv/predicates.md
> > +++ b/gcc/config/riscv/predicates.md
> > @@ -553,14 +553,7 @@
> >
> >  ;; The scalar operand can be directly broadcast by RVV instructions.
> >  (define_predicate "direct_broadcast_operand"
> > -  (and (match_test "!(reload_completed && !FLOAT_MODE_P (GET_MODE (op))
> > -   && (register_operand (op, GET_MODE (op)) || CONST_INT_P (op)
> > -   || rtx_equal_p (op, CONST0_RTX (GET_MODE (op
> > -   && maybe_gt (GET_MODE_BITSIZE (GET_MODE (op)), 
> > GET_MODE_BITSIZE (Pmode)))")
> > -(ior (match_test "rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))")
> > - (ior (match_code "const_int,const_poly_int")
> > -  (ior (match_operand 0 "register_operand")
> > -   (match_test "satisfies_constraint_Wdm (op)"))
> > +  (match_test "riscv_vector::can_be_broadcasted_p (op)"))
> >
> >  ;; A CONST_INT operand that has exactly two bits cleared.
> >  (define_predicate "const_nottwobits_operand"
> > diff --git a/gcc/config/riscv/riscv-protos.h 
> > b/gcc/config/riscv/riscv-protos.h
> > index 6cbf2130f88..acae00f653f 100644
> > --- a/gcc/config/riscv/riscv-protos.h
> > +++ b/gcc/config/riscv/riscv-protos.h
> > @@ -595,6 +595,7 @@ uint8_t get_sew (rtx_insn *);
> >  enum vlmul_type get_vlmul (rtx_insn *);
> >  int count_regno_occurrences (rtx_insn *, unsigned int);
> >  bool imm_avl_p (machine_mode);
> > +bool can_be_broadcasted_p (rtx);
> >  }
> >
> >  /* We classify builtin types into two classes:
> > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> > index 80d2bb9e289..a64946213c3 100644
> > --- a/gcc/config/riscv/riscv-v.cc
> > +++ b/gcc/config/riscv/riscv-v.c

Re: [PATCH v6 0/21]middle-end: Support early break/return auto-vectorization

2023-11-06 Thread Richard Biener
On Mon, 6 Nov 2023, Tamar Christina wrote:

> Hi All,
> 
> This patch adds initial support for early break vectorization in GCC.
> The support is added for any target that implements a vector cbranch optab,
> this includes both fully masked and non-masked targets.
> 
> Depending on the operation, the vectorizer may also require support for 
> boolean
> mask reductions using Inclusive OR.  This is however only checked when the
> comparison would produce multiple statements.
> 
> Note: I am currently struggling to get patch 7 correct in all cases and could 
> use
>   some feedback there.
> 
> Concretely the kind of loops supported are of the forms:
> 
>  for (int i = 0; i < N; i++)
>  {
>    <statements1>
>    if (<condition>)
>      {
>        ...
>        <action>;
>      }
>    <statements2>
>  }
> 
> where <action> can be:
>  - break
>  - return
>  - goto
> 
> Any number of statements can be used before the <action> occurs.
> 
> Since this is an initial version for GCC 14 it has the following limitations 
> and
> features:
> 
> - Only fixed sized iterations and buffers are supported.  That is to say any
>   vectors loaded or stored must be to statically allocated arrays with known
>   sizes. N must also be known.  This limitation is because our primary target
>   for this optimization is SVE.  For VLA SVE we can't easily do cross page
>   iteration checks. The result is likely to also not be beneficial. For that
>   reason we punt support for variable buffers till we have First-Faulting
>   support in GCC.
> - any stores in <statements1> should not be to the same objects as in
>   <condition>.  Loads are fine as long as they don't have the possibility to
>   alias.  More concretely, we block RAW dependencies when the intermediate 
> value
>   can't be separated from the store, or the store itself can't be moved.
> - Prologue peeling, alignment peeling and loop versioning are supported.
> - Fully masked loops, unmasked loops and partially masked loops are supported
> - Any number of loop early exits are supported.
> - No support for epilogue vectorization.  The only epilogue supported is the
>   scalar final one.  Peeling code supports it but the code motion code cannot
>   find instructions to make the move in the epilog.
> - Early breaks are only supported for inner loop vectorization.
> 
> I have pushed a branch to refs/users/tnfchris/heads/gcc-14-early-break
> 
> With the help of IPA and LTO this still gets hit quite often.  During 
> bootstrap
> it hit rather frequently.  Additionally TSVC s332, s481 and s482 all pass now
> since these are tests for support for early exit vectorization.
> 
> This implementation does not support completely handling the early break 
> inside
> the vector loop itself but instead supports adding checks such that if we know
> that we have to exit in the current iteration then we branch to scalar code to
> actually do the final VF iterations which handles all the code in <action>.
> 
> For the scalar loop we know that whatever exit you take you have to perform at
> most VF iterations.  For vector code we only care about the state of fully
> performed iteration and reset the scalar code to the (partially) remaining 
> loop.
> 
> That is to say, the first vector loop executes so long as the early exit isn't
> needed.  Once the exit is taken, the scalar code will perform at most VF extra
> iterations.  The exact number depending on peeling and iteration start and 
> which
> exit was taken (natural or early).   For this scalar loop, all early exits are
> treated the same.
> 
> When we vectorize we move any statement not related to the early break itself
> and that would be incorrect to execute before the break (i.e. has side 
> effects)
> to after the break.  If this is not possible we decline to vectorize.
> 
> This means that we check at the start of iterations whether we are going to 
> exit
> or not.  During the analysis phase we check whether we are allowed to do this
> moving of statements.  Also note that we only move the scalar statements, but
> only do so after peeling but just before we start transforming statements.
> 
> Codegen:
> 
> for e.g.
> 
> #define N 803
> unsigned vect_a[N];
> unsigned vect_b[N];
> 
> unsigned test4(unsigned x)
> {
>  unsigned ret = 0;
>  for (int i = 0; i < N; i++)
>  {
>vect_b[i] = x + i;
>if (vect_a[i] > x)
>  break;
>vect_a[i] = x;
> 
>  }
>  return ret;
> }
> 
> We generate for Adv. SIMD:
> 
> test4:
> adrp    x2, .LC0
> adrp    x3, .LANCHOR0
> dup v2.4s, w0
> add x3, x3, :lo12:.LANCHOR0
> movi    v4.4s, 0x4
> add x4, x3, 3216
> ldr q1, [x2, #:lo12:.LC0]
> mov x1, 0
> mov w2, 0
> .p2align 3,,7
> .L3:
> ldr q0, [x3, x1]
> add v3.4s, v1.4s, v2.4s
> add v1.4s, v1.4s, v4.4s
> cmhi    v0.4s, v0.4s, v2.4s
> umaxp   v0.4s, v0.4s, v0.4s
> fmov    x5, d0
> cbnz    x5, .L6
> add w2, w2, 1
> str q3, [x1, x4]
> str

[PATCH] tree-optimization/112405 - SIMD clone calls with (loop) mask

2023-11-06 Thread Richard Biener
The following fixes the mask argument generation for SIMD clone
calls under either loop masking or when the actual call is not
masked but only an inbranch simd clone is available.  The issue
was that we tried to directly convert the vector mask to the
call argument type but SIMD clone masks require 1 or 0 (which
could be even float) values for mask elements so we have to
resort to a VEC_COND_EXPR to generate them just like we do for
regular passing of the mask.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/112405
* tree-vect-stmts.cc (vectorizable_simd_clone_call):
Properly handle invariant and/or loop mask passing.
---
 gcc/tree-vect-stmts.cc | 62 +++---
 1 file changed, 34 insertions(+), 28 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index eefb1eec1ef..65883e04ad7 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -4814,36 +4814,42 @@ vectorizable_simd_clone_call (vec_info *vinfo, 
stmt_vec_info stmt_info,
  else
mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
 
- if (!useless_type_conversion_p (TREE_TYPE (mask), masktype))
+ gassign *new_stmt;
+ if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
{
- gassign *new_stmt;
- if (bestn->simdclone->mask_mode != VOIDmode)
-   {
- /* This means we are dealing with integer mask modes.
-First convert to an integer type with the same size as
-the current vector type.  */
- unsigned HOST_WIDE_INT intermediate_size
-   = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
- tree mid_int_type =
-   build_nonstandard_integer_type (intermediate_size, 1);
- mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
- new_stmt
-   = gimple_build_assign (make_ssa_name (mid_int_type),
-  mask);
- gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
- /* Then zero-extend to the mask mode.  */
- mask = fold_build1 (NOP_EXPR, masktype,
- gimple_get_lhs (new_stmt));
-   }
- else
-   mask = build1 (VIEW_CONVERT_EXPR, masktype, mask);
-
- new_stmt = gimple_build_assign (make_ssa_name (masktype),
- mask);
- vect_finish_stmt_generation (vinfo, stmt_info,
-  new_stmt, gsi);
- mask = gimple_assign_lhs (new_stmt);
+ /* This means we are dealing with integer mask modes.
+First convert to an integer type with the same size as
+the current vector type.  */
+ unsigned HOST_WIDE_INT intermediate_size
+ = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
+ tree mid_int_type =
+ build_nonstandard_integer_type (intermediate_size, 1);
+ mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
+ new_stmt
+ = gimple_build_assign (make_ssa_name (mid_int_type),
+mask);
+ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+ /* Then zero-extend to the mask mode.  */
+ mask = fold_build1 (NOP_EXPR, masktype,
+ gimple_get_lhs (new_stmt));
+   }
+ else if (bestn->simdclone->mask_mode == VOIDmode)
+   {
+ tree one = fold_convert (TREE_TYPE (masktype),
+  integer_one_node);
+ tree zero = fold_convert (TREE_TYPE (masktype),
+   integer_zero_node);
+ mask = build3 (VEC_COND_EXPR, masktype, mask,
+build_vector_from_val (masktype, one),
+build_vector_from_val (masktype, zero));
}
+ else
+   gcc_unreachable ();
+
+ new_stmt = gimple_build_assign (make_ssa_name (masktype), mask);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+  new_stmt, gsi);
+ mask = gimple_assign_lhs (new_stmt);
  vargs.safe_push (mask);
}
}
-- 
2.35.3


Re: [PATCH v2] c-family: Enable -fpermissive for C and ObjC

2023-11-06 Thread Jakub Jelinek
On Mon, Nov 06, 2023 at 03:19:32PM +0100, Florian Weimer wrote:
> * Jakub Jelinek:
> 
> > On Mon, Nov 06, 2023 at 03:06:39PM +0100, Florian Weimer wrote:
> >> Future changes will treat some C front end warnings similar to
> >> -Wnarrowing.
> >> 
> >> gcc/
> >> 
> >>* doc/invoke.texi (Warning Options): Mention C diagnostics
> >>for -fpermissive.
> >> 
> >> gcc/c-family/
> >> 
> >>* c.opt (fpermissive): Enable for C and ObjC.
> >>* c-opts.cc (set_std_c89): Enable -fpermissive.
> >
> > Won't this set flag_permissive even for -std=c89 -std=c99 ?
> > Haven't tried, but if set_std_c* is called multiple times if more than
> > one -std= option appears, then perhaps this should be done later after
> > processing all options, not during that processing.
> 
> Ugh, you are right.
> 
> What would be the right place to do this kind of final option
> processing?  Where those SET_OPTION_IF_UNSET are?

c_common_post_options ?
Generally, we have global_options, which are the values of the options
(implicit or explicit) and then another variable of the same type,
global_options_set, which uses all values just as booleans whether the
option was set explicitly or not.

Jakub



Re: [PATCH] RISC-V: VECT: Remember to assert any_known_not_updated_vssa

2023-11-06 Thread Jeff Law




On 11/6/23 06:18, Kito Cheng wrote:

Oh, you're right! I should have checked the master branch first... and
I was even wondering why it wasn't marked as such. Should perhaps
cherry pick this for gcc-13-with-riscv-opts?


  gcc-13-with-riscv-opts mostly maintained by Ventana folks, so maybe
ask Jeff if you want to cherry pick into that branch?
I cherry pick all the riscv bits over to that branch, usually once a 
week.


Jeff


Re: [PATCH 14/21]middle-end: Change loop analysis from looking at at number of BB to actual cfg

2023-11-06 Thread Richard Biener
On Mon, 6 Nov 2023, Tamar Christina wrote:

> Hi All,
> 
> The vectorizer at the moment uses a num_bb check to check for control flow.
> This rejects a number of loops with no reason.  Instead this patch changes it
> to check the destination of the exits instead.
> 
> This also allows early break to work by also dropping the single_exit check.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?

I think this can go in independently, one comment below ...

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   * tree-vect-loop-manip.cc (slpeel_can_duplicate_loop_p):
>   * tree-vect-loop.cc (vect_analyze_loop_form):
> 
> --- inline copy of patch -- 
> diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
> index 
> 9c1405d79fd8fe8689007df3b7605b7a3d3ecdd7..466cf4c47154099a33dc63e22d74eef42d282444
>  100644
> --- a/gcc/tree-vect-loop-manip.cc
> +++ b/gcc/tree-vect-loop-manip.cc
> @@ -1937,12 +1937,10 @@ slpeel_can_duplicate_loop_p (const class loop *loop, 
> const_edge exit_e,
>edge entry_e = loop_preheader_edge (loop);
>gcond *orig_cond = get_loop_exit_condition (exit_e);
>gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
> -  unsigned int num_bb = loop->inner? 5 : 2;
>  
>/* All loops have an outer scope; the only case loop->outer is NULL is for
>   the function itself.  */
>if (!loop_outer (loop)
> -  || loop->num_nodes != num_bb
>|| !empty_block_p (loop->latch)
>|| !exit_e
>/* Verify that new loop exit condition can be trivially modified.  */
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 
> ddb6cad60f2f2cfdc96732f3f256d86e315d7357..27ab6abfa854f14f8a4cf3d9fcb1ac1c203a4198
>  100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -1727,6 +1727,17 @@ vect_analyze_loop_form (class loop *loop, 
> vect_loop_form_info *info)
>  "using as main loop exit: %d -> %d [AUX: %p]\n",
>  exit_e->src->index, exit_e->dest->index, exit_e->aux);
>  
> +  /* Check if we have any control flow that doesn't leave the loop.  */
> +  class loop *v_loop = loop->inner ? loop->inner : loop;
> +  basic_block *bbs= get_loop_body (v_loop);
> +  for (unsigned i = 0; i < v_loop->num_nodes; i++)
> +if (!empty_block_p (bbs[i])
> + && !loop_exits_from_bb_p (v_loop, bbs[i])
> + && bbs[i]->loop_father == v_loop)

That looks a bit complicated.  Better matching the comment would be

   if (EDGE_COUNT (bbs[i]->succs) != 1
   && (EDGE_COUNT (bbs[i]->succs) != 2
   || !loop_exits_from_bb_p (bb[i]->loop_father, bb[i])))

I'd say OK with that change, and independently if the removed
single_exit test below isn't harmful (I suppose it is).

Btw, for the outer loop case we still have the single_exit tests
but you already said you're not supporting multi-exits there yet.

Thanks,
Richard.

> +  return opt_result::failure_at (vect_location,
> +  "not vectorized:"
> +  " unsupported control flow in loop.\n");
> +
>/* Different restrictions apply when we are considering an inner-most loop,
>   vs. an outer (nested) loop.
>   (FORNOW. May want to relax some of these restrictions in the future).  
> */
> @@ -1746,11 +1757,6 @@ vect_analyze_loop_form (class loop *loop, 
> vect_loop_form_info *info)
> |
>  (exit-bb)  */
>  
> -  if (loop->num_nodes != 2)
> - return opt_result::failure_at (vect_location,
> -"not vectorized:"
> -" control flow in loop.\n");
> -
>if (empty_block_p (loop->header))
>   return opt_result::failure_at (vect_location,
>  "not vectorized: empty loop.\n");
> @@ -1782,11 +1788,6 @@ vect_analyze_loop_form (class loop *loop, 
> vect_loop_form_info *info)
>  "not vectorized:"
>  " multiple nested loops.\n");
>  
> -  if (loop->num_nodes != 5)
> - return opt_result::failure_at (vect_location,
> -"not vectorized:"
> -" control flow in loop.\n");
> -
>entryedge = loop_preheader_edge (innerloop);
>if (entryedge->src != loop->header
> || !single_exit (innerloop)
> @@ -1823,9 +1824,6 @@ vect_analyze_loop_form (class loop *loop, 
> vect_loop_form_info *info)
>info->inner_loop_cond = inner.conds[0];
>  }
>  
> -  if (!single_exit (loop))
> -return opt_result::failure_at (vect_location,
> -"not vectorized: multiple exits.\n");
>if (EDGE_COUNT (loop->header->preds) != 2)
>  return opt_result::failure_at (vect_location,
>  "not vectorized:"


nvptx: Use the usual '#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1)' (was: libstdc++ "freestanding" ('--disable-hosted-libstdcxx') with '-fno-rtti', '-fno-exceptions': 'libstdc++-v3/libsupc+

2023-11-06 Thread Thomas Schwinge
Hi!

On 2022-07-14T19:39:40+0100, Jonathan Wakely  wrote:
> On Thu, 14 Jul 2022 at 19:14, Thomas Schwinge  wrote:
>> I'm looking into building GCN, nvptx offloading libstdc++ [...]

>> While there is some experimental/incomplete/not-to-be-relied-on support
>> for PTX symbol aliases, we're currently generally running into
>> "error: alias definitions not supported in this configuration"

>> [...]/libstdc++-v3/libsupc++/tinfo.cc:55:1: error: alias definitions not 
>> supported in this configuration
>>55 | std::type_info::__equal (const std::type_info& arg) const 
>> _GLIBCXX_NOEXCEPT
>>   | ^~~
>> make[4]: *** [Makefile:777: tinfo.lo] Error 1
>>
>> That's 'libstdc++-v3/libsupc++/tinfo.cc':
>>
>>  1 // Methods for type_info for -*- C++ -*- Run Time Type Identification.
>> [...]
>> 39 // We can't rely on common symbols being shared between shared 
>> objects.
>> 40 bool std::type_info::
>> 41 operator== (const std::type_info& arg) const _GLIBCXX_NOEXCEPT
>> 42 {
>> 43 #if __GXX_MERGED_TYPEINFO_NAMES
>> 44   return name () == arg.name ();
>> 45 #else
>> 46   /* The name() method will strip any leading '*' prefix. Therefore
>> 47  take care to look at __name rather than name() when looking for
>> 48  the "pointer" prefix.  */
>> 49   return (&arg == this)
>> 50 || (__name[0] != '*' && (__builtin_strcmp (name (), arg.name ()) 
>> == 0));
>> 51 #endif
>> 52 }
>> 53
>> 54 bool
>> 55 std::type_info::__equal (const std::type_info& arg) const 
>> _GLIBCXX_NOEXCEPT
>> 56 __attribute__((alias("_ZNKSt9type_infoeqERKS_")));
>> 57 #endif
>> [...]
>>
>> ..., so there's a manual alias from the line 55 function to the line 41
>> function (if I got that right).
>
> That's only there for backwards compatibility on ARM EABI and other
> targets that don't define __GXX_TYPEINFO_EQUALITY_INLINE==1.
>
> My suggestion would be to define that macro for the target.

Thanks, that did put me on the right track -- we shall define it this
way; indirectly.

Pushed to master branch commit 9837f62f066db532c9db6df38ccf2653d0c3a960
"nvptx: Use the usual '#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 
1)'",
see attached.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 9837f62f066db532c9db6df38ccf2653d0c3a960 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Thu, 14 Jul 2022 23:22:35 +0200
Subject: [PATCH] nvptx: Use the usual '#define MAKE_DECL_ONE_ONLY(DECL)
 (DECL_WEAK (DECL) = 1)'

With this 'MAKE_DECL_ONE_ONLY' definition, we get 'SUPPORTS_ONE_ONLY', and thus
'__GXX_WEAK__', and thus '__GXX_TYPEINFO_EQUALITY_INLINE'.  This unblocks build
of 'libstdc++-v3/libsupc++/tinfo.cc', which otherwise depends on symbol alias
support, which GCC/nvptx doesn't generally provide.  Also, this gets us a
number of FAIL -> PASS progressions in the test suite.

Given that GCC/nvptx support for weak symbols isn't complete, we also get a few
more of the already-known
'error: PTX does not support weak declarations (only weak definitions)':

[-PASS:-]{+FAIL:+} g++.old-deja/g++.other/crash11.C  -std=c++14 (test for excess errors)
[-PASS:-]{+FAIL:+} g++.old-deja/g++.other/crash11.C  -std=c++17 (test for excess errors)
[-PASS:-]{+FAIL:+} g++.old-deja/g++.other/crash11.C  -std=c++20 (test for excess errors)
[-PASS:-]{+FAIL:+} g++.old-deja/g++.other/crash11.C  -std=c++98 (test for excess errors)

[-PASS:-]{+FAIL:+} g++.old-deja/g++.pt/crash29.C  -std=c++14 (test for excess errors)
[-PASS:-]{+FAIL:+} g++.old-deja/g++.pt/crash29.C  -std=c++17 (test for excess errors)
[-PASS:-]{+FAIL:+} g++.old-deja/g++.pt/crash29.C  -std=c++20 (test for excess errors)
[-PASS:-]{+FAIL:+} g++.old-deja/g++.pt/crash29.C  -std=c++98 (test for excess errors)

[-PASS:-]{+FAIL:+} 23_containers/map/56613.cc  -std=gnu++17 (test for excess errors)

... as well as one more of the already-known
'sorry, unimplemented: target cannot support nonlocal goto':

PASS: g++.dg/tree-ssa/pr22488.C  -std=gnu++14 (test for excess errors)
PASS: g++.dg/tree-ssa/pr22488.C  -std=gnu++17 (test for excess errors)
PASS: g++.dg/tree-ssa/pr22488.C  -std=gnu++20 (test for excess errors)
[-PASS:-]{+FAIL:+} g++.dg/tree-ssa/pr22488.C  -std=gnu++98 (test for excess errors)

We shall look into these, later.

	gcc/
	* config/nvptx/nvptx.h (MAKE_DECL_ONE_ONLY): Define.
---
 gcc/config/nvptx/nvptx.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
index 129427e5654..407fd70f06a 100644
--- a/gcc/config/nvptx/nvptx.h
+++ b/gcc/config/nvptx/nvptx.h
@@ -319,6 +319,9 @@ struct GTY(()) machine_function
 
 #define SUPPORTS_WEAK 1
 
+#define MAKE_DECL_ONE_ONLY(DECL

[committed] c: Add -std=c23, -std=gnu23, -Wc11-c23-compat options [PR107954]

2023-11-06 Thread Joseph Myers
At the June WG14 meeting, WG14 decided it preferred to keep C23 as the
informal name for the next revision of the C standard, despite
publication not being before 2024 (publication is due in 2024 whether
or not technical changes at the January meeting result in an FDIS
ballot being needed).  At the Cauldron I raised the question of
whether we should thus now add option names such as -std=c23 to GCC,
and there was support for doing so.

Add -std=c23, making -std=c2x a deprecated alias; also add the alias
-std=iso9899:2024.  Likewise, add -std=gnu23, making -std=gnu2x a
deprecated alias, and add -Wc11-c23-compat, making -Wc11-c2x-compat a
deprecated alias.

Here, I'm generally just adding the new options and making the minimum
changes required to do so, with documentation changed to refer to C23
instead of C2X only where directly associated with documentation of
these options.  It's intended that future changes will update
documentation, diagnostics, comments, variable names, testcase names,
etc. to refer consistently to C23.  When such changes are made, the
new tests c23-opts-3.c, c23-opts-5.c and gnu23-opts-2.c are intended
to keep using the old option names they are specifically testing,
while other tests would start using the c23/gnu23 versions of the
names (as well as the tests themselves being renamed).

Updating option names is independent of updating to the final
__STDC_VERSION__ value.  There, the question is whether we should
update the value now or wait for the remaining significant features to
be implemented first.  (I intend to review Martin's tag compatibility
patches for GCC 14.  I'm not aware of anyone working on #embed - or on
the [[unsequenced]] and [[reproducible]] attributes, though support
for standard attributes is optional.)

Bootstrapped with no regressions for x86_64-pc-linux-gnu.

PR c/107954

gcc/
* doc/cpp.texi (__STDC_VERSION__): Refer to -std=c23 and
-std=gnu23 instead of -std=c2x and -std=gnu2x.
* doc/extend.texi (Attribute Syntax): Refer to C23 and -std=c23
instead of C2x and -std=c2x.
* doc/invoke.texi (-Wc11-c23-compat, -std=c23, -std=gnu23)
(-std=iso9899:2024): Document, with -Wc11-c2x-compat, -std=c2x and
-std=gnu2x as deprecated aliases.  Update descriptions of C23.
* doc/standards.texi (Standards): Describe C23 with C2X as an old
name.

gcc/c-family/
* c.opt (Wc11-c2x-compat): Rename to Wc11-c23-compat and make into
a deprecated alias of Wc11-c23-compat.
(std=c2x): Rename to std=c23 and make into a deprecated alias of
std=c23.
(std=gnu2x): Rename to std=gnu23 and make into a deprecated alias
of std=gnu23.
(std=iso9899:2024): New option.  Alias of std=c23.
* c-lex.cc (interpret_float): Use OPT_Wc11_c23_compat instead of
OPT_Wc11_c2x_compat.
* c-opts.cc (c_common_handle_option): Use OPT_std_c23 instead of
OPT_std_c2x and OPT_std_gnu23 instead of OPT_std_gnu2x.

gcc/c/
* c-errors.cc (pedwarn_c11): Use OPT_Wc11_c23_compat instead of
OPT_Wc11_c2x_compat.
* c-typeck.cc (build_conditional_expr, convert_for_assignment):
Use OPT_Wc11_c23_compat instead of OPT_Wc11_c2x_compat.

gcc/testsuite/
* gcc.dg/c23-opts-1.c, gcc.dg/c23-opts-2.c, gcc.dg/c23-opts-3.c,
gcc.dg/c23-opts-4.c, gcc.dg/c23-opts-5.c, gcc.dg/gnu23-opts-1.c,
gcc.dg/gnu23-opts-2.c: New tests.

diff --git a/gcc/c-family/c-lex.cc b/gcc/c-family/c-lex.cc
index 5e457b78cdd..a05632295cf 100644
--- a/gcc/c-family/c-lex.cc
+++ b/gcc/c-family/c-lex.cc
@@ -1190,11 +1190,11 @@ interpret_float (const cpp_token *token, unsigned int 
flags,
if (warn_c11_c2x_compat > 0)
  {
if (pedantic && !flag_isoc2x)
- pedwarn (input_location, OPT_Wc11_c2x_compat,
+ pedwarn (input_location, OPT_Wc11_c23_compat,
   "non-standard suffix on floating constant "
   "before C2X");
else
- warning (OPT_Wc11_c2x_compat,
+ warning (OPT_Wc11_c23_compat,
   "non-standard suffix on floating constant "
   "before C2X");
  }
diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
index e9f7e6d424d..6b64821cbf0 100644
--- a/gcc/c-family/c-opts.cc
+++ b/gcc/c-family/c-opts.cc
@@ -732,12 +732,12 @@ c_common_handle_option (size_t scode, const char *arg, 
HOST_WIDE_INT value,
set_std_c17 (false /* ISO */);
   break;
 
-case OPT_std_c2x:
+case OPT_std_c23:
   if (!preprocessing_asm_p)
set_std_c2x (true /* ISO */);
   break;
 
-case OPT_std_gnu2x:
+case OPT_std_gnu23:
   if (!preprocessing_asm_p)
set_std_c2x (false /* ISO */);
   break;
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 29d3d789a49..359f071e632 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c

RE: [PATCH v6 0/21]middle-end: Support early break/return auto-vectorization

2023-11-06 Thread Tamar Christina
> -Original Message-
> From: Richard Biener 
> Sent: Monday, November 6, 2023 2:25 PM
> To: Tamar Christina 
> Cc: gcc-patches@gcc.gnu.org; nd 
> Subject: Re: [PATCH v6 0/21]middle-end: Support early break/return auto-
> vectorization
> 
> On Mon, 6 Nov 2023, Tamar Christina wrote:
> 
> > Hi All,
> >
> > This patch adds initial support for early break vectorization in GCC.
> > The support is added for any target that implements a vector cbranch
> > optab, this includes both fully masked and non-masked targets.
> >
> > Depending on the operation, the vectorizer may also require support
> > for boolean mask reductions using Inclusive OR.  This is however only
> > > checked when the comparison would produce multiple statements.
> >
> > Note: I am currently struggling to get patch 7 correct in all cases and 
> > could
> use
> >   some feedback there.
> >
> > Concretely the kind of loops supported are of the forms:
> >
> >  for (int i = 0; i < N; i++)
> >  {
> >
> >if ()
> >  {
> >...
> >;
> >  }
> >
> >  }
> >
> > where  can be:
> >  - break
> >  - return
> >  - goto
> >
> > Any number of statements can be used before the  occurs.
> >
> > Since this is an initial version for GCC 14 it has the following
> > limitations and
> > features:
> >
> > - Only fixed sized iterations and buffers are supported.  That is to say any
> >   vectors loaded or stored must be to statically allocated arrays with known
> >   sizes. N must also be known.  This limitation is because our primary 
> > target
> >   for this optimization is SVE.  For VLA SVE we can't easily do cross page
> > >   iteration checks. The result is likely to also not be beneficial. For that
> >   reason we punt support for variable buffers till we have First-Faulting
> >   support in GCC.
> > - any stores in  should not be to the same objects as in
> >   .  Loads are fine as long as they don't have the possibility to
> >   alias.  More concretely, we block RAW dependencies when the intermediate
> value
> > >   can't be separated from the store, or the store itself can't be moved.
> > > - Prologue peeling, alignment peeling and loop versioning are supported.
> > - Fully masked loops, unmasked loops and partially masked loops are
> > supported
> > - Any number of loop early exits are supported.
> > - No support for epilogue vectorization.  The only epilogue supported is the
> >   scalar final one.  Peeling code supports it but the code motion code 
> > cannot
> >   find instructions to make the move in the epilog.
> > - Early breaks are only supported for inner loop vectorization.
> >
> > I have pushed a branch to refs/users/tnfchris/heads/gcc-14-early-break
> >
> > With the help of IPA and LTO this still gets hit quite often.  During
> > bootstrap it hit rather frequently.  Additionally TSVC s332, s481 and
> > s482 all pass now since these are tests for support for early exit
> vectorization.
> >
> > This implementation does not support completely handling the early
> > break inside the vector loop itself but instead supports adding checks
> > such that if we know that we have to exit in the current iteration
> > then we branch to scalar code to actually do the final VF iterations which
> handles all the code in .
> >
> > For the scalar loop we know that whatever exit you take you have to
> > > perform at most VF iterations.  For vector code we only care about the
> > state of fully performed iteration and reset the scalar code to the 
> > (partially)
> remaining loop.
> >
> > That is to say, the first vector loop executes so long as the early
> > exit isn't needed.  Once the exit is taken, the scalar code will
> > perform at most VF extra iterations.  The exact number depending on peeling
> and iteration start and which
> > exit was taken (natural or early).   For this scalar loop, all early exits 
> > are
> > treated the same.
> >
> > When we vectorize we move any statement not related to the early break
> > itself and that would be incorrect to execute before the break (i.e.
> > has side effects) to after the break.  If this is not possible we decline to
> vectorize.
> >
> > This means that we check at the start of iterations whether we are
> > > going to exit or not.  During the analysis phase we check whether we
> > are allowed to do this moving of statements.  Also note that we only
> > move the scalar statements, but only do so after peeling but just before we
> start transforming statements.
> >
> > Codegen:
> >
> > for e.g.
> >
> > #define N 803
> > unsigned vect_a[N];
> > unsigned vect_b[N];
> >
> > unsigned test4(unsigned x)
> > {
> >  unsigned ret = 0;
> >  for (int i = 0; i < N; i++)
> >  {
> >vect_b[i] = x + i;
> >if (vect_a[i] > x)
> >  break;
> >vect_a[i] = x;
> >
> >  }
> >  return ret;
> > }
> >
> > We generate for Adv. SIMD:
> >
> > test4:
> > adrpx2, .LC0
> > adrpx3, .LANCHOR0
> > dup v2.4s, w0
> > add x3, x3, :lo12:.LANCHOR0
> > mov

[committed] i386: Use "addr" attribute to limit address regclass to non-REX regs

2023-11-06 Thread Uros Bizjak
Use "addr" attribute with "gpr8" value to limit address register class
to non-REX registers in instructions with high registers, where REX
registers can not be used in the address.

gcc/ChangeLog:

* config/i386/constraints.md (Bc): Remove constraint.
(Bn): Rewrite to use x86_extended_reg_mentioned_p predicate.
* config/i386/i386.cc (ix86_memory_address_reg_class):
Do not limit processing to TARGET_APX_EGPR.  Exit early for
NULL insn.  Do not check recog_data.insn before calling
extract_insn_cached.
(ix86_insn_base_reg_class): Handle ADDR_GPR8.
(ix86_regno_ok_for_insn_base_p): Ditto.
(ix86_insn_index_reg_class): Ditto.
* config/i386/i386.md (*cmpqi_ext_1_mem_rex64):
Remove insn pattern and corresponding peephole2 pattern.
(*cmpi_ext_1): Remove (m,Q) alternative.
Change (QBc,Q) alternative to (QBn,Q).  Add "addr" attribute.
(*cmpqi_ext_3_mem_rex64): Remove insn pattern
and corresponding peephole2 pattern.
(*cmpi_ext_3): Remove (Q,m) alternative.
Change (Q,QnBc) alternative to (Q,QnBn).  Add "addr" attribute.
(*extzvqi_mem_rex64): Remove insn pattern and
corresponding peephole2 pattern.
(*extzvqi): Remove (Q,m) alternative.  Change (Q,QnBc)
alternative to (Q,QnBn).  Add "addr" attribute.
(*insvqi_1_mem_rex64): Remove insn pattern and
corresponding peephole2 pattern.
(*insvqi_1): Remove (Q,m) alternative.  Change (Q,QnBc)
alternative to (Q,QnBn).  Add "addr" attribute.
(@insv_1): Ditto.
(*addqi_ext_0): Remove (m,0,Q) alternative.  Change (QBc,0,Q)
alternative to (QBn,0,Q).  Add "addr" attribute.
(*subqi_ext_0): Ditto.
(*andqi_ext_0): Ditto.
(*qi_ext_0): Ditto.
(*addqi_ext_1): Remove (Q,0,m) alternative.  Change (Q,0,QnBc)
alternative to (Q,0,QnBn).  Add "addr" attribute.
(*andqi_ext_1): Ditto.
(*andqi_ext_1_cc): Ditto.
(*qi_ext_1): Ditto.
(*xorqi_ext_1_cc): Ditto.
* config/i386/predicates.md (nonimm_x64constmem_operand):
Remove predicate.
(general_x64constmem_operand): Ditto.
(norex_memory_operand): Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index f6275740eb2..8da8a4170c4 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -187,11 +187,6 @@ (define_special_memory_constraint "Bm"
   "@internal Vector memory operand."
   (match_operand 0 "vector_memory_operand"))
 
-(define_special_memory_constraint "Bc"
-  "@internal Constant memory operand."
-  (and (match_operand 0 "memory_operand")
-   (match_test "constant_address_p (XEXP (op, 0))")))
-
 (define_memory_constraint "Bk"
   "@internal TLS address that allows insn using non-integer registers."
   (and (match_operand 0 "memory_operand")
@@ -199,7 +194,8 @@ (define_memory_constraint "Bk"
 
 (define_special_memory_constraint "Bn"
   "@internal Memory operand without REX prefix."
-  (match_operand 0 "norex_memory_operand"))
+  (and (match_operand 0 "memory_operand")
+   (not (match_test "x86_extended_reg_mentioned_p (op)"
 
 (define_special_memory_constraint "Br"
   "@internal bcst memory operand."
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index fdc9362cf5b..c2bd07fced7 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -11366,50 +11366,49 @@ ix86_memory_address_reg_class (rtx_insn* insn)
  return maximum register class in this case.  */
   enum attr_addr addr_rclass = ADDR_GPR32;
 
-  if (TARGET_APX_EGPR && insn)
-{
-  if (asm_noperands (PATTERN (insn)) >= 0
- || GET_CODE (PATTERN (insn)) == ASM_INPUT)
-   return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;
+  if (!insn)
+return addr_rclass;
 
-  /* Return maximum register class for unrecognized instructions.  */
-  if (INSN_CODE (insn) < 0)
-   return addr_rclass;
+  if (asm_noperands (PATTERN (insn)) >= 0
+  || GET_CODE (PATTERN (insn)) == ASM_INPUT)
+return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;
 
-  /* Try to recognize the insn before calling get_attr_addr.
-Save current recog_data and current alternative.  */
-  struct recog_data_d saved_recog_data = recog_data;
-  int saved_alternative = which_alternative;
+  /* Return maximum register class for unrecognized instructions.  */
+  if (INSN_CODE (insn) < 0)
+return addr_rclass;
 
-  /* Update recog_data for processing of alternatives.  */
-  if (recog_data.insn != insn)
-   extract_insn_cached (insn);
+  /* Try to recognize the insn before calling get_attr_addr.
+ Save current recog_data and current alternative.  */
+  struct recog_data_d saved_recog_data = recog_data;
+  int saved_alternative = which_alternative;
 
-  /* If current alternative is not set, loop throught enabled
-alternatives and get the most limited register class.  */
-  if (saved_alternative == -1)
-  

Re: [PATCH v2] c-family: Enable -fpermissive for C and ObjC

2023-11-06 Thread Florian Weimer
* Jakub Jelinek:

> On Mon, Nov 06, 2023 at 03:19:32PM +0100, Florian Weimer wrote:
>> * Jakub Jelinek:
>> 
>> > On Mon, Nov 06, 2023 at 03:06:39PM +0100, Florian Weimer wrote:
>> >> Future changes will treat some C front end warnings similar to
>> >> -Wnarrowing.
>> >> 
>> >> gcc/
>> >> 
>> >>   * doc/invoke.texi (Warning Options): Mention C diagnostics
>> >>   for -fpermissive.
>> >> 
>> >> gcc/c-family/
>> >> 
>> >>   * c.opt (fpermissive): Enable for C and ObjC.
>> >>   * c-opts.cc (set_std_c89): Enable -fpermissive.
>> >
>> > Won't this set flag_permissive even for -std=c89 -std=c99 ?
>> > Haven't tried, but if set_std_c* is called multiple times if more than
>> > one -std= option appears, then perhaps this should be done later after
>> > processing all options, not during that processing.
>> 
>> Ugh, you are right.
>> 
>> What would be the right place to do this kind of final option
>> processing?  Where those SET_OPTION_IF_UNSET are?
>
> c_common_post_options ?
> Generally, we have global_options, which are the values of the options
> (implicit or explicit) and then another variable of the same type,
> global_options_set, which uses all values just as booleans whether the
> option was set explicitly or not.

Yes, c_common_post_options seems to work.  Thanks for the hint regarding
global_options_set.  I can use it to make -std=gnu89 -fno-permissive do
something useful.  I'm going to send an update patch.

Florian



[patch] libgomp.texi: Update OpenMP 6.0-preview implementation-status list

2023-11-06 Thread Tobias Burnus

This commit updates the OpenMP 6.0 implementation status,
https://gcc.gnu.org/onlinedocs/libgomp/OpenMP-Implementation-Status.html

Besides some reordering and adding a bunch of 'N' entries, it also adds
'Y' for the recent C23 attribute changes.

Comments? If not, I will commit it later...

Tobias

PS: I am sure the wording can be improved; suggestions welcome, but,
nonetheless, it should do for an implementation list.
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
libgomp.texi: Update OpenMP 6.0-preview implementation-status list

libgomp/ChangeLog:

	* libgomp.texi (OpenMP Impl. Status): Update for OpenMP TR12;
	renamed section from TR11.

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index b635f81750b..92680452b1b 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -169,7 +169,7 @@ See also @ref{OpenMP Implementation Status}.
 * OpenMP 5.0:: Feature completion status to 5.0 specification
 * OpenMP 5.1:: Feature completion status to 5.1 specification
 * OpenMP 5.2:: Feature completion status to 5.2 specification
-* OpenMP Technical Report 11:: Feature completion status to first 6.0 preview
+* OpenMP Technical Report 12:: Feature completion status to second 6.0 preview
 @end menu
 
 The @code{_OPENMP} preprocessor macro and Fortran's @code{openmp_version}
@@ -442,33 +442,52 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab
 @end multitable
 
 
-@node OpenMP Technical Report 11
-@section OpenMP Technical Report 11
+@node OpenMP Technical Report 12
+@section OpenMP Technical Report 12
 
-Technical Report (TR) 11 is the first preview for OpenMP 6.0.
+Technical Report (TR) 12 is the second preview for OpenMP 6.0.
 
 @unnumberedsubsec New features listed in Appendix B of the OpenMP specification
 @multitable @columnfractions .60 .10 .25
 @item Features deprecated in versions 5.2, 5.1 and 5.0 were removed
   @tab N/A @tab Backward compatibility
-@item The @code{decl} attribute was added to the C++ attribute syntax
-  @tab Y @tab
+@item Full support for C23 was added @tab P @tab
+@item Full support for C++23 was added @tab P @tab
 @item @code{_ALL} suffix to the device-scope environment variables
   @tab P @tab Host device number wrongly accepted
+@item @code{num_threads} now accepts a list @tab N @tab
+@item Supporting increments with abstract names in @code{OMP_PLACES} @tab N @tab
+@item Extension of @code{OMP_DEFAULT_DEVICE} and new
+  @code{OMP_AVAILABLE_DEVICES} environment vars @tab N @tab
+@item New @code{OMP_THREADS_RESERVE} environment variable @tab N @tab
+@item The @code{decl} attribute was added to the C++ attribute syntax
+  @tab Y @tab
+@item The OpenMP directive syntax was extended to include C 23 attribute
+  specifiers @tab Y @tab
+@item All inarguable clauses take now an optional Boolean argument @tab N @tab
 @item For Fortran, @emph{locator list} can be also function reference with
   data pointer result @tab N @tab
+@item Concept of @emph{assumed-size arrays} in C and C++
+  @tab N @tab
+@item @emph{directive-name-modifier} accepted in all clauses @tab N @tab
+@item For Fortran, atomic with BLOCK construct and, for C/C++, with
+  unlimited curly braces supported @tab N @tab
+@item For Fortran, atomic compare with storing the comparison result
+  @tab N @tab
+@item New @code{looprange} clause @tab N @tab
 @item Ref-count change for @code{use_device_ptr}/@code{use_device_addr}
   @tab N @tab
+@item Support for inductions @tab N @tab
 @item Implicit reduction identifiers of C++ classes
   @tab N @tab
 @item Change of the @emph{map-type} property from @emph{ultimate} to
   @emph{default} @tab N @tab
-@item Concept of @emph{assumed-size arrays} in C and C++
-  @tab N @tab
+@item @code{self} modifier to @code{map} and @code{self} as
+  @code{defaultmap} argument @tab N @tab
 @item Mapping of @emph{assumed-size arrays} in C, C++ and Fortran
   @tab N @tab
 @item @code{groupprivate} directive @tab N @tab
-@item @code{local} clause to declare target directive @tab N @tab
+@item @code{local} clause to @code{declare target} directive @tab N @tab
 @item @code{part_size} allocator trait @tab N @tab
 @item @code{pin_device}, @code{preferred_device} and @code{target_access}
   allocator traits
@@ -478,28 +497,47 @@ Technical Report (TR) 11 is the first preview for OpenMP 6.0.
   modifiers of the @code{init} clause
   @tab N @tab
 @item @code{interop} clause to @code{dispatch} @tab N @tab
+@item @code{message} and @code{severity} calauses to @code{parallel} directive
+  @tab N @tab
+@item @code{self} clause to @code{requires} directive @tab N @tab
+@item @code{no_openmp_constructs} assumptions clause @tab N @tab
+@item @code{reverse} loop-transformation construct @ta

[PATCH] Fix configure script comments(!?!) (Was: Re: [PATCH] genemit: Split insn-emit.cc into ten files)

2023-11-06 Thread Martin Jambor
Hello,

On Thu, Oct 12 2023, Robin Dapp wrote:
>
[...]
> gcc/ChangeLog:
>
>   PR bootstrap/84402
>   PR target/111600
>
>   * Makefile.in: Handle split insn-emit.cc.
>   * configure: Regenerate.
>   * configure.ac: Add --with-insnemit-partitions.
>   * genemit.cc (output_peephole2_scratches): Print to file instead
>   of stdout.
>   (print_code): Ditto.
>   (gen_rtx_scratch): Ditto.
>   (gen_exp): Ditto.
>   (gen_emit_seq): Ditto.
>   (emit_c_code): Ditto.
>   (gen_insn): Ditto.
>   (gen_expand): Ditto.
>   (gen_split): Ditto.
>   (output_add_clobbers): Ditto.
>   (output_added_clobbers_hard_reg_p): Ditto.
>   (print_overload_arguments): Ditto.
>   (print_overload_test): Ditto.
>   (handle_overloaded_code_for): Ditto.
>   (handle_overloaded_gen): Ditto.
>   (print_header): New function.
>   (handle_arg): New function.
>   (main): Split output into 10 files.
>   * gensupport.cc (count_patterns): New function.
>   * gensupport.h (count_patterns): Define.
>   * read-md.cc (md_reader::print_md_ptr_loc): Add file argument.
>   * read-md.h (class md_reader): Change definition.

Following this commit, our buildbot script which checks that configure
scripts were re-generated correctly is unhappy because it insists
comments are wrong; it wants them to be like this:


diff --git a/gcc/configure b/gcc/configure
index 4d0357cbc28..0d818ae6850 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -2,7 +2,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 19995 "configure"
+#line 20003 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -20106,7 +20106,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 20101 "configure"
+#line 20109 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H


I'm not sure what that means, whether a wrong version of
autoconf/automake was used (though when I accidentally tried that, it
has always complained loudly) or if some environment difference can
cause this.  Perhaps I should change the script not to care about
commits though that won't happen soon (or perhaps I should drop the
checks completely) but would people be OK with me checking in the patch
above (with appropriate ChangeLog) to silence buildbot for a while
again?

Thanks,

Martin


Re: [PATCH] Fix configure script comments(!?!)

2023-11-06 Thread Andreas Schwab
On Nov 06 2023, Martin Jambor wrote:

> Following this commit, our buildbot script which checks that configure
> scripts were re-generated correctly is unhappy because it insists
> comments are wrong; it wants them to be like this:
>
> 
> diff --git a/gcc/configure b/gcc/configure
> index 4d0357cbc28..0d818ae6850 100755
> --- a/gcc/configure
> +++ b/gcc/configure
> @@ -2,7 +2,7 @@ else
>lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
>lt_status=$lt_dlunknown
>cat > conftest.$ac_ext <<_LT_EOF
> -#line 19995 "configure"
> +#line 20003 "configure"
>  #include "confdefs.h"
>  
>  #if HAVE_DLFCN_H
> @@ -20106,7 +20106,7 @@ else
>lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
>lt_status=$lt_dlunknown
>cat > conftest.$ac_ext <<_LT_EOF
> -#line 20101 "configure"
> +#line 20109 "configure"
>  #include "confdefs.h"
>  
>  #if HAVE_DLFCN_H
> 
>
> I'm not sure what that means, whether a wrong version of
> autoconf/automake was used (though when I accidentally tried that, it
> has always complained loudly) or if some environment difference can
> cause this.

This can happen if commits were rebased without re-generating configure
scripts.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."


[ARC PATCH] Improved DImode rotates and right shifts by one bit.

2023-11-06 Thread Roger Sayle

This patch improves the code generated for DImode right shifts (both
arithmetic and logical) by a single bit, and also for DImode rotates
(both left and right) by a single bit.  In approach, this is similar
to the recently added DImode left shift by a single bit patch, but
also builds upon i386.md's UNSPEC carry flag representation:
https://gcc.gnu.org/pipermail/gcc-patches/2023-October/632169.html

The benefits can be seen from the four new test cases:

long long ashr(long long x) { return x >> 1; }

Before:
ashr:   asl r2,r1,31
lsr_s   r0,r0
or_sr0,r0,r2
j_s.d   [blink]
asr_s   r1,r1,1

After:
ashr:   asr.f   r1,r1
j_s.d   [blink]
rrc r0,r0

unsigned long long lshr(unsigned long long x) { return x >> 1; }

Before:
lshr:   asl r2,r1,31
lsr_s   r0,r0
or_sr0,r0,r2
j_s.d   [blink]
lsr_s   r1,r1

After:
lshr:   lsr.f   r1,r1
j_s.d   [blink]
rrc r0,r0

unsigned long long rotl(unsigned long long x) { return (x<<1) | (x>>63); }

Before:
rotl:   lsr r12,r1,31
lsr r2,r0,31
asl_s   r3,r0,1
asl_s   r1,r1,1
or  r0,r12,r3
j_s.d   [blink]
or_sr1,r1,r2

After:
rotl:   add.f   r0,r0,r0
adc.f   r1,r1,r1
j_s.d   [blink]
add.cs  r0,r0,1

unsigned long long rotr(unsigned long long x) { return (x>>1) | (x<<63); }

Before:
rotr:   asl r12,r1,31
asl r2,r0,31
lsr_s   r3,r0
lsr_s   r1,r1
or  r0,r12,r3
j_s.d   [blink]
or_sr1,r1,r2

After:
rotr:   asr.f   0,r0
rrc.f   r1,r1
j_s.d   [blink]
rrc r0,r0

On CPUs without a barrel shifter the improvements are even better.

Tested with a cross-compiler to arc-linux hosted on x86_64,
with no new (compile-only) regressions from make -k check.
Ok for mainline if this passes Claudiu's nightly testing?


2023-11-06  Roger Sayle  

gcc/ChangeLog
* config/arc/arc.md (UNSPEC_ARC_CC_NEZ): New UNSPEC that
represents the carry flag being set if the operand is non-zero.
(adc_f): New define_insn representing adc with updated flags.
(ashrdi3): New define_expand that only handles shifts by 1.
(ashrdi3_cnt1): New pre-reload define_insn_and_split.
(lshrdi3): New define_expand that only handles shifts by 1.
(lshrdi3_cnt1): New pre-reload define_insn_and_split.
(rrcsi2): New define_insn for rrc (SImode rotate right through
carry).
(rrcsi2_carry): Likewise for rrc.f, as above but updating flags.
(rotldi3): New define_expand that only handles rotates by 1.
(rotldi3_cnt1): New pre-reload define_insn_and_split.
(rotrdi3): New define_expand that only handles rotates by 1.
(rotrdi3_cnt1): New pre-reload define_insn_and_split.
(lshrsi3_cnt1_carry): New define_insn for lsr.f.
(ashrsi3_cnt1_carry): New define_insn for asr.f.
(btst_0_carry): New define_insn for asr.f without result.

gcc/testsuite/ChangeLog
* gcc.target/arc/ashrdi3-1.c: New test case.
* gcc.target/arc/lshrdi3-1.c: Likewise.
* gcc.target/arc/rotldi3-1.c: Likewise.
* gcc.target/arc/rotrdi3-1.c: Likewise.


Thanks in advance,
Roger
--

diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 7702978..97231b9 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -137,6 +137,7 @@
   UNSPEC_ARC_VMAC2HU
   UNSPEC_ARC_VMPY2H
   UNSPEC_ARC_VMPY2HU
+  UNSPEC_ARC_CC_NEZ
 
   VUNSPEC_ARC_RTIE
   VUNSPEC_ARC_SYNC
@@ -2790,6 +2791,31 @@ archs4x, archs4xd"
(set_attr "type" "cc_arith")
(set_attr "length" "4,4,4,4,8,8")])
 
+(define_insn "adc_f"
+  [(set (reg:CC_C CC_REG)
+   (compare:CC_C
+ (zero_extend:DI
+   (plus:SI
+ (plus:SI
+   (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+   (match_operand:SI 1 "register_operand" "%r"))
+ (match_operand:SI 2 "register_operand" "r")))
+ (plus:DI
+   (ltu:DI (reg:CC_C CC_REG) (const_int 0))
+   (zero_extend:DI (match_dup 1)
+   (set (match_operand:SI 0 "register_operand" "=r")
+   (plus:SI
+ (plus:SI
+   (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+   (match_dup 1))
+ (match_dup 2)))]
+  ""
+  "adc.f\\t%0,%1,%2"
+  [(set_attr "cond" "set")
+   (set_attr "predicable" "no")
+   (set_attr "type" "cc_arith")
+   (set_attr "length" "4")])
+
 ; combiner-splitter cmp / scc -> cmp / adc
 (define_split
   [(set (match_operand:SI 0 "dest_reg_operand" "")
@@ -3530,6 +3556,68 @@ archs4x, archs4xd"
   ""
   [(set_attr "length" "8")])
 
+(define_expand "ashrdi3"
+  [(parallel
+  [(set (match_operand:DI 0 "register_operand")
+   (ashiftrt:DI (match_operand:DI 1 "register_operand")
+(match_operand:QI 2 "const_int_operand")))
+   (clobber (reg:CC CC_REG))])]
+  ""
+{
+  if (operands[2] != const1_rtx)
+ 

Re: [PING][PATCH 2/2] arm: Add support for MVE Tail-Predicated Low Overhead Loops

2023-11-06 Thread Stamatis Markianos-Wright



On 06/11/2023 11:24, Richard Sandiford wrote:

Stamatis Markianos-Wright  writes:

One of the main reasons for reading the arm bits was to try to answer
the question: if we switch to a downcounting loop with a GE condition,
how do we make sure that the start value is not a large unsigned
number that is interpreted as negative by GE?  E.g. if the loop
originally counted up in steps of N and used an LTU condition,
it could stop at a value in the range [INT_MAX + 1, UINT_MAX].
But the loop might never iterate if we start counting down from
most values in that range.

Does the patch handle that?

So AFAICT this is actually handled in the generic code in `doloop_valid_p`:

Loops of this kind fail because they are "desc->infinite", so no
loop-doloop conversion is attempted at all (even for standard dls/le loops)

Thanks to that check I haven't been able to trigger anything like the
behaviour you describe, do you think the doloop_valid_p checks are
robust enough?

The loops I was thinking of are provably not infinite though.  E.g.:

   for (unsigned int i = 0; i < UINT_MAX - 100; ++i)
 ...

is known to terminate.  And doloop conversion is safe with the normal
count-down-by-1 approach, so I don't think current code would need
to reject it.  I.e. a conversion to:

   unsigned int i = UINT_MAX - 101;
   do
 ...
   while (--i != ~0U);

would be safe, but a conversion to:

   int i = UINT_MAX - 101;
   do
 ...
   while ((i -= step, i > 0));

wouldn't, because the loop body would only be executed once.

I'm only going off the name "infinite" though :)  It's possible that
it has more connotations than that.

Thanks,
Richard


Ack, yep, I see what you mean now, and yep, that kind of loop does 
indeed pass through doloop_valid_p


Interestingly, in the v8-M Arm ARM this is done with:

```

boolean IsLastLowOverheadLoop(INSTR_EXEC_STATE_Type state)
// This does not check whether a loop is currently active.
// If the PE were in a loop, would this be the last one?
return UInt(state.LoopCount) <= (1 << (4 - LTPSIZE));

```

So architecturally the asm we output would be ok (except maybe the 
"branch too far subs;bgt;lctp" fallback at 
`predicated_doloop_end_internal` (maybe that should be `bhi`))... But 
now GE: isn't looking like an accurate representation of this operation 
in the compiler.


I'm wondering if I should try to make `predicated_doloop_end_internal` 
contain a comparison along the lines of:

(gtu: (plus: (LR) (const_int -num_lanes)) (const_int num_lanes_minus_1))

I'll give that a try :)

The only reason I'd chosen to go with GE earlier, tbh, was because of 
the existing handling of GE in loop-doloop.cc


Let me know if any other ideas come to your mind!


Cheers,

Stam




[PATCH] c-family: Enable -fpermissive for C and ObjC

2023-11-06 Thread Florian Weimer
Future changes will treat some C front end warnings similar to
-Wnarrowing.

gcc/

* doc/invoke.texi (Warning Options): Mention C diagnostics
for -fpermissive.

gcc/c-family/

* c.opt (fpermissive): Enable for C and ObjC.
* c-opts.cc (c_common_post_options): Enable -fpermissive.

---
v3: Handle -fpermissive in c_common_post_options, to get useful behavior
for various -std=gnu99 -fpermissive -std=gnu89 permutations.

 gcc/c-family/c-opts.cc | 12 
 gcc/c-family/c.opt |  2 +-
 gcc/doc/invoke.texi|  8 ++--
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
index a980912f7e1..23ea79ec802 100644
--- a/gcc/c-family/c-opts.cc
+++ b/gcc/c-family/c-opts.cc
@@ -854,6 +854,18 @@ c_common_post_options (const char **pfilename)
   && flag_unsafe_math_optimizations == 0)
 flag_fp_contract_mode = FP_CONTRACT_OFF;
 
+  /* C language modes before C99 enable -fpermissive by default, but
+ only if -pedantic-errors is not specified.  Also treat
+ -fno-permissive as a subset of -pedantic-errors that does not
+ reject certain GNU extensions also present the defaults for later
+ language modes.  */
+  if (!c_dialect_cxx () && !flag_isoc99 && !global_dc->m_pedantic_errors
+  && !global_options_set.x_flag_permissive)
+{
+  flag_permissive = 1;
+  global_dc->m_permissive = 1;
+}
+
   /* If we are compiling C, and we are outside of a standards mode,
  we can permit the new values from ISO/IEC TS 18661-3 for
  FLT_EVAL_METHOD.  Otherwise, we must restrict the possible values to
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 29d3d789a49..cc3a6610148 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -2112,7 +2112,7 @@ C ObjC C++ ObjC++
 Look for and use PCH files even when preprocessing.
 
 fpermissive
-C++ ObjC++ Var(flag_permissive)
+C ObjC C++ ObjC++ Var(flag_permissive)
 Downgrade conformance errors to warnings.
 
 fplan9-extensions
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 6e776a0faa1..dfa01220b93 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -6170,13 +6170,17 @@ errors by @option{-pedantic-errors}.  For instance:
 Downgrade some required diagnostics about nonconformant code from
 errors to warnings.  Thus, using @option{-fpermissive} allows some
 nonconforming code to compile.  Some C++ diagnostics are controlled
-only by this flag, but it also downgrades some diagnostics that have
-their own flag:
+only by this flag, but it also downgrades some C and C++ diagnostics
+that have their own flag:
 
 @gccoptlist{
 -Wnarrowing @r{(C++)}
 }
 
+The @option{-fpermissive} option is the default for historic C language
+modes (@option{-std=c89}, @option{-std=gnu89}, @option{-std=c90},
+@option{-std=gnu90}).
+
 @opindex Wall
 @opindex Wno-all
 @item -Wall

base-commit: 3cc9ad41db87fb85b13a56bff1f930c258542a70



[PATCH] Avoid undeclared use of abort in gcc.dg/cpp/wchar-1.c

2023-11-06 Thread Florian Weimer
This should be safe because this is a preprocessor test; it
should not exercise implicit function declarations.

* gcc.dg/cpp/wchar-1.c (main): Call __builtin_abort instead of
abort.

---
 gcc/testsuite/gcc.dg/cpp/wchar-1.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/cpp/wchar-1.c 
b/gcc/testsuite/gcc.dg/cpp/wchar-1.c
index ac9b38b80da..51ef2fb0f1d 100644
--- a/gcc/testsuite/gcc.dg/cpp/wchar-1.c
+++ b/gcc/testsuite/gcc.dg/cpp/wchar-1.c
@@ -14,10 +14,10 @@ int main ()
 
 #if L'\x0' - 1 < 0
   if (c > 0)
-abort ();
+__builtin_abort ();
 #else
   if (c < 0)
-abort ();
+__builtin_abort ();
 #endif
  
   return 0;

base-commit: d28f4194922fe8b25310b926c126e7ec1776c71d



Re: [PATCH] Fix configure script comments(!?!) (Was: Re: [PATCH] genemit: Split insn-emit.cc into ten files)

2023-11-06 Thread Robin Dapp
> I'm not sure what that means, whether a wrong version of
> autoconf/automake was used (though when I accidentally tried that, it
> has always complained loudly) or if some environment difference can
> cause this.  Perhaps I should change the script not to care about
> commits though that won't happen soon (or perhaps I should drop the
> checks completely) but would people be OK with me checking in the patch
> above (with appropriate ChangeLog) to silence buildbot for a while
> again?

Hmm, I made sure to regenerate them on an Intel compile-farm machine
because my local autoconf/automake was too new.  I thought that I
also regenerated after the last rebase before pushing.  But maybe
it auto-merged that change at some point and I forgot?

Regards
 Robin


Re: [PATCH] Avoid undeclared use of abort in gcc.dg/cpp/wchar-1.c

2023-11-06 Thread Jakub Jelinek
On Mon, Nov 06, 2023 at 06:38:04PM +0100, Florian Weimer wrote:
> This should be safe because this is a preprocessor test; it
> should not exercise implicit function declarations.
> 
>   * gcc.dg/cpp/wchar-1.c (main): Call __builtin_abort instead of
>   abort.

Ok.

> diff --git a/gcc/testsuite/gcc.dg/cpp/wchar-1.c 
> b/gcc/testsuite/gcc.dg/cpp/wchar-1.c
> index ac9b38b80da..51ef2fb0f1d 100644
> --- a/gcc/testsuite/gcc.dg/cpp/wchar-1.c
> +++ b/gcc/testsuite/gcc.dg/cpp/wchar-1.c
> @@ -14,10 +14,10 @@ int main ()
>  
>  #if L'\x0' - 1 < 0
>if (c > 0)
> -abort ();
> +__builtin_abort ();
>  #else
>if (c < 0)
> -abort ();
> +__builtin_abort ();
>  #endif
>   
>return 0;
> 
> base-commit: d28f4194922fe8b25310b926c126e7ec1776c71d

Jakub



Re: [PATCH 1/2] libgfortran: Remove early return if extent is zero [PR112371]

2023-11-06 Thread Harald Anlauf

Hi Mikael,

Am 06.11.23 um 12:43 schrieb Mikael Morin:

Remove the early return present in function templates for transformational
functions doing a (masked) reduction of an array along a dimension.
This early return, which triggered if the extent in the reduction dimension
was zero, was wrong because even if the reduction operation degenerates to
a constant value in that case, one has to loop anyway along the other
dimensions to initialize every element of the resulting array with that
constant value.

The offending piece of code was present in several places, and this removes
them all.  Namely, the impacted m4 files are ifunction.m4 for regular
functions and types, ifunction-s.m4 for character minloc and maxloc, and
ifunction-s2.m4 for character minval and maxval.


I wonder if the correct fix would be to replace (instead of deleting)


diff --git a/libgfortran/m4/ifunction.m4 b/libgfortran/m4/ifunction.m4
index c64217ec5db..480649cf691 100644
--- a/libgfortran/m4/ifunction.m4
+++ b/libgfortran/m4/ifunction.m4
@@ -232,8 +232,6 @@ m'name`'rtype_qual`_'atype_code` ('rtype` * const restrict 
retarray,
  }

len = GFC_DESCRIPTOR_EXTENT(array,dim);
-  if (len <= 0)
-return;

mbase = mask->base_addr;



by the following:

  if (len < 0)
len = 0;

See ifunction.m4, lines 56ff, which check if the result of

  len = GFC_DESCRIPTOR_EXTENT(array,dim);

is negative.  I haven't tried to create a testcase, though.

Similarly for the other templates.

Thanks,
Harald



[PATCH][GCC13] PR tree-optimization/105834 - Choose better initial values for ranger.

2023-11-06 Thread Andrew MacLeod

As requested, porting this patch from trunk resolves this PR in GCC 13.

Bootstraps on x86_64-pc-linux-gnu with no regressions.  OK for the gcc 
13 branch?


Andrew



From 0182a25607fa353274c27ec57ca497c00f1d1b76 Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Mon, 6 Nov 2023 11:33:32 -0500
Subject: [PATCH] Choose better initial values for ranger.

Instead of defaulting to VARYING, fold the stmt using just global ranges.

	PR tree-optimization/105834
	gcc/
	* gimple-range-cache.cc (ranger_cache::get_global_range): Call
	fold_range with global query to choose an initial value.

	gcc/testsuite/
	* gcc.dg/pr105834.c
---
 gcc/gimple-range-cache.cc   | 17 -
 gcc/testsuite/gcc.dg/pr105834.c | 17 +
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr105834.c

diff --git a/gcc/gimple-range-cache.cc b/gcc/gimple-range-cache.cc
index e4e75943632..b09df6c81bf 100644
--- a/gcc/gimple-range-cache.cc
+++ b/gcc/gimple-range-cache.cc
@@ -846,7 +846,22 @@ ranger_cache::get_global_range (vrange &r, tree name, bool &current_p)
 		|| m_temporal->current_p (name, m_gori.depend1 (name),
 	  m_gori.depend2 (name));
   else
-m_globals.set_global_range (name, r);
+{
+  // If no global value has been set and value is VARYING, fold the stmt
+  // using just global ranges to get a better initial value.
+  // After inlining we tend to decide some things are constant, so
+  // do not do this evaluation after inlining.
+  if (r.varying_p () && !cfun->after_inlining)
+	{
+	  gimple *s = SSA_NAME_DEF_STMT (name);
+	  if (gimple_get_lhs (s) == name)
+	{
+	  if (!fold_range (r, s, get_global_range_query ()))
+		gimple_range_global (r, name);
+	}
+	}
+  m_globals.set_global_range (name, r);
+}
 
   // If the existing value was not current, mark it as always current.
   if (!current_p)
diff --git a/gcc/testsuite/gcc.dg/pr105834.c b/gcc/testsuite/gcc.dg/pr105834.c
new file mode 100644
index 00000000000..d0eda03ef8b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr105834.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+static int a, b;
+
+void foo();
+
+int main() {
+for (int c = 0; c < 2; c = c + (unsigned)3)
+if (a)
+for (;;)
+if (c > 0)
+b = 0;
+if (b)
+foo();
+}
+/* { dg-final { scan-tree-dump-not "foo" "optimized" } }  */
-- 
2.41.0



Re: [PATCH 2/2] libgfortran: Remove empty array descriptor first dimension overwrite [PR112371]

2023-11-06 Thread Harald Anlauf

Hi Mikael,

Am 06.11.23 um 12:43 schrieb Mikael Morin:

Remove the forced overwrite of the first dimension of the result array
descriptor to set it to zero extent, in the function templates for
transformational functions doing an array reduction along a dimension.  This
overwrite, which happened before early returning in case the result array
was empty, was wrong because an array may have a non-zero extent in the
first dimension and still be empty if it has a zero extent in a higher
dimension.  Overwriting the dimension was resulting in wrong array result
upper bound for the first dimension in that case.

The offending piece of code was present in several places, and this removes
them all.  More precisely, there is only one case to fix for logical
reduction functions, and there are three cases for other reduction
functions, corresponding to non-masked reduction, reduction with array mask,
and reduction with scalar mask.  The impacted m4 files are
ifunction_logical.m4 for logical reduction functions, ifunction.m4 for
regular functions and types, ifunction-s.m4 for character minloc and maxloc,
ifunction-s2.m4 for character minval and maxval, and ifindloc1.m4 for
findloc.


while your fix seems mechanical and correct, I wonder if you looked
at the following pre-existing irregularity which can be seen in
this snippet:


diff --git a/libgfortran/m4/ifunction.m4 b/libgfortran/m4/ifunction.m4
index 480649cf691..abc15b430ab 100644
--- a/libgfortran/m4/ifunction.m4
+++ b/libgfortran/m4/ifunction.m4
@@ -96,12 +96,7 @@ name`'rtype_qual`_'atype_code` ('rtype` * const restrict 
retarray,

retarray->base_addr = xmallocarray (alloc_size, sizeof (rtype_name));
if (alloc_size == 0)
-   {
- /* Make sure we have a zero-sized array.  */
- GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1);
- return;
-
-   }
+   return;
  }
else
  {


This is all enclosed in a block which has
  if (retarray->base_addr == NULL)
but allocates and sets retarray->base_addr, while


@@ -290,11 +285,7 @@ m'name`'rtype_qual`_'atype_code` ('rtype` * const restrict 
retarray,
retarray->dtype.rank = rank;

if (alloc_size == 0)
-   {
- /* Make sure we have a zero-sized array.  */
- GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1);
- return;
-   }
+   return;
else
retarray->base_addr = xmallocarray (alloc_size, sizeof (rtype_name));



and


@@ -454,11 +445,7 @@ void
alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];

if (alloc_size == 0)
-   {
- /* Make sure we have a zero-sized array.  */
- GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1);
- return;
-   }
+   return;
else
retarray->base_addr = xmallocarray (alloc_size, sizeof (rtype_name));
  }


do not set retarray->base_addr to non-NULL for alloc_size == 0.

Do you know if the first snippet can be safely rewritten to avoid
the (hopefully pointless) xmallocarray for alloc_size == 0?

Thanks,
Harald



[ARC PATCH] Consistent use of whitespace in assembler templates.

2023-11-06 Thread Roger Sayle

This minor clean-up patch tweaks arc.md to use whitespace consistently
in output templates, always using a TAB between the mnemonic and its
operands, and avoiding spaces after commas between operands.  There
should be no functional changes with this patch, though several test
cases' scan-assembler needed to be updated to use \s+ instead of testing
for a TAB or a space explicitly.

Tested with a cross-compiler to arc-linux hosted on x86_64,
with no new (compile-only) regressions from make -k check.
Ok for mainline if this passes Claudiu's nightly testing?


2023-11-06  Roger Sayle  

gcc/ChangeLog
* config/arc/arc.md: Make output template whitespace consistent.

gcc/testsuite/ChangeLog
* gcc.target/arc/jli-1.c: Update dg-final whitespace.
* gcc.target/arc/jli-2.c: Likewise.
* gcc.target/arc/naked-1.c: Likewise.
* gcc.target/arc/naked-2.c: Likewise.
* gcc.target/arc/tmac-1.c: Likewise.
* gcc.target/arc/tmac-2.c: Likewise.


Thanks again,
Roger
--

diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 7702978..846aa32 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -669,26 +669,26 @@ archs4x, archs4xd"
|| (satisfies_constraint_Cm3 (operands[1])
&& memory_operand (operands[0], QImode))"
   "@
-   mov%? %0,%1
-   mov%? %0,%1
-   mov%? %0,%1
-   mov%? %0,%1
-   mov%? %0,%1
-   mov%? %0,%1
-   mov%? %0,%1
-   mov%? %0,%1
-   mov%? %0,%1
-   mov%? %0,%1
-   ldb%? %0,%1
-   stb%? %1,%0
-   ldb%? %0,%1
-   xldb%U1 %0,%1
-   ldb%U1%V1 %0,%1
-   xstb%U0 %1,%0
-   stb%U0%V0 %1,%0
-   stb%U0%V0 %1,%0
-   stb%U0%V0 %1,%0
-   stb%U0%V0 %1,%0"
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   ldb%?\\t%0,%1
+   stb%?\\t%1,%0
+   ldb%?\\t%0,%1
+   xldb%U1\\t%0,%1
+   ldb%U1%V1\\t%0,%1
+   xstb%U0\\t%1,%0
+   stb%U0%V0\\t%1,%0
+   stb%U0%V0\\t%1,%0
+   stb%U0%V0\\t%1,%0
+   stb%U0%V0\\t%1,%0"
   [(set_attr "type" 
"move,move,move,move,move,move,move,move,move,move,load,store,load,load,load,store,store,store,store,store")
(set_attr "iscompact" 
"maybe,maybe,maybe,true,true,false,false,false,maybe_limm,false,true,true,true,false,false,false,false,false,false,false")
(set_attr "predicable" 
"yes,no,yes,no,no,yes,no,yes,yes,yes,no,no,no,no,no,no,no,no,no,no")
@@ -713,26 +713,26 @@ archs4x, archs4xd"
|| (satisfies_constraint_Cm3 (operands[1])
&& memory_operand (operands[0], HImode))"
   "@
-   mov%? %0,%1
-   mov%? %0,%1
-   mov%? %0,%1
-   mov%? %0,%1
-   mov%? %0,%1
-   mov%? %0,%1
-   mov%? %0,%1
-   mov%? %0,%1
-   mov%? %0,%1
-   mov%? %0,%1
-   mov%? %0,%1
-   ld%_%? %0,%1
-   st%_%? %1,%0
-   xld%_%U1 %0,%1
-   ld%_%U1%V1 %0,%1
-   xst%_%U0 %1,%0
-   st%_%U0%V0 %1,%0
-   st%_%U0%V0 %1,%0
-   st%_%U0%V0 %1,%0
-   st%_%U0%V0 %1,%0"
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   mov%?\\t%0,%1
+   ld%_%?\\t%0,%1
+   st%_%?\\t%1,%0
+   xld%_%U1\\t%0,%1
+   ld%_%U1%V1\\t%0,%1
+   xst%_%U0\\t%1,%0
+   st%_%U0%V0\\t%1,%0
+   st%_%U0%V0\\t%1,%0
+   st%_%U0%V0\\t%1,%0
+   st%_%U0%V0\\t%1,%0"
   [(set_attr "type" 
"move,move,move,move,move,move,move,move,move,move,move,load,store,load,load,store,store,store,store,store")
(set_attr "iscompact" 
"maybe,maybe,maybe,true,true,false,false,false,maybe_limm,maybe_limm,false,true,true,false,false,false,false,false,false,false")
(set_attr "predicable" 
"yes,no,yes,no,no,yes,no,yes,yes,yes,yes,no,no,no,no,no,no,no,no,no")
@@ -818,7 +818,7 @@ archs4x, archs4xd"
  (plus:SI (reg:SI SP_REG)
   (match_operand 1 "immediate_operand" "Cal")]
   "reload_completed"
-  "ld.a %0,[sp,%1]"
+  "ld.a\\t%0,[sp,%1]"
   [(set_attr "type" "load")
(set_attr "length" "8")])
 
@@ -830,7 +830,7 @@ archs4x, archs4xd"
   (unspec:SI [(match_operand:SI 1 "register_operand" "c")]
UNSPEC_ARC_DIRECT))]
   ""
-  "st%U0 %1,%0\;st%U0.di %1,%0"
+  "st%U0\\t%1,%0\;st%U0.di\\t%1,%0"
   [(set_attr "type" "store")])
 
 ;; Combiner patterns for compare with zero
@@ -944,7 +944,7 @@ archs4x, archs4xd"
(set (match_operand:SI 0 "register_operand" "=w")
(match_dup 3))]
   ""
-  "%O3.f %0,%1"
+  "%O3.f\\t%0,%1"
   [(set_attr "type" "compare")
(set_attr "cond" "set_zn")
(set_attr "length" "4")])
@@ -987,15 +987,15 @@ archs4x, archs4xd"
 switch (which_alternative)
 {
 case 0: case 2: case 3: case 7:
-  return \"tst%? %1,%2\";
+  return \"tst%?\\t%1,%2\";
 case 1:
-  return \"btst%? %1,%z2\";
+  return \"btst%?\\t%1,%z2\";
 case 4:
-  return \"bmsk%?.f 0,%1,%Z2\";
+  return \"bmsk%?.f\\t0,%1,%Z2\";
 case 5:
-  return \"bclr%?.f 0,%1,%M2\";
+  return \"bclr%?.f\\t0,%1,%M2\";
 case 6:
-  return \"asr.f 0,%1,%p2\"

Re: [PATCH 1/2] libgfortran: Remove early return if extent is zero [PR112371]

2023-11-06 Thread Mikael Morin

Le 06/11/2023 à 19:12, Harald Anlauf a écrit :

Hi Mikael,

Am 06.11.23 um 12:43 schrieb Mikael Morin:
Remove the early return present in function templates for 
transformational

functions doing a (masked) reduction of an array along a dimension.
This early return, which triggered if the extent in the reduction 
dimension
was zero, was wrong because even if the reduction operation 
degenerates to

a constant value in that case, one has to loop anyway along the other
dimensions to initialize every element of the resulting array with that
constant value.

The offending piece of code was present in several places, and this 
removes

them all.  Namely, the impacted m4 files are ifunction.m4 for regular
functions and types, ifunction-s.m4 for character minloc and maxloc, and
ifunction-s2.m4 for character minval and maxval.


I wonder if the correct fix would be to replace (instead of deleting)


diff --git a/libgfortran/m4/ifunction.m4 b/libgfortran/m4/ifunction.m4
index c64217ec5db..480649cf691 100644
--- a/libgfortran/m4/ifunction.m4
+++ b/libgfortran/m4/ifunction.m4
@@ -232,8 +232,6 @@ m'name`'rtype_qual`_'atype_code` ('rtype` * const 
restrict retarray,

  }

    len = GFC_DESCRIPTOR_EXTENT(array,dim);
-  if (len <= 0)
-    return;

    mbase = mask->base_addr;



by the following:

   if (len < 0)
     len = 0;

See ifunction.m4, lines 56ff, which check if the result of

   len = GFC_DESCRIPTOR_EXTENT(array,dim);

is negative.  I haven't tried to create a testcase, though.

Similarly for the other templates.



Yes, you're right.  I think I won't try to create a testcase and will 
just pick your suggestion which seems safer.


Re: [PATCH 2/2] libgfortran: Remove empty array descriptor first dimension overwrite [PR112371]

2023-11-06 Thread Mikael Morin

Le 06/11/2023 à 19:20, Harald Anlauf a écrit :

Hi Mikael,

Am 06.11.23 um 12:43 schrieb Mikael Morin:

Remove the forced overwrite of the first dimension of the result array
descriptor to set it to zero extent, in the function templates for
transformational functions doing an array reduction along a 
dimension.  This

overwrite, which happened before early returning in case the result array
was empty, was wrong because an array may have a non-zero extent in the
first dimension and still be empty if it has a zero extent in a higher
dimension.  Overwriting the dimension was resulting in wrong array result
upper bound for the first dimension in that case.

The offending piece of code was present in several places, and this 
removes

them all.  More precisely, there is only one case to fix for logical
reduction functions, and there are three cases for other reduction
functions, corresponding to non-masked reduction, reduction with array 
mask,

and reduction with scalar mask.  The impacted m4 files are
ifunction_logical.m4 for logical reduction functions, ifunction.m4 for
regular functions and types, ifunction-s.m4 for character minloc and 
maxloc,

ifunction-s2.m4 for character minval and maxval, and ifindloc1.m4 for
findloc.


while your fix seems mechanical and correct, I wonder if you looked
at the following pre-existing irregularity which can be seen in
this snippet:


diff --git a/libgfortran/m4/ifunction.m4 b/libgfortran/m4/ifunction.m4
index 480649cf691..abc15b430ab 100644
--- a/libgfortran/m4/ifunction.m4
+++ b/libgfortran/m4/ifunction.m4
@@ -96,12 +96,7 @@ name`'rtype_qual`_'atype_code` ('rtype` * const 
restrict retarray,


    retarray->base_addr = xmallocarray (alloc_size, sizeof 
(rtype_name));

    if (alloc_size == 0)
-    {
-  /* Make sure we have a zero-sized array.  */
-  GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1);
-  return;
-
-    }
+    return;
  }
    else
  {


This is all enclosed in a block which has
   if (retarray->base_addr == NULL)
but allocates and sets retarray->base_addr, while

@@ -290,11 +285,7 @@ m'name`'rtype_qual`_'atype_code` ('rtype` * const 
restrict retarray,

    retarray->dtype.rank = rank;

    if (alloc_size == 0)
-    {
-  /* Make sure we have a zero-sized array.  */
-  GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1);
-  return;
-    }
+    return;
    else
  retarray->base_addr = xmallocarray (alloc_size, sizeof 
(rtype_name));




and


@@ -454,11 +445,7 @@ void
    alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * 
extent[rank-1];


    if (alloc_size == 0)
-    {
-  /* Make sure we have a zero-sized array.  */
-  GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1);
-  return;
-    }
+    return;
    else
  retarray->base_addr = xmallocarray (alloc_size, sizeof 
(rtype_name));

  }


do not set retarray->base_addr to non-NULL for alloc_size == 0.

Do you know if the first snippet can be safely rewritten to avoid
the (hopefully pointless) xmallocarray for alloc_size == 0?



This change to the testcase:

diff --git a/gcc/testsuite/gfortran.dg/bound_11.f90 
b/gcc/testsuite/gfortran.dg/bound_11.f90

index 170eba4ddfd..2e96f843476 100644
--- a/gcc/testsuite/gfortran.dg/bound_11.f90
+++ b/gcc/testsuite/gfortran.dg/bound_11.f90
@@ -88,6 +88,7 @@ contains
 m4 = .false.
 i = 1
 r = sum(a, dim=i)
+if (.not. allocated(r)) stop 210
 if (any(shape(r) /= (/ 3, 0, 7 /))) stop 211
 if (any(ubound(r) /= (/ 3, 0, 7 /))) stop 212
 i = 2
@@ -104,6 +105,7 @@ contains
 if (any(ubound(r) /= (/ 9, 3, 0 /))) stop 218
 i = 1
 r = sum(a, dim=i, mask=m1)
+if (.not. allocated(r)) stop 220
 if (any(shape(r) /= (/ 3, 0, 7 /))) stop 221
 if (any(ubound(r) /= (/ 3, 0, 7 /))) stop 222
 i = 2
@@ -120,6 +122,7 @@ contains
 if (any(ubound(r) /= (/ 9, 3, 0 /))) stop 228
 i = 1
 r = sum(a, dim=i, mask=m4)
+if (.not. allocated(r)) stop 230
 if (any(shape(r) /= (/ 3, 0, 7 /))) stop 231
 if (any(ubound(r) /= (/ 3, 0, 7 /))) stop 232
 i = 2

gives me a FAIL with STOP 220 (or STOP 230 if the STOP 220 line is 
commented); the first one with STOP 210 passes.
So it is the first snippet, with the xmallocarray call (which supports zero 
values, see memory.c), that is the correct one.

Good catch, I will open a separate PR.

Mikael


[PATCH] RISC-V: Add ABI requirement for XTheadFMemIdx tests

2023-11-06 Thread Christoph Muellner
From: Christoph Müllner 

The XTheadFMemIdx tests set the required ABI for RV32, but not
for RV64, which has the effect that the tests are expected to
succeed for RV64/LP64.  Let's set the ABI to LP64D in these
tests to clarify the requirements.

Signed-off-by: Christoph Müllner 

gcc/testsuite/ChangeLog:

* gcc.target/riscv/xtheadfmemidx-index-update.c: Add ABI.
* gcc.target/riscv/xtheadfmemidx-index-xtheadbb-update.c: Likewise.
* gcc.target/riscv/xtheadfmemidx-index-xtheadbb.c: Likewise.
* gcc.target/riscv/xtheadfmemidx-index.c: Likewise.
* gcc.target/riscv/xtheadfmemidx-uindex-update.c: Likewise.
* gcc.target/riscv/xtheadfmemidx-uindex-xtheadbb-update.c: Likewise.
* gcc.target/riscv/xtheadfmemidx-uindex-xtheadbb.c: Likewise.
* gcc.target/riscv/xtheadfmemidx-uindex.c: Likewise.
---
 gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index-update.c | 2 +-
 .../gcc.target/riscv/xtheadfmemidx-index-xtheadbb-update.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index-xtheadbb.c   | 2 +-
 gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index.c| 2 +-
 gcc/testsuite/gcc.target/riscv/xtheadfmemidx-uindex-update.c| 2 +-
 .../gcc.target/riscv/xtheadfmemidx-uindex-xtheadbb-update.c | 2 +-
 gcc/testsuite/gcc.target/riscv/xtheadfmemidx-uindex-xtheadbb.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/xtheadfmemidx-uindex.c   | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index-update.c 
b/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index-update.c
index 24bbb63d174..cb86b8ad296 100644
--- a/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index-update.c
+++ b/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index-update.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
-/* { dg-options "-march=rv64gc_xtheadmemidx_xtheadfmemidx" { target { rv64 } } 
} */
+/* { dg-options "-march=rv64gc_xtheadmemidx_xtheadfmemidx -mabi=lp64d" { 
target { rv64 } } } */
 /* { dg-options "-march=rv32imafc_xtheadmemidx_xtheadfmemidx -mabi=ilp32f" { 
target { rv32 } } } */
 
 #include "xtheadmemidx-helpers.h"
diff --git 
a/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index-xtheadbb-update.c 
b/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index-xtheadbb-update.c
index 3b931a4b980..cc3f6219c05 100644
--- a/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index-xtheadbb-update.c
+++ b/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index-xtheadbb-update.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
-/* { dg-options "-march=rv64gc_xtheadbb_xtheadmemidx_xtheadfmemidx" { target { 
rv64 } } } */
+/* { dg-options "-march=rv64gc_xtheadbb_xtheadmemidx_xtheadfmemidx 
-mabi=lp64d" { target { rv64 } } } */
 /* { dg-options "-march=rv32imafc_xtheadbb_xtheadmemidx_xtheadfmemidx 
-mabi=ilp32f" { target { rv32 } } } */
 
 #include "xtheadmemidx-helpers.h"
diff --git a/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index-xtheadbb.c 
b/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index-xtheadbb.c
index 48858605c24..8ee98c87469 100644
--- a/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index-xtheadbb.c
+++ b/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index-xtheadbb.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
-/* { dg-options "-march=rv64gc_xtheadbb_xtheadmemidx_xtheadfmemidx" { target { 
rv64 } } } */
+/* { dg-options "-march=rv64gc_xtheadbb_xtheadmemidx_xtheadfmemidx 
-mabi=lp64d" { target { rv64 } } } */
 /* { dg-options "-march=rv32imafc_xtheadbb_xtheadmemidx_xtheadfmemidx 
-mabi=ilp32f" { target { rv32 } } } */
 
 #include "xtheadmemidx-helpers.h"
diff --git a/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index.c 
b/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index.c
index 1bb231a9e88..35704063598 100644
--- a/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index.c
+++ b/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-index.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
-/* { dg-options "-march=rv64gc_xtheadmemidx_xtheadfmemidx" { target { rv64 } } 
} */
+/* { dg-options "-march=rv64gc_xtheadmemidx_xtheadfmemidx -mabi=lp64d" { 
target { rv64 } } } */
 /* { dg-options "-march=rv32imafc_xtheadmemidx_xtheadfmemidx -mabi=ilp32f" { 
target { rv32 } } } */
 
 #include "xtheadmemidx-helpers.h"
diff --git a/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-uindex-update.c 
b/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-uindex-update.c
index bc50fa799e0..37ffe6afd53 100644
--- a/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-uindex-update.c
+++ b/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-uindex-update.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
-/* { dg-options "-march=rv64gc_xtheadmemidx_xtheadfmemidx" { target { rv64 } } 
} */
+/* { dg-options "-march=rv64gc_xtheadme

[pushed 2/4] diagnostics: make diagnostic_context::m_urlifier private

2023-11-06 Thread David Malcolm
No functional change intended.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-5167-ga526cc6ff32e22.

gcc/ChangeLog:
* diagnostic.cc (diagnostic_context::set_urlifier): New.
* diagnostic.h (diagnostic_context::set_urlifier): New decl.
(diagnostic_context::m_urlifier): Make private.
* gcc.cc (driver::global_initializations): Use set_urlifier rather
than directly setting field.
* toplev.cc (general_init): Likewise.
---
 gcc/diagnostic.cc | 8 
 gcc/diagnostic.h  | 3 +++
 gcc/gcc.cc| 2 +-
 gcc/toplev.cc | 2 +-
 4 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/gcc/diagnostic.cc b/gcc/diagnostic.cc
index 90103e150f7..c617b34f02b 100644
--- a/gcc/diagnostic.cc
+++ b/gcc/diagnostic.cc
@@ -373,6 +373,14 @@ diagnostic_context::set_client_data_hooks 
(diagnostic_client_data_hooks *hooks)
   m_client_data_hooks = hooks;
 }
 
+void
+diagnostic_context::set_urlifier (urlifier *urlifier)
+{
+  /* Ideally we'd use a std::unique_ptr here.  */
+  delete m_urlifier;
+  m_urlifier = urlifier;
+}
+
 void
 diagnostic_context::create_edit_context ()
 {
diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h
index 4ef031b5d1c..f9950ec2cf8 100644
--- a/gcc/diagnostic.h
+++ b/gcc/diagnostic.h
@@ -297,6 +297,7 @@ public:
   void set_output_format (diagnostic_output_format *output_format);
   void set_text_art_charset (enum diagnostic_text_art_charset charset);
   void set_client_data_hooks (diagnostic_client_data_hooks *hooks);
+  void set_urlifier (urlifier *);
   void create_edit_context ();
   void set_warning_as_error_requested (bool val)
   {
@@ -518,10 +519,12 @@ public:
  particular option.  */
   char *(*m_get_option_url) (diagnostic_context *, int);
 
+private:
   /* An optional hook for adding URLs to quoted text strings in
  diagnostics.  Only used for the main diagnostic message.  */
   urlifier *m_urlifier;
 
+public:
   void (*m_print_path) (diagnostic_context *, const diagnostic_path *);
   json::value *(*m_make_json_for_path) (diagnostic_context *,
const diagnostic_path *);
diff --git a/gcc/gcc.cc b/gcc/gcc.cc
index 02464958f36..51120c1489e 100644
--- a/gcc/gcc.cc
+++ b/gcc/gcc.cc
@@ -8292,7 +8292,7 @@ driver::global_initializations ()
   diagnostic_initialize (global_dc, 0);
   diagnostic_color_init (global_dc);
   diagnostic_urls_init (global_dc);
-  global_dc->m_urlifier = make_gcc_urlifier ();
+  global_dc->set_urlifier (make_gcc_urlifier ());
 
 #ifdef GCC_DRIVER_HOST_INITIALIZATION
   /* Perform host dependent initialization when needed.  */
diff --git a/gcc/toplev.cc b/gcc/toplev.cc
index e39162a3e49..d8e8978dd55 100644
--- a/gcc/toplev.cc
+++ b/gcc/toplev.cc
@@ -1049,7 +1049,7 @@ general_init (const char *argv0, bool init_signals)
   global_dc->m_option_state = &global_options;
   global_dc->m_option_name = option_name;
   global_dc->m_get_option_url = get_option_url;
-  global_dc->m_urlifier = make_gcc_urlifier ();
+  global_dc->set_urlifier (make_gcc_urlifier ());
 
   if (init_signals)
 {
-- 
2.26.3



[pushed 1/4] diagnostics: eliminate diagnostic_kind_count

2023-11-06 Thread David Malcolm
No functional change intended.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-5166-g579bb65cdd35a4.

gcc/ChangeLog:
* diagnostic.cc (diagnostic_context::check_max_errors): Replace
uses of diagnostic_kind_count with simple field access.
(diagnostic_context::report_diagnostic): Likewise.
(diagnostic_text_output_format::~diagnostic_text_output_format):
Replace use of diagnostic_kind_count with
diagnostic_context::diagnostic_count.
* diagnostic.h (diagnostic_kind_count): Delete.
(errorcount): Replace use of diagnostic_kind_count with
diagnostic_context::diagnostic_count.
(warningcount): Likewise.
(werrorcount): Likewise.
(sorrycount): Likewise.
---
 gcc/diagnostic.cc | 16 
 gcc/diagnostic.h  | 17 -
 2 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/gcc/diagnostic.cc b/gcc/diagnostic.cc
index e917e6ce4ac..90103e150f7 100644
--- a/gcc/diagnostic.cc
+++ b/gcc/diagnostic.cc
@@ -651,9 +651,9 @@ diagnostic_context::check_max_errors (bool flush)
   if (!m_max_errors)
 return;
 
-  int count = (diagnostic_kind_count (this, DK_ERROR)
-  + diagnostic_kind_count (this, DK_SORRY)
-  + diagnostic_kind_count (this, DK_WERROR));
+  int count = (m_diagnostic_count[DK_ERROR]
+  + m_diagnostic_count[DK_SORRY]
+  + m_diagnostic_count[DK_WERROR]);
 
   if (count >= m_max_errors)
 {
@@ -1547,8 +1547,8 @@ diagnostic_context::report_diagnostic (diagnostic_info 
*diagnostic)
 error has already occurred.  This is counteracted by
 abort_on_error.  */
   if (!CHECKING_P
- && (diagnostic_kind_count (this, DK_ERROR) > 0
- || diagnostic_kind_count (this, DK_SORRY) > 0)
+ && (m_diagnostic_count[DK_ERROR] > 0
+ || m_diagnostic_count[DK_SORRY] > 0)
  && !m_abort_on_error)
{
  expanded_location s 
@@ -1563,9 +1563,9 @@ diagnostic_context::report_diagnostic (diagnostic_info 
*diagnostic)
 diagnostic->message.m_args_ptr);
 }
   if (diagnostic->kind == DK_ERROR && orig_diag_kind == DK_WARNING)
-++diagnostic_kind_count (this, DK_WERROR);
+++m_diagnostic_count[DK_WERROR];
   else
-++diagnostic_kind_count (this, diagnostic->kind);
+++m_diagnostic_count[diagnostic->kind];
 
   /* Is this the initial diagnostic within the stack of groups?  */
   if (m_diagnostic_groups.m_emission_count == 0)
@@ -2336,7 +2336,7 @@ auto_diagnostic_group::~auto_diagnostic_group ()
 diagnostic_text_output_format::~diagnostic_text_output_format ()
 {
   /* Some of the errors may actually have been warnings.  */
-  if (diagnostic_kind_count (&m_context, DK_WERROR))
+  if (m_context.diagnostic_count (DK_WERROR))
 {
   /* -Werror was given.  */
   if (m_context.warning_as_error_requested_p ())
diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h
index cf21558c7b2..4ef031b5d1c 100644
--- a/gcc/diagnostic.h
+++ b/gcc/diagnostic.h
@@ -701,24 +701,15 @@ extern diagnostic_context *global_dc;
ready for use.  */
 #define diagnostic_ready_p() (global_dc->printer != NULL)
 
-/* The total count of a KIND of diagnostics emitted so far.  */
-
-inline int &
-diagnostic_kind_count (diagnostic_context *context,
-  diagnostic_t kind)
-{
-  return context->diagnostic_count (kind);
-}
-
 /* The number of errors that have been issued so far.  Ideally, these
would take a diagnostic_context as an argument.  */
-#define errorcount diagnostic_kind_count (global_dc, DK_ERROR)
+#define errorcount global_dc->diagnostic_count (DK_ERROR)
 /* Similarly, but for warnings.  */
-#define warningcount diagnostic_kind_count (global_dc, DK_WARNING)
+#define warningcount global_dc->diagnostic_count (DK_WARNING)
 /* Similarly, but for warnings promoted to errors.  */
-#define werrorcount diagnostic_kind_count (global_dc, DK_WERROR)
+#define werrorcount global_dc->diagnostic_count (DK_WERROR)
 /* Similarly, but for sorrys.  */
-#define sorrycount diagnostic_kind_count (global_dc, DK_SORRY)
+#define sorrycount global_dc->diagnostic_count (DK_SORRY)
 
 /* Returns nonzero if warnings should be emitted.  */
 #define diagnostic_report_warnings_p(DC, LOC)  \
-- 
2.26.3



[pushed 3/4] diagnostics: introduce class diagnostic_option_classifier

2023-11-06 Thread David Malcolm
This patch gathers the 6 fields in diagnostic_context relating to
keeping track of overriding the severity of warnings, and
pushing/popping those severities, moving them all into a new class
diagnostic_option_classifier.

No functional change intended.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-5168-g38763e2c188fa9.

gcc/ChangeLog:
* diagnostic.cc (diagnostic_context::push_diagnostics): Convert
to...
(diagnostic_option_classifier::push): ...this.
(diagnostic_context::pop_diagnostics): Convert to...
(diagnostic_option_classifier::pop): ...this.
(diagnostic_context::initialize): Move code to...
(diagnostic_option_classifier::init): ...this new function.
(diagnostic_context::finish): Move code to...
(diagnostic_option_classifier::fini): ...this new function.
(diagnostic_context::classify_diagnostic): Convert to...
(diagnostic_option_classifier::classify_diagnostic): ...this.
(diagnostic_context::update_effective_level_from_pragmas): Convert
to...
(diagnostic_option_classifier::update_effective_level_from_pragmas):
...this.
(diagnostic_context::diagnostic_enabled): Update for refactoring.
* diagnostic.h (struct diagnostic_classification_change_t): Move into...
(class diagnostic_option_classifier): ...this new class.
(diagnostic_context::option_unspecified_p): Update for move of
fields into m_option_classifier.
(diagnostic_context::classify_diagnostic): Likewise.
(diagnostic_context::push_diagnostics): Likewise.
(diagnostic_context::pop_diagnostics): Likewise.
(diagnostic_context::update_effective_level_from_pragmas): Delete.
(diagnostic_context::m_classify_diagnostic): Move into class
diagnostic_option_classifier.
(diagnostic_context::m_option_classifier): Likewise.
(diagnostic_context::m_classification_history): Likewise.
(diagnostic_context::m_n_classification_history): Likewise.
(diagnostic_context::m_push_list): Likewise.
(diagnostic_context::m_n_push): Likewise.
(diagnostic_context::m_option_classifier): New.
---
 gcc/diagnostic.cc | 124 ---
 gcc/diagnostic.h  | 131 --
 2 files changed, 163 insertions(+), 92 deletions(-)

diff --git a/gcc/diagnostic.cc b/gcc/diagnostic.cc
index c617b34f02b..addd6606eaa 100644
--- a/gcc/diagnostic.cc
+++ b/gcc/diagnostic.cc
@@ -149,13 +149,63 @@ diagnostic_set_caret_max_width (diagnostic_context 
*context, int value)
   context->m_source_printing.max_width = value;
 }
 
+void
+diagnostic_option_classifier::init (int n_opts)
+{
+  m_n_opts = n_opts;
+  m_classify_diagnostic = XNEWVEC (diagnostic_t, n_opts);
+  for (int i = 0; i < n_opts; i++)
+m_classify_diagnostic[i] = DK_UNSPECIFIED;
+  m_push_list = nullptr;
+  m_n_push = 0;
+}
+
+void
+diagnostic_option_classifier::fini ()
+{
+  XDELETEVEC (m_classify_diagnostic);
+  m_classify_diagnostic = nullptr;
+  free (m_push_list);
+  m_n_push = 0;
+}
+
+/* Save all diagnostic classifications in a stack.  */
+
+void
+diagnostic_option_classifier::push ()
+{
+  m_push_list = (int *) xrealloc (m_push_list, (m_n_push + 1) * sizeof (int));
+  m_push_list[m_n_push ++] = m_n_classification_history;
+}
+
+/* Restore the topmost classification set off the stack.  If the stack
+   is empty, revert to the state based on command line parameters.  */
+
+void
+diagnostic_option_classifier::pop (location_t where)
+{
+  int jump_to;
+
+  if (m_n_push)
+jump_to = m_push_list [-- m_n_push];
+  else
+jump_to = 0;
+
+  const int i = m_n_classification_history;
+  m_classification_history =
+(diagnostic_classification_change_t *) xrealloc (m_classification_history, 
(i + 1)
+* sizeof 
(diagnostic_classification_change_t));
+  m_classification_history[i].location = where;
+  m_classification_history[i].option = jump_to;
+  m_classification_history[i].kind = DK_POP;
+  m_n_classification_history ++;
+}
+
 /* Initialize the diagnostic message outputting machinery.  */
 
 void
 diagnostic_context::initialize (int n_opts)
 {
-  int i;
-
   /* Allocate a basic pretty-printer.  Clients will replace this a
  much more elaborated pretty-printer if they wish.  */
   this->printer = XNEW (pretty_printer);
@@ -165,12 +215,10 @@ diagnostic_context::initialize (int n_opts)
   memset (m_diagnostic_count, 0, sizeof m_diagnostic_count);
   m_warning_as_error_requested = false;
   m_n_opts = n_opts;
-  m_classify_diagnostic = XNEWVEC (diagnostic_t, n_opts);
-  for (i = 0; i < n_opts; i++)
-m_classify_diagnostic[i] = DK_UNSPECIFIED;
+  m_option_classifier.init (n_opts);
   m_source_printing.enabled = false;
   diagnostic_set_caret_max_width (this, pp_line_cutoff (this->printer));
-  for (i = 0; i < rich_location::STA

[pushed 4/4] diagnostics: split out struct diagnostic_source_printing_options

2023-11-06 Thread David Malcolm
This patch removes almost all use of diagnostic_context from the
source-printing code.

No functional change intended.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-5169-g54da47f9459890.

gcc/ChangeLog:
* diagnostic-show-locus.cc (class colorizer): Take just a
pretty_printer rather than a diagnostic_context.
(layout::layout): Make context param a const reference,
and pretty_printer param non-optional.
(layout::m_context): Drop field.
(layout::m_options): New field.
(layout::m_colorize_source_p): Drop field.
(layout::m_show_labels_p): Drop field.
(layout::m_show_line_numbers_p): Drop field.
(layout::print_gap_in_line_numbering): Use m_options.
(layout::calculate_line_spans): Likewise.
(layout::calculate_linenum_width): Likewise.
(layout::calculate_x_offset_display): Likewise.
(layout::print_source_line): Likewise.
(layout::start_annotation_line): Likewise.
(layout::print_annotation_line): Likewise.
(layout::print_line): Likewise.
(gcc_rich_location::add_location_if_nearby): Update for changes to
layout ctor.
(diagnostic_show_locus): Likewise.
(selftest::test_offset_impl): Likewise.
(selftest::test_layout_x_offset_display_utf8): Likewise.
(selftest::test_layout_x_offset_display_tab): Likewise.
(selftest::test_tab_expansion): Likewise.
* diagnostic.h (diagnostic_context::m_source_printing): Move
declaration of struct outside diagnostic_context as...
(struct diagnostic_source_printing_options)... this.
---
 gcc/diagnostic-show-locus.cc | 94 +---
 gcc/diagnostic.h | 88 -
 2 files changed, 88 insertions(+), 94 deletions(-)

diff --git a/gcc/diagnostic-show-locus.cc b/gcc/diagnostic-show-locus.cc
index d7a471426b9..43523572fe5 100644
--- a/gcc/diagnostic-show-locus.cc
+++ b/gcc/diagnostic-show-locus.cc
@@ -83,7 +83,7 @@ struct point_state
 class colorizer
 {
  public:
-  colorizer (diagnostic_context *context,
+  colorizer (pretty_printer *pp,
 diagnostic_t diagnostic_kind);
   ~colorizer ();
 
@@ -113,7 +113,7 @@ class colorizer
   static const int STATE_FIXIT_INSERT  = -2;
   static const int STATE_FIXIT_DELETE  = -3;
 
-  diagnostic_context *m_context;
+  pretty_printer *m_pp;
   diagnostic_t m_diagnostic_kind;
   int m_current_state;
   const char *m_range1;
@@ -365,10 +365,10 @@ struct char_display_policy : public cpp_char_column_policy
 class layout
 {
  public:
-  layout (diagnostic_context *context,
+  layout (const diagnostic_context &context,
  rich_location *richloc,
  diagnostic_t diagnostic_kind,
- pretty_printer *pp = nullptr);
+ pretty_printer *pp);
 
   bool maybe_add_location_range (const location_range *loc_range,
 unsigned original_idx,
@@ -428,15 +428,12 @@ class layout
   move_to_column (int *column, int dest_column, bool add_left_margin);
 
  private:
-  diagnostic_context *m_context;
+  const diagnostic_source_printing_options &m_options;
   pretty_printer *m_pp;
   char_display_policy m_policy;
   location_t m_primary_loc;
   exploc_with_display_col m_exploc;
   colorizer m_colorizer;
-  bool m_colorize_source_p;
-  bool m_show_labels_p;
-  bool m_show_line_numbers_p;
   bool m_diagnostic_path_p;
   auto_vec  m_layout_ranges;
   auto_vec  m_fixit_hints;
@@ -451,9 +448,9 @@ class layout
 /* The constructor for "colorizer".  Lookup and store color codes for the
different kinds of things we might need to print.  */
 
-colorizer::colorizer (diagnostic_context *context,
+colorizer::colorizer (pretty_printer *pp,
  diagnostic_t diagnostic_kind) :
-  m_context (context),
+  m_pp (pp),
   m_diagnostic_kind (diagnostic_kind),
   m_current_state (STATE_NORMAL_TEXT)
 {
@@ -461,7 +458,7 @@ colorizer::colorizer (diagnostic_context *context,
   m_range2 = get_color_by_name ("range2");
   m_fixit_insert = get_color_by_name ("fixit-insert");
   m_fixit_delete = get_color_by_name ("fixit-delete");
-  m_stop_color = colorize_stop (pp_show_color (context->printer));
+  m_stop_color = colorize_stop (pp_show_color (m_pp));
 }
 
 /* The destructor for "colorize".  If colorization is on, print a code to
@@ -497,35 +494,35 @@ colorizer::begin_state (int state)
   break;
 
 case STATE_FIXIT_INSERT:
-  pp_string (m_context->printer, m_fixit_insert);
+  pp_string (m_pp, m_fixit_insert);
   break;
 
 case STATE_FIXIT_DELETE:
-  pp_string (m_context->printer, m_fixit_delete);
+  pp_string (m_pp, m_fixit_delete);
   break;
 
 case 0:
   /* Make range 0 be the same color as the "kind" text
 (error vs warning vs note).  */
   pp_string
-   (m_context->printer,
-colorize_start (pp_show_color (m_context->printer),
+   (m_pp,
+ 

Re: [PATCH 2/2] libgfortran: Remove empty array descriptor first dimension overwrite [PR112371]

2023-11-06 Thread Harald Anlauf

Hi Mikael,

Am 06.11.23 um 20:19 schrieb Mikael Morin:


This change to the testcase:

diff --git a/gcc/testsuite/gfortran.dg/bound_11.f90
b/gcc/testsuite/gfortran.dg/bound_11.f90
index 170eba4ddfd..2e96f843476 100644
--- a/gcc/testsuite/gfortran.dg/bound_11.f90
+++ b/gcc/testsuite/gfortran.dg/bound_11.f90
@@ -88,6 +88,7 @@ contains
  m4 = .false.
  i = 1
  r = sum(a, dim=i)
+    if (.not. allocated(r)) stop 210
  if (any(shape(r) /= (/ 3, 0, 7 /))) stop 211
  if (any(ubound(r) /= (/ 3, 0, 7 /))) stop 212
  i = 2
@@ -104,6 +105,7 @@ contains
  if (any(ubound(r) /= (/ 9, 3, 0 /))) stop 218
  i = 1
  r = sum(a, dim=i, mask=m1)
+    if (.not. allocated(r)) stop 220
  if (any(shape(r) /= (/ 3, 0, 7 /))) stop 221
  if (any(ubound(r) /= (/ 3, 0, 7 /))) stop 222
  i = 2
@@ -120,6 +122,7 @@ contains
  if (any(ubound(r) /= (/ 9, 3, 0 /))) stop 228
  i = 1
  r = sum(a, dim=i, mask=m4)
+    if (.not. allocated(r)) stop 230
  if (any(shape(r) /= (/ 3, 0, 7 /))) stop 231
  if (any(ubound(r) /= (/ 3, 0, 7 /))) stop 232
  i = 2

gives me a FAIL with STOP 220 (or STOP 230 if the STOP 220 line is
commented); the first one with STOP 210 passes.
So it is the first snippet with the xmallocarray (which supports zero
values, see memory.c) call that is the correct one.
Good catch, I will open a separate PR.


ah, now I see that the case of allocation of zero elements
always allocates one byte, which is needed for r.data to be
non-null.

Go ahead!

Harald



Mikael





[committed] hppa: Enable generation of GNU stack notes on Linux

2023-11-06 Thread John David Anglin
I think we have waited long enough for everyone to switch to a
kernel build with VDSO support.  Committed to trunk.

Dave
---

Enable generation of GNU stack notes on Linux

2023-11-06  John David Anglin  

* config/pa/pa-linux.h (NEED_INDICATE_EXEC_STACK): Define to 1.

diff --git a/gcc/config/pa/pa-linux.h b/gcc/config/pa/pa-linux.h
index d38f68b1fa5..96c54765ddb 100644
--- a/gcc/config/pa/pa-linux.h
+++ b/gcc/config/pa/pa-linux.h
@@ -144,8 +144,7 @@ along with GCC; see the file COPYING3.  If not see
 #define HAVE_sync_compare_and_swapsi 1
 #define HAVE_sync_compare_and_swapdi 1
 
-/* It's not possible to enable GNU_stack notes since the kernel needs
-   an executable stack for signal returns and syscall restarts.  */
+/* Enable GNU stack notes.  */
 
 #undef NEED_INDICATE_EXEC_STACK
-#define NEED_INDICATE_EXEC_STACK 0
+#define NEED_INDICATE_EXEC_STACK 1


signature.asc
Description: PGP signature


[committed] hppa: Fix typo in PA 2.0 trampoline template

2023-11-06 Thread John David Anglin
Noticed in glibc testsuite.  Committed to active branches.

Dave
---

hppa: Fix typo in PA 2.0 trampoline template

2023-11-06  John David Anglin  

* config/pa/pa.cc (pa_asm_trampoline_template): Fix typo.

diff --git a/gcc/config/pa/pa.cc b/gcc/config/pa/pa.cc
index 2e906cff7ff..218c48b4ae0 100644
--- a/gcc/config/pa/pa.cc
+++ b/gcc/config/pa/pa.cc
@@ -10401,7 +10401,7 @@ pa_asm_trampoline_template (FILE *f)
  fputs ("\tldw 0(%r22),%r21\n", f);
  fputs ("\tldw 4(%r22),%r19\n", f);
  fputs ("\tbve (%r21)\n", f);
- fputs ("\tldw 52(%r1),%r29\n", f);
+ fputs ("\tldw 52(%r20),%r29\n", f);
  fputs ("\t.word   0\n", f);
  fputs ("\t.word   0\n", f);
  fputs ("\t.word   0\n", f);


signature.asc
Description: PGP signature


[PATCH][_Hashtable] Add missing destructor call

2023-11-06 Thread François Dumont
Noticed while looking for other occasions to replace __try/__catch with an RAII 
helper.


    libstdc++: [_Hashtable] Add missing node destructor call

    libstdc++-v3/ChangeLog:

    * include/bits/hashtable_policy.h
    (_Hashtable_alloc<>::_M_allocate_node): Add missing call to 
node destructor

    on construct exception.

Tested under Linux x64, ok to commit ?

I hope gmail appli will appreciate .diff instead of .patch. .txt are not 
in .gitignore so annoying to use for patches.


François
diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h
index 2d13bda6ae0..ed2b2c02a4a 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -2020,19 +2020,20 @@ namespace __detail
   _Hashtable_alloc<_NodeAlloc>::_M_allocate_node(_Args&&... __args)
   -> __node_ptr
   {
-	auto __nptr = __node_alloc_traits::allocate(_M_node_allocator(), 1);
+	auto& __alloc = _M_node_allocator();
+	auto __nptr = __node_alloc_traits::allocate(__alloc, 1);
 	__node_ptr __n = std::__to_address(__nptr);
 	__try
 	  {
 	::new ((void*)__n) __node_type;
-	__node_alloc_traits::construct(_M_node_allocator(),
-	   __n->_M_valptr(),
+	__node_alloc_traits::construct(__alloc, __n->_M_valptr(),
 	   std::forward<_Args>(__args)...);
 	return __n;
 	  }
 	__catch(...)
 	  {
-	__node_alloc_traits::deallocate(_M_node_allocator(), __nptr, 1);
+	__n->~__node_type();
+	__node_alloc_traits::deallocate(__alloc, __nptr, 1);
 	__throw_exception_again;
 	  }
   }


Re: [PATCH][_Hashtable] Add missing destructor call

2023-11-06 Thread Sam James


François Dumont  writes:

> Noticed looking for other occasion to replace __try/__catch with RAII
> helper.
>
>     libstdc++: [_Hashtable] Add missing node destructor call
>
>     libstdc++-v3/ChangeLog:
>
>     * include/bits/hashtable_policy.h
>     (_Hashtable_alloc<>::_M_allocate_node): Add missing call
> to node destructor
>     on construct exception.
>
> Tested under Linux x64, ok to commit ?
>
> I hope gmail appli will appreciate .diff instead of .patch. .txt are
> not in .gitignore so annoying to use for patches.

Are you using 'git send-email'? It should handle all of that for you.

>
> François
>
> [2. text/x-patch; hashtable_policy.h.diff]...



[PATCH/RFC] libdiagnostics: a shared library for emitting diagnostics

2023-11-06 Thread David Malcolm
It's fairly easy for tools to implement simple diagnostics
via fprintf of 
  FILE:LINE:COLUMN: error: message
to stderr, but as diagnostics get more featureful, using a shared
library makes sense.

This patch kit extends GCC to add a new "libdiagnostics" shared library
on the host, built around GCC's existing diagnostic-handling code, exposed
via a pure C API intended for client code that wants to emit GCC-style
diagnostics. It implements:
- quoting pertinent source code (with a cache)
- underlining points and ranges in the source code, possibly with labels
- emitting fix-it hints
- generating patches from fix-it hints
- SARIF output

The first patch (for GCC) shows libdiagnostic.h (the public header
file), along with examples of simple self-contained programs that
show various uses of the API.

The second patch (for GCC) is the work-in-progress implementation.

The third patch (for binutils) is an experiment at using the API
with gas.

Status: this is a rough prototype.  I'm posting it now to get
feedback, both from GCC developers, and from projects that might make
use of this library (binutils? GNU Cobol? others?).  The header
file has a "TODO" list at the end listing various known unfinished
aspects, and "known unknowns".

Thoughts?


David Malcolm (2):
  libdiagnostics: header and examples
  libdiagnostics: work-in-progress implementation

 gcc/Makefile.in   |  134 +-
 gcc/configure |2 +-
 gcc/configure.ac  |2 +-
 gcc/input.h   |2 +-
 gcc/libdiagnostics.cc | 1124 +
 gcc/libdiagnostics.h  |  544 
 gcc/libdiagnostics.map|   57 +
 .../libdiagnostics.dg/libdiagnostics.exp  |  544 
 .../libdiagnostics.dg/test-error-with-note.c  |   57 +
 gcc/testsuite/libdiagnostics.dg/test-error.c  |   49 +
 .../libdiagnostics.dg/test-fix-it-hint.c  |   48 +
 .../libdiagnostics.dg/test-helpers.h  |   29 +
 .../libdiagnostics.dg/test-labelled-ranges.c  |   52 +
 .../libdiagnostics.dg/test-logical-location.c |   62 +
 .../libdiagnostics.dg/test-metadata.c |   53 +
 .../libdiagnostics.dg/test-multiple-lines.c   |   58 +
 .../test-note-with-fix-it-hint.c  |   51 +
 .../libdiagnostics.dg/test-warning.c  |   52 +
 .../test-write-sarif-to-file.c|   46 +
 .../test-write-text-to-file.c |   47 +
 20 files changed, 3008 insertions(+), 5 deletions(-)
 create mode 100644 gcc/libdiagnostics.cc
 create mode 100644 gcc/libdiagnostics.h
 create mode 100644 gcc/libdiagnostics.map
 create mode 100644 gcc/testsuite/libdiagnostics.dg/libdiagnostics.exp
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-error-with-note.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-error.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-fix-it-hint.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-helpers.h
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-labelled-ranges.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-logical-location.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-metadata.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-multiple-lines.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-note-with-fix-it-hint.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-warning.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-write-sarif-to-file.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-write-text-to-file.c

-- 
2.26.3



[PATCH 1/2] libdiagnostics: header and examples

2023-11-06 Thread David Malcolm
Here's a work-in-progress patch for GCC that adds a libdiagnostics.h
header describing the public interface, along with various testcases
that show usage examples for the API.  Various aspects of this need
work; posting now for early feedback on overall direction.

How does the interface look?

gcc/ChangeLog:
* libdiagnostics.h: New file.

gcc/testsuite/ChangeLog:
* libdiagnostics.dg/test-error-with-note.c: New test.
* libdiagnostics.dg/test-error.c: New test.
* libdiagnostics.dg/test-fix-it-hint.c: New test.
* libdiagnostics.dg/test-helpers.h: New.
* libdiagnostics.dg/test-labelled-ranges.c: New test.
* libdiagnostics.dg/test-logical-location.c: New test.
* libdiagnostics.dg/test-metadata.c: New test.
* libdiagnostics.dg/test-multiple-lines.c: New test.
* libdiagnostics.dg/test-note-with-fix-it-hint.c: New test.
* libdiagnostics.dg/test-warning.c: New test.
* libdiagnostics.dg/test-write-sarif-to-file.c: New test.
* libdiagnostics.dg/test-write-text-to-file.c: New test.
---
 gcc/libdiagnostics.h  | 544 ++
 .../libdiagnostics.dg/test-error-with-note.c  |  57 ++
 gcc/testsuite/libdiagnostics.dg/test-error.c  |  49 ++
 .../libdiagnostics.dg/test-fix-it-hint.c  |  48 ++
 .../libdiagnostics.dg/test-helpers.h  |  29 +
 .../libdiagnostics.dg/test-labelled-ranges.c  |  52 ++
 .../libdiagnostics.dg/test-logical-location.c |  62 ++
 .../libdiagnostics.dg/test-metadata.c |  53 ++
 .../libdiagnostics.dg/test-multiple-lines.c   |  58 ++
 .../test-note-with-fix-it-hint.c  |  51 ++
 .../libdiagnostics.dg/test-warning.c  |  52 ++
 .../test-write-sarif-to-file.c|  46 ++
 .../test-write-text-to-file.c |  47 ++
 13 files changed, 1148 insertions(+)
 create mode 100644 gcc/libdiagnostics.h
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-error-with-note.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-error.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-fix-it-hint.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-helpers.h
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-labelled-ranges.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-logical-location.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-metadata.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-multiple-lines.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-note-with-fix-it-hint.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-warning.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-write-sarif-to-file.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-write-text-to-file.c

diff --git a/gcc/libdiagnostics.h b/gcc/libdiagnostics.h
new file mode 100644
index 000..672594598fa
--- /dev/null
+++ b/gcc/libdiagnostics.h
@@ -0,0 +1,544 @@
+/* A pure C API for emitting diagnostics.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef LIBDIAGNOSTICS_H
+#define LIBDIAGNOSTICS_H
+
+/* We use FILE * for streams */
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ Macros for attributes.
+ These are all currently empty, and thus for the human reader rather than
+ the compiler.
+ **/
+
+#define LIBDIAGNOSTICS_PARAM_MUST_BE_NON_NULL(ARG_NUM)
+
+#define LIBDIAGNOSTICS_PARAM_CAN_BE_NULL(ARG_NUM)
+
+#define LIBDIAGNOSTICS_PARAM_GCC_FORMAT_STRING(FMT_ARG_NUM, ARGS_ARG_NUM)
+
+
+/**
+ Data structures and types.
+ All structs within the API are opaque.
+ **/
+
+/* An opaque bundle of state for a client of the library.
+   Has zero or more "sinks" to which diagnostics are emitted.
+   Responsibilities:
+   - location-management
+   - caching of source file content
+   - patch generation.  */
+typedef struct diagnostic_manager diagnostic_manager;
+
+/* Types relating to diagnostic output sinks.  */
+
+/* An enum for determining if we should colorize a text output sink.  */
+enum diagnostic_colorize
+{
+  DIAGNOSTIC_C

[PATCH 2/2] libdiagnostics: work-in-progress implementation

2023-11-06 Thread David Malcolm
Here's a work-in-progress patch for GCC that adds the implementation
of libdiagnostics.  Various aspects of this need work; posting now
for early feedback on overall direction.  For example, the testsuite
doesn't yet check the output from the test client programs (and I'm
not quite sure of the best way to express that in DejaGnu).

gcc/ChangeLog:
* Makefile.in (lang_checks): Add check-libdiagnostics.
(start.encap): Add libdiagnostics.
(libdiagnostics_OBJS): New.
...plus a bunch of stuff hacked up from jit/Make-lang.in.
* configure: Regenerate.
* configure.ac (check_languages): Add check-libdiagnostics.
* input.h: Add FIXME.
* libdiagnostics.cc: New file.
* libdiagnostics.map: New file.

gcc/testsuite/ChangeLog:
* libdiagnostics.dg/libdiagnostics.exp: New, based on jit.exp.
---
 gcc/Makefile.in   |  134 +-
 gcc/configure |2 +-
 gcc/configure.ac  |2 +-
 gcc/input.h   |2 +-
 gcc/libdiagnostics.cc | 1124 +
 gcc/libdiagnostics.map|   57 +
 .../libdiagnostics.dg/libdiagnostics.exp  |  544 
 7 files changed, 1860 insertions(+), 5 deletions(-)
 create mode 100644 gcc/libdiagnostics.cc
 create mode 100644 gcc/libdiagnostics.map
 create mode 100644 gcc/testsuite/libdiagnostics.dg/libdiagnostics.exp

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index ff77d3cdc64..8f93ae48024 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -611,7 +611,7 @@ host_xm_defines=@host_xm_defines@
 xm_file_list=@xm_file_list@
 xm_include_list=@xm_include_list@
 xm_defines=@xm_defines@
-lang_checks=
+lang_checks=check-libdiagnostics
 lang_checks_parallelized=
 lang_opt_files=@lang_opt_files@ $(srcdir)/c-family/c.opt $(srcdir)/common.opt 
$(srcdir)/params.opt $(srcdir)/analyzer/analyzer.opt
 lang_specs_files=@lang_specs_files@
@@ -2153,7 +2153,7 @@ all.cross: native gcc-cross$(exeext) cpp$(exeext) specs \
libgcc-support lang.all.cross doc selftest @GENINSRC@ srcextra
 # This is what must be made before installing GCC and converting libraries.
 start.encap: native xgcc$(exeext) cpp$(exeext) specs \
-   libgcc-support lang.start.encap @GENINSRC@ srcextra
+   libgcc-support lang.start.encap libdiagnostics @GENINSRC@ srcextra
 # These can't be made until after GCC can run.
 rest.encap: lang.rest.encap
 # This is what is made with the host's compiler
@@ -2242,6 +2242,136 @@ cpp$(exeext): $(GCC_OBJS) c-family/cppspec.o 
libcommon-target.a $(LIBDEPS) \
  c-family/cppspec.o $(EXTRA_GCC_OBJS) libcommon-target.a \
  $(EXTRA_GCC_LIBS) $(LIBS)
 
+
+libdiagnostics_OBJS = libdiagnostics.o \
+   libcommon.a
+
+# FIXME:
+# Define the names for selecting jit in LANGUAGES.
+# Note that it would be nice to move the dependency on g++
+# into the jit rule, but that needs a little bit of work
+# to do the right thing within all.cross.
+
+LIBDIAGNOSTICS_VERSION_NUM = 0
+LIBDIAGNOSTICS_MINOR_NUM = 0
+LIBDIAGNOSTICS_RELEASE_NUM = 1
+
+ifneq (,$(findstring mingw,$(target)))
+LIBDIAGNOSTICS_FILENAME = libdiagnostics-$(LIBDIAGNOSTICS_VERSION_NUM).dll
+LIBDIAGNOSTICS_IMPORT_LIB = libdiagnostics.dll.a
+
+libdiagnostics: $(LIBDIAGNOSTICS_FILENAME) \
+   $(FULL_DRIVER_NAME)
+
+else
+
+ifneq (,$(findstring darwin,$(host)))
+
+LIBDIAGNOSTICS_AGE = 1
+LIBDIAGNOSTICS_BASENAME = libdiagnostics
+
+LIBDIAGNOSTICS_SONAME = \
+  ${libdir}/$(LIBDIAGNOSTICS_BASENAME).$(LIBDIAGNOSTICS_VERSION_NUM).dylib
+LIBDIAGNOSTICS_FILENAME = 
$(LIBDIAGNOSTICS_BASENAME).$(LIBDIAGNOSTICS_VERSION_NUM).dylib
+LIBDIAGNOSTICS_LINKER_NAME = $(LIBDIAGNOSTICS_BASENAME).dylib
+
+# Conditionalize the use of the LD_VERSION_SCRIPT_OPTION and
+# LD_SONAME_OPTION depending if configure found them, using $(if)
+# We have to define a LIBDIAGNOSTICS_COMMA here, otherwise the commas in the 
"true"
+# result are treated as separators by the $(if).
+LIBDIAGNOSTICS_COMMA := ,
+LIBDIAGNOSTICS_VERSION_SCRIPT_OPTION = \
+   $(if $(LD_VERSION_SCRIPT_OPTION),\
+ 
-Wl$(LIBDIAGNOSTICS_COMMA)$(LD_VERSION_SCRIPT_OPTION)$(LIBDIAGNOSTICS_COMMA)$(srcdir)/libdiagnostics.map)
+
+LIBDIAGNOSTICS_SONAME_OPTION = \
+   $(if $(LD_SONAME_OPTION), \
+
-Wl$(LIBDIAGNOSTICS_COMMA)$(LD_SONAME_OPTION)$(LIBDIAGNOSTICS_COMMA)$(LIBDIAGNOSTICS_SONAME))
+
+LIBDIAGNOSTICS_SONAME_SYMLINK = $(LIBDIAGNOSTICS_FILENAME)
+LIBDIAGNOSTICS_LINKER_NAME_SYMLINK = $(LIBDIAGNOSTICS_LINKER_NAME)
+
+libdiagnostics: $(LIBDIAGNOSTICS_FILENAME) \
+   $(LIBDIAGNOSTICS_SYMLINK) \
+   $(LIBDIAGNOSTICS_LINKER_NAME_SYMLINK) \
+   $(FULL_DRIVER_NAME)
+
+else
+
+LIBDIAGNOSTICS_LINKER_NAME = libdiagnostics.so
+LIBDIAGNOSTICS_SONAME = 
$(LIBDIAGNOSTICS_LINKER_NAME).$(LIBDIAGNOSTICS_VERSION_NUM)
+LIBDIAGNOSTICS_FILENAME = \
+  
$(LIBDIAGNOSTICS_SONAME).$(LIBDIAGNOSTICS_MINOR_NUM).$(LIBDIAGNOSTICS_RELEASE_NUM)
+

[PATCH] binutils: experimental use of libdiagnostics in gas

2023-11-06 Thread David Malcolm
Here's a patch for gas in binutils that makes it use libdiagnostics
(with some nasty hardcoded paths to specific places on my hard drive
to make it easier to develop the API).

For now this hardcodes adding two sinks: a text sink on stderr, and
also a SARIF sink on stderr (whose output is emitted after all regular output).

For example, without this patch:

   gas testsuite/gas/all/warn-1.s

emits:

testsuite/gas/all/warn-1.s: Assembler messages:
testsuite/gas/all/warn-1.s:3: Warning: a warning message
testsuite/gas/all/warn-1.s:4: Error: .warning argument must be a string
testsuite/gas/all/warn-1.s:5: Warning: .warning directive invoked in source file
testsuite/gas/all/warn-1.s:6: Warning: .warning directive invoked in source file
testsuite/gas/all/warn-1.s:7: Warning:


whereas with this patch:
  LD_LIBRARY_PATH=/home/david/coding-3/gcc-newgit-canvas-2023/build/gcc 
./as-new testsuite/gas/all/warn-1.s
emits:


testsuite/gas/all/warn-1.s:3: warning: a warning message
3 |  .warning "a warning message"   ;# { dg-warning "Warning: a warning 
message" }
  |
testsuite/gas/all/warn-1.s:4: error: .warning argument must be a string
4 |  .warning a warning message ;# { dg-error "Error: .warning argument 
must be a string" }
  |
testsuite/gas/all/warn-1.s:5: warning: .warning directive invoked in source file
5 |  .warning   ;# { dg-warning "Warning: .warning 
directive invoked in source file" }
  |
testsuite/gas/all/warn-1.s:6: warning: .warning directive invoked in source file
6 |  .warning ".warning directive invoked in source file"   ;# { dg-warning 
"Warning: .warning directive invoked in source file" }
  |
testsuite/gas/all/warn-1.s:7: warning:
7 |  .warning "";# { dg-warning "Warning: " }
  |
{"$schema": 
"https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
 "version": "2.1.0", "runs": [{"tool": {"driver": {"rules": []}}, 
"invocations": [{"executionSuccessful": true, "toolExecutionNotifications": 
[]}], "originalUriBaseIds": {"PWD": {"uri": 
"file:///home/david/coding-3/binutils-gdb/gas/"}}, "artifacts": [{"location": 
{"uri": "testsuite/gas/all/warn-1.s", "uriBaseId": "PWD"}, "contents": {"text": 
";# Test .warning directive.\n;# { dg-do assemble }\n .warning \"a warning 
message\"\t;# { dg-warning \"Warning: a warning message\" }\n .warning a 
warning message\t;# { dg-error \"Error: .warning argument must be a string\" 
}\n .warning\t\t\t;# { dg-warning \"Warning: .warning directive invoked in 
source file\" }\n .warning \".warning directive invoked in source file\"\t;# { 
dg-warning \"Warning: .warning directive invoked in source file\" }\n .warning 
\"\"\t\t\t;# { dg-warning \"Warning: \" }\n"}}], "results": [{"ruleId": 
"warning", "level": "warning", "message": {"text": "a warning message"}, 
"locations": [{"physicalLocation": {"artifactLocation": {"uri": 
"testsuite/gas/all/warn-1.s", "uriBaseId": "PWD"}, "region": {"startLine": 3, 
"startColumn": 0, "endColumn": 1}, "contextRegion": {"startLine": 3, "snippet": 
{"text": " .warning \"a warning message\"\t;# { dg-warning \"Warning: a warning 
message\" }\n"], "relatedLocations": [{"physicalLocation": 
{"artifactLocation": {"uri": "testsuite/gas/all/warn-1.s", "uriBaseId": "PWD"}, 
"region": {"startLine": 4, "startColumn": 0, "endColumn": 1}, "contextRegion": 
{"startLine": 4, "snippet": {"text": " .warning a warning message\t;# { 
dg-error \"Error: .warning argument must be a string\" }\n"}}}, "message": 
{"text": ".warning argument must be a string"}}, {"physicalLocation": 
{"artifactLocation": {"uri": "testsuite/gas/all/warn-1.s", "uriBaseId": "PWD"}, 
"region": {"startLine": 5, "startColumn": 0, "endColumn": 1}, "contextRegion": 
{"startLine": 5, "snippet": {"text": " .warning\t\t\t;# { dg-warning \"Warning: 
.warning directive invoked in source file\" }\n"}}}, "message": {"text": 
".warning directive invoked in source file"}}, {"physicalLocation": 
{"artifactLocation": {"uri": "testsuite/gas/all/warn-1.s", "uriBaseId": "PWD"}, 
"region": {"startLine": 6, "startColumn": 0, "endColumn": 1}, "contextRegion": 
{"startLine": 6, "snippet": {"text": " .warning \".warning directive invoked in 
source file\"\t;# { dg-warning \"Warning: .warning directive invoked in source 
file\" }\n"}}}, "message": {"text": ".warning directive invoked in source 
file"}}, {"physicalLocation": {"artifactLocation": {"uri": 
"testsuite/gas/all/warn-1.s", "uriBaseId": "PWD"}, "region": {"startLine": 7, 
"startColumn": 0, "endColumn": 1}, "contextRegion": {"startLine": 7, "snippet": 
{"text": " .warning \"\"\t\t\t;# { dg-warning \"Warning: \" }\n"}}}, "message": 
{"text": ""}}]}]}]}
^^

  1   2   >