[PATCH]middle-end: Remove unused internal function after IVopts cleanup [PR118756]

2025-02-06 Thread Tamar Christina
Hi All,

It seems that after my IVopts patches the function contain_complex_addr_expr
became unused and clang is rightfully complaining about it.

This removes the unused internal function.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

PR tree-optimization/19202
* tree-ssa-loop-ivopts.cc (contain_complex_addr_expr): Remove.

---
diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc
index 
989321137df93349f8395a9f746f73eda52fd74f..e37b24062f730e490ae86bd9248e61102ad1a0f0
 100644
--- a/gcc/tree-ssa-loop-ivopts.cc
+++ b/gcc/tree-ssa-loop-ivopts.cc
@@ -1149,34 +1149,6 @@ determine_base_object (struct ivopts_data *data, tree 
expr)
   return obj;
 }
 
-/* Return true if address expression with non-DECL_P operand appears
-   in EXPR.  */
-
-static bool
-contain_complex_addr_expr (tree expr)
-{
-  bool res = false;
-
-  STRIP_NOPS (expr);
-  switch (TREE_CODE (expr))
-{
-case POINTER_PLUS_EXPR:
-case PLUS_EXPR:
-case MINUS_EXPR:
-  res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
-  res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
-  break;
-
-case ADDR_EXPR:
-  return (!DECL_P (TREE_OPERAND (expr, 0)));
-
-default:
-  return false;
-}
-
-  return res;
-}
-
 /* Allocates an induction variable with given initial value BASE and step STEP
for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
 




-- 
diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc
index 989321137df93349f8395a9f746f73eda52fd74f..e37b24062f730e490ae86bd9248e61102ad1a0f0 100644
--- a/gcc/tree-ssa-loop-ivopts.cc
+++ b/gcc/tree-ssa-loop-ivopts.cc
@@ -1149,34 +1149,6 @@ determine_base_object (struct ivopts_data *data, tree expr)
   return obj;
 }
 
-/* Return true if address expression with non-DECL_P operand appears
-   in EXPR.  */
-
-static bool
-contain_complex_addr_expr (tree expr)
-{
-  bool res = false;
-
-  STRIP_NOPS (expr);
-  switch (TREE_CODE (expr))
-{
-case POINTER_PLUS_EXPR:
-case PLUS_EXPR:
-case MINUS_EXPR:
-  res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
-  res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
-  break;
-
-case ADDR_EXPR:
-  return (!DECL_P (TREE_OPERAND (expr, 0)));
-
-default:
-  return false;
-}
-
-  return res;
-}
-
 /* Allocates an induction variable with given initial value BASE and step STEP
for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
 





อยากได้เงินก่อน แบบผ่อนเบาๆ ต้องสินเชื่อ SME เจริญทรัพย์ วงเงินสูง อนุมัติไว โทร 0️⃣9️⃣5️⃣8️⃣4️⃣3️⃣8️⃣6️⃣0️⃣4️⃣ คุณ ภัทร

2025-02-06 Thread Layla 007
บริษัท เจริญทรัพย์ เครดิต
สินเชื่อเพื่อธุรกิจสำหรับเจ้าของกิจการ
- อนุมัติง่าย รู้ผลไว
(สอบถามฟรี)
LINE: big7809


[PATCH] vect: Move induction IV increments [PR110449]

2025-02-06 Thread Richard Sandiford
In this PR, we used to generate:

 .L6:
  mov v30.16b, v31.16b
  faddv31.4s, v31.4s, v27.4s
  faddv29.4s, v30.4s, v28.4s
  stp q30, q29, [x0]
  add x0, x0, 32
  cmp x1, x0
  bne .L6

for an unrolled induction in:

  for (int i = 0; i < 1024; i++)
{
  arr[i] = freq;
  freq += step;
}

with the problem being the unnecessary MOV.

The main induction IV was incremented by VF * step == 2 * nunits * step,
and then nunits * step was added for the second store to arr.

The original patch for the PR (r14-2367-g224fd59b2dc8) avoided the MOV
by incrementing the IV by nunits * step twice.  The problem with that
approach is that it doubles the loop-carried latency.  This change was
deliberately not preserved when moving from loop-vect to SLP and so
the test started failing again after r15-3509-gd34cda720988.

I think the main problem is that we put the IV increment in the wrong
place.  Normal IVs created by create_iv are placed before the exit
condition where possible, but vectorizable_induction instead always
inserted them at the start of the loop body.  The only use of the
incremented IV is by the phi node, so the effect is to make both
the old and new IV values live for the whole loop body, which is
why we need the MOV.

The simplest fix therefore seems to be to reuse the create_iv logic.

Bootstrapped & regression-tested on aarch64-linus-gnu and
x86_64-linux-gnu.  As Richi suggested, I also tested vect.exp
using unix/-mavx512f/--param=vect-partial-vector-usage=2.
OK to install?

Richard


gcc/
PR tree-optimization/110449
* tree-ssa-loop-manip.h (insert_iv_increment): Declare.
* tree-ssa-loop-manip.cc (insert_iv_increment): New function,
split out from...
(create_iv): ...here and generalized to gimple_seqs.
* tree-vect-loop.cc (vectorizable_induction): Use
standard_iv_increment_position and insert_iv_increment
to insert the IV increment.

gcc/testsuite/
PR tree-optimization/110449
* gcc.target/aarch64/pr110449.c: Expect an increment by 8.0,
but test that there is no MOV.
---
 gcc/testsuite/gcc.target/aarch64/pr110449.c | 25 +
 gcc/tree-ssa-loop-manip.cc  | 62 -
 gcc/tree-ssa-loop-manip.h   |  1 +
 gcc/tree-vect-loop.cc   |  6 +-
 4 files changed, 57 insertions(+), 37 deletions(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/pr110449.c 
b/gcc/testsuite/gcc.target/aarch64/pr110449.c
index bb3b6dcfe08..51ca3f4b816 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr110449.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr110449.c
@@ -1,8 +1,10 @@
 /* { dg-do compile } */
 /* { dg-options "-Ofast -mcpu=neoverse-n2 --param aarch64-vect-unroll-limit=2" 
} */
-/* { dg-final { scan-assembler-not "8.0e\\+0" } } */
+/* { dg-final { scan-assembler {, #?8.0e\+0} } } */
+/* { dg-final { scan-assembler-not {\tmov\tv} } } */
 
-/* Calcualte the vectorized induction with smaller step for an unrolled loop.
+/* Insert the induction IV updates before the exit condition, rather than
+   at the start of the loop body.
 
before (suggested_unroll_factor=2):
  fmovs30, 8.0e+0
@@ -19,15 +21,16 @@
  bne .L6
 
after:
- fmovs31, 4.0e+0
- dup v29.4s, v31.s[0]
- .L6:
- faddv30.4s, v31.4s, v29.4s
- stp q31, q30, [x0]
- add x0, x0, 32
- faddv31.4s, v29.4s, v30.4s
- cmp x0, x1
- bne .L6  */
+ fmovs31, 8.0e+0
+ fmovs29, 4.0e+0
+ dup v31.4s, v31.s[0]
+ dup v29.4s, v29.s[0]
+ .L2:
+ faddv30.4s, v0.4s, v29.4s
+ stp q0, q30, [x0], 32
+ faddv0.4s, v0.4s, v31.4s
+ cmp x1, x0
+ bne .L2  */
 
 void
 foo2 (float *arr, float freq, float step)
diff --git a/gcc/tree-ssa-loop-manip.cc b/gcc/tree-ssa-loop-manip.cc
index 6ceb9df370b..2907fa6532d 100644
--- a/gcc/tree-ssa-loop-manip.cc
+++ b/gcc/tree-ssa-loop-manip.cc
@@ -47,6 +47,39 @@ along with GCC; see the file COPYING3.  If not see
so that we can free them all at once.  */
 static bitmap_obstack loop_renamer_obstack;
 
+/* Insert IV increment statements STMTS before or after INCR_POS;
+   AFTER selects which.  INCR_POS and AFTER can be computed using
+   standard_iv_increment_position.  */
+
+void
+insert_iv_increment (gimple_stmt_iterator *incr_pos, bool after,
+gimple_seq stmts)
+{
+  /* Prevent the increment from inheriting a bogus location if it is not put
+ immediately after a statement whose location is known.  */
+  if (after)
+{
+  gimple_stmt_iterator gsi = *incr_pos;
+  if (!gsi_end_p (gsi))
+   gsi_next_nondebug (&gsi);
+  if (gsi_end_p (gsi))
+   {
+ edge e = single_succ_edge (gsi_bb (*incr_pos));
+ gimple_seq_set_location (stmts, e->

Re: [PATCH] Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.

2025-02-06 Thread Surya Kumari Jangala



On 31/01/25 8:24 am, Michael Meissner wrote:
> Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.
> 
> In bug PR target/118541 on power9, power10, and power11 systems, for the
> function:
> 
> extern double __ieee754_acos (double);
> 
> double
> __acospi (double x)
> {
>   double ret = __ieee754_acos (x) / 3.14;
>   return __builtin_isgreater (ret, 1.0) ? 1.0 : ret;
> }
> 
> GCC currently generates the following code:
> 
> Power9  Power10 and Power11
> ==  ===
> bl __ieee754_acos   bl __ieee754_acos@notoc
> nop plfd 0,.LC0@pcrel
> addis 9,2,.LC2@toc@ha   xxspltidp 12,1065353216
> addi 1,1,32 addi 1,1,32
> lfd 0,.LC2@toc@l(9) ld 0,16(1)
> addis 9,2,.LC0@toc@ha   fdiv 0,1,0
> ld 0,16(1)  mtlr 0
> lfd 12,.LC0@toc@l(9)xscmpgtdp 1,0,12
> fdiv 0,1,0  xxsel 1,0,12,1
> mtlr 0  blr
> xscmpgtdp 1,0,12
> xxsel 1,0,12,1
> blr
> 
> This is because ifcvt.c optimizes the conditional floating point move to use 
> the
> XSCMPGTDP instruction.
> 
> However, the XSCMPGTDP instruction will generate an interrupt if one of the
> arguments is a signalling NaN and signalling NaNs can generate an interrupt.
> The IEEE comparison functions (isgreater, etc.) require that the comparison 
> not
> raise an interrupt.

Hi Mike,
Both ordered and unordered compare instructions set the VXSNAN bit. The 
difference 
is that if either operand is an SNaN and the VE bit is 0, then the ordered 
instruction sets the VXVC bit.
The ordered and unordered compare instructions also differ in how QNaN is 
handled.
If either operand is a QNaN, the VXVC exception bit is set by the ordered 
instructions.

Regards,
Surya


Re: The COBOL front end, in 8 notes + toplevel patch

2025-02-06 Thread Matthias Klose

On 06.02.25 00:36, Robert Dubner wrote:

-Original Message-
From: Matthias Klose 
Sent: Tuesday, December 17, 2024 04:26
To: Joseph Myers 
Cc: gcc-patches@gcc.gnu.org; James K. Lowden 
Subject: Re: The COBOL front end, in 8 notes + toplevel patch

On 17.12.24 00:58, Joseph Myers wrote:

On Mon, 16 Dec 2024, Matthias Klose wrote:


On 14.12.24 15:38, Matthias Klose wrote:

I tried to use the patches to build binary packages for Debian.
Found some
issues:


tried to build libgcobol on more architectures, please find the
attached patch to disable building libgcobol on some architectures.


Enabling on x86_64-*-linux* and disabling on i?86-*-linux* is
inherently suspect since the difference between those is only about
what the default multilib is, and says nothing about the multilib used
for a particular compilation of libgcobol.  (Of course we first need
to fix multilib support for libgcobol if that isn't working correctly
- all target libraries in GCC need proper multilib support.)



why is this suspect?  looking at libtsan and libhwasan, these are only
built for the 64bit variant, not for 32 and x32.

Matthias


Matthias, we've put your suggested configure patches into place.  And now I
am exploring exactly the issue you mention here.  The -m32 compilation on a
x86_64 platform is failing, because the -m32 header files don't know
anything about the __int128 declarations we're using.

I've been looking, and I'll continue to look, but if you can point me in the
direction of suppressing libgcobol compilations for -m32 and -m32x, you
might save me a lot of reverse engineering.


have a look at libsanitizer/configure.tgt. The check for
  test x$ac_cv_sizeof_void_p = x8
should work in a similar way.

Matthias



Re: [pushed][PATCH] LoongArch: Fix ICE caused by illegal calls to builtin functions [PR118561].

2025-02-06 Thread Lulu Cheng

Pushed to r14-11275 and r15-7386.

在 2025/1/23 上午11:44, Lulu Cheng 写道:

PR target/118561

gcc/ChangeLog:

* config/loongarch/loongarch-builtins.cc
(loongarch_expand_builtin_lsx_test_branch):
NULL_RTX will not be returned when an error is detected.
(loongarch_expand_builtin): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/pr118561.c: New test.

---
  gcc/config/loongarch/loongarch-builtins.cc| 7 +--
  gcc/testsuite/gcc.target/loongarch/pr118561.c | 9 +
  2 files changed, 14 insertions(+), 2 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/loongarch/pr118561.c

diff --git a/gcc/config/loongarch/loongarch-builtins.cc 
b/gcc/config/loongarch/loongarch-builtins.cc
index 92d995a916a..1849b35357c 100644
--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -2996,7 +2996,10 @@ loongarch_expand_builtin_lsx_test_branch (enum insn_code 
icode, tree exp)
  ops[1].value = force_reg (ops[1].mode, ops[1].value);
  
if ((cbranch = maybe_gen_insn (icode, 3, ops)) == NULL_RTX)

-error ("failed to expand built-in function");
+{
+  error ("failed to expand built-in function");
+  return const0_rtx;
+}
  
cmp_result = gen_reg_rtx (SImode);
  
@@ -3036,7 +3039,7 @@ loongarch_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,

  {
error_at (EXPR_LOCATION (exp),
"built-in function %qD is not enabled", fndecl);
-  return target;
+  return target ? target : const0_rtx;
  }
  
switch (d->builtin_type)

diff --git a/gcc/testsuite/gcc.target/loongarch/pr118561.c 
b/gcc/testsuite/gcc.target/loongarch/pr118561.c
new file mode 100644
index 000..81a776eada3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr118561.c
@@ -0,0 +1,9 @@
+/* PR target/118561: ICE with -mfpu=none */
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=loongarch64 -mfpu=none" } */
+
+int
+test (void)
+{
+  return __builtin_loongarch_movfcsr2gr (0); /* { dg-error "built-in function 
'__builtin_loongarch_movfcsr2gr' is not enabled" } */
+}




[patch,avr] Add support for a Compact Vector Table

2025-02-06 Thread Georg-Johann Lay

Some AVR devices support a Compact Vector Table.

The support is provided by means of a startup code file
crt-cvt.o from AVR-LibC that can be linked instead
of the traditional crt.o.

This patch adds a new command line option -mcvt that links
that CVT startup code (or issues an error when the device
doesn't support a CVT).

Ok for trunk?

Johann

--

AVR: Add support for a Compact Vector Table (CVT).

Some AVR devices support a CVT:

-  Devices from the 0-series, 1-series, 2-series.
-  AVR16, AVR32, AVR64, AVR128 devices.

The support is provided by means of a startup code file
crt-cvt.o from AVR-LibC that can be linked instead
of the traditional crt.o.

This patch adds a new command line option -mcvt that links
that CVT startup code (or issues an error when the device
doesn't support a CVT).

PR target/118764
gcc/
* config/avr/avr.opt (-mcvt): New target option.
* config/avr/avr-arch.h (AVR_CVT): New enum value.
* config/avr/avr-mcus.def: Add AVR_CVT flag for devices that
support it.
* config/avr/avr.cc (avr_handle_isr_attribute) [TARGET_CVT]: Issue
an error when a vector number larger that 3 is used.
* config/avr/gen-avr-mmcu-specs.cc (McuInfo.have_cvt): New property.
(print_mcu) <*avrlibc_startfile>: Use crt-cvt.o depending
on -mcvt (or issue an error when the device doesn't support a CVT).
* doc/invoke.texi (AVR Options): Document -mcvt.AVR: Add support for a Compact Vector Table (CVT).

Some AVR devices support a CVT:

-  Devices from the 0-series, 1-series, 2-series.
-  AVR16, AVR32, AVR64, AVR128 devices.

The support is provided by means of a startup code file
crt-cvt.o from AVR-LibC that can be linked instead
of the traditional crt.o.

This patch adds a new command line option -mcvt that links
that CVT startup code (or issues an error when the device
doesn't support a CVT).

PR target/118764
gcc/
* config/avr/avr.opt (-mcvt): New target option.
* config/avr/avr-arch.h (AVR_CVT): New enum value.
* config/avr/avr-mcus.def: Add AVR_CVT flag for devices that
support it.
* config/avr/avr.cc (avr_handle_isr_attribute) [TARGET_CVT]: Issue
an error when a vector number larger that 3 is used.
* config/avr/gen-avr-mmcu-specs.cc (McuInfo.have_cvt): New property.
(print_mcu) <*avrlibc_startfile>: Use crt-cvt.o depending
on -mcvt (or issue an error when the device doesn't support a CVT).
* doc/invoke.texi (AVR Options): Document -mcvt.

diff --git a/gcc/config/avr/avr-arch.h b/gcc/config/avr/avr-arch.h
index b5b3606c0d1..efd7b146574 100644
--- a/gcc/config/avr/avr-arch.h
+++ b/gcc/config/avr/avr-arch.h
@@ -158,6 +158,14 @@ AVR_ERRATA_SKIP
http://www.atmel.com/dyn/resources/prod_documents/doc2494.pdf
http://www.atmel.com/dyn/resources/prod_documents/doc1436.pdf
 
+AVR_CVT
+  The device supports a CVT (Compact Vector Table) which can be selected
+  with -mcvt, which links startup-code crt-cvt.o instead of the
+  usual crt.o.  This assumes that AVR-LibC implements Issue #1010.
+https://github.com/avrdudes/avr-libc/issues/1010
+  crt-cvt.o also pulls in __do_cvt_init from lib.a which sets
+  bit CPUINT_CTRLA.CPUINT_CVT in order to activate the CVT.
+
 AVR_ISA_RCALL
   Always use RJMP / RCALL and assume JMP / CALL are not available.
   This affects multilib selection via specs generation and -mshort-calls.
@@ -198,14 +206,16 @@ AVR_ISA_FLMAP
 enum avr_device_specific_features
 {
   AVR_ISA_NONE,
-  AVR_ISA_RMW = 0x1, /* device has RMW instructions. */
+  AVR_CVT = 0x1, /* Device supports a "Compact Vector Table" (-mcvt)
+			as configured in field CPUINT_CTRLA.CPUINT_CVT. */
   AVR_SHORT_SP= 0x2, /* Stack Pointer has 8 bits width. */
-  AVR_ERRATA_SKIP = 0x4, /* device has a core erratum. */
-  AVR_ISA_LDS = 0x8, /* whether LDS / STS is valid for all data in static
-			storage.  Only useful for reduced Tiny.	 */
-  AVR_ISA_RCALL	  = 0x10, /* Use RJMP / RCALL even though JMP / CALL
+  AVR_ERRATA_SKIP = 0x4, /* Device has a core erratum. */
+  AVR_ISA_RMW = 0x8, /* Device has RMW instructions. */
+  AVR_ISA_LDS = 0x10, /* Whether LDS / STS is valid for all data in static
+			 storage.  Only useful for reduced Tiny.	 */
+  AVR_ISA_RCALL	  = 0x20, /* Use RJMP / RCALL even though JMP / CALL
 			 are available (-mshort-calls).	 */
-  AVR_ISA_FLMAP	  = 0x20  /* Has NVMCTRL_CTRLB.FLMAP to select which 32 KiB
+  AVR_ISA_FLMAP	  = 0x40  /* Has NVMCTRL_CTRLB.FLMAP to select which 32 KiB
 			 block of program memory is visible in the RAM
 			 address space.	 */
 };
diff --git a/gcc/config/avr/avr-mcus.def b/gcc/config/avr/avr-mcus.def
index b717749b67a..9f79a9a4579 100644
--- a/gcc/config/avr/avr-mcus.def
+++ b/gcc/config/avr/avr-mcus.def
@@ -49,7 +49,7 @@
 	ARCH	Specifies the 

[PATCH] late-combine: Tighten register class check [PR108840]

2025-02-06 Thread Richard Sandiford
gcc.target/aarch64/pr108840.c has failed since r15-268-g9dbff9c05520
(which means that I really ought to have looked at it earlier).

The test wants us to fold an SImode AND into all shifts that use it.
This is something that late-combine is supposed to do, but:

(1) the pre-RA pass chickened out because of a register pressure check

(2) the post-RA pass can't handle it, because the shift uses are in
QImode and the sets are in SImode

Both are things that would be good to fix.  But (1) is particularly
silly.  The constraints on the shift have "rk" for the destination
(so allowing the stack pointer) and "r" for the first source.
Including the stack pointer made the destination seem more permissive
than the source.

The intention was instead to check whether there are any
*allocatable* registers in the destination class that aren't
present in the source.

That's enough for all tests but the last one.  The last one still
fails because combine merges the final shift with the move into
the hard return register, giving an arithmetic instruction with
a hard register destination.  Pre-RA late-combine currently punts
on those, again due to register pressure concerns.  That too is
something I'd like to relax, but not for GCC 15.  In the interim,
the best thing seems to be to disable combine for the test.

Boostrapped & regression-tested on aarch64-linux-gnu and
x86_64-linux-gnu.  OK to install?

Richard


gcc/
PR rtl-optimization/108840
* late-combine.cc (late_combine::check_register_pressure):
Take only allocatable registers into account when checking
the permissiveness of register classes.

gcc/testsuite/
PR rtl-optimization/108840
* gcc.target/aarch64/pr108840.c: Run at -O2 but disable combine.
---
 gcc/late-combine.cc | 10 --
 gcc/testsuite/gcc.target/aarch64/pr108840.c |  2 +-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/gcc/late-combine.cc b/gcc/late-combine.cc
index 1707ceebd5f..90d7ef09583 100644
--- a/gcc/late-combine.cc
+++ b/gcc/late-combine.cc
@@ -552,8 +552,14 @@ late_combine::check_register_pressure (insn_info *insn, 
rtx set)
  // Make sure that the source operand's class is at least as
  // permissive as the destination operand's class.
  auto src_class = alternative_class (alt, i);
- if (!reg_class_subset_p (dest_class, src_class))
-   return false;
+ if (dest_class != src_class)
+   {
+ auto extra_dest_regs = (reg_class_contents[dest_class]
+ & ~reg_class_contents[src_class]
+ & ~fixed_reg_set);
+ if (!hard_reg_set_empty_p (extra_dest_regs))
+   return false;
+   }
 
  // Make sure that the source operand occupies no more hard
  // registers than the destination operand.  This mostly matters
diff --git a/gcc/testsuite/gcc.target/aarch64/pr108840.c 
b/gcc/testsuite/gcc.target/aarch64/pr108840.c
index 804c1cd9156..7e1ea6fa4fe 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr108840.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr108840.c
@@ -1,6 +1,6 @@
 /* PR target/108840.  Check that the explicit &31 is eliminated.  */
 /* { dg-do compile } */
-/* { dg-options "-O" } */
+/* { dg-options "-O2 -fno-tree-vectorize -fdisable-rtl-combine" } */
 
 int
 foo (int x, int y)
-- 
2.25.1



Re: [PATCH]middle-end: Remove unused internal function after IVopts cleanup [PR118756]

2025-02-06 Thread Richard Biener
On Thu, 6 Feb 2025, Tamar Christina wrote:

> Hi All,
> 
> It seems that after my IVopts patches the function contain_complex_addr_expr
> became unused and clang is rightfully complaining about it.
> 
> This removes the unused internal function.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?

OK.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   PR tree-optimization/19202
>   * tree-ssa-loop-ivopts.cc (contain_complex_addr_expr): Remove.
> 
> ---
> diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc
> index 
> 989321137df93349f8395a9f746f73eda52fd74f..e37b24062f730e490ae86bd9248e61102ad1a0f0
>  100644
> --- a/gcc/tree-ssa-loop-ivopts.cc
> +++ b/gcc/tree-ssa-loop-ivopts.cc
> @@ -1149,34 +1149,6 @@ determine_base_object (struct ivopts_data *data, tree 
> expr)
>return obj;
>  }
>  
> -/* Return true if address expression with non-DECL_P operand appears
> -   in EXPR.  */
> -
> -static bool
> -contain_complex_addr_expr (tree expr)
> -{
> -  bool res = false;
> -
> -  STRIP_NOPS (expr);
> -  switch (TREE_CODE (expr))
> -{
> -case POINTER_PLUS_EXPR:
> -case PLUS_EXPR:
> -case MINUS_EXPR:
> -  res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
> -  res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
> -  break;
> -
> -case ADDR_EXPR:
> -  return (!DECL_P (TREE_OPERAND (expr, 0)));
> -
> -default:
> -  return false;
> -}
> -
> -  return res;
> -}
> -
>  /* Allocates an induction variable with given initial value BASE and step 
> STEP
> for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
>  
> 
> 
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [PATCH] vect: Move induction IV increments [PR110449]

2025-02-06 Thread Richard Biener
On Thu, 6 Feb 2025, Richard Sandiford wrote:

> In this PR, we used to generate:
> 
>  .L6:
> mov v30.16b, v31.16b
> faddv31.4s, v31.4s, v27.4s
> faddv29.4s, v30.4s, v28.4s
> stp q30, q29, [x0]
> add x0, x0, 32
> cmp x1, x0
> bne .L6
> 
> for an unrolled induction in:
> 
>   for (int i = 0; i < 1024; i++)
> {
>   arr[i] = freq;
>   freq += step;
> }
> 
> with the problem being the unnecessary MOV.
> 
> The main induction IV was incremented by VF * step == 2 * nunits * step,
> and then nunits * step was added for the second store to arr.
> 
> The original patch for the PR (r14-2367-g224fd59b2dc8) avoided the MOV
> by incrementing the IV by nunits * step twice.  The problem with that
> approach is that it doubles the loop-carried latency.  This change was
> deliberately not preserved when moving from loop-vect to SLP and so
> the test started failing again after r15-3509-gd34cda720988.
> 
> I think the main problem is that we put the IV increment in the wrong
> place.  Normal IVs created by create_iv are placed before the exit
> condition where possible, but vectorizable_induction instead always
> inserted them at the start of the loop body.  The only use of the
> incremented IV is by the phi node, so the effect is to make both
> the old and new IV values live for the whole loop body, which is
> why we need the MOV.
> 
> The simplest fix therefore seems to be to reuse the create_iv logic.
> 
> Bootstrapped & regression-tested on aarch64-linus-gnu and
> x86_64-linux-gnu.  As Richi suggested, I also tested vect.exp
> using unix/-mavx512f/--param=vect-partial-vector-usage=2.
> OK to install?

OK.

Thanks,
Richard.

> Richard
> 
> 
> gcc/
>   PR tree-optimization/110449
>   * tree-ssa-loop-manip.h (insert_iv_increment): Declare.
>   * tree-ssa-loop-manip.cc (insert_iv_increment): New function,
>   split out from...
>   (create_iv): ...here and generalized to gimple_seqs.
>   * tree-vect-loop.cc (vectorizable_induction): Use
>   standard_iv_increment_position and insert_iv_increment
>   to insert the IV increment.
> 
> gcc/testsuite/
>   PR tree-optimization/110449
>   * gcc.target/aarch64/pr110449.c: Expect an increment by 8.0,
>   but test that there is no MOV.
> ---
>  gcc/testsuite/gcc.target/aarch64/pr110449.c | 25 +
>  gcc/tree-ssa-loop-manip.cc  | 62 -
>  gcc/tree-ssa-loop-manip.h   |  1 +
>  gcc/tree-vect-loop.cc   |  6 +-
>  4 files changed, 57 insertions(+), 37 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr110449.c 
> b/gcc/testsuite/gcc.target/aarch64/pr110449.c
> index bb3b6dcfe08..51ca3f4b816 100644
> --- a/gcc/testsuite/gcc.target/aarch64/pr110449.c
> +++ b/gcc/testsuite/gcc.target/aarch64/pr110449.c
> @@ -1,8 +1,10 @@
>  /* { dg-do compile } */
>  /* { dg-options "-Ofast -mcpu=neoverse-n2 --param 
> aarch64-vect-unroll-limit=2" } */
> -/* { dg-final { scan-assembler-not "8.0e\\+0" } } */
> +/* { dg-final { scan-assembler {, #?8.0e\+0} } } */
> +/* { dg-final { scan-assembler-not {\tmov\tv} } } */
>  
> -/* Calcualte the vectorized induction with smaller step for an unrolled loop.
> +/* Insert the induction IV updates before the exit condition, rather than
> +   at the start of the loop body.
>  
> before (suggested_unroll_factor=2):
> fmovs30, 8.0e+0
> @@ -19,15 +21,16 @@
> bne .L6
>  
> after:
> -   fmovs31, 4.0e+0
> -   dup v29.4s, v31.s[0]
> - .L6:
> -   faddv30.4s, v31.4s, v29.4s
> -   stp q31, q30, [x0]
> -   add x0, x0, 32
> -   faddv31.4s, v29.4s, v30.4s
> -   cmp x0, x1
> -   bne .L6  */
> +   fmovs31, 8.0e+0
> +   fmovs29, 4.0e+0
> +   dup v31.4s, v31.s[0]
> +   dup v29.4s, v29.s[0]
> + .L2:
> +   faddv30.4s, v0.4s, v29.4s
> +   stp q0, q30, [x0], 32
> +   faddv0.4s, v0.4s, v31.4s
> +   cmp x1, x0
> +   bne .L2  */
>  
>  void
>  foo2 (float *arr, float freq, float step)
> diff --git a/gcc/tree-ssa-loop-manip.cc b/gcc/tree-ssa-loop-manip.cc
> index 6ceb9df370b..2907fa6532d 100644
> --- a/gcc/tree-ssa-loop-manip.cc
> +++ b/gcc/tree-ssa-loop-manip.cc
> @@ -47,6 +47,39 @@ along with GCC; see the file COPYING3.  If not see
> so that we can free them all at once.  */
>  static bitmap_obstack loop_renamer_obstack;
>  
> +/* Insert IV increment statements STMTS before or after INCR_POS;
> +   AFTER selects which.  INCR_POS and AFTER can be computed using
> +   standard_iv_increment_position.  */
> +
> +void
> +insert_iv_increment (gimple_stmt_iterator *incr_pos, bool after,
> +  gimple_seq stmts)
> +{
> +  /* Prevent the increment from inheriting a bogus location if it is not put
> + immediately after a statement whose location is known.  */
> +  if (after)

Re: [RFC][libgcc][PATCH] Fix two issues in libgcc/unwind-dw2-fde.c.

2025-02-06 Thread Richard Biener
On Wed, Feb 5, 2025 at 4:40 PM Qing Zhao  wrote:
>
>
>
> > On Feb 5, 2025, at 07:46, Richard Biener  wrote:
> >
> > On Tue, Feb 4, 2025 at 10:12 PM Qing Zhao  wrote:
> >>
> >> Hi,
> >>
> >> One of our big application segv in libgcc code while unwinding the stack.
> >>
> >> This is a random crash while the application throws a c++ exception and
> >> unwinds the stack. Incidents are random and never can be reproduced by any
> >> test case.
> >>
> >> The libgcc that is used is based on GCC 8.
> >>
> >> Fortunately, we got some information through the stack trace, and narrowed
> >> down the crash is in the routine "init_object" of libgcc/unwind-dw2-pde.c:
> >>
> >> 777   count = classify_object_over_fdes (ob, ob->u.single);
> >>
> >> when loading the 2nd parameter of the call to "classify_object_over_fdes".
> >> i.e, the address that is pointed by "ob->u.single" is invalid.
> >>
> >> And the outer caller we can track back is the following line 1066 of the 
> >> routine
> >> "_Unwind_Find_FDE":
> >>
> >> 1060   /* Classify and search the objects we've not yet processed.  */
> >> 1061   while ((ob = unseen_objects))
> >> 1062 {
> >> 1063   struct object **p;
> >> 1064
> >> 1065   unseen_objects = ob->next;
> >> 1066   f = search_object (ob, pc);
> >>
> >> Then we suspected that the construction of the static variable 
> >> "unseen_objects"
> >> might have some bug.
> >>
> >> Then after studying the source code that constructs "unseen_objects" in
> >> libgcc/unwind-dw2-pde.c, I found an issue in the routines 
> >> "__register_frame"
> >> and "__register_frame_table" as following:
> >>
> >> 127 void
> >> 128 __register_frame (void *begin)
> >> 129 {
> >> 130   struct object *ob;
> >> 131
> >> 132   /* If .eh_frame is empty, don't register at all.  */
> >> 133   if (*(uword *) begin == 0)
> >> 134 return;
> >> 135
> >> 136   ob = malloc (sizeof (struct object));
> >> 137   __register_frame_info (begin, ob);
> >> 138 }
> >>
> >> 181 void
> >> 182 __register_frame_table (void *begin)
> >> 183 {
> >> 184   struct object *ob = malloc (sizeof (struct object));
> >> 185   __register_frame_info_table (begin, ob);
> >> 186 }
> >> 187
> >>
> >> In the above, one obvious issue in the source code is at line 136, line 
> >> 137,
> >> and line 184 and line 185: the return value of call to "malloc" is not 
> >> checked
> >> against NULL before it was passed as the second parameter of the follwoing 
> >> call.
> >>
> >> This might cause unpredicted random behavior.
> >>
> >> I checked the latest trunk GCC libgcc, the same issue is there too.
> >>
> >> This patch is for the latest trunk GCC.
> >>
> >> Please let me know any comments on this?
> >
> > I think I've seen this elsewhere —
>
> Do you mean that you saw this problem (malloc return NULL during 
> register_frame) previously?

It was probably reported to us (SUSE) from a customer/partner, also
from a (llvm?) JIT context.

> > the issue is the unwind register API does
> > not allow for failures but I also think calling abort() is bad.
>
> Okay, I see.
>
> >
> > Are you calling this from a JIT context or so?
>
> I will ask the bug submitter on this to make sure.
>
> > We're assuming that at program
> > start malloc() will not fail.
>
> So, the routine __register_frame is only called at the _start_ of a program?
> >
> > The proper solution is probably to add an alternate ABI which has a way to 
> > fail
> > during registration.
>
> Any suggestion on this (or any other routine I can refer to?)
>
>
> Thanks a lot for the help.
>
> Qing
> >
> > Richard.
> >
> >> thanks.
> >>
> >> Qing
> >>
> >>
> >> ===
> >>
> >> Fix two issues in libgcc/unwind-dw2-fde.c:
> >>
> >> A. The returned value of call to malloc is not checked against NULL.
> >>   This might cause unpredicted behavior during stack unwinding.
> >>
> >> B. Check for null begin parameter (as well as pointer to null) in
> >>   __register_frame_info_table_bases and __register_frame_table.
> >>
> >> libgcc/ChangeLog:
> >>
> >>* unwind-dw2-fde.c (__register_frame): Check the return value of 
> >> call
> >>to malloc.
> >>(__register_frame_info_table_bases): Check for null begin parameter.
> >>(__register_frame_table): Check the return value of call to malloc,
> >>Check for null begin parameter.
> >> ---
> >> libgcc/unwind-dw2-fde.c | 12 
> >> 1 file changed, 12 insertions(+)
> >>
> >> diff --git a/libgcc/unwind-dw2-fde.c b/libgcc/unwind-dw2-fde.c
> >> index cdfd3974c99..f0bc29d682a 100644
> >> --- a/libgcc/unwind-dw2-fde.c
> >> +++ b/libgcc/unwind-dw2-fde.c
> >> @@ -169,6 +169,8 @@ __register_frame (void *begin)
> >> return;
> >>
> >>   ob = malloc (sizeof (struct object));
> >> +  if (ob == NULL)
> >> +abort ();
> >>   __register_frame_info (begin, ob);
> >> }
> >>
> >> @@ -180,6 +182,10 @@ void
> >> __register_frame_info_table_bases (void *begin, struct object *ob,
> >>   void *tbase, void 

[PATCH] c++: Don't use CLEANUP_EH_ONLY for new expression cleanup [PR118763]

2025-02-06 Thread Jakub Jelinek
Hi!

The following testcase is miscompiled since r12-6325 stopped
preevaluating the initializers for new expression.
If evaluating the initializers throws, there is a correct cleanup
for that, but it is marked CLEANUP_EH_ONLY.  While in standard
C++ that is just fine, if it has statement expressions, it can
return or goto out of the expression and we should delete the
pointer in that case too.

There is already a sentry variable initialized to true and
set to false after everything is initialized and used as a guard
for the cleanup, so just removing the CLEANUP_EH_ONLY flag does
everything we need.  And in the normal case of the initializer
not using statement expressions at least with -O2 we get the same code,
while the change changes one
try { sentry = true; ... sentry = false; } catch { if (sentry) delete ...; }
into 
try { sentry = true; ... sentry = false; } finally { if (sentry) delete ...; }
optimizations will see that sentry is false when reaching the finally
other than through an exception.

Though, wonder what other CLEANUP_EH_ONLY cleanups might be an issue
with statement expressions.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2025-02-06  Jakub Jelinek  

PR c++/118763
* init.cc (build_new_1): Don't set CLEANUP_EH_ONLY.

* g++.dg/asan/pr118763.C: New test.

--- gcc/cp/init.cc.jj   2025-02-04 21:54:35.102087948 +0100
+++ gcc/cp/init.cc  2025-02-06 12:25:17.624810169 +0100
@@ -3842,7 +3842,6 @@ build_new_1 (vec **placemen
  tree end, sentry, begin;
 
  begin = get_target_expr (boolean_true_node);
- CLEANUP_EH_ONLY (begin) = 1;
 
  sentry = TARGET_EXPR_SLOT (begin);
 
--- gcc/testsuite/g++.dg/asan/pr118763.C.jj 2025-02-06 12:23:50.724022482 
+0100
+++ gcc/testsuite/g++.dg/asan/pr118763.C2025-02-06 12:23:29.407319860 
+0100
@@ -0,0 +1,15 @@
+// PR c++/118763
+// { dg-do run }
+
+int *
+foo (bool x)
+{
+  return new int (({ if (x) return nullptr; 1; }));
+}
+
+int
+main ()
+{
+  delete foo (true);
+  delete foo (false);
+}

Jakub



[PATCH] c++: Use cplus_decl_attributes rather than decl_attributes in grokdecl [PR118773]

2025-02-06 Thread Jakub Jelinek
Hi!

My r15-3046 change regressed the first half of the following testcase.
When it calls decl_attributes, it doesn't handle attributes with
dependent arguments correctly and so is now rejected that N is not
a constant integer during template parsing.

I've actually followed the pointer/reference case which did that
too and that one has been failing for a couple of years on the
second part of the testcase.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Note, there is also
  if (decl_context != PARM && decl_context != TYPENAME)
/* Assume that any attributes that get applied late to
   templates will DTRT when applied to the declaration
   as a whole.  */
late_attrs = splice_template_attributes (&attrs, type);
  returned_attrs = decl_attributes (&type,
attr_chainon (returned_attrs,
  attrs),
attr_flags);
  returned_attrs = attr_chainon (late_attrs, returned_attrs);
call directly to decl_attributes in grokdeclarator, but this one handles
the splicing manually, so maybe it is ok as is (and I don't have a testcase
of anything misbehaving for that).

2025-02-06  Jakub Jelinek  

PR c++/118773
* decl.cc (grokdeclarator): Use cplus_decl_attributes rather than
decl_attributes for std_attributes on pointer and array types.

* g++.dg/cpp0x/gen-attrs-87.C: New test.
* g++.dg/gomp/attrs-3.C: Adjust expected diagnostics.

--- gcc/cp/decl.cc.jj   2025-02-06 09:01:25.600721993 +0100
+++ gcc/cp/decl.cc  2025-02-06 15:59:20.707070240 +0100
@@ -13846,7 +13846,7 @@ grokdeclarator (const cp_declarator *dec
 
   The optional attribute-specifier-seq appertains to the
   array type.  */
-   decl_attributes (&type, declarator->std_attributes, 0);
+   cplus_decl_attributes (&type, declarator->std_attributes, 0);
  break;
 
case cdk_function:
@@ -14522,8 +14522,7 @@ grokdeclarator (const cp_declarator *dec
 [the optional attribute-specifier-seq (7.6.1) appertains
  to the pointer and not to the object pointed to].  */
  if (declarator->std_attributes)
-   decl_attributes (&type, declarator->std_attributes,
-0);
+   cplus_decl_attributes (&type, declarator->std_attributes, 0);
 
  ctype = NULL_TREE;
  break;
--- gcc/testsuite/g++.dg/cpp0x/gen-attrs-87.C.jj2025-02-06 
16:14:27.247387432 +0100
+++ gcc/testsuite/g++.dg/cpp0x/gen-attrs-87.C   2025-02-06 16:13:57.413804728 
+0100
@@ -0,0 +1,10 @@
+// PR c++/118773
+// { dg-do compile { target c++11 } }
+// { dg-options "" }
+
+template 
+using T = char[4] [[gnu::aligned (N)]];
+T<2> t;
+template 
+using U = char *[[gnu::aligned (N)]]*;
+U<__alignof (char *)> u;
--- gcc/testsuite/g++.dg/gomp/attrs-3.C.jj  2024-08-30 16:09:01.230290254 
+0200
+++ gcc/testsuite/g++.dg/gomp/attrs-3.C 2025-02-06 19:23:02.331719653 +0100
@@ -32,10 +32,10 @@ foo ()
 i++;
   auto a = [] () [[omp::directive (threadprivate (t1))]] {};   // { dg-error 
"'omp::directive' not allowed to be specified in this context" }
   int [[omp::directive (threadprivate (t2))]] b;   // { dg-warning 
"attribute ignored" }
-  int *[[omp::directive (threadprivate (t3))]] c;  // { dg-warning 
"'omp::directive' scoped attribute directive ignored" }
-  int &[[omp::directive (threadprivate (t4))]] d = b;  // { dg-warning 
"'omp::directive' scoped attribute directive ignored" }
+  int *[[omp::directive (threadprivate (t3))]] c;  // { dg-error 
"'omp::directive' not allowed to be specified in this context" }
+  int &[[omp::directive (threadprivate (t4))]] d = b;  // { dg-error 
"'omp::directive' not allowed to be specified in this context" }
   typedef int T [[omp::directive (threadprivate (t5))]];   // { dg-error 
"'omp::directive' not allowed to be specified in this context" }
   int e [[omp::directive (threadprivate (t6))]] [10];  // { dg-error 
"'omp::directive' not allowed to be specified in this context" }
-  int f[10] [[omp::directive (threadprivate (t6))]];   // { dg-warning 
"'omp::directive' scoped attribute directive ignored" }
+  int f[10] [[omp::directive (threadprivate (t6))]];   // { dg-error 
"'omp::directive' not allowed to be specified in this context" }
   struct [[omp::directive (threadprivate (t7))]] S {}; // { dg-error 
"'omp::directive' not allowed to be specified in this context" }
 }

Jakub



[PATCH v2 3/5][STAGE1] ctf: translate annotation DIEs to internal ctf

2025-02-06 Thread David Faust
Translate DW_TAG_GNU_annotation DIEs created for C attributes
btf_decl_tag and btf_type_tag into an in-memory representation in the
CTF/BTF container.  They will be output in BTF as BTF_KIND_DECL_TAG and
BTF_KIND_TYPE_TAG records.

The new CTF kinds used to represent these annotations, CTF_K_DECL_TAG
and CTF_K_TYPE_TAG, are expected to be formalized in the next version of
the CTF specification.  For now they only exist in memory as a
translation step to BTF, and are not emitted when generating CTF
information.

gcc/
* ctfc.cc (ctf_dtu_d_union_selector): Handle CTF_K_DECL_TAG and
CTF_K_TYPE_TAG.
(ctf_add_type_tag, ctf_add_decl_tag): New.
(ctf_add_variable): Return the new ctf_dvdef_ref rather than zero.
(new_ctf_container): Initialize new members.
(ctfc_delete_container): Deallocate new members.
* ctfc.h (ctf_dvdef, ctf_dvdef_t, ctf_dvdef_ref): Move forward
declarations earlier in file.
(ctf_decl_tag_t): New typedef.
(ctf_dtdef): Add ctf_decl_tag_t member to dtd_u union.
(ctf_dtu_d_union_enum): Add new CTF_DTU_D_TAG enumerator.
(ctf_container): Add ctfc_tags vector and ctfc_type_tags_map hash_map
members.
(ctf_add_type_tag, ctf_add_decl_tag): New function protos.
(ctf_add_variable): Change prototype return type to ctf_dvdef_ref.
* dwarf2ctf.cc (gen_ctf_type_tags, gen_ctf_decl_tags)
(gen_ctf_decl_tags_for_var): New static functions.
(gen_ctf_sou_type): Handle decl tags.
(gen_ctf_function_type): Likewise.
(gen_ctf_variable): Likewise.
(gen_ctf_function): Likewise.
(gen_ctf_type): Handle type tags.

gcc/testsuite
* gcc.dg/debug/ctf/ctf-decl-tag-1.c: New test.
* gcc.dg/debug/ctf/ctf-type-tag-1.c: New test.

include/
* ctf.h (CTF_K_DECL_TAG, CTF_K_TYPE_TAG): New defines.
---
 gcc/ctfc.cc   |  70 +++-
 gcc/ctfc.h|  41 -
 gcc/dwarf2ctf.cc  | 152 +-
 .../gcc.dg/debug/ctf/ctf-decl-tag-1.c |  31 
 .../gcc.dg/debug/ctf/ctf-type-tag-1.c |  19 +++
 include/ctf.h |   4 +
 6 files changed, 303 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-decl-tag-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-type-tag-1.c

diff --git a/gcc/ctfc.cc b/gcc/ctfc.cc
index 51511d69baa..c478fa0a136 100644
--- a/gcc/ctfc.cc
+++ b/gcc/ctfc.cc
@@ -107,6 +107,9 @@ ctf_dtu_d_union_selector (ctf_dtdef_ref ctftype)
   return CTF_DTU_D_ARGUMENTS;
 case CTF_K_SLICE:
   return CTF_DTU_D_SLICE;
+case CTF_K_DECL_TAG:
+case CTF_K_TYPE_TAG:
+  return CTF_DTU_D_TAG;
 default:
   /* The largest member as default.  */
   return CTF_DTU_D_ARRAY;
@@ -445,6 +448,58 @@ ctf_add_reftype (ctf_container_ref ctfc, uint32_t flag, 
ctf_dtdef_ref ref,
   return dtd;
 }
 
+ctf_dtdef_ref
+ctf_add_type_tag (ctf_container_ref ctfc, uint32_t flag, const char *value,
+ ctf_dtdef_ref ref_dtd)
+{
+  ctf_dtdef_ref dtd;
+   /* Create a DTD for the tag, but do not place it in the regular types list;
+  CTF format does not (yet) encode tags.  */
+  dtd = ggc_cleared_alloc ();
+
+  dtd->dtd_name = ctf_add_string (ctfc, value, &(dtd->dtd_data.ctti_name),
+ CTF_AUX_STRTAB);
+  /* A single DW_TAG_GNU_annotation DIE may be referenced by multiple DIEs,
+ e.g. when multiple distinct types specify the same type tag.  We will
+ synthesize multiple CTF DTD records in that case, so we cannot tie them
+ all to the same key (the DW_TAG_GNU_annotation DIE) in ctfc_types.  */
+  dtd->dtd_key = NULL;
+  dtd->ref_type = ref_dtd;
+  dtd->dtd_data.ctti_info = CTF_TYPE_INFO (CTF_K_TYPE_TAG, flag, 0);
+  dtd->dtd_u.dtu_tag.ref_var = NULL; /* Not used for type tags.  */
+  dtd->dtd_u.dtu_tag.component_idx = 0; /* Not used for type tags.  */
+
+  /* Insert tag directly into the tag list.  Type ID will be assigned later.  
*/
+  vec_safe_push (ctfc->ctfc_tags, dtd);
+  return dtd;
+}
+
+ctf_dtdef_ref
+ctf_add_decl_tag (ctf_container_ref ctfc, uint32_t flag, const char *value,
+ ctf_dtdef_ref ref_dtd, uint32_t comp_idx)
+{
+   ctf_dtdef_ref dtd;
+   /* Create a DTD for the tag, but do not place it in the regular types list;
+  ctf format does not (yet) encode tags.  */
+  dtd = ggc_cleared_alloc ();
+
+  dtd->dtd_name = ctf_add_string (ctfc, value, &(dtd->dtd_data.ctti_name),
+ CTF_AUX_STRTAB);
+  /* A single DW_TAG_GNU_annotation DIE may be referenced by multiple DIEs,
+ e.g. when multiple distinct declarations specify the same decl tag.
+ We will synthesize multiple CTF DTD records in that case, so we cannot tie
+ them all to the same key (the DW_TAG_GNU_annotation DIE) in ctfc_types.  
*/
+  dtd->dtd_key = NULL;
+  dtd->ref_type 

Re: [PATCH] RISC-V: Move UNSPEC_SSP_SET and UNSPEC_SSP_TEST to correct enum

2025-02-06 Thread Jeff Law




On 2/6/25 9:30 AM, Craig Blackmore wrote:

stack_protect_{set,test}_ were showing up in RTL dumps as
UNSPEC_COPYSIGN and UNSPEC_FMV_X_W due to UNSPEC_SSP_SET and
UNSPEC_SSP_TEST being put in the unspecv enum instead of unspec.

gcc/ChangeLog:

* config/riscv/riscv.md: Move UNSPEC_SSP_SET and UNSPEC_SSP_TEST
to unspec enum.

Thanks.  Pushed to the trunk.
jeff



[PATCH v2 4/5][STAGE1] btf: generate and output DECL_TAG and TYPE_TAG records

2025-02-06 Thread David Faust
Support the btf_decl_tag and btf_type_tag attributes in BTF by creating
and emitting BTF_KIND_DECL_TAG and BTF_KIND_TYPE_TAG records,
respectively, for them.

Some care is required when -gprune-btf is in effect to avoid emitting
decl or type tags for declarations or types which have been pruned and
will not be emitted in BTF.

gcc/
* btfout.cc (get_btf_kind): Handle DECL_TAG and TYPE_TAG kinds.
(btf_calc_num_vbytes): Likewise.
(btf_asm_type): Likewise.
(output_asm_btf_vlen_bytes): Likewise.
(output_btf_tags): New.
(btf_output): Call it here.
(btf_add_used_type): Replace with simple wrapper around...
(btf_add_used_type_1): ...the implementation.  Handle
BTF_KIND_DECL_TAG and BTF_KIND_TYPE_TAG.
(btf_add_vars): Update btf_add_used_type call.
(btf_assign_tag_ids): New.
(btf_mark_type_used): Update btf_add_used_type call.
(btf_collect_pruned_types): Likewise.  Handle type and decl tags.
(btf_finish): Call btf_assign_tag_ids.

gcc/testsuite/
* gcc.dg/debug/btf/btf-decl-tag-1.c: New test.
* gcc.dg/debug/btf/btf-decl-tag-2.c: New test.
* gcc.dg/debug/btf/btf-decl-tag-3.c: New test.
* gcc.dg/debug/btf/btf-decl-tag-4.c: New test.
* gcc.dg/debug/btf/btf-type-tag-1.c: New test.
* gcc.dg/debug/btf/btf-type-tag-2.c: New test.
* gcc.dg/debug/btf/btf-type-tag-3.c: New test.
* gcc.dg/debug/btf/btf-type-tag-4.c: New test.
* gcc.dg/debug/btf/btf-type-tag-5.c: New test.
* gcc.dg/debug/btf/btf-type-tag-6.c: New test.
* gcc.dg/debug/btf/btf-type-tag-c2x-1.c: New test.

include/
* btf.h (BTF_KIND_DECL_TAG, BTF_KIND_TYPE_TAG) New defines.
(struct btf_decl_tag): New.
---
 gcc/btfout.cc | 171 +++---
 .../gcc.dg/debug/btf/btf-decl-tag-1.c |  14 ++
 .../gcc.dg/debug/btf/btf-decl-tag-2.c |  22 +++
 .../gcc.dg/debug/btf/btf-decl-tag-3.c |  22 +++
 .../gcc.dg/debug/btf/btf-decl-tag-4.c |  34 
 .../gcc.dg/debug/btf/btf-type-tag-1.c |  27 +++
 .../gcc.dg/debug/btf/btf-type-tag-2.c |  15 ++
 .../gcc.dg/debug/btf/btf-type-tag-3.c |  21 +++
 .../gcc.dg/debug/btf/btf-type-tag-4.c |  25 +++
 .../gcc.dg/debug/btf/btf-type-tag-5.c |  35 
 .../gcc.dg/debug/btf/btf-type-tag-6.c |  15 ++
 .../gcc.dg/debug/btf/btf-type-tag-c2x-1.c |  23 +++
 include/btf.h |  14 ++
 13 files changed, 414 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-decl-tag-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-decl-tag-2.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-decl-tag-3.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-decl-tag-4.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-type-tag-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-type-tag-2.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-type-tag-3.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-type-tag-4.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-type-tag-5.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-type-tag-6.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-type-tag-c2x-1.c

diff --git a/gcc/btfout.cc b/gcc/btfout.cc
index ff7ea42a961..c00e0c98015 100644
--- a/gcc/btfout.cc
+++ b/gcc/btfout.cc
@@ -141,6 +141,8 @@ get_btf_kind (uint32_t ctf_kind)
 case CTF_K_VOLATILE: return BTF_KIND_VOLATILE;
 case CTF_K_CONST:return BTF_KIND_CONST;
 case CTF_K_RESTRICT: return BTF_KIND_RESTRICT;
+case CTF_K_DECL_TAG: return BTF_KIND_DECL_TAG;
+case CTF_K_TYPE_TAG: return BTF_KIND_TYPE_TAG;
 default:;
 }
   return BTF_KIND_UNKN;
@@ -217,6 +219,7 @@ btf_calc_num_vbytes (ctf_dtdef_ref dtd)
 case BTF_KIND_CONST:
 case BTF_KIND_RESTRICT:
 case BTF_KIND_FUNC:
+case BTF_KIND_TYPE_TAG:
 /* These kinds have no vlen data.  */
   break;
 
@@ -256,6 +259,10 @@ btf_calc_num_vbytes (ctf_dtdef_ref dtd)
   vlen_bytes += vlen * sizeof (struct btf_var_secinfo);
   break;
 
+case BTF_KIND_DECL_TAG:
+  vlen_bytes += sizeof (struct btf_decl_tag);
+  break;
+
 default:
   break;
 }
@@ -452,6 +459,20 @@ btf_asm_type (ctf_dtdef_ref dtd)
 and should write 0.  */
   dw2_asm_output_data (4, 0, "(unused)");
   return;
+case BTF_KIND_DECL_TAG:
+  {
+   if (dtd->ref_type)
+ break;
+   else if (dtd->dtd_u.dtu_tag.ref_var)
+ {
+   /* ref_type is NULL for decl tag attached to a variable.  */
+   ctf_dvdef_ref dvd = dtd->dtd_u.dtu_tag.ref_var;
+   dw2_asm_output_data (4, dvd->dvd_id,
+"btt_type: (BTF_KIND_VAR '%s')",
+dvd->dvd_name);
+   return;
+ }
+  }
 default:
   break;
 }
@@ -801,6 +82

Re: [PATCH v2] c++: Reject cdtors and conversion operators with a single * as return type [PR118306]

2025-02-06 Thread Simon Martin
Hi Jason,

On 6 Feb 2025, at 16:48, Jason Merrill wrote:

> On 2/5/25 2:21 PM, Simon Martin wrote:
>> Hi Jason,
>>
>> On 4 Feb 2025, at 21:23, Jason Merrill wrote:
>>
>>> On 2/4/25 3:03 PM, Jason Merrill wrote:
 On 2/4/25 11:45 AM, Simon Martin wrote:
> On 4 Feb 2025, at 17:17, Jason Merrill wrote:
>
>> On 2/4/25 10:56 AM, Simon Martin wrote:
>>> Hi Jason,
>>>
>>> On 4 Feb 2025, at 16:39, Jason Merrill wrote:
>>>
 On 1/15/25 9:56 AM, Jason Merrill wrote:
> On 1/15/25 7:24 AM, Simon Martin wrote:
>> Hi,
>>
>> On 14 Jan 2025, at 23:31, Jason Merrill wrote:
>>
>>> On 1/14/25 2:13 PM, Simon Martin wrote:
 On 10 Jan 2025, at 19:10, Andrew Pinski wrote:
> On Fri, Jan 10, 2025 at 3:18 AM Simon Martin
> 
> wrote:
>>
>> We currently accept the following invalid code (EDG and
>> MSVC
>> do
>> as
>> well)
>
> clang does too:
> https://github.com/llvm/llvm-project/issues/121706 .
>>
>
> Note it might be useful if a testcase with multiply `*` is
> included

> too:
> ```
> struct A {
>        A ();
> };
> ```
 Thanks, makes sense to add those. Done in the attached
 updated
 revision,
 successfully tested on x86_64-pc-linux-gnu.
>>>
 +/* Check that it's OK to declare a function at ID_LOC with
 the
 indicated TYPE,
 +   TYPE_QUALS and DECLARATOR.  SFK indicates the kind 
 of
 special
 function (if
 +   any) that this function is.  OPTYPE is the type 
 given
 in
 a
 conversion
     operator declaration, or the class type for a
 constructor/destructor.
     Returns the actual return type of the 
 function;
 that
 may
 be
 different
     than TYPE if an error occurs, or for certain
 special
 functions.
 */
 @@ -12361,8 +12362,19 @@ check_special_function_return_type
 (special_function_kind sfk,
      tree type,
      tree optype,
      int 
 type_quals,
 +    const cp_declarator
 *declarator,
 +    location_t id_loc,
>>>
>>> id_loc should be the same as declarator->id_loc?
>> You’re right.
>>
      const
 location_t*
 locations)
      {
 +  /* If TYPE is unspecified, DECLARATOR, if set, should 
 not
 represent a pointer
 + or a reference type.  */
 +  if (type == NULL_TREE
 +  && declarator
 +  && (declarator->kind == cdk_pointer
 +  || declarator->kind == cdk_reference))
 +    error_at (id_loc, "expected unqualified-id before 
 %qs
 token",
 +  declarator->kind == cdk_pointer ? "*" :
 "&");
>>>
>>> ...and id_loc isn't the location of the ptr-operator, it's 
>>> the
>>
>>> location of the identifier, so this indicates the wrong
>>> column.
>>> I
>>> think using declarator->id_loc makes sense, just not
>>> pretending
>>> it's
>>> the location of the *.
>> Good catch, thanks.
>>
>>> Let's give diagnostics more like the others later in the
>>> function
>>> instead of trying to emulate cp_parser_error.
>> Makes sense. This is what the updated patch does, 
>> successfully
>> tested on
>> x86_64-pc-linux-gnu. OK for GCC 16?
>
> OK.

 Does this also fix 118304?  If so, let's go ahead and apply it
 to
 GCC
 15.
>>> I have checked just now, and we still ICE for 118304’s 
>>> testcase
>>> with
>>> that fix.
>>
>> Why doesn't the preeexisting
>>
>> type = void_type_node;
>>
>> in check_special_function_return_type fix the return type and 
>> avoid
>>
>> the ICE?

> We hit the gcc_assert at method.cc:3593, that Marek’s fix
>>
> bypasses.

 Yes, but why doesn't check_special_function_return_type prevent 
 that?
>>>
>>> Ah, because we call it before walking the declarator.  We need to
>>> check again later, perhaps in grokfndecl, that the type is correct.
>>> Perhaps instead 

[committed] [RISC-V] Fix risc-v expected test output after recent iv changes

2025-02-06 Thread Jeff Law
Richard S's recent change to iv increment insertion removed a reg->reg 
move (which was its intent AFAICT).  This triggered a failure on a riscv 
test.


That test was meant to verify that we didn't have an extraneous reg->reg 
move due to a buglet in the risc-v splitters.  Before the 2023 change we 
had two vector reg->reg moves and after the 2023 fix we had just one. 
With Richard's change we have none ;-)  Adjusting test accordingly.


Pushed to the trunk.

Jeff

commit 33e610110c933b0d65aa82d67864bb881768609f
Author: Jeff Law 
Date:   Thu Feb 6 12:37:11 2025 -0700

[RISC-V] Fix risc-v expected test output after recent iv changes

Richard S's recent change to iv increment insertion removed a reg->reg move
(which was its intent AFAICT).  This triggered a failure on a riscv test.

That test was meant to verify that we didn't have an extraneous reg->reg 
move
due to a buglet in the risc-v splitters.  Before the 2023 change we had two
vector reg->reg moves and after the 2023 fix we had just one.  With 
Richard's
change we have none ;-)  Adjusting test accordingly.

Pushed to the trunk.

gcc/testsuite
* gcc.target/riscv/rvv/autovec/madd-split2-1.c: Update expected
output.

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/madd-split2-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/madd-split2-1.c
index 4f99a5f87c4..8cc0c9f407c 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/madd-split2-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/madd-split2-1.c
@@ -10,4 +10,4 @@ foo (long *__restrict a, long *__restrict b, long n)
   return a[1];
 }
 
-/* { dg-final { scan-assembler-times {\tvmv1r\.v} 1 } } */
+/* { dg-final { scan-assembler-not {\tvmv1r\.v} } } */


[PATCH v2 5/5][STAGE1] doc: document btf_type_tag and btf_decl_tag attributes

2025-02-06 Thread David Faust
gcc/
* doc/extend.texi (Common Variable Attributes): Document
btf_decl_tag attribute.
(Common Type Attributes): Document btf_type_tag attribute.
---
 gcc/doc/extend.texi | 68 +
 1 file changed, 68 insertions(+)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 2764597a479..4536cfd7c68 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -8037,6 +8037,41 @@ align them on any target.
 The @code{aligned} attribute can also be used for functions
 (@pxref{Common Function Attributes}.)
 
+@cindex @code{btf_decl_tag} variable attribute
+@item btf_decl_tag (@var{argument})
+The @code{btf_decl_tag} attribute may be used to associate variable
+declarations, struct or union member declarations, function
+declarations, and function parameter declarations with arbitrary strings.
+These strings are not interpreted by the compiler in any way, and have
+no effect on code generation.  Instead, these user-provided strings
+are recorded in DWARF (via @code{DW_AT_GNU_annotation} and
+@code{DW_TAG_GNU_annotation} extensions) and BTF information (via
+@code{BTF_KIND_DECL_TAG} records), and associated to the attributed
+declaration.  If neither DWARF nor BTF information is generated, the
+attribute has no effect.
+
+The argument is treated as an ordinary string in the source language
+with no additional special rules.
+
+The attribute may be supplied multiple times for a single declaration,
+in which case each distinct argument string will be recorded in a
+separate DIE or BTF record, each associated to the declaration.  For
+a single declaration with multiple @code{btf_decl_tag} attributes,
+the order of the @code{DW_TAG_GNU_annotation} DIEs produced is not
+guaranteed to maintain the order of attributes in the source code.
+
+For example:
+
+@smallexample
+int *foo __attribute__ ((btf_decl_tag ("__percpu")));
+@end smallexample
+
+@noindent
+when compiled with @code{-gbtf} results in an additional
+@code{BTF_KIND_DECL_TAG} BTF record to be emitted in the BTF info,
+associating the string ``__percpu'' with the normal @code{BTF_KIND_VAR}
+record for the variable ``foo''.
+
 @cindex @code{counted_by} variable attribute
 @item counted_by (@var{count})
 The @code{counted_by} attribute may be attached to the C99 flexible array
@@ -9226,6 +9261,39 @@ is given by the product of arguments 1 and 2, and that
 @code{malloc_type}, like the standard C function @code{malloc},
 returns an object whose size is given by argument 1 to the function.
 
+@cindex @code{btf_type_tag} type attribute
+@item btf_type_tag (@var{argument})
+The @code{btf_type_tag} attribute may be used to associate (to ``tag'')
+particular types with arbitrary string annotations.  These annotations
+are recorded in debugging info by supported debug formats, currently
+DWARF (via @code{DW_AT_GNU_annotation} and @code{DW_TAG_GNU_annotation}
+extensions) and BTF (via @code{BTF_KIND_TYPE_TAG} records).  These
+annotation strings are not interpreted by the compiler in any way, and
+have no effect on code generation.  If neither DWARF nor BTF
+information is generated, the attribute has no effect.
+
+The argument is treated as an ordinary string in the source language
+with no additional special rules.
+
+The attribute may be supplied multiple times for a single declaration,
+in which case each distinct argument string will be recorded in a
+separate DIE or BTF record, each associated to the type.  For a single
+type with multiple @code{btf_type_tag} attributes, the order of the
+@code{DW_TAG_GNU_annotation} DIEs produced is not guaranteed to
+maintain the order of attributes in the source code.
+
+For example the following code:
+
+@smallexample
+int * __attribute__ ((btf_type_tag ("__user"))) foo;
+@end smallexample
+
+@noindent
+when compiled with @code{-gbtf} results in an additional
+@code{BTF_KIND_TYPE_TAG} BTF record to be emitted in the BTF info,
+associating the string ``__user'' with the normal @code{BTF_KIND_PTR}
+record for the pointer-to-integer type used in the declaration.
+
 @cindex @code{copy} type attribute
 @item copy
 @itemx copy (@var{expression})
-- 
2.45.2



[PATCH v2 1/5][STAGE1] c-family: add btf_type_tag and btf_decl_tag attributes

2025-02-06 Thread David Faust
Add two new c-family attributes, "btf_type_tag" and "btf_decl_tag"
along with a simple shared handler for them.

gcc/c-family/
* c-attribs.cc (c_common_attribute_table): Add btf_decl_tag and
btf_type_tag attributes.
(handle_btf_tag_attribute): New handler for both new attributes.
---
 gcc/c-family/c-attribs.cc | 25 -
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc
index f3181e7b57c..b51a8e755ae 100644
--- a/gcc/c-family/c-attribs.cc
+++ b/gcc/c-family/c-attribs.cc
@@ -189,6 +189,8 @@ static tree handle_fd_arg_attribute (tree *, tree, tree, 
int, bool *);
 static tree handle_flag_enum_attribute (tree *, tree, tree, int, bool *);
 static tree handle_null_terminated_string_arg_attribute (tree *, tree, tree, 
int, bool *);
 
+static tree handle_btf_tag_attribute (tree *, tree, tree, int, bool *);
+
 /* Helper to define attribute exclusions.  */
 #define ATTR_EXCL(name, function, type, variable)  \
   { name, function, type, variable }
@@ -640,7 +642,11 @@ const struct attribute_spec c_common_gnu_attributes[] =
   { "flag_enum", 0, 0, false, true, false, false,
  handle_flag_enum_attribute, NULL },
   { "null_terminated_string_arg", 1, 1, false, true, true, false,
- handle_null_terminated_string_arg_attribute, NULL}
+ handle_null_terminated_string_arg_attribute, 
NULL},
+  { "btf_type_tag",  1, 1, false, true, false, false,
+ handle_btf_tag_attribute, NULL},
+  { "btf_decl_tag",  1, 1, true, false, false, false,
+ handle_btf_tag_attribute, NULL}
 };
 
 const struct scoped_attribute_specs c_common_gnu_attribute_table =
@@ -5099,6 +5105,23 @@ handle_null_terminated_string_arg_attribute (tree *node, 
tree name, tree args,
   return NULL_TREE;
 }
 
+/* Handle the "btf_decl_tag" and "btf_type_tag" attributes.  */
+
+static tree
+handle_btf_tag_attribute (tree * ARG_UNUSED (node), tree name, tree args,
+ int ARG_UNUSED (flags), bool *no_add_attrs)
+{
+  if (!args)
+*no_add_attrs = true;
+  else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
+{
+  error ("%qE attribute requires a string", name);
+  *no_add_attrs = true;
+}
+
+  return NULL_TREE;
+}
+
 /* Handle the "nonstring" variable attribute.  */
 
 static tree
-- 
2.45.2



[PATCH v2 0/5][STAGE1] Add btf_decl_tag and btf_type_tag C attributes

2025-02-06 Thread David Faust
[v1: https://gcc.gnu.org/pipermail/gcc-patches/2024-October/666911.html
 Changes from v1:
 - Fix a bug in v1 related to generating DWARF for type tags applied to
   struct or union types, especially if the type had multiple type tags
   or was also part of a typedef.
 - Simplified the dwarf2ctf translation of types having both cv-qualifiers
   and type tags applied to them.
 - Add a few new tests.
 - Address review comments from v1.  ]

This patch series adds support for the btf_decl_tag and btf_type_tag attributes
to GCC. This entails:

- Two new C-family attributes that allow to associate (to "tag") particular
  declarations and types with arbitrary strings. As explained below, this is
  intended to be used to, for example, characterize certain pointer types.  A
  single declaration or type may have multiple occurrences of these attributes.

- The conveyance of that information in the DWARF output in the form of a new
  DIE: DW_TAG_GNU_annotation, and a new attribute: DW_AT_GNU_annotation.

- The conveyance of that information in the BTF output in the form of two new
  kinds of BTF objects: BTF_KIND_DECL_TAG and BTF_KIND_TYPE_TAG. These BTF
  kinds are already supported by LLVM and other tools in the BPF ecosystem.

Both of these attributes are already supported by clang, and beginning to be
used in various ways by BPF users and inside the Linux kernel.

It is worth noting that while the Linux kernel and BPF/BTF is the motivating use
case of this feature, the format of the new DWARF extension is generic.  This
work could be easily adapted to provide a general way for program authors to
annotate types and declarations with arbitrary information for any
post-compilation analysis needs, not just the Linux kernel BPF verifier.  For
example, these annotations could be used to aid in ABI analysis.

Purpose
===

1)  Addition of C-family language constructs (attributes) to specify free-text
tags on certain language elements, such as struct fields.

The purpose of these annotations is to provide additional information about
types, variables, and function parameters of interest to the kernel. A
driving use case is to tag pointer types within the Linux kernel and BPF
programs with additional semantic information, such as '__user' or '__rcu'.

For example, consider the Linux kernel function do_execve with the
following declaration:

  static int do_execve(struct filename *filename,
 const char __user *const __user *__argv,
 const char __user *const __user *__envp);

Here, __user could be defined with these annotations to record semantic
information about the pointer parameters (e.g., they are user-provided) in
DWARF and BTF information. Other kernel facilities such as the BPF verifier
can read the tags and make use of the information.

2)  Conveying the tags in the generated DWARF debug info.

The main motivation for emitting the tags in DWARF is that the Linux kernel
generates its BTF information via pahole, using DWARF as a source:

++  BTF  BTF   +--+
| pahole |---> vmlinux.btf --->| verifier |
++ +--+
^^
||
  DWARF |BTF |
||
 vmlinux  +-+
 module1.ko   | BPF program |
 module2.ko   +-+
   ...

This is because:

a)  Unlike GCC, LLVM will only generate BTF for BPF programs.

b)  GCC can generate BTF for whatever target with -gbtf, but there is no
support for linking/deduplicating BTF in the linker.

c)  pahole injects additional BTF information based on specific knowledge
of kernel objects which is not available to the compiler.

In the scenario above, the verifier needs access to the pointer tags of
both the kernel types/declarations (conveyed in the DWARF and translated
to BTF by pahole) and those of the BPF program (available directly in BTF).

Another motivation for having the tag information in DWARF, unrelated to
BPF and BTF, is that the drgn project (another DWARF consumer) also wants
to benefit from these tags in order to differentiate between different
kinds of pointers in the kernel.

3)  Conveying the tags in the generated BTF debug info.

This is easy: the main purpose of having this info in BTF is for the
compiled BPF programs. The kernel verifier can then access the tags
of pointers used by the BPF programs.

For more information about these tags and the motivation behind them, please
refer to the following Linux kernel discussions: [1], [2], [3].

DWARF Representation


Compared to prior iterations of this 

[PATCH v2 2/5][STAGE1] dwarf: create annotation DIEs for btf tags

2025-02-06 Thread David Faust
The btf_decl_tag and btf_type_tag attributes provide a means to annotate
declarations and types respectively with arbitrary user provided
strings.  These strings are recorded in debug information for
post-compilation uses, and despite the name they are meant to be
recorded in DWARF as well as BTF.  New DWARF extensions
DW_TAG_GNU_annotation and DW_AT_GNU_annotation are used to represent
these user annotations in DWARF.

This patch introduces the new DWARF extension DIE and attribute, and
generates them as necessary to represent user annotations from
btf_decl_tag and btf_type_tag.

The format of the new DIE is as follows:

DW_TAG_GNU_annotation
DW_AT_name: "btf_decl_tag" or "btf_type_tag"
DW_AT_const_value: 
DW_AT_GNU_annotation: 

DW_AT_GNU_annotation is a new attribute extension used to refer to these
new annotation DIEs.  If non-null in any given declaration or type DIE,
it is a reference to a DW_TAG_GNU_annotation DIE holding an annotation
for that declaration or type.  In addition, the DW_TAG_GNU_annotation
DIEs may also have a non-null DW_AT_GNU_annotation, referring to another
annotation DIE.  This allows chains of annotation DIEs to be formed,
such as in the case where a single declaration has multiple instances of
btf_decl_tag with different string annotations.

gcc/
* dwarf2out.cc (struct annotation_node, struct annotation_node_hasher)
(btf_tag_htab): New ancillary structures and hash table.
(annotation_node_hasher::hash, annotation_node_hasher::equal): New.
(hash_btf_tag, gen_btf_tag_dies, gen_btf_type_tag_dies)
(maybe_gen_btf_type_tag_dies, gen_btf_decl_tag_dies): New functions.
(modified_type_die): Handle btf_type_tag attribute.
(gen_formal_parameter_die): Call gen_btf_decl_tags for the parameter.
(gen_decl_die): Call gen_btf_decl_tags for the decl.
(gen_tagged_type_die): Call maybe_gen_btf_type_tag_dies for the type.
(dwarf2out_early_finish): Empty btf_tag_htab hash table.
(dwarf2out_cc_finalize): Delete btf_tag_htab hash table.

include/
* dwarf2.def (DW_TAG_GNU_annotation): New DWARF extension.
(DW_AT_GNU_annotation): Likewise.

gcc/testsuite/
* gcc.dg/debug/dwarf2/dwarf-btf-decl-tag-1.c: New test.
* gcc.dg/debug/dwarf2/dwarf-btf-decl-tag-2.c: New test.
* gcc.dg/debug/dwarf2/dwarf-btf-decl-tag-3.c: New test.
* gcc.dg/debug/dwarf2/dwarf-btf-type-tag-1.c: New test.
* gcc.dg/debug/dwarf2/dwarf-btf-type-tag-2.c: New test.
* gcc.dg/debug/dwarf2/dwarf-btf-type-tag-3.c: New test.
---
 gcc/dwarf2out.cc  | 275 +-
 .../debug/dwarf2/dwarf-btf-decl-tag-1.c   |  11 +
 .../debug/dwarf2/dwarf-btf-decl-tag-2.c   |  25 ++
 .../debug/dwarf2/dwarf-btf-decl-tag-3.c   |  21 ++
 .../debug/dwarf2/dwarf-btf-type-tag-1.c   |  10 +
 .../debug/dwarf2/dwarf-btf-type-tag-2.c   |  31 ++
 .../debug/dwarf2/dwarf-btf-type-tag-3.c   |  15 +
 include/dwarf2.def|   4 +
 8 files changed, 385 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-btf-decl-tag-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-btf-decl-tag-2.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-btf-decl-tag-3.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-btf-type-tag-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-btf-type-tag-2.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-btf-type-tag-3.c

diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index 8085b8d85d8..b06fd5ccc04 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -3696,6 +3696,32 @@ static bool frame_pointer_fb_offset_valid;
 
 static vec base_types;
 
+/* A cached btf_type_tag or btf_decl_tag user annotation.  */
+struct GTY ((for_user)) annotation_node
+{
+  const char *name;
+  const char *value;
+  hashval_t hash;
+  dw_die_ref die;
+  struct annotation_node *next;
+};
+
+struct annotation_node_hasher : ggc_ptr_hash
+{
+  typedef const struct annotation_node *compare_type;
+
+  static hashval_t hash (struct annotation_node *);
+  static bool equal (const struct annotation_node *,
+const struct annotation_node *);
+};
+
+/* A hash table of tag annotation nodes for btf_type_tag and btf_decl_tag C
+   attributes.  DIEs for these user annotations may be reused if they are
+   structurally equivalent; this hash table is used to ensure the DIEs are
+   reused wherever possible.  */
+static GTY (()) hash_table *btf_tag_htab;
+
+
 /* Flags to represent a set of attribute classes for attributes that represent
a scalar value (bounds, pointers, ...).  */
 enum dw_scalar_form
@@ -13659,6 +13685,180 @@ long_double_as_float128 (tree type)
   return NULL_TREE;
 }
 
+
+hashval_t
+annotation_node_hasher::hash (struct annotation_node *node)
+{
+  return node->hash;
+}
+
+bool
+annotation_node_hasher::equal

[PATCH] [testsuite] [sparc] skip tls tests if emulated

2025-02-06 Thread Alexandre Oliva


A number of tls tests expect TLS-specific relocations, that are not
present when tls is emulated, as on e.g. leon3-elf.  Skip the tests
when tls is emulated.

Regstrapped on x86_64-linux-gnu, also tested with gcc-14 targeting
leon3-elf.  Ok to install?


for  gcc/testsuite/ChangeLog

* gcc.target/sparc/tls-ld-int16.c: Skip when tls is emulated.
* gcc.target/sparc/tls-ld-int32.c: Likewise.
* gcc.target/sparc/tls-ld-int8.c: Likewise.
* gcc.target/sparc/tls-ld-uint16.c: Likewise.
* gcc.target/sparc/tls-ld-uint32.c: Likewise.
* gcc.target/sparc/tls-ld-uint8.c: Likewise.
---
 gcc/testsuite/gcc.target/sparc/tls-ld-int16.c  |1 +
 gcc/testsuite/gcc.target/sparc/tls-ld-int32.c  |1 +
 gcc/testsuite/gcc.target/sparc/tls-ld-int8.c   |1 +
 gcc/testsuite/gcc.target/sparc/tls-ld-uint16.c |1 +
 gcc/testsuite/gcc.target/sparc/tls-ld-uint32.c |1 +
 gcc/testsuite/gcc.target/sparc/tls-ld-uint8.c  |1 +
 6 files changed, 6 insertions(+)

diff --git a/gcc/testsuite/gcc.target/sparc/tls-ld-int16.c 
b/gcc/testsuite/gcc.target/sparc/tls-ld-int16.c
index d3d28086156ee..de4ce4034c6df 100644
--- a/gcc/testsuite/gcc.target/sparc/tls-ld-int16.c
+++ b/gcc/testsuite/gcc.target/sparc/tls-ld-int16.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-O2" }
 /* { dg-add-options tls } */
+/* { dg-skip-if "native tls expected" { tls_emulated } } */
 
 #include 
 
diff --git a/gcc/testsuite/gcc.target/sparc/tls-ld-int32.c 
b/gcc/testsuite/gcc.target/sparc/tls-ld-int32.c
index cf18147ef7273..5604c24151ac1 100644
--- a/gcc/testsuite/gcc.target/sparc/tls-ld-int32.c
+++ b/gcc/testsuite/gcc.target/sparc/tls-ld-int32.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-O2" }
 /* { dg-add-options tls } */
+/* { dg-skip-if "native tls expected" { tls_emulated } } */
 
 #include 
 
diff --git a/gcc/testsuite/gcc.target/sparc/tls-ld-int8.c 
b/gcc/testsuite/gcc.target/sparc/tls-ld-int8.c
index a07cffc37f8c4..17eb32ea2a107 100644
--- a/gcc/testsuite/gcc.target/sparc/tls-ld-int8.c
+++ b/gcc/testsuite/gcc.target/sparc/tls-ld-int8.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-O2" }
 /* { dg-add-options tls } */
+/* { dg-skip-if "native tls expected" { tls_emulated } } */
 
 #include 
 
diff --git a/gcc/testsuite/gcc.target/sparc/tls-ld-uint16.c 
b/gcc/testsuite/gcc.target/sparc/tls-ld-uint16.c
index 41ee687b28c51..81990a7bf3ca5 100644
--- a/gcc/testsuite/gcc.target/sparc/tls-ld-uint16.c
+++ b/gcc/testsuite/gcc.target/sparc/tls-ld-uint16.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-O2" }
 /* { dg-add-options tls } */
+/* { dg-skip-if "native tls expected" { tls_emulated } } */
 
 #include 
 
diff --git a/gcc/testsuite/gcc.target/sparc/tls-ld-uint32.c 
b/gcc/testsuite/gcc.target/sparc/tls-ld-uint32.c
index 9c7915372b9e7..60524ba87cf6a 100644
--- a/gcc/testsuite/gcc.target/sparc/tls-ld-uint32.c
+++ b/gcc/testsuite/gcc.target/sparc/tls-ld-uint32.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-O2" }
 /* { dg-add-options tls } */
+/* { dg-skip-if "native tls expected" { tls_emulated } } */
 
 #include 
 
diff --git a/gcc/testsuite/gcc.target/sparc/tls-ld-uint8.c 
b/gcc/testsuite/gcc.target/sparc/tls-ld-uint8.c
index 0dcff66eb15e4..7a7492197f952 100644
--- a/gcc/testsuite/gcc.target/sparc/tls-ld-uint8.c
+++ b/gcc/testsuite/gcc.target/sparc/tls-ld-uint8.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-O2" }
 /* { dg-add-options tls } */
+/* { dg-skip-if "native tls expected" { tls_emulated } } */
 
 #include 
 

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity
Excluding neuro-others for not behaving ""normal"" is *not* inclusive


Re: [PATCH] [testsuite] require -Ofast for vect-ifcvt-18 even without avx

2025-02-06 Thread Alexandre Oliva
On Jan 31, 2025, Richard Biener  wrote:

> On Thu, Jan 30, 2025 at 8:49 PM Alexandre Oliva  wrote:

>> Split -Ofast out of the avx conditional, so that it is passed on the
>> same targets that expect the transformation.

> It effectively would require -Ofast on all targets (it's vectorizing a
> FP reduction),
> so I'd say -Ofast should not depend on x86 only.

IIUC we prefer not to modify testcases, and making -Ofast apply to other
targets would do just that, because the test without -Ofast stands on
its own and has been running for a while, just not with a vectorizable
FP reduction.  That was my reasoning to keep it guarded by a
conditional.  I agree it would have made sense to make it -Ofast on all
targets.

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity
Excluding neuro-others for not behaving ""normal"" is *not* inclusive


Re: [PATCH] lra: initialize allocated_hard_reg_p[] for hard regs referenced in RTL [PR118533]

2025-02-06 Thread Vladimir Makarov



On 1/28/25 1:11 PM, Surya Kumari Jangala wrote:

Hi,
The following patch has been bootstrapped and regtested on powerpc64le-linux,
aarch64-linux and x86_64-linux. This patch is a proposed fix for PR118533.
Request you to please review the patch.

Regards
Surya


lra: initialize allocated_hard_reg_p[] for hard regs referenced in RTL 
[PR118533]

When computing the costs of the hard registers in assign_hard_reg(), the
cost of saving/restoring a callee-save register in the prolog/epilog is
taken into consideration. However, if some other pseudo has already been
assigned that hard register, then any further assignments of that hard
register don't need to be charged the save/restore cost, since it has
already been accounted for. The array allocated_hardreg_p[] specifies if
a hard register has already been allocated to an allocno. This array is
updated whenever a register is assigned. This array should also be
updated if any hard register is referenced in the RTL because any such
register usage should count as the "first" usage. A hard register
reference can occur in the RTL due to inline asm specifying a register
for a local variable, or a target’s machine pattern can explicitly
reference a hard register.


Sorry for delay with the answer. But this problem was already solved in 
January for PR118497 by the following patch:


https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=c4dae80357ccf2e035d8e9ec0a3bb319344c5b41;hp=10850f92b2a618ef1b1ad399530943ef4847823d

Also I am agree with Mike Matz we don't need additional insn scan (which 
is very costly for RTL).  The above patch avoids full RTL scanning.



2025-01-28  Surya Kumari Jangala  

gcc/
PR rtl-optimization/118533
* ira-color.cc (initialize_allocated_hardreg): New function.
(color): Initialize array.
---





Re: [patch, libfortran] PR114618 Version 2 Format produces incorrect output when contains 1x, ok when uses " "

2025-02-06 Thread Jerry D

On 1/31/25 11:30 AM, Harald Anlauf wrote:
--- snip --

So either commit the current version and track this issue in
a PR if not yet done, or have a look if there is a quick fix.

Thanks for the work!

Harald


Committed as:

commit cfed99751c1a3b93ca66451eb1b62271e682f927 (HEAD -> master, 
origin/master, origin/HEAD)

Author: Jerry DeLisle 
Date:   Wed Jan 29 13:40:59 2025 -0800

Fortran: Fix handling of the X edit descriptor.




Re: [PATCH] LoongArch: Correct the mode for mask{eq,ne}z

2025-02-06 Thread Lulu Cheng



在 2025/1/20 上午9:30, Xi Ruoyao 写道:

For mask{eq,ne}z, rk is always compared with 0 in the full width, thus
the mode for rk should be X.


LGTM!

I agree with your point of view.

Thank you.




I found the issue reviewing a patch fixing a similar issue for RISC-V
XTheadCondMov [1], but interestingly I cannot find a test case really
blowing up on LoongArch.  But as the issue is obvious enough let's fix
it anyway so it won't blow up in the future.

[1]: https://gcc.gnu.org/pipermail/gcc-patches/2025-January/674004.html

gcc/ChangeLog:

* config/loongarch/loongarch.md
(*sel_using_): Rename to ...
(*sel_using_): ... here.
(GPR2): Remove as nothing uses it now.
---

Bootstrapped & regtested on loongarch64-linux-gnu.  Ok for trunk?

  gcc/config/loongarch/loongarch.md | 10 +++---
  1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index c17d2928fbf..10197b9d9d5 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -374,10 +374,6 @@ (define_asm_attributes
  ;; from the same template.
  (define_mode_iterator GPR [SI (DI "TARGET_64BIT")])
  
-;; A copy of GPR that can be used when a pattern has two independent

-;; modes.
-(define_mode_iterator GPR2 [SI (DI "TARGET_64BIT")])
-
  ;; This mode iterator allows 16-bit and 32-bit GPR patterns and 32-bit 64-bit
  ;; FPR patterns to be generated from the same template.
  (define_mode_iterator JOIN_MODE [HI
@@ -2507,11 +2503,11 @@ (define_expand "cstore4"
  
  ;; Conditional move instructions.
  
-(define_insn "*sel_using_"

+(define_insn "*sel_using_"
[(set (match_operand:GPR 0 "register_operand" "=r,r")
(if_then_else:GPR
-(equality_op:GPR2 (match_operand:GPR2 1 "register_operand" "r,r")
-  (const_int 0))
+(equality_op:X (match_operand:X 1 "register_operand" "r,r")
+   (const_int 0))
 (match_operand:GPR 2 "reg_or_0_operand" "r,J")
 (match_operand:GPR 3 "reg_or_0_operand" "J,r")))]
"register_operand (operands[2], mode)




Re: [RFC][libgcc][PATCH] Fix two issues in libgcc/unwind-dw2-fde.c.

2025-02-06 Thread Qing Zhao


> On Feb 6, 2025, at 04:36, Richard Biener  wrote:
> 
> On Wed, Feb 5, 2025 at 4:40 PM Qing Zhao  wrote:
>> 
>> 
>> 
>>> On Feb 5, 2025, at 07:46, Richard Biener  wrote:
>>> 
>>> On Tue, Feb 4, 2025 at 10:12 PM Qing Zhao  wrote:
 
 Hi,
 
 One of our big application segv in libgcc code while unwinding the stack.
 
 This is a random crash while the application throws a c++ exception and
 unwinds the stack. Incidents are random and never can be reproduced by any
 test case.
 
 The libgcc that is used is based on GCC 8.
 
 Fortunately, we got some information through the stack trace, and narrowed
 down the crash is in the routine "init_object" of libgcc/unwind-dw2-pde.c:
 
 777   count = classify_object_over_fdes (ob, ob->u.single);
 
 when loading the 2nd parameter of the call to "classify_object_over_fdes".
 i.e, the address that is pointed by "ob->u.single" is invalid.
 
 And the outer caller we can track back is the following line 1066 of the 
 routine
 "_Unwind_Find_FDE":
 
 1060   /* Classify and search the objects we've not yet processed.  */
 1061   while ((ob = unseen_objects))
 1062 {
 1063   struct object **p;
 1064
 1065   unseen_objects = ob->next;
 1066   f = search_object (ob, pc);
 
 Then we suspected that the construction of the static variable 
 "unseen_objects"
 might have some bug.
 
 Then after studying the source code that constructs "unseen_objects" in
 libgcc/unwind-dw2-pde.c, I found an issue in the routines 
 "__register_frame"
 and "__register_frame_table" as following:
 
 127 void
 128 __register_frame (void *begin)
 129 {
 130   struct object *ob;
 131
 132   /* If .eh_frame is empty, don't register at all.  */
 133   if (*(uword *) begin == 0)
 134 return;
 135
 136   ob = malloc (sizeof (struct object));
 137   __register_frame_info (begin, ob);
 138 }
 
 181 void
 182 __register_frame_table (void *begin)
 183 {
 184   struct object *ob = malloc (sizeof (struct object));
 185   __register_frame_info_table (begin, ob);
 186 }
 187
 
 In the above, one obvious issue in the source code is at line 136, line 
 137,
 and line 184 and line 185: the return value of call to "malloc" is not 
 checked
 against NULL before it was passed as the second parameter of the follwoing 
 call.
 
 This might cause unpredicted random behavior.
 
 I checked the latest trunk GCC libgcc, the same issue is there too.
 
 This patch is for the latest trunk GCC.
 
 Please let me know any comments on this?
>>> 
>>> I think I've seen this elsewhere —
>> 
>> Do you mean that you saw this problem (malloc return NULL during 
>> register_frame) previously?
> 
> It was probably reported to us (SUSE) from a customer/partner, also
> from a (llvm?) JIT context.

Okay. 
So, that bug has not been resolved yet?

Qing
> 
>>> the issue is the unwind register API does
>>> not allow for failures but I also think calling abort() is bad.
>> 
>> Okay, I see.
>> 
>>> 
>>> Are you calling this from a JIT context or so?
>> 
>> I will ask the bug submitter on this to make sure.
>> 
>>> We're assuming that at program
>>> start malloc() will not fail.
>> 
>> So, the routine __register_frame is only called at the _start_ of a program?
>>> 
>>> The proper solution is probably to add an alternate ABI which has a way to 
>>> fail
>>> during registration.
>> 
>> Any suggestion on this (or any other routine I can refer to?)
>> 
>> 
>> Thanks a lot for the help.
>> 
>> Qing
>>> 
>>> Richard.
>>> 
 thanks.
 
 Qing
 
 
 ===
 
 Fix two issues in libgcc/unwind-dw2-fde.c:
 
 A. The returned value of call to malloc is not checked against NULL.
  This might cause unpredicted behavior during stack unwinding.
 
 B. Check for null begin parameter (as well as pointer to null) in
  __register_frame_info_table_bases and __register_frame_table.
 
 libgcc/ChangeLog:
 
   * unwind-dw2-fde.c (__register_frame): Check the return value of call
   to malloc.
   (__register_frame_info_table_bases): Check for null begin parameter.
   (__register_frame_table): Check the return value of call to malloc,
   Check for null begin parameter.
 ---
 libgcc/unwind-dw2-fde.c | 12 
 1 file changed, 12 insertions(+)
 
 diff --git a/libgcc/unwind-dw2-fde.c b/libgcc/unwind-dw2-fde.c
 index cdfd3974c99..f0bc29d682a 100644
 --- a/libgcc/unwind-dw2-fde.c
 +++ b/libgcc/unwind-dw2-fde.c
 @@ -169,6 +169,8 @@ __register_frame (void *begin)
return;
 
  ob = malloc (sizeof (struct object));
 +  if (ob == NULL)
 +abort ();
  __register_frame_info (begin, ob);

Re: [PATCH] rtl-optimization/117922 - disable fold-mem-offsets for highly connected CFG

2025-02-06 Thread Jeff Law




On 2/5/25 5:29 AM, Richard Biener wrote:

The PR shows fold-mem-offsets taking ages and a lot of memory computing
DU/UD chains as that requires the RD problem.  The issue is not so much
the memory required for the pruned sets but the high CFG connectivity
(and that the CFG is cyclic) which makes solving the dataflow problem
expensive.

The following adds the same limit as the one imposed by GCSE and CPROP.

Bootstrap and regtest ongoing on x86_64-unknown-linux-gnu, this
reduces the compile-time of the PR26854 testcase from 480s to 150s.

OK?

Thanks,
Richard.

PR rtl-optimization/117922
* fold-mem-offsets.cc (pass_fold_mem_offsets::execute):
Do nothing for a highly connected CFG.

OK
jeff



[PATCH] [testsuite] tolerate later success [PR108357]

2025-02-06 Thread Alexandre Oliva


On leon3-elf and presumably on other targets, the test fails due to
differences in calling conventions and other reasons, that add extra
gimple stmts that prevent the expected optimization at the expected
point.  The optimization takes place anyway, just a little later, so
tolerate that.

Regstrapped on x86_64-linux-gnu, also tested with gcc-14 targeting
leon3-elf.  Ok to install?


for  gcc/testsuite/ChangeLog

PR tree-optimization/108357
* gcc.dg/tree-ssa/pr108357.c: Tolerate later optimization.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr108357.c |7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr108357.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr108357.c
index 44c457b7a9777..7dff235f89278 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr108357.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr108357.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-threadfull1" } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
 
 static char b;
 static unsigned c;
@@ -19,4 +19,7 @@ int main()
   f(g);
 }
 
-/* { dg-final { scan-tree-dump-not "foo" "threadfull1" } } */
+/* We expect threadfull1 to eliminate the call to foo(), but not all targets
+   manage that at that point.  Calling conventions (mandatory promotion) play a
+   role, but there's more than that.  */
+/* { dg-final { scan-tree-dump-not "foo" "optimized" } } */

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity
Excluding neuro-others for not behaving ""normal"" is *not* inclusive


Re: [PATCH v1 12/16] Refactor FMV name mangling.

2025-02-06 Thread Alfie Richards



Added missing CC

On 05/02/2025 17:14, Richard Sandiford wrote:

Alfie Richards  writes:

On 04/02/2025 10:03, Richard Sandiford wrote:

Alfie Richards  writes:

+    return id;
+  else if (cgraph_node::get_create 
(decl)->dispatcher_resolver_function)

+    id = clone_identifier (id, "resolver");
+  else if (DECL_FUNCTION_VERSIONED (decl))
   {
-  name += ".default";
-  return get_identifier (name.c_str());
-    }
-
-  name += "._";
+  aarch64_fmv_feature_mask feature_mask
+    = get_feature_mask_for_version (decl);
   -  int num_features = ARRAY_SIZE (aarch64_fmv_feature_data);
-  for (int i = 0; i < num_features; i++)
-    {
-  if (feature_mask & aarch64_fmv_feature_data[i].feature_mask)
+  if (feature_mask == 0ULL) // && not implemented!

Could you explain the // comment?

Apologies, this was a temporary marker for myself that slipped through.

   {
-  name += "M";
-  name += aarch64_fmv_feature_data[i].name;
+  if (!DECL_INITIAL (decl))
+    return id;
+  return clone_identifier (id, "default");

This different handling of defined vs. undefined functions feels a
bit weird.  It's not clear to me whether:

(1) The .default version is effectively internal to the translation 
unit,

  and therefore other translation units cannot assume it exists.

(2) Other translation units can assume that the .default version exists
  if they can see a target_version("default") or target_clones 
definition.


(3) Other translation units can assume that the .default version exists
  if they can see a non-default target_version or a 
target_clones definition.


(4) Something else.

Argh, sorry, I meant declaration rather than definition for both
(2) and (3).  My question doesn't make sense as written.


(2) would create a difference between implicit and explicit defaults
and doesn't seem to be what the series implements (mv-symbols6.C from
patch 14).  (3) seems indirect, and IMO it's dangerous to assume that
the mere existence of a non-default version X in one TU implies that
version X will be visible in the TU that contains the resolver.  I 
would

have expected that it would be valid for version X not to be visible
in the TU that contains the resolver, with the consequence that the
resolver won't use it.

(1) seems more appealing on the face of it, so that .default is more
like .resolver.  But that doesn't seem to be what the spec says.

I would say its (4) Something else.

This code is the result of that and a discussion we had where we 
decided we

can avoid mangling the default version symbol if it is external. As we
know that
in the TU where the default is defined, the default
version will be mangled, and the dispatched symbol will take its name.

As the default is the only resolvable version, then all calls and
references
will already have the correct target.

This allows us to avoid making the dispatched symbol and redirecting
default decl calls/references to the dispatched symbol.

That sounds like what I meant by (1): i.e. that the only TU that can
reference .default is the one that creates it (i.e. the one with the
definition).  A declaration with 
__attribute__((target_version("default"))

doesn't itself imply that a .default version exists.

The reason for asking is that the ACLE says:

   The "default" version is mangled with ".default" on top of the
   language-specific name mangling. All versioned functions with their
   mangled names are always resolvable.

which made it sound like other TUs could rely on ".default" existing.

Ahh that does help. I follow now.

My current understanding comes from following the clang implementation
and discussions regarding the spec.

But, I see your point though that this isn't what is specified.

I think a good correct solution would be that if there is
target_version ("default") then to always mangle the default. Then in the
case that there's a default and no other versions, emit a
non-mangled symbol alias to the default version?

Then we cover all cases well, follow the spec, and will be broadly 
compatible

with clang. (ie. in that case currently clang emits the default as if it
wasn't annotated, we would emit the default mangled, and include an alias
to the unmangled symbol)


It's a bit of a hack, and we can instead always mangle the default,
create the dispatched symbol? Apologies if I misunderstood the earlier
conversation.


   }
-    }
   -  if (DECL_ASSEMBLER_NAME_SET_P (decl))
-    SET_DECL_RTL (decl, NULL);
+  std::string suffix = "_";
+
+  int num_features = ARRAY_SIZE (aarch64_fmv_feature_data);
+  for (int i = 0; i < num_features; i++)
+    if (feature_mask & aarch64_fmv_feature_data[i].feature_mask)
+  {
+    suffix += "M";
+    suffix += aarch64_fmv_feature_data[i].name;
+  }
+
+  if (DECL_ASSEMBLER_NAME_SET_P (decl))
+    SET_DECL_RTL (decl, NULL);

Why is it necessary to conditionally res

[PATCH] c++: ICE with unparsed noexcept [PR117106]

2025-02-06 Thread Marek Polacek
Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
In a member-specification of a class, a noexcept-specifier is
a complete-class context.  Thus we delay parsing until the end of
the class via our DEFERRED_PARSE mechanism; see cp_parser_save_noexcept
and cp_parser_late_noexcept_specifier.

We also attempt to defer instantiation of noexcept-specifiers in order
to reduce the number of instantiations; this is done via DEFERRED_NOEXCEPT.

We can even have both, as in noexcept65.C: a DEFERRED_PARSE wrapped in
DEFERRED_NOEXCEPT, which uses the DEFPARSE_INSTANTIATIONS mechanism.
noexcept65.C works, because when we really need the noexcept, which is
when parsing the body of S::A::A(), the noexcept will have been parsed
already; noexcepts are parsed before bodies of member function.

But in this test we have:

  struct A {
  int x;
  template
  void foo() noexcept(noexcept(x)) {}
  auto bar() -> decltype(foo()) {} // #1
  };

and I think the decltype in #1 needs the unparsed noexcept before it
could have been parsed.  clang++ rejects the test and I suppose we
should reject it as well, rather than crashing on a DEFERRED_PARSE
in tsubst_expr.

PR c++/117106
PR c++/118190

gcc/cp/ChangeLog:

* pt.cc (maybe_instantiate_noexcept): Give an error if the noexcept
hasn't been parsed yet.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/noexcept89.C: New test.
* g++.dg/cpp0x/noexcept90.C: New test.
---
 gcc/cp/pt.cc| 16 +++-
 gcc/testsuite/g++.dg/cpp0x/noexcept89.C |  9 +
 gcc/testsuite/g++.dg/cpp0x/noexcept90.C | 12 
 3 files changed, 32 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept89.C
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept90.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 39232b5e67f..8108bf5de65 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -27453,7 +27453,8 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t 
complain)
 {
   static hash_set* fns = new hash_set;
   bool added = false;
-  if (DEFERRED_NOEXCEPT_PATTERN (noex) == NULL_TREE)
+  tree pattern = DEFERRED_NOEXCEPT_PATTERN (noex);
+  if (pattern == NULL_TREE)
{
  spec = get_defaulted_eh_spec (fn, complain);
  if (spec == error_mark_node)
@@ -27464,13 +27465,19 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t 
complain)
   else if (!(added = !fns->add (fn)))
{
  /* If hash_set::add returns true, the element was already there.  */
- location_t loc = cp_expr_loc_or_loc (DEFERRED_NOEXCEPT_PATTERN (noex),
-   DECL_SOURCE_LOCATION (fn));
+ location_t loc = cp_expr_loc_or_loc (pattern,
+  DECL_SOURCE_LOCATION (fn));
  error_at (loc,
"exception specification of %qD depends on itself",
fn);
  spec = noexcept_false_spec;
}
+  else if (TREE_CODE (pattern) == DEFERRED_PARSE)
+   {
+ error ("exception specification of %qD is not available "
+"until end of class definition", fn);
+ spec = noexcept_false_spec;
+   }
   else if (push_tinst_level (fn))
{
  const bool push_to_top = maybe_push_to_top_level (fn);
@@ -27497,8 +27504,7 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t 
complain)
++processing_template_decl;
 
  /* Do deferred instantiation of the noexcept-specifier.  */
- noex = tsubst_expr (DEFERRED_NOEXCEPT_PATTERN (noex),
- DEFERRED_NOEXCEPT_ARGS (noex),
+ noex = tsubst_expr (pattern, DEFERRED_NOEXCEPT_ARGS (noex),
  tf_warning_or_error, fn);
  /* Build up the noexcept-specification.  */
  spec = build_noexcept_spec (noex, tf_warning_or_error);
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept89.C 
b/gcc/testsuite/g++.dg/cpp0x/noexcept89.C
new file mode 100644
index 000..308abf6fb45
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept89.C
@@ -0,0 +1,9 @@
+// PR c++/117106
+// { dg-do compile { target c++11 } }
+
+struct A {
+int x;
+template
+void foo() noexcept(noexcept(x)) {}
+auto bar() -> decltype(foo()) {} // { dg-error "not available until 
end of class" }
+};
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept90.C 
b/gcc/testsuite/g++.dg/cpp0x/noexcept90.C
new file mode 100644
index 000..6d403f66e72
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept90.C
@@ -0,0 +1,12 @@
+// PR c++/118190
+// { dg-do compile { target c++11 } }
+
+struct S {
+  template
+  struct S5 {
+void f1() noexcept(noexcept(i)) { }
+int i;
+  };
+  S5 s5;
+  static_assert (noexcept(s5.f1()), ""); // { dg-error "not available until 
end of class|static assertion failed" }
+};

base-commit: a69b728b5b9efa34d0af9f9ce0b248d05f7791b0
-- 
2.48.1


Re: [PATCH] c++: Allow constexpr reads from volatile std::nullptr_t objects [PR118661]

2025-02-06 Thread Marek Polacek
On Thu, Feb 06, 2025 at 07:32:43PM +0100, Jakub Jelinek wrote:
> Hi!
> 
> As mentioned in the PR, https://eel.is/c++draft/conv.lval#note-1
> says that even volatile reads from std::nullptr_t typed objects actually
> don't read anything and https://eel.is/c++draft/expr.const#10.9
> says that even those are ok in constant expressions.
> 
> So, the following patch adjusts the r9-4793 changes to have an exception
> for NULLPTR_TYPE.
> As [conv.lval]/3 also talks about accessing to inactive member, I've added
> testcase to cover that as well.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2025-02-06  Jakub Jelinek  
> 
>   PR c++/118661
>   * constexpr.cc (potential_constant_expression_1): Don't diagnose
>   lvalue-to-rvalue conversion of volatile lvalue if it has NULLPTR_TYPE.
>   * decl2.cc (decl_maybe_constant_var_p): Return true for constexpr
>   decls with NULLPTR_TYPE even if they are volatile.
> 
>   * g++.dg/cpp0x/constexpr-volatile4.C: New test.
>   * g++.dg/cpp0x/constexpr-union9.C: New test.
> 
> --- gcc/cp/constexpr.cc.jj2025-02-05 13:14:34.771198185 +0100
> +++ gcc/cp/constexpr.cc   2025-02-06 09:53:03.236587121 +0100
> @@ -9717,7 +9717,8 @@ potential_constant_expression_1 (tree t,
>  return true;
>  
>if (TREE_THIS_VOLATILE (t) && want_rval
> -  && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t)))
> +  && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t))
> +  && TREE_CODE (TREE_TYPE (t)) != NULLPTR_TYPE)

Patch looks good but we should use NULLPTR_TYPE_P.

>  {
>if (flags & tf_error)
>   constexpr_error (loc, fundef_p, "lvalue-to-rvalue conversion of "
> --- gcc/cp/decl2.cc.jj2025-01-27 16:45:45.455970792 +0100
> +++ gcc/cp/decl2.cc   2025-02-06 09:53:53.150890600 +0100
> @@ -4985,7 +4985,8 @@ decl_maybe_constant_var_p (tree decl)
>tree type = TREE_TYPE (decl);
>if (!VAR_P (decl))
>  return false;
> -  if (DECL_DECLARED_CONSTEXPR_P (decl) && !TREE_THIS_VOLATILE (decl))
> +  if (DECL_DECLARED_CONSTEXPR_P (decl)
> +  && (!TREE_THIS_VOLATILE (decl) || TREE_CODE (type) == NULLPTR_TYPE))
>  return true;
>if (DECL_HAS_VALUE_EXPR_P (decl))
>  /* A proxy isn't constant.  */
> --- gcc/testsuite/g++.dg/cpp0x/constexpr-volatile4.C.jj   2025-02-06 
> 09:50:43.339539282 +0100
> +++ gcc/testsuite/g++.dg/cpp0x/constexpr-volatile4.C  2025-02-06 
> 09:50:16.071919784 +0100
> @@ -0,0 +1,20 @@
> +// PR c++/118661
> +// { dg-do compile { target c++11 } }
> +
> +using nullptr_t = decltype (nullptr);
> +constexpr volatile nullptr_t a = {};
> +constexpr nullptr_t b = a;
> +
> +constexpr nullptr_t
> +foo ()
> +{
> +#if __cplusplus >= 201402L
> +  volatile nullptr_t c = {};
> +  return c;
> +#else
> +  return nullptr;
> +#endif
> +}
> +
> +static_assert (b == nullptr, "");
> +static_assert (foo () == nullptr, "");
> --- gcc/testsuite/g++.dg/cpp0x/constexpr-union9.C.jj  2025-02-06 
> 09:57:46.149639270 +0100
> +++ gcc/testsuite/g++.dg/cpp0x/constexpr-union9.C 2025-02-06 
> 10:01:08.472815988 +0100
> @@ -0,0 +1,16 @@
> +// PR c++/118661
> +// { dg-do compile { target c++11 } }
> +
> +using nullptr_t = decltype (nullptr);
> +union U { int i; nullptr_t n; };
> +constexpr U u = { 42 };
> +static_assert (u.n == nullptr, "");
> +
> +#if __cplusplus >= 201402L
> +constexpr nullptr_t
> +foo ()
> +{
> +  union U { int i; nullptr_t n; } u = { 42 };
> +  return u.n;
> +}
> +#endif
> 
>   Jakub
> 

Marek



[PATCH] c++, v2: Allow constexpr reads from volatile std::nullptr_t objects [PR118661]

2025-02-06 Thread Jakub Jelinek
On Thu, Feb 06, 2025 at 01:45:59PM -0500, Marek Polacek wrote:
> > --- gcc/cp/constexpr.cc.jj  2025-02-05 13:14:34.771198185 +0100
> > +++ gcc/cp/constexpr.cc 2025-02-06 09:53:03.236587121 +0100
> > @@ -9717,7 +9717,8 @@ potential_constant_expression_1 (tree t,
> >  return true;
> >  
> >if (TREE_THIS_VOLATILE (t) && want_rval
> > -  && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t)))
> > +  && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t))
> > +  && TREE_CODE (TREE_TYPE (t)) != NULLPTR_TYPE)
> 
> Patch looks good but we should use NULLPTR_TYPE_P.

You're right.  Here is the patch adjusted:

2025-02-06  Jakub Jelinek  

PR c++/118661
* constexpr.cc (potential_constant_expression_1): Don't diagnose
lvalue-to-rvalue conversion of volatile lvalue if it has NULLPTR_TYPE.
* decl2.cc (decl_maybe_constant_var_p): Return true for constexpr
decls with NULLPTR_TYPE even if they are volatile.

* g++.dg/cpp0x/constexpr-volatile4.C: New test.
* g++.dg/cpp0x/constexpr-union9.C: New test.

--- gcc/cp/constexpr.cc.jj  2025-02-05 13:14:34.771198185 +0100
+++ gcc/cp/constexpr.cc 2025-02-06 09:53:03.236587121 +0100
@@ -9717,7 +9717,8 @@ potential_constant_expression_1 (tree t,
 return true;
 
   if (TREE_THIS_VOLATILE (t) && want_rval
-  && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t)))
+  && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t))
+  && !NULLPTR_TYPE_P (TREE_TYPE (t)))
 {
   if (flags & tf_error)
constexpr_error (loc, fundef_p, "lvalue-to-rvalue conversion of "
--- gcc/cp/decl2.cc.jj  2025-01-27 16:45:45.455970792 +0100
+++ gcc/cp/decl2.cc 2025-02-06 09:53:53.150890600 +0100
@@ -4985,7 +4985,8 @@ decl_maybe_constant_var_p (tree decl)
   tree type = TREE_TYPE (decl);
   if (!VAR_P (decl))
 return false;
-  if (DECL_DECLARED_CONSTEXPR_P (decl) && !TREE_THIS_VOLATILE (decl))
+  if (DECL_DECLARED_CONSTEXPR_P (decl)
+  && (!TREE_THIS_VOLATILE (decl) || NULLPTR_TYPE_P (type)))
 return true;
   if (DECL_HAS_VALUE_EXPR_P (decl))
 /* A proxy isn't constant.  */
--- gcc/testsuite/g++.dg/cpp0x/constexpr-volatile4.C.jj 2025-02-06 
09:50:43.339539282 +0100
+++ gcc/testsuite/g++.dg/cpp0x/constexpr-volatile4.C2025-02-06 
09:50:16.071919784 +0100
@@ -0,0 +1,20 @@
+// PR c++/118661
+// { dg-do compile { target c++11 } }
+
+using nullptr_t = decltype (nullptr);
+constexpr volatile nullptr_t a = {};
+constexpr nullptr_t b = a;
+
+constexpr nullptr_t
+foo ()
+{
+#if __cplusplus >= 201402L
+  volatile nullptr_t c = {};
+  return c;
+#else
+  return nullptr;
+#endif
+}
+
+static_assert (b == nullptr, "");
+static_assert (foo () == nullptr, "");
--- gcc/testsuite/g++.dg/cpp0x/constexpr-union9.C.jj2025-02-06 
09:57:46.149639270 +0100
+++ gcc/testsuite/g++.dg/cpp0x/constexpr-union9.C   2025-02-06 
10:01:08.472815988 +0100
@@ -0,0 +1,16 @@
+// PR c++/118661
+// { dg-do compile { target c++11 } }
+
+using nullptr_t = decltype (nullptr);
+union U { int i; nullptr_t n; };
+constexpr U u = { 42 };
+static_assert (u.n == nullptr, "");
+
+#if __cplusplus >= 201402L
+constexpr nullptr_t
+foo ()
+{
+  union U { int i; nullptr_t n; } u = { 42 };
+  return u.n;
+}
+#endif


Jakub



Re: [PATCH] RISC-V: Fix ICE when prefetching addresses less than 12 bits for zicbop

2025-02-06 Thread Jeff Law




On 1/19/25 7:32 AM, Jin Ma wrote:

gcc/ChangeLog:

* config/riscv/riscv.md: Change 'r' to 'p'.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/prefetch-zicbop-ice.c: New test.
I think that rather than adjusting the constraint we should probably 
adjust the operand predicate.   In general we want the predicates to 
represent what the hardware can do.


Accepting "address_operand" here implies that the prefetch opcodes can 
accept any valid address for their mode.  That's not the case for 
prefetch.   In particular the low 5 bits in the offset must be zero 
according to the ISA specification.  This also implies that for an 
absolute address that the low 5 bits of that address must be zero.


We could certainly create a new predicate that allows immediate operands 
and one could construct testcases that would use it, but I don't see it 
as all that useful in practice.  So I'm inclined to go with the simpler 
solution here and only allow register indirect addresses with a zero offset.


That's how Wang Pencheng's patch works.  I still need to re-review 
Tsukasa's patch from a while back.  But my current inclination is to 
take Wang Pencheng's patch as I think it's more correct.  I'll likely 
take testcases from all the patches in this space.


Jeff



Re: C++ patch ping

2025-02-06 Thread Jason Merrill

On 1/29/25 12:27 PM, Jakub Jelinek wrote:

I'd like to ping 4 C++ patches:

https://gcc.gnu.org/pipermail/gcc-patches/2024-September/662379.html
https://gcc.gnu.org/pipermail/gcc-patches/2024-September/662380.html
https://gcc.gnu.org/pipermail/gcc-patches/2024-September/662381.html
   P2552R3 - On the ignorability of standard attributes - series [PR110345]
   Note, all the FE changes are already in, the above are solely missing
   tests which would allow us to mark the paper as resolved


OK.

Jason



Re: [PATCH] loop-iv, riscv: Fix get_biv_step_1 for RISC-V [PR117506]

2025-02-06 Thread Jeff Law




On 2/5/25 11:11 AM, Palmer Dabbelt wrote:

On Wed, 05 Feb 2025 09:57:56 PST (-0800), ja...@redhat.com wrote:

Hi!

The following test ICEs on RISC-V at least latently since
r14-1622-g99bfdb072e67fa3fe294d86b4b2a9f686f8d9705 which added
RISC-V specific case to get_biv_step_1 to recognize also
({zero,sign}_extend:DI (plus:SI op0 op1))

The reason for the ICE is that op1 in this case is CONST_POLY_INT
which unlike the really expected VOIDmode CONST_INTs has its own
mode and still satisfies CONSTANT_P.
GET_MODE (rhs) (SImode) is different from outer_mode (DImode), so
the function later does
    *inner_step = simplify_gen_binary (code, outer_mode,
   *inner_step, op1);
but that obviously ICEs because while *inner_step is either VOIDmode
or DImode, op1 has SImode.

The following patch fixes it by extending op1 using code so that
simplify_gen_binary can handle it.  Another option would be
to change the !CONSTANT_P (op1) 3 lines above this to
!CONST_INT_P (op1), I think it isn't very likely that we get something
useful from other constants there.

Bootstrapped/regtested on x86_64-linux and i686-linux (which doesn't
tell much because such code isn't encountered there, just that the
gcc.dg test at least works), but I have no way to test this on riscv
easily.  Could somebody test it there?


The precommit CI should pick it up, as it's got "RISC-V" in the subject 
line.  I don't see it in the list yet, though.
It picked it up, but Jakub strips a component off the pathnames in his 
patches (the leading a/b) which confused the pre-commit CI tester.


Regardless I can spin it in my own.




Or do you want the
!CONST_INT_P (op1) instead?


Jeff would know way better than I do here, but I think this is just 
trying to match addiw-type patterns and thus CONST_INT would be OK 
because we only have 12-bit constants here.
My recollection of the last touch of this code was it helps us "see 
through" the extension that commonly appears in risc-v code.  It 
addressed a relatively minor regression Jivan saw when changing the 
various risc-v expanders to expose the implicit sign extension.


Jeff



[PATCH] RISC-V: Move UNSPEC_SSP_SET and UNSPEC_SSP_TEST to correct enum

2025-02-06 Thread Craig Blackmore
stack_protect_{set,test}_ were showing up in RTL dumps as
UNSPEC_COPYSIGN and UNSPEC_FMV_X_W due to UNSPEC_SSP_SET and
UNSPEC_SSP_TEST being put in the unspecv enum instead of unspec.

gcc/ChangeLog:

* config/riscv/riscv.md: Move UNSPEC_SSP_SET and UNSPEC_SSP_TEST
to unspec enum.
---
 gcc/config/riscv/riscv.md | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 09053df1eb9..f7070766783 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -99,6 +99,10 @@
   ;; CRC unspecs
   UNSPEC_CRC
   UNSPEC_CRC_REV
+
+  ;; Stack Smash Protector
+  UNSPEC_SSP_SET
+  UNSPEC_SSP_TEST
 ])
 
 (define_c_enum "unspecv" [
@@ -123,10 +127,6 @@
   UNSPECV_FENCE
   UNSPECV_FENCE_I
 
-  ;; Stack Smash Protector
-  UNSPEC_SSP_SET
-  UNSPEC_SSP_TEST
-
   ;; CMO instructions.
   UNSPECV_CLEAN
   UNSPECV_FLUSH
-- 
2.43.0



Re: [PATCH v3] [ifcombine] avoid creating out-of-bounds BIT_FIELD_REFs [PR118514]

2025-02-06 Thread Sam James
Richard Biener  writes:

> On Thu, Feb 6, 2025 at 2:41 PM Alexandre Oliva  wrote:
>>
>> On Jan 27, 2025, Richard Biener  wrote:
>>
>> > What I was saying that the conservative tree_could_trap_p could say
>> > 'yes' to a certain encoding of a ref but 'no' to another if in reality
>> > the ref can never trap.  We of course cannot (apart from bugs in
>> > tree_could_trap_p) turn a for-sure trap into a not-trap by simply
>> > rewriting the ref.
>>
>> I see.  Yeah, that makes sense.
>>
>> > So I think we want this bit in (and it's dependences), but
>>
>> 'k, I'll split that bit out, after some clarification:
>>
>> > (I see the assert is no longer in the patch).
>>
>> That's because the assert went in as part of an earlier patch.  I take
>> it it should be backed out along with the to-be-split-out bits above,
>> right?
>
> Yes.
>
> (IIRC there's also a PR tripping over this or a similar assert)

Right, PR118706.

>
> Richard.
>
>>
>> --
>> Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
>>Free Software Activist   GNU Toolchain Engineer
>> More tolerance and less prejudice are key for inclusion and diversity
>> Excluding neuro-others for not behaving ""normal"" is *not* inclusive


Re: [PATCH] driver: -fhardened and -z lazy/-z norelro [PR117739]

2025-02-06 Thread Marek Polacek
Ping.

On Tue, Jan 21, 2025 at 11:05:46AM -0500, Marek Polacek wrote:
> Ping.
> 
> On Fri, Jan 10, 2025 at 03:07:52PM -0500, Marek Polacek wrote:
> > Ping.
> > 
> > On Fri, Dec 20, 2024 at 08:58:05AM -0500, Marek Polacek wrote:
> > > Ping.
> > > 
> > > On Tue, Nov 26, 2024 at 05:35:50PM -0500, Marek Polacek wrote:
> > > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > > > 
> > > > -- >8 --
> > > > As the manual states, using "-fhardened -fstack-protector" will produce
> > > > a warning because -fhardened wants to enable -fstack-protector-strong,
> > > > but it can't since it's been overriden by the weaker -fstack-protector.
> > > > 
> > > > -fhardened also attempts to enable -Wl,-z,relro,-z,now.  By the same
> > > > logic as above, "-fhardened -z norelro" or "-fhardened -z lazy" should
> > > > produce the same warning.  But we don't detect this combination, so
> > > > this patch fixes it.  I also renamed a variable to better reflect its
> > > > purpose.
> > > > 
> > > > Also don't check warn_hardened in process_command, since it's always
> > > > true there.
> > > > 
> > > > Also tweak wording in the manual as Jon Wakely suggested on IRC.
> > > > 
> > > > PR driver/117739
> > > > 
> > > > gcc/ChangeLog:
> > > > 
> > > > * doc/invoke.texi: Tweak wording for -Whardened.
> > > > * gcc.cc (driver_handle_option): If -z lazy or -z norelro was
> > > > specified, don't enable linker hardening.
> > > > (process_command): Don't check warn_hardened.
> > > > 
> > > > gcc/testsuite/ChangeLog:
> > > > 
> > > > * c-c++-common/fhardened-16.c: New test.
> > > > * c-c++-common/fhardened-17.c: New test.
> > > > * c-c++-common/fhardened-18.c: New test.
> > > > * c-c++-common/fhardened-19.c: New test.
> > > > * c-c++-common/fhardened-20.c: New test.
> > > > * c-c++-common/fhardened-21.c: New test.
> > > > ---
> > > >  gcc/doc/invoke.texi   |  4 ++--
> > > >  gcc/gcc.cc| 20 ++--
> > > >  gcc/testsuite/c-c++-common/fhardened-16.c |  5 +
> > > >  gcc/testsuite/c-c++-common/fhardened-17.c |  5 +
> > > >  gcc/testsuite/c-c++-common/fhardened-18.c |  5 +
> > > >  gcc/testsuite/c-c++-common/fhardened-19.c |  5 +
> > > >  gcc/testsuite/c-c++-common/fhardened-20.c |  5 +
> > > >  gcc/testsuite/c-c++-common/fhardened-21.c |  5 +
> > > >  8 files changed, 46 insertions(+), 8 deletions(-)
> > > >  create mode 100644 gcc/testsuite/c-c++-common/fhardened-16.c
> > > >  create mode 100644 gcc/testsuite/c-c++-common/fhardened-17.c
> > > >  create mode 100644 gcc/testsuite/c-c++-common/fhardened-18.c
> > > >  create mode 100644 gcc/testsuite/c-c++-common/fhardened-19.c
> > > >  create mode 100644 gcc/testsuite/c-c++-common/fhardened-20.c
> > > >  create mode 100644 gcc/testsuite/c-c++-common/fhardened-21.c
> > > > 
> > > > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> > > > index 346ac1369b8..371f723539c 100644
> > > > --- a/gcc/doc/invoke.texi
> > > > +++ b/gcc/doc/invoke.texi
> > > > @@ -7012,8 +7012,8 @@ This warning is enabled by @option{-Wall}.
> > > >  Warn when @option{-fhardened} did not enable an option from its set 
> > > > (for
> > > >  which see @option{-fhardened}).  For instance, using 
> > > > @option{-fhardened}
> > > >  and @option{-fstack-protector} at the same time on the command line 
> > > > causes
> > > > -@option{-Whardened} to warn because @option{-fstack-protector-strong} 
> > > > is
> > > > -not enabled by @option{-fhardened}.
> > > > +@option{-Whardened} to warn because @option{-fstack-protector-strong} 
> > > > will
> > > > +not be enabled by @option{-fhardened}.
> > > >  
> > > >  This warning is enabled by default and has effect only when 
> > > > @option{-fhardened}
> > > >  is enabled.
> > > > diff --git a/gcc/gcc.cc b/gcc/gcc.cc
> > > > index 92c92996401..d2718d263bb 100644
> > > > --- a/gcc/gcc.cc
> > > > +++ b/gcc/gcc.cc
> > > > @@ -305,9 +305,10 @@ static size_t dumpdir_length = 0;
> > > > driver added to dumpdir after dumpbase or linker output name.  */
> > > >  static bool dumpdir_trailing_dash_added = false;
> > > >  
> > > > -/* True if -r, -shared, -pie, or -no-pie were specified on the command
> > > > -   line.  */
> > > > -static bool any_link_options_p;
> > > > +/* True if -r, -shared, -pie, -no-pie, -z lazy, or -z norelro were
> > > > +   specified on the command line, and therefore -fhardened should not
> > > > +   add -z now/relro.  */
> > > > +static bool avoid_linker_hardening_p;
> > > >  
> > > >  /* True if -static was specified on the command line.  */
> > > >  static bool static_p;
> > > > @@ -4434,10 +4435,17 @@ driver_handle_option (struct gcc_options *opts,
> > > > }
> > > > /* Record the part after the last comma.  */
> > > > add_infile (arg + prev, "*");
> > > > +   if (strcmp (arg, "-z,lazy") == 0 || strcmp (arg, "-z,norelro") 
> > > > == 0)
> > >

Re: [PATCH v3] [ifcombine] avoid creating out-of-bounds BIT_FIELD_REFs [PR118514]

2025-02-06 Thread Alexandre Oliva
On Jan 27, 2025, Richard Biener  wrote:

> What I was saying that the conservative tree_could_trap_p could say
> 'yes' to a certain encoding of a ref but 'no' to another if in reality
> the ref can never trap.  We of course cannot (apart from bugs in
> tree_could_trap_p) turn a for-sure trap into a not-trap by simply
> rewriting the ref.

I see.  Yeah, that makes sense.

> So I think we want this bit in (and it's dependences), but

'k, I'll split that bit out, after some clarification:

> (I see the assert is no longer in the patch).

That's because the assert went in as part of an earlier patch.  I take
it it should be backed out along with the to-be-split-out bits above,
right?

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity
Excluding neuro-others for not behaving ""normal"" is *not* inclusive


[PATCH] [testsuite] [sparc] select ultrasparc for fsmuld test

2025-02-06 Thread Alexandre Oliva


vis3move-3.c expects fsmuld, that is not available on all variants of
sparc.  Select a cpu that supports it for the test.

Now, -mfix-ut699 irrevocbly disables fsmuld, so skip the test if the
test configuration uses that option.

Regstrapped on x86_64-linux-gnu, also tested with gcc-14 targeting
leon3-elf with -mfix-ut699.  Ok to install?


for  gcc/testsuite/ChangeLog

* gcc.target/sparc/vis3move-3.c: Select ultrasparc.  Skip with
-mfix-ut699.
---
 gcc/testsuite/gcc.target/sparc/vis3move-3.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/sparc/vis3move-3.c 
b/gcc/testsuite/gcc.target/sparc/vis3move-3.c
index 3b2116eec0cb3..f32ca918bac91 100644
--- a/gcc/testsuite/gcc.target/sparc/vis3move-3.c
+++ b/gcc/testsuite/gcc.target/sparc/vis3move-3.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target ilp32 } */
-/* { dg-options "-O1 -mvis3" } */
+/* { dg-skip-if "prevents fsmuld" { *-*-* } { "-mfix-ut699" } { "" } } */
+/* { dg-options "-O1 -mvis3 -mcpu=ultrasparc" } */
 
 float fnegs (float a)
 {

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity
Excluding neuro-others for not behaving ""normal"" is *not* inclusive


[PATCH] [testsuite] [sparc] use -mtune in alignment tuning test

2025-02-06 Thread Alexandre Oliva


If -mcpu=leon3 is present in the command line for a test run,
overriding it with -mcpu=niagara7 is not enough to override the tuning
for leon3 selected by the previous -mcpu option.

niagara7-align.c tests for niagara7 alignment tuning, so use -mtune
rather than -mcpu.

Regstrapped on x86_64-linux-gnu, also tested with gcc-14 targeting
leon3-elf.  Ok to install?


for  gcc/testsuite/ChangeLog

* gcc.target/sparc/niagara7-align.c: Use -mtune.
---
 gcc/testsuite/gcc.target/sparc/niagara7-align.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/sparc/niagara7-align.c 
b/gcc/testsuite/gcc.target/sparc/niagara7-align.c
index a46aac17c3294..01a8cb621d5c0 100644
--- a/gcc/testsuite/gcc.target/sparc/niagara7-align.c
+++ b/gcc/testsuite/gcc.target/sparc/niagara7-align.c
@@ -1,4 +1,4 @@
 /* { dg-do compile } */
-/* { dg-options "-falign-functions -mcpu=niagara7" } */
+/* { dg-options "-falign-functions -mtune=niagara7" } */
 /* { dg-final { scan-assembler "\.align 64" } } */
 void foo(void) {}

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity
Excluding neuro-others for not behaving ""normal"" is *not* inclusive


Re: [RFC][libgcc][PATCH] Fix two issues in libgcc/unwind-dw2-fde.c.

2025-02-06 Thread Richard Biener
On Thu, Feb 6, 2025 at 2:57 PM Qing Zhao  wrote:
>
>
>
> > On Feb 6, 2025, at 04:36, Richard Biener  wrote:
> >
> > On Wed, Feb 5, 2025 at 4:40 PM Qing Zhao  wrote:
> >>
> >>
> >>
> >>> On Feb 5, 2025, at 07:46, Richard Biener  
> >>> wrote:
> >>>
> >>> On Tue, Feb 4, 2025 at 10:12 PM Qing Zhao  wrote:
> 
>  Hi,
> 
>  One of our big application segv in libgcc code while unwinding the stack.
> 
>  This is a random crash while the application throws a c++ exception and
>  unwinds the stack. Incidents are random and never can be reproduced by 
>  any
>  test case.
> 
>  The libgcc that is used is based on GCC 8.
> 
>  Fortunately, we got some information through the stack trace, and 
>  narrowed
>  down the crash is in the routine "init_object" of 
>  libgcc/unwind-dw2-pde.c:
> 
>  777   count = classify_object_over_fdes (ob, ob->u.single);
> 
>  when loading the 2nd parameter of the call to 
>  "classify_object_over_fdes".
>  i.e, the address that is pointed by "ob->u.single" is invalid.
> 
>  And the outer caller we can track back is the following line 1066 of the 
>  routine
>  "_Unwind_Find_FDE":
> 
>  1060   /* Classify and search the objects we've not yet processed.  */
>  1061   while ((ob = unseen_objects))
>  1062 {
>  1063   struct object **p;
>  1064
>  1065   unseen_objects = ob->next;
>  1066   f = search_object (ob, pc);
> 
>  Then we suspected that the construction of the static variable 
>  "unseen_objects"
>  might have some bug.
> 
>  Then after studying the source code that constructs "unseen_objects" in
>  libgcc/unwind-dw2-pde.c, I found an issue in the routines 
>  "__register_frame"
>  and "__register_frame_table" as following:
> 
>  127 void
>  128 __register_frame (void *begin)
>  129 {
>  130   struct object *ob;
>  131
>  132   /* If .eh_frame is empty, don't register at all.  */
>  133   if (*(uword *) begin == 0)
>  134 return;
>  135
>  136   ob = malloc (sizeof (struct object));
>  137   __register_frame_info (begin, ob);
>  138 }
> 
>  181 void
>  182 __register_frame_table (void *begin)
>  183 {
>  184   struct object *ob = malloc (sizeof (struct object));
>  185   __register_frame_info_table (begin, ob);
>  186 }
>  187
> 
>  In the above, one obvious issue in the source code is at line 136, line 
>  137,
>  and line 184 and line 185: the return value of call to "malloc" is not 
>  checked
>  against NULL before it was passed as the second parameter of the 
>  follwoing call.
> 
>  This might cause unpredicted random behavior.
> 
>  I checked the latest trunk GCC libgcc, the same issue is there too.
> 
>  This patch is for the latest trunk GCC.
> 
>  Please let me know any comments on this?
> >>>
> >>> I think I've seen this elsewhere —
> >>
> >> Do you mean that you saw this problem (malloc return NULL during 
> >> register_frame) previously?
> >
> > It was probably reported to us (SUSE) from a customer/partner, also
> > from a (llvm?) JIT context.
>
> Okay.
> So, that bug has not been resolved yet?

It was resolved from our side IIRC as being a problem in the
application with the JIT and
otherwise a feature request (better unwind API for this use case,
allowing a failure mode).

Richard.

> Qing
> >
> >>> the issue is the unwind register API does
> >>> not allow for failures but I also think calling abort() is bad.
> >>
> >> Okay, I see.
> >>
> >>>
> >>> Are you calling this from a JIT context or so?
> >>
> >> I will ask the bug submitter on this to make sure.
> >>
> >>> We're assuming that at program
> >>> start malloc() will not fail.
> >>
> >> So, the routine __register_frame is only called at the _start_ of a 
> >> program?
> >>>
> >>> The proper solution is probably to add an alternate ABI which has a way 
> >>> to fail
> >>> during registration.
> >>
> >> Any suggestion on this (or any other routine I can refer to?)
> >>
> >>
> >> Thanks a lot for the help.
> >>
> >> Qing
> >>>
> >>> Richard.
> >>>
>  thanks.
> 
>  Qing
> 
> 
>  ===
> 
>  Fix two issues in libgcc/unwind-dw2-fde.c:
> 
>  A. The returned value of call to malloc is not checked against NULL.
>   This might cause unpredicted behavior during stack unwinding.
> 
>  B. Check for null begin parameter (as well as pointer to null) in
>   __register_frame_info_table_bases and __register_frame_table.
> 
>  libgcc/ChangeLog:
> 
>    * unwind-dw2-fde.c (__register_frame): Check the return value of 
>  call
>    to malloc.
>    (__register_frame_info_table_bases): Check for null begin 
>  parameter.
>    (__register_frame_table): Check the return value o

Re: [patch, libfortran] PR114618 Version 2 Format produces incorrect output when contains 1x, ok when uses " "

2025-02-06 Thread Jerry D

On 1/31/25 11:30 AM, Harald Anlauf wrote:

Hi Jerry,

Am 30.01.25 um 21:50 schrieb Jerry D:

On 1/29/25 10:30 AM, Jerry D wrote:

Follow-up:

On 1/28/25 1:33 PM, Harald Anlauf wrote:

Jerry,

while I haven't read your actual patch yet, I think the testcase
is slightly incorrect. In fact, Intel, NAG, Nvidia and AMD flang
disagree with it.

--- snip ---

The following adjustment to the patch puts this right.

 case FMT_X:
 case FMT_TR:
   consume_data_flag = 0;
   dtp->u.p.skips += f->u.n;
   tab_pos = bytes_used + dtp->u.p.skips - 1;
   dtp->u.p.pending_spaces = tab_pos - dtp->u.p.max_pos + 1;
   dtp->u.p.pending_spaces = dtp->u.p.pending_spaces < 0
 ? f->u.n : dtp->u.p.pending_spaces;

   //if (t == FMT_X && tab_pos < dtp->u.p.max_pos)
   //{
 //write_x (dtp, dtp->u.p.skips, dtp->u.p.pending_spaces);
 //dtp->u.p.skips = dtp->u.p.pending_spaces = 0;
   //}

Interestingly, it also fixes a floating point exception I ran into
while setting up another test case for part 2 of this effort. I
suspect this may be what was detected by the auto patch tester.

I will clean this up, adjust the test case for this part and re-submit.

Regards,

Jerry


Here is version 2 of the patch cleaned up and with the test case revised
accordingly.

Thank you Herald for helping with my blindness.

Regressions tested on x86_64. I will wait a bit to see if the auto patch
tester reports anything.

Otherwise, OK for trunk?


this looks mostly good.  There is only one (minor?) issue I saw:
writing output to a file, there is a discrepancy between stream-
and non-stream I/O.  Adding

    write (*, fmt1) 'RADIX', radix(pi)
    write (*, fmt2) 'RADIX', radix(pi)
    open (10, form="formatted")
    write(10, fmt1) 'RADIX', radix(pi)
    write(10, fmt2) 'RADIX', radix(pi)
    close(10)
    open (11, form="formatted", access="stream")
    write(11, fmt1) 'RADIX', radix(pi)
    write(11, fmt2) 'RADIX', radix(pi)
    close(11)

shows agreement between stdout and fort.10, while fort.11 differs:

% head fort.10 fort.11
==> fort.10 <==
RADIX.. 2
RADIX . 2

==> fort.11 <==
RADIX 2
RADIX  2...

Other brands do not show this discrepancy.


Regards,

Jerry

PS working on part 2 still.


Are you addressing this in the second part of your work?

As the test program crashes with current HEAD and earlier
anyway, your patch is already progress, but we don't want
to leave it that way.

So either commit the current version and track this issue in
a PR if not yet done, or have a look if there is a quick fix.

Thanks for the work!

Harald



After studying this a bit more I will commit the subject patch. I have 
opened a new PR to track this.


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118774

Regards,

Jerry


Re: [PATCH v2] ira: Add a target hook for callee-saved register cost scale

2025-02-06 Thread Richard Sandiford
Vladimir Makarov  writes:
> On 2/3/25 1:20 AM, H.J. Lu wrote:
>> commit 3b9b8d6cfdf59337f4b7ce10ce92a98044b2657b
>> Author: Surya Kumari Jangala 
>> Date:   Tue Jun 25 08:37:49 2024 -0500
>>
>>  ira: Scale save/restore costs of callee save registers with block 
>> frequency
>>
>> scales the cost of saving/restoring a callee-save hard register in epilogue
>> and prologue with the entry block frequency, which, if not optimizing for
>> size, is 1, for all targets.  As the result, callee-saved registers
>> may not be used to preserve local variable values across calls on some
>> targets, like x86.  Add a target hook for the callee-saved register cost
>> scale in epilogue and prologue used by IRA.  The default version of this
>> target hook returns 1 if optimizing for size, otherwise returns the entry
>> block frequency.  Add an x86 version of this target hook to restore the
>> old behavior prior to the above commit.
>
> This is a complicated problem resulted in many tries to fix it in some 
> general way.
>
> In general I am agree with Surya's approach to scale cost of reg 
> saves/restores somehow.  But the general approach, although solved some 
> problems, also created a lot of new ones.  May be because IRA does not 
> take some other aspects of using callee saved regs.  And some of them 
> were addressed by other patches. e.g. recently proposed by Surya and one 
> for PR118497.
>
> I also agree with Richard Sandiford's comment that we should avoid 
> introducing the new hooks for RA and I actually tried to stick to this 
> policy for a long time.  But I don't see another solution to introducing 
> the new hook in this case.  It is hard to figure out generally in RA 
> that saves/restores will be insns different from ld/st (e.g. x86 
> push/pop) and that they will be cheaper.
>
> So after some time to think about the patch I decided to approve the RA 
> part of the patch.  I also hope that the work on this problem will 
> continue (e.g. improving default and target hook implementations and 
> documentation how to better use it).

For the record, I strongly object to this.  The hook just seems like a
complete hack to me.  Even if we accept that there is target-specific
information in play, the hook isn't providing that information.

In contrast to what you said above, my objection isn't to having hooks
-- those are often needed and good.  What bothers me is that the hook
isn't well designed.  Hooks should provide information rather than
override code by brute force.

On the other hand, I accept that you're (rightly!) the maintainer.

Thanks,
Richard


[PATCH] x86: Verify that PUSH/POP can be skipped

2025-02-06 Thread H.J. Lu
For

---
int f(int);

int advance(int dz)
{
if (dz > 0)
return (dz + dz) * dz;
else
return dz * f(dz);
}
---

Before r15-1619-g3b9b8d6cfdf593

advance(int):
pushrbx
mov ebx, edi
testedi, edi
jle .L2
imulebx, edi
lea eax, [rbx+rbx]
pop rbx
ret
.L2:
callf(int)
imuleax, ebx
pop rbx
ret

After

 advance(int):
testedi, edi
jle .L2
imuledi, edi
lea eax, [rdi+rdi]
ret
.L2:
sub rsp, 24
mov DWORD PTR [rsp+12], edi
callf(int)
imuleax, DWORD PTR [rsp+12]
add rsp, 24
ret

There's no call in if branch, it's not optimal to push rbx at the entry
of the function, it can be sinked to else branch. When "jle .L2" is not
taken, it can save one push instruction.  Update pr111673.c to verify
that this optimization isn't turned off.

PR rtl-optimization/111673
* gcc.target/i386/pr111673.c: Verify that PUSH/POP can be
skipped.


-- 
H.J.
From 6606ec5573e724295bdceb572ddc2813f021709f Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Fri, 7 Feb 2025 13:49:30 +0800
Subject: [PATCH] x86: Verify that PUSH/POP can be skipped

For

---
int f(int);

int advance(int dz)
{
if (dz > 0)
return (dz + dz) * dz;
else
return dz * f(dz);
}
---

Before r15-1619-g3b9b8d6cfdf593

advance(int):
pushrbx
mov ebx, edi
testedi, edi
jle .L2
imulebx, edi
lea eax, [rbx+rbx]
pop rbx
ret
.L2:
callf(int)
imuleax, ebx
pop rbx
ret

After

 advance(int):
testedi, edi
jle .L2
imuledi, edi
lea eax, [rdi+rdi]
ret
.L2:
sub rsp, 24
mov DWORD PTR [rsp+12], edi
callf(int)
imuleax, DWORD PTR [rsp+12]
add rsp, 24
ret

There's no call in if branch, it's not optimal to push rbx at the entry
of the function, it can be sinked to else branch. When "jle .L2" is not
taken, it can save one push instruction.  Update pr111673.c to verify
that this optimization isn't turned off.

	PR rtl-optimization/111673
	* gcc.target/i386/pr111673.c: Verify that PUSH/POP can be
	skipped.

Signed-off-by: H.J. Lu 
---
 gcc/testsuite/gcc.target/i386/pr111673.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/gcc/testsuite/gcc.target/i386/pr111673.c b/gcc/testsuite/gcc.target/i386/pr111673.c
index 8d8a5a764f0..b9ceacf7651 100644
--- a/gcc/testsuite/gcc.target/i386/pr111673.c
+++ b/gcc/testsuite/gcc.target/i386/pr111673.c
@@ -1,5 +1,19 @@
 /* { dg-do compile { target { ! ia32 } } } */
 /* { dg-options "-O2 -fdump-rtl-pro_and_epilogue" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {^\t?\.}  } } */
+
+/*
+**advance:
+**.LFB0:
+**	.cfi_startproc
+**	testl	%edi, %edi
+**	jle	.L2
+**	imull	%edi, %edi
+**	leal	\(%rdi,%rdi\), %eax
+**	ret
+**...
+*/
 
 /* Verify there is an early return without the prolog and shrink-wrap
the function. */
-- 
2.48.1



[PATCH] aarch64: Fix bootstrap with --enable-checking=release [PR118771]

2025-02-06 Thread Andrew Pinski
With release checking we get an uninitialization warning
inside aarch64_split_move because of jump threading for the case of `npieces==0`
but `npieces` is never 0 (but there is no way the compiler can know that.
So this fixes the issue by adding a `gcc_assert` to the function which asserts
that `npieces > 0` and fixes the uninitialization warning.

Bootstrapped and tested on aarch64-linux-gnu (with and without 
--enable-checking=release).

The warning:

aarch64.cc: In function 'void aarch64_split_move(rtx, rtx, machine_mode)':
aarch64.cc:3418:31: error: '*(rtx_def**)((char*)&dst_pieces + 
offsetof(auto_vec,auto_vec::m_data[0]))' may be used 
uninitialized [-Werror=maybe-uninitialized]
 3418 |   if (reg_overlap_mentioned_p (dst_pieces[0], src))
  |   ^~~~
aarch64.cc:3408:20: note: 'dst_pieces' declared here
 3408 |   auto_vec dst_pieces, src_pieces;
  |^~

PR target/118771
gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_split_move): Assert that npieces is
greater than 0.

Signed-off-by: Andrew Pinski 
---
 gcc/config/aarch64/aarch64.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index c1e40200806..f5f23f6ff4b 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -3407,6 +3407,9 @@ aarch64_split_move (rtx dst, rtx src, machine_mode 
single_mode)
GET_MODE_SIZE (single_mode)).to_constant ();
   auto_vec dst_pieces, src_pieces;
 
+  /* There should be at least one piece. */
+  gcc_assert (npieces > 0);
+
   for (unsigned int i = 0; i < npieces; ++i)
 {
   auto off = i * GET_MODE_SIZE (single_mode);
-- 
2.43.0



Re: [PATCH v2] ira: Add a target hook for callee-saved register cost scale

2025-02-06 Thread Richard Biener
On Thu, Feb 6, 2025 at 11:40 PM Vladimir Makarov  wrote:
>
>
> On 2/6/25 4:54 PM, Richard Sandiford wrote:
>
> Vladimir Makarov  writes:
>
> This is a complicated problem resulted in many tries to fix it in some
> general way.
>
> In general I am agree with Surya's approach to scale cost of reg
> saves/restores somehow.  But the general approach, although solved some
> problems, also created a lot of new ones.  May be because IRA does not
> take some other aspects of using callee saved regs.  And some of them
> were addressed by other patches. e.g. recently proposed by Surya and one
> for PR118497.
>
> I also agree with Richard Sandiford's comment that we should avoid
> introducing the new hooks for RA and I actually tried to stick to this
> policy for a long time.  But I don't see another solution to introducing
> the new hook in this case.  It is hard to figure out generally in RA
> that saves/restores will be insns different from ld/st (e.g. x86
> push/pop) and that they will be cheaper.
>
> So after some time to think about the patch I decided to approve the RA
> part of the patch.  I also hope that the work on this problem will
> continue (e.g. improving default and target hook implementations and
> documentation how to better use it).
>
> For the record, I strongly object to this.  The hook just seems like a
> complete hack to me.  Even if we accept that there is target-specific
> information in play, the hook isn't providing that information.
>
> In contrast to what you said above, my objection isn't to having hooks
> -- those are often needed and good.  What bothers me is that the hook
> isn't well designed.  Hooks should provide information rather than
> override code by brute force.
>
> On the other hand, I accept that you're (rightly!) the maintainer.
>
> I also don't like the hook implementation for x86-64 (although this is a 
> matter of target maintainers).  All these costs look voodoo and random to me.
>
> But this problem is longing for more than half year.  I spent a lot of time 
> too on this.  Patches were submitted and reverted and nobody did find so far 
> any solution satisfying all GCC tests.  If somebody finds a solution without 
> the hook, I will be glad to get rid off the hook.  Also the related PRs are 
> marked as P1 ones, it means people think they are important (I am not sure 
> about this myself).  Without fixing them (or downgrading them) there will be 
> no GCC release.  So I am in difficult situation with these PRs and need some 
> resolution.

Just to chime in as the one who likely made those PRs P1.  'P1' here
is really about the testsuite FAIL,
how to resolve it is up to target maintainers - P1 should make them
look, and adjusting the testcase
(or even XFAILing it) is a valid resolution of the P1 regression.

I do agree with Richards comment on the proposed hook to be badly
designed and I do like
Honzas suggestion to instead hookize the biasing.

Richard.

>
>


Re: [PATCH] [testsuite] [sparc] use -mtune in alignment tuning test

2025-02-06 Thread Eric Botcazou
> niagara7-align.c tests for niagara7 alignment tuning, so use -mtune
> rather than -mcpu.
> 
> Regstrapped on x86_64-linux-gnu, also tested with gcc-14 targeting
> leon3-elf.  Ok to install?
> 
> 
> for  gcc/testsuite/ChangeLog
> 
>   * gcc.target/sparc/niagara7-align.c: Use -mtune.

OK.

-- 
Eric Botcazou




Re: [PATCH] [testsuite] [sparc] select ultrasparc for fsmuld test

2025-02-06 Thread Eric Botcazou
> vis3move-3.c expects fsmuld, that is not available on all variants of
> sparc.  Select a cpu that supports it for the test.
> 
> Now, -mfix-ut699 irrevocbly disables fsmuld, so skip the test if the
> test configuration uses that option.
> 
> Regstrapped on x86_64-linux-gnu, also tested with gcc-14 targeting
> leon3-elf with -mfix-ut699.  Ok to install?
> 
> 
> for  gcc/testsuite/ChangeLog
> 
>   * gcc.target/sparc/vis3move-3.c: Select ultrasparc.  Skip with
>   -mfix-ut699.

OK.

-- 
Eric Botcazou




[Patch] [gcn] Fix gfx906's sramecc setting

2025-02-06 Thread Tobias Burnus

ROCm 6.3.2 does not like my patch for reasons that I do not understand;
https://gcc.gnu.org/pipermail/gcc-patches/2025-February/675200.html

Until that's sorted, I decided to split off two obvious fixes;
I might suggest some further changes, but the full patch has to
wait until generic really works.

* * *

The attached patch adds '...' to the device macro to avoid touching
this when adding new fields.

And it fixes an issue with gfx906, which shows up when compiling,
e.g. as
  gcc -g -fopenmp -foffload-options=amdgcn-amdhsa=-march=gfx906 file.c
(with offloading code in file.c).

The reason is that '-g' causes mkoffload.cc to create a .o file with
debugging symbols - and that .o file is linked with the GCN device
files. While that file does not contain executable code, the ELF
header still needs to match the GCN .o files as otherwise the linker
complains that there is a mismatch. For the line above, it complains
about: "ld: error: incompatible sramecc: /tmp/ccLhwZle.o".

And in line with the GCC 14 code in mkoffload.cc and with the entries
in https://llvm.org/docs/AMDGPUUsage.html#amdgpu-processor-table for
gfx906 + the llvm-mc / lld implementation, that means that the sramecc
type is 'any' and not unsupported.

OK for mainline?

Tobias
[gcn] Fix gfx906's sramecc setting

When compiling with -g, mkoffload.cc creates a device object file itself;
however, in order that the linker dos not complain, the ELF flags must
match what the compiler / linker does. For gfx906, the assembler defaults
to sramecc = any, but gcn-devices.def contained unsupported, which is not
the same - causing link errors. That's a regression caused by commit
r15-4540-ga6b26e5ea09779 - which can be best seen by looking at the
changes to mkoffload.cc.

Additionally, this commit adds '...' to the GCN_DEVICE #define in gcn.cc
to make it agnostic to the addition of fields.

 gcc/config/gcn/gcn-devices.def | 2 +-
 gcc/config/gcn/gcn.cc  | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/config/gcn/gcn-devices.def b/gcc/config/gcn/gcn-devices.def
index 7d47a7b495d..a8b21a358b4 100644
--- a/gcc/config/gcn/gcn-devices.def
+++ b/gcc/config/gcn/gcn-devices.def
@@ -91,7 +91,7 @@ GCN_DEVICE(gfx900, GFX900, 0x2c, ISA_GCN5,
 
 GCN_DEVICE(gfx906, GFX906, 0x2f, ISA_GCN5,
 	   /* XNACK default */ HSACO_ATTR_OFF,
-	   /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
+	   /* SRAM_ECC default */ HSACO_ATTR_ANY,
 	   /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
 	   /* CU mode */ HSACO_ATTR_UNSUPPORTED,
 	   /* Max ISA VGPRs */ 256,
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 4200cfaf006..82fc6ff1e41 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -101,7 +101,8 @@ static hash_map lds_allocs;
 /* Import all the data from gcn-devices.def.
The PROCESSOR_GFXnnn should be indices for this table.  */
 const struct gcn_device_def gcn_devices[] = {
-#define GCN_DEVICE(name, NAME, ELF, ISA, XNACK, SRAMECC, WAVE64, CU, VGPRS, GEN_VER,ARCH_FAM) \
+#define GCN_DEVICE(name, NAME, ELF, ISA, XNACK, SRAMECC, WAVE64, CU, VGPRS, \
+		   GEN_VER, ARCH_FAM, ...) \
 {PROCESSOR_ ## NAME, #name, #NAME, ISA, XNACK, SRAMECC, WAVE64, CU, VGPRS, \
  GEN_VER, #ARCH_FAM},
 #include "gcn-devices.def"


Re: [PATCH] [testsuite] [sparc] skip sparc-ret-1 with -mfix-ut699

2025-02-06 Thread Eric Botcazou
> Option -mfix-ut699 changes the set of instructions that can be placed
> in the delay slot, preventing the expected insn placement.  Skip the
> test if the option is present.
> 
> Regstrapped on x86_64-linux-gnu, also tested with gcc-14 targeting
> leon3-elf with -mfix-ut699.  Ok to install?
> 
> 
> for  gcc/testsuite/ChangeLog
> 
>   * gcc.target/sparc/sparc-ret-1.c: Skip on -mfix-ut699.

OK.

-- 
Eric Botcazou




Re: [PATCH] RISC-V: Fix ICE when prefetching addresses less than 12 bits for zicbop

2025-02-06 Thread Jeff Law




On 1/19/25 7:32 AM, Jin Ma wrote:

gcc/ChangeLog:

* config/riscv/riscv.md: Change 'r' to 'p'.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/prefetch-zicbop-ice.c: New test.
As expected, there's more to this issue than just adjusting constraints 
or predicates.


As Tsukasa noted a while back the current builtin is largely useless as 
the most natural ways to pass instruction addresses simply don't work.


The builtin expansion path we're using is custom to the backend and as a 
result we don't get the usual methods to force arguments into registers, 
so we'd need to turn the relevant patters into expanders with a 
corresponding pattern.  We'll need to make the operands fairly loose on 
the expander, then force things into registers for the pattern.


And there's the question of the extraneous imm5 operand in the pattern. 
And there's a nonzero possibility that we need to adjust the API as well.


The net is I think we should fix the obvious ICE, but that the deeper 
issues should wait for gcc-16's new development window.



Jeff


Re: [PATCH v2] ira: Add a target hook for callee-saved register cost scale

2025-02-06 Thread Jan Hubicka
> > +/* Implement TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE.  */
> > + 
> > +static int
> > +ix86_ira_callee_saved_register_cost_scale (int)
> > +{
> > +  return 1;
> > +}
> > +

> > return cl;
> >   }
> > +int
> > +default_ira_callee_saved_register_cost_scale (int)
> > +{
> > +  return (optimize_size
> > + ? 1
> > + : REG_FREQ_FROM_BB (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
> > +}
> > +

I am not sure how this makes sense - why x86 would be significantly
different from other targets?

I think the only bit non-standard thing is that prologue/epilogue code
can use push/pop that is shorter than mov used to save caller saved
registers.

I went through few testcases:

void d();
void a()
{   
int b;
asm ("use %0":"=r" (b));
d();
asm volatile (""::"r" (b));
}
compiler with -O2 -fira-verbose=1 gives:

Popping a0(r99,l0)  -- (0=12000,12000) (1=12000,12000) (2=12000,12000) 
(4=12000,12000) (5=12000,12000) (36=12000,12000) (37=12000,12000) 
(38=12000,12000) (39=12000,12000) (3=11000,11000) (6=11000,11000) 
(40=11000,11000) (41=11000,11000) (42=11000,11000) (43=11000,11000)

load and save costs are 6. So spill pair is 12 weighted by 1000 that is
REG_FREQ_MAX.

Register 0 (EAX) has cost 12000 which makes sense to me:
  - load and save costs are 6, combined spill pair is 12
  - REG_FREQ_MAX is 1000 and since function has only one BB, it has
maximal frequency, so we get 12000.

Register 3 (first caller saved) has cost 11000.  This comes from:
add_cost = ((ira_memory_move_cost[mode][rclass][0]
 + ira_memory_move_cost[mode][rclass][1])
* saved_nregs / hard_regno_nregs (hard_regno,
  mode) - 1)
  ^^
  here

   * (optimize_size ? 1 :
  REG_FREQ_FROM_BB (ENTRY_BLOCK_PTR_FOR_FN (cfun)));

There is no comment why -1, but I suppose it is there to biass costs to
use prologue/epilogue instad of caller save sequence when runtime cost
estimate is even.

Now for
void d();
void a()
{
for (int i = 0; i < 100; i++)
  d();
int b;
asm ("use %0":"=r" (b));
d();
asm volatile (""::"r" (b));
}

I get

  Popping a0(r100,l0)  -- (0=120,120) (1=120,120) (2=120,120) (4=120,120) 
(5=120,120) (36=120,120) (37=120,120) (38=120,120) (39=120,120) (3=0,0) 
(6=110,110) (40=110,110) (41=110,110) (42=110,110) (43=110,110)

This also makes sense to me, since there is loop the basic block has
lower frequency of 10, thus costs are scaled down.

void d();
int cnd;
void a()
{
int b;
asm ("use %0":"=r" (b));

if (__builtin_expect_with_probability (cnd, 1, 0.8))
  d();
asm volatile (""::"r" (b));
}

I get

 Popping a0(r100,l0)  -- (0=9600,9600) (1=9600,9600) (2=9600,9600) 
(4=9600,9600) (5=9600,9600) (36=9600,9600) (37=9600,9600) (38=9600,9600) 
(39=9600,9600) (3=11000,11000) (6=11000,11000) (40=11000,11000) 
(41=11000,11000) (42=11000,11000) (43=11000,11000)

which seems also correct.  It is better to use caller saved registr
since call to d() has lower frequency then the entry basic block. This
is what gcc 13 and this patch gets wrong

 Popping a0(r100,l0)  -- (1=9600,9600) (2=9600,9600) (4=9600,9600) 
(5=9600,9600) (36=9600,9600) (37=9600,9600) (38=9600,9600) (39=9600,9600) 
(3=11,11) (6=11,11) (40=11,11) (41=11,11) (42=11,11) (43=11,11)

Due to missing scaling factor we think that using callee saved registr
is win while it is not.  GCC13 gets this wrong even for probability 0.

Looking into PRs referneced in the patch:
PR111673 is the original bug that motivated correcting the cost (adding
 the scale by entry block frequency)
PR115932 is cris-elf I don't know how to bencmark easily.
PR116028 seems to be about shrink wrapping in

void f(int *i)
{
if (!i)
return;
else
{
__builtin_printf("Hi");
*i=0;
}
}

here I see tha tthe cost model misses the fact that epilogue will be
shrink-wrapped so both caller and callee saving will result in one spill
after the early exit.

PR117081 is about regression in povray. The reducted testcase:

void foo (void);
void bar (void);

int
test (int a)
{
  int r;

  if (r = -a)
foo ();
  else
bar ();

  return r;
}

shows that we now use caller saved register (EAX) to hold the return value 
which yields longer code.  The costs are
Popping a0(r98,l0)  -- (0=13000,13000) (3=15000,15000) (6=15000,15000) 
(40=15000,15000) (41=15000,15000) (42=15000,15000) (43=15000,15000)

here 15000 is 11000+4000 where I think 4000 is cost of 2 reg-reg moves
multiplied by REG_FREQ_MAX.   This seems correct. GCC 13 uses callee
saved register and produces:

 :
   0:   53  push   %rbx 

Re: [PATCH] [testsuite] [sparc] skip tls tests if emulated

2025-02-06 Thread Eric Botcazou
> A number of tls tests expect TLS-specific relocations, that are not
> present when tls is emulated, as on e.g. leon3-elf.  Skip the tests
> when tls is emulated.
> 
> Regstrapped on x86_64-linux-gnu, also tested with gcc-14 targeting
> leon3-elf.  Ok to install?
> 
> 
> for  gcc/testsuite/ChangeLog
> 
>   * gcc.target/sparc/tls-ld-int16.c: Skip when tls is emulated.
>   * gcc.target/sparc/tls-ld-int32.c: Likewise.
>   * gcc.target/sparc/tls-ld-int8.c: Likewise.
>   * gcc.target/sparc/tls-ld-uint16.c: Likewise.
>   * gcc.target/sparc/tls-ld-uint32.c: Likewise.
>   * gcc.target/sparc/tls-ld-uint8.c: Likewise.

OK.

-- 
Eric Botcazou




Re: [PATCH v2] ira: Add a target hook for callee-saved register cost scale

2025-02-06 Thread Vladimir Makarov


On 2/6/25 4:54 PM, Richard Sandiford wrote:

Vladimir Makarov writes:

This is a complicated problem resulted in many tries to fix it in some
general way.

In general I am agree with Surya's approach to scale cost of reg
saves/restores somehow.  But the general approach, although solved some
problems, also created a lot of new ones.  May be because IRA does not
take some other aspects of using callee saved regs.  And some of them
were addressed by other patches. e.g. recently proposed by Surya and one
for PR118497.

I also agree with Richard Sandiford's comment that we should avoid
introducing the new hooks for RA and I actually tried to stick to this
policy for a long time.  But I don't see another solution to introducing
the new hook in this case.  It is hard to figure out generally in RA
that saves/restores will be insns different from ld/st (e.g. x86
push/pop) and that they will be cheaper.

So after some time to think about the patch I decided to approve the RA
part of the patch.  I also hope that the work on this problem will
continue (e.g. improving default and target hook implementations and
documentation how to better use it).

For the record, I strongly object to this.  The hook just seems like a
complete hack to me.  Even if we accept that there is target-specific
information in play, the hook isn't providing that information.

In contrast to what you said above, my objection isn't to having hooks
-- those are often needed and good.  What bothers me is that the hook
isn't well designed.  Hooks should provide information rather than
override code by brute force.

On the other hand, I accept that you're (rightly!) the maintainer.

I also don't like the hook implementation for x86-64 (although this is a 
matter of target maintainers).  All these costs look voodoo and random 
to me.


But this problem is longing for more than half year.  I spent a lot of 
time too on this.  Patches were submitted and reverted and nobody did 
find so far any solution satisfying all GCC tests.  If somebody finds a 
solution without the hook, I will be glad to get rid off the hook.  Also 
the related PRs are marked as P1 ones, it means people think they are 
important (I am not sure about this myself).  Without fixing them (or 
downgrading them) there will be no GCC release.  So I am in difficult 
situation with these PRs and need some resolution.




Re: [Patch] [GCN] Handle generic ISA names in libgomp's plugin-gcn.c

2025-02-06 Thread Tobias Burnus

After spending some time with the debugger, I am now convinced that
ROCm 6.3.2 does not yet support generic. The amd-staging branch at
https://github.com/ROCm/ROCR-Runtime/ support does, albeit only after
the tag rocm-6.3.2. However, the released ROCm 6.3.2 does not match
that tagged commit as it seems to contain at least the much newer
commit eec21304 (but not the generic support that appeared in an
in-between commit.) [AOMP is also different; it seems as if 20.0-1
does not support it, but 20.0-2 might; AOMP does not seem to have
commit eec21304 to confuse things.]

* * *

The attached patch now adds gfx9-generic - alongside the existing
gfx{10-3,1}-generic and all gfx* that are enabled by those.

See previous thread for the related discussions.

OK for mainline?

(This patch depends on the just submitted patch:
"[Patch] [gcn] Fix gfx906's sramecc setting",
https://gcc.gnu.org/pipermail/gcc-patches/2025-February/675251.html )

* * *

RFC:
The patch currently does not document any gfx*-generic nor the
gfx902 etc. I wonder whether a follow-up patch should just add
the non-generic ones with "(Experimental)" to invoke.texi?

Or any better ideas regarding what to make available now
and what only later, once a generic-supporting ROCm is available?

In other words: What bits of:
https://gcc.gnu.org/pipermail/gcc-patches/2025-February/675200.html ?

Tobias
[gcn] Add gfx9-generic and generic-associated gfx*

This patch adds gfx9-generic, completing the gfx*-generic support.
It also adds all gfx* devices that are part of any of the gfx*-generic,
i.e. gfx902, gfx904, gfx909, gfx1031, gfx1032, gfx1033, gfx1034,
gfx1035, gfx1101, gfx1102, gfx1150, gfx1151, gfx1152, and gfx1153.

gcc/ChangeLog:

	* config/gcn/gcn-devices.def (GCN_DEVICE): Add gfx9-generic,
	gfx902, gfx904, gfx909, gfx1031, gfx1032, gfx1033, gfx1034,
	gfx1035, gfx1101, gfx1102, gfx1150, gfx1151, gfx1152, and gfx1153.
	Add a currently unused column linking, a specific ISA to a generic
	one (if it exists).
	* config/gcn/gcn-tables.opt: Regenerate

 gcc/config/gcn/gcn-devices.def | 202 ++---
 gcc/config/gcn/gcn-tables.opt  |  45 +
 2 files changed, 236 insertions(+), 11 deletions(-)

diff --git a/gcc/config/gcn/gcn-devices.def b/gcc/config/gcn/gcn-devices.def
index a8b21a358b4..af1420382e2 100644
--- a/gcc/config/gcn/gcn-devices.def
+++ b/gcc/config/gcn/gcn-devices.def
@@ -71,6 +71,10 @@
 	generated by the used llvm-mc assembler.
   10 "Architecture Family Name"  (string, external)
 	Used to #define '__GFX<...>__'.
+  11 "GENERIC NAME" (text, external)
+	The name of the generic ISA this device is compatible with or "NONE",
+	where the generic name is the NAME (field 2) of the associated
+	generic device.
 
 Fields marked "external", above, have values defined elsewhere (HSA, ROCM,
 LLVM, ELF, etc.) and must have matching definitions here.  Fields marked
@@ -86,7 +90,30 @@ GCN_DEVICE(gfx900, GFX900, 0x2c, ISA_GCN5,
 	   /* CU mode */ HSACO_ATTR_UNSUPPORTED,
 	   /* Max ISA VGPRs */ 256,
 	   /* Generic code obj version */ 0,  /* non-generic */
-	   /* Architecture Family */ GFX9
+	   /* Architecture Family */ GFX9,
+	   /* Generic Name */ GFX9_GENERIC
+	   )
+
+GCN_DEVICE(gfx902, GFX902, 0x2d, ISA_GCN5,
+	   /* XNACK default */ HSACO_ATTR_OFF,
+	   /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
+	   /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
+	   /* CU mode */ HSACO_ATTR_UNSUPPORTED,
+	   /* Max ISA VGPRs */ 256,
+	   /* Generic code obj version */ 0,  /* non-generic */
+	   /* Architecture Family */ GFX9,
+	   /* Generic Name */ GFX9_GENERIC
+	   )
+
+GCN_DEVICE(gfx904, GFX904, 0x2e, ISA_GCN5,
+	   /* XNACK default */ HSACO_ATTR_OFF,
+	   /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
+	   /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
+	   /* CU mode */ HSACO_ATTR_UNSUPPORTED,
+	   /* Max ISA VGPRs */ 256,
+	   /* Generic code obj version */ 0,  /* non-generic */
+	   /* Architecture Family */ GFX9,
+	   /* Generic Name */ GFX9_GENERIC
 	   )
 
 GCN_DEVICE(gfx906, GFX906, 0x2f, ISA_GCN5,
@@ -96,7 +123,8 @@ GCN_DEVICE(gfx906, GFX906, 0x2f, ISA_GCN5,
 	   /* CU mode */ HSACO_ATTR_UNSUPPORTED,
 	   /* Max ISA VGPRs */ 256,
 	   /* Generic code obj version */ 0,  /* non-generic */
-	   /* Architecture Family */ GFX9
+	   /* Architecture Family */ GFX9,
+	   /* Generic Name */ GFX9_GENERIC
 	   )
 
 GCN_DEVICE(gfx908, GFX908, 0x30, ISA_CDNA1,
@@ -106,7 +134,19 @@ GCN_DEVICE(gfx908, GFX908, 0x30, ISA_CDNA1,
 	   /* CU mode */ HSACO_ATTR_UNSUPPORTED,
 	   /* Max ISA VGPRs */ 256,
 	   /* Generic code obj version */ 0,  /* non-generic */
-	   /* Architecture Family */ GFX9
+	   /* Architecture Family */ GFX9,
+	   /* Generic Name */ NONE
+	   )
+
+GCN_DEVICE(gfx909, GFX909, 0x31, ISA_GCN5,
+	   /* XNACK default */ HSACO_ATTR_ANY,
+	   /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
+	   /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
+	   /* CU mode */ HSACO_ATTR_UNSUPPORTED,
+	   /* Max ISA VGPRs */ 256,
+	   /* Ge

Patch ping^5 (Re: [PATCH] analyzer: Handle nonnull_if_nonzero attribute [PR117023])

2025-02-06 Thread Jakub Jelinek
On Tue, Jan 21, 2025 at 04:59:16PM +0100, Jakub Jelinek wrote:
> On Tue, Jan 07, 2025 at 01:49:04PM +0100, Jakub Jelinek wrote:
> > On Wed, Dec 18, 2024 at 12:15:15PM +0100, Jakub Jelinek wrote:
> > > On Fri, Dec 06, 2024 at 05:07:40PM +0100, Jakub Jelinek wrote:
> > > > I'd like to ping the
> > > > https://gcc.gnu.org/pipermail/gcc-patches/2024-November/668699.html
> > > > patch.
> > > > 
> > > > The patches it depended on are already committed and there is a patch
> > > > which depends on this (the builtins shift from nonnull to 
> > > > nonnull_if_nonzero
> > > > where needed) which has been approved but can't be committed.
> > > 
> > > Gentle ping on this one.
> > 
> > Ping.
> 
> Ping again.

Ping.

> > > > > 2024-11-14  Jakub Jelinek  
> > > > > 
> > > > >   PR c/117023
> > > > > gcc/analyzer/
> > > > >   * sm-malloc.cc (malloc_state_machine::on_stmt): Handle
> > > > >   also nonnull_if_nonzero attributes.
> > > > > gcc/testsuite/
> > > > >   * c-c++-common/analyzer/call-summaries-malloc.c
> > > > >   (test_use_without_check): Pass 4 rather than sz to memset.
> > > > >   * c-c++-common/analyzer/strncpy-1.c (test_null_dst,
> > > > >   test_null_src): Pass 42 rather than count to strncpy.

Jakub



Re: [PATCH v2] ira: Add a target hook for callee-saved register cost scale

2025-02-06 Thread Vladimir Makarov



On 2/3/25 1:20 AM, H.J. Lu wrote:

commit 3b9b8d6cfdf59337f4b7ce10ce92a98044b2657b
Author: Surya Kumari Jangala 
Date:   Tue Jun 25 08:37:49 2024 -0500

 ira: Scale save/restore costs of callee save registers with block frequency

scales the cost of saving/restoring a callee-save hard register in epilogue
and prologue with the entry block frequency, which, if not optimizing for
size, is 1, for all targets.  As the result, callee-saved registers
may not be used to preserve local variable values across calls on some
targets, like x86.  Add a target hook for the callee-saved register cost
scale in epilogue and prologue used by IRA.  The default version of this
target hook returns 1 if optimizing for size, otherwise returns the entry
block frequency.  Add an x86 version of this target hook to restore the
old behavior prior to the above commit.


This is a complicated problem resulted in many tries to fix it in some 
general way.


In general I am agree with Surya's approach to scale cost of reg 
saves/restores somehow.  But the general approach, although solved some 
problems, also created a lot of new ones.  May be because IRA does not 
take some other aspects of using callee saved regs.  And some of them 
were addressed by other patches. e.g. recently proposed by Surya and one 
for PR118497.


I also agree with Richard Sandiford's comment that we should avoid 
introducing the new hooks for RA and I actually tried to stick to this 
policy for a long time.  But I don't see another solution to introducing 
the new hook in this case.  It is hard to figure out generally in RA 
that saves/restores will be insns different from ld/st (e.g. x86 
push/pop) and that they will be cheaper.


So after some time to think about the patch I decided to approve the RA 
part of the patch.  I also hope that the work on this problem will 
continue (e.g. improving default and target hook implementations and 
documentation how to better use it).


We have a lot of tests checking expected code generation (some of them 
are wrong, e.g. expecting particular hard regs).  But those are small 
tests.  We should pay more attention to check impact on SPEC for such 
patches. It is more time-consuming but it is the right way.  A patch can 
introduce worse code generation for small test but can be a win for real 
applications as ones in SPEC.  Any RA in industrial compiler can not 
generate optimal code for most cases.  So I would not mark such GCC test 
failures as P1 PRs.


The RA patch is OK for me for committing it into the trunk. Thank you.


PR rtl-optimization/111673
PR rtl-optimization/115932
PR rtl-optimization/116028
PR rtl-optimization/117081
PR rtl-optimization/117082
PR rtl-optimization/118497
* ira-color.cc (assign_hard_reg): Call the target hook for the
callee-saved register cost scale in epilogue and prologue.
* target.def (ira_callee_saved_register_cost_scale): New target
hook.
* targhooks.cc (default_ira_callee_saved_register_cost_scale):
New.
* targhooks.h (default_ira_callee_saved_register_cost_scale):
Likewise.
* config/i386/i386.cc (ix86_ira_callee_saved_register_cost_scale):
New.
(TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE): Likewise.
* doc/tm.texi: Regenerated.
* doc/tm.texi.in (TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE):
New.

Signed-off-by: H.J. Lu 
---
  gcc/config/i386/i386.cc | 11 +++
  gcc/doc/tm.texi |  8 
  gcc/doc/tm.texi.in  |  2 ++
  gcc/ira-color.cc|  3 +--
  gcc/target.def  | 12 
  gcc/targhooks.cc|  8 
  gcc/targhooks.h |  1 +
  7 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index f89201684a8..3128973ba79 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20600,6 +20600,14 @@ ix86_class_likely_spilled_p (reg_class_t rclass)
return false;
  }
  
+/* Implement TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE.  */

+
+static int
+ix86_ira_callee_saved_register_cost_scale (int)
+{
+  return 1;
+}
+
  /* Return true if a set of DST by the expression SRC should be allowed.
 This prevents complex sets of likely_spilled hard regs before split1.  */
  
@@ -27078,6 +27086,9 @@ ix86_libgcc_floating_mode_supported_p

  #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS 
ix86_preferred_output_reload_class
  #undef TARGET_CLASS_LIKELY_SPILLED_P
  #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
+#undef TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE
+#define TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE \
+  ix86_ira_callee_saved_register_cost_scale
  
  #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST

  #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 0de24eda6f0..9f42913a4ef 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/d

[Patch, fortran] PR118750 - [14/15 Regression] ICE on associate with elemental function with polymorphic array dummy argument

2025-02-06 Thread Paul Richard Thomas
This regression must have been generated during my ASSOCIATE campaign; I am
not sure when or where. Fortunately the fix is extremely simple and is
explained in the ChangeLog.

Regression tests fine. OK for both affected branches?

Paul


Change.Logs
Description: Binary data
diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc
index 7f73d53e31e..a961843edc4 100644
--- a/gcc/fortran/resolve.cc
+++ b/gcc/fortran/resolve.cc
@@ -10741,7 +10741,7 @@ resolve_assoc_var (gfc_symbol* sym, bool resolve_target)
 			  || gfc_is_ptr_fcn (target));
 
   /* Finally resolve if this is an array or not.  */
-  if (target->expr_type == EXPR_FUNCTION
+  if (target->expr_type == EXPR_FUNCTION && target->rank == 0
   && (sym->ts.type == BT_CLASS || sym->ts.type == BT_DERIVED))
 {
   gfc_expression_rank (target);
diff --git a/gcc/testsuite/gfortran.dg/associate_72.f90 b/gcc/testsuite/gfortran.dg/associate_72.f90
new file mode 100644
index 000..993ebdfd5a7
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/associate_72.f90
@@ -0,0 +1,26 @@
+! { dg-do compile }
+! { dg-options "-fdump-tree-original" }
+!
+! Test the fix for the 14/15 regression PR118750
+!
+! Contributed by Damian Rouson  
+!
+  implicit none
+
+  type string_t
+character(:), allocatable :: str
+  end type
+
+  associate(str_a => get_string([string_t ("abcd"),string_t ("ef")]))
+if (str_a(1)%str//str_a(2)%str /= "abcdef") STOP 1 ! Returned "Invalid array reference at (1)"
+  end associate
+
+contains
+
+  type(string_t) elemental function get_string(mold)
+class(string_t), intent(in) :: mold
+get_string = string_t(mold%str)
+  end function
+
+end 
+! { dg-final { scan-tree-dump-times "array01_string_t str_a" 1 "original" } }


Re: [PATCH v2] c++: Reject cdtors and conversion operators with a single * as return type [PR118306]

2025-02-06 Thread Jason Merrill

On 2/5/25 2:21 PM, Simon Martin wrote:

Hi Jason,

On 4 Feb 2025, at 21:23, Jason Merrill wrote:


On 2/4/25 3:03 PM, Jason Merrill wrote:

On 2/4/25 11:45 AM, Simon Martin wrote:

On 4 Feb 2025, at 17:17, Jason Merrill wrote:


On 2/4/25 10:56 AM, Simon Martin wrote:

Hi Jason,

On 4 Feb 2025, at 16:39, Jason Merrill wrote:


On 1/15/25 9:56 AM, Jason Merrill wrote:

On 1/15/25 7:24 AM, Simon Martin wrote:

Hi,

On 14 Jan 2025, at 23:31, Jason Merrill wrote:


On 1/14/25 2:13 PM, Simon Martin wrote:

On 10 Jan 2025, at 19:10, Andrew Pinski wrote:

On Fri, Jan 10, 2025 at 3:18 AM Simon Martin

wrote:


We currently accept the following invalid code (EDG and
MSVC
do
as
well)


clang does too:
https://github.com/llvm/llvm-project/issues/121706 .




Note it might be useful if a testcase with multiply `*` is
included



too:
```
struct A {
       A ();
};
```

Thanks, makes sense to add those. Done in the attached
updated
revision,
successfully tested on x86_64-pc-linux-gnu.



+/* Check that it's OK to declare a function at ID_LOC with
the
indicated TYPE,
+   TYPE_QUALS and DECLARATOR.  SFK indicates the kind of
special
function (if
+   any) that this function is.  OPTYPE is the type given
in
a
conversion
    operator declaration, or the class type for a
constructor/destructor.
    Returns the actual return type of the function;
that
may
be
different
    than TYPE if an error occurs, or for certain
special
functions.
*/
@@ -12361,8 +12362,19 @@ check_special_function_return_type
(special_function_kind sfk,
     tree type,
     tree optype,
     int type_quals,
+    const cp_declarator
*declarator,
+    location_t id_loc,


id_loc should be the same as declarator->id_loc?

You’re right.


     const
location_t*
locations)
     {
+  /* If TYPE is unspecified, DECLARATOR, if set, should not
represent a pointer
+ or a reference type.  */
+  if (type == NULL_TREE
+  && declarator
+  && (declarator->kind == cdk_pointer
+  || declarator->kind == cdk_reference))
+    error_at (id_loc, "expected unqualified-id before %qs
token",
+  declarator->kind == cdk_pointer ? "*" :
"&");


...and id_loc isn't the location of the ptr-operator, it's the



location of the identifier, so this indicates the wrong
column.
I
think using declarator->id_loc makes sense, just not
pretending
it's
the location of the *.

Good catch, thanks.


Let's give diagnostics more like the others later in the
function
instead of trying to emulate cp_parser_error.

Makes sense. This is what the updated patch does, successfully
tested on
x86_64-pc-linux-gnu. OK for GCC 16?


OK.


Does this also fix 118304?  If so, let's go ahead and apply it
to
GCC
15.

I have checked just now, and we still ICE for 118304’s testcase
with
that fix.


Why doesn't the preeexisting

type = void_type_node;

in check_special_function_return_type fix the return type and avoid



the ICE?



We hit the gcc_assert at method.cc:3593, that Marek’s fix



bypasses.


Yes, but why doesn't check_special_function_return_type prevent that?


Ah, because we call it before walking the declarator.  We need to
check again later, perhaps in grokfndecl, that the type is correct.
Perhaps instead of your patch.

One “issue” with adding another check in or close to grokfndecl is
that DECLARATOR will have “been moved to the ID”, and the fact that
we had a CDK_POINTER kind is “lost”. We could obviously somehow
propagate this information, but there might be something easier.


The information isn't lost: it's now reflected in the (wrong) return 
type.  One place it would make sense to check would be



if (ctype && (sfk == sfk_constructor
  || sfk == sfk_destructor))
  {
/* We are within a class's scope. If our declarator name 
   is the same as the class name, and we are defining
   a function, then it is a constructor/destructor, and  
   therefore returns a void type.  */


Here 'type' is still the return type, we haven't gotten to 
build_function_type yet.


Jason



Re: [RFC][libgcc][PATCH] Fix two issues in libgcc/unwind-dw2-fde.c.

2025-02-06 Thread Qing Zhao


> On Feb 6, 2025, at 09:25, Richard Biener  wrote:
> 
> On Thu, Feb 6, 2025 at 2:57 PM Qing Zhao  wrote:
>> 
>> 
>> 
>>> On Feb 6, 2025, at 04:36, Richard Biener  wrote:
>>> 
>>> On Wed, Feb 5, 2025 at 4:40 PM Qing Zhao  wrote:
 
 
 
> On Feb 5, 2025, at 07:46, Richard Biener  
> wrote:
> 
> On Tue, Feb 4, 2025 at 10:12 PM Qing Zhao  wrote:
>> 
>> Hi,
>> 
>> One of our big application segv in libgcc code while unwinding the stack.
>> 
>> This is a random crash while the application throws a c++ exception and
>> unwinds the stack. Incidents are random and never can be reproduced by 
>> any
>> test case.
>> 
>> The libgcc that is used is based on GCC 8.
>> 
>> Fortunately, we got some information through the stack trace, and 
>> narrowed
>> down the crash is in the routine "init_object" of 
>> libgcc/unwind-dw2-pde.c:
>> 
>> 777   count = classify_object_over_fdes (ob, ob->u.single);
>> 
>> when loading the 2nd parameter of the call to 
>> "classify_object_over_fdes".
>> i.e, the address that is pointed by "ob->u.single" is invalid.
>> 
>> And the outer caller we can track back is the following line 1066 of the 
>> routine
>> "_Unwind_Find_FDE":
>> 
>> 1060   /* Classify and search the objects we've not yet processed.  */
>> 1061   while ((ob = unseen_objects))
>> 1062 {
>> 1063   struct object **p;
>> 1064
>> 1065   unseen_objects = ob->next;
>> 1066   f = search_object (ob, pc);
>> 
>> Then we suspected that the construction of the static variable 
>> "unseen_objects"
>> might have some bug.
>> 
>> Then after studying the source code that constructs "unseen_objects" in
>> libgcc/unwind-dw2-pde.c, I found an issue in the routines 
>> "__register_frame"
>> and "__register_frame_table" as following:
>> 
>> 127 void
>> 128 __register_frame (void *begin)
>> 129 {
>> 130   struct object *ob;
>> 131
>> 132   /* If .eh_frame is empty, don't register at all.  */
>> 133   if (*(uword *) begin == 0)
>> 134 return;
>> 135
>> 136   ob = malloc (sizeof (struct object));
>> 137   __register_frame_info (begin, ob);
>> 138 }
>> 
>> 181 void
>> 182 __register_frame_table (void *begin)
>> 183 {
>> 184   struct object *ob = malloc (sizeof (struct object));
>> 185   __register_frame_info_table (begin, ob);
>> 186 }
>> 187
>> 
>> In the above, one obvious issue in the source code is at line 136, line 
>> 137,
>> and line 184 and line 185: the return value of call to "malloc" is not 
>> checked
>> against NULL before it was passed as the second parameter of the 
>> follwoing call.
>> 
>> This might cause unpredicted random behavior.
>> 
>> I checked the latest trunk GCC libgcc, the same issue is there too.
>> 
>> This patch is for the latest trunk GCC.
>> 
>> Please let me know any comments on this?
> 
> I think I've seen this elsewhere —
 
 Do you mean that you saw this problem (malloc return NULL during 
 register_frame) previously?
>>> 
>>> It was probably reported to us (SUSE) from a customer/partner, also
>>> from a (llvm?) JIT context.
>> 
>> Okay.
>> So, that bug has not been resolved yet?
> 
> It was resolved from our side IIRC as being a problem in the
> application with the JIT and
> otherwise a feature request (better unwind API for this use case,
> allowing a failure mode).

Okay, I see.

Thanks.

Qing
> 
> Richard.
> 
>> Qing
>>> 
> the issue is the unwind register API does
> not allow for failures but I also think calling abort() is bad.
 
 Okay, I see.
 
> 
> Are you calling this from a JIT context or so?
 
 I will ask the bug submitter on this to make sure.
 
> We're assuming that at program
> start malloc() will not fail.
 
 So, the routine __register_frame is only called at the _start_ of a 
 program?
> 
> The proper solution is probably to add an alternate ABI which has a way 
> to fail
> during registration.
 
 Any suggestion on this (or any other routine I can refer to?)
 
 
 Thanks a lot for the help.
 
 Qing
> 
> Richard.
> 
>> thanks.
>> 
>> Qing
>> 
>> 
>> ===
>> 
>> Fix two issues in libgcc/unwind-dw2-fde.c:
>> 
>> A. The returned value of call to malloc is not checked against NULL.
>> This might cause unpredicted behavior during stack unwinding.
>> 
>> B. Check for null begin parameter (as well as pointer to null) in
>> __register_frame_info_table_bases and __register_frame_table.
>> 
>> libgcc/ChangeLog:
>> 
>>  * unwind-dw2-fde.c (__register_frame): Check the return value of 
>> call
>>  to mal

Re: [PATCH] loop-iv, riscv: Fix get_biv_step_1 for RISC-V [PR117506]

2025-02-06 Thread Jeff Law




On 2/5/25 10:57 AM, Jakub Jelinek wrote:

Hi!

The following test ICEs on RISC-V at least latently since
r14-1622-g99bfdb072e67fa3fe294d86b4b2a9f686f8d9705 which added
RISC-V specific case to get_biv_step_1 to recognize also
({zero,sign}_extend:DI (plus:SI op0 op1))

The reason for the ICE is that op1 in this case is CONST_POLY_INT
which unlike the really expected VOIDmode CONST_INTs has its own
mode and still satisfies CONSTANT_P.
GET_MODE (rhs) (SImode) is different from outer_mode (DImode), so
the function later does
 *inner_step = simplify_gen_binary (code, outer_mode,
*inner_step, op1);
but that obviously ICEs because while *inner_step is either VOIDmode
or DImode, op1 has SImode.

The following patch fixes it by extending op1 using code so that
simplify_gen_binary can handle it.  Another option would be
to change the !CONSTANT_P (op1) 3 lines above this to
!CONST_INT_P (op1), I think it isn't very likely that we get something
useful from other constants there.

Bootstrapped/regtested on x86_64-linux and i686-linux (which doesn't
tell much because such code isn't encountered there, just that the
gcc.dg test at least works), but I have no way to test this on riscv
easily.  Could somebody test it there?  Or do you want the
!CONST_INT_P (op1) instead?

2025-02-05  Jakub Jelinek  

PR rtl-optimization/117506
* loop-iv.cc (get_biv_step_1): For {ZERO,SIGN}_EXTEND
of PLUS apply {ZERO,SIGN}_EXTEND to op1.

* gcc.dg/pr117506.c: New test.
* gcc.target/riscv/pr117506.c: New test.
LGTM.  While there was a regression reported in the run in my tester, it 
was actually from a patch from Richard S. that just landed which appears 
to need a trivial scan-asm adjustment.


Jeff



Re: [Patch, fortran] PR118750 - [14/15 Regression] ICE on associate with elemental function with polymorphic array dummy argument

2025-02-06 Thread Andre Vehreschild

Hi Paul,

looks fine to me.

Thanks for the patch,
Andre
Andre Vehreschild * Kreuzherrenstr. 8 * 52062 Aachen
Tel.: +49 178 3637538 * ve...@gmx.de
Am 6. Februar 2025 16:49:18 schrieb Paul Richard Thomas
:

This regression must have been generated during my ASSOCIATE campaign; I am
not sure when or where. Fortunately the fix is extremely simple and is
explained in the ChangeLog.

Regression tests fine. OK for both affected branches?

Paul




Re: [patch,avr] Add support for a Compact Vector Table

2025-02-06 Thread Denis Chertykov
Georg-Johann Lay  writes:

> Some AVR devices support a Compact Vector Table.
>
> The support is provided by means of a startup code file
> crt-cvt.o from AVR-LibC that can be linked instead
> of the traditional crt.o.
>
> This patch adds a new command line option -mcvt that links
> that CVT startup code (or issues an error when the device
> doesn't support a CVT).
>
> Ok for trunk?

Ok.

Denis


[PATCH] RISC-V: Fix ratio in vsetvl fuse rule [PR115703].

2025-02-06 Thread Robin Dapp
Hi,

in PR115703 we fuse two vsetvls:

Fuse curr info since prev info compatible with it:
  prev_info: VALID (insn 438, bb 2)
Demand fields: demand_ge_sew demand_non_zero_avl
SEW=32, VLMUL=m1, RATIO=32, MAX_SEW=64
TAIL_POLICY=agnostic, MASK_POLICY=agnostic
AVL=(reg:DI 0 zero)
VL=(reg:DI 9 s1 [312])
  curr_info: VALID (insn 92, bb 20)
Demand fields: demand_ratio_and_ge_sew demand_avl
SEW=64, VLMUL=m1, RATIO=64, MAX_SEW=64
TAIL_POLICY=agnostic, MASK_POLICY=agnostic
AVL=(const_int 4 [0x4])
VL=(nil)
  prev_info after fused: VALID (insn 438, bb 2)
Demand fields: demand_ratio_and_ge_sew demand_avl
SEW=64, VLMUL=mf2, RATIO=64, MAX_SEW=64
TAIL_POLICY=agnostic, MASK_POLICY=agnostic
AVL=(const_int 4 [0x4])
VL=(nil).

The result is vsetvl zero, zero, e64, mf2, ta, ma.  The previous vsetvl
set vl = 4 but here we wrongly set it to vl = 2.  As all the following
vsetvls only ever change the ratio we never recover.

The issue is quite difficult to trigger.  The last known bad commit is
r15-3458-g5326306e7d9d36.  With that commit the output is wrong but
-fno-schedule-insns makes it correct.  From the next commit on the issue is
latent.  I still added the PR's test as scan and run check even if they don't
trigger right now (and I'm not sure the run test will ever fail, but well).
I verified that the patch fixes the issue when applied on top of
r15-3458-g5326306e7d9d36.

Regtested on rv64gcv_zvl512b.  Let's see what the CI says.

Regards
 Robin

PR target/115703

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc: Use max_sew for calculating the
new LMUL.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr115703-run.c: New test.
* gcc.target/riscv/rvv/autovec/pr115703.c: New test.
---
 gcc/config/riscv/riscv-vsetvl.cc  |  3 +-
 .../riscv/rvv/autovec/pr115703-run.c  | 44 +++
 .../gcc.target/riscv/rvv/autovec/pr115703.c   | 38 
 3 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703-run.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703.c

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 72c4c59514e..82284624a24 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1756,7 +1756,8 @@ private:
   inline void use_max_sew_and_lmul_with_next_ratio (vsetvl_info &prev,
const vsetvl_info &next)
   {
-prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ()));
+int max_sew = MAX (prev.get_sew (), next.get_sew ());
+prev.set_vlmul (calculate_vlmul (max_sew, next.get_ratio ()));
 use_max_sew (prev, next);
 prev.set_ratio (next.get_ratio ());
   }
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703-run.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703-run.c
new file mode 100644
index 000..0c2c3d7d4fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703-run.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-require-effective-target rvv_zvl256b_ok } */
+/* { dg-options "-O3 -march=rv64gcv_zvl256b -mabi=lp64d -fwhole-program 
-fwrapv" } */
+
+int a, i;
+unsigned long b;
+unsigned c, f;
+long long d = 1;
+short e, m;
+long g, h;
+
+__attribute__ ((noipa))
+void check (unsigned long long x)
+{
+  if (x != 13667643351234938049ull)
+__builtin_abort ();
+}
+
+int main() {
+  for (int q = 0; q < 2; q += 1) {
+for (short r = 0; r < 2; r += 1)
+  for (char s = 0; s < 6; s++)
+for (short t = 0; t < 011; t += 12081 - 12080)
+  for (short u = 0; u < 11; u++) {
+a = ({ a > 1 ? a : 1; });
+b = ({ b > 5 ? b : 5; });
+for (short j = 0; j < 2; j = 2080)
+  c = ({ c > 030 ? c : 030; });
+for (short k = 0; k < 2; k += 2080)
+  d *= 7;
+e *= 10807;
+f = ({ f > 3 ? f : 3; });
+  }
+for (int l = 0; l < 21; l += 1)
+  for (int n = 0; n < 16; n++) {
+g = ({ m ? g : m; });
+for (char o = 0; o < 7; o += 1)
+  h *= 3;
+i = ({ i < 0 ? i : 0; });
+  }
+  }
+
+  check (d);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703.c
new file mode 100644
index 000..fc147fefa98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv_zvl256b -fwhole-program -fwrapv" } */
+
+int a, i;
+unsigned long b;
+unsigned c, f;
+long long d = 1;
+short e, m;
+long g, h;
+
+int main() {
+  for (int q = 0; q < 2; q += 1) {
+for (short r = 0; r < 2; r += 1)
+  for (char s = 0; s < 6; s++)
+for (short t = 0; t < 011; t

Re: [Patch, fortran] PR118750 - [14/15 Regression] ICE on associate with elemental function with polymorphic array dummy argument

2025-02-06 Thread Paul Richard Thomas
Hi Andre,

Thanks for the review - pushed as commit r15-7389.

Paul


On Thu, 6 Feb 2025 at 16:22, Andre Vehreschild  wrote:

> Hi Paul,
>
> looks fine to me.
>
> Thanks for the patch,
> Andre
> Andre Vehreschild * Kreuzherrenstr. 8 * 52062 Aachen
> Tel.: +49 178 3637538 * ve...@gmx.de
>
> Am 6. Februar 2025 16:49:18 schrieb Paul Richard Thomas <
> paul.richard.tho...@gmail.com>:
>
>> This regression must have been generated during my ASSOCIATE campaign; I
>> am not sure when or where. Fortunately the fix is extremely simple and is
>> explained in the ChangeLog.
>>
>> Regression tests fine. OK for both affected branches?
>>
>> Paul
>>
>>
>


Re: [Patch] [GCN] Handle generic ISA names in libgomp's plugin-gcn.c

2025-02-06 Thread Tobias Burnus

Updated version; changes:
* Fixed SRAMECC setting for gfx906 - should be ANY (as in GCC 14)
  not UNSUPPORTED. (Shows up as link error when compiling with '-g'.)

* Fixed generic handling for gfx9* - copy'n'paste bug: gfx11-generic
  is clearly wrong.

* Implemented autoconvert from -march=gfx to -march=gfx*-generic
  if the latter but not the former has configured run-time libraries.
  (with a warning).

* Largely improved the runtime diagnostic.

* It also uses non-deprecated functions for the loader - in the hope
  that it would help, but it makes no difference. As the hsa.h header
  shipping with GCC is quite old, I think changing it should be fine
  as I doubt that there is any libhsa*.so in use that doesn't support
  it. - On the other hand, as it doesn't solve a known issue, we
  don't have to update it.

* * *

Thus, it should work in principle. - BUT:

I have now a ROCm 6.3.2 available - and it DOES NOT work.
ROCm 6.3.2 supports gfx*generic as a grep shows.

Trying a hello-world program with -march=gfx9-generic and running it with
LOADER_ENABLE_LOGGING=1 set, it fails with: LoaderError: code object's 
ISA (amdgcn-amd-amdhsa--gfx9-generic) is invalid and returns 
HSA_STATUS_ERROR_INVALID_ISA_NAME.Looking at 
https://github.com/ROCm/ROCR-Runtime I have *no* idea why it fails - the 
name "amd-amdhsa--gfx9-generic"

should be in the map as generic name, there is nothing suspicious
with the lookup - and the fail is before there is any compatibility
check regarding the actual hardware, which is the next check.

'gfx11-generic' is a bit longer supported and fails identically.
(The hardware is gfx906, but that shouldn't matter.)

As I assume that someone has tested it, some variant should work.
The question is only what. The library as too many indirections
but all code is rather simple and I fail to see why it wouldn't
work. (Some code is not obvious but as gfx906 works, those
bits are probably fine.)

* * *
Thus, if anyone has an idea how to make progress from here ... BTW: The 
gfx906 SRAMECC fix should be committed separately and soon - before we 
forget it and before GCC 15 ships. Tobias
[GCN] Handle generic ISA names in libgomp's plugin-gcn.c

There are plenty of AMD GPUs, each having its own ISA; to reduce the number
of compile-for ISA, some of them were bundled under a generic name, supporting
either all features or a large subset of it. Initial support for this was
added in r15-4550-g1bdeebe69b71bf. On the assembler/linker side, it requires 
LLVM 19 - and on the runtime side, ROCm 6.3.x (released in December 2024).

This commit adds gfx9-generic alongside gfx10-3-generic and gfx11-generic
and all previously unsupported specific devices supported by either of
them, i.e. gfx902, gfx904, gfx909, gfx1031, gfx1032, gfx1033, gfx1034,
gfx1035, gfx1101, gfx1102, gfx1150, gfx1151, gfx1152, and gfx1153.
Those are marked as 'experimental' as they have not been tested and
also because not all features of, e.g., gfx115x are supported - only
those of the common subset provided as by gfx11-generic.

While gfx10-3-generic and gfx11-generic cover all GPUs, gfx9-generic
only covers a small subset - and, in particular, the compute MI100
and MI200 devices are not included.

On the runtime side, the assumption that all generic code will work
and only when the ROCm (actually: rocr) fails to load, a specific
error message is printed, providing some suggestions how to fix a
fail.

On the compiler side: In order to make it easier to ship only the
generic libraries, the compiler will fallback to the generic version
if a (multi)lib exists for it but not for the specific version.

gcc/ChangeLog:

	* config/gcn/gcn-devices.def (GCN_DEVICE):  Add field to for
	the generic ISA for the device.
	Fix sramecc setting of gfx906 - should be ANY not UNSUPPORTED.
	* config/gcn/gcn-tables.opt: Regenerate.
	* config/gcn/gcn.cc (gcn_devices): Add missing tailing ... to
	GCN_DEVICE macro #define.
	* config/gcn/mkoffload.cc (enum elf_arch_code): Add
	EF_AMDGPU_MACH_AMDGCN_NONE.
	(elf_arch): Use enum elf_arch_code as type.
	(tool_cleanup): Silence warning by removing tailing '.' from error.
	(get_arch_name): Return enum elf_arch_code.
	(elf_arch_generic_update): New; replace specific device by
	generic device if only the latter has a multilib.
	(main): Call it; replace -march= as needed.
	* doc/install.texi (AMD GCN): Document gfx*-generic handling
	and dependencies.
	* doc/invoke.texi (AMD GCN): Update -march= for
	gfx{9,10-3,11}-generic and added compatible gfx* devices.

libgomp/ChangeLog:

	* plugin/plugin-gcn.c (EF_AMDGPU_MACH): Add EF_AMDGPU_MACH_AMDGCN_NONE.
	(EF_AMDGPU_GENERIC_VERSION_V, EF_AMDGPU_GENERIC_VERSION_OFFSET,
	GET_GENERIC_VERSION): Define.
	(elf_gcn_isa_is_generic, get_generic_isa_code): New.
	(isa_matches_agent): Add 'failed' flag, if false, accept all generic
	if true, print extended diagnostic.
	(create_and_finalize_hsa_program): Call it also for
	ERROR_INVALID_CODE_OBJECT and output more diagnostic if the error is
	

[PATCH] [testsuite] [sparc] skip sparc-ret-1 with -mfix-ut699

2025-02-06 Thread Alexandre Oliva


Option -mfix-ut699 changes the set of instructions that can be placed
in the delay slot, preventing the expected insn placement.  Skip the
test if the option is present.

Regstrapped on x86_64-linux-gnu, also tested with gcc-14 targeting
leon3-elf with -mfix-ut699.  Ok to install?


for  gcc/testsuite/ChangeLog

* gcc.target/sparc/sparc-ret-1.c: Skip on -mfix-ut699.
---
 gcc/testsuite/gcc.target/sparc/sparc-ret-1.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.target/sparc/sparc-ret-1.c 
b/gcc/testsuite/gcc.target/sparc/sparc-ret-1.c
index 808e8a98f0e89..ef459c5016e96 100644
--- a/gcc/testsuite/gcc.target/sparc/sparc-ret-1.c
+++ b/gcc/testsuite/gcc.target/sparc/sparc-ret-1.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-skip-if "no register windows" { *-*-* } { "-mflat" } { "" } } */
+/* { dg-skip-if "prevents expected asm" { *-*-* } { "-mfix-ut699" } { "" } } */
 /* { dg-require-effective-target ilp32 } */
 /* { dg-options "-mcpu=ultrasparc -O" } */
 

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity
Excluding neuro-others for not behaving ""normal"" is *not* inclusive


Re: [PATCH v3] [ifcombine] avoid creating out-of-bounds BIT_FIELD_REFs [PR118514]

2025-02-06 Thread Richard Biener
On Thu, Feb 6, 2025 at 2:41 PM Alexandre Oliva  wrote:
>
> On Jan 27, 2025, Richard Biener  wrote:
>
> > What I was saying that the conservative tree_could_trap_p could say
> > 'yes' to a certain encoding of a ref but 'no' to another if in reality
> > the ref can never trap.  We of course cannot (apart from bugs in
> > tree_could_trap_p) turn a for-sure trap into a not-trap by simply
> > rewriting the ref.
>
> I see.  Yeah, that makes sense.
>
> > So I think we want this bit in (and it's dependences), but
>
> 'k, I'll split that bit out, after some clarification:
>
> > (I see the assert is no longer in the patch).
>
> That's because the assert went in as part of an earlier patch.  I take
> it it should be backed out along with the to-be-split-out bits above,
> right?

Yes.

(IIRC there's also a PR tripping over this or a similar assert)

Richard.

>
> --
> Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
>Free Software Activist   GNU Toolchain Engineer
> More tolerance and less prejudice are key for inclusion and diversity
> Excluding neuro-others for not behaving ""normal"" is *not* inclusive


Re: [PATCH] c++: Fix up handling of for/while loops with declarations in condition [PR86769]

2025-02-06 Thread Jason Merrill

On 2/5/25 12:02 PM, Jakub Jelinek wrote:

Hi!

As the following testcase show (note, just for-{3,4,6,7,8}.C, constexpr-86769.C
and stmtexpr27.C FAIL without the patch, the rest is just that I couldn't
find coverage for some details and so added tests we don't regress or for5.C
is from Marek's attempt in the PR), we weren't correctly handling for/while
loops with declarations as conditions.

The C++ FE has the simplify_loop_decl_cond function which transforms
such loops as mentioned in the comment:
 while (A x = 42) { }
 for (; A x = 42;) { }
becomes
 while (true) { A x = 42; if (!x) break; }
 for (;;) { A x = 42; if (!x) break; }
For for loops this is not enough, as the x declaration should be
still in scope when expr (if any) is executed, and injecting the
expr expression into the body in the FE needs to have the continue
label in between, something normally added by the c-family
genericization.  One of my thoughts was to just add there an artificial
label plus the expr expression in the FE and tell c-family about that
label, so that it doesn't create it but uses what has been emitted.

Unfortunately break/continue are resolved to labels only at c-family
genericization time and by moving the condition (and its preparation
statements such as the DECL_EXPR) into the body (and perhaps by also
moving there the (increment) expr as well) we resolve incorrectly any
break/continue statement appearing in cond (or newly perhaps also expr)
expression(s).  While in standard C++ one can't have something like that
there, with statement expressions they are possible there, and we actually
have testsuite coverage that when they appear outside of the body of the
loop they bind to an outer loop rather than the inner one.  When the FE
moves everything into the body, c-family can't distinguish any more between
the user body vs. the condition/preparation statements vs. expr expression.

So, the following patch instead keeps them separate and does the merging
only at the c-family loop genericization time.  For that the patch
introduces two new operands of FOR_STMT and WHILE_STMT, *_COND_PREP
which is forced to be a BIND_EXPR which contains the preparation statements
like DECL_EXPR, and the initialization of that variable, so basically what
{FOR,WHILE}_BODY has when we encounter the function dealing with this,
except one optional CLEANUP_STMT at the end which holds cleanups for the
variable if it needs to be destructed.  This CLEANUP_STMT is removed and
the actual cleanup saved into another new operand, *_COND_CLEANUP.

The c-family loop genericization handles such loops roughly the way
https://eel.is/c++draft/stmt.for and https://eel.is/c++draft/stmt.while
specifies, so the body is (if *_COND_CLEANUP is non-NULL)
{ A x = 42; try { if (!x) break; body; cont_label: expr; } finally { cleanup; } 
}
and otherwise
{ A x = 42; if (!x) break; body; cont_label: expr; }
i.e. the *_COND, *_BODY, optional continue label, FOR_EXPR  are appended
into the body of the *_COND_PREP BIND_EXPR.

And when doing constexpr evaluation of such FOR/WHILE loops, we treat
it similarly, first evaluate *_COND_PREP except the
   for (tree decl = BIND_EXPR_VARS (t); decl; decl = DECL_CHAIN (decl))
 destroy_value_checked (ctx, decl, non_constant_p);
part of BIND_EXPR handling for it, then evaluate *_COND (and decide based
on whether it was false or true like before), then *_BODY, then FOR_EXPR,
then *_COND_CLEANUP (similarly to the way how CLEANUP_STMT handling handles
that) and finally do those destroy_value_checked.

Note, the constexpr-86769.C testcase FAILs with both clang++ and MSVC (note,
the rest of tests PASS with clang++) but I believe it must be just a bug
in those compilers, new int is done in all the constructors and delete is
done in the destructor, so when clang++ reports one of the new int weren't
deallocated during constexpr evaluation I don't see how that would be
possible.  When the same test has all the constexpr stuff, all the new int
are properly deleted at runtime when compiled by both compilers and valgrind
is happy about it, no leaks.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2025-02-05  Jakub Jelinek  

PR c++/86769
gcc/c-family/
* c-common.def (FOR_STMT): Add 2 operands and document them.
(WHILE_STMT): Likewise.
* c-common.h (WHILE_COND_PREP, WHILE_COND_CLEANUP): Define.
(FOR_COND_PREP, FOR_COND_CLEANUP): Define.
* c-gimplify.cc (genericize_c_loop): Add COND_PREP and COND_CLEANUP
arguments, handle them if they are non-NULL.
(genericize_for_stmt, genericize_while_stmt, genericize_do_stmt):
Adjust callers.
gcc/c/
* c-parser.cc (c_parser_while_statement): Add 2 further NULL_TREE
operands to build_stmt.
(c_parser_for_statement): Likewise.
gcc/cp/
* semantics.cc (simplify_loop_decl_cond): Remove.
(adjust_loop_decl_cond): New function.
 

[PATCH] c++, v2: Fix up handling of for/while loops with declarations in condition [PR86769]

2025-02-06 Thread Jakub Jelinek
On Thu, Feb 06, 2025 at 10:40:52AM -0500, Jason Merrill wrote:
Thanks.

> > @@ -7202,6 +7210,28 @@ cxx_eval_loop_expr (const constexpr_ctx
> > cxx_eval_constant_expression (ctx, expr, vc_prvalue,
> >   non_constant_p, overflow_p,
> >   jump_target);
> > +
> > + if (cond_cleanup && !*non_constant_p)
> > +   {
> > + iloc_sentinel ils (loc);
> 
> What does this do, given that you're setting the locations on the cleanups?
> 
> I guess this was copied from the CLEANUP_STMT handling, but I'm not sure why
> it's there either; commenting it out there doesn't seem to break any
> constexpr* tests.

Indeed, it also looked weird to me.

> Let's share this repeated code with the copy below by using a lambda, as in
> the attached.

So like this?

So far tested with
GXX_TESTSUITE_STDS=98,11,14,17,20,23,26 make check-g++ \
RUNTESTFLAGS="debug.exp=cleanup1.C dg.exp='name-independent-decl*.C 
redeclaration-*.C pr18770.C for*.C stmtexpr* constexpr-86769.C' 
old-deja.exp=cond.C"
Ok for trunk if it passes full bootstrap/regtest?

2025-02-06  Jakub Jelinek  
Jason Merrill  

PR c++/86769
gcc/c-family/
* c-common.def (FOR_STMT): Add 2 operands and document them.
(WHILE_STMT): Likewise.
* c-common.h (WHILE_COND_PREP, WHILE_COND_CLEANUP): Define.
(FOR_COND_PREP, FOR_COND_CLEANUP): Define.
* c-gimplify.cc (genericize_c_loop): Add COND_PREP and COND_CLEANUP
arguments, handle them if they are non-NULL.
(genericize_for_stmt, genericize_while_stmt, genericize_do_stmt):
Adjust callers.
gcc/c/
* c-parser.cc (c_parser_while_statement): Add 2 further NULL_TREE
operands to build_stmt.
(c_parser_for_statement): Likewise.
gcc/cp/
* semantics.cc (set_one_cleanup_loc): New function.
(set_cleanup_locs): Use it.
(simplify_loop_decl_cond): Remove.
(adjust_loop_decl_cond): New function.
(begin_while_stmt): Add 2 further NULL_TREE operands to build_stmt.
(finish_while_stmt_cond): Call adjust_loop_decl_cond instead of
simplify_loop_decl_cond.
(finish_while_stmt): Call do_poplevel also on WHILE_COND_PREP if
non-NULL and also use pop_stmt_list rather than do_poplevel for
WHILE_BODY in that case.  Call set_one_cleanup_loc.
(begin_for_stmt): Add 2 further NULL_TREE operands to build_stmt.
(finish_for_cond): Call adjust_loop_decl_cond instead of
simplify_loop_decl_cond.
(finish_for_stmt): Call do_poplevel also on FOR_COND_PREP if non-NULL
and also use pop_stmt_list rather than do_poplevel for FOR_BODY in
that case.  Call set_one_cleanup_loc.
* constexpr.cc (cxx_eval_loop_expr): Handle
{WHILE,FOR}_COND_{PREP,CLEANUP}.
(check_for_return_continue): Handle {WHILE,FOR}_COND_PREP.
(potential_constant_expression_1): RECUR on
{WHILE,FOR}_COND_{PREP,CLEANUP}.
gcc/testsuite/
* g++.dg/diagnostic/redeclaration-7.C: New test.
* g++.dg/expr/for3.C: New test.
* g++.dg/expr/for4.C: New test.
* g++.dg/expr/for5.C: New test.
* g++.dg/expr/for6.C: New test.
* g++.dg/expr/for7.C: New test.
* g++.dg/expr/for8.C: New test.
* g++.dg/ext/stmtexpr27.C: New test.
* g++.dg/cpp2a/constexpr-86769.C: New test.
* g++.dg/cpp26/name-independent-decl7.C: New test.
* g++.dg/cpp26/name-independent-decl8.C: New test.

--- gcc/c-family/c-common.def.jj2025-02-05 13:14:34.464202519 +0100
+++ gcc/c-family/c-common.def   2025-02-06 18:50:18.191080462 +0100
@@ -58,13 +58,14 @@ DEFTREECODE (SIZEOF_EXPR, "sizeof_expr",
 DEFTREECODE (PAREN_SIZEOF_EXPR, "paren_sizeof_expr", tcc_expression, 1)
 
 /* Used to represent a `for' statement. The operands are
-   FOR_INIT_STMT, FOR_COND, FOR_EXPR, FOR_BODY, FOR_SCOPE, and FOR_NAME
-   respectively.  */
-DEFTREECODE (FOR_STMT, "for_stmt", tcc_statement, 6)
+   FOR_INIT_STMT, FOR_COND, FOR_EXPR, FOR_BODY, FOR_SCOPE, FOR_NAME,
+   FOR_COND_PREP and FOR_COND_CLEANUP, respectively.  */
+DEFTREECODE (FOR_STMT, "for_stmt", tcc_statement, 8)
 
 /* Used to represent a 'while' statement. The operands are WHILE_COND,
-   WHILE_BODY, and WHILE_NAME, respectively.  */
-DEFTREECODE (WHILE_STMT, "while_stmt", tcc_statement, 3)
+   WHILE_BODY, WHILE_NAME, WHILE_COND_PREP and WHILE_COND_CLEANUP,
+   respectively.  */
+DEFTREECODE (WHILE_STMT, "while_stmt", tcc_statement, 5)
 
 /* Used to represent a 'do' statement. The operands are DO_COND, DO_BODY,
and DO_NAME, respectively.  */
--- gcc/c-family/c-common.h.jj  2025-02-05 13:14:34.505201941 +0100
+++ gcc/c-family/c-common.h 2025-02-06 18:50:18.198080368 +0100
@@ -1517,10 +1517,13 @@ extern tree build_userdef_literal (tree
 
 
 /* WHILE_STMT accessors.  These give access to the condition of the
-   while statement, the body and name of the while statement, res

[PATCH] c++: Allow constexpr reads from volatile std::nullptr_t objects [PR118661]

2025-02-06 Thread Jakub Jelinek
Hi!

As mentioned in the PR, https://eel.is/c++draft/conv.lval#note-1
says that even volatile reads from std::nullptr_t typed objects actually
don't read anything and https://eel.is/c++draft/expr.const#10.9
says that even those are ok in constant expressions.

So, the following patch adjusts the r9-4793 changes to have an exception
for NULLPTR_TYPE.
As [conv.lval]/3 also talks about accessing to inactive member, I've added
testcase to cover that as well.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2025-02-06  Jakub Jelinek  

PR c++/118661
* constexpr.cc (potential_constant_expression_1): Don't diagnose
lvalue-to-rvalue conversion of volatile lvalue if it has NULLPTR_TYPE.
* decl2.cc (decl_maybe_constant_var_p): Return true for constexpr
decls with NULLPTR_TYPE even if they are volatile.

* g++.dg/cpp0x/constexpr-volatile4.C: New test.
* g++.dg/cpp0x/constexpr-union9.C: New test.

--- gcc/cp/constexpr.cc.jj  2025-02-05 13:14:34.771198185 +0100
+++ gcc/cp/constexpr.cc 2025-02-06 09:53:03.236587121 +0100
@@ -9717,7 +9717,8 @@ potential_constant_expression_1 (tree t,
 return true;
 
   if (TREE_THIS_VOLATILE (t) && want_rval
-  && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t)))
+  && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t))
+  && TREE_CODE (TREE_TYPE (t)) != NULLPTR_TYPE)
 {
   if (flags & tf_error)
constexpr_error (loc, fundef_p, "lvalue-to-rvalue conversion of "
--- gcc/cp/decl2.cc.jj  2025-01-27 16:45:45.455970792 +0100
+++ gcc/cp/decl2.cc 2025-02-06 09:53:53.150890600 +0100
@@ -4985,7 +4985,8 @@ decl_maybe_constant_var_p (tree decl)
   tree type = TREE_TYPE (decl);
   if (!VAR_P (decl))
 return false;
-  if (DECL_DECLARED_CONSTEXPR_P (decl) && !TREE_THIS_VOLATILE (decl))
+  if (DECL_DECLARED_CONSTEXPR_P (decl)
+  && (!TREE_THIS_VOLATILE (decl) || TREE_CODE (type) == NULLPTR_TYPE))
 return true;
   if (DECL_HAS_VALUE_EXPR_P (decl))
 /* A proxy isn't constant.  */
--- gcc/testsuite/g++.dg/cpp0x/constexpr-volatile4.C.jj 2025-02-06 
09:50:43.339539282 +0100
+++ gcc/testsuite/g++.dg/cpp0x/constexpr-volatile4.C2025-02-06 
09:50:16.071919784 +0100
@@ -0,0 +1,20 @@
+// PR c++/118661
+// { dg-do compile { target c++11 } }
+
+using nullptr_t = decltype (nullptr);
+constexpr volatile nullptr_t a = {};
+constexpr nullptr_t b = a;
+
+constexpr nullptr_t
+foo ()
+{
+#if __cplusplus >= 201402L
+  volatile nullptr_t c = {};
+  return c;
+#else
+  return nullptr;
+#endif
+}
+
+static_assert (b == nullptr, "");
+static_assert (foo () == nullptr, "");
--- gcc/testsuite/g++.dg/cpp0x/constexpr-union9.C.jj2025-02-06 
09:57:46.149639270 +0100
+++ gcc/testsuite/g++.dg/cpp0x/constexpr-union9.C   2025-02-06 
10:01:08.472815988 +0100
@@ -0,0 +1,16 @@
+// PR c++/118661
+// { dg-do compile { target c++11 } }
+
+using nullptr_t = decltype (nullptr);
+union U { int i; nullptr_t n; };
+constexpr U u = { 42 };
+static_assert (u.n == nullptr, "");
+
+#if __cplusplus >= 201402L
+constexpr nullptr_t
+foo ()
+{
+  union U { int i; nullptr_t n; } u = { 42 };
+  return u.n;
+}
+#endif

Jakub



[PATCH] c++: Fix up name independent decl in structured binding handling in range for [PR115586]

2025-02-06 Thread Jakub Jelinek
Hi!

cp_parser_range_for temporarily reverts IDENTIFIER_BINDING changes
to hide the decls from the structured bindings from lookup during
parsing of the expression after :
If there are 2 or more name independent decls, we undo IDENTIFIER_BINDING
for the same name multiple times, even when just one has been added
(with a TREE_LIST inside of it as decl).

The following patch fixes it by handling the _ name at most once, the
later loop will DTRT then and just reinstall the temporarily hidden
binding with the TREE_LIST in there.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2025-02-06  Jakub Jelinek  

PR c++/115586
* parser.cc (cp_parser_range_for): For name independent decls in
structured bindings, only push the name/binding once per
structured binding.

* g++.dg/cpp26/name-independent-decl9.C: New test.
* g++.dg/cpp26/name-independent-decl10.C: New test.

--- gcc/cp/parser.cc.jj 2025-02-04 21:14:50.0 +0100
+++ gcc/cp/parser.cc2025-02-06 11:01:49.018050841 +0100
@@ -14570,9 +14570,19 @@ cp_parser_range_for (cp_parser *parser,
  decomp = &decomp_d;
  decomp->count = tree_to_uhwi (TREE_OPERAND (v, 1)) + 1;
  decomp->decl = d;
+ bool seen_name_independent_decl = false;
  for (unsigned int i = 0; i < decomp->count;
   i++, d = DECL_CHAIN (d))
{
+ if (name_independent_decl_p (d))
+   {
+ /* If there is more than one _ decl in
+the structured binding, just push and move it
+away once.  */
+ if (seen_name_independent_decl)
+   continue;
+ seen_name_independent_decl = true;
+   }
  tree name = DECL_NAME (d);
  names.safe_push (name);
  bindings.safe_push (IDENTIFIER_BINDING (name));
--- gcc/testsuite/g++.dg/cpp26/name-independent-decl9.C.jj  2025-02-06 
11:09:15.450816690 +0100
+++ gcc/testsuite/g++.dg/cpp26/name-independent-decl9.C 2025-02-06 
11:19:01.704625049 +0100
@@ -0,0 +1,49 @@
+// PR c++/115586
+// { dg-do compile { target c++11 } }
+// { dg-options "-Wunused-variable -Wunused-but-set-variable 
-Wunused-parameter -Wshadow" }
+
+struct S { int a, b, c; };
+
+void
+foo ()
+{
+  S s[4] = {};
+  for (auto [_, _, a] : s) // { dg-warning "name-independent declarations 
only available with" "" { target c++23_down } }
+++a;   // { dg-warning "structured bindings only 
available with" "" { target c++14_down } .-1 }
+  for (auto _ : s)
+++_.b;
+  for (auto [a, _, b] : s) // { dg-warning "structured bindings only 
available with" "" { target c++14_down } }
+++a;
+  for (auto [_, _, a] : s) // { dg-warning "name-independent declarations 
only available with" "" { target c++23_down } }
+int _ = ++a;   // { dg-warning "structured bindings only 
available with" "" { target c++14_down } .-1 }
+  for (auto _ : s)
+{
+  int _ = 2;   // { dg-warning "name-independent declarations 
only available with" "" { target c++23_down } }
+}
+  for (auto [a, _, b] : s) // { dg-warning "structured bindings only 
available with" "" { target c++14_down } }
+int _ = ++b;   // { dg-warning "name-independent declarations 
only available with" "" { target c++23_down } }
+}
+
+void
+bar ()
+{
+  S s[4] = {};
+  auto [_, c, _] = s[0];   // { dg-warning "name-independent declarations 
only available with" "" { target c++23_down } }
+  ++c; // { dg-warning "structured bindings only 
available with" "" { target c++14_down } .-1 }
+  for (auto [a, _, _] : s) // { dg-warning "name-independent declarations 
only available with" "" { target c++23_down } }
+++a;   // { dg-warning "structured bindings only 
available with" "" { target c++14_down } .-1 }
+  for (auto _ : s)
+++_.c;
+  for (auto [a, b, _] : s) // { dg-warning "structured bindings only 
available with" "" { target c++14_down } }
+++b;
+  for (auto [a, _, _] : s) // { dg-warning "name-independent declarations 
only available with" "" { target c++23_down } }
+{  // { dg-warning "structured bindings only 
available with" "" { target c++14_down } .-1 }
+  int _ = ++a;
+}
+  for (auto _ : s)
+int _ = 5; // { dg-warning "name-independent declarations 
only available with" "" { target c++23_down } }
+  for (auto [a, b, _] : s) // { dg-warning "structured bindings only 
available with" "" { target c++14_down } }
+{
+  int _ = a + b;   // { dg-warning "name-independent declarations 
only available with" "" { target c++23_down } }
+}
+}
--- gcc/testsuite/g++.dg/cpp26/name-independent-decl10.C.jj 2025-02-06 
11:23:38.787751260 +0100
+++ gcc/testsuit