[PATCH]middle-end: Remove unused internal function after IVopts cleanup [PR118756]
Hi All, It seems that after my IVopts patches the function contain_complex_addr_expr became unused and clang is rightfully complaining about it. This removes the unused internal function. Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: PR tree-optimization/19202 * tree-ssa-loop-ivopts.cc (contain_complex_addr_expr): Remove. --- diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc index 989321137df93349f8395a9f746f73eda52fd74f..e37b24062f730e490ae86bd9248e61102ad1a0f0 100644 --- a/gcc/tree-ssa-loop-ivopts.cc +++ b/gcc/tree-ssa-loop-ivopts.cc @@ -1149,34 +1149,6 @@ determine_base_object (struct ivopts_data *data, tree expr) return obj; } -/* Return true if address expression with non-DECL_P operand appears - in EXPR. */ - -static bool -contain_complex_addr_expr (tree expr) -{ - bool res = false; - - STRIP_NOPS (expr); - switch (TREE_CODE (expr)) -{ -case POINTER_PLUS_EXPR: -case PLUS_EXPR: -case MINUS_EXPR: - res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0)); - res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1)); - break; - -case ADDR_EXPR: - return (!DECL_P (TREE_OPERAND (expr, 0))); - -default: - return false; -} - - return res; -} - /* Allocates an induction variable with given initial value BASE and step STEP for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */ -- diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc index 989321137df93349f8395a9f746f73eda52fd74f..e37b24062f730e490ae86bd9248e61102ad1a0f0 100644 --- a/gcc/tree-ssa-loop-ivopts.cc +++ b/gcc/tree-ssa-loop-ivopts.cc @@ -1149,34 +1149,6 @@ determine_base_object (struct ivopts_data *data, tree expr) return obj; } -/* Return true if address expression with non-DECL_P operand appears - in EXPR. */ - -static bool -contain_complex_addr_expr (tree expr) -{ - bool res = false; - - STRIP_NOPS (expr); - switch (TREE_CODE (expr)) -{ -case POINTER_PLUS_EXPR: -case PLUS_EXPR: -case MINUS_EXPR: - res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0)); - res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1)); - break; - -case ADDR_EXPR: - return (!DECL_P (TREE_OPERAND (expr, 0))); - -default: - return false; -} - - return res; -} - /* Allocates an induction variable with given initial value BASE and step STEP for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
อยากได้เงินก่อน แบบผ่อนเบาๆ ต้องสินเชื่อ SME เจริญทรัพย์ วงเงินสูง อนุมัติไว โทร 0️⃣9️⃣5️⃣8️⃣4️⃣3️⃣8️⃣6️⃣0️⃣4️⃣ คุณ ภัทร
บริษัท เจริญทรัพย์ เครดิต สินเชื่อเพื่อธุรกิจสำหรับเจ้าของกิจการ - อนุมัติง่าย รู้ผลไว (สอบถามฟรี) LINE: big7809
[PATCH] vect: Move induction IV increments [PR110449]
In this PR, we used to generate: .L6: mov v30.16b, v31.16b faddv31.4s, v31.4s, v27.4s faddv29.4s, v30.4s, v28.4s stp q30, q29, [x0] add x0, x0, 32 cmp x1, x0 bne .L6 for an unrolled induction in: for (int i = 0; i < 1024; i++) { arr[i] = freq; freq += step; } with the problem being the unnecessary MOV. The main induction IV was incremented by VF * step == 2 * nunits * step, and then nunits * step was added for the second store to arr. The original patch for the PR (r14-2367-g224fd59b2dc8) avoided the MOV by incrementing the IV by nunits * step twice. The problem with that approach is that it doubles the loop-carried latency. This change was deliberately not preserved when moving from loop-vect to SLP and so the test started failing again after r15-3509-gd34cda720988. I think the main problem is that we put the IV increment in the wrong place. Normal IVs created by create_iv are placed before the exit condition where possible, but vectorizable_induction instead always inserted them at the start of the loop body. The only use of the incremented IV is by the phi node, so the effect is to make both the old and new IV values live for the whole loop body, which is why we need the MOV. The simplest fix therefore seems to be to reuse the create_iv logic. Bootstrapped & regression-tested on aarch64-linus-gnu and x86_64-linux-gnu. As Richi suggested, I also tested vect.exp using unix/-mavx512f/--param=vect-partial-vector-usage=2. OK to install? Richard gcc/ PR tree-optimization/110449 * tree-ssa-loop-manip.h (insert_iv_increment): Declare. * tree-ssa-loop-manip.cc (insert_iv_increment): New function, split out from... (create_iv): ...here and generalized to gimple_seqs. * tree-vect-loop.cc (vectorizable_induction): Use standard_iv_increment_position and insert_iv_increment to insert the IV increment. gcc/testsuite/ PR tree-optimization/110449 * gcc.target/aarch64/pr110449.c: Expect an increment by 8.0, but test that there is no MOV. --- gcc/testsuite/gcc.target/aarch64/pr110449.c | 25 + gcc/tree-ssa-loop-manip.cc | 62 - gcc/tree-ssa-loop-manip.h | 1 + gcc/tree-vect-loop.cc | 6 +- 4 files changed, 57 insertions(+), 37 deletions(-) diff --git a/gcc/testsuite/gcc.target/aarch64/pr110449.c b/gcc/testsuite/gcc.target/aarch64/pr110449.c index bb3b6dcfe08..51ca3f4b816 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr110449.c +++ b/gcc/testsuite/gcc.target/aarch64/pr110449.c @@ -1,8 +1,10 @@ /* { dg-do compile } */ /* { dg-options "-Ofast -mcpu=neoverse-n2 --param aarch64-vect-unroll-limit=2" } */ -/* { dg-final { scan-assembler-not "8.0e\\+0" } } */ +/* { dg-final { scan-assembler {, #?8.0e\+0} } } */ +/* { dg-final { scan-assembler-not {\tmov\tv} } } */ -/* Calcualte the vectorized induction with smaller step for an unrolled loop. +/* Insert the induction IV updates before the exit condition, rather than + at the start of the loop body. before (suggested_unroll_factor=2): fmovs30, 8.0e+0 @@ -19,15 +21,16 @@ bne .L6 after: - fmovs31, 4.0e+0 - dup v29.4s, v31.s[0] - .L6: - faddv30.4s, v31.4s, v29.4s - stp q31, q30, [x0] - add x0, x0, 32 - faddv31.4s, v29.4s, v30.4s - cmp x0, x1 - bne .L6 */ + fmovs31, 8.0e+0 + fmovs29, 4.0e+0 + dup v31.4s, v31.s[0] + dup v29.4s, v29.s[0] + .L2: + faddv30.4s, v0.4s, v29.4s + stp q0, q30, [x0], 32 + faddv0.4s, v0.4s, v31.4s + cmp x1, x0 + bne .L2 */ void foo2 (float *arr, float freq, float step) diff --git a/gcc/tree-ssa-loop-manip.cc b/gcc/tree-ssa-loop-manip.cc index 6ceb9df370b..2907fa6532d 100644 --- a/gcc/tree-ssa-loop-manip.cc +++ b/gcc/tree-ssa-loop-manip.cc @@ -47,6 +47,39 @@ along with GCC; see the file COPYING3. If not see so that we can free them all at once. */ static bitmap_obstack loop_renamer_obstack; +/* Insert IV increment statements STMTS before or after INCR_POS; + AFTER selects which. INCR_POS and AFTER can be computed using + standard_iv_increment_position. */ + +void +insert_iv_increment (gimple_stmt_iterator *incr_pos, bool after, +gimple_seq stmts) +{ + /* Prevent the increment from inheriting a bogus location if it is not put + immediately after a statement whose location is known. */ + if (after) +{ + gimple_stmt_iterator gsi = *incr_pos; + if (!gsi_end_p (gsi)) + gsi_next_nondebug (&gsi); + if (gsi_end_p (gsi)) + { + edge e = single_succ_edge (gsi_bb (*incr_pos)); + gimple_seq_set_location (stmts, e->
Re: [PATCH] Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.
On 31/01/25 8:24 am, Michael Meissner wrote: > Fix PR 118541, do not generate unordered fp cmoves for IEEE compares. > > In bug PR target/118541 on power9, power10, and power11 systems, for the > function: > > extern double __ieee754_acos (double); > > double > __acospi (double x) > { > double ret = __ieee754_acos (x) / 3.14; > return __builtin_isgreater (ret, 1.0) ? 1.0 : ret; > } > > GCC currently generates the following code: > > Power9 Power10 and Power11 > == === > bl __ieee754_acos bl __ieee754_acos@notoc > nop plfd 0,.LC0@pcrel > addis 9,2,.LC2@toc@ha xxspltidp 12,1065353216 > addi 1,1,32 addi 1,1,32 > lfd 0,.LC2@toc@l(9) ld 0,16(1) > addis 9,2,.LC0@toc@ha fdiv 0,1,0 > ld 0,16(1) mtlr 0 > lfd 12,.LC0@toc@l(9)xscmpgtdp 1,0,12 > fdiv 0,1,0 xxsel 1,0,12,1 > mtlr 0 blr > xscmpgtdp 1,0,12 > xxsel 1,0,12,1 > blr > > This is because ifcvt.c optimizes the conditional floating point move to use > the > XSCMPGTDP instruction. > > However, the XSCMPGTDP instruction will generate an interrupt if one of the > arguments is a signalling NaN and signalling NaNs can generate an interrupt. > The IEEE comparison functions (isgreater, etc.) require that the comparison > not > raise an interrupt. Hi Mike, Both ordered and unordered compare instructions set the VXSNAN bit. The difference is that if either operand is an SNaN and the VE bit is 0, then the ordered instruction sets the VXVC bit. The ordered and unordered compare instructions also differ in how QNaN is handled. If either operand is a QNaN, the VXVC exception bit is set by the ordered instructions. Regards, Surya
Re: The COBOL front end, in 8 notes + toplevel patch
On 06.02.25 00:36, Robert Dubner wrote: -Original Message- From: Matthias Klose Sent: Tuesday, December 17, 2024 04:26 To: Joseph Myers Cc: gcc-patches@gcc.gnu.org; James K. Lowden Subject: Re: The COBOL front end, in 8 notes + toplevel patch On 17.12.24 00:58, Joseph Myers wrote: On Mon, 16 Dec 2024, Matthias Klose wrote: On 14.12.24 15:38, Matthias Klose wrote: I tried to use the patches to build binary packages for Debian. Found some issues: tried to build libgcobol on more architectures, please find the attached patch to disable building libgcobol on some architectures. Enabling on x86_64-*-linux* and disabling on i?86-*-linux* is inherently suspect since the difference between those is only about what the default multilib is, and says nothing about the multilib used for a particular compilation of libgcobol. (Of course we first need to fix multilib support for libgcobol if that isn't working correctly - all target libraries in GCC need proper multilib support.) why is this suspect? looking at libtsan and libhwasan, these are only built for the 64bit variant, not for 32 and x32. Matthias Matthias, we've put your suggested configure patches into place. And now I am exploring exactly the issue you mention here. The -m32 compilation on a x86_64 platform is failing, because the -m32 header files don't know anything about the __int128 declarations we're using. I've been looking, and I'll continue to look, but if you can point me in the direction of suppressing libgcobol compilations for -m32 and -m32x, you might save me a lot of reverse engineering. have a look at libsanitizer/configure.tgt. The check for test x$ac_cv_sizeof_void_p = x8 should work in a similar way. Matthias
Re: [pushed][PATCH] LoongArch: Fix ICE caused by illegal calls to builtin functions [PR118561].
Pushed to r14-11275 and r15-7386. 在 2025/1/23 上午11:44, Lulu Cheng 写道: PR target/118561 gcc/ChangeLog: * config/loongarch/loongarch-builtins.cc (loongarch_expand_builtin_lsx_test_branch): NULL_RTX will not be returned when an error is detected. (loongarch_expand_builtin): Likewise. gcc/testsuite/ChangeLog: * gcc.target/loongarch/pr118561.c: New test. --- gcc/config/loongarch/loongarch-builtins.cc| 7 +-- gcc/testsuite/gcc.target/loongarch/pr118561.c | 9 + 2 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/pr118561.c diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc index 92d995a916a..1849b35357c 100644 --- a/gcc/config/loongarch/loongarch-builtins.cc +++ b/gcc/config/loongarch/loongarch-builtins.cc @@ -2996,7 +2996,10 @@ loongarch_expand_builtin_lsx_test_branch (enum insn_code icode, tree exp) ops[1].value = force_reg (ops[1].mode, ops[1].value); if ((cbranch = maybe_gen_insn (icode, 3, ops)) == NULL_RTX) -error ("failed to expand built-in function"); +{ + error ("failed to expand built-in function"); + return const0_rtx; +} cmp_result = gen_reg_rtx (SImode); @@ -3036,7 +3039,7 @@ loongarch_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, { error_at (EXPR_LOCATION (exp), "built-in function %qD is not enabled", fndecl); - return target; + return target ? target : const0_rtx; } switch (d->builtin_type) diff --git a/gcc/testsuite/gcc.target/loongarch/pr118561.c b/gcc/testsuite/gcc.target/loongarch/pr118561.c new file mode 100644 index 000..81a776eada3 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/pr118561.c @@ -0,0 +1,9 @@ +/* PR target/118561: ICE with -mfpu=none */ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=loongarch64 -mfpu=none" } */ + +int +test (void) +{ + return __builtin_loongarch_movfcsr2gr (0); /* { dg-error "built-in function '__builtin_loongarch_movfcsr2gr' is not enabled" } */ +}
[patch,avr] Add support for a Compact Vector Table
Some AVR devices support a Compact Vector Table. The support is provided by means of a startup code file crt-cvt.o from AVR-LibC that can be linked instead of the traditional crt.o. This patch adds a new command line option -mcvt that links that CVT startup code (or issues an error when the device doesn't support a CVT). Ok for trunk? Johann -- AVR: Add support for a Compact Vector Table (CVT). Some AVR devices support a CVT: - Devices from the 0-series, 1-series, 2-series. - AVR16, AVR32, AVR64, AVR128 devices. The support is provided by means of a startup code file crt-cvt.o from AVR-LibC that can be linked instead of the traditional crt.o. This patch adds a new command line option -mcvt that links that CVT startup code (or issues an error when the device doesn't support a CVT). PR target/118764 gcc/ * config/avr/avr.opt (-mcvt): New target option. * config/avr/avr-arch.h (AVR_CVT): New enum value. * config/avr/avr-mcus.def: Add AVR_CVT flag for devices that support it. * config/avr/avr.cc (avr_handle_isr_attribute) [TARGET_CVT]: Issue an error when a vector number larger that 3 is used. * config/avr/gen-avr-mmcu-specs.cc (McuInfo.have_cvt): New property. (print_mcu) <*avrlibc_startfile>: Use crt-cvt.o depending on -mcvt (or issue an error when the device doesn't support a CVT). * doc/invoke.texi (AVR Options): Document -mcvt.AVR: Add support for a Compact Vector Table (CVT). Some AVR devices support a CVT: - Devices from the 0-series, 1-series, 2-series. - AVR16, AVR32, AVR64, AVR128 devices. The support is provided by means of a startup code file crt-cvt.o from AVR-LibC that can be linked instead of the traditional crt.o. This patch adds a new command line option -mcvt that links that CVT startup code (or issues an error when the device doesn't support a CVT). PR target/118764 gcc/ * config/avr/avr.opt (-mcvt): New target option. * config/avr/avr-arch.h (AVR_CVT): New enum value. * config/avr/avr-mcus.def: Add AVR_CVT flag for devices that support it. * config/avr/avr.cc (avr_handle_isr_attribute) [TARGET_CVT]: Issue an error when a vector number larger that 3 is used. * config/avr/gen-avr-mmcu-specs.cc (McuInfo.have_cvt): New property. (print_mcu) <*avrlibc_startfile>: Use crt-cvt.o depending on -mcvt (or issue an error when the device doesn't support a CVT). * doc/invoke.texi (AVR Options): Document -mcvt. diff --git a/gcc/config/avr/avr-arch.h b/gcc/config/avr/avr-arch.h index b5b3606c0d1..efd7b146574 100644 --- a/gcc/config/avr/avr-arch.h +++ b/gcc/config/avr/avr-arch.h @@ -158,6 +158,14 @@ AVR_ERRATA_SKIP http://www.atmel.com/dyn/resources/prod_documents/doc2494.pdf http://www.atmel.com/dyn/resources/prod_documents/doc1436.pdf +AVR_CVT + The device supports a CVT (Compact Vector Table) which can be selected + with -mcvt, which links startup-code crt-cvt.o instead of the + usual crt.o. This assumes that AVR-LibC implements Issue #1010. +https://github.com/avrdudes/avr-libc/issues/1010 + crt-cvt.o also pulls in __do_cvt_init from lib.a which sets + bit CPUINT_CTRLA.CPUINT_CVT in order to activate the CVT. + AVR_ISA_RCALL Always use RJMP / RCALL and assume JMP / CALL are not available. This affects multilib selection via specs generation and -mshort-calls. @@ -198,14 +206,16 @@ AVR_ISA_FLMAP enum avr_device_specific_features { AVR_ISA_NONE, - AVR_ISA_RMW = 0x1, /* device has RMW instructions. */ + AVR_CVT = 0x1, /* Device supports a "Compact Vector Table" (-mcvt) + as configured in field CPUINT_CTRLA.CPUINT_CVT. */ AVR_SHORT_SP= 0x2, /* Stack Pointer has 8 bits width. */ - AVR_ERRATA_SKIP = 0x4, /* device has a core erratum. */ - AVR_ISA_LDS = 0x8, /* whether LDS / STS is valid for all data in static - storage. Only useful for reduced Tiny. */ - AVR_ISA_RCALL = 0x10, /* Use RJMP / RCALL even though JMP / CALL + AVR_ERRATA_SKIP = 0x4, /* Device has a core erratum. */ + AVR_ISA_RMW = 0x8, /* Device has RMW instructions. */ + AVR_ISA_LDS = 0x10, /* Whether LDS / STS is valid for all data in static + storage. Only useful for reduced Tiny. */ + AVR_ISA_RCALL = 0x20, /* Use RJMP / RCALL even though JMP / CALL are available (-mshort-calls). */ - AVR_ISA_FLMAP = 0x20 /* Has NVMCTRL_CTRLB.FLMAP to select which 32 KiB + AVR_ISA_FLMAP = 0x40 /* Has NVMCTRL_CTRLB.FLMAP to select which 32 KiB block of program memory is visible in the RAM address space. */ }; diff --git a/gcc/config/avr/avr-mcus.def b/gcc/config/avr/avr-mcus.def index b717749b67a..9f79a9a4579 100644 --- a/gcc/config/avr/avr-mcus.def +++ b/gcc/config/avr/avr-mcus.def @@ -49,7 +49,7 @@ ARCH Specifies the
[PATCH] late-combine: Tighten register class check [PR108840]
gcc.target/aarch64/pr108840.c has failed since r15-268-g9dbff9c05520 (which means that I really ought to have looked at it earlier). The test wants us to fold an SImode AND into all shifts that use it. This is something that late-combine is supposed to do, but: (1) the pre-RA pass chickened out because of a register pressure check (2) the post-RA pass can't handle it, because the shift uses are in QImode and the sets are in SImode Both are things that would be good to fix. But (1) is particularly silly. The constraints on the shift have "rk" for the destination (so allowing the stack pointer) and "r" for the first source. Including the stack pointer made the destination seem more permissive than the source. The intention was instead to check whether there are any *allocatable* registers in the destination class that aren't present in the source. That's enough for all tests but the last one. The last one still fails because combine merges the final shift with the move into the hard return register, giving an arithmetic instruction with a hard register destination. Pre-RA late-combine currently punts on those, again due to register pressure concerns. That too is something I'd like to relax, but not for GCC 15. In the interim, the best thing seems to be to disable combine for the test. Boostrapped & regression-tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? Richard gcc/ PR rtl-optimization/108840 * late-combine.cc (late_combine::check_register_pressure): Take only allocatable registers into account when checking the permissiveness of register classes. gcc/testsuite/ PR rtl-optimization/108840 * gcc.target/aarch64/pr108840.c: Run at -O2 but disable combine. --- gcc/late-combine.cc | 10 -- gcc/testsuite/gcc.target/aarch64/pr108840.c | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/gcc/late-combine.cc b/gcc/late-combine.cc index 1707ceebd5f..90d7ef09583 100644 --- a/gcc/late-combine.cc +++ b/gcc/late-combine.cc @@ -552,8 +552,14 @@ late_combine::check_register_pressure (insn_info *insn, rtx set) // Make sure that the source operand's class is at least as // permissive as the destination operand's class. auto src_class = alternative_class (alt, i); - if (!reg_class_subset_p (dest_class, src_class)) - return false; + if (dest_class != src_class) + { + auto extra_dest_regs = (reg_class_contents[dest_class] + & ~reg_class_contents[src_class] + & ~fixed_reg_set); + if (!hard_reg_set_empty_p (extra_dest_regs)) + return false; + } // Make sure that the source operand occupies no more hard // registers than the destination operand. This mostly matters diff --git a/gcc/testsuite/gcc.target/aarch64/pr108840.c b/gcc/testsuite/gcc.target/aarch64/pr108840.c index 804c1cd9156..7e1ea6fa4fe 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr108840.c +++ b/gcc/testsuite/gcc.target/aarch64/pr108840.c @@ -1,6 +1,6 @@ /* PR target/108840. Check that the explicit &31 is eliminated. */ /* { dg-do compile } */ -/* { dg-options "-O" } */ +/* { dg-options "-O2 -fno-tree-vectorize -fdisable-rtl-combine" } */ int foo (int x, int y) -- 2.25.1
Re: [PATCH]middle-end: Remove unused internal function after IVopts cleanup [PR118756]
On Thu, 6 Feb 2025, Tamar Christina wrote: > Hi All, > > It seems that after my IVopts patches the function contain_complex_addr_expr > became unused and clang is rightfully complaining about it. > > This removes the unused internal function. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? OK. > Thanks, > Tamar > > gcc/ChangeLog: > > PR tree-optimization/19202 > * tree-ssa-loop-ivopts.cc (contain_complex_addr_expr): Remove. > > --- > diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc > index > 989321137df93349f8395a9f746f73eda52fd74f..e37b24062f730e490ae86bd9248e61102ad1a0f0 > 100644 > --- a/gcc/tree-ssa-loop-ivopts.cc > +++ b/gcc/tree-ssa-loop-ivopts.cc > @@ -1149,34 +1149,6 @@ determine_base_object (struct ivopts_data *data, tree > expr) >return obj; > } > > -/* Return true if address expression with non-DECL_P operand appears > - in EXPR. */ > - > -static bool > -contain_complex_addr_expr (tree expr) > -{ > - bool res = false; > - > - STRIP_NOPS (expr); > - switch (TREE_CODE (expr)) > -{ > -case POINTER_PLUS_EXPR: > -case PLUS_EXPR: > -case MINUS_EXPR: > - res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0)); > - res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1)); > - break; > - > -case ADDR_EXPR: > - return (!DECL_P (TREE_OPERAND (expr, 0))); > - > -default: > - return false; > -} > - > - return res; > -} > - > /* Allocates an induction variable with given initial value BASE and step > STEP > for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */ > > > > > > -- Richard Biener SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
Re: [PATCH] vect: Move induction IV increments [PR110449]
On Thu, 6 Feb 2025, Richard Sandiford wrote: > In this PR, we used to generate: > > .L6: > mov v30.16b, v31.16b > faddv31.4s, v31.4s, v27.4s > faddv29.4s, v30.4s, v28.4s > stp q30, q29, [x0] > add x0, x0, 32 > cmp x1, x0 > bne .L6 > > for an unrolled induction in: > > for (int i = 0; i < 1024; i++) > { > arr[i] = freq; > freq += step; > } > > with the problem being the unnecessary MOV. > > The main induction IV was incremented by VF * step == 2 * nunits * step, > and then nunits * step was added for the second store to arr. > > The original patch for the PR (r14-2367-g224fd59b2dc8) avoided the MOV > by incrementing the IV by nunits * step twice. The problem with that > approach is that it doubles the loop-carried latency. This change was > deliberately not preserved when moving from loop-vect to SLP and so > the test started failing again after r15-3509-gd34cda720988. > > I think the main problem is that we put the IV increment in the wrong > place. Normal IVs created by create_iv are placed before the exit > condition where possible, but vectorizable_induction instead always > inserted them at the start of the loop body. The only use of the > incremented IV is by the phi node, so the effect is to make both > the old and new IV values live for the whole loop body, which is > why we need the MOV. > > The simplest fix therefore seems to be to reuse the create_iv logic. > > Bootstrapped & regression-tested on aarch64-linus-gnu and > x86_64-linux-gnu. As Richi suggested, I also tested vect.exp > using unix/-mavx512f/--param=vect-partial-vector-usage=2. > OK to install? OK. Thanks, Richard. > Richard > > > gcc/ > PR tree-optimization/110449 > * tree-ssa-loop-manip.h (insert_iv_increment): Declare. > * tree-ssa-loop-manip.cc (insert_iv_increment): New function, > split out from... > (create_iv): ...here and generalized to gimple_seqs. > * tree-vect-loop.cc (vectorizable_induction): Use > standard_iv_increment_position and insert_iv_increment > to insert the IV increment. > > gcc/testsuite/ > PR tree-optimization/110449 > * gcc.target/aarch64/pr110449.c: Expect an increment by 8.0, > but test that there is no MOV. > --- > gcc/testsuite/gcc.target/aarch64/pr110449.c | 25 + > gcc/tree-ssa-loop-manip.cc | 62 - > gcc/tree-ssa-loop-manip.h | 1 + > gcc/tree-vect-loop.cc | 6 +- > 4 files changed, 57 insertions(+), 37 deletions(-) > > diff --git a/gcc/testsuite/gcc.target/aarch64/pr110449.c > b/gcc/testsuite/gcc.target/aarch64/pr110449.c > index bb3b6dcfe08..51ca3f4b816 100644 > --- a/gcc/testsuite/gcc.target/aarch64/pr110449.c > +++ b/gcc/testsuite/gcc.target/aarch64/pr110449.c > @@ -1,8 +1,10 @@ > /* { dg-do compile } */ > /* { dg-options "-Ofast -mcpu=neoverse-n2 --param > aarch64-vect-unroll-limit=2" } */ > -/* { dg-final { scan-assembler-not "8.0e\\+0" } } */ > +/* { dg-final { scan-assembler {, #?8.0e\+0} } } */ > +/* { dg-final { scan-assembler-not {\tmov\tv} } } */ > > -/* Calcualte the vectorized induction with smaller step for an unrolled loop. > +/* Insert the induction IV updates before the exit condition, rather than > + at the start of the loop body. > > before (suggested_unroll_factor=2): > fmovs30, 8.0e+0 > @@ -19,15 +21,16 @@ > bne .L6 > > after: > - fmovs31, 4.0e+0 > - dup v29.4s, v31.s[0] > - .L6: > - faddv30.4s, v31.4s, v29.4s > - stp q31, q30, [x0] > - add x0, x0, 32 > - faddv31.4s, v29.4s, v30.4s > - cmp x0, x1 > - bne .L6 */ > + fmovs31, 8.0e+0 > + fmovs29, 4.0e+0 > + dup v31.4s, v31.s[0] > + dup v29.4s, v29.s[0] > + .L2: > + faddv30.4s, v0.4s, v29.4s > + stp q0, q30, [x0], 32 > + faddv0.4s, v0.4s, v31.4s > + cmp x1, x0 > + bne .L2 */ > > void > foo2 (float *arr, float freq, float step) > diff --git a/gcc/tree-ssa-loop-manip.cc b/gcc/tree-ssa-loop-manip.cc > index 6ceb9df370b..2907fa6532d 100644 > --- a/gcc/tree-ssa-loop-manip.cc > +++ b/gcc/tree-ssa-loop-manip.cc > @@ -47,6 +47,39 @@ along with GCC; see the file COPYING3. If not see > so that we can free them all at once. */ > static bitmap_obstack loop_renamer_obstack; > > +/* Insert IV increment statements STMTS before or after INCR_POS; > + AFTER selects which. INCR_POS and AFTER can be computed using > + standard_iv_increment_position. */ > + > +void > +insert_iv_increment (gimple_stmt_iterator *incr_pos, bool after, > + gimple_seq stmts) > +{ > + /* Prevent the increment from inheriting a bogus location if it is not put > + immediately after a statement whose location is known. */ > + if (after)
Re: [RFC][libgcc][PATCH] Fix two issues in libgcc/unwind-dw2-fde.c.
On Wed, Feb 5, 2025 at 4:40 PM Qing Zhao wrote: > > > > > On Feb 5, 2025, at 07:46, Richard Biener wrote: > > > > On Tue, Feb 4, 2025 at 10:12 PM Qing Zhao wrote: > >> > >> Hi, > >> > >> One of our big application segv in libgcc code while unwinding the stack. > >> > >> This is a random crash while the application throws a c++ exception and > >> unwinds the stack. Incidents are random and never can be reproduced by any > >> test case. > >> > >> The libgcc that is used is based on GCC 8. > >> > >> Fortunately, we got some information through the stack trace, and narrowed > >> down the crash is in the routine "init_object" of libgcc/unwind-dw2-pde.c: > >> > >> 777 count = classify_object_over_fdes (ob, ob->u.single); > >> > >> when loading the 2nd parameter of the call to "classify_object_over_fdes". > >> i.e, the address that is pointed by "ob->u.single" is invalid. > >> > >> And the outer caller we can track back is the following line 1066 of the > >> routine > >> "_Unwind_Find_FDE": > >> > >> 1060 /* Classify and search the objects we've not yet processed. */ > >> 1061 while ((ob = unseen_objects)) > >> 1062 { > >> 1063 struct object **p; > >> 1064 > >> 1065 unseen_objects = ob->next; > >> 1066 f = search_object (ob, pc); > >> > >> Then we suspected that the construction of the static variable > >> "unseen_objects" > >> might have some bug. > >> > >> Then after studying the source code that constructs "unseen_objects" in > >> libgcc/unwind-dw2-pde.c, I found an issue in the routines > >> "__register_frame" > >> and "__register_frame_table" as following: > >> > >> 127 void > >> 128 __register_frame (void *begin) > >> 129 { > >> 130 struct object *ob; > >> 131 > >> 132 /* If .eh_frame is empty, don't register at all. */ > >> 133 if (*(uword *) begin == 0) > >> 134 return; > >> 135 > >> 136 ob = malloc (sizeof (struct object)); > >> 137 __register_frame_info (begin, ob); > >> 138 } > >> > >> 181 void > >> 182 __register_frame_table (void *begin) > >> 183 { > >> 184 struct object *ob = malloc (sizeof (struct object)); > >> 185 __register_frame_info_table (begin, ob); > >> 186 } > >> 187 > >> > >> In the above, one obvious issue in the source code is at line 136, line > >> 137, > >> and line 184 and line 185: the return value of call to "malloc" is not > >> checked > >> against NULL before it was passed as the second parameter of the follwoing > >> call. > >> > >> This might cause unpredicted random behavior. > >> > >> I checked the latest trunk GCC libgcc, the same issue is there too. > >> > >> This patch is for the latest trunk GCC. > >> > >> Please let me know any comments on this? > > > > I think I've seen this elsewhere — > > Do you mean that you saw this problem (malloc return NULL during > register_frame) previously? It was probably reported to us (SUSE) from a customer/partner, also from a (llvm?) JIT context. > > the issue is the unwind register API does > > not allow for failures but I also think calling abort() is bad. > > Okay, I see. > > > > > Are you calling this from a JIT context or so? > > I will ask the bug submitter on this to make sure. > > > We're assuming that at program > > start malloc() will not fail. > > So, the routine __register_frame is only called at the _start_ of a program? > > > > The proper solution is probably to add an alternate ABI which has a way to > > fail > > during registration. > > Any suggestion on this (or any other routine I can refer to?) > > > Thanks a lot for the help. > > Qing > > > > Richard. > > > >> thanks. > >> > >> Qing > >> > >> > >> === > >> > >> Fix two issues in libgcc/unwind-dw2-fde.c: > >> > >> A. The returned value of call to malloc is not checked against NULL. > >> This might cause unpredicted behavior during stack unwinding. > >> > >> B. Check for null begin parameter (as well as pointer to null) in > >> __register_frame_info_table_bases and __register_frame_table. > >> > >> libgcc/ChangeLog: > >> > >>* unwind-dw2-fde.c (__register_frame): Check the return value of > >> call > >>to malloc. > >>(__register_frame_info_table_bases): Check for null begin parameter. > >>(__register_frame_table): Check the return value of call to malloc, > >>Check for null begin parameter. > >> --- > >> libgcc/unwind-dw2-fde.c | 12 > >> 1 file changed, 12 insertions(+) > >> > >> diff --git a/libgcc/unwind-dw2-fde.c b/libgcc/unwind-dw2-fde.c > >> index cdfd3974c99..f0bc29d682a 100644 > >> --- a/libgcc/unwind-dw2-fde.c > >> +++ b/libgcc/unwind-dw2-fde.c > >> @@ -169,6 +169,8 @@ __register_frame (void *begin) > >> return; > >> > >> ob = malloc (sizeof (struct object)); > >> + if (ob == NULL) > >> +abort (); > >> __register_frame_info (begin, ob); > >> } > >> > >> @@ -180,6 +182,10 @@ void > >> __register_frame_info_table_bases (void *begin, struct object *ob, > >> void *tbase, void
[PATCH] c++: Don't use CLEANUP_EH_ONLY for new expression cleanup [PR118763]
Hi! The following testcase is miscompiled since r12-6325 stopped preevaluating the initializers for new expression. If evaluating the initializers throws, there is a correct cleanup for that, but it is marked CLEANUP_EH_ONLY. While in standard C++ that is just fine, if it has statement expressions, it can return or goto out of the expression and we should delete the pointer in that case too. There is already a sentry variable initialized to true and set to false after everything is initialized and used as a guard for the cleanup, so just removing the CLEANUP_EH_ONLY flag does everything we need. And in the normal case of the initializer not using statement expressions at least with -O2 we get the same code, while the change changes one try { sentry = true; ... sentry = false; } catch { if (sentry) delete ...; } into try { sentry = true; ... sentry = false; } finally { if (sentry) delete ...; } optimizations will see that sentry is false when reaching the finally other than through an exception. Though, wonder what other CLEANUP_EH_ONLY cleanups might be an issue with statement expressions. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2025-02-06 Jakub Jelinek PR c++/118763 * init.cc (build_new_1): Don't set CLEANUP_EH_ONLY. * g++.dg/asan/pr118763.C: New test. --- gcc/cp/init.cc.jj 2025-02-04 21:54:35.102087948 +0100 +++ gcc/cp/init.cc 2025-02-06 12:25:17.624810169 +0100 @@ -3842,7 +3842,6 @@ build_new_1 (vec **placemen tree end, sentry, begin; begin = get_target_expr (boolean_true_node); - CLEANUP_EH_ONLY (begin) = 1; sentry = TARGET_EXPR_SLOT (begin); --- gcc/testsuite/g++.dg/asan/pr118763.C.jj 2025-02-06 12:23:50.724022482 +0100 +++ gcc/testsuite/g++.dg/asan/pr118763.C2025-02-06 12:23:29.407319860 +0100 @@ -0,0 +1,15 @@ +// PR c++/118763 +// { dg-do run } + +int * +foo (bool x) +{ + return new int (({ if (x) return nullptr; 1; })); +} + +int +main () +{ + delete foo (true); + delete foo (false); +} Jakub
[PATCH] c++: Use cplus_decl_attributes rather than decl_attributes in grokdecl [PR118773]
Hi! My r15-3046 change regressed the first half of the following testcase. When it calls decl_attributes, it doesn't handle attributes with dependent arguments correctly and so is now rejected that N is not a constant integer during template parsing. I've actually followed the pointer/reference case which did that too and that one has been failing for a couple of years on the second part of the testcase. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Note, there is also if (decl_context != PARM && decl_context != TYPENAME) /* Assume that any attributes that get applied late to templates will DTRT when applied to the declaration as a whole. */ late_attrs = splice_template_attributes (&attrs, type); returned_attrs = decl_attributes (&type, attr_chainon (returned_attrs, attrs), attr_flags); returned_attrs = attr_chainon (late_attrs, returned_attrs); call directly to decl_attributes in grokdeclarator, but this one handles the splicing manually, so maybe it is ok as is (and I don't have a testcase of anything misbehaving for that). 2025-02-06 Jakub Jelinek PR c++/118773 * decl.cc (grokdeclarator): Use cplus_decl_attributes rather than decl_attributes for std_attributes on pointer and array types. * g++.dg/cpp0x/gen-attrs-87.C: New test. * g++.dg/gomp/attrs-3.C: Adjust expected diagnostics. --- gcc/cp/decl.cc.jj 2025-02-06 09:01:25.600721993 +0100 +++ gcc/cp/decl.cc 2025-02-06 15:59:20.707070240 +0100 @@ -13846,7 +13846,7 @@ grokdeclarator (const cp_declarator *dec The optional attribute-specifier-seq appertains to the array type. */ - decl_attributes (&type, declarator->std_attributes, 0); + cplus_decl_attributes (&type, declarator->std_attributes, 0); break; case cdk_function: @@ -14522,8 +14522,7 @@ grokdeclarator (const cp_declarator *dec [the optional attribute-specifier-seq (7.6.1) appertains to the pointer and not to the object pointed to]. */ if (declarator->std_attributes) - decl_attributes (&type, declarator->std_attributes, -0); + cplus_decl_attributes (&type, declarator->std_attributes, 0); ctype = NULL_TREE; break; --- gcc/testsuite/g++.dg/cpp0x/gen-attrs-87.C.jj2025-02-06 16:14:27.247387432 +0100 +++ gcc/testsuite/g++.dg/cpp0x/gen-attrs-87.C 2025-02-06 16:13:57.413804728 +0100 @@ -0,0 +1,10 @@ +// PR c++/118773 +// { dg-do compile { target c++11 } } +// { dg-options "" } + +template +using T = char[4] [[gnu::aligned (N)]]; +T<2> t; +template +using U = char *[[gnu::aligned (N)]]*; +U<__alignof (char *)> u; --- gcc/testsuite/g++.dg/gomp/attrs-3.C.jj 2024-08-30 16:09:01.230290254 +0200 +++ gcc/testsuite/g++.dg/gomp/attrs-3.C 2025-02-06 19:23:02.331719653 +0100 @@ -32,10 +32,10 @@ foo () i++; auto a = [] () [[omp::directive (threadprivate (t1))]] {}; // { dg-error "'omp::directive' not allowed to be specified in this context" } int [[omp::directive (threadprivate (t2))]] b; // { dg-warning "attribute ignored" } - int *[[omp::directive (threadprivate (t3))]] c; // { dg-warning "'omp::directive' scoped attribute directive ignored" } - int &[[omp::directive (threadprivate (t4))]] d = b; // { dg-warning "'omp::directive' scoped attribute directive ignored" } + int *[[omp::directive (threadprivate (t3))]] c; // { dg-error "'omp::directive' not allowed to be specified in this context" } + int &[[omp::directive (threadprivate (t4))]] d = b; // { dg-error "'omp::directive' not allowed to be specified in this context" } typedef int T [[omp::directive (threadprivate (t5))]]; // { dg-error "'omp::directive' not allowed to be specified in this context" } int e [[omp::directive (threadprivate (t6))]] [10]; // { dg-error "'omp::directive' not allowed to be specified in this context" } - int f[10] [[omp::directive (threadprivate (t6))]]; // { dg-warning "'omp::directive' scoped attribute directive ignored" } + int f[10] [[omp::directive (threadprivate (t6))]]; // { dg-error "'omp::directive' not allowed to be specified in this context" } struct [[omp::directive (threadprivate (t7))]] S {}; // { dg-error "'omp::directive' not allowed to be specified in this context" } } Jakub
[PATCH v2 3/5][STAGE1] ctf: translate annotation DIEs to internal ctf
Translate DW_TAG_GNU_annotation DIEs created for C attributes btf_decl_tag and btf_type_tag into an in-memory representation in the CTF/BTF container. They will be output in BTF as BTF_KIND_DECL_TAG and BTF_KIND_TYPE_TAG records. The new CTF kinds used to represent these annotations, CTF_K_DECL_TAG and CTF_K_TYPE_TAG, are expected to be formalized in the next version of the CTF specification. For now they only exist in memory as a translation step to BTF, and are not emitted when generating CTF information. gcc/ * ctfc.cc (ctf_dtu_d_union_selector): Handle CTF_K_DECL_TAG and CTF_K_TYPE_TAG. (ctf_add_type_tag, ctf_add_decl_tag): New. (ctf_add_variable): Return the new ctf_dvdef_ref rather than zero. (new_ctf_container): Initialize new members. (ctfc_delete_container): Deallocate new members. * ctfc.h (ctf_dvdef, ctf_dvdef_t, ctf_dvdef_ref): Move forward declarations earlier in file. (ctf_decl_tag_t): New typedef. (ctf_dtdef): Add ctf_decl_tag_t member to dtd_u union. (ctf_dtu_d_union_enum): Add new CTF_DTU_D_TAG enumerator. (ctf_container): Add ctfc_tags vector and ctfc_type_tags_map hash_map members. (ctf_add_type_tag, ctf_add_decl_tag): New function protos. (ctf_add_variable): Change prototype return type to ctf_dvdef_ref. * dwarf2ctf.cc (gen_ctf_type_tags, gen_ctf_decl_tags) (gen_ctf_decl_tags_for_var): New static functions. (gen_ctf_sou_type): Handle decl tags. (gen_ctf_function_type): Likewise. (gen_ctf_variable): Likewise. (gen_ctf_function): Likewise. (gen_ctf_type): Handle type tags. gcc/testsuite * gcc.dg/debug/ctf/ctf-decl-tag-1.c: New test. * gcc.dg/debug/ctf/ctf-type-tag-1.c: New test. include/ * ctf.h (CTF_K_DECL_TAG, CTF_K_TYPE_TAG): New defines. --- gcc/ctfc.cc | 70 +++- gcc/ctfc.h| 41 - gcc/dwarf2ctf.cc | 152 +- .../gcc.dg/debug/ctf/ctf-decl-tag-1.c | 31 .../gcc.dg/debug/ctf/ctf-type-tag-1.c | 19 +++ include/ctf.h | 4 + 6 files changed, 303 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-decl-tag-1.c create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-type-tag-1.c diff --git a/gcc/ctfc.cc b/gcc/ctfc.cc index 51511d69baa..c478fa0a136 100644 --- a/gcc/ctfc.cc +++ b/gcc/ctfc.cc @@ -107,6 +107,9 @@ ctf_dtu_d_union_selector (ctf_dtdef_ref ctftype) return CTF_DTU_D_ARGUMENTS; case CTF_K_SLICE: return CTF_DTU_D_SLICE; +case CTF_K_DECL_TAG: +case CTF_K_TYPE_TAG: + return CTF_DTU_D_TAG; default: /* The largest member as default. */ return CTF_DTU_D_ARRAY; @@ -445,6 +448,58 @@ ctf_add_reftype (ctf_container_ref ctfc, uint32_t flag, ctf_dtdef_ref ref, return dtd; } +ctf_dtdef_ref +ctf_add_type_tag (ctf_container_ref ctfc, uint32_t flag, const char *value, + ctf_dtdef_ref ref_dtd) +{ + ctf_dtdef_ref dtd; + /* Create a DTD for the tag, but do not place it in the regular types list; + CTF format does not (yet) encode tags. */ + dtd = ggc_cleared_alloc (); + + dtd->dtd_name = ctf_add_string (ctfc, value, &(dtd->dtd_data.ctti_name), + CTF_AUX_STRTAB); + /* A single DW_TAG_GNU_annotation DIE may be referenced by multiple DIEs, + e.g. when multiple distinct types specify the same type tag. We will + synthesize multiple CTF DTD records in that case, so we cannot tie them + all to the same key (the DW_TAG_GNU_annotation DIE) in ctfc_types. */ + dtd->dtd_key = NULL; + dtd->ref_type = ref_dtd; + dtd->dtd_data.ctti_info = CTF_TYPE_INFO (CTF_K_TYPE_TAG, flag, 0); + dtd->dtd_u.dtu_tag.ref_var = NULL; /* Not used for type tags. */ + dtd->dtd_u.dtu_tag.component_idx = 0; /* Not used for type tags. */ + + /* Insert tag directly into the tag list. Type ID will be assigned later. */ + vec_safe_push (ctfc->ctfc_tags, dtd); + return dtd; +} + +ctf_dtdef_ref +ctf_add_decl_tag (ctf_container_ref ctfc, uint32_t flag, const char *value, + ctf_dtdef_ref ref_dtd, uint32_t comp_idx) +{ + ctf_dtdef_ref dtd; + /* Create a DTD for the tag, but do not place it in the regular types list; + ctf format does not (yet) encode tags. */ + dtd = ggc_cleared_alloc (); + + dtd->dtd_name = ctf_add_string (ctfc, value, &(dtd->dtd_data.ctti_name), + CTF_AUX_STRTAB); + /* A single DW_TAG_GNU_annotation DIE may be referenced by multiple DIEs, + e.g. when multiple distinct declarations specify the same decl tag. + We will synthesize multiple CTF DTD records in that case, so we cannot tie + them all to the same key (the DW_TAG_GNU_annotation DIE) in ctfc_types. */ + dtd->dtd_key = NULL; + dtd->ref_type
Re: [PATCH] RISC-V: Move UNSPEC_SSP_SET and UNSPEC_SSP_TEST to correct enum
On 2/6/25 9:30 AM, Craig Blackmore wrote: stack_protect_{set,test}_ were showing up in RTL dumps as UNSPEC_COPYSIGN and UNSPEC_FMV_X_W due to UNSPEC_SSP_SET and UNSPEC_SSP_TEST being put in the unspecv enum instead of unspec. gcc/ChangeLog: * config/riscv/riscv.md: Move UNSPEC_SSP_SET and UNSPEC_SSP_TEST to unspec enum. Thanks. Pushed to the trunk. jeff
[PATCH v2 4/5][STAGE1] btf: generate and output DECL_TAG and TYPE_TAG records
Support the btf_decl_tag and btf_type_tag attributes in BTF by creating and emitting BTF_KIND_DECL_TAG and BTF_KIND_TYPE_TAG records, respectively, for them. Some care is required when -gprune-btf is in effect to avoid emitting decl or type tags for declarations or types which have been pruned and will not be emitted in BTF. gcc/ * btfout.cc (get_btf_kind): Handle DECL_TAG and TYPE_TAG kinds. (btf_calc_num_vbytes): Likewise. (btf_asm_type): Likewise. (output_asm_btf_vlen_bytes): Likewise. (output_btf_tags): New. (btf_output): Call it here. (btf_add_used_type): Replace with simple wrapper around... (btf_add_used_type_1): ...the implementation. Handle BTF_KIND_DECL_TAG and BTF_KIND_TYPE_TAG. (btf_add_vars): Update btf_add_used_type call. (btf_assign_tag_ids): New. (btf_mark_type_used): Update btf_add_used_type call. (btf_collect_pruned_types): Likewise. Handle type and decl tags. (btf_finish): Call btf_assign_tag_ids. gcc/testsuite/ * gcc.dg/debug/btf/btf-decl-tag-1.c: New test. * gcc.dg/debug/btf/btf-decl-tag-2.c: New test. * gcc.dg/debug/btf/btf-decl-tag-3.c: New test. * gcc.dg/debug/btf/btf-decl-tag-4.c: New test. * gcc.dg/debug/btf/btf-type-tag-1.c: New test. * gcc.dg/debug/btf/btf-type-tag-2.c: New test. * gcc.dg/debug/btf/btf-type-tag-3.c: New test. * gcc.dg/debug/btf/btf-type-tag-4.c: New test. * gcc.dg/debug/btf/btf-type-tag-5.c: New test. * gcc.dg/debug/btf/btf-type-tag-6.c: New test. * gcc.dg/debug/btf/btf-type-tag-c2x-1.c: New test. include/ * btf.h (BTF_KIND_DECL_TAG, BTF_KIND_TYPE_TAG) New defines. (struct btf_decl_tag): New. --- gcc/btfout.cc | 171 +++--- .../gcc.dg/debug/btf/btf-decl-tag-1.c | 14 ++ .../gcc.dg/debug/btf/btf-decl-tag-2.c | 22 +++ .../gcc.dg/debug/btf/btf-decl-tag-3.c | 22 +++ .../gcc.dg/debug/btf/btf-decl-tag-4.c | 34 .../gcc.dg/debug/btf/btf-type-tag-1.c | 27 +++ .../gcc.dg/debug/btf/btf-type-tag-2.c | 15 ++ .../gcc.dg/debug/btf/btf-type-tag-3.c | 21 +++ .../gcc.dg/debug/btf/btf-type-tag-4.c | 25 +++ .../gcc.dg/debug/btf/btf-type-tag-5.c | 35 .../gcc.dg/debug/btf/btf-type-tag-6.c | 15 ++ .../gcc.dg/debug/btf/btf-type-tag-c2x-1.c | 23 +++ include/btf.h | 14 ++ 13 files changed, 414 insertions(+), 24 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-decl-tag-1.c create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-decl-tag-2.c create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-decl-tag-3.c create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-decl-tag-4.c create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-type-tag-1.c create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-type-tag-2.c create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-type-tag-3.c create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-type-tag-4.c create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-type-tag-5.c create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-type-tag-6.c create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-type-tag-c2x-1.c diff --git a/gcc/btfout.cc b/gcc/btfout.cc index ff7ea42a961..c00e0c98015 100644 --- a/gcc/btfout.cc +++ b/gcc/btfout.cc @@ -141,6 +141,8 @@ get_btf_kind (uint32_t ctf_kind) case CTF_K_VOLATILE: return BTF_KIND_VOLATILE; case CTF_K_CONST:return BTF_KIND_CONST; case CTF_K_RESTRICT: return BTF_KIND_RESTRICT; +case CTF_K_DECL_TAG: return BTF_KIND_DECL_TAG; +case CTF_K_TYPE_TAG: return BTF_KIND_TYPE_TAG; default:; } return BTF_KIND_UNKN; @@ -217,6 +219,7 @@ btf_calc_num_vbytes (ctf_dtdef_ref dtd) case BTF_KIND_CONST: case BTF_KIND_RESTRICT: case BTF_KIND_FUNC: +case BTF_KIND_TYPE_TAG: /* These kinds have no vlen data. */ break; @@ -256,6 +259,10 @@ btf_calc_num_vbytes (ctf_dtdef_ref dtd) vlen_bytes += vlen * sizeof (struct btf_var_secinfo); break; +case BTF_KIND_DECL_TAG: + vlen_bytes += sizeof (struct btf_decl_tag); + break; + default: break; } @@ -452,6 +459,20 @@ btf_asm_type (ctf_dtdef_ref dtd) and should write 0. */ dw2_asm_output_data (4, 0, "(unused)"); return; +case BTF_KIND_DECL_TAG: + { + if (dtd->ref_type) + break; + else if (dtd->dtd_u.dtu_tag.ref_var) + { + /* ref_type is NULL for decl tag attached to a variable. */ + ctf_dvdef_ref dvd = dtd->dtd_u.dtu_tag.ref_var; + dw2_asm_output_data (4, dvd->dvd_id, +"btt_type: (BTF_KIND_VAR '%s')", +dvd->dvd_name); + return; + } + } default: break; } @@ -801,6 +82
Re: [PATCH v2] c++: Reject cdtors and conversion operators with a single * as return type [PR118306]
Hi Jason, On 6 Feb 2025, at 16:48, Jason Merrill wrote: > On 2/5/25 2:21 PM, Simon Martin wrote: >> Hi Jason, >> >> On 4 Feb 2025, at 21:23, Jason Merrill wrote: >> >>> On 2/4/25 3:03 PM, Jason Merrill wrote: On 2/4/25 11:45 AM, Simon Martin wrote: > On 4 Feb 2025, at 17:17, Jason Merrill wrote: > >> On 2/4/25 10:56 AM, Simon Martin wrote: >>> Hi Jason, >>> >>> On 4 Feb 2025, at 16:39, Jason Merrill wrote: >>> On 1/15/25 9:56 AM, Jason Merrill wrote: > On 1/15/25 7:24 AM, Simon Martin wrote: >> Hi, >> >> On 14 Jan 2025, at 23:31, Jason Merrill wrote: >> >>> On 1/14/25 2:13 PM, Simon Martin wrote: On 10 Jan 2025, at 19:10, Andrew Pinski wrote: > On Fri, Jan 10, 2025 at 3:18 AM Simon Martin > > wrote: >> >> We currently accept the following invalid code (EDG and >> MSVC >> do >> as >> well) > > clang does too: > https://github.com/llvm/llvm-project/issues/121706 . >> > > Note it might be useful if a testcase with multiply `*` is > included > too: > ``` > struct A { > A (); > }; > ``` Thanks, makes sense to add those. Done in the attached updated revision, successfully tested on x86_64-pc-linux-gnu. >>> +/* Check that it's OK to declare a function at ID_LOC with the indicated TYPE, + TYPE_QUALS and DECLARATOR. SFK indicates the kind of special function (if + any) that this function is. OPTYPE is the type given in a conversion operator declaration, or the class type for a constructor/destructor. Returns the actual return type of the function; that may be different than TYPE if an error occurs, or for certain special functions. */ @@ -12361,8 +12362,19 @@ check_special_function_return_type (special_function_kind sfk, tree type, tree optype, int type_quals, + const cp_declarator *declarator, + location_t id_loc, >>> >>> id_loc should be the same as declarator->id_loc? >> You’re right. >> const location_t* locations) { + /* If TYPE is unspecified, DECLARATOR, if set, should not represent a pointer + or a reference type. */ + if (type == NULL_TREE + && declarator + && (declarator->kind == cdk_pointer + || declarator->kind == cdk_reference)) + error_at (id_loc, "expected unqualified-id before %qs token", + declarator->kind == cdk_pointer ? "*" : "&"); >>> >>> ...and id_loc isn't the location of the ptr-operator, it's >>> the >> >>> location of the identifier, so this indicates the wrong >>> column. >>> I >>> think using declarator->id_loc makes sense, just not >>> pretending >>> it's >>> the location of the *. >> Good catch, thanks. >> >>> Let's give diagnostics more like the others later in the >>> function >>> instead of trying to emulate cp_parser_error. >> Makes sense. This is what the updated patch does, >> successfully >> tested on >> x86_64-pc-linux-gnu. OK for GCC 16? > > OK. Does this also fix 118304? If so, let's go ahead and apply it to GCC 15. >>> I have checked just now, and we still ICE for 118304’s >>> testcase >>> with >>> that fix. >> >> Why doesn't the preeexisting >> >> type = void_type_node; >> >> in check_special_function_return_type fix the return type and >> avoid >> >> the ICE? > We hit the gcc_assert at method.cc:3593, that Marek’s fix >> > bypasses. Yes, but why doesn't check_special_function_return_type prevent that? >>> >>> Ah, because we call it before walking the declarator. We need to >>> check again later, perhaps in grokfndecl, that the type is correct. >>> Perhaps instead
[committed] [RISC-V] Fix risc-v expected test output after recent iv changes
Richard S's recent change to iv increment insertion removed a reg->reg move (which was its intent AFAICT). This triggered a failure on a riscv test. That test was meant to verify that we didn't have an extraneous reg->reg move due to a buglet in the risc-v splitters. Before the 2023 change we had two vector reg->reg moves and after the 2023 fix we had just one. With Richard's change we have none ;-) Adjusting test accordingly. Pushed to the trunk. Jeff commit 33e610110c933b0d65aa82d67864bb881768609f Author: Jeff Law Date: Thu Feb 6 12:37:11 2025 -0700 [RISC-V] Fix risc-v expected test output after recent iv changes Richard S's recent change to iv increment insertion removed a reg->reg move (which was its intent AFAICT). This triggered a failure on a riscv test. That test was meant to verify that we didn't have an extraneous reg->reg move due to a buglet in the risc-v splitters. Before the 2023 change we had two vector reg->reg moves and after the 2023 fix we had just one. With Richard's change we have none ;-) Adjusting test accordingly. Pushed to the trunk. gcc/testsuite * gcc.target/riscv/rvv/autovec/madd-split2-1.c: Update expected output. diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/madd-split2-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/madd-split2-1.c index 4f99a5f87c4..8cc0c9f407c 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/madd-split2-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/madd-split2-1.c @@ -10,4 +10,4 @@ foo (long *__restrict a, long *__restrict b, long n) return a[1]; } -/* { dg-final { scan-assembler-times {\tvmv1r\.v} 1 } } */ +/* { dg-final { scan-assembler-not {\tvmv1r\.v} } } */
[PATCH v2 5/5][STAGE1] doc: document btf_type_tag and btf_decl_tag attributes
gcc/ * doc/extend.texi (Common Variable Attributes): Document btf_decl_tag attribute. (Common Type Attributes): Document btf_type_tag attribute. --- gcc/doc/extend.texi | 68 + 1 file changed, 68 insertions(+) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 2764597a479..4536cfd7c68 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -8037,6 +8037,41 @@ align them on any target. The @code{aligned} attribute can also be used for functions (@pxref{Common Function Attributes}.) +@cindex @code{btf_decl_tag} variable attribute +@item btf_decl_tag (@var{argument}) +The @code{btf_decl_tag} attribute may be used to associate variable +declarations, struct or union member declarations, function +declarations, and function parameter declarations with arbitrary strings. +These strings are not interpreted by the compiler in any way, and have +no effect on code generation. Instead, these user-provided strings +are recorded in DWARF (via @code{DW_AT_GNU_annotation} and +@code{DW_TAG_GNU_annotation} extensions) and BTF information (via +@code{BTF_KIND_DECL_TAG} records), and associated to the attributed +declaration. If neither DWARF nor BTF information is generated, the +attribute has no effect. + +The argument is treated as an ordinary string in the source language +with no additional special rules. + +The attribute may be supplied multiple times for a single declaration, +in which case each distinct argument string will be recorded in a +separate DIE or BTF record, each associated to the declaration. For +a single declaration with multiple @code{btf_decl_tag} attributes, +the order of the @code{DW_TAG_GNU_annotation} DIEs produced is not +guaranteed to maintain the order of attributes in the source code. + +For example: + +@smallexample +int *foo __attribute__ ((btf_decl_tag ("__percpu"))); +@end smallexample + +@noindent +when compiled with @code{-gbtf} results in an additional +@code{BTF_KIND_DECL_TAG} BTF record to be emitted in the BTF info, +associating the string ``__percpu'' with the normal @code{BTF_KIND_VAR} +record for the variable ``foo''. + @cindex @code{counted_by} variable attribute @item counted_by (@var{count}) The @code{counted_by} attribute may be attached to the C99 flexible array @@ -9226,6 +9261,39 @@ is given by the product of arguments 1 and 2, and that @code{malloc_type}, like the standard C function @code{malloc}, returns an object whose size is given by argument 1 to the function. +@cindex @code{btf_type_tag} type attribute +@item btf_type_tag (@var{argument}) +The @code{btf_type_tag} attribute may be used to associate (to ``tag'') +particular types with arbitrary string annotations. These annotations +are recorded in debugging info by supported debug formats, currently +DWARF (via @code{DW_AT_GNU_annotation} and @code{DW_TAG_GNU_annotation} +extensions) and BTF (via @code{BTF_KIND_TYPE_TAG} records). These +annotation strings are not interpreted by the compiler in any way, and +have no effect on code generation. If neither DWARF nor BTF +information is generated, the attribute has no effect. + +The argument is treated as an ordinary string in the source language +with no additional special rules. + +The attribute may be supplied multiple times for a single declaration, +in which case each distinct argument string will be recorded in a +separate DIE or BTF record, each associated to the type. For a single +type with multiple @code{btf_type_tag} attributes, the order of the +@code{DW_TAG_GNU_annotation} DIEs produced is not guaranteed to +maintain the order of attributes in the source code. + +For example the following code: + +@smallexample +int * __attribute__ ((btf_type_tag ("__user"))) foo; +@end smallexample + +@noindent +when compiled with @code{-gbtf} results in an additional +@code{BTF_KIND_TYPE_TAG} BTF record to be emitted in the BTF info, +associating the string ``__user'' with the normal @code{BTF_KIND_PTR} +record for the pointer-to-integer type used in the declaration. + @cindex @code{copy} type attribute @item copy @itemx copy (@var{expression}) -- 2.45.2
[PATCH v2 1/5][STAGE1] c-family: add btf_type_tag and btf_decl_tag attributes
Add two new c-family attributes, "btf_type_tag" and "btf_decl_tag" along with a simple shared handler for them. gcc/c-family/ * c-attribs.cc (c_common_attribute_table): Add btf_decl_tag and btf_type_tag attributes. (handle_btf_tag_attribute): New handler for both new attributes. --- gcc/c-family/c-attribs.cc | 25 - 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc index f3181e7b57c..b51a8e755ae 100644 --- a/gcc/c-family/c-attribs.cc +++ b/gcc/c-family/c-attribs.cc @@ -189,6 +189,8 @@ static tree handle_fd_arg_attribute (tree *, tree, tree, int, bool *); static tree handle_flag_enum_attribute (tree *, tree, tree, int, bool *); static tree handle_null_terminated_string_arg_attribute (tree *, tree, tree, int, bool *); +static tree handle_btf_tag_attribute (tree *, tree, tree, int, bool *); + /* Helper to define attribute exclusions. */ #define ATTR_EXCL(name, function, type, variable) \ { name, function, type, variable } @@ -640,7 +642,11 @@ const struct attribute_spec c_common_gnu_attributes[] = { "flag_enum", 0, 0, false, true, false, false, handle_flag_enum_attribute, NULL }, { "null_terminated_string_arg", 1, 1, false, true, true, false, - handle_null_terminated_string_arg_attribute, NULL} + handle_null_terminated_string_arg_attribute, NULL}, + { "btf_type_tag", 1, 1, false, true, false, false, + handle_btf_tag_attribute, NULL}, + { "btf_decl_tag", 1, 1, true, false, false, false, + handle_btf_tag_attribute, NULL} }; const struct scoped_attribute_specs c_common_gnu_attribute_table = @@ -5099,6 +5105,23 @@ handle_null_terminated_string_arg_attribute (tree *node, tree name, tree args, return NULL_TREE; } +/* Handle the "btf_decl_tag" and "btf_type_tag" attributes. */ + +static tree +handle_btf_tag_attribute (tree * ARG_UNUSED (node), tree name, tree args, + int ARG_UNUSED (flags), bool *no_add_attrs) +{ + if (!args) +*no_add_attrs = true; + else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST) +{ + error ("%qE attribute requires a string", name); + *no_add_attrs = true; +} + + return NULL_TREE; +} + /* Handle the "nonstring" variable attribute. */ static tree -- 2.45.2
[PATCH v2 0/5][STAGE1] Add btf_decl_tag and btf_type_tag C attributes
[v1: https://gcc.gnu.org/pipermail/gcc-patches/2024-October/666911.html Changes from v1: - Fix a bug in v1 related to generating DWARF for type tags applied to struct or union types, especially if the type had multiple type tags or was also part of a typedef. - Simplified the dwarf2ctf translation of types having both cv-qualifiers and type tags applied to them. - Add a few new tests. - Address review comments from v1. ] This patch series adds support for the btf_decl_tag and btf_type_tag attributes to GCC. This entails: - Two new C-family attributes that allow to associate (to "tag") particular declarations and types with arbitrary strings. As explained below, this is intended to be used to, for example, characterize certain pointer types. A single declaration or type may have multiple occurrences of these attributes. - The conveyance of that information in the DWARF output in the form of a new DIE: DW_TAG_GNU_annotation, and a new attribute: DW_AT_GNU_annotation. - The conveyance of that information in the BTF output in the form of two new kinds of BTF objects: BTF_KIND_DECL_TAG and BTF_KIND_TYPE_TAG. These BTF kinds are already supported by LLVM and other tools in the BPF ecosystem. Both of these attributes are already supported by clang, and beginning to be used in various ways by BPF users and inside the Linux kernel. It is worth noting that while the Linux kernel and BPF/BTF is the motivating use case of this feature, the format of the new DWARF extension is generic. This work could be easily adapted to provide a general way for program authors to annotate types and declarations with arbitrary information for any post-compilation analysis needs, not just the Linux kernel BPF verifier. For example, these annotations could be used to aid in ABI analysis. Purpose === 1) Addition of C-family language constructs (attributes) to specify free-text tags on certain language elements, such as struct fields. The purpose of these annotations is to provide additional information about types, variables, and function parameters of interest to the kernel. A driving use case is to tag pointer types within the Linux kernel and BPF programs with additional semantic information, such as '__user' or '__rcu'. For example, consider the Linux kernel function do_execve with the following declaration: static int do_execve(struct filename *filename, const char __user *const __user *__argv, const char __user *const __user *__envp); Here, __user could be defined with these annotations to record semantic information about the pointer parameters (e.g., they are user-provided) in DWARF and BTF information. Other kernel facilities such as the BPF verifier can read the tags and make use of the information. 2) Conveying the tags in the generated DWARF debug info. The main motivation for emitting the tags in DWARF is that the Linux kernel generates its BTF information via pahole, using DWARF as a source: ++ BTF BTF +--+ | pahole |---> vmlinux.btf --->| verifier | ++ +--+ ^^ || DWARF |BTF | || vmlinux +-+ module1.ko | BPF program | module2.ko +-+ ... This is because: a) Unlike GCC, LLVM will only generate BTF for BPF programs. b) GCC can generate BTF for whatever target with -gbtf, but there is no support for linking/deduplicating BTF in the linker. c) pahole injects additional BTF information based on specific knowledge of kernel objects which is not available to the compiler. In the scenario above, the verifier needs access to the pointer tags of both the kernel types/declarations (conveyed in the DWARF and translated to BTF by pahole) and those of the BPF program (available directly in BTF). Another motivation for having the tag information in DWARF, unrelated to BPF and BTF, is that the drgn project (another DWARF consumer) also wants to benefit from these tags in order to differentiate between different kinds of pointers in the kernel. 3) Conveying the tags in the generated BTF debug info. This is easy: the main purpose of having this info in BTF is for the compiled BPF programs. The kernel verifier can then access the tags of pointers used by the BPF programs. For more information about these tags and the motivation behind them, please refer to the following Linux kernel discussions: [1], [2], [3]. DWARF Representation Compared to prior iterations of this
[PATCH v2 2/5][STAGE1] dwarf: create annotation DIEs for btf tags
The btf_decl_tag and btf_type_tag attributes provide a means to annotate declarations and types respectively with arbitrary user provided strings. These strings are recorded in debug information for post-compilation uses, and despite the name they are meant to be recorded in DWARF as well as BTF. New DWARF extensions DW_TAG_GNU_annotation and DW_AT_GNU_annotation are used to represent these user annotations in DWARF. This patch introduces the new DWARF extension DIE and attribute, and generates them as necessary to represent user annotations from btf_decl_tag and btf_type_tag. The format of the new DIE is as follows: DW_TAG_GNU_annotation DW_AT_name: "btf_decl_tag" or "btf_type_tag" DW_AT_const_value: DW_AT_GNU_annotation: DW_AT_GNU_annotation is a new attribute extension used to refer to these new annotation DIEs. If non-null in any given declaration or type DIE, it is a reference to a DW_TAG_GNU_annotation DIE holding an annotation for that declaration or type. In addition, the DW_TAG_GNU_annotation DIEs may also have a non-null DW_AT_GNU_annotation, referring to another annotation DIE. This allows chains of annotation DIEs to be formed, such as in the case where a single declaration has multiple instances of btf_decl_tag with different string annotations. gcc/ * dwarf2out.cc (struct annotation_node, struct annotation_node_hasher) (btf_tag_htab): New ancillary structures and hash table. (annotation_node_hasher::hash, annotation_node_hasher::equal): New. (hash_btf_tag, gen_btf_tag_dies, gen_btf_type_tag_dies) (maybe_gen_btf_type_tag_dies, gen_btf_decl_tag_dies): New functions. (modified_type_die): Handle btf_type_tag attribute. (gen_formal_parameter_die): Call gen_btf_decl_tags for the parameter. (gen_decl_die): Call gen_btf_decl_tags for the decl. (gen_tagged_type_die): Call maybe_gen_btf_type_tag_dies for the type. (dwarf2out_early_finish): Empty btf_tag_htab hash table. (dwarf2out_cc_finalize): Delete btf_tag_htab hash table. include/ * dwarf2.def (DW_TAG_GNU_annotation): New DWARF extension. (DW_AT_GNU_annotation): Likewise. gcc/testsuite/ * gcc.dg/debug/dwarf2/dwarf-btf-decl-tag-1.c: New test. * gcc.dg/debug/dwarf2/dwarf-btf-decl-tag-2.c: New test. * gcc.dg/debug/dwarf2/dwarf-btf-decl-tag-3.c: New test. * gcc.dg/debug/dwarf2/dwarf-btf-type-tag-1.c: New test. * gcc.dg/debug/dwarf2/dwarf-btf-type-tag-2.c: New test. * gcc.dg/debug/dwarf2/dwarf-btf-type-tag-3.c: New test. --- gcc/dwarf2out.cc | 275 +- .../debug/dwarf2/dwarf-btf-decl-tag-1.c | 11 + .../debug/dwarf2/dwarf-btf-decl-tag-2.c | 25 ++ .../debug/dwarf2/dwarf-btf-decl-tag-3.c | 21 ++ .../debug/dwarf2/dwarf-btf-type-tag-1.c | 10 + .../debug/dwarf2/dwarf-btf-type-tag-2.c | 31 ++ .../debug/dwarf2/dwarf-btf-type-tag-3.c | 15 + include/dwarf2.def| 4 + 8 files changed, 385 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-btf-decl-tag-1.c create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-btf-decl-tag-2.c create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-btf-decl-tag-3.c create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-btf-type-tag-1.c create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-btf-type-tag-2.c create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/dwarf-btf-type-tag-3.c diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc index 8085b8d85d8..b06fd5ccc04 100644 --- a/gcc/dwarf2out.cc +++ b/gcc/dwarf2out.cc @@ -3696,6 +3696,32 @@ static bool frame_pointer_fb_offset_valid; static vec base_types; +/* A cached btf_type_tag or btf_decl_tag user annotation. */ +struct GTY ((for_user)) annotation_node +{ + const char *name; + const char *value; + hashval_t hash; + dw_die_ref die; + struct annotation_node *next; +}; + +struct annotation_node_hasher : ggc_ptr_hash +{ + typedef const struct annotation_node *compare_type; + + static hashval_t hash (struct annotation_node *); + static bool equal (const struct annotation_node *, +const struct annotation_node *); +}; + +/* A hash table of tag annotation nodes for btf_type_tag and btf_decl_tag C + attributes. DIEs for these user annotations may be reused if they are + structurally equivalent; this hash table is used to ensure the DIEs are + reused wherever possible. */ +static GTY (()) hash_table *btf_tag_htab; + + /* Flags to represent a set of attribute classes for attributes that represent a scalar value (bounds, pointers, ...). */ enum dw_scalar_form @@ -13659,6 +13685,180 @@ long_double_as_float128 (tree type) return NULL_TREE; } + +hashval_t +annotation_node_hasher::hash (struct annotation_node *node) +{ + return node->hash; +} + +bool +annotation_node_hasher::equal
[PATCH] [testsuite] [sparc] skip tls tests if emulated
A number of tls tests expect TLS-specific relocations, that are not present when tls is emulated, as on e.g. leon3-elf. Skip the tests when tls is emulated. Regstrapped on x86_64-linux-gnu, also tested with gcc-14 targeting leon3-elf. Ok to install? for gcc/testsuite/ChangeLog * gcc.target/sparc/tls-ld-int16.c: Skip when tls is emulated. * gcc.target/sparc/tls-ld-int32.c: Likewise. * gcc.target/sparc/tls-ld-int8.c: Likewise. * gcc.target/sparc/tls-ld-uint16.c: Likewise. * gcc.target/sparc/tls-ld-uint32.c: Likewise. * gcc.target/sparc/tls-ld-uint8.c: Likewise. --- gcc/testsuite/gcc.target/sparc/tls-ld-int16.c |1 + gcc/testsuite/gcc.target/sparc/tls-ld-int32.c |1 + gcc/testsuite/gcc.target/sparc/tls-ld-int8.c |1 + gcc/testsuite/gcc.target/sparc/tls-ld-uint16.c |1 + gcc/testsuite/gcc.target/sparc/tls-ld-uint32.c |1 + gcc/testsuite/gcc.target/sparc/tls-ld-uint8.c |1 + 6 files changed, 6 insertions(+) diff --git a/gcc/testsuite/gcc.target/sparc/tls-ld-int16.c b/gcc/testsuite/gcc.target/sparc/tls-ld-int16.c index d3d28086156ee..de4ce4034c6df 100644 --- a/gcc/testsuite/gcc.target/sparc/tls-ld-int16.c +++ b/gcc/testsuite/gcc.target/sparc/tls-ld-int16.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2" } /* { dg-add-options tls } */ +/* { dg-skip-if "native tls expected" { tls_emulated } } */ #include diff --git a/gcc/testsuite/gcc.target/sparc/tls-ld-int32.c b/gcc/testsuite/gcc.target/sparc/tls-ld-int32.c index cf18147ef7273..5604c24151ac1 100644 --- a/gcc/testsuite/gcc.target/sparc/tls-ld-int32.c +++ b/gcc/testsuite/gcc.target/sparc/tls-ld-int32.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2" } /* { dg-add-options tls } */ +/* { dg-skip-if "native tls expected" { tls_emulated } } */ #include diff --git a/gcc/testsuite/gcc.target/sparc/tls-ld-int8.c b/gcc/testsuite/gcc.target/sparc/tls-ld-int8.c index a07cffc37f8c4..17eb32ea2a107 100644 --- a/gcc/testsuite/gcc.target/sparc/tls-ld-int8.c +++ b/gcc/testsuite/gcc.target/sparc/tls-ld-int8.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2" } /* { dg-add-options tls } */ +/* { dg-skip-if "native tls expected" { tls_emulated } } */ #include diff --git a/gcc/testsuite/gcc.target/sparc/tls-ld-uint16.c b/gcc/testsuite/gcc.target/sparc/tls-ld-uint16.c index 41ee687b28c51..81990a7bf3ca5 100644 --- a/gcc/testsuite/gcc.target/sparc/tls-ld-uint16.c +++ b/gcc/testsuite/gcc.target/sparc/tls-ld-uint16.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2" } /* { dg-add-options tls } */ +/* { dg-skip-if "native tls expected" { tls_emulated } } */ #include diff --git a/gcc/testsuite/gcc.target/sparc/tls-ld-uint32.c b/gcc/testsuite/gcc.target/sparc/tls-ld-uint32.c index 9c7915372b9e7..60524ba87cf6a 100644 --- a/gcc/testsuite/gcc.target/sparc/tls-ld-uint32.c +++ b/gcc/testsuite/gcc.target/sparc/tls-ld-uint32.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2" } /* { dg-add-options tls } */ +/* { dg-skip-if "native tls expected" { tls_emulated } } */ #include diff --git a/gcc/testsuite/gcc.target/sparc/tls-ld-uint8.c b/gcc/testsuite/gcc.target/sparc/tls-ld-uint8.c index 0dcff66eb15e4..7a7492197f952 100644 --- a/gcc/testsuite/gcc.target/sparc/tls-ld-uint8.c +++ b/gcc/testsuite/gcc.target/sparc/tls-ld-uint8.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2" } /* { dg-add-options tls } */ +/* { dg-skip-if "native tls expected" { tls_emulated } } */ #include -- Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/ Free Software Activist GNU Toolchain Engineer More tolerance and less prejudice are key for inclusion and diversity Excluding neuro-others for not behaving ""normal"" is *not* inclusive
Re: [PATCH] [testsuite] require -Ofast for vect-ifcvt-18 even without avx
On Jan 31, 2025, Richard Biener wrote: > On Thu, Jan 30, 2025 at 8:49 PM Alexandre Oliva wrote: >> Split -Ofast out of the avx conditional, so that it is passed on the >> same targets that expect the transformation. > It effectively would require -Ofast on all targets (it's vectorizing a > FP reduction), > so I'd say -Ofast should not depend on x86 only. IIUC we prefer not to modify testcases, and making -Ofast apply to other targets would do just that, because the test without -Ofast stands on its own and has been running for a while, just not with a vectorizable FP reduction. That was my reasoning to keep it guarded by a conditional. I agree it would have made sense to make it -Ofast on all targets. -- Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/ Free Software Activist GNU Toolchain Engineer More tolerance and less prejudice are key for inclusion and diversity Excluding neuro-others for not behaving ""normal"" is *not* inclusive
Re: [PATCH] lra: initialize allocated_hard_reg_p[] for hard regs referenced in RTL [PR118533]
On 1/28/25 1:11 PM, Surya Kumari Jangala wrote: Hi, The following patch has been bootstrapped and regtested on powerpc64le-linux, aarch64-linux and x86_64-linux. This patch is a proposed fix for PR118533. Request you to please review the patch. Regards Surya lra: initialize allocated_hard_reg_p[] for hard regs referenced in RTL [PR118533] When computing the costs of the hard registers in assign_hard_reg(), the cost of saving/restoring a callee-save register in the prolog/epilog is taken into consideration. However, if some other pseudo has already been assigned that hard register, then any further assignments of that hard register don't need to be charged the save/restore cost, since it has already been accounted for. The array allocated_hardreg_p[] specifies if a hard register has already been allocated to an allocno. This array is updated whenever a register is assigned. This array should also be updated if any hard register is referenced in the RTL because any such register usage should count as the "first" usage. A hard register reference can occur in the RTL due to inline asm specifying a register for a local variable, or a target’s machine pattern can explicitly reference a hard register. Sorry for delay with the answer. But this problem was already solved in January for PR118497 by the following patch: https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=c4dae80357ccf2e035d8e9ec0a3bb319344c5b41;hp=10850f92b2a618ef1b1ad399530943ef4847823d Also I am agree with Mike Matz we don't need additional insn scan (which is very costly for RTL). The above patch avoids full RTL scanning. 2025-01-28 Surya Kumari Jangala gcc/ PR rtl-optimization/118533 * ira-color.cc (initialize_allocated_hardreg): New function. (color): Initialize array. ---
Re: [patch, libfortran] PR114618 Version 2 Format produces incorrect output when contains 1x, ok when uses " "
On 1/31/25 11:30 AM, Harald Anlauf wrote: --- snip -- So either commit the current version and track this issue in a PR if not yet done, or have a look if there is a quick fix. Thanks for the work! Harald Committed as: commit cfed99751c1a3b93ca66451eb1b62271e682f927 (HEAD -> master, origin/master, origin/HEAD) Author: Jerry DeLisle Date: Wed Jan 29 13:40:59 2025 -0800 Fortran: Fix handling of the X edit descriptor.
Re: [PATCH] LoongArch: Correct the mode for mask{eq,ne}z
在 2025/1/20 上午9:30, Xi Ruoyao 写道: For mask{eq,ne}z, rk is always compared with 0 in the full width, thus the mode for rk should be X. LGTM! I agree with your point of view. Thank you. I found the issue reviewing a patch fixing a similar issue for RISC-V XTheadCondMov [1], but interestingly I cannot find a test case really blowing up on LoongArch. But as the issue is obvious enough let's fix it anyway so it won't blow up in the future. [1]: https://gcc.gnu.org/pipermail/gcc-patches/2025-January/674004.html gcc/ChangeLog: * config/loongarch/loongarch.md (*sel_using_): Rename to ... (*sel_using_): ... here. (GPR2): Remove as nothing uses it now. --- Bootstrapped & regtested on loongarch64-linux-gnu. Ok for trunk? gcc/config/loongarch/loongarch.md | 10 +++--- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index c17d2928fbf..10197b9d9d5 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -374,10 +374,6 @@ (define_asm_attributes ;; from the same template. (define_mode_iterator GPR [SI (DI "TARGET_64BIT")]) -;; A copy of GPR that can be used when a pattern has two independent -;; modes. -(define_mode_iterator GPR2 [SI (DI "TARGET_64BIT")]) - ;; This mode iterator allows 16-bit and 32-bit GPR patterns and 32-bit 64-bit ;; FPR patterns to be generated from the same template. (define_mode_iterator JOIN_MODE [HI @@ -2507,11 +2503,11 @@ (define_expand "cstore4" ;; Conditional move instructions. -(define_insn "*sel_using_" +(define_insn "*sel_using_" [(set (match_operand:GPR 0 "register_operand" "=r,r") (if_then_else:GPR -(equality_op:GPR2 (match_operand:GPR2 1 "register_operand" "r,r") - (const_int 0)) +(equality_op:X (match_operand:X 1 "register_operand" "r,r") + (const_int 0)) (match_operand:GPR 2 "reg_or_0_operand" "r,J") (match_operand:GPR 3 "reg_or_0_operand" "J,r")))] "register_operand (operands[2], mode)
Re: [RFC][libgcc][PATCH] Fix two issues in libgcc/unwind-dw2-fde.c.
> On Feb 6, 2025, at 04:36, Richard Biener wrote: > > On Wed, Feb 5, 2025 at 4:40 PM Qing Zhao wrote: >> >> >> >>> On Feb 5, 2025, at 07:46, Richard Biener wrote: >>> >>> On Tue, Feb 4, 2025 at 10:12 PM Qing Zhao wrote: Hi, One of our big application segv in libgcc code while unwinding the stack. This is a random crash while the application throws a c++ exception and unwinds the stack. Incidents are random and never can be reproduced by any test case. The libgcc that is used is based on GCC 8. Fortunately, we got some information through the stack trace, and narrowed down the crash is in the routine "init_object" of libgcc/unwind-dw2-pde.c: 777 count = classify_object_over_fdes (ob, ob->u.single); when loading the 2nd parameter of the call to "classify_object_over_fdes". i.e, the address that is pointed by "ob->u.single" is invalid. And the outer caller we can track back is the following line 1066 of the routine "_Unwind_Find_FDE": 1060 /* Classify and search the objects we've not yet processed. */ 1061 while ((ob = unseen_objects)) 1062 { 1063 struct object **p; 1064 1065 unseen_objects = ob->next; 1066 f = search_object (ob, pc); Then we suspected that the construction of the static variable "unseen_objects" might have some bug. Then after studying the source code that constructs "unseen_objects" in libgcc/unwind-dw2-pde.c, I found an issue in the routines "__register_frame" and "__register_frame_table" as following: 127 void 128 __register_frame (void *begin) 129 { 130 struct object *ob; 131 132 /* If .eh_frame is empty, don't register at all. */ 133 if (*(uword *) begin == 0) 134 return; 135 136 ob = malloc (sizeof (struct object)); 137 __register_frame_info (begin, ob); 138 } 181 void 182 __register_frame_table (void *begin) 183 { 184 struct object *ob = malloc (sizeof (struct object)); 185 __register_frame_info_table (begin, ob); 186 } 187 In the above, one obvious issue in the source code is at line 136, line 137, and line 184 and line 185: the return value of call to "malloc" is not checked against NULL before it was passed as the second parameter of the follwoing call. This might cause unpredicted random behavior. I checked the latest trunk GCC libgcc, the same issue is there too. This patch is for the latest trunk GCC. Please let me know any comments on this? >>> >>> I think I've seen this elsewhere — >> >> Do you mean that you saw this problem (malloc return NULL during >> register_frame) previously? > > It was probably reported to us (SUSE) from a customer/partner, also > from a (llvm?) JIT context. Okay. So, that bug has not been resolved yet? Qing > >>> the issue is the unwind register API does >>> not allow for failures but I also think calling abort() is bad. >> >> Okay, I see. >> >>> >>> Are you calling this from a JIT context or so? >> >> I will ask the bug submitter on this to make sure. >> >>> We're assuming that at program >>> start malloc() will not fail. >> >> So, the routine __register_frame is only called at the _start_ of a program? >>> >>> The proper solution is probably to add an alternate ABI which has a way to >>> fail >>> during registration. >> >> Any suggestion on this (or any other routine I can refer to?) >> >> >> Thanks a lot for the help. >> >> Qing >>> >>> Richard. >>> thanks. Qing === Fix two issues in libgcc/unwind-dw2-fde.c: A. The returned value of call to malloc is not checked against NULL. This might cause unpredicted behavior during stack unwinding. B. Check for null begin parameter (as well as pointer to null) in __register_frame_info_table_bases and __register_frame_table. libgcc/ChangeLog: * unwind-dw2-fde.c (__register_frame): Check the return value of call to malloc. (__register_frame_info_table_bases): Check for null begin parameter. (__register_frame_table): Check the return value of call to malloc, Check for null begin parameter. --- libgcc/unwind-dw2-fde.c | 12 1 file changed, 12 insertions(+) diff --git a/libgcc/unwind-dw2-fde.c b/libgcc/unwind-dw2-fde.c index cdfd3974c99..f0bc29d682a 100644 --- a/libgcc/unwind-dw2-fde.c +++ b/libgcc/unwind-dw2-fde.c @@ -169,6 +169,8 @@ __register_frame (void *begin) return; ob = malloc (sizeof (struct object)); + if (ob == NULL) +abort (); __register_frame_info (begin, ob);
Re: [PATCH] rtl-optimization/117922 - disable fold-mem-offsets for highly connected CFG
On 2/5/25 5:29 AM, Richard Biener wrote: The PR shows fold-mem-offsets taking ages and a lot of memory computing DU/UD chains as that requires the RD problem. The issue is not so much the memory required for the pruned sets but the high CFG connectivity (and that the CFG is cyclic) which makes solving the dataflow problem expensive. The following adds the same limit as the one imposed by GCSE and CPROP. Bootstrap and regtest ongoing on x86_64-unknown-linux-gnu, this reduces the compile-time of the PR26854 testcase from 480s to 150s. OK? Thanks, Richard. PR rtl-optimization/117922 * fold-mem-offsets.cc (pass_fold_mem_offsets::execute): Do nothing for a highly connected CFG. OK jeff
[PATCH] [testsuite] tolerate later success [PR108357]
On leon3-elf and presumably on other targets, the test fails due to differences in calling conventions and other reasons, that add extra gimple stmts that prevent the expected optimization at the expected point. The optimization takes place anyway, just a little later, so tolerate that. Regstrapped on x86_64-linux-gnu, also tested with gcc-14 targeting leon3-elf. Ok to install? for gcc/testsuite/ChangeLog PR tree-optimization/108357 * gcc.dg/tree-ssa/pr108357.c: Tolerate later optimization. --- gcc/testsuite/gcc.dg/tree-ssa/pr108357.c |7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr108357.c b/gcc/testsuite/gcc.dg/tree-ssa/pr108357.c index 44c457b7a9777..7dff235f89278 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr108357.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr108357.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-threadfull1" } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ static char b; static unsigned c; @@ -19,4 +19,7 @@ int main() f(g); } -/* { dg-final { scan-tree-dump-not "foo" "threadfull1" } } */ +/* We expect threadfull1 to eliminate the call to foo(), but not all targets + manage that at that point. Calling conventions (mandatory promotion) play a + role, but there's more than that. */ +/* { dg-final { scan-tree-dump-not "foo" "optimized" } } */ -- Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/ Free Software Activist GNU Toolchain Engineer More tolerance and less prejudice are key for inclusion and diversity Excluding neuro-others for not behaving ""normal"" is *not* inclusive
Re: [PATCH v1 12/16] Refactor FMV name mangling.
Added missing CC On 05/02/2025 17:14, Richard Sandiford wrote: Alfie Richards writes: On 04/02/2025 10:03, Richard Sandiford wrote: Alfie Richards writes: + return id; + else if (cgraph_node::get_create (decl)->dispatcher_resolver_function) + id = clone_identifier (id, "resolver"); + else if (DECL_FUNCTION_VERSIONED (decl)) { - name += ".default"; - return get_identifier (name.c_str()); - } - - name += "._"; + aarch64_fmv_feature_mask feature_mask + = get_feature_mask_for_version (decl); - int num_features = ARRAY_SIZE (aarch64_fmv_feature_data); - for (int i = 0; i < num_features; i++) - { - if (feature_mask & aarch64_fmv_feature_data[i].feature_mask) + if (feature_mask == 0ULL) // && not implemented! Could you explain the // comment? Apologies, this was a temporary marker for myself that slipped through. { - name += "M"; - name += aarch64_fmv_feature_data[i].name; + if (!DECL_INITIAL (decl)) + return id; + return clone_identifier (id, "default"); This different handling of defined vs. undefined functions feels a bit weird. It's not clear to me whether: (1) The .default version is effectively internal to the translation unit, and therefore other translation units cannot assume it exists. (2) Other translation units can assume that the .default version exists if they can see a target_version("default") or target_clones definition. (3) Other translation units can assume that the .default version exists if they can see a non-default target_version or a target_clones definition. (4) Something else. Argh, sorry, I meant declaration rather than definition for both (2) and (3). My question doesn't make sense as written. (2) would create a difference between implicit and explicit defaults and doesn't seem to be what the series implements (mv-symbols6.C from patch 14). (3) seems indirect, and IMO it's dangerous to assume that the mere existence of a non-default version X in one TU implies that version X will be visible in the TU that contains the resolver. I would have expected that it would be valid for version X not to be visible in the TU that contains the resolver, with the consequence that the resolver won't use it. (1) seems more appealing on the face of it, so that .default is more like .resolver. But that doesn't seem to be what the spec says. I would say its (4) Something else. This code is the result of that and a discussion we had where we decided we can avoid mangling the default version symbol if it is external. As we know that in the TU where the default is defined, the default version will be mangled, and the dispatched symbol will take its name. As the default is the only resolvable version, then all calls and references will already have the correct target. This allows us to avoid making the dispatched symbol and redirecting default decl calls/references to the dispatched symbol. That sounds like what I meant by (1): i.e. that the only TU that can reference .default is the one that creates it (i.e. the one with the definition). A declaration with __attribute__((target_version("default")) doesn't itself imply that a .default version exists. The reason for asking is that the ACLE says: The "default" version is mangled with ".default" on top of the language-specific name mangling. All versioned functions with their mangled names are always resolvable. which made it sound like other TUs could rely on ".default" existing. Ahh that does help. I follow now. My current understanding comes from following the clang implementation and discussions regarding the spec. But, I see your point though that this isn't what is specified. I think a good correct solution would be that if there is target_version ("default") then to always mangle the default. Then in the case that there's a default and no other versions, emit a non-mangled symbol alias to the default version? Then we cover all cases well, follow the spec, and will be broadly compatible with clang. (ie. in that case currently clang emits the default as if it wasn't annotated, we would emit the default mangled, and include an alias to the unmangled symbol) It's a bit of a hack, and we can instead always mangle the default, create the dispatched symbol? Apologies if I misunderstood the earlier conversation. } - } - if (DECL_ASSEMBLER_NAME_SET_P (decl)) - SET_DECL_RTL (decl, NULL); + std::string suffix = "_"; + + int num_features = ARRAY_SIZE (aarch64_fmv_feature_data); + for (int i = 0; i < num_features; i++) + if (feature_mask & aarch64_fmv_feature_data[i].feature_mask) + { + suffix += "M"; + suffix += aarch64_fmv_feature_data[i].name; + } + + if (DECL_ASSEMBLER_NAME_SET_P (decl)) + SET_DECL_RTL (decl, NULL); Why is it necessary to conditionally res
[PATCH] c++: ICE with unparsed noexcept [PR117106]
Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk? -- >8 -- In a member-specification of a class, a noexcept-specifier is a complete-class context. Thus we delay parsing until the end of the class via our DEFERRED_PARSE mechanism; see cp_parser_save_noexcept and cp_parser_late_noexcept_specifier. We also attempt to defer instantiation of noexcept-specifiers in order to reduce the number of instantiations; this is done via DEFERRED_NOEXCEPT. We can even have both, as in noexcept65.C: a DEFERRED_PARSE wrapped in DEFERRED_NOEXCEPT, which uses the DEFPARSE_INSTANTIATIONS mechanism. noexcept65.C works, because when we really need the noexcept, which is when parsing the body of S::A::A(), the noexcept will have been parsed already; noexcepts are parsed before bodies of member function. But in this test we have: struct A { int x; template void foo() noexcept(noexcept(x)) {} auto bar() -> decltype(foo()) {} // #1 }; and I think the decltype in #1 needs the unparsed noexcept before it could have been parsed. clang++ rejects the test and I suppose we should reject it as well, rather than crashing on a DEFERRED_PARSE in tsubst_expr. PR c++/117106 PR c++/118190 gcc/cp/ChangeLog: * pt.cc (maybe_instantiate_noexcept): Give an error if the noexcept hasn't been parsed yet. gcc/testsuite/ChangeLog: * g++.dg/cpp0x/noexcept89.C: New test. * g++.dg/cpp0x/noexcept90.C: New test. --- gcc/cp/pt.cc| 16 +++- gcc/testsuite/g++.dg/cpp0x/noexcept89.C | 9 + gcc/testsuite/g++.dg/cpp0x/noexcept90.C | 12 3 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept89.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept90.C diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 39232b5e67f..8108bf5de65 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -27453,7 +27453,8 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t complain) { static hash_set* fns = new hash_set; bool added = false; - if (DEFERRED_NOEXCEPT_PATTERN (noex) == NULL_TREE) + tree pattern = DEFERRED_NOEXCEPT_PATTERN (noex); + if (pattern == NULL_TREE) { spec = get_defaulted_eh_spec (fn, complain); if (spec == error_mark_node) @@ -27464,13 +27465,19 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t complain) else if (!(added = !fns->add (fn))) { /* If hash_set::add returns true, the element was already there. */ - location_t loc = cp_expr_loc_or_loc (DEFERRED_NOEXCEPT_PATTERN (noex), - DECL_SOURCE_LOCATION (fn)); + location_t loc = cp_expr_loc_or_loc (pattern, + DECL_SOURCE_LOCATION (fn)); error_at (loc, "exception specification of %qD depends on itself", fn); spec = noexcept_false_spec; } + else if (TREE_CODE (pattern) == DEFERRED_PARSE) + { + error ("exception specification of %qD is not available " +"until end of class definition", fn); + spec = noexcept_false_spec; + } else if (push_tinst_level (fn)) { const bool push_to_top = maybe_push_to_top_level (fn); @@ -27497,8 +27504,7 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t complain) ++processing_template_decl; /* Do deferred instantiation of the noexcept-specifier. */ - noex = tsubst_expr (DEFERRED_NOEXCEPT_PATTERN (noex), - DEFERRED_NOEXCEPT_ARGS (noex), + noex = tsubst_expr (pattern, DEFERRED_NOEXCEPT_ARGS (noex), tf_warning_or_error, fn); /* Build up the noexcept-specification. */ spec = build_noexcept_spec (noex, tf_warning_or_error); diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept89.C b/gcc/testsuite/g++.dg/cpp0x/noexcept89.C new file mode 100644 index 000..308abf6fb45 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/noexcept89.C @@ -0,0 +1,9 @@ +// PR c++/117106 +// { dg-do compile { target c++11 } } + +struct A { +int x; +template +void foo() noexcept(noexcept(x)) {} +auto bar() -> decltype(foo()) {} // { dg-error "not available until end of class" } +}; diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept90.C b/gcc/testsuite/g++.dg/cpp0x/noexcept90.C new file mode 100644 index 000..6d403f66e72 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/noexcept90.C @@ -0,0 +1,12 @@ +// PR c++/118190 +// { dg-do compile { target c++11 } } + +struct S { + template + struct S5 { +void f1() noexcept(noexcept(i)) { } +int i; + }; + S5 s5; + static_assert (noexcept(s5.f1()), ""); // { dg-error "not available until end of class|static assertion failed" } +}; base-commit: a69b728b5b9efa34d0af9f9ce0b248d05f7791b0 -- 2.48.1
Re: [PATCH] c++: Allow constexpr reads from volatile std::nullptr_t objects [PR118661]
On Thu, Feb 06, 2025 at 07:32:43PM +0100, Jakub Jelinek wrote: > Hi! > > As mentioned in the PR, https://eel.is/c++draft/conv.lval#note-1 > says that even volatile reads from std::nullptr_t typed objects actually > don't read anything and https://eel.is/c++draft/expr.const#10.9 > says that even those are ok in constant expressions. > > So, the following patch adjusts the r9-4793 changes to have an exception > for NULLPTR_TYPE. > As [conv.lval]/3 also talks about accessing to inactive member, I've added > testcase to cover that as well. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2025-02-06 Jakub Jelinek > > PR c++/118661 > * constexpr.cc (potential_constant_expression_1): Don't diagnose > lvalue-to-rvalue conversion of volatile lvalue if it has NULLPTR_TYPE. > * decl2.cc (decl_maybe_constant_var_p): Return true for constexpr > decls with NULLPTR_TYPE even if they are volatile. > > * g++.dg/cpp0x/constexpr-volatile4.C: New test. > * g++.dg/cpp0x/constexpr-union9.C: New test. > > --- gcc/cp/constexpr.cc.jj2025-02-05 13:14:34.771198185 +0100 > +++ gcc/cp/constexpr.cc 2025-02-06 09:53:03.236587121 +0100 > @@ -9717,7 +9717,8 @@ potential_constant_expression_1 (tree t, > return true; > >if (TREE_THIS_VOLATILE (t) && want_rval > - && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t))) > + && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t)) > + && TREE_CODE (TREE_TYPE (t)) != NULLPTR_TYPE) Patch looks good but we should use NULLPTR_TYPE_P. > { >if (flags & tf_error) > constexpr_error (loc, fundef_p, "lvalue-to-rvalue conversion of " > --- gcc/cp/decl2.cc.jj2025-01-27 16:45:45.455970792 +0100 > +++ gcc/cp/decl2.cc 2025-02-06 09:53:53.150890600 +0100 > @@ -4985,7 +4985,8 @@ decl_maybe_constant_var_p (tree decl) >tree type = TREE_TYPE (decl); >if (!VAR_P (decl)) > return false; > - if (DECL_DECLARED_CONSTEXPR_P (decl) && !TREE_THIS_VOLATILE (decl)) > + if (DECL_DECLARED_CONSTEXPR_P (decl) > + && (!TREE_THIS_VOLATILE (decl) || TREE_CODE (type) == NULLPTR_TYPE)) > return true; >if (DECL_HAS_VALUE_EXPR_P (decl)) > /* A proxy isn't constant. */ > --- gcc/testsuite/g++.dg/cpp0x/constexpr-volatile4.C.jj 2025-02-06 > 09:50:43.339539282 +0100 > +++ gcc/testsuite/g++.dg/cpp0x/constexpr-volatile4.C 2025-02-06 > 09:50:16.071919784 +0100 > @@ -0,0 +1,20 @@ > +// PR c++/118661 > +// { dg-do compile { target c++11 } } > + > +using nullptr_t = decltype (nullptr); > +constexpr volatile nullptr_t a = {}; > +constexpr nullptr_t b = a; > + > +constexpr nullptr_t > +foo () > +{ > +#if __cplusplus >= 201402L > + volatile nullptr_t c = {}; > + return c; > +#else > + return nullptr; > +#endif > +} > + > +static_assert (b == nullptr, ""); > +static_assert (foo () == nullptr, ""); > --- gcc/testsuite/g++.dg/cpp0x/constexpr-union9.C.jj 2025-02-06 > 09:57:46.149639270 +0100 > +++ gcc/testsuite/g++.dg/cpp0x/constexpr-union9.C 2025-02-06 > 10:01:08.472815988 +0100 > @@ -0,0 +1,16 @@ > +// PR c++/118661 > +// { dg-do compile { target c++11 } } > + > +using nullptr_t = decltype (nullptr); > +union U { int i; nullptr_t n; }; > +constexpr U u = { 42 }; > +static_assert (u.n == nullptr, ""); > + > +#if __cplusplus >= 201402L > +constexpr nullptr_t > +foo () > +{ > + union U { int i; nullptr_t n; } u = { 42 }; > + return u.n; > +} > +#endif > > Jakub > Marek
[PATCH] c++, v2: Allow constexpr reads from volatile std::nullptr_t objects [PR118661]
On Thu, Feb 06, 2025 at 01:45:59PM -0500, Marek Polacek wrote: > > --- gcc/cp/constexpr.cc.jj 2025-02-05 13:14:34.771198185 +0100 > > +++ gcc/cp/constexpr.cc 2025-02-06 09:53:03.236587121 +0100 > > @@ -9717,7 +9717,8 @@ potential_constant_expression_1 (tree t, > > return true; > > > >if (TREE_THIS_VOLATILE (t) && want_rval > > - && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t))) > > + && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t)) > > + && TREE_CODE (TREE_TYPE (t)) != NULLPTR_TYPE) > > Patch looks good but we should use NULLPTR_TYPE_P. You're right. Here is the patch adjusted: 2025-02-06 Jakub Jelinek PR c++/118661 * constexpr.cc (potential_constant_expression_1): Don't diagnose lvalue-to-rvalue conversion of volatile lvalue if it has NULLPTR_TYPE. * decl2.cc (decl_maybe_constant_var_p): Return true for constexpr decls with NULLPTR_TYPE even if they are volatile. * g++.dg/cpp0x/constexpr-volatile4.C: New test. * g++.dg/cpp0x/constexpr-union9.C: New test. --- gcc/cp/constexpr.cc.jj 2025-02-05 13:14:34.771198185 +0100 +++ gcc/cp/constexpr.cc 2025-02-06 09:53:03.236587121 +0100 @@ -9717,7 +9717,8 @@ potential_constant_expression_1 (tree t, return true; if (TREE_THIS_VOLATILE (t) && want_rval - && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t))) + && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t)) + && !NULLPTR_TYPE_P (TREE_TYPE (t))) { if (flags & tf_error) constexpr_error (loc, fundef_p, "lvalue-to-rvalue conversion of " --- gcc/cp/decl2.cc.jj 2025-01-27 16:45:45.455970792 +0100 +++ gcc/cp/decl2.cc 2025-02-06 09:53:53.150890600 +0100 @@ -4985,7 +4985,8 @@ decl_maybe_constant_var_p (tree decl) tree type = TREE_TYPE (decl); if (!VAR_P (decl)) return false; - if (DECL_DECLARED_CONSTEXPR_P (decl) && !TREE_THIS_VOLATILE (decl)) + if (DECL_DECLARED_CONSTEXPR_P (decl) + && (!TREE_THIS_VOLATILE (decl) || NULLPTR_TYPE_P (type))) return true; if (DECL_HAS_VALUE_EXPR_P (decl)) /* A proxy isn't constant. */ --- gcc/testsuite/g++.dg/cpp0x/constexpr-volatile4.C.jj 2025-02-06 09:50:43.339539282 +0100 +++ gcc/testsuite/g++.dg/cpp0x/constexpr-volatile4.C2025-02-06 09:50:16.071919784 +0100 @@ -0,0 +1,20 @@ +// PR c++/118661 +// { dg-do compile { target c++11 } } + +using nullptr_t = decltype (nullptr); +constexpr volatile nullptr_t a = {}; +constexpr nullptr_t b = a; + +constexpr nullptr_t +foo () +{ +#if __cplusplus >= 201402L + volatile nullptr_t c = {}; + return c; +#else + return nullptr; +#endif +} + +static_assert (b == nullptr, ""); +static_assert (foo () == nullptr, ""); --- gcc/testsuite/g++.dg/cpp0x/constexpr-union9.C.jj2025-02-06 09:57:46.149639270 +0100 +++ gcc/testsuite/g++.dg/cpp0x/constexpr-union9.C 2025-02-06 10:01:08.472815988 +0100 @@ -0,0 +1,16 @@ +// PR c++/118661 +// { dg-do compile { target c++11 } } + +using nullptr_t = decltype (nullptr); +union U { int i; nullptr_t n; }; +constexpr U u = { 42 }; +static_assert (u.n == nullptr, ""); + +#if __cplusplus >= 201402L +constexpr nullptr_t +foo () +{ + union U { int i; nullptr_t n; } u = { 42 }; + return u.n; +} +#endif Jakub
Re: [PATCH] RISC-V: Fix ICE when prefetching addresses less than 12 bits for zicbop
On 1/19/25 7:32 AM, Jin Ma wrote: gcc/ChangeLog: * config/riscv/riscv.md: Change 'r' to 'p'. gcc/testsuite/ChangeLog: * gcc.target/riscv/prefetch-zicbop-ice.c: New test. I think that rather than adjusting the constraint we should probably adjust the operand predicate. In general we want the predicates to represent what the hardware can do. Accepting "address_operand" here implies that the prefetch opcodes can accept any valid address for their mode. That's not the case for prefetch. In particular the low 5 bits in the offset must be zero according to the ISA specification. This also implies that for an absolute address that the low 5 bits of that address must be zero. We could certainly create a new predicate that allows immediate operands and one could construct testcases that would use it, but I don't see it as all that useful in practice. So I'm inclined to go with the simpler solution here and only allow register indirect addresses with a zero offset. That's how Wang Pencheng's patch works. I still need to re-review Tsukasa's patch from a while back. But my current inclination is to take Wang Pencheng's patch as I think it's more correct. I'll likely take testcases from all the patches in this space. Jeff
Re: C++ patch ping
On 1/29/25 12:27 PM, Jakub Jelinek wrote: I'd like to ping 4 C++ patches: https://gcc.gnu.org/pipermail/gcc-patches/2024-September/662379.html https://gcc.gnu.org/pipermail/gcc-patches/2024-September/662380.html https://gcc.gnu.org/pipermail/gcc-patches/2024-September/662381.html P2552R3 - On the ignorability of standard attributes - series [PR110345] Note, all the FE changes are already in, the above are solely missing tests which would allow us to mark the paper as resolved OK. Jason
Re: [PATCH] loop-iv, riscv: Fix get_biv_step_1 for RISC-V [PR117506]
On 2/5/25 11:11 AM, Palmer Dabbelt wrote: On Wed, 05 Feb 2025 09:57:56 PST (-0800), ja...@redhat.com wrote: Hi! The following test ICEs on RISC-V at least latently since r14-1622-g99bfdb072e67fa3fe294d86b4b2a9f686f8d9705 which added RISC-V specific case to get_biv_step_1 to recognize also ({zero,sign}_extend:DI (plus:SI op0 op1)) The reason for the ICE is that op1 in this case is CONST_POLY_INT which unlike the really expected VOIDmode CONST_INTs has its own mode and still satisfies CONSTANT_P. GET_MODE (rhs) (SImode) is different from outer_mode (DImode), so the function later does *inner_step = simplify_gen_binary (code, outer_mode, *inner_step, op1); but that obviously ICEs because while *inner_step is either VOIDmode or DImode, op1 has SImode. The following patch fixes it by extending op1 using code so that simplify_gen_binary can handle it. Another option would be to change the !CONSTANT_P (op1) 3 lines above this to !CONST_INT_P (op1), I think it isn't very likely that we get something useful from other constants there. Bootstrapped/regtested on x86_64-linux and i686-linux (which doesn't tell much because such code isn't encountered there, just that the gcc.dg test at least works), but I have no way to test this on riscv easily. Could somebody test it there? The precommit CI should pick it up, as it's got "RISC-V" in the subject line. I don't see it in the list yet, though. It picked it up, but Jakub strips a component off the pathnames in his patches (the leading a/b) which confused the pre-commit CI tester. Regardless I can spin it in my own. Or do you want the !CONST_INT_P (op1) instead? Jeff would know way better than I do here, but I think this is just trying to match addiw-type patterns and thus CONST_INT would be OK because we only have 12-bit constants here. My recollection of the last touch of this code was it helps us "see through" the extension that commonly appears in risc-v code. It addressed a relatively minor regression Jivan saw when changing the various risc-v expanders to expose the implicit sign extension. Jeff
[PATCH] RISC-V: Move UNSPEC_SSP_SET and UNSPEC_SSP_TEST to correct enum
stack_protect_{set,test}_ were showing up in RTL dumps as UNSPEC_COPYSIGN and UNSPEC_FMV_X_W due to UNSPEC_SSP_SET and UNSPEC_SSP_TEST being put in the unspecv enum instead of unspec. gcc/ChangeLog: * config/riscv/riscv.md: Move UNSPEC_SSP_SET and UNSPEC_SSP_TEST to unspec enum. --- gcc/config/riscv/riscv.md | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 09053df1eb9..f7070766783 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -99,6 +99,10 @@ ;; CRC unspecs UNSPEC_CRC UNSPEC_CRC_REV + + ;; Stack Smash Protector + UNSPEC_SSP_SET + UNSPEC_SSP_TEST ]) (define_c_enum "unspecv" [ @@ -123,10 +127,6 @@ UNSPECV_FENCE UNSPECV_FENCE_I - ;; Stack Smash Protector - UNSPEC_SSP_SET - UNSPEC_SSP_TEST - ;; CMO instructions. UNSPECV_CLEAN UNSPECV_FLUSH -- 2.43.0
Re: [PATCH v3] [ifcombine] avoid creating out-of-bounds BIT_FIELD_REFs [PR118514]
Richard Biener writes: > On Thu, Feb 6, 2025 at 2:41 PM Alexandre Oliva wrote: >> >> On Jan 27, 2025, Richard Biener wrote: >> >> > What I was saying that the conservative tree_could_trap_p could say >> > 'yes' to a certain encoding of a ref but 'no' to another if in reality >> > the ref can never trap. We of course cannot (apart from bugs in >> > tree_could_trap_p) turn a for-sure trap into a not-trap by simply >> > rewriting the ref. >> >> I see. Yeah, that makes sense. >> >> > So I think we want this bit in (and it's dependences), but >> >> 'k, I'll split that bit out, after some clarification: >> >> > (I see the assert is no longer in the patch). >> >> That's because the assert went in as part of an earlier patch. I take >> it it should be backed out along with the to-be-split-out bits above, >> right? > > Yes. > > (IIRC there's also a PR tripping over this or a similar assert) Right, PR118706. > > Richard. > >> >> -- >> Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/ >>Free Software Activist GNU Toolchain Engineer >> More tolerance and less prejudice are key for inclusion and diversity >> Excluding neuro-others for not behaving ""normal"" is *not* inclusive
Re: [PATCH] driver: -fhardened and -z lazy/-z norelro [PR117739]
Ping. On Tue, Jan 21, 2025 at 11:05:46AM -0500, Marek Polacek wrote: > Ping. > > On Fri, Jan 10, 2025 at 03:07:52PM -0500, Marek Polacek wrote: > > Ping. > > > > On Fri, Dec 20, 2024 at 08:58:05AM -0500, Marek Polacek wrote: > > > Ping. > > > > > > On Tue, Nov 26, 2024 at 05:35:50PM -0500, Marek Polacek wrote: > > > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk? > > > > > > > > -- >8 -- > > > > As the manual states, using "-fhardened -fstack-protector" will produce > > > > a warning because -fhardened wants to enable -fstack-protector-strong, > > > > but it can't since it's been overriden by the weaker -fstack-protector. > > > > > > > > -fhardened also attempts to enable -Wl,-z,relro,-z,now. By the same > > > > logic as above, "-fhardened -z norelro" or "-fhardened -z lazy" should > > > > produce the same warning. But we don't detect this combination, so > > > > this patch fixes it. I also renamed a variable to better reflect its > > > > purpose. > > > > > > > > Also don't check warn_hardened in process_command, since it's always > > > > true there. > > > > > > > > Also tweak wording in the manual as Jon Wakely suggested on IRC. > > > > > > > > PR driver/117739 > > > > > > > > gcc/ChangeLog: > > > > > > > > * doc/invoke.texi: Tweak wording for -Whardened. > > > > * gcc.cc (driver_handle_option): If -z lazy or -z norelro was > > > > specified, don't enable linker hardening. > > > > (process_command): Don't check warn_hardened. > > > > > > > > gcc/testsuite/ChangeLog: > > > > > > > > * c-c++-common/fhardened-16.c: New test. > > > > * c-c++-common/fhardened-17.c: New test. > > > > * c-c++-common/fhardened-18.c: New test. > > > > * c-c++-common/fhardened-19.c: New test. > > > > * c-c++-common/fhardened-20.c: New test. > > > > * c-c++-common/fhardened-21.c: New test. > > > > --- > > > > gcc/doc/invoke.texi | 4 ++-- > > > > gcc/gcc.cc| 20 ++-- > > > > gcc/testsuite/c-c++-common/fhardened-16.c | 5 + > > > > gcc/testsuite/c-c++-common/fhardened-17.c | 5 + > > > > gcc/testsuite/c-c++-common/fhardened-18.c | 5 + > > > > gcc/testsuite/c-c++-common/fhardened-19.c | 5 + > > > > gcc/testsuite/c-c++-common/fhardened-20.c | 5 + > > > > gcc/testsuite/c-c++-common/fhardened-21.c | 5 + > > > > 8 files changed, 46 insertions(+), 8 deletions(-) > > > > create mode 100644 gcc/testsuite/c-c++-common/fhardened-16.c > > > > create mode 100644 gcc/testsuite/c-c++-common/fhardened-17.c > > > > create mode 100644 gcc/testsuite/c-c++-common/fhardened-18.c > > > > create mode 100644 gcc/testsuite/c-c++-common/fhardened-19.c > > > > create mode 100644 gcc/testsuite/c-c++-common/fhardened-20.c > > > > create mode 100644 gcc/testsuite/c-c++-common/fhardened-21.c > > > > > > > > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > > > > index 346ac1369b8..371f723539c 100644 > > > > --- a/gcc/doc/invoke.texi > > > > +++ b/gcc/doc/invoke.texi > > > > @@ -7012,8 +7012,8 @@ This warning is enabled by @option{-Wall}. > > > > Warn when @option{-fhardened} did not enable an option from its set > > > > (for > > > > which see @option{-fhardened}). For instance, using > > > > @option{-fhardened} > > > > and @option{-fstack-protector} at the same time on the command line > > > > causes > > > > -@option{-Whardened} to warn because @option{-fstack-protector-strong} > > > > is > > > > -not enabled by @option{-fhardened}. > > > > +@option{-Whardened} to warn because @option{-fstack-protector-strong} > > > > will > > > > +not be enabled by @option{-fhardened}. > > > > > > > > This warning is enabled by default and has effect only when > > > > @option{-fhardened} > > > > is enabled. > > > > diff --git a/gcc/gcc.cc b/gcc/gcc.cc > > > > index 92c92996401..d2718d263bb 100644 > > > > --- a/gcc/gcc.cc > > > > +++ b/gcc/gcc.cc > > > > @@ -305,9 +305,10 @@ static size_t dumpdir_length = 0; > > > > driver added to dumpdir after dumpbase or linker output name. */ > > > > static bool dumpdir_trailing_dash_added = false; > > > > > > > > -/* True if -r, -shared, -pie, or -no-pie were specified on the command > > > > - line. */ > > > > -static bool any_link_options_p; > > > > +/* True if -r, -shared, -pie, -no-pie, -z lazy, or -z norelro were > > > > + specified on the command line, and therefore -fhardened should not > > > > + add -z now/relro. */ > > > > +static bool avoid_linker_hardening_p; > > > > > > > > /* True if -static was specified on the command line. */ > > > > static bool static_p; > > > > @@ -4434,10 +4435,17 @@ driver_handle_option (struct gcc_options *opts, > > > > } > > > > /* Record the part after the last comma. */ > > > > add_infile (arg + prev, "*"); > > > > + if (strcmp (arg, "-z,lazy") == 0 || strcmp (arg, "-z,norelro") > > > > == 0) > > >
Re: [PATCH v3] [ifcombine] avoid creating out-of-bounds BIT_FIELD_REFs [PR118514]
On Jan 27, 2025, Richard Biener wrote: > What I was saying that the conservative tree_could_trap_p could say > 'yes' to a certain encoding of a ref but 'no' to another if in reality > the ref can never trap. We of course cannot (apart from bugs in > tree_could_trap_p) turn a for-sure trap into a not-trap by simply > rewriting the ref. I see. Yeah, that makes sense. > So I think we want this bit in (and it's dependences), but 'k, I'll split that bit out, after some clarification: > (I see the assert is no longer in the patch). That's because the assert went in as part of an earlier patch. I take it it should be backed out along with the to-be-split-out bits above, right? -- Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/ Free Software Activist GNU Toolchain Engineer More tolerance and less prejudice are key for inclusion and diversity Excluding neuro-others for not behaving ""normal"" is *not* inclusive
[PATCH] [testsuite] [sparc] select ultrasparc for fsmuld test
vis3move-3.c expects fsmuld, that is not available on all variants of sparc. Select a cpu that supports it for the test. Now, -mfix-ut699 irrevocbly disables fsmuld, so skip the test if the test configuration uses that option. Regstrapped on x86_64-linux-gnu, also tested with gcc-14 targeting leon3-elf with -mfix-ut699. Ok to install? for gcc/testsuite/ChangeLog * gcc.target/sparc/vis3move-3.c: Select ultrasparc. Skip with -mfix-ut699. --- gcc/testsuite/gcc.target/sparc/vis3move-3.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/sparc/vis3move-3.c b/gcc/testsuite/gcc.target/sparc/vis3move-3.c index 3b2116eec0cb3..f32ca918bac91 100644 --- a/gcc/testsuite/gcc.target/sparc/vis3move-3.c +++ b/gcc/testsuite/gcc.target/sparc/vis3move-3.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-require-effective-target ilp32 } */ -/* { dg-options "-O1 -mvis3" } */ +/* { dg-skip-if "prevents fsmuld" { *-*-* } { "-mfix-ut699" } { "" } } */ +/* { dg-options "-O1 -mvis3 -mcpu=ultrasparc" } */ float fnegs (float a) { -- Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/ Free Software Activist GNU Toolchain Engineer More tolerance and less prejudice are key for inclusion and diversity Excluding neuro-others for not behaving ""normal"" is *not* inclusive
[PATCH] [testsuite] [sparc] use -mtune in alignment tuning test
If -mcpu=leon3 is present in the command line for a test run, overriding it with -mcpu=niagara7 is not enough to override the tuning for leon3 selected by the previous -mcpu option. niagara7-align.c tests for niagara7 alignment tuning, so use -mtune rather than -mcpu. Regstrapped on x86_64-linux-gnu, also tested with gcc-14 targeting leon3-elf. Ok to install? for gcc/testsuite/ChangeLog * gcc.target/sparc/niagara7-align.c: Use -mtune. --- gcc/testsuite/gcc.target/sparc/niagara7-align.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/sparc/niagara7-align.c b/gcc/testsuite/gcc.target/sparc/niagara7-align.c index a46aac17c3294..01a8cb621d5c0 100644 --- a/gcc/testsuite/gcc.target/sparc/niagara7-align.c +++ b/gcc/testsuite/gcc.target/sparc/niagara7-align.c @@ -1,4 +1,4 @@ /* { dg-do compile } */ -/* { dg-options "-falign-functions -mcpu=niagara7" } */ +/* { dg-options "-falign-functions -mtune=niagara7" } */ /* { dg-final { scan-assembler "\.align 64" } } */ void foo(void) {} -- Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/ Free Software Activist GNU Toolchain Engineer More tolerance and less prejudice are key for inclusion and diversity Excluding neuro-others for not behaving ""normal"" is *not* inclusive
Re: [RFC][libgcc][PATCH] Fix two issues in libgcc/unwind-dw2-fde.c.
On Thu, Feb 6, 2025 at 2:57 PM Qing Zhao wrote: > > > > > On Feb 6, 2025, at 04:36, Richard Biener wrote: > > > > On Wed, Feb 5, 2025 at 4:40 PM Qing Zhao wrote: > >> > >> > >> > >>> On Feb 5, 2025, at 07:46, Richard Biener > >>> wrote: > >>> > >>> On Tue, Feb 4, 2025 at 10:12 PM Qing Zhao wrote: > > Hi, > > One of our big application segv in libgcc code while unwinding the stack. > > This is a random crash while the application throws a c++ exception and > unwinds the stack. Incidents are random and never can be reproduced by > any > test case. > > The libgcc that is used is based on GCC 8. > > Fortunately, we got some information through the stack trace, and > narrowed > down the crash is in the routine "init_object" of > libgcc/unwind-dw2-pde.c: > > 777 count = classify_object_over_fdes (ob, ob->u.single); > > when loading the 2nd parameter of the call to > "classify_object_over_fdes". > i.e, the address that is pointed by "ob->u.single" is invalid. > > And the outer caller we can track back is the following line 1066 of the > routine > "_Unwind_Find_FDE": > > 1060 /* Classify and search the objects we've not yet processed. */ > 1061 while ((ob = unseen_objects)) > 1062 { > 1063 struct object **p; > 1064 > 1065 unseen_objects = ob->next; > 1066 f = search_object (ob, pc); > > Then we suspected that the construction of the static variable > "unseen_objects" > might have some bug. > > Then after studying the source code that constructs "unseen_objects" in > libgcc/unwind-dw2-pde.c, I found an issue in the routines > "__register_frame" > and "__register_frame_table" as following: > > 127 void > 128 __register_frame (void *begin) > 129 { > 130 struct object *ob; > 131 > 132 /* If .eh_frame is empty, don't register at all. */ > 133 if (*(uword *) begin == 0) > 134 return; > 135 > 136 ob = malloc (sizeof (struct object)); > 137 __register_frame_info (begin, ob); > 138 } > > 181 void > 182 __register_frame_table (void *begin) > 183 { > 184 struct object *ob = malloc (sizeof (struct object)); > 185 __register_frame_info_table (begin, ob); > 186 } > 187 > > In the above, one obvious issue in the source code is at line 136, line > 137, > and line 184 and line 185: the return value of call to "malloc" is not > checked > against NULL before it was passed as the second parameter of the > follwoing call. > > This might cause unpredicted random behavior. > > I checked the latest trunk GCC libgcc, the same issue is there too. > > This patch is for the latest trunk GCC. > > Please let me know any comments on this? > >>> > >>> I think I've seen this elsewhere — > >> > >> Do you mean that you saw this problem (malloc return NULL during > >> register_frame) previously? > > > > It was probably reported to us (SUSE) from a customer/partner, also > > from a (llvm?) JIT context. > > Okay. > So, that bug has not been resolved yet? It was resolved from our side IIRC as being a problem in the application with the JIT and otherwise a feature request (better unwind API for this use case, allowing a failure mode). Richard. > Qing > > > >>> the issue is the unwind register API does > >>> not allow for failures but I also think calling abort() is bad. > >> > >> Okay, I see. > >> > >>> > >>> Are you calling this from a JIT context or so? > >> > >> I will ask the bug submitter on this to make sure. > >> > >>> We're assuming that at program > >>> start malloc() will not fail. > >> > >> So, the routine __register_frame is only called at the _start_ of a > >> program? > >>> > >>> The proper solution is probably to add an alternate ABI which has a way > >>> to fail > >>> during registration. > >> > >> Any suggestion on this (or any other routine I can refer to?) > >> > >> > >> Thanks a lot for the help. > >> > >> Qing > >>> > >>> Richard. > >>> > thanks. > > Qing > > > === > > Fix two issues in libgcc/unwind-dw2-fde.c: > > A. The returned value of call to malloc is not checked against NULL. > This might cause unpredicted behavior during stack unwinding. > > B. Check for null begin parameter (as well as pointer to null) in > __register_frame_info_table_bases and __register_frame_table. > > libgcc/ChangeLog: > > * unwind-dw2-fde.c (__register_frame): Check the return value of > call > to malloc. > (__register_frame_info_table_bases): Check for null begin > parameter. > (__register_frame_table): Check the return value o
Re: [patch, libfortran] PR114618 Version 2 Format produces incorrect output when contains 1x, ok when uses " "
On 1/31/25 11:30 AM, Harald Anlauf wrote: Hi Jerry, Am 30.01.25 um 21:50 schrieb Jerry D: On 1/29/25 10:30 AM, Jerry D wrote: Follow-up: On 1/28/25 1:33 PM, Harald Anlauf wrote: Jerry, while I haven't read your actual patch yet, I think the testcase is slightly incorrect. In fact, Intel, NAG, Nvidia and AMD flang disagree with it. --- snip --- The following adjustment to the patch puts this right. case FMT_X: case FMT_TR: consume_data_flag = 0; dtp->u.p.skips += f->u.n; tab_pos = bytes_used + dtp->u.p.skips - 1; dtp->u.p.pending_spaces = tab_pos - dtp->u.p.max_pos + 1; dtp->u.p.pending_spaces = dtp->u.p.pending_spaces < 0 ? f->u.n : dtp->u.p.pending_spaces; //if (t == FMT_X && tab_pos < dtp->u.p.max_pos) //{ //write_x (dtp, dtp->u.p.skips, dtp->u.p.pending_spaces); //dtp->u.p.skips = dtp->u.p.pending_spaces = 0; //} Interestingly, it also fixes a floating point exception I ran into while setting up another test case for part 2 of this effort. I suspect this may be what was detected by the auto patch tester. I will clean this up, adjust the test case for this part and re-submit. Regards, Jerry Here is version 2 of the patch cleaned up and with the test case revised accordingly. Thank you Herald for helping with my blindness. Regressions tested on x86_64. I will wait a bit to see if the auto patch tester reports anything. Otherwise, OK for trunk? this looks mostly good. There is only one (minor?) issue I saw: writing output to a file, there is a discrepancy between stream- and non-stream I/O. Adding write (*, fmt1) 'RADIX', radix(pi) write (*, fmt2) 'RADIX', radix(pi) open (10, form="formatted") write(10, fmt1) 'RADIX', radix(pi) write(10, fmt2) 'RADIX', radix(pi) close(10) open (11, form="formatted", access="stream") write(11, fmt1) 'RADIX', radix(pi) write(11, fmt2) 'RADIX', radix(pi) close(11) shows agreement between stdout and fort.10, while fort.11 differs: % head fort.10 fort.11 ==> fort.10 <== RADIX.. 2 RADIX . 2 ==> fort.11 <== RADIX 2 RADIX 2... Other brands do not show this discrepancy. Regards, Jerry PS working on part 2 still. Are you addressing this in the second part of your work? As the test program crashes with current HEAD and earlier anyway, your patch is already progress, but we don't want to leave it that way. So either commit the current version and track this issue in a PR if not yet done, or have a look if there is a quick fix. Thanks for the work! Harald After studying this a bit more I will commit the subject patch. I have opened a new PR to track this. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118774 Regards, Jerry
Re: [PATCH v2] ira: Add a target hook for callee-saved register cost scale
Vladimir Makarov writes: > On 2/3/25 1:20 AM, H.J. Lu wrote: >> commit 3b9b8d6cfdf59337f4b7ce10ce92a98044b2657b >> Author: Surya Kumari Jangala >> Date: Tue Jun 25 08:37:49 2024 -0500 >> >> ira: Scale save/restore costs of callee save registers with block >> frequency >> >> scales the cost of saving/restoring a callee-save hard register in epilogue >> and prologue with the entry block frequency, which, if not optimizing for >> size, is 1, for all targets. As the result, callee-saved registers >> may not be used to preserve local variable values across calls on some >> targets, like x86. Add a target hook for the callee-saved register cost >> scale in epilogue and prologue used by IRA. The default version of this >> target hook returns 1 if optimizing for size, otherwise returns the entry >> block frequency. Add an x86 version of this target hook to restore the >> old behavior prior to the above commit. > > This is a complicated problem resulted in many tries to fix it in some > general way. > > In general I am agree with Surya's approach to scale cost of reg > saves/restores somehow. But the general approach, although solved some > problems, also created a lot of new ones. May be because IRA does not > take some other aspects of using callee saved regs. And some of them > were addressed by other patches. e.g. recently proposed by Surya and one > for PR118497. > > I also agree with Richard Sandiford's comment that we should avoid > introducing the new hooks for RA and I actually tried to stick to this > policy for a long time. But I don't see another solution to introducing > the new hook in this case. It is hard to figure out generally in RA > that saves/restores will be insns different from ld/st (e.g. x86 > push/pop) and that they will be cheaper. > > So after some time to think about the patch I decided to approve the RA > part of the patch. I also hope that the work on this problem will > continue (e.g. improving default and target hook implementations and > documentation how to better use it). For the record, I strongly object to this. The hook just seems like a complete hack to me. Even if we accept that there is target-specific information in play, the hook isn't providing that information. In contrast to what you said above, my objection isn't to having hooks -- those are often needed and good. What bothers me is that the hook isn't well designed. Hooks should provide information rather than override code by brute force. On the other hand, I accept that you're (rightly!) the maintainer. Thanks, Richard
[PATCH] x86: Verify that PUSH/POP can be skipped
For --- int f(int); int advance(int dz) { if (dz > 0) return (dz + dz) * dz; else return dz * f(dz); } --- Before r15-1619-g3b9b8d6cfdf593 advance(int): pushrbx mov ebx, edi testedi, edi jle .L2 imulebx, edi lea eax, [rbx+rbx] pop rbx ret .L2: callf(int) imuleax, ebx pop rbx ret After advance(int): testedi, edi jle .L2 imuledi, edi lea eax, [rdi+rdi] ret .L2: sub rsp, 24 mov DWORD PTR [rsp+12], edi callf(int) imuleax, DWORD PTR [rsp+12] add rsp, 24 ret There's no call in if branch, it's not optimal to push rbx at the entry of the function, it can be sinked to else branch. When "jle .L2" is not taken, it can save one push instruction. Update pr111673.c to verify that this optimization isn't turned off. PR rtl-optimization/111673 * gcc.target/i386/pr111673.c: Verify that PUSH/POP can be skipped. -- H.J. From 6606ec5573e724295bdceb572ddc2813f021709f Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 7 Feb 2025 13:49:30 +0800 Subject: [PATCH] x86: Verify that PUSH/POP can be skipped For --- int f(int); int advance(int dz) { if (dz > 0) return (dz + dz) * dz; else return dz * f(dz); } --- Before r15-1619-g3b9b8d6cfdf593 advance(int): pushrbx mov ebx, edi testedi, edi jle .L2 imulebx, edi lea eax, [rbx+rbx] pop rbx ret .L2: callf(int) imuleax, ebx pop rbx ret After advance(int): testedi, edi jle .L2 imuledi, edi lea eax, [rdi+rdi] ret .L2: sub rsp, 24 mov DWORD PTR [rsp+12], edi callf(int) imuleax, DWORD PTR [rsp+12] add rsp, 24 ret There's no call in if branch, it's not optimal to push rbx at the entry of the function, it can be sinked to else branch. When "jle .L2" is not taken, it can save one push instruction. Update pr111673.c to verify that this optimization isn't turned off. PR rtl-optimization/111673 * gcc.target/i386/pr111673.c: Verify that PUSH/POP can be skipped. Signed-off-by: H.J. Lu --- gcc/testsuite/gcc.target/i386/pr111673.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/gcc/testsuite/gcc.target/i386/pr111673.c b/gcc/testsuite/gcc.target/i386/pr111673.c index 8d8a5a764f0..b9ceacf7651 100644 --- a/gcc/testsuite/gcc.target/i386/pr111673.c +++ b/gcc/testsuite/gcc.target/i386/pr111673.c @@ -1,5 +1,19 @@ /* { dg-do compile { target { ! ia32 } } } */ /* { dg-options "-O2 -fdump-rtl-pro_and_epilogue" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {^\t?\.} } } */ + +/* +**advance: +**.LFB0: +** .cfi_startproc +** testl %edi, %edi +** jle .L2 +** imull %edi, %edi +** leal \(%rdi,%rdi\), %eax +** ret +**... +*/ /* Verify there is an early return without the prolog and shrink-wrap the function. */ -- 2.48.1
[PATCH] aarch64: Fix bootstrap with --enable-checking=release [PR118771]
With release checking we get an uninitialization warning inside aarch64_split_move because of jump threading for the case of `npieces==0` but `npieces` is never 0 (but there is no way the compiler can know that. So this fixes the issue by adding a `gcc_assert` to the function which asserts that `npieces > 0` and fixes the uninitialization warning. Bootstrapped and tested on aarch64-linux-gnu (with and without --enable-checking=release). The warning: aarch64.cc: In function 'void aarch64_split_move(rtx, rtx, machine_mode)': aarch64.cc:3418:31: error: '*(rtx_def**)((char*)&dst_pieces + offsetof(auto_vec,auto_vec::m_data[0]))' may be used uninitialized [-Werror=maybe-uninitialized] 3418 | if (reg_overlap_mentioned_p (dst_pieces[0], src)) | ^~~~ aarch64.cc:3408:20: note: 'dst_pieces' declared here 3408 | auto_vec dst_pieces, src_pieces; |^~ PR target/118771 gcc/ChangeLog: * config/aarch64/aarch64.cc (aarch64_split_move): Assert that npieces is greater than 0. Signed-off-by: Andrew Pinski --- gcc/config/aarch64/aarch64.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index c1e40200806..f5f23f6ff4b 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -3407,6 +3407,9 @@ aarch64_split_move (rtx dst, rtx src, machine_mode single_mode) GET_MODE_SIZE (single_mode)).to_constant (); auto_vec dst_pieces, src_pieces; + /* There should be at least one piece. */ + gcc_assert (npieces > 0); + for (unsigned int i = 0; i < npieces; ++i) { auto off = i * GET_MODE_SIZE (single_mode); -- 2.43.0
Re: [PATCH v2] ira: Add a target hook for callee-saved register cost scale
On Thu, Feb 6, 2025 at 11:40 PM Vladimir Makarov wrote: > > > On 2/6/25 4:54 PM, Richard Sandiford wrote: > > Vladimir Makarov writes: > > This is a complicated problem resulted in many tries to fix it in some > general way. > > In general I am agree with Surya's approach to scale cost of reg > saves/restores somehow. But the general approach, although solved some > problems, also created a lot of new ones. May be because IRA does not > take some other aspects of using callee saved regs. And some of them > were addressed by other patches. e.g. recently proposed by Surya and one > for PR118497. > > I also agree with Richard Sandiford's comment that we should avoid > introducing the new hooks for RA and I actually tried to stick to this > policy for a long time. But I don't see another solution to introducing > the new hook in this case. It is hard to figure out generally in RA > that saves/restores will be insns different from ld/st (e.g. x86 > push/pop) and that they will be cheaper. > > So after some time to think about the patch I decided to approve the RA > part of the patch. I also hope that the work on this problem will > continue (e.g. improving default and target hook implementations and > documentation how to better use it). > > For the record, I strongly object to this. The hook just seems like a > complete hack to me. Even if we accept that there is target-specific > information in play, the hook isn't providing that information. > > In contrast to what you said above, my objection isn't to having hooks > -- those are often needed and good. What bothers me is that the hook > isn't well designed. Hooks should provide information rather than > override code by brute force. > > On the other hand, I accept that you're (rightly!) the maintainer. > > I also don't like the hook implementation for x86-64 (although this is a > matter of target maintainers). All these costs look voodoo and random to me. > > But this problem is longing for more than half year. I spent a lot of time > too on this. Patches were submitted and reverted and nobody did find so far > any solution satisfying all GCC tests. If somebody finds a solution without > the hook, I will be glad to get rid off the hook. Also the related PRs are > marked as P1 ones, it means people think they are important (I am not sure > about this myself). Without fixing them (or downgrading them) there will be > no GCC release. So I am in difficult situation with these PRs and need some > resolution. Just to chime in as the one who likely made those PRs P1. 'P1' here is really about the testsuite FAIL, how to resolve it is up to target maintainers - P1 should make them look, and adjusting the testcase (or even XFAILing it) is a valid resolution of the P1 regression. I do agree with Richards comment on the proposed hook to be badly designed and I do like Honzas suggestion to instead hookize the biasing. Richard. > >
Re: [PATCH] [testsuite] [sparc] use -mtune in alignment tuning test
> niagara7-align.c tests for niagara7 alignment tuning, so use -mtune > rather than -mcpu. > > Regstrapped on x86_64-linux-gnu, also tested with gcc-14 targeting > leon3-elf. Ok to install? > > > for gcc/testsuite/ChangeLog > > * gcc.target/sparc/niagara7-align.c: Use -mtune. OK. -- Eric Botcazou
Re: [PATCH] [testsuite] [sparc] select ultrasparc for fsmuld test
> vis3move-3.c expects fsmuld, that is not available on all variants of > sparc. Select a cpu that supports it for the test. > > Now, -mfix-ut699 irrevocbly disables fsmuld, so skip the test if the > test configuration uses that option. > > Regstrapped on x86_64-linux-gnu, also tested with gcc-14 targeting > leon3-elf with -mfix-ut699. Ok to install? > > > for gcc/testsuite/ChangeLog > > * gcc.target/sparc/vis3move-3.c: Select ultrasparc. Skip with > -mfix-ut699. OK. -- Eric Botcazou
[Patch] [gcn] Fix gfx906's sramecc setting
ROCm 6.3.2 does not like my patch for reasons that I do not understand; https://gcc.gnu.org/pipermail/gcc-patches/2025-February/675200.html Until that's sorted, I decided to split off two obvious fixes; I might suggest some further changes, but the full patch has to wait until generic really works. * * * The attached patch adds '...' to the device macro to avoid touching this when adding new fields. And it fixes an issue with gfx906, which shows up when compiling, e.g. as gcc -g -fopenmp -foffload-options=amdgcn-amdhsa=-march=gfx906 file.c (with offloading code in file.c). The reason is that '-g' causes mkoffload.cc to create a .o file with debugging symbols - and that .o file is linked with the GCN device files. While that file does not contain executable code, the ELF header still needs to match the GCN .o files as otherwise the linker complains that there is a mismatch. For the line above, it complains about: "ld: error: incompatible sramecc: /tmp/ccLhwZle.o". And in line with the GCC 14 code in mkoffload.cc and with the entries in https://llvm.org/docs/AMDGPUUsage.html#amdgpu-processor-table for gfx906 + the llvm-mc / lld implementation, that means that the sramecc type is 'any' and not unsupported. OK for mainline? Tobias [gcn] Fix gfx906's sramecc setting When compiling with -g, mkoffload.cc creates a device object file itself; however, in order that the linker dos not complain, the ELF flags must match what the compiler / linker does. For gfx906, the assembler defaults to sramecc = any, but gcn-devices.def contained unsupported, which is not the same - causing link errors. That's a regression caused by commit r15-4540-ga6b26e5ea09779 - which can be best seen by looking at the changes to mkoffload.cc. Additionally, this commit adds '...' to the GCN_DEVICE #define in gcn.cc to make it agnostic to the addition of fields. gcc/config/gcn/gcn-devices.def | 2 +- gcc/config/gcn/gcn.cc | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/gcc/config/gcn/gcn-devices.def b/gcc/config/gcn/gcn-devices.def index 7d47a7b495d..a8b21a358b4 100644 --- a/gcc/config/gcn/gcn-devices.def +++ b/gcc/config/gcn/gcn-devices.def @@ -91,7 +91,7 @@ GCN_DEVICE(gfx900, GFX900, 0x2c, ISA_GCN5, GCN_DEVICE(gfx906, GFX906, 0x2f, ISA_GCN5, /* XNACK default */ HSACO_ATTR_OFF, - /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED, + /* SRAM_ECC default */ HSACO_ATTR_ANY, /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED, /* CU mode */ HSACO_ATTR_UNSUPPORTED, /* Max ISA VGPRs */ 256, diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 4200cfaf006..82fc6ff1e41 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -101,7 +101,8 @@ static hash_map lds_allocs; /* Import all the data from gcn-devices.def. The PROCESSOR_GFXnnn should be indices for this table. */ const struct gcn_device_def gcn_devices[] = { -#define GCN_DEVICE(name, NAME, ELF, ISA, XNACK, SRAMECC, WAVE64, CU, VGPRS, GEN_VER,ARCH_FAM) \ +#define GCN_DEVICE(name, NAME, ELF, ISA, XNACK, SRAMECC, WAVE64, CU, VGPRS, \ + GEN_VER, ARCH_FAM, ...) \ {PROCESSOR_ ## NAME, #name, #NAME, ISA, XNACK, SRAMECC, WAVE64, CU, VGPRS, \ GEN_VER, #ARCH_FAM}, #include "gcn-devices.def"
Re: [PATCH] [testsuite] [sparc] skip sparc-ret-1 with -mfix-ut699
> Option -mfix-ut699 changes the set of instructions that can be placed > in the delay slot, preventing the expected insn placement. Skip the > test if the option is present. > > Regstrapped on x86_64-linux-gnu, also tested with gcc-14 targeting > leon3-elf with -mfix-ut699. Ok to install? > > > for gcc/testsuite/ChangeLog > > * gcc.target/sparc/sparc-ret-1.c: Skip on -mfix-ut699. OK. -- Eric Botcazou
Re: [PATCH] RISC-V: Fix ICE when prefetching addresses less than 12 bits for zicbop
On 1/19/25 7:32 AM, Jin Ma wrote: gcc/ChangeLog: * config/riscv/riscv.md: Change 'r' to 'p'. gcc/testsuite/ChangeLog: * gcc.target/riscv/prefetch-zicbop-ice.c: New test. As expected, there's more to this issue than just adjusting constraints or predicates. As Tsukasa noted a while back the current builtin is largely useless as the most natural ways to pass instruction addresses simply don't work. The builtin expansion path we're using is custom to the backend and as a result we don't get the usual methods to force arguments into registers, so we'd need to turn the relevant patters into expanders with a corresponding pattern. We'll need to make the operands fairly loose on the expander, then force things into registers for the pattern. And there's the question of the extraneous imm5 operand in the pattern. And there's a nonzero possibility that we need to adjust the API as well. The net is I think we should fix the obvious ICE, but that the deeper issues should wait for gcc-16's new development window. Jeff
Re: [PATCH v2] ira: Add a target hook for callee-saved register cost scale
> > +/* Implement TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE. */ > > + > > +static int > > +ix86_ira_callee_saved_register_cost_scale (int) > > +{ > > + return 1; > > +} > > + > > return cl; > > } > > +int > > +default_ira_callee_saved_register_cost_scale (int) > > +{ > > + return (optimize_size > > + ? 1 > > + : REG_FREQ_FROM_BB (ENTRY_BLOCK_PTR_FOR_FN (cfun))); > > +} > > + I am not sure how this makes sense - why x86 would be significantly different from other targets? I think the only bit non-standard thing is that prologue/epilogue code can use push/pop that is shorter than mov used to save caller saved registers. I went through few testcases: void d(); void a() { int b; asm ("use %0":"=r" (b)); d(); asm volatile (""::"r" (b)); } compiler with -O2 -fira-verbose=1 gives: Popping a0(r99,l0) -- (0=12000,12000) (1=12000,12000) (2=12000,12000) (4=12000,12000) (5=12000,12000) (36=12000,12000) (37=12000,12000) (38=12000,12000) (39=12000,12000) (3=11000,11000) (6=11000,11000) (40=11000,11000) (41=11000,11000) (42=11000,11000) (43=11000,11000) load and save costs are 6. So spill pair is 12 weighted by 1000 that is REG_FREQ_MAX. Register 0 (EAX) has cost 12000 which makes sense to me: - load and save costs are 6, combined spill pair is 12 - REG_FREQ_MAX is 1000 and since function has only one BB, it has maximal frequency, so we get 12000. Register 3 (first caller saved) has cost 11000. This comes from: add_cost = ((ira_memory_move_cost[mode][rclass][0] + ira_memory_move_cost[mode][rclass][1]) * saved_nregs / hard_regno_nregs (hard_regno, mode) - 1) ^^ here * (optimize_size ? 1 : REG_FREQ_FROM_BB (ENTRY_BLOCK_PTR_FOR_FN (cfun))); There is no comment why -1, but I suppose it is there to biass costs to use prologue/epilogue instad of caller save sequence when runtime cost estimate is even. Now for void d(); void a() { for (int i = 0; i < 100; i++) d(); int b; asm ("use %0":"=r" (b)); d(); asm volatile (""::"r" (b)); } I get Popping a0(r100,l0) -- (0=120,120) (1=120,120) (2=120,120) (4=120,120) (5=120,120) (36=120,120) (37=120,120) (38=120,120) (39=120,120) (3=0,0) (6=110,110) (40=110,110) (41=110,110) (42=110,110) (43=110,110) This also makes sense to me, since there is loop the basic block has lower frequency of 10, thus costs are scaled down. void d(); int cnd; void a() { int b; asm ("use %0":"=r" (b)); if (__builtin_expect_with_probability (cnd, 1, 0.8)) d(); asm volatile (""::"r" (b)); } I get Popping a0(r100,l0) -- (0=9600,9600) (1=9600,9600) (2=9600,9600) (4=9600,9600) (5=9600,9600) (36=9600,9600) (37=9600,9600) (38=9600,9600) (39=9600,9600) (3=11000,11000) (6=11000,11000) (40=11000,11000) (41=11000,11000) (42=11000,11000) (43=11000,11000) which seems also correct. It is better to use caller saved registr since call to d() has lower frequency then the entry basic block. This is what gcc 13 and this patch gets wrong Popping a0(r100,l0) -- (1=9600,9600) (2=9600,9600) (4=9600,9600) (5=9600,9600) (36=9600,9600) (37=9600,9600) (38=9600,9600) (39=9600,9600) (3=11,11) (6=11,11) (40=11,11) (41=11,11) (42=11,11) (43=11,11) Due to missing scaling factor we think that using callee saved registr is win while it is not. GCC13 gets this wrong even for probability 0. Looking into PRs referneced in the patch: PR111673 is the original bug that motivated correcting the cost (adding the scale by entry block frequency) PR115932 is cris-elf I don't know how to bencmark easily. PR116028 seems to be about shrink wrapping in void f(int *i) { if (!i) return; else { __builtin_printf("Hi"); *i=0; } } here I see tha tthe cost model misses the fact that epilogue will be shrink-wrapped so both caller and callee saving will result in one spill after the early exit. PR117081 is about regression in povray. The reducted testcase: void foo (void); void bar (void); int test (int a) { int r; if (r = -a) foo (); else bar (); return r; } shows that we now use caller saved register (EAX) to hold the return value which yields longer code. The costs are Popping a0(r98,l0) -- (0=13000,13000) (3=15000,15000) (6=15000,15000) (40=15000,15000) (41=15000,15000) (42=15000,15000) (43=15000,15000) here 15000 is 11000+4000 where I think 4000 is cost of 2 reg-reg moves multiplied by REG_FREQ_MAX. This seems correct. GCC 13 uses callee saved register and produces: : 0: 53 push %rbx
Re: [PATCH] [testsuite] [sparc] skip tls tests if emulated
> A number of tls tests expect TLS-specific relocations, that are not > present when tls is emulated, as on e.g. leon3-elf. Skip the tests > when tls is emulated. > > Regstrapped on x86_64-linux-gnu, also tested with gcc-14 targeting > leon3-elf. Ok to install? > > > for gcc/testsuite/ChangeLog > > * gcc.target/sparc/tls-ld-int16.c: Skip when tls is emulated. > * gcc.target/sparc/tls-ld-int32.c: Likewise. > * gcc.target/sparc/tls-ld-int8.c: Likewise. > * gcc.target/sparc/tls-ld-uint16.c: Likewise. > * gcc.target/sparc/tls-ld-uint32.c: Likewise. > * gcc.target/sparc/tls-ld-uint8.c: Likewise. OK. -- Eric Botcazou
Re: [PATCH v2] ira: Add a target hook for callee-saved register cost scale
On 2/6/25 4:54 PM, Richard Sandiford wrote: Vladimir Makarov writes: This is a complicated problem resulted in many tries to fix it in some general way. In general I am agree with Surya's approach to scale cost of reg saves/restores somehow. But the general approach, although solved some problems, also created a lot of new ones. May be because IRA does not take some other aspects of using callee saved regs. And some of them were addressed by other patches. e.g. recently proposed by Surya and one for PR118497. I also agree with Richard Sandiford's comment that we should avoid introducing the new hooks for RA and I actually tried to stick to this policy for a long time. But I don't see another solution to introducing the new hook in this case. It is hard to figure out generally in RA that saves/restores will be insns different from ld/st (e.g. x86 push/pop) and that they will be cheaper. So after some time to think about the patch I decided to approve the RA part of the patch. I also hope that the work on this problem will continue (e.g. improving default and target hook implementations and documentation how to better use it). For the record, I strongly object to this. The hook just seems like a complete hack to me. Even if we accept that there is target-specific information in play, the hook isn't providing that information. In contrast to what you said above, my objection isn't to having hooks -- those are often needed and good. What bothers me is that the hook isn't well designed. Hooks should provide information rather than override code by brute force. On the other hand, I accept that you're (rightly!) the maintainer. I also don't like the hook implementation for x86-64 (although this is a matter of target maintainers). All these costs look voodoo and random to me. But this problem is longing for more than half year. I spent a lot of time too on this. Patches were submitted and reverted and nobody did find so far any solution satisfying all GCC tests. If somebody finds a solution without the hook, I will be glad to get rid off the hook. Also the related PRs are marked as P1 ones, it means people think they are important (I am not sure about this myself). Without fixing them (or downgrading them) there will be no GCC release. So I am in difficult situation with these PRs and need some resolution.
Re: [Patch] [GCN] Handle generic ISA names in libgomp's plugin-gcn.c
After spending some time with the debugger, I am now convinced that ROCm 6.3.2 does not yet support generic. The amd-staging branch at https://github.com/ROCm/ROCR-Runtime/ support does, albeit only after the tag rocm-6.3.2. However, the released ROCm 6.3.2 does not match that tagged commit as it seems to contain at least the much newer commit eec21304 (but not the generic support that appeared in an in-between commit.) [AOMP is also different; it seems as if 20.0-1 does not support it, but 20.0-2 might; AOMP does not seem to have commit eec21304 to confuse things.] * * * The attached patch now adds gfx9-generic - alongside the existing gfx{10-3,1}-generic and all gfx* that are enabled by those. See previous thread for the related discussions. OK for mainline? (This patch depends on the just submitted patch: "[Patch] [gcn] Fix gfx906's sramecc setting", https://gcc.gnu.org/pipermail/gcc-patches/2025-February/675251.html ) * * * RFC: The patch currently does not document any gfx*-generic nor the gfx902 etc. I wonder whether a follow-up patch should just add the non-generic ones with "(Experimental)" to invoke.texi? Or any better ideas regarding what to make available now and what only later, once a generic-supporting ROCm is available? In other words: What bits of: https://gcc.gnu.org/pipermail/gcc-patches/2025-February/675200.html ? Tobias [gcn] Add gfx9-generic and generic-associated gfx* This patch adds gfx9-generic, completing the gfx*-generic support. It also adds all gfx* devices that are part of any of the gfx*-generic, i.e. gfx902, gfx904, gfx909, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1101, gfx1102, gfx1150, gfx1151, gfx1152, and gfx1153. gcc/ChangeLog: * config/gcn/gcn-devices.def (GCN_DEVICE): Add gfx9-generic, gfx902, gfx904, gfx909, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1101, gfx1102, gfx1150, gfx1151, gfx1152, and gfx1153. Add a currently unused column linking, a specific ISA to a generic one (if it exists). * config/gcn/gcn-tables.opt: Regenerate gcc/config/gcn/gcn-devices.def | 202 ++--- gcc/config/gcn/gcn-tables.opt | 45 + 2 files changed, 236 insertions(+), 11 deletions(-) diff --git a/gcc/config/gcn/gcn-devices.def b/gcc/config/gcn/gcn-devices.def index a8b21a358b4..af1420382e2 100644 --- a/gcc/config/gcn/gcn-devices.def +++ b/gcc/config/gcn/gcn-devices.def @@ -71,6 +71,10 @@ generated by the used llvm-mc assembler. 10 "Architecture Family Name" (string, external) Used to #define '__GFX<...>__'. + 11 "GENERIC NAME" (text, external) + The name of the generic ISA this device is compatible with or "NONE", + where the generic name is the NAME (field 2) of the associated + generic device. Fields marked "external", above, have values defined elsewhere (HSA, ROCM, LLVM, ELF, etc.) and must have matching definitions here. Fields marked @@ -86,7 +90,30 @@ GCN_DEVICE(gfx900, GFX900, 0x2c, ISA_GCN5, /* CU mode */ HSACO_ATTR_UNSUPPORTED, /* Max ISA VGPRs */ 256, /* Generic code obj version */ 0, /* non-generic */ - /* Architecture Family */ GFX9 + /* Architecture Family */ GFX9, + /* Generic Name */ GFX9_GENERIC + ) + +GCN_DEVICE(gfx902, GFX902, 0x2d, ISA_GCN5, + /* XNACK default */ HSACO_ATTR_OFF, + /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED, + /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED, + /* CU mode */ HSACO_ATTR_UNSUPPORTED, + /* Max ISA VGPRs */ 256, + /* Generic code obj version */ 0, /* non-generic */ + /* Architecture Family */ GFX9, + /* Generic Name */ GFX9_GENERIC + ) + +GCN_DEVICE(gfx904, GFX904, 0x2e, ISA_GCN5, + /* XNACK default */ HSACO_ATTR_OFF, + /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED, + /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED, + /* CU mode */ HSACO_ATTR_UNSUPPORTED, + /* Max ISA VGPRs */ 256, + /* Generic code obj version */ 0, /* non-generic */ + /* Architecture Family */ GFX9, + /* Generic Name */ GFX9_GENERIC ) GCN_DEVICE(gfx906, GFX906, 0x2f, ISA_GCN5, @@ -96,7 +123,8 @@ GCN_DEVICE(gfx906, GFX906, 0x2f, ISA_GCN5, /* CU mode */ HSACO_ATTR_UNSUPPORTED, /* Max ISA VGPRs */ 256, /* Generic code obj version */ 0, /* non-generic */ - /* Architecture Family */ GFX9 + /* Architecture Family */ GFX9, + /* Generic Name */ GFX9_GENERIC ) GCN_DEVICE(gfx908, GFX908, 0x30, ISA_CDNA1, @@ -106,7 +134,19 @@ GCN_DEVICE(gfx908, GFX908, 0x30, ISA_CDNA1, /* CU mode */ HSACO_ATTR_UNSUPPORTED, /* Max ISA VGPRs */ 256, /* Generic code obj version */ 0, /* non-generic */ - /* Architecture Family */ GFX9 + /* Architecture Family */ GFX9, + /* Generic Name */ NONE + ) + +GCN_DEVICE(gfx909, GFX909, 0x31, ISA_GCN5, + /* XNACK default */ HSACO_ATTR_ANY, + /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED, + /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED, + /* CU mode */ HSACO_ATTR_UNSUPPORTED, + /* Max ISA VGPRs */ 256, + /* Ge
Patch ping^5 (Re: [PATCH] analyzer: Handle nonnull_if_nonzero attribute [PR117023])
On Tue, Jan 21, 2025 at 04:59:16PM +0100, Jakub Jelinek wrote: > On Tue, Jan 07, 2025 at 01:49:04PM +0100, Jakub Jelinek wrote: > > On Wed, Dec 18, 2024 at 12:15:15PM +0100, Jakub Jelinek wrote: > > > On Fri, Dec 06, 2024 at 05:07:40PM +0100, Jakub Jelinek wrote: > > > > I'd like to ping the > > > > https://gcc.gnu.org/pipermail/gcc-patches/2024-November/668699.html > > > > patch. > > > > > > > > The patches it depended on are already committed and there is a patch > > > > which depends on this (the builtins shift from nonnull to > > > > nonnull_if_nonzero > > > > where needed) which has been approved but can't be committed. > > > > > > Gentle ping on this one. > > > > Ping. > > Ping again. Ping. > > > > > 2024-11-14 Jakub Jelinek > > > > > > > > > > PR c/117023 > > > > > gcc/analyzer/ > > > > > * sm-malloc.cc (malloc_state_machine::on_stmt): Handle > > > > > also nonnull_if_nonzero attributes. > > > > > gcc/testsuite/ > > > > > * c-c++-common/analyzer/call-summaries-malloc.c > > > > > (test_use_without_check): Pass 4 rather than sz to memset. > > > > > * c-c++-common/analyzer/strncpy-1.c (test_null_dst, > > > > > test_null_src): Pass 42 rather than count to strncpy. Jakub
Re: [PATCH v2] ira: Add a target hook for callee-saved register cost scale
On 2/3/25 1:20 AM, H.J. Lu wrote: commit 3b9b8d6cfdf59337f4b7ce10ce92a98044b2657b Author: Surya Kumari Jangala Date: Tue Jun 25 08:37:49 2024 -0500 ira: Scale save/restore costs of callee save registers with block frequency scales the cost of saving/restoring a callee-save hard register in epilogue and prologue with the entry block frequency, which, if not optimizing for size, is 1, for all targets. As the result, callee-saved registers may not be used to preserve local variable values across calls on some targets, like x86. Add a target hook for the callee-saved register cost scale in epilogue and prologue used by IRA. The default version of this target hook returns 1 if optimizing for size, otherwise returns the entry block frequency. Add an x86 version of this target hook to restore the old behavior prior to the above commit. This is a complicated problem resulted in many tries to fix it in some general way. In general I am agree with Surya's approach to scale cost of reg saves/restores somehow. But the general approach, although solved some problems, also created a lot of new ones. May be because IRA does not take some other aspects of using callee saved regs. And some of them were addressed by other patches. e.g. recently proposed by Surya and one for PR118497. I also agree with Richard Sandiford's comment that we should avoid introducing the new hooks for RA and I actually tried to stick to this policy for a long time. But I don't see another solution to introducing the new hook in this case. It is hard to figure out generally in RA that saves/restores will be insns different from ld/st (e.g. x86 push/pop) and that they will be cheaper. So after some time to think about the patch I decided to approve the RA part of the patch. I also hope that the work on this problem will continue (e.g. improving default and target hook implementations and documentation how to better use it). We have a lot of tests checking expected code generation (some of them are wrong, e.g. expecting particular hard regs). But those are small tests. We should pay more attention to check impact on SPEC for such patches. It is more time-consuming but it is the right way. A patch can introduce worse code generation for small test but can be a win for real applications as ones in SPEC. Any RA in industrial compiler can not generate optimal code for most cases. So I would not mark such GCC test failures as P1 PRs. The RA patch is OK for me for committing it into the trunk. Thank you. PR rtl-optimization/111673 PR rtl-optimization/115932 PR rtl-optimization/116028 PR rtl-optimization/117081 PR rtl-optimization/117082 PR rtl-optimization/118497 * ira-color.cc (assign_hard_reg): Call the target hook for the callee-saved register cost scale in epilogue and prologue. * target.def (ira_callee_saved_register_cost_scale): New target hook. * targhooks.cc (default_ira_callee_saved_register_cost_scale): New. * targhooks.h (default_ira_callee_saved_register_cost_scale): Likewise. * config/i386/i386.cc (ix86_ira_callee_saved_register_cost_scale): New. (TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE): Likewise. * doc/tm.texi: Regenerated. * doc/tm.texi.in (TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE): New. Signed-off-by: H.J. Lu --- gcc/config/i386/i386.cc | 11 +++ gcc/doc/tm.texi | 8 gcc/doc/tm.texi.in | 2 ++ gcc/ira-color.cc| 3 +-- gcc/target.def | 12 gcc/targhooks.cc| 8 gcc/targhooks.h | 1 + 7 files changed, 43 insertions(+), 2 deletions(-) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index f89201684a8..3128973ba79 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -20600,6 +20600,14 @@ ix86_class_likely_spilled_p (reg_class_t rclass) return false; } +/* Implement TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE. */ + +static int +ix86_ira_callee_saved_register_cost_scale (int) +{ + return 1; +} + /* Return true if a set of DST by the expression SRC should be allowed. This prevents complex sets of likely_spilled hard regs before split1. */ @@ -27078,6 +27086,9 @@ ix86_libgcc_floating_mode_supported_p #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class #undef TARGET_CLASS_LIKELY_SPILLED_P #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p +#undef TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE +#define TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE \ + ix86_ira_callee_saved_register_cost_scale #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 0de24eda6f0..9f42913a4ef 100644 --- a/gcc/doc/tm.texi +++ b/gcc/d
[Patch, fortran] PR118750 - [14/15 Regression] ICE on associate with elemental function with polymorphic array dummy argument
This regression must have been generated during my ASSOCIATE campaign; I am not sure when or where. Fortunately the fix is extremely simple and is explained in the ChangeLog. Regression tests fine. OK for both affected branches? Paul Change.Logs Description: Binary data diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc index 7f73d53e31e..a961843edc4 100644 --- a/gcc/fortran/resolve.cc +++ b/gcc/fortran/resolve.cc @@ -10741,7 +10741,7 @@ resolve_assoc_var (gfc_symbol* sym, bool resolve_target) || gfc_is_ptr_fcn (target)); /* Finally resolve if this is an array or not. */ - if (target->expr_type == EXPR_FUNCTION + if (target->expr_type == EXPR_FUNCTION && target->rank == 0 && (sym->ts.type == BT_CLASS || sym->ts.type == BT_DERIVED)) { gfc_expression_rank (target); diff --git a/gcc/testsuite/gfortran.dg/associate_72.f90 b/gcc/testsuite/gfortran.dg/associate_72.f90 new file mode 100644 index 000..993ebdfd5a7 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/associate_72.f90 @@ -0,0 +1,26 @@ +! { dg-do compile } +! { dg-options "-fdump-tree-original" } +! +! Test the fix for the 14/15 regression PR118750 +! +! Contributed by Damian Rouson +! + implicit none + + type string_t +character(:), allocatable :: str + end type + + associate(str_a => get_string([string_t ("abcd"),string_t ("ef")])) +if (str_a(1)%str//str_a(2)%str /= "abcdef") STOP 1 ! Returned "Invalid array reference at (1)" + end associate + +contains + + type(string_t) elemental function get_string(mold) +class(string_t), intent(in) :: mold +get_string = string_t(mold%str) + end function + +end +! { dg-final { scan-tree-dump-times "array01_string_t str_a" 1 "original" } }
Re: [PATCH v2] c++: Reject cdtors and conversion operators with a single * as return type [PR118306]
On 2/5/25 2:21 PM, Simon Martin wrote: Hi Jason, On 4 Feb 2025, at 21:23, Jason Merrill wrote: On 2/4/25 3:03 PM, Jason Merrill wrote: On 2/4/25 11:45 AM, Simon Martin wrote: On 4 Feb 2025, at 17:17, Jason Merrill wrote: On 2/4/25 10:56 AM, Simon Martin wrote: Hi Jason, On 4 Feb 2025, at 16:39, Jason Merrill wrote: On 1/15/25 9:56 AM, Jason Merrill wrote: On 1/15/25 7:24 AM, Simon Martin wrote: Hi, On 14 Jan 2025, at 23:31, Jason Merrill wrote: On 1/14/25 2:13 PM, Simon Martin wrote: On 10 Jan 2025, at 19:10, Andrew Pinski wrote: On Fri, Jan 10, 2025 at 3:18 AM Simon Martin wrote: We currently accept the following invalid code (EDG and MSVC do as well) clang does too: https://github.com/llvm/llvm-project/issues/121706 . Note it might be useful if a testcase with multiply `*` is included too: ``` struct A { A (); }; ``` Thanks, makes sense to add those. Done in the attached updated revision, successfully tested on x86_64-pc-linux-gnu. +/* Check that it's OK to declare a function at ID_LOC with the indicated TYPE, + TYPE_QUALS and DECLARATOR. SFK indicates the kind of special function (if + any) that this function is. OPTYPE is the type given in a conversion operator declaration, or the class type for a constructor/destructor. Returns the actual return type of the function; that may be different than TYPE if an error occurs, or for certain special functions. */ @@ -12361,8 +12362,19 @@ check_special_function_return_type (special_function_kind sfk, tree type, tree optype, int type_quals, + const cp_declarator *declarator, + location_t id_loc, id_loc should be the same as declarator->id_loc? You’re right. const location_t* locations) { + /* If TYPE is unspecified, DECLARATOR, if set, should not represent a pointer + or a reference type. */ + if (type == NULL_TREE + && declarator + && (declarator->kind == cdk_pointer + || declarator->kind == cdk_reference)) + error_at (id_loc, "expected unqualified-id before %qs token", + declarator->kind == cdk_pointer ? "*" : "&"); ...and id_loc isn't the location of the ptr-operator, it's the location of the identifier, so this indicates the wrong column. I think using declarator->id_loc makes sense, just not pretending it's the location of the *. Good catch, thanks. Let's give diagnostics more like the others later in the function instead of trying to emulate cp_parser_error. Makes sense. This is what the updated patch does, successfully tested on x86_64-pc-linux-gnu. OK for GCC 16? OK. Does this also fix 118304? If so, let's go ahead and apply it to GCC 15. I have checked just now, and we still ICE for 118304’s testcase with that fix. Why doesn't the preeexisting type = void_type_node; in check_special_function_return_type fix the return type and avoid the ICE? We hit the gcc_assert at method.cc:3593, that Marek’s fix bypasses. Yes, but why doesn't check_special_function_return_type prevent that? Ah, because we call it before walking the declarator. We need to check again later, perhaps in grokfndecl, that the type is correct. Perhaps instead of your patch. One “issue” with adding another check in or close to grokfndecl is that DECLARATOR will have “been moved to the ID”, and the fact that we had a CDK_POINTER kind is “lost”. We could obviously somehow propagate this information, but there might be something easier. The information isn't lost: it's now reflected in the (wrong) return type. One place it would make sense to check would be if (ctype && (sfk == sfk_constructor || sfk == sfk_destructor)) { /* We are within a class's scope. If our declarator name is the same as the class name, and we are defining a function, then it is a constructor/destructor, and therefore returns a void type. */ Here 'type' is still the return type, we haven't gotten to build_function_type yet. Jason
Re: [RFC][libgcc][PATCH] Fix two issues in libgcc/unwind-dw2-fde.c.
> On Feb 6, 2025, at 09:25, Richard Biener wrote: > > On Thu, Feb 6, 2025 at 2:57 PM Qing Zhao wrote: >> >> >> >>> On Feb 6, 2025, at 04:36, Richard Biener wrote: >>> >>> On Wed, Feb 5, 2025 at 4:40 PM Qing Zhao wrote: > On Feb 5, 2025, at 07:46, Richard Biener > wrote: > > On Tue, Feb 4, 2025 at 10:12 PM Qing Zhao wrote: >> >> Hi, >> >> One of our big application segv in libgcc code while unwinding the stack. >> >> This is a random crash while the application throws a c++ exception and >> unwinds the stack. Incidents are random and never can be reproduced by >> any >> test case. >> >> The libgcc that is used is based on GCC 8. >> >> Fortunately, we got some information through the stack trace, and >> narrowed >> down the crash is in the routine "init_object" of >> libgcc/unwind-dw2-pde.c: >> >> 777 count = classify_object_over_fdes (ob, ob->u.single); >> >> when loading the 2nd parameter of the call to >> "classify_object_over_fdes". >> i.e, the address that is pointed by "ob->u.single" is invalid. >> >> And the outer caller we can track back is the following line 1066 of the >> routine >> "_Unwind_Find_FDE": >> >> 1060 /* Classify and search the objects we've not yet processed. */ >> 1061 while ((ob = unseen_objects)) >> 1062 { >> 1063 struct object **p; >> 1064 >> 1065 unseen_objects = ob->next; >> 1066 f = search_object (ob, pc); >> >> Then we suspected that the construction of the static variable >> "unseen_objects" >> might have some bug. >> >> Then after studying the source code that constructs "unseen_objects" in >> libgcc/unwind-dw2-pde.c, I found an issue in the routines >> "__register_frame" >> and "__register_frame_table" as following: >> >> 127 void >> 128 __register_frame (void *begin) >> 129 { >> 130 struct object *ob; >> 131 >> 132 /* If .eh_frame is empty, don't register at all. */ >> 133 if (*(uword *) begin == 0) >> 134 return; >> 135 >> 136 ob = malloc (sizeof (struct object)); >> 137 __register_frame_info (begin, ob); >> 138 } >> >> 181 void >> 182 __register_frame_table (void *begin) >> 183 { >> 184 struct object *ob = malloc (sizeof (struct object)); >> 185 __register_frame_info_table (begin, ob); >> 186 } >> 187 >> >> In the above, one obvious issue in the source code is at line 136, line >> 137, >> and line 184 and line 185: the return value of call to "malloc" is not >> checked >> against NULL before it was passed as the second parameter of the >> follwoing call. >> >> This might cause unpredicted random behavior. >> >> I checked the latest trunk GCC libgcc, the same issue is there too. >> >> This patch is for the latest trunk GCC. >> >> Please let me know any comments on this? > > I think I've seen this elsewhere — Do you mean that you saw this problem (malloc return NULL during register_frame) previously? >>> >>> It was probably reported to us (SUSE) from a customer/partner, also >>> from a (llvm?) JIT context. >> >> Okay. >> So, that bug has not been resolved yet? > > It was resolved from our side IIRC as being a problem in the > application with the JIT and > otherwise a feature request (better unwind API for this use case, > allowing a failure mode). Okay, I see. Thanks. Qing > > Richard. > >> Qing >>> > the issue is the unwind register API does > not allow for failures but I also think calling abort() is bad. Okay, I see. > > Are you calling this from a JIT context or so? I will ask the bug submitter on this to make sure. > We're assuming that at program > start malloc() will not fail. So, the routine __register_frame is only called at the _start_ of a program? > > The proper solution is probably to add an alternate ABI which has a way > to fail > during registration. Any suggestion on this (or any other routine I can refer to?) Thanks a lot for the help. Qing > > Richard. > >> thanks. >> >> Qing >> >> >> === >> >> Fix two issues in libgcc/unwind-dw2-fde.c: >> >> A. The returned value of call to malloc is not checked against NULL. >> This might cause unpredicted behavior during stack unwinding. >> >> B. Check for null begin parameter (as well as pointer to null) in >> __register_frame_info_table_bases and __register_frame_table. >> >> libgcc/ChangeLog: >> >> * unwind-dw2-fde.c (__register_frame): Check the return value of >> call >> to mal
Re: [PATCH] loop-iv, riscv: Fix get_biv_step_1 for RISC-V [PR117506]
On 2/5/25 10:57 AM, Jakub Jelinek wrote: Hi! The following test ICEs on RISC-V at least latently since r14-1622-g99bfdb072e67fa3fe294d86b4b2a9f686f8d9705 which added RISC-V specific case to get_biv_step_1 to recognize also ({zero,sign}_extend:DI (plus:SI op0 op1)) The reason for the ICE is that op1 in this case is CONST_POLY_INT which unlike the really expected VOIDmode CONST_INTs has its own mode and still satisfies CONSTANT_P. GET_MODE (rhs) (SImode) is different from outer_mode (DImode), so the function later does *inner_step = simplify_gen_binary (code, outer_mode, *inner_step, op1); but that obviously ICEs because while *inner_step is either VOIDmode or DImode, op1 has SImode. The following patch fixes it by extending op1 using code so that simplify_gen_binary can handle it. Another option would be to change the !CONSTANT_P (op1) 3 lines above this to !CONST_INT_P (op1), I think it isn't very likely that we get something useful from other constants there. Bootstrapped/regtested on x86_64-linux and i686-linux (which doesn't tell much because such code isn't encountered there, just that the gcc.dg test at least works), but I have no way to test this on riscv easily. Could somebody test it there? Or do you want the !CONST_INT_P (op1) instead? 2025-02-05 Jakub Jelinek PR rtl-optimization/117506 * loop-iv.cc (get_biv_step_1): For {ZERO,SIGN}_EXTEND of PLUS apply {ZERO,SIGN}_EXTEND to op1. * gcc.dg/pr117506.c: New test. * gcc.target/riscv/pr117506.c: New test. LGTM. While there was a regression reported in the run in my tester, it was actually from a patch from Richard S. that just landed which appears to need a trivial scan-asm adjustment. Jeff
Re: [Patch, fortran] PR118750 - [14/15 Regression] ICE on associate with elemental function with polymorphic array dummy argument
Hi Paul, looks fine to me. Thanks for the patch, Andre Andre Vehreschild * Kreuzherrenstr. 8 * 52062 Aachen Tel.: +49 178 3637538 * ve...@gmx.de Am 6. Februar 2025 16:49:18 schrieb Paul Richard Thomas : This regression must have been generated during my ASSOCIATE campaign; I am not sure when or where. Fortunately the fix is extremely simple and is explained in the ChangeLog. Regression tests fine. OK for both affected branches? Paul
Re: [patch,avr] Add support for a Compact Vector Table
Georg-Johann Lay writes: > Some AVR devices support a Compact Vector Table. > > The support is provided by means of a startup code file > crt-cvt.o from AVR-LibC that can be linked instead > of the traditional crt.o. > > This patch adds a new command line option -mcvt that links > that CVT startup code (or issues an error when the device > doesn't support a CVT). > > Ok for trunk? Ok. Denis
[PATCH] RISC-V: Fix ratio in vsetvl fuse rule [PR115703].
Hi, in PR115703 we fuse two vsetvls: Fuse curr info since prev info compatible with it: prev_info: VALID (insn 438, bb 2) Demand fields: demand_ge_sew demand_non_zero_avl SEW=32, VLMUL=m1, RATIO=32, MAX_SEW=64 TAIL_POLICY=agnostic, MASK_POLICY=agnostic AVL=(reg:DI 0 zero) VL=(reg:DI 9 s1 [312]) curr_info: VALID (insn 92, bb 20) Demand fields: demand_ratio_and_ge_sew demand_avl SEW=64, VLMUL=m1, RATIO=64, MAX_SEW=64 TAIL_POLICY=agnostic, MASK_POLICY=agnostic AVL=(const_int 4 [0x4]) VL=(nil) prev_info after fused: VALID (insn 438, bb 2) Demand fields: demand_ratio_and_ge_sew demand_avl SEW=64, VLMUL=mf2, RATIO=64, MAX_SEW=64 TAIL_POLICY=agnostic, MASK_POLICY=agnostic AVL=(const_int 4 [0x4]) VL=(nil). The result is vsetvl zero, zero, e64, mf2, ta, ma. The previous vsetvl set vl = 4 but here we wrongly set it to vl = 2. As all the following vsetvls only ever change the ratio we never recover. The issue is quite difficult to trigger. The last known bad commit is r15-3458-g5326306e7d9d36. With that commit the output is wrong but -fno-schedule-insns makes it correct. From the next commit on the issue is latent. I still added the PR's test as scan and run check even if they don't trigger right now (and I'm not sure the run test will ever fail, but well). I verified that the patch fixes the issue when applied on top of r15-3458-g5326306e7d9d36. Regtested on rv64gcv_zvl512b. Let's see what the CI says. Regards Robin PR target/115703 gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc: Use max_sew for calculating the new LMUL. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr115703-run.c: New test. * gcc.target/riscv/rvv/autovec/pr115703.c: New test. --- gcc/config/riscv/riscv-vsetvl.cc | 3 +- .../riscv/rvv/autovec/pr115703-run.c | 44 +++ .../gcc.target/riscv/rvv/autovec/pr115703.c | 38 3 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703-run.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703.c diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 72c4c59514e..82284624a24 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -1756,7 +1756,8 @@ private: inline void use_max_sew_and_lmul_with_next_ratio (vsetvl_info &prev, const vsetvl_info &next) { -prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ())); +int max_sew = MAX (prev.get_sew (), next.get_sew ()); +prev.set_vlmul (calculate_vlmul (max_sew, next.get_ratio ())); use_max_sew (prev, next); prev.set_ratio (next.get_ratio ()); } diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703-run.c new file mode 100644 index 000..0c2c3d7d4fc --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703-run.c @@ -0,0 +1,44 @@ +/* { dg-do run } */ +/* { dg-require-effective-target rvv_zvl256b_ok } */ +/* { dg-options "-O3 -march=rv64gcv_zvl256b -mabi=lp64d -fwhole-program -fwrapv" } */ + +int a, i; +unsigned long b; +unsigned c, f; +long long d = 1; +short e, m; +long g, h; + +__attribute__ ((noipa)) +void check (unsigned long long x) +{ + if (x != 13667643351234938049ull) +__builtin_abort (); +} + +int main() { + for (int q = 0; q < 2; q += 1) { +for (short r = 0; r < 2; r += 1) + for (char s = 0; s < 6; s++) +for (short t = 0; t < 011; t += 12081 - 12080) + for (short u = 0; u < 11; u++) { +a = ({ a > 1 ? a : 1; }); +b = ({ b > 5 ? b : 5; }); +for (short j = 0; j < 2; j = 2080) + c = ({ c > 030 ? c : 030; }); +for (short k = 0; k < 2; k += 2080) + d *= 7; +e *= 10807; +f = ({ f > 3 ? f : 3; }); + } +for (int l = 0; l < 21; l += 1) + for (int n = 0; n < 16; n++) { +g = ({ m ? g : m; }); +for (char o = 0; o < 7; o += 1) + h *= 3; +i = ({ i < 0 ? i : 0; }); + } + } + + check (d); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703.c new file mode 100644 index 000..fc147fefa98 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=rv64gcv_zvl256b -fwhole-program -fwrapv" } */ + +int a, i; +unsigned long b; +unsigned c, f; +long long d = 1; +short e, m; +long g, h; + +int main() { + for (int q = 0; q < 2; q += 1) { +for (short r = 0; r < 2; r += 1) + for (char s = 0; s < 6; s++) +for (short t = 0; t < 011; t
Re: [Patch, fortran] PR118750 - [14/15 Regression] ICE on associate with elemental function with polymorphic array dummy argument
Hi Andre, Thanks for the review - pushed as commit r15-7389. Paul On Thu, 6 Feb 2025 at 16:22, Andre Vehreschild wrote: > Hi Paul, > > looks fine to me. > > Thanks for the patch, > Andre > Andre Vehreschild * Kreuzherrenstr. 8 * 52062 Aachen > Tel.: +49 178 3637538 * ve...@gmx.de > > Am 6. Februar 2025 16:49:18 schrieb Paul Richard Thomas < > paul.richard.tho...@gmail.com>: > >> This regression must have been generated during my ASSOCIATE campaign; I >> am not sure when or where. Fortunately the fix is extremely simple and is >> explained in the ChangeLog. >> >> Regression tests fine. OK for both affected branches? >> >> Paul >> >> >
Re: [Patch] [GCN] Handle generic ISA names in libgomp's plugin-gcn.c
Updated version; changes: * Fixed SRAMECC setting for gfx906 - should be ANY (as in GCC 14) not UNSUPPORTED. (Shows up as link error when compiling with '-g'.) * Fixed generic handling for gfx9* - copy'n'paste bug: gfx11-generic is clearly wrong. * Implemented autoconvert from -march=gfx to -march=gfx*-generic if the latter but not the former has configured run-time libraries. (with a warning). * Largely improved the runtime diagnostic. * It also uses non-deprecated functions for the loader - in the hope that it would help, but it makes no difference. As the hsa.h header shipping with GCC is quite old, I think changing it should be fine as I doubt that there is any libhsa*.so in use that doesn't support it. - On the other hand, as it doesn't solve a known issue, we don't have to update it. * * * Thus, it should work in principle. - BUT: I have now a ROCm 6.3.2 available - and it DOES NOT work. ROCm 6.3.2 supports gfx*generic as a grep shows. Trying a hello-world program with -march=gfx9-generic and running it with LOADER_ENABLE_LOGGING=1 set, it fails with: LoaderError: code object's ISA (amdgcn-amd-amdhsa--gfx9-generic) is invalid and returns HSA_STATUS_ERROR_INVALID_ISA_NAME.Looking at https://github.com/ROCm/ROCR-Runtime I have *no* idea why it fails - the name "amd-amdhsa--gfx9-generic" should be in the map as generic name, there is nothing suspicious with the lookup - and the fail is before there is any compatibility check regarding the actual hardware, which is the next check. 'gfx11-generic' is a bit longer supported and fails identically. (The hardware is gfx906, but that shouldn't matter.) As I assume that someone has tested it, some variant should work. The question is only what. The library as too many indirections but all code is rather simple and I fail to see why it wouldn't work. (Some code is not obvious but as gfx906 works, those bits are probably fine.) * * * Thus, if anyone has an idea how to make progress from here ... BTW: The gfx906 SRAMECC fix should be committed separately and soon - before we forget it and before GCC 15 ships. Tobias [GCN] Handle generic ISA names in libgomp's plugin-gcn.c There are plenty of AMD GPUs, each having its own ISA; to reduce the number of compile-for ISA, some of them were bundled under a generic name, supporting either all features or a large subset of it. Initial support for this was added in r15-4550-g1bdeebe69b71bf. On the assembler/linker side, it requires LLVM 19 - and on the runtime side, ROCm 6.3.x (released in December 2024). This commit adds gfx9-generic alongside gfx10-3-generic and gfx11-generic and all previously unsupported specific devices supported by either of them, i.e. gfx902, gfx904, gfx909, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1101, gfx1102, gfx1150, gfx1151, gfx1152, and gfx1153. Those are marked as 'experimental' as they have not been tested and also because not all features of, e.g., gfx115x are supported - only those of the common subset provided as by gfx11-generic. While gfx10-3-generic and gfx11-generic cover all GPUs, gfx9-generic only covers a small subset - and, in particular, the compute MI100 and MI200 devices are not included. On the runtime side, the assumption that all generic code will work and only when the ROCm (actually: rocr) fails to load, a specific error message is printed, providing some suggestions how to fix a fail. On the compiler side: In order to make it easier to ship only the generic libraries, the compiler will fallback to the generic version if a (multi)lib exists for it but not for the specific version. gcc/ChangeLog: * config/gcn/gcn-devices.def (GCN_DEVICE): Add field to for the generic ISA for the device. Fix sramecc setting of gfx906 - should be ANY not UNSUPPORTED. * config/gcn/gcn-tables.opt: Regenerate. * config/gcn/gcn.cc (gcn_devices): Add missing tailing ... to GCN_DEVICE macro #define. * config/gcn/mkoffload.cc (enum elf_arch_code): Add EF_AMDGPU_MACH_AMDGCN_NONE. (elf_arch): Use enum elf_arch_code as type. (tool_cleanup): Silence warning by removing tailing '.' from error. (get_arch_name): Return enum elf_arch_code. (elf_arch_generic_update): New; replace specific device by generic device if only the latter has a multilib. (main): Call it; replace -march= as needed. * doc/install.texi (AMD GCN): Document gfx*-generic handling and dependencies. * doc/invoke.texi (AMD GCN): Update -march= for gfx{9,10-3,11}-generic and added compatible gfx* devices. libgomp/ChangeLog: * plugin/plugin-gcn.c (EF_AMDGPU_MACH): Add EF_AMDGPU_MACH_AMDGCN_NONE. (EF_AMDGPU_GENERIC_VERSION_V, EF_AMDGPU_GENERIC_VERSION_OFFSET, GET_GENERIC_VERSION): Define. (elf_gcn_isa_is_generic, get_generic_isa_code): New. (isa_matches_agent): Add 'failed' flag, if false, accept all generic if true, print extended diagnostic. (create_and_finalize_hsa_program): Call it also for ERROR_INVALID_CODE_OBJECT and output more diagnostic if the error is
[PATCH] [testsuite] [sparc] skip sparc-ret-1 with -mfix-ut699
Option -mfix-ut699 changes the set of instructions that can be placed in the delay slot, preventing the expected insn placement. Skip the test if the option is present. Regstrapped on x86_64-linux-gnu, also tested with gcc-14 targeting leon3-elf with -mfix-ut699. Ok to install? for gcc/testsuite/ChangeLog * gcc.target/sparc/sparc-ret-1.c: Skip on -mfix-ut699. --- gcc/testsuite/gcc.target/sparc/sparc-ret-1.c |1 + 1 file changed, 1 insertion(+) diff --git a/gcc/testsuite/gcc.target/sparc/sparc-ret-1.c b/gcc/testsuite/gcc.target/sparc/sparc-ret-1.c index 808e8a98f0e89..ef459c5016e96 100644 --- a/gcc/testsuite/gcc.target/sparc/sparc-ret-1.c +++ b/gcc/testsuite/gcc.target/sparc/sparc-ret-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-skip-if "no register windows" { *-*-* } { "-mflat" } { "" } } */ +/* { dg-skip-if "prevents expected asm" { *-*-* } { "-mfix-ut699" } { "" } } */ /* { dg-require-effective-target ilp32 } */ /* { dg-options "-mcpu=ultrasparc -O" } */ -- Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/ Free Software Activist GNU Toolchain Engineer More tolerance and less prejudice are key for inclusion and diversity Excluding neuro-others for not behaving ""normal"" is *not* inclusive
Re: [PATCH v3] [ifcombine] avoid creating out-of-bounds BIT_FIELD_REFs [PR118514]
On Thu, Feb 6, 2025 at 2:41 PM Alexandre Oliva wrote: > > On Jan 27, 2025, Richard Biener wrote: > > > What I was saying that the conservative tree_could_trap_p could say > > 'yes' to a certain encoding of a ref but 'no' to another if in reality > > the ref can never trap. We of course cannot (apart from bugs in > > tree_could_trap_p) turn a for-sure trap into a not-trap by simply > > rewriting the ref. > > I see. Yeah, that makes sense. > > > So I think we want this bit in (and it's dependences), but > > 'k, I'll split that bit out, after some clarification: > > > (I see the assert is no longer in the patch). > > That's because the assert went in as part of an earlier patch. I take > it it should be backed out along with the to-be-split-out bits above, > right? Yes. (IIRC there's also a PR tripping over this or a similar assert) Richard. > > -- > Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/ >Free Software Activist GNU Toolchain Engineer > More tolerance and less prejudice are key for inclusion and diversity > Excluding neuro-others for not behaving ""normal"" is *not* inclusive
Re: [PATCH] c++: Fix up handling of for/while loops with declarations in condition [PR86769]
On 2/5/25 12:02 PM, Jakub Jelinek wrote: Hi! As the following testcase show (note, just for-{3,4,6,7,8}.C, constexpr-86769.C and stmtexpr27.C FAIL without the patch, the rest is just that I couldn't find coverage for some details and so added tests we don't regress or for5.C is from Marek's attempt in the PR), we weren't correctly handling for/while loops with declarations as conditions. The C++ FE has the simplify_loop_decl_cond function which transforms such loops as mentioned in the comment: while (A x = 42) { } for (; A x = 42;) { } becomes while (true) { A x = 42; if (!x) break; } for (;;) { A x = 42; if (!x) break; } For for loops this is not enough, as the x declaration should be still in scope when expr (if any) is executed, and injecting the expr expression into the body in the FE needs to have the continue label in between, something normally added by the c-family genericization. One of my thoughts was to just add there an artificial label plus the expr expression in the FE and tell c-family about that label, so that it doesn't create it but uses what has been emitted. Unfortunately break/continue are resolved to labels only at c-family genericization time and by moving the condition (and its preparation statements such as the DECL_EXPR) into the body (and perhaps by also moving there the (increment) expr as well) we resolve incorrectly any break/continue statement appearing in cond (or newly perhaps also expr) expression(s). While in standard C++ one can't have something like that there, with statement expressions they are possible there, and we actually have testsuite coverage that when they appear outside of the body of the loop they bind to an outer loop rather than the inner one. When the FE moves everything into the body, c-family can't distinguish any more between the user body vs. the condition/preparation statements vs. expr expression. So, the following patch instead keeps them separate and does the merging only at the c-family loop genericization time. For that the patch introduces two new operands of FOR_STMT and WHILE_STMT, *_COND_PREP which is forced to be a BIND_EXPR which contains the preparation statements like DECL_EXPR, and the initialization of that variable, so basically what {FOR,WHILE}_BODY has when we encounter the function dealing with this, except one optional CLEANUP_STMT at the end which holds cleanups for the variable if it needs to be destructed. This CLEANUP_STMT is removed and the actual cleanup saved into another new operand, *_COND_CLEANUP. The c-family loop genericization handles such loops roughly the way https://eel.is/c++draft/stmt.for and https://eel.is/c++draft/stmt.while specifies, so the body is (if *_COND_CLEANUP is non-NULL) { A x = 42; try { if (!x) break; body; cont_label: expr; } finally { cleanup; } } and otherwise { A x = 42; if (!x) break; body; cont_label: expr; } i.e. the *_COND, *_BODY, optional continue label, FOR_EXPR are appended into the body of the *_COND_PREP BIND_EXPR. And when doing constexpr evaluation of such FOR/WHILE loops, we treat it similarly, first evaluate *_COND_PREP except the for (tree decl = BIND_EXPR_VARS (t); decl; decl = DECL_CHAIN (decl)) destroy_value_checked (ctx, decl, non_constant_p); part of BIND_EXPR handling for it, then evaluate *_COND (and decide based on whether it was false or true like before), then *_BODY, then FOR_EXPR, then *_COND_CLEANUP (similarly to the way how CLEANUP_STMT handling handles that) and finally do those destroy_value_checked. Note, the constexpr-86769.C testcase FAILs with both clang++ and MSVC (note, the rest of tests PASS with clang++) but I believe it must be just a bug in those compilers, new int is done in all the constructors and delete is done in the destructor, so when clang++ reports one of the new int weren't deallocated during constexpr evaluation I don't see how that would be possible. When the same test has all the constexpr stuff, all the new int are properly deleted at runtime when compiled by both compilers and valgrind is happy about it, no leaks. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2025-02-05 Jakub Jelinek PR c++/86769 gcc/c-family/ * c-common.def (FOR_STMT): Add 2 operands and document them. (WHILE_STMT): Likewise. * c-common.h (WHILE_COND_PREP, WHILE_COND_CLEANUP): Define. (FOR_COND_PREP, FOR_COND_CLEANUP): Define. * c-gimplify.cc (genericize_c_loop): Add COND_PREP and COND_CLEANUP arguments, handle them if they are non-NULL. (genericize_for_stmt, genericize_while_stmt, genericize_do_stmt): Adjust callers. gcc/c/ * c-parser.cc (c_parser_while_statement): Add 2 further NULL_TREE operands to build_stmt. (c_parser_for_statement): Likewise. gcc/cp/ * semantics.cc (simplify_loop_decl_cond): Remove. (adjust_loop_decl_cond): New function.
[PATCH] c++, v2: Fix up handling of for/while loops with declarations in condition [PR86769]
On Thu, Feb 06, 2025 at 10:40:52AM -0500, Jason Merrill wrote: Thanks. > > @@ -7202,6 +7210,28 @@ cxx_eval_loop_expr (const constexpr_ctx > > cxx_eval_constant_expression (ctx, expr, vc_prvalue, > > non_constant_p, overflow_p, > > jump_target); > > + > > + if (cond_cleanup && !*non_constant_p) > > + { > > + iloc_sentinel ils (loc); > > What does this do, given that you're setting the locations on the cleanups? > > I guess this was copied from the CLEANUP_STMT handling, but I'm not sure why > it's there either; commenting it out there doesn't seem to break any > constexpr* tests. Indeed, it also looked weird to me. > Let's share this repeated code with the copy below by using a lambda, as in > the attached. So like this? So far tested with GXX_TESTSUITE_STDS=98,11,14,17,20,23,26 make check-g++ \ RUNTESTFLAGS="debug.exp=cleanup1.C dg.exp='name-independent-decl*.C redeclaration-*.C pr18770.C for*.C stmtexpr* constexpr-86769.C' old-deja.exp=cond.C" Ok for trunk if it passes full bootstrap/regtest? 2025-02-06 Jakub Jelinek Jason Merrill PR c++/86769 gcc/c-family/ * c-common.def (FOR_STMT): Add 2 operands and document them. (WHILE_STMT): Likewise. * c-common.h (WHILE_COND_PREP, WHILE_COND_CLEANUP): Define. (FOR_COND_PREP, FOR_COND_CLEANUP): Define. * c-gimplify.cc (genericize_c_loop): Add COND_PREP and COND_CLEANUP arguments, handle them if they are non-NULL. (genericize_for_stmt, genericize_while_stmt, genericize_do_stmt): Adjust callers. gcc/c/ * c-parser.cc (c_parser_while_statement): Add 2 further NULL_TREE operands to build_stmt. (c_parser_for_statement): Likewise. gcc/cp/ * semantics.cc (set_one_cleanup_loc): New function. (set_cleanup_locs): Use it. (simplify_loop_decl_cond): Remove. (adjust_loop_decl_cond): New function. (begin_while_stmt): Add 2 further NULL_TREE operands to build_stmt. (finish_while_stmt_cond): Call adjust_loop_decl_cond instead of simplify_loop_decl_cond. (finish_while_stmt): Call do_poplevel also on WHILE_COND_PREP if non-NULL and also use pop_stmt_list rather than do_poplevel for WHILE_BODY in that case. Call set_one_cleanup_loc. (begin_for_stmt): Add 2 further NULL_TREE operands to build_stmt. (finish_for_cond): Call adjust_loop_decl_cond instead of simplify_loop_decl_cond. (finish_for_stmt): Call do_poplevel also on FOR_COND_PREP if non-NULL and also use pop_stmt_list rather than do_poplevel for FOR_BODY in that case. Call set_one_cleanup_loc. * constexpr.cc (cxx_eval_loop_expr): Handle {WHILE,FOR}_COND_{PREP,CLEANUP}. (check_for_return_continue): Handle {WHILE,FOR}_COND_PREP. (potential_constant_expression_1): RECUR on {WHILE,FOR}_COND_{PREP,CLEANUP}. gcc/testsuite/ * g++.dg/diagnostic/redeclaration-7.C: New test. * g++.dg/expr/for3.C: New test. * g++.dg/expr/for4.C: New test. * g++.dg/expr/for5.C: New test. * g++.dg/expr/for6.C: New test. * g++.dg/expr/for7.C: New test. * g++.dg/expr/for8.C: New test. * g++.dg/ext/stmtexpr27.C: New test. * g++.dg/cpp2a/constexpr-86769.C: New test. * g++.dg/cpp26/name-independent-decl7.C: New test. * g++.dg/cpp26/name-independent-decl8.C: New test. --- gcc/c-family/c-common.def.jj2025-02-05 13:14:34.464202519 +0100 +++ gcc/c-family/c-common.def 2025-02-06 18:50:18.191080462 +0100 @@ -58,13 +58,14 @@ DEFTREECODE (SIZEOF_EXPR, "sizeof_expr", DEFTREECODE (PAREN_SIZEOF_EXPR, "paren_sizeof_expr", tcc_expression, 1) /* Used to represent a `for' statement. The operands are - FOR_INIT_STMT, FOR_COND, FOR_EXPR, FOR_BODY, FOR_SCOPE, and FOR_NAME - respectively. */ -DEFTREECODE (FOR_STMT, "for_stmt", tcc_statement, 6) + FOR_INIT_STMT, FOR_COND, FOR_EXPR, FOR_BODY, FOR_SCOPE, FOR_NAME, + FOR_COND_PREP and FOR_COND_CLEANUP, respectively. */ +DEFTREECODE (FOR_STMT, "for_stmt", tcc_statement, 8) /* Used to represent a 'while' statement. The operands are WHILE_COND, - WHILE_BODY, and WHILE_NAME, respectively. */ -DEFTREECODE (WHILE_STMT, "while_stmt", tcc_statement, 3) + WHILE_BODY, WHILE_NAME, WHILE_COND_PREP and WHILE_COND_CLEANUP, + respectively. */ +DEFTREECODE (WHILE_STMT, "while_stmt", tcc_statement, 5) /* Used to represent a 'do' statement. The operands are DO_COND, DO_BODY, and DO_NAME, respectively. */ --- gcc/c-family/c-common.h.jj 2025-02-05 13:14:34.505201941 +0100 +++ gcc/c-family/c-common.h 2025-02-06 18:50:18.198080368 +0100 @@ -1517,10 +1517,13 @@ extern tree build_userdef_literal (tree /* WHILE_STMT accessors. These give access to the condition of the - while statement, the body and name of the while statement, res
[PATCH] c++: Allow constexpr reads from volatile std::nullptr_t objects [PR118661]
Hi! As mentioned in the PR, https://eel.is/c++draft/conv.lval#note-1 says that even volatile reads from std::nullptr_t typed objects actually don't read anything and https://eel.is/c++draft/expr.const#10.9 says that even those are ok in constant expressions. So, the following patch adjusts the r9-4793 changes to have an exception for NULLPTR_TYPE. As [conv.lval]/3 also talks about accessing to inactive member, I've added testcase to cover that as well. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2025-02-06 Jakub Jelinek PR c++/118661 * constexpr.cc (potential_constant_expression_1): Don't diagnose lvalue-to-rvalue conversion of volatile lvalue if it has NULLPTR_TYPE. * decl2.cc (decl_maybe_constant_var_p): Return true for constexpr decls with NULLPTR_TYPE even if they are volatile. * g++.dg/cpp0x/constexpr-volatile4.C: New test. * g++.dg/cpp0x/constexpr-union9.C: New test. --- gcc/cp/constexpr.cc.jj 2025-02-05 13:14:34.771198185 +0100 +++ gcc/cp/constexpr.cc 2025-02-06 09:53:03.236587121 +0100 @@ -9717,7 +9717,8 @@ potential_constant_expression_1 (tree t, return true; if (TREE_THIS_VOLATILE (t) && want_rval - && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t))) + && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t)) + && TREE_CODE (TREE_TYPE (t)) != NULLPTR_TYPE) { if (flags & tf_error) constexpr_error (loc, fundef_p, "lvalue-to-rvalue conversion of " --- gcc/cp/decl2.cc.jj 2025-01-27 16:45:45.455970792 +0100 +++ gcc/cp/decl2.cc 2025-02-06 09:53:53.150890600 +0100 @@ -4985,7 +4985,8 @@ decl_maybe_constant_var_p (tree decl) tree type = TREE_TYPE (decl); if (!VAR_P (decl)) return false; - if (DECL_DECLARED_CONSTEXPR_P (decl) && !TREE_THIS_VOLATILE (decl)) + if (DECL_DECLARED_CONSTEXPR_P (decl) + && (!TREE_THIS_VOLATILE (decl) || TREE_CODE (type) == NULLPTR_TYPE)) return true; if (DECL_HAS_VALUE_EXPR_P (decl)) /* A proxy isn't constant. */ --- gcc/testsuite/g++.dg/cpp0x/constexpr-volatile4.C.jj 2025-02-06 09:50:43.339539282 +0100 +++ gcc/testsuite/g++.dg/cpp0x/constexpr-volatile4.C2025-02-06 09:50:16.071919784 +0100 @@ -0,0 +1,20 @@ +// PR c++/118661 +// { dg-do compile { target c++11 } } + +using nullptr_t = decltype (nullptr); +constexpr volatile nullptr_t a = {}; +constexpr nullptr_t b = a; + +constexpr nullptr_t +foo () +{ +#if __cplusplus >= 201402L + volatile nullptr_t c = {}; + return c; +#else + return nullptr; +#endif +} + +static_assert (b == nullptr, ""); +static_assert (foo () == nullptr, ""); --- gcc/testsuite/g++.dg/cpp0x/constexpr-union9.C.jj2025-02-06 09:57:46.149639270 +0100 +++ gcc/testsuite/g++.dg/cpp0x/constexpr-union9.C 2025-02-06 10:01:08.472815988 +0100 @@ -0,0 +1,16 @@ +// PR c++/118661 +// { dg-do compile { target c++11 } } + +using nullptr_t = decltype (nullptr); +union U { int i; nullptr_t n; }; +constexpr U u = { 42 }; +static_assert (u.n == nullptr, ""); + +#if __cplusplus >= 201402L +constexpr nullptr_t +foo () +{ + union U { int i; nullptr_t n; } u = { 42 }; + return u.n; +} +#endif Jakub
[PATCH] c++: Fix up name independent decl in structured binding handling in range for [PR115586]
Hi! cp_parser_range_for temporarily reverts IDENTIFIER_BINDING changes to hide the decls from the structured bindings from lookup during parsing of the expression after : If there are 2 or more name independent decls, we undo IDENTIFIER_BINDING for the same name multiple times, even when just one has been added (with a TREE_LIST inside of it as decl). The following patch fixes it by handling the _ name at most once, the later loop will DTRT then and just reinstall the temporarily hidden binding with the TREE_LIST in there. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2025-02-06 Jakub Jelinek PR c++/115586 * parser.cc (cp_parser_range_for): For name independent decls in structured bindings, only push the name/binding once per structured binding. * g++.dg/cpp26/name-independent-decl9.C: New test. * g++.dg/cpp26/name-independent-decl10.C: New test. --- gcc/cp/parser.cc.jj 2025-02-04 21:14:50.0 +0100 +++ gcc/cp/parser.cc2025-02-06 11:01:49.018050841 +0100 @@ -14570,9 +14570,19 @@ cp_parser_range_for (cp_parser *parser, decomp = &decomp_d; decomp->count = tree_to_uhwi (TREE_OPERAND (v, 1)) + 1; decomp->decl = d; + bool seen_name_independent_decl = false; for (unsigned int i = 0; i < decomp->count; i++, d = DECL_CHAIN (d)) { + if (name_independent_decl_p (d)) + { + /* If there is more than one _ decl in +the structured binding, just push and move it +away once. */ + if (seen_name_independent_decl) + continue; + seen_name_independent_decl = true; + } tree name = DECL_NAME (d); names.safe_push (name); bindings.safe_push (IDENTIFIER_BINDING (name)); --- gcc/testsuite/g++.dg/cpp26/name-independent-decl9.C.jj 2025-02-06 11:09:15.450816690 +0100 +++ gcc/testsuite/g++.dg/cpp26/name-independent-decl9.C 2025-02-06 11:19:01.704625049 +0100 @@ -0,0 +1,49 @@ +// PR c++/115586 +// { dg-do compile { target c++11 } } +// { dg-options "-Wunused-variable -Wunused-but-set-variable -Wunused-parameter -Wshadow" } + +struct S { int a, b, c; }; + +void +foo () +{ + S s[4] = {}; + for (auto [_, _, a] : s) // { dg-warning "name-independent declarations only available with" "" { target c++23_down } } +++a; // { dg-warning "structured bindings only available with" "" { target c++14_down } .-1 } + for (auto _ : s) +++_.b; + for (auto [a, _, b] : s) // { dg-warning "structured bindings only available with" "" { target c++14_down } } +++a; + for (auto [_, _, a] : s) // { dg-warning "name-independent declarations only available with" "" { target c++23_down } } +int _ = ++a; // { dg-warning "structured bindings only available with" "" { target c++14_down } .-1 } + for (auto _ : s) +{ + int _ = 2; // { dg-warning "name-independent declarations only available with" "" { target c++23_down } } +} + for (auto [a, _, b] : s) // { dg-warning "structured bindings only available with" "" { target c++14_down } } +int _ = ++b; // { dg-warning "name-independent declarations only available with" "" { target c++23_down } } +} + +void +bar () +{ + S s[4] = {}; + auto [_, c, _] = s[0]; // { dg-warning "name-independent declarations only available with" "" { target c++23_down } } + ++c; // { dg-warning "structured bindings only available with" "" { target c++14_down } .-1 } + for (auto [a, _, _] : s) // { dg-warning "name-independent declarations only available with" "" { target c++23_down } } +++a; // { dg-warning "structured bindings only available with" "" { target c++14_down } .-1 } + for (auto _ : s) +++_.c; + for (auto [a, b, _] : s) // { dg-warning "structured bindings only available with" "" { target c++14_down } } +++b; + for (auto [a, _, _] : s) // { dg-warning "name-independent declarations only available with" "" { target c++23_down } } +{ // { dg-warning "structured bindings only available with" "" { target c++14_down } .-1 } + int _ = ++a; +} + for (auto _ : s) +int _ = 5; // { dg-warning "name-independent declarations only available with" "" { target c++23_down } } + for (auto [a, b, _] : s) // { dg-warning "structured bindings only available with" "" { target c++14_down } } +{ + int _ = a + b; // { dg-warning "name-independent declarations only available with" "" { target c++23_down } } +} +} --- gcc/testsuite/g++.dg/cpp26/name-independent-decl10.C.jj 2025-02-06 11:23:38.787751260 +0100 +++ gcc/testsuit