[gcc(refs/users/meissner/heads/work206-submit)] Add p9-minmax-2
https://gcc.gnu.org/g:2ef6533424759273166222048010f2eb4cb665e4 commit 2ef6533424759273166222048010f2eb4cb665e4 Author: Michael Meissner Date: Thu May 29 02:39:16 2025 -0400 Add p9-minmax-2 Diff: --- gcc/testsuite/gcc.target/powerpc/p9-minmax-2.c | 190 + 1 file changed, 190 insertions(+) diff --git a/gcc/testsuite/gcc.target/powerpc/p9-minmax-2.c b/gcc/testsuite/gcc.target/powerpc/p9-minmax-2.c new file mode 100644 index ..0684eb501c56 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p9-minmax-2.c @@ -0,0 +1,190 @@ +/* { dg-do compile } */ +/* { dg-options "-mdejagnu-cpu=power9 -mvsx -O2 -mpower9-minmax" } */ +/* { dg-require-effective-target powerpc_vsx } */ +/* { dg-final { scan-assembler-not "fsel" } } */ +/* { dg-final { scan-assembler "xscmpeqdp" } } */ +/* { dg-final { scan-assembler "xscmpgtdp" } } */ +/* { dg-final { scan-assembler-not "xscmpodp" } } */ +/* { dg-final { scan-assembler-not "xscmpudp" } } */ +/* { dg-final { scan-assembler "xsmaxcdp" } } */ +/* { dg-final { scan-assembler-not "xsmaxdp" } } */ +/* { dg-final { scan-assembler "xsmincdp" } } */ +/* { dg-final { scan-assembler-not "xsmindp" } } */ +/* { dg-final { scan-assembler "xxsel" } } */ + +/* Due to NaN support, <= and >= are not handled presently unless -ffast-math + is used. At some point this will be fixed and the xscmpgedp instruction can + be generated normally. The <= and >= tests are bracketed with + #ifdef DO_GE_LE. */ + +#ifdef DO_GE_LE +double +dbl_max1 (double a, double b) +{ + return (a >= b) ? a : b; +} +#endif + +double +dbl_max2 (double a, double b) +{ + return (a > b) ? a : b; +} + +double +dbl_min1 (double a, double b) +{ + return (a < b) ? a : b; +} + +#ifdef DO_GE_LE +double +dbl_min2 (double a, double b) +{ + return (a <= b) ? a : b; +} +#endif + +double +dbl_cmp_eq (double a, double b, double c, double d) +{ + return (a == b) ? c : d; +} + +double +dbl_cmp_ne (double a, double b, double c, double d) +{ + return (a != b) ? 
c : d; +} + +double +dbl_cmp_gt (double a, double b, double c, double d) +{ + return (a > b) ? c : d; +} + +#ifdef DO_GE_LE +double +dbl_cmp_ge (double a, double b, double c, double d) +{ + return (a >= b) ? c : d; +} +#endif + +double +dbl_cmp_lt (double a, double b, double c, double d) +{ + return (a < b) ? c : d; +} + +#ifdef DO_GE_LE +double +dbl_cmp_le (double a, double b, double c, double d) +{ + return (a <= b) ? c : d; +} +#endif + +#ifdef DO_GE_LE +float +flt_max1 (float a, float b) +{ + return (a >= b) ? a : b; +} +#endif + +float +flt_max2 (float a, float b) +{ + return (a > b) ? a : b; +} + +float +flt_min1 (float a, float b) +{ + return (a < b) ? a : b; +} + +#ifdef DO_GE_LE +float +flt_min2 (float a, float b) +{ + return (a <= b) ? a : b; +} +#endif + +float +flt_cmp_eq (float a, float b, float c, float d) +{ + return (a == b) ? c : d; +} + +float +flt_cmp_ne (float a, float b, float c, float d) +{ + return (a != b) ? c : d; +} + +float +flt_cmp_gt (float a, float b, float c, float d) +{ + return (a > b) ? c : d; +} + +#ifdef DO_GE_LE +float +flt_cmp_ge (float a, float b, float c, float d) +{ + return (a >= b) ? c : d; +} +#endif + +float +flt_cmp_lt (float a, float b, float c, float d) +{ + return (a < b) ? c : d; +} + +#ifdef DO_GE_LE +float +flt_cmp_le (float a, float b, float c, float d) +{ + return (a <= b) ? c : d; +} +#endif + +double +dbl_flt_max1 (float a, float b) +{ + return (a > b) ? a : b; +} + +double +dbl_flt_max2 (double a, float b) +{ + return (a > b) ? a : b; +} + +double +dbl_flt_max3 (float a, double b) +{ + return (a > b) ? a : b; +} + +double +dbl_flt_min1 (float a, float b) +{ + return (a < b) ? a : b; +} + +double +dbl_flt_min2 (double a, float b) +{ + return (a < b) ? a : b; +} + +double +dbl_flt_min3 (float a, double b) +{ + return (a < b) ? a : b; +}
[gcc(refs/users/meissner/heads/work206-submit)] Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.
https://gcc.gnu.org/g:2ecab381b9eb2bc22073f96d5fc97e620646fae1 commit 2ecab381b9eb2bc22073f96d5fc97e620646fae1 Author: Michael Meissner Date: Thu May 29 02:40:48 2025 -0400 Fix PR 118541, do not generate unordered fp cmoves for IEEE compares. In bug PR target/118541 on power9, power10, and power11 systems, for the function: extern double __ieee754_acos (double); double __acospi (double x) { double ret = __ieee754_acos (x) / 3.14; return __builtin_isgreater (ret, 1.0) ? 1.0 : ret; } GCC currently generates the following code: Power9: bl __ieee754_acos; nop; addis 9,2,.LC2@toc@ha; addi 1,1,32; lfd 0,.LC2@toc@l(9); addis 9,2,.LC0@toc@ha; ld 0,16(1); lfd 12,.LC0@toc@l(9); fdiv 0,1,0; mtlr 0; xscmpgtdp 1,0,12; xxsel 1,0,12,1; blr Power10 and Power11: bl __ieee754_acos@notoc; plfd 0,.LC0@pcrel; xxspltidp 12,1065353216; addi 1,1,32; ld 0,16(1); fdiv 0,1,0; mtlr 0; xscmpgtdp 1,0,12; xxsel 1,0,12,1; blr This is because ifcvt.c optimizes the conditional floating point move to use the XSCMPGTDP instruction. However, the XSCMPGTDP instruction traps if one of the arguments is a signaling NaN. This patch disables generating XSCMP{EQ,GT,GE}{DP,QP} instructions unless -ffinite-math-only is in effect so that we do not get a trap. 2025-05-28 Michael Meissner gcc/ PR target/118541 * config/rs6000/rs6000.cc (have_compare_and_set_mask): Don't do compare and set mask operations unless -ffinite-math-only. * config/rs6000/rs6000.md (movcc_p9): Disable generating XSCMP{EQ,GT,GE}{DP,QP} unless -ffinite-math-only is in effect. (movcc_invert_p9): Likewise. (fpmask, SFDF iterator): Likewise. (xxsel, SFDF iterator): Likewise. (movcc, IEEE128 iterator): Likewise. (movcc_p10): Likewise. (movcc_invert_p10): Likewise. (fpmask, IEEE128 iterator): Likewise. (xxsel, IEEE128 iterator): Likewise. gcc/testsuite/ PR target/118541 * gcc.target/powerpc/float128-cmove.c: Change optimization flag to -Ofast instead of -O2. * gcc.target/powerpc/float128-minmax-3.c: Likewise. 
* gcc.target/powerpc/p9-minmax-2.c: Delete test, the code is no longer valid unless NaNs are not handled. * gcc.target/powerpc/pr118541-1.c: New test. * gcc.target/powerpc/pr118541-2.c: Likewise. Diff: --- gcc/config/rs6000/rs6000.cc| 8 +- gcc/config/rs6000/rs6000.md| 27 ++- gcc/testsuite/gcc.target/powerpc/float128-cmove.c | 6 +- .../gcc.target/powerpc/float128-minmax-3.c | 6 +- gcc/testsuite/gcc.target/powerpc/p9-minmax-2.c | 190 - gcc/testsuite/gcc.target/powerpc/pr118541-1.c | 28 +++ gcc/testsuite/gcc.target/powerpc/pr118541-2.c | 26 +++ 7 files changed, 89 insertions(+), 202 deletions(-) diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 7ea377781034..0ef509f06230 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -16409,11 +16409,17 @@ rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) /* Helper function to return true if the target has instructions to do a compare and set mask instruction that can be used with XXSEL to implement a conditional move. It is also assumed that such a target also supports the - "C" minimum and maximum instructions. */ + "C" minimum and maximum instructions. + + However, these instructions will trap if given a signaling NaN, so we can + only use them if NaNs are not expected. */ static bool have_compare_and_set_mask (machine_mode mode) { + if (!flag_finite_math_only) +return false; + switch (mode) { case E_SFmode: diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 9c718ca2a226..c13101eb4318 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -5653,6 +5653,10 @@ "fsel %0,%1,%2,%3" [(set_attr "type" "fp")]) +;; On power9, we can generate XSCMP{EQ,GT,GE}DP and XXSEL to do a floating +;; point conditional move. However, these instructions trap if one of the +;; arguments is a signalling NaN. Therefore we can only do this optimize if +;; NaNs are not expected in the code. (define_insn_and_split "*movcc
[gcc(refs/users/meissner/heads/work206-submit)] Update ChangeLog.*
https://gcc.gnu.org/g:c43cf0cc1f466f2b9f85e8de618073805db4e02f commit c43cf0cc1f466f2b9f85e8de618073805db4e02f Author: Michael Meissner Date: Thu May 29 02:43:22 2025 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.submit | 82 1 file changed, 82 insertions(+) diff --git a/gcc/ChangeLog.submit b/gcc/ChangeLog.submit index c82dde02db6d..41a23dc58d71 100644 --- a/gcc/ChangeLog.submit +++ b/gcc/ChangeLog.submit @@ -1,5 +1,87 @@ + Branch work206-submit, patch #201 + +Fix PR 118541, do not generate unordered fp cmoves for IEEE compares. + +In bug PR target/118541 on power9, power10, and power11 systems, for the +function: + +extern double __ieee754_acos (double); + +double +__acospi (double x) +{ + double ret = __ieee754_acos (x) / 3.14; + return __builtin_isgreater (ret, 1.0) ? 1.0 : ret; +} + +GCC currently generates the following code: + +Power9 Power10 and Power11 +== === +bl __ieee754_acos bl __ieee754_acos@notoc +nop plfd 0,.LC0@pcrel +addis 9,2,.LC2@toc@ha xxspltidp 12,1065353216 +addi 1,1,32 addi 1,1,32 +lfd 0,.LC2@toc@l(9) ld 0,16(1) +addis 9,2,.LC0@toc@ha fdiv 0,1,0 +ld 0,16(1) mtlr 0 +lfd 12,.LC0@toc@l(9)xscmpgtdp 1,0,12 +fdiv 0,1,0 xxsel 1,0,12,1 +mtlr 0 blr +xscmpgtdp 1,0,12 +xxsel 1,0,12,1 +blr + +This is because ifcvt.c optimizes the conditional floating point move to use the +XSCMPGTDP instruction. + +However, the XSCMPGTDP instruction traps if one of the arguments is a signaling +NaN. This patch disables generating XSCMP{EQ,GT,GE}{DP,QP} instructions unless +-ffinite-math-only is in effect so that we do not get a trap. + +2025-05-28 Michael Meissner + +gcc/ + + PR target/118541 + * config/rs6000/rs6000.cc (have_compare_and_set_mask): Don't do compare + and set mask operations unless -ffinite-math-only. + * config/rs6000/rs6000.md (movcc_p9): Disable + generating XSCMP{EQ,GT,GE}{DP,QP} unless -ffinite-math-only is in + effect. + (movcc_invert_p9): Likewise. + (fpmask, SFDF iterator): Likewise. + (xxsel, SFDF iterator): Likewise. 
+ (movcc, IEEE128 iterator): Likewise. + (movcc_p10): Likewise. + (movcc_invert_p10): Likewise. + (fpmask, IEEE128 iterator): Likewise. + (xxsel, IEEE128 iterator): Likewise. + +gcc/testsuite/ + + PR target/118541 + * gcc.target/powerpc/float128-cmove.c: Change optimization flag to + -Ofast instead of -O2. + * gcc.target/powerpc/float128-minmax-3.: Likewise. + * gcc.target/powerpc/p9-minmax-2.c: Delete test, the code is no longer + valid unless NaNs are not handled. + * gcc.target/powerpc/pr118541-1.c: New test. + * gcc.target/powerpc/pr118541-2.c: Likewise. + + Branch work206-submit, patch #200 was revoked + Branch work206-submit, baseline +Add ChangeLog.submit and update REVISION. + +2025-05-19 Michael Meissner + +gcc/ + + * ChangeLog.submit: New file for branch. + * REVISION: Update. + 2025-05-19 Michael Meissner Clone branch
[gcc r16-948] Fortran: Fix ChangeLog.
https://gcc.gnu.org/g:bd9d8f9b294d9aef8799227234bd09e26a9a6640 commit r16-948-gbd9d8f9b294d9aef8799227234bd09e26a9a6640 Author: Jerry DeLisle Date: Wed May 28 21:04:13 2025 -0700 Fortran: Fix ChangeLog. PR fortran/119856 gcc/fortran/ChangeLog: * ChangeLog: Fix PR number in log. Diff: --- gcc/fortran/ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 0c4edf84ef43..3496d06e0305 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -6,7 +6,7 @@ 2025-05-28 Jerry DeLisle - PR fortran/119586 + PR fortran/119856 * io.cc: Set missing comma error checks to STD_STD_LEGACY. 2025-05-28 Yuao Ma
[gcc(refs/users/meissner/heads/work206-submit)] Revert changes
https://gcc.gnu.org/g:fd9674dfa52a259a4788814bd16253bb7b5861e3 commit fd9674dfa52a259a4788814bd16253bb7b5861e3 Author: Michael Meissner Date: Thu May 29 02:38:13 2025 -0400 Revert changes Diff: --- gcc/config/rs6000/rs6000.cc| 8 +-- gcc/config/rs6000/rs6000.md| 27 -- gcc/testsuite/gcc.target/powerpc/float128-cmove.c | 6 + .../gcc.target/powerpc/float128-minmax-3.c | 6 + 4 files changed, 12 insertions(+), 35 deletions(-) diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 0ef509f06230..7ea377781034 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -16409,17 +16409,11 @@ rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) /* Helper function to return true if the target has instructions to do a compare and set mask instruction that can be used with XXSEL to implement a conditional move. It is also assumed that such a target also supports the - "C" minimum and maximum instructions. - - However, these instructions will trap if given a signaling NaN, so we can - only use them if NaNs are not expected. */ + "C" minimum and maximum instructions. */ static bool have_compare_and_set_mask (machine_mode mode) { - if (!flag_finite_math_only) -return false; - switch (mode) { case E_SFmode: diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index c13101eb4318..9c718ca2a226 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -5653,10 +5653,6 @@ "fsel %0,%1,%2,%3" [(set_attr "type" "fp")]) -;; On power9, we can generate XSCMP{EQ,GT,GE}DP and XXSEL to do a floating -;; point conditional move. However, these instructions trap if one of the -;; arguments is a signalling NaN. Therefore we can only do this optimize if -;; NaNs are not expected in the code. 
(define_insn_and_split "*movcc_p9" [(set (match_operand:SFDF 0 "vsx_register_operand" "=&wa,wa") (if_then_else:SFDF @@ -5666,7 +5662,7 @@ (match_operand:SFDF 4 "vsx_register_operand" "wa,wa") (match_operand:SFDF 5 "vsx_register_operand" "wa,wa"))) (clobber (match_scratch:V2DI 6 "=0,&wa"))] - "TARGET_P9_MINMAX && flag_finite_math_only" + "TARGET_P9_MINMAX" "#" "&& 1" [(set (match_dup 6) @@ -5698,7 +5694,7 @@ (match_operand:SFDF 4 "vsx_register_operand" "wa,wa") (match_operand:SFDF 5 "vsx_register_operand" "wa,wa"))) (clobber (match_scratch:V2DI 6 "=0,&wa"))] - "TARGET_P9_MINMAX && flag_finite_math_only" + "TARGET_P9_MINMAX" "#" "&& 1" [(set (match_dup 6) @@ -5733,7 +5729,7 @@ (match_operand:SFDF 3 "vsx_register_operand" "wa")]) (match_operand:V2DI 4 "all_ones_constant" "") (match_operand:V2DI 5 "zero_constant" "")))] - "TARGET_P9_MINMAX && flag_finite_math_only" + "TARGET_P9_MINMAX" "xscmp%V1dp %x0,%x2,%x3" [(set_attr "type" "fpcompare")]) @@ -5743,23 +5739,18 @@ (match_operand:V2DI 2 "zero_constant" "")) (match_operand:SFDF 3 "vsx_register_operand" "wa") (match_operand:SFDF 4 "vsx_register_operand" "wa")))] - "TARGET_P9_MINMAX && flag_finite_math_only" + "TARGET_P9_MINMAX" "xxsel %x0,%x4,%x3,%x1" [(set_attr "type" "vecmove")]) ;; Support for ISA 3.1 IEEE 128-bit conditional move. The mode used in the ;; comparison must be the same as used in the move. -;; -;; On power10, we can generate XSCMP{EQ,GT,GE}QP and XXSEL to do a floating -;; point conditional move for IEEE 128-bit values. However, these instructions -;; trap if one of the arguments is a signalling NaN. Therefore we can only do -;; this optimize if NaNs are not expected in the code. 
(define_expand "movcc" [(set (match_operand:IEEE128 0 "gpc_reg_operand") (if_then_else:IEEE128 (match_operand 1 "comparison_operator") (match_operand:IEEE128 2 "gpc_reg_operand") (match_operand:IEEE128 3 "gpc_reg_operand")))] - "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only" + "TARGET_POWER10 && TARGET_FLOAT128_HW" { if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3])) DONE; @@ -5776,7 +5767,7 @@ (match_operand:IEEE128 4 "altivec_register_operand" "v,v") (match_operand:IEEE128 5 "altivec_register_operand" "v,v"))) (clobber (match_scratch:V2DI 6 "=0,&v"))] - "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only" + "TARGET_POWER10 && TARGET_FLOAT128_HW" "#" "&& 1" [(set (match_dup 6) @@ -5808,7 +5799,7 @@ (match_operand:IEEE128 4 "altivec_register_operand" "v,v") (match_operand:IEEE128 5 "altivec_register_operand" "v,v"))) (clobber (match_scratch:V2DI 6 "=0,&v"))] - "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only" + "TARGET_POWER10 && TARG
[gcc r16-947] RISC-V: Add test for vec_duplicate + vmul.vv combine case 1 with GR2VR cost 0, 1 and 2
https://gcc.gnu.org/g:663cb52b0e8df70830a8def86a5254a59d373732 commit r16-947-g663cb52b0e8df70830a8def86a5254a59d373732 Author: Pan Li Date: Wed May 28 16:22:04 2025 +0800 RISC-V: Add test for vec_duplicate + vmul.vv combine case 1 with GR2VR cost 0, 1 and 2 Add asm dump check tests for the vec_duplicate + vmul.vv combine to vmul.vx, with GR2VR costs of 0, 1 and 2. The test suites below pass with this patch. * The rv64gcv full regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c: Add asm check for vmul.vx combine. * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i8.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i32.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i64.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i8.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i32.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i64.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i8.c: Ditto. 
Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i8.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i16.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i32.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i64.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i8.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i16.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i32.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i64.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i8.c | 2 ++ 12 files changed, 24 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c index 58dc66dcec9c..a1b24f710e02 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c @@ -11,6 +11,7 @@ DEF_VX_BINARY_REVERSE_CASE_1_WRAP(T, -, rsub, VX_BINARY_REVERSE_BODY_X16) DEF_VX_BINARY_CASE_1_WRAP(T, &, and, VX_BINARY_BODY_X16) DEF_VX_BINARY_CASE_1_WRAP(T, |, or, VX_BINARY_BODY_X16) DEF_VX_BINARY_CASE_1_WRAP(T, ^, xor, VX_BINARY_BODY_X16) +DEF_VX_BINARY_CASE_1_WRAP(T, *, mul, VX_BINARY_BODY_X16) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ @@ -18,3 +19,4 @@ DEF_VX_BINARY_CASE_1_WRAP(T, ^, xor, VX_BINARY_BODY_X16) /* { dg-final { scan-assembler {vand.vx} } } */ /* { dg-final { scan-assembler {vor.vx} } } */ /* { dg-final { scan-assembler {vxor.vx} } } */ +/* { dg-final { scan-assembler {vmul.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c index 
b13ec16983c3..53bd7448bfe0 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c @@ -11,6 +11,7 @@ DEF_VX_BINARY_REVERSE_CASE_1_WRAP(T, -, rsub, VX_BINARY_REVERSE_BODY_X4) DEF_VX_BINARY_CASE_1_WRAP(T, &, and, VX_BINARY_BODY_X4) DEF_VX_BINARY_CASE_1_WRAP(T, |, or, VX_BINARY_BODY_X4) DEF_VX_BINARY_CASE_1_WRAP(T, ^, xor, VX_BINARY_BODY_X4) +DEF_VX_BINARY_CASE_1_WRAP(T, *, mul, VX_BINARY_BODY_X4) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ @@ -18,3 +19,4 @@ DEF_VX_BINARY_CASE_1_WRAP(T, ^, xor, VX_BINARY_BODY_X4) /* { dg-final { scan-assembler {vand.vx} } } */ /* { dg-final { scan-assembler {vor.vx} } } */ /* { dg-final { scan-assembler {vxor.vx} } } */ +/* { dg-final { scan-assembler {vmul.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c index cd861a4ba6ba..73cb89d2ad69 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c @@ -11,6 +11,7 @@ DEF_VX_BINARY_REVERSE_CASE_1_WRAP(T, -, rsub, VX_BINARY_REVERSE_BODY) DEF_VX_BINARY_CASE_1_WRAP(T, &, and, VX_BINARY_BODY) DEF_VX_BINARY_CASE_1_WRAP(T, |, or, VX_BINARY_BODY) DEF_VX_BINARY_CASE_1_WRAP(T, ^, xor, VX_BINARY_BODY) +DEF_VX_BINARY_CASE_1_WRAP(T, *, mul, VX_BINARY_BODY) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ @@ -18,3 +19,
[gcc r16-946] RISC-V: Add test for vec_duplicate + vmul.vv combine case 0 with GR2VR cost 0, 2 and 15
https://gcc.gnu.org/g:b36bde2fc5cc7048f294adee45fb9a0be0092d13 commit r16-946-gb36bde2fc5cc7048f294adee45fb9a0be0092d13 Author: Pan Li Date: Wed May 28 16:20:32 2025 +0800 RISC-V: Add test for vec_duplicate + vmul.vv combine case 0 with GR2VR cost 0, 2 and 15 Add asm dump check tests for the vec_duplicate + vmul.vv combine to vmul.vx, with GR2VR costs of 0, 2 and 15. The test suites below pass with this patch. * The rv64gcv full regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c: Add asm check for vmul.vx combine. * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i8.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i32.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i64.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i8.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i32.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i64.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i8.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h: Add test data for vmul run test. * gcc.target/riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i16.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i32.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i64.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i8.c: New test. 
Signed-off-by: Pan Li Diff: --- .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i8.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i16.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i32.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i64.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-i8.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i16.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i32.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i64.c | 2 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-i8.c | 2 + .../riscv/rvv/autovec/vx_vf/vx_binary_data.h | 196 + .../riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i16.c| 15 ++ .../riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i32.c| 15 ++ .../riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i64.c| 15 ++ .../riscv/rvv/autovec/vx_vf/vx_vmul-run-1-i8.c | 15 ++ 17 files changed, 280 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c index b9be0f674aec..144d1bad6afc 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c @@ -11,6 +11,7 @@ DEF_VX_BINARY_REVERSE_CASE_0_WRAP(T, -, rsub); DEF_VX_BINARY_CASE_0_WRAP(T, &, and) DEF_VX_BINARY_CASE_0_WRAP(T, |, or) DEF_VX_BINARY_CASE_0_WRAP(T, ^, xor) +DEF_VX_BINARY_CASE_0_WRAP(T, *, mul) /* { dg-final { scan-assembler-times {vadd.vx} 1 } } */ /* { dg-final { scan-assembler-times {vsub.vx} 1 } } */ @@ -18,3 +19,4 @@ DEF_VX_BINARY_CASE_0_WRAP(T, ^, xor) /* { dg-final { scan-assembler-times {vand.vx} 1 } } */ /* { dg-final { scan-assembler-times {vor.vx} 1 } } */ /* { dg-final { scan-assembler-times {vxor.vx} 1 } } */ +/* { dg-final { scan-assembler-times {vmul.vx} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c index 2a84980cb503..74d35d13cf6a 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c @@ -11,6 +11,7 @@ DEF_VX_BINARY_REVERSE_CASE_0_WRAP(T, -, rsub); DEF_VX_BINARY_CASE_0_WRAP(T, &, and) DEF_VX_BINARY_CASE_0_WRAP(T, |, or) DEF_VX_BINARY_CASE_0_WRAP(T, ^, xor) +DEF_VX_BINARY_CASE_0_WRAP(T, *, mul) /* { dg-final { scan-assembler-times {vadd.vx} 1 } } */ /* { dg-final { scan-assembler-times {vsub.vx} 1 } } */ @@ -18,3 +19,4 @@ DEF_VX_BINARY_CASE_0_WRAP(T, ^, xor) /* { dg-final { scan-assembler-times {vand.vx} 1 } } */ /* { dg-final { scan-assembler-times {vor.vx} 1 } } */ /* { dg-final { scan-assembler-times {vxor.vx} 1 } } */ +/* { dg-final { scan-assembler-times {vmul.vx} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c index 9c7ea5fa4132..ac512ff7fbd9 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/
[gcc r16-945] RISC-V: Combine vec_duplicate + vmul.vv to vmul.vx on GR2VR cost
https://gcc.gnu.org/g:2e4267a6fe143bd72376653812f59f343cb1c101 commit r16-945-g2e4267a6fe143bd72376653812f59f343cb1c101 Author: Pan Li Date: Wed May 28 16:16:49 2025 +0800 RISC-V: Combine vec_duplicate + vmul.vv to vmul.vx on GR2VR cost This patch combines vec_duplicate + vmul.vv into vmul.vx, as in the example code below. The related pattern depends on the GR2VR cost of vec_duplicate: the late-combine pass performs the combination if the GR2VR cost is zero, and rejects it if the GR2VR cost is greater than zero. Assume we have the example code below and the GR2VR cost is 0. #define DEF_VX_BINARY(T, OP) \ void \ test_vx_binary (T * restrict out, T * restrict in, T x, unsigned n) \ { \ for (unsigned i = 0; i < n; i++) \ out[i] = in[i] OP x; \ } DEF_VX_BINARY(int32_t, *) Before this patch: 10 │ test_vx_binary_mul_int32_t_case_0: 11 │ beq a3,zero,.L8 12 │ vsetvli a5,zero,e32,m1,ta,ma 13 │ vmv.v.x v2,a2 14 │ slli a3,a3,32 15 │ srli a3,a3,32 16 │ .L3: 17 │ vsetvli a5,a3,e32,m1,ta,ma 18 │ vle32.v v1,0(a1) 19 │ slli a4,a5,2 20 │ sub a3,a3,a5 21 │ add a1,a1,a4 22 │ vmul.vv v1,v1,v2 23 │ vse32.v v1,0(a0) 24 │ add a0,a0,a4 25 │ bne a3,zero,.L3 After this patch: 10 │ test_vx_binary_mul_int32_t_case_0: 11 │ beq a3,zero,.L8 12 │ slli a3,a3,32 13 │ srli a3,a3,32 14 │ .L3: 15 │ vsetvli a5,a3,e32,m1,ta,ma 16 │ vle32.v v1,0(a1) 17 │ slli a4,a5,2 18 │ sub a3,a3,a5 19 │ add a1,a1,a4 20 │ vmul.vx v1,v1,a2 21 │ vse32.v v1,0(a0) 22 │ add a0,a0,a4 23 │ bne a3,zero,.L3 The test suites below pass with this patch. * The rv64gcv full regression test. gcc/ChangeLog: * config/riscv/riscv-v.cc (expand_vx_binary_vec_dup_vec): Add new case for MULT op. (expand_vx_binary_vec_vec_dup): Ditto. * config/riscv/riscv.cc (riscv_rtx_costs): Ditto. * config/riscv/vector-iterators.md: Add new op mult to no_shift_vx_ops. 
Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv-v.cc | 2 ++ gcc/config/riscv/riscv.cc| 1 + gcc/config/riscv/vector-iterators.md | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index eedcda2b8ff5..616279757250 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -5536,6 +5536,7 @@ expand_vx_binary_vec_dup_vec (rtx op_0, rtx op_1, rtx op_2, case AND: case IOR: case XOR: +case MULT: icode = code_for_pred_scalar (code, mode); break; case MINUS: @@ -5565,6 +5566,7 @@ expand_vx_binary_vec_vec_dup (rtx op_0, rtx op_1, rtx op_2, case AND: case IOR: case XOR: +case MULT: icode = code_for_pred_scalar (code, mode); break; default: diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 2cc69b4458a9..d3cee96d3aad 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -3919,6 +3919,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN case AND: case IOR: case XOR: + case MULT: { rtx op_0 = XEXP (x, 0); rtx op_1 = XEXP (x, 1); diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index 77d72a78c1be..2bd99ee5372b 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -4042,7 +4042,7 @@ ]) (define_code_iterator any_int_binop_no_shift_vx [ - plus minus and ior xor + plus minus and ior xor mult ]) (define_code_iterator any_int_unop [neg not])
[gcc r16-949] RISC-V: Add minimal support of double trap extension 1.0
https://gcc.gnu.org/g:077cdc9dc0ff7f5c9d12829bfed220d1e63b1525 commit r16-949-g077cdc9dc0ff7f5c9d12829bfed220d1e63b1525 Author: Jerry Zhang Jian Date: Wed May 28 10:17:36 2025 +0800 RISC-V: Add minimal support of double trap extension 1.0 Add support of double trap extension [1], enabling GCC to recognize the following extensions at compile time. New extensions: - ssdbltrp - smdbltrp [1] https://github.com/riscv/riscv-double-trap/releases/download/v1.0/riscv-double-trap.pdf gcc/ChangeLog: * config/riscv/riscv-ext.def: New extensions * config/riscv/riscv-ext.opt: Auto re-generated * doc/riscv-ext.texi: Auto re-generated gcc/testsuite/ChangeLog: * gcc.target/riscv/arch-57.c: New test * gcc.target/riscv/arch-58.c: New test Signed-off-by: Jerry Zhang Jian Diff: --- gcc/config/riscv/riscv-ext.def | 26 ++ gcc/config/riscv/riscv-ext.opt | 4 gcc/doc/riscv-ext.texi | 8 gcc/testsuite/gcc.target/riscv/arch-57.c | 6 ++ gcc/testsuite/gcc.target/riscv/arch-58.c | 6 ++ 5 files changed, 50 insertions(+) diff --git a/gcc/config/riscv/riscv-ext.def b/gcc/config/riscv/riscv-ext.def index 97b576617add..dbda8ded3974 100644 --- a/gcc/config/riscv/riscv-ext.def +++ b/gcc/config/riscv/riscv-ext.def @@ -1727,6 +1727,19 @@ DEFINE_RISCV_EXT( /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, /* EXTRA_EXTENSION_FLAGS */ 0) +DEFINE_RISCV_EXT( + /* NAME */ smdbltrp, + /* UPPERCAE_NAME */ SMDBLTRP, + /* FULL_NAME */ "Double Trap Extensions", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sm, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + DEFINE_RISCV_EXT( /* NAME */ ssaia, /* UPPERCAE_NAME */ SSAIA, @@ -1818,6 +1831,19 @@ DEFINE_RISCV_EXT( /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, /* EXTRA_EXTENSION_FLAGS */ 0) +DEFINE_RISCV_EXT( + /* NAME */ ssdbltrp, + /* UPPERCAE_NAME */ SSDBLTRP, + /* FULL_NAME */ "Double Trap 
Extensions", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ ss, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + DEFINE_RISCV_EXT( /* NAME */ supm, /* UPPERCAE_NAME */ SUPM, diff --git a/gcc/config/riscv/riscv-ext.opt b/gcc/config/riscv/riscv-ext.opt index 9199aa31b420..5e9c5f56ad67 100644 --- a/gcc/config/riscv/riscv-ext.opt +++ b/gcc/config/riscv/riscv-ext.opt @@ -343,6 +343,8 @@ Mask(SMNPM) Var(riscv_sm_subext) Mask(SMSTATEEN) Var(riscv_sm_subext) +Mask(SMDBLTRP) Var(riscv_sm_subext) + Mask(SSAIA) Var(riscv_ss_subext) Mask(SSCOFPMF) Var(riscv_ss_subext) @@ -357,6 +359,8 @@ Mask(SSTC) Var(riscv_ss_subext) Mask(SSSTRICT) Var(riscv_ss_subext) +Mask(SSDBLTRP) Var(riscv_ss_subext) + Mask(SUPM) Var(riscv_su_subext) Mask(SVINVAL) Var(riscv_sv_subext) diff --git a/gcc/doc/riscv-ext.texi b/gcc/doc/riscv-ext.texi index bd3d29c75ab5..7a22d841d1b6 100644 --- a/gcc/doc/riscv-ext.texi +++ b/gcc/doc/riscv-ext.texi @@ -510,6 +510,10 @@ @tab 1.0 @tab State enable extension +@item smdbltrp +@tab 1.0 +@tab Double Trap Extensions + @item ssaia @tab 1.0 @tab Advanced interrupt architecture extension for supervisor-mode @@ -538,6 +542,10 @@ @tab 1.0 @tab ssstrict extension +@item ssdbltrp +@tab 1.0 +@tab Double Trap Extensions + @item supm @tab 1.0 @tab supm extension diff --git a/gcc/testsuite/gcc.target/riscv/arch-57.c b/gcc/testsuite/gcc.target/riscv/arch-57.c new file mode 100644 index ..08d3761a4700 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-57.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64i_smdbltrp -mabi=lp64" } */ + +void foo(){} + +/* { dg-final { scan-assembler ".attribute arch, \"rv64i2p1_zicsr2p0_smdbltrp1p0\"" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/arch-58.c b/gcc/testsuite/gcc.target/riscv/arch-58.c new file mode 100644 index ..1481da5ecdbe --- /dev/null +++ 
b/gcc/testsuite/gcc.target/riscv/arch-58.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64i_ssdbltrp -mabi=lp64" } */ + +void foo(){} + +/* { dg-final { scan-assembler ".attribute arch, \"rv64i2p1_zicsr2p0_ssdbltrp1p0\"" } } */
[gcc r16-926] Do not erase static profile by 0 autofdo profile
https://gcc.gnu.org/g:aa1b47aa41af2a6a094035bd446504cf352c4d71 commit r16-926-gaa1b47aa41af2a6a094035bd446504cf352c4d71 Author: Jan Hubicka Date: Wed May 28 12:15:32 2025 +0200 Do not erase static profile by 0 autofdo profile This patch makes auto-fdo more careful about keeping info we have from static profile prediction. If all counters in function are 0, we can keep original auto-fdo profile. Having all 0 profile is not very useful especially because 0 in autofdo is not very informative and the code still may have been executed in the train run. I added comment about adding GUESSED_GLOBAL0_AFDO which would still preserve info that the function is not hot in the profile, but I would like to do this incrementally. If function has non-zero counters, we can still keep info about zero being reliable from static prediction (i.e. after EH or with cold attribute). gcc/ChangeLog: * auto-profile.cc (update_count_by_afdo_count): New function. (afdo_set_bb_count): Add debug output; only set count if it is non-zero. (afdo_find_equiv_class): Add debug output. (afdo_calculate_branch_prob): Fix formatting. (afdo_annotate_cfg): Add debug output; do not erase static profile if autofdo profile is all 0. Diff: --- gcc/auto-profile.cc | 119 1 file changed, 101 insertions(+), 18 deletions(-) diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc index 91d829908d2e..3eefb970fde3 100644 --- a/gcc/auto-profile.cc +++ b/gcc/auto-profile.cc @@ -1061,6 +1061,19 @@ set_bb_annotated (basic_block bb, bb_set *annotated) annotated->insert (bb); } +/* Update profile_count by known autofdo count. */ +void +update_count_by_afdo_count (profile_count *count, gcov_type c) +{ + if (c) +*count = profile_count::from_gcov_type (c).afdo (); + /* In case we have guessed profile which is already zero, preserve + quality info. */ + else if (count->nonzero_p () + || count->quality () == GUESSED) +*count = profile_count::zero ().afdo (); +} + /* For a given BB, set its execution count. 
Attach value profile if a stmt is not in PROMOTED, because we only want to promote an indirect call once. Return TRUE if BB is annotated. */ @@ -1071,6 +1084,8 @@ afdo_set_bb_count (basic_block bb, const stmt_set &promoted) gimple_stmt_iterator gsi; gcov_type max_count = 0; bool has_annotated = false; + if (dump_file) +fprintf (dump_file, " Looking up AFDO count of bb %i\n", bb->index); for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) { @@ -1082,6 +1097,12 @@ afdo_set_bb_count (basic_block bb, const stmt_set &promoted) { if (info.count > max_count) max_count = info.count; + if (dump_file && info.count) + { + fprintf (dump_file, " count %" PRIu64 " in stmt: ", + (int64_t)info.count); + print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); + } has_annotated = true; if (info.targets.size () > 0 && promoted.find (stmt) == promoted.end ()) @@ -1112,6 +1133,13 @@ afdo_set_bb_count (basic_block bb, const stmt_set &promoted) { if (info.count > max_count) max_count = info.count; + if (dump_file && info.count) + { + fprintf (dump_file, + " phi op in BB %i with count %" PRIu64": ", + bb_succ->index, (int64_t)info.count); + print_gimple_stmt (dump_file, phi, 0, TDF_SLIM); + } has_annotated = true; } } @@ -1121,7 +1149,14 @@ afdo_set_bb_count (basic_block bb, const stmt_set &promoted) return false; } - bb->count = profile_count::from_gcov_type (max_count).afdo (); + if (max_count) +{ + update_count_by_afdo_count (&bb->count, max_count); + if (dump_file) + fprintf (dump_file, +" Annotated bb %i with count %" PRId64 "\n", +bb->index, (int64_t)max_count); +} return true; } @@ -1154,6 +1189,14 @@ afdo_find_equiv_class (bb_set *annotated_bb) bb1->aux = bb; if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb)) { + if (dump_file) + { + fprintf (dump_file, + " Copying count of bb %i to bb %i; count is:", + bb1->index, + bb->index); + bb1->count.dump (dump_file); + } bb->count = bb1->count; set_bb_annotated (bb, annotated_bb); } @@ -1166,6 +1209,14 @@ 
afdo_find_equiv_class (bb_set *annotated_bb)
[gcc r16-928] aarch64: Enable newly implemented features for FUJITSU-MONAKA
https://gcc.gnu.org/g:33ee574a7444b238005d89fdfdf2f21f50b1fc6e commit r16-928-g33ee574a7444b238005d89fdfdf2f21f50b1fc6e Author: Yuta Mukai Date: Fri May 23 04:51:11 2025 + aarch64: Enable newly implemented features for FUJITSU-MONAKA This patch enables newly implemented features in GCC (FAMINMAX, FP8FMA, FP8DOT2, FP8DOT4, LUT) for FUJITSU-MONAKA processor (-mcpu=fujitsu-monaka). 2025-05-23 Yuta Mukai gcc/ChangeLog: * config/aarch64/aarch64-cores.def (fujitsu-monaka): Update ISA features. Diff: --- gcc/config/aarch64/aarch64-cores.def | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index 12096300d012..24b7cd362aaf 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -132,7 +132,7 @@ AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, (CRYPTO, PROFI /* Fujitsu ('F') cores. */ AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, (F16, SVE), a64fx, 0x46, 0x001, -1) -AARCH64_CORE("fujitsu-monaka", fujitsu_monaka, cortexa57, V9_3A, (F16, FP8, LS64, RNG, CRYPTO, SVE2_AES, SVE2_BITPERM, SVE2_SHA3, SVE2_SM4), fujitsu_monaka, 0x46, 0x003, -1) +AARCH64_CORE("fujitsu-monaka", fujitsu_monaka, cortexa57, V9_3A, (F16, FAMINMAX, FP8FMA, FP8DOT2, FP8DOT4, LS64, LUT, RNG, CRYPTO, SVE2_AES, SVE2_BITPERM, SVE2_SHA3, SVE2_SM4), fujitsu_monaka, 0x46, 0x003, -1) /* HiSilicon ('H') cores. */ AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0xd01, -1)
[gcc r16-927] Fix profile_probability quality of switch
https://gcc.gnu.org/g:8058e1f8857d580698c29590273e72c61de2c121 commit r16-927-g8058e1f8857d580698c29590273e72c61de2c121 Author: Jan Hubicka Date: Wed May 28 12:23:48 2025 +0200 Fix profile_probability quality of switch This fixes ages old bug I noticed only now where switch cases, in situation prediction is completely missing, gets all equal probability that should be GUESSED instead of ADJUSTED. gcc/ChangeLog: * predict.cc (set_even_probabilities): Set quality to guessed. Diff: --- gcc/predict.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/predict.cc b/gcc/predict.cc index ef31c48bfe25..16dd9b01112b 100644 --- a/gcc/predict.cc +++ b/gcc/predict.cc @@ -961,7 +961,7 @@ set_even_probabilities (basic_block bb, if (unlikely_edges != NULL && unlikely_edges->contains (e)) e->probability = profile_probability::very_unlikely (); else - e->probability = all / scale; + e->probability = (all / scale).guessed (); } else e->probability = profile_probability::never ();
[gcc(refs/users/omachota/heads/rtl-ssa-dce)] rtl-ssa-dce: fix index out of range when debugizing
https://gcc.gnu.org/g:a260eca59789d4dc2d064205d7ca0f2e544a commit a260eca59789d4dc2d064205d7ca0f2e544a Author: Ondřej Machota Date: Wed May 28 10:13:14 2025 +0200 rtl-ssa-dce: fix index out of range when debugizing Diff: --- gcc/dce.cc | 84 +- 1 file changed, 61 insertions(+), 23 deletions(-) diff --git a/gcc/dce.cc b/gcc/dce.cc index 46806f18db80..2a203b673f32 100644 --- a/gcc/dce.cc +++ b/gcc/dce.cc @@ -1380,7 +1380,7 @@ private: void debugize_insn (insn_info *); - void unmark_debugizable(insn_info &, sbitmap); + void unmark_debugizable(insn_info *, sbitmap); sbitmap find_debugizable(const std::unordered_set &); void debugize_insns (const sbitmap); @@ -1701,6 +1701,8 @@ rtl_ssa_dce::sweep () fprintf (dump_file, "DCE: Sweep phase\n"); auto_vec to_delete; + + // Previously created debug instructions won't be visited here for (insn_info *insn : crtl->ssa->nondebug_insns ()) { // Artificial or marked insns should not be deleted. @@ -1906,15 +1908,13 @@ replace_dead_reg(rtx x, const_rtx old_rtx ATTRIBUTE_UNUSED, void *data) // visit every marked instruction in INSN dependency tree and unmark it void -rtl_ssa_dce::unmark_debugizable(insn_info &insn, sbitmap debugizable) +rtl_ssa_dce::unmark_debugizable (insn_info *insn, sbitmap debugizable) { auto_vec worklist; - gcc_assert(!insn.is_artificial()); - if (insn.uid () < 0) -std::cerr << "WTF" << insn.uid() << '\n'; - std::cout << insn.uid () << '\n'; - bitmap_set_bit (debugizable, insn.uid ()); - worklist.safe_push (&insn); + gcc_checking_assert (!insn->is_artificial ()); + + bitmap_set_bit (debugizable, insn->uid ()); + worklist.safe_push (insn); // process all marked dependencies and unmark them while (!worklist.is_empty ()) { @@ -1930,13 +1930,22 @@ rtl_ssa_dce::unmark_debugizable(insn_info &insn, sbitmap debugizable) // add all marked dependencies to the worklist for (def_info *def : current->defs()) { - if (def->kind() != access_kind::SET) + if (def->kind() != access_kind::SET) // skip clobbers continue; - set_info 
*set = static_cast(def); + auto *set = static_cast(def); for (use_info *use : set->all_uses()) { +// this phi node might not be dead +if (use->is_in_phi ()) + continue; + insn_info *use_insn = use->insn(); + +// artificial instruction will never be debugizable +if (use_insn->is_artificial ()) + continue; + if (bitmap_bit_p(debugizable, use_insn->uid())) worklist.safe_push (use_insn); } @@ -1949,27 +1958,34 @@ rtl_ssa_dce::unmark_debugizable(insn_info &insn, sbitmap debugizable) sbitmap rtl_ssa_dce::find_debugizable(const std::unordered_set &depends_on_dead_phi) { - // only real instructions + // only real instructions can be turned to debug instructions sbitmap debugizable = sbitmap_alloc (get_max_uid () + 1); bitmap_clear(debugizable); for (insn_info *insn : crtl->ssa->reverse_all_insns ()) { // Skip live nondebug instrunctions. Debug instructions are by default live -// and we cannot skip them here +// and we cannot skip them here - they have to be marked as debugizable if (insn->is_artificial () || (m_marked.get_bit (insn->uid ()) && !insn->is_debug_insn())) continue; +// instructions that depend on a dead phi node cannot be debugized if (depends_on_dead_phi.count (insn) > 0) { if (insn->is_debug_insn ()) reset_dead_debug_insn (insn); - // we don't have to call unmark_debugizable, because dead nondebug - // instructions that depend on a dead phi won't be turned into a + // we don't have to call unmark_debugizable, because a dead nondebug + // instructions that depends on a dead phi won't be turned into a // debug instrunction continue; } +// handle debug instrunctions - mark them and skip +if (insn->is_debug_insn ()) { + bitmap_set_bit (debugizable, insn->uid ()); + continue; +} + // this insn may have some debugizable dependencies and if we find that // current insn is not debugizable, we have to reset those dependencies @@ -1983,35 +1999,38 @@ rtl_ssa_dce::find_debugizable(const std::unordered_set &depends_on_ side_effects_p (SET_SRC (rtx_set)) || 
asm_noperands (PATTERN (rtl)) >= 0) { -unmark_debugizable(*insn, debugizable); +std::cerr << "FAILED TO CREATE DEBUG\n"; +unmark_debugizable(insn, debugizable); continue; } -// some of the checks might be duplicate: +// insn is definitely a single_set, following if statement is useless: if (insn->num_defs () != 1) { + gcc_assert (false); if (insn->num_defs() > 1) -unmark_debugizable(*insn, debugizable); +unmark_debugizable(insn, debugi
[gcc r13-9722] testsuite, gm2: Use -B option for libstdc++ where required.
https://gcc.gnu.org/g:963fb992f7e4c5c7ae24e49c02006db659b81da4 commit r13-9722-g963fb992f7e4c5c7ae24e49c02006db659b81da4 Author: Iain Sandoe Date: Mon Mar 10 08:44:41 2025 + testsuite, gm2: Use -B option for libstdc++ where required. We need to add testsuite options to locate gm2 libs and libstdc++. Usually '-L' options are added to point to the relevant directories for the uninstalled libraries. In cases where libraries are available as both shared and convenience some additional checks are made. For some targets -static- options are handled by specs substitution and need a '-B' option rather than '-L'. For Darwin, when embedded runpaths are in use (the default for all versions after macOS 10.11), '-B' is also needed to provide the runpath. When '-B' is used, this results in a '-L' for each path that exists (so that appending a '-L' as well is a needless duplicate). There are also cases where tools warn for duplicates, leading to spurious fails. Therefore the objective of the code here is to add just one '-L' or '-B' for each of the libraries. Currently, we are forcing the full paths to each of the gm2 convenience libs onto the link line and therefore the B/L logic is not needed there. It would need to be added if/when gm2 is tested with shared libraries gcc/testsuite/ChangeLog: * lib/gm2.exp: Arrange for a '-B' option to be added for the libstdc++ paths on targets that need it. 
Signed-off-by: Iain Sandoe (cherry picked from commit 6b9ceac9e4e2be304c39e6bc8744edf21faac4fb) Diff: --- gcc/testsuite/lib/gm2.exp | 46 -- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/gcc/testsuite/lib/gm2.exp b/gcc/testsuite/lib/gm2.exp index ab58a567e489..bc9400a1138d 100644 --- a/gcc/testsuite/lib/gm2.exp +++ b/gcc/testsuite/lib/gm2.exp @@ -281,12 +281,35 @@ proc gm2_link_flags { paths } { set shlib_ext [get_shlib_extension] verbose "shared lib extension: $shlib_ext" + +# We need to add options to locate gm2 libs and libstdc++ +# Usually '-L' options are added to point to the relevant directories for +# the uninstalled libraries. + +# In cases where libraries are available as both shared and convenience +# some additional checks are made. + +# For some targets -static- options are handled by specs substitution +# and need a '-B' option rather than '-L'. For Darwin, when embedded +# runpaths are in use (the default for all versions after macOS 10.11), +# '-B' is also needed to provide the runpath. +# When '-B' is used, this results in a '-L' for each path that exists (so +# that appending a '-L' as well is a needless duplicate). There are also +# cases where tools warn for duplicates, leading to spurious fails. +# Therefore the objective of the code below is to add just one '-L' or +# '-B' for each of the libraries. 
+ +set target_wants_B_option 0 +if { [istarget *-*-darwin9* ] || [istarget *-*-darwin\[12\]* ] } { + set target_wants_B_option 1 +} + if { $gccpath == "" } { global tool_root_dir set libstdcpp [lookfor_file ${tool_root_dir} libstdc++] if { $libstdcpp != "" } { - append flags "-L${libstdcpp} " + append flags " -L${libstdcpp} " append ld_library_path ":${libstdcpp}" } } else { @@ -294,19 +317,22 @@ proc gm2_link_flags { paths } { append ld_library_path ":${gccpath}/lib" } if [file exists "${gccpath}/libstdc++/libstdc++.a"] { - append flags "-L${gccpath}/libstdc++ " + append flags " -L${gccpath}/libstdc++ " append ld_library_path ":${gccpath}/libstdc++" } - if [file exists "${gccpath}/libstdc++-v3/src/.libs/libstdc++.a"] { - append flags " -L${gccpath}/libstdc++-v3/src/.libs " - append ld_library_path ":${gccpath}/libstdc++-v3/src/.libs" - } - # Look for libstdc++.${shlib_ext}. - if [file exists "${gccpath}/libstdc++-v3/src/.libs/libstdc++.${shlib_ext}"] { - append flags " -L${gccpath}/libstdc++-v3/src/.libs " + # Look for libstdc++.. +if { [file exists "${gccpath}/libstdc++-v3/src/.libs/libstdc++.a"] \ +|| [file exists "${gccpath}/libstdc++-v3/src/.libs/libstdc++.${shlib_ext}"] } { +if { $target_wants_B_option } { + append flags " -B${gccpath}/libstdc++-v3/src/.libs " +} else { + append flags " -L${gccpath}/libstdc++-v3/src/.libs " +} append ld_library_path ":${gccpath}/libstdc++-v3/src/.libs" } + # Here we are forcing the static libraries, with complete paths so + # there's no -L/-B logic needed # puts stderr "${gm2_link_libraries} before foreach" foreach d [li
[gcc r16-925] doc: Fix extend.texi menu
https://gcc.gnu.org/g:011962c3fc60d9e34a14babef7c99184750cada6 commit r16-925-g011962c3fc60d9e34a14babef7c99184750cada6 Author: Haochen Jiang Date: Wed May 28 10:36:34 2025 +0800 doc: Fix extend.texi menu commit 517c9487f8fdc4e4e90252a9365e5823259dc783 Author: Alejandro Colomar Date: Thu May 22 01:15:36 2025 +0200 c: Add _Countof operator [PR117025] broke gcc build on RHEL 9 when building texi files (with bundled makeinfo 6.7): gcc/doc/extend.texi:6: node `C Extensions' lacks menu item for `_Countof' despite being its Up target The same failure will happen for makeinfo <= 6.7, while it won't happen for makeinfo >= 6.8. Fixed by adding the missing menu entries. gcc/ChangeLog: * doc/extend.texi (C Extensions): Add missing menu items. Diff: --- gcc/doc/extend.texi | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index edd3a0d96c52..fe22d34c2cf6 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -31,6 +31,7 @@ extensions, accepted by GCC in C90 mode and in C++. * Thread-Local::Per-thread variables. * OpenMP:: Multiprocessing extensions. * OpenACC:: Extensions for offloading code to accelerator devices. +* _Countof::The number of elements of arrays. * Inline:: Defining inline functions (as fast as macros). * Volatiles:: What constitutes an access to a volatile object. * Using Assembly Language with C:: Instructions and extensions for interfacing C with assembler.
[gcc r13-9719] libgcc, Darwin: Drop the legacy library build for macOS >= 10.12 [PR116809].
https://gcc.gnu.org/g:3599245e4972b2322f75c909bc81961e41bc64ad commit r13-9719-g3599245e4972b2322f75c909bc81961e41bc64ad Author: Mark Mentovai Date: Tue Sep 24 16:11:14 2024 -0400 libgcc, Darwin: Drop the legacy library build for macOS >= 10.12 [PR116809]. From macOSX15 SDK, the unwinder no longer exports some of the symbols used in that library which (a) causes bootstrap fail and (b) means that the legacy library is no longer useful. No open branch of GCC emits references to this library - and any already-built code that depends on the symbols would need rework anyway. We have been asked to extend this back to the earliest OS version supported by the SDK (10.12). PR target/116809 libgcc/ChangeLog: * config.host: Build legacy libgcc_s.1 on hosts before macOS 10.12. * config/i386/t-darwin: Remove reference to legacy libgcc_s.1 * config/rs6000/t-darwin: Likewise. * config/t-darwin-libgccs1: New file. Signed-off-by: Iain Sandoe (cherry picked from commit d9cafa0c4f0a81304d9b95a78ccc8e9003c6d7a3) Diff: --- libgcc/config.host | 9 ++--- libgcc/config/i386/t-darwin | 3 --- libgcc/config/rs6000/t-darwin | 3 --- libgcc/config/t-darwin-libgccs1 | 3 +++ 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/libgcc/config.host b/libgcc/config.host index 8621de4f6387..9aa36bf22109 100644 --- a/libgcc/config.host +++ b/libgcc/config.host @@ -238,16 +238,19 @@ case ${host} in *-*-darwin1[89]* | *-*-darwin2* ) tmake_file="t-darwin-min-8 $tmake_file" ;; -*-*-darwin9* | *-*-darwin1[0-7]*) +*-*-darwin1[67]]*) tmake_file="t-darwin-min-5 $tmake_file" ;; +*-*-darwin9* | *-*-darwin1[0-5]*) + tmake_file="t-darwin-min-5 t-darwin-libgccs1 $tmake_file" + ;; *-*-darwin[4-8]*) - tmake_file="t-darwin-min-1 $tmake_file" + tmake_file="t-darwin-min-1 t-darwin-libgccs1 $tmake_file" ;; *) # Fall back to configuring for the oldest system known to work with # all archs and the current sources. 
- tmake_file="t-darwin-min-5 $tmake_file" + tmake_file="t-darwin-min-5 t-darwin-libgccs1 $tmake_file" echo "Warning: libgcc configured to support macOS 10.5" 1>&2 ;; esac diff --git a/libgcc/config/i386/t-darwin b/libgcc/config/i386/t-darwin index 4c18da1efbfd..c6b3acaaca28 100644 --- a/libgcc/config/i386/t-darwin +++ b/libgcc/config/i386/t-darwin @@ -4,6 +4,3 @@ LIB2FUNCS_EXCLUDE = _fixtfdi _fixunstfdi _floatditf _floatunditf # Extra symbols for this port. SHLIB_MAPFILES += $(srcdir)/config/i386/libgcc-darwin.ver - -# Build a legacy libgcc_s.1 -BUILD_LIBGCCS1 = YES diff --git a/libgcc/config/rs6000/t-darwin b/libgcc/config/rs6000/t-darwin index 183d0df92ce9..8b513bdb1d78 100644 --- a/libgcc/config/rs6000/t-darwin +++ b/libgcc/config/rs6000/t-darwin @@ -56,6 +56,3 @@ unwind-dw2_s.o: HOST_LIBGCC2_CFLAGS += -maltivec unwind-dw2.o: HOST_LIBGCC2_CFLAGS += -maltivec LIB2ADDEH += $(srcdir)/config/rs6000/darwin-fallback.c - -# Build a legacy libgcc_s.1 -BUILD_LIBGCCS1 = YES diff --git a/libgcc/config/t-darwin-libgccs1 b/libgcc/config/t-darwin-libgccs1 new file mode 100644 index ..b88b1a5bba8a --- /dev/null +++ b/libgcc/config/t-darwin-libgccs1 @@ -0,0 +1,3 @@ + +# Build a legacy libgcc_s.1 +BUILD_LIBGCCS1 = YES
[gcc r13-9721] Darwin: Pass -macos_version_min to the linker [PR119172].
https://gcc.gnu.org/g:d9cd8d7babe838b577c6ede08589a79bb3df7810 commit r13-9721-gd9cd8d7babe838b577c6ede08589a79bb3df7810 Author: Iain Sandoe Date: Sun Mar 9 09:24:34 2025 + Darwin: Pass -macos_version_min to the linker [PR119172]. For binaries to be notarised, the SDK version must be available. Since we do not, at present, parse this information we have been passing "0.0" to ld64. This now results in a warning and a fail to notarise. As a quick-fix, we can fall back to letting ld64 figure out the SDK version (which it does for -macos_version_min). TODO: Parse the SDKSetting.plist at some point. cherry-picked from 952e17223d3a9 and fc728cfd569e291a5 PR target/119172 gcc/ChangeLog: * config.in: Regenerate. * config/darwin.h (DARWIN_PLATFORM_ID): Add the option to use -macos_version_min where available. * configure: Regenerate. * configure.ac: Check for ld64 support of -macos_version_min. Co-authored-by: Andrew Pinski Signed-off-by: Iain Sandoe Signed-off-by: Andrew Pinski Diff: --- gcc/config.in | 6 ++ gcc/config/darwin.h | 13 + gcc/configure | 17 + gcc/configure.ac| 12 4 files changed, 44 insertions(+), 4 deletions(-) diff --git a/gcc/config.in b/gcc/config.in index 7a2295196bf0..712cae85e4ad 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -2214,6 +2214,12 @@ #endif +/* Define to 1 if ld64 supports '-macos_version_min'. */ +#ifndef USED_FOR_TARGET +#undef LD64_HAS_MACOS_VERSION_MIN +#endif + + /* Define to 1 if ld64 supports '-platform_version'. 
*/ #ifndef USED_FOR_TARGET #undef LD64_HAS_PLATFORM_VERSION diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h index 5c6c38ddc63b..2ea034c06d8a 100644 --- a/gcc/config/darwin.h +++ b/gcc/config/darwin.h @@ -276,12 +276,17 @@ extern GTY(()) int darwin_ms_struct; #define DARWIN_RDYNAMIC "%{rdynamic:%nrdynamic is not supported}" #endif -#if LD64_HAS_PLATFORM_VERSION -#define DARWIN_PLATFORM_ID \ - "%{mmacosx-version-min=*: -platform_version macos %* 0.0} " +#if LD64_HAS_MACOS_VERSION_MIN +# define DARWIN_PLATFORM_ID \ + "%{mmacosx-version-min=*:-macos_version_min %*} " #else -#define DARWIN_PLATFORM_ID \ +# if LD64_HAS_PLATFORM_VERSION +# define DARWIN_PLATFORM_ID \ + "%{mmacosx-version-min=*: -platform_version macos %* 0.0} " +# else +# define DARWIN_PLATFORM_ID \ "%{mmacosx-version-min=*:-macosx_version_min %*} " +# endif #endif /* Code built with mdynamic-no-pic does not support PIE/PIC, so we disallow diff --git a/gcc/configure b/gcc/configure index 27585d03e7d0..b693850af46f 100755 --- a/gcc/configure +++ b/gcc/configure @@ -30674,6 +30674,7 @@ if test x"$ld64_flag" = x"yes"; then # Set defaults for possibly untestable items. gcc_cv_ld64_export_dynamic=0 gcc_cv_ld64_platform_version=0 + gcc_cv_ld64_macos_version_min=0 if test "$build" = "$host"; then darwin_try_test=1 @@ -30702,6 +30703,7 @@ $as_echo "$gcc_cv_ld64_major" >&6; } fi if test "$gcc_cv_ld64_major" -ge 512; then gcc_cv_ld64_platform_version=1 + gcc_cv_ld64_macos_version_min=1 fi elif test -x "$gcc_cv_ld" -a "$darwin_try_test" -eq 1; then # If the version was not specified, try to find it. @@ -30731,6 +30733,15 @@ $as_echo_n "checking linker for -platform_version support... " >&6; } fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_ld64_platform_version" >&5 $as_echo "$gcc_cv_ld64_platform_version" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking linker for -macos_version_min" >&5 +$as_echo_n "checking linker for -macos_version_min... 
" >&6; } +gcc_cv_ld64_macos_version_min=1 +if $gcc_cv_ld -macos_version_min 10.5 < /dev/null 2>&1 | grep 'unknown option' > /dev/null; then + gcc_cv_ld64_macos_version_min=0 +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_ld64_macos_version_min" >&5 +$as_echo "$gcc_cv_ld64_macos_version_min" >&6; } fi if test x"${gcc_cv_ld64_version}" != x; then @@ -30752,6 +30763,12 @@ cat >>confdefs.h <<_ACEOF #define LD64_HAS_PLATFORM_VERSION $gcc_cv_ld64_platform_version _ACEOF + + +cat >>confdefs.h <<_ACEOF +#define LD64_HAS_MACOS_VERSION_MIN $gcc_cv_ld64_macos_version_min +_ACEOF + fi if test x"$dsymutil_flag" = x"yes"; then diff --git a/gcc/configure.ac b/gcc/configure.ac index 4fd323483d2f..0c5bd6548daf 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -6288,6 +6288,7 @@ if test x"$ld64_flag" = x"yes"; then # Set defaults for possibly untestable items. gcc_cv_ld64_export_dynamic=0 gcc_cv_ld64_platform_version=0 + gcc_cv_ld64_macos_version_min=0 if test "$build" = "$host"; then darwin_try_test=1 @@ -6314,6 +6315,7 @@ if test x"$ld64_flag" = x"yes"; then fi if test "$gcc_cv_ld64_major" -ge 512; then gcc_cv_ld64_platform_version=
[gcc r13-9723] configure, Darwin: Recognise new naming for Xcode ld.
https://gcc.gnu.org/g:d86ed0e17824b06425e7e4f58175c1a9a19f7344 commit r13-9723-gd86ed0e17824b06425e7e4f58175c1a9a19f7344 Author: Iain Sandoe Date: Tue Apr 15 14:02:21 2025 +0100 configure, Darwin: Recognise new naming for Xcode ld. The latest editions of XCode have altered the identity reported by 'ld -v' (again). This means that GCC configure no longer detects the version. Fixed by adding the new name to the set checked. gcc/ChangeLog: * configure: Regenerate. * configure.ac: Recognise PROJECT:ld-.nn.aa as an identifier for Darwin's static linker. Signed-off-by: Iain Sandoe (cherry picked from commit 7f56a8e8ad1c33d358e9e09fcbaf263c2caba1b9) Diff: --- gcc/configure| 7 --- gcc/configure.ac | 7 --- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/gcc/configure b/gcc/configure index b693850af46f..6cb58cd6ec60 100755 --- a/gcc/configure +++ b/gcc/configure @@ -3896,7 +3896,7 @@ if test x"${DEFAULT_LINKER+set}" = x"set"; then as_fn_error $? "cannot execute: $DEFAULT_LINKER: check --with-ld or env. var. DEFAULT_LINKER" "$LINENO" 5 elif $DEFAULT_LINKER -v < /dev/null 2>&1 | grep GNU > /dev/null; then gnu_ld_flag=yes - elif $DEFAULT_LINKER -v < /dev/null 2>&1 | grep ld64- > /dev/null; then + elif $DEFAULT_LINKER -v < /dev/null 2>&1 | grep 'PROJECT:ld\(64\)*-' > /dev/null; then ld64_flag=yes fi @@ -30710,8 +30710,9 @@ $as_echo "$gcc_cv_ld64_major" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking linker version" >&5 $as_echo_n "checking linker version... 
" >&6; } if test x"${gcc_cv_ld64_version}" = x; then - gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | $EGREP 'ld64|dyld' \ - | sed -e 's/.*ld64-//' -e 's/.*dyld-//'| awk '{print $1}'` + gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | $EGREP 'ld64|dyld|PROJECT:ld' \ + | sed -e 's/.*ld64-//' -e 's/.*dyld-//' -e 's/.*PROJECT:ld-//' \ + | awk '{print $1}'` fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_ld64_version" >&5 $as_echo "$gcc_cv_ld64_version" >&6; } diff --git a/gcc/configure.ac b/gcc/configure.ac index 0c5bd6548daf..8382b4e7b3fe 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -358,7 +358,7 @@ if test x"${DEFAULT_LINKER+set}" = x"set"; then AC_MSG_ERROR([cannot execute: $DEFAULT_LINKER: check --with-ld or env. var. DEFAULT_LINKER]) elif $DEFAULT_LINKER -v < /dev/null 2>&1 | grep GNU > /dev/null; then gnu_ld_flag=yes - elif $DEFAULT_LINKER -v < /dev/null 2>&1 | grep ld64- > /dev/null; then + elif $DEFAULT_LINKER -v < /dev/null 2>&1 | grep 'PROJECT:ld\(64\)*-' > /dev/null; then ld64_flag=yes fi AC_DEFINE_UNQUOTED(DEFAULT_LINKER,"$DEFAULT_LINKER", @@ -6321,8 +6321,9 @@ if test x"$ld64_flag" = x"yes"; then # If the version was not specified, try to find it. AC_MSG_CHECKING(linker version) if test x"${gcc_cv_ld64_version}" = x; then - gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | $EGREP 'ld64|dyld' \ - | sed -e 's/.*ld64-//' -e 's/.*dyld-//'| awk '{print $1}'` + gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | $EGREP 'ld64|dyld|PROJECT:ld' \ + | sed -e 's/.*ld64-//' -e 's/.*dyld-//' -e 's/.*PROJECT:ld-//' \ + | awk '{print $1}'` fi AC_MSG_RESULT($gcc_cv_ld64_version)
[gcc r13-9720] fixincludes: adjust stdio fix for macOS 15 headers
https://gcc.gnu.org/g:19ddfa147ea4bcb16d4af2d1ee8ce4ea273a2392 commit r13-9720-g19ddfa147ea4bcb16d4af2d1ee8ce4ea273a2392 Author: Francois-Xavier Coudert Date: Thu Jun 27 18:55:22 2024 +0200 fixincludes: adjust stdio fix for macOS 15 headers fixincludes/ChangeLog: * fixincl.x: Regenerate. * inclhack.def (apple_local_stdio_fn_deprecation): Also apply to _stdio.h. (cherry picked from commit 1dc143181550573c9c902fb7a3b495e9b409d0b0) Diff: --- fixincludes/fixincl.x| 6 +++--- fixincludes/inclhack.def | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fixincludes/fixincl.x b/fixincludes/fixincl.x index 5e955a288d25..ab86aa383dbe 100644 --- a/fixincludes/fixincl.x +++ b/fixincludes/fixincl.x @@ -2,11 +2,11 @@ * * DO NOT EDIT THIS FILE (fixincl.x) * - * It has been AutoGen-ed December 11, 2023 at 02:50:24 PM by AutoGen 5.18.16 + * It has been AutoGen-ed May 27, 2025 at 10:40:20 AM by AutoGen 5.18.16 * From the definitionsinclhack.def * and the template file fixincl */ -/* DO NOT SVN-MERGE THIS FILE, EITHER Mon Dec 11 14:50:24 CET 2023 +/* DO NOT SVN-MERGE THIS FILE, EITHER Tue May 27 10:40:20 BST 2025 * * You must regenerate it. Use the ./genfixes script. * @@ -2619,7 +2619,7 @@ tSCC zApple_Local_Stdio_Fn_DeprecationName[] = * File name selection pattern */ tSCC zApple_Local_Stdio_Fn_DeprecationList[] = - "stdio.h\0"; + "stdio.h\0_stdio.h\0"; /* * Machine/OS name selection pattern */ diff --git a/fixincludes/inclhack.def b/fixincludes/inclhack.def index a2970fcc0a93..f916a3bebd44 100644 --- a/fixincludes/inclhack.def +++ b/fixincludes/inclhack.def @@ -1273,6 +1273,7 @@ fix = { hackname = apple_local_stdio_fn_deprecation; mach = "*-*-*darwin2*"; files = stdio.h; +files = _stdio.h; select= "__deprecated_msg([^\n]*)$"; c_fix = format; c_fix_arg = "#if defined(__APPLE_LOCAL_DEPRECATIONS)\n"
[gcc(refs/users/mikael/heads/refactor_descriptor_v05)] Correction régression zero_sized_15
https://gcc.gnu.org/g:b2f4942f061f5c6a006d04c8af7fea2e245ae08e commit b2f4942f061f5c6a006d04c8af7fea2e245ae08e Author: Mikael Morin Date: Tue May 27 22:55:49 2025 +0200 Correction régression zero_sized_15 Diff: --- gcc/fortran/trans-array.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index dc4c0399cb2f..0109f135cfbe 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -1057,7 +1057,7 @@ gfc_trans_create_temp_array (stmtblock_t * pre, stmtblock_t * post, gfc_ss * ss, to[0] = NULL_TREE; } type = -gfc_get_array_type_bounds (eltype, total_dim, 0, from, to, 1, +gfc_get_array_type_bounds (eltype, total_dim, 0, from, to, PACKED_STATIC, GFC_ARRAY_UNKNOWN, true, ss->info->expr ? ss->info->expr->ts.type : BT_UNKNOWN); @@ -7629,6 +7629,8 @@ gfc_conv_expr_descriptor (gfc_se *se, gfc_expr *expr) : NULL), loop.dimen); + loop.temp_ss->info->expr = expr; + se->string_length = loop.temp_ss->info->string_length; gcc_assert (loop.temp_ss->dimen == loop.dimen); gfc_add_ss_to_loop (&loop, loop.temp_ss);
[gcc r16-929] Do not recompute profile when entry block has afdo count of 0
https://gcc.gnu.org/g:a4dc4001999a8b504cde468618af12c9b870589b commit r16-929-ga4dc4001999a8b504cde468618af12c9b870589b Author: Jan Hubicka Date: Wed May 28 14:18:39 2025 +0200 Do not recompute profile when entry block has afdo count of 0 With normal profile feedback checking entry block count to be non-zero is quite reliable check for presence of non-0 profile in the body since the function body can only be executed if the entry block was executed. With autofdo this is not true, since the entry block may just execute too few times to be recorded. As a consequence we currently drop AFDO profile quite often. This patch fixes it. gcc/ChangeLog: * predict.cc (rebuild_frequencies): look harder for presence of profile feedback. Diff: --- gcc/predict.cc | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gcc/predict.cc b/gcc/predict.cc index 16dd9b01112b..872f54d957a5 100644 --- a/gcc/predict.cc +++ b/gcc/predict.cc @@ -,11 +,14 @@ rebuild_frequencies (void) bool inconsistency_found = false; bool uninitialized_probablity_found = false; bool uninitialized_count_found = false; + bool feedback_found = false; cfun->cfg->count_max = profile_count::uninitialized (); FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) { cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); + if (bb->count.nonzero_p () && bb->count.quality () >= AFDO) + feedback_found = true; /* Uninitialized count may be result of inlining or an omision in an optimization pass. */ if (!bb->count.initialized_p ()) @@ -4516,8 +4519,7 @@ rebuild_frequencies (void) Propagating from probabilities would make profile look consistent, but because probablities after code duplication may not be representative for a given run, we would only propagate the error further. */ - if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ().nonzero_p () - && !uninitialized_count_found) + if (feedback_found && !uninitialized_count_found) { if (dump_file) fprintf (dump_file,
[gcc r16-930] Handle auto-fdo 0 more carefully
https://gcc.gnu.org/g:17f7b6250628c31182fd4f71c9ecdeca9568ffd1 commit r16-930-g17f7b6250628c31182fd4f71c9ecdeca9568ffd1 Author: Jan Hubicka Date: Wed May 28 14:26:11 2025 +0200 Handle auto-fdo 0 more carefully This patch fixes a few other places where auto-fdo 0 should not be treated as actual 0 (i.e. probably never executed). Overall I think we should end up combining static profile with auto-fdo profile where auto-fdo has 0 counts, but that is something that should be benchmarked and first it is necessary to get something benchmarkable out of auto-FDO. gcc/ChangeLog: * cgraph.cc (cgraph_edge::maybe_hot_p): For auto-fdo turn 0 to non-zero. * ipa-cp.cc (cs_interesting_for_ipcp_p): Do not trust auto-fdo 0. * profile-count.cc (profile_count::adjust_for_ipa_scaling): Likewise. (profile_count::from_gcov_type): Fix formatting. Diff: --- gcc/cgraph.cc| 9 - gcc/ipa-cp.cc| 8 ++-- gcc/profile-count.cc | 30 +- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc index ac0f2519361b..3f95ca1fa85c 100644 --- a/gcc/cgraph.cc +++ b/gcc/cgraph.cc @@ -3019,7 +3019,14 @@ cgraph_edge::maybe_hot_p (sreal scale) /* Use IPA count and if it s not available appy local heuristics. */ if (c.initialized_p ()) -return maybe_hot_count_p (NULL, c * scale); +{ + /* A special case; AFDO zero means that function may quite possibly +be executed few times per execution. If scale is large, we still +want to consider the call hot. */ + if (c.quality () == AFDO) + c = c.force_nonzero (); + return maybe_hot_count_p (NULL, c * scale); +} if (!count.initialized_p ()) return true; cgraph_node *where = caller->inlined_to ? 
caller->inlined_to : caller; diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc index f06ac46dfffb..73cf9040fad7 100644 --- a/gcc/ipa-cp.cc +++ b/gcc/ipa-cp.cc @@ -544,8 +544,12 @@ cs_interesting_for_ipcp_p (cgraph_edge *e) if (e->count.ipa ().nonzero_p ()) return true; /* If local (possibly guseed or adjusted 0 profile) claims edge is - not executed, do not propagate. */ - if (e->count.initialized_p () && !e->count.nonzero_p ()) + not executed, do not propagate. + Do not trust AFDO since branch needs to be executed multiple + time to count while we want to propagate even call called + once during the train run if callee is important. */ + if (e->count.initialized_p () && !e->count.nonzero_p () + && e->count.quality () != AFDO) return false; /* If we have zero IPA profile, still consider edge for cloning in case we do partial training. */ diff --git a/gcc/profile-count.cc b/gcc/profile-count.cc index 374f06f4c083..2d9c778b3758 100644 --- a/gcc/profile-count.cc +++ b/gcc/profile-count.cc @@ -364,8 +364,12 @@ profile_count::adjust_for_ipa_scaling (profile_count *num, /* Scaling to zero is always zero. */ if (*num == zero ()) return; - /* If den is non-zero we are safe. */ - if (den->force_nonzero () == *den) + /* If den is non-zero we are safe. + However take care of zeros in AFDO profiles since + they simply means that no useful samples were collected. + Called function still may contain important loop. */ + if (den->force_nonzero () == *den + && num->quality () != AFDO) return; /* Force both to non-zero so we do not push profiles to 0 when both num == 0 and den == 0. 
*/ @@ -417,17 +421,17 @@ profile_count::combine_with_ipa_count_within (profile_count ipa, profile_count profile_count::from_gcov_type (gcov_type v, profile_quality quality) - { -profile_count ret; -gcc_checking_assert (v >= 0); -if (dump_file && v >= (gcov_type)max_count) - fprintf (dump_file, - "Capping gcov count %" PRId64 " to max_count %" PRId64 "\n", - (int64_t) v, (int64_t) max_count); -ret.m_val = MIN (v, (gcov_type)max_count); -ret.m_quality = quality; -return ret; - } +{ + profile_count ret; + gcc_checking_assert (v >= 0); + if (dump_file && v >= (gcov_type)max_count) +fprintf (dump_file, +"Capping gcov count %" PRId64 " to max_count %" PRId64 "\n", +(int64_t) v, (int64_t) max_count); + ret.m_val = MIN (v, (gcov_type)max_count); + ret.m_quality = quality; + return ret; +} /* COUNT1 times event happens with *THIS probability, COUNT2 times OTHER happens with COUNT2 probability. Return probability that either *THIS or
[gcc r16-933] RISC-V: Add test cases for avg_floor vaadd implementation
https://gcc.gnu.org/g:d4a2f9ba6ece32fb8500f10204fcf409aa26fbfb commit r16-933-gd4a2f9ba6ece32fb8500f10204fcf409aa26fbfb Author: Pan Li Date: Tue May 27 10:27:01 2025 +0800 RISC-V: Add test cases for avg_floor vaadd implementation Add asm and run testcase for avg_floor vaadd implementation. The test suites below pass for this patch series. * The rv64gcv full regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/avg.h: New test. * gcc.target/riscv/rvv/autovec/avg_data.h: New test. * gcc.target/riscv/rvv/autovec/avg_floor-1-i16-from-i32.c: New test. * gcc.target/riscv/rvv/autovec/avg_floor-1-i16-from-i64.c: New test. * gcc.target/riscv/rvv/autovec/avg_floor-1-i32-from-i64.c: New test. * gcc.target/riscv/rvv/autovec/avg_floor-1-i8-from-i16.c: New test. * gcc.target/riscv/rvv/autovec/avg_floor-1-i8-from-i32.c: New test. * gcc.target/riscv/rvv/autovec/avg_floor-1-i8-from-i64.c: New test. * gcc.target/riscv/rvv/autovec/avg_floor-run-1-i16-from-i32.c: New test. * gcc.target/riscv/rvv/autovec/avg_floor-run-1-i16-from-i64.c: New test. * gcc.target/riscv/rvv/autovec/avg_floor-run-1-i32-from-i64.c: New test. * gcc.target/riscv/rvv/autovec/avg_floor-run-1-i8-from-i16.c: New test. * gcc.target/riscv/rvv/autovec/avg_floor-run-1-i8-from-i32.c: New test. * gcc.target/riscv/rvv/autovec/avg_floor-run-1-i8-from-i64.c: New test. * gcc.target/riscv/rvv/autovec/avg_run.h: New test. 
Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/rvv/autovec/avg.h | 23 +++ .../gcc.target/riscv/rvv/autovec/avg_data.h| 185 + .../riscv/rvv/autovec/avg_floor-1-i16-from-i32.c | 12 ++ .../riscv/rvv/autovec/avg_floor-1-i16-from-i64.c | 12 ++ .../riscv/rvv/autovec/avg_floor-1-i32-from-i64.c | 12 ++ .../riscv/rvv/autovec/avg_floor-1-i8-from-i16.c| 12 ++ .../riscv/rvv/autovec/avg_floor-1-i8-from-i32.c| 12 ++ .../riscv/rvv/autovec/avg_floor-1-i8-from-i64.c| 12 ++ .../rvv/autovec/avg_floor-run-1-i16-from-i32.c | 16 ++ .../rvv/autovec/avg_floor-run-1-i16-from-i64.c | 16 ++ .../rvv/autovec/avg_floor-run-1-i32-from-i64.c | 16 ++ .../rvv/autovec/avg_floor-run-1-i8-from-i16.c | 16 ++ .../rvv/autovec/avg_floor-run-1-i8-from-i32.c | 16 ++ .../rvv/autovec/avg_floor-run-1-i8-from-i64.c | 16 ++ .../gcc.target/riscv/rvv/autovec/avg_run.h | 26 +++ 15 files changed, 402 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/avg.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/avg.h new file mode 100644 index ..746c635ae577 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/avg.h @@ -0,0 +1,23 @@ +#ifndef HAVE_DEFINED_AVG_H +#define HAVE_DEFINED_AVG_H + +#include + +#define DEF_AVG_0(NT, WT, NAME) \ +__attribute__((noinline)) \ +void\ +test_##NAME##_##WT##_##NT##_0(NT * restrict a, NT * restrict b, \ + NT * restrict out, int n) \ +{ \ + for (int i = 0; i < n; i++) { \ +out[i] = (NT)(((WT)a[i] + (WT)b[i]) >> 1); \ + } \ +} +#define DEF_AVG_0_WRAP(NT, WT, NAME) DEF_AVG_0(NT, WT, NAME) + +#define RUN_AVG_0(NT, WT, NAME, a, b, out, n) \ + test_##NAME##_##WT##_##NT##_0(a, b, out, n) +#define RUN_AVG_0_WRAP(NT, WT, NAME, a, b, out, n) \ + RUN_AVG_0(NT, WT, NAME, a, b, out, n) + +#endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_data.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_data.h new file mode 100644 index ..cbeed147a565 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_data.h @@ -0,0 +1,185 @@ +#ifndef 
HAVE_DEFINED_AVG_DATA_H +#define HAVE_DEFINED_AVG_DATA_H + +#define N 16 + +#define TEST_AVG_DATA(T, NAME) test_##T##_##NAME##_data +#define TEST_AVG_DATA_WRAP(T, NAME) TEST_AVG_DATA(T, NAME) + +int8_t TEST_AVG_DATA(int8_t, avg_floor)[][3][N] = +{ + { +{ + 0, 0, 0, 0, + 1, 1, 1, 1, + -1, -1, -1, -1, + 8, 8, 8, 8, +}, +{ + 0, 0, 0, 0, + 0, 0, 0, 0, + -2, -2, -2, -2, + 1, 1, 1, 1, +}, +{ + 0, 0, 0, 0, + 0, 0, 0, 0, + -2, -2, -2, -2, + 4, 4, 4, 4, +}, + }, + { +{ + 127, 127, 127, 127, + 127, 127, 127, 127, + -128, -128, -128, -128, + -128, -128, -128, -128, +}, +{ + 126, 126, 126, 126, +-2, -2, -2, -2, + 127, 127, 127, 127, + -127, -127, -127, -127, +}, +
[gcc r16-938] fortran: add constant input support for trig functions with half-revolutions
https://gcc.gnu.org/g:e8fdd55ec907496ff3c80fed55d8da3ddbdc1a2b commit r16-938-ge8fdd55ec907496ff3c80fed55d8da3ddbdc1a2b Author: Yuao Ma Date: Wed May 28 23:13:45 2025 +0800 fortran: add constant input support for trig functions with half-revolutions This patch introduces constant input support for trigonometric functions, including those involving half-revolutions. Both valid and invalid inputs have been thoroughly tested, as have mpfr versions greater than or equal to 4.2 and less than 4.2. Inspired by Steve's previous work, this patch also fixes subtle bugs revealed by newly added test cases. If this patch is merged, I plan to work on middle-end optimization support for previously added GCC built-ins and libgfortran intrinsics. PR fortran/113152 gcc/fortran/ChangeLog: * gfortran.h (enum gfc_isym_id): Add new enum. * intrinsic.cc (add_functions): Register new intrinsics. Changing the call from gfc_resolve_trigd{,2} to gfc_resolve_trig{,2}. * intrinsic.h (gfc_simplify_acospi, gfc_simplify_asinpi, gfc_simplify_asinpi, gfc_simplify_atanpi, gfc_simplify_atan2pi, gfc_simplify_cospi, gfc_simplify_sinpi, gfc_simplify_tanpi): New. (gfc_resolve_trig): Rename from gfc_resolve_trigd. (gfc_resolve_trig2): Rename from gfc_resolve_trigd2. * iresolve.cc (gfc_resolve_trig): Rename from gfc_resolve_trigd. (gfc_resolve_trig2): Rename from gfc_resolve_trigd2. * mathbuiltins.def: Add 7 new math builtins and re-align. * simplify.cc (gfc_simplify_acos, gfc_simplify_asin, gfc_simplify_acosd, gfc_simplify_asind): Revise error message. (gfc_simplify_acospi, gfc_simplify_asinpi, gfc_simplify_asinpi, gfc_simplify_atanpi, gfc_simplify_atan2pi, gfc_simplify_cospi, gfc_simplify_sinpi, gfc_simplify_tanpi): New. gcc/testsuite/ChangeLog: * gfortran.dg/dec_math_3.f90: Test invalid input. * gfortran.dg/dec_math_5.f90: Test valid output. * gfortran.dg/dec_math_6.f90: New test. Signed-off-by: Yuao Ma Co-authored-by: Steven G. 
Kargl Diff: --- gcc/fortran/gfortran.h | 8 + gcc/fortran/intrinsic.cc | 93 --- gcc/fortran/intrinsic.h | 11 +- gcc/fortran/iresolve.cc | 8 +- gcc/fortran/mathbuiltins.def | 63 gcc/fortran/simplify.cc | 262 ++- gcc/testsuite/gfortran.dg/dec_math_3.f90 | 15 +- gcc/testsuite/gfortran.dg/dec_math_5.f90 | 63 gcc/testsuite/gfortran.dg/dec_math_6.f90 | 12 ++ 9 files changed, 468 insertions(+), 67 deletions(-) diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 4740c3676d98..e461aa68470d 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -721,6 +721,14 @@ enum gfc_isym_id remains compatible. */ GFC_ISYM_SU_KIND, GFC_ISYM_UINT, + + GFC_ISYM_ACOSPI, + GFC_ISYM_ASINPI, + GFC_ISYM_ATANPI, + GFC_ISYM_ATAN2PI, + GFC_ISYM_COSPI, + GFC_ISYM_SINPI, + GFC_ISYM_TANPI, }; enum init_local_logical diff --git a/gcc/fortran/intrinsic.cc b/gcc/fortran/intrinsic.cc index 908e1dae128b..9e07627503de 100644 --- a/gcc/fortran/intrinsic.cc +++ b/gcc/fortran/intrinsic.cc @@ -3452,37 +3452,37 @@ add_functions (void) add_sym_1 ("acosd", GFC_ISYM_ACOSD, CLASS_ELEMENTAL, ACTUAL_YES, BT_REAL, dr, GFC_STD_F2023, -gfc_check_fn_r, gfc_simplify_acosd, gfc_resolve_trigd, +gfc_check_fn_r, gfc_simplify_acosd, gfc_resolve_trig, x, BT_REAL, dr, REQUIRED); make_generic ("acosd", GFC_ISYM_ACOSD, GFC_STD_F2023); add_sym_1 ("dacosd", GFC_ISYM_ACOSD, CLASS_ELEMENTAL, ACTUAL_YES, BT_REAL, dd, GFC_STD_GNU, -gfc_check_fn_d, gfc_simplify_acosd, gfc_resolve_trigd, +gfc_check_fn_d, gfc_simplify_acosd, gfc_resolve_trig, x, BT_REAL, dd, REQUIRED); add_sym_1 ("asind", GFC_ISYM_ASIND, CLASS_ELEMENTAL, ACTUAL_YES, BT_REAL, dr, GFC_STD_F2023, -gfc_check_fn_r, gfc_simplify_asind, gfc_resolve_trigd, +gfc_check_fn_r, gfc_simplify_asind, gfc_resolve_trig, x, BT_REAL, dr, REQUIRED); make_generic ("asind", GFC_ISYM_ASIND, GFC_STD_F2023); add_sym_1 ("dasind", GFC_ISYM_ASIND, CLASS_ELEMENTAL, ACTUAL_YES, BT_REAL, dd, GFC_STD_GNU, -gfc_check_fn_d, gfc_simplify_asind, gfc_resolve_trigd, +gfc_check_fn_d, 
gfc_simplify_asind, gfc_resolve_trig, x, BT_REAL, dd, REQUIRED); add_sym_1 ("atand", GFC_ISYM_ATAND, CLASS_ELEMENTAL, ACTUAL_YES, BT_REAL, dr, GFC_STD_F2023, -gfc_check_fn_r, gfc_simplify_atand, gfc_resolve_trigd, +gfc_check_fn_r, gfc_simplify_atand, gfc_reso
[gcc r16-939] Fortran: Adjust handling of optional comma in FORMAT.
https://gcc.gnu.org/g:e2bf0b3910de7e65363435f0a7fa606e2448a677 commit r16-939-ge2bf0b3910de7e65363435f0a7fa606e2448a677 Author: Jerry DeLisle Date: Wed May 28 07:56:12 2025 -0700 Fortran: Adjust handling of optional comma in FORMAT. This change adjusts the error messages for optional commas in format strings to give a warning at compile time unless -std=legacy is used. This is more consistent with the runtime library. A missing comma separator should not be encouraged as it is non-standard Fortran. PR fortran/119586 gcc/fortran/ChangeLog: * io.cc: Set missing comma error checks to GFC_STD_LEGACY. gcc/testsuite/ChangeLog: * gfortran.dg/comma_format_extension_1.f: Update dg-options to "-std=legacy". * gfortran.dg/comma_format_extension_3.f: Likewise. * gfortran.dg/continuation_13.f90: Likewise. Diff: --- gcc/fortran/io.cc| 6 -- gcc/testsuite/gfortran.dg/comma_format_extension_1.f | 2 +- gcc/testsuite/gfortran.dg/comma_format_extension_3.f | 2 +- gcc/testsuite/gfortran.dg/continuation_13.f90| 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/gcc/fortran/io.cc b/gcc/fortran/io.cc index b5c9d3337496..7466d8fe0945 100644 --- a/gcc/fortran/io.cc +++ b/gcc/fortran/io.cc @@ -1228,7 +1228,8 @@ between_desc: default: if (mode != MODE_FORMAT) format_locus.nextc += format_string_pos - 1; - if (!gfc_notify_std (GFC_STD_GNU, "Missing comma at %L", &format_locus)) + if (!gfc_notify_std (GFC_STD_LEGACY, + "Missing comma in FORMAT string at %L", &format_locus)) return false; /* If we do not actually return a failure, we need to unwind this before the next round. */ @@ -1290,7 +1291,8 @@ extension_optional_comma: default: if (mode != MODE_FORMAT) format_locus.nextc += format_string_pos; - if (!gfc_notify_std (GFC_STD_GNU, "Missing comma at %L", &format_locus)) + if (!gfc_notify_std (GFC_STD_LEGACY, + "Missing comma in FORMAT string at %L", &format_locus)) return false; /* If we do not actually return a failure, we need to unwind this before the next round. 
*/ diff --git a/gcc/testsuite/gfortran.dg/comma_format_extension_1.f b/gcc/testsuite/gfortran.dg/comma_format_extension_1.f index a3a5a98f155f..c4b43f01bc3a 100644 --- a/gcc/testsuite/gfortran.dg/comma_format_extension_1.f +++ b/gcc/testsuite/gfortran.dg/comma_format_extension_1.f @@ -1,5 +1,5 @@ ! { dg-do compile } -! { dg-options "" } +! { dg-options "-std=legacy" } ! test that the extension for a missing comma is accepted subroutine mysub diff --git a/gcc/testsuite/gfortran.dg/comma_format_extension_3.f b/gcc/testsuite/gfortran.dg/comma_format_extension_3.f index 0b002249b469..9d974d6b90c2 100644 --- a/gcc/testsuite/gfortran.dg/comma_format_extension_3.f +++ b/gcc/testsuite/gfortran.dg/comma_format_extension_3.f @@ -3,7 +3,7 @@ ! did do the correct thing at runtime. ! Note the missing , before i1 in the format. ! { dg-do run } -! { dg-options "" } +! { dg-options "-std=legacy" } character*12 c write (c,100) 0, 1 diff --git a/gcc/testsuite/gfortran.dg/continuation_13.f90 b/gcc/testsuite/gfortran.dg/continuation_13.f90 index 9799b59e86ef..475c89639980 100644 --- a/gcc/testsuite/gfortran.dg/continuation_13.f90 +++ b/gcc/testsuite/gfortran.dg/continuation_13.f90 @@ -1,5 +1,5 @@ ! { dg-do run } -! { dg-options "-std=gnu" } +! { dg-options "-std=legacy" } ! PR64506 character(25) :: astring
[gcc(refs/users/mikael/heads/refactor_descriptor_v05)] Correction régression elemental_dependency_4
https://gcc.gnu.org/g:51dd902bd248b4d2866eaf37460137e219a3b95a commit 51dd902bd248b4d2866eaf37460137e219a3b95a Author: Mikael Morin Date: Wed May 28 10:41:11 2025 +0200 Correction régression elemental_dependency_4 Diff: --- gcc/testsuite/gfortran.dg/elemental_dependency_4.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gfortran.dg/elemental_dependency_4.f90 b/gcc/testsuite/gfortran.dg/elemental_dependency_4.f90 index 58d90c019284..748f5f31e67c 100644 --- a/gcc/testsuite/gfortran.dg/elemental_dependency_4.f90 +++ b/gcc/testsuite/gfortran.dg/elemental_dependency_4.f90 @@ -61,7 +61,7 @@ PROGRAM Main ! This should not create a temporary array = Charles(array) If (any (array .ne. index)) STOP 2 -! { dg-final { scan-tree-dump-times "array\\\[\[^\\\]\]*\\\]\\s*=\\s*charles\\s*\\(&array\\\[\[^\\\]\]*\\\]\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times {array\[[^\]]*\](?:{lb: . sz: 4})?\s*=\s*charles\s*\(&array\[[^\]]*\](?:{lb: . sz: 4})?\);} 1 "original" } } ! Check use association of the function works correctly. arraym = Bill(index,arraym) @@ -75,7 +75,7 @@ PROGRAM Main array = (/ (i+0.0, i = 1,5) /) ! This should not create a temporary array = index + Henry2(0) - array -! { dg-final { scan-tree-dump-times "array\\\[\[^\\\]\]*\\\]\\s*=\\s*\\(\\(real\\(kind=4\\)\\)\\s*index\\\[\[^\\\]\]*\\\]\\s*\\+\\s*D.\\d*\\)\\s*-\\s*array\\\[\[^\\\]\]*\\\];" 1 "original" } } +! { dg-final { scan-tree-dump-times {array\[[^\]]*\](?:{lb: . sz: 4})?\s*=\s*\(\(real\(kind=4\)\)\s*index\[[^\]]*\](?:{lb: . sz: 4})?\s*\+\s*D.\d*\)\s*-\s*array\[[^\]]*\](?:{lb: . sz: 4})?;} 1 "original" } } if (any (array .ne. 15.0)) STOP 5 arraym = (/ (i+0.0, i = 1,5) /)
[gcc(refs/users/mikael/heads/refactor_descriptor_v05)] Correction régression bind-c-contiguous-2
https://gcc.gnu.org/g:374a2f35ed729903fd38b1c1ba16ebd64bd2ce0f commit 374a2f35ed729903fd38b1c1ba16ebd64bd2ce0f Author: Mikael Morin Date: Wed May 28 11:05:30 2025 +0200 Correction régression bind-c-contiguous-2 Diff: --- gcc/testsuite/gfortran.dg/bind-c-contiguous-2.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gfortran.dg/bind-c-contiguous-2.f90 b/gcc/testsuite/gfortran.dg/bind-c-contiguous-2.f90 index 243c4a57cba4..9f8dc9005df4 100644 --- a/gcc/testsuite/gfortran.dg/bind-c-contiguous-2.f90 +++ b/gcc/testsuite/gfortran.dg/bind-c-contiguous-2.f90 @@ -67,7 +67,7 @@ end ! { dg-final { scan-tree-dump-times "yy->data = \\(void \\* restrict\\) _yy->base_addr;" 1 "original" } } ! { dg-final { scan-tree-dump-times {__builtin_memcpy \(_yy->base_addr \+ shift.[0-9]+, \(void \*\) yy->data \+ yy->dtype.elem_len \* arrayidx.[0-9]+, (?:NON_LVALUE_EXPR <)?yy->dtype.elem_len>?\);} 1 "original" } } -! { dg-final { scan-tree-dump-times "zz = \\(character\\(kind=1\\)\\\[0:\\\]\\\[1:zz.\[0-9\]+\\\] \\* restrict\\) _zz->base_addr;" 1 "original" } } +! { dg-final { scan-tree-dump-times {zz = \(character\(kind=1\)\[.:\]\[1:zz.[0-9]+\] \* restrict\) _zz->base_addr;} 1 "original" } } ! { dg-final { scan-tree-dump-times "__builtin_memcpy \\(\\(void \\*\\) zz \\+ _zz->elem_len \\* arrayidx.\[0-9\]+, _zz->base_addr \\+ shift.\[0-9\]+, _zz->elem_len\\);" 1 "original" } } ! { dg-final { scan-tree-dump-times "__builtin_memcpy \\(_zz->base_addr \\+ shift.\[0-9\]+, \\(void \\*\\) zz \\+ _zz->elem_len \\* arrayidx.\[0-9\]+, _zz->elem_len\\);" 1 "original" } } @@ -78,5 +78,5 @@ end ! { dg-final { scan-tree-dump-times "aa->data = \\(void \\* restrict\\) _aa->base_addr;" 1 "original" } } ! { dg-final { scan-tree-dump-times {__builtin_memcpy \(\(void \*\) bb->data \+ bb->dtype.elem_len \* arrayidx.[0-9]+, _bb->base_addr \+ shift.[0-9]+, (?:NON_LVALUE_EXPR <)?bb->dtype.elem_len>?\);} 1 "original" } } ! 
{ dg-final { scan-tree-dump-times "bb->data = \\(void \\* restrict\\) _bb->base_addr;" 1 "original" } } -! { dg-final { scan-tree-dump-times "cc = \\(character\\(kind=1\\)\\\[0:\\\]\\\[1:cc.\[0-9\]+\\\] \\* restrict\\) _cc->base_addr;" 1 "original" } } +! { dg-final { scan-tree-dump-times {cc = \(character\(kind=1\)\[.:\]\[1:cc.[0-9]+\] \* restrict\) _cc->base_addr;} 1 "original" } } ! { dg-final { scan-tree-dump-times "__builtin_memcpy \\(\\(void \\*\\) cc \\+ _cc->elem_len \\* arrayidx.\[0-9\]+, _cc->base_addr \\+ shift.\[0-9\]+, _cc->elem_len\\);" 1 "original" } }
[gcc(refs/users/mikael/heads/refactor_descriptor_v05)] Correction régression class_dummy_11
https://gcc.gnu.org/g:b9e044f7916d2f59766bd7fdfa7d9b0e110f78ca commit b9e044f7916d2f59766bd7fdfa7d9b0e110f78ca Author: Mikael Morin Date: Wed May 28 12:28:20 2025 +0200 Correction régression class_dummy_11 Diff: --- gcc/fortran/trans-descriptor.cc | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gcc/fortran/trans-descriptor.cc b/gcc/fortran/trans-descriptor.cc index 600c1bd43a5a..c10d1d9c6f95 100644 --- a/gcc/fortran/trans-descriptor.cc +++ b/gcc/fortran/trans-descriptor.cc @@ -2613,11 +2613,14 @@ gfc_copy_sequence_descriptor (stmtblock_t &block, tree lhs_desc, tree rhs_desc, gfc_conv_descriptor_lbound_set (&block, arr, gfc_index_zero_node, gfc_index_zero_node); tree size = gfc_conv_descriptor_size (rhs_desc, rhs_rank); + tree size_m1 = fold_build2_loc (input_location, MINUS_EXPR, + gfc_array_index_type, size, + gfc_index_one_node); + gfc_conv_descriptor_ubound_set (&block, arr, gfc_index_zero_node, size_m1); tree spacing0 = gfc_conv_descriptor_spacing_get (rhs_desc, gfc_index_zero_node); size = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type, size, spacing0); - gfc_conv_descriptor_ubound_set (&block, arr, gfc_index_zero_node, size); gfc_conv_descriptor_spacing_set ( &block, arr, gfc_index_zero_node, spacing0); for (int i = 1; i < lhs_rank; i++) {
[gcc(refs/users/mikael/heads/refactor_descriptor_v05)] Correction partielle régressions array_reference_3
https://gcc.gnu.org/g:db1c7c1f5610d8f49ac4edfc863594e6acaaec14 commit db1c7c1f5610d8f49ac4edfc863594e6acaaec14 Author: Mikael Morin Date: Wed May 28 17:41:37 2025 +0200 Correction partielle régressions array_reference_3 Diff: --- gcc/fortran/trans-expr.cc | 6 +- gcc/fortran/trans-types.cc | 15 +++ gcc/testsuite/gfortran.dg/array_reference_3.f90 | 24 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc index 9845f7fe71d6..c7c53649bcfd 100644 --- a/gcc/fortran/trans-expr.cc +++ b/gcc/fortran/trans-expr.cc @@ -12578,7 +12578,11 @@ gfc_trans_assignment_1 (gfc_expr * expr1, gfc_expr * expr2, bool init_flag, loop.reverse[n] = GFC_ENABLE_REVERSE; /* Resolve any data dependencies in the statement. */ if (may_alias) - gfc_conv_resolve_dependencies (&loop, lss, rss); + { + gfc_conv_resolve_dependencies (&loop, lss, rss); + if (loop.temp_ss) + loop.temp_ss->info->expr = expr2; + } /* Setup the scalarizing loops. */ gfc_conv_loop_setup (&loop, &expr2->where); diff --git a/gcc/fortran/trans-types.cc b/gcc/fortran/trans-types.cc index 19e83a8c3b4b..52ed1ade9623 100644 --- a/gcc/fortran/trans-types.cc +++ b/gcc/fortran/trans-types.cc @@ -1627,9 +1627,15 @@ gfc_build_array_type (tree type, gfc_array_spec * as, ubound[n] = gfc_conv_array_bound (as->upper[n]); } - if (as->type == AS_ASSUMED_SHAPE) -akind = contiguous ? GFC_ARRAY_ASSUMED_SHAPE_CONT - : GFC_ARRAY_ASSUMED_SHAPE; + gfc_packed packed = PACKED_NO; + if (contiguous) +packed = PACKED_FULL; + else if (akind == GFC_ARRAY_ALLOCATABLE + && type_type != BT_CLASS + && type_type != BT_UNKNOWN + && type_type != BT_CHARACTER) +packed = PACKED_STATIC; + else if (as->type == AS_ASSUMED_RANK) { if (akind == GFC_ARRAY_ALLOCATABLE) @@ -1641,9 +1647,10 @@ gfc_build_array_type (tree type, gfc_array_spec * as, akind = contiguous ? GFC_ARRAY_ASSUMED_RANK_CONT : GFC_ARRAY_ASSUMED_RANK; } + return gfc_get_array_type_bounds (type, as->rank == -1 ? 
GFC_MAX_DIMENSIONS : as->rank, - corank, lbound, ubound, 0, akind, + corank, lbound, ubound, packed, akind, restricted, type_type); } diff --git a/gcc/testsuite/gfortran.dg/array_reference_3.f90 b/gcc/testsuite/gfortran.dg/array_reference_3.f90 index 85fa3317d985..e1e2f0f9afa1 100644 --- a/gcc/testsuite/gfortran.dg/array_reference_3.f90 +++ b/gcc/testsuite/gfortran.dg/array_reference_3.f90 @@ -35,7 +35,7 @@ contains call cases(x) if (any(x /= (/ 0, 10, 0 /))) stop 10 ! Assumed shape array are referenced with pointer arithmetic. -! { dg-final { scan-tree-dump-times "\\*\\(\\(integer\\(kind=4\\) \\*\\) assumed_shape_x.\\d+ \\+ \\(sizetype\\) \\(\\(stride.\\d+ \\* 2 \\+ offset.\\d+\\) \\* 4\\)\\) = 10;" 1 "original" } } +! { dg-final { scan-tree-dump-times {\*\(\(integer\(kind=4\) \*\) assumed_shape_x.\d+ \+ \(sizetype\) \(spacing.\d+ \* 2 \+ offset.\d+\)\) = 10;} 1 "original" } } end subroutine check_assumed_shape_elem subroutine casss(assumed_shape_y) integer :: assumed_shape_y(:) @@ -46,7 +46,7 @@ contains call casss(y) if (any(y /= 11)) stop 11 ! Assumed shape array are referenced with pointer arithmetic. -! { dg-final { scan-tree-dump-times "\\*\\(\\(integer\\(kind=4\\) \\*\\) assumed_shape_y.\\d+ \\+ \\(sizetype\\) \\(\\(S.\\d+ \\* D.\\d+ \\+ D.\\d+\\) \\* 4\\)\\) = 11;" 1 "original" } } +! { dg-final { scan-tree-dump-times {\*\(\(integer\(kind=4\) \*\) assumed_shape_y.\d+ \+ \(sizetype\) \(\(?S.\d+(?: \+ -?\d+\))? \* D.\d+\)\) = 11;} 1 "original" } } end subroutine check_assumed_shape_scalarized subroutine check_descriptor_dim integer, allocatable :: descriptor(:) @@ -87,7 +87,7 @@ contains ptr_x(4) = 16 if (any(ptr_x /= (/ 0, 0, 0, 16, 0, 0, 0 /))) stop 16 ! pointers are referenced with pointer arithmetic. -! { dg-final { scan-tree-dump-times "\\*\\(integer\\(kind=4\\) \\*\\) \\(ptr_x\\.data \\+ \\(sizetype\\) \\(\\(ptr_x\\.offset \\+ ptr_x\\.dim\\\[0\\\]\\.stride \\* 4\\) \\* ptr_x\\.span\\)\\) = 16;" 1 "original" } } +! 
{ dg-final { scan-tree-dump-times {\*\(\(integer\(kind=4\) \*\) ptr_x\.data \+ \(sizetype\) \(ptr_x\.offset \+ ptr_x\.dim\[0\]\.spacing \* 4\)\) = 16;} 1 "original" } } end subroutine check_ptr_elem subroutine check_ptr_scalarized integer, target :: y(8) @@ -97,7 +97,7 @@ contains ptr_y = 17 if (any(ptr_y /= 17)) stop 17 ! pointers are referenced with pointer arithmetic. -! { dg-final { scan-tree-dump-times "\\*\\(\\(integer\\(kind=4\\) \\*\\) D.\\d+ \\+ \\(si
[gcc(refs/users/mikael/heads/refactor_descriptor_v05)] Correction régression no_arg_check
https://gcc.gnu.org/g:979f87a17a57eb468e544f910149bd56f9b6def9 commit 979f87a17a57eb468e544f910149bd56f9b6def9 Author: Mikael Morin Date: Wed May 28 12:56:28 2025 +0200 Correction régression no_arg_check Diff: --- gcc/testsuite/gfortran.dg/no_arg_check_2.f90 | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gcc/testsuite/gfortran.dg/no_arg_check_2.f90 b/gcc/testsuite/gfortran.dg/no_arg_check_2.f90 index 3570b9719ebb..89652ea3eeba 100644 --- a/gcc/testsuite/gfortran.dg/no_arg_check_2.f90 +++ b/gcc/testsuite/gfortran.dg/no_arg_check_2.f90 @@ -129,22 +129,22 @@ end ! { dg-final { scan-tree-dump-times "sub_scalar .&scalar_int," 1 "original" } } ! { dg-final { scan-tree-dump-times "sub_scalar .&scalar_t1," 1 "original" } } -! { dg-final { scan-tree-dump-times "sub_scalar .&array_int.1.," 1 "original" } } +! { dg-final { scan-tree-dump-times {sub_scalar \(&array_int\[1\](?:{lb: 0 sz: 4})?,} 1 "original" } } ! { dg-final { scan-tree-dump-times "sub_scalar .&scalar_t1," 1 "original" } } ! { dg-final { scan-tree-dump-times "sub_scalar .&\\(.\\(real.kind=4..0:. . restrict\\) array_real_alloc.data" 1 "original" } } -! { dg-final { scan-tree-dump-times "sub_scalar .\\(character.kind=1..1:1. .\\) .array_char_ptr.data" 1 "original" } } +! { dg-final { scan-tree-dump-times {sub_scalar \(\(character\(kind=1\)\[1:1\] \*\) \(?array_char_ptr\.data \+} 1 "original" } } ! { dg-final { scan-tree-dump-times "sub_scalar .&\\(.\\(struct t2.0:. . restrict\\) array_t2_alloc.data" 1 "original" } } -! { dg-final { scan-tree-dump-times "sub_scalar .\\(struct t3 .\\) .array_t3_ptr.data" 1 "original" } } +! { dg-final { scan-tree-dump-times {sub_scalar \(\(struct t3 \*\) \(?array_t3_ptr\.data \+} 1 "original" } } ! { dg-final { scan-tree-dump-times "sub_scalar .\\(struct t1 .\\) array_class_t1_alloc._data.data" 1 "original" } } -! { dg-final { scan-tree-dump-times "sub_scalar .\\(struct t1 .\\) \\(array_class_t1_ptr._data.dat" 1 "original" } } +! 
{ dg-final { scan-tree-dump-times {sub_scalar \(\(struct t1 \*\) \(?array_class_t1_ptr\._data\.data \+} 1 "original" } } ! { dg-final { scan-tree-dump-times "sub_array_assumed \\(D" 4 "original" } } ! { dg-final { scan-tree-dump-times " = _gfortran_internal_pack \\(&parm" 1 "original" } } ! { dg-final { scan-tree-dump-times "sub_array_assumed \\(&array_int\\)" 1 "original" } } ! { dg-final { scan-tree-dump-times "sub_array_assumed \\(\\(real\\(kind=4\\).0:. . restrict\\) array_real_alloc.data" 1 "original" } } ! { dg-final { scan-tree-dump-times " = _gfortran_internal_pack \\(&array_char_ptr\\);" 1 "original" } } -! { dg-final { scan-tree-dump-times "\\.data = \\(void .\\) &array_t1.0.;" 1 "original" } } +! { dg-final { scan-tree-dump-times {\.data = \(void \*\) &array_t1\[0\](?:{lb: 0 sz: 4})?;} 1 "original" } } ! { dg-final { scan-tree-dump-times "sub_array_assumed \\(\\(struct t1.0:. .\\) parm" 1 "original" } } ! { dg-final { scan-tree-dump-times "sub_array_assumed \\(\\(struct t2.0:. . restrict\\) array_t2_alloc.data\\);" 1 "original" } } ! { dg-final { scan-tree-dump-times "sub_array_assumed \\(\\(struct t1.0:. . restrict\\) array_class_t1_alloc._data.data\\);" 1 "original" } }
[gcc(refs/users/mikael/heads/refactor_descriptor_v05)] Correction régression intrinsic_spread_1
https://gcc.gnu.org/g:87e3baca4b69a856c3857702d4b83394eb6286c0 commit 87e3baca4b69a856c3857702d4b83394eb6286c0 Author: Mikael Morin Date: Wed May 28 15:34:53 2025 +0200 Correction régression intrinsic_spread_1 Diff: --- libgfortran/generated/spread_c10.c | 5 ++--- libgfortran/generated/spread_c16.c | 5 ++--- libgfortran/generated/spread_c17.c | 5 ++--- libgfortran/generated/spread_c4.c | 5 ++--- libgfortran/generated/spread_c8.c | 5 ++--- libgfortran/generated/spread_i1.c | 5 ++--- libgfortran/generated/spread_i16.c | 5 ++--- libgfortran/generated/spread_i2.c | 5 ++--- libgfortran/generated/spread_i4.c | 5 ++--- libgfortran/generated/spread_i8.c | 5 ++--- libgfortran/generated/spread_r10.c | 5 ++--- libgfortran/generated/spread_r16.c | 5 ++--- libgfortran/generated/spread_r17.c | 5 ++--- libgfortran/generated/spread_r4.c | 5 ++--- libgfortran/generated/spread_r8.c | 5 ++--- libgfortran/intrinsics/spread_generic.c | 5 ++--- libgfortran/m4/spread.m4| 5 ++--- 17 files changed, 34 insertions(+), 51 deletions(-) diff --git a/libgfortran/generated/spread_c10.c b/libgfortran/generated/spread_c10.c index 35a46a088796..d0efc10f5afe 100644 --- a/libgfortran/generated/spread_c10.c +++ b/libgfortran/generated/spread_c10.c @@ -247,9 +247,8 @@ spread_scalar_c10 (gfc_array_c10 *ret, const GFC_COMPLEX_10 *source, } else { - if (ncopies - 1 > (GFC_DESCRIPTOR_EXTENT(ret,0) - 1) - / GFC_DESCRIPTOR_SPACING(ret,0)) - runtime_error ("dim too large in spread()"); + if (ncopies != GFC_DESCRIPTOR_EXTENT(ret,0)) + runtime_error ("ncopies does not match result extent in spread()"); } dest = ret->base_addr; diff --git a/libgfortran/generated/spread_c16.c b/libgfortran/generated/spread_c16.c index a5537600fb00..a2293d1ee9df 100644 --- a/libgfortran/generated/spread_c16.c +++ b/libgfortran/generated/spread_c16.c @@ -247,9 +247,8 @@ spread_scalar_c16 (gfc_array_c16 *ret, const GFC_COMPLEX_16 *source, } else { - if (ncopies - 1 > (GFC_DESCRIPTOR_EXTENT(ret,0) - 1) - / 
GFC_DESCRIPTOR_SPACING(ret,0)) - runtime_error ("dim too large in spread()"); + if (ncopies != GFC_DESCRIPTOR_EXTENT(ret,0)) + runtime_error ("ncopies does not match result extent in spread()"); } dest = ret->base_addr; diff --git a/libgfortran/generated/spread_c17.c b/libgfortran/generated/spread_c17.c index ecf99d32abc6..d4fbc68ea123 100644 --- a/libgfortran/generated/spread_c17.c +++ b/libgfortran/generated/spread_c17.c @@ -247,9 +247,8 @@ spread_scalar_c17 (gfc_array_c17 *ret, const GFC_COMPLEX_17 *source, } else { - if (ncopies - 1 > (GFC_DESCRIPTOR_EXTENT(ret,0) - 1) - / GFC_DESCRIPTOR_SPACING(ret,0)) - runtime_error ("dim too large in spread()"); + if (ncopies != GFC_DESCRIPTOR_EXTENT(ret,0)) + runtime_error ("ncopies does not match result extent in spread()"); } dest = ret->base_addr; diff --git a/libgfortran/generated/spread_c4.c b/libgfortran/generated/spread_c4.c index dcf621dad033..41e5109097ea 100644 --- a/libgfortran/generated/spread_c4.c +++ b/libgfortran/generated/spread_c4.c @@ -247,9 +247,8 @@ spread_scalar_c4 (gfc_array_c4 *ret, const GFC_COMPLEX_4 *source, } else { - if (ncopies - 1 > (GFC_DESCRIPTOR_EXTENT(ret,0) - 1) - / GFC_DESCRIPTOR_SPACING(ret,0)) - runtime_error ("dim too large in spread()"); + if (ncopies != GFC_DESCRIPTOR_EXTENT(ret,0)) + runtime_error ("ncopies does not match result extent in spread()"); } dest = ret->base_addr; diff --git a/libgfortran/generated/spread_c8.c b/libgfortran/generated/spread_c8.c index f9304954d823..28e1b3efae43 100644 --- a/libgfortran/generated/spread_c8.c +++ b/libgfortran/generated/spread_c8.c @@ -247,9 +247,8 @@ spread_scalar_c8 (gfc_array_c8 *ret, const GFC_COMPLEX_8 *source, } else { - if (ncopies - 1 > (GFC_DESCRIPTOR_EXTENT(ret,0) - 1) - / GFC_DESCRIPTOR_SPACING(ret,0)) - runtime_error ("dim too large in spread()"); + if (ncopies != GFC_DESCRIPTOR_EXTENT(ret,0)) + runtime_error ("ncopies does not match result extent in spread()"); } dest = ret->base_addr; diff --git 
a/libgfortran/generated/spread_i1.c b/libgfortran/generated/spread_i1.c index 3652320a0650..a6740e474781 100644 --- a/libgfortran/generated/spread_i1.c +++ b/libgfortran/generated/spread_i1.c @@ -247,9 +247,8 @@ spread_scalar_i1 (gfc_array_i1 *ret, const GFC_INTEGER_1 *source, } else { - if (ncopies - 1 > (GFC_DESCRIPTOR_EXTENT(ret,0) - 1) - / GFC_DESCRIPTOR_SPACING(ret,0)) - runtime_error ("dim too large in spread()"); + if (ncopies != GFC_DESCRIPTOR_EXTENT(ret,0)) + runtime_error ("ncopies does not mat
[gcc(refs/users/mikael/heads/refactor_descriptor_v05)] Correction régressions array_reference_3
https://gcc.gnu.org/g:f58af2eecc319030c685e56c31c14877e15e3b16 commit f58af2eecc319030c685e56c31c14877e15e3b16 Author: Mikael Morin Date: Wed May 28 18:56:02 2025 +0200 Correction régressions array_reference_3 Diff: --- gcc/fortran/trans-array.cc | 82 + gcc/testsuite/gfortran.dg/array_reference_3.f90 | 4 +- 2 files changed, 59 insertions(+), 27 deletions(-) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 0109f135cfbe..127cc0cd0951 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -3590,6 +3590,59 @@ build_array_ref (tree array, tree offset, bool use_array_ref) } +static bool +array_section_parent_ref_is_contiguous (gfc_expr *expr, gfc_array_ref *ar) +{ + if (expr == nullptr || ar == nullptr) +return false; + + gfc_symbol *root_sym = expr->symtree->n.sym; + bt last_type = root_sym->ts.type; + bool last_is_allocatable = root_sym->attr.allocatable; + bool last_is_contiguous = root_sym->attr.contiguous; + for (gfc_ref *ref = expr->ref; ref; ref = ref->next) +{ + if (ref->type == REF_ARRAY && &ref->u.ar == ar) + break; + + if (ref->type != REF_COMPONENT) + continue; + + if (last_type == BT_CLASS + && strcmp (ref->u.c.component->name, "_data") == 0) + continue; + + last_type = ref->u.c.component->ts.type; + last_is_allocatable = ref->u.c.component->attr.allocatable; + last_is_contiguous = ref->u.c.component->attr.contiguous; +} + + if (!(last_type == BT_CLASS || last_type == BT_CHARACTER) + && (last_is_allocatable || last_is_contiguous)) +return true; + + return false; +} + + +static bool +array_section_parent_ref_is_contiguous (gfc_expr *expr, gfc_ref *array_ref) +{ + if (array_ref == nullptr) +return false; + + return array_section_parent_ref_is_contiguous (expr, &array_ref->u.ar); +} + + +static bool +array_section_parent_ref_is_contiguous (gfc_ss *ss) +{ + return array_section_parent_ref_is_contiguous (ss->info->expr, +ss->info->data.array.ref); +} + + tree build_array_ref_dim (gfc_ss *ss, tree index, tree lbound, 
tree spacing, bool tmp_array = false) @@ -3603,6 +3656,8 @@ build_array_ref_dim (gfc_ss *ss, tree index, tree lbound, tree spacing, || ss_type == GFC_SS_CONSTRUCTOR || ss_type == GFC_SS_INTRINSIC || tmp_array +|| (ss_type == GFC_SS_SECTION +&& array_section_parent_ref_is_contiguous (ss)) || non_negative_strides_array_p (info->descriptor); return gfc_build_array_ref (base, index, non_negative_stride, lbound, spacing); @@ -3667,9 +3722,6 @@ add_to_offset (tree *cst_offset, tree *offset, tree t) bool array_ref_safe_p (gfc_expr *expr, gfc_array_ref *ar, tree array, tree *elt_size) { - if (!non_negative_strides_array_p (array)) -return false; - STRIP_NOPS (array); if (TREE_CODE (array) == COMPONENT_REF) { @@ -3682,29 +3734,9 @@ array_ref_safe_p (gfc_expr *expr, gfc_array_ref *ar, tree array, tree *elt_size) return false; } - gfc_symbol *root_sym = expr->symtree->n.sym; - bt last_type = root_sym->ts.type; - bool last_is_allocatable = root_sym->attr.allocatable; - for (gfc_ref *ref = expr->ref; ref; ref = ref->next) -{ - if (ref->type == REF_ARRAY && &ref->u.ar == ar) - break; - - if (ref->type != REF_COMPONENT) - continue; - - if (last_type == BT_CLASS - && strcmp (ref->u.c.component->name, "_data") == 0) - continue; - - last_type = ref->u.c.component->ts.type; - last_is_allocatable = ref->u.c.component->attr.allocatable; -} - - if (!(last_type == BT_CLASS || last_type == BT_CHARACTER) - && last_is_allocatable) + if (array_section_parent_ref_is_contiguous (expr, ar)) ; - else if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (array))) + else if (!non_negative_strides_array_p (array)) return false; tree elt_type = gfc_get_element_type (TREE_TYPE (array)); diff --git a/gcc/testsuite/gfortran.dg/array_reference_3.f90 b/gcc/testsuite/gfortran.dg/array_reference_3.f90 index e1e2f0f9afa1..a7a5c03561a9 100644 --- a/gcc/testsuite/gfortran.dg/array_reference_3.f90 +++ b/gcc/testsuite/gfortran.dg/array_reference_3.f90 @@ -178,7 +178,7 @@ contains call casces(x) if (any(x /= (/ 0, 0, 0, 24, 0, 
0, 0, 0 /))) stop 24 ! Contiguous assumed shape arrays are referenced with array indexing. -! { dg-final { scan-tree-dump-times "\\(\\*assumed_shape_cont_x.\\d+\\)\\\[stride.\\d+ \\* 4 \\+ offset.\\d+\\\] = 24;" 1 "original" } } +! { dg-final { scan-tree-dump-times {\(\*assumed_shape_cont_x.\d+\)\[offset.\d+ /\[ex\] 4 \+ spacing\.\d+\](?:{lb: 0 sz: 4})? = 24;} 1 "original" } } end subroutine check_assumed
[gcc(refs/users/mikael/heads/refactor_descriptor_v05)] Correction régression bind_c_char_9
https://gcc.gnu.org/g:3eed929a366719417099e95832157737c0aae732 commit 3eed929a366719417099e95832157737c0aae732 Author: Mikael Morin Date: Wed May 28 12:36:43 2025 +0200 Correction régression bind_c_char_9 Diff: --- gcc/testsuite/gfortran.dg/bind_c_char_9.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gfortran.dg/bind_c_char_9.f90 b/gcc/testsuite/gfortran.dg/bind_c_char_9.f90 index 64d73409df37..1c1f69527190 100644 --- a/gcc/testsuite/gfortran.dg/bind_c_char_9.f90 +++ b/gcc/testsuite/gfortran.dg/bind_c_char_9.f90 @@ -201,7 +201,7 @@ end ! { dg-final { scan-tree-dump-not "dtype" "original" } } ! { dg-final { scan-tree-dump-times "void s1 \\(character\\(kind=1\\)\\\[1:1\\\] & restrict x1\\)" 1 "original" } } ! { dg-final { scan-tree-dump-not "void s2 " "original" } } -! { dg-final { scan-tree-dump-times "void az1 \\(character\\(kind=1\\)\\\[0:\\\]\\\[1:1\\\] \\* restrict x1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times {void az1 \(character\(kind=1\)\[.:\]\[1:1\] \* restrict x1\)} 1 "original" } } ! { dg-final { scan-tree-dump-not "void az2 " "original" } } -! { dg-final { scan-tree-dump-times "void ae1 \\(character\\(kind=1\\)\\\[6\\\]\\\[1:1\\\] \\* restrict x1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times {void ae1 \(character\(kind=1\)\[(?:1:)?6\]\[1:1\] \* restrict x1\)} 1 "original" } } ! { dg-final { scan-tree-dump-not "void ae2 " "original" } }
[gcc r16-931] RISC-V: Leverage vaadd.vv for signed standard name avg_floor
https://gcc.gnu.org/g:f4456ea9e955b971573cdfebd1d10797fd30ad3a commit r16-931-gf4456ea9e955b971573cdfebd1d10797fd30ad3a Author: Pan Li Date: Tue May 27 09:53:56 2025 +0800 RISC-V: Leverage vaadd.vv for signed standard name avg_floor The signed avg_floor totally matches the semantics of fixed point rvv insn vaadd, with round down. Thus, leverage it directly to implement the avg_floor. The spec of RVV is somehow not that clear about the difference between the floating point and fixed point for the rounding that discards least-significant information. For floating point which is not two's complement, the "discard least-significant information" indicates truncation round. For example as below: * 3.5 -> 3 * -2.3 -> -2 For fixed point which is two's complement, the "discard least-significant information" indicates round down. For example as below: * 3.5 -> 3 * -2.3 -> -3 And the vaadd takes the round down which is totally matching the semantics of the avg_floor. The below test suites are passed for this patch series. * The rv64gcv full regression test. gcc/ChangeLog: * config/riscv/autovec.md (avg3_floor): Add insn expand to leverage vaadd directly. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/autovec.md | 19 ++- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 9e51e3ce6a30..a54f552a80ce 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2491,19 +2491,12 @@ (sign_extend:VWEXTI (match_operand: 2 "register_operand"))] "TARGET_VECTOR" -{ - /* First emit a widening addition. */ - rtx tmp1 = gen_reg_rtx (mode); - rtx ops1[] = {tmp1, operands[1], operands[2]}; - insn_code icode = code_for_pred_dual_widen (PLUS, SIGN_EXTEND, mode); - riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1); - - /* Then a narrowing shift. 
*/ - rtx ops2[] = {operands[0], tmp1, const1_rtx}; - icode = code_for_pred_narrow_scalar (ASHIFTRT, mode); - riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops2); - DONE; -}) + { +insn_code icode = code_for_pred (UNSPEC_VAADD, mode); +riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RDN, operands); +DONE; + } +) (define_expand "avg3_ceil" [(set (match_operand: 0 "register_operand")
[gcc r16-934] c++: modules and using-directives
https://gcc.gnu.org/g:06b9c58c70a4fee7cc680aaeb5790aaadbb9fc77 commit r16-934-g06b9c58c70a4fee7cc680aaeb5790aaadbb9fc77 Author: Jason Merrill Date: Wed Nov 20 23:46:54 2024 +0100 c++: modules and using-directives We weren't representing 'using namespace' at all in modules, which broke some of the literals tests. This only represents exported using-declarations; others should be irrelevant to importers, as any name lookup in the imported module that would have cared about them was done while compiling the header unit. I experimented with various approaches to representing them; this patch handles them in read/write_namespaces, after the namespaces themselves. I spent a while pondering how to deal with the depset code in order to connect them, but then realized it would be simpler to refer to them based on their index in the array of namespaces. Any using-directives from an indirect import are ignored, so in an export import, any imported using-directives are exported again. gcc/cp/ChangeLog: * module.cc (module_state::write_namespaces): Write using-directives. (module_state::read_namespaces): And read them. * name-lookup.cc (add_using_namespace): Add overload. Build a USING_DECL for modules. (name_lookup::search_usings, name_lookup::queue_usings) (using_directives_contain_std_p): Strip the USING_DECL. * name-lookup.h: Declare it. * parser.cc (cp_parser_import_declaration): Set MK_EXPORTING for export import. gcc/testsuite/ChangeLog: * g++.dg/modules/namespace-8_a.C: New test. * g++.dg/modules/namespace-8_b.C: New test. * g++.dg/modules/namespace-9_a.C: New test. * g++.dg/modules/namespace-9_b.C: New test. * g++.dg/modules/namespace-10_a.C: New test. * g++.dg/modules/namespace-10_b.C: New test. * g++.dg/modules/namespace-10_c.C: New test. * g++.dg/modules/namespace-11_a.C: New test. * g++.dg/modules/namespace-11_b.C: New test. * g++.dg/modules/namespace-11_c.C: New test. 
Diff: --- gcc/cp/name-lookup.h | 1 + gcc/cp/module.cc | 71 +++ gcc/cp/name-lookup.cc | 24 +++-- gcc/cp/parser.cc | 6 +++ gcc/testsuite/g++.dg/modules/namespace-10_a.C | 11 + gcc/testsuite/g++.dg/modules/namespace-10_b.C | 9 gcc/testsuite/g++.dg/modules/namespace-10_c.C | 6 +++ gcc/testsuite/g++.dg/modules/namespace-11_a.C | 11 + gcc/testsuite/g++.dg/modules/namespace-11_b.C | 9 gcc/testsuite/g++.dg/modules/namespace-11_c.C | 6 +++ gcc/testsuite/g++.dg/modules/namespace-8_a.C | 12 + gcc/testsuite/g++.dg/modules/namespace-8_b.C | 8 +++ gcc/testsuite/g++.dg/modules/namespace-9_a.C | 11 + gcc/testsuite/g++.dg/modules/namespace-9_b.C | 6 +++ 14 files changed, 188 insertions(+), 3 deletions(-) diff --git a/gcc/cp/name-lookup.h b/gcc/cp/name-lookup.h index f1596c60c7a1..4216a515afad 100644 --- a/gcc/cp/name-lookup.h +++ b/gcc/cp/name-lookup.h @@ -462,6 +462,7 @@ extern cxx_binding *outer_binding (tree, cxx_binding *, bool); extern void cp_emit_debug_info_for_using (tree, tree); extern void finish_nonmember_using_decl (tree scope, tree name); +extern void add_using_namespace (tree, tree); extern void finish_using_directive (tree target, tree attribs); void push_local_extern_decl_alias (tree decl); extern tree pushdecl (tree, bool hiding = false); diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index 17c040d26b06..ad2acaf559fb 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -16865,11 +16865,15 @@ module_state::write_namespaces (elf_out *to, vec spaces, bytes_out sec (to); sec.begin (); + hash_map ns_map; + for (unsigned ix = 0; ix != num; ix++) { depset *b = spaces[ix]; tree ns = b->get_entity (); + ns_map.put (ns, ix); + /* This could be an anonymous namespace even for a named module, since we can still emit no-linkage decls. 
*/ gcc_checking_assert (TREE_CODE (ns) == NAMESPACE_DECL); @@ -16911,6 +16915,31 @@ module_state::write_namespaces (elf_out *to, vec spaces, } } + /* Now write exported using-directives, as a sequence of 1-origin indices in + the spaces array (not entity indices): First the using namespace, then the + used namespaces. And then a zero terminating the list. :: is + represented as index -1. */ + auto emit_one_ns = [&](unsigned ix, tree ns) { +for (auto udir: NAMESPACE_LEVEL (ns)->using_directives) + { + if (TREE_CODE (udir) != USING_DECL || !DECL_MODULE_EXPORT_P (udir)) + continue; + tree ns2 = USING_DECL_DECLS (udir); + dump() && dump ("Writing using-direct
[gcc r16-935] libgomp.fortran/metadirective-1.f90: Expect 'error:' for nvptx compile [PR118694]
https://gcc.gnu.org/g:5b2e7afb135aa10e17e51b891d4b2c85ee541ade commit r16-935-g5b2e7afb135aa10e17e51b891d4b2c85ee541ade Author: Tobias Burnus Date: Wed May 28 15:14:14 2025 +0200 libgomp.fortran/metadirective-1.f90: Expect 'error:' for nvptx compile [PR118694] This should have been part of commit r16-838-gb3d07ec7ac2ccd or r16-883-g5d6ed6d604ff94 - all showing the same issue: '!$omp target' followed by a metadirective with 'teams'; if the metadirective cannot be early resolved, a diagnostic error is shown about using directives between 'target' and 'teams'. While the message is misleading, the problem is that the host invokes 'target' differently when 'teams' is present; in this case, host fallback + amdgcn offload require the no-teams case, nvptx offload the teams case such that it only can be resolved at runtime. Mark the error as 'dg-bogus + xfail' to silence the FAIL, when nvptx offloading is compiled for. (If not, the metadirective can be resolved early during compilation.) libgomp/ChangeLog: PR middle-end/118694 * testsuite/libgomp.fortran/metadirective-1.f90: xfail when compiling (also) for nvptx offloading as an error is then expected. Diff: --- libgomp/testsuite/libgomp.fortran/metadirective-1.f90 | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 b/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 index 7b3e09f7c2a5..d6f4d5bbde69 100644 --- a/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 +++ b/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 @@ -1,4 +1,5 @@ -! { dg-do run } +! { dg-do run { target { ! offload_target_nvptx } } } +! { dg-do compile { target offload_target_nvptx } } program test implicit none @@ -33,6 +34,10 @@ program test contains subroutine f (x, y, z) integer :: x(N), y(N), z(N) +! The following fails as on the host the target side cannot be +! resolved - and the 'teams' or not status affects how 'target' +! is called. -> See PR118694, esp. comment 9. 
+! Note also the dg-do compile above for offload_target_nvptx !$omp target map (to: x, y) map(from: z) block @@ -43,6 +48,7 @@ contains z(i) = x(i) * y(i) enddo end block +! { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-9 } */ end subroutine subroutine g (x, y, z) integer :: x(N), y(N), z(N) @@ -56,6 +62,7 @@ contains z(i) = x(i) * y(i) enddo end block +! { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-9 } */ !$omp end target end subroutine end program
[gcc r16-940] Fortran: gfc_simplify_{cospi, sinpi} - fix for MPFR < 4.2.0
https://gcc.gnu.org/g:a64a7f0a6cf8afcbad4bd6c9e1b0f1d4f2d5b958 commit r16-940-ga64a7f0a6cf8afcbad4bd6c9e1b0f1d4f2d5b958 Author: Tobias Burnus Date: Wed May 28 22:35:03 2025 +0200 Fortran: gfc_simplify_{cospi,sinpi} - fix for MPFR < 4.2.0 gcc/fortran/ChangeLog: PR fortran/113152 * simplify.cc (gfc_simplify_cospi, gfc_simplify_sinpi): Avoid using mpfr_fmod_ui in the MPFR < 4.2.0 version. Diff: --- gcc/fortran/simplify.cc | 18 ++ 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/gcc/fortran/simplify.cc b/gcc/fortran/simplify.cc index 2ceb479faf55..b25cd2c2388b 100644 --- a/gcc/fortran/simplify.cc +++ b/gcc/fortran/simplify.cc @@ -2288,10 +2288,10 @@ gfc_simplify_cospi (gfc_expr *x) #if MPFR_VERSION >= MPFR_VERSION_NUM(4, 2, 0) mpfr_cospi (result->value.real, x->value.real, GFC_RND_MODE); #else - mpfr_t cs, n, r; + mpfr_t cs, n, r, two; int s; - mpfr_inits2 (2 * mpfr_get_prec (x->value.real), cs, n, r, NULL); + mpfr_inits2 (2 * mpfr_get_prec (x->value.real), cs, n, r, two, NULL); mpfr_abs (r, x->value.real, GFC_RND_MODE); mpfr_modf (n, r, r, GFC_RND_MODE); @@ -2302,7 +2302,8 @@ gfc_simplify_cospi (gfc_expr *x) return result; } - mpfr_fmod_ui (cs, n, 2, GFC_RND_MODE); + mpfr_set_ui (two, 2, GFC_RND_MODE); + mpfr_fmod (cs, n, two, GFC_RND_MODE); s = mpfr_cmp_ui (cs, 0) == 0 ? 
1 : -1; mpfr_const_pi (cs, GFC_RND_MODE); @@ -2310,7 +2311,7 @@ gfc_simplify_cospi (gfc_expr *x) mpfr_cos (cs, cs, GFC_RND_MODE); mpfr_mul_si (result->value.real, cs, s, GFC_RND_MODE); - mpfr_clears (cs, n, r, NULL); + mpfr_clears (cs, n, r, two, NULL); #endif return range_check (result, "COSPI"); @@ -2329,10 +2330,10 @@ gfc_simplify_sinpi (gfc_expr *x) #if MPFR_VERSION >= MPFR_VERSION_NUM(4, 2, 0) mpfr_sinpi (result->value.real, x->value.real, GFC_RND_MODE); #else - mpfr_t sn, n, r; + mpfr_t sn, n, r, two; int s; - mpfr_inits2 (2 * mpfr_get_prec (x->value.real), sn, n, r, NULL); + mpfr_inits2 (2 * mpfr_get_prec (x->value.real), sn, n, r, two, NULL); mpfr_abs (r, x->value.real, GFC_RND_MODE); mpfr_modf (n, r, r, GFC_RND_MODE); @@ -2343,7 +2344,8 @@ gfc_simplify_sinpi (gfc_expr *x) return result; } - mpfr_fmod_ui (sn, n, 2, GFC_RND_MODE); + mpfr_set_ui (two, 2, GFC_RND_MODE); + mpfr_fmod (sn, n, two, GFC_RND_MODE); s = mpfr_cmp_si (x->value.real, 0) < 0 ? -1 : 1; s *= mpfr_cmp_ui (sn, 0) == 0 ? 1 : -1; @@ -2352,7 +2354,7 @@ gfc_simplify_sinpi (gfc_expr *x) mpfr_sin (sn, sn, GFC_RND_MODE); mpfr_mul_si (result->value.real, sn, s, GFC_RND_MODE); - mpfr_clears (sn, n, r, NULL); + mpfr_clears (sn, n, r, two, NULL); #endif return range_check (result, "SINPI");
[gcc(refs/users/mikael/heads/refactor_descriptor_v05)] Correction régression transfer_class_3
https://gcc.gnu.org/g:39fbf4e71a6356ca96e704613d037a902b1da2f0 commit 39fbf4e71a6356ca96e704613d037a902b1da2f0 Author: Mikael Morin Date: Wed May 28 22:54:16 2025 +0200 Correction régression transfer_class_3 Diff: --- gcc/fortran/trans-types.cc | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/gcc/fortran/trans-types.cc b/gcc/fortran/trans-types.cc index 52ed1ade9623..a2cafaf1a8ef 100644 --- a/gcc/fortran/trans-types.cc +++ b/gcc/fortran/trans-types.cc @@ -3286,6 +3286,11 @@ gfc_get_derived_type (gfc_symbol * derived, int codimen) && c->ts.u.derived->attr.unlimited_polymorphic && field_type == ptr_type_node) field_type = char_type_node; + + bt type_type = derived->attr.is_class +&& strcmp (c->name, "_data") == 0 +? BT_CLASS : c->ts.type; + /* Pointers to arrays aren't actually pointer types. The descriptors are separate, but the data is common. Every array pointer in a coarray derived type needs to provide space @@ -3296,7 +3301,7 @@ gfc_get_derived_type (gfc_symbol * derived, int codimen) field_type, c->as, akind, !c->attr.target && !c->attr.pointer, c->attr.contiguous, c->attr.codimension || c->attr.pointer ? codimen : 0, - c->ts.type + type_type ); } else
[gcc(refs/users/meissner/heads/work206-bugs)] Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.
https://gcc.gnu.org/g:caa5d021b3b198397b94086f2ddf58c7d7485237 commit caa5d021b3b198397b94086f2ddf58c7d7485237 Author: Michael Meissner Date: Wed May 28 17:16:56 2025 -0400 Fix PR 118541, do not generate unordered fp cmoves for IEEE compares. In bug PR target/118541 on power9, power10, and power11 systems, for the function: extern double __ieee754_acos (double); double __acospi (double x) { double ret = __ieee754_acos (x) / 3.14; return __builtin_isgreater (ret, 1.0) ? 1.0 : ret; } GCC currently generates the following code: Power9 Power10 and Power11 == === bl __ieee754_acos bl __ieee754_acos@notoc nop plfd 0,.LC0@pcrel addis 9,2,.LC2@toc@ha xxspltidp 12,1065353216 addi 1,1,32 addi 1,1,32 lfd 0,.LC2@toc@l(9) ld 0,16(1) addis 9,2,.LC0@toc@ha fdiv 0,1,0 ld 0,16(1) mtlr 0 lfd 12,.LC0@toc@l(9) xscmpgtdp 1,0,12 fdiv 0,1,0 xxsel 1,0,12,1 mtlr 0 blr xscmpgtdp 1,0,12 xxsel 1,0,12,1 blr This is because ifcvt.c optimizes the conditional floating point move to use the XSCMPGTDP instruction. However, the XSCMPGTDP instruction traps if one of the arguments is a signaling NaN. This patch disables generating XSCMP{EQ,GT,GE}{DP,QP} instructions unless -ffinite-math-only is in effect so that we do not get a trap. 2025-05-28 Michael Meissner gcc/ PR target/118541 * config/rs6000/rs6000.cc (have_compare_and_set_mask): Don't do compare and set mask operations unless -ffinite-math-only. * config/rs6000/rs6000.md (movcc_p9): Disable generating XSCMP{EQ,GT,GE}{DP,QP} unless -ffinite-math-only is in effect. (movcc_invert_p9): Likewise. (fpmask, SFDF iterator): Likewise. (xxsel, SFDF iterator): Likewise. (movcc, IEEE128 iterator): Likewise. (movcc_p10): Likewise. (movcc_invert_p10): Likewise. (fpmask, IEEE128 iterator): Likewise. (xxsel, IEEE128 iterator): Likewise. gcc/testsuite/ PR target/118541 * gcc.target/powerpc/float128-cmove.c: Change optimization flag to -Ofast instead of -O2. * gcc.target/powerpc/float128-minmax-3.c: Likewise. 
* gcc.target/powerpc/p9-minmax-2.c: Delete test, the code is no longer valid unless NaNs are not handled. * gcc.target/powerpc/pr118541-1.c: New test. * gcc.target/powerpc/pr118541-2.c: Likewise. Diff: --- gcc/config/rs6000/rs6000.cc| 8 ++- gcc/config/rs6000/rs6000.md| 27 ++--- gcc/testsuite/gcc.target/powerpc/float128-cmove.c | 6 - .../gcc.target/powerpc/float128-minmax-3.c | 6 - gcc/testsuite/gcc.target/powerpc/pr118541-1.c | 28 ++ gcc/testsuite/gcc.target/powerpc/pr118541-2.c | 26 6 files changed, 89 insertions(+), 12 deletions(-) diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 11dfde7f288b..a8a0e153c362 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -16509,11 +16509,17 @@ rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) /* Helper function to return true if the target has instructions to do a compare and set mask instruction that can be used with XXSEL to implement a conditional move. It is also assumed that such a target also supports the - "C" minimum and maximum instructions. */ + "C" minimum and maximum instructions. + + However, these instructions will trap if given a signaling NaN, so we can + only use them if NaNs are not expected. */ static bool have_compare_and_set_mask (machine_mode mode) { + if (!flag_finite_math_only) +return false; + switch (mode) { case E_SFmode: diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 65da0c653304..1f8cfcf0d255 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -5699,6 +5699,10 @@ "fsel %0,%1,%2,%3" [(set_attr "type" "fp")]) +;; On power9, we can generate XSCMP{EQ,GT,GE}DP and XXSEL to do a floating +;; point conditional move. However, these instructions trap if one of the +;; arguments is a signalling NaN. Therefore we can only do this optimize if +;; NaNs are not expected in the code. (define_insn_and_split "*movcc_p9" [(set (match_o
[gcc(refs/users/meissner/heads/work206-bugs)] Update ChangeLog.*
https://gcc.gnu.org/g:3d6402aba30c6ed00f607611b73b1d5c6b033523 commit 3d6402aba30c6ed00f607611b73b1d5c6b033523 Author: Michael Meissner Date: Wed May 28 17:20:56 2025 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.bugs | 40 +--- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs index 2779be6810bb..27c0ff898d83 100644 --- a/gcc/ChangeLog.bugs +++ b/gcc/ChangeLog.bugs @@ -1,28 +1,4 @@ - Branch work206-bugs, patch #115 - -Update tests. - -2025-05-22 Michael Meissner - -gcc/testsuite/ - - PR target/118541 - * gcc.target/powerpc/p9-minmax-2.c: Delete test. - * gcc.target/powerpc/float128-minmax-3.c: Use -Ofast, not -O2. - - Branch work206-bugs, patch #114 - -Disable fp cmove on power9 - -2025-05-22 Michael Meissner - -gcc/ - - PR target/118541 - * config/rs6000/rs6000.cc (have_compare_and_set_mask): Disable unless - NaNs are disabled. - - Branch work206-bugs, patch #113 + Branch work206-bugs, patch #120 Fix PR 118541, do not generate unordered fp cmoves for IEEE compares. @@ -63,11 +39,13 @@ However, the XSCMPGTDP instruction traps if one of the arguments is a signaling NaN. This patch disables generating XSCMP{EQ,GT,GE}{DP,QP} instructions unless -ffinite-math-only is in effect so that we do not get a trap. -2025-05-22 Michael Meissner +2025-05-28 Michael Meissner gcc/ PR target/118541 + * config/rs6000/rs6000.cc (have_compare_and_set_mask): Don't do compare + and set mask operations unless -ffinite-math-only. * config/rs6000/rs6000.md (movcc_p9): Disable generating XSCMP{EQ,GT,GE}{DP,QP} unless -ffinite-math-only is in effect. @@ -85,7 +63,15 @@ gcc/testsuite/ PR target/118541 * gcc.target/powerpc/float128-cmove.c: Change optimization flag to -Ofast instead of -O2. - + * gcc.target/powerpc/float128-minmax-3.: Likewise. + * gcc.target/powerpc/p9-minmax-2.c: Delete test, the code is no longer + valid unless NaNs are not handled. + * gcc.target/powerpc/pr118541-1.c: New test. 
+ * gcc.target/powerpc/pr118541-2.c: Likewise. + + Branch work206-bugs, patch #115 was reverted + Branch work206-bugs, patch #114 was reverted + Branch work206-bugs, patch #113 was reverted Branch work206-bugs, patch #112 was reverted Branch work206-bugs, patch #111 was reverted Branch work206-bugs, patch #110 was reverted
[gcc/meissner/heads/work206] (13 commits) Update ChangeLog.*
The branch 'meissner/heads/work206' was updated to point to: 0afb7fa1bd78... Update ChangeLog.* It previously pointed to: 7669ec100a3a... Add ChangeLog.meissner and REVISION. Diff: Summary of changes (added commits): --- 0afb7fa... Update ChangeLog.* (*) 80b6743... Update ChangeLog.* (*) 6d6e74f... Use architecture flags for defining _ARCH_PWR macros. (*) 087bbad... Add rs6000 architecture masks. (*) fde0a6c... Use vector pair load/store for memcpy with -mcpu=future (*) 0335a4b... Add -mcpu=future tests. (*) dfea870... Add -mcpu=future tuning support. (*) e54cfbc... Add support for -mcpu=future (*) 9d47661... Change TARGET_MODULO to TARGET_POWER9. (*) 3474e41... Change TARGET_MODULO to TARGET_POWER9. (*) 8426a40... Change TARGET_POPCNTD to TARGET_POWER7. (*) 212b1fa... Change TARGET_FPRND to TARGET_POWER5X. (*) e60083e... Change TARGET_POPCNTB to TARGET_POWER5. (*) (*) This commit already exists in another branch. Because the reference `refs/users/meissner/heads/work206' matches your hooks.email-new-commits-only configuration, no separate email is sent for this commit.
[gcc(refs/users/meissner/heads/work206-bugs)] Revert changes
https://gcc.gnu.org/g:ec40186f1e6c8615cd19456c1af8fc89d9e1a774 commit ec40186f1e6c8615cd19456c1af8fc89d9e1a774 Author: Michael Meissner Date: Wed May 28 13:00:24 2025 -0400 Revert changes Diff: --- gcc/config/rs6000/rs6000.cc| 8 +-- gcc/config/rs6000/rs6000.md| 27 -- gcc/testsuite/gcc.target/powerpc/float128-cmove.c | 6 + .../gcc.target/powerpc/float128-minmax-3.c | 6 + 4 files changed, 12 insertions(+), 35 deletions(-) diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index a8a0e153c362..11dfde7f288b 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -16509,17 +16509,11 @@ rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) /* Helper function to return true if the target has instructions to do a compare and set mask instruction that can be used with XXSEL to implement a conditional move. It is also assumed that such a target also supports the - "C" minimum and maximum instructions. - - However, these instructions will trap if given a signaling NaN, so we can - only use them if NaNs are not expected. */ + "C" minimum and maximum instructions. */ static bool have_compare_and_set_mask (machine_mode mode) { - if (!flag_finite_math_only) -return false; - switch (mode) { case E_SFmode: diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 1f8cfcf0d255..65da0c653304 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -5699,10 +5699,6 @@ "fsel %0,%1,%2,%3" [(set_attr "type" "fp")]) -;; On power9, we can generate XSCMP{EQ,GT,GE}DP and XXSEL to do a floating -;; point conditional move. However, these instructions trap if one of the -;; arguments is a signalling NaN. Therefore we can only do this optimize if -;; NaNs are not expected in the code. 
(define_insn_and_split "*movcc_p9" [(set (match_operand:SFDF 0 "vsx_register_operand" "=&wa,wa") (if_then_else:SFDF @@ -5712,7 +5708,7 @@ (match_operand:SFDF 4 "vsx_register_operand" "wa,wa") (match_operand:SFDF 5 "vsx_register_operand" "wa,wa"))) (clobber (match_scratch:V2DI 6 "=0,&wa"))] - "TARGET_P9_MINMAX && flag_finite_math_only" + "TARGET_P9_MINMAX" "#" "&& 1" [(set (match_dup 6) @@ -5744,7 +5740,7 @@ (match_operand:SFDF 4 "vsx_register_operand" "wa,wa") (match_operand:SFDF 5 "vsx_register_operand" "wa,wa"))) (clobber (match_scratch:V2DI 6 "=0,&wa"))] - "TARGET_P9_MINMAX && flag_finite_math_only" + "TARGET_P9_MINMAX" "#" "&& 1" [(set (match_dup 6) @@ -5779,7 +5775,7 @@ (match_operand:SFDF 3 "vsx_register_operand" "wa")]) (match_operand:V2DI 4 "all_ones_constant" "") (match_operand:V2DI 5 "zero_constant" "")))] - "TARGET_P9_MINMAX && flag_finite_math_only" + "TARGET_P9_MINMAX" "xscmp%V1dp %x0,%x2,%x3" [(set_attr "type" "fpcompare")]) @@ -5789,23 +5785,18 @@ (match_operand:V2DI 2 "zero_constant" "")) (match_operand:SFDF 3 "vsx_register_operand" "wa") (match_operand:SFDF 4 "vsx_register_operand" "wa")))] - "TARGET_P9_MINMAX && flag_finite_math_only" + "TARGET_P9_MINMAX" "xxsel %x0,%x4,%x3,%x1" [(set_attr "type" "vecmove")]) ;; Support for ISA 3.1 IEEE 128-bit conditional move. The mode used in the ;; comparison must be the same as used in the move. -;; -;; On power10, we can generate XSCMP{EQ,GT,GE}QP and XXSEL to do a floating -;; point conditional move for IEEE 128-bit values. However, these instructions -;; trap if one of the arguments is a signalling NaN. Therefore we can only do -;; this optimize if NaNs are not expected in the code. 
(define_expand "movcc" [(set (match_operand:IEEE128 0 "gpc_reg_operand") (if_then_else:IEEE128 (match_operand 1 "comparison_operator") (match_operand:IEEE128 2 "gpc_reg_operand") (match_operand:IEEE128 3 "gpc_reg_operand")))] - "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only" + "TARGET_POWER10 && TARGET_FLOAT128_HW" { if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3])) DONE; @@ -5822,7 +5813,7 @@ (match_operand:IEEE128 4 "altivec_register_operand" "v,v") (match_operand:IEEE128 5 "altivec_register_operand" "v,v"))) (clobber (match_scratch:V2DI 6 "=0,&v"))] - "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only" + "TARGET_POWER10 && TARGET_FLOAT128_HW" "#" "&& 1" [(set (match_dup 6) @@ -5854,7 +5845,7 @@ (match_operand:IEEE128 4 "altivec_register_operand" "v,v") (match_operand:IEEE128 5 "altivec_register_operand" "v,v"))) (clobber (match_scratch:V2DI 6 "=0,&v"))] - "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only" + "TARGET_POWER10 && TARG
[gcc r16-932] RISC-V: Reconcile the existing test for avg_floor
https://gcc.gnu.org/g:ef0c2ed1382d6edc26ba63322e1009f9acf97efd commit r16-932-gef0c2ed1382d6edc26ba63322e1009f9acf97efd Author: Pan Li Date: Tue May 27 10:24:56 2025 +0800 RISC-V: Reconcile the existing test for avg_floor Some existing avg_floor tests need to be updated due to the change to leverage vaadd.vv directly. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vls/avg-1.c: Update asm check to vaadd. * gcc.target/riscv/rvv/autovec/vls/avg-2.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/avg-3.c: Ditto. * gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c: Ditto. * gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c: Ditto. Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c | 5 ++--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c | 5 ++--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c | 5 ++--- gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c | 7 ++- gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c | 7 ++- 5 files changed, 10 insertions(+), 19 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c index 30e60d520d68..4920fa6ad417 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c @@ -25,9 +25,8 @@ DEF_AVG_FLOOR (uint8_t, uint16_t, 512) DEF_AVG_FLOOR (uint8_t, uint16_t, 1024) DEF_AVG_FLOOR (uint8_t, uint16_t, 2048) -/* { dg-final { scan-assembler-times {vwadd\.vv} 10 } } */ -/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 10 } } */ -/* { dg-final { scan-assembler-times {vnsra\.wi} 10 } } */ +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 20 } } */ +/* { dg-final { scan-assembler-times {vaadd\.vv} 10 } } */ /* { dg-final { scan-assembler-times {vaaddu\.vv} 10 } } */ /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c index 33df429a634e..c6a120b76137 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c @@ -23,9 +23,8 @@ DEF_AVG_FLOOR (uint16_t, uint32_t, 256) DEF_AVG_FLOOR (uint16_t, uint32_t, 512) DEF_AVG_FLOOR (uint16_t, uint32_t, 1024) -/* { dg-final { scan-assembler-times {vwadd\.vv} 9 } } */ -/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 9 } } */ -/* { dg-final { scan-assembler-times {vnsra\.wi} 9 } } */ +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 18 } } */ +/* { dg-final { scan-assembler-times {vaadd\.vv} 9 } } */ /* { dg-final { scan-assembler-times {vaaddu\.vv} 9 } } */ /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c index 9058905e3f5b..2838c1ed106d 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c @@ -21,9 +21,8 @@ DEF_AVG_FLOOR (uint32_t, uint64_t, 128) DEF_AVG_FLOOR (uint32_t, uint64_t, 256) DEF_AVG_FLOOR (uint32_t, uint64_t, 512) -/* { dg-final { scan-assembler-times {vwadd\.vv} 8 } } */ -/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 8 } } */ -/* { dg-final { scan-assembler-times {vnsra\.wi} 8 } } */ +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 16 } } */ +/* { dg-final { scan-assembler-times {vaadd\.vv} 8 } } */ /* { dg-final { scan-assembler-times {vaaddu\.vv} 8 } } */ /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c index 5880ccca4775..b7246a38dba7 100644 --- 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c @@ -3,9 +3,6 @@ #include "vec-avg-template.h" -/* { dg-final { scan-assembler-times {\tvwadd\.vv} 6 } } */ -/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 3 } } */ -/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 3 } } */ -/* { dg-final { scan-assembler-times {\tvadd\.vi} 3 } } */ -/* { dg-final { scan-assembler-times {\tvnsra.wi} 6 } } */ +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 6 } } */ /* { dg-final { scan-assembler-times {vaaddu\.vv} 6 } } */ +/* { dg-final { scan-assembler-times {vaadd\.vv} 3 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-
[gcc r15-9740] libgomp.fortran/metadirective-1.f90: Expect 'error:' for nvptx compile [PR118694]
https://gcc.gnu.org/g:c38760cdb2ac21f690647e965acc0eaba3250f6d commit r15-9740-gc38760cdb2ac21f690647e965acc0eaba3250f6d Author: Tobias Burnus Date: Wed May 28 15:14:14 2025 +0200 libgomp.fortran/metadirective-1.f90: Expect 'error:' for nvptx compile [PR118694] This should have been part of commit r16-838-gb3d07ec7ac2ccd or r16-883-g5d6ed6d604ff94 - all showing the same issue: '!$omp target' followed by a metadirective with 'teams'; if the metadirective cannot be early resolved, a diagnostic error is shown about using directives between 'target' and 'teams'. While the message is misleading, the problem is that the host invokes 'target' differently when 'teams' is present; in this case, host fallback + amdgcn offload require the no-teams case, nvptx offload the teams case such that it only can be resolved at runtime. Mark the error as 'dg-bogus + xfail' to silence the FAIL, when nvptx offloading is compiled for. (If not, the metadirective can be resolved early during compilation.) libgomp/ChangeLog: PR middle-end/118694 * testsuite/libgomp.fortran/metadirective-1.f90: xfail when compiling (also) for nvptx offloading as an error is then expected. (cherry picked from commit 5b2e7afb135aa10e17e51b891d4b2c85ee541ade) Diff: --- libgomp/testsuite/libgomp.fortran/metadirective-1.f90 | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 b/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 index 7b3e09f7c2a5..d6f4d5bbde69 100644 --- a/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 +++ b/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 @@ -1,4 +1,5 @@ -! { dg-do run } +! { dg-do run { target { ! offload_target_nvptx } } } +! { dg-do compile { target offload_target_nvptx } } program test implicit none @@ -33,6 +34,10 @@ program test contains subroutine f (x, y, z) integer :: x(N), y(N), z(N) +! The following fails as on the host the target side cannot be +! 
resolved - and the 'teams' or not status affects how 'target' +! is called. -> See PR118694, esp. comment 9. +! Note also the dg-do compile above for offload_target_nvptx !$omp target map (to: x, y) map(from: z) block @@ -43,6 +48,7 @@ contains z(i) = x(i) * y(i) enddo end block +! { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-9 } */ end subroutine subroutine g (x, y, z) integer :: x(N), y(N), z(N) @@ -56,6 +62,7 @@ contains z(i) = x(i) * y(i) enddo end block +! { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-9 } */ !$omp end target end subroutine end program
[gcc r16-937] vect: Remove non-SLP paths in strided slp/elementwise.
https://gcc.gnu.org/g:0f73ae763c02ad3b2917c33eecba9b47efdcc73b commit r16-937-g0f73ae763c02ad3b2917c33eecba9b47efdcc73b Author: Robin Dapp Date: Tue May 20 11:23:34 2025 +0200 vect: Remove non-SLP paths in strided slp/elementwise. This patch removes non-SLP paths in the VMAT_STRIDED_SLP/VMAT_ELEMENTWISE part of vectorizable_load. gcc/ChangeLog: * tree-vect-stmts.cc (vectorizable_load): Remove non-SLP paths. Diff: --- gcc/tree-vect-stmts.cc | 49 ++--- 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 66958543bf86..3710694ac75d 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -10689,8 +10689,7 @@ vectorizable_load (vec_info *vinfo, first_dr_info = dr_info; } - if (slp && grouped_load - && memory_access_type == VMAT_STRIDED_SLP) + if (grouped_load && memory_access_type == VMAT_STRIDED_SLP) { group_size = DR_GROUP_SIZE (first_stmt_info); ref_type = get_group_alias_ptr_type (first_stmt_info); @@ -10830,22 +10829,20 @@ vectorizable_load (vec_info *vinfo, ltype = build_aligned_type (ltype, align * BITS_PER_UNIT); } - if (slp) + /* For SLP permutation support we need to load the whole group, +not only the number of vector stmts the permutation result +fits in. */ + if (slp_perm) { - /* For SLP permutation support we need to load the whole group, -not only the number of vector stmts the permutation result -fits in. */ - if (slp_perm) - { - /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for -variable VF. */ - unsigned int const_vf = vf.to_constant (); - ncopies = CEIL (group_size * const_vf, const_nunits); - dr_chain.create (ncopies); - } - else - ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for +variable VF. 
*/ + unsigned int const_vf = vf.to_constant (); + ncopies = CEIL (group_size * const_vf, const_nunits); + dr_chain.create (ncopies); } + else + ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + unsigned int group_el = 0; unsigned HOST_WIDE_INT elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); @@ -10883,14 +10880,13 @@ vectorizable_load (vec_info *vinfo, CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_temp); group_el += lnel; - if (! slp - || group_el == group_size) + if (group_el == group_size) { n_groups++; /* When doing SLP make sure to not load elements from the next vector iteration, those will not be accessed so just use the last element again. See PR107451. */ - if (!slp || known_lt (n_groups, vf)) + if (known_lt (n_groups, vf)) { tree newoff = copy_ssa_name (running_off); gimple *incr @@ -10938,19 +10934,10 @@ vectorizable_load (vec_info *vinfo, if (!costing_p) { - if (slp) - { - if (slp_perm) - dr_chain.quick_push (gimple_assign_lhs (new_stmt)); - else - slp_node->push_vec_def (new_stmt); - } + if (slp_perm) + dr_chain.quick_push (gimple_assign_lhs (new_stmt)); else - { - if (j == 0) - *vec_stmt = new_stmt; - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); - } + slp_node->push_vec_def (new_stmt); } } if (slp_perm)
[gcc r16-936] RISC-V: Avoid division by zero in check_builtin_call [PR120436].
https://gcc.gnu.org/g:6ccf845d9fa157e7ebcf2c589a9fc5d8f298961f commit r16-936-g6ccf845d9fa157e7ebcf2c589a9fc5d8f298961f Author: Robin Dapp Date: Mon May 26 16:16:36 2025 +0200 RISC-V: Avoid division by zero in check_builtin_call [PR120436]. In check_builtin_call we eventually perform a division by zero when no vector modes are present. This patch just avoids the division in that case. PR target/120436 gcc/ChangeLog: * config/riscv/riscv-vector-builtins-shapes.cc (struct vset_def): Avoid division by zero. (struct vget_def): Ditto. * config/riscv/riscv-vector-builtins.h (struct function_group_info): Use required_extensions_specified instead of duplicating code. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr120436.c: New test. Diff: --- gcc/config/riscv/riscv-vector-builtins-shapes.cc | 4 +++ gcc/config/riscv/riscv-vector-builtins.h | 40 +- gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c | 16 + 3 files changed, 21 insertions(+), 39 deletions(-) diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc b/gcc/config/riscv/riscv-vector-builtins-shapes.cc index b855d4c5fa5a..9832eb9e3d1b 100644 --- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc +++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc @@ -908,6 +908,8 @@ struct vset_def : public build_base { poly_int64 outer_size = GET_MODE_SIZE (c.arg_mode (0)); poly_int64 inner_size = GET_MODE_SIZE (c.arg_mode (2)); +if (maybe_eq (inner_size, 0)) + return false; unsigned int nvecs = exact_div (outer_size, inner_size).to_constant (); return c.require_immediate (1, 0, nvecs - 1); } @@ -920,6 +922,8 @@ struct vget_def : public misc_def { poly_int64 outer_size = GET_MODE_SIZE (c.arg_mode (0)); poly_int64 inner_size = GET_MODE_SIZE (c.ret_mode ()); +if (maybe_eq (inner_size, 0)) + return false; unsigned int nvecs = exact_div (outer_size, inner_size).to_constant (); return c.require_immediate (1, 0, nvecs - 1); } diff --git a/gcc/config/riscv/riscv-vector-builtins.h b/gcc/config/riscv/riscv-vector-builtins.h 
index ffc289364b06..1f2587ab6afa 100644 --- a/gcc/config/riscv/riscv-vector-builtins.h +++ b/gcc/config/riscv/riscv-vector-builtins.h @@ -331,45 +331,7 @@ struct function_group_info /* Return true if required extension is enabled */ bool match (required_ext ext_value) const { -switch (ext_value) -{ - case VECTOR_EXT: -return TARGET_VECTOR; - case ZVBB_EXT: -return TARGET_ZVBB; - case ZVBB_OR_ZVKB_EXT: -return (TARGET_ZVBB || TARGET_ZVKB); - case ZVBC_EXT: -return TARGET_ZVBC; - case ZVKG_EXT: -return TARGET_ZVKG; - case ZVKNED_EXT: -return TARGET_ZVKNED; - case ZVKNHA_OR_ZVKNHB_EXT: -return (TARGET_ZVKNHA || TARGET_ZVKNHB); - case ZVKNHB_EXT: -return TARGET_ZVKNHB; - case ZVKSED_EXT: -return TARGET_ZVKSED; - case ZVKSH_EXT: -return TARGET_ZVKSH; - case XTHEADVECTOR_EXT: - return TARGET_XTHEADVECTOR; - case ZVFBFMIN_EXT: - return TARGET_ZVFBFMIN; - case ZVFBFWMA_EXT: - return TARGET_ZVFBFWMA; - case XSFVQMACCQOQ_EXT: - return TARGET_XSFVQMACCQOQ; - case XSFVQMACCDOD_EXT: - return TARGET_XSFVQMACCDOD; - case XSFVFNRCLIPXFQF_EXT: - return TARGET_XSFVFNRCLIPXFQF; - case XSFVCP_EXT: - return TARGET_XSFVCP; - default: -gcc_unreachable (); -} +return required_extensions_specified (ext_value); } /* The base name, as a string. */ const char *base_name; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c new file mode 100644 index ..d22091e59490 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O0" } */ + +/* Use -O0 as otherwise the unused intrinsics get + optimized away. We used to ICE here instead of + emitting an error. 
*/ + +#include "riscv_vector.h" + +void +clean_subreg (int32_t *in, int32_t *out, size_t m) /* { dg-error {this operation requires the RVV ISA extension} } */ +{ + vint16m8_t v24, v8, v16; + vint32m8_t result = __riscv_vle32_v_i32m8 (in, 32); /* { dg-error {built-in function '__riscv_vle32_v_i32m8\(in, 32\)' requires the 'v' ISA extension} } */ + vint32m1_t v0 = __riscv_vget_v_i32m8_i32m1 (result, 0); +}
[gcc r13-9724] tree-sra: Do not create stores into const aggregates (PR111873)
https://gcc.gnu.org/g:a067a18d42e338aea990347bb4d16d6a852c4480 commit r13-9724-ga067a18d42e338aea990347bb4d16d6a852c4480 Author: Martin Jambor Date: Wed May 14 12:08:24 2025 +0200 tree-sra: Do not create stores into const aggregates (PR111873) This patch fixes (hopefully the) one remaining place where gimple SRA was still creating a store into const aggregates. It occurs when there is a replacement for a load but that replacement is not type compatible - typically because it is a single field structure. I have used testcases from duplicates because the original test-case no longer reproduces for me. gcc/ChangeLog: 2025-05-13 Martin Jambor PR tree-optimization/111873 * tree-sra.cc (sra_modify_expr): When processing a load which has a type-incompatible replacement, do not store the contents of the replacement into the original aggregate when that aggregate is const. gcc/testsuite/ChangeLog: 2025-05-13 Martin Jambor * gcc.dg/ipa/pr120044-1.c: New test. * gcc.dg/ipa/pr120044-2.c: Likewise. * gcc.dg/tree-ssa/pr114864.c: Likewise. 
(cherry picked from commit 9d039eff453f777c58642ff16178c1ce2a4be6ab) Diff: --- gcc/testsuite/gcc.dg/ipa/pr120044-1.c| 17 + gcc/testsuite/gcc.dg/ipa/pr120044-2.c| 17 + gcc/testsuite/gcc.dg/tree-ssa/pr114864.c | 15 +++ gcc/tree-sra.cc | 4 +++- 4 files changed, 52 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/ipa/pr120044-1.c b/gcc/testsuite/gcc.dg/ipa/pr120044-1.c new file mode 100644 index ..f9fee3e85afb --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/pr120044-1.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -fno-early-inlining -fno-tree-fre -fno-tree-pre -fno-code-hoisting -fno-inline" } */ + +struct a { + int b; +} const c; +void d(char p, struct a e) { + while (e.b) +; +} +static unsigned short f(const struct a g) { + d(g.b, g); + return g.b; +} +int main() { + return f(c); +} diff --git a/gcc/testsuite/gcc.dg/ipa/pr120044-2.c b/gcc/testsuite/gcc.dg/ipa/pr120044-2.c new file mode 100644 index ..5130791f5444 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/pr120044-2.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -fno-early-inlining -fno-tree-fre -fno-tree-pre -fno-code-hoisting -fno-ipa-cp" } */ + +struct a { + int b; +} const c; +void d(char p, struct a e) { + while (e.b) +; +} +static unsigned short f(const struct a g) { + d(g.b, g); + return g.b; +} +int main() { + return f(c); +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114864.c b/gcc/testsuite/gcc.dg/tree-ssa/pr114864.c new file mode 100644 index ..cd9b94c094fc --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114864.c @@ -0,0 +1,15 @@ +/* { dg-do run } */ +/* { dg-options "-O1 -fno-tree-dce -fno-tree-fre" } */ + +struct a { + int b; +} const c; +void d(const struct a f) {} +void e(const struct a f) { + f.b == 0 ? 
1 : f.b; + d(f); +} +int main() { + e(c); + return 0; +} diff --git a/gcc/tree-sra.cc b/gcc/tree-sra.cc index ec499fdd5109..c3c0a70338d2 100644 --- a/gcc/tree-sra.cc +++ b/gcc/tree-sra.cc @@ -3988,8 +3988,10 @@ sra_modify_expr (tree *expr, gimple_stmt_iterator *gsi, bool write) } else { - gassign *stmt; + if (TREE_READONLY (access->base)) + return false; + gassign *stmt; if (access->grp_partial_lhs) repl = force_gimple_operand_gsi (gsi, repl, true, NULL_TREE, true, GSI_SAME_STMT);
[gcc r16-941] Set znver5 addss cost to 2 again
https://gcc.gnu.org/g:8859a06fc8143561dd94ef2342234dbc5ccfd937 commit r16-941-g8859a06fc8143561dd94ef2342234dbc5ccfd937 Author: Jan Hubicka Date: Wed May 28 23:43:51 2025 +0200 Set znver5 addss cost to 2 again since uses of addss for other purposes than modelling FP addition/subtraction should be gone now, this patch sets addss cost back to 2. gcc/ChangeLog: PR target/119298 * config/i386/x86-tune-costs.h (struct processor_costs): Set addss cost back to 2. Diff: --- gcc/config/i386/x86-tune-costs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index e50912935094..b08081e37cfb 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -2252,7 +2252,7 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ /* ADDSS has throughput 2 and latency 2 (in some cases when source is another addition). */ - COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */ /* MULSS has throughput 2 and latency 3. */ COSTS_N_INSNS (3), /* cost of MULSS instruction. */ COSTS_N_INSNS (3), /* cost of MULSD instruction. */
[gcc r16-942] [AUTOFDO] Fix autogen remake issue
https://gcc.gnu.org/g:fcb60292984fa7181ec91d7f81fd18549d1aaf39 commit r16-942-gfcb60292984fa7181ec91d7f81fd18549d1aaf39 Author: Kugan Vivekanandarajah Date: Thu May 29 08:47:19 2025 +1000 [AUTOFDO] Fix autogen remake issue Fix autogen issue introduced by commit 86dc974cf30f926a014438a5fccdc9d41e26282b ChangeLog: * Makefile.def: Fix typo in cpu_type * Makefile.tpl: Add cpu_type Signed-off-by: Kugan Vivekanandarajah Diff: --- Makefile.def | 2 +- Makefile.tpl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile.def b/Makefile.def index b0382713609a..fa60f6ea0b90 100644 --- a/Makefile.def +++ b/Makefile.def @@ -758,7 +758,7 @@ bootstrap_stage = { bootstrap_target=profiledbootstrap ; }; bootstrap_stage = { id=autoprofile ; prev=1 ; -autoprofile="$$s/gcc/config/@cpu_typet@/$(AUTO_PROFILE)" ; }; +autoprofile="$$s/gcc/config/$(cpu_type)/$(AUTO_PROFILE)" ; }; bootstrap_stage = { id=autofeedback ; prev=autoprofile ; bootstrap_target=autoprofiledbootstrap ; diff --git a/Makefile.tpl b/Makefile.tpl index 7dccf91ed8ec..b164a17e700a 100644 --- a/Makefile.tpl +++ b/Makefile.tpl @@ -41,6 +41,7 @@ build_alias=@build_noncanonical@ build_vendor=@build_vendor@ build_os=@build_os@ build=@build@ +cpu_type=@cpu_type@ host_alias=@host_noncanonical@ host_vendor=@host_vendor@ host_os=@host_os@
[gcc r16-944] c++: add __is_*destructible builtins [PR107600]
https://gcc.gnu.org/g:0629924777ea20d56d9ea40c3915eb0327a22ac7 commit r16-944-g0629924777ea20d56d9ea40c3915eb0327a22ac7 Author: Jason Merrill Date: Wed May 28 11:42:00 2025 -0400 c++: add __is_*destructible builtins [PR107600] Typically "does this class have a trivial destructor" is the wrong question to ask, we rather want "can I destroy this class trivially", thus the std::is_trivially_destructible standard trait. Let's provide a builtin for it, and complain about asking whether a deleted destructor is trivial. Clang and MSVC also have these traits. PR c++/107600 gcc/cp/ChangeLog: * cp-trait.def (IS_DESTRUCTIBLE, IS_NOTHROW_DESTRUCTIBLE) (IS_TRIVIALLY_DESTRUCTIBLE): New. * constraint.cc (diagnose_trait_expr): Explain them. * method.cc (destructible_expr): New. (is_xible_helper): Use it. * semantics.cc (finish_trait_expr): Handle new traits. (trait_expr_value): Likewise. Complain about asking whether a deleted dtor is trivial. gcc/testsuite/ChangeLog: * g++.dg/ext/is_destructible1.C: New test. 
Diff: --- gcc/cp/constraint.cc| 9 + gcc/cp/method.cc| 15 gcc/cp/semantics.cc | 20 ++ gcc/testsuite/g++.dg/ext/is_destructible1.C | 60 + gcc/cp/cp-trait.def | 3 ++ 5 files changed, 107 insertions(+) diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc index 44fb086c6306..90625707043f 100644 --- a/gcc/cp/constraint.cc +++ b/gcc/cp/constraint.cc @@ -3100,6 +3100,9 @@ diagnose_trait_expr (tree expr, tree args) case CPTK_IS_CONVERTIBLE: inform (loc, " %qT is not convertible from %qE", t2, t1); break; +case CPTK_IS_DESTRUCTIBLE: + inform (loc, " %qT is not destructible", t1); + break; case CPTK_IS_EMPTY: inform (loc, " %qT is not an empty class", t1); break; @@ -3145,6 +3148,9 @@ diagnose_trait_expr (tree expr, tree args) case CPTK_IS_NOTHROW_CONVERTIBLE: inform (loc, " %qT is not nothrow convertible from %qE", t2, t1); break; +case CPTK_IS_NOTHROW_DESTRUCTIBLE: + inform (loc, " %qT is not nothrow destructible", t1); + break; case CPTK_IS_NOTHROW_INVOCABLE: if (!t2) inform (loc, " %qT is not nothrow invocable", t1); @@ -3194,6 +3200,9 @@ diagnose_trait_expr (tree expr, tree args) case CPTK_IS_TRIVIALLY_COPYABLE: inform (loc, " %qT is not trivially copyable", t1); break; +case CPTK_IS_TRIVIALLY_DESTRUCTIBLE: + inform (loc, " %qT is not trivially destructible", t1); + break; case CPTK_IS_UNBOUNDED_ARRAY: inform (loc, " %qT is not an unbounded array", t1); break; diff --git a/gcc/cp/method.cc b/gcc/cp/method.cc index 092bae277875..3a675d9f8723 100644 --- a/gcc/cp/method.cc +++ b/gcc/cp/method.cc @@ -2330,6 +2330,19 @@ constructible_expr (tree to, tree from) return expr; } +/* Return declval().~T() treated as an unevaluated operand. 
*/ + +static tree +destructible_expr (tree to) +{ + cp_unevaluated cp_uneval_guard; + int flags = LOOKUP_NORMAL|LOOKUP_DESTRUCTOR; + to = build_trait_object (to); + tree r = build_delete (input_location, TREE_TYPE (to), to, +sfk_complete_destructor, flags, 0, tf_none); + return r; +} + /* Returns a tree iff TO is assignable (if CODE is MODIFY_EXPR) or constructible (otherwise) from FROM, which is a single type for assignment or a list of types for construction. */ @@ -2346,6 +2359,8 @@ is_xible_helper (enum tree_code code, tree to, tree from, bool trivial) tree expr; if (code == MODIFY_EXPR) expr = assignable_expr (to, from); + else if (code == BIT_NOT_EXPR) +expr = destructible_expr (to); else if (trivial && TREE_VEC_LENGTH (from) > 1 && cxx_dialect < cxx20) return error_mark_node; // only 0- and 1-argument ctors can be trivial diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index ef4a668a4e4d..241f2730878b 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -13235,6 +13235,14 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree type2) case CPTK_HAS_TRIVIAL_DESTRUCTOR: type1 = strip_array_types (type1); + if (CLASS_TYPE_P (type1) && type_build_dtor_call (type1)) + { + deferring_access_check_sentinel dacs (dk_no_check); + tree fn = get_dtor (type1, tf_none); + if (!fn && !seen_error ()) + warning (0, "checking %qs for type %qT with a destructor that " +"cannot be called", "__has_trivial_destructor", type1); + } return (trivial_type_p (type1) || type_code1 == REFERENCE_TYPE || (CLASS_TYPE_P (type1) && TYPE_HAS_TRIVIAL_DESTRUCTOR (type1))); @@ -13290,6 +13298,9